summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS2
-rw-r--r--Makefile.target3
-rw-r--r--arch_init.c1611
-rw-r--r--cpus.c11
-rw-r--r--default-configs/mips-softmmu.mak5
-rw-r--r--default-configs/mips64-softmmu.mak1
-rw-r--r--default-configs/mips64el-softmmu.mak1
-rw-r--r--default-configs/mipsel-softmmu.mak5
-rw-r--r--disas/mips.c3
-rw-r--r--docs/migration.txt11
-rw-r--r--exec.c21
-rw-r--r--hw/acpi/ich9.c10
-rw-r--r--hw/acpi/piix4.c10
-rw-r--r--hw/block/fdc.c42
-rw-r--r--hw/char/serial.c41
-rw-r--r--hw/display/qxl.c11
-rw-r--r--hw/display/vga.c11
-rw-r--r--hw/dma/rc4030.c462
-rw-r--r--hw/i386/pc_piix.c2
-rw-r--r--hw/i386/pc_q35.c2
-rw-r--r--hw/ide/core.c32
-rw-r--r--hw/ide/pci.c16
-rw-r--r--hw/input/pckbd.c22
-rw-r--r--hw/input/ps2.c11
-rw-r--r--hw/intc/apic_common.c10
-rw-r--r--hw/isa/lpc_ich9.c10
-rw-r--r--hw/mips/Makefile.objs3
-rw-r--r--hw/mips/mips_jazz.c53
-rw-r--r--hw/mips/mips_malta.c15
-rw-r--r--hw/net/dp8393x.c369
-rw-r--r--hw/net/e1000.c11
-rw-r--r--hw/net/rtl8139.c11
-rw-r--r--hw/net/vmxnet3.c12
-rw-r--r--hw/pci-host/piix.c10
-rw-r--r--hw/scsi/scsi-bus.c11
-rw-r--r--hw/sh4/r2d.c12
-rw-r--r--hw/timer/hpet.c11
-rw-r--r--hw/timer/mc146818rtc.c23
-rw-r--r--hw/usb/hcd-ohci.c11
-rw-r--r--hw/usb/redirect.c34
-rw-r--r--hw/virtio/virtio.c16
-rw-r--r--include/exec/cpu-common.h4
-rw-r--r--include/exec/exec-all.h2
-rw-r--r--include/hw/mips/mips.h11
-rw-r--r--include/migration/migration.h17
-rw-r--r--include/migration/qemu-file.h5
-rw-r--r--include/migration/vmstate.h10
-rw-r--r--include/qemu/typedefs.h2
-rw-r--r--include/sysemu/arch_init.h1
-rw-r--r--include/sysemu/sysemu.h1
-rw-r--r--linux-user/elfload.c29
-rw-r--r--linux-user/main.c2
-rw-r--r--migration/migration.c34
-rw-r--r--migration/qemu-file.c29
-rw-r--r--migration/ram.c1628
-rw-r--r--migration/rdma.c78
-rw-r--r--migration/savevm.c (renamed from savevm.c)257
-rw-r--r--migration/vmstate.c21
-rwxr-xr-xscripts/analyze-migration.py5
-rw-r--r--softmmu_template.h22
-rw-r--r--target-arm/machine.c26
-rw-r--r--target-i386/machine.c81
-rw-r--r--target-mips/cpu.h52
-rw-r--r--target-mips/helper.h11
-rw-r--r--target-mips/machine.c21
-rw-r--r--target-mips/mips-defs.h4
-rw-r--r--target-mips/op_helper.c244
-rw-r--r--target-mips/translate.c802
-rw-r--r--target-mips/translate_init.c37
-rw-r--r--target-ppc/machine.c62
-rw-r--r--target-s390x/machine.c30
-rw-r--r--target-sh4/cpu.c3
-rw-r--r--target-sh4/cpu.h50
-rw-r--r--target-sh4/gdbstub.c8
-rw-r--r--target-sh4/helper.c29
-rw-r--r--target-sh4/helper.h1
-rw-r--r--target-sh4/op_helper.c148
-rw-r--r--target-sh4/translate.c327
-rw-r--r--tests/endianness-test.c2
-rw-r--r--trace-events11
80 files changed, 3737 insertions, 3338 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 4ed82154ce..e728d3a1d2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1014,8 +1014,6 @@ M: Amit Shah <amit.shah@redhat.com>
 S: Maintained
 F: include/migration/
 F: migration/
-F: savevm.c
-F: arch_init.c
 F: scripts/vmstate-static-checker.py
 F: tests/vmstate-static-checker-data/
 
diff --git a/Makefile.target b/Makefile.target
index ec5b92cb60..3e7aafd72d 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -132,9 +132,10 @@ obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
 obj-y += qtest.o bootdevice.o
 obj-y += hw/
 obj-$(CONFIG_KVM) += kvm-all.o
-obj-y += memory.o savevm.o cputlb.o
+obj-y += memory.o cputlb.o
 obj-y += memory_mapping.o
 obj-y += dump.o
+obj-y += migration/ram.o migration/savevm.o
 LIBS := $(libs_softmmu) $(LIBS)
 
 # xen support
diff --git a/arch_init.c b/arch_init.c
index d29447497b..725c638ece 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -22,46 +22,15 @@
  * THE SOFTWARE.
  */
 #include <stdint.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <zlib.h>
-#ifndef _WIN32
-#include <sys/types.h>
-#include <sys/mman.h>
-#endif
-#include "config.h"
-#include "monitor/monitor.h"
 #include "sysemu/sysemu.h"
-#include "qemu/bitops.h"
-#include "qemu/bitmap.h"
 #include "sysemu/arch_init.h"
-#include "audio/audio.h"
-#include "hw/i386/pc.h"
 #include "hw/pci/pci.h"
 #include "hw/audio/audio.h"
-#include "sysemu/kvm.h"
-#include "migration/migration.h"
 #include "hw/i386/smbios.h"
-#include "exec/address-spaces.h"
-#include "hw/audio/pcspk.h"
-#include "migration/page_cache.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
 #include "qmp-commands.h"
-#include "trace.h"
-#include "exec/cpu-all.h"
-#include "exec/ram_addr.h"
 #include "hw/acpi/acpi.h"
-#include "qemu/host-utils.h"
-#include "qemu/rcu_queue.h"
-
-#ifdef DEBUG_ARCH_INIT
-#define DPRINTF(fmt, ...) \
-    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
-#endif
 
 #ifdef TARGET_SPARC
 int graphic_width = 1024;
@@ -111,24 +80,6 @@ int graphic_depth = 32;
 #endif
 
 const uint32_t arch_type = QEMU_ARCH;
-static bool mig_throttle_on;
-static int dirty_rate_high_cnt;
-static void check_guest_throttling(void);
-
-static uint64_t bitmap_sync_count;
-
-/***********************************************************/
-/* ram save/restore */
-
-#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
-#define RAM_SAVE_FLAG_COMPRESS 0x02
-#define RAM_SAVE_FLAG_MEM_SIZE 0x04
-#define RAM_SAVE_FLAG_PAGE     0x08
-#define RAM_SAVE_FLAG_EOS      0x10
-#define RAM_SAVE_FLAG_CONTINUE 0x20
-#define RAM_SAVE_FLAG_XBZRLE   0x40
-/* 0x80 is reserved in migration.h start with 0x100 next */
-#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
 
 static struct defconfig_file {
     const char *filename;
@@ -139,8 +90,6 @@ static struct defconfig_file {
     { NULL }, /* end of list */
 };
 
-static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
-
 int qemu_read_default_config_files(bool userconfig)
 {
     int ret;
@@ -159,1517 +108,6 @@ int qemu_read_default_config_files(bool userconfig)
     return 0;
 }
 
-static inline bool is_zero_range(uint8_t *p, uint64_t size)
-{
-    return buffer_find_nonzero_offset(p, size) == size;
-}
-
-/* struct contains XBZRLE cache and a static page
-   used by the compression */
-static struct {
-    /* buffer used for XBZRLE encoding */
-    uint8_t *encoded_buf;
-    /* buffer for storing page content */
-    uint8_t *current_buf;
-    /* Cache for XBZRLE, Protected by lock. */
-    PageCache *cache;
-    QemuMutex lock;
-} XBZRLE;
-
-/* buffer used for XBZRLE decoding */
-static uint8_t *xbzrle_decoded_buf;
-
-static void XBZRLE_cache_lock(void)
-{
-    if (migrate_use_xbzrle())
-        qemu_mutex_lock(&XBZRLE.lock);
-}
-
-static void XBZRLE_cache_unlock(void)
-{
-    if (migrate_use_xbzrle())
-        qemu_mutex_unlock(&XBZRLE.lock);
-}
-
-/*
- * called from qmp_migrate_set_cache_size in main thread, possibly while
- * a migration is in progress.
- * A running migration maybe using the cache and might finish during this
- * call, hence changes to the cache are protected by XBZRLE.lock().
- */
-int64_t xbzrle_cache_resize(int64_t new_size)
-{
-    PageCache *new_cache;
-    int64_t ret;
-
-    if (new_size < TARGET_PAGE_SIZE) {
-        return -1;
-    }
-
-    XBZRLE_cache_lock();
-
-    if (XBZRLE.cache != NULL) {
-        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
-            goto out_new_size;
-        }
-        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
-                                        TARGET_PAGE_SIZE);
-        if (!new_cache) {
-            error_report("Error creating cache");
-            ret = -1;
-            goto out;
-        }
-
-        cache_fini(XBZRLE.cache);
-        XBZRLE.cache = new_cache;
-    }
-
-out_new_size:
-    ret = pow2floor(new_size);
-out:
-    XBZRLE_cache_unlock();
-    return ret;
-}
-
-/* accounting for migration statistics */
-typedef struct AccountingInfo {
-    uint64_t dup_pages;
-    uint64_t skipped_pages;
-    uint64_t norm_pages;
-    uint64_t iterations;
-    uint64_t xbzrle_bytes;
-    uint64_t xbzrle_pages;
-    uint64_t xbzrle_cache_miss;
-    double xbzrle_cache_miss_rate;
-    uint64_t xbzrle_overflows;
-} AccountingInfo;
-
-static AccountingInfo acct_info;
-
-static void acct_clear(void)
-{
-    memset(&acct_info, 0, sizeof(acct_info));
-}
-
-uint64_t dup_mig_bytes_transferred(void)
-{
-    return acct_info.dup_pages * TARGET_PAGE_SIZE;
-}
-
-uint64_t dup_mig_pages_transferred(void)
-{
-    return acct_info.dup_pages;
-}
-
-uint64_t skipped_mig_bytes_transferred(void)
-{
-    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
-}
-
-uint64_t skipped_mig_pages_transferred(void)
-{
-    return acct_info.skipped_pages;
-}
-
-uint64_t norm_mig_bytes_transferred(void)
-{
-    return acct_info.norm_pages * TARGET_PAGE_SIZE;
-}
-
-uint64_t norm_mig_pages_transferred(void)
-{
-    return acct_info.norm_pages;
-}
-
-uint64_t xbzrle_mig_bytes_transferred(void)
-{
-    return acct_info.xbzrle_bytes;
-}
-
-uint64_t xbzrle_mig_pages_transferred(void)
-{
-    return acct_info.xbzrle_pages;
-}
-
-uint64_t xbzrle_mig_pages_cache_miss(void)
-{
-    return acct_info.xbzrle_cache_miss;
-}
-
-double xbzrle_mig_cache_miss_rate(void)
-{
-    return acct_info.xbzrle_cache_miss_rate;
-}
-
-uint64_t xbzrle_mig_pages_overflow(void)
-{
-    return acct_info.xbzrle_overflows;
-}
-
-/* This is the last block that we have visited serching for dirty pages
- */
-static RAMBlock *last_seen_block;
-/* This is the last block from where we have sent data */
-static RAMBlock *last_sent_block;
-static ram_addr_t last_offset;
-static unsigned long *migration_bitmap;
-static uint64_t migration_dirty_pages;
-static uint32_t last_version;
-static bool ram_bulk_stage;
-
-struct CompressParam {
-    bool start;
-    bool done;
-    QEMUFile *file;
-    QemuMutex mutex;
-    QemuCond cond;
-    RAMBlock *block;
-    ram_addr_t offset;
-};
-typedef struct CompressParam CompressParam;
-
-struct DecompressParam {
-    bool start;
-    QemuMutex mutex;
-    QemuCond cond;
-    void *des;
-    uint8 *compbuf;
-    int len;
-};
-typedef struct DecompressParam DecompressParam;
-
-static CompressParam *comp_param;
-static QemuThread *compress_threads;
-/* comp_done_cond is used to wake up the migration thread when
- * one of the compression threads has finished the compression.
- * comp_done_lock is used to co-work with comp_done_cond.
- */
-static QemuMutex *comp_done_lock;
-static QemuCond *comp_done_cond;
-/* The empty QEMUFileOps will be used by file in CompressParam */
-static const QEMUFileOps empty_ops = { };
-
-static bool compression_switch;
-static bool quit_comp_thread;
-static bool quit_decomp_thread;
-static DecompressParam *decomp_param;
-static QemuThread *decompress_threads;
-static uint8_t *compressed_data_buf;
-
-static int do_compress_ram_page(CompressParam *param);
-
-static void *do_data_compress(void *opaque)
-{
-    CompressParam *param = opaque;
-
-    while (!quit_comp_thread) {
-        qemu_mutex_lock(&param->mutex);
-        /* Re-check the quit_comp_thread in case of
-         * terminate_compression_threads is called just before
-         * qemu_mutex_lock(&param->mutex) and after
-         * while(!quit_comp_thread), re-check it here can make
-         * sure the compression thread terminate as expected.
-         */
-        while (!param->start && !quit_comp_thread) {
-            qemu_cond_wait(&param->cond, &param->mutex);
-        }
-        if (!quit_comp_thread) {
-            do_compress_ram_page(param);
-        }
-        param->start = false;
-        qemu_mutex_unlock(&param->mutex);
-
-        qemu_mutex_lock(comp_done_lock);
-        param->done = true;
-        qemu_cond_signal(comp_done_cond);
-        qemu_mutex_unlock(comp_done_lock);
-    }
-
-    return NULL;
-}
-
-static inline void terminate_compression_threads(void)
-{
-    int idx, thread_count;
-
-    thread_count = migrate_compress_threads();
-    quit_comp_thread = true;
-    for (idx = 0; idx < thread_count; idx++) {
-        qemu_mutex_lock(&comp_param[idx].mutex);
-        qemu_cond_signal(&comp_param[idx].cond);
-        qemu_mutex_unlock(&comp_param[idx].mutex);
-    }
-}
-
-void migrate_compress_threads_join(void)
-{
-    int i, thread_count;
-
-    if (!migrate_use_compression()) {
-        return;
-    }
-    terminate_compression_threads();
-    thread_count = migrate_compress_threads();
-    for (i = 0; i < thread_count; i++) {
-        qemu_thread_join(compress_threads + i);
-        qemu_fclose(comp_param[i].file);
-        qemu_mutex_destroy(&comp_param[i].mutex);
-        qemu_cond_destroy(&comp_param[i].cond);
-    }
-    qemu_mutex_destroy(comp_done_lock);
-    qemu_cond_destroy(comp_done_cond);
-    g_free(compress_threads);
-    g_free(comp_param);
-    g_free(comp_done_cond);
-    g_free(comp_done_lock);
-    compress_threads = NULL;
-    comp_param = NULL;
-    comp_done_cond = NULL;
-    comp_done_lock = NULL;
-}
-
-void migrate_compress_threads_create(void)
-{
-    int i, thread_count;
-
-    if (!migrate_use_compression()) {
-        return;
-    }
-    quit_comp_thread = false;
-    compression_switch = true;
-    thread_count = migrate_compress_threads();
-    compress_threads = g_new0(QemuThread, thread_count);
-    comp_param = g_new0(CompressParam, thread_count);
-    comp_done_cond = g_new0(QemuCond, 1);
-    comp_done_lock = g_new0(QemuMutex, 1);
-    qemu_cond_init(comp_done_cond);
-    qemu_mutex_init(comp_done_lock);
-    for (i = 0; i < thread_count; i++) {
-        /* com_param[i].file is just used as a dummy buffer to save data, set
-         * it's ops to empty.
-         */
-        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
-        comp_param[i].done = true;
-        qemu_mutex_init(&comp_param[i].mutex);
-        qemu_cond_init(&comp_param[i].cond);
-        qemu_thread_create(compress_threads + i, "compress",
-                           do_data_compress, comp_param + i,
-                           QEMU_THREAD_JOINABLE);
-    }
-}
-
-/**
- * save_page_header: Write page header to wire
- *
- * If this is the 1st block, it also writes the block identification
- *
- * Returns: Number of bytes written
- *
- * @f: QEMUFile where to send the data
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- *          in the lower bits, it contains flags
- */
-static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
-{
-    size_t size;
-
-    qemu_put_be64(f, offset);
-    size = 8;
-
-    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
-        qemu_put_byte(f, strlen(block->idstr));
-        qemu_put_buffer(f, (uint8_t *)block->idstr,
-                        strlen(block->idstr));
-        size += 1 + strlen(block->idstr);
-    }
-    return size;
-}
-
-/* Update the xbzrle cache to reflect a page that's been sent as all 0.
- * The important thing is that a stale (not-yet-0'd) page be replaced
- * by the new data.
- * As a bonus, if the page wasn't in the cache it gets added so that
- * when a small write is made into the 0'd page it gets XBZRLE sent
- */
-static void xbzrle_cache_zero_page(ram_addr_t current_addr)
-{
-    if (ram_bulk_stage || !migrate_use_xbzrle()) {
-        return;
-    }
-
-    /* We don't care if this fails to allocate a new cache page
-     * as long as it updated an old one */
-    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
-                 bitmap_sync_count);
-}
-
-#define ENCODING_FLAG_XBZRLE 0x1
-
-/**
- * save_xbzrle_page: compress and send current page
- *
- * Returns: 1 means that we wrote the page
- *          0 means that page is identical to the one already sent
- *          -1 means that xbzrle would be longer than normal
- *
- * @f: QEMUFile where to send the data
- * @current_data:
- * @current_addr:
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
-                            ram_addr_t current_addr, RAMBlock *block,
-                            ram_addr_t offset, bool last_stage,
-                            uint64_t *bytes_transferred)
-{
-    int encoded_len = 0, bytes_xbzrle;
-    uint8_t *prev_cached_page;
-
-    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
-        acct_info.xbzrle_cache_miss++;
-        if (!last_stage) {
-            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
-                             bitmap_sync_count) == -1) {
-                return -1;
-            } else {
-                /* update *current_data when the page has been
-                   inserted into cache */
-                *current_data = get_cached_data(XBZRLE.cache, current_addr);
-            }
-        }
-        return -1;
-    }
-
-    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
-
-    /* save current buffer into memory */
-    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
-
-    /* XBZRLE encoding (if there is no overflow) */
-    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
-                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
-                                       TARGET_PAGE_SIZE);
-    if (encoded_len == 0) {
-        DPRINTF("Skipping unmodified page\n");
-        return 0;
-    } else if (encoded_len == -1) {
-        DPRINTF("Overflow\n");
-        acct_info.xbzrle_overflows++;
-        /* update data in the cache */
-        if (!last_stage) {
-            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
-            *current_data = prev_cached_page;
-        }
-        return -1;
-    }
-
-    /* we need to update the data in the cache, in order to get the same data */
-    if (!last_stage) {
-        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
-    }
-
-    /* Send XBZRLE based compressed page */
-    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
-    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
-    qemu_put_be16(f, encoded_len);
-    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
-    bytes_xbzrle += encoded_len + 1 + 2;
-    acct_info.xbzrle_pages++;
-    acct_info.xbzrle_bytes += bytes_xbzrle;
-    *bytes_transferred += bytes_xbzrle;
-
-    return 1;
-}
-
-static inline
-ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
-                                                 ram_addr_t start)
-{
-    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
-    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
-    uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
-    unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
-
-    unsigned long next;
-
-    if (ram_bulk_stage && nr > base) {
-        next = nr + 1;
-    } else {
-        next = find_next_bit(migration_bitmap, size, nr);
-    }
-
-    if (next < size) {
-        clear_bit(next, migration_bitmap);
-        migration_dirty_pages--;
-    }
-    return (next - base) << TARGET_PAGE_BITS;
-}
-
-static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
-{
-    migration_dirty_pages +=
-        cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length);
-}
-
-
-/* Fix me: there are too many global variables used in migration process. */
-static int64_t start_time;
-static int64_t bytes_xfer_prev;
-static int64_t num_dirty_pages_period;
-static uint64_t xbzrle_cache_miss_prev;
-static uint64_t iterations_prev;
-
-static void migration_bitmap_sync_init(void)
-{
-    start_time = 0;
-    bytes_xfer_prev = 0;
-    num_dirty_pages_period = 0;
-    xbzrle_cache_miss_prev = 0;
-    iterations_prev = 0;
-}
-
-/* Called with iothread lock held, to protect ram_list.dirty_memory[] */
-static void migration_bitmap_sync(void)
-{
-    RAMBlock *block;
-    uint64_t num_dirty_pages_init = migration_dirty_pages;
-    MigrationState *s = migrate_get_current();
-    int64_t end_time;
-    int64_t bytes_xfer_now;
-
-    bitmap_sync_count++;
-
-    if (!bytes_xfer_prev) {
-        bytes_xfer_prev = ram_bytes_transferred();
-    }
-
-    if (!start_time) {
-        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-    }
-
-    trace_migration_bitmap_sync_start();
-    address_space_sync_dirty_bitmap(&address_space_memory);
-
-    rcu_read_lock();
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
-        migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
-    }
-    rcu_read_unlock();
-
-    trace_migration_bitmap_sync_end(migration_dirty_pages
-                                    - num_dirty_pages_init);
-    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
-    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-
-    /* more than 1 second = 1000 millisecons */
-    if (end_time > start_time + 1000) {
-        if (migrate_auto_converge()) {
-            /* The following detection logic can be refined later. For now:
-               Check to see if the dirtied bytes is 50% more than the approx.
-               amount of bytes that just got transferred since the last time we
-               were in this routine. If that happens >N times (for now N==4)
-               we turn on the throttle down logic */
-            bytes_xfer_now = ram_bytes_transferred();
-            if (s->dirty_pages_rate &&
-               (num_dirty_pages_period * TARGET_PAGE_SIZE >
-                   (bytes_xfer_now - bytes_xfer_prev)/2) &&
-               (dirty_rate_high_cnt++ > 4)) {
-                    trace_migration_throttle();
-                    mig_throttle_on = true;
-                    dirty_rate_high_cnt = 0;
-             }
-             bytes_xfer_prev = bytes_xfer_now;
-        } else {
-             mig_throttle_on = false;
-        }
-        if (migrate_use_xbzrle()) {
-            if (iterations_prev != acct_info.iterations) {
-                acct_info.xbzrle_cache_miss_rate =
-                   (double)(acct_info.xbzrle_cache_miss -
-                            xbzrle_cache_miss_prev) /
-                   (acct_info.iterations - iterations_prev);
-            }
-            iterations_prev = acct_info.iterations;
-            xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
-        }
-        s->dirty_pages_rate = num_dirty_pages_period * 1000
-            / (end_time - start_time);
-        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
-        start_time = end_time;
-        num_dirty_pages_period = 0;
-    }
-    s->dirty_sync_count = bitmap_sync_count;
-}
-
-/**
- * save_zero_page: Send the zero page to the stream
- *
- * Returns: Number of pages written.
- *
- * @f: QEMUFile where to send the data
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- * @p: pointer to the page
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
-                          uint8_t *p, uint64_t *bytes_transferred)
-{
-    int pages = -1;
-
-    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
-        acct_info.dup_pages++;
-        *bytes_transferred += save_page_header(f, block,
-                                               offset | RAM_SAVE_FLAG_COMPRESS);
-        qemu_put_byte(f, 0);
-        *bytes_transferred += 1;
-        pages = 1;
-    }
-
-    return pages;
-}
-
-/**
- * ram_save_page: Send the given page to the stream
- *
- * Returns: Number of pages written.
- *
- * @f: QEMUFile where to send the data
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
-                         bool last_stage, uint64_t *bytes_transferred)
-{
-    int pages = -1;
-    uint64_t bytes_xmit;
-    ram_addr_t current_addr;
-    MemoryRegion *mr = block->mr;
-    uint8_t *p;
-    int ret;
-    bool send_async = true;
-
-    p = memory_region_get_ram_ptr(mr) + offset;
-
-    /* In doubt sent page as normal */
-    bytes_xmit = 0;
-    ret = ram_control_save_page(f, block->offset,
-                           offset, TARGET_PAGE_SIZE, &bytes_xmit);
-    if (bytes_xmit) {
-        *bytes_transferred += bytes_xmit;
-        pages = 1;
-    }
-
-    XBZRLE_cache_lock();
-
-    current_addr = block->offset + offset;
-
-    if (block == last_sent_block) {
-        offset |= RAM_SAVE_FLAG_CONTINUE;
-    }
-    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
-        if (ret != RAM_SAVE_CONTROL_DELAYED) {
-            if (bytes_xmit > 0) {
-                acct_info.norm_pages++;
-            } else if (bytes_xmit == 0) {
-                acct_info.dup_pages++;
-            }
-        }
-    } else {
-        pages = save_zero_page(f, block, offset, p, bytes_transferred);
-        if (pages > 0) {
-            /* Must let xbzrle know, otherwise a previous (now 0'd) cached
-             * page would be stale
-             */
-            xbzrle_cache_zero_page(current_addr);
-        } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
-            pages = save_xbzrle_page(f, &p, current_addr, block,
-                                     offset, last_stage, bytes_transferred);
-            if (!last_stage) {
-                /* Can't send this cached data async, since the cache page
-                 * might get updated before it gets to the wire
-                 */
-                send_async = false;
-            }
-        }
-    }
-
-    /* XBZRLE overflow or normal page */
-    if (pages == -1) {
-        *bytes_transferred += save_page_header(f, block,
-                                               offset | RAM_SAVE_FLAG_PAGE);
-        if (send_async) {
-            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
-        } else {
-            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
-        }
-        *bytes_transferred += TARGET_PAGE_SIZE;
-        pages = 1;
-        acct_info.norm_pages++;
-    }
-
-    XBZRLE_cache_unlock();
-
-    return pages;
-}
-
-static int do_compress_ram_page(CompressParam *param)
-{
-    int bytes_sent, blen;
-    uint8_t *p;
-    RAMBlock *block = param->block;
-    ram_addr_t offset = param->offset;
-
-    p = memory_region_get_ram_ptr(block->mr) + (offset & TARGET_PAGE_MASK);
-
-    bytes_sent = save_page_header(param->file, block, offset |
-                                  RAM_SAVE_FLAG_COMPRESS_PAGE);
-    blen = qemu_put_compression_data(param->file, p, TARGET_PAGE_SIZE,
-                                     migrate_compress_level());
-    bytes_sent += blen;
-
-    return bytes_sent;
-}
-
-static inline void start_compression(CompressParam *param)
-{
-    param->done = false;
-    qemu_mutex_lock(&param->mutex);
-    param->start = true;
-    qemu_cond_signal(&param->cond);
-    qemu_mutex_unlock(&param->mutex);
-}
-
-static inline void start_decompression(DecompressParam *param)
-{
-    qemu_mutex_lock(&param->mutex);
-    param->start = true;
-    qemu_cond_signal(&param->cond);
-    qemu_mutex_unlock(&param->mutex);
-}
-
-static uint64_t bytes_transferred;
-
-static void flush_compressed_data(QEMUFile *f)
-{
-    int idx, len, thread_count;
-
-    if (!migrate_use_compression()) {
-        return;
-    }
-    thread_count = migrate_compress_threads();
-    for (idx = 0; idx < thread_count; idx++) {
-        if (!comp_param[idx].done) {
-            qemu_mutex_lock(comp_done_lock);
-            while (!comp_param[idx].done && !quit_comp_thread) {
-                qemu_cond_wait(comp_done_cond, comp_done_lock);
-            }
-            qemu_mutex_unlock(comp_done_lock);
-        }
-        if (!quit_comp_thread) {
-            len = qemu_put_qemu_file(f, comp_param[idx].file);
-            bytes_transferred += len;
-        }
-    }
-}
-
-static inline void set_compress_params(CompressParam *param, RAMBlock *block,
-                                       ram_addr_t offset)
-{
-    param->block = block;
-    param->offset = offset;
-}
-
-static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
-                                           ram_addr_t offset,
-                                           uint64_t *bytes_transferred)
-{
-    int idx, thread_count, bytes_xmit = -1, pages = -1;
-
-    thread_count = migrate_compress_threads();
-    qemu_mutex_lock(comp_done_lock);
-    while (true) {
-        for (idx = 0; idx < thread_count; idx++) {
-            if (comp_param[idx].done) {
-                bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
-                set_compress_params(&comp_param[idx], block, offset);
-                start_compression(&comp_param[idx]);
-                pages = 1;
-                acct_info.norm_pages++;
-                *bytes_transferred += bytes_xmit;
-                break;
-            }
-        }
-        if (pages > 0) {
-            break;
-        } else {
-            qemu_cond_wait(comp_done_cond, comp_done_lock);
-        }
-    }
-    qemu_mutex_unlock(comp_done_lock);
-
-    return pages;
-}
-
-/**
- * ram_save_compressed_page: compress the given page and send it to the stream
- *
- * Returns: Number of pages written.
- *
- * @f: QEMUFile where to send the data
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
-                                    ram_addr_t offset, bool last_stage,
-                                    uint64_t *bytes_transferred)
-{
-    int pages = -1;
-    uint64_t bytes_xmit;
-    MemoryRegion *mr = block->mr;
-    uint8_t *p;
-    int ret;
-
-    p = memory_region_get_ram_ptr(mr) + offset;
-
-    bytes_xmit = 0;
-    ret = ram_control_save_page(f, block->offset,
-                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
-    if (bytes_xmit) {
-        *bytes_transferred += bytes_xmit;
-        pages = 1;
-    }
-    if (block == last_sent_block) {
-        offset |= RAM_SAVE_FLAG_CONTINUE;
-    }
-    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
-        if (ret != RAM_SAVE_CONTROL_DELAYED) {
-            if (bytes_xmit > 0) {
-                acct_info.norm_pages++;
-            } else if (bytes_xmit == 0) {
-                acct_info.dup_pages++;
-            }
-        }
-    } else {
-        /* When starting the process of a new block, the first page of
-         * the block should be sent out before other pages in the same
-         * block, and all the pages in last block should have been sent
-         * out, keeping this order is important, because the 'cont' flag
-         * is used to avoid resending the block name.
-         */
-        if (block != last_sent_block) {
-            flush_compressed_data(f);
-            pages = save_zero_page(f, block, offset, p, bytes_transferred);
-            if (pages == -1) {
-                set_compress_params(&comp_param[0], block, offset);
-                /* Use the qemu thread to compress the data to make sure the
-                 * first page is sent out before other pages
-                 */
-                bytes_xmit = do_compress_ram_page(&comp_param[0]);
-                acct_info.norm_pages++;
-                qemu_put_qemu_file(f, comp_param[0].file);
-                *bytes_transferred += bytes_xmit;
-                pages = 1;
-            }
-        } else {
-            pages = save_zero_page(f, block, offset, p, bytes_transferred);
-            if (pages == -1) {
-                pages = compress_page_with_multi_thread(f, block, offset,
-                                                        bytes_transferred);
-            }
-        }
-    }
-
-    return pages;
-}
-
-/**
- * ram_find_and_save_block: Finds a dirty page and sends it to f
- *
- * Called within an RCU critical section.
- *
- * Returns:  The number of pages written
- *           0 means no dirty pages
- *
- * @f: QEMUFile where to send the data
- * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-
-static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
-                                   uint64_t *bytes_transferred)
-{
-    RAMBlock *block = last_seen_block;
-    ram_addr_t offset = last_offset;
-    bool complete_round = false;
-    int pages = 0;
-    MemoryRegion *mr;
-
-    if (!block)
-        block = QLIST_FIRST_RCU(&ram_list.blocks);
-
-    while (true) {
-        mr = block->mr;
-        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
-        if (complete_round && block == last_seen_block &&
-            offset >= last_offset) {
-            break;
-        }
-        if (offset >= block->used_length) {
-            offset = 0;
-            block = QLIST_NEXT_RCU(block, next);
-            if (!block) {
-                block = QLIST_FIRST_RCU(&ram_list.blocks);
-                complete_round = true;
-                ram_bulk_stage = false;
-                if (migrate_use_xbzrle()) {
-                    /* If xbzrle is on, stop using the data compression at this
-                     * point. In theory, xbzrle can do better than compression.
-                     */
-                    flush_compressed_data(f);
-                    compression_switch = false;
-                }
-            }
-        } else {
-            if (compression_switch && migrate_use_compression()) {
-                pages = ram_save_compressed_page(f, block, offset, last_stage,
-                                                 bytes_transferred);
-            } else {
-                pages = ram_save_page(f, block, offset, last_stage,
-                                      bytes_transferred);
-            }
-
-            /* if page is unmodified, continue to the next */
-            if (pages > 0) {
-                last_sent_block = block;
-                break;
-            }
-        }
-    }
-
-    last_seen_block = block;
-    last_offset = offset;
-
-    return pages;
-}
-
-void acct_update_position(QEMUFile *f, size_t size, bool zero)
-{
-    uint64_t pages = size / TARGET_PAGE_SIZE;
-    if (zero) {
-        acct_info.dup_pages += pages;
-    } else {
-        acct_info.norm_pages += pages;
-        bytes_transferred += size;
-        qemu_update_position(f, size);
-    }
-}
-
-static ram_addr_t ram_save_remaining(void)
-{
-    return migration_dirty_pages;
-}
-
-uint64_t ram_bytes_remaining(void)
-{
-    return ram_save_remaining() * TARGET_PAGE_SIZE;
-}
-
-uint64_t ram_bytes_transferred(void)
-{
-    return bytes_transferred;
-}
-
-uint64_t ram_bytes_total(void)
-{
-    RAMBlock *block;
-    uint64_t total = 0;
-
-    rcu_read_lock();
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
-        total += block->used_length;
-    rcu_read_unlock();
-    return total;
-}
-
-void free_xbzrle_decoded_buf(void)
-{
-    g_free(xbzrle_decoded_buf);
-    xbzrle_decoded_buf = NULL;
-}
-
-static void migration_end(void)
-{
-    if (migration_bitmap) {
-        memory_global_dirty_log_stop();
-        g_free(migration_bitmap);
-        migration_bitmap = NULL;
-    }
-
-    XBZRLE_cache_lock();
-    if (XBZRLE.cache) {
-        cache_fini(XBZRLE.cache);
-        g_free(XBZRLE.encoded_buf);
-        g_free(XBZRLE.current_buf);
-        XBZRLE.cache = NULL;
-        XBZRLE.encoded_buf = NULL;
-        XBZRLE.current_buf = NULL;
-    }
-    XBZRLE_cache_unlock();
-}
-
-static void ram_migration_cancel(void *opaque)
-{
-    migration_end();
-}
-
-static void reset_ram_globals(void)
-{
-    last_seen_block = NULL;
-    last_sent_block = NULL;
-    last_offset = 0;
-    last_version = ram_list.version;
-    ram_bulk_stage = true;
-}
-
-#define MAX_WAIT 50 /* ms, half buffered_file limit */
-
-
-/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
- * long-running RCU critical section.  When rcu-reclaims in the code
- * start to become numerous it will be necessary to reduce the
- * granularity of these critical sections.
- */
-
-static int ram_save_setup(QEMUFile *f, void *opaque)
-{
-    RAMBlock *block;
-    int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
-
-    mig_throttle_on = false;
-    dirty_rate_high_cnt = 0;
-    bitmap_sync_count = 0;
-    migration_bitmap_sync_init();
-
-    if (migrate_use_xbzrle()) {
-        XBZRLE_cache_lock();
-        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
-                                  TARGET_PAGE_SIZE,
-                                  TARGET_PAGE_SIZE);
-        if (!XBZRLE.cache) {
-            XBZRLE_cache_unlock();
-            error_report("Error creating cache");
-            return -1;
-        }
-        XBZRLE_cache_unlock();
-
-        /* We prefer not to abort if there is no memory */
-        XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
-        if (!XBZRLE.encoded_buf) {
-            error_report("Error allocating encoded_buf");
-            return -1;
-        }
-
-        XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
-        if (!XBZRLE.current_buf) {
-            error_report("Error allocating current_buf");
-            g_free(XBZRLE.encoded_buf);
-            XBZRLE.encoded_buf = NULL;
-            return -1;
-        }
-
-        acct_clear();
-    }
-
-    /* iothread lock needed for ram_list.dirty_memory[] */
-    qemu_mutex_lock_iothread();
-    qemu_mutex_lock_ramlist();
-    rcu_read_lock();
-    bytes_transferred = 0;
-    reset_ram_globals();
-
-    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
-    migration_bitmap = bitmap_new(ram_bitmap_pages);
-    bitmap_set(migration_bitmap, 0, ram_bitmap_pages);
-
-    /*
-     * Count the total number of pages used by ram blocks not including any
-     * gaps due to alignment or unplugs.
-     */
-    migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
-
-    memory_global_dirty_log_start();
-    migration_bitmap_sync();
-    qemu_mutex_unlock_ramlist();
-    qemu_mutex_unlock_iothread();
-
-    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
-
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
-        qemu_put_byte(f, strlen(block->idstr));
-        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
-        qemu_put_be64(f, block->used_length);
-    }
-
-    rcu_read_unlock();
-
-    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
-    ram_control_after_iterate(f, RAM_CONTROL_SETUP);
-
-    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-
-    return 0;
-}
-
-static int ram_save_iterate(QEMUFile *f, void *opaque)
-{
-    int ret;
-    int i;
-    int64_t t0;
-    int pages_sent = 0;
-
-    rcu_read_lock();
-    if (ram_list.version != last_version) {
-        reset_ram_globals();
-    }
-
-    /* Read version before ram_list.blocks */
-    smp_rmb();
-
-    ram_control_before_iterate(f, RAM_CONTROL_ROUND);
-
-    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-    i = 0;
-    while ((ret = qemu_file_rate_limit(f)) == 0) {
-        int pages;
-
-        pages = ram_find_and_save_block(f, false, &bytes_transferred);
-        /* no more pages to sent */
-        if (pages == 0) {
-            break;
-        }
-        pages_sent += pages;
-        acct_info.iterations++;
-        check_guest_throttling();
-        /* we want to check in the 1st loop, just in case it was the 1st time
-           and we had to sync the dirty bitmap.
-           qemu_get_clock_ns() is a bit expensive, so we only check each some
-           iterations
-        */
-        if ((i & 63) == 0) {
-            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
-            if (t1 > MAX_WAIT) {
-                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
-                        t1, i);
-                break;
-            }
-        }
-        i++;
-    }
-    flush_compressed_data(f);
-    rcu_read_unlock();
-
-    /*
-     * Must occur before EOS (or any QEMUFile operation)
-     * because of RDMA protocol.
-     */
-    ram_control_after_iterate(f, RAM_CONTROL_ROUND);
-
-    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-    bytes_transferred += 8;
-
-    ret = qemu_file_get_error(f);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return pages_sent;
-}
-
-/* Called with iothread lock */
-static int ram_save_complete(QEMUFile *f, void *opaque)
-{
-    rcu_read_lock();
-
-    migration_bitmap_sync();
-
-    ram_control_before_iterate(f, RAM_CONTROL_FINISH);
-
-    /* try transferring iterative blocks of memory */
-
-    /* flush all remaining blocks regardless of rate limiting */
-    while (true) {
-        int pages;
-
-        pages = ram_find_and_save_block(f, true, &bytes_transferred);
-        /* no more blocks to sent */
-        if (pages == 0) {
-            break;
-        }
-    }
-
-    flush_compressed_data(f);
-    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
-    migration_end();
-
-    rcu_read_unlock();
-    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-
-    return 0;
-}
-
-static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
-{
-    uint64_t remaining_size;
-
-    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
-
-    if (remaining_size < max_size) {
-        qemu_mutex_lock_iothread();
-        rcu_read_lock();
-        migration_bitmap_sync();
-        rcu_read_unlock();
-        qemu_mutex_unlock_iothread();
-        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
-    }
-    return remaining_size;
-}
-
-static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
-{
-    unsigned int xh_len;
-    int xh_flags;
-
-    if (!xbzrle_decoded_buf) {
-        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
-    }
-
-    /* extract RLE header */
-    xh_flags = qemu_get_byte(f);
-    xh_len = qemu_get_be16(f);
-
-    if (xh_flags != ENCODING_FLAG_XBZRLE) {
-        error_report("Failed to load XBZRLE page - wrong compression!");
-        return -1;
-    }
-
-    if (xh_len > TARGET_PAGE_SIZE) {
-        error_report("Failed to load XBZRLE page - len overflow!");
-        return -1;
-    }
-    /* load data and decode */
-    qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);
-
-    /* decode RLE */
-    if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
-                             TARGET_PAGE_SIZE) == -1) {
-        error_report("Failed to load XBZRLE page - decode error!");
-        return -1;
-    }
-
-    return 0;
-}
-
-/* Must be called from within a rcu critical section.
- * Returns a pointer from within the RCU-protected ram_list.
- */
-static inline void *host_from_stream_offset(QEMUFile *f,
-                                            ram_addr_t offset,
-                                            int flags)
-{
-    static RAMBlock *block = NULL;
-    char id[256];
-    uint8_t len;
-
-    if (flags & RAM_SAVE_FLAG_CONTINUE) {
-        if (!block || block->max_length <= offset) {
-            error_report("Ack, bad migration stream!");
-            return NULL;
-        }
-
-        return memory_region_get_ram_ptr(block->mr) + offset;
-    }
-
-    len = qemu_get_byte(f);
-    qemu_get_buffer(f, (uint8_t *)id, len);
-    id[len] = 0;
-
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
-        if (!strncmp(id, block->idstr, sizeof(id)) &&
-            block->max_length > offset) {
-            return memory_region_get_ram_ptr(block->mr) + offset;
-        }
-    }
-
-    error_report("Can't find block %s!", id);
-    return NULL;
-}
-
-/*
- * If a page (or a whole RDMA chunk) has been
- * determined to be zero, then zap it.
- */
-void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
-{
-    if (ch != 0 || !is_zero_range(host, size)) {
-        memset(host, ch, size);
-    }
-}
-
-static void *do_data_decompress(void *opaque)
-{
-    DecompressParam *param = opaque;
-    unsigned long pagesize;
-
-    while (!quit_decomp_thread) {
-        qemu_mutex_lock(&param->mutex);
-        while (!param->start && !quit_decomp_thread) {
-            qemu_cond_wait(&param->cond, &param->mutex);
-            pagesize = TARGET_PAGE_SIZE;
-            if (!quit_decomp_thread) {
-                /* uncompress() will return failed in some case, especially
-                 * when the page is dirted when doing the compression, it's
-                 * not a problem because the dirty page will be retransferred
-                 * and uncompress() won't break the data in other pages.
-                 */
-                uncompress((Bytef *)param->des, &pagesize,
-                           (const Bytef *)param->compbuf, param->len);
-            }
-            param->start = false;
-        }
-        qemu_mutex_unlock(&param->mutex);
-    }
-
-    return NULL;
-}
-
-void migrate_decompress_threads_create(void)
-{
-    int i, thread_count;
-
-    thread_count = migrate_decompress_threads();
-    decompress_threads = g_new0(QemuThread, thread_count);
-    decomp_param = g_new0(DecompressParam, thread_count);
-    compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
-    quit_decomp_thread = false;
-    for (i = 0; i < thread_count; i++) {
-        qemu_mutex_init(&decomp_param[i].mutex);
-        qemu_cond_init(&decomp_param[i].cond);
-        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
-        qemu_thread_create(decompress_threads + i, "decompress",
-                           do_data_decompress, decomp_param + i,
-                           QEMU_THREAD_JOINABLE);
-    }
-}
-
-void migrate_decompress_threads_join(void)
-{
-    int i, thread_count;
-
-    quit_decomp_thread = true;
-    thread_count = migrate_decompress_threads();
-    for (i = 0; i < thread_count; i++) {
-        qemu_mutex_lock(&decomp_param[i].mutex);
-        qemu_cond_signal(&decomp_param[i].cond);
-        qemu_mutex_unlock(&decomp_param[i].mutex);
-    }
-    for (i = 0; i < thread_count; i++) {
-        qemu_thread_join(decompress_threads + i);
-        qemu_mutex_destroy(&decomp_param[i].mutex);
-        qemu_cond_destroy(&decomp_param[i].cond);
-        g_free(decomp_param[i].compbuf);
-    }
-    g_free(decompress_threads);
-    g_free(decomp_param);
-    g_free(compressed_data_buf);
-    decompress_threads = NULL;
-    decomp_param = NULL;
-    compressed_data_buf = NULL;
-}
-
-static void decompress_data_with_multi_threads(uint8_t *compbuf,
-                                               void *host, int len)
-{
-    int idx, thread_count;
-
-    thread_count = migrate_decompress_threads();
-    while (true) {
-        for (idx = 0; idx < thread_count; idx++) {
-            if (!decomp_param[idx].start) {
-                memcpy(decomp_param[idx].compbuf, compbuf, len);
-                decomp_param[idx].des = host;
-                decomp_param[idx].len = len;
-                start_decompression(&decomp_param[idx]);
-                break;
-            }
-        }
-        if (idx < thread_count) {
-            break;
-        }
-    }
-}
-
-static int ram_load(QEMUFile *f, void *opaque, int version_id)
-{
-    int flags = 0, ret = 0;
-    static uint64_t seq_iter;
-    int len = 0;
-
-    seq_iter++;
-
-    if (version_id != 4) {
-        ret = -EINVAL;
-    }
-
-    /* This RCU critical section can be very long running.
-     * When RCU reclaims in the code start to become numerous,
-     * it will be necessary to reduce the granularity of this
-     * critical section.
-     */
-    rcu_read_lock();
-    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
-        ram_addr_t addr, total_ram_bytes;
-        void *host;
-        uint8_t ch;
-
-        addr = qemu_get_be64(f);
-        flags = addr & ~TARGET_PAGE_MASK;
-        addr &= TARGET_PAGE_MASK;
-
-        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
-        case RAM_SAVE_FLAG_MEM_SIZE:
-            /* Synchronize RAM block list */
-            total_ram_bytes = addr;
-            while (!ret && total_ram_bytes) {
-                RAMBlock *block;
-                uint8_t len;
-                char id[256];
-                ram_addr_t length;
-
-                len = qemu_get_byte(f);
-                qemu_get_buffer(f, (uint8_t *)id, len);
-                id[len] = 0;
-                length = qemu_get_be64(f);
-
-                QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
-                    if (!strncmp(id, block->idstr, sizeof(id))) {
-                        if (length != block->used_length) {
-                            Error *local_err = NULL;
-
-                            ret = qemu_ram_resize(block->offset, length, &local_err);
-                            if (local_err) {
-                                error_report_err(local_err);
-                            }
-                        }
-                        break;
-                    }
-                }
-
-                if (!block) {
-                    error_report("Unknown ramblock \"%s\", cannot "
-                                 "accept migration", id);
-                    ret = -EINVAL;
-                }
-
-                total_ram_bytes -= length;
-            }
-            break;
-        case RAM_SAVE_FLAG_COMPRESS:
-            host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
-                ret = -EINVAL;
-                break;
-            }
-            ch = qemu_get_byte(f);
-            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
-            break;
-        case RAM_SAVE_FLAG_PAGE:
-            host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
-                ret = -EINVAL;
-                break;
-            }
-            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
-            break;
-        case RAM_SAVE_FLAG_COMPRESS_PAGE:
-            host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                error_report("Invalid RAM offset " RAM_ADDR_FMT, addr);
-                ret = -EINVAL;
-                break;
-            }
-
-            len = qemu_get_be32(f);
-            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
-                error_report("Invalid compressed data length: %d", len);
-                ret = -EINVAL;
-                break;
-            }
-            qemu_get_buffer(f, compressed_data_buf, len);
-            decompress_data_with_multi_threads(compressed_data_buf, host, len);
-            break;
-        case RAM_SAVE_FLAG_XBZRLE:
-            host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
-                ret = -EINVAL;
-                break;
-            }
-            if (load_xbzrle(f, addr, host) < 0) {
-                error_report("Failed to decompress XBZRLE page at "
-                             RAM_ADDR_FMT, addr);
-                ret = -EINVAL;
-                break;
-            }
-            break;
-        case RAM_SAVE_FLAG_EOS:
-            /* normal exit */
-            break;
-        default:
-            if (flags & RAM_SAVE_FLAG_HOOK) {
-                ram_control_load_hook(f, flags);
-            } else {
-                error_report("Unknown combination of migration flags: %#x",
-                             flags);
-                ret = -EINVAL;
-            }
-        }
-        if (!ret) {
-            ret = qemu_file_get_error(f);
-        }
-    }
-
-    rcu_read_unlock();
-    DPRINTF("Completed load of VM with exit code %d seq iteration "
-            "%" PRIu64 "\n", ret, seq_iter);
-    return ret;
-}
-
-static SaveVMHandlers savevm_ram_handlers = {
-    .save_live_setup = ram_save_setup,
-    .save_live_iterate = ram_save_iterate,
-    .save_live_complete = ram_save_complete,
-    .save_live_pending = ram_save_pending,
-    .load_state = ram_load,
-    .cancel = ram_migration_cancel,
-};
-
-void ram_mig_init(void)
-{
-    qemu_mutex_init(&XBZRLE.lock);
-    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
-}
-
 struct soundhw {
     const char *name;
     const char *descr;
@@ -1869,52 +307,3 @@ TargetInfo *qmp_query_target(Error **errp)
 
     return info;
 }
-
-/* Stub function that's gets run on the vcpu when its brought out of the
-   VM to run inside qemu via async_run_on_cpu()*/
-static void mig_sleep_cpu(void *opq)
-{
-    qemu_mutex_unlock_iothread();
-    g_usleep(30*1000);
-    qemu_mutex_lock_iothread();
-}
-
-/* To reduce the dirty rate explicitly disallow the VCPUs from spending
-   much time in the VM. The migration thread will try to catchup.
-   Workload will experience a performance drop.
-*/
-static void mig_throttle_guest_down(void)
-{
-    CPUState *cpu;
-
-    qemu_mutex_lock_iothread();
-    CPU_FOREACH(cpu) {
-        async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
-    }
-    qemu_mutex_unlock_iothread();
-}
-
-static void check_guest_throttling(void)
-{
-    static int64_t t0;
-    int64_t        t1;
-
-    if (!mig_throttle_on) {
-        return;
-    }
-
-    if (!t0)  {
-        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-        return;
-    }
-
-    t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-
-    /* If it has been more than 40 ms since the last time the guest
-     * was throttled then do it again.
-     */
-    if (40 < (t1-t0)/1000000) {
-        mig_throttle_guest_down();
-        t0 = t1;
-    }
-}
diff --git a/cpus.c b/cpus.c
index f38b858f9b..b85fb5f03f 100644
--- a/cpus.c
+++ b/cpus.c
@@ -480,6 +480,7 @@ static const VMStateDescription icount_vmstate_timers = {
     .name = "timer/icount",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = icount_state_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT64(qemu_icount_bias, TimersState),
         VMSTATE_INT64(qemu_icount, TimersState),
@@ -497,13 +498,9 @@ static const VMStateDescription vmstate_timers = {
         VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &icount_vmstate_timers,
-            .needed = icount_state_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &icount_vmstate_timers,
+        NULL
     }
 };
 
diff --git a/default-configs/mips-softmmu.mak b/default-configs/mips-softmmu.mak
index fd0607db6b..44467c37c1 100644
--- a/default-configs/mips-softmmu.mak
+++ b/default-configs/mips-softmmu.mak
@@ -24,14 +24,9 @@ CONFIG_PIIX4=y
 CONFIG_IDE_ISA=y
 CONFIG_IDE_PIIX=y
 CONFIG_NE2000_ISA=y
-CONFIG_RC4030=y
-CONFIG_DP8393X=y
-CONFIG_DS1225Y=y
 CONFIG_MIPSNET=y
 CONFIG_PFLASH_CFI01=y
-CONFIG_G364FB=y
 CONFIG_I8259=y
-CONFIG_JAZZ_LED=y
 CONFIG_MC146818RTC=y
 CONFIG_ISA_TESTDEV=y
 CONFIG_EMPTY_SLOT=y
diff --git a/default-configs/mips64-softmmu.mak b/default-configs/mips64-softmmu.mak
index b8c791021a..66ed5f94c5 100644
--- a/default-configs/mips64-softmmu.mak
+++ b/default-configs/mips64-softmmu.mak
@@ -29,6 +29,7 @@ CONFIG_DP8393X=y
 CONFIG_DS1225Y=y
 CONFIG_MIPSNET=y
 CONFIG_PFLASH_CFI01=y
+CONFIG_JAZZ=y
 CONFIG_G364FB=y
 CONFIG_I8259=y
 CONFIG_JAZZ_LED=y
diff --git a/default-configs/mips64el-softmmu.mak b/default-configs/mips64el-softmmu.mak
index ae4274b3a7..bfca2b2b7c 100644
--- a/default-configs/mips64el-softmmu.mak
+++ b/default-configs/mips64el-softmmu.mak
@@ -31,6 +31,7 @@ CONFIG_DS1225Y=y
 CONFIG_MIPSNET=y
 CONFIG_PFLASH_CFI01=y
 CONFIG_FULONG=y
+CONFIG_JAZZ=y
 CONFIG_G364FB=y
 CONFIG_I8259=y
 CONFIG_JAZZ_LED=y
diff --git a/default-configs/mipsel-softmmu.mak b/default-configs/mipsel-softmmu.mak
index 1e2374be13..0162ef0249 100644
--- a/default-configs/mipsel-softmmu.mak
+++ b/default-configs/mipsel-softmmu.mak
@@ -24,14 +24,9 @@ CONFIG_PIIX4=y
 CONFIG_IDE_ISA=y
 CONFIG_IDE_PIIX=y
 CONFIG_NE2000_ISA=y
-CONFIG_RC4030=y
-CONFIG_DP8393X=y
-CONFIG_DS1225Y=y
 CONFIG_MIPSNET=y
 CONFIG_PFLASH_CFI01=y
-CONFIG_G364FB=y
 CONFIG_I8259=y
-CONFIG_JAZZ_LED=y
 CONFIG_MC146818RTC=y
 CONFIG_ISA_TESTDEV=y
 CONFIG_EMPTY_SLOT=y
diff --git a/disas/mips.c b/disas/mips.c
index 1afe0c5511..32940feb95 100644
--- a/disas/mips.c
+++ b/disas/mips.c
@@ -2238,6 +2238,8 @@ const struct mips_opcode mips_builtin_opcodes[] =
 {"ceil.l.s", "D,S",	0x4600000a, 0xffff003f, WR_D|RD_S|FP_S|FP_D,	0,		I3|I33	},
 {"ceil.w.d", "D,S",	0x4620000e, 0xffff003f, WR_D|RD_S|FP_S|FP_D,	0,		I2	},
 {"ceil.w.s", "D,S",	0x4600000e, 0xffff003f, WR_D|RD_S|FP_S,		0,		I2	},
+{"mfhc0",   "t,G,H",    0x40400000, 0xffe007f8, LCD|WR_t|RD_C0,       0, I33},
+{"mthc0",   "t,G,H",    0x40c00000, 0xffe007f8, COD|RD_t|WR_C0|WR_CC, 0, I33},
 {"cfc0",    "t,G",	0x40400000, 0xffe007ff,	LCD|WR_t|RD_C0,		0,		I1	},
 {"cfc1",    "t,G",	0x44400000, 0xffe007ff,	LCD|WR_t|RD_C1|FP_S,	0,		I1	},
 {"cfc1",    "t,S",	0x44400000, 0xffe007ff,	LCD|WR_t|RD_C1|FP_S,	0,		I1	},
@@ -2407,6 +2409,7 @@ const struct mips_opcode mips_builtin_opcodes[] =
 {"emt",     "",		0x41600be1, 0xffffffff, TRAP,			0,		MT32	},
 {"emt",     "t",	0x41600be1, 0xffe0ffff, TRAP|WR_t,		0,		MT32	},
 {"eret",    "",         0x42000018, 0xffffffff, 0,      		0,		I3|I32	},
+{"eretnc",  "",         0x42000058, 0xffffffff, 0,                    0, I33},
 {"evpe",    "",		0x41600021, 0xffffffff, TRAP,			0,		MT32	},
 {"evpe",    "t",	0x41600021, 0xffe0ffff, TRAP|WR_t,		0,		MT32	},
 {"ext",     "t,r,+A,+C", 0x7c000000, 0xfc00003f, WR_t|RD_s,    		0,		I33	},
diff --git a/docs/migration.txt b/docs/migration.txt
index 0492a4547a..f6df4beb2a 100644
--- a/docs/migration.txt
+++ b/docs/migration.txt
@@ -257,6 +257,7 @@ const VMStateDescription vmstate_ide_drive_pio_state = {
     .minimum_version_id = 1,
     .pre_save = ide_drive_pio_pre_save,
     .post_load = ide_drive_pio_post_load,
+    .needed = ide_drive_pio_state_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(req_nb_sectors, IDEState),
         VMSTATE_VARRAY_INT32(io_buffer, IDEState, io_buffer_total_len, 1,
@@ -279,13 +280,9 @@ const VMStateDescription vmstate_ide_drive = {
         .... several fields ....
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_ide_drive_pio_state,
-            .needed = ide_drive_pio_state_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_ide_drive_pio_state,
+        NULL
     }
 };
 
diff --git a/exec.c b/exec.c
index 487583b1bd..76bfc4ac4a 100644
--- a/exec.c
+++ b/exec.c
@@ -454,6 +454,7 @@ static const VMStateDescription vmstate_cpu_common_exception_index = {
     .name = "cpu_common/exception_index",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = cpu_common_exception_index_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(exception_index, CPUState),
         VMSTATE_END_OF_LIST()
@@ -471,13 +472,9 @@ const VMStateDescription vmstate_cpu_common = {
         VMSTATE_UINT32(interrupt_request, CPUState),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_cpu_common_exception_index,
-            .needed = cpu_common_exception_index_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_cpu_common_exception_index,
+        NULL
     }
 };
 
@@ -3348,14 +3345,20 @@ bool cpu_physical_memory_is_io(hwaddr phys_addr)
     return res;
 }
 
-void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
+int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
 {
     RAMBlock *block;
+    int ret = 0;
 
     rcu_read_lock();
     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
-        func(block->host, block->offset, block->used_length, opaque);
+        ret = func(block->idstr, block->host, block->offset,
+                   block->used_length, opaque);
+        if (ret) {
+            break;
+        }
     }
     rcu_read_unlock();
+    return ret;
 }
 #endif
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index 25bc023882..8a64ffb38f 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -152,6 +152,7 @@ static const VMStateDescription vmstate_memhp_state = {
     .version_id = 1,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
+    .needed = vmstate_test_use_memhp,
     .fields      = (VMStateField[]) {
         VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, ICH9LPCPMRegs),
         VMSTATE_END_OF_LIST()
@@ -175,12 +176,9 @@ const VMStateDescription vmstate_ich9_pm = {
         VMSTATE_UINT32(smi_sts, ICH9LPCPMRegs),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_memhp_state,
-            .needed = vmstate_test_use_memhp,
-        },
-        VMSTATE_END_OF_LIST()
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_memhp_state,
+        NULL
     }
 };
 
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index b730ca6ced..3bd1d5a865 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -260,6 +260,7 @@ static const VMStateDescription vmstate_memhp_state = {
     .version_id = 1,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
+    .needed = vmstate_test_use_memhp,
     .fields      = (VMStateField[]) {
         VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, PIIX4PMState),
         VMSTATE_END_OF_LIST()
@@ -298,12 +299,9 @@ static const VMStateDescription vmstate_acpi = {
                             vmstate_test_use_acpi_pci_hotplug),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_memhp_state,
-            .needed = vmstate_test_use_memhp,
-        },
-        VMSTATE_END_OF_LIST()
+    .subsections = (const VMStateDescription*[]) {
+         &vmstate_memhp_state,
+         NULL
     }
 };
 
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 6e794597dc..5e1b67ee43 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -699,6 +699,7 @@ static const VMStateDescription vmstate_fdrive_media_changed = {
     .name = "fdrive/media_changed",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fdrive_media_changed_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(media_changed, FDrive),
         VMSTATE_END_OF_LIST()
@@ -716,6 +717,7 @@ static const VMStateDescription vmstate_fdrive_media_rate = {
     .name = "fdrive/media_rate",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fdrive_media_rate_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(media_rate, FDrive),
         VMSTATE_END_OF_LIST()
@@ -733,6 +735,7 @@ static const VMStateDescription vmstate_fdrive_perpendicular = {
     .name = "fdrive/perpendicular",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fdrive_perpendicular_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(perpendicular, FDrive),
         VMSTATE_END_OF_LIST()
@@ -756,19 +759,11 @@ static const VMStateDescription vmstate_fdrive = {
         VMSTATE_UINT8(sect, FDrive),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_fdrive_media_changed,
-            .needed = &fdrive_media_changed_needed,
-        } , {
-            .vmsd = &vmstate_fdrive_media_rate,
-            .needed = &fdrive_media_rate_needed,
-        } , {
-            .vmsd = &vmstate_fdrive_perpendicular,
-            .needed = &fdrive_perpendicular_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_fdrive_media_changed,
+        &vmstate_fdrive_media_rate,
+        &vmstate_fdrive_perpendicular,
+        NULL
     }
 };
 
@@ -833,6 +828,7 @@ static const VMStateDescription vmstate_fdc_reset_sensei = {
     .name = "fdc/reset_sensei",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fdc_reset_sensei_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(reset_sensei, FDCtrl),
         VMSTATE_END_OF_LIST()
@@ -850,6 +846,7 @@ static const VMStateDescription vmstate_fdc_result_timer = {
     .name = "fdc/result_timer",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fdc_result_timer_needed,
     .fields = (VMStateField[]) {
         VMSTATE_TIMER_PTR(result_timer, FDCtrl),
         VMSTATE_END_OF_LIST()
@@ -867,6 +864,7 @@ static const VMStateDescription vmstate_fdc_phase = {
     .name = "fdc/phase",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fdc_phase_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(phase, FDCtrl),
         VMSTATE_END_OF_LIST()
@@ -911,19 +909,11 @@ static const VMStateDescription vmstate_fdc = {
                              vmstate_fdrive, FDrive),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_fdc_reset_sensei,
-            .needed = fdc_reset_sensei_needed,
-        } , {
-            .vmsd = &vmstate_fdc_result_timer,
-            .needed = fdc_result_timer_needed,
-        } , {
-            .vmsd = &vmstate_fdc_phase,
-            .needed = fdc_phase_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_fdc_reset_sensei,
+        &vmstate_fdc_result_timer,
+        &vmstate_fdc_phase,
+        NULL
     }
 };
 
diff --git a/hw/char/serial.c b/hw/char/serial.c
index 55011cfd26..513d73c27f 100644
--- a/hw/char/serial.c
+++ b/hw/char/serial.c
@@ -662,6 +662,7 @@ static const VMStateDescription vmstate_serial_thr_ipending = {
     .name = "serial/thr_ipending",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = serial_thr_ipending_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(thr_ipending, SerialState),
         VMSTATE_END_OF_LIST()
@@ -678,6 +679,7 @@ static const VMStateDescription vmstate_serial_tsr = {
     .name = "serial/tsr",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = serial_tsr_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(tsr_retry, SerialState),
         VMSTATE_UINT8(thr, SerialState),
@@ -697,6 +699,7 @@ static const VMStateDescription vmstate_serial_recv_fifo = {
     .name = "serial/recv_fifo",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = serial_recv_fifo_needed,
     .fields = (VMStateField[]) {
         VMSTATE_STRUCT(recv_fifo, SerialState, 1, vmstate_fifo8, Fifo8),
         VMSTATE_END_OF_LIST()
@@ -713,6 +716,7 @@ static const VMStateDescription vmstate_serial_xmit_fifo = {
     .name = "serial/xmit_fifo",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = serial_xmit_fifo_needed,
     .fields = (VMStateField[]) {
         VMSTATE_STRUCT(xmit_fifo, SerialState, 1, vmstate_fifo8, Fifo8),
         VMSTATE_END_OF_LIST()
@@ -729,6 +733,7 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = {
     .name = "serial/fifo_timeout_timer",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = serial_fifo_timeout_timer_needed,
     .fields = (VMStateField[]) {
         VMSTATE_TIMER_PTR(fifo_timeout_timer, SerialState),
         VMSTATE_END_OF_LIST()
@@ -745,6 +750,7 @@ static const VMStateDescription vmstate_serial_timeout_ipending = {
     .name = "serial/timeout_ipending",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = serial_timeout_ipending_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(timeout_ipending, SerialState),
         VMSTATE_END_OF_LIST()
@@ -760,6 +766,7 @@ static bool serial_poll_needed(void *opaque)
 static const VMStateDescription vmstate_serial_poll = {
     .name = "serial/poll",
     .version_id = 1,
+    .needed = serial_poll_needed,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(poll_msl, SerialState),
@@ -788,31 +795,15 @@ const VMStateDescription vmstate_serial = {
         VMSTATE_UINT8_V(fcr_vmstate, SerialState, 3),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_serial_thr_ipending,
-            .needed = &serial_thr_ipending_needed,
-        } , {
-            .vmsd = &vmstate_serial_tsr,
-            .needed = &serial_tsr_needed,
-        } , {
-            .vmsd = &vmstate_serial_recv_fifo,
-            .needed = &serial_recv_fifo_needed,
-        } , {
-            .vmsd = &vmstate_serial_xmit_fifo,
-            .needed = &serial_xmit_fifo_needed,
-        } , {
-            .vmsd = &vmstate_serial_fifo_timeout_timer,
-            .needed = &serial_fifo_timeout_timer_needed,
-        } , {
-            .vmsd = &vmstate_serial_timeout_ipending,
-            .needed = &serial_timeout_ipending_needed,
-        } , {
-            .vmsd = &vmstate_serial_poll,
-            .needed = &serial_poll_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_serial_thr_ipending,
+        &vmstate_serial_tsr,
+        &vmstate_serial_recv_fifo,
+        &vmstate_serial_xmit_fifo,
+        &vmstate_serial_fifo_timeout_timer,
+        &vmstate_serial_timeout_ipending,
+        &vmstate_serial_poll,
+        NULL
     }
 };
 
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index b220e2d5d2..722146ec3a 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -2220,6 +2220,7 @@ static VMStateDescription qxl_vmstate_monitors_config = {
     .name               = "qxl/monitors-config",
     .version_id         = 1,
     .minimum_version_id = 1,
+    .needed = qxl_monitors_config_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(guest_monitors_config, PCIQXLDevice),
         VMSTATE_END_OF_LIST()
@@ -2253,13 +2254,9 @@ static VMStateDescription qxl_vmstate = {
         VMSTATE_UINT64(guest_cursor, PCIQXLDevice),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &qxl_vmstate_monitors_config,
-            .needed = qxl_monitors_config_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &qxl_vmstate_monitors_config,
+        NULL
     }
 };
 
diff --git a/hw/display/vga.c b/hw/display/vga.c
index d1d296c74e..b35d523e65 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -2035,6 +2035,7 @@ static const VMStateDescription vmstate_vga_endian = {
     .name = "vga.endian",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = vga_endian_state_needed,
     .fields = (VMStateField[]) {
         VMSTATE_BOOL(big_endian_fb, VGACommonState),
         VMSTATE_END_OF_LIST()
@@ -2078,13 +2079,9 @@ const VMStateDescription vmstate_vga_common = {
         VMSTATE_UINT32(vbe_bank_mask, VGACommonState),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_vga_endian,
-            .needed = vga_endian_state_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_vga_endian,
+        NULL
     }
 };
 
diff --git a/hw/dma/rc4030.c b/hw/dma/rc4030.c
index af2663256e..3efa6de352 100644
--- a/hw/dma/rc4030.c
+++ b/hw/dma/rc4030.c
@@ -1,7 +1,7 @@
 /*
  * QEMU JAZZ RC4030 chipset
  *
- * Copyright (c) 2007-2009 Herve Poussineau
+ * Copyright (c) 2007-2013 Hervé Poussineau
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -24,29 +24,16 @@
 
 #include "hw/hw.h"
 #include "hw/mips/mips.h"
+#include "hw/sysbus.h"
 #include "qemu/timer.h"
-
-/********************************************************/
-/* debug rc4030 */
-
-//#define DEBUG_RC4030
-//#define DEBUG_RC4030_DMA
-
-#ifdef DEBUG_RC4030
-#define DPRINTF(fmt, ...) \
-do { printf("rc4030: " fmt , ## __VA_ARGS__); } while (0)
-static const char* irq_names[] = { "parallel", "floppy", "sound", "video",
-            "network", "scsi", "keyboard", "mouse", "serial0", "serial1" };
-#else
-#define DPRINTF(fmt, ...)
-#endif
-
-#define RC4030_ERROR(fmt, ...) \
-do { fprintf(stderr, "rc4030 ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0)
+#include "exec/address-spaces.h"
+#include "trace.h"
 
 /********************************************************/
 /* rc4030 emulation                                     */
 
+#define MAX_TL_ENTRIES 512
+
 typedef struct dma_pagetable_entry {
     int32_t frame;
     int32_t owner;
@@ -63,8 +50,14 @@ typedef struct dma_pagetable_entry {
 #define DMA_FLAG_MEM_INTR   0x0200
 #define DMA_FLAG_ADDR_INTR  0x0400
 
+#define TYPE_RC4030 "rc4030"
+#define RC4030(obj) \
+    OBJECT_CHECK(rc4030State, (obj), TYPE_RC4030)
+
 typedef struct rc4030State
 {
+    SysBusDevice parent;
+
     uint32_t config; /* 0x0000: RC4030 config register */
     uint32_t revision; /* 0x0008: RC4030 Revision register */
     uint32_t invalid_address_register; /* 0x0010: Invalid Address register */
@@ -83,7 +76,7 @@ typedef struct rc4030State
     uint32_t cache_bmask; /* 0x0058: I/O Cache Byte Mask */
 
     uint32_t nmi_interrupt; /* 0x0200: interrupt source */
-    uint32_t offset210;
+    uint32_t memory_refresh_rate; /* 0x0210: memory refresh rate */
     uint32_t nvram_protect; /* 0x0220: NV ram protect register */
     uint32_t rem_speed[16];
     uint32_t imr_jazz; /* Local bus int enable mask */
@@ -96,6 +89,16 @@ typedef struct rc4030State
     qemu_irq timer_irq;
     qemu_irq jazz_bus_irq;
 
+    /* biggest translation table */
+    MemoryRegion dma_tt;
+    /* translation table memory region alias, added to system RAM */
+    MemoryRegion dma_tt_alias;
+    /* whole DMA memory region, root of DMA address space */
+    MemoryRegion dma_mr;
+    /* translation table entry aliases, added to DMA memory region */
+    MemoryRegion dma_mrs[MAX_TL_ENTRIES];
+    AddressSpace dma_as;
+
     MemoryRegion iomem_chipset;
     MemoryRegion iomem_jazzio;
 } rc4030State;
@@ -112,7 +115,7 @@ static void set_next_tick(rc4030State *s)
 }
 
 /* called for accesses to rc4030 */
-static uint32_t rc4030_readl(void *opaque, hwaddr addr)
+static uint64_t rc4030_read(void *opaque, hwaddr addr, unsigned int size)
 {
     rc4030State *s = opaque;
     uint32_t val;
@@ -220,9 +223,9 @@ static uint32_t rc4030_readl(void *opaque, hwaddr addr)
     case 0x0208:
         val = 0;
         break;
-    /* Offset 0x0210 */
+    /* Memory refresh rate */
     case 0x0210:
-        val = s->offset210;
+        val = s->memory_refresh_rate;
         break;
     /* NV ram protect register */
     case 0x0220:
@@ -238,39 +241,117 @@ static uint32_t rc4030_readl(void *opaque, hwaddr addr)
         val = 7; /* FIXME: should be read from EISA controller */
         break;
     default:
-        RC4030_ERROR("invalid read [" TARGET_FMT_plx "]\n", addr);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "rc4030: invalid read at 0x%x", (int)addr);
         val = 0;
         break;
     }
 
     if ((addr & ~3) != 0x230) {
-        DPRINTF("read 0x%02x at " TARGET_FMT_plx "\n", val, addr);
+        trace_rc4030_read(addr, val);
     }
 
     return val;
 }
 
-static uint32_t rc4030_readw(void *opaque, hwaddr addr)
+static void rc4030_dma_as_update_one(rc4030State *s, int index, uint32_t frame)
 {
-    uint32_t v = rc4030_readl(opaque, addr & ~0x3);
-    if (addr & 0x2)
-        return v >> 16;
-    else
-        return v & 0xffff;
+    if (index < MAX_TL_ENTRIES) {
+        memory_region_set_enabled(&s->dma_mrs[index], false);
+    }
+
+    if (!frame) {
+        return;
+    }
+
+    if (index >= MAX_TL_ENTRIES) {
+        qemu_log_mask(LOG_UNIMP,
+                      "rc4030: trying to use too high "
+                      "translation table entry %d (max allowed=%d)",
+                      index, MAX_TL_ENTRIES);
+        return;
+    }
+    memory_region_set_alias_offset(&s->dma_mrs[index], frame);
+    memory_region_set_enabled(&s->dma_mrs[index], true);
 }
 
-static uint32_t rc4030_readb(void *opaque, hwaddr addr)
+static void rc4030_dma_tt_write(void *opaque, hwaddr addr, uint64_t data,
+                                unsigned int size)
 {
-    uint32_t v = rc4030_readl(opaque, addr & ~0x3);
-    return (v >> (8 * (addr & 0x3))) & 0xff;
+    rc4030State *s = opaque;
+
+    /* write memory */
+    memcpy(memory_region_get_ram_ptr(&s->dma_tt) + addr, &data, size);
+
+    /* update dma address space (only if frame field has been written) */
+    if (addr % sizeof(dma_pagetable_entry) == 0) {
+        int index = addr / sizeof(dma_pagetable_entry);
+        memory_region_transaction_begin();
+        rc4030_dma_as_update_one(s, index, (uint32_t)data);
+        memory_region_transaction_commit();
+    }
 }
 
-static void rc4030_writel(void *opaque, hwaddr addr, uint32_t val)
+static const MemoryRegionOps rc4030_dma_tt_ops = {
+    .write = rc4030_dma_tt_write,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
+};
+
+static void rc4030_dma_tt_update(rc4030State *s, uint32_t new_tl_base,
+                                 uint32_t new_tl_limit)
+{
+    int entries, i;
+    dma_pagetable_entry *dma_tl_contents;
+
+    if (s->dma_tl_limit) {
+        /* write old dma tl table to physical memory */
+        memory_region_del_subregion(get_system_memory(), &s->dma_tt_alias);
+        cpu_physical_memory_write(s->dma_tl_limit & 0x7fffffff,
+                                  memory_region_get_ram_ptr(&s->dma_tt),
+                                  memory_region_size(&s->dma_tt_alias));
+    }
+    object_unparent(OBJECT(&s->dma_tt_alias));
+
+    s->dma_tl_base = new_tl_base;
+    s->dma_tl_limit = new_tl_limit;
+    new_tl_base &= 0x7fffffff;
+
+    if (s->dma_tl_limit) {
+        uint64_t dma_tt_size;
+        if (s->dma_tl_limit <= memory_region_size(&s->dma_tt)) {
+            dma_tt_size = s->dma_tl_limit;
+        } else {
+            dma_tt_size = memory_region_size(&s->dma_tt);
+        }
+        memory_region_init_alias(&s->dma_tt_alias, OBJECT(s),
+                                 "dma-table-alias",
+                                 &s->dma_tt, 0, dma_tt_size);
+        dma_tl_contents = memory_region_get_ram_ptr(&s->dma_tt);
+        cpu_physical_memory_read(new_tl_base, dma_tl_contents, dma_tt_size);
+
+        memory_region_transaction_begin();
+        entries = dma_tt_size / sizeof(dma_pagetable_entry);
+        for (i = 0; i < entries; i++) {
+            rc4030_dma_as_update_one(s, i, dma_tl_contents[i].frame);
+        }
+        memory_region_add_subregion(get_system_memory(), new_tl_base,
+                                    &s->dma_tt_alias);
+        memory_region_transaction_commit();
+    } else {
+        memory_region_init(&s->dma_tt_alias, OBJECT(s),
+                           "dma-table-alias", 0);
+    }
+}
+
+static void rc4030_write(void *opaque, hwaddr addr, uint64_t data,
+                         unsigned int size)
 {
     rc4030State *s = opaque;
+    uint32_t val = data;
     addr &= 0x3fff;
 
-    DPRINTF("write 0x%02x at " TARGET_FMT_plx "\n", val, addr);
+    trace_rc4030_write(addr, val);
 
     switch (addr & ~0x3) {
     /* Global config register */
@@ -279,11 +360,11 @@ static void rc4030_writel(void *opaque, hwaddr addr, uint32_t val)
         break;
     /* DMA transl. table base */
     case 0x0018:
-        s->dma_tl_base = val;
+        rc4030_dma_tt_update(s, val, s->dma_tl_limit);
         break;
     /* DMA transl. table limit */
     case 0x0020:
-        s->dma_tl_limit = val;
+        rc4030_dma_tt_update(s, s->dma_tl_base, val);
         break;
     /* DMA transl. table invalidated */
     case 0x0028:
@@ -371,9 +452,9 @@ static void rc4030_writel(void *opaque, hwaddr addr, uint32_t val)
             s->dma_regs[entry][idx] = val;
         }
         break;
-    /* Offset 0x0210 */
+    /* Memory refresh rate */
     case 0x0210:
-        s->offset210 = val;
+        s->memory_refresh_rate = val;
         break;
     /* Interval timer reload */
     case 0x0228:
@@ -385,48 +466,18 @@ static void rc4030_writel(void *opaque, hwaddr addr, uint32_t val)
     case 0x0238:
         break;
     default:
-        RC4030_ERROR("invalid write of 0x%02x at [" TARGET_FMT_plx "]\n", val, addr);
-        break;
-    }
-}
-
-static void rc4030_writew(void *opaque, hwaddr addr, uint32_t val)
-{
-    uint32_t old_val = rc4030_readl(opaque, addr & ~0x3);
-
-    if (addr & 0x2)
-        val = (val << 16) | (old_val & 0x0000ffff);
-    else
-        val = val | (old_val & 0xffff0000);
-    rc4030_writel(opaque, addr & ~0x3, val);
-}
-
-static void rc4030_writeb(void *opaque, hwaddr addr, uint32_t val)
-{
-    uint32_t old_val = rc4030_readl(opaque, addr & ~0x3);
-
-    switch (addr & 3) {
-    case 0:
-        val = val | (old_val & 0xffffff00);
-        break;
-    case 1:
-        val = (val << 8) | (old_val & 0xffff00ff);
-        break;
-    case 2:
-        val = (val << 16) | (old_val & 0xff00ffff);
-        break;
-    case 3:
-        val = (val << 24) | (old_val & 0x00ffffff);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "rc4030: invalid write of 0x%02x at 0x%x",
+                      val, (int)addr);
         break;
     }
-    rc4030_writel(opaque, addr & ~0x3, val);
 }
 
 static const MemoryRegionOps rc4030_ops = {
-    .old_mmio = {
-        .read = { rc4030_readb, rc4030_readw, rc4030_readl, },
-        .write = { rc4030_writeb, rc4030_writew, rc4030_writel, },
-    },
+    .read = rc4030_read,
+    .write = rc4030_write,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
@@ -436,22 +487,6 @@ static void update_jazz_irq(rc4030State *s)
 
     pending = s->isr_jazz & s->imr_jazz;
 
-#ifdef DEBUG_RC4030
-    if (s->isr_jazz != 0) {
-        uint32_t irq = 0;
-        DPRINTF("pending irqs:");
-        for (irq = 0; irq < ARRAY_SIZE(irq_names); irq++) {
-            if (s->isr_jazz & (1 << irq)) {
-                printf(" %s", irq_names[irq]);
-                if (!(s->imr_jazz & (1 << irq))) {
-                    printf("(ignored)");
-                }
-            }
-        }
-        printf("\n");
-    }
-#endif
-
     if (pending != 0)
         qemu_irq_raise(s->jazz_bus_irq);
     else
@@ -479,7 +514,7 @@ static void rc4030_periodic_timer(void *opaque)
     qemu_irq_raise(s->timer_irq);
 }
 
-static uint32_t jazzio_readw(void *opaque, hwaddr addr)
+static uint64_t jazzio_read(void *opaque, hwaddr addr, unsigned int size)
 {
     rc4030State *s = opaque;
     uint32_t val;
@@ -494,7 +529,6 @@ static uint32_t jazzio_readw(void *opaque, hwaddr addr)
         irq = 0;
         while (pending) {
             if (pending & 1) {
-                DPRINTF("returning irq %s\n", irq_names[irq]);
                 val = (irq + 1) << 2;
                 break;
             }
@@ -508,36 +542,25 @@ static uint32_t jazzio_readw(void *opaque, hwaddr addr)
         val = s->imr_jazz;
         break;
     default:
-        RC4030_ERROR("(jazz io controller) invalid read [" TARGET_FMT_plx "]\n", addr);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "rc4030/jazzio: invalid read at 0x%x", (int)addr);
         val = 0;
+        break;
     }
 
-    DPRINTF("(jazz io controller) read 0x%04x at " TARGET_FMT_plx "\n", val, addr);
+    trace_jazzio_read(addr, val);
 
     return val;
 }
 
-static uint32_t jazzio_readb(void *opaque, hwaddr addr)
-{
-    uint32_t v;
-    v = jazzio_readw(opaque, addr & ~0x1);
-    return (v >> (8 * (addr & 0x1))) & 0xff;
-}
-
-static uint32_t jazzio_readl(void *opaque, hwaddr addr)
-{
-    uint32_t v;
-    v = jazzio_readw(opaque, addr);
-    v |= jazzio_readw(opaque, addr + 2) << 16;
-    return v;
-}
-
-static void jazzio_writew(void *opaque, hwaddr addr, uint32_t val)
+static void jazzio_write(void *opaque, hwaddr addr, uint64_t data,
+                         unsigned int size)
 {
     rc4030State *s = opaque;
+    uint32_t val = data;
     addr &= 0xfff;
 
-    DPRINTF("(jazz io controller) write 0x%04x at " TARGET_FMT_plx "\n", val, addr);
+    trace_jazzio_write(addr, val);
 
     switch (addr) {
     /* Local bus int enable mask */
@@ -546,43 +569,24 @@ static void jazzio_writew(void *opaque, hwaddr addr, uint32_t val)
         update_jazz_irq(s);
         break;
     default:
-        RC4030_ERROR("(jazz io controller) invalid write of 0x%04x at [" TARGET_FMT_plx "]\n", val, addr);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "rc4030/jazzio: invalid write of 0x%02x at 0x%x",
+                      val, (int)addr);
         break;
     }
 }
 
-static void jazzio_writeb(void *opaque, hwaddr addr, uint32_t val)
-{
-    uint32_t old_val = jazzio_readw(opaque, addr & ~0x1);
-
-    switch (addr & 1) {
-    case 0:
-        val = val | (old_val & 0xff00);
-        break;
-    case 1:
-        val = (val << 8) | (old_val & 0x00ff);
-        break;
-    }
-    jazzio_writew(opaque, addr & ~0x1, val);
-}
-
-static void jazzio_writel(void *opaque, hwaddr addr, uint32_t val)
-{
-    jazzio_writew(opaque, addr, val & 0xffff);
-    jazzio_writew(opaque, addr + 2, (val >> 16) & 0xffff);
-}
-
 static const MemoryRegionOps jazzio_ops = {
-    .old_mmio = {
-        .read = { jazzio_readb, jazzio_readw, jazzio_readl, },
-        .write = { jazzio_writeb, jazzio_writew, jazzio_writel, },
-    },
+    .read = jazzio_read,
+    .write = jazzio_write,
+    .impl.min_access_size = 2,
+    .impl.max_access_size = 2,
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void rc4030_reset(void *opaque)
+static void rc4030_reset(DeviceState *dev)
 {
-    rc4030State *s = opaque;
+    rc4030State *s = RC4030(dev);
     int i;
 
     s->config = 0x410; /* some boards seem to accept 0x104 too */
@@ -590,14 +594,14 @@ static void rc4030_reset(void *opaque)
     s->invalid_address_register = 0;
 
     memset(s->dma_regs, 0, sizeof(s->dma_regs));
-    s->dma_tl_base = s->dma_tl_limit = 0;
+    rc4030_dma_tt_update(s, 0, 0);
 
     s->remote_failed_address = s->memory_failed_address = 0;
     s->cache_maint = 0;
     s->cache_ptag = s->cache_ltag = 0;
     s->cache_bmask = 0;
 
-    s->offset210 = 0x18186;
+    s->memory_refresh_rate = 0x18186;
     s->nvram_protect = 7;
     for (i = 0; i < 15; i++)
         s->rem_speed[i] = 7;
@@ -631,7 +635,7 @@ static int rc4030_load(QEMUFile *f, void *opaque, int version_id)
     s->cache_ptag = qemu_get_be32(f);
     s->cache_ltag = qemu_get_be32(f);
     s->cache_bmask = qemu_get_be32(f);
-    s->offset210 = qemu_get_be32(f);
+    s->memory_refresh_rate = qemu_get_be32(f);
     s->nvram_protect = qemu_get_be32(f);
     for (i = 0; i < 15; i++)
         s->rem_speed[i] = qemu_get_be32(f);
@@ -663,7 +667,7 @@ static void rc4030_save(QEMUFile *f, void *opaque)
     qemu_put_be32(f, s->cache_ptag);
     qemu_put_be32(f, s->cache_ltag);
     qemu_put_be32(f, s->cache_bmask);
-    qemu_put_be32(f, s->offset210);
+    qemu_put_be32(f, s->memory_refresh_rate);
     qemu_put_be32(f, s->nvram_protect);
     for (i = 0; i < 15; i++)
         qemu_put_be32(f, s->rem_speed[i]);
@@ -672,44 +676,6 @@ static void rc4030_save(QEMUFile *f, void *opaque)
     qemu_put_be32(f, s->itr);
 }
 
-void rc4030_dma_memory_rw(void *opaque, hwaddr addr, uint8_t *buf, int len, int is_write)
-{
-    rc4030State *s = opaque;
-    hwaddr entry_addr;
-    hwaddr phys_addr;
-    dma_pagetable_entry entry;
-    int index;
-    int ncpy, i;
-
-    i = 0;
-    for (;;) {
-        if (i == len) {
-            break;
-        }
-
-        ncpy = DMA_PAGESIZE - (addr & (DMA_PAGESIZE - 1));
-        if (ncpy > len - i)
-            ncpy = len - i;
-
-        /* Get DMA translation table entry */
-        index = addr / DMA_PAGESIZE;
-        if (index >= s->dma_tl_limit / sizeof(dma_pagetable_entry)) {
-            break;
-        }
-        entry_addr = s->dma_tl_base + index * sizeof(dma_pagetable_entry);
-        /* XXX: not sure. should we really use only lowest bits? */
-        entry_addr &= 0x7fffffff;
-        cpu_physical_memory_read(entry_addr, &entry, sizeof(entry));
-
-        /* Read/write data at right place */
-        phys_addr = entry.frame + (addr & (DMA_PAGESIZE - 1));
-        cpu_physical_memory_rw(phys_addr, &buf[i], ncpy, is_write);
-
-        i += ncpy;
-        addr += ncpy;
-    }
-}
-
 static void rc4030_do_dma(void *opaque, int n, uint8_t *buf, int len, int is_write)
 {
     rc4030State *s = opaque;
@@ -733,32 +699,11 @@ static void rc4030_do_dma(void *opaque, int n, uint8_t *buf, int len, int is_wri
     dma_addr = s->dma_regs[n][DMA_REG_ADDRESS];
 
     /* Read/write data at right place */
-    rc4030_dma_memory_rw(opaque, dma_addr, buf, len, is_write);
+    address_space_rw(&s->dma_as, dma_addr, MEMTXATTRS_UNSPECIFIED,
+                     buf, len, is_write);
 
     s->dma_regs[n][DMA_REG_ENABLE] |= DMA_FLAG_TC_INTR;
     s->dma_regs[n][DMA_REG_COUNT] -= len;
-
-#ifdef DEBUG_RC4030_DMA
-    {
-        int i, j;
-        printf("rc4030 dma: Copying %d bytes %s host %p\n",
-            len, is_write ? "from" : "to", buf);
-        for (i = 0; i < len; i += 16) {
-            int n = 16;
-            if (n > len - i) {
-                n = len - i;
-            }
-            for (j = 0; j < n; j++)
-                printf("%02x ", buf[i + j]);
-            while (j++ < 16)
-                printf("   ");
-            printf("| ");
-            for (j = 0; j < n; j++)
-                printf("%c", isprint(buf[i + j]) ? buf[i + j] : '.');
-            printf("\n");
-        }
-    }
-#endif
 }
 
 struct rc4030DMAState {
@@ -795,31 +740,102 @@ static rc4030_dma *rc4030_allocate_dmas(void *opaque, int n)
     return s;
 }
 
-void *rc4030_init(qemu_irq timer, qemu_irq jazz_bus,
-                  qemu_irq **irqs, rc4030_dma **dmas,
-                  MemoryRegion *sysmem)
+static void rc4030_initfn(Object *obj)
 {
-    rc4030State *s;
+    DeviceState *dev = DEVICE(obj);
+    rc4030State *s = RC4030(obj);
+    SysBusDevice *sysbus = SYS_BUS_DEVICE(obj);
 
-    s = g_malloc0(sizeof(rc4030State));
+    qdev_init_gpio_in(dev, rc4030_irq_jazz_request, 16);
 
-    *irqs = qemu_allocate_irqs(rc4030_irq_jazz_request, s, 16);
-    *dmas = rc4030_allocate_dmas(s, 4);
+    sysbus_init_irq(sysbus, &s->timer_irq);
+    sysbus_init_irq(sysbus, &s->jazz_bus_irq);
 
-    s->periodic_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, rc4030_periodic_timer, s);
-    s->timer_irq = timer;
-    s->jazz_bus_irq = jazz_bus;
-
-    qemu_register_reset(rc4030_reset, s);
     register_savevm(NULL, "rc4030", 0, 2, rc4030_save, rc4030_load, s);
-    rc4030_reset(s);
+
+    sysbus_init_mmio(sysbus, &s->iomem_chipset);
+    sysbus_init_mmio(sysbus, &s->iomem_jazzio);
+}
+
+static void rc4030_realize(DeviceState *dev, Error **errp)
+{
+    rc4030State *s = RC4030(dev);
+    Object *o = OBJECT(dev);
+    int i;
+
+    s->periodic_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                     rc4030_periodic_timer, s);
 
     memory_region_init_io(&s->iomem_chipset, NULL, &rc4030_ops, s,
                           "rc4030.chipset", 0x300);
-    memory_region_add_subregion(sysmem, 0x80000000, &s->iomem_chipset);
     memory_region_init_io(&s->iomem_jazzio, NULL, &jazzio_ops, s,
                           "rc4030.jazzio", 0x00001000);
-    memory_region_add_subregion(sysmem, 0xf0000000, &s->iomem_jazzio);
 
-    return s;
+    memory_region_init_rom_device(&s->dma_tt, o,
+                                  &rc4030_dma_tt_ops, s, "dma-table",
+                                  MAX_TL_ENTRIES * sizeof(dma_pagetable_entry),
+                                  NULL);
+    memory_region_init(&s->dma_tt_alias, o, "dma-table-alias", 0);
+    memory_region_init(&s->dma_mr, o, "dma", INT32_MAX);
+    for (i = 0; i < MAX_TL_ENTRIES; ++i) {
+        memory_region_init_alias(&s->dma_mrs[i], o, "dma-alias",
+                                 get_system_memory(), 0, DMA_PAGESIZE);
+        memory_region_set_enabled(&s->dma_mrs[i], false);
+        memory_region_add_subregion(&s->dma_mr, i * DMA_PAGESIZE,
+                                    &s->dma_mrs[i]);
+    }
+    address_space_init(&s->dma_as, &s->dma_mr, "rc4030-dma");
+}
+
+static void rc4030_unrealize(DeviceState *dev, Error **errp)
+{
+    rc4030State *s = RC4030(dev);
+    int i;
+
+    timer_free(s->periodic_timer);
+
+    address_space_destroy(&s->dma_as);
+    object_unparent(OBJECT(&s->dma_tt));
+    object_unparent(OBJECT(&s->dma_tt_alias));
+    object_unparent(OBJECT(&s->dma_mr));
+    for (i = 0; i < MAX_TL_ENTRIES; ++i) {
+        memory_region_del_subregion(&s->dma_mr, &s->dma_mrs[i]);
+        object_unparent(OBJECT(&s->dma_mrs[i]));
+    }
+}
+
+static void rc4030_class_init(ObjectClass *klass, void *class_data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = rc4030_realize;
+    dc->unrealize = rc4030_unrealize;
+    dc->reset = rc4030_reset;
+}
+
+static const TypeInfo rc4030_info = {
+    .name = TYPE_RC4030,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(rc4030State),
+    .instance_init = rc4030_initfn,
+    .class_init = rc4030_class_init,
+};
+
+static void rc4030_register_types(void)
+{
+    type_register_static(&rc4030_info);
+}
+
+type_init(rc4030_register_types)
+
+DeviceState *rc4030_init(rc4030_dma **dmas, MemoryRegion **dma_mr)
+{
+    DeviceState *dev;
+
+    dev = qdev_create(NULL, TYPE_RC4030);
+    qdev_init_nofail(dev);
+
+    *dmas = rc4030_allocate_dmas(dev, 4);
+    *dma_mr = &RC4030(dev)->dma_mr;
+    return dev;
 }
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 5253e6d4fa..e142f75649 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -52,6 +52,7 @@
 #ifdef CONFIG_XEN
 #  include <xen/hvm/hvm_info_table.h>
 #endif
+#include "migration/migration.h"
 
 #define MAX_IDE_BUS 2
 
@@ -305,6 +306,7 @@ static void pc_init1(MachineState *machine)
 
 static void pc_compat_2_3(MachineState *machine)
 {
+    savevm_skip_section_footers();
 }
 
 static void pc_compat_2_2(MachineState *machine)
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 110dfb78a8..b68263d231 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -45,6 +45,7 @@
 #include "hw/usb.h"
 #include "hw/cpu/icc_bus.h"
 #include "qemu/error-report.h"
+#include "migration/migration.h"
 
 /* ICH9 AHCI has 6 ports */
 #define MAX_SATA_PORTS     6
@@ -289,6 +290,7 @@ static void pc_q35_init(MachineState *machine)
 
 static void pc_compat_2_3(MachineState *machine)
 {
+    savevm_skip_section_footers();
 }
 
 static void pc_compat_2_2(MachineState *machine)
diff --git a/hw/ide/core.c b/hw/ide/core.c
index fcb908061c..1efd98af63 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -2561,6 +2561,7 @@ static const VMStateDescription vmstate_ide_atapi_gesn_state = {
     .name ="ide_drive/atapi/gesn_state",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = ide_atapi_gesn_needed,
     .fields = (VMStateField[]) {
         VMSTATE_BOOL(events.new_media, IDEState),
         VMSTATE_BOOL(events.eject_request, IDEState),
@@ -2572,6 +2573,7 @@ static const VMStateDescription vmstate_ide_tray_state = {
     .name = "ide_drive/tray_state",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = ide_tray_state_needed,
     .fields = (VMStateField[]) {
         VMSTATE_BOOL(tray_open, IDEState),
         VMSTATE_BOOL(tray_locked, IDEState),
@@ -2585,6 +2587,7 @@ static const VMStateDescription vmstate_ide_drive_pio_state = {
     .minimum_version_id = 1,
     .pre_save = ide_drive_pio_pre_save,
     .post_load = ide_drive_pio_post_load,
+    .needed = ide_drive_pio_state_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(req_nb_sectors, IDEState),
         VMSTATE_VARRAY_INT32(io_buffer, IDEState, io_buffer_total_len, 1,
@@ -2626,19 +2629,11 @@ const VMStateDescription vmstate_ide_drive = {
         VMSTATE_UINT8_V(cdrom_changed, IDEState, 3),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_ide_drive_pio_state,
-            .needed = ide_drive_pio_state_needed,
-        }, {
-            .vmsd = &vmstate_ide_tray_state,
-            .needed = ide_tray_state_needed,
-        }, {
-            .vmsd = &vmstate_ide_atapi_gesn_state,
-            .needed = ide_atapi_gesn_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_ide_drive_pio_state,
+        &vmstate_ide_tray_state,
+        &vmstate_ide_atapi_gesn_state,
+        NULL
     }
 };
 
@@ -2646,6 +2641,7 @@ static const VMStateDescription vmstate_ide_error_status = {
     .name ="ide_bus/error",
     .version_id = 2,
     .minimum_version_id = 1,
+    .needed = ide_error_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(error_status, IDEBus),
         VMSTATE_INT64_V(retry_sector_num, IDEBus, 2),
@@ -2664,13 +2660,9 @@ const VMStateDescription vmstate_ide_bus = {
         VMSTATE_UINT8(unit, IDEBus),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_ide_error_status,
-            .needed = ide_error_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_ide_error_status,
+        NULL
     }
 };
 
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index 4b5e32dcbe..4afd0cfe8c 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -350,6 +350,7 @@ static const VMStateDescription vmstate_bmdma_current = {
     .name = "ide bmdma_current",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = ide_bmdma_current_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(cur_addr, BMDMAState),
         VMSTATE_UINT32(cur_prd_last, BMDMAState),
@@ -363,6 +364,7 @@ static const VMStateDescription vmstate_bmdma_status = {
     .name ="ide bmdma/status",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = ide_bmdma_status_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(status, BMDMAState),
         VMSTATE_END_OF_LIST()
@@ -383,16 +385,10 @@ static const VMStateDescription vmstate_bmdma = {
         VMSTATE_UINT8(migration_retry_unit, BMDMAState),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_bmdma_current,
-            .needed = ide_bmdma_current_needed,
-        }, {
-            .vmsd = &vmstate_bmdma_status,
-            .needed = ide_bmdma_status_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_bmdma_current,
+        &vmstate_bmdma_status,
+        NULL
     }
 };
 
diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c
index 9b9a7d7a8a..ddac69df6f 100644
--- a/hw/input/pckbd.c
+++ b/hw/input/pckbd.c
@@ -391,23 +391,24 @@ static int kbd_outport_post_load(void *opaque, int version_id)
     return 0;
 }
 
+static bool kbd_outport_needed(void *opaque)
+{
+    KBDState *s = opaque;
+    return s->outport != kbd_outport_default(s);
+}
+
 static const VMStateDescription vmstate_kbd_outport = {
     .name = "pckbd_outport",
     .version_id = 1,
     .minimum_version_id = 1,
     .post_load = kbd_outport_post_load,
+    .needed = kbd_outport_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(outport, KBDState),
         VMSTATE_END_OF_LIST()
     }
 };
 
-static bool kbd_outport_needed(void *opaque)
-{
-    KBDState *s = opaque;
-    return s->outport != kbd_outport_default(s);
-}
-
 static int kbd_post_load(void *opaque, int version_id)
 {
     KBDState *s = opaque;
@@ -430,12 +431,9 @@ static const VMStateDescription vmstate_kbd = {
         VMSTATE_UINT8(pending, KBDState),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_kbd_outport,
-            .needed = kbd_outport_needed,
-        },
-        VMSTATE_END_OF_LIST()
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_kbd_outport,
+        NULL
     }
 };
 
diff --git a/hw/input/ps2.c b/hw/input/ps2.c
index 4baeea2b56..fdbe565e62 100644
--- a/hw/input/ps2.c
+++ b/hw/input/ps2.c
@@ -677,6 +677,7 @@ static const VMStateDescription vmstate_ps2_keyboard_ledstate = {
     .version_id = 3,
     .minimum_version_id = 2,
     .post_load = ps2_kbd_ledstate_post_load,
+    .needed = ps2_keyboard_ledstate_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(ledstate, PS2KbdState),
         VMSTATE_END_OF_LIST()
@@ -717,13 +718,9 @@ static const VMStateDescription vmstate_ps2_keyboard = {
         VMSTATE_INT32_V(scancode_set, PS2KbdState,3),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_ps2_keyboard_ledstate,
-            .needed = ps2_keyboard_ledstate_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_ps2_keyboard_ledstate,
+        NULL
     }
 };
 
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index d595d63a51..0032b97c5f 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -369,6 +369,7 @@ static const VMStateDescription vmstate_apic_common_sipi = {
     .name = "apic_sipi",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = apic_common_sipi_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(sipi_vector, APICCommonState),
         VMSTATE_INT32(wait_for_sipi, APICCommonState),
@@ -408,12 +409,9 @@ static const VMStateDescription vmstate_apic_common = {
                       APICCommonState), /* open-coded timer state */
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_apic_common_sipi,
-            .needed = apic_common_sipi_needed,
-        },
-        VMSTATE_END_OF_LIST()
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_apic_common_sipi,
+        NULL
     }
 };
 
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index 71a9f7a716..b3e0b1fd52 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -650,6 +650,7 @@ static const VMStateDescription vmstate_ich9_rst_cnt = {
     .name = "ICH9LPC/rst_cnt",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = ich9_rst_cnt_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(rst_cnt, ICH9LPCState),
         VMSTATE_END_OF_LIST()
@@ -669,12 +670,9 @@ static const VMStateDescription vmstate_ich9_lpc = {
         VMSTATE_UINT32(sci_level, ICH9LPCState),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_ich9_rst_cnt,
-            .needed = ich9_rst_cnt_needed
-        },
-        { 0 }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_ich9_rst_cnt,
+        NULL
     }
 };
 
diff --git a/hw/mips/Makefile.objs b/hw/mips/Makefile.objs
index 0a652f8521..9633f3a57d 100644
--- a/hw/mips/Makefile.objs
+++ b/hw/mips/Makefile.objs
@@ -1,4 +1,5 @@
-obj-y += mips_r4k.o mips_jazz.o mips_malta.o mips_mipssim.o
+obj-y += mips_r4k.o mips_malta.o mips_mipssim.o
 obj-y += addr.o cputimer.o mips_int.o
+obj-$(CONFIG_JAZZ) += mips_jazz.o
 obj-$(CONFIG_FULONG) += mips_fulong2e.o
 obj-y += gt64xxx_pci.o
diff --git a/hw/mips/mips_jazz.c b/hw/mips/mips_jazz.c
index 2c153e092f..9d60633efb 100644
--- a/hw/mips/mips_jazz.c
+++ b/hw/mips/mips_jazz.c
@@ -135,16 +135,16 @@ static void mips_jazz_init(MachineState *machine,
     MIPSCPU *cpu;
     CPUClass *cc;
     CPUMIPSState *env;
-    qemu_irq *rc4030, *i8259;
+    qemu_irq *i8259;
     rc4030_dma *dmas;
-    void* rc4030_opaque;
+    MemoryRegion *rc4030_dma_mr;
     MemoryRegion *isa_mem = g_new(MemoryRegion, 1);
     MemoryRegion *isa_io = g_new(MemoryRegion, 1);
     MemoryRegion *rtc = g_new(MemoryRegion, 1);
     MemoryRegion *i8042 = g_new(MemoryRegion, 1);
     MemoryRegion *dma_dummy = g_new(MemoryRegion, 1);
     NICInfo *nd;
-    DeviceState *dev;
+    DeviceState *dev, *rc4030;
     SysBusDevice *sysbus;
     ISABus *isa_bus;
     ISADevice *pit;
@@ -157,12 +157,7 @@ static void mips_jazz_init(MachineState *machine,
 
     /* init CPUs */
     if (cpu_model == NULL) {
-#ifdef TARGET_MIPS64
         cpu_model = "R4000";
-#else
-        /* FIXME: All wrong, this maybe should be R3000 for the older JAZZs. */
-        cpu_model = "24Kf";
-#endif
     }
     cpu = cpu_mips_init(cpu_model);
     if (cpu == NULL) {
@@ -218,8 +213,14 @@ static void mips_jazz_init(MachineState *machine,
     cpu_mips_clock_init(env);
 
     /* Chipset */
-    rc4030_opaque = rc4030_init(env->irq[6], env->irq[3], &rc4030, &dmas,
-                                address_space);
+    rc4030 = rc4030_init(&dmas, &rc4030_dma_mr);
+    sysbus = SYS_BUS_DEVICE(rc4030);
+    sysbus_connect_irq(sysbus, 0, env->irq[6]);
+    sysbus_connect_irq(sysbus, 1, env->irq[3]);
+    memory_region_add_subregion(address_space, 0x80000000,
+                                sysbus_mmio_get_region(sysbus, 0));
+    memory_region_add_subregion(address_space, 0xf0000000,
+                                sysbus_mmio_get_region(sysbus, 1));
     memory_region_init_io(dma_dummy, NULL, &dma_dummy_ops, NULL, "dummy_dma", 0x1000);
     memory_region_add_subregion(address_space, 0x8000d000, dma_dummy);
 
@@ -246,7 +247,7 @@ static void mips_jazz_init(MachineState *machine,
         sysbus = SYS_BUS_DEVICE(dev);
         sysbus_mmio_map(sysbus, 0, 0x60080000);
         sysbus_mmio_map(sysbus, 1, 0x40000000);
-        sysbus_connect_irq(sysbus, 0, rc4030[3]);
+        sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(rc4030, 3));
         {
             /* Simple ROM, so user doesn't have to provide one */
             MemoryRegion *rom_mr = g_new(MemoryRegion, 1);
@@ -272,8 +273,17 @@ static void mips_jazz_init(MachineState *machine,
         if (!nd->model)
             nd->model = g_strdup("dp83932");
         if (strcmp(nd->model, "dp83932") == 0) {
-            dp83932_init(nd, 0x80001000, 2, get_system_memory(), rc4030[4],
-                         rc4030_opaque, rc4030_dma_memory_rw);
+            qemu_check_nic_model(nd, "dp83932");
+
+            dev = qdev_create(NULL, "dp8393x");
+            qdev_set_nic_properties(dev, nd);
+            qdev_prop_set_uint8(dev, "it_shift", 2);
+            qdev_prop_set_ptr(dev, "dma_mr", rc4030_dma_mr);
+            qdev_init_nofail(dev);
+            sysbus = SYS_BUS_DEVICE(dev);
+            sysbus_mmio_map(sysbus, 0, 0x80001000);
+            sysbus_mmio_map(sysbus, 1, 0x8000b000);
+            sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(rc4030, 4));
             break;
         } else if (is_help_option(nd->model)) {
             fprintf(stderr, "qemu: Supported NICs: dp83932\n");
@@ -287,7 +297,7 @@ static void mips_jazz_init(MachineState *machine,
     /* SCSI adapter */
     esp_init(0x80002000, 0,
              rc4030_dma_read, rc4030_dma_write, dmas[0],
-             rc4030[5], &esp_reset, &dma_enable);
+             qdev_get_gpio_in(rc4030, 5), &esp_reset, &dma_enable);
 
     /* Floppy */
     if (drive_get_max_bus(IF_FLOPPY) >= MAX_FD) {
@@ -297,7 +307,7 @@ static void mips_jazz_init(MachineState *machine,
     for (n = 0; n < MAX_FD; n++) {
         fds[n] = drive_get(IF_FLOPPY, 0, n);
     }
-    fdctrl_init_sysbus(rc4030[1], 0, 0x80003000, fds);
+    fdctrl_init_sysbus(qdev_get_gpio_in(rc4030, 1), 0, 0x80003000, fds);
 
     /* Real time clock */
     rtc_init(isa_bus, 1980, NULL);
@@ -305,23 +315,26 @@ static void mips_jazz_init(MachineState *machine,
     memory_region_add_subregion(address_space, 0x80004000, rtc);
 
     /* Keyboard (i8042) */
-    i8042_mm_init(rc4030[6], rc4030[7], i8042, 0x1000, 0x1);
+    i8042_mm_init(qdev_get_gpio_in(rc4030, 6), qdev_get_gpio_in(rc4030, 7),
+                  i8042, 0x1000, 0x1);
     memory_region_add_subregion(address_space, 0x80005000, i8042);
 
     /* Serial ports */
     if (serial_hds[0]) {
-        serial_mm_init(address_space, 0x80006000, 0, rc4030[8], 8000000/16,
+        serial_mm_init(address_space, 0x80006000, 0,
+                       qdev_get_gpio_in(rc4030, 8), 8000000/16,
                        serial_hds[0], DEVICE_NATIVE_ENDIAN);
     }
     if (serial_hds[1]) {
-        serial_mm_init(address_space, 0x80007000, 0, rc4030[9], 8000000/16,
+        serial_mm_init(address_space, 0x80007000, 0,
+                       qdev_get_gpio_in(rc4030, 9), 8000000/16,
                        serial_hds[1], DEVICE_NATIVE_ENDIAN);
     }
 
     /* Parallel port */
     if (parallel_hds[0])
-        parallel_mm_init(address_space, 0x80008000, 0, rc4030[0],
-                         parallel_hds[0]);
+        parallel_mm_init(address_space, 0x80008000, 0,
+                         qdev_get_gpio_in(rc4030, 0), parallel_hds[0]);
 
     /* FIXME: missing Jazz sound at 0x8000c000, rc4030[2] */
 
diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index 5140882c00..786a8f0638 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -97,7 +97,7 @@ typedef struct {
 static ISADevice *pit;
 
 static struct _loaderparams {
-    int ram_size;
+    int ram_size, ram_low_size;
     const char *kernel_filename;
     const char *kernel_cmdline;
     const char *initrd_filename;
@@ -641,8 +641,8 @@ static void write_bootloader (CPUMIPSState *env, uint8_t *base,
     stl_p(p++, 0x34a50000 | (ENVP_ADDR & 0xffff));               /* ori a1, a1, low(ENVP_ADDR) */
     stl_p(p++, 0x3c060000 | (((ENVP_ADDR + 8) >> 16) & 0xffff)); /* lui a2, high(ENVP_ADDR + 8) */
     stl_p(p++, 0x34c60000 | ((ENVP_ADDR + 8) & 0xffff));         /* ori a2, a2, low(ENVP_ADDR + 8) */
-    stl_p(p++, 0x3c070000 | (loaderparams.ram_size >> 16));     /* lui a3, high(ram_size) */
-    stl_p(p++, 0x34e70000 | (loaderparams.ram_size & 0xffff));  /* ori a3, a3, low(ram_size) */
+    stl_p(p++, 0x3c070000 | (loaderparams.ram_low_size >> 16));     /* lui a3, high(ram_low_size) */
+    stl_p(p++, 0x34e70000 | (loaderparams.ram_low_size & 0xffff));  /* ori a3, a3, low(ram_low_size) */
 
     /* Load BAR registers as done by YAMON */
     stl_p(p++, 0x3c09b400);                                      /* lui t1, 0xb400 */
@@ -851,8 +851,10 @@ static int64_t load_kernel (void)
     }
 
     prom_set(prom_buf, prom_index++, "memsize");
-    prom_set(prom_buf, prom_index++, "%i",
-             MIN(loaderparams.ram_size, 256 << 20));
+    prom_set(prom_buf, prom_index++, "%u", loaderparams.ram_low_size);
+
+    prom_set(prom_buf, prom_index++, "ememsize");
+    prom_set(prom_buf, prom_index++, "%u", loaderparams.ram_size);
 
     prom_set(prom_buf, prom_index++, "modetty0");
     prom_set(prom_buf, prom_index++, "38400n8r");
@@ -1054,7 +1056,8 @@ void mips_malta_init(MachineState *machine)
         }
 
         /* Write a small bootloader to the flash location. */
-        loaderparams.ram_size = ram_low_size;
+        loaderparams.ram_size = ram_size;
+        loaderparams.ram_low_size = ram_low_size;
         loaderparams.kernel_filename = kernel_filename;
         loaderparams.kernel_cmdline = kernel_cmdline;
         loaderparams.initrd_filename = initrd_filename;
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
index 7ce13d2b46..cd889bce86 100644
--- a/hw/net/dp8393x.c
+++ b/hw/net/dp8393x.c
@@ -17,20 +17,15 @@
  * with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "hw/hw.h"
-#include "qemu/timer.h"
+#include "hw/sysbus.h"
+#include "hw/devices.h"
 #include "net/net.h"
-#include "hw/mips/mips.h"
+#include "qemu/timer.h"
+#include <zlib.h>
 
 //#define DEBUG_SONIC
 
-/* Calculate CRCs properly on Rx packets */
-#define SONIC_CALCULATE_RXCRC
-
-#if defined(SONIC_CALCULATE_RXCRC)
-/* For crc32 */
-#include <zlib.h>
-#endif
+#define SONIC_PROM_SIZE 0x1000
 
 #ifdef DEBUG_SONIC
 #define DPRINTF(fmt, ...) \
@@ -145,9 +140,14 @@ do { printf("sonic ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0)
 #define SONIC_ISR_PINT   0x0800
 #define SONIC_ISR_LCD    0x1000
 
+#define TYPE_DP8393X "dp8393x"
+#define DP8393X(obj) OBJECT_CHECK(dp8393xState, (obj), TYPE_DP8393X)
+
 typedef struct dp8393xState {
+    SysBusDevice parent_obj;
+
     /* Hardware */
-    int it_shift;
+    uint8_t it_shift;
     qemu_irq irq;
 #ifdef DEBUG_SONIC
     int irq_level;
@@ -156,8 +156,8 @@ typedef struct dp8393xState {
     int64_t wt_last_update;
     NICConf conf;
     NICState *nic;
-    MemoryRegion *address_space;
     MemoryRegion mmio;
+    MemoryRegion prom;
 
     /* Registers */
     uint8_t cam[16][6];
@@ -168,8 +168,8 @@ typedef struct dp8393xState {
     int loopback_packet;
 
     /* Memory access */
-    void (*memory_rw)(void *opaque, hwaddr addr, uint8_t *buf, int len, int is_write);
-    void* mem_opaque;
+    void *dma_mr;
+    AddressSpace as;
 } dp8393xState;
 
 static void dp8393x_update_irq(dp8393xState *s)
@@ -190,7 +190,7 @@ static void dp8393x_update_irq(dp8393xState *s)
     qemu_set_irq(s->irq, level);
 }
 
-static void do_load_cam(dp8393xState *s)
+static void dp8393x_do_load_cam(dp8393xState *s)
 {
     uint16_t data[8];
     int width, size;
@@ -201,9 +201,9 @@ static void do_load_cam(dp8393xState *s)
 
     while (s->regs[SONIC_CDC] & 0x1f) {
         /* Fill current entry */
-        s->memory_rw(s->mem_opaque,
+        address_space_rw(&s->as,
             (s->regs[SONIC_URRA] << 16) | s->regs[SONIC_CDP],
-            (uint8_t *)data, size, 0);
+            MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 0);
         s->cam[index][0] = data[1 * width] & 0xff;
         s->cam[index][1] = data[1 * width] >> 8;
         s->cam[index][2] = data[2 * width] & 0xff;
@@ -220,9 +220,9 @@ static void do_load_cam(dp8393xState *s)
     }
 
     /* Read CAM enable */
-    s->memory_rw(s->mem_opaque,
+    address_space_rw(&s->as,
         (s->regs[SONIC_URRA] << 16) | s->regs[SONIC_CDP],
-        (uint8_t *)data, size, 0);
+        MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 0);
     s->regs[SONIC_CE] = data[0 * width];
     DPRINTF("load cam done. cam enable mask 0x%04x\n", s->regs[SONIC_CE]);
 
@@ -232,7 +232,7 @@ static void do_load_cam(dp8393xState *s)
     dp8393x_update_irq(s);
 }
 
-static void do_read_rra(dp8393xState *s)
+static void dp8393x_do_read_rra(dp8393xState *s)
 {
     uint16_t data[8];
     int width, size;
@@ -240,9 +240,9 @@ static void do_read_rra(dp8393xState *s)
     /* Read memory */
     width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 2 : 1;
     size = sizeof(uint16_t) * 4 * width;
-    s->memory_rw(s->mem_opaque,
+    address_space_rw(&s->as,
         (s->regs[SONIC_URRA] << 16) | s->regs[SONIC_RRP],
-        (uint8_t *)data, size, 0);
+        MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 0);
 
     /* Update SONIC registers */
     s->regs[SONIC_CRBA0] = data[0 * width];
@@ -272,7 +272,7 @@ static void do_read_rra(dp8393xState *s)
     s->regs[SONIC_CR] &= ~SONIC_CR_RRRA;
 }
 
-static void do_software_reset(dp8393xState *s)
+static void dp8393x_do_software_reset(dp8393xState *s)
 {
     timer_del(s->watchdog);
 
@@ -280,7 +280,7 @@ static void do_software_reset(dp8393xState *s)
     s->regs[SONIC_CR] |= SONIC_CR_RST | SONIC_CR_RXDIS;
 }
 
-static void set_next_tick(dp8393xState *s)
+static void dp8393x_set_next_tick(dp8393xState *s)
 {
     uint32_t ticks;
     int64_t delay;
@@ -296,7 +296,7 @@ static void set_next_tick(dp8393xState *s)
     timer_mod(s->watchdog, s->wt_last_update + delay);
 }
 
-static void update_wt_regs(dp8393xState *s)
+static void dp8393x_update_wt_regs(dp8393xState *s)
 {
     int64_t elapsed;
     uint32_t val;
@@ -311,33 +311,33 @@ static void update_wt_regs(dp8393xState *s)
     val -= elapsed / 5000000;
     s->regs[SONIC_WT1] = (val >> 16) & 0xffff;
     s->regs[SONIC_WT0] = (val >> 0)  & 0xffff;
-    set_next_tick(s);
+    dp8393x_set_next_tick(s);
 
 }
 
-static void do_start_timer(dp8393xState *s)
+static void dp8393x_do_start_timer(dp8393xState *s)
 {
     s->regs[SONIC_CR] &= ~SONIC_CR_STP;
-    set_next_tick(s);
+    dp8393x_set_next_tick(s);
 }
 
-static void do_stop_timer(dp8393xState *s)
+static void dp8393x_do_stop_timer(dp8393xState *s)
 {
     s->regs[SONIC_CR] &= ~SONIC_CR_ST;
-    update_wt_regs(s);
+    dp8393x_update_wt_regs(s);
 }
 
-static void do_receiver_enable(dp8393xState *s)
+static void dp8393x_do_receiver_enable(dp8393xState *s)
 {
     s->regs[SONIC_CR] &= ~SONIC_CR_RXDIS;
 }
 
-static void do_receiver_disable(dp8393xState *s)
+static void dp8393x_do_receiver_disable(dp8393xState *s)
 {
     s->regs[SONIC_CR] &= ~SONIC_CR_RXEN;
 }
 
-static void do_transmit_packets(dp8393xState *s)
+static void dp8393x_do_transmit_packets(dp8393xState *s)
 {
     NetClientState *nc = qemu_get_queue(s->nic);
     uint16_t data[12];
@@ -353,9 +353,9 @@ static void do_transmit_packets(dp8393xState *s)
                 (s->regs[SONIC_UTDA] << 16) | s->regs[SONIC_CTDA]);
         size = sizeof(uint16_t) * 6 * width;
         s->regs[SONIC_TTDA] = s->regs[SONIC_CTDA];
-        s->memory_rw(s->mem_opaque,
+        address_space_rw(&s->as,
             ((s->regs[SONIC_UTDA] << 16) | s->regs[SONIC_TTDA]) + sizeof(uint16_t) * width,
-            (uint8_t *)data, size, 0);
+            MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 0);
         tx_len = 0;
 
         /* Update registers */
@@ -379,18 +379,18 @@ static void do_transmit_packets(dp8393xState *s)
             if (tx_len + len > sizeof(s->tx_buffer)) {
                 len = sizeof(s->tx_buffer) - tx_len;
             }
-            s->memory_rw(s->mem_opaque,
+            address_space_rw(&s->as,
                 (s->regs[SONIC_TSA1] << 16) | s->regs[SONIC_TSA0],
-                &s->tx_buffer[tx_len], len, 0);
+                MEMTXATTRS_UNSPECIFIED, &s->tx_buffer[tx_len], len, 0);
             tx_len += len;
 
             i++;
             if (i != s->regs[SONIC_TFC]) {
                 /* Read next fragment details */
                 size = sizeof(uint16_t) * 3 * width;
-                s->memory_rw(s->mem_opaque,
+                address_space_rw(&s->as,
                     ((s->regs[SONIC_UTDA] << 16) | s->regs[SONIC_TTDA]) + sizeof(uint16_t) * (4 + 3 * i) * width,
-                    (uint8_t *)data, size, 0);
+                    MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 0);
                 s->regs[SONIC_TSA0] = data[0 * width];
                 s->regs[SONIC_TSA1] = data[1 * width];
                 s->regs[SONIC_TFS] = data[2 * width];
@@ -422,16 +422,16 @@ static void do_transmit_packets(dp8393xState *s)
         /* Write status */
         data[0 * width] = s->regs[SONIC_TCR] & 0x0fff; /* status */
         size = sizeof(uint16_t) * width;
-        s->memory_rw(s->mem_opaque,
+        address_space_rw(&s->as,
             (s->regs[SONIC_UTDA] << 16) | s->regs[SONIC_TTDA],
-            (uint8_t *)data, size, 1);
+            MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 1);
 
         if (!(s->regs[SONIC_CR] & SONIC_CR_HTX)) {
             /* Read footer of packet */
             size = sizeof(uint16_t) * width;
-            s->memory_rw(s->mem_opaque,
+            address_space_rw(&s->as,
                 ((s->regs[SONIC_UTDA] << 16) | s->regs[SONIC_TTDA]) + sizeof(uint16_t) * (4 + 3 * s->regs[SONIC_TFC]) * width,
-                (uint8_t *)data, size, 0);
+                MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 0);
             s->regs[SONIC_CTDA] = data[0 * width] & ~0x1;
             if (data[0 * width] & 0x1) {
                 /* EOL detected */
@@ -446,12 +446,12 @@ static void do_transmit_packets(dp8393xState *s)
     dp8393x_update_irq(s);
 }
 
-static void do_halt_transmission(dp8393xState *s)
+static void dp8393x_do_halt_transmission(dp8393xState *s)
 {
     /* Nothing to do */
 }
 
-static void do_command(dp8393xState *s, uint16_t command)
+static void dp8393x_do_command(dp8393xState *s, uint16_t command)
 {
     if ((s->regs[SONIC_CR] & SONIC_CR_RST) && !(command & SONIC_CR_RST)) {
         s->regs[SONIC_CR] &= ~SONIC_CR_RST;
@@ -461,34 +461,36 @@ static void do_command(dp8393xState *s, uint16_t command)
     s->regs[SONIC_CR] |= (command & SONIC_CR_MASK);
 
     if (command & SONIC_CR_HTX)
-        do_halt_transmission(s);
+        dp8393x_do_halt_transmission(s);
     if (command & SONIC_CR_TXP)
-        do_transmit_packets(s);
+        dp8393x_do_transmit_packets(s);
     if (command & SONIC_CR_RXDIS)
-        do_receiver_disable(s);
+        dp8393x_do_receiver_disable(s);
     if (command & SONIC_CR_RXEN)
-        do_receiver_enable(s);
+        dp8393x_do_receiver_enable(s);
     if (command & SONIC_CR_STP)
-        do_stop_timer(s);
+        dp8393x_do_stop_timer(s);
     if (command & SONIC_CR_ST)
-        do_start_timer(s);
+        dp8393x_do_start_timer(s);
     if (command & SONIC_CR_RST)
-        do_software_reset(s);
+        dp8393x_do_software_reset(s);
     if (command & SONIC_CR_RRRA)
-        do_read_rra(s);
+        dp8393x_do_read_rra(s);
     if (command & SONIC_CR_LCAM)
-        do_load_cam(s);
+        dp8393x_do_load_cam(s);
 }
 
-static uint16_t read_register(dp8393xState *s, int reg)
+static uint64_t dp8393x_read(void *opaque, hwaddr addr, unsigned int size)
 {
+    dp8393xState *s = opaque;
+    int reg = addr >> s->it_shift;
     uint16_t val = 0;
 
     switch (reg) {
         /* Update data before reading it */
         case SONIC_WT0:
         case SONIC_WT1:
-            update_wt_regs(s);
+            dp8393x_update_wt_regs(s);
             val = s->regs[reg];
             break;
         /* Accept read to some registers only when in reset mode */
@@ -510,14 +512,18 @@ static uint16_t read_register(dp8393xState *s, int reg)
     return val;
 }
 
-static void write_register(dp8393xState *s, int reg, uint16_t val)
+static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
+                          unsigned int size)
 {
-    DPRINTF("write 0x%04x to reg %s\n", val, reg_names[reg]);
+    dp8393xState *s = opaque;
+    int reg = addr >> s->it_shift;
+
+    DPRINTF("write 0x%04x to reg %s\n", (uint16_t)data, reg_names[reg]);
 
     switch (reg) {
         /* Command register */
         case SONIC_CR:
-            do_command(s, val);
+            dp8393x_do_command(s, data);
             break;
         /* Prevent write to read-only registers */
         case SONIC_CAP2:
@@ -530,37 +536,37 @@ static void write_register(dp8393xState *s, int reg, uint16_t val)
         /* Accept write to some registers only when in reset mode */
         case SONIC_DCR:
             if (s->regs[SONIC_CR] & SONIC_CR_RST) {
-                s->regs[reg] = val & 0xbfff;
+                s->regs[reg] = data & 0xbfff;
             } else {
                 DPRINTF("writing to DCR invalid\n");
             }
             break;
         case SONIC_DCR2:
             if (s->regs[SONIC_CR] & SONIC_CR_RST) {
-                s->regs[reg] = val & 0xf017;
+                s->regs[reg] = data & 0xf017;
             } else {
                 DPRINTF("writing to DCR2 invalid\n");
             }
             break;
         /* 12 lower bytes are Read Only */
         case SONIC_TCR:
-            s->regs[reg] = val & 0xf000;
+            s->regs[reg] = data & 0xf000;
             break;
         /* 9 lower bytes are Read Only */
         case SONIC_RCR:
-            s->regs[reg] = val & 0xffe0;
+            s->regs[reg] = data & 0xffe0;
             break;
         /* Ignore most significant bit */
         case SONIC_IMR:
-            s->regs[reg] = val & 0x7fff;
+            s->regs[reg] = data & 0x7fff;
             dp8393x_update_irq(s);
             break;
         /* Clear bits by writing 1 to them */
         case SONIC_ISR:
-            val &= s->regs[reg];
-            s->regs[reg] &= ~val;
-            if (val & SONIC_ISR_RBE) {
-                do_read_rra(s);
+            data &= s->regs[reg];
+            s->regs[reg] &= ~data;
+            if (data & SONIC_ISR_RBE) {
+                dp8393x_do_read_rra(s);
             }
             dp8393x_update_irq(s);
             break;
@@ -569,24 +575,32 @@ static void write_register(dp8393xState *s, int reg, uint16_t val)
         case SONIC_REA:
         case SONIC_RRP:
         case SONIC_RWP:
-            s->regs[reg] = val & 0xfffe;
+            s->regs[reg] = data & 0xfffe;
             break;
         /* Invert written value for some registers */
         case SONIC_CRCT:
         case SONIC_FAET:
         case SONIC_MPT:
-            s->regs[reg] = val ^ 0xffff;
+            s->regs[reg] = data ^ 0xffff;
             break;
         /* All other registers have no special contrainst */
         default:
-            s->regs[reg] = val;
+            s->regs[reg] = data;
     }
 
     if (reg == SONIC_WT0 || reg == SONIC_WT1) {
-        set_next_tick(s);
+        dp8393x_set_next_tick(s);
     }
 }
 
+static const MemoryRegionOps dp8393x_ops = {
+    .read = dp8393x_read,
+    .write = dp8393x_write,
+    .impl.min_access_size = 2,
+    .impl.max_access_size = 2,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
 static void dp8393x_watchdog(void *opaque)
 {
     dp8393xState *s = opaque;
@@ -597,84 +611,14 @@ static void dp8393x_watchdog(void *opaque)
 
     s->regs[SONIC_WT1] = 0xffff;
     s->regs[SONIC_WT0] = 0xffff;
-    set_next_tick(s);
+    dp8393x_set_next_tick(s);
 
     /* Signal underflow */
     s->regs[SONIC_ISR] |= SONIC_ISR_TC;
     dp8393x_update_irq(s);
 }
 
-static uint32_t dp8393x_readw(void *opaque, hwaddr addr)
-{
-    dp8393xState *s = opaque;
-    int reg;
-
-    if ((addr & ((1 << s->it_shift) - 1)) != 0) {
-        return 0;
-    }
-
-    reg = addr >> s->it_shift;
-    return read_register(s, reg);
-}
-
-static uint32_t dp8393x_readb(void *opaque, hwaddr addr)
-{
-    uint16_t v = dp8393x_readw(opaque, addr & ~0x1);
-    return (v >> (8 * (addr & 0x1))) & 0xff;
-}
-
-static uint32_t dp8393x_readl(void *opaque, hwaddr addr)
-{
-    uint32_t v;
-    v = dp8393x_readw(opaque, addr);
-    v |= dp8393x_readw(opaque, addr + 2) << 16;
-    return v;
-}
-
-static void dp8393x_writew(void *opaque, hwaddr addr, uint32_t val)
-{
-    dp8393xState *s = opaque;
-    int reg;
-
-    if ((addr & ((1 << s->it_shift) - 1)) != 0) {
-        return;
-    }
-
-    reg = addr >> s->it_shift;
-
-    write_register(s, reg, (uint16_t)val);
-}
-
-static void dp8393x_writeb(void *opaque, hwaddr addr, uint32_t val)
-{
-    uint16_t old_val = dp8393x_readw(opaque, addr & ~0x1);
-
-    switch (addr & 3) {
-    case 0:
-        val = val | (old_val & 0xff00);
-        break;
-    case 1:
-        val = (val << 8) | (old_val & 0x00ff);
-        break;
-    }
-    dp8393x_writew(opaque, addr & ~0x1, val);
-}
-
-static void dp8393x_writel(void *opaque, hwaddr addr, uint32_t val)
-{
-    dp8393x_writew(opaque, addr, val & 0xffff);
-    dp8393x_writew(opaque, addr + 2, (val >> 16) & 0xffff);
-}
-
-static const MemoryRegionOps dp8393x_ops = {
-    .old_mmio = {
-        .read = { dp8393x_readb, dp8393x_readw, dp8393x_readl, },
-        .write = { dp8393x_writeb, dp8393x_writew, dp8393x_writel, },
-    },
-    .endianness = DEVICE_NATIVE_ENDIAN,
-};
-
-static int nic_can_receive(NetClientState *nc)
+static int dp8393x_can_receive(NetClientState *nc)
 {
     dp8393xState *s = qemu_get_nic_opaque(nc);
 
@@ -685,7 +629,8 @@ static int nic_can_receive(NetClientState *nc)
     return 1;
 }
 
-static int receive_filter(dp8393xState *s, const uint8_t * buf, int size)
+static int dp8393x_receive_filter(dp8393xState *s, const uint8_t * buf,
+                                  int size)
 {
     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
     int i;
@@ -723,7 +668,8 @@ static int receive_filter(dp8393xState *s, const uint8_t * buf, int size)
     return -1;
 }
 
-static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t size)
+static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
+                               size_t size)
 {
     dp8393xState *s = qemu_get_nic_opaque(nc);
     uint16_t data[10];
@@ -737,7 +683,7 @@ static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t size)
     s->regs[SONIC_RCR] &= ~(SONIC_RCR_PRX | SONIC_RCR_LBK | SONIC_RCR_FAER |
         SONIC_RCR_CRCR | SONIC_RCR_LPKT | SONIC_RCR_BC | SONIC_RCR_MC);
 
-    packet_type = receive_filter(s, buf, size);
+    packet_type = dp8393x_receive_filter(s, buf, size);
     if (packet_type < 0) {
         DPRINTF("packet not for netcard\n");
         return -1;
@@ -750,7 +696,8 @@ static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t size)
         /* Are we still in resource exhaustion? */
         size = sizeof(uint16_t) * 1 * width;
         address = ((s->regs[SONIC_URDA] << 16) | s->regs[SONIC_CRDA]) + sizeof(uint16_t) * 5 * width;
-        s->memory_rw(s->mem_opaque, address, (uint8_t*)data, size, 0);
+        address_space_rw(&s->as, address, MEMTXATTRS_UNSPECIFIED,
+                         (uint8_t *)data, size, 0);
         if (data[0 * width] & 0x1) {
             /* Still EOL ; stop reception */
             return -1;
@@ -764,18 +711,16 @@ static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t size)
     s->regs[SONIC_TRBA0] = s->regs[SONIC_CRBA0];
 
     /* Calculate the ethernet checksum */
-#ifdef SONIC_CALCULATE_RXCRC
     checksum = cpu_to_le32(crc32(0, buf, rx_len));
-#else
-    checksum = 0;
-#endif
 
     /* Put packet into RBA */
     DPRINTF("Receive packet at %08x\n", (s->regs[SONIC_CRBA1] << 16) | s->regs[SONIC_CRBA0]);
     address = (s->regs[SONIC_CRBA1] << 16) | s->regs[SONIC_CRBA0];
-    s->memory_rw(s->mem_opaque, address, (uint8_t*)buf, rx_len, 1);
+    address_space_rw(&s->as, address,
+        MEMTXATTRS_UNSPECIFIED, (uint8_t *)buf, rx_len, 1);
     address += rx_len;
-    s->memory_rw(s->mem_opaque, address, (uint8_t*)&checksum, 4, 1);
+    address_space_rw(&s->as, address,
+        MEMTXATTRS_UNSPECIFIED, (uint8_t *)&checksum, 4, 1);
     rx_len += 4;
     s->regs[SONIC_CRBA1] = address >> 16;
     s->regs[SONIC_CRBA0] = address & 0xffff;
@@ -803,29 +748,30 @@ static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t size)
     data[3 * width] = s->regs[SONIC_TRBA1]; /* pkt_ptr1 */
     data[4 * width] = s->regs[SONIC_RSC]; /* seq_no */
     size = sizeof(uint16_t) * 5 * width;
-    s->memory_rw(s->mem_opaque, (s->regs[SONIC_URDA] << 16) | s->regs[SONIC_CRDA], (uint8_t *)data, size, 1);
+    address_space_rw(&s->as, (s->regs[SONIC_URDA] << 16) | s->regs[SONIC_CRDA],
+        MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 1);
 
     /* Move to next descriptor */
     size = sizeof(uint16_t) * width;
-    s->memory_rw(s->mem_opaque,
+    address_space_rw(&s->as,
         ((s->regs[SONIC_URDA] << 16) | s->regs[SONIC_CRDA]) + sizeof(uint16_t) * 5 * width,
-        (uint8_t *)data, size, 0);
+        MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 0);
     s->regs[SONIC_LLFA] = data[0 * width];
     if (s->regs[SONIC_LLFA] & 0x1) {
         /* EOL detected */
         s->regs[SONIC_ISR] |= SONIC_ISR_RDE;
     } else {
         data[0 * width] = 0; /* in_use */
-        s->memory_rw(s->mem_opaque,
+        address_space_rw(&s->as,
             ((s->regs[SONIC_URDA] << 16) | s->regs[SONIC_CRDA]) + sizeof(uint16_t) * 6 * width,
-            (uint8_t *)data, size, 1);
+            MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, sizeof(uint16_t), 1);
         s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
         s->regs[SONIC_ISR] |= SONIC_ISR_PKTRX;
         s->regs[SONIC_RSC] = (s->regs[SONIC_RSC] & 0xff00) | (((s->regs[SONIC_RSC] & 0x00ff) + 1) & 0x00ff);
 
         if (s->regs[SONIC_RCR] & SONIC_RCR_LPKT) {
             /* Read next RRA */
-            do_read_rra(s);
+            dp8393x_do_read_rra(s);
         }
     }
 
@@ -835,11 +781,12 @@ static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t size)
     return size;
 }
 
-static void nic_reset(void *opaque)
+static void dp8393x_reset(DeviceState *dev)
 {
-    dp8393xState *s = opaque;
+    dp8393xState *s = DP8393X(dev);
     timer_del(s->watchdog);
 
+    memset(s->regs, 0, sizeof(s->regs));
     s->regs[SONIC_CR] = SONIC_CR_RST | SONIC_CR_STP | SONIC_CR_RXDIS;
     s->regs[SONIC_DCR] &= ~(SONIC_DCR_EXBUS | SONIC_DCR_LBR);
     s->regs[SONIC_RCR] &= ~(SONIC_RCR_LB0 | SONIC_RCR_LB1 | SONIC_RCR_BRD | SONIC_RCR_RNT);
@@ -862,39 +809,91 @@ static void nic_reset(void *opaque)
 static NetClientInfo net_dp83932_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
-    .can_receive = nic_can_receive,
-    .receive = nic_receive,
+    .can_receive = dp8393x_can_receive,
+    .receive = dp8393x_receive,
 };
 
-void dp83932_init(NICInfo *nd, hwaddr base, int it_shift,
-                  MemoryRegion *address_space,
-                  qemu_irq irq, void* mem_opaque,
-                  void (*memory_rw)(void *opaque, hwaddr addr, uint8_t *buf, int len, int is_write))
+static void dp8393x_instance_init(Object *obj)
 {
-    dp8393xState *s;
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+    dp8393xState *s = DP8393X(obj);
+
+    sysbus_init_mmio(sbd, &s->mmio);
+    sysbus_init_mmio(sbd, &s->prom);
+    sysbus_init_irq(sbd, &s->irq);
+}
 
-    qemu_check_nic_model(nd, "dp83932");
+static void dp8393x_realize(DeviceState *dev, Error **errp)
+{
+    dp8393xState *s = DP8393X(dev);
+    int i, checksum;
+    uint8_t *prom;
 
-    s = g_malloc0(sizeof(dp8393xState));
+    address_space_init(&s->as, s->dma_mr, "dp8393x");
+    memory_region_init_io(&s->mmio, OBJECT(dev), &dp8393x_ops, s,
+                          "dp8393x-regs", 0x40 << s->it_shift);
+
+    s->nic = qemu_new_nic(&net_dp83932_info, &s->conf,
+                          object_get_typename(OBJECT(dev)), dev->id, s);
+    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
 
-    s->address_space = address_space;
-    s->mem_opaque = mem_opaque;
-    s->memory_rw = memory_rw;
-    s->it_shift = it_shift;
-    s->irq = irq;
     s->watchdog = timer_new_ns(QEMU_CLOCK_VIRTUAL, dp8393x_watchdog, s);
     s->regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux */
 
-    s->conf.macaddr = nd->macaddr;
-    s->conf.peers.ncs[0] = nd->netdev;
+    memory_region_init_rom_device(&s->prom, OBJECT(dev), NULL, NULL,
+                                  "dp8393x-prom", SONIC_PROM_SIZE, NULL);
+    prom = memory_region_get_ram_ptr(&s->prom);
+    checksum = 0;
+    for (i = 0; i < 6; i++) {
+        prom[i] = s->conf.macaddr.a[i];
+        checksum += prom[i];
+        if (checksum > 0xff) {
+            checksum = (checksum + 1) & 0xff;
+        }
+    }
+    prom[7] = 0xff - checksum;
+}
 
-    s->nic = qemu_new_nic(&net_dp83932_info, &s->conf, nd->model, nd->name, s);
+static const VMStateDescription vmstate_dp8393x = {
+    .name = "dp8393x",
+    .version_id = 0,
+    .minimum_version_id = 0,
+    .fields = (VMStateField []) {
+        VMSTATE_BUFFER_UNSAFE(cam, dp8393xState, 0, 16 * 6),
+        VMSTATE_UINT16_ARRAY(regs, dp8393xState, 0x40),
+        VMSTATE_END_OF_LIST()
+    }
+};
 
-    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
-    qemu_register_reset(nic_reset, s);
-    nic_reset(s);
+static Property dp8393x_properties[] = {
+    DEFINE_NIC_PROPERTIES(dp8393xState, conf),
+    DEFINE_PROP_PTR("dma_mr", dp8393xState, dma_mr),
+    DEFINE_PROP_UINT8("it_shift", dp8393xState, it_shift, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void dp8393x_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
-    memory_region_init_io(&s->mmio, NULL, &dp8393x_ops, s,
-                          "dp8393x", 0x40 << it_shift);
-    memory_region_add_subregion(address_space, base, &s->mmio);
+    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
+    dc->realize = dp8393x_realize;
+    dc->reset = dp8393x_reset;
+    dc->vmsd = &vmstate_dp8393x;
+    dc->props = dp8393x_properties;
 }
+
+static const TypeInfo dp8393x_info = {
+    .name          = TYPE_DP8393X,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(dp8393xState),
+    .instance_init = dp8393x_instance_init,
+    .class_init    = dp8393x_class_init,
+};
+
+static void dp8393x_register_types(void)
+{
+    type_register_static(&dp8393x_info);
+}
+
+type_init(dp8393x_register_types)
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 091d61acc3..bab8e2abfb 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -1370,6 +1370,7 @@ static const VMStateDescription vmstate_e1000_mit_state = {
     .name = "e1000/mit_state",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = e1000_mit_state_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(mac_reg[RDTR], E1000State),
         VMSTATE_UINT32(mac_reg[RADV], E1000State),
@@ -1457,13 +1458,9 @@ static const VMStateDescription vmstate_e1000 = {
         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_e1000_mit_state,
-            .needed = e1000_mit_state_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_e1000_mit_state,
+        NULL
     }
 };
 
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index f868108dfe..e0db4727ae 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -3240,6 +3240,7 @@ static const VMStateDescription vmstate_rtl8139_hotplug_ready ={
     .name = "rtl8139/hotplug_ready",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = rtl8139_hotplug_ready_needed,
     .fields = (VMStateField[]) {
         VMSTATE_END_OF_LIST()
     }
@@ -3335,13 +3336,9 @@ static const VMStateDescription vmstate_rtl8139 = {
         VMSTATE_UINT32_V(cplus_enabled, RTL8139State, 4),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_rtl8139_hotplug_ready,
-            .needed = rtl8139_hotplug_ready_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_rtl8139_hotplug_ready,
+        NULL
     }
 };
 
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index dfb328debd..8bcdf3ed77 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -2226,6 +2226,7 @@ static const VMStateDescription vmxstate_vmxnet3_mcast_list = {
     .version_id = 1,
     .minimum_version_id = 1,
     .pre_load = vmxnet3_mcast_list_pre_load,
+    .needed = vmxnet3_mc_list_needed,
     .fields = (VMStateField[]) {
         VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL, 0,
             mcast_list_buff_size),
@@ -2470,14 +2471,9 @@ static const VMStateDescription vmstate_vmxnet3 = {
 
             VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmxstate_vmxnet3_mcast_list,
-            .needed = vmxnet3_mc_list_needed
-        },
-        {
-            /* empty element. */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmxstate_vmxnet3_mcast_list,
+        NULL
     }
 };
 
diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index f1712b86fe..ed2424c4cd 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -582,6 +582,7 @@ static const VMStateDescription vmstate_piix3_rcr = {
     .name = "PIIX3/rcr",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = piix3_rcr_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(rcr, PIIX3State),
         VMSTATE_END_OF_LIST()
@@ -600,12 +601,9 @@ static const VMStateDescription vmstate_piix3 = {
                               PIIX_NUM_PIRQS, 3),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_piix3_rcr,
-            .needed = piix3_rcr_needed,
-        },
-        { 0 }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_piix3_rcr,
+        NULL
     }
 };
 
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index bd2c0e4caa..f50b2f08af 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -1968,6 +1968,7 @@ static const VMStateDescription vmstate_scsi_sense_state = {
     .name = "SCSIDevice/sense",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = scsi_sense_state_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8_SUB_ARRAY(sense, SCSIDevice,
                                 SCSI_SENSE_BUF_SIZE_OLD,
@@ -1998,13 +1999,9 @@ const VMStateDescription vmstate_scsi_device = {
         },
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_scsi_sense_state,
-            .needed = scsi_sense_state_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_scsi_sense_state,
+        NULL
     }
 };
 
diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c
index 4221060308..5e22ed79b2 100644
--- a/hw/sh4/r2d.c
+++ b/hw/sh4/r2d.c
@@ -127,7 +127,7 @@ static void r2d_fpga_irq_set(void *opaque, int n, int level)
     update_irl(fpga);
 }
 
-static uint32_t r2d_fpga_read(void *opaque, hwaddr addr)
+static uint64_t r2d_fpga_read(void *opaque, hwaddr addr, unsigned int size)
 {
     r2d_fpga_t *s = opaque;
 
@@ -146,7 +146,7 @@ static uint32_t r2d_fpga_read(void *opaque, hwaddr addr)
 }
 
 static void
-r2d_fpga_write(void *opaque, hwaddr addr, uint32_t value)
+r2d_fpga_write(void *opaque, hwaddr addr, uint64_t value, unsigned int size)
 {
     r2d_fpga_t *s = opaque;
 
@@ -170,10 +170,10 @@ r2d_fpga_write(void *opaque, hwaddr addr, uint32_t value)
 }
 
 static const MemoryRegionOps r2d_fpga_ops = {
-    .old_mmio = {
-        .read = { r2d_fpga_read, r2d_fpga_read, NULL, },
-        .write = { r2d_fpga_write, r2d_fpga_write, NULL, },
-    },
+    .read = r2d_fpga_read,
+    .write = r2d_fpga_write,
+    .impl.min_access_size = 2,
+    .impl.max_access_size = 2,
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index b6b8a2063d..b50071ef93 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -283,6 +283,7 @@ static const VMStateDescription vmstate_hpet_rtc_irq_level = {
     .name = "hpet/rtc_irq_level",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = hpet_rtc_irq_level_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(rtc_irq_level, HPETState),
         VMSTATE_END_OF_LIST()
@@ -322,13 +323,9 @@ static const VMStateDescription vmstate_hpet = {
                                     vmstate_hpet_timer, HPETTimer),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_hpet_rtc_irq_level,
-            .needed = hpet_rtc_irq_level_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_hpet_rtc_irq_level,
+        NULL
     }
 };
 
diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index f2b77fa118..32048258c9 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c
@@ -733,22 +733,23 @@ static int rtc_post_load(void *opaque, int version_id)
     return 0;
 }
 
+static bool rtc_irq_reinject_on_ack_count_needed(void *opaque)
+{
+    RTCState *s = (RTCState *)opaque;
+    return s->irq_reinject_on_ack_count != 0;
+}
+
 static const VMStateDescription vmstate_rtc_irq_reinject_on_ack_count = {
     .name = "mc146818rtc/irq_reinject_on_ack_count",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = rtc_irq_reinject_on_ack_count_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT16(irq_reinject_on_ack_count, RTCState),
         VMSTATE_END_OF_LIST()
     }
 };
 
-static bool rtc_irq_reinject_on_ack_count_needed(void *opaque)
-{
-    RTCState *s = (RTCState *)opaque;
-    return s->irq_reinject_on_ack_count != 0;
-}
-
 static const VMStateDescription vmstate_rtc = {
     .name = "mc146818rtc",
     .version_id = 3,
@@ -770,13 +771,9 @@ static const VMStateDescription vmstate_rtc = {
         VMSTATE_UINT64_V(next_alarm_time, RTCState, 3),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_rtc_irq_reinject_on_ack_count,
-            .needed = rtc_irq_reinject_on_ack_count_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_rtc_irq_reinject_on_ack_count,
+        NULL
     }
 };
 
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 1a22c9c0cb..7d65818064 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -2034,6 +2034,7 @@ static const VMStateDescription vmstate_ohci_eof_timer = {
     .version_id = 1,
     .minimum_version_id = 1,
     .pre_load = ohci_eof_timer_pre_load,
+    .needed = ohci_eof_timer_needed,
     .fields = (VMStateField[]) {
         VMSTATE_TIMER_PTR(eof_timer, OHCIState),
         VMSTATE_END_OF_LIST()
@@ -2081,13 +2082,9 @@ static const VMStateDescription vmstate_ohci_state = {
         VMSTATE_BOOL(async_complete, OHCIState),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_ohci_eof_timer,
-            .needed = ohci_eof_timer_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_ohci_eof_timer,
+        NULL
     }
 };
 
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 242a654583..6b4218c037 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -2257,40 +2257,42 @@ static const VMStateInfo usbredir_ep_bufpq_vmstate_info = {
 
 
 /* For endp_data migration */
+static bool usbredir_bulk_receiving_needed(void *priv)
+{
+    struct endp_data *endp = priv;
+
+    return endp->bulk_receiving_started;
+}
+
 static const VMStateDescription usbredir_bulk_receiving_vmstate = {
     .name = "usb-redir-ep/bulk-receiving",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = usbredir_bulk_receiving_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(bulk_receiving_started, struct endp_data),
         VMSTATE_END_OF_LIST()
     }
 };
 
-static bool usbredir_bulk_receiving_needed(void *priv)
+static bool usbredir_stream_needed(void *priv)
 {
     struct endp_data *endp = priv;
 
-    return endp->bulk_receiving_started;
+    return endp->max_streams;
 }
 
 static const VMStateDescription usbredir_stream_vmstate = {
     .name = "usb-redir-ep/stream-state",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = usbredir_stream_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(max_streams, struct endp_data),
         VMSTATE_END_OF_LIST()
     }
 };
 
-static bool usbredir_stream_needed(void *priv)
-{
-    struct endp_data *endp = priv;
-
-    return endp->max_streams;
-}
-
 static const VMStateDescription usbredir_ep_vmstate = {
     .name = "usb-redir-ep",
     .version_id = 1,
@@ -2318,16 +2320,10 @@ static const VMStateDescription usbredir_ep_vmstate = {
         VMSTATE_INT32(bufpq_target_size, struct endp_data),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &usbredir_bulk_receiving_vmstate,
-            .needed = usbredir_bulk_receiving_needed,
-        }, {
-            .vmsd = &usbredir_stream_vmstate,
-            .needed = usbredir_stream_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &usbredir_bulk_receiving_vmstate,
+        &usbredir_stream_vmstate,
+        NULL
     }
 };
 
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index fb49ffcb2d..ee4e07c5e7 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1053,6 +1053,7 @@ static const VMStateDescription vmstate_virtio_device_endian = {
     .name = "virtio/device_endian",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = &virtio_device_endian_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(device_endian, VirtIODevice),
         VMSTATE_END_OF_LIST()
@@ -1063,6 +1064,7 @@ static const VMStateDescription vmstate_virtio_64bit_features = {
     .name = "virtio/64bit_features",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = &virtio_64bit_features_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(guest_features, VirtIODevice),
         VMSTATE_END_OF_LIST()
@@ -1077,16 +1079,10 @@ static const VMStateDescription vmstate_virtio = {
     .fields = (VMStateField[]) {
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_virtio_device_endian,
-            .needed = &virtio_device_endian_needed
-        },
-        {
-            .vmsd = &vmstate_virtio_64bit_features,
-            .needed = &virtio_64bit_features_needed
-        },
-        { 0 }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_virtio_device_endian,
+        &vmstate_virtio_64bit_features,
+        NULL
     }
 };
 
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 43428bd030..de8a7200a9 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -126,10 +126,10 @@ void cpu_flush_icache_range(hwaddr start, int len);
 extern struct MemoryRegion io_mem_rom;
 extern struct MemoryRegion io_mem_notdirty;
 
-typedef void (RAMBlockIterFunc)(void *host_addr,
+typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr,
     ram_addr_t offset, ram_addr_t length, void *opaque);
 
-void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
+int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
 
 #endif
 
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 2f7a4f1700..2573e8c36e 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -105,6 +105,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                              hwaddr paddr, MemTxAttrs attrs,
                              int prot, int mmu_idx, target_ulong size);
 void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr);
+void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
+                 uintptr_t retaddr);
 #else
 static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
 {
diff --git a/include/hw/mips/mips.h b/include/hw/mips/mips.h
index 2a7a9c9f42..e0065ce808 100644
--- a/include/hw/mips/mips.h
+++ b/include/hw/mips/mips.h
@@ -15,18 +15,9 @@ PCIBus *bonito_init(qemu_irq *pic);
 
 /* rc4030.c */
 typedef struct rc4030DMAState *rc4030_dma;
-void rc4030_dma_memory_rw(void *opaque, hwaddr addr, uint8_t *buf, int len, int is_write);
 void rc4030_dma_read(void *dma, uint8_t *buf, int len);
 void rc4030_dma_write(void *dma, uint8_t *buf, int len);
 
-void *rc4030_init(qemu_irq timer, qemu_irq jazz_bus,
-                  qemu_irq **irqs, rc4030_dma **dmas,
-                  MemoryRegion *sysmem);
-
-/* dp8393x.c */
-void dp83932_init(NICInfo *nd, hwaddr base, int it_shift,
-                  MemoryRegion *address_space,
-                  qemu_irq irq, void* mem_opaque,
-                  void (*memory_rw)(void *opaque, hwaddr addr, uint8_t *buf, int len, int is_write));
+DeviceState *rc4030_init(rc4030_dma **dmas, MemoryRegion **dma_mr);
 
 #endif
diff --git a/include/migration/migration.h b/include/migration/migration.h
index a6e025a248..9387c8c9d4 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -34,6 +34,7 @@
 #define QEMU_VM_SECTION_FULL         0x04
 #define QEMU_VM_SUBSECTION           0x05
 #define QEMU_VM_VMDESCRIPTION        0x06
+#define QEMU_VM_SECTION_FOOTER       0x7e
 
 struct MigrationParams {
     bool blk;
@@ -42,6 +43,20 @@ struct MigrationParams {
 
 typedef struct MigrationState MigrationState;
 
+typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head;
+
+/* State for the incoming migration */
+struct MigrationIncomingState {
+    QEMUFile *file;
+
+    /* See savevm.c */
+    LoadStateEntry_Head loadvm_handlers;
+};
+
+MigrationIncomingState *migration_incoming_get_current(void);
+MigrationIncomingState *migration_incoming_state_new(QEMUFile *f);
+void migration_incoming_state_destroy(void);
+
 struct MigrationState
 {
     int64_t bandwidth_limit;
@@ -180,4 +195,6 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
                              ram_addr_t offset, size_t size,
                              uint64_t *bytes_sent);
 
+void ram_mig_init(void);
+void savevm_skip_section_footers(void);
 #endif
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index a01c5b817e..4f67d79227 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -157,7 +157,7 @@ static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
 void qemu_put_be16(QEMUFile *f, unsigned int v);
 void qemu_put_be32(QEMUFile *f, unsigned int v);
 void qemu_put_be64(QEMUFile *f, uint64_t v);
-int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset);
+int qemu_peek_buffer(QEMUFile *f, uint8_t **buf, int size, size_t offset);
 int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size);
 ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
                                   int level);
@@ -312,4 +312,7 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
 {
     qemu_get_be64s(f, (uint64_t *)pv);
 }
+
+size_t qemu_get_counted_string(QEMUFile *f, char buf[256]);
+
 #endif
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index bc7616aaa8..7153b1e145 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -120,11 +120,6 @@ typedef struct {
     bool (*field_exists)(void *opaque, int version_id);
 } VMStateField;
 
-typedef struct VMStateSubsection {
-    const VMStateDescription *vmsd;
-    bool (*needed)(void *opaque);
-} VMStateSubsection;
-
 struct VMStateDescription {
     const char *name;
     int unmigratable;
@@ -135,8 +130,9 @@ struct VMStateDescription {
     int (*pre_load)(void *opaque);
     int (*post_load)(void *opaque, int version_id);
     void (*pre_save)(void *opaque);
+    bool (*needed)(void *opaque);
     VMStateField *fields;
-    const VMStateSubsection *subsections;
+    const VMStateDescription **subsections;
 };
 
 extern const VMStateDescription vmstate_dummy;
@@ -812,6 +808,8 @@ extern const VMStateInfo vmstate_info_bitmap;
 
 #define SELF_ANNOUNCE_ROUNDS 5
 
+void loadvm_free_handlers(MigrationIncomingState *mis);
+
 int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
                        void *opaque, int version_id);
 void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index cde3314896..6fdcbcd524 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -31,6 +31,7 @@ typedef struct I2CBus I2CBus;
 typedef struct I2SCodec I2SCodec;
 typedef struct ISABus ISABus;
 typedef struct ISADevice ISADevice;
+typedef struct LoadStateEntry LoadStateEntry;
 typedef struct MACAddr MACAddr;
 typedef struct MachineClass MachineClass;
 typedef struct MachineState MachineState;
@@ -38,6 +39,7 @@ typedef struct MemoryListener MemoryListener;
 typedef struct MemoryMappingList MemoryMappingList;
 typedef struct MemoryRegion MemoryRegion;
 typedef struct MemoryRegionSection MemoryRegionSection;
+typedef struct MigrationIncomingState MigrationIncomingState;
 typedef struct MigrationParams MigrationParams;
 typedef struct Monitor Monitor;
 typedef struct MouseTransformInfo MouseTransformInfo;
diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h
index 54b36c16c4..c38892fec6 100644
--- a/include/sysemu/arch_init.h
+++ b/include/sysemu/arch_init.h
@@ -30,7 +30,6 @@ extern const uint32_t arch_type;
 void select_soundhw(const char *optarg);
 void do_acpitable_option(const QemuOpts *opts);
 void do_smbios_option(QemuOpts *opts);
-void ram_mig_init(void);
 void cpudef_init(void);
 void audio_init(void);
 int kvm_available(void);
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 853d90a317..ef793f702e 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -84,6 +84,7 @@ void qemu_announce_self(void);
 bool qemu_savevm_state_blocked(Error **errp);
 void qemu_savevm_state_begin(QEMUFile *f,
                              const MigrationParams *params);
+void qemu_savevm_state_header(QEMUFile *f);
 int qemu_savevm_state_iterate(QEMUFile *f);
 void qemu_savevm_state_complete(QEMUFile *f);
 void qemu_savevm_state_cancel(void);
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 0ba97062b7..b71e866973 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1075,6 +1075,35 @@ static inline void elf_core_copy_regs(target_elf_gregset_t *regs,
 #define USE_ELF_CORE_DUMP
 #define ELF_EXEC_PAGESIZE        4096
 
+enum {
+    SH_CPU_HAS_FPU            = 0x0001, /* Hardware FPU support */
+    SH_CPU_HAS_P2_FLUSH_BUG   = 0x0002, /* Need to flush the cache in P2 area */
+    SH_CPU_HAS_MMU_PAGE_ASSOC = 0x0004, /* SH3: TLB way selection bit support */
+    SH_CPU_HAS_DSP            = 0x0008, /* SH-DSP: DSP support */
+    SH_CPU_HAS_PERF_COUNTER   = 0x0010, /* Hardware performance counters */
+    SH_CPU_HAS_PTEA           = 0x0020, /* PTEA register */
+    SH_CPU_HAS_LLSC           = 0x0040, /* movli.l/movco.l */
+    SH_CPU_HAS_L2_CACHE       = 0x0080, /* Secondary cache / URAM */
+    SH_CPU_HAS_OP32           = 0x0100, /* 32-bit instruction support */
+    SH_CPU_HAS_PTEAEX         = 0x0200, /* PTE ASID Extension support */
+};
+
+#define ELF_HWCAP get_elf_hwcap()
+
+static uint32_t get_elf_hwcap(void)
+{
+    SuperHCPU *cpu = SUPERH_CPU(thread_cpu);
+    uint32_t hwcap = 0;
+
+    hwcap |= SH_CPU_HAS_FPU;
+
+    if (cpu->env.features & SH_FEATURE_SH4A) {
+        hwcap |= SH_CPU_HAS_LLSC;
+    }
+
+    return hwcap;
+}
+
 #endif
 
 #ifdef TARGET_CRIS
diff --git a/linux-user/main.c b/linux-user/main.c
index 6989b82455..a0d3e58bd4 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -3925,6 +3925,8 @@ int main(int argc, char **argv, char **envp)
 # else
         cpu_model = "750";
 # endif
+#elif defined TARGET_SH4
+        cpu_model = TYPE_SH7785_CPU;
 #else
         cpu_model = "any";
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index 732d229708..b04b4571a8 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -53,6 +53,7 @@ static bool deferred_incoming;
    migrations at once.  For now we don't need to add
    dynamic creation of migration */
 
+/* For outgoing */
 MigrationState *migrate_get_current(void)
 {
     static MigrationState current_migration = {
@@ -71,6 +72,30 @@ MigrationState *migrate_get_current(void)
     return &current_migration;
 }
 
+/* For incoming */
+static MigrationIncomingState *mis_current;
+
+MigrationIncomingState *migration_incoming_get_current(void)
+{
+    return mis_current;
+}
+
+MigrationIncomingState *migration_incoming_state_new(QEMUFile* f)
+{
+    mis_current = g_malloc0(sizeof(MigrationIncomingState));
+    mis_current->file = f;
+    QLIST_INIT(&mis_current->loadvm_handlers);
+
+    return mis_current;
+}
+
+void migration_incoming_state_destroy(void)
+{
+    loadvm_free_handlers(mis_current);
+    g_free(mis_current);
+    mis_current = NULL;
+}
+
 /*
  * Called on -incoming with a defer: uri.
  * The migration can be started later after any parameters have been
@@ -115,9 +140,14 @@ static void process_incoming_migration_co(void *opaque)
     Error *local_err = NULL;
     int ret;
 
+    migration_incoming_state_new(f);
+
     ret = qemu_loadvm_state(f);
+
     qemu_fclose(f);
     free_xbzrle_decoded_buf();
+    migration_incoming_state_destroy();
+
     if (ret < 0) {
         error_report("load of migration failed: %s", strerror(-ret));
         migrate_decompress_threads_join();
@@ -738,6 +768,7 @@ static void *migration_thread(void *opaque)
     int64_t start_time = initial_time;
     bool old_vm_running = false;
 
+    qemu_savevm_state_header(s->file);
     qemu_savevm_state_begin(s->file, &s->params);
 
     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
@@ -838,9 +869,6 @@ static void *migration_thread(void *opaque)
 
 void migrate_fd_connect(MigrationState *s)
 {
-    s->state = MIGRATION_STATUS_SETUP;
-    trace_migrate_set_state(MIGRATION_STATUS_SETUP);
-
     /* This is a best 1st approximation. ns to ms */
     s->expected_downtime = max_downtime/1000000;
     s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 2750365a7e..965a757772 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -349,14 +349,14 @@ void qemu_file_skip(QEMUFile *f, int size)
 }
 
 /*
- * Read 'size' bytes from file (at 'offset') into buf without moving the
- * pointer.
+ * Read 'size' bytes from file (at 'offset') without moving the
+ * pointer and set 'buf' to point to that data.
  *
  * It will return size bytes unless there was an error, in which case it will
  * return as many as it managed to read (assuming blocking fd's which
  * all current QEMUFile are)
  */
-int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset)
+int qemu_peek_buffer(QEMUFile *f, uint8_t **buf, int size, size_t offset)
 {
     int pending;
     int index;
@@ -392,7 +392,7 @@ int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset)
         size = pending;
     }
 
-    memcpy(buf, f->buf + index, size);
+    *buf = f->buf + index;
     return size;
 }
 
@@ -411,11 +411,13 @@ int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size)
 
     while (pending > 0) {
         int res;
+        uint8_t *src;
 
-        res = qemu_peek_buffer(f, buf, MIN(pending, IO_BUF_SIZE), 0);
+        res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
         if (res == 0) {
             return done;
         }
+        memcpy(buf, src, res);
         qemu_file_skip(f, res);
         buf += res;
         pending -= res;
@@ -585,3 +587,20 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src)
     }
     return len;
 }
+
+/*
+ * Get a string whose length is determined by a single preceding byte
+ * A preallocated 256 byte buffer must be passed in.
+ * Returns: len on success and a 0 terminated string in the buffer
+ *          else 0
+ *          (Note a 0 length string will return 0 either way)
+ */
+size_t qemu_get_counted_string(QEMUFile *f, char buf[256])
+{
+    size_t len = qemu_get_byte(f);
+    size_t res = qemu_get_buffer(f, (uint8_t *)buf, len);
+
+    buf[res] = 0;
+
+    return res == len ? res : 0;
+}
diff --git a/migration/ram.c b/migration/ram.c
new file mode 100644
index 0000000000..57368e1575
--- /dev/null
+++ b/migration/ram.c
@@ -0,0 +1,1628 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2011-2015 Red Hat Inc
+ *
+ * Authors:
+ *  Juan Quintela <quintela@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <stdint.h>
+#include <zlib.h>
+#include "qemu/bitops.h"
+#include "qemu/bitmap.h"
+#include "qemu/timer.h"
+#include "qemu/main-loop.h"
+#include "migration/migration.h"
+#include "exec/address-spaces.h"
+#include "migration/page_cache.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+#include "exec/ram_addr.h"
+#include "qemu/rcu_queue.h"
+
+#ifdef DEBUG_MIGRATION_RAM
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+static bool mig_throttle_on;
+static int dirty_rate_high_cnt;
+static void check_guest_throttling(void);
+
+static uint64_t bitmap_sync_count;
+
+/***********************************************************/
+/* ram save/restore */
+
+#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
+#define RAM_SAVE_FLAG_COMPRESS 0x02
+#define RAM_SAVE_FLAG_MEM_SIZE 0x04
+#define RAM_SAVE_FLAG_PAGE     0x08
+#define RAM_SAVE_FLAG_EOS      0x10
+#define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBZRLE   0x40
+/* 0x80 is reserved in migration.h start with 0x100 next */
+#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
+
+static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
+
+static inline bool is_zero_range(uint8_t *p, uint64_t size)
+{
+    return buffer_find_nonzero_offset(p, size) == size;
+}
+
+/* struct contains XBZRLE cache and a static page
+   used by the compression */
+static struct {
+    /* buffer used for XBZRLE encoding */
+    uint8_t *encoded_buf;
+    /* buffer for storing page content */
+    uint8_t *current_buf;
+    /* Cache for XBZRLE, Protected by lock. */
+    PageCache *cache;
+    QemuMutex lock;
+} XBZRLE;
+
+/* buffer used for XBZRLE decoding */
+static uint8_t *xbzrle_decoded_buf;
+
+static void XBZRLE_cache_lock(void)
+{
+    if (migrate_use_xbzrle())
+        qemu_mutex_lock(&XBZRLE.lock);
+}
+
+static void XBZRLE_cache_unlock(void)
+{
+    if (migrate_use_xbzrle())
+        qemu_mutex_unlock(&XBZRLE.lock);
+}
+
+/*
+ * called from qmp_migrate_set_cache_size in main thread, possibly while
+ * a migration is in progress.
+ * A running migration maybe using the cache and might finish during this
+ * call, hence changes to the cache are protected by XBZRLE.lock().
+ */
+int64_t xbzrle_cache_resize(int64_t new_size)
+{
+    PageCache *new_cache;
+    int64_t ret;
+
+    if (new_size < TARGET_PAGE_SIZE) {
+        return -1;
+    }
+
+    XBZRLE_cache_lock();
+
+    if (XBZRLE.cache != NULL) {
+        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
+            goto out_new_size;
+        }
+        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
+                                        TARGET_PAGE_SIZE);
+        if (!new_cache) {
+            error_report("Error creating cache");
+            ret = -1;
+            goto out;
+        }
+
+        cache_fini(XBZRLE.cache);
+        XBZRLE.cache = new_cache;
+    }
+
+out_new_size:
+    ret = pow2floor(new_size);
+out:
+    XBZRLE_cache_unlock();
+    return ret;
+}
+
+/* accounting for migration statistics */
+typedef struct AccountingInfo {
+    uint64_t dup_pages;
+    uint64_t skipped_pages;
+    uint64_t norm_pages;
+    uint64_t iterations;
+    uint64_t xbzrle_bytes;
+    uint64_t xbzrle_pages;
+    uint64_t xbzrle_cache_miss;
+    double xbzrle_cache_miss_rate;
+    uint64_t xbzrle_overflows;
+} AccountingInfo;
+
+static AccountingInfo acct_info;
+
+static void acct_clear(void)
+{
+    memset(&acct_info, 0, sizeof(acct_info));
+}
+
+uint64_t dup_mig_bytes_transferred(void)
+{
+    return acct_info.dup_pages * TARGET_PAGE_SIZE;
+}
+
+uint64_t dup_mig_pages_transferred(void)
+{
+    return acct_info.dup_pages;
+}
+
+uint64_t skipped_mig_bytes_transferred(void)
+{
+    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
+}
+
+uint64_t skipped_mig_pages_transferred(void)
+{
+    return acct_info.skipped_pages;
+}
+
+uint64_t norm_mig_bytes_transferred(void)
+{
+    return acct_info.norm_pages * TARGET_PAGE_SIZE;
+}
+
+uint64_t norm_mig_pages_transferred(void)
+{
+    return acct_info.norm_pages;
+}
+
+uint64_t xbzrle_mig_bytes_transferred(void)
+{
+    return acct_info.xbzrle_bytes;
+}
+
+uint64_t xbzrle_mig_pages_transferred(void)
+{
+    return acct_info.xbzrle_pages;
+}
+
+uint64_t xbzrle_mig_pages_cache_miss(void)
+{
+    return acct_info.xbzrle_cache_miss;
+}
+
+double xbzrle_mig_cache_miss_rate(void)
+{
+    return acct_info.xbzrle_cache_miss_rate;
+}
+
+uint64_t xbzrle_mig_pages_overflow(void)
+{
+    return acct_info.xbzrle_overflows;
+}
+
+/* This is the last block that we have visited serching for dirty pages
+ */
+static RAMBlock *last_seen_block;
+/* This is the last block from where we have sent data */
+static RAMBlock *last_sent_block;
+static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+static uint32_t last_version;
+static bool ram_bulk_stage;
+
+struct CompressParam {
+    bool start;
+    bool done;
+    QEMUFile *file;
+    QemuMutex mutex;
+    QemuCond cond;
+    RAMBlock *block;
+    ram_addr_t offset;
+};
+typedef struct CompressParam CompressParam;
+
+struct DecompressParam {
+    bool start;
+    QemuMutex mutex;
+    QemuCond cond;
+    void *des;
+    uint8 *compbuf;
+    int len;
+};
+typedef struct DecompressParam DecompressParam;
+
+static CompressParam *comp_param;
+static QemuThread *compress_threads;
+/* comp_done_cond is used to wake up the migration thread when
+ * one of the compression threads has finished the compression.
+ * comp_done_lock is used to co-work with comp_done_cond.
+ */
+static QemuMutex *comp_done_lock;
+static QemuCond *comp_done_cond;
+/* The empty QEMUFileOps will be used by file in CompressParam */
+static const QEMUFileOps empty_ops = { };
+
+static bool compression_switch;
+static bool quit_comp_thread;
+static bool quit_decomp_thread;
+static DecompressParam *decomp_param;
+static QemuThread *decompress_threads;
+static uint8_t *compressed_data_buf;
+
+static int do_compress_ram_page(CompressParam *param);
+
+static void *do_data_compress(void *opaque)
+{
+    CompressParam *param = opaque;
+
+    while (!quit_comp_thread) {
+        qemu_mutex_lock(&param->mutex);
+        /* Re-check the quit_comp_thread in case of
+         * terminate_compression_threads is called just before
+         * qemu_mutex_lock(&param->mutex) and after
+         * while(!quit_comp_thread), re-check it here can make
+         * sure the compression thread terminate as expected.
+         */
+        while (!param->start && !quit_comp_thread) {
+            qemu_cond_wait(&param->cond, &param->mutex);
+        }
+        if (!quit_comp_thread) {
+            do_compress_ram_page(param);
+        }
+        param->start = false;
+        qemu_mutex_unlock(&param->mutex);
+
+        qemu_mutex_lock(comp_done_lock);
+        param->done = true;
+        qemu_cond_signal(comp_done_cond);
+        qemu_mutex_unlock(comp_done_lock);
+    }
+
+    return NULL;
+}
+
+static inline void terminate_compression_threads(void)
+{
+    int idx, thread_count;
+
+    thread_count = migrate_compress_threads();
+    quit_comp_thread = true;
+    for (idx = 0; idx < thread_count; idx++) {
+        qemu_mutex_lock(&comp_param[idx].mutex);
+        qemu_cond_signal(&comp_param[idx].cond);
+        qemu_mutex_unlock(&comp_param[idx].mutex);
+    }
+}
+
+void migrate_compress_threads_join(void)
+{
+    int i, thread_count;
+
+    if (!migrate_use_compression()) {
+        return;
+    }
+    terminate_compression_threads();
+    thread_count = migrate_compress_threads();
+    for (i = 0; i < thread_count; i++) {
+        qemu_thread_join(compress_threads + i);
+        qemu_fclose(comp_param[i].file);
+        qemu_mutex_destroy(&comp_param[i].mutex);
+        qemu_cond_destroy(&comp_param[i].cond);
+    }
+    qemu_mutex_destroy(comp_done_lock);
+    qemu_cond_destroy(comp_done_cond);
+    g_free(compress_threads);
+    g_free(comp_param);
+    g_free(comp_done_cond);
+    g_free(comp_done_lock);
+    compress_threads = NULL;
+    comp_param = NULL;
+    comp_done_cond = NULL;
+    comp_done_lock = NULL;
+}
+
+void migrate_compress_threads_create(void)
+{
+    int i, thread_count;
+
+    if (!migrate_use_compression()) {
+        return;
+    }
+    quit_comp_thread = false;
+    compression_switch = true;
+    thread_count = migrate_compress_threads();
+    compress_threads = g_new0(QemuThread, thread_count);
+    comp_param = g_new0(CompressParam, thread_count);
+    comp_done_cond = g_new0(QemuCond, 1);
+    comp_done_lock = g_new0(QemuMutex, 1);
+    qemu_cond_init(comp_done_cond);
+    qemu_mutex_init(comp_done_lock);
+    for (i = 0; i < thread_count; i++) {
+        /* com_param[i].file is just used as a dummy buffer to save data, set
+         * it's ops to empty.
+         */
+        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
+        comp_param[i].done = true;
+        qemu_mutex_init(&comp_param[i].mutex);
+        qemu_cond_init(&comp_param[i].cond);
+        qemu_thread_create(compress_threads + i, "compress",
+                           do_data_compress, comp_param + i,
+                           QEMU_THREAD_JOINABLE);
+    }
+}
+
+/**
+ * save_page_header: Write page header to wire
+ *
+ * If this is the 1st block, it also writes the block identification
+ *
+ * Returns: Number of bytes written
+ *
+ * @f: QEMUFile where to send the data
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ *          in the lower bits, it contains flags
+ */
+static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
+{
+    size_t size;
+
+    qemu_put_be64(f, offset);
+    size = 8;
+
+    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
+        qemu_put_byte(f, strlen(block->idstr));
+        qemu_put_buffer(f, (uint8_t *)block->idstr,
+                        strlen(block->idstr));
+        size += 1 + strlen(block->idstr);
+    }
+    return size;
+}
+
+/* Update the xbzrle cache to reflect a page that's been sent as all 0.
+ * The important thing is that a stale (not-yet-0'd) page be replaced
+ * by the new data.
+ * As a bonus, if the page wasn't in the cache it gets added so that
+ * when a small write is made into the 0'd page it gets XBZRLE sent
+ */
+static void xbzrle_cache_zero_page(ram_addr_t current_addr)
+{
+    if (ram_bulk_stage || !migrate_use_xbzrle()) {
+        return;
+    }
+
+    /* We don't care if this fails to allocate a new cache page
+     * as long as it updated an old one */
+    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
+                 bitmap_sync_count);
+}
+
+#define ENCODING_FLAG_XBZRLE 0x1
+
+/**
+ * save_xbzrle_page: compress and send current page
+ *
+ * Returns: 1 means that we wrote the page
+ *          0 means that page is identical to the one already sent
+ *          -1 means that xbzrle would be longer than normal
+ *
+ * @f: QEMUFile where to send the data
+ * @current_data:
+ * @current_addr:
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
+                            ram_addr_t current_addr, RAMBlock *block,
+                            ram_addr_t offset, bool last_stage,
+                            uint64_t *bytes_transferred)
+{
+    int encoded_len = 0, bytes_xbzrle;
+    uint8_t *prev_cached_page;
+
+    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
+        acct_info.xbzrle_cache_miss++;
+        if (!last_stage) {
+            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
+                             bitmap_sync_count) == -1) {
+                return -1;
+            } else {
+                /* update *current_data when the page has been
+                   inserted into cache */
+                *current_data = get_cached_data(XBZRLE.cache, current_addr);
+            }
+        }
+        return -1;
+    }
+
+    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
+
+    /* save current buffer into memory */
+    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
+
+    /* XBZRLE encoding (if there is no overflow) */
+    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
+                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+                                       TARGET_PAGE_SIZE);
+    if (encoded_len == 0) {
+        DPRINTF("Skipping unmodified page\n");
+        return 0;
+    } else if (encoded_len == -1) {
+        DPRINTF("Overflow\n");
+        acct_info.xbzrle_overflows++;
+        /* update data in the cache */
+        if (!last_stage) {
+            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
+            *current_data = prev_cached_page;
+        }
+        return -1;
+    }
+
+    /* we need to update the data in the cache, in order to get the same data */
+    if (!last_stage) {
+        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
+    }
+
+    /* Send XBZRLE based compressed page */
+    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
+    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
+    qemu_put_be16(f, encoded_len);
+    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
+    bytes_xbzrle += encoded_len + 1 + 2;
+    acct_info.xbzrle_pages++;
+    acct_info.xbzrle_bytes += bytes_xbzrle;
+    *bytes_transferred += bytes_xbzrle;
+
+    return 1;
+}
+
+static inline
+ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
+                                                 ram_addr_t start)
+{
+    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
+    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
+    uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
+    unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
+
+    unsigned long next;
+
+    if (ram_bulk_stage && nr > base) {
+        next = nr + 1;
+    } else {
+        next = find_next_bit(migration_bitmap, size, nr);
+    }
+
+    if (next < size) {
+        clear_bit(next, migration_bitmap);
+        migration_dirty_pages--;
+    }
+    return (next - base) << TARGET_PAGE_BITS;
+}
+
+static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
+{
+    migration_dirty_pages +=
+        cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length);
+}
+
+
+/* Fix me: there are too many global variables used in migration process. */
+static int64_t start_time;
+static int64_t bytes_xfer_prev;
+static int64_t num_dirty_pages_period;
+static uint64_t xbzrle_cache_miss_prev;
+static uint64_t iterations_prev;
+
+static void migration_bitmap_sync_init(void)
+{
+    start_time = 0;
+    bytes_xfer_prev = 0;
+    num_dirty_pages_period = 0;
+    xbzrle_cache_miss_prev = 0;
+    iterations_prev = 0;
+}
+
+/* Called with iothread lock held, to protect ram_list.dirty_memory[] */
+static void migration_bitmap_sync(void)
+{
+    RAMBlock *block;
+    uint64_t num_dirty_pages_init = migration_dirty_pages;
+    MigrationState *s = migrate_get_current();
+    int64_t end_time;
+    int64_t bytes_xfer_now;
+
+    bitmap_sync_count++;
+
+    if (!bytes_xfer_prev) {
+        bytes_xfer_prev = ram_bytes_transferred();
+    }
+
+    if (!start_time) {
+        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    }
+
+    trace_migration_bitmap_sync_start();
+    address_space_sync_dirty_bitmap(&address_space_memory);
+
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
+    }
+    rcu_read_unlock();
+
+    trace_migration_bitmap_sync_end(migration_dirty_pages
+                                    - num_dirty_pages_init);
+    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
+    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+    /* more than 1 second = 1000 millisecons */
+    if (end_time > start_time + 1000) {
+        if (migrate_auto_converge()) {
+            /* The following detection logic can be refined later. For now:
+               Check to see if the dirtied bytes is 50% more than the approx.
+               amount of bytes that just got transferred since the last time we
+               were in this routine. If that happens >N times (for now N==4)
+               we turn on the throttle down logic */
+            bytes_xfer_now = ram_bytes_transferred();
+            if (s->dirty_pages_rate &&
+               (num_dirty_pages_period * TARGET_PAGE_SIZE >
+                   (bytes_xfer_now - bytes_xfer_prev)/2) &&
+               (dirty_rate_high_cnt++ > 4)) {
+                    trace_migration_throttle();
+                    mig_throttle_on = true;
+                    dirty_rate_high_cnt = 0;
+             }
+             bytes_xfer_prev = bytes_xfer_now;
+        } else {
+             mig_throttle_on = false;
+        }
+        if (migrate_use_xbzrle()) {
+            if (iterations_prev != acct_info.iterations) {
+                acct_info.xbzrle_cache_miss_rate =
+                   (double)(acct_info.xbzrle_cache_miss -
+                            xbzrle_cache_miss_prev) /
+                   (acct_info.iterations - iterations_prev);
+            }
+            iterations_prev = acct_info.iterations;
+            xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
+        }
+        s->dirty_pages_rate = num_dirty_pages_period * 1000
+            / (end_time - start_time);
+        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
+        start_time = end_time;
+        num_dirty_pages_period = 0;
+    }
+    s->dirty_sync_count = bitmap_sync_count;
+}
+
+/**
+ * save_zero_page: Send the zero page to the stream
+ *
+ * Returns: Number of pages written.
+ *
+ * @f: QEMUFile where to send the data
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * @p: pointer to the page
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
+                          uint8_t *p, uint64_t *bytes_transferred)
+{
+    int pages = -1;
+
+    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
+        acct_info.dup_pages++;
+        *bytes_transferred += save_page_header(f, block,
+                                               offset | RAM_SAVE_FLAG_COMPRESS);
+        qemu_put_byte(f, 0);
+        *bytes_transferred += 1;
+        pages = 1;
+    }
+
+    return pages;
+}
+
+/**
+ * ram_save_page: Send the given page to the stream
+ *
+ * Returns: Number of pages written.
+ *
+ * @f: QEMUFile where to send the data
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
+                         bool last_stage, uint64_t *bytes_transferred)
+{
+    int pages = -1;
+    uint64_t bytes_xmit;
+    ram_addr_t current_addr;
+    MemoryRegion *mr = block->mr;
+    uint8_t *p;
+    int ret;
+    bool send_async = true;
+
+    p = memory_region_get_ram_ptr(mr) + offset;
+
+    /* In doubt sent page as normal */
+    bytes_xmit = 0;
+    ret = ram_control_save_page(f, block->offset,
+                           offset, TARGET_PAGE_SIZE, &bytes_xmit);
+    if (bytes_xmit) {
+        *bytes_transferred += bytes_xmit;
+        pages = 1;
+    }
+
+    XBZRLE_cache_lock();
+
+    current_addr = block->offset + offset;
+
+    if (block == last_sent_block) {
+        offset |= RAM_SAVE_FLAG_CONTINUE;
+    }
+    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
+        if (ret != RAM_SAVE_CONTROL_DELAYED) {
+            if (bytes_xmit > 0) {
+                acct_info.norm_pages++;
+            } else if (bytes_xmit == 0) {
+                acct_info.dup_pages++;
+            }
+        }
+    } else {
+        pages = save_zero_page(f, block, offset, p, bytes_transferred);
+        if (pages > 0) {
+            /* Must let xbzrle know, otherwise a previous (now 0'd) cached
+             * page would be stale
+             */
+            xbzrle_cache_zero_page(current_addr);
+        } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
+            pages = save_xbzrle_page(f, &p, current_addr, block,
+                                     offset, last_stage, bytes_transferred);
+            if (!last_stage) {
+                /* Can't send this cached data async, since the cache page
+                 * might get updated before it gets to the wire
+                 */
+                send_async = false;
+            }
+        }
+    }
+
+    /* XBZRLE overflow or normal page */
+    if (pages == -1) {
+        *bytes_transferred += save_page_header(f, block,
+                                               offset | RAM_SAVE_FLAG_PAGE);
+        if (send_async) {
+            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+        } else {
+            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+        }
+        *bytes_transferred += TARGET_PAGE_SIZE;
+        pages = 1;
+        acct_info.norm_pages++;
+    }
+
+    XBZRLE_cache_unlock();
+
+    return pages;
+}
+
+static int do_compress_ram_page(CompressParam *param)
+{
+    int bytes_sent, blen;
+    uint8_t *p;
+    RAMBlock *block = param->block;
+    ram_addr_t offset = param->offset;
+
+    p = memory_region_get_ram_ptr(block->mr) + (offset & TARGET_PAGE_MASK);
+
+    bytes_sent = save_page_header(param->file, block, offset |
+                                  RAM_SAVE_FLAG_COMPRESS_PAGE);
+    blen = qemu_put_compression_data(param->file, p, TARGET_PAGE_SIZE,
+                                     migrate_compress_level());
+    bytes_sent += blen;
+
+    return bytes_sent;
+}
+
+static inline void start_compression(CompressParam *param)
+{
+    param->done = false;
+    qemu_mutex_lock(&param->mutex);
+    param->start = true;
+    qemu_cond_signal(&param->cond);
+    qemu_mutex_unlock(&param->mutex);
+}
+
+static inline void start_decompression(DecompressParam *param)
+{
+    qemu_mutex_lock(&param->mutex);
+    param->start = true;
+    qemu_cond_signal(&param->cond);
+    qemu_mutex_unlock(&param->mutex);
+}
+
+static uint64_t bytes_transferred;
+
+static void flush_compressed_data(QEMUFile *f)
+{
+    int idx, len, thread_count;
+
+    if (!migrate_use_compression()) {
+        return;
+    }
+    thread_count = migrate_compress_threads();
+    for (idx = 0; idx < thread_count; idx++) {
+        if (!comp_param[idx].done) {
+            qemu_mutex_lock(comp_done_lock);
+            while (!comp_param[idx].done && !quit_comp_thread) {
+                qemu_cond_wait(comp_done_cond, comp_done_lock);
+            }
+            qemu_mutex_unlock(comp_done_lock);
+        }
+        if (!quit_comp_thread) {
+            len = qemu_put_qemu_file(f, comp_param[idx].file);
+            bytes_transferred += len;
+        }
+    }
+}
+
+static inline void set_compress_params(CompressParam *param, RAMBlock *block,
+                                       ram_addr_t offset)
+{
+    param->block = block;
+    param->offset = offset;
+}
+
+static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
+                                           ram_addr_t offset,
+                                           uint64_t *bytes_transferred)
+{
+    int idx, thread_count, bytes_xmit = -1, pages = -1;
+
+    thread_count = migrate_compress_threads();
+    qemu_mutex_lock(comp_done_lock);
+    while (true) {
+        for (idx = 0; idx < thread_count; idx++) {
+            if (comp_param[idx].done) {
+                bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
+                set_compress_params(&comp_param[idx], block, offset);
+                start_compression(&comp_param[idx]);
+                pages = 1;
+                acct_info.norm_pages++;
+                *bytes_transferred += bytes_xmit;
+                break;
+            }
+        }
+        if (pages > 0) {
+            break;
+        } else {
+            qemu_cond_wait(comp_done_cond, comp_done_lock);
+        }
+    }
+    qemu_mutex_unlock(comp_done_lock);
+
+    return pages;
+}
+
+/**
+ * ram_save_compressed_page: compress the given page and send it to the stream
+ *
+ * Returns: Number of pages written.
+ *
+ * @f: QEMUFile where to send the data
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
+                                    ram_addr_t offset, bool last_stage,
+                                    uint64_t *bytes_transferred)
+{
+    int pages = -1;
+    uint64_t bytes_xmit;
+    MemoryRegion *mr = block->mr;
+    uint8_t *p;
+    int ret;
+
+    p = memory_region_get_ram_ptr(mr) + offset;
+
+    bytes_xmit = 0;
+    ret = ram_control_save_page(f, block->offset,
+                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
+    if (bytes_xmit) {
+        *bytes_transferred += bytes_xmit;
+        pages = 1;
+    }
+    if (block == last_sent_block) {
+        offset |= RAM_SAVE_FLAG_CONTINUE;
+    }
+    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
+        if (ret != RAM_SAVE_CONTROL_DELAYED) {
+            if (bytes_xmit > 0) {
+                acct_info.norm_pages++;
+            } else if (bytes_xmit == 0) {
+                acct_info.dup_pages++;
+            }
+        }
+    } else {
+        /* When starting the process of a new block, the first page of
+         * the block should be sent out before other pages in the same
+         * block, and all the pages in last block should have been sent
+         * out, keeping this order is important, because the 'cont' flag
+         * is used to avoid resending the block name.
+         */
+        if (block != last_sent_block) {
+            flush_compressed_data(f);
+            pages = save_zero_page(f, block, offset, p, bytes_transferred);
+            if (pages == -1) {
+                set_compress_params(&comp_param[0], block, offset);
+                /* Use the qemu thread to compress the data to make sure the
+                 * first page is sent out before other pages
+                 */
+                bytes_xmit = do_compress_ram_page(&comp_param[0]);
+                acct_info.norm_pages++;
+                qemu_put_qemu_file(f, comp_param[0].file);
+                *bytes_transferred += bytes_xmit;
+                pages = 1;
+            }
+        } else {
+            pages = save_zero_page(f, block, offset, p, bytes_transferred);
+            if (pages == -1) {
+                pages = compress_page_with_multi_thread(f, block, offset,
+                                                        bytes_transferred);
+            }
+        }
+    }
+
+    return pages;
+}
+
+/**
+ * ram_find_and_save_block: Finds a dirty page and sends it to f
+ *
+ * Called within an RCU critical section.
+ *
+ * Returns:  The number of pages written
+ *           0 means no dirty pages
+ *
+ * @f: QEMUFile where to send the data
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+
+static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
+                                   uint64_t *bytes_transferred)
+{
+    RAMBlock *block = last_seen_block;
+    ram_addr_t offset = last_offset;
+    bool complete_round = false;
+    int pages = 0;
+    MemoryRegion *mr;
+
+    if (!block)
+        block = QLIST_FIRST_RCU(&ram_list.blocks);
+
+    while (true) {
+        mr = block->mr;
+        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
+        if (complete_round && block == last_seen_block &&
+            offset >= last_offset) {
+            break;
+        }
+        if (offset >= block->used_length) {
+            offset = 0;
+            block = QLIST_NEXT_RCU(block, next);
+            if (!block) {
+                block = QLIST_FIRST_RCU(&ram_list.blocks);
+                complete_round = true;
+                ram_bulk_stage = false;
+                if (migrate_use_xbzrle()) {
+                    /* If xbzrle is on, stop using the data compression at this
+                     * point. In theory, xbzrle can do better than compression.
+                     */
+                    flush_compressed_data(f);
+                    compression_switch = false;
+                }
+            }
+        } else {
+            if (compression_switch && migrate_use_compression()) {
+                pages = ram_save_compressed_page(f, block, offset, last_stage,
+                                                 bytes_transferred);
+            } else {
+                pages = ram_save_page(f, block, offset, last_stage,
+                                      bytes_transferred);
+            }
+
+            /* if page is unmodified, continue to the next */
+            if (pages > 0) {
+                last_sent_block = block;
+                break;
+            }
+        }
+    }
+
+    last_seen_block = block;
+    last_offset = offset;
+
+    return pages;
+}
+
+void acct_update_position(QEMUFile *f, size_t size, bool zero)
+{
+    uint64_t pages = size / TARGET_PAGE_SIZE;
+    if (zero) {
+        acct_info.dup_pages += pages;
+    } else {
+        acct_info.norm_pages += pages;
+        bytes_transferred += size;
+        qemu_update_position(f, size);
+    }
+}
+
+static ram_addr_t ram_save_remaining(void)
+{
+    return migration_dirty_pages;
+}
+
+uint64_t ram_bytes_remaining(void)
+{
+    return ram_save_remaining() * TARGET_PAGE_SIZE;
+}
+
+uint64_t ram_bytes_transferred(void)
+{
+    return bytes_transferred;
+}
+
+uint64_t ram_bytes_total(void)
+{
+    RAMBlock *block;
+    uint64_t total = 0;
+
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
+        total += block->used_length;
+    rcu_read_unlock();
+    return total;
+}
+
+void free_xbzrle_decoded_buf(void)
+{
+    g_free(xbzrle_decoded_buf);
+    xbzrle_decoded_buf = NULL;
+}
+
+static void migration_end(void)
+{
+    if (migration_bitmap) {
+        memory_global_dirty_log_stop();
+        g_free(migration_bitmap);
+        migration_bitmap = NULL;
+    }
+
+    XBZRLE_cache_lock();
+    if (XBZRLE.cache) {
+        cache_fini(XBZRLE.cache);
+        g_free(XBZRLE.encoded_buf);
+        g_free(XBZRLE.current_buf);
+        XBZRLE.cache = NULL;
+        XBZRLE.encoded_buf = NULL;
+        XBZRLE.current_buf = NULL;
+    }
+    XBZRLE_cache_unlock();
+}
+
+static void ram_migration_cancel(void *opaque)
+{
+    migration_end();
+}
+
+static void reset_ram_globals(void)
+{
+    last_seen_block = NULL;
+    last_sent_block = NULL;
+    last_offset = 0;
+    last_version = ram_list.version;
+    ram_bulk_stage = true;
+}
+
+#define MAX_WAIT 50 /* ms, half buffered_file limit */
+
+
+/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
+ * long-running RCU critical section.  When rcu-reclaims in the code
+ * start to become numerous it will be necessary to reduce the
+ * granularity of these critical sections.
+ */
+
+static int ram_save_setup(QEMUFile *f, void *opaque)
+{
+    RAMBlock *block;
+    int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
+
+    mig_throttle_on = false;
+    dirty_rate_high_cnt = 0;
+    bitmap_sync_count = 0;
+    migration_bitmap_sync_init();
+
+    if (migrate_use_xbzrle()) {
+        XBZRLE_cache_lock();
+        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
+                                  TARGET_PAGE_SIZE,
+                                  TARGET_PAGE_SIZE);
+        if (!XBZRLE.cache) {
+            XBZRLE_cache_unlock();
+            error_report("Error creating cache");
+            return -1;
+        }
+        XBZRLE_cache_unlock();
+
+        /* We prefer not to abort if there is no memory */
+        XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
+        if (!XBZRLE.encoded_buf) {
+            error_report("Error allocating encoded_buf");
+            return -1;
+        }
+
+        XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
+        if (!XBZRLE.current_buf) {
+            error_report("Error allocating current_buf");
+            g_free(XBZRLE.encoded_buf);
+            XBZRLE.encoded_buf = NULL;
+            return -1;
+        }
+
+        acct_clear();
+    }
+
+    /* iothread lock needed for ram_list.dirty_memory[] */
+    qemu_mutex_lock_iothread();
+    qemu_mutex_lock_ramlist();
+    rcu_read_lock();
+    bytes_transferred = 0;
+    reset_ram_globals();
+
+    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
+    migration_bitmap = bitmap_new(ram_bitmap_pages);
+    bitmap_set(migration_bitmap, 0, ram_bitmap_pages);
+
+    /*
+     * Count the total number of pages used by ram blocks not including any
+     * gaps due to alignment or unplugs.
+     */
+    migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
+
+    memory_global_dirty_log_start();
+    migration_bitmap_sync();
+    qemu_mutex_unlock_ramlist();
+    qemu_mutex_unlock_iothread();
+
+    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
+
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        qemu_put_byte(f, strlen(block->idstr));
+        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
+        qemu_put_be64(f, block->used_length);
+    }
+
+    rcu_read_unlock();
+
+    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
+    ram_control_after_iterate(f, RAM_CONTROL_SETUP);
+
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+
+    return 0;
+}
+
+static int ram_save_iterate(QEMUFile *f, void *opaque)
+{
+    int ret;
+    int i;
+    int64_t t0;
+    int pages_sent = 0;
+
+    rcu_read_lock();
+    if (ram_list.version != last_version) {
+        reset_ram_globals();
+    }
+
+    /* Read version before ram_list.blocks */
+    smp_rmb();
+
+    ram_control_before_iterate(f, RAM_CONTROL_ROUND);
+
+    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    i = 0;
+    while ((ret = qemu_file_rate_limit(f)) == 0) {
+        int pages;
+
+        pages = ram_find_and_save_block(f, false, &bytes_transferred);
+        /* no more pages to sent */
+        if (pages == 0) {
+            break;
+        }
+        pages_sent += pages;
+        acct_info.iterations++;
+        check_guest_throttling();
+        /* we want to check in the 1st loop, just in case it was the 1st time
+           and we had to sync the dirty bitmap.
+           qemu_get_clock_ns() is a bit expensive, so we only check each some
+           iterations
+        */
+        if ((i & 63) == 0) {
+            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
+            if (t1 > MAX_WAIT) {
+                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
+                        t1, i);
+                break;
+            }
+        }
+        i++;
+    }
+    flush_compressed_data(f);
+    rcu_read_unlock();
+
+    /*
+     * Must occur before EOS (or any QEMUFile operation)
+     * because of RDMA protocol.
+     */
+    ram_control_after_iterate(f, RAM_CONTROL_ROUND);
+
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+    bytes_transferred += 8;
+
+    ret = qemu_file_get_error(f);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return pages_sent;
+}
+
+/* Called with iothread lock */
+static int ram_save_complete(QEMUFile *f, void *opaque)
+{
+    rcu_read_lock();
+
+    migration_bitmap_sync();
+
+    ram_control_before_iterate(f, RAM_CONTROL_FINISH);
+
+    /* try transferring iterative blocks of memory */
+
+    /* flush all remaining blocks regardless of rate limiting */
+    while (true) {
+        int pages;
+
+        pages = ram_find_and_save_block(f, true, &bytes_transferred);
+        /* no more blocks to sent */
+        if (pages == 0) {
+            break;
+        }
+    }
+
+    flush_compressed_data(f);
+    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
+    migration_end();
+
+    rcu_read_unlock();
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+
+    return 0;
+}
+
+static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
+{
+    uint64_t remaining_size;
+
+    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
+
+    if (remaining_size < max_size) {
+        qemu_mutex_lock_iothread();
+        rcu_read_lock();
+        migration_bitmap_sync();
+        rcu_read_unlock();
+        qemu_mutex_unlock_iothread();
+        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
+    }
+    return remaining_size;
+}
+
+static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+    unsigned int xh_len;
+    int xh_flags;
+
+    if (!xbzrle_decoded_buf) {
+        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
+    }
+
+    /* extract RLE header */
+    xh_flags = qemu_get_byte(f);
+    xh_len = qemu_get_be16(f);
+
+    if (xh_flags != ENCODING_FLAG_XBZRLE) {
+        error_report("Failed to load XBZRLE page - wrong compression!");
+        return -1;
+    }
+
+    if (xh_len > TARGET_PAGE_SIZE) {
+        error_report("Failed to load XBZRLE page - len overflow!");
+        return -1;
+    }
+    /* load data and decode */
+    qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);
+
+    /* decode RLE */
+    if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
+                             TARGET_PAGE_SIZE) == -1) {
+        error_report("Failed to load XBZRLE page - decode error!");
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Must be called from within a rcu critical section.
+ * Returns a pointer from within the RCU-protected ram_list.
+ */
+static inline void *host_from_stream_offset(QEMUFile *f,
+                                            ram_addr_t offset,
+                                            int flags)
+{
+    static RAMBlock *block = NULL;
+    char id[256];
+    uint8_t len;
+
+    if (flags & RAM_SAVE_FLAG_CONTINUE) {
+        if (!block || block->max_length <= offset) {
+            error_report("Ack, bad migration stream!");
+            return NULL;
+        }
+
+        return memory_region_get_ram_ptr(block->mr) + offset;
+    }
+
+    len = qemu_get_byte(f);
+    qemu_get_buffer(f, (uint8_t *)id, len);
+    id[len] = 0;
+
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        if (!strncmp(id, block->idstr, sizeof(id)) &&
+            block->max_length > offset) {
+            return memory_region_get_ram_ptr(block->mr) + offset;
+        }
+    }
+
+    error_report("Can't find block %s!", id);
+    return NULL;
+}
+
+/*
+ * If a page (or a whole RDMA chunk) has been
+ * determined to be zero, then zap it.
+ */
+void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
+{
+    if (ch != 0 || !is_zero_range(host, size)) {
+        memset(host, ch, size);
+    }
+}
+
+static void *do_data_decompress(void *opaque)
+{
+    DecompressParam *param = opaque;
+    unsigned long pagesize;
+
+    while (!quit_decomp_thread) {
+        qemu_mutex_lock(&param->mutex);
+        while (!param->start && !quit_decomp_thread) {
+            qemu_cond_wait(&param->cond, &param->mutex);
+            pagesize = TARGET_PAGE_SIZE;
+            if (!quit_decomp_thread) {
+                /* uncompress() will return failed in some case, especially
+                 * when the page is dirted when doing the compression, it's
+                 * not a problem because the dirty page will be retransferred
+                 * and uncompress() won't break the data in other pages.
+                 */
+                uncompress((Bytef *)param->des, &pagesize,
+                           (const Bytef *)param->compbuf, param->len);
+            }
+            param->start = false;
+        }
+        qemu_mutex_unlock(&param->mutex);
+    }
+
+    return NULL;
+}
+
+void migrate_decompress_threads_create(void)
+{
+    int i, thread_count;
+
+    thread_count = migrate_decompress_threads();
+    decompress_threads = g_new0(QemuThread, thread_count);
+    decomp_param = g_new0(DecompressParam, thread_count);
+    compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
+    quit_decomp_thread = false;
+    for (i = 0; i < thread_count; i++) {
+        qemu_mutex_init(&decomp_param[i].mutex);
+        qemu_cond_init(&decomp_param[i].cond);
+        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
+        qemu_thread_create(decompress_threads + i, "decompress",
+                           do_data_decompress, decomp_param + i,
+                           QEMU_THREAD_JOINABLE);
+    }
+}
+
+void migrate_decompress_threads_join(void)
+{
+    int i, thread_count;
+
+    quit_decomp_thread = true;
+    thread_count = migrate_decompress_threads();
+    for (i = 0; i < thread_count; i++) {
+        qemu_mutex_lock(&decomp_param[i].mutex);
+        qemu_cond_signal(&decomp_param[i].cond);
+        qemu_mutex_unlock(&decomp_param[i].mutex);
+    }
+    for (i = 0; i < thread_count; i++) {
+        qemu_thread_join(decompress_threads + i);
+        qemu_mutex_destroy(&decomp_param[i].mutex);
+        qemu_cond_destroy(&decomp_param[i].cond);
+        g_free(decomp_param[i].compbuf);
+    }
+    g_free(decompress_threads);
+    g_free(decomp_param);
+    g_free(compressed_data_buf);
+    decompress_threads = NULL;
+    decomp_param = NULL;
+    compressed_data_buf = NULL;
+}
+
+static void decompress_data_with_multi_threads(uint8_t *compbuf,
+                                               void *host, int len)
+{
+    int idx, thread_count;
+
+    thread_count = migrate_decompress_threads();
+    while (true) {
+        for (idx = 0; idx < thread_count; idx++) {
+            if (!decomp_param[idx].start) {
+                memcpy(decomp_param[idx].compbuf, compbuf, len);
+                decomp_param[idx].des = host;
+                decomp_param[idx].len = len;
+                start_decompression(&decomp_param[idx]);
+                break;
+            }
+        }
+        if (idx < thread_count) {
+            break;
+        }
+    }
+}
+
+static int ram_load(QEMUFile *f, void *opaque, int version_id)
+{
+    int flags = 0, ret = 0;
+    static uint64_t seq_iter;
+    int len = 0;
+
+    seq_iter++;
+
+    if (version_id != 4) {
+        ret = -EINVAL;
+    }
+
+    /* This RCU critical section can be very long running.
+     * When RCU reclaims in the code start to become numerous,
+     * it will be necessary to reduce the granularity of this
+     * critical section.
+     */
+    rcu_read_lock();
+    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
+        ram_addr_t addr, total_ram_bytes;
+        void *host;
+        uint8_t ch;
+
+        addr = qemu_get_be64(f);
+        flags = addr & ~TARGET_PAGE_MASK;
+        addr &= TARGET_PAGE_MASK;
+
+        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
+        case RAM_SAVE_FLAG_MEM_SIZE:
+            /* Synchronize RAM block list */
+            total_ram_bytes = addr;
+            while (!ret && total_ram_bytes) {
+                RAMBlock *block;
+                char id[256];
+                ram_addr_t length;
+
+                len = qemu_get_byte(f);
+                qemu_get_buffer(f, (uint8_t *)id, len);
+                id[len] = 0;
+                length = qemu_get_be64(f);
+
+                QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+                    if (!strncmp(id, block->idstr, sizeof(id))) {
+                        if (length != block->used_length) {
+                            Error *local_err = NULL;
+
+                            ret = qemu_ram_resize(block->offset, length, &local_err);
+                            if (local_err) {
+                                error_report_err(local_err);
+                            }
+                        }
+                        break;
+                    }
+                }
+
+                if (!block) {
+                    error_report("Unknown ramblock \"%s\", cannot "
+                                 "accept migration", id);
+                    ret = -EINVAL;
+                }
+
+                total_ram_bytes -= length;
+            }
+            break;
+        case RAM_SAVE_FLAG_COMPRESS:
+            host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
+            }
+            ch = qemu_get_byte(f);
+            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
+            break;
+        case RAM_SAVE_FLAG_PAGE:
+            host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
+            }
+            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+            break;
+        case RAM_SAVE_FLAG_COMPRESS_PAGE:
+            host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                error_report("Invalid RAM offset " RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
+            }
+
+            len = qemu_get_be32(f);
+            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
+                error_report("Invalid compressed data length: %d", len);
+                ret = -EINVAL;
+                break;
+            }
+            qemu_get_buffer(f, compressed_data_buf, len);
+            decompress_data_with_multi_threads(compressed_data_buf, host, len);
+            break;
+        case RAM_SAVE_FLAG_XBZRLE:
+            host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
+            }
+            if (load_xbzrle(f, addr, host) < 0) {
+                error_report("Failed to decompress XBZRLE page at "
+                             RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
+            }
+            break;
+        case RAM_SAVE_FLAG_EOS:
+            /* normal exit */
+            break;
+        default:
+            if (flags & RAM_SAVE_FLAG_HOOK) {
+                ram_control_load_hook(f, flags);
+            } else {
+                error_report("Unknown combination of migration flags: %#x",
+                             flags);
+                ret = -EINVAL;
+            }
+        }
+        if (!ret) {
+            ret = qemu_file_get_error(f);
+        }
+    }
+
+    rcu_read_unlock();
+    DPRINTF("Completed load of VM with exit code %d seq iteration "
+            "%" PRIu64 "\n", ret, seq_iter);
+    return ret;
+}
+
+static SaveVMHandlers savevm_ram_handlers = {
+    .save_live_setup = ram_save_setup,
+    .save_live_iterate = ram_save_iterate,
+    .save_live_complete = ram_save_complete,
+    .save_live_pending = ram_save_pending,
+    .load_state = ram_load,
+    .cancel = ram_migration_cancel,
+};
+
+void ram_mig_init(void)
+{
+    qemu_mutex_init(&XBZRLE.lock);
+    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
+}
+/* Stub function that's gets run on the vcpu when its brought out of the
+   VM to run inside qemu via async_run_on_cpu()*/
+
+static void mig_sleep_cpu(void *opq)
+{
+    qemu_mutex_unlock_iothread();
+    g_usleep(30*1000);
+    qemu_mutex_lock_iothread();
+}
+
+/* To reduce the dirty rate explicitly disallow the VCPUs from spending
+   much time in the VM. The migration thread will try to catchup.
+   Workload will experience a performance drop.
+*/
+static void mig_throttle_guest_down(void)
+{
+    CPUState *cpu;
+
+    qemu_mutex_lock_iothread();
+    CPU_FOREACH(cpu) {
+        async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
+    }
+    qemu_mutex_unlock_iothread();
+}
+
+static void check_guest_throttling(void)
+{
+    static int64_t t0;
+    int64_t        t1;
+
+    if (!mig_throttle_on) {
+        return;
+    }
+
+    if (!t0)  {
+        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+        return;
+    }
+
+    t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+    /* If it has been more than 40 ms since the last time the guest
+     * was throttled then do it again.
+     */
+    if (40 < (t1-t0)/1000000) {
+        mig_throttle_guest_down();
+        t0 = t1;
+    }
+}
diff --git a/migration/rdma.c b/migration/rdma.c
index 171c23fc3c..cf5de7e2ae 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -236,13 +236,13 @@ typedef struct RDMALocalBlock {
  * corresponding RDMALocalBlock with
  * the information needed to perform the actual RDMA.
  */
-typedef struct QEMU_PACKED RDMARemoteBlock {
+typedef struct QEMU_PACKED RDMADestBlock {
     uint64_t remote_host_addr;
     uint64_t offset;
     uint64_t length;
     uint32_t remote_rkey;
     uint32_t padding;
-} RDMARemoteBlock;
+} RDMADestBlock;
 
 static uint64_t htonll(uint64_t v)
 {
@@ -258,20 +258,20 @@ static uint64_t ntohll(uint64_t v) {
     return ((uint64_t)ntohl(u.lv[0]) << 32) | (uint64_t) ntohl(u.lv[1]);
 }
 
-static void remote_block_to_network(RDMARemoteBlock *rb)
+static void dest_block_to_network(RDMADestBlock *db)
 {
-    rb->remote_host_addr = htonll(rb->remote_host_addr);
-    rb->offset = htonll(rb->offset);
-    rb->length = htonll(rb->length);
-    rb->remote_rkey = htonl(rb->remote_rkey);
+    db->remote_host_addr = htonll(db->remote_host_addr);
+    db->offset = htonll(db->offset);
+    db->length = htonll(db->length);
+    db->remote_rkey = htonl(db->remote_rkey);
 }
 
-static void network_to_remote_block(RDMARemoteBlock *rb)
+static void network_to_dest_block(RDMADestBlock *db)
 {
-    rb->remote_host_addr = ntohll(rb->remote_host_addr);
-    rb->offset = ntohll(rb->offset);
-    rb->length = ntohll(rb->length);
-    rb->remote_rkey = ntohl(rb->remote_rkey);
+    db->remote_host_addr = ntohll(db->remote_host_addr);
+    db->offset = ntohll(db->offset);
+    db->length = ntohll(db->length);
+    db->remote_rkey = ntohl(db->remote_rkey);
 }
 
 /*
@@ -350,7 +350,7 @@ typedef struct RDMAContext {
      * Description of ram blocks used throughout the code.
      */
     RDMALocalBlocks local_ram_blocks;
-    RDMARemoteBlock *block;
+    RDMADestBlock  *dest_blocks;
 
     /*
      * Migration on *destination* started.
@@ -570,10 +570,10 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr,
  * in advanced before the migration starts. This tells us where the RAM blocks
  * are so that we can register them individually.
  */
-static void qemu_rdma_init_one_block(void *host_addr,
+static int qemu_rdma_init_one_block(const char *block_name, void *host_addr,
     ram_addr_t block_offset, ram_addr_t length, void *opaque)
 {
-    rdma_add_block(opaque, host_addr, block_offset, length);
+    return rdma_add_block(opaque, host_addr, block_offset, length);
 }
 
 /*
@@ -590,7 +590,7 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma)
     memset(local, 0, sizeof *local);
     qemu_ram_foreach_block(qemu_rdma_init_one_block, rdma);
     trace_qemu_rdma_init_ram_blocks(local->nb_blocks);
-    rdma->block = (RDMARemoteBlock *) g_malloc0(sizeof(RDMARemoteBlock) *
+    rdma->dest_blocks = (RDMADestBlock *) g_malloc0(sizeof(RDMADestBlock) *
                         rdma->local_ram_blocks.nb_blocks);
     local->init = true;
     return 0;
@@ -790,6 +790,13 @@ static int qemu_rdma_broken_ipv6_kernel(Error **errp, struct ibv_context *verbs)
 
         for (x = 0; x < num_devices; x++) {
             verbs = ibv_open_device(dev_list[x]);
+            if (!verbs) {
+                if (errno == EPERM) {
+                    continue;
+                } else {
+                    return -EINVAL;
+                }
+            }
 
             if (ibv_query_port(verbs, 1, &port_attr)) {
                 ibv_close_device(verbs);
@@ -2177,8 +2184,8 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
         rdma->connected = false;
     }
 
-    g_free(rdma->block);
-    rdma->block = NULL;
+    g_free(rdma->dest_blocks);
+    rdma->dest_blocks = NULL;
 
     for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
         if (rdma->wr_data[idx].control_mr) {
@@ -2445,7 +2452,6 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp)
 
     if (host_port) {
         rdma = g_malloc0(sizeof(RDMAContext));
-        memset(rdma, 0, sizeof(RDMAContext));
         rdma->current_index = -1;
         rdma->current_chunk = -1;
 
@@ -2967,25 +2973,25 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
              * their "local" descriptions with what was sent.
              */
             for (i = 0; i < local->nb_blocks; i++) {
-                rdma->block[i].remote_host_addr =
+                rdma->dest_blocks[i].remote_host_addr =
                     (uintptr_t)(local->block[i].local_host_addr);
 
                 if (rdma->pin_all) {
-                    rdma->block[i].remote_rkey = local->block[i].mr->rkey;
+                    rdma->dest_blocks[i].remote_rkey = local->block[i].mr->rkey;
                 }
 
-                rdma->block[i].offset = local->block[i].offset;
-                rdma->block[i].length = local->block[i].length;
+                rdma->dest_blocks[i].offset = local->block[i].offset;
+                rdma->dest_blocks[i].length = local->block[i].length;
 
-                remote_block_to_network(&rdma->block[i]);
+                dest_block_to_network(&rdma->dest_blocks[i]);
             }
 
             blocks.len = rdma->local_ram_blocks.nb_blocks
-                                                * sizeof(RDMARemoteBlock);
+                                                * sizeof(RDMADestBlock);
 
 
             ret = qemu_rdma_post_send_control(rdma,
-                                        (uint8_t *) rdma->block, &blocks);
+                                        (uint8_t *) rdma->dest_blocks, &blocks);
 
             if (ret < 0) {
                 error_report("rdma migration: error sending remote info");
@@ -3141,7 +3147,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
     if (flags == RAM_CONTROL_SETUP) {
         RDMAControlHeader resp = {.type = RDMA_CONTROL_RAM_BLOCKS_RESULT };
         RDMALocalBlocks *local = &rdma->local_ram_blocks;
-        int reg_result_idx, i, j, nb_remote_blocks;
+        int reg_result_idx, i, j, nb_dest_blocks;
 
         head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST;
         trace_qemu_rdma_registration_stop_ram();
@@ -3162,7 +3168,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
             return ret;
         }
 
-        nb_remote_blocks = resp.len / sizeof(RDMARemoteBlock);
+        nb_dest_blocks = resp.len / sizeof(RDMADestBlock);
 
         /*
          * The protocol uses two different sets of rkeys (mutually exclusive):
@@ -3176,7 +3182,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
          * and then propagates the remote ram block descriptions to his local copy.
          */
 
-        if (local->nb_blocks != nb_remote_blocks) {
+        if (local->nb_blocks != nb_dest_blocks) {
             ERROR(errp, "ram blocks mismatch #1! "
                         "Your QEMU command line parameters are probably "
                         "not identical on both the source and destination.");
@@ -3184,26 +3190,26 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
         }
 
         qemu_rdma_move_header(rdma, reg_result_idx, &resp);
-        memcpy(rdma->block,
+        memcpy(rdma->dest_blocks,
             rdma->wr_data[reg_result_idx].control_curr, resp.len);
-        for (i = 0; i < nb_remote_blocks; i++) {
-            network_to_remote_block(&rdma->block[i]);
+        for (i = 0; i < nb_dest_blocks; i++) {
+            network_to_dest_block(&rdma->dest_blocks[i]);
 
             /* search local ram blocks */
             for (j = 0; j < local->nb_blocks; j++) {
-                if (rdma->block[i].offset != local->block[j].offset) {
+                if (rdma->dest_blocks[i].offset != local->block[j].offset) {
                     continue;
                 }
 
-                if (rdma->block[i].length != local->block[j].length) {
+                if (rdma->dest_blocks[i].length != local->block[j].length) {
                     ERROR(errp, "ram blocks mismatch #2! "
                         "Your QEMU command line parameters are probably "
                         "not identical on both the source and destination.");
                     return -EINVAL;
                 }
                 local->block[j].remote_host_addr =
-                        rdma->block[i].remote_host_addr;
-                local->block[j].remote_rkey = rdma->block[i].remote_rkey;
+                        rdma->dest_blocks[i].remote_host_addr;
+                local->block[j].remote_rkey = rdma->dest_blocks[i].remote_rkey;
                 break;
             }
 
diff --git a/savevm.c b/migration/savevm.c
index 3b0e222cb3..2091882196 100644
--- a/savevm.c
+++ b/migration/savevm.c
@@ -2,6 +2,10 @@
  * QEMU System Emulator
  *
  * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2009-2015 Red Hat Inc
+ *
+ * Authors:
+ *  Juan Quintela <quintela@redhat.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -51,6 +55,8 @@
 #define ARP_PTYPE_IP 0x0800
 #define ARP_OP_REQUEST_REV 0x3
 
+static bool skip_section_footers;
+
 static int announce_self_create(uint8_t *buf,
                                 uint8_t *mac_addr)
 {
@@ -235,10 +241,15 @@ typedef struct SaveStateEntry {
     int is_ram;
 } SaveStateEntry;
 
+typedef struct SaveState {
+    QTAILQ_HEAD(, SaveStateEntry) handlers;
+    int global_section_id;
+} SaveState;
 
-static QTAILQ_HEAD(savevm_handlers, SaveStateEntry) savevm_handlers =
-    QTAILQ_HEAD_INITIALIZER(savevm_handlers);
-static int global_section_id;
+static SaveState savevm_state = {
+    .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
+    .global_section_id = 0,
+};
 
 static void dump_vmstate_vmsd(FILE *out_file,
                               const VMStateDescription *vmsd, int indent,
@@ -263,11 +274,11 @@ static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
 }
 
 static void dump_vmstate_vmss(FILE *out_file,
-                              const VMStateSubsection *subsection,
+                              const VMStateDescription **subsection,
                               int indent)
 {
-    if (subsection->vmsd != NULL) {
-        dump_vmstate_vmsd(out_file, subsection->vmsd, indent, true);
+    if (*subsection != NULL) {
+        dump_vmstate_vmsd(out_file, *subsection, indent, true);
     }
 }
 
@@ -308,12 +319,12 @@ static void dump_vmstate_vmsd(FILE *out_file,
         fprintf(out_file, "\n%*s]", indent, "");
     }
     if (vmsd->subsections != NULL) {
-        const VMStateSubsection *subsection = vmsd->subsections;
+        const VMStateDescription **subsection = vmsd->subsections;
         bool first;
 
         fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
         first = true;
-        while (subsection->vmsd != NULL) {
+        while (*subsection != NULL) {
             if (!first) {
                 fprintf(out_file, ",\n");
             }
@@ -383,7 +394,7 @@ static int calculate_new_instance_id(const char *idstr)
     SaveStateEntry *se;
     int instance_id = 0;
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (strcmp(idstr, se->idstr) == 0
             && instance_id <= se->instance_id) {
             instance_id = se->instance_id + 1;
@@ -397,7 +408,7 @@ static int calculate_compat_instance_id(const char *idstr)
     SaveStateEntry *se;
     int instance_id = 0;
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->compat) {
             continue;
         }
@@ -425,7 +436,7 @@ int register_savevm_live(DeviceState *dev,
 
     se = g_malloc0(sizeof(SaveStateEntry));
     se->version_id = version_id;
-    se->section_id = global_section_id++;
+    se->section_id = savevm_state.global_section_id++;
     se->ops = ops;
     se->opaque = opaque;
     se->vmsd = NULL;
@@ -457,7 +468,7 @@ int register_savevm_live(DeviceState *dev,
     }
     assert(!se->compat || se->instance_id == 0);
     /* add at the end of list */
-    QTAILQ_INSERT_TAIL(&savevm_handlers, se, entry);
+    QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
     return 0;
 }
 
@@ -491,9 +502,9 @@ void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
     }
     pstrcat(id, sizeof(id), idstr);
 
-    QTAILQ_FOREACH_SAFE(se, &savevm_handlers, entry, new_se) {
+    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
         if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
-            QTAILQ_REMOVE(&savevm_handlers, se, entry);
+            QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
             if (se->compat) {
                 g_free(se->compat);
             }
@@ -515,7 +526,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
 
     se = g_malloc0(sizeof(SaveStateEntry));
     se->version_id = vmsd->version_id;
-    se->section_id = global_section_id++;
+    se->section_id = savevm_state.global_section_id++;
     se->opaque = opaque;
     se->vmsd = vmsd;
     se->alias_id = alias_id;
@@ -543,7 +554,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
     }
     assert(!se->compat || se->instance_id == 0);
     /* add at the end of list */
-    QTAILQ_INSERT_TAIL(&savevm_handlers, se, entry);
+    QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
     return 0;
 }
 
@@ -552,9 +563,9 @@ void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
 {
     SaveStateEntry *se, *new_se;
 
-    QTAILQ_FOREACH_SAFE(se, &savevm_handlers, entry, new_se) {
+    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
         if (se->vmsd == vmsd && se->opaque == opaque) {
-            QTAILQ_REMOVE(&savevm_handlers, se, entry);
+            QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
             if (se->compat) {
                 g_free(se->compat);
             }
@@ -602,11 +613,84 @@ static void vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
     vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
 }
 
+void savevm_skip_section_footers(void)
+{
+    skip_section_footers = true;
+}
+
+/*
+ * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
+ */
+static void save_section_header(QEMUFile *f, SaveStateEntry *se,
+                                uint8_t section_type)
+{
+    qemu_put_byte(f, section_type);
+    qemu_put_be32(f, se->section_id);
+
+    if (section_type == QEMU_VM_SECTION_FULL ||
+        section_type == QEMU_VM_SECTION_START) {
+        /* ID string */
+        size_t len = strlen(se->idstr);
+        qemu_put_byte(f, len);
+        qemu_put_buffer(f, (uint8_t *)se->idstr, len);
+
+        qemu_put_be32(f, se->instance_id);
+        qemu_put_be32(f, se->version_id);
+    }
+}
+
+/*
+ * Write a footer onto device sections that catches cases misformatted device
+ * sections.
+ */
+static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
+{
+    if (!skip_section_footers) {
+        qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
+        qemu_put_be32(f, se->section_id);
+    }
+}
+
+/*
+ * Read a footer off the wire and check that it matches the expected section
+ *
+ * Returns: true if the footer was good
+ *          false if there is a problem (and calls error_report to say why)
+ */
+static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
+{
+    uint8_t read_mark;
+    uint32_t read_section_id;
+
+    if (skip_section_footers) {
+        /* No footer to check */
+        return true;
+    }
+
+    read_mark = qemu_get_byte(f);
+
+    if (read_mark != QEMU_VM_SECTION_FOOTER) {
+        error_report("Missing section footer for %s", se->idstr);
+        return false;
+    }
+
+    read_section_id = qemu_get_be32(f);
+    if (read_section_id != se->section_id) {
+        error_report("Mismatched section id in footer for %s -"
+                     " read 0x%x expected 0x%x",
+                     se->idstr, read_section_id, se->section_id);
+        return false;
+    }
+
+    /* All good */
+    return true;
+}
+
 bool qemu_savevm_state_blocked(Error **errp)
 {
     SaveStateEntry *se;
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (se->vmsd && se->vmsd->unmigratable) {
             error_setg(errp, "State blocked by non-migratable device '%s'",
                        se->idstr);
@@ -616,6 +700,13 @@ bool qemu_savevm_state_blocked(Error **errp)
     return false;
 }
 
+void qemu_savevm_state_header(QEMUFile *f)
+{
+    trace_savevm_state_header();
+    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
+    qemu_put_be32(f, QEMU_VM_FILE_VERSION);
+}
+
 void qemu_savevm_state_begin(QEMUFile *f,
                              const MigrationParams *params)
 {
@@ -623,19 +714,14 @@ void qemu_savevm_state_begin(QEMUFile *f,
     int ret;
 
     trace_savevm_state_begin();
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->ops || !se->ops->set_params) {
             continue;
         }
         se->ops->set_params(params, se->opaque);
     }
 
-    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
-    qemu_put_be32(f, QEMU_VM_FILE_VERSION);
-
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
-        int len;
-
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->ops || !se->ops->save_live_setup) {
             continue;
         }
@@ -644,19 +730,10 @@ void qemu_savevm_state_begin(QEMUFile *f,
                 continue;
             }
         }
-        /* Section type */
-        qemu_put_byte(f, QEMU_VM_SECTION_START);
-        qemu_put_be32(f, se->section_id);
-
-        /* ID string */
-        len = strlen(se->idstr);
-        qemu_put_byte(f, len);
-        qemu_put_buffer(f, (uint8_t *)se->idstr, len);
-
-        qemu_put_be32(f, se->instance_id);
-        qemu_put_be32(f, se->version_id);
+        save_section_header(f, se, QEMU_VM_SECTION_START);
 
         ret = se->ops->save_live_setup(f, se->opaque);
+        save_section_footer(f, se);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
             break;
@@ -676,7 +753,7 @@ int qemu_savevm_state_iterate(QEMUFile *f)
     int ret = 1;
 
     trace_savevm_state_iterate();
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->ops || !se->ops->save_live_iterate) {
             continue;
         }
@@ -689,12 +766,12 @@ int qemu_savevm_state_iterate(QEMUFile *f)
             return 0;
         }
         trace_savevm_section_start(se->idstr, se->section_id);
-        /* Section type */
-        qemu_put_byte(f, QEMU_VM_SECTION_PART);
-        qemu_put_be32(f, se->section_id);
+
+        save_section_header(f, se, QEMU_VM_SECTION_PART);
 
         ret = se->ops->save_live_iterate(f, se->opaque);
         trace_savevm_section_end(se->idstr, se->section_id, ret);
+        save_section_footer(f, se);
 
         if (ret < 0) {
             qemu_file_set_error(f, ret);
@@ -727,7 +804,7 @@ void qemu_savevm_state_complete(QEMUFile *f)
 
     cpu_synchronize_all_states();
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->ops || !se->ops->save_live_complete) {
             continue;
         }
@@ -737,12 +814,12 @@ void qemu_savevm_state_complete(QEMUFile *f)
             }
         }
         trace_savevm_section_start(se->idstr, se->section_id);
-        /* Section type */
-        qemu_put_byte(f, QEMU_VM_SECTION_END);
-        qemu_put_be32(f, se->section_id);
+
+        save_section_header(f, se, QEMU_VM_SECTION_END);
 
         ret = se->ops->save_live_complete(f, se->opaque);
         trace_savevm_section_end(se->idstr, se->section_id, ret);
+        save_section_footer(f, se);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
             return;
@@ -752,8 +829,7 @@ void qemu_savevm_state_complete(QEMUFile *f)
     vmdesc = qjson_new();
     json_prop_int(vmdesc, "page_size", TARGET_PAGE_SIZE);
     json_start_array(vmdesc, "devices");
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
-        int len;
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 
         if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
             continue;
@@ -764,22 +840,13 @@ void qemu_savevm_state_complete(QEMUFile *f)
         json_prop_str(vmdesc, "name", se->idstr);
         json_prop_int(vmdesc, "instance_id", se->instance_id);
 
-        /* Section type */
-        qemu_put_byte(f, QEMU_VM_SECTION_FULL);
-        qemu_put_be32(f, se->section_id);
-
-        /* ID string */
-        len = strlen(se->idstr);
-        qemu_put_byte(f, len);
-        qemu_put_buffer(f, (uint8_t *)se->idstr, len);
-
-        qemu_put_be32(f, se->instance_id);
-        qemu_put_be32(f, se->version_id);
+        save_section_header(f, se, QEMU_VM_SECTION_FULL);
 
         vmstate_save(f, se, vmdesc);
 
         json_end_object(vmdesc);
         trace_savevm_section_end(se->idstr, se->section_id, 0);
+        save_section_footer(f, se);
     }
 
     qemu_put_byte(f, QEMU_VM_EOF);
@@ -803,7 +870,7 @@ uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size)
     SaveStateEntry *se;
     uint64_t ret = 0;
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->ops || !se->ops->save_live_pending) {
             continue;
         }
@@ -822,7 +889,7 @@ void qemu_savevm_state_cancel(void)
     SaveStateEntry *se;
 
     trace_savevm_state_cancel();
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (se->ops && se->ops->cancel) {
             se->ops->cancel(se->opaque);
         }
@@ -842,6 +909,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
     }
 
     qemu_mutex_unlock_iothread();
+    qemu_savevm_state_header(f);
     qemu_savevm_state_begin(f, &params);
     qemu_mutex_lock_iothread();
 
@@ -872,9 +940,7 @@ static int qemu_save_device_state(QEMUFile *f)
 
     cpu_synchronize_all_states();
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
-        int len;
-
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (se->is_ram) {
             continue;
         }
@@ -882,19 +948,11 @@ static int qemu_save_device_state(QEMUFile *f)
             continue;
         }
 
-        /* Section type */
-        qemu_put_byte(f, QEMU_VM_SECTION_FULL);
-        qemu_put_be32(f, se->section_id);
-
-        /* ID string */
-        len = strlen(se->idstr);
-        qemu_put_byte(f, len);
-        qemu_put_buffer(f, (uint8_t *)se->idstr, len);
-
-        qemu_put_be32(f, se->instance_id);
-        qemu_put_be32(f, se->version_id);
+        save_section_header(f, se, QEMU_VM_SECTION_FULL);
 
         vmstate_save(f, se, NULL);
+
+        save_section_footer(f, se);
     }
 
     qemu_put_byte(f, QEMU_VM_EOF);
@@ -906,7 +964,7 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id)
 {
     SaveStateEntry *se;
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!strcmp(se->idstr, idstr) &&
             (instance_id == se->instance_id ||
              instance_id == se->alias_id))
@@ -922,18 +980,26 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id)
     return NULL;
 }
 
-typedef struct LoadStateEntry {
+struct LoadStateEntry {
     QLIST_ENTRY(LoadStateEntry) entry;
     SaveStateEntry *se;
     int section_id;
     int version_id;
-} LoadStateEntry;
+};
 
-int qemu_loadvm_state(QEMUFile *f)
+void loadvm_free_handlers(MigrationIncomingState *mis)
 {
-    QLIST_HEAD(, LoadStateEntry) loadvm_handlers =
-        QLIST_HEAD_INITIALIZER(loadvm_handlers);
     LoadStateEntry *le, *new_le;
+
+    QLIST_FOREACH_SAFE(le, &mis->loadvm_handlers, entry, new_le) {
+        QLIST_REMOVE(le, entry);
+        g_free(le);
+    }
+}
+
+int qemu_loadvm_state(QEMUFile *f)
+{
+    MigrationIncomingState *mis = migration_incoming_get_current();
     Error *local_err = NULL;
     uint8_t section_type;
     unsigned int v;
@@ -964,8 +1030,8 @@ int qemu_loadvm_state(QEMUFile *f)
     while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
         uint32_t instance_id, version_id, section_id;
         SaveStateEntry *se;
-        char idstr[257];
-        int len;
+        LoadStateEntry *le;
+        char idstr[256];
 
         trace_qemu_loadvm_state_section(section_type);
         switch (section_type) {
@@ -973,9 +1039,11 @@ int qemu_loadvm_state(QEMUFile *f)
         case QEMU_VM_SECTION_FULL:
             /* Read section start */
             section_id = qemu_get_be32(f);
-            len = qemu_get_byte(f);
-            qemu_get_buffer(f, (uint8_t *)idstr, len);
-            idstr[len] = 0;
+            if (!qemu_get_counted_string(f, idstr)) {
+                error_report("Unable to read ID string for section %u",
+                            section_id);
+                return -EINVAL;
+            }
             instance_id = qemu_get_be32(f);
             version_id = qemu_get_be32(f);
 
@@ -1004,7 +1072,7 @@ int qemu_loadvm_state(QEMUFile *f)
             le->se = se;
             le->section_id = section_id;
             le->version_id = version_id;
-            QLIST_INSERT_HEAD(&loadvm_handlers, le, entry);
+            QLIST_INSERT_HEAD(&mis->loadvm_handlers, le, entry);
 
             ret = vmstate_load(f, le->se, le->version_id);
             if (ret < 0) {
@@ -1012,13 +1080,17 @@ int qemu_loadvm_state(QEMUFile *f)
                              " device '%s'", instance_id, idstr);
                 goto out;
             }
+            if (!check_section_footer(f, le->se)) {
+                ret = -EINVAL;
+                goto out;
+            }
             break;
         case QEMU_VM_SECTION_PART:
         case QEMU_VM_SECTION_END:
             section_id = qemu_get_be32(f);
 
             trace_qemu_loadvm_state_section_partend(section_id);
-            QLIST_FOREACH(le, &loadvm_handlers, entry) {
+            QLIST_FOREACH(le, &mis->loadvm_handlers, entry) {
                 if (le->section_id == section_id) {
                     break;
                 }
@@ -1035,6 +1107,10 @@ int qemu_loadvm_state(QEMUFile *f)
                              section_id, le->se->idstr);
                 goto out;
             }
+            if (!check_section_footer(f, le->se)) {
+                ret = -EINVAL;
+                goto out;
+            }
             break;
         default:
             error_report("Unknown savevm section type %d", section_type);
@@ -1066,11 +1142,6 @@ int qemu_loadvm_state(QEMUFile *f)
     ret = 0;
 
 out:
-    QLIST_FOREACH_SAFE(le, &loadvm_handlers, entry, new_le) {
-        QLIST_REMOVE(le, entry);
-        g_free(le);
-    }
-
     if (ret == 0) {
         /* We may not have a VMDESC section, so ignore relative errors */
         ret = file_error_after_eof;
@@ -1314,9 +1385,11 @@ int load_vmstate(const char *name)
     }
 
     qemu_system_reset(VMRESET_SILENT);
+    migration_incoming_state_new(f);
     ret = qemu_loadvm_state(f);
 
     qemu_fclose(f);
+    migration_incoming_state_destroy();
     if (ret < 0) {
         error_report("Error %d while loading VM state", ret);
         return ret;
diff --git a/migration/vmstate.c b/migration/vmstate.c
index e5388f0596..6138d1acb7 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -341,11 +341,11 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
 }
 
 static const VMStateDescription *
-    vmstate_get_subsection(const VMStateSubsection *sub, char *idstr)
+vmstate_get_subsection(const VMStateDescription **sub, char *idstr)
 {
-    while (sub && sub->needed) {
-        if (strcmp(idstr, sub->vmsd->name) == 0) {
-            return sub->vmsd;
+    while (sub && *sub && (*sub)->needed) {
+        if (strcmp(idstr, (*sub)->name) == 0) {
+            return *sub;
         }
         sub++;
     }
@@ -358,7 +358,7 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
     trace_vmstate_subsection_load(vmsd->name);
 
     while (qemu_peek_byte(f, 0) == QEMU_VM_SUBSECTION) {
-        char idstr[256];
+        char idstr[256], *idstr_ret;
         int ret;
         uint8_t version_id, len, size;
         const VMStateDescription *sub_vmsd;
@@ -369,11 +369,12 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
             trace_vmstate_subsection_load_bad(vmsd->name, "(short)");
             return 0;
         }
-        size = qemu_peek_buffer(f, (uint8_t *)idstr, len, 2);
+        size = qemu_peek_buffer(f, (uint8_t **)&idstr_ret, len, 2);
         if (size != len) {
             trace_vmstate_subsection_load_bad(vmsd->name, "(peek fail)");
             return 0;
         }
+        memcpy(idstr, idstr_ret, size);
         idstr[size] = 0;
 
         if (strncmp(vmsd->name, idstr, strlen(vmsd->name)) != 0) {
@@ -405,12 +406,12 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
 static void vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
                                     void *opaque, QJSON *vmdesc)
 {
-    const VMStateSubsection *sub = vmsd->subsections;
+    const VMStateDescription **sub = vmsd->subsections;
     bool subsection_found = false;
 
-    while (sub && sub->needed) {
-        if (sub->needed(opaque)) {
-            const VMStateDescription *vmsd = sub->vmsd;
+    while (sub && *sub && (*sub)->needed) {
+        if ((*sub)->needed(opaque)) {
+            const VMStateDescription *vmsd = *sub;
             uint8_t len;
 
             if (vmdesc) {
diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
index 0c8b22f2aa..f6894bece9 100755
--- a/scripts/analyze-migration.py
+++ b/scripts/analyze-migration.py
@@ -474,6 +474,7 @@ class MigrationDump(object):
     QEMU_VM_SECTION_FULL  = 0x04
     QEMU_VM_SUBSECTION    = 0x05
     QEMU_VM_VMDESCRIPTION = 0x06
+    QEMU_VM_SECTION_FOOTER= 0x7e
 
     def __init__(self, filename):
         self.section_classes = { ( 'ram', 0 ) : [ RamSection, None ],
@@ -526,6 +527,10 @@ class MigrationDump(object):
             elif section_type == self.QEMU_VM_SECTION_PART or section_type == self.QEMU_VM_SECTION_END:
                 section_id = file.read32()
                 self.sections[section_id].read()
+            elif section_type == self.QEMU_VM_SECTION_FOOTER:
+                read_section_id = file.read32()
+                if read_section_id != section_id:
+                    raise Exception("Mismatched section footer: %x vs %x" % (read_section_id, section_id))
             else:
                 raise Exception("Unknown section type: %d" % section_type)
         file.close()
diff --git a/softmmu_template.h b/softmmu_template.h
index 39f571b0ca..d42d89d541 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -548,6 +548,28 @@ glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
     helper_te_st_name(env, addr, val, oi, GETRA());
 }
 
+#if DATA_SIZE == 1
+/* Probe for whether the specified guest write access is permitted.
+ * If it is not permitted then an exception will be taken in the same
+ * way as if this were a real write access (and we will not return).
+ * Otherwise the function will return, and there will be a valid
+ * entry in the TLB for this access.
+ */
+void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
+                 uintptr_t retaddr)
+{
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+
+    if ((addr & TARGET_PAGE_MASK)
+        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+        /* TLB entry is for a different page */
+        if (!VICTIM_TLB_HIT(addr_write)) {
+            tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
+        }
+    }
+}
+#endif
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
 
 #undef READ_ACCESS_TYPE
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 9446e5a8ab..36365a57c7 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -40,6 +40,7 @@ static const VMStateDescription vmstate_vfp = {
     .name = "cpu/vfp",
     .version_id = 3,
     .minimum_version_id = 3,
+    .needed = vfp_needed,
     .fields = (VMStateField[]) {
         VMSTATE_FLOAT64_ARRAY(env.vfp.regs, ARMCPU, 64),
         /* The xregs array is a little awkward because element 1 (FPSCR)
@@ -72,6 +73,7 @@ static const VMStateDescription vmstate_iwmmxt = {
     .name = "cpu/iwmmxt",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = iwmmxt_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64_ARRAY(env.iwmmxt.regs, ARMCPU, 16),
         VMSTATE_UINT32_ARRAY(env.iwmmxt.cregs, ARMCPU, 16),
@@ -91,6 +93,7 @@ static const VMStateDescription vmstate_m = {
     .name = "cpu/m",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = m_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(env.v7m.other_sp, ARMCPU),
         VMSTATE_UINT32(env.v7m.vecbase, ARMCPU),
@@ -114,6 +117,7 @@ static const VMStateDescription vmstate_thumb2ee = {
     .name = "cpu/thumb2ee",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = thumb2ee_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(env.teecr, ARMCPU),
         VMSTATE_UINT32(env.teehbr, ARMCPU),
@@ -282,21 +286,11 @@ const VMStateDescription vmstate_arm_cpu = {
         VMSTATE_BOOL(powered_off, ARMCPU),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_vfp,
-            .needed = vfp_needed,
-        } , {
-            .vmsd = &vmstate_iwmmxt,
-            .needed = iwmmxt_needed,
-        } , {
-            .vmsd = &vmstate_m,
-            .needed = m_needed,
-        } , {
-            .vmsd = &vmstate_thumb2ee,
-            .needed = thumb2ee_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_vfp,
+        &vmstate_iwmmxt,
+        &vmstate_m,
+        &vmstate_thumb2ee,
+        NULL
     }
 };
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 69d86cb476..a0df64b577 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -403,6 +403,7 @@ static const VMStateDescription vmstate_steal_time_msr = {
     .name = "cpu/steal_time_msr",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = steal_time_msr_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.steal_time_msr, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -413,6 +414,7 @@ static const VMStateDescription vmstate_async_pf_msr = {
     .name = "cpu/async_pf_msr",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = async_pf_msr_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.async_pf_en_msr, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -423,6 +425,7 @@ static const VMStateDescription vmstate_pv_eoi_msr = {
     .name = "cpu/async_pv_eoi_msr",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = pv_eoi_msr_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.pv_eoi_en_msr, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -441,6 +444,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
     .name = "cpu/fpop_ip_dp",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fpop_ip_dp_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT16(env.fpop, X86CPU),
         VMSTATE_UINT64(env.fpip, X86CPU),
@@ -461,6 +465,7 @@ static const VMStateDescription vmstate_msr_tsc_adjust = {
     .name = "cpu/msr_tsc_adjust",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = tsc_adjust_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.tsc_adjust, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -479,6 +484,7 @@ static const VMStateDescription vmstate_msr_tscdeadline = {
     .name = "cpu/msr_tscdeadline",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = tscdeadline_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.tsc_deadline, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -505,6 +511,7 @@ static const VMStateDescription vmstate_msr_ia32_misc_enable = {
     .name = "cpu/msr_ia32_misc_enable",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = misc_enable_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.msr_ia32_misc_enable, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -515,6 +522,7 @@ static const VMStateDescription vmstate_msr_ia32_feature_control = {
     .name = "cpu/msr_ia32_feature_control",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = feature_control_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.msr_ia32_feature_control, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -549,6 +557,7 @@ static const VMStateDescription vmstate_msr_architectural_pmu = {
     .name = "cpu/msr_architectural_pmu",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = pmu_enable_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.msr_fixed_ctr_ctrl, X86CPU),
         VMSTATE_UINT64(env.msr_global_ctrl, X86CPU),
@@ -584,6 +593,7 @@ static const VMStateDescription vmstate_mpx = {
     .name = "cpu/mpx",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = mpx_needed,
     .fields = (VMStateField[]) {
         VMSTATE_BND_REGS(env.bnd_regs, X86CPU, 4),
         VMSTATE_UINT64(env.bndcs_regs.cfgu, X86CPU),
@@ -605,6 +615,7 @@ static const VMStateDescription vmstate_msr_hypercall_hypercall = {
     .name = "cpu/msr_hyperv_hypercall",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = hyperv_hypercall_enable_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.msr_hv_guest_os_id, X86CPU),
         VMSTATE_UINT64(env.msr_hv_hypercall, X86CPU),
@@ -624,6 +635,7 @@ static const VMStateDescription vmstate_msr_hyperv_vapic = {
     .name = "cpu/msr_hyperv_vapic",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = hyperv_vapic_enable_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.msr_hv_vapic, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -642,6 +654,7 @@ static const VMStateDescription vmstate_msr_hyperv_time = {
     .name = "cpu/msr_hyperv_time",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = hyperv_time_enable_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.msr_hv_tsc, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -683,6 +696,7 @@ static const VMStateDescription vmstate_avx512 = {
     .name = "cpu/avx512",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = avx512_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS),
         VMSTATE_ZMMH_REGS_VARS(env.xmm_regs, X86CPU, 0),
@@ -705,6 +719,7 @@ static const VMStateDescription vmstate_xss = {
     .name = "cpu/xss",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = xss_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.xss, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -813,54 +828,22 @@ VMStateDescription vmstate_x86_cpu = {
         VMSTATE_END_OF_LIST()
         /* The above list is not sorted /wrt version numbers, watch out! */
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_async_pf_msr,
-            .needed = async_pf_msr_needed,
-        } , {
-            .vmsd = &vmstate_pv_eoi_msr,
-            .needed = pv_eoi_msr_needed,
-        } , {
-            .vmsd = &vmstate_steal_time_msr,
-            .needed = steal_time_msr_needed,
-        } , {
-            .vmsd = &vmstate_fpop_ip_dp,
-            .needed = fpop_ip_dp_needed,
-        }, {
-            .vmsd = &vmstate_msr_tsc_adjust,
-            .needed = tsc_adjust_needed,
-        }, {
-            .vmsd = &vmstate_msr_tscdeadline,
-            .needed = tscdeadline_needed,
-        }, {
-            .vmsd = &vmstate_msr_ia32_misc_enable,
-            .needed = misc_enable_needed,
-        }, {
-            .vmsd = &vmstate_msr_ia32_feature_control,
-            .needed = feature_control_needed,
-        }, {
-            .vmsd = &vmstate_msr_architectural_pmu,
-            .needed = pmu_enable_needed,
-        } , {
-            .vmsd = &vmstate_mpx,
-            .needed = mpx_needed,
-        }, {
-            .vmsd = &vmstate_msr_hypercall_hypercall,
-            .needed = hyperv_hypercall_enable_needed,
-        }, {
-            .vmsd = &vmstate_msr_hyperv_vapic,
-            .needed = hyperv_vapic_enable_needed,
-        }, {
-            .vmsd = &vmstate_msr_hyperv_time,
-            .needed = hyperv_time_enable_needed,
-        }, {
-            .vmsd = &vmstate_avx512,
-            .needed = avx512_needed,
-         }, {
-            .vmsd = &vmstate_xss,
-            .needed = xss_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_async_pf_msr,
+        &vmstate_pv_eoi_msr,
+        &vmstate_steal_time_msr,
+        &vmstate_fpop_ip_dp,
+        &vmstate_msr_tsc_adjust,
+        &vmstate_msr_tscdeadline,
+        &vmstate_msr_ia32_misc_enable,
+        &vmstate_msr_ia32_feature_control,
+        &vmstate_msr_architectural_pmu,
+        &vmstate_mpx,
+        &vmstate_msr_hypercall_hypercall,
+        &vmstate_msr_hyperv_vapic,
+        &vmstate_msr_hyperv_time,
+        &vmstate_avx512,
+        &vmstate_xss,
+        NULL
     }
 };
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index f9d2b4c5af..474a0e327d 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -34,7 +34,7 @@ struct r4k_tlb_t {
     uint_fast16_t RI0:1;
     uint_fast16_t RI1:1;
     uint_fast16_t EHINV:1;
-    target_ulong PFN[2];
+    uint64_t PFN[2];
 };
 
 #if !defined(CONFIG_USER_ONLY)
@@ -100,6 +100,7 @@ struct CPUMIPSFPUContext {
     float_status fp_status;
     /* fpu implementation/revision register (fir) */
     uint32_t fcr0;
+#define FCR0_FREP 29
 #define FCR0_UFRP 28
 #define FCR0_F64 22
 #define FCR0_L 21
@@ -223,8 +224,14 @@ struct CPUMIPSState {
 
     uint32_t SEGBITS;
     uint32_t PABITS;
+#if defined(TARGET_MIPS64)
+# define PABITS_BASE 36
+#else
+# define PABITS_BASE 32
+#endif
     target_ulong SEGMask;
-    target_ulong PAMask;
+    uint64_t PAMask;
+#define PAMASK_BASE ((1ULL << PABITS_BASE) - 1)
 
     int32_t msair;
 #define MSAIR_ProcID    8
@@ -272,8 +279,8 @@ struct CPUMIPSState {
 #define CP0VPEOpt_DWX2	2
 #define CP0VPEOpt_DWX1	1
 #define CP0VPEOpt_DWX0	0
-    target_ulong CP0_EntryLo0;
-    target_ulong CP0_EntryLo1;
+    uint64_t CP0_EntryLo0;
+    uint64_t CP0_EntryLo1;
 #if defined(TARGET_MIPS64)
 # define CP0EnLo_RI 63
 # define CP0EnLo_XI 62
@@ -288,6 +295,7 @@ struct CPUMIPSState {
     int32_t CP0_PageGrain;
 #define CP0PG_RIE 31
 #define CP0PG_XIE 30
+#define CP0PG_ELPA 29
 #define CP0PG_IEC 27
     int32_t CP0_Wired;
     int32_t CP0_SRSConf0_rw_bitmask;
@@ -462,17 +470,21 @@ struct CPUMIPSState {
 #define CP0C5_CV         29
 #define CP0C5_EVA        28
 #define CP0C5_MSAEn      27
+#define CP0C5_UFE        9
+#define CP0C5_FRE        8
 #define CP0C5_SBRI       6
+#define CP0C5_MVH        5
+#define CP0C5_LLB        4
 #define CP0C5_UFR        2
 #define CP0C5_NFExists   0
     int32_t CP0_Config6;
     int32_t CP0_Config7;
     /* XXX: Maybe make LLAddr per-TC? */
-    target_ulong lladdr;
+    uint64_t lladdr;
     target_ulong llval;
     target_ulong llnewval;
     target_ulong llreg;
-    target_ulong CP0_LLAddr_rw_bitmask;
+    uint64_t CP0_LLAddr_rw_bitmask;
     int CP0_LLAddr_shift;
     target_ulong CP0_WatchLo[8];
     int32_t CP0_WatchHi[8];
@@ -499,7 +511,7 @@ struct CPUMIPSState {
 #define CP0DB_DSS  0
     target_ulong CP0_DEPC;
     int32_t CP0_Performance0;
-    int32_t CP0_TagLo;
+    uint64_t CP0_TagLo;
     int32_t CP0_DataLo;
     int32_t CP0_TagHi;
     int32_t CP0_DataHi;
@@ -514,7 +526,7 @@ struct CPUMIPSState {
 #define EXCP_INST_NOTAVAIL 0x2 /* No valid instruction word for BadInstr */
     uint32_t hflags;    /* CPU State */
     /* TMASK defines different execution modes */
-#define MIPS_HFLAG_TMASK  0x15807FF
+#define MIPS_HFLAG_TMASK  0x75807FF
 #define MIPS_HFLAG_MODE   0x00007 /* execution modes                    */
     /* The KSU flags must be the lowest bits in hflags. The flag order
        must be the same as defined for CP0 Status. This allows to use
@@ -561,6 +573,8 @@ struct CPUMIPSState {
 #define MIPS_HFLAG_SBRI  0x400000 /* R6 SDBBP causes RI excpt. in user mode */
 #define MIPS_HFLAG_FBNSLOT 0x800000 /* Forbidden slot                   */
 #define MIPS_HFLAG_MSA   0x1000000
+#define MIPS_HFLAG_FRE   0x2000000 /* FRE enabled */
+#define MIPS_HFLAG_ELPA  0x4000000
     target_ulong btarget;        /* Jump / branch target               */
     target_ulong bcond;          /* Branch condition (if needed)       */
 
@@ -796,6 +810,15 @@ static inline void restore_msa_fp_status(CPUMIPSState *env)
     set_flush_inputs_to_zero(flush_to_zero, status);
 }
 
+static inline void restore_pamask(CPUMIPSState *env)
+{
+    if (env->hflags & MIPS_HFLAG_ELPA) {
+        env->PAMask = (1ULL << env->PABITS) - 1;
+    } else {
+        env->PAMask = PAMASK_BASE;
+    }
+}
+
 static inline void cpu_get_tb_cpu_state(CPUMIPSState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
@@ -843,7 +866,8 @@ static inline void compute_hflags(CPUMIPSState *env)
     env->hflags &= ~(MIPS_HFLAG_COP1X | MIPS_HFLAG_64 | MIPS_HFLAG_CP0 |
                      MIPS_HFLAG_F64 | MIPS_HFLAG_FPU | MIPS_HFLAG_KSU |
                      MIPS_HFLAG_AWRAP | MIPS_HFLAG_DSP | MIPS_HFLAG_DSPR2 |
-                     MIPS_HFLAG_SBRI | MIPS_HFLAG_MSA);
+                     MIPS_HFLAG_SBRI | MIPS_HFLAG_MSA | MIPS_HFLAG_FRE |
+                     MIPS_HFLAG_ELPA);
     if (!(env->CP0_Status & (1 << CP0St_EXL)) &&
         !(env->CP0_Status & (1 << CP0St_ERL)) &&
         !(env->hflags & MIPS_HFLAG_DM)) {
@@ -924,6 +948,16 @@ static inline void compute_hflags(CPUMIPSState *env)
             env->hflags |= MIPS_HFLAG_MSA;
         }
     }
+    if (env->active_fpu.fcr0 & (1 << FCR0_FREP)) {
+        if (env->CP0_Config5 & (1 << CP0C5_FRE)) {
+            env->hflags |= MIPS_HFLAG_FRE;
+        }
+    }
+    if (env->CP0_Config3 & (1 << CP0C3_LPA)) {
+        if (env->CP0_PageGrain & (1 << CP0PG_ELPA)) {
+            env->hflags |= MIPS_HFLAG_ELPA;
+        }
+    }
 }
 
 #ifndef CONFIG_USER_ONLY
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 3bd0b029e4..8df98c71b8 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -348,6 +348,7 @@ DEF_HELPER_1(tlbinvf, void, env)
 DEF_HELPER_1(di, tl, env)
 DEF_HELPER_1(ei, tl, env)
 DEF_HELPER_1(eret, void, env)
+DEF_HELPER_1(eretnc, void, env)
 DEF_HELPER_1(deret, void, env)
 #endif /* !CONFIG_USER_ONLY */
 DEF_HELPER_1(rdhwr_cpunum, tl, env)
@@ -931,5 +932,11 @@ DEF_HELPER_4(msa_ftint_u_df, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_ffint_s_df, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_ffint_u_df, void, env, i32, i32, i32)
 
-DEF_HELPER_5(msa_ld_df, void, env, i32, i32, i32, s32)
-DEF_HELPER_5(msa_st_df, void, env, i32, i32, i32, s32)
+#define MSALDST_PROTO(type)                         \
+DEF_HELPER_3(msa_ld_ ## type, void, env, i32, tl)   \
+DEF_HELPER_3(msa_st_ ## type, void, env, i32, tl)
+MSALDST_PROTO(b)
+MSALDST_PROTO(h)
+MSALDST_PROTO(w)
+MSALDST_PROTO(d)
+#undef MSALDST_PROTO
diff --git a/target-mips/machine.c b/target-mips/machine.c
index 7d1fa32e57..8fa755cd39 100644
--- a/target-mips/machine.c
+++ b/target-mips/machine.c
@@ -10,6 +10,7 @@ static int cpu_post_load(void *opaque, int version_id)
     restore_fp_status(env);
     restore_msa_fp_status(env);
     compute_hflags(env);
+    restore_pamask(env);
 
     return 0;
 }
@@ -142,8 +143,8 @@ static int get_tlb(QEMUFile *f, void *pv, size_t size)
     v->RI0 = (flags >> 13) & 1;
     v->XI1 = (flags >> 12) & 1;
     v->XI0 = (flags >> 11) & 1;
-    qemu_get_betls(f, &v->PFN[0]);
-    qemu_get_betls(f, &v->PFN[1]);
+    qemu_get_be64s(f, &v->PFN[0]);
+    qemu_get_be64s(f, &v->PFN[1]);
 
     return 0;
 }
@@ -169,8 +170,8 @@ static void put_tlb(QEMUFile *f, void *pv, size_t size)
     qemu_put_be32s(f, &v->PageMask);
     qemu_put_8s(f, &v->ASID);
     qemu_put_be16s(f, &flags);
-    qemu_put_betls(f, &v->PFN[0]);
-    qemu_put_betls(f, &v->PFN[1]);
+    qemu_put_be64s(f, &v->PFN[0]);
+    qemu_put_be64s(f, &v->PFN[1]);
 }
 
 const VMStateInfo vmstate_info_tlb = {
@@ -201,8 +202,8 @@ const VMStateDescription vmstate_tlb = {
 
 const VMStateDescription vmstate_mips_cpu = {
     .name = "cpu",
-    .version_id = 6,
-    .minimum_version_id = 6,
+    .version_id = 7,
+    .minimum_version_id = 7,
     .post_load = cpu_post_load,
     .fields = (VMStateField[]) {
         /* Active TC */
@@ -237,8 +238,8 @@ const VMStateDescription vmstate_mips_cpu = {
         VMSTATE_UINTTL(env.CP0_VPESchedule, MIPSCPU),
         VMSTATE_UINTTL(env.CP0_VPEScheFBack, MIPSCPU),
         VMSTATE_INT32(env.CP0_VPEOpt, MIPSCPU),
-        VMSTATE_UINTTL(env.CP0_EntryLo0, MIPSCPU),
-        VMSTATE_UINTTL(env.CP0_EntryLo1, MIPSCPU),
+        VMSTATE_UINT64(env.CP0_EntryLo0, MIPSCPU),
+        VMSTATE_UINT64(env.CP0_EntryLo1, MIPSCPU),
         VMSTATE_UINTTL(env.CP0_Context, MIPSCPU),
         VMSTATE_INT32(env.CP0_PageMask, MIPSCPU),
         VMSTATE_INT32(env.CP0_PageGrain, MIPSCPU),
@@ -269,7 +270,7 @@ const VMStateDescription vmstate_mips_cpu = {
         VMSTATE_INT32(env.CP0_Config3, MIPSCPU),
         VMSTATE_INT32(env.CP0_Config6, MIPSCPU),
         VMSTATE_INT32(env.CP0_Config7, MIPSCPU),
-        VMSTATE_UINTTL(env.lladdr, MIPSCPU),
+        VMSTATE_UINT64(env.lladdr, MIPSCPU),
         VMSTATE_UINTTL_ARRAY(env.CP0_WatchLo, MIPSCPU, 8),
         VMSTATE_INT32_ARRAY(env.CP0_WatchHi, MIPSCPU, 8),
         VMSTATE_UINTTL(env.CP0_XContext, MIPSCPU),
@@ -277,7 +278,7 @@ const VMStateDescription vmstate_mips_cpu = {
         VMSTATE_INT32(env.CP0_Debug, MIPSCPU),
         VMSTATE_UINTTL(env.CP0_DEPC, MIPSCPU),
         VMSTATE_INT32(env.CP0_Performance0, MIPSCPU),
-        VMSTATE_INT32(env.CP0_TagLo, MIPSCPU),
+        VMSTATE_UINT64(env.CP0_TagLo, MIPSCPU),
         VMSTATE_INT32(env.CP0_DataLo, MIPSCPU),
         VMSTATE_INT32(env.CP0_TagHi, MIPSCPU),
         VMSTATE_INT32(env.CP0_DataHi, MIPSCPU),
diff --git a/target-mips/mips-defs.h b/target-mips/mips-defs.h
index 1784227494..20aa87c24c 100644
--- a/target-mips/mips-defs.h
+++ b/target-mips/mips-defs.h
@@ -10,11 +10,11 @@
 
 #if defined(TARGET_MIPS64)
 #define TARGET_LONG_BITS 64
-#define TARGET_PHYS_ADDR_SPACE_BITS 36
+#define TARGET_PHYS_ADDR_SPACE_BITS 48
 #define TARGET_VIRT_ADDR_SPACE_BITS 42
 #else
 #define TARGET_LONG_BITS 32
-#define TARGET_PHYS_ADDR_SPACE_BITS 36
+#define TARGET_PHYS_ADDR_SPACE_BITS 40
 #define TARGET_VIRT_ADDR_SPACE_BITS 32
 #endif
 
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 73a8e458fc..2a9ddff70f 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -90,10 +90,10 @@ static inline type do_##name(CPUMIPSState *env, target_ulong addr,      \
     }                                                                   \
 }
 #endif
-HELPER_LD(lbu, ldub, uint8_t)
-HELPER_LD(lhu, lduw, uint16_t)
 HELPER_LD(lw, ldl, int32_t)
+#if defined(TARGET_MIPS64)
 HELPER_LD(ld, ldq, int64_t)
+#endif
 #undef HELPER_LD
 
 #if defined(CONFIG_USER_ONLY)
@@ -118,9 +118,10 @@ static inline void do_##name(CPUMIPSState *env, target_ulong addr,      \
 }
 #endif
 HELPER_ST(sb, stb, uint8_t)
-HELPER_ST(sh, stw, uint16_t)
 HELPER_ST(sw, stl, uint32_t)
+#if defined(TARGET_MIPS64)
 HELPER_ST(sd, stq, uint64_t)
+#endif
 #undef HELPER_ST
 
 target_ulong helper_clo (target_ulong arg1)
@@ -1067,19 +1068,23 @@ void helper_mtc0_vpeopt(CPUMIPSState *env, target_ulong arg1)
     env->CP0_VPEOpt = arg1 & 0x0000ffff;
 }
 
+#define MTC0_ENTRYLO_MASK(env) ((env->PAMask >> 6) & 0x3FFFFFFF)
+
 void helper_mtc0_entrylo0(CPUMIPSState *env, target_ulong arg1)
 {
-    /* Large physaddr (PABITS) not implemented */
     /* 1k pages not implemented */
     target_ulong rxi = arg1 & (env->CP0_PageGrain & (3u << CP0PG_XIE));
-    env->CP0_EntryLo0 = (arg1 & 0x3FFFFFFF) | (rxi << (CP0EnLo_XI - 30));
+    env->CP0_EntryLo0 = (arg1 & MTC0_ENTRYLO_MASK(env))
+                        | (rxi << (CP0EnLo_XI - 30));
 }
 
 #if defined(TARGET_MIPS64)
+#define DMTC0_ENTRYLO_MASK(env) (env->PAMask >> 6)
+
 void helper_dmtc0_entrylo0(CPUMIPSState *env, uint64_t arg1)
 {
     uint64_t rxi = arg1 & ((env->CP0_PageGrain & (3ull << CP0PG_XIE)) << 32);
-    env->CP0_EntryLo0 = (arg1 & 0x3FFFFFFF) | rxi;
+    env->CP0_EntryLo0 = (arg1 & DMTC0_ENTRYLO_MASK(env)) | rxi;
 }
 #endif
 
@@ -1245,17 +1250,17 @@ void helper_mttc0_tcschefback(CPUMIPSState *env, target_ulong arg1)
 
 void helper_mtc0_entrylo1(CPUMIPSState *env, target_ulong arg1)
 {
-    /* Large physaddr (PABITS) not implemented */
     /* 1k pages not implemented */
     target_ulong rxi = arg1 & (env->CP0_PageGrain & (3u << CP0PG_XIE));
-    env->CP0_EntryLo1 = (arg1 & 0x3FFFFFFF) | (rxi << (CP0EnLo_XI - 30));
+    env->CP0_EntryLo1 = (arg1 & MTC0_ENTRYLO_MASK(env))
+                        | (rxi << (CP0EnLo_XI - 30));
 }
 
 #if defined(TARGET_MIPS64)
 void helper_dmtc0_entrylo1(CPUMIPSState *env, uint64_t arg1)
 {
     uint64_t rxi = arg1 & ((env->CP0_PageGrain & (3ull << CP0PG_XIE)) << 32);
-    env->CP0_EntryLo1 = (arg1 & 0x3FFFFFFF) | rxi;
+    env->CP0_EntryLo1 = (arg1 & DMTC0_ENTRYLO_MASK(env)) | rxi;
 }
 #endif
 
@@ -1278,10 +1283,11 @@ void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1)
 void helper_mtc0_pagegrain(CPUMIPSState *env, target_ulong arg1)
 {
     /* SmartMIPS not implemented */
-    /* Large physaddr (PABITS) not implemented */
     /* 1k pages not implemented */
     env->CP0_PageGrain = (arg1 & env->CP0_PageGrain_rw_bitmask) |
                          (env->CP0_PageGrain & ~env->CP0_PageGrain_rw_bitmask);
+    compute_hflags(env);
+    restore_pamask(env);
 }
 
 void helper_mtc0_wired(CPUMIPSState *env, target_ulong arg1)
@@ -1825,6 +1831,16 @@ static void r4k_mips_tlb_flush_extra (CPUMIPSState *env, int first)
     }
 }
 
+static inline uint64_t get_tlb_pfn_from_entrylo(uint64_t entrylo)
+{
+#if defined(TARGET_MIPS64)
+    return extract64(entrylo, 6, 54);
+#else
+    return extract64(entrylo, 6, 24) | /* PFN */
+           (extract64(entrylo, 32, 32) << 24); /* PFNX */
+#endif
+}
+
 static void r4k_fill_tlb(CPUMIPSState *env, int idx)
 {
     r4k_tlb_t *tlb;
@@ -1848,13 +1864,13 @@ static void r4k_fill_tlb(CPUMIPSState *env, int idx)
     tlb->C0 = (env->CP0_EntryLo0 >> 3) & 0x7;
     tlb->XI0 = (env->CP0_EntryLo0 >> CP0EnLo_XI) & 1;
     tlb->RI0 = (env->CP0_EntryLo0 >> CP0EnLo_RI) & 1;
-    tlb->PFN[0] = (env->CP0_EntryLo0 >> 6) << 12;
+    tlb->PFN[0] = get_tlb_pfn_from_entrylo(env->CP0_EntryLo0) << 12;
     tlb->V1 = (env->CP0_EntryLo1 & 2) != 0;
     tlb->D1 = (env->CP0_EntryLo1 & 4) != 0;
     tlb->C1 = (env->CP0_EntryLo1 >> 3) & 0x7;
     tlb->XI1 = (env->CP0_EntryLo1 >> CP0EnLo_XI) & 1;
     tlb->RI1 = (env->CP0_EntryLo1 >> CP0EnLo_RI) & 1;
-    tlb->PFN[1] = (env->CP0_EntryLo1 >> 6) << 12;
+    tlb->PFN[1] = get_tlb_pfn_from_entrylo(env->CP0_EntryLo1) << 12;
 }
 
 void r4k_helper_tlbinv(CPUMIPSState *env)
@@ -1971,6 +1987,16 @@ void r4k_helper_tlbp(CPUMIPSState *env)
     }
 }
 
+static inline uint64_t get_entrylo_pfn_from_tlb(uint64_t tlb_pfn)
+{
+#if defined(TARGET_MIPS64)
+    return tlb_pfn << 6;
+#else
+    return (extract64(tlb_pfn, 0, 24) << 6) | /* PFN */
+           (extract64(tlb_pfn, 24, 32) << 32); /* PFNX */
+#endif
+}
+
 void r4k_helper_tlbr(CPUMIPSState *env)
 {
     r4k_tlb_t *tlb;
@@ -1996,13 +2022,13 @@ void r4k_helper_tlbr(CPUMIPSState *env)
         env->CP0_EntryHi = tlb->VPN | tlb->ASID;
         env->CP0_PageMask = tlb->PageMask;
         env->CP0_EntryLo0 = tlb->G | (tlb->V0 << 1) | (tlb->D0 << 2) |
-                        ((target_ulong)tlb->RI0 << CP0EnLo_RI) |
-                        ((target_ulong)tlb->XI0 << CP0EnLo_XI) |
-                        (tlb->C0 << 3) | (tlb->PFN[0] >> 6);
+                        ((uint64_t)tlb->RI0 << CP0EnLo_RI) |
+                        ((uint64_t)tlb->XI0 << CP0EnLo_XI) | (tlb->C0 << 3) |
+                        get_entrylo_pfn_from_tlb(tlb->PFN[0] >> 12);
         env->CP0_EntryLo1 = tlb->G | (tlb->V1 << 1) | (tlb->D1 << 2) |
-                        ((target_ulong)tlb->RI1 << CP0EnLo_RI) |
-                        ((target_ulong)tlb->XI1 << CP0EnLo_XI) |
-                        (tlb->C1 << 3) | (tlb->PFN[1] >> 6);
+                        ((uint64_t)tlb->RI1 << CP0EnLo_RI) |
+                        ((uint64_t)tlb->XI1 << CP0EnLo_XI) | (tlb->C1 << 3) |
+                        get_entrylo_pfn_from_tlb(tlb->PFN[1] >> 12);
     }
 }
 
@@ -2098,7 +2124,7 @@ static void set_pc(CPUMIPSState *env, target_ulong error_pc)
     }
 }
 
-void helper_eret(CPUMIPSState *env)
+static inline void exception_return(CPUMIPSState *env)
 {
     debug_pre_eret(env);
     if (env->CP0_Status & (1 << CP0St_ERL)) {
@@ -2110,9 +2136,19 @@ void helper_eret(CPUMIPSState *env)
     }
     compute_hflags(env);
     debug_post_eret(env);
+}
+
+void helper_eret(CPUMIPSState *env)
+{
+    exception_return(env);
     env->lladdr = 1;
 }
 
+void helper_eretnc(CPUMIPSState *env)
+{
+    exception_return(env);
+}
+
 void helper_deret(CPUMIPSState *env)
 {
     debug_pre_eret(env);
@@ -2303,6 +2339,16 @@ target_ulong helper_cfc1(CPUMIPSState *env, uint32_t reg)
             }
         }
         break;
+    case 5:
+        /* FRE Support - read Config5.FRE bit */
+        if (env->active_fpu.fcr0 & (1 << FCR0_FREP)) {
+            if (env->CP0_Config5 & (1 << CP0C5_UFE)) {
+                arg1 = (env->CP0_Config5 >> CP0C5_FRE) & 1;
+            } else {
+                helper_raise_exception(env, EXCP_RI);
+            }
+        }
+        break;
     case 25:
         arg1 = ((env->active_fpu.fcr31 >> 24) & 0xfe) | ((env->active_fpu.fcr31 >> 23) & 0x1);
         break;
@@ -2347,6 +2393,30 @@ void helper_ctc1(CPUMIPSState *env, target_ulong arg1, uint32_t fs, uint32_t rt)
             helper_raise_exception(env, EXCP_RI);
         }
         break;
+    case 5:
+        /* FRE Support - clear Config5.FRE bit */
+        if (!((env->active_fpu.fcr0 & (1 << FCR0_FREP)) && (rt == 0))) {
+            return;
+        }
+        if (env->CP0_Config5 & (1 << CP0C5_UFE)) {
+            env->CP0_Config5 &= ~(1 << CP0C5_FRE);
+            compute_hflags(env);
+        } else {
+            helper_raise_exception(env, EXCP_RI);
+        }
+        break;
+    case 6:
+        /* FRE Support - set Config5.FRE bit */
+        if (!((env->active_fpu.fcr0 & (1 << FCR0_FREP)) && (rt == 0))) {
+            return;
+        }
+        if (env->CP0_Config5 & (1 << CP0C5_UFE)) {
+            env->CP0_Config5 |= (1 << CP0C5_FRE);
+            compute_hflags(env);
+        } else {
+            helper_raise_exception(env, EXCP_RI);
+        }
+        break;
     case 25:
         if ((env->insn_flags & ISA_MIPS32R6) || (arg1 & 0xffffff00)) {
             return;
@@ -3558,72 +3628,82 @@ FOP_CONDN_S(sne,  (float32_lt(fst1, fst0, &env->active_fpu.fp_status)
 /* Element-by-element access macros */
 #define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
 
-void helper_msa_ld_df(CPUMIPSState *env, uint32_t df, uint32_t wd, uint32_t rs,
-                     int32_t s10)
-{
-    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
-    target_ulong addr = env->active_tc.gpr[rs] + (s10 << df);
-    int i;
+#if !defined(CONFIG_USER_ONLY)
+#define MEMOP_IDX(DF)                                           \
+        TCGMemOpIdx oi = make_memop_idx(MO_TE | DF | MO_UNALN,  \
+                                        cpu_mmu_index(env));
+#else
+#define MEMOP_IDX(DF)
+#endif
 
-    switch (df) {
-    case DF_BYTE:
-        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
-            pwd->b[i] = do_lbu(env, addr + (i << DF_BYTE),
-                                env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
-    case DF_HALF:
-        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
-            pwd->h[i] = do_lhu(env, addr + (i << DF_HALF),
-                                env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
-    case DF_WORD:
-        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
-            pwd->w[i] = do_lw(env, addr + (i << DF_WORD),
-                                env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
-    case DF_DOUBLE:
-        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
-            pwd->d[i] = do_ld(env, addr + (i << DF_DOUBLE),
-                                env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
-    }
+#define MSA_LD_DF(DF, TYPE, LD_INSN, ...)                               \
+void helper_msa_ld_ ## TYPE(CPUMIPSState *env, uint32_t wd,             \
+                            target_ulong addr)                          \
+{                                                                       \
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
+    wr_t wx;                                                            \
+    int i;                                                              \
+    MEMOP_IDX(DF)                                                       \
+    for (i = 0; i < DF_ELEMENTS(DF); i++) {                             \
+        wx.TYPE[i] = LD_INSN(env, addr + (i << DF), ##__VA_ARGS__);     \
+    }                                                                   \
+    memcpy(pwd, &wx, sizeof(wr_t));                                     \
 }
 
-void helper_msa_st_df(CPUMIPSState *env, uint32_t df, uint32_t wd, uint32_t rs,
-                     int32_t s10)
-{
-    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
-    target_ulong addr = env->active_tc.gpr[rs] + (s10 << df);
-    int i;
+#if !defined(CONFIG_USER_ONLY)
+MSA_LD_DF(DF_BYTE,   b, helper_ret_ldub_mmu, oi, GETRA())
+MSA_LD_DF(DF_HALF,   h, helper_ret_lduw_mmu, oi, GETRA())
+MSA_LD_DF(DF_WORD,   w, helper_ret_ldul_mmu, oi, GETRA())
+MSA_LD_DF(DF_DOUBLE, d, helper_ret_ldq_mmu,  oi, GETRA())
+#else
+MSA_LD_DF(DF_BYTE,   b, cpu_ldub_data)
+MSA_LD_DF(DF_HALF,   h, cpu_lduw_data)
+MSA_LD_DF(DF_WORD,   w, cpu_ldl_data)
+MSA_LD_DF(DF_DOUBLE, d, cpu_ldq_data)
+#endif
 
-    switch (df) {
-    case DF_BYTE:
-        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
-            do_sb(env, addr + (i << DF_BYTE), pwd->b[i],
-                    env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
-    case DF_HALF:
-        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
-            do_sh(env, addr + (i << DF_HALF), pwd->h[i],
-                    env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
-    case DF_WORD:
-        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
-            do_sw(env, addr + (i << DF_WORD), pwd->w[i],
-                    env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
-    case DF_DOUBLE:
-        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
-            do_sd(env, addr + (i << DF_DOUBLE), pwd->d[i],
-                    env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
+#define MSA_PAGESPAN(x) \
+        ((((x) & ~TARGET_PAGE_MASK) + MSA_WRLEN/8 - 1) >= TARGET_PAGE_SIZE)
+
+static inline void ensure_writable_pages(CPUMIPSState *env,
+                                         target_ulong addr,
+                                         int mmu_idx,
+                                         uintptr_t retaddr)
+{
+#if !defined(CONFIG_USER_ONLY)
+    target_ulong page_addr;
+    if (unlikely(MSA_PAGESPAN(addr))) {
+        /* first page */
+        probe_write(env, addr, mmu_idx, retaddr);
+        /* second page */
+        page_addr = (addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
+        probe_write(env, page_addr, mmu_idx, retaddr);
     }
+#endif
+}
+
+#define MSA_ST_DF(DF, TYPE, ST_INSN, ...)                               \
+void helper_msa_st_ ## TYPE(CPUMIPSState *env, uint32_t wd,             \
+                            target_ulong addr)                          \
+{                                                                       \
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
+    int mmu_idx = cpu_mmu_index(env);                                   \
+    int i;                                                              \
+    MEMOP_IDX(DF)                                                       \
+    ensure_writable_pages(env, addr, mmu_idx, GETRA());                 \
+    for (i = 0; i < DF_ELEMENTS(DF); i++) {                             \
+        ST_INSN(env, addr + (i << DF), pwd->TYPE[i], ##__VA_ARGS__);    \
+    }                                                                   \
 }
+
+#if !defined(CONFIG_USER_ONLY)
+MSA_ST_DF(DF_BYTE,   b, helper_ret_stb_mmu, oi, GETRA())
+MSA_ST_DF(DF_HALF,   h, helper_ret_stw_mmu, oi, GETRA())
+MSA_ST_DF(DF_WORD,   w, helper_ret_stl_mmu, oi, GETRA())
+MSA_ST_DF(DF_DOUBLE, d, helper_ret_stq_mmu, oi, GETRA())
+#else
+MSA_ST_DF(DF_BYTE,   b, cpu_stb_data)
+MSA_ST_DF(DF_HALF,   h, cpu_stw_data)
+MSA_ST_DF(DF_WORD,   w, cpu_stl_data)
+MSA_ST_DF(DF_DOUBLE, d, cpu_stq_data)
+#endif
diff --git a/target-mips/translate.c b/target-mips/translate.c
index fd063a2aae..1d128eef02 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -868,8 +868,10 @@ enum {
 enum {
     OPC_MFC0     = (0x00 << 21) | OPC_CP0,
     OPC_DMFC0    = (0x01 << 21) | OPC_CP0,
+    OPC_MFHC0    = (0x02 << 21) | OPC_CP0,
     OPC_MTC0     = (0x04 << 21) | OPC_CP0,
     OPC_DMTC0    = (0x05 << 21) | OPC_CP0,
+    OPC_MTHC0    = (0x06 << 21) | OPC_CP0,
     OPC_MFTR     = (0x08 << 21) | OPC_CP0,
     OPC_RDPGPR   = (0x0A << 21) | OPC_CP0,
     OPC_MFMC0    = (0x0B << 21) | OPC_CP0,
@@ -1414,6 +1416,7 @@ typedef struct DisasContext {
     int32_t CP0_Config1;
     /* Routine used to access memory */
     int mem_idx;
+    TCGMemOp default_tcg_memop_mask;
     uint32_t hflags, saved_hflags;
     int bstate;
     target_ulong btarget;
@@ -1423,6 +1426,9 @@ typedef struct DisasContext {
     int ie;
     bool bi;
     bool bp;
+    uint64_t PAMask;
+    bool mvh;
+    int CP0_LLAddr_shift;
 } DisasContext;
 
 enum {
@@ -1557,15 +1563,80 @@ static inline void gen_store_srsgpr (int from, int to)
     }
 }
 
+/* Tests */
+static inline void gen_save_pc(target_ulong pc)
+{
+    tcg_gen_movi_tl(cpu_PC, pc);
+}
+
+static inline void save_cpu_state(DisasContext *ctx, int do_save_pc)
+{
+    LOG_DISAS("hflags %08x saved %08x\n", ctx->hflags, ctx->saved_hflags);
+    if (do_save_pc && ctx->pc != ctx->saved_pc) {
+        gen_save_pc(ctx->pc);
+        ctx->saved_pc = ctx->pc;
+    }
+    if (ctx->hflags != ctx->saved_hflags) {
+        tcg_gen_movi_i32(hflags, ctx->hflags);
+        ctx->saved_hflags = ctx->hflags;
+        switch (ctx->hflags & MIPS_HFLAG_BMASK_BASE) {
+        case MIPS_HFLAG_BR:
+            break;
+        case MIPS_HFLAG_BC:
+        case MIPS_HFLAG_BL:
+        case MIPS_HFLAG_B:
+            tcg_gen_movi_tl(btarget, ctx->btarget);
+            break;
+        }
+    }
+}
+
+static inline void restore_cpu_state(CPUMIPSState *env, DisasContext *ctx)
+{
+    ctx->saved_hflags = ctx->hflags;
+    switch (ctx->hflags & MIPS_HFLAG_BMASK_BASE) {
+    case MIPS_HFLAG_BR:
+        break;
+    case MIPS_HFLAG_BC:
+    case MIPS_HFLAG_BL:
+    case MIPS_HFLAG_B:
+        ctx->btarget = env->btarget;
+        break;
+    }
+}
+
+static inline void generate_exception_err(DisasContext *ctx, int excp, int err)
+{
+    TCGv_i32 texcp = tcg_const_i32(excp);
+    TCGv_i32 terr = tcg_const_i32(err);
+    save_cpu_state(ctx, 1);
+    gen_helper_raise_exception_err(cpu_env, texcp, terr);
+    tcg_temp_free_i32(terr);
+    tcg_temp_free_i32(texcp);
+}
+
+static inline void generate_exception(DisasContext *ctx, int excp)
+{
+    save_cpu_state(ctx, 1);
+    gen_helper_0e0i(raise_exception, excp);
+}
+
 /* Floating point register moves. */
-static void gen_load_fpr32(TCGv_i32 t, int reg)
+static void gen_load_fpr32(DisasContext *ctx, TCGv_i32 t, int reg)
 {
+    if (ctx->hflags & MIPS_HFLAG_FRE) {
+        generate_exception(ctx, EXCP_RI);
+    }
     tcg_gen_trunc_i64_i32(t, fpu_f64[reg]);
 }
 
-static void gen_store_fpr32(TCGv_i32 t, int reg)
+static void gen_store_fpr32(DisasContext *ctx, TCGv_i32 t, int reg)
 {
-    TCGv_i64 t64 = tcg_temp_new_i64();
+    TCGv_i64 t64;
+    if (ctx->hflags & MIPS_HFLAG_FRE) {
+        generate_exception(ctx, EXCP_RI);
+    }
+    t64 = tcg_temp_new_i64();
     tcg_gen_extu_i32_i64(t64, t);
     tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 0, 32);
     tcg_temp_free_i64(t64);
@@ -1579,7 +1650,7 @@ static void gen_load_fpr32h(DisasContext *ctx, TCGv_i32 t, int reg)
         tcg_gen_trunc_i64_i32(t, t64);
         tcg_temp_free_i64(t64);
     } else {
-        gen_load_fpr32(t, reg | 1);
+        gen_load_fpr32(ctx, t, reg | 1);
     }
 }
 
@@ -1591,7 +1662,7 @@ static void gen_store_fpr32h(DisasContext *ctx, TCGv_i32 t, int reg)
         tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 32, 32);
         tcg_temp_free_i64(t64);
     } else {
-        gen_store_fpr32(t, reg | 1);
+        gen_store_fpr32(ctx, t, reg | 1);
     }
 }
 
@@ -1626,66 +1697,6 @@ static inline int get_fp_bit (int cc)
         return 23;
 }
 
-/* Tests */
-static inline void gen_save_pc(target_ulong pc)
-{
-    tcg_gen_movi_tl(cpu_PC, pc);
-}
-
-static inline void save_cpu_state (DisasContext *ctx, int do_save_pc)
-{
-    LOG_DISAS("hflags %08x saved %08x\n", ctx->hflags, ctx->saved_hflags);
-    if (do_save_pc && ctx->pc != ctx->saved_pc) {
-        gen_save_pc(ctx->pc);
-        ctx->saved_pc = ctx->pc;
-    }
-    if (ctx->hflags != ctx->saved_hflags) {
-        tcg_gen_movi_i32(hflags, ctx->hflags);
-        ctx->saved_hflags = ctx->hflags;
-        switch (ctx->hflags & MIPS_HFLAG_BMASK_BASE) {
-        case MIPS_HFLAG_BR:
-            break;
-        case MIPS_HFLAG_BC:
-        case MIPS_HFLAG_BL:
-        case MIPS_HFLAG_B:
-            tcg_gen_movi_tl(btarget, ctx->btarget);
-            break;
-        }
-    }
-}
-
-static inline void restore_cpu_state (CPUMIPSState *env, DisasContext *ctx)
-{
-    ctx->saved_hflags = ctx->hflags;
-    switch (ctx->hflags & MIPS_HFLAG_BMASK_BASE) {
-    case MIPS_HFLAG_BR:
-        break;
-    case MIPS_HFLAG_BC:
-    case MIPS_HFLAG_BL:
-    case MIPS_HFLAG_B:
-        ctx->btarget = env->btarget;
-        break;
-    }
-}
-
-static inline void
-generate_exception_err (DisasContext *ctx, int excp, int err)
-{
-    TCGv_i32 texcp = tcg_const_i32(excp);
-    TCGv_i32 terr = tcg_const_i32(err);
-    save_cpu_state(ctx, 1);
-    gen_helper_raise_exception_err(cpu_env, texcp, terr);
-    tcg_temp_free_i32(terr);
-    tcg_temp_free_i32(texcp);
-}
-
-static inline void
-generate_exception (DisasContext *ctx, int excp)
-{
-    save_cpu_state(ctx, 1);
-    gen_helper_0e0i(raise_exception, excp);
-}
-
 /* Addresses computation */
 static inline void gen_op_addr_add (DisasContext *ctx, TCGv ret, TCGv arg0, TCGv arg1)
 {
@@ -1815,11 +1826,20 @@ static inline void check_mips_64(DisasContext *ctx)
 }
 #endif
 
+#ifndef CONFIG_USER_ONLY
+static inline void check_mvh(DisasContext *ctx)
+{
+    if (unlikely(!ctx->mvh)) {
+        generate_exception(ctx, EXCP_RI);
+    }
+}
+#endif
+
 /* Define small wrappers for gen_load_fpr* so that we have a uniform
    calling interface for 32 and 64-bit FPRs.  No sense in changing
    all callers for gen_load_fpr32 when we need the CTX parameter for
    this one use.  */
-#define gen_ldcmp_fpr32(ctx, x, y) gen_load_fpr32(x, y)
+#define gen_ldcmp_fpr32(ctx, x, y) gen_load_fpr32(ctx, x, y)
 #define gen_ldcmp_fpr64(ctx, x, y) gen_load_fpr64(ctx, x, y)
 #define FOP_CONDS(type, abs, fmt, ifmt, bits)                                 \
 static inline void gen_cmp ## type ## _ ## fmt(DisasContext *ctx, int n,      \
@@ -1963,7 +1983,7 @@ static inline void gen_r6_cmp_ ## fmt(DisasContext * ctx, int n,        \
 }
 
 FOP_CONDNS(d, FMT_D, 64, gen_store_fpr64(ctx, fp0, fd))
-FOP_CONDNS(s, FMT_S, 32, gen_store_fpr32(fp0, fd))
+FOP_CONDNS(s, FMT_S, 32, gen_store_fpr32(ctx, fp0, fd))
 #undef FOP_CONDNS
 #undef gen_ldcmp_fpr32
 #undef gen_ldcmp_fpr64
@@ -2081,12 +2101,14 @@ static void gen_ld(DisasContext *ctx, uint32_t opc,
     switch (opc) {
 #if defined(TARGET_MIPS64)
     case OPC_LWU:
-        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEUL);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEUL |
+                           ctx->default_tcg_memop_mask);
         gen_store_gpr(t0, rt);
         opn = "lwu";
         break;
     case OPC_LD:
-        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
+                           ctx->default_tcg_memop_mask);
         gen_store_gpr(t0, rt);
         opn = "ld";
         break;
@@ -2157,17 +2179,20 @@ static void gen_ld(DisasContext *ctx, uint32_t opc,
         opn = "lwpc";
         break;
     case OPC_LW:
-        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESL);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESL |
+                           ctx->default_tcg_memop_mask);
         gen_store_gpr(t0, rt);
         opn = "lw";
         break;
     case OPC_LH:
-        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW |
+                           ctx->default_tcg_memop_mask);
         gen_store_gpr(t0, rt);
         opn = "lh";
         break;
     case OPC_LHU:
-        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEUW);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEUW |
+                           ctx->default_tcg_memop_mask);
         gen_store_gpr(t0, rt);
         opn = "lhu";
         break;
@@ -2251,7 +2276,8 @@ static void gen_st (DisasContext *ctx, uint32_t opc, int rt,
     switch (opc) {
 #if defined(TARGET_MIPS64)
     case OPC_SD:
-        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
+                           ctx->default_tcg_memop_mask);
         opn = "sd";
         break;
     case OPC_SDL:
@@ -2266,11 +2292,13 @@ static void gen_st (DisasContext *ctx, uint32_t opc, int rt,
         break;
 #endif
     case OPC_SW:
-        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
+                           ctx->default_tcg_memop_mask);
         opn = "sw";
         break;
     case OPC_SH:
-        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUW);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUW |
+                           ctx->default_tcg_memop_mask);
         opn = "sh";
         break;
     case OPC_SB:
@@ -2347,8 +2375,9 @@ static void gen_flt_ldst (DisasContext *ctx, uint32_t opc, int ft,
     case OPC_LWC1:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
-            tcg_gen_qemu_ld_i32(fp0, t0, ctx->mem_idx, MO_TESL);
-            gen_store_fpr32(fp0, ft);
+            tcg_gen_qemu_ld_i32(fp0, t0, ctx->mem_idx, MO_TESL |
+                                ctx->default_tcg_memop_mask);
+            gen_store_fpr32(ctx, fp0, ft);
             tcg_temp_free_i32(fp0);
         }
         opn = "lwc1";
@@ -2356,8 +2385,9 @@ static void gen_flt_ldst (DisasContext *ctx, uint32_t opc, int ft,
     case OPC_SWC1:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
-            gen_load_fpr32(fp0, ft);
-            tcg_gen_qemu_st_i32(fp0, t0, ctx->mem_idx, MO_TEUL);
+            gen_load_fpr32(ctx, fp0, ft);
+            tcg_gen_qemu_st_i32(fp0, t0, ctx->mem_idx, MO_TEUL |
+                                ctx->default_tcg_memop_mask);
             tcg_temp_free_i32(fp0);
         }
         opn = "swc1";
@@ -2365,7 +2395,8 @@ static void gen_flt_ldst (DisasContext *ctx, uint32_t opc, int ft,
     case OPC_LDC1:
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
-            tcg_gen_qemu_ld_i64(fp0, t0, ctx->mem_idx, MO_TEQ);
+            tcg_gen_qemu_ld_i64(fp0, t0, ctx->mem_idx, MO_TEQ |
+                                ctx->default_tcg_memop_mask);
             gen_store_fpr64(ctx, fp0, ft);
             tcg_temp_free_i64(fp0);
         }
@@ -2375,7 +2406,8 @@ static void gen_flt_ldst (DisasContext *ctx, uint32_t opc, int ft,
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             gen_load_fpr64(ctx, fp0, ft);
-            tcg_gen_qemu_st_i64(fp0, t0, ctx->mem_idx, MO_TEQ);
+            tcg_gen_qemu_st_i64(fp0, t0, ctx->mem_idx, MO_TEQ |
+                                ctx->default_tcg_memop_mask);
             tcg_temp_free_i64(fp0);
         }
         opn = "sdc1";
@@ -4815,6 +4847,69 @@ static void gen_bshfl (DisasContext *ctx, uint32_t op2, int rt, int rd)
 
 #ifndef CONFIG_USER_ONLY
 /* CP0 (MMU and control) */
+static inline void gen_move_low32(TCGv ret, TCGv_i64 arg)
+{
+#if defined(TARGET_MIPS64)
+    tcg_gen_ext32s_tl(ret, arg);
+#else
+    tcg_gen_trunc_i64_tl(ret, arg);
+#endif
+}
+
+static inline void gen_mthc0_entrylo(TCGv arg, target_ulong off)
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+
+    tcg_gen_ext_tl_i64(t0, arg);
+    tcg_gen_ld_i64(t1, cpu_env, off);
+#if defined(TARGET_MIPS64)
+    tcg_gen_deposit_i64(t1, t1, t0, 30, 32);
+#else
+    tcg_gen_concat32_i64(t1, t1, t0);
+#endif
+    tcg_gen_st_i64(t1, cpu_env, off);
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t0);
+}
+
+static inline void gen_mthc0_store64(TCGv arg, target_ulong off)
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+
+    tcg_gen_ext_tl_i64(t0, arg);
+    tcg_gen_ld_i64(t1, cpu_env, off);
+    tcg_gen_concat32_i64(t1, t1, t0);
+    tcg_gen_st_i64(t1, cpu_env, off);
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t0);
+}
+
+static inline void gen_mfhc0_entrylo(TCGv arg, target_ulong off)
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+
+    tcg_gen_ld_i64(t0, cpu_env, off);
+#if defined(TARGET_MIPS64)
+    tcg_gen_shri_i64(t0, t0, 30);
+#else
+    tcg_gen_shri_i64(t0, t0, 32);
+#endif
+    gen_move_low32(arg, t0);
+    tcg_temp_free_i64(t0);
+}
+
+static inline void gen_mfhc0_load64(TCGv arg, target_ulong off, int shift)
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+
+    tcg_gen_ld_i64(t0, cpu_env, off);
+    tcg_gen_shri_i64(t0, t0, 32 + shift);
+    gen_move_low32(arg, t0);
+    tcg_temp_free_i64(t0);
+}
+
 static inline void gen_mfc0_load32 (TCGv arg, target_ulong off)
 {
     TCGv_i32 t0 = tcg_temp_new_i32();
@@ -4845,6 +4940,140 @@ static inline void gen_mtc0_store64 (TCGv arg, target_ulong off)
     tcg_gen_st_tl(arg, cpu_env, off);
 }
 
+static void gen_mfhc0(DisasContext *ctx, TCGv arg, int reg, int sel)
+{
+    const char *rn = "invalid";
+
+    if (!(ctx->hflags & MIPS_HFLAG_ELPA)) {
+        goto mfhc0_read_zero;
+    }
+
+    switch (reg) {
+    case 2:
+        switch (sel) {
+        case 0:
+            gen_mfhc0_entrylo(arg, offsetof(CPUMIPSState, CP0_EntryLo0));
+            rn = "EntryLo0";
+            break;
+        default:
+            goto mfhc0_read_zero;
+        }
+        break;
+    case 3:
+        switch (sel) {
+        case 0:
+            gen_mfhc0_entrylo(arg, offsetof(CPUMIPSState, CP0_EntryLo1));
+            rn = "EntryLo1";
+            break;
+        default:
+            goto mfhc0_read_zero;
+        }
+        break;
+    case 17:
+        switch (sel) {
+        case 0:
+            gen_mfhc0_load64(arg, offsetof(CPUMIPSState, lladdr),
+                             ctx->CP0_LLAddr_shift);
+            rn = "LLAddr";
+            break;
+        default:
+            goto mfhc0_read_zero;
+        }
+        break;
+    case 28:
+        switch (sel) {
+        case 0:
+        case 2:
+        case 4:
+        case 6:
+            gen_mfhc0_load64(arg, offsetof(CPUMIPSState, CP0_TagLo), 0);
+            rn = "TagLo";
+            break;
+        default:
+            goto mfhc0_read_zero;
+        }
+        break;
+    default:
+        goto mfhc0_read_zero;
+    }
+
+    (void)rn; /* avoid a compiler warning */
+    LOG_DISAS("mfhc0 %s (reg %d sel %d)\n", rn, reg, sel);
+    return;
+
+mfhc0_read_zero:
+    LOG_DISAS("mfhc0 %s (reg %d sel %d)\n", rn, reg, sel);
+    tcg_gen_movi_tl(arg, 0);
+}
+
+static void gen_mthc0(DisasContext *ctx, TCGv arg, int reg, int sel)
+{
+    const char *rn = "invalid";
+    uint64_t mask = ctx->PAMask >> 36;
+
+    if (!(ctx->hflags & MIPS_HFLAG_ELPA)) {
+        goto mthc0_nop;
+    }
+
+    switch (reg) {
+    case 2:
+        switch (sel) {
+        case 0:
+            tcg_gen_andi_tl(arg, arg, mask);
+            gen_mthc0_entrylo(arg, offsetof(CPUMIPSState, CP0_EntryLo0));
+            rn = "EntryLo0";
+            break;
+        default:
+            goto mthc0_nop;
+        }
+        break;
+    case 3:
+        switch (sel) {
+        case 0:
+            tcg_gen_andi_tl(arg, arg, mask);
+            gen_mthc0_entrylo(arg, offsetof(CPUMIPSState, CP0_EntryLo1));
+            rn = "EntryLo1";
+            break;
+        default:
+            goto mthc0_nop;
+        }
+        break;
+    case 17:
+        switch (sel) {
+        case 0:
+            /* LLAddr is read-only (the only exception is bit 0 if LLB is
+               supported); the CP0_LLAddr_rw_bitmask does not seem to be
+               relevant for modern MIPS cores supporting MTHC0, therefore
+               treating MTHC0 to LLAddr as NOP. */
+            rn = "LLAddr";
+            break;
+        default:
+            goto mthc0_nop;
+        }
+        break;
+    case 28:
+        switch (sel) {
+        case 0:
+        case 2:
+        case 4:
+        case 6:
+            tcg_gen_andi_tl(arg, arg, mask);
+            gen_mthc0_store64(arg, offsetof(CPUMIPSState, CP0_TagLo));
+            rn = "TagLo";
+            break;
+        default:
+            goto mthc0_nop;
+        }
+        break;
+    default:
+        goto mthc0_nop;
+    }
+
+    (void)rn; /* avoid a compiler warning */
+mthc0_nop:
+    LOG_DISAS("mthc0 %s (reg %d sel %d)\n", rn, reg, sel);
+}
+
 static inline void gen_mfc0_unimplemented(DisasContext *ctx, TCGv arg)
 {
     if (ctx->insn_flags & ISA_MIPS32R6) {
@@ -4943,17 +5172,20 @@ static void gen_mfc0(DisasContext *ctx, TCGv arg, int reg, int sel)
     case 2:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_EntryLo0));
+            {
+                TCGv_i64 tmp = tcg_temp_new_i64();
+                tcg_gen_ld_i64(tmp, cpu_env,
+                               offsetof(CPUMIPSState, CP0_EntryLo0));
 #if defined(TARGET_MIPS64)
-            if (ctx->rxi) {
-                TCGv tmp = tcg_temp_new();
-                tcg_gen_andi_tl(tmp, arg, (3ull << CP0EnLo_XI));
-                tcg_gen_shri_tl(tmp, tmp, 32);
-                tcg_gen_or_tl(arg, arg, tmp);
-                tcg_temp_free(tmp);
-            }
+                if (ctx->rxi) {
+                    /* Move RI/XI fields to bits 31:30 */
+                    tcg_gen_shri_tl(arg, tmp, CP0EnLo_XI);
+                    tcg_gen_deposit_tl(tmp, tmp, arg, 30, 2);
+                }
 #endif
-            tcg_gen_ext32s_tl(arg, arg);
+                gen_move_low32(arg, tmp);
+                tcg_temp_free_i64(tmp);
+            }
             rn = "EntryLo0";
             break;
         case 1:
@@ -4998,17 +5230,20 @@ static void gen_mfc0(DisasContext *ctx, TCGv arg, int reg, int sel)
     case 3:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_EntryLo1));
+            {
+                TCGv_i64 tmp = tcg_temp_new_i64();
+                tcg_gen_ld_i64(tmp, cpu_env,
+                               offsetof(CPUMIPSState, CP0_EntryLo1));
 #if defined(TARGET_MIPS64)
-            if (ctx->rxi) {
-                TCGv tmp = tcg_temp_new();
-                tcg_gen_andi_tl(tmp, arg, (3ull << CP0EnLo_XI));
-                tcg_gen_shri_tl(tmp, tmp, 32);
-                tcg_gen_or_tl(arg, arg, tmp);
-                tcg_temp_free(tmp);
-            }
+                if (ctx->rxi) {
+                    /* Move RI/XI fields to bits 31:30 */
+                    tcg_gen_shri_tl(arg, tmp, CP0EnLo_XI);
+                    tcg_gen_deposit_tl(tmp, tmp, arg, 30, 2);
+                }
 #endif
-            tcg_gen_ext32s_tl(arg, arg);
+                gen_move_low32(arg, tmp);
+                tcg_temp_free_i64(tmp);
+            }
             rn = "EntryLo1";
             break;
         default:
@@ -5418,7 +5653,12 @@ static void gen_mfc0(DisasContext *ctx, TCGv arg, int reg, int sel)
         case 2:
         case 4:
         case 6:
-            gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_TagLo));
+            {
+                TCGv_i64 tmp = tcg_temp_new_i64();
+                tcg_gen_ld_i64(tmp, cpu_env, offsetof(CPUMIPSState, CP0_TagLo));
+                gen_move_low32(arg, tmp);
+                tcg_temp_free_i64(tmp);
+            }
             rn = "TagLo";
             break;
         case 1:
@@ -5661,6 +5901,7 @@ static void gen_mtc0(DisasContext *ctx, TCGv arg, int reg, int sel)
             check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_pagegrain(cpu_env, arg);
             rn = "PageGrain";
+            ctx->bstate = BS_STOP;
             break;
         default:
             goto cp0_unimplemented;
@@ -7557,7 +7798,7 @@ static void gen_mftr(CPUMIPSState *env, DisasContext *ctx, int rt, int rd,
         if (h == 0) {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, rt);
+            gen_load_fpr32(ctx, fp0, rt);
             tcg_gen_ext_i32_tl(t0, fp0);
             tcg_temp_free_i32(fp0);
         } else {
@@ -7756,7 +7997,7 @@ static void gen_mttr(CPUMIPSState *env, DisasContext *ctx, int rd, int rt,
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             tcg_gen_trunc_tl_i32(fp0, t0);
-            gen_store_fpr32(fp0, rd);
+            gen_store_fpr32(ctx, fp0, rd);
             tcg_temp_free_i32(fp0);
         } else {
             TCGv_i32 fp0 = tcg_temp_new_i32();
@@ -7841,6 +8082,25 @@ static void gen_cp0 (CPUMIPSState *env, DisasContext *ctx, uint32_t opc, int rt,
         opn = "dmtc0";
         break;
 #endif
+    case OPC_MFHC0:
+        check_mvh(ctx);
+        if (rt == 0) {
+            /* Treat as NOP. */
+            return;
+        }
+        gen_mfhc0(ctx, cpu_gpr[rt], rd, ctx->opcode & 0x7);
+        opn = "mfhc0";
+        break;
+    case OPC_MTHC0:
+        check_mvh(ctx);
+        {
+            TCGv t0 = tcg_temp_new();
+            gen_load_gpr(t0, rt);
+            gen_mthc0(ctx, t0, rd, ctx->opcode & 0x7);
+            tcg_temp_free(t0);
+        }
+        opn = "mthc0";
+        break;
     case OPC_MFTR:
         check_insn(ctx, ASE_MT);
         if (rd == 0) {
@@ -7899,16 +8159,26 @@ static void gen_cp0 (CPUMIPSState *env, DisasContext *ctx, uint32_t opc, int rt,
             goto die;
         gen_helper_tlbr(cpu_env);
         break;
-    case OPC_ERET:
-        opn = "eret";
-        check_insn(ctx, ISA_MIPS2);
+    case OPC_ERET: /* OPC_ERETNC */
         if ((ctx->insn_flags & ISA_MIPS32R6) &&
             (ctx->hflags & MIPS_HFLAG_BMASK)) {
             MIPS_DEBUG("CTI in delay / forbidden slot");
             goto die;
+        } else {
+            int bit_shift = (ctx->hflags & MIPS_HFLAG_M16) ? 16 : 6;
+            if (ctx->opcode & (1 << bit_shift)) {
+                /* OPC_ERETNC */
+                opn = "eretnc";
+                check_insn(ctx, ISA_MIPS32R5);
+                gen_helper_eretnc(cpu_env);
+            } else {
+                /* OPC_ERET */
+                opn = "eret";
+                check_insn(ctx, ISA_MIPS2);
+                gen_helper_eret(cpu_env);
+            }
+            ctx->bstate = BS_EXCP;
         }
-        gen_helper_eret(cpu_env);
-        ctx->bstate = BS_EXCP;
         break;
     case OPC_DERET:
         opn = "deret";
@@ -8346,7 +8616,7 @@ static void gen_cp1 (DisasContext *ctx, uint32_t opc, int rt, int fs)
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             tcg_gen_ext_i32_tl(t0, fp0);
             tcg_temp_free_i32(fp0);
         }
@@ -8359,7 +8629,7 @@ static void gen_cp1 (DisasContext *ctx, uint32_t opc, int rt, int fs)
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             tcg_gen_trunc_tl_i32(fp0, t0);
-            gen_store_fpr32(fp0, fs);
+            gen_store_fpr32(ctx, fp0, fs);
             tcg_temp_free_i32(fp0);
         }
         opn = "mtc1";
@@ -8457,7 +8727,8 @@ static void gen_movci (DisasContext *ctx, int rd, int rs, int cc, int tf)
     gen_set_label(l1);
 }
 
-static inline void gen_movcf_s (int fs, int fd, int cc, int tf)
+static inline void gen_movcf_s(DisasContext *ctx, int fs, int fd, int cc,
+                               int tf)
 {
     int cond;
     TCGv_i32 t0 = tcg_temp_new_i32();
@@ -8470,8 +8741,8 @@ static inline void gen_movcf_s (int fs, int fd, int cc, int tf)
 
     tcg_gen_andi_i32(t0, fpu_fcr31, 1 << get_fp_bit(cc));
     tcg_gen_brcondi_i32(cond, t0, 0, l1);
-    gen_load_fpr32(t0, fs);
-    gen_store_fpr32(t0, fd);
+    gen_load_fpr32(ctx, t0, fs);
+    gen_store_fpr32(ctx, t0, fd);
     gen_set_label(l1);
     tcg_temp_free_i32(t0);
 }
@@ -8513,8 +8784,8 @@ static inline void gen_movcf_ps(DisasContext *ctx, int fs, int fd,
 
     tcg_gen_andi_i32(t0, fpu_fcr31, 1 << get_fp_bit(cc));
     tcg_gen_brcondi_i32(cond, t0, 0, l1);
-    gen_load_fpr32(t0, fs);
-    gen_store_fpr32(t0, fd);
+    gen_load_fpr32(ctx, t0, fs);
+    gen_store_fpr32(ctx, t0, fd);
     gen_set_label(l1);
 
     tcg_gen_andi_i32(t0, fpu_fcr31, 1 << get_fp_bit(cc+1));
@@ -8532,9 +8803,9 @@ static void gen_sel_s(DisasContext *ctx, enum fopcode op1, int fd, int ft,
     TCGv_i32 fp0 = tcg_temp_new_i32();
     TCGv_i32 fp1 = tcg_temp_new_i32();
     TCGv_i32 fp2 = tcg_temp_new_i32();
-    gen_load_fpr32(fp0, fd);
-    gen_load_fpr32(fp1, ft);
-    gen_load_fpr32(fp2, fs);
+    gen_load_fpr32(ctx, fp0, fd);
+    gen_load_fpr32(ctx, fp1, ft);
+    gen_load_fpr32(ctx, fp2, fs);
 
     switch (op1) {
     case OPC_SEL_S:
@@ -8555,7 +8826,7 @@ static void gen_sel_s(DisasContext *ctx, enum fopcode op1, int fd, int ft,
         break;
     }
 
-    gen_store_fpr32(fp0, fd);
+    gen_store_fpr32(ctx, fp0, fd);
     tcg_temp_free_i32(fp2);
     tcg_temp_free_i32(fp1);
     tcg_temp_free_i32(fp0);
@@ -8648,11 +8919,11 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
             gen_helper_float_add_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "add.s";
@@ -8663,11 +8934,11 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
             gen_helper_float_sub_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "sub.s";
@@ -8678,11 +8949,11 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
             gen_helper_float_mul_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "mul.s";
@@ -8693,11 +8964,11 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
             gen_helper_float_div_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "div.s";
@@ -8707,9 +8978,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_sqrt_s(fp0, cpu_env, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "sqrt.s";
@@ -8718,9 +8989,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_abs_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "abs.s";
@@ -8729,8 +9000,8 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_store_fpr32(fp0, fd);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "mov.s";
@@ -8739,9 +9010,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_chs_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "neg.s";
@@ -8752,7 +9023,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32 = tcg_temp_new_i32();
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
-            gen_load_fpr32(fp32, fs);
+            gen_load_fpr32(ctx, fp32, fs);
             gen_helper_float_roundl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
@@ -8766,7 +9037,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32 = tcg_temp_new_i32();
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
-            gen_load_fpr32(fp32, fs);
+            gen_load_fpr32(ctx, fp32, fs);
             gen_helper_float_truncl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
@@ -8780,7 +9051,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32 = tcg_temp_new_i32();
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
-            gen_load_fpr32(fp32, fs);
+            gen_load_fpr32(ctx, fp32, fs);
             gen_helper_float_ceill_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
@@ -8794,7 +9065,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32 = tcg_temp_new_i32();
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
-            gen_load_fpr32(fp32, fs);
+            gen_load_fpr32(ctx, fp32, fs);
             gen_helper_float_floorl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
@@ -8806,9 +9077,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_roundw_s(fp0, cpu_env, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "round.w.s";
@@ -8817,9 +9088,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_truncw_s(fp0, cpu_env, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "trunc.w.s";
@@ -8828,9 +9099,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_ceilw_s(fp0, cpu_env, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "ceil.w.s";
@@ -8839,9 +9110,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_floorw_s(fp0, cpu_env, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "floor.w.s";
@@ -8863,7 +9134,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         break;
     case OPC_MOVCF_S:
         check_insn_opc_removed(ctx, ISA_MIPS32R6);
-        gen_movcf_s(fs, fd, (ft >> 2) & 0x7, ft & 0x1);
+        gen_movcf_s(ctx, fs, fd, (ft >> 2) & 0x7, ft & 0x1);
         opn = "movcf.s";
         break;
     case OPC_MOVZ_S:
@@ -8876,8 +9147,8 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
                 tcg_gen_brcondi_tl(TCG_COND_NE, cpu_gpr[ft], 0, l1);
             }
             fp0 = tcg_temp_new_i32();
-            gen_load_fpr32(fp0, fs);
-            gen_store_fpr32(fp0, fd);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
             gen_set_label(l1);
         }
@@ -8892,8 +9163,8 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             if (ft != 0) {
                 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[ft], 0, l1);
                 fp0 = tcg_temp_new_i32();
-                gen_load_fpr32(fp0, fs);
-                gen_store_fpr32(fp0, fd);
+                gen_load_fpr32(ctx, fp0, fs);
+                gen_store_fpr32(ctx, fp0, fd);
                 tcg_temp_free_i32(fp0);
                 gen_set_label(l1);
             }
@@ -8905,9 +9176,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_recip_s(fp0, cpu_env, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "recip.s";
@@ -8917,9 +9188,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_rsqrt_s(fp0, cpu_env, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "rsqrt.s";
@@ -8930,11 +9201,11 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
             TCGv_i32 fp2 = tcg_temp_new_i32();
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
-            gen_load_fpr32(fp2, fd);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
+            gen_load_fpr32(ctx, fp2, fd);
             gen_helper_float_maddf_s(fp2, cpu_env, fp0, fp1, fp2);
-            gen_store_fpr32(fp2, fd);
+            gen_store_fpr32(ctx, fp2, fd);
             tcg_temp_free_i32(fp2);
             tcg_temp_free_i32(fp1);
             tcg_temp_free_i32(fp0);
@@ -8947,11 +9218,11 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
             TCGv_i32 fp2 = tcg_temp_new_i32();
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
-            gen_load_fpr32(fp2, fd);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
+            gen_load_fpr32(ctx, fp2, fd);
             gen_helper_float_msubf_s(fp2, cpu_env, fp0, fp1, fp2);
-            gen_store_fpr32(fp2, fd);
+            gen_store_fpr32(ctx, fp2, fd);
             tcg_temp_free_i32(fp2);
             tcg_temp_free_i32(fp1);
             tcg_temp_free_i32(fp0);
@@ -8962,9 +9233,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         check_insn(ctx, ISA_MIPS32R6);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_rint_s(fp0, cpu_env, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
             opn = "rint.s";
         }
@@ -8973,9 +9244,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         check_insn(ctx, ISA_MIPS32R6);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_class_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
             opn = "class.s";
         }
@@ -8986,10 +9257,10 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
             TCGv_i32 fp2 = tcg_temp_new_i32();
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
             gen_helper_float_min_s(fp2, cpu_env, fp0, fp1);
-            gen_store_fpr32(fp2, fd);
+            gen_store_fpr32(ctx, fp2, fd);
             tcg_temp_free_i32(fp2);
             tcg_temp_free_i32(fp1);
             tcg_temp_free_i32(fp0);
@@ -9001,11 +9272,11 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
                 TCGv_i32 fp0 = tcg_temp_new_i32();
                 TCGv_i32 fp1 = tcg_temp_new_i32();
 
-                gen_load_fpr32(fp0, fs);
-                gen_load_fpr32(fp1, ft);
+                gen_load_fpr32(ctx, fp0, fs);
+                gen_load_fpr32(ctx, fp1, ft);
                 gen_helper_float_recip2_s(fp0, cpu_env, fp0, fp1);
                 tcg_temp_free_i32(fp1);
-                gen_store_fpr32(fp0, fd);
+                gen_store_fpr32(ctx, fp0, fd);
                 tcg_temp_free_i32(fp0);
             }
             opn = "recip2.s";
@@ -9017,10 +9288,10 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
             TCGv_i32 fp2 = tcg_temp_new_i32();
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
             gen_helper_float_mina_s(fp2, cpu_env, fp0, fp1);
-            gen_store_fpr32(fp2, fd);
+            gen_store_fpr32(ctx, fp2, fd);
             tcg_temp_free_i32(fp2);
             tcg_temp_free_i32(fp1);
             tcg_temp_free_i32(fp0);
@@ -9031,9 +9302,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             {
                 TCGv_i32 fp0 = tcg_temp_new_i32();
 
-                gen_load_fpr32(fp0, fs);
+                gen_load_fpr32(ctx, fp0, fs);
                 gen_helper_float_recip1_s(fp0, cpu_env, fp0);
-                gen_store_fpr32(fp0, fd);
+                gen_store_fpr32(ctx, fp0, fd);
                 tcg_temp_free_i32(fp0);
             }
             opn = "recip1.s";
@@ -9044,10 +9315,10 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             /* OPC_MAX_S */
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
             gen_helper_float_max_s(fp1, cpu_env, fp0, fp1);
-            gen_store_fpr32(fp1, fd);
+            gen_store_fpr32(ctx, fp1, fd);
             tcg_temp_free_i32(fp1);
             tcg_temp_free_i32(fp0);
             opn = "max.s";
@@ -9057,9 +9328,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             {
                 TCGv_i32 fp0 = tcg_temp_new_i32();
 
-                gen_load_fpr32(fp0, fs);
+                gen_load_fpr32(ctx, fp0, fs);
                 gen_helper_float_rsqrt1_s(fp0, cpu_env, fp0);
-                gen_store_fpr32(fp0, fd);
+                gen_store_fpr32(ctx, fp0, fd);
                 tcg_temp_free_i32(fp0);
             }
             opn = "rsqrt1.s";
@@ -9070,10 +9341,10 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             /* OPC_MAXA_S */
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
             gen_helper_float_maxa_s(fp1, cpu_env, fp0, fp1);
-            gen_store_fpr32(fp1, fd);
+            gen_store_fpr32(ctx, fp1, fd);
             tcg_temp_free_i32(fp1);
             tcg_temp_free_i32(fp0);
             opn = "maxa.s";
@@ -9084,11 +9355,11 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
                 TCGv_i32 fp0 = tcg_temp_new_i32();
                 TCGv_i32 fp1 = tcg_temp_new_i32();
 
-                gen_load_fpr32(fp0, fs);
-                gen_load_fpr32(fp1, ft);
+                gen_load_fpr32(ctx, fp0, fs);
+                gen_load_fpr32(ctx, fp1, ft);
                 gen_helper_float_rsqrt2_s(fp0, cpu_env, fp0, fp1);
                 tcg_temp_free_i32(fp1);
-                gen_store_fpr32(fp0, fd);
+                gen_store_fpr32(ctx, fp0, fd);
                 tcg_temp_free_i32(fp0);
             }
             opn = "rsqrt2.s";
@@ -9100,7 +9371,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32 = tcg_temp_new_i32();
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
-            gen_load_fpr32(fp32, fs);
+            gen_load_fpr32(ctx, fp32, fs);
             gen_helper_float_cvtd_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
@@ -9112,9 +9383,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_cvtw_s(fp0, cpu_env, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "cvt.w.s";
@@ -9125,7 +9396,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32 = tcg_temp_new_i32();
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
-            gen_load_fpr32(fp32, fs);
+            gen_load_fpr32(ctx, fp32, fs);
             gen_helper_float_cvtl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
@@ -9141,8 +9412,8 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32_0 = tcg_temp_new_i32();
             TCGv_i32 fp32_1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp32_0, fs);
-            gen_load_fpr32(fp32_1, ft);
+            gen_load_fpr32(ctx, fp32_0, fs);
+            gen_load_fpr32(ctx, fp32_1, ft);
             tcg_gen_concat_i32_i64(fp64, fp32_1, fp32_0);
             tcg_temp_free_i32(fp32_1);
             tcg_temp_free_i32(fp32_0);
@@ -9344,7 +9615,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             gen_load_fpr64(ctx, fp64, fs);
             gen_helper_float_roundw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
-            gen_store_fpr32(fp32, fd);
+            gen_store_fpr32(ctx, fp32, fd);
             tcg_temp_free_i32(fp32);
         }
         opn = "round.w.d";
@@ -9358,7 +9629,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             gen_load_fpr64(ctx, fp64, fs);
             gen_helper_float_truncw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
-            gen_store_fpr32(fp32, fd);
+            gen_store_fpr32(ctx, fp32, fd);
             tcg_temp_free_i32(fp32);
         }
         opn = "trunc.w.d";
@@ -9372,7 +9643,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             gen_load_fpr64(ctx, fp64, fs);
             gen_helper_float_ceilw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
-            gen_store_fpr32(fp32, fd);
+            gen_store_fpr32(ctx, fp32, fd);
             tcg_temp_free_i32(fp32);
         }
         opn = "ceil.w.d";
@@ -9386,7 +9657,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             gen_load_fpr64(ctx, fp64, fs);
             gen_helper_float_floorw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
-            gen_store_fpr32(fp32, fd);
+            gen_store_fpr32(ctx, fp32, fd);
             tcg_temp_free_i32(fp32);
         }
         opn = "floor.w.d";
@@ -9669,7 +9940,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             gen_load_fpr64(ctx, fp64, fs);
             gen_helper_float_cvts_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
-            gen_store_fpr32(fp32, fd);
+            gen_store_fpr32(ctx, fp32, fd);
             tcg_temp_free_i32(fp32);
         }
         opn = "cvt.s.d";
@@ -9683,7 +9954,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             gen_load_fpr64(ctx, fp64, fs);
             gen_helper_float_cvtw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
-            gen_store_fpr32(fp32, fd);
+            gen_store_fpr32(ctx, fp32, fd);
             tcg_temp_free_i32(fp32);
         }
         opn = "cvt.w.d";
@@ -9704,9 +9975,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_cvts_w(fp0, cpu_env, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "cvt.s.w";
@@ -9717,7 +9988,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32 = tcg_temp_new_i32();
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
-            gen_load_fpr32(fp32, fs);
+            gen_load_fpr32(ctx, fp32, fs);
             gen_helper_float_cvtd_w(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
@@ -9734,7 +10005,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             gen_load_fpr64(ctx, fp64, fs);
             gen_helper_float_cvts_l(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
-            gen_store_fpr32(fp32, fd);
+            gen_store_fpr32(ctx, fp32, fd);
             tcg_temp_free_i32(fp32);
         }
         opn = "cvt.s.l";
@@ -9973,7 +10244,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
 
             gen_load_fpr32h(ctx, fp0, fs);
             gen_helper_float_cvts_pu(fp0, cpu_env, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "cvt.s.pu";
@@ -9995,9 +10266,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_cvts_pl(fp0, cpu_env, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "cvt.s.pl";
@@ -10008,10 +10279,10 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
             gen_store_fpr32h(ctx, fp0, fd);
-            gen_store_fpr32(fp1, fd);
+            gen_store_fpr32(ctx, fp1, fd);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
         }
@@ -10023,9 +10294,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_load_fpr32h(ctx, fp1, ft);
-            gen_store_fpr32(fp1, fd);
+            gen_store_fpr32(ctx, fp1, fd);
             gen_store_fpr32h(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
@@ -10039,8 +10310,8 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
             gen_load_fpr32h(ctx, fp0, fs);
-            gen_load_fpr32(fp1, ft);
-            gen_store_fpr32(fp1, fd);
+            gen_load_fpr32(ctx, fp1, ft);
+            gen_store_fpr32(ctx, fp1, fd);
             gen_store_fpr32h(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
@@ -10055,7 +10326,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
 
             gen_load_fpr32h(ctx, fp0, fs);
             gen_load_fpr32h(ctx, fp1, ft);
-            gen_store_fpr32(fp1, fd);
+            gen_store_fpr32(ctx, fp1, fd);
             gen_store_fpr32h(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
@@ -10130,7 +10401,7 @@ static void gen_flt3_ldst (DisasContext *ctx, uint32_t opc,
 
             tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESL);
             tcg_gen_trunc_tl_i32(fp0, t0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "lwxc1";
@@ -10162,7 +10433,7 @@ static void gen_flt3_ldst (DisasContext *ctx, uint32_t opc,
         check_cop1x(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             tcg_gen_qemu_st_i32(fp0, t0, ctx->mem_idx, MO_TEUL);
             tcg_temp_free_i32(fp0);
         }
@@ -10219,23 +10490,23 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             tcg_gen_andi_tl(t0, t0, 0x7);
 
             tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
-            gen_load_fpr32(fp, fs);
+            gen_load_fpr32(ctx, fp, fs);
             gen_load_fpr32h(ctx, fph, fs);
-            gen_store_fpr32(fp, fd);
+            gen_store_fpr32(ctx, fp, fd);
             gen_store_fpr32h(ctx, fph, fd);
             tcg_gen_br(l2);
             gen_set_label(l1);
             tcg_gen_brcondi_tl(TCG_COND_NE, t0, 4, l2);
             tcg_temp_free(t0);
 #ifdef TARGET_WORDS_BIGENDIAN
-            gen_load_fpr32(fp, fs);
+            gen_load_fpr32(ctx, fp, fs);
             gen_load_fpr32h(ctx, fph, ft);
             gen_store_fpr32h(ctx, fp, fd);
-            gen_store_fpr32(fph, fd);
+            gen_store_fpr32(ctx, fph, fd);
 #else
             gen_load_fpr32h(ctx, fph, fs);
-            gen_load_fpr32(fp, ft);
-            gen_store_fpr32(fph, fd);
+            gen_load_fpr32(ctx, fp, ft);
+            gen_store_fpr32(ctx, fph, fd);
             gen_store_fpr32h(ctx, fp, fd);
 #endif
             gen_set_label(l2);
@@ -10251,13 +10522,13 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             TCGv_i32 fp1 = tcg_temp_new_i32();
             TCGv_i32 fp2 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
-            gen_load_fpr32(fp2, fr);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
+            gen_load_fpr32(ctx, fp2, fr);
             gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp2, fd);
+            gen_store_fpr32(ctx, fp2, fd);
             tcg_temp_free_i32(fp2);
         }
         opn = "madd.s";
@@ -10306,13 +10577,13 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             TCGv_i32 fp1 = tcg_temp_new_i32();
             TCGv_i32 fp2 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
-            gen_load_fpr32(fp2, fr);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
+            gen_load_fpr32(ctx, fp2, fr);
             gen_helper_float_msub_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp2, fd);
+            gen_store_fpr32(ctx, fp2, fd);
             tcg_temp_free_i32(fp2);
         }
         opn = "msub.s";
@@ -10361,13 +10632,13 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             TCGv_i32 fp1 = tcg_temp_new_i32();
             TCGv_i32 fp2 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
-            gen_load_fpr32(fp2, fr);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
+            gen_load_fpr32(ctx, fp2, fr);
             gen_helper_float_nmadd_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp2, fd);
+            gen_store_fpr32(ctx, fp2, fd);
             tcg_temp_free_i32(fp2);
         }
         opn = "nmadd.s";
@@ -10416,13 +10687,13 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             TCGv_i32 fp1 = tcg_temp_new_i32();
             TCGv_i32 fp2 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
-            gen_load_fpr32(fp2, fr);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
+            gen_load_fpr32(ctx, fp2, fr);
             gen_helper_float_nmsub_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp2, fd);
+            gen_store_fpr32(ctx, fp2, fd);
             tcg_temp_free_i32(fp2);
         }
         opn = "nmsub.s";
@@ -13502,7 +13773,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
                 case MOVF_FMT:
                     switch (fmt) {
                     case FMT_SDPS_S:
-                        gen_movcf_s(rs, rt, cc, 0);
+                        gen_movcf_s(ctx, rs, rt, cc, 0);
                         break;
                     case FMT_SDPS_D:
                         gen_movcf_d(ctx, rs, rt, cc, 0);
@@ -13517,7 +13788,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
                 case MOVT_FMT:
                     switch (fmt) {
                     case FMT_SDPS_S:
-                        gen_movcf_s(rs, rt, cc, 1);
+                        gen_movcf_s(ctx, rs, rt, cc, 1);
                         break;
                     case FMT_SDPS_D:
                         gen_movcf_d(ctx, rs, rt, cc, 1);
@@ -18404,32 +18675,39 @@ static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
             uint8_t wd = (ctx->opcode >> 6) & 0x1f;
             uint8_t df = (ctx->opcode >> 0) & 0x3;
 
-            TCGv_i32 tdf = tcg_const_i32(df);
             TCGv_i32 twd = tcg_const_i32(wd);
-            TCGv_i32 trs = tcg_const_i32(rs);
-            TCGv_i32 ts10 = tcg_const_i32(s10);
+            TCGv taddr = tcg_temp_new();
+            gen_base_offset_addr(ctx, taddr, rs, s10 << df);
 
             switch (MASK_MSA_MINOR(opcode)) {
             case OPC_LD_B:
+                gen_helper_msa_ld_b(cpu_env, twd, taddr);
+                break;
             case OPC_LD_H:
+                gen_helper_msa_ld_h(cpu_env, twd, taddr);
+                break;
             case OPC_LD_W:
+                gen_helper_msa_ld_w(cpu_env, twd, taddr);
+                break;
             case OPC_LD_D:
-                save_cpu_state(ctx, 1);
-                gen_helper_msa_ld_df(cpu_env, tdf, twd, trs, ts10);
+                gen_helper_msa_ld_d(cpu_env, twd, taddr);
                 break;
             case OPC_ST_B:
+                gen_helper_msa_st_b(cpu_env, twd, taddr);
+                break;
             case OPC_ST_H:
+                gen_helper_msa_st_h(cpu_env, twd, taddr);
+                break;
             case OPC_ST_W:
+                gen_helper_msa_st_w(cpu_env, twd, taddr);
+                break;
             case OPC_ST_D:
-                save_cpu_state(ctx, 1);
-                gen_helper_msa_st_df(cpu_env, tdf, twd, trs, ts10);
+                gen_helper_msa_st_d(cpu_env, twd, taddr);
                 break;
             }
 
             tcg_temp_free_i32(twd);
-            tcg_temp_free_i32(tdf);
-            tcg_temp_free_i32(trs);
-            tcg_temp_free_i32(ts10);
+            tcg_temp_free(taddr);
         }
         break;
     default:
@@ -18564,6 +18842,8 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
         case OPC_MTC0:
         case OPC_MFTR:
         case OPC_MTTR:
+        case OPC_MFHC0:
+        case OPC_MTHC0:
 #if defined(TARGET_MIPS64)
         case OPC_DMFC0:
         case OPC_DMTC0:
@@ -19134,6 +19414,9 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
     ctx.ie = (env->CP0_Config4 >> CP0C4_IE) & 3;
     ctx.bi = (env->CP0_Config3 >> CP0C3_BI) & 1;
     ctx.bp = (env->CP0_Config3 >> CP0C3_BP) & 1;
+    ctx.PAMask = env->PAMask;
+    ctx.mvh = (env->CP0_Config5 >> CP0C5_MVH) & 1;
+    ctx.CP0_LLAddr_shift = env->CP0_LLAddr_shift;
     /* Restore delay slot state from the tb context.  */
     ctx.hflags = (uint32_t)tb->flags; /* FIXME: maybe use 64 bits here? */
     ctx.ulri = (env->CP0_Config3 >> CP0C3_ULRI) & 1;
@@ -19143,6 +19426,8 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
 #else
         ctx.mem_idx = ctx.hflags & MIPS_HFLAG_KSU;
 #endif
+    ctx.default_tcg_memop_mask = (ctx.insn_flags & ISA_MIPS32R6) ?
+                                 MO_UNALN : MO_ALIGN;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0)
@@ -19385,7 +19670,8 @@ void mips_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
 
     cpu_fprintf(f, "CP0 Status  0x%08x Cause   0x%08x EPC    0x" TARGET_FMT_lx "\n",
                 env->CP0_Status, env->CP0_Cause, env->CP0_EPC);
-    cpu_fprintf(f, "    Config0 0x%08x Config1 0x%08x LLAddr 0x" TARGET_FMT_lx "\n",
+    cpu_fprintf(f, "    Config0 0x%08x Config1 0x%08x LLAddr 0x%016"
+                PRIx64 "\n",
                 env->CP0_Config0, env->CP0_Config1, env->lladdr);
     cpu_fprintf(f, "    Config2 0x%08x Config3 0x%08x\n",
                 env->CP0_Config2, env->CP0_Config3);
@@ -19519,7 +19805,6 @@ void cpu_state_reset(CPUMIPSState *env)
     }
 #endif
     env->PABITS = env->cpu_model->PABITS;
-    env->PAMask = (target_ulong)((1ULL << env->cpu_model->PABITS) - 1);
     env->CP0_SRSConf0_rw_bitmask = env->cpu_model->CP0_SRSConf0_rw_bitmask;
     env->CP0_SRSConf0 = env->cpu_model->CP0_SRSConf0;
     env->CP0_SRSConf1_rw_bitmask = env->cpu_model->CP0_SRSConf1_rw_bitmask;
@@ -19640,6 +19925,7 @@ void cpu_state_reset(CPUMIPSState *env)
     compute_hflags(env);
     restore_rounding_mode(env);
     restore_flush_mode(env);
+    restore_pamask(env);
     cs->exception_index = EXCP_NONE;
 }
 
diff --git a/target-mips/translate_init.c b/target-mips/translate_init.c
index 85a65e74b4..30605dab06 100644
--- a/target-mips/translate_init.c
+++ b/target-mips/translate_init.c
@@ -400,10 +400,12 @@ static const mips_def_t mips_defs[] =
                        (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
                        (1 << CP0C1_CA),
         .CP0_Config2 = MIPS_CONFIG2,
-        .CP0_Config3 = MIPS_CONFIG3 | (1U << CP0C3_M) | (1 << CP0C3_MSAP),
+        .CP0_Config3 = MIPS_CONFIG3 | (1U << CP0C3_M) | (1 << CP0C3_MSAP) |
+                       (1 << CP0C3_LPA),
         .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M),
         .CP0_Config4_rw_bitmask = 0,
-        .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_UFR),
+        .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_UFR) | (1 << CP0C5_LLB) |
+                       (1 << CP0C5_MVH),
         .CP0_Config5_rw_bitmask = (0 << CP0C5_M) | (1 << CP0C5_K) |
                                   (1 << CP0C5_CV) | (0 << CP0C5_EVA) |
                                   (1 << CP0C5_MSAEn) | (1 << CP0C5_UFR) |
@@ -413,11 +415,12 @@ static const mips_def_t mips_defs[] =
         .SYNCI_Step = 32,
         .CCRes = 2,
         .CP0_Status_rw_bitmask = 0x3778FF1F,
+        .CP0_PageGrain_rw_bitmask = (1 << CP0PG_ELPA),
         .CP1_fcr0 = (1 << FCR0_UFRP) | (1 << FCR0_F64) | (1 << FCR0_L) |
                     (1 << FCR0_W) | (1 << FCR0_D) | (1 << FCR0_S) |
                     (0x93 << FCR0_PRID),
         .SEGBITS = 32,
-        .PABITS = 32,
+        .PABITS = 40,
         .insn_flags = CPU_MIPS32R5 | ASE_MIPS16 | ASE_MSA,
         .mmu_type = MMU_TYPE_R4000,
     },
@@ -553,9 +556,6 @@ static const mips_def_t mips_defs[] =
                     (1 << FCR0_L) | (1 << FCR0_W) | (1 << FCR0_D) |
                     (1 << FCR0_S) | (0x00 << FCR0_PRID) | (0x0 << FCR0_REV),
         .SEGBITS = 42,
-        /* The architectural limit is 59, but we have hardcoded 36 bit
-           in some places...
-        .PABITS = 59, */ /* the architectural limit */
         .PABITS = 36,
         .insn_flags = CPU_MIPS64R2 | ASE_MIPS3D,
         .mmu_type = MMU_TYPE_R4000,
@@ -607,7 +607,7 @@ static const mips_def_t mips_defs[] =
     },
     {
         /* A generic CPU supporting MIPS64 Release 6 ISA.
-           FIXME: Support IEEE 754-2008 FP and misaligned memory accesses.
+           FIXME: Support IEEE 754-2008 FP.
                   Eventually this should be replaced by a real CPU model. */
         .name = "MIPS64R6-generic",
         .CP0_PRid = 0x00010000,
@@ -619,10 +619,13 @@ static const mips_def_t mips_defs[] =
                        (0 << CP0C1_PC) | (1 << CP0C1_WR) | (1 << CP0C1_EP),
         .CP0_Config2 = MIPS_CONFIG2,
         .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_RXI) | (1 << CP0C3_BP) |
-                       (1 << CP0C3_BI) | (1 << CP0C3_ULRI) | (1U << CP0C3_M),
+                       (1 << CP0C3_BI) | (1 << CP0C3_ULRI) | (1 << CP0C3_LPA) |
+                       (1U << CP0C3_M),
         .CP0_Config4 = MIPS_CONFIG4 | (0xfc << CP0C4_KScrExist) |
                        (3 << CP0C4_IE) | (1 << CP0C4_M),
-        .CP0_Config5_rw_bitmask = (1 << CP0C5_SBRI),
+        .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_LLB),
+        .CP0_Config5_rw_bitmask = (1 << CP0C5_SBRI) | (1 << CP0C5_FRE) |
+                                  (1 << CP0C5_UFE),
         .CP0_LLAddr_rw_bitmask = 0,
         .CP0_LLAddr_shift = 0,
         .SYNCI_Step = 32,
@@ -630,15 +633,12 @@ static const mips_def_t mips_defs[] =
         .CP0_Status_rw_bitmask = 0x30D8FFFF,
         .CP0_PageGrain = (1 << CP0PG_IEC) | (1 << CP0PG_XIE) |
                          (1U << CP0PG_RIE),
-        .CP0_PageGrain_rw_bitmask = 0,
-        .CP1_fcr0 = (1 << FCR0_F64) | (1 << FCR0_L) | (1 << FCR0_W) |
-                    (1 << FCR0_D) | (1 << FCR0_S) | (0x00 << FCR0_PRID) |
-                    (0x0 << FCR0_REV),
+        .CP0_PageGrain_rw_bitmask = (1 << CP0PG_ELPA),
+        .CP1_fcr0 = (1 << FCR0_FREP) | (1 << FCR0_F64) | (1 << FCR0_L) |
+                    (1 << FCR0_W) | (1 << FCR0_D) | (1 << FCR0_S) |
+                    (0x00 << FCR0_PRID) | (0x0 << FCR0_REV),
         .SEGBITS = 42,
-        /* The architectural limit is 59, but we have hardcoded 36 bit
-           in some places...
-        .PABITS = 59, */ /* the architectural limit */
-        .PABITS = 36,
+        .PABITS = 48,
         .insn_flags = CPU_MIPS64R6,
         .mmu_type = MMU_TYPE_R4000,
     },
@@ -701,9 +701,6 @@ static const mips_def_t mips_defs[] =
                     (1 << FCR0_L) | (1 << FCR0_W) | (1 << FCR0_D) |
                     (1 << FCR0_S) | (0x00 << FCR0_PRID) | (0x0 << FCR0_REV),
         .SEGBITS = 42,
-        /* The architectural limit is 59, but we have hardcoded 36 bit
-           in some places...
-        .PABITS = 59, */ /* the architectural limit */
         .PABITS = 36,
         .insn_flags = CPU_MIPS64R2 | ASE_DSP | ASE_DSPR2,
         .mmu_type = MMU_TYPE_R4000,
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index d875211a2d..f4ac7611dd 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -213,6 +213,7 @@ static const VMStateDescription vmstate_fpu = {
     .name = "cpu/fpu",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fpu_needed,
     .fields = (VMStateField[]) {
         VMSTATE_FLOAT64_ARRAY(env.fpr, PowerPCCPU, 32),
         VMSTATE_UINTTL(env.fpscr, PowerPCCPU),
@@ -231,6 +232,7 @@ static const VMStateDescription vmstate_altivec = {
     .name = "cpu/altivec",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = altivec_needed,
     .fields = (VMStateField[]) {
         VMSTATE_AVR_ARRAY(env.avr, PowerPCCPU, 32),
         VMSTATE_UINT32(env.vscr, PowerPCCPU),
@@ -249,6 +251,7 @@ static const VMStateDescription vmstate_vsx = {
     .name = "cpu/vsx",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = vsx_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64_ARRAY(env.vsr, PowerPCCPU, 32),
         VMSTATE_END_OF_LIST()
@@ -269,6 +272,7 @@ static const VMStateDescription vmstate_tm = {
     .version_id = 1,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
+    .needed = tm_needed,
     .fields      = (VMStateField []) {
         VMSTATE_UINTTL_ARRAY(env.tm_gpr, PowerPCCPU, 32),
         VMSTATE_AVR_ARRAY(env.tm_vsr, PowerPCCPU, 64),
@@ -302,6 +306,7 @@ static const VMStateDescription vmstate_sr = {
     .name = "cpu/sr",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = sr_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINTTL_ARRAY(env.sr, PowerPCCPU, 32),
         VMSTATE_END_OF_LIST()
@@ -351,6 +356,7 @@ static const VMStateDescription vmstate_slb = {
     .name = "cpu/slb",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = slb_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32_EQUAL(env.slb_nr, PowerPCCPU),
         VMSTATE_SLB_ARRAY(env.slb, PowerPCCPU, MAX_SLB_ENTRIES),
@@ -383,6 +389,7 @@ static const VMStateDescription vmstate_tlb6xx = {
     .name = "cpu/tlb6xx",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = tlb6xx_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU),
         VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlb6, PowerPCCPU,
@@ -429,6 +436,7 @@ static const VMStateDescription vmstate_pbr403 = {
     .name = "cpu/pbr403",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = pbr403_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINTTL_ARRAY(env.pb, PowerPCCPU, 4),
         VMSTATE_END_OF_LIST()
@@ -439,6 +447,7 @@ static const VMStateDescription vmstate_tlbemb = {
     .name = "cpu/tlb6xx",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = tlbemb_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU),
         VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlbe, PowerPCCPU,
@@ -448,13 +457,9 @@ static const VMStateDescription vmstate_tlbemb = {
         /* 403 protection registers */
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_pbr403,
-            .needed = pbr403_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_pbr403,
+        NULL
     }
 };
 
@@ -483,6 +488,7 @@ static const VMStateDescription vmstate_tlbmas = {
     .name = "cpu/tlbmas",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = tlbmas_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU),
         VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlbm, PowerPCCPU,
@@ -533,38 +539,18 @@ const VMStateDescription vmstate_ppc_cpu = {
         VMSTATE_UINT32_EQUAL(env.nb_BATs, PowerPCCPU),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_fpu,
-            .needed = fpu_needed,
-        } , {
-            .vmsd = &vmstate_altivec,
-            .needed = altivec_needed,
-        } , {
-            .vmsd = &vmstate_vsx,
-            .needed = vsx_needed,
-        } , {
-            .vmsd = &vmstate_sr,
-            .needed = sr_needed,
-        } , {
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_fpu,
+        &vmstate_altivec,
+        &vmstate_vsx,
+        &vmstate_sr,
 #ifdef TARGET_PPC64
-            .vmsd = &vmstate_tm,
-            .needed = tm_needed,
-        } , {
-            .vmsd = &vmstate_slb,
-            .needed = slb_needed,
-        } , {
+        &vmstate_tm,
+        &vmstate_slb,
 #endif /* TARGET_PPC64 */
-            .vmsd = &vmstate_tlb6xx,
-            .needed = tlb6xx_needed,
-        } , {
-            .vmsd = &vmstate_tlbemb,
-            .needed = tlbemb_needed,
-        } , {
-            .vmsd = &vmstate_tlbmas,
-            .needed = tlbmas_needed,
-        } , {
-            /* empty */
-        }
+        &vmstate_tlb6xx,
+        &vmstate_tlbemb,
+        &vmstate_tlbmas,
+        NULL
     }
 };
diff --git a/target-s390x/machine.c b/target-s390x/machine.c
index 004474959a..b76fb08319 100644
--- a/target-s390x/machine.c
+++ b/target-s390x/machine.c
@@ -42,10 +42,17 @@ static void cpu_pre_save(void *opaque)
     }
 }
 
+static inline bool fpu_needed(void *opaque)
+{
+    /* This looks odd, but we might want to NOT transfer fprs in the future */
+    return true;
+}
+
 const VMStateDescription vmstate_fpu = {
     .name = "cpu/fpu",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fpu_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.vregs[0][0].ll, S390CPU),
         VMSTATE_UINT64(env.vregs[1][0].ll, S390CPU),
@@ -68,16 +75,11 @@ const VMStateDescription vmstate_fpu = {
     }
 };
 
-static inline bool fpu_needed(void *opaque)
-{
-    /* This looks odd, but we might want to NOT transfer fprs in the future */
-    return true;
-}
-
 const VMStateDescription vmstate_vregs = {
     .name = "cpu/vregs",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = vregs_needed,
     .fields = (VMStateField[]) {
         /* vregs[0][0] -> vregs[15][0] and fregs are overlays */
         VMSTATE_UINT64(env.vregs[16][0].ll, S390CPU),
@@ -159,16 +161,10 @@ const VMStateDescription vmstate_s390_cpu = {
         VMSTATE_VBUFFER_UINT32(irqstate, S390CPU, 4, NULL, 0,
                                irqstate_saved_size),
         VMSTATE_END_OF_LIST()
-     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_fpu,
-            .needed = fpu_needed,
-        } , {
-            .vmsd = &vmstate_vregs,
-            .needed = vregs_needed,
-        } , {
-            /* empty */
-        }
+    },
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_fpu,
+        &vmstate_vregs,
+        NULL
     },
 };
diff --git a/target-sh4/cpu.c b/target-sh4/cpu.c
index d187a2bdba..cccb14fe7b 100644
--- a/target-sh4/cpu.c
+++ b/target-sh4/cpu.c
@@ -61,7 +61,8 @@ static void superh_cpu_reset(CPUState *s)
     env->fpscr = FPSCR_PR; /* value for userspace according to the kernel */
     set_float_rounding_mode(float_round_nearest_even, &env->fp_status); /* ?! */
 #else
-    env->sr = SR_MD | SR_RB | SR_BL | SR_I3 | SR_I2 | SR_I1 | SR_I0;
+    env->sr = (1u << SR_MD) | (1u << SR_RB) | (1u << SR_BL) |
+              (1u << SR_I3) | (1u << SR_I2) | (1u << SR_I1) | (1u << SR_I0);
     env->fpscr = FPSCR_DN | FPSCR_RM_ZERO; /* CPU reset value according to SH4 manual */
     set_float_rounding_mode(float_round_to_zero, &env->fp_status);
     set_flush_to_zero(1, &env->fp_status);
diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h
index c8dea6c020..4a027a6c1c 100644
--- a/target-sh4/cpu.h
+++ b/target-sh4/cpu.h
@@ -47,18 +47,18 @@
 #define TARGET_PHYS_ADDR_SPACE_BITS 32
 #define TARGET_VIRT_ADDR_SPACE_BITS 32
 
-#define SR_MD (1 << 30)
-#define SR_RB (1 << 29)
-#define SR_BL (1 << 28)
-#define SR_FD (1 << 15)
-#define SR_M  (1 << 9)
-#define SR_Q  (1 << 8)
-#define SR_I3 (1 << 7)
-#define SR_I2 (1 << 6)
-#define SR_I1 (1 << 5)
-#define SR_I0 (1 << 4)
-#define SR_S  (1 << 1)
-#define SR_T  (1 << 0)
+#define SR_MD 30
+#define SR_RB 29
+#define SR_BL 28
+#define SR_FD 15
+#define SR_M  9
+#define SR_Q  8
+#define SR_I3 7
+#define SR_I2 6
+#define SR_I1 5
+#define SR_I0 4
+#define SR_S  1
+#define SR_T  0
 
 #define FPSCR_MASK             (0x003fffff)
 #define FPSCR_FR               (1 << 21)
@@ -138,7 +138,10 @@ typedef struct CPUSH4State {
     uint32_t flags;		/* general execution flags */
     uint32_t gregs[24];		/* general registers */
     float32 fregs[32];		/* floating point registers */
-    uint32_t sr;		/* status register */
+    uint32_t sr;                /* status register (with T split out) */
+    uint32_t sr_m;              /* M bit of status register */
+    uint32_t sr_q;              /* Q bit of status register */
+    uint32_t sr_t;              /* T bit of status register */
     uint32_t ssr;		/* saved status register */
     uint32_t spc;		/* saved program counter */
     uint32_t gbr;		/* global base register */
@@ -234,7 +237,7 @@ void cpu_load_tlb(CPUSH4State * env);
 #define MMU_USER_IDX 1
 static inline int cpu_mmu_index (CPUSH4State *env)
 {
-    return (env->sr & SR_MD) == 0 ? 1 : 0;
+    return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
 }
 
 #include "exec/cpu-all.h"
@@ -331,6 +334,21 @@ static inline int cpu_ptel_pr (uint32_t ptel)
 
 #define TB_FLAG_PENDING_MOVCA  (1 << 4)
 
+static inline target_ulong cpu_read_sr(CPUSH4State *env)
+{
+    return env->sr | (env->sr_m << SR_M) |
+                     (env->sr_q << SR_Q) |
+                     (env->sr_t << SR_T);
+}
+
+static inline void cpu_write_sr(CPUSH4State *env, target_ulong sr)
+{
+    env->sr_m = (sr >> SR_M) & 1;
+    env->sr_q = (sr >> SR_Q) & 1;
+    env->sr_t = (sr >> SR_T) & 1;
+    env->sr = sr & ~((1u << SR_M) | (1u << SR_Q) | (1u << SR_T));
+}
+
 static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
@@ -339,8 +357,8 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
     *flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL
                     | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME))   /* Bits  0- 3 */
             | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
-            | (env->sr & (SR_MD | SR_RB))                      /* Bits 29-30 */
-            | (env->sr & SR_FD)                                /* Bit 15 */
+            | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))      /* Bits 29-30 */
+            | (env->sr & (1u << SR_FD))                        /* Bit 15 */
             | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 4 */
 }
 
diff --git a/target-sh4/gdbstub.c b/target-sh4/gdbstub.c
index df4fa2af76..a365a27aad 100644
--- a/target-sh4/gdbstub.c
+++ b/target-sh4/gdbstub.c
@@ -31,7 +31,7 @@ int superh_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
 
     switch (n) {
     case 0 ... 7:
-        if ((env->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB)) {
+        if ((env->sr & (1u << SR_MD)) && (env->sr & (1u << SR_RB))) {
             return gdb_get_regl(mem_buf, env->gregs[n + 16]);
         } else {
             return gdb_get_regl(mem_buf, env->gregs[n]);
@@ -51,7 +51,7 @@ int superh_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
     case 21:
         return gdb_get_regl(mem_buf, env->macl);
     case 22:
-        return gdb_get_regl(mem_buf, env->sr);
+        return gdb_get_regl(mem_buf, cpu_read_sr(env));
     case 23:
         return gdb_get_regl(mem_buf, env->fpul);
     case 24:
@@ -83,7 +83,7 @@ int superh_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
 
     switch (n) {
     case 0 ... 7:
-        if ((env->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB)) {
+        if ((env->sr & (1u << SR_MD)) && (env->sr & (1u << SR_RB))) {
             env->gregs[n + 16] = ldl_p(mem_buf);
         } else {
             env->gregs[n] = ldl_p(mem_buf);
@@ -111,7 +111,7 @@ int superh_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
         env->macl = ldl_p(mem_buf);
         break;
     case 22:
-        env->sr = ldl_p(mem_buf);
+        cpu_write_sr(env, ldl_p(mem_buf));
         break;
     case 23:
         env->fpul = ldl_p(mem_buf);
diff --git a/target-sh4/helper.c b/target-sh4/helper.c
index 58113601ec..a533f08ea3 100644
--- a/target-sh4/helper.c
+++ b/target-sh4/helper.c
@@ -93,7 +93,7 @@ void superh_cpu_do_interrupt(CPUState *cs)
     do_exp = cs->exception_index != -1;
     do_irq = do_irq && (cs->exception_index == -1);
 
-    if (env->sr & SR_BL) {
+    if (env->sr & (1u << SR_BL)) {
         if (do_exp && cs->exception_index != 0x1e0) {
             cs->exception_index = 0x000; /* masked exception -> reset */
         }
@@ -162,10 +162,10 @@ void superh_cpu_do_interrupt(CPUState *cs)
         log_cpu_state(cs, 0);
     }
 
-    env->ssr = env->sr;
+    env->ssr = cpu_read_sr(env);
     env->spc = env->pc;
     env->sgr = env->gregs[15];
-    env->sr |= SR_BL | SR_MD | SR_RB;
+    env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
 
     if (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
         /* Branch instruction should be executed again before delay slot. */
@@ -182,7 +182,7 @@ void superh_cpu_do_interrupt(CPUState *cs)
         case 0x000:
         case 0x020:
         case 0x140:
-            env->sr &= ~SR_FD;
+            env->sr &= ~(1u << SR_FD);
             env->sr |= 0xf << 4; /* IMASK */
             env->pc = 0xa0000000;
             break;
@@ -355,23 +355,24 @@ static int get_mmu_address(CPUSH4State * env, target_ulong * physical,
     int use_asid, n;
     tlb_t *matching = NULL;
 
-    use_asid = (env->mmucr & MMUCR_SV) == 0 || (env->sr & SR_MD) == 0;
+    use_asid = !(env->mmucr & MMUCR_SV) || !(env->sr & (1u << SR_MD));
 
     if (rw == 2) {
         n = find_itlb_entry(env, address, use_asid);
 	if (n >= 0) {
 	    matching = &env->itlb[n];
-	    if (!(env->sr & SR_MD) && !(matching->pr & 2))
+            if (!(env->sr & (1u << SR_MD)) && !(matching->pr & 2)) {
 		n = MMU_ITLB_VIOLATION;
-	    else
+            } else {
 		*prot = PAGE_EXEC;
+            }
         } else {
             n = find_utlb_entry(env, address, use_asid);
             if (n >= 0) {
                 n = copy_utlb_entry_itlb(env, n);
                 matching = &env->itlb[n];
-                if (!(env->sr & SR_MD) && !(matching->pr & 2)) {
-                      n = MMU_ITLB_VIOLATION;
+                if (!(env->sr & (1u << SR_MD)) && !(matching->pr & 2)) {
+                    n = MMU_ITLB_VIOLATION;
                 } else {
                     *prot = PAGE_READ | PAGE_EXEC;
                     if ((matching->pr & 1) && matching->d) {
@@ -388,7 +389,7 @@ static int get_mmu_address(CPUSH4State * env, target_ulong * physical,
 	n = find_utlb_entry(env, address, use_asid);
 	if (n >= 0) {
 	    matching = &env->utlb[n];
-            if (!(env->sr & SR_MD) && !(matching->pr & 2)) {
+            if (!(env->sr & (1u << SR_MD)) && !(matching->pr & 2)) {
                 n = (rw == 1) ? MMU_DTLB_VIOLATION_WRITE :
                     MMU_DTLB_VIOLATION_READ;
             } else if ((rw == 1) && !(matching->pr & 1)) {
@@ -421,7 +422,7 @@ static int get_physical_address(CPUSH4State * env, target_ulong * physical,
     /* P1, P2 and P4 areas do not use translation */
     if ((address >= 0x80000000 && address < 0xc0000000) ||
 	address >= 0xe0000000) {
-	if (!(env->sr & SR_MD)
+        if (!(env->sr & (1u << SR_MD))
 	    && (address < 0xe0000000 || address >= 0xe4000000)) {
 	    /* Unauthorized access in user mode (only store queues are available) */
 	    fprintf(stderr, "Unauthorized access\n");
@@ -690,7 +691,7 @@ void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, hwaddr addr,
     uint8_t d = (uint8_t)((mem_value & 0x00000200) >> 9);
     uint8_t v = (uint8_t)((mem_value & 0x00000100) >> 8);
     uint8_t asid = (uint8_t)(mem_value & 0x000000ff);
-    int use_asid = (s->mmucr & MMUCR_SV) == 0 || (s->sr & SR_MD) == 0;
+    int use_asid = !(s->mmucr & MMUCR_SV) || !(s->sr & (1u << SR_MD));
 
     if (associate) {
         int i;
@@ -821,10 +822,10 @@ void cpu_sh4_write_mmaped_utlb_data(CPUSH4State *s, hwaddr addr,
 int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr)
 {
     int n;
-    int use_asid = (env->mmucr & MMUCR_SV) == 0 || (env->sr & SR_MD) == 0;
+    int use_asid = !(env->mmucr & MMUCR_SV) || !(env->sr & (1u << SR_MD));
 
     /* check area */
-    if (env->sr & SR_MD) {
+    if (env->sr & (1u << SR_MD)) {
         /* For previledged mode, P2 and P4 area is not cachable. */
         if ((0xA0000000 <= addr && addr < 0xC0000000) || 0xE0000000 <= addr)
             return 0;
diff --git a/target-sh4/helper.h b/target-sh4/helper.h
index 3b5c436ab4..c9bc407042 100644
--- a/target-sh4/helper.h
+++ b/target-sh4/helper.h
@@ -11,7 +11,6 @@ DEF_HELPER_3(movcal, void, env, i32, i32)
 DEF_HELPER_1(discard_movcal_backup, void, env)
 DEF_HELPER_2(ocbi, void, env, i32)
 
-DEF_HELPER_3(div1, i32, env, i32, i32)
 DEF_HELPER_3(macl, void, env, i32, i32)
 DEF_HELPER_3(macw, void, env, i32, i32)
 
diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index 74a5c4ea77..cbc11aeccd 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -156,124 +156,6 @@ void helper_ocbi(CPUSH4State *env, uint32_t address)
     }
 }
 
-#define T (env->sr & SR_T)
-#define Q (env->sr & SR_Q ? 1 : 0)
-#define M (env->sr & SR_M ? 1 : 0)
-#define SETT env->sr |= SR_T
-#define CLRT env->sr &= ~SR_T
-#define SETQ env->sr |= SR_Q
-#define CLRQ env->sr &= ~SR_Q
-#define SETM env->sr |= SR_M
-#define CLRM env->sr &= ~SR_M
-
-uint32_t helper_div1(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
-{
-    uint32_t tmp0, tmp2;
-    uint8_t old_q, tmp1 = 0xff;
-
-    //printf("div1 arg0=0x%08x arg1=0x%08x M=%d Q=%d T=%d\n", arg0, arg1, M, Q, T);
-    old_q = Q;
-    if ((0x80000000 & arg1) != 0)
-	SETQ;
-    else
-	CLRQ;
-    tmp2 = arg0;
-    arg1 <<= 1;
-    arg1 |= T;
-    switch (old_q) {
-    case 0:
-	switch (M) {
-	case 0:
-	    tmp0 = arg1;
-	    arg1 -= tmp2;
-	    tmp1 = arg1 > tmp0;
-	    switch (Q) {
-	    case 0:
-		if (tmp1)
-		    SETQ;
-		else
-		    CLRQ;
-		break;
-	    case 1:
-		if (tmp1 == 0)
-		    SETQ;
-		else
-		    CLRQ;
-		break;
-	    }
-	    break;
-	case 1:
-	    tmp0 = arg1;
-	    arg1 += tmp2;
-	    tmp1 = arg1 < tmp0;
-	    switch (Q) {
-	    case 0:
-		if (tmp1 == 0)
-		    SETQ;
-		else
-		    CLRQ;
-		break;
-	    case 1:
-		if (tmp1)
-		    SETQ;
-		else
-		    CLRQ;
-		break;
-	    }
-	    break;
-	}
-	break;
-    case 1:
-	switch (M) {
-	case 0:
-	    tmp0 = arg1;
-	    arg1 += tmp2;
-	    tmp1 = arg1 < tmp0;
-	    switch (Q) {
-	    case 0:
-		if (tmp1)
-		    SETQ;
-		else
-		    CLRQ;
-		break;
-	    case 1:
-		if (tmp1 == 0)
-		    SETQ;
-		else
-		    CLRQ;
-		break;
-	    }
-	    break;
-	case 1:
-	    tmp0 = arg1;
-	    arg1 -= tmp2;
-	    tmp1 = arg1 > tmp0;
-	    switch (Q) {
-	    case 0:
-		if (tmp1 == 0)
-		    SETQ;
-		else
-		    CLRQ;
-		break;
-	    case 1:
-		if (tmp1)
-		    SETQ;
-		else
-		    CLRQ;
-		break;
-	    }
-	    break;
-	}
-	break;
-    }
-    if (Q == M)
-	SETT;
-    else
-	CLRT;
-    //printf("Output: arg1=0x%08x M=%d Q=%d T=%d\n", arg1, M, Q, T);
-    return arg1;
-}
-
 void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
     int64_t res;
@@ -282,7 +164,7 @@ void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
     res += (int64_t) (int32_t) arg0 *(int64_t) (int32_t) arg1;
     env->mach = (res >> 32) & 0xffffffff;
     env->macl = res & 0xffffffff;
-    if (env->sr & SR_S) {
+    if (env->sr & (1u << SR_S)) {
 	if (res < 0)
 	    env->mach |= 0xffff0000;
 	else
@@ -298,7 +180,7 @@ void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
     res += (int64_t) (int16_t) arg0 *(int64_t) (int16_t) arg1;
     env->mach = (res >> 32) & 0xffffffff;
     env->macl = res & 0xffffffff;
-    if (env->sr & SR_S) {
+    if (env->sr & (1u << SR_S)) {
 	if (res < -0x80000000) {
 	    env->mach = 1;
 	    env->macl = 0x80000000;
@@ -309,16 +191,6 @@ void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
     }
 }
 
-static inline void set_t(CPUSH4State *env)
-{
-    env->sr |= SR_T;
-}
-
-static inline void clr_t(CPUSH4State *env)
-{
-    env->sr &= ~SR_T;
-}
-
 void helper_ld_fpscr(CPUSH4State *env, uint32_t val)
 {
     env->fpscr = val & FPSCR_MASK;
@@ -403,10 +275,8 @@ void helper_fcmp_eq_FT(CPUSH4State *env, float32 t0, float32 t1)
     relation = float32_compare(t0, t1, &env->fp_status);
     if (unlikely(relation == float_relation_unordered)) {
         update_fpscr(env, GETPC());
-    } else if (relation == float_relation_equal) {
-        set_t(env);
     } else {
-        clr_t(env);
+        env->sr_t = (relation == float_relation_equal);
     }
 }
 
@@ -418,10 +288,8 @@ void helper_fcmp_eq_DT(CPUSH4State *env, float64 t0, float64 t1)
     relation = float64_compare(t0, t1, &env->fp_status);
     if (unlikely(relation == float_relation_unordered)) {
         update_fpscr(env, GETPC());
-    } else if (relation == float_relation_equal) {
-        set_t(env);
     } else {
-        clr_t(env);
+        env->sr_t = (relation == float_relation_equal);
     }
 }
 
@@ -433,10 +301,8 @@ void helper_fcmp_gt_FT(CPUSH4State *env, float32 t0, float32 t1)
     relation = float32_compare(t0, t1, &env->fp_status);
     if (unlikely(relation == float_relation_unordered)) {
         update_fpscr(env, GETPC());
-    } else if (relation == float_relation_greater) {
-        set_t(env);
     } else {
-        clr_t(env);
+        env->sr_t = (relation == float_relation_greater);
     }
 }
 
@@ -448,10 +314,8 @@ void helper_fcmp_gt_DT(CPUSH4State *env, float64 t0, float64 t1)
     relation = float64_compare(t0, t1, &env->fp_status);
     if (unlikely(relation == float_relation_unordered)) {
         update_fpscr(env, GETPC());
-    } else if (relation == float_relation_greater) {
-        set_t(env);
     } else {
-        clr_t(env);
+        env->sr_t = (relation == float_relation_greater);
     }
 }
 
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 41aa928321..28259f9e14 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -18,7 +18,6 @@
  */
 
 #define DEBUG_DISAS
-//#define SH4_SINGLE_STEP
 
 #include "cpu.h"
 #include "disas/disas.h"
@@ -47,7 +46,7 @@ typedef struct DisasContext {
 #if defined(CONFIG_USER_ONLY)
 #define IS_USER(ctx) 1
 #else
-#define IS_USER(ctx) (!(ctx->flags & SR_MD))
+#define IS_USER(ctx) (!(ctx->flags & (1u << SR_MD)))
 #endif
 
 enum {
@@ -62,7 +61,8 @@ enum {
 /* global register indexes */
 static TCGv_ptr cpu_env;
 static TCGv cpu_gregs[24];
-static TCGv cpu_pc, cpu_sr, cpu_ssr, cpu_spc, cpu_gbr;
+static TCGv cpu_sr, cpu_sr_m, cpu_sr_q, cpu_sr_t;
+static TCGv cpu_pc, cpu_ssr, cpu_spc, cpu_gbr;
 static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl;
 static TCGv cpu_pr, cpu_fpscr, cpu_fpul, cpu_ldst;
 static TCGv cpu_fregs[32];
@@ -110,6 +110,12 @@ void sh4_translate_init(void)
                                     offsetof(CPUSH4State, pc), "PC");
     cpu_sr = tcg_global_mem_new_i32(TCG_AREG0,
                                     offsetof(CPUSH4State, sr), "SR");
+    cpu_sr_m = tcg_global_mem_new_i32(TCG_AREG0,
+                                    offsetof(CPUSH4State, sr_m), "SR_M");
+    cpu_sr_q = tcg_global_mem_new_i32(TCG_AREG0,
+                                    offsetof(CPUSH4State, sr_q), "SR_Q");
+    cpu_sr_t = tcg_global_mem_new_i32(TCG_AREG0,
+                                    offsetof(CPUSH4State, sr_t), "SR_T");
     cpu_ssr = tcg_global_mem_new_i32(TCG_AREG0,
                                      offsetof(CPUSH4State, ssr), "SSR");
     cpu_spc = tcg_global_mem_new_i32(TCG_AREG0,
@@ -156,7 +162,7 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f,
     CPUSH4State *env = &cpu->env;
     int i;
     cpu_fprintf(f, "pc=0x%08x sr=0x%08x pr=0x%08x fpscr=0x%08x\n",
-		env->pc, env->sr, env->pr, env->fpscr);
+                env->pc, cpu_read_sr(env), env->pr, env->fpscr);
     cpu_fprintf(f, "spc=0x%08x ssr=0x%08x gbr=0x%08x vbr=0x%08x\n",
 		env->spc, env->ssr, env->gbr, env->vbr);
     cpu_fprintf(f, "sgr=0x%08x dbr=0x%08x delayed_pc=0x%08x fpul=0x%08x\n",
@@ -175,6 +181,30 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f,
     }
 }
 
+static void gen_read_sr(TCGv dst)
+{
+    TCGv t0 = tcg_temp_new();
+    tcg_gen_shli_i32(t0, cpu_sr_q, SR_Q);
+    tcg_gen_or_i32(dst, dst, t0);
+    tcg_gen_shli_i32(t0, cpu_sr_m, SR_M);
+    tcg_gen_or_i32(dst, dst, t0);
+    tcg_gen_shli_i32(t0, cpu_sr_t, SR_T);
+    tcg_gen_or_i32(dst, cpu_sr, t0);
+    tcg_temp_free_i32(t0);
+}
+
+static void gen_write_sr(TCGv src)
+{
+    tcg_gen_andi_i32(cpu_sr, src,
+                     ~((1u << SR_Q) | (1u << SR_M) | (1u << SR_T)));
+    tcg_gen_shri_i32(cpu_sr_q, src, SR_Q);
+    tcg_gen_andi_i32(cpu_sr_q, cpu_sr_q, 1);
+    tcg_gen_shri_i32(cpu_sr_m, src, SR_M);
+    tcg_gen_andi_i32(cpu_sr_m, cpu_sr_m, 1);
+    tcg_gen_shri_i32(cpu_sr_t, src, SR_T);
+    tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
+}
+
 static void gen_goto_tb(DisasContext * ctx, int n, target_ulong dest)
 {
     TranslationBlock *tb;
@@ -210,12 +240,9 @@ static void gen_jump(DisasContext * ctx)
 
 static inline void gen_branch_slot(uint32_t delayed_pc, int t)
 {
-    TCGv sr;
     TCGLabel *label = gen_new_label();
     tcg_gen_movi_i32(cpu_delayed_pc, delayed_pc);
-    sr = tcg_temp_new();
-    tcg_gen_andi_i32(sr, cpu_sr, SR_T);
-    tcg_gen_brcondi_i32(t ? TCG_COND_EQ:TCG_COND_NE, sr, 0, label);
+    tcg_gen_brcondi_i32(t ? TCG_COND_EQ : TCG_COND_NE, cpu_sr_t, 0, label);
     tcg_gen_ori_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE);
     gen_set_label(label);
 }
@@ -224,13 +251,8 @@ static inline void gen_branch_slot(uint32_t delayed_pc, int t)
 static void gen_conditional_jump(DisasContext * ctx,
 				 target_ulong ift, target_ulong ifnott)
 {
-    TCGLabel *l1;
-    TCGv sr;
-
-    l1 = gen_new_label();
-    sr = tcg_temp_new();
-    tcg_gen_andi_i32(sr, cpu_sr, SR_T);
-    tcg_gen_brcondi_i32(TCG_COND_NE, sr, 0, l1);
+    TCGLabel *l1 = gen_new_label();
+    tcg_gen_brcondi_i32(TCG_COND_NE, cpu_sr_t, 0, l1);
     gen_goto_tb(ctx, 0, ifnott);
     gen_set_label(l1);
     gen_goto_tb(ctx, 1, ift);
@@ -252,54 +274,12 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
     gen_jump(ctx);
 }
 
-static inline void gen_cmp(int cond, TCGv t0, TCGv t1)
-{
-    TCGv t;
-
-    t = tcg_temp_new();
-    tcg_gen_setcond_i32(cond, t, t1, t0);
-    tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
-    tcg_gen_or_i32(cpu_sr, cpu_sr, t);
-
-    tcg_temp_free(t);
-}
-
-static inline void gen_cmp_imm(int cond, TCGv t0, int32_t imm)
-{
-    TCGv t;
-
-    t = tcg_temp_new();
-    tcg_gen_setcondi_i32(cond, t, t0, imm);
-    tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
-    tcg_gen_or_i32(cpu_sr, cpu_sr, t);
-
-    tcg_temp_free(t);
-}
-
 static inline void gen_store_flags(uint32_t flags)
 {
     tcg_gen_andi_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE);
     tcg_gen_ori_i32(cpu_flags, cpu_flags, flags);
 }
 
-static inline void gen_copy_bit_i32(TCGv t0, int p0, TCGv t1, int p1)
-{
-    TCGv tmp = tcg_temp_new();
-
-    p0 &= 0x1f;
-    p1 &= 0x1f;
-
-    tcg_gen_andi_i32(tmp, t1, (1 << p1));
-    tcg_gen_andi_i32(t0, t0, ~(1 << p0));
-    if (p0 < p1)
-        tcg_gen_shri_i32(tmp, tmp, p1 - p0);
-    else if (p0 > p1)
-        tcg_gen_shli_i32(tmp, tmp, p0 - p1);
-    tcg_gen_or_i32(t0, t0, tmp);
-
-    tcg_temp_free(tmp);
-}
-
 static inline void gen_load_fpr64(TCGv_i64 t, int reg)
 {
     tcg_gen_concat_i32_i64(t, cpu_fregs[reg + 1], cpu_fregs[reg]);
@@ -326,10 +306,12 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
 #define B11_8 ((ctx->opcode >> 8) & 0xf)
 #define B15_12 ((ctx->opcode >> 12) & 0xf)
 
-#define REG(x) ((x) < 8 && (ctx->flags & (SR_MD | SR_RB)) == (SR_MD | SR_RB) \
+#define REG(x) ((x) < 8 && (ctx->flags & (1u << SR_MD))\
+                        && (ctx->flags & (1u << SR_RB))\
                 ? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
 
-#define ALTREG(x) ((x) < 8 && (ctx->flags & (SR_MD | SR_RB)) != (SR_MD | SR_RB)\
+#define ALTREG(x) ((x) < 8 && (!(ctx->flags & (1u << SR_MD))\
+                               || !(ctx->flags & (1u << SR_RB)))\
 		? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
 
 #define FREG(x) (ctx->flags & FPSCR_FR ? (x) ^ 0x10 : (x))
@@ -359,7 +341,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
   }
 
 #define CHECK_FPU_ENABLED                                       \
-  if (ctx->flags & SR_FD) {                                     \
+  if (ctx->flags & (1u << SR_FD)) {                             \
       tcg_gen_movi_i32(cpu_pc, ctx->pc);                        \
       if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
           gen_helper_raise_slot_fpu_disable(cpu_env);           \
@@ -409,7 +391,9 @@ static void _decode_opc(DisasContext * ctx)
 
     switch (ctx->opcode) {
     case 0x0019:		/* div0u */
-	tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(SR_M | SR_Q | SR_T));
+        tcg_gen_movi_i32(cpu_sr_m, 0);
+        tcg_gen_movi_i32(cpu_sr_q, 0);
+        tcg_gen_movi_i32(cpu_sr_t, 0);
 	return;
     case 0x000b:		/* rts */
 	CHECK_NOT_DELAY_SLOT
@@ -422,10 +406,10 @@ static void _decode_opc(DisasContext * ctx)
 	tcg_gen_movi_i32(cpu_macl, 0);
 	return;
     case 0x0048:		/* clrs */
-	tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_S);
+        tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_S));
 	return;
     case 0x0008:		/* clrt */
-        tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+        tcg_gen_movi_i32(cpu_sr_t, 0);
 	return;
     case 0x0038:		/* ldtlb */
 	CHECK_PRIVILEGED
@@ -434,16 +418,16 @@ static void _decode_opc(DisasContext * ctx)
     case 0x002b:		/* rte */
 	CHECK_PRIVILEGED
 	CHECK_NOT_DELAY_SLOT
-	tcg_gen_mov_i32(cpu_sr, cpu_ssr);
+        gen_write_sr(cpu_ssr);
 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
 	ctx->flags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x0058:		/* sets */
-	tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_S);
+        tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_S));
 	return;
     case 0x0018:		/* sett */
-        tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_T);
+        tcg_gen_movi_i32(cpu_sr_t, 1);
 	return;
     case 0xfbfd:		/* frchg */
 	tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_FR);
@@ -659,22 +643,14 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0x300e:		/* addc Rm,Rn */
         {
-            TCGv t0, t1, t2;
-            t0 = tcg_temp_new();
-            tcg_gen_andi_i32(t0, cpu_sr, SR_T);
+            TCGv t0, t1;
+            t0 = tcg_const_tl(0);
             t1 = tcg_temp_new();
-            tcg_gen_add_i32(t1, REG(B7_4), REG(B11_8));
-            tcg_gen_add_i32(t0, t0, t1);
-            t2 = tcg_temp_new();
-            tcg_gen_setcond_i32(TCG_COND_GTU, t2, REG(B11_8), t1);
-            tcg_gen_setcond_i32(TCG_COND_GTU, t1, t1, t0);
-            tcg_gen_or_i32(t1, t1, t2);
-            tcg_temp_free(t2);
-            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
-            tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
-            tcg_temp_free(t1);
-            tcg_gen_mov_i32(REG(B11_8), t0);
+            tcg_gen_add2_i32(t1, cpu_sr_t, cpu_sr_t, t0, REG(B7_4), t0);
+            tcg_gen_add2_i32(REG(B11_8), cpu_sr_t,
+                             REG(B11_8), t0, t1, cpu_sr_t);
             tcg_temp_free(t0);
+            tcg_temp_free(t1);
         }
 	return;
     case 0x300f:		/* addv Rm,Rn */
@@ -686,11 +662,9 @@ static void _decode_opc(DisasContext * ctx)
             tcg_gen_xor_i32(t1, t0, REG(B11_8));
             t2 = tcg_temp_new();
             tcg_gen_xor_i32(t2, REG(B7_4), REG(B11_8));
-            tcg_gen_andc_i32(t1, t1, t2);
+            tcg_gen_andc_i32(cpu_sr_t, t1, t2);
             tcg_temp_free(t2);
-            tcg_gen_shri_i32(t1, t1, 31);
-            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
-            tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
+            tcg_gen_shri_i32(cpu_sr_t, cpu_sr_t, 31);
             tcg_temp_free(t1);
             tcg_gen_mov_i32(REG(B7_4), t0);
             tcg_temp_free(t0);
@@ -700,54 +674,79 @@ static void _decode_opc(DisasContext * ctx)
 	tcg_gen_and_i32(REG(B11_8), REG(B11_8), REG(B7_4));
 	return;
     case 0x3000:		/* cmp/eq Rm,Rn */
-	gen_cmp(TCG_COND_EQ, REG(B7_4), REG(B11_8));
+        tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, REG(B11_8), REG(B7_4));
 	return;
     case 0x3003:		/* cmp/ge Rm,Rn */
-	gen_cmp(TCG_COND_GE, REG(B7_4), REG(B11_8));
+        tcg_gen_setcond_i32(TCG_COND_GE, cpu_sr_t, REG(B11_8), REG(B7_4));
 	return;
     case 0x3007:		/* cmp/gt Rm,Rn */
-	gen_cmp(TCG_COND_GT, REG(B7_4), REG(B11_8));
+        tcg_gen_setcond_i32(TCG_COND_GT, cpu_sr_t, REG(B11_8), REG(B7_4));
 	return;
     case 0x3006:		/* cmp/hi Rm,Rn */
-	gen_cmp(TCG_COND_GTU, REG(B7_4), REG(B11_8));
+        tcg_gen_setcond_i32(TCG_COND_GTU, cpu_sr_t, REG(B11_8), REG(B7_4));
 	return;
     case 0x3002:		/* cmp/hs Rm,Rn */
-	gen_cmp(TCG_COND_GEU, REG(B7_4), REG(B11_8));
+        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_sr_t, REG(B11_8), REG(B7_4));
 	return;
     case 0x200c:		/* cmp/str Rm,Rn */
 	{
 	    TCGv cmp1 = tcg_temp_new();
 	    TCGv cmp2 = tcg_temp_new();
-	    tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
 	    tcg_gen_xor_i32(cmp1, REG(B7_4), REG(B11_8));
 	    tcg_gen_andi_i32(cmp2, cmp1, 0xff000000);
-	    tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
-	    tcg_gen_or_i32(cpu_sr, cpu_sr, cmp2);
+            tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, cmp2, 0);
 	    tcg_gen_andi_i32(cmp2, cmp1, 0x00ff0000);
 	    tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
-	    tcg_gen_or_i32(cpu_sr, cpu_sr, cmp2);
+            tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2);
 	    tcg_gen_andi_i32(cmp2, cmp1, 0x0000ff00);
 	    tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
-	    tcg_gen_or_i32(cpu_sr, cpu_sr, cmp2);
+            tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2);
 	    tcg_gen_andi_i32(cmp2, cmp1, 0x000000ff);
 	    tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
-	    tcg_gen_or_i32(cpu_sr, cpu_sr, cmp2);
+            tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2);
 	    tcg_temp_free(cmp2);
 	    tcg_temp_free(cmp1);
 	}
 	return;
     case 0x2007:		/* div0s Rm,Rn */
-	{
-	    gen_copy_bit_i32(cpu_sr, 8, REG(B11_8), 31);	/* SR_Q */
-	    gen_copy_bit_i32(cpu_sr, 9, REG(B7_4), 31);		/* SR_M */
-	    TCGv val = tcg_temp_new();
-	    tcg_gen_xor_i32(val, REG(B7_4), REG(B11_8));
-	    gen_copy_bit_i32(cpu_sr, 0, val, 31);		/* SR_T */
-	    tcg_temp_free(val);
-	}
+        tcg_gen_shri_i32(cpu_sr_q, REG(B11_8), 31);         /* SR_Q */
+        tcg_gen_shri_i32(cpu_sr_m, REG(B7_4), 31);          /* SR_M */
+        tcg_gen_xor_i32(cpu_sr_t, cpu_sr_q, cpu_sr_m);      /* SR_T */
 	return;
     case 0x3004:		/* div1 Rm,Rn */
-        gen_helper_div1(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
+        {
+            TCGv t0 = tcg_temp_new();
+            TCGv t1 = tcg_temp_new();
+            TCGv t2 = tcg_temp_new();
+            TCGv zero = tcg_const_i32(0);
+
+            /* shift left arg1, saving the bit being pushed out and inserting
+               T on the right */
+            tcg_gen_shri_i32(t0, REG(B11_8), 31);
+            tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
+            tcg_gen_or_i32(REG(B11_8), REG(B11_8), cpu_sr_t);
+
+            /* Add or subtract arg0 from arg1 depending if Q == M. To avoid
+               using 64-bit temps, we compute arg0's high part from q ^ m, so
+               that it is 0x00000000 when adding the value or 0xffffffff when
+               subtracting it. */
+            tcg_gen_xor_i32(t1, cpu_sr_q, cpu_sr_m);
+            tcg_gen_subi_i32(t1, t1, 1);
+            tcg_gen_neg_i32(t2, REG(B7_4));
+            tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, zero, REG(B7_4), t2);
+            tcg_gen_add2_i32(REG(B11_8), t1, REG(B11_8), zero, t2, t1);
+
+            /* compute T and Q depending on carry */
+            tcg_gen_andi_i32(t1, t1, 1);
+            tcg_gen_xor_i32(t1, t1, t0);
+            tcg_gen_xori_i32(cpu_sr_t, t1, 1);
+            tcg_gen_xor_i32(cpu_sr_q, cpu_sr_m, t1);
+
+            tcg_temp_free(zero);
+            tcg_temp_free(t2);
+            tcg_temp_free(t1);
+            tcg_temp_free(t0);
+        }
 	return;
     case 0x300d:		/* dmuls.l Rm,Rn */
         tcg_gen_muls2_i32(cpu_macl, cpu_mach, REG(B7_4), REG(B11_8));
@@ -827,19 +826,13 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0x600a:		/* negc Rm,Rn */
         {
-	    TCGv t0, t1;
-            t0 = tcg_temp_new();
-            tcg_gen_neg_i32(t0, REG(B7_4));
-            t1 = tcg_temp_new();
-            tcg_gen_andi_i32(t1, cpu_sr, SR_T);
-            tcg_gen_sub_i32(REG(B11_8), t0, t1);
-            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
-            tcg_gen_setcondi_i32(TCG_COND_GTU, t1, t0, 0);
-            tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
-            tcg_gen_setcond_i32(TCG_COND_GTU, t1, REG(B11_8), t0);
-            tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
+            TCGv t0 = tcg_const_i32(0);
+            tcg_gen_add2_i32(REG(B11_8), cpu_sr_t,
+                             REG(B7_4), t0, cpu_sr_t, t0);
+            tcg_gen_sub2_i32(REG(B11_8), cpu_sr_t,
+                             t0, t0, REG(B11_8), cpu_sr_t);
+            tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
             tcg_temp_free(t0);
-            tcg_temp_free(t1);
         }
 	return;
     case 0x6007:		/* not Rm,Rn */
@@ -918,22 +911,15 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0x300a:		/* subc Rm,Rn */
         {
-            TCGv t0, t1, t2;
-            t0 = tcg_temp_new();
-            tcg_gen_andi_i32(t0, cpu_sr, SR_T);
+            TCGv t0, t1;
+            t0 = tcg_const_tl(0);
             t1 = tcg_temp_new();
-            tcg_gen_sub_i32(t1, REG(B11_8), REG(B7_4));
-            tcg_gen_sub_i32(t0, t1, t0);
-            t2 = tcg_temp_new();
-            tcg_gen_setcond_i32(TCG_COND_LTU, t2, REG(B11_8), t1);
-            tcg_gen_setcond_i32(TCG_COND_LTU, t1, t1, t0);
-            tcg_gen_or_i32(t1, t1, t2);
-            tcg_temp_free(t2);
-            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
-            tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
-            tcg_temp_free(t1);
-            tcg_gen_mov_i32(REG(B11_8), t0);
+            tcg_gen_add2_i32(t1, cpu_sr_t, cpu_sr_t, t0, REG(B7_4), t0);
+            tcg_gen_sub2_i32(REG(B11_8), cpu_sr_t,
+                             REG(B11_8), t0, t1, cpu_sr_t);
+            tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
             tcg_temp_free(t0);
+            tcg_temp_free(t1);
         }
 	return;
     case 0x300b:		/* subv Rm,Rn */
@@ -947,9 +933,7 @@ static void _decode_opc(DisasContext * ctx)
             tcg_gen_xor_i32(t2, REG(B11_8), REG(B7_4));
             tcg_gen_and_i32(t1, t1, t2);
             tcg_temp_free(t2);
-            tcg_gen_shri_i32(t1, t1, 31);
-            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
-            tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
+            tcg_gen_shri_i32(cpu_sr_t, t1, 31);
             tcg_temp_free(t1);
             tcg_gen_mov_i32(REG(B11_8), t0);
             tcg_temp_free(t0);
@@ -959,7 +943,7 @@ static void _decode_opc(DisasContext * ctx)
 	{
 	    TCGv val = tcg_temp_new();
 	    tcg_gen_and_i32(val, REG(B7_4), REG(B11_8));
-	    gen_cmp_imm(TCG_COND_EQ, val, 0);
+            tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
 	    tcg_temp_free(val);
 	}
 	return;
@@ -1025,24 +1009,19 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf00b: /* fmov {F,D,X}Rm,@-Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
+        TCGv addr = tcg_temp_new_i32();
+        tcg_gen_subi_i32(addr, REG(B11_8), 4);
         if (ctx->flags & FPSCR_SZ) {
-	    TCGv addr = tcg_temp_new_i32();
 	    int fr = XREG(B7_4);
-	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
             tcg_gen_qemu_st_i32(cpu_fregs[fr+1], addr, ctx->memidx, MO_TEUL);
 	    tcg_gen_subi_i32(addr, addr, 4);
             tcg_gen_qemu_st_i32(cpu_fregs[fr], addr, ctx->memidx, MO_TEUL);
-	    tcg_gen_mov_i32(REG(B11_8), addr);
-	    tcg_temp_free(addr);
 	} else {
-	    TCGv addr;
-	    addr = tcg_temp_new_i32();
-	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
             tcg_gen_qemu_st_i32(cpu_fregs[FREG(B7_4)], addr,
                                 ctx->memidx, MO_TEUL);
-	    tcg_gen_mov_i32(REG(B11_8), addr);
-	    tcg_temp_free(addr);
 	}
+        tcg_gen_mov_i32(REG(B11_8), addr);
+        tcg_temp_free(addr);
 	return;
     case 0xf006: /* fmov @(R0,Rm),{F,D,X}Rm - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
@@ -1210,7 +1189,7 @@ static void _decode_opc(DisasContext * ctx)
 	ctx->flags |= DELAY_SLOT_CONDITIONAL;
 	return;
     case 0x8800:		/* cmp/eq #imm,R0 */
-	gen_cmp_imm(TCG_COND_EQ, REG(0), B7_0s);
+        tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
 	return;
     case 0xc400:		/* mov.b @(disp,GBR),R0 */
 	{
@@ -1326,7 +1305,7 @@ static void _decode_opc(DisasContext * ctx)
 	{
 	    TCGv val = tcg_temp_new();
 	    tcg_gen_andi_i32(val, REG(0), B7_0);
-	    gen_cmp_imm(TCG_COND_EQ, val, 0);
+            tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
 	    tcg_temp_free(val);
 	}
 	return;
@@ -1336,7 +1315,7 @@ static void _decode_opc(DisasContext * ctx)
 	    tcg_gen_add_i32(val, REG(0), cpu_gbr);
             tcg_gen_qemu_ld_i32(val, val, ctx->memidx, MO_UB);
 	    tcg_gen_andi_i32(val, val, B7_0);
-	    gen_cmp_imm(TCG_COND_EQ, val, 0);
+            tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
 	    tcg_temp_free(val);
 	}
 	return;
@@ -1399,14 +1378,14 @@ static void _decode_opc(DisasContext * ctx)
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x4015:		/* cmp/pl Rn */
-	gen_cmp_imm(TCG_COND_GT, REG(B11_8), 0);
+        tcg_gen_setcondi_i32(TCG_COND_GT, cpu_sr_t, REG(B11_8), 0);
 	return;
     case 0x4011:		/* cmp/pz Rn */
-	gen_cmp_imm(TCG_COND_GE, REG(B11_8), 0);
+        tcg_gen_setcondi_i32(TCG_COND_GE, cpu_sr_t, REG(B11_8), 0);
 	return;
     case 0x4010:		/* dt Rn */
 	tcg_gen_subi_i32(REG(B11_8), REG(B11_8), 1);
-	gen_cmp_imm(TCG_COND_EQ, REG(B11_8), 0);
+        tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(B11_8), 0);
 	return;
     case 0x402b:		/* jmp @Rn */
 	CHECK_NOT_DELAY_SLOT
@@ -1423,15 +1402,21 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0x400e:		/* ldc Rm,SR */
 	CHECK_PRIVILEGED
-	tcg_gen_andi_i32(cpu_sr, REG(B11_8), 0x700083f3);
-	ctx->bstate = BS_STOP;
+        {
+            TCGv val = tcg_temp_new();
+            tcg_gen_andi_i32(val, REG(B11_8), 0x700083f3);
+            gen_write_sr(val);
+            tcg_temp_free(val);
+            ctx->bstate = BS_STOP;
+        }
 	return;
     case 0x4007:		/* ldc.l @Rm+,SR */
 	CHECK_PRIVILEGED
 	{
 	    TCGv val = tcg_temp_new();
             tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TESL);
-	    tcg_gen_andi_i32(cpu_sr, val, 0x700083f3);
+            tcg_gen_andi_i32(val, val, 0x700083f3);
+            gen_write_sr(val);
 	    tcg_temp_free(val);
 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
 	    ctx->bstate = BS_STOP;
@@ -1439,15 +1424,18 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0x0002:		/* stc SR,Rn */
 	CHECK_PRIVILEGED
-	tcg_gen_mov_i32(REG(B11_8), cpu_sr);
+        gen_read_sr(REG(B11_8));
 	return;
     case 0x4003:		/* stc SR,@-Rn */
 	CHECK_PRIVILEGED
 	{
 	    TCGv addr = tcg_temp_new();
+            TCGv val = tcg_temp_new();
 	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
-            tcg_gen_qemu_st_i32(cpu_sr, addr, ctx->memidx, MO_TEUL);
+            gen_read_sr(val);
+            tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
 	    tcg_gen_mov_i32(REG(B11_8), addr);
+            tcg_temp_free(val);
 	    tcg_temp_free(addr);
 	}
 	return;
@@ -1545,7 +1533,7 @@ static void _decode_opc(DisasContext * ctx)
 	tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
 	return;
     case 0x0029:		/* movt Rn */
-	tcg_gen_andi_i32(REG(B11_8), cpu_sr, SR_T);
+        tcg_gen_mov_i32(REG(B11_8), cpu_sr_t);
 	return;
     case 0x0073:
         /* MOVCO.L
@@ -1555,8 +1543,7 @@ static void _decode_opc(DisasContext * ctx)
         */
         if (ctx->features & SH_FEATURE_SH4A) {
             TCGLabel *label = gen_new_label();
-            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
-	    tcg_gen_or_i32(cpu_sr, cpu_sr, cpu_ldst);
+            tcg_gen_mov_i32(cpu_sr_t, cpu_ldst);
 	    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ldst, 0, label);
             tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
 	    gen_set_label(label);
@@ -1609,42 +1596,42 @@ static void _decode_opc(DisasContext * ctx)
     case 0x4024:		/* rotcl Rn */
 	{
 	    TCGv tmp = tcg_temp_new();
-	    tcg_gen_mov_i32(tmp, cpu_sr);
-	    gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 31);
+            tcg_gen_mov_i32(tmp, cpu_sr_t);
+            tcg_gen_shri_i32(cpu_sr_t, REG(B11_8), 31);
 	    tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
-	    gen_copy_bit_i32(REG(B11_8), 0, tmp, 0);
+            tcg_gen_or_i32(REG(B11_8), REG(B11_8), tmp);
 	    tcg_temp_free(tmp);
 	}
 	return;
     case 0x4025:		/* rotcr Rn */
 	{
 	    TCGv tmp = tcg_temp_new();
-	    tcg_gen_mov_i32(tmp, cpu_sr);
-	    gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0);
+            tcg_gen_shli_i32(tmp, cpu_sr_t, 31);
+            tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
 	    tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1);
-	    gen_copy_bit_i32(REG(B11_8), 31, tmp, 0);
+            tcg_gen_or_i32(REG(B11_8), REG(B11_8), tmp);
 	    tcg_temp_free(tmp);
 	}
 	return;
     case 0x4004:		/* rotl Rn */
 	tcg_gen_rotli_i32(REG(B11_8), REG(B11_8), 1);
-	gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0);
+        tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 0);
 	return;
     case 0x4005:		/* rotr Rn */
-	gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0);
+        tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 0);
 	tcg_gen_rotri_i32(REG(B11_8), REG(B11_8), 1);
 	return;
     case 0x4000:		/* shll Rn */
     case 0x4020:		/* shal Rn */
-	gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 31);
+        tcg_gen_shri_i32(cpu_sr_t, REG(B11_8), 31);
 	tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
 	return;
     case 0x4021:		/* shar Rn */
-	gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0);
+        tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
 	tcg_gen_sari_i32(REG(B11_8), REG(B11_8), 1);
 	return;
     case 0x4001:		/* shlr Rn */
-	gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0);
+        tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1);
 	return;
     case 0x4008:		/* shll2 Rn */
@@ -1672,7 +1659,7 @@ static void _decode_opc(DisasContext * ctx)
 	    tcg_gen_mov_i32(addr, REG(B11_8));
 	    val = tcg_temp_local_new();
             tcg_gen_qemu_ld_i32(val, addr, ctx->memidx, MO_UB);
-	    gen_cmp_imm(TCG_COND_EQ, val, 0);
+            tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
 	    tcg_gen_ori_i32(val, val, 0x80);
             tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_UB);
 	    tcg_temp_free(val);
@@ -1874,7 +1861,7 @@ gen_intermediate_code_internal(SuperHCPU *cpu, TranslationBlock *tb,
     ctx.pc = pc_start;
     ctx.flags = (uint32_t)tb->flags;
     ctx.bstate = BS_NONE;
-    ctx.memidx = (ctx.flags & SR_MD) == 0 ? 1 : 0;
+    ctx.memidx = (ctx.flags & (1u << SR_MD)) == 0 ? 1 : 0;
     /* We don't know if the delayed pc came from a dynamic or static branch,
        so assume it is a dynamic branch.  */
     ctx.delayed_pc = -1; /* use delayed pc from env pointer */
diff --git a/tests/endianness-test.c b/tests/endianness-test.c
index 92e17d251a..26ee734f42 100644
--- a/tests/endianness-test.c
+++ b/tests/endianness-test.c
@@ -31,8 +31,6 @@ struct TestCase {
 
 static const TestCase test_cases[] = {
     { "i386", "pc", -1 },
-    { "mips", "magnum", 0x90000000, .bswap = true },
-    { "mips", "pica61", 0x90000000, .bswap = true },
     { "mips", "mips", 0x14000000, .bswap = true },
     { "mips", "malta", 0x10000000, .bswap = true },
     { "mips64", "magnum", 0x90000000, .bswap = true },
diff --git a/trace-events b/trace-events
index 2662ffa850..6060d36773 100644
--- a/trace-events
+++ b/trace-events
@@ -280,6 +280,12 @@ slavio_timer_mem_writel_mode_counter(unsigned int timer_index) "processor %d cha
 slavio_timer_mem_writel_mode_invalid(void) "not system timer"
 slavio_timer_mem_writel_invalid(uint64_t addr) "invalid write address %"PRIx64
 
+# hw/dma/rc4030.c
+jazzio_read(uint64_t addr, uint32_t ret) "read reg[0x%"PRIx64"] = 0x%x"
+jazzio_write(uint64_t addr, uint32_t val) "write reg[0x%"PRIx64"] = 0x%x"
+rc4030_read(uint64_t addr, uint32_t ret) "read reg[0x%"PRIx64"] = 0x%x"
+rc4030_write(uint64_t addr, uint32_t val) "write reg[0x%"PRIx64"] = 0x%x"
+
 # hw/dma/sparc32_dma.c
 ledma_memory_read(uint64_t addr) "DMA read addr 0x%"PRIx64
 ledma_memory_write(uint64_t addr) "DMA write addr 0x%"PRIx64
@@ -1179,13 +1185,14 @@ virtio_gpu_cmd_res_flush(uint32_t res, uint32_t w, uint32_t h, uint32_t x, uint3
 virtio_gpu_fence_ctrl(uint64_t fence, uint32_t type) "fence 0x%" PRIx64 ", type 0x%x"
 virtio_gpu_fence_resp(uint64_t fence) "fence 0x%" PRIx64
 
-# savevm.c
+# migration/savevm.c
 qemu_loadvm_state_section(unsigned int section_type) "%d"
 qemu_loadvm_state_section_partend(uint32_t section_id) "%u"
 qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u"
 savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u"
 savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d"
 savevm_state_begin(void) ""
+savevm_state_header(void) ""
 savevm_state_iterate(void) ""
 savevm_state_complete(void) ""
 savevm_state_cancel(void) ""
@@ -1205,7 +1212,7 @@ vmstate_subsection_load_good(const char *parent) "%s"
 # qemu-file.c
 qemu_file_fclose(void) ""
 
-# arch_init.c
+# migration/ram.c
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
 migration_throttle(void) ""