summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- .travis.yml 3
-rw-r--r-- backends/hostmem-ram.c 2
-rw-r--r-- block/io.c 42
-rw-r--r-- block/qcow2.c 4
-rw-r--r-- block/qed-cluster.c 4
-rw-r--r-- block/qed-l2-cache.c 6
-rw-r--r-- block/qed-table.c 24
-rw-r--r-- block/qed.c 218
-rw-r--r-- block/qed.h 11
-rw-r--r-- block/sheepdog.c 21
-rw-r--r-- block/ssh.c 24
-rw-r--r-- block/vdi.c 48
-rw-r--r-- block/vpc.c 20
-rw-r--r-- block/vvfat.c 8
-rw-r--r-- docs/devel/memory.txt 31
-rw-r--r-- hw/arm/aspeed.c 2
-rw-r--r-- hw/arm/aspeed_soc.c 2
-rw-r--r-- hw/arm/exynos4210.c 2
-rw-r--r-- hw/arm/exynos4_boards.c 2
-rw-r--r-- hw/arm/fsl-imx25.c 5
-rw-r--r-- hw/arm/fsl-imx31.c 5
-rw-r--r-- hw/arm/fsl-imx6.c 5
-rw-r--r-- hw/arm/highbank.c 2
-rw-r--r-- hw/arm/integratorcp.c 2
-rw-r--r-- hw/arm/mainstone.c 1
-rw-r--r-- hw/arm/musicpal.c 1
-rw-r--r-- hw/arm/omap1.c 1
-rw-r--r-- hw/arm/omap2.c 1
-rw-r--r-- hw/arm/omap_sx1.c 6
-rw-r--r-- hw/arm/palm.c 1
-rw-r--r-- hw/arm/pxa2xx.c 4
-rw-r--r-- hw/arm/realview.c 3
-rw-r--r-- hw/arm/spitz.c 1
-rw-r--r-- hw/arm/stellaris.c 2
-rw-r--r-- hw/arm/stm32f205_soc.c 3
-rw-r--r-- hw/arm/tosa.c 1
-rw-r--r-- hw/arm/vexpress.c 3
-rw-r--r-- hw/arm/virt.c 4
-rw-r--r-- hw/arm/xilinx_zynq.c 1
-rw-r--r-- hw/arm/xlnx-zynqmp.c 1
-rw-r--r-- hw/block/onenand.c 2
-rw-r--r-- hw/block/pflash_cfi01.c 1
-rw-r--r-- hw/block/pflash_cfi02.c 1
-rw-r--r-- hw/cris/axis_dev88.c 5
-rw-r--r-- hw/display/cg3.c 3
-rw-r--r-- hw/display/qxl.c 3
-rw-r--r-- hw/display/sm501.c 2
-rw-r--r-- hw/display/tc6393xb.c 1
-rw-r--r-- hw/display/tcx.c 4
-rw-r--r-- hw/display/vga.c 2
-rw-r--r-- hw/display/vmware_vga.c 1
-rw-r--r-- hw/i386/pc.c 1
-rw-r--r-- hw/i386/pc_sysfw.c 2
-rw-r--r-- hw/i386/pci-assign-load-rom.c 2
-rw-r--r-- hw/i386/xen/xen-hvm.c 1
-rw-r--r-- hw/input/milkymist-softusb.c 4
-rw-r--r-- hw/m68k/an5206.c 1
-rw-r--r-- hw/m68k/mcf5208.c 1
-rw-r--r-- hw/microblaze/petalogix_ml605_mmu.c 2
-rw-r--r-- hw/microblaze/petalogix_s3adsp1800_mmu.c 2
-rw-r--r-- hw/mips/boston.c 2
-rw-r--r-- hw/mips/mips_fulong2e.c 1
-rw-r--r-- hw/mips/mips_jazz.c 2
-rw-r--r-- hw/mips/mips_malta.c 2
-rw-r--r-- hw/mips/mips_mipssim.c 1
-rw-r--r-- hw/mips/mips_r4k.c 1
-rw-r--r-- hw/moxie/moxiesim.c 4
-rw-r--r-- hw/net/dp8393x.c 2
-rw-r--r-- hw/net/milkymist-minimac2.c 2
-rw-r--r-- hw/nios2/10m50_devboard.c 8
-rw-r--r-- hw/openrisc/openrisc_sim.c 1
-rw-r--r-- hw/pci-host/prep.c 2
-rw-r--r-- hw/pci-host/xilinx-pcie.c 2
-rw-r--r-- hw/pci/pci.c 1
-rw-r--r-- hw/ppc/mac_newworld.c 1
-rw-r--r-- hw/ppc/mac_oldworld.c 1
-rw-r--r-- hw/ppc/ppc405_boards.c 3
-rw-r--r-- hw/ppc/ppc405_uc.c 1
-rw-r--r-- hw/s390x/sclp.c 1
-rw-r--r-- hw/sh4/r2d.c 1
-rw-r--r-- hw/sh4/shix.c 3
-rw-r--r-- hw/sparc/leon3.c 1
-rw-r--r-- hw/sparc/sun4m.c 6
-rw-r--r-- hw/sparc64/sun4u.c 4
-rw-r--r-- hw/tricore/tricore_testboard.c 26
-rw-r--r-- hw/unicore32/puv3.c 1
-rw-r--r-- hw/xtensa/sim.c 4
-rw-r--r-- hw/xtensa/xtfpga.c 4
-rw-r--r-- include/block/block_int.h 2
-rw-r--r-- include/exec/memory.h 162
-rw-r--r-- include/hw/boards.h 29
-rw-r--r-- include/qemu/coroutine.h 18
-rw-r--r-- memory.c 110
-rw-r--r-- numa.c 4
-rw-r--r-- scripts/coccinelle/memory-region-init-ram.cocci 38
-rw-r--r-- slirp/ip6.h 6
-rw-r--r-- slirp/misc.c 4
-rw-r--r-- slirp/sbuf.c 2
-rw-r--r-- slirp/socket.c 52
-rw-r--r-- tests/docker/Makefile.include 5
-rwxr-xr-x tests/docker/docker.py 23
-rw-r--r-- util/qemu-coroutine-lock.c 35
102 files changed, 783 insertions, 363 deletions
diff --git a/.travis.yml b/.travis.yml
index 27a2d9cfb3..3c7a5cbe25 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -86,6 +86,9 @@ matrix:
     - env: CONFIG="--enable-trace-backends=ust"
            TEST_CMD=""
       compiler: gcc
+    - env: CONFIG="--disable-tcg"
+           TEST_CMD=""
+      compiler: gcc
     - env: CONFIG=""
       os: osx
       compiler: clang
diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
index 04a7ac362b..38977be73e 100644
--- a/backends/hostmem-ram.c
+++ b/backends/hostmem-ram.c
@@ -28,7 +28,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
     }
 
     path = object_get_canonical_path_component(OBJECT(backend));
-    memory_region_init_ram(&backend->mr, OBJECT(backend), path,
+    memory_region_init_ram_nomigrate(&backend->mr, OBJECT(backend), path,
                            backend->size, errp);
     g_free(path);
 }
diff --git a/block/io.c b/block/io.c
index b413727524..aece54c015 100644
--- a/block/io.c
+++ b/block/io.c
@@ -149,6 +149,37 @@ bool bdrv_requests_pending(BlockDriverState *bs)
     return false;
 }
 
+typedef struct {
+    Coroutine *co;
+    BlockDriverState *bs;
+    bool done;
+} BdrvCoDrainData;
+
+static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
+{
+    BdrvCoDrainData *data = opaque;
+    BlockDriverState *bs = data->bs;
+
+    bs->drv->bdrv_co_drain(bs);
+
+    /* Set data->done before reading bs->wakeup.  */
+    atomic_mb_set(&data->done, true);
+    bdrv_wakeup(bs);
+}
+
+static void bdrv_drain_invoke(BlockDriverState *bs)
+{
+    BdrvCoDrainData data = { .bs = bs, .done = false };
+
+    if (!bs->drv || !bs->drv->bdrv_co_drain) {
+        return;
+    }
+
+    data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
+    bdrv_coroutine_enter(bs, data.co);
+    BDRV_POLL_WHILE(bs, !data.done);
+}
+
 static bool bdrv_drain_recurse(BlockDriverState *bs)
 {
     BdrvChild *child, *tmp;
@@ -156,9 +187,8 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
 
     waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
 
-    if (bs->drv && bs->drv->bdrv_drain) {
-        bs->drv->bdrv_drain(bs);
-    }
+    /* Ensure any pending metadata writes are submitted to bs->file.  */
+    bdrv_drain_invoke(bs);
 
     QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
         BlockDriverState *bs = child->bs;
@@ -184,12 +214,6 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
     return waited;
 }
 
-typedef struct {
-    Coroutine *co;
-    BlockDriverState *bs;
-    bool done;
-} BdrvCoDrainData;
-
 static void bdrv_co_drain_bh_cb(void *opaque)
 {
     BdrvCoDrainData *data = opaque;
diff --git a/block/qcow2.c b/block/qcow2.c
index c144ea5620..d5790af1e0 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2025,8 +2025,6 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
     ret = 0;
 
 fail:
-    qemu_co_mutex_unlock(&s->lock);
-
     while (l2meta != NULL) {
         QCowL2Meta *next;
 
@@ -2040,6 +2038,8 @@ fail:
         l2meta = next;
     }
 
+    qemu_co_mutex_unlock(&s->lock);
+
     qemu_iovec_destroy(&hd_qiov);
     qemu_vfree(cluster_data);
     trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
diff --git a/block/qed-cluster.c b/block/qed-cluster.c
index d8d6e66a0f..672e2e654b 100644
--- a/block/qed-cluster.c
+++ b/block/qed-cluster.c
@@ -85,6 +85,8 @@ static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
  *
  * On failure QED_CLUSTER_L2 or QED_CLUSTER_L1 is returned for missing L2 or L1
  * table offset, respectively. len is number of contiguous unallocated bytes.
+ *
+ * Called with table_lock held.
  */
 int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
                                   uint64_t pos, size_t *len,
@@ -112,7 +114,6 @@ int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
     }
 
     ret = qed_read_l2_table(s, request, l2_offset);
-    qed_acquire(s);
     if (ret) {
         goto out;
     }
@@ -137,6 +138,5 @@ int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
 
 out:
     *img_offset = offset;
-    qed_release(s);
     return ret;
 }
diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c
index 5cba794650..b548362398 100644
--- a/block/qed-l2-cache.c
+++ b/block/qed-l2-cache.c
@@ -101,6 +101,8 @@ CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache)
 /**
  * Decrease an entry's reference count and free if necessary when the reference
  * count drops to zero.
+ *
+ * Called with table_lock held.
  */
 void qed_unref_l2_cache_entry(CachedL2Table *entry)
 {
@@ -122,6 +124,8 @@ void qed_unref_l2_cache_entry(CachedL2Table *entry)
  *
  * For a cached entry, this function increases the reference count and returns
  * the entry.
+ *
+ * Called with table_lock held.
  */
 CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
 {
@@ -150,6 +154,8 @@ CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
  * N.B. This function steals a reference to the l2_table from the caller so the
  * caller must obtain a new reference by issuing a call to
  * qed_find_l2_cache_entry().
+ *
+ * Called with table_lock held.
  */
 void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
 {
diff --git a/block/qed-table.c b/block/qed-table.c
index ebee2c50f0..eead8b0fc7 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -18,6 +18,7 @@
 #include "qed.h"
 #include "qemu/bswap.h"
 
+/* Called either from qed_check or with table_lock held.  */
 static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
 {
     QEMUIOVector qiov;
@@ -32,18 +33,22 @@ static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
 
     trace_qed_read_table(s, offset, table);
 
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_unlock(&s->table_lock);
+    }
     ret = bdrv_preadv(s->bs->file, offset, &qiov);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_lock(&s->table_lock);
+    }
     if (ret < 0) {
         goto out;
     }
 
     /* Byteswap offsets */
-    qed_acquire(s);
     noffsets = qiov.size / sizeof(uint64_t);
     for (i = 0; i < noffsets; i++) {
         table->offsets[i] = le64_to_cpu(table->offsets[i]);
     }
-    qed_release(s);
 
     ret = 0;
 out:
@@ -61,6 +66,8 @@ out:
  * @index:      Index of first element
  * @n:          Number of elements
  * @flush:      Whether or not to sync to disk
+ *
+ * Called either from qed_check or with table_lock held.
  */
 static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
                            unsigned int index, unsigned int n, bool flush)
@@ -97,16 +104,20 @@ static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
     /* Adjust for offset into table */
     offset += start * sizeof(uint64_t);
 
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_unlock(&s->table_lock);
+    }
     ret = bdrv_pwritev(s->bs->file, offset, &qiov);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_lock(&s->table_lock);
+    }
     trace_qed_write_table_cb(s, table, flush, ret);
     if (ret < 0) {
         goto out;
     }
 
     if (flush) {
-        qed_acquire(s);
         ret = bdrv_flush(s->bs);
-        qed_release(s);
         if (ret < 0) {
             goto out;
         }
@@ -123,6 +134,7 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
     return qed_read_table(s, s->header.l1_table_offset, s->l1_table);
 }
 
+/* Called either from qed_check or with table_lock held.  */
 int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n)
 {
     BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
@@ -136,6 +148,7 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
     return qed_write_l1_table(s, index, n);
 }
 
+/* Called either from qed_check or with table_lock held.  */
 int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
 {
     int ret;
@@ -154,7 +167,6 @@ int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
     BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
     ret = qed_read_table(s, offset, request->l2_table->table);
 
-    qed_acquire(s);
     if (ret) {
         /* can't trust loaded L2 table anymore */
         qed_unref_l2_cache_entry(request->l2_table);
@@ -170,7 +182,6 @@ int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
         request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
         assert(request->l2_table != NULL);
     }
-    qed_release(s);
 
     return ret;
 }
@@ -180,6 +191,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
     return qed_read_l2_table(s, request, offset);
 }
 
+/* Called either from qed_check or with table_lock held.  */
 int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                        unsigned int index, unsigned int n, bool flush)
 {
diff --git a/block/qed.c b/block/qed.c
index 86cad2188c..dc54bf4a93 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -93,6 +93,8 @@ int qed_write_header_sync(BDRVQEDState *s)
  *
  * This function only updates known header fields in-place and does not affect
  * extra data after the QED header.
+ *
+ * No new allocating reqs can start while this function runs.
  */
 static int coroutine_fn qed_write_header(BDRVQEDState *s)
 {
@@ -109,6 +111,8 @@ static int coroutine_fn qed_write_header(BDRVQEDState *s)
     QEMUIOVector qiov;
     int ret;
 
+    assert(s->allocating_acb || s->allocating_write_reqs_plugged);
+
     buf = qemu_blockalign(s->bs, len);
     iov = (struct iovec) {
         .iov_base = buf,
@@ -219,6 +223,8 @@ static int qed_read_string(BdrvChild *file, uint64_t offset, size_t n,
  * This function only produces the offset where the new clusters should be
  * written.  It updates BDRVQEDState but does not make any changes to the image
  * file.
+ *
+ * Called with table_lock held.
  */
 static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
 {
@@ -236,6 +242,8 @@ QEDTable *qed_alloc_table(BDRVQEDState *s)
 
 /**
  * Allocate a new zeroed L2 table
+ *
+ * Called with table_lock held.
  */
 static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
 {
@@ -249,19 +257,32 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
     return l2_table;
 }
 
-static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
+static bool qed_plug_allocating_write_reqs(BDRVQEDState *s)
 {
+    qemu_co_mutex_lock(&s->table_lock);
+
+    /* No reentrancy is allowed.  */
     assert(!s->allocating_write_reqs_plugged);
+    if (s->allocating_acb != NULL) {
+        /* Another allocating write came concurrently.  This cannot happen
+         * from bdrv_qed_co_drain, but it can happen when the timer runs.
+         */
+        qemu_co_mutex_unlock(&s->table_lock);
+        return false;
+    }
 
     s->allocating_write_reqs_plugged = true;
+    qemu_co_mutex_unlock(&s->table_lock);
+    return true;
 }
 
 static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
 {
+    qemu_co_mutex_lock(&s->table_lock);
     assert(s->allocating_write_reqs_plugged);
-
     s->allocating_write_reqs_plugged = false;
-    qemu_co_enter_next(&s->allocating_write_reqs);
+    qemu_co_queue_next(&s->allocating_write_reqs);
+    qemu_co_mutex_unlock(&s->table_lock);
 }
 
 static void coroutine_fn qed_need_check_timer_entry(void *opaque)
@@ -269,17 +290,14 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque)
     BDRVQEDState *s = opaque;
     int ret;
 
-    /* The timer should only fire when allocating writes have drained */
-    assert(!s->allocating_acb);
-
     trace_qed_need_check_timer_cb(s);
 
-    qed_acquire(s);
-    qed_plug_allocating_write_reqs(s);
+    if (!qed_plug_allocating_write_reqs(s)) {
+        return;
+    }
 
     /* Ensure writes are on disk before clearing flag */
     ret = bdrv_co_flush(s->bs->file->bs);
-    qed_release(s);
     if (ret < 0) {
         qed_unplug_allocating_write_reqs(s);
         return;
@@ -301,16 +319,6 @@ static void qed_need_check_timer_cb(void *opaque)
     qemu_coroutine_enter(co);
 }
 
-void qed_acquire(BDRVQEDState *s)
-{
-    aio_context_acquire(bdrv_get_aio_context(s->bs));
-}
-
-void qed_release(BDRVQEDState *s)
-{
-    aio_context_release(bdrv_get_aio_context(s->bs));
-}
-
 static void qed_start_need_check_timer(BDRVQEDState *s)
 {
     trace_qed_start_need_check_timer(s);
@@ -350,7 +358,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
     }
 }
 
-static void bdrv_qed_drain(BlockDriverState *bs)
+static void coroutine_fn bdrv_qed_co_drain(BlockDriverState *bs)
 {
     BDRVQEDState *s = bs->opaque;
 
@@ -359,10 +367,20 @@ static void bdrv_qed_drain(BlockDriverState *bs)
      */
     if (s->need_check_timer && timer_pending(s->need_check_timer)) {
         qed_cancel_need_check_timer(s);
-        qed_need_check_timer_cb(s);
+        qed_need_check_timer_entry(s);
     }
 }
 
+static void bdrv_qed_init_state(BlockDriverState *bs)
+{
+    BDRVQEDState *s = bs->opaque;
+
+    memset(s, 0, sizeof(BDRVQEDState));
+    s->bs = bs;
+    qemu_co_mutex_init(&s->table_lock);
+    qemu_co_queue_init(&s->allocating_write_reqs);
+}
+
 static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
                             Error **errp)
 {
@@ -371,9 +389,6 @@ static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
     int64_t file_size;
     int ret;
 
-    s->bs = bs;
-    qemu_co_queue_init(&s->allocating_write_reqs);
-
     ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
     if (ret < 0) {
         return ret;
@@ -507,6 +522,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
         return -EINVAL;
     }
 
+    bdrv_qed_init_state(bs);
     return bdrv_qed_do_open(bs, options, flags, errp);
 }
 
@@ -681,6 +697,7 @@ typedef struct {
     BlockDriverState **file;
 } QEDIsAllocatedCB;
 
+/* Called with table_lock held.  */
 static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
 {
     QEDIsAllocatedCB *cb = opaque;
@@ -728,6 +745,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
     uint64_t offset;
     int ret;
 
+    qemu_co_mutex_lock(&s->table_lock);
     ret = qed_find_cluster(s, &request, cb.pos, &len, &offset);
     qed_is_allocated_cb(&cb, ret, offset, len);
 
@@ -735,6 +753,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
     assert(cb.status != BDRV_BLOCK_OFFSET_MASK);
 
     qed_unref_l2_cache_entry(request.l2_table);
+    qemu_co_mutex_unlock(&s->table_lock);
 
     return cb.status;
 }
@@ -865,6 +884,8 @@ out:
  *
  * The cluster offset may be an allocated byte offset in the image file, the
  * zero cluster marker, or the unallocated cluster marker.
+ *
+ * Called with table_lock held.
  */
 static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table,
                                              int index, unsigned int n,
@@ -880,6 +901,7 @@ static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table,
     }
 }
 
+/* Called with table_lock held.  */
 static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
@@ -903,7 +925,7 @@ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
     if (acb == s->allocating_acb) {
         s->allocating_acb = NULL;
         if (!qemu_co_queue_empty(&s->allocating_write_reqs)) {
-            qemu_co_enter_next(&s->allocating_write_reqs);
+            qemu_co_queue_next(&s->allocating_write_reqs);
         } else if (s->header.features & QED_F_NEED_CHECK) {
             qed_start_need_check_timer(s);
         }
@@ -912,6 +934,8 @@ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
 
 /**
  * Update L1 table with new L2 table offset and write it out
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
 {
@@ -940,6 +964,8 @@ static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
 
 /**
  * Update L2 table with new cluster offsets and write them out
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
 {
@@ -976,50 +1002,26 @@ static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
 
 /**
  * Write data to the image file
+ *
+ * Called with table_lock *not* held.
  */
 static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
     uint64_t offset = acb->cur_cluster +
                       qed_offset_into_cluster(s, acb->cur_pos);
-    int ret;
 
     trace_qed_aio_write_main(s, acb, 0, offset, acb->cur_qiov.size);
 
     BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
-    ret = bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size,
-                          &acb->cur_qiov, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (acb->find_cluster_ret != QED_CLUSTER_FOUND) {
-        if (s->bs->backing) {
-            /*
-             * Flush new data clusters before updating the L2 table
-             *
-             * This flush is necessary when a backing file is in use.  A crash
-             * during an allocating write could result in empty clusters in the
-             * image.  If the write only touched a subregion of the cluster,
-             * then backing image sectors have been lost in the untouched
-             * region.  The solution is to flush after writing a new data
-             * cluster and before updating the L2 table.
-             */
-            ret = bdrv_co_flush(s->bs->file->bs);
-            if (ret < 0) {
-                return ret;
-            }
-        }
-        ret = qed_aio_write_l2_update(acb, acb->cur_cluster);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-    return 0;
+    return bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size,
+                           &acb->cur_qiov, 0);
 }
 
 /**
  * Populate untouched regions of new data cluster
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
 {
@@ -1027,6 +1029,8 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
     uint64_t start, len, offset;
     int ret;
 
+    qemu_co_mutex_unlock(&s->table_lock);
+
     /* Populate front untouched region of new data cluster */
     start = qed_start_of_cluster(s, acb->cur_pos);
     len = qed_offset_into_cluster(s, acb->cur_pos);
@@ -1034,7 +1038,7 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
     trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
     ret = qed_copy_from_backing_file(s, start, len, acb->cur_cluster);
     if (ret < 0) {
-        return ret;
+        goto out;
     }
 
     /* Populate back untouched region of new data cluster */
@@ -1047,10 +1051,31 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
     trace_qed_aio_write_postfill(s, acb, start, len, offset);
     ret = qed_copy_from_backing_file(s, start, len, offset);
     if (ret < 0) {
-        return ret;
+        goto out;
     }
 
-    return qed_aio_write_main(acb);
+    ret = qed_aio_write_main(acb);
+    if (ret < 0) {
+        goto out;
+    }
+
+    if (s->bs->backing) {
+        /*
+         * Flush new data clusters before updating the L2 table
+         *
+         * This flush is necessary when a backing file is in use.  A crash
+         * during an allocating write could result in empty clusters in the
+         * image.  If the write only touched a subregion of the cluster,
+         * then backing image sectors have been lost in the untouched
+         * region.  The solution is to flush after writing a new data
+         * cluster and before updating the L2 table.
+         */
+        ret = bdrv_co_flush(s->bs->file->bs);
+    }
+
+out:
+    qemu_co_mutex_lock(&s->table_lock);
+    return ret;
 }
 
 /**
@@ -1073,6 +1098,8 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
  * @len:        Length in bytes
  *
  * This path is taken when writing to previously unallocated clusters.
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
@@ -1087,7 +1114,7 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
     /* Freeze this request if another allocating write is in progress */
     if (s->allocating_acb != acb || s->allocating_write_reqs_plugged) {
         if (s->allocating_acb != NULL) {
-            qemu_co_queue_wait(&s->allocating_write_reqs, NULL);
+            qemu_co_queue_wait(&s->allocating_write_reqs, &s->table_lock);
             assert(s->allocating_acb == NULL);
         }
         s->allocating_acb = acb;
@@ -1103,6 +1130,7 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
         if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
             return 0;
         }
+        acb->cur_cluster = 1;
     } else {
         acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
     }
@@ -1115,15 +1143,14 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
         }
     }
 
-    if (acb->flags & QED_AIOCB_ZERO) {
-        ret = qed_aio_write_l2_update(acb, 1);
-    } else {
+    if (!(acb->flags & QED_AIOCB_ZERO)) {
         ret = qed_aio_write_cow(acb);
+        if (ret < 0) {
+            return ret;
+        }
     }
-    if (ret < 0) {
-        return ret;
-    }
-    return 0;
+
+    return qed_aio_write_l2_update(acb, acb->cur_cluster);
 }
 
 /**
@@ -1134,10 +1161,17 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
  * @len:        Length in bytes
  *
  * This path is taken when writing to already allocated clusters.
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
                                               size_t len)
 {
+    BDRVQEDState *s = acb_to_s(acb);
+    int r;
+
+    qemu_co_mutex_unlock(&s->table_lock);
+
     /* Allocate buffer for zero writes */
     if (acb->flags & QED_AIOCB_ZERO) {
         struct iovec *iov = acb->qiov->iov;
@@ -1145,7 +1179,8 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
         if (!iov->iov_base) {
             iov->iov_base = qemu_try_blockalign(acb->bs, iov->iov_len);
             if (iov->iov_base == NULL) {
-                return -ENOMEM;
+                r = -ENOMEM;
+                goto out;
             }
             memset(iov->iov_base, 0, iov->iov_len);
         }
@@ -1155,8 +1190,11 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
     acb->cur_cluster = offset;
     qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
-    /* Do the actual write */
-    return qed_aio_write_main(acb);
+    /* Do the actual write.  */
+    r = qed_aio_write_main(acb);
+out:
+    qemu_co_mutex_lock(&s->table_lock);
+    return r;
 }
 
 /**
@@ -1166,6 +1204,8 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
  * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1
  * @offset:     Cluster offset in bytes
  * @len:        Length in bytes
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
                                            uint64_t offset, size_t len)
@@ -1197,6 +1237,8 @@ static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
  * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1
  * @offset:     Cluster offset in bytes
  * @len:        Length in bytes
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
                                           uint64_t offset, size_t len)
@@ -1204,6 +1246,9 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
     QEDAIOCB *acb = opaque;
     BDRVQEDState *s = acb_to_s(acb);
     BlockDriverState *bs = acb->bs;
+    int r;
+
+    qemu_co_mutex_unlock(&s->table_lock);
 
     /* Adjust offset into cluster */
     offset += qed_offset_into_cluster(s, acb->cur_pos);
@@ -1212,22 +1257,23 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
 
     qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
-    /* Handle zero cluster and backing file reads */
+    /* Handle zero cluster and backing file reads, otherwise read
+     * data cluster directly.
+     */
     if (ret == QED_CLUSTER_ZERO) {
         qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
-        return 0;
+        r = 0;
     } else if (ret != QED_CLUSTER_FOUND) {
-        return qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
-                                     &acb->backing_qiov);
+        r = qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
+                                  &acb->backing_qiov);
+    } else {
+        BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+        r = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
+                           &acb->cur_qiov, 0);
     }
 
-    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    ret = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
-                         &acb->cur_qiov, 0);
-    if (ret < 0) {
-        return ret;
-    }
-    return 0;
+    qemu_co_mutex_lock(&s->table_lock);
+    return r;
 }
 
 /**
@@ -1240,6 +1286,7 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
     size_t len;
     int ret;
 
+    qemu_co_mutex_lock(&s->table_lock);
     while (1) {
         trace_qed_aio_next_io(s, acb, 0, acb->cur_pos + acb->cur_qiov.size);
 
@@ -1279,6 +1326,7 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
 
     trace_qed_aio_complete(s, acb, ret);
     qed_aio_complete(acb);
+    qemu_co_mutex_unlock(&s->table_lock);
     return ret;
 }
 
@@ -1474,8 +1522,14 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
 
     bdrv_qed_close(bs);
 
-    memset(s, 0, sizeof(BDRVQEDState));
+    bdrv_qed_init_state(bs);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_lock(&s->table_lock);
+    }
     ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_unlock(&s->table_lock);
+    }
     if (local_err) {
         error_propagate(errp, local_err);
         error_prepend(errp, "Could not reopen qed layer: ");
@@ -1554,7 +1608,7 @@ static BlockDriver bdrv_qed = {
     .bdrv_check               = bdrv_qed_check,
     .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
     .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
-    .bdrv_drain               = bdrv_qed_drain,
+    .bdrv_co_drain            = bdrv_qed_co_drain,
 };
 
 static void bdrv_qed_init(void)
diff --git a/block/qed.h b/block/qed.h
index dd3a2d5519..f35341f134 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -151,15 +151,21 @@ typedef struct QEDAIOCB {
 
 typedef struct {
     BlockDriverState *bs;           /* device */
-    uint64_t file_size;             /* length of image file, in bytes */
 
+    /* Written only by an allocating write or the timer handler (the latter
+     * while allocating reqs are plugged).
+     */
     QEDHeader header;               /* always cpu-endian */
+
+    /* Protected by table_lock.  */
+    CoMutex table_lock;
     QEDTable *l1_table;
     L2TableCache l2_cache;          /* l2 table cache */
     uint32_t table_nelems;
     uint32_t l1_shift;
     uint32_t l2_shift;
     uint32_t l2_mask;
+    uint64_t file_size;             /* length of image file, in bytes */
 
     /* Allocating write request queue */
     QEDAIOCB *allocating_acb;
@@ -177,9 +183,6 @@ enum {
     QED_CLUSTER_L1,            /* cluster missing in L1 */
 };
 
-void qed_acquire(BDRVQEDState *s);
-void qed_release(BDRVQEDState *s);
-
 /**
  * Header functions
  */
diff --git a/block/sheepdog.c b/block/sheepdog.c
index b7b7e6bbe5..abb2e79065 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -390,6 +390,7 @@ struct BDRVSheepdogState {
     QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
     QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;
 
+    CoMutex queue_lock;
     CoQueue overlapping_queue;
     QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
 };
@@ -488,7 +489,7 @@ static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
 retry:
     QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
         if (AIOCBOverlapping(acb, cb)) {
-            qemu_co_queue_wait(&s->overlapping_queue, NULL);
+            qemu_co_queue_wait(&s->overlapping_queue, &s->queue_lock);
             goto retry;
         }
     }
@@ -525,8 +526,10 @@ static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
         return;
     }
 
+    qemu_co_mutex_lock(&s->queue_lock);
     wait_for_overlapping_aiocb(s, acb);
     QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings);
+    qemu_co_mutex_unlock(&s->queue_lock);
 }
 
 static SocketAddress *sd_socket_address(const char *path,
@@ -785,6 +788,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
      * have to move all the inflight requests to the failed queue before
      * resend_aioreq() is called.
      */
+    qemu_co_mutex_lock(&s->queue_lock);
     QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) {
         QLIST_REMOVE(aio_req, aio_siblings);
         QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings);
@@ -794,8 +798,11 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
     while (!QLIST_EMPTY(&s->failed_aio_head)) {
         aio_req = QLIST_FIRST(&s->failed_aio_head);
         QLIST_REMOVE(aio_req, aio_siblings);
+        qemu_co_mutex_unlock(&s->queue_lock);
         resend_aioreq(s, aio_req);
+        qemu_co_mutex_lock(&s->queue_lock);
     }
+    qemu_co_mutex_unlock(&s->queue_lock);
 }
 
 /*
@@ -887,7 +894,10 @@ static void coroutine_fn aio_read_response(void *opaque)
     */
     s->co_recv = NULL;
 
+    qemu_co_mutex_lock(&s->queue_lock);
     QLIST_REMOVE(aio_req, aio_siblings);
+    qemu_co_mutex_unlock(&s->queue_lock);
+
     switch (rsp.result) {
     case SD_RES_SUCCESS:
         break;
@@ -1307,7 +1317,9 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
     uint64_t old_oid = aio_req->base_oid;
     bool create = aio_req->create;
 
+    qemu_co_mutex_lock(&s->queue_lock);
     QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
+    qemu_co_mutex_unlock(&s->queue_lock);
 
     if (!nr_copies) {
         error_report("bug");
@@ -1678,6 +1690,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
     pstrcpy(s->name, sizeof(s->name), vdi);
     qemu_co_mutex_init(&s->lock);
+    qemu_co_mutex_init(&s->queue_lock);
     qemu_co_queue_init(&s->overlapping_queue);
     qemu_opts_del(opts);
     g_free(buf);
@@ -2438,12 +2451,16 @@ static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb)
 
 static void sd_aio_complete(SheepdogAIOCB *acb)
 {
+    BDRVSheepdogState *s;
     if (acb->aiocb_type == AIOCB_FLUSH_CACHE) {
         return;
     }
 
+    s = acb->s;
+    qemu_co_mutex_lock(&s->queue_lock);
     QLIST_REMOVE(acb, aiocb_siblings);
-    qemu_co_queue_restart_all(&acb->s->overlapping_queue);
+    qemu_co_queue_restart_all(&s->overlapping_queue);
+    qemu_co_mutex_unlock(&s->queue_lock);
 }
 
 static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
diff --git a/block/ssh.c b/block/ssh.c
index 07a57eb466..e8f0404c03 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -888,13 +888,22 @@ static int ssh_has_zero_init(BlockDriverState *bs)
     return has_zero_init;
 }
 
+typedef struct BDRVSSHRestart {
+    BlockDriverState *bs;
+    Coroutine *co;
+} BDRVSSHRestart;
+
 static void restart_coroutine(void *opaque)
 {
-    Coroutine *co = opaque;
+    BDRVSSHRestart *restart = opaque;
+    BlockDriverState *bs = restart->bs;
+    BDRVSSHState *s = bs->opaque;
+    AioContext *ctx = bdrv_get_aio_context(bs);
 
-    DPRINTF("co=%p", co);
+    DPRINTF("co=%p", restart->co);
+    aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL, NULL);
 
-    aio_co_wake(co);
+    aio_co_wake(restart->co);
 }
 
 /* A non-blocking call returned EAGAIN, so yield, ensuring the
@@ -905,7 +914,10 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
 {
     int r;
     IOHandler *rd_handler = NULL, *wr_handler = NULL;
-    Coroutine *co = qemu_coroutine_self();
+    BDRVSSHRestart restart = {
+        .bs = bs,
+        .co = qemu_coroutine_self()
+    };
 
     r = libssh2_session_block_directions(s->session);
 
@@ -920,11 +932,9 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
             rd_handler, wr_handler);
 
     aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       false, rd_handler, wr_handler, NULL, co);
+                       false, rd_handler, wr_handler, NULL, &restart);
     qemu_coroutine_yield();
     DPRINTF("s->sock=%d - back", s->sock);
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, false,
-                       NULL, NULL, NULL, NULL);
 }
 
 /* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
diff --git a/block/vdi.c b/block/vdi.c
index 2b6e8fa1ed..8da5dfc897 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -172,7 +172,7 @@ typedef struct {
     /* VDI header (converted to host endianness). */
     VdiHeader header;
 
-    CoMutex write_lock;
+    CoRwlock bmap_lock;
 
     Error *migration_blocker;
 } BDRVVdiState;
@@ -485,7 +485,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail_free_bmap;
     }
 
-    qemu_co_mutex_init(&s->write_lock);
+    qemu_co_rwlock_init(&s->bmap_lock);
 
     return 0;
 
@@ -557,7 +557,9 @@ vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                n_bytes, offset);
 
         /* prepare next AIO request */
+        qemu_co_rwlock_rdlock(&s->bmap_lock);
         bmap_entry = le32_to_cpu(s->bmap[block_index]);
+        qemu_co_rwlock_unlock(&s->bmap_lock);
         if (!VDI_IS_ALLOCATED(bmap_entry)) {
             /* Block not allocated, return zeros, no need to wait. */
             qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
@@ -595,6 +597,7 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     uint32_t block_index;
     uint32_t offset_in_block;
     uint32_t n_bytes;
+    uint64_t data_offset;
     uint32_t bmap_first = VDI_UNALLOCATED;
     uint32_t bmap_last = VDI_UNALLOCATED;
     uint8_t *block = NULL;
@@ -614,10 +617,19 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                n_bytes, offset);
 
         /* prepare next AIO request */
+        qemu_co_rwlock_rdlock(&s->bmap_lock);
         bmap_entry = le32_to_cpu(s->bmap[block_index]);
         if (!VDI_IS_ALLOCATED(bmap_entry)) {
             /* Allocate new block and write to it. */
             uint64_t data_offset;
+            qemu_co_rwlock_upgrade(&s->bmap_lock);
+            bmap_entry = le32_to_cpu(s->bmap[block_index]);
+            if (VDI_IS_ALLOCATED(bmap_entry)) {
+                /* A concurrent allocation did the work for us.  */
+                qemu_co_rwlock_downgrade(&s->bmap_lock);
+                goto nonallocating_write;
+            }
+
             bmap_entry = s->header.blocks_allocated;
             s->bmap[block_index] = cpu_to_le32(bmap_entry);
             s->header.blocks_allocated++;
@@ -635,30 +647,18 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
             memset(block + offset_in_block + n_bytes, 0,
                    s->block_size - n_bytes - offset_in_block);
 
-            /* Note that this coroutine does not yield anywhere from reading the
-             * bmap entry until here, so in regards to all the coroutines trying
-             * to write to this cluster, the one doing the allocation will
-             * always be the first to try to acquire the lock.
-             * Therefore, it is also the first that will actually be able to
-             * acquire the lock and thus the padded cluster is written before
-             * the other coroutines can write to the affected area. */
-            qemu_co_mutex_lock(&s->write_lock);
+            /* Write the new block under CoRwlock write-side protection,
+             * so this full-cluster write does not overlap a partial write
+             * of the same cluster, issued from the "else" branch.
+             */
             ret = bdrv_pwrite(bs->file, data_offset, block, s->block_size);
-            qemu_co_mutex_unlock(&s->write_lock);
+            qemu_co_rwlock_unlock(&s->bmap_lock);
         } else {
-            uint64_t data_offset = s->header.offset_data +
-                                   (uint64_t)bmap_entry * s->block_size +
-                                   offset_in_block;
-            qemu_co_mutex_lock(&s->write_lock);
-            /* This lock is only used to make sure the following write operation
-             * is executed after the write issued by the coroutine allocating
-             * this cluster, therefore we do not need to keep it locked.
-             * As stated above, the allocating coroutine will always try to lock
-             * the mutex before all the other concurrent accesses to that
-             * cluster, therefore at this point we can be absolutely certain
-             * that that write operation has returned (there may be other writes
-             * in flight, but they do not concern this very operation). */
-            qemu_co_mutex_unlock(&s->write_lock);
+nonallocating_write:
+            data_offset = s->header.offset_data +
+                           (uint64_t)bmap_entry * s->block_size +
+                           offset_in_block;
+            qemu_co_rwlock_unlock(&s->bmap_lock);
 
             qemu_iovec_reset(&local_qiov);
             qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
diff --git a/block/vpc.c b/block/vpc.c
index 9a6f8173a5..8057d42a23 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -496,12 +496,6 @@ static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
     return block_offset;
 }
 
-static inline int64_t get_sector_offset(BlockDriverState *bs,
-                                        int64_t sector_num, bool write)
-{
-    return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
-}
-
 /*
  * Writes the footer to the end of the image file. This is needed when the
  * file grows as it overwrites the old footer
@@ -696,6 +690,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
     VHDFooter *footer = (VHDFooter*) s->footer_buf;
     int64_t start, offset;
     bool allocated;
+    int64_t ret;
     int n;
 
     if (be32_to_cpu(footer->type) == VHD_FIXED) {
@@ -705,10 +700,13 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
                (sector_num << BDRV_SECTOR_BITS);
     }
 
-    offset = get_sector_offset(bs, sector_num, 0);
+    qemu_co_mutex_lock(&s->lock);
+
+    offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false);
     start = offset;
     allocated = (offset != -1);
     *pnum = 0;
+    ret = 0;
 
     do {
         /* All sectors in a block are contiguous (without using the bitmap) */
@@ -723,15 +721,17 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
          * sectors since there is always a bitmap in between. */
         if (allocated) {
             *file = bs->file->bs;
-            return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+            break;
         }
         if (nb_sectors == 0) {
             break;
         }
-        offset = get_sector_offset(bs, sector_num, 0);
+        offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false);
     } while (offset == -1);
 
-    return 0;
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
 }
 
 /*
diff --git a/block/vvfat.c b/block/vvfat.c
index 4fd28e1e87..4dae790203 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -3078,8 +3078,14 @@ static int coroutine_fn
 write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                     QEMUIOVector *qiov, int flags)
 {
+    int ret;
+
     BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
-    return try_commit(s);
+    qemu_co_mutex_lock(&s->lock);
+    ret = try_commit(s);
+    qemu_co_mutex_unlock(&s->lock);
+
+    return ret;
 }
 
 static void write_target_close(BlockDriverState *bs) {
diff --git a/docs/devel/memory.txt b/docs/devel/memory.txt
index 811b1bd3c5..8ed810f8b9 100644
--- a/docs/devel/memory.txt
+++ b/docs/devel/memory.txt
@@ -91,6 +91,37 @@ one of whose subregions is a low priority "background" region covering
 the whole address range; this is often clearer and is preferred.
 Subregions cannot be added to an alias region.
 
+Migration
+---------
+
+Where the memory region is backed by host memory (RAM, ROM and
+ROM device memory region types), this host memory needs to be
+copied to the destination on migration. The following APIs, which
+allocate the host memory for you, also register that memory so that
+it is migrated:
+ - memory_region_init_ram()
+ - memory_region_init_rom()
+ - memory_region_init_rom_device()
+
+For most devices and boards this is the correct thing. If you
+have a special case where you need to manage the migration of
+the backing memory yourself, you can call the functions:
+ - memory_region_init_ram_nomigrate()
+ - memory_region_init_rom_nomigrate()
+ - memory_region_init_rom_device_nomigrate()
+which only initialize the MemoryRegion and leave handling
+migration to the caller.
+
+The functions:
+ - memory_region_init_resizeable_ram()
+ - memory_region_init_ram_from_file()
+ - memory_region_init_ram_from_fd()
+ - memory_region_init_ram_ptr()
+ - memory_region_init_ram_device_ptr()
+are for special cases only, and so they do not automatically
+register the backing memory for migration; the caller must
+manage migration if necessary.
+
 Region names
 ------------
 
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index db3f6d20c6..0c5635f300 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -216,7 +216,7 @@ static void aspeed_board_init(MachineState *machine,
          * SoC and 128MB for the AST2500 SoC, which is twice as big as
          * needed by the flash modules of the Aspeed machines.
          */
-        memory_region_init_rom(boot_rom, OBJECT(bmc), "aspeed.boot_rom",
+        memory_region_init_rom_nomigrate(boot_rom, OBJECT(bmc), "aspeed.boot_rom",
                                fl->size, &error_abort);
         memory_region_add_subregion(get_system_memory(), FIRMWARE_ADDR,
                                     boot_rom);
diff --git a/hw/arm/aspeed_soc.c b/hw/arm/aspeed_soc.c
index 3034849c80..5529024edf 100644
--- a/hw/arm/aspeed_soc.c
+++ b/hw/arm/aspeed_soc.c
@@ -211,7 +211,7 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp)
     }
 
     /* SRAM */
-    memory_region_init_ram(&s->sram, OBJECT(dev), "aspeed.sram",
+    memory_region_init_ram_nomigrate(&s->sram, OBJECT(dev), "aspeed.sram",
                            sc->info->sram_size, &err);
     if (err) {
         error_propagate(errp, err);
diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c
index ee851e3ae5..f9e79f3ebb 100644
--- a/hw/arm/exynos4210.c
+++ b/hw/arm/exynos4210.c
@@ -281,7 +281,6 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem)
     /* Internal ROM */
     memory_region_init_ram(&s->irom_mem, NULL, "exynos4210.irom",
                            EXYNOS4210_IROM_SIZE, &error_fatal);
-    vmstate_register_ram_global(&s->irom_mem);
     memory_region_set_readonly(&s->irom_mem, true);
     memory_region_add_subregion(system_mem, EXYNOS4210_IROM_BASE_ADDR,
                                 &s->irom_mem);
@@ -297,7 +296,6 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem)
     /* Internal RAM */
     memory_region_init_ram(&s->iram_mem, NULL, "exynos4210.iram",
                            EXYNOS4210_IRAM_SIZE, &error_fatal);
-    vmstate_register_ram_global(&s->iram_mem);
     memory_region_add_subregion(system_mem, EXYNOS4210_IRAM_BASE_ADDR,
                                 &s->iram_mem);
 
diff --git a/hw/arm/exynos4_boards.c b/hw/arm/exynos4_boards.c
index 6240b26839..7c03ed32b7 100644
--- a/hw/arm/exynos4_boards.c
+++ b/hw/arm/exynos4_boards.c
@@ -113,7 +113,6 @@ static void exynos4_boards_init_ram(Exynos4BoardState *s,
         memory_region_init_ram(&s->dram1_mem, NULL, "exynos4210.dram1",
                                mem_size - EXYNOS4210_DRAM_MAX_SIZE,
                                &error_fatal);
-        vmstate_register_ram_global(&s->dram1_mem);
         memory_region_add_subregion(system_mem, EXYNOS4210_DRAM1_BASE_ADDR,
                                     &s->dram1_mem);
         mem_size = EXYNOS4210_DRAM_MAX_SIZE;
@@ -121,7 +120,6 @@ static void exynos4_boards_init_ram(Exynos4BoardState *s,
 
     memory_region_init_ram(&s->dram0_mem, NULL, "exynos4210.dram0", mem_size,
                            &error_fatal);
-    vmstate_register_ram_global(&s->dram0_mem);
     memory_region_add_subregion(system_mem, EXYNOS4210_DRAM0_BASE_ADDR,
                                 &s->dram0_mem);
 }
diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c
index 40666b68a3..8cff3c1f7b 100644
--- a/hw/arm/fsl-imx25.c
+++ b/hw/arm/fsl-imx25.c
@@ -249,7 +249,7 @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp)
     }
 
     /* initialize 2 x 16 KB ROM */
-    memory_region_init_rom(&s->rom[0], NULL,
+    memory_region_init_rom_nomigrate(&s->rom[0], NULL,
                            "imx25.rom0", FSL_IMX25_ROM0_SIZE, &err);
     if (err) {
         error_propagate(errp, err);
@@ -257,7 +257,7 @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp)
     }
     memory_region_add_subregion(get_system_memory(), FSL_IMX25_ROM0_ADDR,
                                 &s->rom[0]);
-    memory_region_init_rom(&s->rom[1], NULL,
+    memory_region_init_rom_nomigrate(&s->rom[1], NULL,
                            "imx25.rom1", FSL_IMX25_ROM1_SIZE, &err);
     if (err) {
         error_propagate(errp, err);
@@ -275,7 +275,6 @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp)
     }
     memory_region_add_subregion(get_system_memory(), FSL_IMX25_IRAM_ADDR,
                                 &s->iram);
-    vmstate_register_ram_global(&s->iram);
 
     /* internal RAM (128 KB) is aliased over 128 MB - 128 KB */
     memory_region_init_alias(&s->iram_alias, NULL, "imx25.iram_alias",
diff --git a/hw/arm/fsl-imx31.c b/hw/arm/fsl-imx31.c
index c30130667e..90278758f9 100644
--- a/hw/arm/fsl-imx31.c
+++ b/hw/arm/fsl-imx31.c
@@ -219,7 +219,7 @@ static void fsl_imx31_realize(DeviceState *dev, Error **errp)
     }
 
     /* On a real system, the first 16k is a `secure boot rom' */
-    memory_region_init_rom(&s->secure_rom, NULL, "imx31.secure_rom",
+    memory_region_init_rom_nomigrate(&s->secure_rom, NULL, "imx31.secure_rom",
                            FSL_IMX31_SECURE_ROM_SIZE, &err);
     if (err) {
         error_propagate(errp, err);
@@ -229,7 +229,7 @@ static void fsl_imx31_realize(DeviceState *dev, Error **errp)
                                 &s->secure_rom);
 
     /* There is also a 16k ROM */
-    memory_region_init_rom(&s->rom, NULL, "imx31.rom",
+    memory_region_init_rom_nomigrate(&s->rom, NULL, "imx31.rom",
                            FSL_IMX31_ROM_SIZE, &err);
     if (err) {
         error_propagate(errp, err);
@@ -247,7 +247,6 @@ static void fsl_imx31_realize(DeviceState *dev, Error **errp)
     }
     memory_region_add_subregion(get_system_memory(), FSL_IMX31_IRAM_ADDR,
                                 &s->iram);
-    vmstate_register_ram_global(&s->iram);
 
     /* internal RAM (16 KB) is aliased over 256 MB - 16 KB */
     memory_region_init_alias(&s->iram_alias, NULL, "imx31.iram_alias",
diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
index 27773c9c47..576c6631a1 100644
--- a/hw/arm/fsl-imx6.c
+++ b/hw/arm/fsl-imx6.c
@@ -399,7 +399,7 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
                                         FSL_IMX6_ENET_MAC_1588_IRQ));
 
     /* ROM memory */
-    memory_region_init_rom(&s->rom, NULL, "imx6.rom",
+    memory_region_init_rom_nomigrate(&s->rom, NULL, "imx6.rom",
                            FSL_IMX6_ROM_SIZE, &err);
     if (err) {
         error_propagate(errp, err);
@@ -409,7 +409,7 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
                                 &s->rom);
 
     /* CAAM memory */
-    memory_region_init_rom(&s->caam, NULL, "imx6.caam",
+    memory_region_init_rom_nomigrate(&s->caam, NULL, "imx6.caam",
                            FSL_IMX6_CAAM_MEM_SIZE, &err);
     if (err) {
         error_propagate(errp, err);
@@ -427,7 +427,6 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
     }
     memory_region_add_subregion(get_system_memory(), FSL_IMX6_OCRAM_ADDR,
                                 &s->ocram);
-    vmstate_register_ram_global(&s->ocram);
 
     /* internal OCRAM (256 KB) is aliased over 1 MB */
     memory_region_init_alias(&s->ocram_alias, NULL, "imx6.ocram_alias",
diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
index 750c463e2a..20e60f15c4 100644
--- a/hw/arm/highbank.c
+++ b/hw/arm/highbank.c
@@ -276,7 +276,7 @@ static void calxeda_init(MachineState *machine, enum cxmachines machine_id)
     memory_region_add_subregion(sysmem, 0, dram);
 
     sysram = g_new(MemoryRegion, 1);
-    memory_region_init_ram(sysram, NULL, "highbank.sysram", 0x8000,
+    memory_region_init_ram_nomigrate(sysram, NULL, "highbank.sysram", 0x8000,
                            &error_fatal);
     memory_region_add_subregion(sysmem, 0xfff88000, sysram);
     if (bios_name != NULL) {
diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c
index ca3eca1d16..d79221d166 100644
--- a/hw/arm/integratorcp.c
+++ b/hw/arm/integratorcp.c
@@ -276,7 +276,7 @@ static void integratorcm_init(Object *obj)
     s->cm_init = 0x00000112;
     s->cm_refcnt_offset = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), 24,
                                    1000);
-    memory_region_init_ram(&s->flash, obj, "integrator.flash", 0x100000,
+    memory_region_init_ram_nomigrate(&s->flash, obj, "integrator.flash", 0x100000,
                            &error_fatal);
     vmstate_register_ram_global(&s->flash);
 
diff --git a/hw/arm/mainstone.c b/hw/arm/mainstone.c
index f962236cf4..fb268e691e 100644
--- a/hw/arm/mainstone.c
+++ b/hw/arm/mainstone.c
@@ -130,7 +130,6 @@ static void mainstone_common_init(MemoryRegion *address_space_mem,
     mpu = pxa270_init(address_space_mem, mainstone_binfo.ram_size, cpu_model);
     memory_region_init_ram(rom, NULL, "mainstone.rom", MAINSTONE_ROM,
                            &error_fatal);
-    vmstate_register_ram_global(rom);
     memory_region_set_readonly(rom, true);
     memory_region_add_subregion(address_space_mem, 0, rom);
 
diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c
index 9c710f74b4..7e8ab3184c 100644
--- a/hw/arm/musicpal.c
+++ b/hw/arm/musicpal.c
@@ -1606,7 +1606,6 @@ static void musicpal_init(MachineState *machine)
 
     memory_region_init_ram(sram, NULL, "musicpal.sram", MP_SRAM_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(address_space_mem, MP_SRAM_BASE, sram);
 
     dev = sysbus_create_simple(TYPE_MV88W8618_PIC, MP_PIC_BASE,
diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c
index 54582bd148..3d15ff6779 100644
--- a/hw/arm/omap1.c
+++ b/hw/arm/omap1.c
@@ -3882,7 +3882,6 @@ struct omap_mpu_state_s *omap310_mpu_init(MemoryRegion *system_memory,
     memory_region_add_subregion(system_memory, OMAP_EMIFF_BASE, &s->emiff_ram);
     memory_region_init_ram(&s->imif_ram, NULL, "omap1.sram", s->sram_size,
                            &error_fatal);
-    vmstate_register_ram_global(&s->imif_ram);
     memory_region_add_subregion(system_memory, OMAP_IMIF_BASE, &s->imif_ram);
 
     omap_clkm_init(system_memory, 0xfffece00, 0xe1008000, s);
diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c
index 91f573338c..bbf0b7e188 100644
--- a/hw/arm/omap2.c
+++ b/hw/arm/omap2.c
@@ -2280,7 +2280,6 @@ struct omap_mpu_state_s *omap2420_mpu_init(MemoryRegion *sysmem,
     memory_region_add_subregion(sysmem, OMAP2_Q2_BASE, &s->sdram);
     memory_region_init_ram(&s->sram, NULL, "omap2.sram", s->sram_size,
                            &error_fatal);
-    vmstate_register_ram_global(&s->sram);
     memory_region_add_subregion(sysmem, OMAP2_SRAM_BASE, &s->sram);
 
     s->l4 = omap_l4_init(sysmem, OMAP2_L4_BASE, 54);
diff --git a/hw/arm/omap_sx1.c b/hw/arm/omap_sx1.c
index 5d74026cb2..9809106617 100644
--- a/hw/arm/omap_sx1.c
+++ b/hw/arm/omap_sx1.c
@@ -125,7 +125,6 @@ static void sx1_init(MachineState *machine, const int version)
     /* External Flash (EMIFS) */
     memory_region_init_ram(flash, NULL, "omap_sx1.flash0-0", flash_size,
                            &error_fatal);
-    vmstate_register_ram_global(flash);
     memory_region_set_readonly(flash, true);
     memory_region_add_subregion(address_space, OMAP_CS0_BASE, flash);
 
@@ -167,9 +166,8 @@ static void sx1_init(MachineState *machine, const int version)
     if ((version == 1) &&
             (dinfo = drive_get(IF_PFLASH, 0, fl_idx)) != NULL) {
         MemoryRegion *flash_1 = g_new(MemoryRegion, 1);
-        memory_region_init_ram(flash_1, NULL, "omap_sx1.flash1-0", flash1_size,
-                               &error_fatal);
-        vmstate_register_ram_global(flash_1);
+        memory_region_init_ram(flash_1, NULL, "omap_sx1.flash1-0",
+                               flash1_size, &error_fatal);
         memory_region_set_readonly(flash_1, true);
         memory_region_add_subregion(address_space, OMAP_CS1_BASE, flash_1);
 
diff --git a/hw/arm/palm.c b/hw/arm/palm.c
index 7f460732e3..64cf8ca921 100644
--- a/hw/arm/palm.c
+++ b/hw/arm/palm.c
@@ -216,7 +216,6 @@ static void palmte_init(MachineState *machine)
     /* External Flash (EMIFS) */
     memory_region_init_ram(flash, NULL, "palmte.flash", flash_size,
                            &error_fatal);
-    vmstate_register_ram_global(flash);
     memory_region_set_readonly(flash, true);
     memory_region_add_subregion(address_space_mem, OMAP_CS0_BASE, flash);
 
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index 731ed08de7..194b0bc808 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -2076,11 +2076,9 @@ PXA2xxState *pxa270_init(MemoryRegion *address_space,
     /* SDRAM & Internal Memory Storage */
     memory_region_init_ram(&s->sdram, NULL, "pxa270.sdram", sdram_size,
                            &error_fatal);
-    vmstate_register_ram_global(&s->sdram);
     memory_region_add_subregion(address_space, PXA2XX_SDRAM_BASE, &s->sdram);
     memory_region_init_ram(&s->internal, NULL, "pxa270.internal", 0x40000,
                            &error_fatal);
-    vmstate_register_ram_global(&s->internal);
     memory_region_add_subregion(address_space, PXA2XX_INTERNAL_BASE,
                                 &s->internal);
 
@@ -2208,11 +2206,9 @@ PXA2xxState *pxa255_init(MemoryRegion *address_space, unsigned int sdram_size)
     /* SDRAM & Internal Memory Storage */
     memory_region_init_ram(&s->sdram, NULL, "pxa255.sdram", sdram_size,
                            &error_fatal);
-    vmstate_register_ram_global(&s->sdram);
     memory_region_add_subregion(address_space, PXA2XX_SDRAM_BASE, &s->sdram);
     memory_region_init_ram(&s->internal, NULL, "pxa255.internal",
                            PXA2XX_INTERNAL_SIZE, &error_fatal);
-    vmstate_register_ram_global(&s->internal);
     memory_region_add_subregion(address_space, PXA2XX_INTERNAL_BASE,
                                 &s->internal);
 
diff --git a/hw/arm/realview.c b/hw/arm/realview.c
index b7d4753400..76ff5579bc 100644
--- a/hw/arm/realview.c
+++ b/hw/arm/realview.c
@@ -145,13 +145,11 @@ static void realview_init(MachineState *machine,
         ram_size = 0x20000000;
         memory_region_init_ram(ram_lo, NULL, "realview.lowmem", low_ram_size,
                                &error_fatal);
-        vmstate_register_ram_global(ram_lo);
         memory_region_add_subregion(sysmem, 0x20000000, ram_lo);
     }
 
     memory_region_init_ram(ram_hi, NULL, "realview.highmem", ram_size,
                            &error_fatal);
-    vmstate_register_ram_global(ram_hi);
     low_ram_size = ram_size;
     if (low_ram_size > 0x10000000)
       low_ram_size = 0x10000000;
@@ -347,7 +345,6 @@ static void realview_init(MachineState *machine,
        until after Linux boots the secondary CPUs.  */
     memory_region_init_ram(ram_hack, NULL, "realview.hack", 0x1000,
                            &error_fatal);
-    vmstate_register_ram_global(ram_hack);
     memory_region_add_subregion(sysmem, SMP_BOOT_ADDR, ram_hack);
 
     realview_binfo.ram_size = ram_size;
diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c
index 93bde14743..7f588cea21 100644
--- a/hw/arm/spitz.c
+++ b/hw/arm/spitz.c
@@ -920,7 +920,6 @@ static void spitz_common_init(MachineState *machine,
     sl_flash_register(mpu, (model == spitz) ? FLASH_128M : FLASH_1024M);
 
     memory_region_init_ram(rom, NULL, "spitz.rom", SPITZ_ROM, &error_fatal);
-    vmstate_register_ram_global(rom);
     memory_region_set_readonly(rom, true);
     memory_region_add_subregion(address_space_mem, 0, rom);
 
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
index cf6e7be083..408c1a14d3 100644
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -1290,13 +1290,11 @@ static void stellaris_init(const char *kernel_filename, const char *cpu_model,
     /* Flash programming is done via the SCU, so pretend it is ROM.  */
     memory_region_init_ram(flash, NULL, "stellaris.flash", flash_size,
                            &error_fatal);
-    vmstate_register_ram_global(flash);
     memory_region_set_readonly(flash, true);
     memory_region_add_subregion(system_memory, 0, flash);
 
     memory_region_init_ram(sram, NULL, "stellaris.sram", sram_size,
                            &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(system_memory, 0x20000000, sram);
 
     nvic = armv7m_init(system_memory, flash_size, NUM_IRQ_LINES,
diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c
index 6e1260d2ed..f61e735f0f 100644
--- a/hw/arm/stm32f205_soc.c
+++ b/hw/arm/stm32f205_soc.c
@@ -100,8 +100,6 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
     memory_region_init_alias(flash_alias, NULL, "STM32F205.flash.alias",
                              flash, 0, FLASH_SIZE);
 
-    vmstate_register_ram_global(flash);
-
     memory_region_set_readonly(flash, true);
     memory_region_set_readonly(flash_alias, true);
 
@@ -110,7 +108,6 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
 
     memory_region_init_ram(sram, NULL, "STM32F205.sram", SRAM_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram);
 
     armv7m = DEVICE(&s->armv7m);
diff --git a/hw/arm/tosa.c b/hw/arm/tosa.c
index 2421b8150d..8b757ff6a3 100644
--- a/hw/arm/tosa.c
+++ b/hw/arm/tosa.c
@@ -235,7 +235,6 @@ static void tosa_init(MachineState *machine)
     mpu = pxa255_init(address_space_mem, tosa_binfo.ram_size);
 
     memory_region_init_ram(rom, NULL, "tosa.rom", TOSA_ROM, &error_fatal);
-    vmstate_register_ram_global(rom);
     memory_region_set_readonly(rom, true);
     memory_region_add_subregion(address_space_mem, 0, rom);
 
diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c
index c6b1e674b4..528c65ddb6 100644
--- a/hw/arm/vexpress.c
+++ b/hw/arm/vexpress.c
@@ -392,7 +392,6 @@ static void a15_daughterboard_init(const VexpressMachineState *vms,
     /* 0x2e000000: system SRAM */
     memory_region_init_ram(sram, NULL, "vexpress.a15sram", 0x10000,
                            &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(sysmem, 0x2e000000, sram);
 
     /* 0x7ffb0000: DMA330 DMA controller: not modelled */
@@ -675,13 +674,11 @@ static void vexpress_common_init(MachineState *machine)
     sram_size = 0x2000000;
     memory_region_init_ram(sram, NULL, "vexpress.sram", sram_size,
                            &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(sysmem, map[VE_SRAM], sram);
 
     vram_size = 0x800000;
     memory_region_init_ram(vram, NULL, "vexpress.vram", vram_size,
                            &error_fatal);
-    vmstate_register_ram_global(vram);
     memory_region_add_subregion(sysmem, map[VE_VIDEORAM], vram);
 
     /* 0x4e000000 LAN9118 Ethernet */
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 010f7244bf..31739d75a3 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1155,8 +1155,8 @@ static void create_secure_ram(VirtMachineState *vms,
     hwaddr base = vms->memmap[VIRT_SECURE_MEM].base;
     hwaddr size = vms->memmap[VIRT_SECURE_MEM].size;
 
-    memory_region_init_ram(secram, NULL, "virt.secure-ram", size, &error_fatal);
-    vmstate_register_ram_global(secram);
+    memory_region_init_ram(secram, NULL, "virt.secure-ram", size,
+                           &error_fatal);
     memory_region_add_subregion(secure_sysmem, base, secram);
 
     nodename = g_strdup_printf("/secram@%" PRIx64, base);
diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
index 3985356fc2..6b11a75e67 100644
--- a/hw/arm/xilinx_zynq.c
+++ b/hw/arm/xilinx_zynq.c
@@ -206,7 +206,6 @@ static void zynq_init(MachineState *machine)
     /* 256K of on-chip memory */
     memory_region_init_ram(ocm_ram, NULL, "zynq.ocm_ram", 256 << 10,
                            &error_fatal);
-    vmstate_register_ram_global(ocm_ram);
     memory_region_add_subregion(address_space_mem, 0xFFFC0000, ocm_ram);
 
     DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0);
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index 64f52f80a5..9eceadbdc8 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -228,7 +228,6 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
 
         memory_region_init_ram(&s->ocm_ram[i], NULL, ocm_name,
                                XLNX_ZYNQMP_OCM_RAM_SIZE, &error_fatal);
-        vmstate_register_ram_global(&s->ocm_ram[i]);
         memory_region_add_subregion(get_system_memory(),
                                     XLNX_ZYNQMP_OCM_RAM_0_ADDRESS +
                                         i * XLNX_ZYNQMP_OCM_RAM_SIZE,
diff --git a/hw/block/onenand.c b/hw/block/onenand.c
index ddf5492426..b7423607d9 100644
--- a/hw/block/onenand.c
+++ b/hw/block/onenand.c
@@ -807,7 +807,7 @@ static int onenand_initfn(SysBusDevice *sbd)
     }
     s->otp = memset(g_malloc((64 + 2) << PAGE_SHIFT),
                     0xff, (64 + 2) << PAGE_SHIFT);
-    memory_region_init_ram(&s->ram, OBJECT(s), "onenand.ram",
+    memory_region_init_ram_nomigrate(&s->ram, OBJECT(s), "onenand.ram",
                            0xc000 << s->shift, &error_fatal);
     vmstate_register_ram_global(&s->ram);
     ram = memory_region_get_ram_ptr(&s->ram);
diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index 594d4cf6fe..1113ab1ccf 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -753,7 +753,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
         return;
     }
 
-    vmstate_register_ram(&pfl->mem, DEVICE(pfl));
     pfl->storage = memory_region_get_ram_ptr(&pfl->mem);
     sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem);
 
diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c
index e6c5c6c25d..c81ddd3a99 100644
--- a/hw/block/pflash_cfi02.c
+++ b/hw/block/pflash_cfi02.c
@@ -629,7 +629,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
         return;
     }
 
-    vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl));
     pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem);
     pfl->chip_len = chip_len;
 
diff --git a/hw/cris/axis_dev88.c b/hw/cris/axis_dev88.c
index 60df8877c1..80674f6bbb 100644
--- a/hw/cris/axis_dev88.c
+++ b/hw/cris/axis_dev88.c
@@ -281,9 +281,8 @@ void axisdev88_init(MachineState *machine)
 
     /* The ETRAX-FS has 128Kb on chip ram, the docs refer to it as the 
        internal memory.  */
-    memory_region_init_ram(phys_intmem, NULL, "axisdev88.chipram", INTMEM_SIZE,
-                           &error_fatal);
-    vmstate_register_ram_global(phys_intmem);
+    memory_region_init_ram(phys_intmem, NULL, "axisdev88.chipram",
+                           INTMEM_SIZE, &error_fatal);
     memory_region_add_subregion(address_space_mem, 0x38000000, phys_intmem);
 
       /* Attach a NAND flash to CS1.  */
diff --git a/hw/display/cg3.c b/hw/display/cg3.c
index 1de15a1d34..e069c4484c 100644
--- a/hw/display/cg3.c
+++ b/hw/display/cg3.c
@@ -283,7 +283,7 @@ static void cg3_initfn(Object *obj)
     SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
     CG3State *s = CG3(obj);
 
-    memory_region_init_ram(&s->rom, obj, "cg3.prom", FCODE_MAX_ROM_SIZE,
+    memory_region_init_ram_nomigrate(&s->rom, obj, "cg3.prom", FCODE_MAX_ROM_SIZE,
                            &error_fatal);
     memory_region_set_readonly(&s->rom, true);
     sysbus_init_mmio(sbd, &s->rom);
@@ -314,7 +314,6 @@ static void cg3_realizefn(DeviceState *dev, Error **errp)
     memory_region_init_ram(&s->vram_mem, NULL, "cg3.vram", s->vram_size,
                            &error_fatal);
     memory_region_set_log(&s->vram_mem, true, DIRTY_MEMORY_VGA);
-    vmstate_register_ram_global(&s->vram_mem);
     sysbus_init_mmio(sbd, &s->vram_mem);
 
     sysbus_init_irq(sbd, &s->irq);
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 3c1688e7cb..7f8c73b56d 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -2091,14 +2091,12 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp)
     qxl->rom_size = qxl_rom_size();
     memory_region_init_ram(&qxl->rom_bar, OBJECT(qxl), "qxl.vrom",
                            qxl->rom_size, &error_fatal);
-    vmstate_register_ram(&qxl->rom_bar, &qxl->pci.qdev);
     init_qxl_rom(qxl);
     init_qxl_ram(qxl);
 
     qxl->guest_surfaces.cmds = g_new0(QXLPHYSICAL, qxl->ssd.num_surfaces);
     memory_region_init_ram(&qxl->vram_bar, OBJECT(qxl), "qxl.vram",
                            qxl->vram_size, &error_fatal);
-    vmstate_register_ram(&qxl->vram_bar, &qxl->pci.qdev);
     memory_region_init_alias(&qxl->vram32_bar, OBJECT(qxl), "qxl.vram32",
                              &qxl->vram_bar, 0, qxl->vram32_size);
 
@@ -2200,7 +2198,6 @@ static void qxl_realize_secondary(PCIDevice *dev, Error **errp)
     qxl_init_ramsize(qxl);
     memory_region_init_ram(&qxl->vga.vram, OBJECT(dev), "qxl.vgavram",
                            qxl->vga.vram_size, &error_fatal);
-    vmstate_register_ram(&qxl->vga.vram, &qxl->pci.qdev);
     qxl->vga.vram_ptr = memory_region_get_ram_ptr(&qxl->vga.vram);
     qxl->vga.con = graphic_console_init(DEVICE(dev), 0, &qxl_ops, qxl);
 
diff --git a/hw/display/sm501.c b/hw/display/sm501.c
index 9d254ef2e1..af792c533b 100644
--- a/hw/display/sm501.c
+++ b/hw/display/sm501.c
@@ -1578,7 +1578,7 @@ static void sm501_init(SM501State *s, DeviceState *dev,
                   s->local_mem_size_index);
 
     /* local memory */
-    memory_region_init_ram(&s->local_mem_region, OBJECT(dev), "sm501.local",
+    memory_region_init_ram_nomigrate(&s->local_mem_region, OBJECT(dev), "sm501.local",
                            get_local_mem_size(s), &error_fatal);
     vmstate_register_ram_global(&s->local_mem_region);
     memory_region_set_log(&s->local_mem_region, true, DIRTY_MEMORY_VGA);
diff --git a/hw/display/tc6393xb.c b/hw/display/tc6393xb.c
index 92f7120acc..74d10af3d4 100644
--- a/hw/display/tc6393xb.c
+++ b/hw/display/tc6393xb.c
@@ -588,7 +588,6 @@ TC6393xbState *tc6393xb_init(MemoryRegion *sysmem, uint32_t base, qemu_irq irq)
 
     memory_region_init_ram(&s->vram, NULL, "tc6393xb.vram", 0x100000,
                            &error_fatal);
-    vmstate_register_ram_global(&s->vram);
     s->vram_ptr = memory_region_get_ram_ptr(&s->vram);
     memory_region_add_subregion(sysmem, base + 0x100000, &s->vram);
     s->scr_width = 480;
diff --git a/hw/display/tcx.c b/hw/display/tcx.c
index 6593c1d6af..daa93e0929 100644
--- a/hw/display/tcx.c
+++ b/hw/display/tcx.c
@@ -752,7 +752,7 @@ static void tcx_initfn(Object *obj)
     SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
     TCXState *s = TCX(obj);
 
-    memory_region_init_ram(&s->rom, obj, "tcx.prom", FCODE_MAX_ROM_SIZE,
+    memory_region_init_ram_nomigrate(&s->rom, obj, "tcx.prom", FCODE_MAX_ROM_SIZE,
                            &error_fatal);
     memory_region_set_readonly(&s->rom, true);
     sysbus_init_mmio(sbd, &s->rom);
@@ -812,7 +812,7 @@ static void tcx_realizefn(DeviceState *dev, Error **errp)
     uint8_t *vram_base;
     char *fcode_filename;
 
-    memory_region_init_ram(&s->vram_mem, OBJECT(s), "tcx.vram",
+    memory_region_init_ram_nomigrate(&s->vram_mem, OBJECT(s), "tcx.vram",
                            s->vram_size * (1 + 4 + 4), &error_fatal);
     vmstate_register_ram_global(&s->vram_mem);
     memory_region_set_log(&s->vram_mem, true, DIRTY_MEMORY_VGA);
diff --git a/hw/display/vga.c b/hw/display/vga.c
index 80508b83f4..63421f9ee8 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -2166,7 +2166,7 @@ void vga_common_init(VGACommonState *s, Object *obj, bool global_vmstate)
     }
 
     s->is_vbe_vmstate = 1;
-    memory_region_init_ram(&s->vram, obj, "vga.vram", s->vram_size,
+    memory_region_init_ram_nomigrate(&s->vram, obj, "vga.vram", s->vram_size,
                            &error_fatal);
     vmstate_register_ram(&s->vram, global_vmstate ? NULL : DEVICE(obj));
     xen_register_framebuffer(&s->vram);
diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c
index c989cef1cd..4a64b41259 100644
--- a/hw/display/vmware_vga.c
+++ b/hw/display/vmware_vga.c
@@ -1241,7 +1241,6 @@ static void vmsvga_init(DeviceState *dev, struct vmsvga_state_s *s,
     s->fifo_size = SVGA_FIFO_SIZE;
     memory_region_init_ram(&s->fifo_ram, NULL, "vmsvga.fifo", s->fifo_size,
                            &error_fatal);
-    vmstate_register_ram_global(&s->fifo_ram);
     s->fifo_ptr = memory_region_get_ram_ptr(&s->fifo_ram);
 
     vga_common_init(&s->vga, OBJECT(dev), true);
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index bf541cafd6..22e16031b0 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1443,7 +1443,6 @@ void pc_memory_init(PCMachineState *pcms,
     option_rom_mr = g_malloc(sizeof(*option_rom_mr));
     memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(option_rom_mr);
     memory_region_add_subregion_overlap(rom_memory,
                                         PC_ROM_MIN_VGA,
                                         option_rom_mr,
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index f915ad0a36..6b183747fc 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -59,7 +59,6 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory,
     isa_bios = g_malloc(sizeof(*isa_bios));
     memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
                            &error_fatal);
-    vmstate_register_ram_global(isa_bios);
     memory_region_add_subregion_overlap(rom_memory,
                                         0x100000 - isa_bios_size,
                                         isa_bios,
@@ -196,7 +195,6 @@ static void old_pc_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw)
     }
     bios = g_malloc(sizeof(*bios));
     memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
-    vmstate_register_ram_global(bios);
     if (!isapc_ram_fw) {
         memory_region_set_readonly(bios, true);
     }
diff --git a/hw/i386/pci-assign-load-rom.c b/hw/i386/pci-assign-load-rom.c
index fd59076e7a..43429b66be 100644
--- a/hw/i386/pci-assign-load-rom.c
+++ b/hw/i386/pci-assign-load-rom.c
@@ -59,7 +59,7 @@ void *pci_assign_dev_load_option_rom(PCIDevice *dev, struct Object *owner,
     fseek(fp, 0, SEEK_SET);
 
     snprintf(name, sizeof(name), "%s.rom", object_get_typename(owner));
-    memory_region_init_ram(&dev->rom, owner, name, st.st_size, &error_abort);
+    memory_region_init_ram_nomigrate(&dev->rom, owner, name, st.st_size, &error_abort);
     vmstate_register_ram(&dev->rom, &dev->qdev);
     ptr = memory_region_get_ram_ptr(&dev->rom);
     memset(ptr, 0xff, st.st_size);
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index cffa7e2017..3d951a3794 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -215,7 +215,6 @@ static void xen_ram_init(PCMachineState *pcms,
     memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len,
                            &error_fatal);
     *ram_memory_p = &ram_memory;
-    vmstate_register_ram_global(&ram_memory);
 
     memory_region_init_alias(&ram_640k, NULL, "xen.ram.640k",
                              &ram_memory, 0, 0xa0000);
diff --git a/hw/input/milkymist-softusb.c b/hw/input/milkymist-softusb.c
index 40dfca157f..ef8f47cd83 100644
--- a/hw/input/milkymist-softusb.c
+++ b/hw/input/milkymist-softusb.c
@@ -256,12 +256,12 @@ static int milkymist_softusb_init(SysBusDevice *dev)
     sysbus_init_mmio(dev, &s->regs_region);
 
     /* register pmem and dmem */
-    memory_region_init_ram(&s->pmem, OBJECT(s), "milkymist-softusb.pmem",
+    memory_region_init_ram_nomigrate(&s->pmem, OBJECT(s), "milkymist-softusb.pmem",
                            s->pmem_size, &error_fatal);
     vmstate_register_ram_global(&s->pmem);
     s->pmem_ptr = memory_region_get_ram_ptr(&s->pmem);
     sysbus_init_mmio(dev, &s->pmem);
-    memory_region_init_ram(&s->dmem, OBJECT(s), "milkymist-softusb.dmem",
+    memory_region_init_ram_nomigrate(&s->dmem, OBJECT(s), "milkymist-softusb.dmem",
                            s->dmem_size, &error_fatal);
     vmstate_register_ram_global(&s->dmem);
     s->dmem_ptr = memory_region_get_ram_ptr(&s->dmem);
diff --git a/hw/m68k/an5206.c b/hw/m68k/an5206.c
index 142bab98c9..c76244176f 100644
--- a/hw/m68k/an5206.c
+++ b/hw/m68k/an5206.c
@@ -61,7 +61,6 @@ static void an5206_init(MachineState *machine)
 
     /* Internal SRAM.  */
     memory_region_init_ram(sram, NULL, "an5206.sram", 512, &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(address_space_mem, AN5206_RAMBAR_ADDR, sram);
 
     mcf5206_init(address_space_mem, AN5206_MBAR_ADDR, cpu);
diff --git a/hw/m68k/mcf5208.c b/hw/m68k/mcf5208.c
index 656351834e..f4b1387c0d 100644
--- a/hw/m68k/mcf5208.c
+++ b/hw/m68k/mcf5208.c
@@ -249,7 +249,6 @@ static void mcf5208evb_init(MachineState *machine)
 
     /* Internal SRAM.  */
     memory_region_init_ram(sram, NULL, "mcf5208.sram", 16384, &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(address_space_mem, 0x80000000, sram);
 
     /* Internal peripherals.  */
diff --git a/hw/microblaze/petalogix_ml605_mmu.c b/hw/microblaze/petalogix_ml605_mmu.c
index 4968bdbb28..b664dc0f9c 100644
--- a/hw/microblaze/petalogix_ml605_mmu.c
+++ b/hw/microblaze/petalogix_ml605_mmu.c
@@ -98,12 +98,10 @@ petalogix_ml605_init(MachineState *machine)
     /* Attach emulated BRAM through the LMB.  */
     memory_region_init_ram(phys_lmb_bram, NULL, "petalogix_ml605.lmb_bram",
                            LMB_BRAM_SIZE, &error_fatal);
-    vmstate_register_ram_global(phys_lmb_bram);
     memory_region_add_subregion(address_space_mem, 0x00000000, phys_lmb_bram);
 
     memory_region_init_ram(phys_ram, NULL, "petalogix_ml605.ram", ram_size,
                            &error_fatal);
-    vmstate_register_ram_global(phys_ram);
     memory_region_add_subregion(address_space_mem, MEMORY_BASEADDR, phys_ram);
 
     dinfo = drive_get(IF_PFLASH, 0, 0);
diff --git a/hw/microblaze/petalogix_s3adsp1800_mmu.c b/hw/microblaze/petalogix_s3adsp1800_mmu.c
index 423bcd7f6c..5cb4deb69e 100644
--- a/hw/microblaze/petalogix_s3adsp1800_mmu.c
+++ b/hw/microblaze/petalogix_s3adsp1800_mmu.c
@@ -78,12 +78,10 @@ petalogix_s3adsp1800_init(MachineState *machine)
     memory_region_init_ram(phys_lmb_bram, NULL,
                            "petalogix_s3adsp1800.lmb_bram", LMB_BRAM_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(phys_lmb_bram);
     memory_region_add_subregion(sysmem, 0x00000000, phys_lmb_bram);
 
     memory_region_init_ram(phys_ram, NULL, "petalogix_s3adsp1800.ram",
                            ram_size, &error_fatal);
-    vmstate_register_ram_global(phys_ram);
     memory_region_add_subregion(sysmem, ddr_base, phys_ram);
 
     dinfo = drive_get(IF_PFLASH, 0, 0);
diff --git a/hw/mips/boston.c b/hw/mips/boston.c
index 146be2ae74..7985c60dde 100644
--- a/hw/mips/boston.c
+++ b/hw/mips/boston.c
@@ -484,7 +484,7 @@ static void boston_mach_init(MachineState *machine)
     sysbus_mmio_map_overlap(SYS_BUS_DEVICE(s->cps), 0, 0, 1);
 
     flash =  g_new(MemoryRegion, 1);
-    memory_region_init_rom_device(flash, NULL, &boston_flash_ops, s,
+    memory_region_init_rom_device_nomigrate(flash, NULL, &boston_flash_ops, s,
                                   "boston.flash", 128 * M_BYTE, &err);
     memory_region_add_subregion_overlap(sys_mem, 0x18000000, flash, 0);
 
diff --git a/hw/mips/mips_fulong2e.c b/hw/mips/mips_fulong2e.c
index dbe2805acb..3f3cb32651 100644
--- a/hw/mips/mips_fulong2e.c
+++ b/hw/mips/mips_fulong2e.c
@@ -296,7 +296,6 @@ static void mips_fulong2e_init(MachineState *machine)
     memory_region_allocate_system_memory(ram, NULL, "fulong2e.ram", ram_size);
     memory_region_init_ram(bios, NULL, "fulong2e.bios", bios_size,
                            &error_fatal);
-    vmstate_register_ram_global(bios);
     memory_region_set_readonly(bios, true);
 
     memory_region_add_subregion(address_space_mem, 0, ram);
diff --git a/hw/mips/mips_jazz.c b/hw/mips/mips_jazz.c
index 1f69322c15..df2262a2a8 100644
--- a/hw/mips/mips_jazz.c
+++ b/hw/mips/mips_jazz.c
@@ -177,7 +177,6 @@ static void mips_jazz_init(MachineState *machine,
 
     memory_region_init_ram(bios, NULL, "mips_jazz.bios", MAGNUM_BIOS_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(bios);
     memory_region_set_readonly(bios, true);
     memory_region_init_alias(bios2, NULL, "mips_jazz.bios", bios,
                              0, MAGNUM_BIOS_SIZE);
@@ -244,7 +243,6 @@ static void mips_jazz_init(MachineState *machine,
             MemoryRegion *rom_mr = g_new(MemoryRegion, 1);
             memory_region_init_ram(rom_mr, NULL, "g364fb.rom", 0x80000,
                                    &error_fatal);
-            vmstate_register_ram_global(rom_mr);
             memory_region_set_readonly(rom_mr, true);
             uint8_t *rom = memory_region_get_ram_ptr(rom_mr);
             memory_region_add_subregion(address_space, 0x60000000, rom_mr);
diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index 8cb9d3c3ce..3487d16f61 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -1178,7 +1178,7 @@ void mips_malta_init(MachineState *machine)
      * handled by an overlapping region as the resulting ROM code subpage
      * regions are not executable.
      */
-    memory_region_init_ram(bios_copy, NULL, "bios.1fc", BIOS_SIZE,
+    memory_region_init_ram_nomigrate(bios_copy, NULL, "bios.1fc", BIOS_SIZE,
                            &error_fatal);
     if (!rom_copy(memory_region_get_ram_ptr(bios_copy),
                   FLASH_ADDRESS, BIOS_SIZE)) {
diff --git a/hw/mips/mips_mipssim.c b/hw/mips/mips_mipssim.c
index 1b91195006..6990b1b0dd 100644
--- a/hw/mips/mips_mipssim.c
+++ b/hw/mips/mips_mipssim.c
@@ -179,7 +179,6 @@ mips_mipssim_init(MachineState *machine)
                                          ram_size);
     memory_region_init_ram(bios, NULL, "mips_mipssim.bios", BIOS_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(bios);
     memory_region_set_readonly(bios, true);
 
     memory_region_add_subregion(address_space_mem, 0, ram);
diff --git a/hw/mips/mips_r4k.c b/hw/mips/mips_r4k.c
index f4de9fc343..690874be2b 100644
--- a/hw/mips/mips_r4k.c
+++ b/hw/mips/mips_r4k.c
@@ -238,7 +238,6 @@ void mips_r4k_init(MachineState *machine)
         bios = g_new(MemoryRegion, 1);
         memory_region_init_ram(bios, NULL, "mips_r4k.bios", BIOS_SIZE,
                                &error_fatal);
-        vmstate_register_ram_global(bios);
         memory_region_set_readonly(bios, true);
         memory_region_add_subregion(get_system_memory(), 0x1fc00000, bios);
 
diff --git a/hw/moxie/moxiesim.c b/hw/moxie/moxiesim.c
index 3069834cf4..df3f1249ae 100644
--- a/hw/moxie/moxiesim.c
+++ b/hw/moxie/moxiesim.c
@@ -129,11 +129,9 @@ static void moxiesim_init(MachineState *machine)
 
     /* Allocate RAM. */
     memory_region_init_ram(ram, NULL, "moxiesim.ram", ram_size, &error_fatal);
-    vmstate_register_ram_global(ram);
     memory_region_add_subregion(address_space_mem, ram_base, ram);
 
-    memory_region_init_ram(rom, NULL, "moxie.rom", 128*0x1000, &error_fatal);
-    vmstate_register_ram_global(rom);
+    memory_region_init_ram(rom, NULL, "moxie.rom", 128 * 0x1000, &error_fatal);
     memory_region_add_subregion(get_system_memory(), 0x1000, rom);
 
     if (kernel_filename) {
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
index b53fcaa8bc..f2d2ce344c 100644
--- a/hw/net/dp8393x.c
+++ b/hw/net/dp8393x.c
@@ -887,7 +887,7 @@ static void dp8393x_realize(DeviceState *dev, Error **errp)
     s->watchdog = timer_new_ns(QEMU_CLOCK_VIRTUAL, dp8393x_watchdog, s);
     s->regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux */
 
-    memory_region_init_ram(&s->prom, OBJECT(dev),
+    memory_region_init_ram_nomigrate(&s->prom, OBJECT(dev),
                            "dp8393x-prom", SONIC_PROM_SIZE, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
diff --git a/hw/net/milkymist-minimac2.c b/hw/net/milkymist-minimac2.c
index c3a12e1197..3eaa19dfde 100644
--- a/hw/net/milkymist-minimac2.c
+++ b/hw/net/milkymist-minimac2.c
@@ -466,7 +466,7 @@ static int milkymist_minimac2_init(SysBusDevice *sbd)
     sysbus_init_mmio(sbd, &s->regs_region);
 
     /* register buffers memory */
-    memory_region_init_ram(&s->buffers, OBJECT(dev), "milkymist-minimac2.buffers",
+    memory_region_init_ram_nomigrate(&s->buffers, OBJECT(dev), "milkymist-minimac2.buffers",
                            buffers_size, &error_fatal);
     vmstate_register_ram_global(&s->buffers);
     s->rx0_buf = memory_region_get_ram_ptr(&s->buffers);
diff --git a/hw/nios2/10m50_devboard.c b/hw/nios2/10m50_devboard.c
index 051be73e9a..b6868b8233 100644
--- a/hw/nios2/10m50_devboard.c
+++ b/hw/nios2/10m50_devboard.c
@@ -57,19 +57,19 @@ static void nios2_10m50_ghrd_init(MachineState *machine)
     int i;
 
     /* Physical TCM (tb_ram_1k) with alias at 0xc0000000 */
-    memory_region_init_ram(phys_tcm, NULL, "nios2.tcm", tcm_size, &error_abort);
+    memory_region_init_ram(phys_tcm, NULL, "nios2.tcm", tcm_size,
+                           &error_abort);
     memory_region_init_alias(phys_tcm_alias, NULL, "nios2.tcm.alias",
                              phys_tcm, 0, tcm_size);
-    vmstate_register_ram_global(phys_tcm);
     memory_region_add_subregion(address_space_mem, tcm_base, phys_tcm);
     memory_region_add_subregion(address_space_mem, 0xc0000000 + tcm_base,
                                 phys_tcm_alias);
 
     /* Physical DRAM with alias at 0xc0000000 */
-    memory_region_init_ram(phys_ram, NULL, "nios2.ram", ram_size, &error_abort);
+    memory_region_init_ram(phys_ram, NULL, "nios2.ram", ram_size,
+                           &error_abort);
     memory_region_init_alias(phys_ram_alias, NULL, "nios2.ram.alias",
                              phys_ram, 0, ram_size);
-    vmstate_register_ram_global(phys_ram);
     memory_region_add_subregion(address_space_mem, ram_base, phys_ram);
     memory_region_add_subregion(address_space_mem, 0xc0000000 + ram_base,
                                 phys_ram_alias);
diff --git a/hw/openrisc/openrisc_sim.c b/hw/openrisc/openrisc_sim.c
index fc0d0967b7..e1eeffc490 100644
--- a/hw/openrisc/openrisc_sim.c
+++ b/hw/openrisc/openrisc_sim.c
@@ -120,7 +120,6 @@ static void openrisc_sim_init(MachineState *machine)
 
     ram = g_malloc(sizeof(*ram));
     memory_region_init_ram(ram, NULL, "openrisc.ram", ram_size, &error_fatal);
-    vmstate_register_ram_global(ram);
     memory_region_add_subregion(get_system_memory(), 0, ram);
 
     cpu_openrisc_pic_init(cpu);
diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c
index 900a6edfcf..8b293ba0f1 100644
--- a/hw/pci-host/prep.c
+++ b/hw/pci-host/prep.c
@@ -304,7 +304,7 @@ static void raven_realize(PCIDevice *d, Error **errp)
     d->config[0x0D] = 0x10; // latency_timer
     d->config[0x34] = 0x00; // capabilities_pointer
 
-    memory_region_init_ram(&s->bios, OBJECT(s), "bios", BIOS_SIZE,
+    memory_region_init_ram_nomigrate(&s->bios, OBJECT(s), "bios", BIOS_SIZE,
                            &error_fatal);
     memory_region_set_readonly(&s->bios, true);
     memory_region_add_subregion(get_system_memory(), (uint32_t)(-BIOS_SIZE),
diff --git a/hw/pci-host/xilinx-pcie.c b/hw/pci-host/xilinx-pcie.c
index 2c78dcfc26..4613dda1d2 100644
--- a/hw/pci-host/xilinx-pcie.c
+++ b/hw/pci-host/xilinx-pcie.c
@@ -120,7 +120,7 @@ static void xilinx_pcie_host_realize(DeviceState *dev, Error **errp)
     memory_region_set_enabled(&s->mmio, false);
 
     /* dummy I/O region */
-    memory_region_init_ram(&s->io, OBJECT(s), "io", 16, NULL);
+    memory_region_init_ram_nomigrate(&s->io, OBJECT(s), "io", 16, NULL);
     memory_region_set_enabled(&s->io, false);
 
     /* interrupt out */
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 0c6f74a347..258fbe51e2 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2236,7 +2236,6 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
     }
     pdev->has_rom = true;
     memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, size, &error_fatal);
-    vmstate_register_ram(&pdev->rom, &pdev->qdev);
     ptr = memory_region_get_ram_ptr(&pdev->rom);
     load_image(path, ptr);
     g_free(path);
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index bae1c0ac99..3056d5f075 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -206,7 +206,6 @@ static void ppc_core99_init(MachineState *machine)
     /* allocate and load BIOS */
     memory_region_init_ram(bios, NULL, "ppc_core99.bios", BIOS_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(bios);
 
     if (bios_name == NULL)
         bios_name = PROM_FILENAME;
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 97bb8541d7..f2ae60a360 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -143,7 +143,6 @@ static void ppc_heathrow_init(MachineState *machine)
     /* allocate and load BIOS */
     memory_region_init_ram(bios, NULL, "ppc_heathrow.bios", BIOS_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(bios);
 
     if (bios_name == NULL)
         bios_name = PROM_FILENAME;
diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c
index d01798f245..e92db2c66a 100644
--- a/hw/ppc/ppc405_boards.c
+++ b/hw/ppc/ppc405_boards.c
@@ -220,7 +220,6 @@ static void ref405ep_init(MachineState *machine)
     sram_size = 512 * 1024;
     memory_region_init_ram(sram, NULL, "ef405ep.sram", sram_size,
                            &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(sysmem, 0xFFF00000, sram);
     /* allocate and load BIOS */
 #ifdef DEBUG_BOARD_INIT
@@ -255,7 +254,6 @@ static void ref405ep_init(MachineState *machine)
         bios = g_new(MemoryRegion, 1);
         memory_region_init_ram(bios, NULL, "ef405ep.bios", BIOS_SIZE,
                                &error_fatal);
-        vmstate_register_ram_global(bios);
 
         if (bios_name == NULL)
             bios_name = BIOS_FILENAME;
@@ -556,7 +554,6 @@ static void taihu_405ep_init(MachineState *machine)
         bios = g_new(MemoryRegion, 1);
         memory_region_init_ram(bios, NULL, "taihu_405ep.bios", BIOS_SIZE,
                                &error_fatal);
-        vmstate_register_ram_global(bios);
         filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
         if (filename) {
             bios_size = load_image(filename, memory_region_get_ram_ptr(bios));
diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c
index fc32e96bf4..f6fe3e6f5e 100644
--- a/hw/ppc/ppc405_uc.c
+++ b/hw/ppc/ppc405_uc.c
@@ -980,7 +980,6 @@ static void ppc405_ocm_init(CPUPPCState *env)
     /* XXX: Size is 4096 or 0x04000000 */
     memory_region_init_ram(&ocm->isarc_ram, NULL, "ppc405.ocm", 4096,
                            &error_fatal);
-    vmstate_register_ram_global(&ocm->isarc_ram);
     memory_region_init_alias(&ocm->dsarc_ram, NULL, "ppc405.dsarc", &ocm->isarc_ram,
                              0, 4096);
     qemu_register_reset(&ocm_reset, ocm);
diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
index 83d6023894..9253dbbc64 100644
--- a/hw/s390x/sclp.c
+++ b/hw/s390x/sclp.c
@@ -273,7 +273,6 @@ static void assign_storage(SCLPDevice *sclp, SCCB *sccb)
              * instead of doing it via the ref count of the MemoryRegion. */
             object_ref(OBJECT(standby_ram));
             object_unparent(OBJECT(standby_ram));
-            vmstate_register_ram_global(standby_ram);
             memory_region_add_subregion(sysmem, offset, standby_ram);
         }
         /* The specified subregion is no longer in standby */
diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c
index e6fc74ed87..a0462adb97 100644
--- a/hw/sh4/r2d.c
+++ b/hw/sh4/r2d.c
@@ -260,7 +260,6 @@ static void r2d_init(MachineState *machine)
 
     /* Allocate memory space */
     memory_region_init_ram(sdram, NULL, "r2d.sdram", SDRAM_SIZE, &error_fatal);
-    vmstate_register_ram_global(sdram);
     memory_region_add_subregion(address_space_mem, SDRAM_BASE, sdram);
     /* Register peripherals */
     s = sh7750_init(cpu, address_space_mem);
diff --git a/hw/sh4/shix.c b/hw/sh4/shix.c
index fd00cc5ea2..e22eaf0c8f 100644
--- a/hw/sh4/shix.c
+++ b/hw/sh4/shix.c
@@ -64,16 +64,13 @@ static void shix_init(MachineState *machine)
 
     /* Allocate memory space */
     memory_region_init_ram(rom, NULL, "shix.rom", 0x4000, &error_fatal);
-    vmstate_register_ram_global(rom);
     memory_region_set_readonly(rom, true);
     memory_region_add_subregion(sysmem, 0x00000000, rom);
     memory_region_init_ram(&sdram[0], NULL, "shix.sdram1", 0x01000000,
                            &error_fatal);
-    vmstate_register_ram_global(&sdram[0]);
     memory_region_add_subregion(sysmem, 0x08000000, &sdram[0]);
     memory_region_init_ram(&sdram[1], NULL, "shix.sdram2", 0x01000000,
                            &error_fatal);
-    vmstate_register_ram_global(&sdram[1]);
     memory_region_add_subregion(sysmem, 0x0c000000, &sdram[1]);
 
     /* Load BIOS in 0 (and access it through P2, 0xA0000000) */
diff --git a/hw/sparc/leon3.c b/hw/sparc/leon3.c
index f415997649..d5ff188d9e 100644
--- a/hw/sparc/leon3.c
+++ b/hw/sparc/leon3.c
@@ -160,7 +160,6 @@ static void leon3_generic_hw_init(MachineState *machine)
     /* Allocate BIOS */
     prom_size = 8 * 1024 * 1024; /* 8Mb */
     memory_region_init_ram(prom, NULL, "Leon3.bios", prom_size, &error_fatal);
-    vmstate_register_ram_global(prom);
     memory_region_set_readonly(prom, true);
     memory_region_add_subregion(address_space_mem, 0x00000000, prom);
 
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index 0faff4619f..89dd8a96c3 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -590,7 +590,7 @@ static void idreg_init1(Object *obj)
     IDRegState *s = MACIO_ID_REGISTER(obj);
     SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 
-    memory_region_init_ram(&s->mem, obj,
+    memory_region_init_ram_nomigrate(&s->mem, obj,
                            "sun4m.idreg", sizeof(idreg_data), &error_fatal);
     vmstate_register_ram_global(&s->mem);
     memory_region_set_readonly(&s->mem, true);
@@ -631,7 +631,7 @@ static void afx_init1(Object *obj)
     AFXState *s = TCX_AFX(obj);
     SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 
-    memory_region_init_ram(&s->mem, obj, "sun4m.afx", 4, &error_fatal);
+    memory_region_init_ram_nomigrate(&s->mem, obj, "sun4m.afx", 4, &error_fatal);
     vmstate_register_ram_global(&s->mem);
     sysbus_init_mmio(dev, &s->mem);
 }
@@ -698,7 +698,7 @@ static void prom_init1(Object *obj)
     PROMState *s = OPENPROM(obj);
     SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 
-    memory_region_init_ram(&s->prom, obj, "sun4m.prom", PROM_SIZE_MAX,
+    memory_region_init_ram_nomigrate(&s->prom, obj, "sun4m.prom", PROM_SIZE_MAX,
                            &error_fatal);
     vmstate_register_ram_global(&s->prom);
     memory_region_set_readonly(&s->prom, true);
diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c
index 69f565db25..bbdb40c330 100644
--- a/hw/sparc64/sun4u.c
+++ b/hw/sparc64/sun4u.c
@@ -334,7 +334,7 @@ static void prom_init1(Object *obj)
     PROMState *s = OPENPROM(obj);
     SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 
-    memory_region_init_ram(&s->prom, obj, "sun4u.prom", PROM_SIZE_MAX,
+    memory_region_init_ram_nomigrate(&s->prom, obj, "sun4u.prom", PROM_SIZE_MAX,
                            &error_fatal);
     vmstate_register_ram_global(&s->prom);
     memory_region_set_readonly(&s->prom, true);
@@ -377,7 +377,7 @@ static void ram_realize(DeviceState *dev, Error **errp)
     RamDevice *d = SUN4U_RAM(dev);
     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
 
-    memory_region_init_ram(&d->ram, OBJECT(d), "sun4u.ram", d->size,
+    memory_region_init_ram_nomigrate(&d->ram, OBJECT(d), "sun4u.ram", d->size,
                            &error_fatal);
     vmstate_register_ram_global(&d->ram);
     sysbus_init_mmio(sbd, &d->ram);
diff --git a/hw/tricore/tricore_testboard.c b/hw/tricore/tricore_testboard.c
index 8910bf0f27..6c574231d5 100644
--- a/hw/tricore/tricore_testboard.c
+++ b/hw/tricore/tricore_testboard.c
@@ -80,24 +80,18 @@ static void tricore_testboard_init(MachineState *machine, int board_id)
         exit(1);
     }
     env = &cpu->env;
-    memory_region_init_ram(ext_cram, NULL, "powerlink_ext_c.ram", 2*1024*1024,
+    memory_region_init_ram(ext_cram, NULL, "powerlink_ext_c.ram",
+                           2 * 1024 * 1024, &error_fatal);
+    memory_region_init_ram(ext_dram, NULL, "powerlink_ext_d.ram",
+                           4 * 1024 * 1024, &error_fatal);
+    memory_region_init_ram(int_cram, NULL, "powerlink_int_c.ram", 48 * 1024,
                            &error_fatal);
-    vmstate_register_ram_global(ext_cram);
-    memory_region_init_ram(ext_dram, NULL, "powerlink_ext_d.ram", 4*1024*1024,
+    memory_region_init_ram(int_dram, NULL, "powerlink_int_d.ram", 48 * 1024,
                            &error_fatal);
-    vmstate_register_ram_global(ext_dram);
-    memory_region_init_ram(int_cram, NULL, "powerlink_int_c.ram", 48*1024,
-                           &error_fatal);
-    vmstate_register_ram_global(int_cram);
-    memory_region_init_ram(int_dram, NULL, "powerlink_int_d.ram", 48*1024,
-                           &error_fatal);
-    vmstate_register_ram_global(int_dram);
-    memory_region_init_ram(pcp_data, NULL, "powerlink_pcp_data.ram", 16*1024,
-                           &error_fatal);
-    vmstate_register_ram_global(pcp_data);
-    memory_region_init_ram(pcp_text, NULL, "powerlink_pcp_text.ram", 32*1024,
-                           &error_fatal);
-    vmstate_register_ram_global(pcp_text);
+    memory_region_init_ram(pcp_data, NULL, "powerlink_pcp_data.ram",
+                           16 * 1024, &error_fatal);
+    memory_region_init_ram(pcp_text, NULL, "powerlink_pcp_text.ram",
+                           32 * 1024, &error_fatal);
 
     memory_region_add_subregion(sysmem, 0x80000000, ext_cram);
     memory_region_add_subregion(sysmem, 0xa1000000, ext_dram);
diff --git a/hw/unicore32/puv3.c b/hw/unicore32/puv3.c
index 032078fd3e..e9d1a60b6f 100644
--- a/hw/unicore32/puv3.c
+++ b/hw/unicore32/puv3.c
@@ -80,7 +80,6 @@ static void puv3_board_init(CPUUniCore32State *env, ram_addr_t ram_size)
     /* SDRAM at address zero.  */
     memory_region_init_ram(ram_memory, NULL, "puv3.ram", ram_size,
                            &error_fatal);
-    vmstate_register_ram_global(ram_memory);
     memory_region_add_subregion(get_system_memory(), 0, ram_memory);
 }
 
diff --git a/hw/xtensa/sim.c b/hw/xtensa/sim.c
index 5521e9184a..249cd1e8c9 100644
--- a/hw/xtensa/sim.c
+++ b/hw/xtensa/sim.c
@@ -49,9 +49,7 @@ static void xtensa_create_memory_regions(const XtensaMemory *memory,
         g_string_printf(num_name, "%s%u", name, i);
         m = g_new(MemoryRegion, 1);
         memory_region_init_ram(m, NULL, num_name->str,
-                               memory->location[i].size,
-                               &error_fatal);
-        vmstate_register_ram_global(m);
+                               memory->location[i].size, &error_fatal);
         memory_region_add_subregion(get_system_memory(),
                                     memory->location[i].addr, m);
     }
diff --git a/hw/xtensa/xtfpga.c b/hw/xtensa/xtfpga.c
index d5ac080d4a..635a4d4ec3 100644
--- a/hw/xtensa/xtfpga.c
+++ b/hw/xtensa/xtfpga.c
@@ -147,7 +147,7 @@ static void lx60_net_init(MemoryRegion *address_space,
             sysbus_mmio_get_region(s, 1));
 
     ram = g_malloc(sizeof(*ram));
-    memory_region_init_ram(ram, OBJECT(s), "open_eth.ram", 16384,
+    memory_region_init_ram_nomigrate(ram, OBJECT(s), "open_eth.ram", 16384,
                            &error_fatal);
     vmstate_register_ram_global(ram);
     memory_region_add_subregion(address_space, buffers, ram);
@@ -251,7 +251,6 @@ static void lx_init(const LxBoardDesc *board, MachineState *machine)
     ram = g_malloc(sizeof(*ram));
     memory_region_init_ram(ram, NULL, "lx60.dram", machine->ram_size,
                            &error_fatal);
-    vmstate_register_ram_global(ram);
     memory_region_add_subregion(system_memory, 0, ram);
 
     system_io = g_malloc(sizeof(*system_io));
@@ -294,7 +293,6 @@ static void lx_init(const LxBoardDesc *board, MachineState *machine)
         rom = g_malloc(sizeof(*rom));
         memory_region_init_ram(rom, NULL, "lx60.sram", board->sram_size,
                                &error_fatal);
-        vmstate_register_ram_global(rom);
         memory_region_add_subregion(system_memory, 0xfe000000, rom);
 
         if (kernel_cmdline) {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 669a2797fd..5c6b761d81 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -324,7 +324,7 @@ struct BlockDriver {
      * Drain and stop any internal sources of requests in the driver, and
      * remain so until next I/O callback (e.g. bdrv_co_writev) is called.
      */
-    void (*bdrv_drain)(BlockDriverState *bs);
+    void coroutine_fn (*bdrv_co_drain)(BlockDriverState *bs);
 
     void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
                            Error **errp);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index b7966014fe..400dd4491b 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -420,8 +420,9 @@ void memory_region_init_io(MemoryRegion *mr,
                            uint64_t size);
 
 /**
- * memory_region_init_ram:  Initialize RAM memory region.  Accesses into the
- *                          region will modify memory directly.
+ * memory_region_init_ram_nomigrate:  Initialize RAM memory region.  Accesses
+ *                                    into the region will modify memory
+ *                                    directly.
  *
  * @mr: the #MemoryRegion to be initialized.
  * @owner: the object that tracks the region's reference count
@@ -429,12 +430,15 @@ void memory_region_init_io(MemoryRegion *mr,
  *        must be unique within any device
  * @size: size of the region.
  * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Note that this function does not do anything to cause the data in the
+ * RAM memory region to be migrated; that is the responsibility of the caller.
  */
-void memory_region_init_ram(MemoryRegion *mr,
-                            struct Object *owner,
-                            const char *name,
-                            uint64_t size,
-                            Error **errp);
+void memory_region_init_ram_nomigrate(MemoryRegion *mr,
+                                      struct Object *owner,
+                                      const char *name,
+                                      uint64_t size,
+                                      Error **errp);
 
 /**
  * memory_region_init_resizeable_ram:  Initialize memory region with resizeable
@@ -451,6 +455,9 @@ void memory_region_init_ram(MemoryRegion *mr,
  * @max_size: max size of the region.
  * @resized: callback to notify owner about used size change.
  * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Note that this function does not do anything to cause the data in the
+ * RAM memory region to be migrated; that is the responsibility of the caller.
  */
 void memory_region_init_resizeable_ram(MemoryRegion *mr,
                                        struct Object *owner,
@@ -474,6 +481,9 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
  * @share: %true if memory must be mmaped with the MAP_SHARED flag
  * @path: the path in which to allocate the RAM.
  * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Note that this function does not do anything to cause the data in the
+ * RAM memory region to be migrated; that is the responsibility of the caller.
  */
 void memory_region_init_ram_from_file(MemoryRegion *mr,
                                       struct Object *owner,
@@ -494,6 +504,9 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
  * @share: %true if memory must be mmaped with the MAP_SHARED flag
  * @fd: the fd to mmap.
  * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Note that this function does not do anything to cause the data in the
+ * RAM memory region to be migrated; that is the responsibility of the caller.
  */
 void memory_region_init_ram_from_fd(MemoryRegion *mr,
                                     struct Object *owner,
@@ -515,6 +528,9 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
  *        must be unique within any device
  * @size: size of the region.
  * @ptr: memory to be mapped; must contain at least @size bytes.
+ *
+ * Note that this function does not do anything to cause the data in the
+ * RAM memory region to be migrated; that is the responsibility of the caller.
  */
 void memory_region_init_ram_ptr(MemoryRegion *mr,
                                 struct Object *owner,
@@ -539,6 +555,10 @@ void memory_region_init_ram_ptr(MemoryRegion *mr,
  * @name: the name of the region.
  * @size: size of the region.
  * @ptr: memory to be mapped; must contain at least @size bytes.
+ *
+ * Note that this function does not do anything to cause the data in the
+ * RAM memory region to be migrated; that is the responsibility of the caller.
+ * (For RAM device memory regions, migrating the contents rarely makes sense.)
  */
 void memory_region_init_ram_device_ptr(MemoryRegion *mr,
                                        struct Object *owner,
@@ -566,12 +586,16 @@ void memory_region_init_alias(MemoryRegion *mr,
                               uint64_t size);
 
 /**
- * memory_region_init_rom: Initialize a ROM memory region.
+ * memory_region_init_rom_nomigrate: Initialize a ROM memory region.
  *
- * This has the same effect as calling memory_region_init_ram()
+ * This has the same effect as calling memory_region_init_ram_nomigrate()
  * and then marking the resulting region read-only with
  * memory_region_set_readonly().
  *
+ * Note that this function does not do anything to cause the data in the
+ * RAM side of the memory region to be migrated; that is the responsibility
+ * of the caller.
+ *
  * @mr: the #MemoryRegion to be initialized.
  * @owner: the object that tracks the region's reference count
  * @name: Region name, becomes part of RAMBlock name used in migration stream
@@ -579,15 +603,19 @@ void memory_region_init_alias(MemoryRegion *mr,
  * @size: size of the region.
  * @errp: pointer to Error*, to store an error if it happens.
  */
-void memory_region_init_rom(MemoryRegion *mr,
-                            struct Object *owner,
-                            const char *name,
-                            uint64_t size,
-                            Error **errp);
+void memory_region_init_rom_nomigrate(MemoryRegion *mr,
+                                      struct Object *owner,
+                                      const char *name,
+                                      uint64_t size,
+                                      Error **errp);
 
 /**
- * memory_region_init_rom_device:  Initialize a ROM memory region.  Writes are
- *                                 handled via callbacks.
+ * memory_region_init_rom_device_nomigrate:  Initialize a ROM memory region.
+ *                                 Writes are handled via callbacks.
+ *
+ * Note that this function does not do anything to cause the data in the
+ * RAM side of the memory region to be migrated; that is the responsibility
+ * of the caller.
  *
  * @mr: the #MemoryRegion to be initialized.
  * @owner: the object that tracks the region's reference count
@@ -597,13 +625,13 @@ void memory_region_init_rom(MemoryRegion *mr,
  * @size: size of the region.
  * @errp: pointer to Error*, to store an error if it happens.
  */
-void memory_region_init_rom_device(MemoryRegion *mr,
-                                   struct Object *owner,
-                                   const MemoryRegionOps *ops,
-                                   void *opaque,
-                                   const char *name,
-                                   uint64_t size,
-                                   Error **errp);
+void memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
+                                             struct Object *owner,
+                                             const MemoryRegionOps *ops,
+                                             void *opaque,
+                                             const char *name,
+                                             uint64_t size,
+                                             Error **errp);
 
 /**
  * memory_region_init_reservation: Initialize a memory region that reserves
@@ -651,6 +679,94 @@ void memory_region_init_iommu(void *_iommu_mr,
                               uint64_t size);
 
 /**
+ * memory_region_init_ram - Initialize RAM memory region.  Accesses into the
+ *                          region will modify memory directly.
+ *
+ * @mr: the #MemoryRegion to be initialized
+ * @owner: the object that tracks the region's reference count (must be
+ *         TYPE_DEVICE or a subclass of TYPE_DEVICE, or NULL)
+ * @name: name of the memory region
+ * @size: size of the region in bytes
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * This function allocates RAM for a board model or device, and
+ * arranges for it to be migrated (by calling vmstate_register_ram()
+ * if @owner is a DeviceState, or vmstate_register_ram_global() if
+ * @owner is NULL).
+ *
+ * TODO: Currently we restrict @owner to being either NULL (for
+ * global RAM regions with no owner) or devices, so that we can
+ * give the RAM block a unique name for migration purposes.
+ * We should lift this restriction and allow arbitrary Objects.
+ * If you pass a non-NULL non-device @owner then we will assert.
+ */
+void memory_region_init_ram(MemoryRegion *mr,
+                            struct Object *owner,
+                            const char *name,
+                            uint64_t size,
+                            Error **errp);
+
+/**
+ * memory_region_init_rom: Initialize a ROM memory region.
+ *
+ * This has the same effect as calling memory_region_init_ram()
+ * and then marking the resulting region read-only with
+ * memory_region_set_readonly(). This includes arranging for the
+ * contents to be migrated.
+ *
+ * TODO: Currently we restrict @owner to being either NULL (for
+ * global RAM regions with no owner) or devices, so that we can
+ * give the RAM block a unique name for migration purposes.
+ * We should lift this restriction and allow arbitrary Objects.
+ * If you pass a non-NULL non-device @owner then we will assert.
+ *
+ * @mr: the #MemoryRegion to be initialized.
+ * @owner: the object that tracks the region's reference count
+ * @name: Region name, becomes part of RAMBlock name used in migration stream
+ *        must be unique within any device
+ * @size: size of the region.
+ * @errp: pointer to Error*, to store an error if it happens.
+ */
+void memory_region_init_rom(MemoryRegion *mr,
+                            struct Object *owner,
+                            const char *name,
+                            uint64_t size,
+                            Error **errp);
+
+/**
+ * memory_region_init_rom_device:  Initialize a ROM memory region.
+ *                                 Writes are handled via callbacks.
+ *
+ * This function initializes a memory region backed by RAM for reads
+ * and callbacks for writes, and arranges for the RAM backing to
+ * be migrated (by calling vmstate_register_ram()
+ * if @owner is a DeviceState, or vmstate_register_ram_global() if
+ * @owner is NULL).
+ *
+ * TODO: Currently we restrict @owner to being either NULL (for
+ * global RAM regions with no owner) or devices, so that we can
+ * give the RAM block a unique name for migration purposes.
+ * We should lift this restriction and allow arbitrary Objects.
+ * If you pass a non-NULL non-device @owner then we will assert.
+ *
+ * @mr: the #MemoryRegion to be initialized.
+ * @owner: the object that tracks the region's reference count
+ * @ops: callbacks for write access handling (must not be NULL).
+ * @name: Region name, becomes part of RAMBlock name used in migration stream
+ *        must be unique within any device
+ * @size: size of the region.
+ * @errp: pointer to Error*, to store an error if it happens.
+ */
+void memory_region_init_rom_device(MemoryRegion *mr,
+                                   struct Object *owner,
+                                   const MemoryRegionOps *ops,
+                                   void *opaque,
+                                   const char *name,
+                                   uint64_t size,
+                                   Error **errp);
+
+
+/**
  * memory_region_owner: get a memory region's owner.
  *
  * @mr: the memory region being queried.
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 76ce0219ff..3363dd19fd 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -9,6 +9,35 @@
 #include "qom/object.h"
 #include "qom/cpu.h"
 
+/**
+ * memory_region_allocate_system_memory - Allocate a board's main memory
+ * @mr: the #MemoryRegion to be initialized
+ * @owner: the object that tracks the region's reference count
+ * @name: name of the memory region
+ * @ram_size: size of the region in bytes
+ *
+ * This function allocates the main memory for a board model, and
+ * initializes @mr appropriately. It also arranges for the memory
+ * to be migrated (by calling vmstate_register_ram_global()).
+ *
+ * Memory allocated via this function will be backed with the memory
+ * backend the user provided using "-mem-path" or "-numa node,memdev=..."
+ * if appropriate; this is typically used to cause host huge pages to be
+ * used. This function should therefore be called by a board exactly once,
+ * for the primary or largest RAM area it implements.
+ *
+ * For boards where the major RAM is split into two parts in the memory
+ * map, you can deal with this by calling memory_region_allocate_system_memory()
+ * once to get a MemoryRegion with enough RAM for both parts, and then
+ * creating alias MemoryRegions via memory_region_init_alias() which
+ * alias into different parts of the RAM MemoryRegion and can be mapped
+ * into the memory map in the appropriate places.
+ *
+ * Smaller pieces of memory (display RAM, static RAMs, etc) don't need
+ * to be backed via the -mem-path memory backend and can simply
+ * be created via memory_region_init_ram(), which also arranges for
+ * their contents to be migrated.
+ */
 void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
                                           const char *name,
                                           uint64_t ram_size);
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index a4509bd977..9aff9a735e 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -229,6 +229,24 @@ void qemu_co_rwlock_init(CoRwlock *lock);
 void qemu_co_rwlock_rdlock(CoRwlock *lock);
 
 /**
+ * Upgrades a read-side critical section to a writer.  This is a bit more
+ * efficient than @qemu_co_rwlock_unlock followed by @qemu_co_rwlock_wrlock.
+ * However, if the lock cannot be upgraded immediately, control is transferred
+ * to the caller of the current coroutine.  Also, @qemu_co_rwlock_upgrade
+ * only overrides CoRwlock fairness if there are no concurrent readers, so
+ * another writer might run while @qemu_co_rwlock_upgrade blocks.
+ */
+void qemu_co_rwlock_upgrade(CoRwlock *lock);
+
+/**
+ * Downgrades a write-side critical section to a reader.  Downgrading with
+ * @qemu_co_rwlock_downgrade never blocks, unlike @qemu_co_rwlock_unlock
+ * followed by @qemu_co_rwlock_rdlock.  This makes it more efficient, but
+ * may also sometimes be necessary for correctness.
+ */
+void qemu_co_rwlock_downgrade(CoRwlock *lock);
+
+/**
  * Write Locks the mutex. If the lock cannot be taken immediately because
  * of a parallel reader, control is transferred to the caller of the current
  * coroutine.
diff --git a/memory.c b/memory.c
index 69f697c20e..a7bc70aac1 100644
--- a/memory.c
+++ b/memory.c
@@ -32,6 +32,7 @@
 #include "sysemu/sysemu.h"
 #include "hw/misc/mmio_interface.h"
 #include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
 
 //#define DEBUG_UNASSIGNED
 
@@ -1365,11 +1366,11 @@ void memory_region_init_io(MemoryRegion *mr,
     mr->terminates = true;
 }
 
-void memory_region_init_ram(MemoryRegion *mr,
-                            Object *owner,
-                            const char *name,
-                            uint64_t size,
-                            Error **errp)
+void memory_region_init_ram_nomigrate(MemoryRegion *mr,
+                                      Object *owner,
+                                      const char *name,
+                                      uint64_t size,
+                                      Error **errp)
 {
     memory_region_init(mr, owner, name, size);
     mr->ram = true;
@@ -1473,11 +1474,11 @@ void memory_region_init_alias(MemoryRegion *mr,
     mr->alias_offset = offset;
 }
 
-void memory_region_init_rom(MemoryRegion *mr,
-                            struct Object *owner,
-                            const char *name,
-                            uint64_t size,
-                            Error **errp)
+void memory_region_init_rom_nomigrate(MemoryRegion *mr,
+                                      struct Object *owner,
+                                      const char *name,
+                                      uint64_t size,
+                                      Error **errp)
 {
     memory_region_init(mr, owner, name, size);
     mr->ram = true;
@@ -1488,13 +1489,13 @@ void memory_region_init_rom(MemoryRegion *mr,
     mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
 }
 
-void memory_region_init_rom_device(MemoryRegion *mr,
-                                   Object *owner,
-                                   const MemoryRegionOps *ops,
-                                   void *opaque,
-                                   const char *name,
-                                   uint64_t size,
-                                   Error **errp)
+void memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
+                                             Object *owner,
+                                             const MemoryRegionOps *ops,
+                                             void *opaque,
+                                             const char *name,
+                                             uint64_t size,
+                                             Error **errp)
 {
     assert(ops);
     memory_region_init(mr, owner, name, size);
@@ -2848,6 +2849,81 @@ void mtree_info(fprintf_function mon_printf, void *f, bool flatview)
     }
 }
 
+void memory_region_init_ram(MemoryRegion *mr,
+                            struct Object *owner,
+                            const char *name,
+                            uint64_t size,
+                            Error **errp)
+{
+    DeviceState *owner_dev;
+    Error *err = NULL;
+
+    memory_region_init_ram_nomigrate(mr, owner, name, size, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+    /* This will assert if owner is neither NULL nor a DeviceState.
+     * We only want the owner here for the purposes of defining a
+     * unique name for migration. TODO: Ideally we should implement
+     * a naming scheme for Objects which are not DeviceStates, in
+     * which case we can relax this restriction.
+     */
+    owner_dev = DEVICE(owner);
+    vmstate_register_ram(mr, owner_dev);
+}
+
+void memory_region_init_rom(MemoryRegion *mr,
+                            struct Object *owner,
+                            const char *name,
+                            uint64_t size,
+                            Error **errp)
+{
+    DeviceState *owner_dev;
+    Error *err = NULL;
+
+    memory_region_init_rom_nomigrate(mr, owner, name, size, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+    /* This will assert if owner is neither NULL nor a DeviceState.
+     * We only want the owner here for the purposes of defining a
+     * unique name for migration. TODO: Ideally we should implement
+     * a naming scheme for Objects which are not DeviceStates, in
+     * which case we can relax this restriction.
+     */
+    owner_dev = DEVICE(owner);
+    vmstate_register_ram(mr, owner_dev);
+}
+
+void memory_region_init_rom_device(MemoryRegion *mr,
+                                   struct Object *owner,
+                                   const MemoryRegionOps *ops,
+                                   void *opaque,
+                                   const char *name,
+                                   uint64_t size,
+                                   Error **errp)
+{
+    DeviceState *owner_dev;
+    Error *err = NULL;
+
+    memory_region_init_rom_device_nomigrate(mr, owner, ops, opaque,
+                                            name, size, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+    /* This will assert if owner is neither NULL nor a DeviceState.
+     * We only want the owner here for the purposes of defining a
+     * unique name for migration. TODO: Ideally we should implement
+     * a naming scheme for Objects which are not DeviceStates, in
+     * which case we can relax this restriction.
+     */
+    owner_dev = DEVICE(owner);
+    vmstate_register_ram(mr, owner_dev);
+}
+
 static const TypeInfo memory_region_info = {
     .parent             = TYPE_OBJECT,
     .name               = TYPE_MEMORY_REGION,
diff --git a/numa.c b/numa.c
index b0e75f6268..e32af04cd2 100644
--- a/numa.c
+++ b/numa.c
@@ -542,14 +542,14 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
             /* Legacy behavior: if allocation failed, fall back to
              * regular RAM allocation.
              */
-            memory_region_init_ram(mr, owner, name, ram_size, &error_fatal);
+            memory_region_init_ram_nomigrate(mr, owner, name, ram_size, &error_fatal);
         }
 #else
         fprintf(stderr, "-mem-path not supported on this host\n");
         exit(1);
 #endif
     } else {
-        memory_region_init_ram(mr, owner, name, ram_size, &error_fatal);
+        memory_region_init_ram_nomigrate(mr, owner, name, ram_size, &error_fatal);
     }
     vmstate_register_ram_global(mr);
 }
diff --git a/scripts/coccinelle/memory-region-init-ram.cocci b/scripts/coccinelle/memory-region-init-ram.cocci
new file mode 100644
index 0000000000..d290150872
--- /dev/null
+++ b/scripts/coccinelle/memory-region-init-ram.cocci
@@ -0,0 +1,38 @@
+// Replace by-hand memory_region_init_ram_nomigrate/vmstate_register_ram
+// code sequences with use of the new memory_region_init_ram function.
+// Similarly for the _rom and _rom_device functions.
+// We don't try to replace sequences with a non-NULL owner, because
+// there are none in the tree that can be automatically converted
+// (and only a handful that can be manually converted).
+@@
+expression MR;
+expression NAME;
+expression SIZE;
+expression ERRP;
+@@
+-memory_region_init_ram_nomigrate(MR, NULL, NAME, SIZE, ERRP);
++memory_region_init_ram(MR, NULL, NAME, SIZE, ERRP);
+ ...
+-vmstate_register_ram_global(MR);
+@@
+expression MR;
+expression NAME;
+expression SIZE;
+expression ERRP;
+@@
+-memory_region_init_rom_nomigrate(MR, NULL, NAME, SIZE, ERRP);
++memory_region_init_rom(MR, NULL, NAME, SIZE, ERRP);
+ ...
+-vmstate_register_ram_global(MR);
+@@
+expression MR;
+expression OPS;
+expression OPAQUE;
+expression NAME;
+expression SIZE;
+expression ERRP;
+@@
+-memory_region_init_rom_device_nomigrate(MR, NULL, OPS, OPAQUE, NAME, SIZE, ERRP);
++memory_region_init_rom_device(MR, NULL, OPS, OPAQUE, NAME, SIZE, ERRP);
+ ...
+-vmstate_register_ram_global(MR);
diff --git a/slirp/ip6.h b/slirp/ip6.h
index 0908855f0f..b1bea43b3c 100644
--- a/slirp/ip6.h
+++ b/slirp/ip6.h
@@ -57,9 +57,9 @@ static inline bool in6_equal_mach(const struct in6_addr *a,
                                   const struct in6_addr *b,
                                   int prefix_len)
 {
-    if (memcmp(&(a->s6_addr[(prefix_len + 7) / 8]),
-               &(b->s6_addr[(prefix_len + 7) / 8]),
-               16 - (prefix_len + 7) / 8) != 0) {
+    if (memcmp(&(a->s6_addr[DIV_ROUND_UP(prefix_len, 8)]),
+               &(b->s6_addr[DIV_ROUND_UP(prefix_len, 8)]),
+               16 - DIV_ROUND_UP(prefix_len, 8)) != 0) {
         return 0;
     }
 
diff --git a/slirp/misc.c b/slirp/misc.c
index 88e9d94197..260187b6b6 100644
--- a/slirp/misc.c
+++ b/slirp/misc.c
@@ -112,7 +112,9 @@ fork_exec(struct socket *so, const char *ex, int do_pty)
 		    bind(s, (struct sockaddr *)&addr, addrlen) < 0 ||
 		    listen(s, 1) < 0) {
 			error_report("Error: inet socket: %s", strerror(errno));
-			closesocket(s);
+			if (s >= 0) {
+			    closesocket(s);
+			}
 
 			return 0;
 		}
diff --git a/slirp/sbuf.c b/slirp/sbuf.c
index 10119d3ad5..912f235f65 100644
--- a/slirp/sbuf.c
+++ b/slirp/sbuf.c
@@ -91,7 +91,7 @@ sbappend(struct socket *so, struct mbuf *m)
 	if (so->so_urgc) {
 		sbappendsb(&so->so_rcv, m);
 		m_free(m);
-		sosendoob(so);
+		(void)sosendoob(so);
 		return;
 	}
 
diff --git a/slirp/socket.c b/slirp/socket.c
index 3b49a69a93..ecec0295a9 100644
--- a/slirp/socket.c
+++ b/slirp/socket.c
@@ -345,33 +345,40 @@ sosendoob(struct socket *so)
 	if (sb->sb_rptr < sb->sb_wptr) {
 		/* We can send it directly */
 		n = slirp_send(so, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
-		so->so_urgc -= n;
-
-		DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc));
 	} else {
 		/*
 		 * Since there's no sendv or sendtov like writev,
 		 * we must copy all data to a linear buffer then
 		 * send it all
 		 */
+		uint32_t urgc = so->so_urgc;
 		len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
-		if (len > so->so_urgc) len = so->so_urgc;
+		if (len > urgc) {
+			len = urgc;
+		}
 		memcpy(buff, sb->sb_rptr, len);
-		so->so_urgc -= len;
-		if (so->so_urgc) {
+		urgc -= len;
+		if (urgc) {
 			n = sb->sb_wptr - sb->sb_data;
-			if (n > so->so_urgc) n = so->so_urgc;
+			if (n > urgc) {
+				n = urgc;
+			}
 			memcpy((buff + len), sb->sb_data, n);
-			so->so_urgc -= n;
 			len += n;
 		}
 		n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
+	}
+
 #ifdef DEBUG
-		if (n != len)
-		   DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
+	if (n != len) {
+		DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
+	}
 #endif
-		DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc));
+	if (n < 0) {
+		return n;
 	}
+	so->so_urgc -= n;
+	DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc));
 
 	sb->sb_cc -= n;
 	sb->sb_rptr += n;
@@ -397,7 +404,15 @@ sowrite(struct socket *so)
 	DEBUG_ARG("so = %p", so);
 
 	if (so->so_urgc) {
-		sosendoob(so);
+		uint32_t expected = so->so_urgc;
+		if (sosendoob(so) < expected) {
+			/* Treat a short write as a fatal error too,
+			 * rather than continuing on and sending the urgent
+			 * data as if it were non-urgent and leaving the
+			 * so_urgc count wrong.
+			 */
+			goto err_disconnected;
+		}
 		if (sb->sb_cc == 0)
 			return 0;
 	}
@@ -441,11 +456,7 @@ sowrite(struct socket *so)
 		return 0;
 
 	if (nn <= 0) {
-		DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
-			so->so_state, errno));
-		sofcantsendmore(so);
-		tcp_sockclosed(sototcpcb(so));
-		return -1;
+		goto err_disconnected;
 	}
 
 #ifndef HAVE_READV
@@ -472,6 +483,13 @@ sowrite(struct socket *so)
 		sofcantsendmore(so);
 
 	return nn;
+
+err_disconnected:
+	DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
+		    so->so_state, errno));
+	sofcantsendmore(so);
+	tcp_sockclosed(sototcpcb(so));
+	return -1;
 }
 
 /*
diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include
index 037cb9e9e7..012a2fc1af 100644
--- a/tests/docker/Makefile.include
+++ b/tests/docker/Makefile.include
@@ -106,6 +106,8 @@ docker:
 	@echo '                         (default is 1)'
 	@echo '    DEBUG=1              Stop and drop to shell in the created container'
 	@echo '                         before running the command.'
+	@echo '    NETWORK=1            Enable virtual network interface with default backend.'
+	@echo '    NETWORK=$BACKEND     Enable virtual network interface with $BACKEND.'
 	@echo '    NOUSER               Define to disable adding current user to containers passwd.'
 	@echo '    NOCACHE=1            Ignore cache when build images.'
 	@echo '    EXECUTABLE=<path>    Include executable in image.'
@@ -132,7 +134,8 @@ docker-run: docker-qemu-src
 		$(SRC_PATH)/tests/docker/docker.py run 			\
 			$(if $(NOUSER),,-u $(shell id -u)) -t 		\
 			$(if $V,,--rm) 					\
-			$(if $(DEBUG),-i,--net=none) 			\
+			$(if $(DEBUG),-i,)				\
+			$(if $(NETWORK),$(if $(subst $(NETWORK),,1),--net=$(NETWORK)),--net=none) \
 			-e TARGET_LIST=$(TARGET_LIST) 			\
 			-e EXTRA_CONFIGURE_OPTS="$(EXTRA_CONFIGURE_OPTS)" \
 			-e V=$V -e J=$J -e DEBUG=$(DEBUG)		\
diff --git a/tests/docker/docker.py b/tests/docker/docker.py
index e707e5bcca..ee40ca04d9 100755
--- a/tests/docker/docker.py
+++ b/tests/docker/docker.py
@@ -112,13 +112,16 @@ class Docker(object):
         signal.signal(signal.SIGTERM, self._kill_instances)
         signal.signal(signal.SIGHUP, self._kill_instances)
 
-    def _do(self, cmd, quiet=True, infile=None, **kwargs):
+    def _do(self, cmd, quiet=True, **kwargs):
         if quiet:
             kwargs["stdout"] = DEVNULL
-        if infile:
-            kwargs["stdin"] = infile
         return subprocess.call(self._command + cmd, **kwargs)
 
+    def _do_check(self, cmd, quiet=True, **kwargs):
+        if quiet:
+            kwargs["stdout"] = DEVNULL
+        return subprocess.check_call(self._command + cmd, **kwargs)
+
     def _do_kill_instances(self, only_known, only_active=True):
         cmd = ["ps", "-q"]
         if not only_active:
@@ -177,14 +180,14 @@ class Docker(object):
                                     extra_files_cksum)))
         tmp_df.flush()
 
-        self._do(["build", "-t", tag, "-f", tmp_df.name] + argv + \
-                 [docker_dir],
-                 quiet=quiet)
+        self._do_check(["build", "-t", tag, "-f", tmp_df.name] + argv + \
+                       [docker_dir],
+                       quiet=quiet)
 
     def update_image(self, tag, tarball, quiet=True):
         "Update a tagged image using "
 
-        self._do(["build", "-t", tag, "-"], quiet=quiet, infile=tarball)
+        self._do_check(["build", "-t", tag, "-"], quiet=quiet, stdin=tarball)
 
     def image_matches_dockerfile(self, tag, dockerfile):
         try:
@@ -197,9 +200,9 @@ class Docker(object):
         label = uuid.uuid1().hex
         if not keep:
             self._instances.append(label)
-        ret = self._do(["run", "--label",
-                        "com.qemu.instance.uuid=" + label] + cmd,
-                       quiet=quiet)
+        ret = self._do_check(["run", "--label",
+                             "com.qemu.instance.uuid=" + label] + cmd,
+                             quiet=quiet)
         if not keep:
             self._instances.remove(label)
         return ret
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index b44b5d55eb..846ff9167f 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -402,6 +402,21 @@ void qemu_co_rwlock_unlock(CoRwlock *lock)
     qemu_co_mutex_unlock(&lock->mutex);
 }
 
+void qemu_co_rwlock_downgrade(CoRwlock *lock)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    /* Writer -> reader.  lock->mutex is still held from qemu_co_rwlock_wrlock
+     * or qemu_co_rwlock_upgrade, hence the reader count must still be zero.
+     */
+    assert(lock->reader == 0);
+    lock->reader++;
+    qemu_co_mutex_unlock(&lock->mutex);
+
+    /* From here on the read side runs without the mutex; account for it.  */
+    self->locks_held++;
+}
+
 void qemu_co_rwlock_wrlock(CoRwlock *lock)
 {
     qemu_co_mutex_lock(&lock->mutex);
@@ -416,3 +431,23 @@ void qemu_co_rwlock_wrlock(CoRwlock *lock)
      * There is no need to update self->locks_held.
      */
 }
+
+void qemu_co_rwlock_upgrade(CoRwlock *lock)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    qemu_co_mutex_lock(&lock->mutex);
+    assert(lock->reader > 0);
+    lock->reader--;
+    lock->pending_writer++;
+    while (lock->reader) {
+        qemu_co_queue_wait(&lock->queue, &lock->mutex);
+    }
+    lock->pending_writer--;
+
+    /* The mutex stays taken for the rest of the write-side critical
+     * section, as in qemu_co_rwlock_wrlock.  Drop the read lock's share
+     * of self->locks_held so the lock is not accounted for twice.
+     */
+    self->locks_held--;
+}