-rw-r--r--  docs/devel/rcu.txt           |  16
-rw-r--r--  exec.c                       | 116
-rw-r--r--  include/exec/ram_addr.h      | 138
-rw-r--r--  include/migration/misc.h     |   1
-rw-r--r--  include/migration/vmstate.h  |  40
-rw-r--r--  include/qemu/rcu.h           |  25
-rw-r--r--  memory.c                     |  15
-rw-r--r--  migration/migration.c        |  17
-rw-r--r--  migration/postcopy-ram.c     |  88
-rw-r--r--  migration/postcopy-ram.h     |   9
-rw-r--r--  migration/ram.c              | 298
-rw-r--r--  migration/rdma.c             |  57
-rw-r--r--  migration/savevm.c           |  14
-rw-r--r--  migration/trace-events       |   5
-rw-r--r--  migration/vmstate-types.c    | 152
-rw-r--r--  tests/test-vmstate.c         | 421
16 files changed, 980 insertions(+), 432 deletions(-)
diff --git a/docs/devel/rcu.txt b/docs/devel/rcu.txt
index c84e7f42b2..d83fed2f79 100644
--- a/docs/devel/rcu.txt
+++ b/docs/devel/rcu.txt
@@ -187,6 +187,22 @@ The following APIs must be used before RCU is used in a thread:
 Note that these APIs are relatively heavyweight, and should _not_ be
 nested.
 
+Convenience macros
+==================
+
+Two macros are provided that automatically release the read lock at the
+end of the scope.
+
+      RCU_READ_LOCK_GUARD()
+
+         Takes the lock and will release it at the end of the block it's
+         used in.
+
+      WITH_RCU_READ_LOCK_GUARD()  { code }
+
+         Is used at the head of a block to protect the code within the block.
+
+Note that 'goto'ing out of the guarded block will also drop the lock.
 
 DIFFERENCES WITH LINUX
 ======================
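
As a quick illustration of the two forms documented above (the wrapper functions below are invented for this sketch and assume the usual QEMU headers; RAMBLOCK_FOREACH, RAMBLOCK_FOREACH_NOT_IGNORED and ramblock_sync_dirty_bitmap are real helpers that this patch converts further down):

    /* Whole-scope form: the lock is held until the function returns,
     * including on any early return. */
    static uint64_t total_ram_size(void)
    {
        RAMBlock *block;
        uint64_t total = 0;

        RCU_READ_LOCK_GUARD();
        RAMBLOCK_FOREACH(block) {
            total += block->used_length;
        }
        return total;
    }

    /* Block form: only the braced region runs under the RCU read lock. */
    static void sync_all_blocks(RAMState *rs)
    {
        WITH_RCU_READ_LOCK_GUARD() {
            RAMBlock *block;
            RAMBLOCK_FOREACH_NOT_IGNORED(block) {
                ramblock_sync_dirty_bitmap(rs, block);
            }
        } /* rcu_read_unlock() has already run here */
    }
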
diff --git a/exec.c b/exec.c
index bdcfcdff3f..fb0943cfed 100644
--- a/exec.c
+++ b/exec.c
@@ -1037,16 +1037,14 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs)
         return;
     }
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     mr = address_space_translate(as, addr, &addr, &l, false, attrs);
     if (!(memory_region_is_ram(mr)
           || memory_region_is_romd(mr))) {
-        rcu_read_unlock();
         return;
     }
     ram_addr = memory_region_get_ram_addr(mr) + addr;
     tb_invalidate_phys_page_range(ram_addr, ram_addr + 1);
-    rcu_read_unlock();
 }
 
 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
@@ -1332,14 +1330,13 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
     end = TARGET_PAGE_ALIGN(start + length);
     start &= TARGET_PAGE_MASK;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     block = qemu_get_ram_block(start);
     assert(block == qemu_get_ram_block(end - 1));
     start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
     CPU_FOREACH(cpu) {
         tlb_reset_dirty(cpu, start1, length);
     }
-    rcu_read_unlock();
 }
 
 /* Note: start and end must be within the same ram block.  */
@@ -1360,30 +1357,29 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
     page = start >> TARGET_PAGE_BITS;
 
-    rcu_read_lock();
+    WITH_RCU_READ_LOCK_GUARD() {
+        blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+        ramblock = qemu_get_ram_block(start);
+        /* Range sanity check on the ramblock */
+        assert(start >= ramblock->offset &&
+               start + length <= ramblock->offset + ramblock->used_length);
 
-    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
-    ramblock = qemu_get_ram_block(start);
-    /* Range sanity check on the ramblock */
-    assert(start >= ramblock->offset &&
-           start + length <= ramblock->offset + ramblock->used_length);
+        while (page < end) {
+            unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+            unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+            unsigned long num = MIN(end - page,
+                                    DIRTY_MEMORY_BLOCK_SIZE - offset);
 
-    while (page < end) {
-        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
-        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
-        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
+            dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
+                                                  offset, num);
+            page += num;
+        }
 
-        dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
-                                              offset, num);
-        page += num;
+        mr_offset = (ram_addr_t)(page << TARGET_PAGE_BITS) - ramblock->offset;
+        mr_size = (end - page) << TARGET_PAGE_BITS;
+        memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size);
     }
 
-    mr_offset = (ram_addr_t)(page << TARGET_PAGE_BITS) - ramblock->offset;
-    mr_size = (end - page) << TARGET_PAGE_BITS;
-    memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size);
-
-    rcu_read_unlock();
-
     if (dirty && tcg_enabled()) {
         tlb_reset_dirty_range_all(start, length);
     }
@@ -1411,28 +1407,27 @@ DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
     end  = last  >> TARGET_PAGE_BITS;
     dest = 0;
 
-    rcu_read_lock();
+    WITH_RCU_READ_LOCK_GUARD() {
+        blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
 
-    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+        while (page < end) {
+            unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+            unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+            unsigned long num = MIN(end - page,
+                                    DIRTY_MEMORY_BLOCK_SIZE - offset);
 
-    while (page < end) {
-        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
-        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
-        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
-
-        assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
-        assert(QEMU_IS_ALIGNED(num,    (1 << BITS_PER_LEVEL)));
-        offset >>= BITS_PER_LEVEL;
+            assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
+            assert(QEMU_IS_ALIGNED(num,    (1 << BITS_PER_LEVEL)));
+            offset >>= BITS_PER_LEVEL;
 
-        bitmap_copy_and_clear_atomic(snap->dirty + dest,
-                                     blocks->blocks[idx] + offset,
-                                     num);
-        page += num;
-        dest += num >> BITS_PER_LEVEL;
+            bitmap_copy_and_clear_atomic(snap->dirty + dest,
+                                         blocks->blocks[idx] + offset,
+                                         num);
+            page += num;
+            dest += num >> BITS_PER_LEVEL;
+        }
     }
 
-    rcu_read_unlock();
-
     if (tcg_enabled()) {
         tlb_reset_dirty_range_all(start, length);
     }
@@ -1643,7 +1638,7 @@ void ram_block_dump(Monitor *mon)
     RAMBlock *block;
     char *psize;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     monitor_printf(mon, "%24s %8s  %18s %18s %18s\n",
                    "Block Name", "PSize", "Offset", "Used", "Total");
     RAMBLOCK_FOREACH(block) {
@@ -1655,7 +1650,6 @@ void ram_block_dump(Monitor *mon)
                        (uint64_t)block->max_length);
         g_free(psize);
     }
-    rcu_read_unlock();
 }
 
 #ifdef __linux__
@@ -2009,11 +2003,10 @@ static unsigned long last_ram_page(void)
     RAMBlock *block;
     ram_addr_t last = 0;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     RAMBLOCK_FOREACH(block) {
         last = MAX(last, block->offset + block->max_length);
     }
-    rcu_read_unlock();
     return last >> TARGET_PAGE_BITS;
 }
 
@@ -2100,7 +2093,7 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
     }
     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     RAMBLOCK_FOREACH(block) {
         if (block != new_block &&
             !strcmp(block->idstr, new_block->idstr)) {
@@ -2109,7 +2102,6 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
             abort();
         }
     }
-    rcu_read_unlock();
 }
 
 /* Called with iothread lock held.  */
@@ -2651,17 +2643,16 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
 
     if (xen_enabled()) {
         ram_addr_t ram_addr;
-        rcu_read_lock();
+        RCU_READ_LOCK_GUARD();
         ram_addr = xen_ram_addr_from_mapcache(ptr);
         block = qemu_get_ram_block(ram_addr);
         if (block) {
             *offset = ram_addr - block->offset;
         }
-        rcu_read_unlock();
         return block;
     }
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     block = atomic_rcu_read(&ram_list.mru_block);
     if (block && block->host && host - block->host < block->max_length) {
         goto found;
@@ -2677,7 +2668,6 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
         }
     }
 
-    rcu_read_unlock();
     return NULL;
 
 found:
@@ -2685,7 +2675,6 @@ found:
     if (round_offset) {
         *offset &= TARGET_PAGE_MASK;
     }
-    rcu_read_unlock();
     return block;
 }
 
@@ -3281,10 +3270,9 @@ MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
     FlatView *fv;
 
     if (len > 0) {
-        rcu_read_lock();
+        RCU_READ_LOCK_GUARD();
         fv = address_space_to_flatview(as);
         result = flatview_read(fv, addr, attrs, buf, len);
-        rcu_read_unlock();
     }
 
     return result;
@@ -3298,10 +3286,9 @@ MemTxResult address_space_write(AddressSpace *as, hwaddr addr,
     FlatView *fv;
 
     if (len > 0) {
-        rcu_read_lock();
+        RCU_READ_LOCK_GUARD();
         fv = address_space_to_flatview(as);
         result = flatview_write(fv, addr, attrs, buf, len);
-        rcu_read_unlock();
     }
 
     return result;
@@ -3341,7 +3328,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
     hwaddr addr1;
     MemoryRegion *mr;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     while (len > 0) {
         l = len;
         mr = address_space_translate(as, addr, &addr1, &l, true, attrs);
@@ -3366,7 +3353,6 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
         buf += l;
         addr += l;
     }
-    rcu_read_unlock();
     return MEMTX_OK;
 }
 
@@ -3511,10 +3497,9 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr,
     FlatView *fv;
     bool result;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     fv = address_space_to_flatview(as);
     result = flatview_access_valid(fv, addr, len, is_write, attrs);
-    rcu_read_unlock();
     return result;
 }
 
@@ -3569,13 +3554,12 @@ void *address_space_map(AddressSpace *as,
     }
 
     l = len;
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     fv = address_space_to_flatview(as);
     mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
 
     if (!memory_access_is_direct(mr, is_write)) {
         if (atomic_xchg(&bounce.in_use, true)) {
-            rcu_read_unlock();
             return NULL;
         }
         /* Avoid unbounded allocations */
@@ -3591,7 +3575,6 @@ void *address_space_map(AddressSpace *as,
                                bounce.buffer, l);
         }
 
-        rcu_read_unlock();
         *plen = l;
         return bounce.buffer;
     }
@@ -3601,7 +3584,6 @@ void *address_space_map(AddressSpace *as,
     *plen = flatview_extend_translation(fv, addr, len, mr, xlat,
                                         l, is_write, attrs);
     ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen, true);
-    rcu_read_unlock();
 
     return ptr;
 }
@@ -3869,13 +3851,12 @@ bool cpu_physical_memory_is_io(hwaddr phys_addr)
     hwaddr l = 1;
     bool res;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     mr = address_space_translate(&address_space_memory,
                                  phys_addr, &phys_addr, &l, false,
                                  MEMTXATTRS_UNSPECIFIED);
 
     res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
-    rcu_read_unlock();
     return res;
 }
 
@@ -3884,14 +3865,13 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
     RAMBlock *block;
     int ret = 0;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     RAMBLOCK_FOREACH(block) {
         ret = func(block, opaque);
         if (ret) {
             break;
         }
     }
-    rcu_read_unlock();
     return ret;
 }
 
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index e96e621de5..ad158bb247 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -193,30 +193,29 @@ static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
     page = start >> TARGET_PAGE_BITS;
 
-    rcu_read_lock();
-
-    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+    WITH_RCU_READ_LOCK_GUARD() {
+        blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+
+        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+        base = page - offset;
+        while (page < end) {
+            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
+            unsigned long num = next - base;
+            unsigned long found = find_next_bit(blocks->blocks[idx],
+                                                num, offset);
+            if (found < num) {
+                dirty = true;
+                break;
+            }
 
-    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
-    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
-    base = page - offset;
-    while (page < end) {
-        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
-        unsigned long num = next - base;
-        unsigned long found = find_next_bit(blocks->blocks[idx], num, offset);
-        if (found < num) {
-            dirty = true;
-            break;
+            page = next;
+            idx++;
+            offset = 0;
+            base += DIRTY_MEMORY_BLOCK_SIZE;
         }
-
-        page = next;
-        idx++;
-        offset = 0;
-        base += DIRTY_MEMORY_BLOCK_SIZE;
     }
 
-    rcu_read_unlock();
-
     return dirty;
 }
 
@@ -234,7 +233,7 @@ static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
     page = start >> TARGET_PAGE_BITS;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
 
     blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
 
@@ -256,8 +255,6 @@ static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
         base += DIRTY_MEMORY_BLOCK_SIZE;
     }
 
-    rcu_read_unlock();
-
     return dirty;
 }
 
@@ -309,13 +306,11 @@ static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
     idx = page / DIRTY_MEMORY_BLOCK_SIZE;
     offset = page % DIRTY_MEMORY_BLOCK_SIZE;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
 
     blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
 
     set_bit_atomic(offset, blocks->blocks[idx]);
-
-    rcu_read_unlock();
 }
 
 static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
@@ -334,39 +329,37 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
     page = start >> TARGET_PAGE_BITS;
 
-    rcu_read_lock();
+    WITH_RCU_READ_LOCK_GUARD() {
+        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
+            blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
+        }
 
-    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
-        blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
-    }
+        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+        base = page - offset;
+        while (page < end) {
+            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
 
-    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
-    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
-    base = page - offset;
-    while (page < end) {
-        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
+            if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
+                bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
+                                  offset, next - page);
+            }
+            if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
+                bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
+                                  offset, next - page);
+            }
+            if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
+                bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
+                                  offset, next - page);
+            }
 
-        if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
-            bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
-                              offset, next - page);
+            page = next;
+            idx++;
+            offset = 0;
+            base += DIRTY_MEMORY_BLOCK_SIZE;
         }
-        if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
-            bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
-                              offset, next - page);
-        }
-        if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
-            bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
-                              offset, next - page);
-        }
-
-        page = next;
-        idx++;
-        offset = 0;
-        base += DIRTY_MEMORY_BLOCK_SIZE;
     }
 
-    rcu_read_unlock();
-
     xen_hvm_modified_memory(start, length);
 }
 
@@ -396,36 +389,35 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
         offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                           DIRTY_MEMORY_BLOCK_SIZE);
 
-        rcu_read_lock();
+        WITH_RCU_READ_LOCK_GUARD() {
+            for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
+                blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
+            }
 
-        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
-            blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
-        }
+            for (k = 0; k < nr; k++) {
+                if (bitmap[k]) {
+                    unsigned long temp = leul_to_cpu(bitmap[k]);
 
-        for (k = 0; k < nr; k++) {
-            if (bitmap[k]) {
-                unsigned long temp = leul_to_cpu(bitmap[k]);
+                    atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);
 
-                atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);
+                    if (global_dirty_log) {
+                        atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
+                                  temp);
+                    }
 
-                if (global_dirty_log) {
-                    atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
-                              temp);
+                    if (tcg_enabled()) {
+                        atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
+                                  temp);
+                    }
                 }
 
-                if (tcg_enabled()) {
-                    atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp);
+                if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
+                    offset = 0;
+                    idx++;
                 }
             }
-
-            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
-                offset = 0;
-                idx++;
-            }
         }
 
-        rcu_read_unlock();
-
         xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
     } else {
         uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;
diff --git a/include/migration/misc.h b/include/migration/misc.h
index b9d8e787af..d2762257aa 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -60,6 +60,7 @@ void migration_object_init(void);
 void migration_shutdown(void);
 void qemu_start_incoming_migration(const char *uri, Error **errp);
 bool migration_is_idle(void);
+bool migration_is_active(MigrationState *);
 void add_migration_state_change_notifier(Notifier *notify);
 void remove_migration_state_change_notifier(Notifier *notify);
 bool migration_in_setup(MigrationState *);
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 1fbfd099dd..b9ee563aa4 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -224,6 +224,7 @@ extern const VMStateInfo vmstate_info_unused_buffer;
 extern const VMStateInfo vmstate_info_tmp;
 extern const VMStateInfo vmstate_info_bitmap;
 extern const VMStateInfo vmstate_info_qtailq;
+extern const VMStateInfo vmstate_info_gtree;
 
 #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0)
 /*
@@ -754,6 +755,45 @@ extern const VMStateInfo vmstate_info_qtailq;
     .start        = offsetof(_type, _next),                              \
 }
 
+/*
+ * For migrating a GTree whose key is a pointer to _key_type and the
+ * value, a pointer to _val_type
+ * The target tree must have been properly initialized
+ * _vmsd: Start address of the 2 element array containing the data vmsd
+ *        and the key vmsd, in that order
+ * _key_type: type of the key
+ * _val_type: type of the value
+ */
+#define VMSTATE_GTREE_V(_field, _state, _version, _vmsd,                       \
+                        _key_type, _val_type)                                  \
+{                                                                              \
+    .name         = (stringify(_field)),                                       \
+    .version_id   = (_version),                                                \
+    .vmsd         = (_vmsd),                                                   \
+    .info         = &vmstate_info_gtree,                                       \
+    .start        = sizeof(_key_type),                                         \
+    .size         = sizeof(_val_type),                                         \
+    .offset       = offsetof(_state, _field),                                  \
+}
+
+/*
+ * For migrating a GTree with direct key and the value a pointer
+ * to _val_type
+ * The target tree must have been properly initialized
+ * _vmsd: data vmsd
+ * _val_type: type of the value
+ */
+#define VMSTATE_GTREE_DIRECT_KEY_V(_field, _state, _version, _vmsd, _val_type) \
+{                                                                              \
+    .name         = (stringify(_field)),                                       \
+    .version_id   = (_version),                                                \
+    .vmsd         = (_vmsd),                                                   \
+    .info         = &vmstate_info_gtree,                                       \
+    .start        = 0,                                                         \
+    .size         = sizeof(_val_type),                                         \
+    .offset       = offsetof(_state, _field),                                  \
+}
+
 /* _f : field name
    _f_n : num of elements field_name
    _n : num of elements
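
For orientation, a rough sketch of how VMSTATE_GTREE_V might be wired up; every Test* name here is invented for illustration, while the real get/put implementation lives in migration/vmstate-types.c and the real users in tests/test-vmstate.c below. Note the two-element array puts the value (data) vmsd first and the key vmsd second, matching the comment above:

    typedef struct TestKey { uint64_t id; } TestKey;
    typedef struct TestVal { uint32_t data; } TestVal;

    typedef struct TestState {
        GTree *tree;    /* TestKey * -> TestVal *, must exist before load */
    } TestState;

    /* Two-element array: data (value) vmsd first, key vmsd second */
    static const VMStateDescription vmstate_test_pair[2] = {
        {
            .name = "testval",
            .version_id = 1,
            .fields = (VMStateField[]) {
                VMSTATE_UINT32(data, TestVal),
                VMSTATE_END_OF_LIST()
            }
        },
        {
            .name = "testkey",
            .version_id = 1,
            .fields = (VMStateField[]) {
                VMSTATE_UINT64(id, TestKey),
                VMSTATE_END_OF_LIST()
            }
        },
    };

    static const VMStateDescription vmstate_test = {
        .name = "teststate",
        .version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_GTREE_V(tree, TestState, 1, vmstate_test_pair,
                            TestKey, TestVal),
            VMSTATE_END_OF_LIST()
        }
    };
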
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index 22876d1428..9c82683e37 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -154,6 +154,31 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
       }),                                                                \
       (RCUCBFunc *)g_free);
 
+typedef void RCUReadAuto;
+static inline RCUReadAuto *rcu_read_auto_lock(void)
+{
+    rcu_read_lock();
+    /* Anything non-NULL causes the cleanup function to be called */
+    return (void *)(uintptr_t)0x1;
+}
+
+static inline void rcu_read_auto_unlock(RCUReadAuto *r)
+{
+    rcu_read_unlock();
+}
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, rcu_read_auto_unlock)
+
+#define WITH_RCU_READ_LOCK_GUARD() \
+    WITH_RCU_READ_LOCK_GUARD_(_rcu_read_auto##__COUNTER__)
+
+#define WITH_RCU_READ_LOCK_GUARD_(var) \
+    for (g_autoptr(RCUReadAuto) var = rcu_read_auto_lock(); \
+        (var); rcu_read_auto_unlock(var), (var) = NULL)
+
+#define RCU_READ_LOCK_GUARD() \
+    g_autoptr(RCUReadAuto) _rcu_read_auto __attribute__((unused)) = rcu_read_auto_lock()
+
 #ifdef __cplusplus
 }
 #endif
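
The mechanics behind the block form are worth spelling out: rcu_read_auto_lock() takes the read lock and returns a dummy non-NULL token, and the g_autoptr cleanup registered above calls rcu_read_auto_unlock() whenever that token goes out of scope while still non-NULL. Hand-expanding one WITH_RCU_READ_LOCK_GUARD() use (purely illustrative; the token name is invented):

    for (g_autoptr(RCUReadAuto) tok = rcu_read_auto_lock(); /* rcu_read_lock() */
         tok;                                    /* body runs exactly once */
         rcu_read_auto_unlock(tok), tok = NULL)  /* normal exit: unlock, disarm */
    {
        /* guarded code: leaving via goto or return keeps tok non-NULL,
         * so the scope-exit cleanup still calls rcu_read_unlock() */
    }
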
diff --git a/memory.c b/memory.c
index 978da3d3df..c952eabb5d 100644
--- a/memory.c
+++ b/memory.c
@@ -779,14 +779,13 @@ FlatView *address_space_get_flatview(AddressSpace *as)
 {
     FlatView *view;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     do {
         view = address_space_to_flatview(as);
         /* If somebody has replaced as->current_map concurrently,
          * flatview_ref returns false.
          */
     } while (!flatview_ref(view));
-    rcu_read_unlock();
     return view;
 }
 
@@ -2166,12 +2165,11 @@ int memory_region_get_fd(MemoryRegion *mr)
 {
     int fd;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     while (mr->alias) {
         mr = mr->alias;
     }
     fd = mr->ram_block->fd;
-    rcu_read_unlock();
 
     return fd;
 }
@@ -2181,14 +2179,13 @@ void *memory_region_get_ram_ptr(MemoryRegion *mr)
     void *ptr;
     uint64_t offset = 0;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     while (mr->alias) {
         offset += mr->alias_offset;
         mr = mr->alias;
     }
     assert(mr->ram_block);
     ptr = qemu_map_ram_ptr(mr->ram_block, offset);
-    rcu_read_unlock();
 
     return ptr;
 }
@@ -2578,12 +2575,11 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
                                        hwaddr addr, uint64_t size)
 {
     MemoryRegionSection ret;
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     ret = memory_region_find_rcu(mr, addr, size);
     if (ret.mr) {
         memory_region_ref(ret.mr);
     }
-    rcu_read_unlock();
     return ret;
 }
 
@@ -2591,9 +2587,8 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr)
 {
     MemoryRegion *mr;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     mr = memory_region_find_rcu(container, addr, 1).mr;
-    rcu_read_unlock();
     return mr && mr != container;
 }
 
diff --git a/migration/migration.c b/migration/migration.c
index 5f7e4d15e9..3febd0f8f3 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1533,8 +1533,7 @@ static void migrate_fd_cleanup(MigrationState *s)
         qemu_fclose(tmp);
     }
 
-    assert((s->state != MIGRATION_STATUS_ACTIVE) &&
-           (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));
+    assert(!migration_is_active(s));
 
     if (s->state == MIGRATION_STATUS_CANCELLING) {
         migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
@@ -1703,6 +1702,12 @@ bool migration_is_idle(void)
     return false;
 }
 
+bool migration_is_active(MigrationState *s)
+{
+    return (s->state == MIGRATION_STATUS_ACTIVE ||
+            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
+}
+
 void migrate_init(MigrationState *s)
 {
     /*
@@ -2481,7 +2486,7 @@ retry:
 out:
     res = qemu_file_get_error(rp);
     if (res) {
-        if (res == -EIO) {
+        if (res == -EIO && migration_in_postcopy()) {
             /*
              * Maybe there is something we can do: it looks like a
              * network down issue, and we pause for a recovery.
@@ -3144,8 +3149,7 @@ static MigIterateState migration_iteration_run(MigrationState *s)
             return MIG_ITERATE_SKIP;
         }
         /* Just another iteration step */
-        qemu_savevm_state_iterate(s->to_dst_file,
-            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
+        qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
     } else {
         trace_migration_thread_low_pending(pending_size);
         migration_completion(s);
@@ -3266,8 +3270,7 @@ static void *migration_thread(void *opaque)
 
     trace_migration_thread_setup_complete();
 
-    while (s->state == MIGRATION_STATUS_ACTIVE ||
-           s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+    while (migration_is_active(s)) {
         int64_t current_time;
 
         if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 1f63e65ed7..abccafc8c8 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -577,8 +577,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
         }
     }
 
-    postcopy_state_set(POSTCOPY_INCOMING_END);
-
     if (mis->postcopy_tmp_page) {
         munmap(mis->postcopy_tmp_page, mis->largest_page_size);
         mis->postcopy_tmp_page = NULL;
@@ -768,9 +766,11 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
     atomic_xchg(&dc->page_fault_vcpu_time[cpu], low_time_offset);
     atomic_xchg(&dc->vcpu_addr[cpu], addr);
 
-    /* check it here, not at the begining of the function,
-     * due to, check could accur early than bitmap_set in
-     * qemu_ufd_copy_ioctl */
+    /*
+     * check it here, not at the beginning of the function,
+     * due to, check could occur early than bitmap_set in
+     * qemu_ufd_copy_ioctl
+     */
     already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
     if (already_received) {
         atomic_xchg(&dc->vcpu_addr[cpu], 0);
@@ -1094,7 +1094,7 @@ retry:
     return NULL;
 }
 
-int postcopy_ram_enable_notify(MigrationIncomingState *mis)
+int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
 {
     /* Open the fd for the kernel to give us userfaults */
     mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
@@ -1134,6 +1134,32 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
         return -1;
     }
 
+    mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
+                                  PROT_READ | PROT_WRITE, MAP_PRIVATE |
+                                  MAP_ANONYMOUS, -1, 0);
+    if (mis->postcopy_tmp_page == MAP_FAILED) {
+        mis->postcopy_tmp_page = NULL;
+        error_report("%s: Failed to map postcopy_tmp_page %s",
+                     __func__, strerror(errno));
+        return -1;
+    }
+
+    /*
+     * Map large zero page when kernel can't use UFFDIO_ZEROPAGE for hugepages
+     */
+    mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
+                                       PROT_READ | PROT_WRITE,
+                                       MAP_PRIVATE | MAP_ANONYMOUS,
+                                       -1, 0);
+    if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
+        int e = errno;
+        mis->postcopy_tmp_zero_page = NULL;
+        error_report("%s: Failed to map large zero page %s",
+                     __func__, strerror(e));
+        return -e;
+    }
+    memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
+
     /*
      * Ballooning can mark pages as absent while we're postcopying
      * that would cause false userfaults.
@@ -1240,50 +1266,10 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
                                            qemu_ram_block_host_offset(rb,
                                                                       host));
     } else {
-        /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
-        if (!mis->postcopy_tmp_zero_page) {
-            mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
-                                               PROT_READ | PROT_WRITE,
-                                               MAP_PRIVATE | MAP_ANONYMOUS,
-                                               -1, 0);
-            if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
-                int e = errno;
-                mis->postcopy_tmp_zero_page = NULL;
-                error_report("%s: %s mapping large zero page",
-                             __func__, strerror(e));
-                return -e;
-            }
-            memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
-        }
-        return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
-                                   rb);
+        return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, rb);
     }
 }
 
-/*
- * Returns a target page of memory that can be mapped at a later point in time
- * using postcopy_place_page
- * The same address is used repeatedly, postcopy_place_page just takes the
- * backing page away.
- * Returns: Pointer to allocated page
- *
- */
-void *postcopy_get_tmp_page(MigrationIncomingState *mis)
-{
-    if (!mis->postcopy_tmp_page) {
-        mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
-                             PROT_READ | PROT_WRITE, MAP_PRIVATE |
-                             MAP_ANONYMOUS, -1, 0);
-        if (mis->postcopy_tmp_page == MAP_FAILED) {
-            mis->postcopy_tmp_page = NULL;
-            error_report("%s: %s", __func__, strerror(errno));
-            return NULL;
-        }
-    }
-
-    return mis->postcopy_tmp_page;
-}
-
 #else
 /* No target OS support, stubs just fail */
 void fill_destination_postcopy_migration_info(MigrationInfo *info)
@@ -1321,7 +1307,7 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
     return -1;
 }
 
-int postcopy_ram_enable_notify(MigrationIncomingState *mis)
+int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
 {
     assert(0);
     return -1;
@@ -1341,12 +1327,6 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
     return -1;
 }
 
-void *postcopy_get_tmp_page(MigrationIncomingState *mis)
-{
-    assert(0);
-    return NULL;
-}
-
 int postcopy_wake_shared(struct PostCopyFD *pcfd,
                          uint64_t client_addr,
                          RAMBlock *rb)
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 9c8bd2bae0..9941feb63a 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -20,7 +20,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis);
  * Make all of RAM sensitive to accesses to areas that haven't yet been written
  * and wire up anything necessary to deal with it.
  */
-int postcopy_ram_enable_notify(MigrationIncomingState *mis);
+int postcopy_ram_incoming_setup(MigrationIncomingState *mis);
 
 /*
  * Initialise postcopy-ram, setting the RAM to a state where we can go into
@@ -100,13 +100,6 @@ typedef enum {
     POSTCOPY_INCOMING_END
 } PostcopyState;
 
-/*
- * Allocate a page of memory that can be mapped at a later point in time
- * using postcopy_place_page
- * Returns: Pointer to allocated page
- */
-void *postcopy_get_tmp_page(MigrationIncomingState *mis);
-
 PostcopyState postcopy_state_get(void);
 /* Set the state and return the old state */
 PostcopyState postcopy_state_set(PostcopyState new_state);
diff --git a/migration/ram.c b/migration/ram.c
index 22423f08cd..5078f94490 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -181,14 +181,14 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
     RAMBlock *block;
     int ret = 0;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
+
     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
         ret = func(block, opaque);
         if (ret) {
             break;
         }
     }
-    rcu_read_unlock();
     return ret;
 }
 
@@ -791,13 +791,10 @@ static void multifd_pages_clear(MultiFDPages_t *pages)
 static void multifd_send_fill_packet(MultiFDSendParams *p)
 {
     MultiFDPacket_t *packet = p->packet;
-    uint32_t page_max = MULTIFD_PACKET_SIZE / qemu_target_page_size();
     int i;
 
-    packet->magic = cpu_to_be32(MULTIFD_MAGIC);
-    packet->version = cpu_to_be32(MULTIFD_VERSION);
     packet->flags = cpu_to_be32(p->flags);
-    packet->pages_alloc = cpu_to_be32(page_max);
+    packet->pages_alloc = cpu_to_be32(p->pages->allocated);
     packet->pages_used = cpu_to_be32(p->pages->used);
     packet->next_packet_size = cpu_to_be32(p->next_packet_size);
     packet->packet_num = cpu_to_be64(p->packet_num);
@@ -838,7 +835,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
 
     packet->pages_alloc = be32_to_cpu(packet->pages_alloc);
     /*
-     * If we recevied a packet that is 100 times bigger than expected
+     * If we received a packet that is 100 times bigger than expected
      * just stop migration.  It is a magic number.
      */
     if (packet->pages_alloc > pages_max * 100) {
@@ -1132,7 +1129,6 @@ static void *multifd_send_thread(void *opaque)
             p->flags = 0;
             p->num_packets++;
             p->num_pages += used;
-            p->pages->used = 0;
             qemu_mutex_unlock(&p->mutex);
 
             trace_multifd_send(p->id, packet_num, used, flags,
@@ -1241,6 +1237,8 @@ int multifd_save_setup(void)
         p->packet_len = sizeof(MultiFDPacket_t)
                       + sizeof(ram_addr_t) * page_count;
         p->packet = g_malloc0(p->packet_len);
+        p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
+        p->packet->version = cpu_to_be32(MULTIFD_VERSION);
         p->name = g_strdup_printf("multifdsend_%d", i);
         socket_send_channel_create(multifd_new_send_channel_async, p);
     }
@@ -1848,12 +1846,12 @@ static void migration_bitmap_sync(RAMState *rs)
     memory_global_dirty_log_sync();
 
     qemu_mutex_lock(&rs->bitmap_mutex);
-    rcu_read_lock();
-    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
-        ramblock_sync_dirty_bitmap(rs, block);
+    WITH_RCU_READ_LOCK_GUARD() {
+        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+            ramblock_sync_dirty_bitmap(rs, block);
+        }
+        ram_counters.remaining = ram_bytes_remaining();
     }
-    ram_counters.remaining = ram_bytes_remaining();
-    rcu_read_unlock();
     qemu_mutex_unlock(&rs->bitmap_mutex);
 
     memory_global_after_dirty_log_sync();
@@ -2397,13 +2395,12 @@ static void migration_page_queue_free(RAMState *rs)
     /* This queue generally should be empty - but in the case of a failed
      * migration might have some droppings in.
      */
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
         memory_region_unref(mspr->rb->mr);
         QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
         g_free(mspr);
     }
-    rcu_read_unlock();
 }
 
 /**
@@ -2424,7 +2421,8 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
     RAMState *rs = ram_state;
 
     ram_counters.postcopy_requests++;
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
+
     if (!rbname) {
         /* Reuse last RAMBlock */
         ramblock = rs->last_req_rb;
@@ -2466,12 +2464,10 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
     QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
     migration_make_urgent_request();
     qemu_mutex_unlock(&rs->src_page_req_mutex);
-    rcu_read_unlock();
 
     return 0;
 
 err:
-    rcu_read_unlock();
     return -1;
 }
 
@@ -2700,7 +2696,8 @@ static uint64_t ram_bytes_total_common(bool count_ignored)
     RAMBlock *block;
     uint64_t total = 0;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
+
     if (count_ignored) {
         RAMBLOCK_FOREACH_MIGRATABLE(block) {
             total += block->used_length;
@@ -2710,7 +2707,6 @@ static uint64_t ram_bytes_total_common(bool count_ignored)
             total += block->used_length;
         }
     }
-    rcu_read_unlock();
     return total;
 }
 
@@ -3034,7 +3030,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
     RAMBlock *block;
     int ret;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
 
     /* This should be our last sync, the src is now paused */
     migration_bitmap_sync(rs);
@@ -3048,7 +3044,6 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
         /* Deal with TPS != HPS and huge pages */
         ret = postcopy_chunk_hostpages(ms, block);
         if (ret) {
-            rcu_read_unlock();
             return ret;
         }
 
@@ -3060,7 +3055,6 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
     trace_ram_postcopy_send_discard_bitmap();
 
     ret = postcopy_each_ram_send_discard(ms);
-    rcu_read_unlock();
 
     return ret;
 }
@@ -3081,7 +3075,7 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length)
 
     trace_ram_discard_range(rbname, start, length);
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     RAMBlock *rb = qemu_ram_block_by_name(rbname);
 
     if (!rb) {
@@ -3101,8 +3095,6 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length)
     ret = ram_block_discard_range(rb, start, length);
 
 err:
-    rcu_read_unlock();
-
     return ret;
 }
 
@@ -3231,13 +3223,12 @@ static void ram_init_bitmaps(RAMState *rs)
     /* For memory_global_dirty_log_start below.  */
     qemu_mutex_lock_iothread();
     qemu_mutex_lock_ramlist();
-    rcu_read_lock();
-
-    ram_list_init_bitmaps();
-    memory_global_dirty_log_start();
-    migration_bitmap_sync_precopy(rs);
 
-    rcu_read_unlock();
+    WITH_RCU_READ_LOCK_GUARD() {
+        ram_list_init_bitmaps();
+        memory_global_dirty_log_start();
+        migration_bitmap_sync_precopy(rs);
+    }
     qemu_mutex_unlock_ramlist();
     qemu_mutex_unlock_iothread();
 }
@@ -3373,24 +3364,23 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     }
     (*rsp)->f = f;
 
-    rcu_read_lock();
-
-    qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
+    WITH_RCU_READ_LOCK_GUARD() {
+        qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
 
-    RAMBLOCK_FOREACH_MIGRATABLE(block) {
-        qemu_put_byte(f, strlen(block->idstr));
-        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
-        qemu_put_be64(f, block->used_length);
-        if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
-            qemu_put_be64(f, block->page_size);
-        }
-        if (migrate_ignore_shared()) {
-            qemu_put_be64(f, block->mr->addr);
+        RAMBLOCK_FOREACH_MIGRATABLE(block) {
+            qemu_put_byte(f, strlen(block->idstr));
+            qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
+            qemu_put_be64(f, block->used_length);
+            if (migrate_postcopy_ram() && block->page_size !=
+                                          qemu_host_page_size) {
+                qemu_put_be64(f, block->page_size);
+            }
+            if (migrate_ignore_shared()) {
+                qemu_put_be64(f, block->mr->addr);
+            }
         }
     }
 
-    rcu_read_unlock();
-
     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
 
@@ -3425,55 +3415,57 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
         goto out;
     }
 
-    rcu_read_lock();
-    if (ram_list.version != rs->last_version) {
-        ram_state_reset(rs);
-    }
-
-    /* Read version before ram_list.blocks */
-    smp_rmb();
+    WITH_RCU_READ_LOCK_GUARD() {
+        if (ram_list.version != rs->last_version) {
+            ram_state_reset(rs);
+        }
 
-    ram_control_before_iterate(f, RAM_CONTROL_ROUND);
+        /* Read version before ram_list.blocks */
+        smp_rmb();
 
-    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-    i = 0;
-    while ((ret = qemu_file_rate_limit(f)) == 0 ||
-            !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
-        int pages;
+        ram_control_before_iterate(f, RAM_CONTROL_ROUND);
 
-        if (qemu_file_get_error(f)) {
-            break;
-        }
+        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+        i = 0;
+        while ((ret = qemu_file_rate_limit(f)) == 0 ||
+                !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
+            int pages;
 
-        pages = ram_find_and_save_block(rs, false);
-        /* no more pages to sent */
-        if (pages == 0) {
-            done = 1;
-            break;
-        }
+            if (qemu_file_get_error(f)) {
+                break;
+            }
 
-        if (pages < 0) {
-            qemu_file_set_error(f, pages);
-            break;
-        }
+            pages = ram_find_and_save_block(rs, false);
+            /* no more pages to sent */
+            if (pages == 0) {
+                done = 1;
+                break;
+            }
 
-        rs->target_page_count += pages;
-
-        /* we want to check in the 1st loop, just in case it was the 1st time
-           and we had to sync the dirty bitmap.
-           qemu_clock_get_ns() is a bit expensive, so we only check each some
-           iterations
-        */
-        if ((i & 63) == 0) {
-            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
-            if (t1 > MAX_WAIT) {
-                trace_ram_save_iterate_big_wait(t1, i);
+            if (pages < 0) {
+                qemu_file_set_error(f, pages);
                 break;
             }
+
+            rs->target_page_count += pages;
+
+            /*
+             * we want to check in the 1st loop, just in case it was the 1st
+             * time and we had to sync the dirty bitmap.
+             * qemu_clock_get_ns() is a bit expensive, so we only check each
+             * some iterations
+             */
+            if ((i & 63) == 0) {
+                uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
+                              1000000;
+                if (t1 > MAX_WAIT) {
+                    trace_ram_save_iterate_big_wait(t1, i);
+                    break;
+                }
+            }
+            i++;
         }
-        i++;
     }
-    rcu_read_unlock();
 
     /*
      * Must occur before EOS (or any QEMUFile operation)
@@ -3511,35 +3503,33 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
     RAMState *rs = *temp;
     int ret = 0;
 
-    rcu_read_lock();
-
-    if (!migration_in_postcopy()) {
-        migration_bitmap_sync_precopy(rs);
-    }
+    WITH_RCU_READ_LOCK_GUARD() {
+        if (!migration_in_postcopy()) {
+            migration_bitmap_sync_precopy(rs);
+        }
 
-    ram_control_before_iterate(f, RAM_CONTROL_FINISH);
+        ram_control_before_iterate(f, RAM_CONTROL_FINISH);
 
-    /* try transferring iterative blocks of memory */
+        /* try transferring iterative blocks of memory */
 
-    /* flush all remaining blocks regardless of rate limiting */
-    while (true) {
-        int pages;
+        /* flush all remaining blocks regardless of rate limiting */
+        while (true) {
+            int pages;
 
-        pages = ram_find_and_save_block(rs, !migration_in_colo_state());
-        /* no more blocks to sent */
-        if (pages == 0) {
-            break;
-        }
-        if (pages < 0) {
-            ret = pages;
-            break;
+            pages = ram_find_and_save_block(rs, !migration_in_colo_state());
+            /* no more blocks to sent */
+            if (pages == 0) {
+                break;
+            }
+            if (pages < 0) {
+                ret = pages;
+                break;
+            }
         }
-    }
 
-    flush_compressed_data(rs);
-    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
-
-    rcu_read_unlock();
+        flush_compressed_data(rs);
+        ram_control_after_iterate(f, RAM_CONTROL_FINISH);
+    }
 
     multifd_send_sync_main(rs);
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
@@ -3562,9 +3552,9 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
     if (!migration_in_postcopy() &&
         remaining_size < max_size) {
         qemu_mutex_lock_iothread();
-        rcu_read_lock();
-        migration_bitmap_sync_precopy(rs);
-        rcu_read_unlock();
+        WITH_RCU_READ_LOCK_GUARD() {
+            migration_bitmap_sync_precopy(rs);
+        }
         qemu_mutex_unlock_iothread();
         remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
     }
@@ -3908,7 +3898,13 @@ int colo_init_ram_cache(void)
             error_report("%s: Can't alloc memory for COLO cache of block %s,"
                          "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
                          block->used_length);
-            goto out_locked;
+            RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+                if (block->colo_cache) {
+                    qemu_anon_ram_free(block->colo_cache, block->used_length);
+                    block->colo_cache = NULL;
+                }
+            }
+            return -errno;
         }
         memcpy(block->colo_cache, block->host, block->used_length);
     }
@@ -3934,18 +3930,6 @@ int colo_init_ram_cache(void)
     memory_global_dirty_log_start();
 
     return 0;
-
-out_locked:
-
-    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
-        if (block->colo_cache) {
-            qemu_anon_ram_free(block->colo_cache, block->used_length);
-            block->colo_cache = NULL;
-        }
-    }
-
-    rcu_read_unlock();
-    return -errno;
 }
 
 /* It is need to hold the global lock to call this helper */
@@ -3959,16 +3943,14 @@ void colo_release_ram_cache(void)
         block->bmap = NULL;
     }
 
-    rcu_read_lock();
-
-    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
-        if (block->colo_cache) {
-            qemu_anon_ram_free(block->colo_cache, block->used_length);
-            block->colo_cache = NULL;
+    WITH_RCU_READ_LOCK_GUARD() {
+        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+            if (block->colo_cache) {
+                qemu_anon_ram_free(block->colo_cache, block->used_length);
+                block->colo_cache = NULL;
+            }
         }
     }
-
-    rcu_read_unlock();
     qemu_mutex_destroy(&ram_state->bitmap_mutex);
     g_free(ram_state);
     ram_state = NULL;
@@ -4048,7 +4030,7 @@ static int ram_load_postcopy(QEMUFile *f)
     bool matches_target_page_size = false;
     MigrationIncomingState *mis = migration_incoming_get_current();
     /* Temporary page that is later 'placed' */
-    void *postcopy_host_page = postcopy_get_tmp_page(mis);
+    void *postcopy_host_page = mis->postcopy_tmp_page;
     void *last_host = NULL;
     bool all_zero = false;
 
@@ -4206,31 +4188,30 @@ static void colo_flush_ram_cache(void)
     unsigned long offset = 0;
 
     memory_global_dirty_log_sync();
-    rcu_read_lock();
-    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
-        ramblock_sync_dirty_bitmap(ram_state, block);
+    WITH_RCU_READ_LOCK_GUARD() {
+        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+            ramblock_sync_dirty_bitmap(ram_state, block);
+        }
     }
-    rcu_read_unlock();
 
     trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
-    rcu_read_lock();
-    block = QLIST_FIRST_RCU(&ram_list.blocks);
+    WITH_RCU_READ_LOCK_GUARD() {
+        block = QLIST_FIRST_RCU(&ram_list.blocks);
 
-    while (block) {
-        offset = migration_bitmap_find_dirty(ram_state, block, offset);
+        while (block) {
+            offset = migration_bitmap_find_dirty(ram_state, block, offset);
 
-        if (offset << TARGET_PAGE_BITS >= block->used_length) {
-            offset = 0;
-            block = QLIST_NEXT_RCU(block, next);
-        } else {
-            migration_bitmap_clear_dirty(ram_state, block, offset);
-            dst_host = block->host + (offset << TARGET_PAGE_BITS);
-            src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
-            memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
+            if (offset << TARGET_PAGE_BITS >= block->used_length) {
+                offset = 0;
+                block = QLIST_NEXT_RCU(block, next);
+            } else {
+                migration_bitmap_clear_dirty(ram_state, block, offset);
+                dst_host = block->host + (offset << TARGET_PAGE_BITS);
+                src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
+                memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
+            }
         }
     }
-
-    rcu_read_unlock();
     trace_colo_flush_ram_cache_end();
 }
 
@@ -4429,16 +4410,15 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
      * it will be necessary to reduce the granularity of this
      * critical section.
      */
-    rcu_read_lock();
+    WITH_RCU_READ_LOCK_GUARD() {
+        if (postcopy_running) {
+            ret = ram_load_postcopy(f);
+        } else {
+            ret = ram_load_precopy(f);
+        }
 
-    if (postcopy_running) {
-        ret = ram_load_postcopy(f);
-    } else {
-        ret = ram_load_precopy(f);
+        ret |= wait_for_decompress_done();
     }
-
-    ret |= wait_for_decompress_done();
-    rcu_read_unlock();
     trace_ram_load_complete(ret, seq_iter);
 
     if (!ret  && migration_incoming_in_colo_state()) {
diff --git a/migration/rdma.c b/migration/rdma.c
index 4c74e88a37..e241dcb992 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -88,7 +88,6 @@ static uint32_t known_capabilities = RDMA_CAPABILITY_PIN_ALL;
                                 " to abort!"); \
                 rdma->error_reported = 1; \
             } \
-            rcu_read_unlock(); \
             return rdma->error_state; \
         } \
     } while (0)
@@ -2678,11 +2677,10 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
     size_t i;
     size_t len = 0;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     rdma = atomic_rcu_read(&rioc->rdmaout);
 
     if (!rdma) {
-        rcu_read_unlock();
         return -EIO;
     }
 
@@ -2695,7 +2693,6 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
     ret = qemu_rdma_write_flush(f, rdma);
     if (ret < 0) {
         rdma->error_state = ret;
-        rcu_read_unlock();
         return ret;
     }
 
@@ -2715,7 +2712,6 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
 
             if (ret < 0) {
                 rdma->error_state = ret;
-                rcu_read_unlock();
                 return ret;
             }
 
@@ -2724,7 +2720,6 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
         }
     }
 
-    rcu_read_unlock();
     return done;
 }
 
@@ -2764,11 +2759,10 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
     ssize_t i;
     size_t done = 0;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     rdma = atomic_rcu_read(&rioc->rdmain);
 
     if (!rdma) {
-        rcu_read_unlock();
         return -EIO;
     }
 
@@ -2805,7 +2799,6 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
 
         if (ret < 0) {
             rdma->error_state = ret;
-            rcu_read_unlock();
             return ret;
         }
 
@@ -2819,14 +2812,12 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
         /* Still didn't get enough, so lets just return */
         if (want) {
             if (done == 0) {
-                rcu_read_unlock();
                 return QIO_CHANNEL_ERR_BLOCK;
             } else {
                 break;
             }
         }
     }
-    rcu_read_unlock();
     return done;
 }
 
@@ -2882,7 +2873,7 @@ qio_channel_rdma_source_prepare(GSource *source,
     GIOCondition cond = 0;
     *timeout = -1;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     if (rsource->condition == G_IO_IN) {
         rdma = atomic_rcu_read(&rsource->rioc->rdmain);
     } else {
@@ -2891,7 +2882,6 @@ qio_channel_rdma_source_prepare(GSource *source,
 
     if (!rdma) {
         error_report("RDMAContext is NULL when prepare Gsource");
-        rcu_read_unlock();
         return FALSE;
     }
 
@@ -2900,7 +2890,6 @@ qio_channel_rdma_source_prepare(GSource *source,
     }
     cond |= G_IO_OUT;
 
-    rcu_read_unlock();
     return cond & rsource->condition;
 }
 
@@ -2911,7 +2900,7 @@ qio_channel_rdma_source_check(GSource *source)
     RDMAContext *rdma;
     GIOCondition cond = 0;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     if (rsource->condition == G_IO_IN) {
         rdma = atomic_rcu_read(&rsource->rioc->rdmain);
     } else {
@@ -2920,7 +2909,6 @@ qio_channel_rdma_source_check(GSource *source)
 
     if (!rdma) {
         error_report("RDMAContext is NULL when check Gsource");
-        rcu_read_unlock();
         return FALSE;
     }
 
@@ -2929,7 +2917,6 @@ qio_channel_rdma_source_check(GSource *source)
     }
     cond |= G_IO_OUT;
 
-    rcu_read_unlock();
     return cond & rsource->condition;
 }
 
@@ -2943,7 +2930,7 @@ qio_channel_rdma_source_dispatch(GSource *source,
     RDMAContext *rdma;
     GIOCondition cond = 0;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     if (rsource->condition == G_IO_IN) {
         rdma = atomic_rcu_read(&rsource->rioc->rdmain);
     } else {
@@ -2952,7 +2939,6 @@ qio_channel_rdma_source_dispatch(GSource *source,
 
     if (!rdma) {
         error_report("RDMAContext is NULL when dispatch Gsource");
-        rcu_read_unlock();
         return FALSE;
     }
 
@@ -2961,7 +2947,6 @@ qio_channel_rdma_source_dispatch(GSource *source,
     }
     cond |= G_IO_OUT;
 
-    rcu_read_unlock();
     return (*func)(QIO_CHANNEL(rsource->rioc),
                    (cond & rsource->condition),
                    user_data);
@@ -3073,7 +3058,7 @@ qio_channel_rdma_shutdown(QIOChannel *ioc,
     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
     RDMAContext *rdmain, *rdmaout;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
 
     rdmain = atomic_rcu_read(&rioc->rdmain);
     rdmaout = atomic_rcu_read(&rioc->rdmain);
@@ -3100,7 +3085,6 @@ qio_channel_rdma_shutdown(QIOChannel *ioc,
         break;
     }
 
-    rcu_read_unlock();
     return 0;
 }
 
@@ -3146,18 +3130,16 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
     RDMAContext *rdma;
     int ret;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     rdma = atomic_rcu_read(&rioc->rdmaout);
 
     if (!rdma) {
-        rcu_read_unlock();
         return -EIO;
     }
 
     CHECK_ERROR_STATE();
 
     if (migration_in_postcopy()) {
-        rcu_read_unlock();
         return RAM_SAVE_CONTROL_NOT_SUPP;
     }
 
@@ -3242,11 +3224,9 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
         }
     }
 
-    rcu_read_unlock();
     return RAM_SAVE_CONTROL_DELAYED;
 err:
     rdma->error_state = ret;
-    rcu_read_unlock();
     return ret;
 }
 
@@ -3470,11 +3450,10 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque)
     int count = 0;
     int i = 0;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     rdma = atomic_rcu_read(&rioc->rdmain);
 
     if (!rdma) {
-        rcu_read_unlock();
         return -EIO;
     }
 
@@ -3717,7 +3696,6 @@ out:
     if (ret < 0) {
         rdma->error_state = ret;
     }
-    rcu_read_unlock();
     return ret;
 }
 
@@ -3735,11 +3713,10 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name)
     int curr;
     int found = -1;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     rdma = atomic_rcu_read(&rioc->rdmain);
 
     if (!rdma) {
-        rcu_read_unlock();
         return -EIO;
     }
 
@@ -3753,7 +3730,6 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name)
 
     if (found == -1) {
         error_report("RAMBlock '%s' not found on destination", name);
-        rcu_read_unlock();
         return -ENOENT;
     }
 
@@ -3761,7 +3737,6 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name)
     trace_rdma_block_notification_handle(name, rdma->next_src_index);
     rdma->next_src_index++;
 
-    rcu_read_unlock();
     return 0;
 }
 
@@ -3786,17 +3761,15 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
     RDMAContext *rdma;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     rdma = atomic_rcu_read(&rioc->rdmaout);
     if (!rdma) {
-        rcu_read_unlock();
         return -EIO;
     }
 
     CHECK_ERROR_STATE();
 
     if (migration_in_postcopy()) {
-        rcu_read_unlock();
         return 0;
     }
 
@@ -3804,7 +3777,6 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
     qemu_put_be64(f, RAM_SAVE_FLAG_HOOK);
     qemu_fflush(f);
 
-    rcu_read_unlock();
     return 0;
 }
 
@@ -3821,17 +3793,15 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
     RDMAControlHeader head = { .len = 0, .repeat = 1 };
     int ret = 0;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
     rdma = atomic_rcu_read(&rioc->rdmaout);
     if (!rdma) {
-        rcu_read_unlock();
         return -EIO;
     }
 
     CHECK_ERROR_STATE();
 
     if (migration_in_postcopy()) {
-        rcu_read_unlock();
         return 0;
     }
 
@@ -3863,7 +3833,6 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
                     qemu_rdma_reg_whole_ram_blocks : NULL);
         if (ret < 0) {
             ERROR(errp, "receiving remote info!");
-            rcu_read_unlock();
             return ret;
         }
 
@@ -3887,7 +3856,6 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
                         "not identical on both the source and destination.",
                         local->nb_blocks, nb_dest_blocks);
             rdma->error_state = -EINVAL;
-            rcu_read_unlock();
             return -EINVAL;
         }
 
@@ -3904,7 +3872,6 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
                             local->block[i].length,
                             rdma->dest_blocks[i].length);
                 rdma->error_state = -EINVAL;
-                rcu_read_unlock();
                 return -EINVAL;
             }
             local->block[i].remote_host_addr =
@@ -3922,11 +3889,9 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
         goto err;
     }
 
-    rcu_read_unlock();
     return 0;
 err:
     rdma->error_state = ret;
-    rcu_read_unlock();
     return ret;
 }
 
diff --git a/migration/savevm.c b/migration/savevm.c
index bb9462a54d..8d95e261f6 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1215,6 +1215,8 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
         save_section_footer(f, se);
 
         if (ret < 0) {
+            error_report("failed to save SaveStateEntry with id(name): %d(%s)",
+                         se->section_id, se->idstr);
             qemu_file_set_error(f, ret);
         }
         if (ret <= 0) {
@@ -1835,6 +1837,8 @@ static void *postcopy_ram_listen_thread(void *opaque)
 
     rcu_unregister_thread();
     mis->have_listen_thread = false;
+    postcopy_state_set(POSTCOPY_INCOMING_END);
+
     return NULL;
 }
 
@@ -1865,7 +1869,7 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
      * shouldn't be doing anything yet so don't actually expect requests
      */
     if (migrate_postcopy_ram()) {
-        if (postcopy_ram_enable_notify(mis)) {
+        if (postcopy_ram_incoming_setup(mis)) {
             postcopy_ram_incoming_cleanup(mis);
             return -1;
         }
@@ -1876,11 +1880,6 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
         return -1;
     }
 
-    if (mis->have_listen_thread) {
-        error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
-        return -1;
-    }
-
     mis->have_listen_thread = true;
     /* Start up the listening thread and wait for it to signal ready */
     qemu_sem_init(&mis->listen_thread_sem, 0);
@@ -1934,7 +1933,7 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
 /* After all discards we can start running and asking for pages */
 static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
 {
-    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
+    PostcopyState ps = postcopy_state_get();
 
     trace_loadvm_postcopy_handle_run();
     if (ps != POSTCOPY_INCOMING_LISTENING) {
@@ -1942,6 +1941,7 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
         return -1;
     }
 
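+    /* Only move to RUNNING once we know we really were in LISTENING */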
+    postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
     mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis);
     qemu_bh_schedule(mis->bh);
 
diff --git a/migration/trace-events b/migration/trace-events
index 858d415d56..6dee7b5389 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -71,6 +71,11 @@ get_qtailq_end(const char *name, const char *reason, int val) "%s %s/%d"
 put_qtailq(const char *name, int version_id) "%s v%d"
 put_qtailq_end(const char *name, const char *reason) "%s %s"
 
+get_gtree(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, uint32_t nnodes) "%s(%s/%s) nnodes=%d"
+get_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, int ret) "%s(%s/%s) %d"
+put_gtree(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, uint32_t nnodes) "%s(%s/%s) nnodes=%d"
+put_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, int ret) "%s(%s/%s) %d"
+
 # qemu-file.c
 qemu_file_fclose(void) ""
 
diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
index bee658a1b2..7236cf92bc 100644
--- a/migration/vmstate-types.c
+++ b/migration/vmstate-types.c
@@ -691,3 +691,155 @@ const VMStateInfo vmstate_info_qtailq = {
     .get  = get_qtailq,
     .put  = put_qtailq,
 };
+
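+/*
+ * Stream format used by put_gtree()/get_gtree():
+ *   - a be32 node count
+ *   - for each node: a non-zero marker byte, then the key (a be64 when the
+ *     key is stored directly in the pointer, otherwise a struct described
+ *     by field->vmsd[1]) and the value (a struct described by field->vmsd[0])
+ *   - a zero marker byte terminating the list
+ */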
+struct put_gtree_data {
+    QEMUFile *f;
+    const VMStateDescription *key_vmsd;
+    const VMStateDescription *val_vmsd;
+    QJSON *vmdesc;
+    int ret;
+};
+
+static gboolean put_gtree_elem(gpointer key, gpointer value, gpointer data)
+{
+    struct put_gtree_data *capsule = (struct put_gtree_data *)data;
+    QEMUFile *f = capsule->f;
+    int ret;
+
+    qemu_put_byte(f, true);
+
+    /* put the key */
+    if (!capsule->key_vmsd) {
+        qemu_put_be64(f, (uint64_t)(uintptr_t)(key)); /* direct key */
+    } else {
+        ret = vmstate_save_state(f, capsule->key_vmsd, key, capsule->vmdesc);
+        if (ret) {
+            capsule->ret = ret;
+            return true;
+        }
+    }
+
+    /* put the data */
+    ret = vmstate_save_state(f, capsule->val_vmsd, value, capsule->vmdesc);
+    if (ret) {
+        capsule->ret = ret;
+        return true;
+    }
+    return false;
+}
+
+static int put_gtree(QEMUFile *f, void *pv, size_t unused_size,
+                     const VMStateField *field, QJSON *vmdesc)
+{
+    bool direct_key = (!field->start);
+    const VMStateDescription *key_vmsd = direct_key ? NULL : &field->vmsd[1];
+    const VMStateDescription *val_vmsd = &field->vmsd[0];
+    const char *key_vmsd_name = direct_key ? "direct" : key_vmsd->name;
+    struct put_gtree_data capsule = {
+        .f = f,
+        .key_vmsd = key_vmsd,
+        .val_vmsd = val_vmsd,
+        .vmdesc = vmdesc,
+        .ret = 0};
+    GTree **pval = pv;
+    GTree *tree = *pval;
+    uint32_t nnodes = g_tree_nnodes(tree);
+    int ret;
+
+    trace_put_gtree(field->name, key_vmsd_name, val_vmsd->name, nnodes);
+    qemu_put_be32(f, nnodes);
+    g_tree_foreach(tree, put_gtree_elem, (gpointer)&capsule);
+    qemu_put_byte(f, false);
+    ret = capsule.ret;
+    if (ret) {
+        error_report("%s : failed to save gtree (%d)", field->name, ret);
+    }
+    trace_put_gtree_end(field->name, key_vmsd_name, val_vmsd->name, ret);
+    return ret;
+}
+
+static int get_gtree(QEMUFile *f, void *pv, size_t unused_size,
+                     const VMStateField *field)
+{
+    bool direct_key = (!field->start);
+    const VMStateDescription *key_vmsd = direct_key ? NULL : &field->vmsd[1];
+    const VMStateDescription *val_vmsd = &field->vmsd[0];
+    const char *key_vmsd_name = direct_key ? "direct" : key_vmsd->name;
+    int version_id = field->version_id;
+    size_t key_size = field->start;
+    size_t val_size = field->size;
+    int nnodes, count = 0;
+    GTree **pval = pv;
+    GTree *tree = *pval;
+    void *key, *val;
+    int ret = 0;
+
+    /* with a direct key the key vmsd can be {}, so check direct_key first */
+    if (!direct_key && version_id > key_vmsd->version_id) {
+        error_report("%s %s",  key_vmsd->name, "too new");
+        return -EINVAL;
+    }
+    if (!direct_key && version_id < key_vmsd->minimum_version_id) {
+        error_report("%s %s",  key_vmsd->name, "too old");
+        return -EINVAL;
+    }
+    if (version_id > val_vmsd->version_id) {
+        error_report("%s %s",  val_vmsd->name, "too new");
+        return -EINVAL;
+    }
+    if (version_id < val_vmsd->minimum_version_id) {
+        error_report("%s %s",  val_vmsd->name, "too old");
+        return -EINVAL;
+    }
+
+    nnodes = qemu_get_be32(f);
+    trace_get_gtree(field->name, key_vmsd_name, val_vmsd->name, nnodes);
+
+    while (qemu_get_byte(f)) {
+        if ((++count) > nnodes) {
+            ret = -EINVAL;
+            break;
+        }
+        if (direct_key) {
+            key = (void *)(uintptr_t)qemu_get_be64(f);
+        } else {
+            key = g_malloc0(key_size);
+            ret = vmstate_load_state(f, key_vmsd, key, version_id);
+            if (ret) {
+                error_report("%s : failed to load %s (%d)",
+                             field->name, key_vmsd->name, ret);
+                goto key_error;
+            }
+        }
+        val = g_malloc0(val_size);
+        ret = vmstate_load_state(f, val_vmsd, val, version_id);
+        if (ret) {
+            error_report("%s : failed to load %s (%d)",
+                         field->name, val_vmsd->name, ret);
+            goto val_error;
+        }
+        g_tree_insert(tree, key, val);
+    }
+    if (count != nnodes) {
+        error_report("%s inconsistent stream when loading the gtree",
+                     field->name);
+        return -EINVAL;
+    }
+    trace_get_gtree_end(field->name, key_vmsd_name, val_vmsd->name, ret);
+    return ret;
+val_error:
+    g_free(val);
+key_error:
+    if (!direct_key) {
+        g_free(key);
+    }
+    trace_get_gtree_end(field->name, key_vmsd_name, val_vmsd->name, ret);
+    return ret;
+}
+
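+/*
+ * Exposed to device code through the VMSTATE_GTREE_V() and
+ * VMSTATE_GTREE_DIRECT_KEY_V() field macros (see include/migration/vmstate.h).
+ */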
+const VMStateInfo vmstate_info_gtree = {
+    .name = "gtree",
+    .get  = get_gtree,
+    .put  = put_gtree,
+};
diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c
index e80c4c6143..1e5be1d4ff 100644
--- a/tests/test-vmstate.c
+++ b/tests/test-vmstate.c
@@ -812,6 +812,423 @@ static void test_load_q(void)
     qemu_fclose(fload);
 }
 
+/* interval (key) */
+typedef struct TestGTreeInterval {
+    uint64_t low;
+    uint64_t high;
+} TestGTreeInterval;
+
+#define VMSTATE_INTERVAL                               \
+{                                                      \
+    .name = "interval",                                \
+    .version_id = 1,                                   \
+    .minimum_version_id = 1,                           \
+    .fields = (VMStateField[]) {                       \
+        VMSTATE_UINT64(low, TestGTreeInterval),        \
+        VMSTATE_UINT64(high, TestGTreeInterval),       \
+        VMSTATE_END_OF_LIST()                          \
+    }                                                  \
+}
+
+/* mapping (value) */
+typedef struct TestGTreeMapping {
+    uint64_t phys_addr;
+    uint32_t flags;
+} TestGTreeMapping;
+
+#define VMSTATE_MAPPING                               \
+{                                                     \
+    .name = "mapping",                                \
+    .version_id = 1,                                  \
+    .minimum_version_id = 1,                          \
+    .fields = (VMStateField[]) {                      \
+        VMSTATE_UINT64(phys_addr, TestGTreeMapping),  \
+        VMSTATE_UINT32(flags, TestGTreeMapping),      \
+        VMSTATE_END_OF_LIST()                         \
+    },                                                \
+}
+
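+/*
+ * Convention expected by vmstate_info_gtree: index 0 describes the value,
+ * index 1 describes the key (omitted when the key is stored directly).
+ */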
+static const VMStateDescription vmstate_interval_mapping[2] = {
+    VMSTATE_MAPPING,   /* value */
+    VMSTATE_INTERVAL   /* key   */
+};
+
+typedef struct TestGTreeDomain {
+    int32_t  id;
+    GTree    *mappings;
+} TestGTreeDomain;
+
+typedef struct TestGTreeIOMMU {
+    int32_t  id;
+    GTree    *domains;
+} TestGTreeIOMMU;
+
+/* Interval comparison function */
+static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+    TestGTreeInterval *inta = (TestGTreeInterval *)a;
+    TestGTreeInterval *intb = (TestGTreeInterval *)b;
+
+    if (inta->high < intb->low) {
+        return -1;
+    } else if (intb->high < inta->low) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+/* ID comparison function */
+static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+    guint ua = GPOINTER_TO_UINT(a);
+    guint ub = GPOINTER_TO_UINT(b);
+    return (ua > ub) - (ua < ub);
+}
+
+static void destroy_domain(gpointer data)
+{
+    TestGTreeDomain *domain = (TestGTreeDomain *)data;
+
+    g_tree_destroy(domain->mappings);
+    g_free(domain);
+}
+
+static int domain_preload(void *opaque)
+{
+    TestGTreeDomain *domain = opaque;
+
+    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
+                                       NULL, g_free, g_free);
+    return 0;
+}
+
+static int iommu_preload(void *opaque)
+{
+    TestGTreeIOMMU *iommu = opaque;
+
+    iommu->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
+                                     NULL, NULL, destroy_domain);
+    return 0;
+}
+
+static const VMStateDescription vmstate_domain = {
+    .name = "domain",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .pre_load = domain_preload,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT32(id, TestGTreeDomain),
+        VMSTATE_GTREE_V(mappings, TestGTreeDomain, 1,
+                        vmstate_interval_mapping,
+                        TestGTreeInterval, TestGTreeMapping),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_iommu = {
+    .name = "iommu",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .pre_load = iommu_preload,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT32(id, TestGTreeIOMMU),
+        VMSTATE_GTREE_DIRECT_KEY_V(domains, TestGTreeIOMMU, 1,
+                                   &vmstate_domain, TestGTreeDomain),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+uint8_t first_domain_dump[] = {
+    /* id */
+    0x00, 0x0, 0x0, 0x6,
+    0x00, 0x0, 0x0, 0x2, /* 2 mappings */
+    0x1, /* start of a */
+    /* a */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1F, 0xFF,
+    /* map_a */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x00,
+    0x00, 0x00, 0x00, 0x01,
+    0x1, /* start of b */
+    /* b */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4F, 0xFF,
+    /* map_b */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x02,
+    0x0, /* end of gtree */
+    QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+};
+
+static TestGTreeDomain *create_first_domain(void)
+{
+    TestGTreeDomain *domain;
+    TestGTreeMapping *map_a, *map_b;
+    TestGTreeInterval *a, *b;
+
+    domain = g_malloc0(sizeof(TestGTreeDomain));
+    domain->id = 6;
+
+    a = g_malloc0(sizeof(TestGTreeInterval));
+    a->low = 0x1000;
+    a->high = 0x1FFF;
+
+    b = g_malloc0(sizeof(TestGTreeInterval));
+    b->low = 0x4000;
+    b->high = 0x4FFF;
+
+    map_a = g_malloc0(sizeof(TestGTreeMapping));
+    map_a->phys_addr = 0xa000;
+    map_a->flags = 1;
+
+    map_b = g_malloc0(sizeof(TestGTreeMapping));
+    map_b->phys_addr = 0xe0000;
+    map_b->flags = 2;
+
+    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp, NULL,
+                                        (GDestroyNotify)g_free,
+                                        (GDestroyNotify)g_free);
+    g_tree_insert(domain->mappings, a, map_a);
+    g_tree_insert(domain->mappings, b, map_b);
+    return domain;
+}
+
+static void test_gtree_save_domain(void)
+{
+    TestGTreeDomain *first_domain = create_first_domain();
+
+    save_vmstate(&vmstate_domain, first_domain);
+    compare_vmstate(first_domain_dump, sizeof(first_domain_dump));
+    destroy_domain(first_domain);
+}
+
+struct match_node_data {
+    GTree *tree;
+    gpointer key;
+    gpointer value;
+};
+
+struct tree_cmp_data {
+    GTree *tree1;
+    GTree *tree2;
+    GTraverseFunc match_node;
+};
+
+static gboolean match_interval_mapping_node(gpointer key,
+                                            gpointer value, gpointer data)
+{
+    TestGTreeMapping *map_a, *map_b;
+    TestGTreeInterval *a, *b;
+    struct match_node_data *d = (struct match_node_data *)data;
+    char *str = g_strdup_printf("dest");
+
+    g_free(str);
+    a = (TestGTreeInterval *)key;
+    b = (TestGTreeInterval *)d->key;
+
+    map_a = (TestGTreeMapping *)value;
+    map_b = (TestGTreeMapping *)d->value;
+
+    assert(a->low == b->low);
+    assert(a->high == b->high);
+    assert(map_a->phys_addr == map_b->phys_addr);
+    assert(map_a->flags == map_b->flags);
+    g_tree_remove(d->tree, key);
+    return true;
+}
+
+static gboolean diff_tree(gpointer key, gpointer value, gpointer data)
+{
+    struct tree_cmp_data *tp = (struct tree_cmp_data *)data;
+    struct match_node_data d = {tp->tree2, key, value};
+
+    g_tree_foreach(tp->tree2, tp->match_node, &d);
+    g_tree_remove(tp->tree1, key);
+    return false;
+}
+
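+/*
+ * Compare two trees with identical contents: walk tree1 in key order and,
+ * for each node, let the match callback check it against the smallest
+ * remaining node of tree2 (the callback returns TRUE so the inner foreach
+ * stops after one node) and remove it from tree2; the node is then removed
+ * from tree1 as well, so two equal trees both end up empty.
+ */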
+static void compare_trees(GTree *tree1, GTree *tree2,
+                          GTraverseFunc function)
+{
+    struct tree_cmp_data tp = {tree1, tree2, function};
+
+    g_tree_foreach(tree1, diff_tree, &tp);
+    assert(g_tree_nnodes(tree1) == 0);
+    assert(g_tree_nnodes(tree2) == 0);
+}
+
+static void diff_domain(TestGTreeDomain *d1, TestGTreeDomain *d2)
+{
+    assert(d1->id == d2->id);
+    compare_trees(d1->mappings, d2->mappings, match_interval_mapping_node);
+}
+
+static gboolean match_domain_node(gpointer key, gpointer value, gpointer data)
+{
+    uint64_t id1, id2;
+    TestGTreeDomain *d1, *d2;
+    struct match_node_data *d = (struct match_node_data *)data;
+
+    id1 = (uint64_t)(uintptr_t)key;
+    id2 = (uint64_t)(uintptr_t)d->key;
+    d1 = (TestGTreeDomain *)value;
+    d2 = (TestGTreeDomain *)d->value;
+    assert(id1 == id2);
+    diff_domain(d1, d2);
+    g_tree_remove(d->tree, key);
+    return true;
+}
+
+static void diff_iommu(TestGTreeIOMMU *iommu1, TestGTreeIOMMU *iommu2)
+{
+    assert(iommu1->id == iommu2->id);
+    compare_trees(iommu1->domains, iommu2->domains, match_domain_node);
+}
+
+static void test_gtree_load_domain(void)
+{
+    TestGTreeDomain *dest_domain = g_malloc0(sizeof(TestGTreeDomain));
+    TestGTreeDomain *orig_domain = create_first_domain();
+    QEMUFile *fload, *fsave;
+    char eof;
+
+    fsave = open_test_file(true);
+    qemu_put_buffer(fsave, first_domain_dump, sizeof(first_domain_dump));
+    g_assert(!qemu_file_get_error(fsave));
+    qemu_fclose(fsave);
+
+    fload = open_test_file(false);
+
+    vmstate_load_state(fload, &vmstate_domain, dest_domain, 1);
+    eof = qemu_get_byte(fload);
+    g_assert(!qemu_file_get_error(fload));
+    g_assert_cmpint(orig_domain->id, ==, dest_domain->id);
+    g_assert_cmpint(eof, ==, QEMU_VM_EOF);
+
+    diff_domain(orig_domain, dest_domain);
+    destroy_domain(orig_domain);
+    destroy_domain(dest_domain);
+    qemu_fclose(fload);
+}
+
+uint8_t iommu_dump[] = {
+    /* iommu id */
+    0x00, 0x0, 0x0, 0x7,
+    0x00, 0x0, 0x0, 0x2, /* 2 domains */
+    0x1,/* start of domain 5 */
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x0, 0x0, 0x5, /* key = 5 */
+        0x00, 0x0, 0x0, 0x5, /* domain 5 id */
+        0x00, 0x0, 0x0, 0x1, /* 1 mapping */
+        0x1, /* start of mappings */
+            /* c */
+            0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x01, 0xFF, 0xFF, 0xFF,
+            /* map_c */
+            0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00,
+            0x00, 0x0, 0x0, 0x3,
+            0x0, /* end of domain 5 mappings */
+    0x1,/* start of domain 6 */
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x0, 0x0, 0x6, /* key = 6 */
+        0x00, 0x0, 0x0, 0x6, /* domain 6 id */
+            0x00, 0x0, 0x0, 0x2, /* 2 mappings */
+            0x1, /* start of a */
+            /* a */
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1F, 0xFF,
+            /* map_a */
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x00,
+            0x00, 0x00, 0x00, 0x01,
+            0x1, /* start of b */
+            /* b */
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4F, 0xFF,
+            /* map_b */
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x02,
+            0x0, /* end of domain 6 mappings */
+    0x0, /* end of domains */
+    QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+};
+
+static TestGTreeIOMMU *create_iommu(void)
+{
+    TestGTreeIOMMU *iommu = g_malloc0(sizeof(TestGTreeIOMMU));
+    TestGTreeDomain *first_domain = create_first_domain();
+    TestGTreeDomain *second_domain;
+    TestGTreeMapping *map_c;
+    TestGTreeInterval *c;
+
+    iommu->id = 7;
+    iommu->domains = g_tree_new_full((GCompareDataFunc)int_cmp, NULL,
+                                     NULL,
+                                     destroy_domain);
+
+    second_domain = g_malloc0(sizeof(TestGTreeDomain));
+    second_domain->id = 5;
+    second_domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
+                                              NULL,
+                                              (GDestroyNotify)g_free,
+                                              (GDestroyNotify)g_free);
+
+    g_tree_insert(iommu->domains, GUINT_TO_POINTER(6), first_domain);
+    g_tree_insert(iommu->domains, GUINT_TO_POINTER(5), second_domain);
+
+    c = g_malloc0(sizeof(TestGTreeInterval));
+    c->low = 0x1000000;
+    c->high = 0x1FFFFFF;
+
+    map_c = g_malloc0(sizeof(TestGTreeMapping));
+    map_c->phys_addr = 0xF000000;
+    map_c->flags = 0x3;
+
+    g_tree_insert(second_domain->mappings, c, map_c);
+    return iommu;
+}
+
+static void destroy_iommu(TestGTreeIOMMU *iommu)
+{
+    g_tree_destroy(iommu->domains);
+    g_free(iommu);
+}
+
+static void test_gtree_save_iommu(void)
+{
+    TestGTreeIOMMU *iommu = create_iommu();
+
+    save_vmstate(&vmstate_iommu, iommu);
+    compare_vmstate(iommu_dump, sizeof(iommu_dump));
+    destroy_iommu(iommu);
+}
+
+static void test_gtree_load_iommu(void)
+{
+    TestGTreeIOMMU *dest_iommu = g_malloc0(sizeof(TestGTreeIOMMU));
+    TestGTreeIOMMU *orig_iommu = create_iommu();
+    QEMUFile *fsave, *fload;
+    char eof;
+    int ret;
+
+    fsave = open_test_file(true);
+    qemu_put_buffer(fsave, iommu_dump, sizeof(iommu_dump));
+    g_assert(!qemu_file_get_error(fsave));
+    qemu_fclose(fsave);
+
+    fload = open_test_file(false);
+    vmstate_load_state(fload, &vmstate_iommu, dest_iommu, 1);
+    eof = qemu_get_byte(fload);
+    ret = qemu_file_get_error(fload);
+    g_assert(!ret);
+    g_assert_cmpint(orig_iommu->id, ==, dest_iommu->id);
+    g_assert_cmpint(eof, ==, QEMU_VM_EOF);
+
+    diff_iommu(orig_iommu, dest_iommu);
+    destroy_iommu(orig_iommu);
+    destroy_iommu(dest_iommu);
+    qemu_fclose(fload);
+}
+
 typedef struct TmpTestStruct {
     TestStruct *parent;
     int64_t diff;
@@ -932,6 +1349,10 @@ int main(int argc, char **argv)
                     test_arr_ptr_prim_0_load);
     g_test_add_func("/vmstate/qtailq/save/saveq", test_save_q);
     g_test_add_func("/vmstate/qtailq/load/loadq", test_load_q);
+    g_test_add_func("/vmstate/gtree/save/savedomain", test_gtree_save_domain);
+    g_test_add_func("/vmstate/gtree/load/loaddomain", test_gtree_load_domain);
+    g_test_add_func("/vmstate/gtree/save/saveiommu", test_gtree_save_iommu);
+    g_test_add_func("/vmstate/gtree/load/loadiommu", test_gtree_load_iommu);
     g_test_add_func("/vmstate/tmp_struct", test_tmp_struct);
     g_test_run();