-rw-r--r--  MAINTAINERS | 9
-rw-r--r--  accel/tcg/cpu-exec.c | 34
-rw-r--r--  accel/tcg/tcg-accel-ops-mttcg.c | 3
-rw-r--r--  accel/tcg/tcg-accel-ops-rr.c | 2
-rw-r--r--  accel/tcg/tcg-accel-ops.c | 8
-rw-r--r--  accel/tcg/tcg-accel-ops.h | 1
-rw-r--r--  accel/tcg/tcg-runtime.c | 6
-rw-r--r--  accel/tcg/translate-all.c | 18
-rw-r--r--  block.c | 8
-rw-r--r--  block/backup-top.c | 10
-rw-r--r--  block/backup.c | 1
-rw-r--r--  block/dirty-bitmap.c | 13
-rw-r--r--  block/export/vhost-user-blk-server.c | 150
-rw-r--r--  block/meson.build | 3
-rw-r--r--  block/parallels-ext.c | 300
-rw-r--r--  block/parallels.c | 26
-rw-r--r--  block/parallels.h | 7
-rw-r--r--  block/qcow2-bitmap.c | 16
-rw-r--r--  blockdev.c | 13
-rw-r--r--  blockjob.c | 10
-rw-r--r--  docs/interop/parallels.txt | 28
-rw-r--r--  docs/tools/qemu-storage-daemon.rst | 68
-rw-r--r--  hw/block/Kconfig | 3
-rw-r--r--  hw/block/meson.build | 2
-rw-r--r--  hw/block/tc58128.c | 26
-rw-r--r--  hw/block/vhost-user-blk.c | 7
-rw-r--r--  hw/char/Kconfig | 3
-rw-r--r--  hw/char/meson.build | 2
-rw-r--r--  hw/dma/sparc32_dma.c | 4
-rw-r--r--  hw/intc/Kconfig | 3
-rw-r--r--  hw/intc/meson.build | 2
-rw-r--r--  hw/m68k/q800.c | 4
-rw-r--r--  hw/mips/jazz.c | 4
-rw-r--r--  hw/pci-host/Kconfig | 4
-rw-r--r--  hw/pci-host/meson.build | 1
-rw-r--r--  hw/pci-host/sh_pci.c (renamed from hw/sh4/sh_pci.c) | 0
-rw-r--r--  hw/scsi/esp-pci.c | 53
-rw-r--r--  hw/scsi/esp.c | 975
-rw-r--r--  hw/scsi/trace-events | 5
-rw-r--r--  hw/sh4/Kconfig | 12
-rw-r--r--  hw/sh4/meson.build | 1
-rw-r--r--  hw/sh4/sh7750_regs.h | 24
-rw-r--r--  hw/sparc/sun4m.c | 2
-rw-r--r--  hw/timer/Kconfig | 4
-rw-r--r--  hw/timer/meson.build | 2
-rw-r--r--  include/block/dirty-bitmap.h | 2
-rw-r--r--  include/exec/exec-all.h | 22
-rw-r--r--  include/exec/poison.h | 2
-rw-r--r--  include/exec/tb-lookup.h | 26
-rw-r--r--  include/hw/core/cpu.h | 2
-rw-r--r--  include/hw/scsi/esp.h | 52
-rw-r--r--  include/hw/sh4/sh.h | 31
-rw-r--r--  linux-user/main.c | 1
-rw-r--r--  linux-user/sh4/signal.c | 8
-rw-r--r--  linux-user/syscall.c | 18
-rw-r--r--  softmmu/physmem.c | 2
-rw-r--r--  storage-daemon/qemu-storage-daemon.c | 56
-rw-r--r--  target/hexagon/macros.h | 4
-rw-r--r--  target/hexagon/opcodes.c | 1
-rw-r--r--  target/sh4/cpu.h | 11
-rw-r--r--  target/sh4/helper.c | 101
-rw-r--r--  tcg/aarch64/tcg-target.c.inc | 229
-rw-r--r--  tcg/tcg.c | 29
-rw-r--r--  tcg/tci.c | 526
-rw-r--r--  tcg/tci/tcg-target.c.inc | 204
-rwxr-xr-x  tests/qemu-iotests/030 | 4
-rwxr-xr-x  tests/qemu-iotests/040 | 4
-rw-r--r--  tests/qemu-iotests/051.pc.out | 6
-rw-r--r--  tests/qemu-iotests/081.out | 2
-rw-r--r--  tests/qemu-iotests/085.out | 6
-rwxr-xr-x  tests/qemu-iotests/087 | 8
-rw-r--r--  tests/qemu-iotests/087.out | 2
-rwxr-xr-x  tests/qemu-iotests/184 | 18
-rw-r--r--  tests/qemu-iotests/206.out | 2
-rw-r--r--  tests/qemu-iotests/210.out | 2
-rw-r--r--  tests/qemu-iotests/211.out | 2
-rw-r--r--  tests/qemu-iotests/212.out | 2
-rw-r--r--  tests/qemu-iotests/213.out | 2
-rwxr-xr-x  tests/qemu-iotests/218 | 2
-rw-r--r--  tests/qemu-iotests/223.out | 4
-rwxr-xr-x  tests/qemu-iotests/235 | 2
-rw-r--r--  tests/qemu-iotests/237.out | 2
-rwxr-xr-x  tests/qemu-iotests/245 | 14
-rw-r--r--  tests/qemu-iotests/249.out | 2
-rwxr-xr-x  tests/qemu-iotests/258 | 6
-rw-r--r--  tests/qemu-iotests/258.out | 4
-rwxr-xr-x  tests/qemu-iotests/283 | 53
-rw-r--r--  tests/qemu-iotests/283.out | 15
-rwxr-xr-x  tests/qemu-iotests/295 | 2
-rwxr-xr-x  tests/qemu-iotests/296 | 2
-rwxr-xr-x  tests/qemu-iotests/300 | 14
-rw-r--r--  tests/qemu-iotests/iotests.py | 10
-rw-r--r--  tests/qemu-iotests/sample_images/parallels-with-bitmap.bz2 | bin 0 -> 203 bytes
-rwxr-xr-x  tests/qemu-iotests/sample_images/parallels-with-bitmap.sh | 51
-rwxr-xr-x  tests/qemu-iotests/tests/parallels-read-bitmap | 55
-rw-r--r--  tests/qemu-iotests/tests/parallels-read-bitmap.out | 6
-rw-r--r--  tests/qtest/libqos/libqtest.h | 37
-rw-r--r--  tests/qtest/libqtest.c | 82
98 files changed, 2411 insertions, 1216 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index be8385255d..b46b799263 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1404,16 +1404,22 @@ R2D
 M: Yoshinori Sato <ysato@users.sourceforge.jp>
 R: Magnus Damm <magnus.damm@gmail.com>
 S: Odd Fixes
+F: hw/char/sh_serial.c
 F: hw/sh4/r2d.c
 F: hw/intc/sh_intc.c
+F: hw/pci-host/sh_pci.c
+F: hw/timer/sh_timer.c
 F: include/hw/sh4/sh_intc.h
 
 Shix
 M: Yoshinori Sato <ysato@users.sourceforge.jp>
 R: Magnus Damm <magnus.damm@gmail.com>
 S: Odd Fixes
+F: hw/block/tc58128.c
+F: hw/char/sh_serial.c
 F: hw/sh4/shix.c
 F: hw/intc/sh_intc.c
+F: hw/timer/sh_timer.c
 F: include/hw/sh4/sh_intc.h
 
 SPARC Machines
@@ -3133,10 +3139,13 @@ F: block/dmg.c
 parallels
 M: Stefan Hajnoczi <stefanha@redhat.com>
 M: Denis V. Lunev <den@openvz.org>
+M: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 L: qemu-block@nongnu.org
 S: Supported
 F: block/parallels.c
+F: block/parallels-ext.c
 F: docs/interop/parallels.txt
+T: git https://src.openvz.org/scm/~vsementsov/qemu.git parallels
 
 qed
 M: Stefan Hajnoczi <stefanha@redhat.com>
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 16e4fe3ccd..bdfa036ac8 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -245,11 +245,11 @@ static void cpu_exec_exit(CPUState *cpu)
 
 void cpu_exec_step_atomic(CPUState *cpu)
 {
+    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags;
-    uint32_t cflags = 1;
-    uint32_t cf_mask = cflags & CF_HASH_MASK;
+    uint32_t cflags = (curr_cflags(cpu) & ~CF_PARALLEL) | 1;
     int tb_exit;
 
     if (sigsetjmp(cpu->jmp_env, 0) == 0) {
@@ -258,15 +258,15 @@ void cpu_exec_step_atomic(CPUState *cpu)
         g_assert(!cpu->running);
         cpu->running = true;
 
-        tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
+        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+        tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
+
         if (tb == NULL) {
             mmap_lock();
             tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
             mmap_unlock();
         }
 
-        /* Since we got here, we know that parallel_cpus must be true.  */
-        parallel_cpus = false;
         cpu_exec_enter(cpu);
         /* execute the generated code */
         trace_exec_tb(tb, pc);
@@ -294,7 +294,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
      * the execution.
      */
     g_assert(cpu_in_exclusive_context(cpu));
-    parallel_cpus = true;
     cpu->running = false;
     end_exclusive();
 }
@@ -305,7 +304,7 @@ struct tb_desc {
     CPUArchState *env;
     tb_page_addr_t phys_page1;
     uint32_t flags;
-    uint32_t cf_mask;
+    uint32_t cflags;
     uint32_t trace_vcpu_dstate;
 };
 
@@ -319,7 +318,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
         tb->cs_base == desc->cs_base &&
         tb->flags == desc->flags &&
         tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
-        (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == desc->cf_mask) {
+        tb_cflags(tb) == desc->cflags) {
         /* check next page if needed */
         if (tb->page_addr[1] == -1) {
             return true;
@@ -339,7 +338,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
 
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                    target_ulong cs_base, uint32_t flags,
-                                   uint32_t cf_mask)
+                                   uint32_t cflags)
 {
     tb_page_addr_t phys_pc;
     struct tb_desc desc;
@@ -348,7 +347,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
     desc.env = (CPUArchState *)cpu->env_ptr;
     desc.cs_base = cs_base;
     desc.flags = flags;
-    desc.cf_mask = cf_mask;
+    desc.cflags = cflags;
     desc.trace_vcpu_dstate = *cpu->trace_dstate;
     desc.pc = pc;
     phys_pc = get_page_addr_code(desc.env, pc);
@@ -356,7 +355,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
         return NULL;
     }
     desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
-    h = tb_hash_func(phys_pc, pc, flags, cf_mask, *cpu->trace_dstate);
+    h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
     return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
 }
 
@@ -416,16 +415,19 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
 
 static inline TranslationBlock *tb_find(CPUState *cpu,
                                         TranslationBlock *last_tb,
-                                        int tb_exit, uint32_t cf_mask)
+                                        int tb_exit, uint32_t cflags)
 {
+    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags;
 
-    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+
+    tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
     if (tb == NULL) {
         mmap_lock();
-        tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask);
+        tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
         mmap_unlock();
         /* We add the TB in the virtual pc hash table for the fast lookup */
         qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
@@ -491,7 +493,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
         if (replay_has_exception()
             && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
             /* Execute just one insn to trigger exception pending in the log */
-            cpu->cflags_next_tb = (curr_cflags() & ~CF_USE_ICOUNT) | 1;
+            cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT) | 1;
         }
 #endif
         return false;
@@ -788,7 +790,7 @@ int cpu_exec(CPUState *cpu)
                have CF_INVALID set, -1 is a convenient invalid value that
                does not require tcg headers for cpu_common_reset.  */
             if (cflags == -1) {
-                cflags = curr_cflags();
+                cflags = curr_cflags(cpu);
             } else {
                 cpu->cflags_next_tb = -1;
             }
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index 42973fb062..847d2079d2 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -114,8 +114,7 @@ void mttcg_start_vcpu_thread(CPUState *cpu)
     char thread_name[VCPU_THREAD_NAME_SIZE];
 
     g_assert(tcg_enabled());
-
-    parallel_cpus = (current_machine->smp.max_cpus > 1);
+    tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1);
 
     cpu->thread = g_malloc0(sizeof(QemuThread));
     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 4a66055e0d..018b54c508 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -269,7 +269,7 @@ void rr_start_vcpu_thread(CPUState *cpu)
     static QemuThread *single_tcg_cpu_thread;
 
     g_assert(tcg_enabled());
-    parallel_cpus = false;
+    tcg_cpu_init_cflags(cpu, false);
 
     if (!single_tcg_cpu_thread) {
         cpu->thread = g_malloc0(sizeof(QemuThread));
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 6144d9df87..6cdcaa2855 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -41,6 +41,14 @@
 
 /* common functionality among all TCG variants */
 
+void tcg_cpu_init_cflags(CPUState *cpu, bool parallel)
+{
+    uint32_t cflags = cpu->cluster_index << CF_CLUSTER_SHIFT;
+    cflags |= parallel ? CF_PARALLEL : 0;
+    cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
+    cpu->tcg_cflags = cflags;
+}
+
 void tcg_cpus_destroy(CPUState *cpu)
 {
     cpu_thread_signal_destroyed(cpu);
diff --git a/accel/tcg/tcg-accel-ops.h b/accel/tcg/tcg-accel-ops.h
index 48130006de..6a5fcef889 100644
--- a/accel/tcg/tcg-accel-ops.h
+++ b/accel/tcg/tcg-accel-ops.h
@@ -17,5 +17,6 @@
 void tcg_cpus_destroy(CPUState *cpu);
 int tcg_cpus_exec(CPUState *cpu);
 void tcg_handle_interrupt(CPUState *cpu, int mask);
+void tcg_cpu_init_cflags(CPUState *cpu, bool parallel);
 
 #endif /* TCG_CPUS_H */
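The two hunks above, together with the cpu-exec.c and tcg-accel-ops-*.c changes earlier in the diff, move the parallel-execution decision from the global parallel_cpus flag into a per-vCPU cpu->tcg_cflags value computed once at vCPU start. The include/exec/exec-all.h hunk is not shown in this excerpt; as an illustrative sketch only (assuming this is how the new helper is wired up), curr_cflags() can then just return the precomputed value:

    /* Sketch only -- the real include/exec/exec-all.h hunk is not shown here. */
    static inline uint32_t curr_cflags(CPUState *cpu)
    {
        /*
         * CF_PARALLEL, CF_USE_ICOUNT and the cluster index were precomputed
         * by tcg_cpu_init_cflags() when the vCPU thread was started, so no
         * global state needs to be consulted on the hot path.
         */
        return cpu->tcg_cflags;
    }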
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index d736f4ff55..49f5de37e8 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -27,10 +27,10 @@
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
-#include "exec/tb-lookup.h"
 #include "disas/disas.h"
 #include "exec/log.h"
 #include "tcg/tcg.h"
+#include "exec/tb-lookup.h"
 
 /* 32-bit helpers */
 
@@ -152,7 +152,9 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
     target_ulong cs_base, pc;
     uint32_t flags;
 
-    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, curr_cflags());
+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+
+    tb = tb_lookup(cpu, pc, cs_base, flags, curr_cflags(cpu));
     if (tb == NULL) {
         return tcg_code_gen_epilogue;
     }
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index bbd919a393..f32df8b240 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -224,7 +224,6 @@ static void *l1_map[V_L1_MAX_SIZE];
 TCGContext tcg_init_ctx;
 __thread TCGContext *tcg_ctx;
 TBContext tb_ctx;
-bool parallel_cpus;
 
 static void page_table_config_init(void)
 {
@@ -1311,7 +1310,7 @@ static bool tb_cmp(const void *ap, const void *bp)
     return a->pc == b->pc &&
         a->cs_base == b->cs_base &&
         a->flags == b->flags &&
-        (tb_cflags(a) & CF_HASH_MASK) == (tb_cflags(b) & CF_HASH_MASK) &&
+        (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
         a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
         a->page_addr[0] == b->page_addr[0] &&
         a->page_addr[1] == b->page_addr[1];
@@ -1616,6 +1615,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
     PageDesc *p;
     uint32_t h;
     tb_page_addr_t phys_pc;
+    uint32_t orig_cflags = tb_cflags(tb);
 
     assert_memory_lock();
 
@@ -1626,7 +1626,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
 
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK,
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
                      tb->trace_vcpu_dstate);
     if (!qht_remove(&tb_ctx.htable, tb, h)) {
         return;
@@ -1793,6 +1793,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     uint32_t h;
 
     assert_memory_lock();
+    tcg_debug_assert(!(tb->cflags & CF_INVALID));
 
     /*
      * Add the TB to the page list, acquiring first the pages's locks.
@@ -1811,7 +1812,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     }
 
     /* add in the hash table */
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
                      tb->trace_vcpu_dstate);
     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
 
@@ -1865,9 +1866,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
         cflags = (cflags & ~CF_COUNT_MASK) | 1;
     }
 
-    cflags &= ~CF_CLUSTER_MASK;
-    cflags |= cpu->cluster_index << CF_CLUSTER_SHIFT;
-
     max_insns = cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
@@ -2194,7 +2192,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
     if (current_tb_modified) {
         page_collection_unlock(pages);
         /* Force execution of one insn next time.  */
-        cpu->cflags_next_tb = 1 | curr_cflags();
+        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
         mmap_unlock();
         cpu_loop_exit_noexc(cpu);
     }
@@ -2362,7 +2360,7 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
 #ifdef TARGET_HAS_PRECISE_SMC
     if (current_tb_modified) {
         /* Force execution of one insn next time.  */
-        cpu->cflags_next_tb = 1 | curr_cflags();
+        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
         return true;
     }
 #endif
@@ -2438,7 +2436,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
      * operations only (which execute after completion) so we don't
      * double instrument the instruction.
      */
-    cpu->cflags_next_tb = curr_cflags() | CF_MEMI_ONLY | CF_LAST_IO | n;
+    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
 
     qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
                            "cpu_io_recompile: rewound execution of TB to "
diff --git a/block.c b/block.c
index a1f3cecd75..2daff6d29a 100644
--- a/block.c
+++ b/block.c
@@ -1440,7 +1440,7 @@ static void bdrv_assign_node_name(BlockDriverState *bs,
          * Check for empty string or invalid characters, but not if it is
          * generated (generated names use characters not available to the user)
          */
-        error_setg(errp, "Invalid node name");
+        error_setg(errp, "Invalid node-name: '%s'", node_name);
         return;
     }
 
@@ -1453,7 +1453,7 @@ static void bdrv_assign_node_name(BlockDriverState *bs,
 
     /* takes care of avoiding duplicates node names */
     if (bdrv_find_node(node_name)) {
-        error_setg(errp, "Duplicate node name");
+        error_setg(errp, "Duplicate nodes with node-name='%s'", node_name);
         goto out;
     }
 
@@ -5432,7 +5432,7 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
         }
     }
 
-    error_setg(errp, "Cannot find device=%s nor node_name=%s",
+    error_setg(errp, "Cannot find device=\'%s\' nor node-name=\'%s\'",
                      device ? device : "",
                      node_name ? node_name : "");
     return NULL;
@@ -6752,7 +6752,7 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
     AioContext *aio_context;
 
     if (!to_replace_bs) {
-        error_setg(errp, "Node name '%s' not found", node_name);
+        error_setg(errp, "Failed to find node with node-name='%s'", node_name);
         return NULL;
     }
 
diff --git a/block/backup-top.c b/block/backup-top.c
index d1253e1aa6..589e8b651d 100644
--- a/block/backup-top.c
+++ b/block/backup-top.c
@@ -45,6 +45,12 @@ static coroutine_fn int backup_top_co_preadv(
         BlockDriverState *bs, uint64_t offset, uint64_t bytes,
         QEMUIOVector *qiov, int flags)
 {
+    BDRVBackupTopState *s = bs->opaque;
+
+    if (!s->active) {
+        return -EIO;
+    }
+
     return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
 }
 
@@ -54,6 +60,10 @@ static coroutine_fn int backup_top_cbw(BlockDriverState *bs, uint64_t offset,
     BDRVBackupTopState *s = bs->opaque;
     uint64_t off, end;
 
+    if (!s->active) {
+        return -EIO;
+    }
+
     if (flags & BDRV_REQ_WRITE_UNCHANGED) {
         return 0;
     }
diff --git a/block/backup.c b/block/backup.c
index 94e6dcd72e..6cf2f974aa 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -103,6 +103,7 @@ static void backup_abort(Job *job)
 static void backup_clean(Job *job)
 {
     BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
+    block_job_remove_all_bdrv(&s->common);
     bdrv_backup_top_drop(s->backup_top);
 }
 
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 9b9cd71238..a0eaa28785 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -726,6 +726,19 @@ uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap)
     return hbitmap_serialization_align(bitmap->bitmap);
 }
 
+/* Return the disk size covered by a chunk of serialized bitmap data. */
+uint64_t bdrv_dirty_bitmap_serialization_coverage(int serialized_chunk_size,
+                                                  const BdrvDirtyBitmap *bitmap)
+{
+    uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap);
+    uint64_t limit = granularity * (serialized_chunk_size << 3);
+
+    assert(QEMU_IS_ALIGNED(limit,
+                           bdrv_dirty_bitmap_serialization_align(bitmap)));
+    return limit;
+}
+
+
 void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap,
                                       uint8_t *buf, uint64_t offset,
                                       uint64_t bytes)
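For a sense of scale, a worked example of bdrv_dirty_bitmap_serialization_coverage(): one serialized chunk holds serialized_chunk_size * 8 bits, and each bit covers granularity bytes of disk. With assumed values of a 64 KiB chunk and 64 KiB granularity (not taken from the patch itself):

    /* Worked example with assumed values. */
    uint64_t granularity = 64 * 1024;              /* 64 KiB of disk per bitmap bit */
    int serialized_chunk_size = 64 * 1024;         /* one 64 KiB chunk of bitmap data */
    uint64_t bits_per_chunk = (uint64_t)serialized_chunk_size << 3;   /* 524288 bits */
    uint64_t limit = granularity * bits_per_chunk; /* 32 GiB of disk per chunk */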
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
index ab2c4d44c4..cb5d896b7b 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -20,8 +20,17 @@
 #include "sysemu/block-backend.h"
 #include "util/block-helpers.h"
 
+/*
+ * Sector units are 512 bytes regardless of the
+ * virtio_blk_config->blk_size value.
+ */
+#define VIRTIO_BLK_SECTOR_BITS 9
+#define VIRTIO_BLK_SECTOR_SIZE (1ull << VIRTIO_BLK_SECTOR_BITS)
+
 enum {
     VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1,
+    VHOST_USER_BLK_MAX_DISCARD_SECTORS = 32768,
+    VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS = 32768,
 };
 struct virtio_blk_inhdr {
     unsigned char status;
@@ -58,30 +67,102 @@ static void vu_blk_req_complete(VuBlkReq *req)
     free(req);
 }
 
+static bool vu_blk_sect_range_ok(VuBlkExport *vexp, uint64_t sector,
+                                 size_t size)
+{
+    uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
+    uint64_t total_sectors;
+
+    if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+        return false;
+    }
+    if ((sector << VIRTIO_BLK_SECTOR_BITS) % vexp->blk_size) {
+        return false;
+    }
+    blk_get_geometry(vexp->export.blk, &total_sectors);
+    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
+        return false;
+    }
+    return true;
+}
+
 static int coroutine_fn
-vu_blk_discard_write_zeroes(BlockBackend *blk, struct iovec *iov,
+vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov,
                             uint32_t iovcnt, uint32_t type)
 {
+    BlockBackend *blk = vexp->export.blk;
     struct virtio_blk_discard_write_zeroes desc;
-    ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
+    ssize_t size;
+    uint64_t sector;
+    uint32_t num_sectors;
+    uint32_t max_sectors;
+    uint32_t flags;
+    int bytes;
+
+    /* Only one desc is currently supported */
+    if (unlikely(iov_size(iov, iovcnt) > sizeof(desc))) {
+        return VIRTIO_BLK_S_UNSUPP;
+    }
+
+    size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
     if (unlikely(size != sizeof(desc))) {
-        error_report("Invalid size %zd, expect %zu", size, sizeof(desc));
-        return -EINVAL;
+        error_report("Invalid size %zd, expected %zu", size, sizeof(desc));
+        return VIRTIO_BLK_S_IOERR;
+    }
+
+    sector = le64_to_cpu(desc.sector);
+    num_sectors = le32_to_cpu(desc.num_sectors);
+    flags = le32_to_cpu(desc.flags);
+    max_sectors = (type == VIRTIO_BLK_T_WRITE_ZEROES) ?
+                  VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS :
+                  VHOST_USER_BLK_MAX_DISCARD_SECTORS;
+
+    /* This check ensures that 'bytes' fits in an int */
+    if (unlikely(num_sectors > max_sectors)) {
+        return VIRTIO_BLK_S_IOERR;
     }
 
-    uint64_t range[2] = { le64_to_cpu(desc.sector) << 9,
-                          le32_to_cpu(desc.num_sectors) << 9 };
-    if (type == VIRTIO_BLK_T_DISCARD) {
-        if (blk_co_pdiscard(blk, range[0], range[1]) == 0) {
-            return 0;
+    bytes = num_sectors << VIRTIO_BLK_SECTOR_BITS;
+
+    if (unlikely(!vu_blk_sect_range_ok(vexp, sector, bytes))) {
+        return VIRTIO_BLK_S_IOERR;
+    }
+
+    /*
+     * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
+     * and write zeroes commands if any unknown flag is set.
+     */
+    if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
+        return VIRTIO_BLK_S_UNSUPP;
+    }
+
+    if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
+        int blk_flags = 0;
+
+        if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
+            blk_flags |= BDRV_REQ_MAY_UNMAP;
         }
-    } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
-        if (blk_co_pwrite_zeroes(blk, range[0], range[1], 0) == 0) {
-            return 0;
+
+        if (blk_co_pwrite_zeroes(blk, sector << VIRTIO_BLK_SECTOR_BITS,
+                                 bytes, blk_flags) == 0) {
+            return VIRTIO_BLK_S_OK;
+        }
+    } else if (type == VIRTIO_BLK_T_DISCARD) {
+        /*
+         * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
+         * discard commands if the unmap flag is set.
+         */
+        if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
+            return VIRTIO_BLK_S_UNSUPP;
+        }
+
+        if (blk_co_pdiscard(blk, sector << VIRTIO_BLK_SECTOR_BITS,
+                            bytes) == 0) {
+            return VIRTIO_BLK_S_OK;
         }
     }
 
-    return -EINVAL;
+    return VIRTIO_BLK_S_IOERR;
 }
 
 static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
@@ -128,6 +209,8 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
     switch (type & ~VIRTIO_BLK_T_BARRIER) {
     case VIRTIO_BLK_T_IN:
     case VIRTIO_BLK_T_OUT: {
+        QEMUIOVector qiov;
+        int64_t offset;
         ssize_t ret = 0;
         bool is_write = type & VIRTIO_BLK_T_OUT;
         req->sector_num = le64_to_cpu(req->out.sector);
@@ -137,13 +220,24 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
             break;
         }
 
-        int64_t offset = req->sector_num * vexp->blk_size;
-        QEMUIOVector qiov;
         if (is_write) {
             qemu_iovec_init_external(&qiov, out_iov, out_num);
-            ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
         } else {
             qemu_iovec_init_external(&qiov, in_iov, in_num);
+        }
+
+        if (unlikely(!vu_blk_sect_range_ok(vexp,
+                                           req->sector_num,
+                                           qiov.size))) {
+            req->in->status = VIRTIO_BLK_S_IOERR;
+            break;
+        }
+
+        offset = req->sector_num << VIRTIO_BLK_SECTOR_BITS;
+
+        if (is_write) {
+            ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
+        } else {
             ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
         }
         if (ret >= 0) {
@@ -170,19 +264,13 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
     }
     case VIRTIO_BLK_T_DISCARD:
     case VIRTIO_BLK_T_WRITE_ZEROES: {
-        int rc;
-
         if (!vexp->writable) {
             req->in->status = VIRTIO_BLK_S_IOERR;
             break;
         }
 
-        rc = vu_blk_discard_write_zeroes(blk, &elem->out_sg[1], out_num, type);
-        if (rc == 0) {
-            req->in->status = VIRTIO_BLK_S_OK;
-        } else {
-            req->in->status = VIRTIO_BLK_S_IOERR;
-        }
+        req->in->status = vu_blk_discard_write_zeroes(vexp, out_iov, out_num,
+                                                      type);
         break;
     }
     default:
@@ -347,17 +435,21 @@ vu_blk_initialize_config(BlockDriverState *bs,
                          uint32_t blk_size,
                          uint16_t num_queues)
 {
-    config->capacity = cpu_to_le64(bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+    config->capacity =
+        cpu_to_le64(bdrv_getlength(bs) >> VIRTIO_BLK_SECTOR_BITS);
     config->blk_size = cpu_to_le32(blk_size);
     config->size_max = cpu_to_le32(0);
     config->seg_max = cpu_to_le32(128 - 2);
     config->min_io_size = cpu_to_le16(1);
     config->opt_io_size = cpu_to_le32(1);
     config->num_queues = cpu_to_le16(num_queues);
-    config->max_discard_sectors = cpu_to_le32(32768);
+    config->max_discard_sectors =
+        cpu_to_le32(VHOST_USER_BLK_MAX_DISCARD_SECTORS);
     config->max_discard_seg = cpu_to_le32(1);
-    config->discard_sector_alignment = cpu_to_le32(config->blk_size >> 9);
-    config->max_write_zeroes_sectors = cpu_to_le32(32768);
+    config->discard_sector_alignment =
+        cpu_to_le32(blk_size >> VIRTIO_BLK_SECTOR_BITS);
+    config->max_write_zeroes_sectors
+        = cpu_to_le32(VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS);
     config->max_write_zeroes_seg = cpu_to_le32(1);
 }
 
@@ -383,7 +475,7 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
     if (vu_opts->has_logical_block_size) {
         logical_block_size = vu_opts->logical_block_size;
     } else {
-        logical_block_size = BDRV_SECTOR_SIZE;
+        logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
     }
     check_block_size(exp->id, "logical-block-size", logical_block_size,
                      &local_err);
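The export code above now distinguishes the fixed 512-byte virtio-blk sector unit (VIRTIO_BLK_SECTOR_BITS) from the configurable logical block size, and rejects requests before any I/O is issued. As a worked example of the alignment part of vu_blk_sect_range_ok(), assuming a 4096-byte logical block size:

    /* Worked example, assuming vexp->blk_size == 4096. */
    uint64_t sector = 8;                                      /* in 512-byte virtio sectors */
    uint64_t byte_offset = sector << VIRTIO_BLK_SECTOR_BITS;  /* 8 * 512 = 4096 bytes */
    /*
     * 4096 % 4096 == 0, so this request is aligned and passes the check;
     * sector == 1 would give 512 % 4096 != 0 and the request would be
     * completed with VIRTIO_BLK_S_IOERR instead of being submitted.
     */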
diff --git a/block/meson.build b/block/meson.build
index eeaefe5809..d21990ec95 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -57,7 +57,8 @@ block_ss.add(when: 'CONFIG_QED', if_true: files(
   'qed-table.c',
   'qed.c',
 ))
-block_ss.add(when: [libxml2, 'CONFIG_PARALLELS'], if_true: files('parallels.c'))
+block_ss.add(when: [libxml2, 'CONFIG_PARALLELS'],
+             if_true: files('parallels.c', 'parallels-ext.c'))
 block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c'))
 block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit])
 block_ss.add(when: libiscsi, if_true: files('iscsi-opts.c'))
diff --git a/block/parallels-ext.c b/block/parallels-ext.c
new file mode 100644
index 0000000000..e0dd0975c6
--- /dev/null
+++ b/block/parallels-ext.c
@@ -0,0 +1,300 @@
+/*
+ * Support of Parallels Format Extension. It's a part of Parallels format
+ * driver.
+ *
+ * Copyright (c) 2021 Virtuozzo International GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "parallels.h"
+#include "crypto/hash.h"
+#include "qemu/uuid.h"
+
+#define PARALLELS_FORMAT_EXTENSION_MAGIC 0xAB234CEF23DCEA87ULL
+
+#define PARALLELS_END_OF_FEATURES_MAGIC 0x0ULL
+#define PARALLELS_DIRTY_BITMAP_FEATURE_MAGIC 0x20385FAE252CB34AULL
+
+typedef struct ParallelsFormatExtensionHeader {
+    uint64_t magic; /* PARALLELS_FORMAT_EXTENSION_MAGIC */
+    uint8_t check_sum[16];
+} QEMU_PACKED ParallelsFormatExtensionHeader;
+
+typedef struct ParallelsFeatureHeader {
+    uint64_t magic;
+    uint64_t flags;
+    uint32_t data_size;
+    uint32_t _unused;
+} QEMU_PACKED ParallelsFeatureHeader;
+
+typedef struct ParallelsDirtyBitmapFeature {
+    uint64_t size;
+    uint8_t id[16];
+    uint32_t granularity;
+    uint32_t l1_size;
+    /* L1 table follows */
+} QEMU_PACKED ParallelsDirtyBitmapFeature;
+
+/* Given L1 table read bitmap data from the image and populate @bitmap */
+static int parallels_load_bitmap_data(BlockDriverState *bs,
+                                      const uint64_t *l1_table,
+                                      uint32_t l1_size,
+                                      BdrvDirtyBitmap *bitmap,
+                                      Error **errp)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int ret = 0;
+    uint64_t offset, limit;
+    uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
+    uint8_t *buf = NULL;
+    uint64_t i, tab_size =
+        DIV_ROUND_UP(bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size),
+                     s->cluster_size);
+
+    if (tab_size != l1_size) {
+        error_setg(errp, "Bitmap table size %" PRIu32 " does not correspond "
+                   "to bitmap size and cluster size. Expected %" PRIu64,
+                   l1_size, tab_size);
+        return -EINVAL;
+    }
+
+    buf = qemu_blockalign(bs, s->cluster_size);
+    limit = bdrv_dirty_bitmap_serialization_coverage(s->cluster_size, bitmap);
+    for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) {
+        uint64_t count = MIN(bm_size - offset, limit);
+        uint64_t entry = l1_table[i];
+
+        if (entry == 0) {
+            /* No need to deserialize zeros because @bitmap is cleared. */
+            continue;
+        }
+
+        if (entry == 1) {
+            bdrv_dirty_bitmap_deserialize_ones(bitmap, offset, count, false);
+        } else {
+            ret = bdrv_pread(bs->file, entry << BDRV_SECTOR_BITS, buf,
+                             s->cluster_size);
+            if (ret < 0) {
+                error_setg_errno(errp, -ret,
+                                 "Failed to read bitmap data cluster");
+                goto finish;
+            }
+            bdrv_dirty_bitmap_deserialize_part(bitmap, buf, offset, count,
+                                               false);
+        }
+    }
+    ret = 0;
+
+    bdrv_dirty_bitmap_deserialize_finish(bitmap);
+
+finish:
+    qemu_vfree(buf);
+
+    return ret;
+}
+
+/*
+ * @data buffer (of @data_size size) is the Dirty bitmaps feature which
+ * consists of ParallelsDirtyBitmapFeature followed by L1 table.
+ */
+static BdrvDirtyBitmap *parallels_load_bitmap(BlockDriverState *bs,
+                                              uint8_t *data,
+                                              size_t data_size,
+                                              Error **errp)
+{
+    int ret;
+    ParallelsDirtyBitmapFeature bf;
+    g_autofree uint64_t *l1_table = NULL;
+    BdrvDirtyBitmap *bitmap;
+    QemuUUID uuid;
+    char uuidstr[UUID_FMT_LEN + 1];
+    int i;
+
+    if (data_size < sizeof(bf)) {
+        error_setg(errp, "Too small Bitmap Feature area in Parallels Format "
+                   "Extension: %zu bytes, expected at least %zu bytes",
+                   data_size, sizeof(bf));
+        return NULL;
+    }
+    memcpy(&bf, data, sizeof(bf));
+    bf.size = le64_to_cpu(bf.size);
+    bf.granularity = le32_to_cpu(bf.granularity) << BDRV_SECTOR_BITS;
+    bf.l1_size = le32_to_cpu(bf.l1_size);
+    data += sizeof(bf);
+    data_size -= sizeof(bf);
+
+    if (bf.size != bs->total_sectors) {
+        error_setg(errp, "Bitmap size (in sectors) %" PRId64 " differs from "
+                   "disk size in sectors %" PRId64, bf.size, bs->total_sectors);
+        return NULL;
+    }
+
+    if (bf.l1_size * sizeof(uint64_t) > data_size) {
+        error_setg(errp, "Bitmaps feature corrupted: l1 table exceeds "
+                   "extension data_size");
+        return NULL;
+    }
+
+    memcpy(&uuid, bf.id, sizeof(uuid));
+    qemu_uuid_unparse(&uuid, uuidstr);
+    bitmap = bdrv_create_dirty_bitmap(bs, bf.granularity, uuidstr, errp);
+    if (!bitmap) {
+        return NULL;
+    }
+
+    l1_table = g_new(uint64_t, bf.l1_size);
+    for (i = 0; i < bf.l1_size; i++, data += sizeof(uint64_t)) {
+        l1_table[i] = ldq_le_p(data);
+    }
+
+    ret = parallels_load_bitmap_data(bs, l1_table, bf.l1_size, bitmap, errp);
+    if (ret < 0) {
+        bdrv_release_dirty_bitmap(bitmap);
+        return NULL;
+    }
+
+    /* We support format extension only for RO parallels images. */
+    assert(!(bs->open_flags & BDRV_O_RDWR));
+    bdrv_dirty_bitmap_set_readonly(bitmap, true);
+
+    return bitmap;
+}
+
+static int parallels_parse_format_extension(BlockDriverState *bs,
+                                            uint8_t *ext_cluster, Error **errp)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int ret;
+    int remaining = s->cluster_size;
+    uint8_t *pos = ext_cluster;
+    ParallelsFormatExtensionHeader eh;
+    g_autofree uint8_t *hash = NULL;
+    size_t hash_len = 0;
+    GSList *bitmaps = NULL, *el;
+
+    memcpy(&eh, pos, sizeof(eh));
+    eh.magic = le64_to_cpu(eh.magic);
+    pos += sizeof(eh);
+    remaining -= sizeof(eh);
+
+    if (eh.magic != PARALLELS_FORMAT_EXTENSION_MAGIC) {
+        error_setg(errp, "Wrong parallels Format Extension magic: 0x%" PRIx64
+                   ", expected: 0x%llx", eh.magic,
+                   PARALLELS_FORMAT_EXTENSION_MAGIC);
+        goto fail;
+    }
+
+    ret = qcrypto_hash_bytes(QCRYPTO_HASH_ALG_MD5, (char *)pos, remaining,
+                             &hash, &hash_len, errp);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    if (hash_len != sizeof(eh.check_sum) ||
+        memcmp(hash, eh.check_sum, sizeof(eh.check_sum)) != 0) {
+        error_setg(errp, "Wrong checksum in Format Extension header. Format "
+                   "extension is corrupted.");
+        goto fail;
+    }
+
+    while (true) {
+        ParallelsFeatureHeader fh;
+        BdrvDirtyBitmap *bitmap;
+
+        if (remaining < sizeof(fh)) {
+            error_setg(errp, "Can not read feature header, as remaining bytes "
+                       "(%d) in Format Extension is less than Feature header "
+                       "size (%zu)", remaining, sizeof(fh));
+            goto fail;
+        }
+
+        memcpy(&fh, pos, sizeof(fh));
+        pos += sizeof(fh);
+        remaining -= sizeof(fh);
+
+        fh.magic = le64_to_cpu(fh.magic);
+        fh.flags = le64_to_cpu(fh.flags);
+        fh.data_size = le32_to_cpu(fh.data_size);
+
+        if (fh.flags) {
+            error_setg(errp, "Flags for extension feature are unsupported");
+            goto fail;
+        }
+
+        if (fh.data_size > remaining) {
+            error_setg(errp, "Feature data_size exceedes Format Extension "
+                       "cluster");
+            goto fail;
+        }
+
+        switch (fh.magic) {
+        case PARALLELS_END_OF_FEATURES_MAGIC:
+            return 0;
+
+        case PARALLELS_DIRTY_BITMAP_FEATURE_MAGIC:
+            bitmap = parallels_load_bitmap(bs, pos, fh.data_size, errp);
+            if (!bitmap) {
+                goto fail;
+            }
+            bitmaps = g_slist_append(bitmaps, bitmap);
+            break;
+
+        default:
+            error_setg(errp, "Unknown feature: 0x%" PRIu64, fh.magic);
+            goto fail;
+        }
+
+        pos = ext_cluster + QEMU_ALIGN_UP(pos + fh.data_size - ext_cluster, 8);
+    }
+
+fail:
+    for (el = bitmaps; el; el = el->next) {
+        bdrv_release_dirty_bitmap(el->data);
+    }
+    g_slist_free(bitmaps);
+
+    return -EINVAL;
+}
+
+int parallels_read_format_extension(BlockDriverState *bs,
+                                    int64_t ext_off, Error **errp)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int ret;
+    uint8_t *ext_cluster = qemu_blockalign(bs, s->cluster_size);
+
+    assert(ext_off > 0);
+
+    ret = bdrv_pread(bs->file, ext_off, ext_cluster, s->cluster_size);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to read Format Extension cluster");
+        goto out;
+    }
+
+    ret = parallels_parse_format_extension(bs, ext_cluster, errp);
+
+out:
+    qemu_vfree(ext_cluster);
+
+    return ret;
+}
diff --git a/block/parallels.c b/block/parallels.c
index 3c22dfdc9d..6ebad2a2bb 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -29,6 +29,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "block/block_int.h"
 #include "block/qdict.h"
@@ -421,7 +422,6 @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs,
     int ret;
     uint32_t i;
     bool flush_bat = false;
-    int cluster_size = s->tracks << BDRV_SECTOR_BITS;
 
     size = bdrv_getlength(bs->file->bs);
     if (size < 0) {
@@ -472,7 +472,7 @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs,
             high_off = off;
         }
 
-        if (prev_off != 0 && (prev_off + cluster_size) != off) {
+        if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
             res->bfi.fragmented_clusters++;
         }
         prev_off = off;
@@ -487,10 +487,10 @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs,
         }
     }
 
-    res->image_end_offset = high_off + cluster_size;
+    res->image_end_offset = high_off + s->cluster_size;
     if (size > res->image_end_offset) {
         int64_t count;
-        count = DIV_ROUND_UP(size - res->image_end_offset, cluster_size);
+        count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
         fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
                 fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
                 size - res->image_end_offset);
@@ -771,6 +771,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
         ret = -EFBIG;
         goto fail;
     }
+    s->cluster_size = s->tracks << BDRV_SECTOR_BITS;
 
     s->bat_size = le32_to_cpu(ph.bat_entries);
     if (s->bat_size > INT_MAX / sizeof(uint32_t)) {
@@ -843,6 +844,23 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail_options;
     }
 
+    if (ph.ext_off) {
+        if (flags & BDRV_O_RDWR) {
+            /*
+             * It's unsafe to open image RW if there is an extension (as we
+             * don't support it). But parallels driver in QEMU historically
+             * ignores the extension, so print warning and don't care.
+             */
+            warn_report("Format Extension ignored in RW mode");
+        } else {
+            ret = parallels_read_format_extension(
+                    bs, le64_to_cpu(ph.ext_off) << BDRV_SECTOR_BITS, errp);
+            if (ret < 0) {
+                goto fail;
+            }
+        }
+    }
+
     if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) {
         s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
         ret = parallels_update_header(bs);
diff --git a/block/parallels.h b/block/parallels.h
index 5aa101cfc8..f22f43f988 100644
--- a/block/parallels.h
+++ b/block/parallels.h
@@ -48,7 +48,8 @@ typedef struct ParallelsHeader {
     uint64_t nb_sectors;
     uint32_t inuse;
     uint32_t data_off;
-    char padding[12];
+    uint32_t flags;
+    uint64_t ext_off;
 } QEMU_PACKED ParallelsHeader;
 
 typedef enum ParallelsPreallocMode {
@@ -79,9 +80,13 @@ typedef struct BDRVParallelsState {
     ParallelsPreallocMode prealloc_mode;
 
     unsigned int tracks;
+    unsigned int cluster_size;
 
     unsigned int off_multiplier;
     Error *migration_blocker;
 } BDRVParallelsState;
 
+int parallels_read_format_extension(BlockDriverState *bs,
+                                    int64_t ext_off, Error **errp);
+
 #endif
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 5eef82fa55..42d81c44cd 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -278,18 +278,6 @@ static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)
     return 0;
 }
 
-/* Return the disk size covered by a single qcow2 cluster of bitmap data. */
-static uint64_t bytes_covered_by_bitmap_cluster(const BDRVQcow2State *s,
-                                                const BdrvDirtyBitmap *bitmap)
-{
-    uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap);
-    uint64_t limit = granularity * (s->cluster_size << 3);
-
-    assert(QEMU_IS_ALIGNED(limit,
-                           bdrv_dirty_bitmap_serialization_align(bitmap)));
-    return limit;
-}
-
 /* load_bitmap_data
  * @bitmap_table entries must satisfy specification constraints.
  * @bitmap must be cleared */
@@ -312,7 +300,7 @@ static int load_bitmap_data(BlockDriverState *bs,
     }
 
     buf = g_malloc(s->cluster_size);
-    limit = bytes_covered_by_bitmap_cluster(s, bitmap);
+    limit = bdrv_dirty_bitmap_serialization_coverage(s->cluster_size, bitmap);
     for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) {
         uint64_t count = MIN(bm_size - offset, limit);
         uint64_t entry = bitmap_table[i];
@@ -1303,7 +1291,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
     }
 
     buf = g_malloc(s->cluster_size);
-    limit = bytes_covered_by_bitmap_cluster(s, bitmap);
+    limit = bdrv_dirty_bitmap_serialization_coverage(s->cluster_size, bitmap);
     assert(DIV_ROUND_UP(bm_size, limit) == tb_size);
 
     offset = 0;
diff --git a/blockdev.c b/blockdev.c
index cd438e60e3..7c7ab2b386 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1515,13 +1515,13 @@ static void external_snapshot_prepare(BlkActionState *common,
             s->has_snapshot_node_name ? s->snapshot_node_name : NULL;
 
         if (node_name && !snapshot_node_name) {
-            error_setg(errp, "New overlay node name missing");
+            error_setg(errp, "New overlay node-name missing");
             goto out;
         }
 
         if (snapshot_node_name &&
             bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) {
-            error_setg(errp, "New overlay node name already in use");
+            error_setg(errp, "New overlay node-name already in use");
             goto out;
         }
 
@@ -3598,13 +3598,14 @@ void qmp_x_blockdev_reopen(BlockdevOptions *options, Error **errp)
 
     /* Check for the selected node name */
     if (!options->has_node_name) {
-        error_setg(errp, "Node name not specified");
+        error_setg(errp, "node-name not specified");
         goto fail;
     }
 
     bs = bdrv_find_node(options->node_name);
     if (!bs) {
-        error_setg(errp, "Cannot find node named '%s'", options->node_name);
+        error_setg(errp, "Failed to find node with node-name='%s'",
+                   options->node_name);
         goto fail;
     }
 
@@ -3635,7 +3636,7 @@ void qmp_blockdev_del(const char *node_name, Error **errp)
 
     bs = bdrv_find_node(node_name);
     if (!bs) {
-        error_setg(errp, "Cannot find node %s", node_name);
+        error_setg(errp, "Failed to find node with node-name='%s'", node_name);
         return;
     }
     if (bdrv_has_blk(bs)) {
@@ -3758,7 +3759,7 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
 
     bs = bdrv_find_node(node_name);
     if (!bs) {
-        error_setg(errp, "Cannot find node %s", node_name);
+        error_setg(errp, "Failed to find node with node-name='%s'", node_name);
         return;
     }
 
diff --git a/blockjob.c b/blockjob.c
index f2feff051d..ef968017a2 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -318,8 +318,12 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
     info->status    = job->job.status;
     info->auto_finalize = job->job.auto_finalize;
     info->auto_dismiss  = job->job.auto_dismiss;
-    info->has_error = job->job.ret != 0;
-    info->error     = job->job.ret ? g_strdup(strerror(-job->job.ret)) : NULL;
+    if (job->job.ret) {
+        info->has_error = true;
+        info->error = job->job.err ?
+                        g_strdup(error_get_pretty(job->job.err)) :
+                        g_strdup(strerror(-job->job.ret));
+    }
     return info;
 }
 
@@ -356,7 +360,7 @@ static void block_job_event_completed(Notifier *n, void *opaque)
     }
 
     if (job->job.ret < 0) {
-        msg = strerror(-job->job.ret);
+        msg = error_get_pretty(job->job.err);
     }
 
     qapi_event_send_block_job_completed(job_type(&job->job),
diff --git a/docs/interop/parallels.txt b/docs/interop/parallels.txt
index f15bf35bd1..bb3fadf369 100644
--- a/docs/interop/parallels.txt
+++ b/docs/interop/parallels.txt
@@ -208,21 +208,25 @@ of its data area are:
   28 - 31:    l1_size
               The number of entries in the L1 table of the bitmap.
 
-  variable:   l1_table (8 * l1_size bytes)
-              L1 offset table (in bytes)
+  variable:   L1 offset table (l1_table), size: 8 * l1_size bytes
 
-A dirty bitmap is stored using a one-level structure for the mapping to host
-clusters - an L1 table.
+The dirty bitmap described by this feature extension is stored in a set of
+clusters inside the Parallels image file. The offsets of these clusters are
+saved in the L1 offset table specified by the feature extension. Each L1 table
+entry is a 64 bit integer as described below:
 
-Given an offset in bytes into the bitmap data, the offset in bytes into the
-image file can be obtained as follows:
+Given an offset in bytes into the bitmap data, the corresponding L1 entry is
 
-    offset = l1_table[offset / cluster_size] + (offset % cluster_size)
+    l1_table[offset / cluster_size]
 
-If an L1 table entry is 0, the corresponding cluster of the bitmap is assumed
-to be zero.
+If an L1 table entry is 0, all bits in the corresponding cluster of the bitmap
+are assumed to be 0.
 
-If an L1 table entry is 1, the corresponding cluster of the bitmap is assumed
-to have all bits set.
+If an L1 table entry is 1, all bits in the corresponding cluster of the bitmap
+are assumed to be 1.
 
-If an L1 table entry is not 0 or 1, it allocates a cluster from the data area.
+If an L1 table entry is not 0 or 1, it contains the corresponding cluster
+offset (in 512b sectors). Given an offset in bytes into the bitmap data the
+offset in bytes into the image file can be obtained as follows:
+
+    offset = l1_table[offset / cluster_size] * 512 + (offset % cluster_size)
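The updated docs/interop/parallels.txt text above gives the L1 table lookup in prose; the following minimal C sketch applies the same documented formula (the helper is hypothetical and not part of the patch):

    /* Hypothetical helper illustrating the documented L1 lookup. */
    static int64_t bitmap_data_offset_to_file_offset(const uint64_t *l1_table,
                                                     uint64_t cluster_size,
                                                     uint64_t bitmap_offset)
    {
        uint64_t entry = l1_table[bitmap_offset / cluster_size];

        if (entry == 0 || entry == 1) {
            /* all-zeros or all-ones cluster: no data is stored in the image */
            return -1;
        }
        /* entry is a cluster offset expressed in 512-byte sectors */
        return entry * 512 + bitmap_offset % cluster_size;
    }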
diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst
index c05b3d3811..086493ebb3 100644
--- a/docs/tools/qemu-storage-daemon.rst
+++ b/docs/tools/qemu-storage-daemon.rst
@@ -69,7 +69,7 @@ Standard options:
   a description of character device properties. A common character device
   definition configures a UNIX domain socket::
 
-  --chardev socket,id=char1,path=/tmp/qmp.sock,server=on,wait=off
+  --chardev socket,id=char1,path=/var/run/qsd-qmp.sock,server=on,wait=off
 
 .. option:: --export [type=]nbd,id=<id>,node-name=<node-name>[,name=<export-name>][,writable=on|off][,bitmap=<name>]
   --export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=unix,addr.path=<socket-path>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>]
@@ -80,8 +80,9 @@ Standard options:
   requests for modifying data (the default is off).
 
   The ``nbd`` export type requires ``--nbd-server`` (see below). ``name`` is
-  the NBD export name. ``bitmap`` is the name of a dirty bitmap reachable from
-  the block node, so the NBD client can use NBD_OPT_SET_META_CONTEXT with the
+  the NBD export name (if not specified, it defaults to the given
+  ``node-name``). ``bitmap`` is the name of a dirty bitmap reachable from the
+  block node, so the NBD client can use NBD_OPT_SET_META_CONTEXT with the
   metadata context name "qemu:dirty-bitmap:BITMAP" to inspect the bitmap.
 
   The ``vhost-user-blk`` export type takes a vhost-user socket address on which
@@ -101,14 +102,17 @@ Standard options:
 
 .. option:: --nbd-server addr.type=inet,addr.host=<host>,addr.port=<port>[,tls-creds=<id>][,tls-authz=<id>][,max-connections=<n>]
   --nbd-server addr.type=unix,addr.path=<path>[,tls-creds=<id>][,tls-authz=<id>][,max-connections=<n>]
+  --nbd-server addr.type=fd,addr.str=<fd>[,tls-creds=<id>][,tls-authz=<id>][,max-connections=<n>]
 
   is a server for NBD exports. Both TCP and UNIX domain sockets are supported.
-  TLS encryption can be configured using ``--object`` tls-creds-* and authz-*
-  secrets (see below).
+  A listen socket can be provided via file descriptor passing (see Examples
+  below). TLS encryption can be configured using ``--object`` tls-creds-* and
+  authz-* secrets (see below).
 
-  To configure an NBD server on UNIX domain socket path ``/tmp/nbd.sock``::
+  To configure an NBD server on UNIX domain socket path
+  ``/var/run/qsd-nbd.sock``::
 
-  --nbd-server addr.type=unix,addr.path=/tmp/nbd.sock
+  --nbd-server addr.type=unix,addr.path=/var/run/qsd-nbd.sock
 
 .. option:: --object help
   --object <type>,help
@@ -118,6 +122,20 @@ Standard options:
   List object properties with ``<type>,help``. See the :manpage:`qemu(1)`
   manual page for a description of the object properties.
 
+.. option:: --pidfile PATH
+
+  is the path to a file where the daemon writes its pid. This allows scripts to
+  stop the daemon by sending a signal::
+
+    $ kill -SIGTERM $(<path/to/qsd.pid)
+
+  A file lock is applied to the file so only one instance of the daemon can run
+  with a given pid file path. The daemon unlinks its pid file when terminating.
+
+  The pid file is written after chardevs, exports, and NBD servers have been
+  created but before accepting connections. The daemon has started successfully
+  when the pid file is written and clients may begin connecting.
+
 Examples
 --------
 Launch the daemon with QMP monitor socket ``qmp.sock`` so clients can execute
@@ -127,6 +145,42 @@ QMP commands::
       --chardev socket,path=qmp.sock,server=on,wait=off,id=char1 \
       --monitor chardev=char1
 
+Launch the daemon from Python with a QMP monitor socket using file descriptor
+passing so there is no need to busy wait for the QMP monitor to become
+available::
+
+  #!/usr/bin/env python3
+  import subprocess
+  import socket
+
+  sock_path = '/var/run/qmp.sock'
+
+  with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as listen_sock:
+      listen_sock.bind(sock_path)
+      listen_sock.listen()
+
+      fd = listen_sock.fileno()
+
+      subprocess.Popen(
+          ['qemu-storage-daemon',
+           '--chardev', f'socket,fd={fd},server=on,id=char1',
+           '--monitor', 'chardev=char1'],
+          pass_fds=[fd],
+      )
+
+  # listen_sock was automatically closed when leaving the 'with' statement
+  # body. If the daemon process terminated early then the following connect()
+  # will fail with "Connection refused" because no process has the listen
+  # socket open anymore. Launch errors can be detected this way.
+
+  qmp_sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+  qmp_sock.connect(sock_path)
+  ...QMP interaction...
+
+The same socket spawning approach also works with the ``--nbd-server
+addr.type=fd,addr.str=<fd>`` and ``--export
+type=vhost-user-blk,addr.type=fd,addr.str=<fd>`` options.
+
 Export raw image file ``disk.img`` over NBD UNIX domain socket ``nbd.sock``::
 
   $ qemu-storage-daemon \
diff --git a/hw/block/Kconfig b/hw/block/Kconfig
index 2d17f481ad..4fcd152166 100644
--- a/hw/block/Kconfig
+++ b/hw/block/Kconfig
@@ -22,6 +22,9 @@ config ECC
 config ONENAND
     bool
 
+config TC58128
+    bool
+
 config NVME_PCI
     bool
     default y if PCI_DEVICES
diff --git a/hw/block/meson.build b/hw/block/meson.build
index 602ca6c854..4bf994c64f 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -12,7 +12,7 @@ softmmu_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: files('pflash_cfi02.c'))
 softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c'))
 softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
 softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
-softmmu_ss.add(when: 'CONFIG_SH4', if_true: files('tc58128.c'))
+softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c'))
 softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c'))
 
 specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
diff --git a/hw/block/tc58128.c b/hw/block/tc58128.c
index 9888f01ac6..bfc27ad899 100644
--- a/hw/block/tc58128.c
+++ b/hw/block/tc58128.c
@@ -1,3 +1,29 @@
+/*
+ * TC58128 NAND EEPROM emulation
+ *
+ * Copyright (c) 2005 Samuel Tardieu
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * SPDX-License-Identifier: MIT
+ */
 #include "qemu/osdep.h"
 #include "qemu/units.h"
 #include "hw/sh4/sh.h"
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index da4fbf9084..b870a50e6b 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -54,6 +54,9 @@ static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
 {
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
 
+    /* Our num_queues overrides the device backend */
+    virtio_stw_p(vdev, &s->blkcfg.num_queues, s->num_queues);
+
     memcpy(config, &s->blkcfg, sizeof(struct virtio_blk_config));
 }
 
@@ -491,10 +494,6 @@ reconnect:
         goto reconnect;
     }
 
-    if (s->blkcfg.num_queues != s->num_queues) {
-        s->blkcfg.num_queues = s->num_queues;
-    }
-
     return;
 
 virtio_err:
diff --git a/hw/char/Kconfig b/hw/char/Kconfig
index 939bc44758..f6f4fffd1b 100644
--- a/hw/char/Kconfig
+++ b/hw/char/Kconfig
@@ -50,6 +50,9 @@ config SCLPCONSOLE
 config TERMINAL3270
     bool
 
+config SH_SCI
+    bool
+
 config RENESAS_SCI
     bool
 
diff --git a/hw/char/meson.build b/hw/char/meson.build
index 196ac91fa2..afe9a0af88 100644
--- a/hw/char/meson.build
+++ b/hw/char/meson.build
@@ -31,7 +31,7 @@ softmmu_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_uart.c'))
 softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_aux.c'))
 softmmu_ss.add(when: 'CONFIG_RENESAS_SCI', if_true: files('renesas_sci.c'))
 softmmu_ss.add(when: 'CONFIG_SIFIVE_UART', if_true: files('sifive_uart.c'))
-softmmu_ss.add(when: 'CONFIG_SH4', if_true: files('sh_serial.c'))
+softmmu_ss.add(when: 'CONFIG_SH_SCI', if_true: files('sh_serial.c'))
 softmmu_ss.add(when: 'CONFIG_STM32F2XX_USART', if_true: files('stm32f2xx_usart.c'))
 softmmu_ss.add(when: 'CONFIG_MCHP_PFSOC_MMUART', if_true: files('mchp_pfsoc_mmuart.c'))
 
diff --git a/hw/dma/sparc32_dma.c b/hw/dma/sparc32_dma.c
index b643b413c5..03bc500878 100644
--- a/hw/dma/sparc32_dma.c
+++ b/hw/dma/sparc32_dma.c
@@ -295,13 +295,13 @@ static void sparc32_espdma_device_init(Object *obj)
     memory_region_init_io(&s->iomem, OBJECT(s), &dma_mem_ops, s,
                           "espdma-mmio", DMA_SIZE);
 
-    object_initialize_child(obj, "esp", &es->esp, TYPE_ESP);
+    object_initialize_child(obj, "esp", &es->esp, TYPE_SYSBUS_ESP);
 }
 
 static void sparc32_espdma_device_realize(DeviceState *dev, Error **errp)
 {
     ESPDMADeviceState *es = SPARC32_ESPDMA_DEVICE(dev);
-    SysBusESPState *sysbus = ESP(&es->esp);
+    SysBusESPState *sysbus = SYSBUS_ESP(&es->esp);
     ESPState *esp = &sysbus->esp;
 
     esp->dma_memory_read = espdma_memory_read;
diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig
index c18d11142a..66bf0b90b4 100644
--- a/hw/intc/Kconfig
+++ b/hw/intc/Kconfig
@@ -53,6 +53,9 @@ config OMPIC
 config PPC_UIC
     bool
 
+config SH_INTC
+    bool
+
 config RX_ICU
     bool
 
diff --git a/hw/intc/meson.build b/hw/intc/meson.build
index 53cba11569..b3d9345a0d 100644
--- a/hw/intc/meson.build
+++ b/hw/intc/meson.build
@@ -47,7 +47,7 @@ specific_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_ic.c', 'bcm2836_co
 specific_ss.add(when: 'CONFIG_RX_ICU', if_true: files('rx_icu.c'))
 specific_ss.add(when: 'CONFIG_S390_FLIC', if_true: files('s390_flic.c'))
 specific_ss.add(when: 'CONFIG_S390_FLIC_KVM', if_true: files('s390_flic_kvm.c'))
-specific_ss.add(when: 'CONFIG_SH4', if_true: files('sh_intc.c'))
+specific_ss.add(when: 'CONFIG_SH_INTC', if_true: files('sh_intc.c'))
 specific_ss.add(when: 'CONFIG_SIFIVE_CLINT', if_true: files('sifive_clint.c'))
 specific_ss.add(when: 'CONFIG_SIFIVE_PLIC', if_true: files('sifive_plic.c'))
 specific_ss.add(when: 'CONFIG_XICS', if_true: files('xics.c'))
diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c
index d4eca46767..4d2e866eec 100644
--- a/hw/m68k/q800.c
+++ b/hw/m68k/q800.c
@@ -350,8 +350,8 @@ static void q800_init(MachineState *machine)
 
     /* SCSI */
 
-    dev = qdev_new(TYPE_ESP);
-    sysbus_esp = ESP(dev);
+    dev = qdev_new(TYPE_SYSBUS_ESP);
+    sysbus_esp = SYSBUS_ESP(dev);
     esp = &sysbus_esp->esp;
     esp->dma_memory_read = NULL;
     esp->dma_memory_write = NULL;
diff --git a/hw/mips/jazz.c b/hw/mips/jazz.c
index 83c8086062..1a0888a0fd 100644
--- a/hw/mips/jazz.c
+++ b/hw/mips/jazz.c
@@ -328,8 +328,8 @@ static void mips_jazz_init(MachineState *machine,
     }
 
     /* SCSI adapter */
-    dev = qdev_new(TYPE_ESP);
-    sysbus_esp = ESP(dev);
+    dev = qdev_new(TYPE_SYSBUS_ESP);
+    sysbus_esp = SYSBUS_ESP(dev);
     esp = &sysbus_esp->esp;
     esp->dma_memory_read = rc4030_dma_read;
     esp->dma_memory_write = rc4030_dma_write;
diff --git a/hw/pci-host/Kconfig b/hw/pci-host/Kconfig
index 8b8c763c28..2ccc96f02c 100644
--- a/hw/pci-host/Kconfig
+++ b/hw/pci-host/Kconfig
@@ -68,3 +68,7 @@ config PCI_POWERNV
 
 config REMOTE_PCIHOST
     bool
+
+config SH_PCI
+    bool
+    select PCI
diff --git a/hw/pci-host/meson.build b/hw/pci-host/meson.build
index 1847c69905..87a896973e 100644
--- a/hw/pci-host/meson.build
+++ b/hw/pci-host/meson.build
@@ -10,6 +10,7 @@ pci_ss.add(when: 'CONFIG_PCI_I440FX', if_true: files('i440fx.c'))
 pci_ss.add(when: 'CONFIG_PCI_SABRE', if_true: files('sabre.c'))
 pci_ss.add(when: 'CONFIG_XEN_IGD_PASSTHROUGH', if_true: files('xen_igd_pt.c'))
 pci_ss.add(when: 'CONFIG_REMOTE_PCIHOST', if_true: files('remote.c'))
+pci_ss.add(when: 'CONFIG_SH_PCI', if_true: files('sh_pci.c'))
 
 # PPC devices
 pci_ss.add(when: 'CONFIG_PREP_PCI', if_true: files('prep.c'))
diff --git a/hw/sh4/sh_pci.c b/hw/pci-host/sh_pci.c
index 734892f47c..734892f47c 100644
--- a/hw/sh4/sh_pci.c
+++ b/hw/pci-host/sh_pci.c
diff --git a/hw/scsi/esp-pci.c b/hw/scsi/esp-pci.c
index 4d7c2cab56..c3d3dab05e 100644
--- a/hw/scsi/esp-pci.c
+++ b/hw/scsi/esp-pci.c
@@ -79,8 +79,10 @@ struct PCIESPState {
 
 static void esp_pci_handle_idle(PCIESPState *pci, uint32_t val)
 {
+    ESPState *s = ESP(&pci->esp);
+
     trace_esp_pci_dma_idle(val);
-    esp_dma_enable(&pci->esp, 0, 0);
+    esp_dma_enable(s, 0, 0);
 }
 
 static void esp_pci_handle_blast(PCIESPState *pci, uint32_t val)
@@ -91,14 +93,18 @@ static void esp_pci_handle_blast(PCIESPState *pci, uint32_t val)
 
 static void esp_pci_handle_abort(PCIESPState *pci, uint32_t val)
 {
+    ESPState *s = ESP(&pci->esp);
+
     trace_esp_pci_dma_abort(val);
-    if (pci->esp.current_req) {
-        scsi_req_cancel(pci->esp.current_req);
+    if (s->current_req) {
+        scsi_req_cancel(s->current_req);
     }
 }
 
 static void esp_pci_handle_start(PCIESPState *pci, uint32_t val)
 {
+    ESPState *s = ESP(&pci->esp);
+
     trace_esp_pci_dma_start(val);
 
     pci->dma_regs[DMA_WBC] = pci->dma_regs[DMA_STC];
@@ -109,7 +115,7 @@ static void esp_pci_handle_start(PCIESPState *pci, uint32_t val)
                                | DMA_STAT_DONE | DMA_STAT_ABORT
                                | DMA_STAT_ERROR | DMA_STAT_PWDN);
 
-    esp_dma_enable(&pci->esp, 0, 1);
+    esp_dma_enable(s, 0, 1);
 }
 
 static void esp_pci_dma_write(PCIESPState *pci, uint32_t saddr, uint32_t val)
@@ -155,11 +161,12 @@ static void esp_pci_dma_write(PCIESPState *pci, uint32_t saddr, uint32_t val)
 
 static uint32_t esp_pci_dma_read(PCIESPState *pci, uint32_t saddr)
 {
+    ESPState *s = ESP(&pci->esp);
     uint32_t val;
 
     val = pci->dma_regs[saddr];
     if (saddr == DMA_STAT) {
-        if (pci->esp.rregs[ESP_RSTAT] & STAT_INT) {
+        if (s->rregs[ESP_RSTAT] & STAT_INT) {
             val |= DMA_STAT_SCSIINT;
         }
         if (!(pci->sbac & SBAC_STATUS)) {
@@ -176,6 +183,7 @@ static void esp_pci_io_write(void *opaque, hwaddr addr,
                              uint64_t val, unsigned int size)
 {
     PCIESPState *pci = opaque;
+    ESPState *s = ESP(&pci->esp);
 
     if (size < 4 || addr & 3) {
         /* need to upgrade request: we only support 4-bytes accesses */
@@ -183,7 +191,7 @@ static void esp_pci_io_write(void *opaque, hwaddr addr,
         int shift;
 
         if (addr < 0x40) {
-            current = pci->esp.wregs[addr >> 2];
+            current = s->wregs[addr >> 2];
         } else if (addr < 0x60) {
             current = pci->dma_regs[(addr - 0x40) >> 2];
         } else if (addr < 0x74) {
@@ -203,7 +211,7 @@ static void esp_pci_io_write(void *opaque, hwaddr addr,
 
     if (addr < 0x40) {
         /* SCSI core reg */
-        esp_reg_write(&pci->esp, addr >> 2, val);
+        esp_reg_write(s, addr >> 2, val);
     } else if (addr < 0x60) {
         /* PCI DMA CCB */
         esp_pci_dma_write(pci, (addr - 0x40) >> 2, val);
@@ -220,11 +228,12 @@ static uint64_t esp_pci_io_read(void *opaque, hwaddr addr,
                                 unsigned int size)
 {
     PCIESPState *pci = opaque;
+    ESPState *s = ESP(&pci->esp);
     uint32_t ret;
 
     if (addr < 0x40) {
         /* SCSI core reg */
-        ret = esp_reg_read(&pci->esp, addr >> 2);
+        ret = esp_reg_read(s, addr >> 2);
     } else if (addr < 0x60) {
         /* PCI DMA CCB */
         ret = esp_pci_dma_read(pci, (addr - 0x40) >> 2);
@@ -306,7 +315,9 @@ static const MemoryRegionOps esp_pci_io_ops = {
 static void esp_pci_hard_reset(DeviceState *dev)
 {
     PCIESPState *pci = PCI_ESP(dev);
-    esp_hard_reset(&pci->esp);
+    ESPState *s = ESP(&pci->esp);
+
+    esp_hard_reset(s);
     pci->dma_regs[DMA_CMD] &= ~(DMA_CMD_DIR | DMA_CMD_INTE_D | DMA_CMD_INTE_P
                               | DMA_CMD_MDL | DMA_CMD_DIAG | DMA_CMD_MASK);
     pci->dma_regs[DMA_WBC] &= ~0xffff;
@@ -319,11 +330,12 @@ static void esp_pci_hard_reset(DeviceState *dev)
 
 static const VMStateDescription vmstate_esp_pci_scsi = {
     .name = "pciespscsi",
-    .version_id = 1,
+    .version_id = 2,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
         VMSTATE_PCI_DEVICE(parent_obj, PCIESPState),
         VMSTATE_BUFFER_UNSAFE(dma_regs, PCIESPState, 0, 8 * sizeof(uint32_t)),
+        VMSTATE_UINT8_V(esp.mig_version_id, PCIESPState, 2),
         VMSTATE_STRUCT(esp, PCIESPState, 0, vmstate_esp, ESPState),
         VMSTATE_END_OF_LIST()
     }
@@ -353,9 +365,13 @@ static void esp_pci_scsi_realize(PCIDevice *dev, Error **errp)
 {
     PCIESPState *pci = PCI_ESP(dev);
     DeviceState *d = DEVICE(dev);
-    ESPState *s = &pci->esp;
+    ESPState *s = ESP(&pci->esp);
     uint8_t *pci_conf;
 
+    if (!qdev_realize(DEVICE(s), NULL, errp)) {
+        return;
+    }
+
     pci_conf = dev->config;
 
     /* Interrupt pin A */
@@ -374,11 +390,19 @@ static void esp_pci_scsi_realize(PCIDevice *dev, Error **errp)
     scsi_bus_new(&s->bus, sizeof(s->bus), d, &esp_pci_scsi_info, NULL);
 }
 
-static void esp_pci_scsi_uninit(PCIDevice *d)
+static void esp_pci_scsi_exit(PCIDevice *d)
 {
     PCIESPState *pci = PCI_ESP(d);
+    ESPState *s = ESP(&pci->esp);
+
+    qemu_free_irq(s->irq);
+}
+
+static void esp_pci_init(Object *obj)
+{
+    PCIESPState *pci = PCI_ESP(obj);
 
-    qemu_free_irq(pci->esp.irq);
+    object_initialize_child(obj, "esp", &pci->esp, TYPE_ESP);
 }
 
 static void esp_pci_class_init(ObjectClass *klass, void *data)
@@ -387,7 +411,7 @@ static void esp_pci_class_init(ObjectClass *klass, void *data)
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
     k->realize = esp_pci_scsi_realize;
-    k->exit = esp_pci_scsi_uninit;
+    k->exit = esp_pci_scsi_exit;
     k->vendor_id = PCI_VENDOR_ID_AMD;
     k->device_id = PCI_DEVICE_ID_AMD_SCSI;
     k->revision = 0x10;
@@ -401,6 +425,7 @@ static void esp_pci_class_init(ObjectClass *klass, void *data)
 static const TypeInfo esp_pci_info = {
     .name = TYPE_AM53C974_DEVICE,
     .parent = TYPE_PCI_DEVICE,
+    .instance_init = esp_pci_init,
     .instance_size = sizeof(PCIESPState),
     .class_init = esp_pci_class_init,
     .interfaces = (InterfaceInfo[]) {
diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c
index 93d9c9c7b9..507ab363bc 100644
--- a/hw/scsi/esp.c
+++ b/hw/scsi/esp.c
@@ -63,11 +63,13 @@ static void esp_lower_irq(ESPState *s)
 static void esp_raise_drq(ESPState *s)
 {
     qemu_irq_raise(s->irq_data);
+    trace_esp_raise_drq();
 }
 
 static void esp_lower_drq(ESPState *s)
 {
     qemu_irq_lower(s->irq_data);
+    trace_esp_lower_drq();
 }
 
 void esp_dma_enable(ESPState *s, int irq, int level)
@@ -96,39 +98,112 @@ void esp_request_cancelled(SCSIRequest *req)
     }
 }
 
-static void set_pdma(ESPState *s, enum pdma_origin_id origin,
-                     uint32_t index, uint32_t len)
+static void esp_fifo_push(ESPState *s, uint8_t val)
 {
-    s->pdma_origin = origin;
-    s->pdma_start = index;
-    s->pdma_cur = index;
-    s->pdma_len = len;
+    if (fifo8_num_used(&s->fifo) == ESP_FIFO_SZ) {
+        trace_esp_error_fifo_overrun();
+        return;
+    }
+
+    fifo8_push(&s->fifo, val);
+}
+
+static uint8_t esp_fifo_pop(ESPState *s)
+{
+    if (fifo8_is_empty(&s->fifo)) {
+        return 0;
+    }
+
+    return fifo8_pop(&s->fifo);
+}
+
+static void esp_cmdfifo_push(ESPState *s, uint8_t val)
+{
+    if (fifo8_num_used(&s->cmdfifo) == ESP_CMDFIFO_SZ) {
+        trace_esp_error_fifo_overrun();
+        return;
+    }
+
+    fifo8_push(&s->cmdfifo, val);
+}
+
+static uint8_t esp_cmdfifo_pop(ESPState *s)
+{
+    if (fifo8_is_empty(&s->cmdfifo)) {
+        return 0;
+    }
+
+    return fifo8_pop(&s->cmdfifo);
+}
+
+static uint32_t esp_get_tc(ESPState *s)
+{
+    uint32_t dmalen;
+
+    dmalen = s->rregs[ESP_TCLO];
+    dmalen |= s->rregs[ESP_TCMID] << 8;
+    dmalen |= s->rregs[ESP_TCHI] << 16;
+
+    return dmalen;
+}
+
+static void esp_set_tc(ESPState *s, uint32_t dmalen)
+{
+    s->rregs[ESP_TCLO] = dmalen;
+    s->rregs[ESP_TCMID] = dmalen >> 8;
+    s->rregs[ESP_TCHI] = dmalen >> 16;
+}
+
+static uint32_t esp_get_stc(ESPState *s)
+{
+    uint32_t dmalen;
+
+    dmalen = s->wregs[ESP_TCLO];
+    dmalen |= s->wregs[ESP_TCMID] << 8;
+    dmalen |= s->wregs[ESP_TCHI] << 16;
+
+    return dmalen;
+}
+
+static uint8_t esp_pdma_read(ESPState *s)
+{
+    uint8_t val;
+
+    if (s->do_cmd) {
+        val = esp_cmdfifo_pop(s);
+    } else {
+        val = esp_fifo_pop(s);
+    }
+
+    return val;
 }
 
-static uint8_t *get_pdma_buf(ESPState *s)
+static void esp_pdma_write(ESPState *s, uint8_t val)
 {
-    switch (s->pdma_origin) {
-    case PDMA:
-        return s->pdma_buf;
-    case TI:
-        return s->ti_buf;
-    case CMD:
-        return s->cmdbuf;
-    case ASYNC:
-        return s->async_buf;
+    uint32_t dmalen = esp_get_tc(s);
+
+    if (dmalen == 0) {
+        return;
+    }
+
+    if (s->do_cmd) {
+        esp_cmdfifo_push(s, val);
+    } else {
+        esp_fifo_push(s, val);
     }
-    return NULL;
+
+    dmalen--;
+    esp_set_tc(s, dmalen);
 }
 
-static int get_cmd_cb(ESPState *s)
+static int esp_select(ESPState *s)
 {
     int target;
 
     target = s->wregs[ESP_WBUSID] & BUSID_DID;
 
     s->ti_size = 0;
-    s->ti_rptr = 0;
-    s->ti_wptr = 0;
+    fifo8_reset(&s->fifo);
 
     if (s->current_req) {
         /* Started a new command before the old one finished.  Cancel it.  */
@@ -140,148 +215,195 @@ static int get_cmd_cb(ESPState *s)
     if (!s->current_dev) {
         /* No such drive */
         s->rregs[ESP_RSTAT] = 0;
-        s->rregs[ESP_RINTR] = INTR_DC;
+        s->rregs[ESP_RINTR] |= INTR_DC;
         s->rregs[ESP_RSEQ] = SEQ_0;
         esp_raise_irq(s);
         return -1;
     }
+
+    /*
+     * Note that we deliberately don't raise the IRQ here: this will be done
+     * either in do_busid_cmd() for DATA OUT transfers or by the deferred
+     * IRQ mechanism in esp_transfer_data() for DATA IN transfers
+     */
+    s->rregs[ESP_RINTR] |= INTR_FC;
+    s->rregs[ESP_RSEQ] = SEQ_CD;
     return 0;
 }
 
-static uint32_t get_cmd(ESPState *s, uint8_t *buf, uint8_t buflen)
+static uint32_t get_cmd(ESPState *s, uint32_t maxlen)
 {
-    uint32_t dmalen;
+    uint8_t buf[ESP_CMDFIFO_SZ];
+    uint32_t dmalen, n;
     int target;
 
     target = s->wregs[ESP_WBUSID] & BUSID_DID;
     if (s->dma) {
-        dmalen = s->rregs[ESP_TCLO];
-        dmalen |= s->rregs[ESP_TCMID] << 8;
-        dmalen |= s->rregs[ESP_TCHI] << 16;
-        if (dmalen > buflen) {
+        dmalen = MIN(esp_get_tc(s), maxlen);
+        if (dmalen == 0) {
             return 0;
         }
         if (s->dma_memory_read) {
             s->dma_memory_read(s->dma_opaque, buf, dmalen);
+            fifo8_push_all(&s->cmdfifo, buf, dmalen);
         } else {
-            memcpy(s->pdma_buf, buf, dmalen);
-            set_pdma(s, PDMA, 0, dmalen);
+            if (esp_select(s) < 0) {
+                fifo8_reset(&s->cmdfifo);
+                return -1;
+            }
             esp_raise_drq(s);
+            fifo8_reset(&s->cmdfifo);
             return 0;
         }
     } else {
-        dmalen = s->ti_size;
-        if (dmalen > TI_BUFSZ) {
+        dmalen = MIN(fifo8_num_used(&s->fifo), maxlen);
+        if (dmalen == 0) {
             return 0;
         }
-        memcpy(buf, s->ti_buf, dmalen);
-        buf[0] = buf[2] >> 5;
+        memcpy(buf, fifo8_pop_buf(&s->fifo, dmalen, &n), dmalen);
+        if (dmalen >= 3) {
+            buf[0] = buf[2] >> 5;
+        }
+        fifo8_push_all(&s->cmdfifo, buf, dmalen);
     }
     trace_esp_get_cmd(dmalen, target);
 
-    if (get_cmd_cb(s) < 0) {
-        return 0;
+    if (esp_select(s) < 0) {
+        fifo8_reset(&s->cmdfifo);
+        return -1;
     }
     return dmalen;
 }
 
-static void do_busid_cmd(ESPState *s, uint8_t *buf, uint8_t busid)
+static void do_busid_cmd(ESPState *s, uint8_t busid)
 {
+    uint32_t n, cmdlen;
     int32_t datalen;
     int lun;
     SCSIDevice *current_lun;
+    uint8_t *buf;
 
     trace_esp_do_busid_cmd(busid);
     lun = busid & 7;
+    cmdlen = fifo8_num_used(&s->cmdfifo);
+    buf = (uint8_t *)fifo8_pop_buf(&s->cmdfifo, cmdlen, &n);
+
     current_lun = scsi_device_find(&s->bus, 0, s->current_dev->id, lun);
     s->current_req = scsi_req_new(current_lun, 0, lun, buf, s);
     datalen = scsi_req_enqueue(s->current_req);
     s->ti_size = datalen;
+    fifo8_reset(&s->cmdfifo);
     if (datalen != 0) {
         s->rregs[ESP_RSTAT] = STAT_TC;
-        s->dma_left = 0;
-        s->dma_counter = 0;
+        s->rregs[ESP_RSEQ] = SEQ_CD;
+        s->ti_cmd = 0;
+        esp_set_tc(s, 0);
         if (datalen > 0) {
+            /*
+             * Switch to DATA IN phase but wait until initial data xfer is
+             * complete before raising the command completion interrupt
+             */
+            s->data_in_ready = false;
             s->rregs[ESP_RSTAT] |= STAT_DI;
         } else {
             s->rregs[ESP_RSTAT] |= STAT_DO;
+            s->rregs[ESP_RINTR] |= INTR_BS | INTR_FC;
+            esp_raise_irq(s);
+            esp_lower_drq(s);
         }
         scsi_req_continue(s->current_req);
+        return;
     }
-    s->rregs[ESP_RINTR] = INTR_BS | INTR_FC;
-    s->rregs[ESP_RSEQ] = SEQ_CD;
-    esp_raise_irq(s);
 }
 
-static void do_cmd(ESPState *s, uint8_t *buf)
+static void do_cmd(ESPState *s)
 {
-    uint8_t busid = buf[0];
+    uint8_t busid = fifo8_pop(&s->cmdfifo);
+    uint32_t n;
+
+    s->cmdfifo_cdb_offset--;
+
+    /* Ignore extended messages for now */
+    if (s->cmdfifo_cdb_offset) {
+        fifo8_pop_buf(&s->cmdfifo, s->cmdfifo_cdb_offset, &n);
+        s->cmdfifo_cdb_offset = 0;
+    }
 
-    do_busid_cmd(s, &buf[1], busid);
+    do_busid_cmd(s, busid);
 }
 
 static void satn_pdma_cb(ESPState *s)
 {
-    if (get_cmd_cb(s) < 0) {
-        return;
-    }
-    if (s->pdma_cur != s->pdma_start) {
-        do_cmd(s, get_pdma_buf(s) + s->pdma_start);
+    s->do_cmd = 0;
+    if (!fifo8_is_empty(&s->cmdfifo)) {
+        s->cmdfifo_cdb_offset = 1;
+        do_cmd(s);
     }
 }
 
 static void handle_satn(ESPState *s)
 {
-    uint8_t buf[32];
-    int len;
+    int32_t cmdlen;
 
     if (s->dma && !s->dma_enabled) {
         s->dma_cb = handle_satn;
         return;
     }
     s->pdma_cb = satn_pdma_cb;
-    len = get_cmd(s, buf, sizeof(buf));
-    if (len)
-        do_cmd(s, buf);
+    cmdlen = get_cmd(s, ESP_CMDFIFO_SZ);
+    if (cmdlen > 0) {
+        s->cmdfifo_cdb_offset = 1;
+        do_cmd(s);
+    } else if (cmdlen == 0) {
+        s->do_cmd = 1;
+        /* Target present, but no cmd yet - switch to command phase */
+        s->rregs[ESP_RSEQ] = SEQ_CD;
+        s->rregs[ESP_RSTAT] = STAT_CD;
+    }
 }
 
 static void s_without_satn_pdma_cb(ESPState *s)
 {
-    if (get_cmd_cb(s) < 0) {
-        return;
-    }
-    if (s->pdma_cur != s->pdma_start) {
-        do_busid_cmd(s, get_pdma_buf(s) + s->pdma_start, 0);
+    uint32_t len;
+
+    s->do_cmd = 0;
+    len = fifo8_num_used(&s->cmdfifo);
+    if (len) {
+        s->cmdfifo_cdb_offset = 0;
+        do_busid_cmd(s, 0);
     }
 }
 
 static void handle_s_without_atn(ESPState *s)
 {
-    uint8_t buf[32];
-    int len;
+    int32_t cmdlen;
 
     if (s->dma && !s->dma_enabled) {
         s->dma_cb = handle_s_without_atn;
         return;
     }
     s->pdma_cb = s_without_satn_pdma_cb;
-    len = get_cmd(s, buf, sizeof(buf));
-    if (len) {
-        do_busid_cmd(s, buf, 0);
+    cmdlen = get_cmd(s, ESP_CMDFIFO_SZ);
+    if (cmdlen > 0) {
+        s->cmdfifo_cdb_offset = 0;
+        do_busid_cmd(s, 0);
+    } else if (cmdlen == 0) {
+        s->do_cmd = 1;
+        /* Target present, but no cmd yet - switch to command phase */
+        s->rregs[ESP_RSEQ] = SEQ_CD;
+        s->rregs[ESP_RSTAT] = STAT_CD;
     }
 }
 
 static void satn_stop_pdma_cb(ESPState *s)
 {
-    if (get_cmd_cb(s) < 0) {
-        return;
-    }
-    s->cmdlen = s->pdma_cur - s->pdma_start;
-    if (s->cmdlen) {
-        trace_esp_handle_satn_stop(s->cmdlen);
+    s->do_cmd = 0;
+    if (!fifo8_is_empty(&s->cmdfifo)) {
+        trace_esp_handle_satn_stop(fifo8_num_used(&s->cmdfifo));
         s->do_cmd = 1;
+        s->cmdfifo_cdb_offset = 1;
         s->rregs[ESP_RSTAT] = STAT_TC | STAT_CD;
-        s->rregs[ESP_RINTR] = INTR_BS | INTR_FC;
+        s->rregs[ESP_RINTR] |= INTR_BS | INTR_FC;
         s->rregs[ESP_RSEQ] = SEQ_CD;
         esp_raise_irq(s);
     }
@@ -289,51 +411,62 @@ static void satn_stop_pdma_cb(ESPState *s)
 
 static void handle_satn_stop(ESPState *s)
 {
+    int32_t cmdlen;
+
     if (s->dma && !s->dma_enabled) {
         s->dma_cb = handle_satn_stop;
         return;
     }
     s->pdma_cb = satn_stop_pdma_cb;
-    s->cmdlen = get_cmd(s, s->cmdbuf, sizeof(s->cmdbuf));
-    if (s->cmdlen) {
-        trace_esp_handle_satn_stop(s->cmdlen);
+    cmdlen = get_cmd(s, 1);
+    if (cmdlen > 0) {
+        trace_esp_handle_satn_stop(fifo8_num_used(&s->cmdfifo));
         s->do_cmd = 1;
-        s->rregs[ESP_RSTAT] = STAT_TC | STAT_CD;
-        s->rregs[ESP_RINTR] = INTR_BS | INTR_FC;
-        s->rregs[ESP_RSEQ] = SEQ_CD;
+        s->cmdfifo_cdb_offset = 1;
+        s->rregs[ESP_RSTAT] = STAT_MO;
+        s->rregs[ESP_RINTR] |= INTR_BS | INTR_FC;
+        s->rregs[ESP_RSEQ] = SEQ_MO;
         esp_raise_irq(s);
+    } else if (cmdlen == 0) {
+        s->do_cmd = 1;
+        /* Target present, switch to message out phase */
+        s->rregs[ESP_RSEQ] = SEQ_MO;
+        s->rregs[ESP_RSTAT] = STAT_MO;
     }
 }
 
 static void write_response_pdma_cb(ESPState *s)
 {
     s->rregs[ESP_RSTAT] = STAT_TC | STAT_ST;
-    s->rregs[ESP_RINTR] = INTR_BS | INTR_FC;
+    s->rregs[ESP_RINTR] |= INTR_BS | INTR_FC;
     s->rregs[ESP_RSEQ] = SEQ_CD;
     esp_raise_irq(s);
 }
 
 static void write_response(ESPState *s)
 {
+    uint32_t n;
+
     trace_esp_write_response(s->status);
-    s->ti_buf[0] = s->status;
-    s->ti_buf[1] = 0;
+
+    fifo8_reset(&s->fifo);
+    esp_fifo_push(s, s->status);
+    esp_fifo_push(s, 0);
+
     if (s->dma) {
         if (s->dma_memory_write) {
-            s->dma_memory_write(s->dma_opaque, s->ti_buf, 2);
+            s->dma_memory_write(s->dma_opaque,
+                                (uint8_t *)fifo8_pop_buf(&s->fifo, 2, &n), 2);
             s->rregs[ESP_RSTAT] = STAT_TC | STAT_ST;
-            s->rregs[ESP_RINTR] = INTR_BS | INTR_FC;
+            s->rregs[ESP_RINTR] |= INTR_BS | INTR_FC;
             s->rregs[ESP_RSEQ] = SEQ_CD;
         } else {
-            set_pdma(s, TI, 0, 2);
             s->pdma_cb = write_response_pdma_cb;
             esp_raise_drq(s);
             return;
         }
     } else {
         s->ti_size = 2;
-        s->ti_rptr = 0;
-        s->ti_wptr = 2;
         s->rregs[ESP_RFLAGS] = 2;
     }
     esp_raise_irq(s);
@@ -342,77 +475,133 @@ static void write_response(ESPState *s)
 static void esp_dma_done(ESPState *s)
 {
     s->rregs[ESP_RSTAT] |= STAT_TC;
-    s->rregs[ESP_RINTR] = INTR_BS;
+    s->rregs[ESP_RINTR] |= INTR_BS;
     s->rregs[ESP_RSEQ] = 0;
     s->rregs[ESP_RFLAGS] = 0;
-    s->rregs[ESP_TCLO] = 0;
-    s->rregs[ESP_TCMID] = 0;
-    s->rregs[ESP_TCHI] = 0;
+    esp_set_tc(s, 0);
     esp_raise_irq(s);
 }
 
 static void do_dma_pdma_cb(ESPState *s)
 {
-    int to_device = (s->ti_size < 0);
-    int len = s->pdma_cur - s->pdma_start;
+    int to_device = ((s->rregs[ESP_RSTAT] & 7) == STAT_DO);
+    int len;
+    uint32_t n;
+
     if (s->do_cmd) {
         s->ti_size = 0;
-        s->cmdlen = 0;
         s->do_cmd = 0;
-        do_cmd(s, s->cmdbuf);
+        do_cmd(s);
+        esp_lower_drq(s);
         return;
     }
-    s->dma_left -= len;
-    s->async_buf += len;
-    s->async_len -= len;
+
     if (to_device) {
-        s->ti_size += len;
+        /* Copy FIFO data to device */
+        len = MIN(s->async_len, ESP_FIFO_SZ);
+        len = MIN(len, fifo8_num_used(&s->fifo));
+        memcpy(s->async_buf, fifo8_pop_buf(&s->fifo, len, &n), len);
+        s->async_buf += n;
+        s->async_len -= n;
+        s->ti_size += n;
+
+        if (n < len) {
+            /* Unaligned accesses can cause FIFO wraparound */
+            len = len - n;
+            memcpy(s->async_buf, fifo8_pop_buf(&s->fifo, len, &n), len);
+            s->async_buf += n;
+            s->async_len -= n;
+            s->ti_size += n;
+        }
+
+        if (s->async_len == 0) {
+            scsi_req_continue(s->current_req);
+            return;
+        }
+
+        if (esp_get_tc(s) == 0) {
+            esp_lower_drq(s);
+            esp_dma_done(s);
+        }
+
+        return;
     } else {
-        s->ti_size -= len;
-    }
-    if (s->async_len == 0) {
-        scsi_req_continue(s->current_req);
-        /*
-         * If there is still data to be read from the device then
-         * complete the DMA operation immediately.  Otherwise defer
-         * until the scsi layer has completed.
-         */
-        if (to_device || s->dma_left != 0 || s->ti_size == 0) {
+        if (s->async_len == 0) {
+            if (s->current_req) {
+                /* Defer until the scsi layer has completed */
+                scsi_req_continue(s->current_req);
+                s->data_in_ready = false;
+            }
             return;
         }
-    }
 
-    /* Partially filled a scsi buffer. Complete immediately.  */
-    esp_dma_done(s);
+        if (esp_get_tc(s) != 0) {
+            /* Copy device data to FIFO */
+            len = MIN(s->async_len, esp_get_tc(s));
+            len = MIN(len, fifo8_num_free(&s->fifo));
+            fifo8_push_all(&s->fifo, s->async_buf, len);
+            s->async_buf += len;
+            s->async_len -= len;
+            s->ti_size -= len;
+            esp_set_tc(s, esp_get_tc(s) - len);
+
+            if (esp_get_tc(s) == 0) {
+                /* Indicate transfer to FIFO is complete */
+                s->rregs[ESP_RSTAT] |= STAT_TC;
+            }
+            return;
+        }
+
+        /* Partially filled a scsi buffer. Complete immediately.  */
+        esp_lower_drq(s);
+        esp_dma_done(s);
+    }
 }
 
 static void esp_do_dma(ESPState *s)
 {
-    uint32_t len;
-    int to_device;
+    uint32_t len, cmdlen;
+    int to_device = ((s->rregs[ESP_RSTAT] & 7) == STAT_DO);
+    uint8_t buf[ESP_CMDFIFO_SZ];
 
-    len = s->dma_left;
+    len = esp_get_tc(s);
     if (s->do_cmd) {
         /*
          * handle_ti_cmd() case: esp_do_dma() is called only from
          * handle_ti_cmd() with do_cmd != NULL (see the assert())
          */
-        trace_esp_do_dma(s->cmdlen, len);
-        assert (s->cmdlen <= sizeof(s->cmdbuf) &&
-                len <= sizeof(s->cmdbuf) - s->cmdlen);
+        cmdlen = fifo8_num_used(&s->cmdfifo);
+        trace_esp_do_dma(cmdlen, len);
         if (s->dma_memory_read) {
-            s->dma_memory_read(s->dma_opaque, &s->cmdbuf[s->cmdlen], len);
+            s->dma_memory_read(s->dma_opaque, buf, len);
+            fifo8_push_all(&s->cmdfifo, buf, len);
         } else {
-            set_pdma(s, CMD, s->cmdlen, len);
             s->pdma_cb = do_dma_pdma_cb;
             esp_raise_drq(s);
             return;
         }
-        trace_esp_handle_ti_cmd(s->cmdlen);
+        trace_esp_handle_ti_cmd(cmdlen);
         s->ti_size = 0;
-        s->cmdlen = 0;
-        s->do_cmd = 0;
-        do_cmd(s, s->cmdbuf);
+        if ((s->rregs[ESP_RSTAT] & 7) == STAT_CD) {
+            /* No command received */
+            if (s->cmdfifo_cdb_offset == fifo8_num_used(&s->cmdfifo)) {
+                return;
+            }
+
+            /* Command has been received */
+            s->do_cmd = 0;
+            do_cmd(s);
+        } else {
+            /*
+             * Extra message out bytes received: update cmdfifo_cdb_offset
+             * and then switch to command phase
+             */
+            s->cmdfifo_cdb_offset = fifo8_num_used(&s->cmdfifo);
+            s->rregs[ESP_RSTAT] = STAT_TC | STAT_CD;
+            s->rregs[ESP_RSEQ] = SEQ_CD;
+            s->rregs[ESP_RINTR] |= INTR_BS;
+            esp_raise_irq(s);
+        }
         return;
     }
     if (s->async_len == 0) {
@@ -422,12 +611,10 @@ static void esp_do_dma(ESPState *s)
     if (len > s->async_len) {
         len = s->async_len;
     }
-    to_device = (s->ti_size < 0);
     if (to_device) {
         if (s->dma_memory_read) {
             s->dma_memory_read(s->dma_opaque, s->async_buf, len);
         } else {
-            set_pdma(s, ASYNC, 0, len);
             s->pdma_cb = do_dma_pdma_cb;
             esp_raise_drq(s);
             return;
@@ -436,48 +623,145 @@ static void esp_do_dma(ESPState *s)
         if (s->dma_memory_write) {
             s->dma_memory_write(s->dma_opaque, s->async_buf, len);
         } else {
-            set_pdma(s, ASYNC, 0, len);
+            /* Adjust TC for any leftover data in the FIFO */
+            if (!fifo8_is_empty(&s->fifo)) {
+                esp_set_tc(s, esp_get_tc(s) - fifo8_num_used(&s->fifo));
+            }
+
+            /* Copy device data to FIFO */
+            len = MIN(len, fifo8_num_free(&s->fifo));
+            fifo8_push_all(&s->fifo, s->async_buf, len);
+            s->async_buf += len;
+            s->async_len -= len;
+            s->ti_size -= len;
+
+            /*
+             * MacOS toolbox uses a TI length of 16 bytes for all commands, so
+             * commands shorter than this must be padded accordingly
+             */
+            if (len < esp_get_tc(s) && esp_get_tc(s) <= ESP_FIFO_SZ) {
+                while (fifo8_num_used(&s->fifo) < ESP_FIFO_SZ) {
+                    esp_fifo_push(s, 0);
+                    len++;
+                }
+            }
+
+            esp_set_tc(s, esp_get_tc(s) - len);
             s->pdma_cb = do_dma_pdma_cb;
             esp_raise_drq(s);
+
+            /* Indicate transfer to FIFO is complete */
+            s->rregs[ESP_RSTAT] |= STAT_TC;
             return;
         }
     }
-    s->dma_left -= len;
+    esp_set_tc(s, esp_get_tc(s) - len);
     s->async_buf += len;
     s->async_len -= len;
-    if (to_device)
+    if (to_device) {
         s->ti_size += len;
-    else
+    } else {
         s->ti_size -= len;
+    }
     if (s->async_len == 0) {
         scsi_req_continue(s->current_req);
-        /* If there is still data to be read from the device then
-           complete the DMA operation immediately.  Otherwise defer
-           until the scsi layer has completed.  */
-        if (to_device || s->dma_left != 0 || s->ti_size == 0) {
+        /*
+         * If there is still data to be read from the device then
+         * complete the DMA operation immediately.  Otherwise defer
+         * until the scsi layer has completed.
+         */
+        if (to_device || esp_get_tc(s) != 0 || s->ti_size == 0) {
             return;
         }
     }
 
     /* Partially filled a scsi buffer. Complete immediately.  */
     esp_dma_done(s);
+    esp_lower_drq(s);
 }
 
-static void esp_report_command_complete(ESPState *s, uint32_t status)
+static void esp_do_nodma(ESPState *s)
 {
+    int to_device = ((s->rregs[ESP_RSTAT] & 7) == STAT_DO);
+    uint32_t cmdlen, n;
+    int len;
+
+    if (s->do_cmd) {
+        cmdlen = fifo8_num_used(&s->cmdfifo);
+        trace_esp_handle_ti_cmd(cmdlen);
+        s->ti_size = 0;
+        if ((s->rregs[ESP_RSTAT] & 7) == STAT_CD) {
+            /* No command received */
+            if (s->cmdfifo_cdb_offset == fifo8_num_used(&s->cmdfifo)) {
+                return;
+            }
+
+            /* Command has been received */
+            s->do_cmd = 0;
+            do_cmd(s);
+        } else {
+            /*
+             * Extra message out bytes received: update cmdfifo_cdb_offset
+             * and then switch to command phase
+             */
+            s->cmdfifo_cdb_offset = fifo8_num_used(&s->cmdfifo);
+            s->rregs[ESP_RSTAT] = STAT_TC | STAT_CD;
+            s->rregs[ESP_RSEQ] = SEQ_CD;
+            s->rregs[ESP_RINTR] |= INTR_BS;
+            esp_raise_irq(s);
+        }
+        return;
+    }
+
+    if (s->async_len == 0) {
+        /* Defer until data is available.  */
+        return;
+    }
+
+    if (to_device) {
+        len = MIN(fifo8_num_used(&s->fifo), ESP_FIFO_SZ);
+        memcpy(s->async_buf, fifo8_pop_buf(&s->fifo, len, &n), len);
+        s->async_buf += len;
+        s->async_len -= len;
+        s->ti_size += len;
+    } else {
+        len = MIN(s->ti_size, s->async_len);
+        len = MIN(len, fifo8_num_free(&s->fifo));
+        fifo8_push_all(&s->fifo, s->async_buf, len);
+        s->async_buf += len;
+        s->async_len -= len;
+        s->ti_size -= len;
+    }
+
+    if (s->async_len == 0) {
+        scsi_req_continue(s->current_req);
+
+        if (to_device || s->ti_size == 0) {
+            return;
+        }
+    }
+
+    s->rregs[ESP_RINTR] |= INTR_BS;
+    esp_raise_irq(s);
+}
+
+void esp_command_complete(SCSIRequest *req, size_t resid)
+{
+    ESPState *s = req->hba_private;
+
     trace_esp_command_complete();
     if (s->ti_size != 0) {
         trace_esp_command_complete_unexpected();
     }
     s->ti_size = 0;
-    s->dma_left = 0;
     s->async_len = 0;
-    if (status) {
+    if (req->status) {
         trace_esp_command_complete_fail();
     }
-    s->status = status;
+    s->status = req->status;
     s->rregs[ESP_RSTAT] = STAT_ST;
     esp_dma_done(s);
+    esp_lower_drq(s);
     if (s->current_req) {
         scsi_req_unref(s->current_req);
         s->current_req = NULL;
@@ -485,73 +769,83 @@ static void esp_report_command_complete(ESPState *s, uint32_t status)
     }
 }
 
-void esp_command_complete(SCSIRequest *req, size_t resid)
+void esp_transfer_data(SCSIRequest *req, uint32_t len)
 {
     ESPState *s = req->hba_private;
+    int to_device = ((s->rregs[ESP_RSTAT] & 7) == STAT_DO);
+    uint32_t dmalen = esp_get_tc(s);
 
-    if (s->rregs[ESP_RSTAT] & STAT_INT) {
-        /* Defer handling command complete until the previous
-         * interrupt has been handled.
+    assert(!s->do_cmd);
+    trace_esp_transfer_data(dmalen, s->ti_size);
+    s->async_len = len;
+    s->async_buf = scsi_req_get_buf(req);
+
+    if (!to_device && !s->data_in_ready) {
+        /*
+         * Initial incoming data xfer is complete so raise command
+         * completion interrupt
          */
-        trace_esp_command_complete_deferred();
-        s->deferred_status = req->status;
-        s->deferred_complete = true;
+        s->data_in_ready = true;
+        s->rregs[ESP_RSTAT] |= STAT_TC;
+        s->rregs[ESP_RINTR] |= INTR_BS;
+        esp_raise_irq(s);
+
+        /*
+         * If data is ready to transfer and the TI command has already
+         * been executed, start DMA immediately. Otherwise DMA will start
+         * when host sends the TI command
+         */
+        if (s->ti_size && (s->rregs[ESP_CMD] == (CMD_TI | CMD_DMA))) {
+            esp_do_dma(s);
+        }
         return;
     }
-    esp_report_command_complete(s, req->status);
-}
 
-void esp_transfer_data(SCSIRequest *req, uint32_t len)
-{
-    ESPState *s = req->hba_private;
+    if (s->ti_cmd == 0) {
+        /*
+         * Always perform the initial transfer upon reception of the next TI
+         * command to ensure the DMA/non-DMA status of the command is correct.
+         * It is not possible to use s->dma directly in the section below as
+         * some OSs send non-DMA NOP commands after a DMA transfer. Hence if the
+         * async data transfer is delayed then s->dma is set incorrectly.
+         */
+        return;
+    }
 
-    assert(!s->do_cmd);
-    trace_esp_transfer_data(s->dma_left, s->ti_size);
-    s->async_len = len;
-    s->async_buf = scsi_req_get_buf(req);
-    if (s->dma_left) {
-        esp_do_dma(s);
-    } else if (s->dma_counter != 0 && s->ti_size <= 0) {
-        /* If this was the last part of a DMA transfer then the
-           completion interrupt is deferred to here.  */
-        esp_dma_done(s);
+    if (s->ti_cmd & CMD_DMA) {
+        if (dmalen) {
+            esp_do_dma(s);
+        } else if (s->ti_size <= 0) {
+            /*
+             * If this was the last part of a DMA transfer then the
+             * completion interrupt is deferred to here.
+             */
+            esp_dma_done(s);
+            esp_lower_drq(s);
+        }
+    } else {
+        esp_do_nodma(s);
     }
 }
 
 static void handle_ti(ESPState *s)
 {
-    uint32_t dmalen, minlen;
+    uint32_t dmalen;
 
     if (s->dma && !s->dma_enabled) {
         s->dma_cb = handle_ti;
         return;
     }
 
-    dmalen = s->rregs[ESP_TCLO];
-    dmalen |= s->rregs[ESP_TCMID] << 8;
-    dmalen |= s->rregs[ESP_TCHI] << 16;
-    if (dmalen==0) {
-      dmalen=0x10000;
-    }
-    s->dma_counter = dmalen;
-
-    if (s->do_cmd)
-        minlen = (dmalen < ESP_CMDBUF_SZ) ? dmalen : ESP_CMDBUF_SZ;
-    else if (s->ti_size < 0)
-        minlen = (dmalen < -s->ti_size) ? dmalen : -s->ti_size;
-    else
-        minlen = (dmalen < s->ti_size) ? dmalen : s->ti_size;
-    trace_esp_handle_ti(minlen);
+    s->ti_cmd = s->rregs[ESP_CMD];
     if (s->dma) {
-        s->dma_left = minlen;
+        dmalen = esp_get_tc(s);
+        trace_esp_handle_ti(dmalen);
         s->rregs[ESP_RSTAT] &= ~STAT_TC;
         esp_do_dma(s);
-    } else if (s->do_cmd) {
-        trace_esp_handle_ti_cmd(s->cmdlen);
-        s->ti_size = 0;
-        s->cmdlen = 0;
-        s->do_cmd = 0;
-        do_cmd(s, s->cmdbuf);
+    } else {
+        trace_esp_handle_ti(s->ti_size);
+        esp_do_nodma(s);
     }
 }
 
@@ -561,8 +855,8 @@ void esp_hard_reset(ESPState *s)
     memset(s->wregs, 0, ESP_REGS);
     s->tchi_written = 0;
     s->ti_size = 0;
-    s->ti_rptr = 0;
-    s->ti_wptr = 0;
+    fifo8_reset(&s->fifo);
+    fifo8_reset(&s->cmdfifo);
     s->dma = 0;
     s->do_cmd = 0;
     s->dma_cb = NULL;
@@ -586,46 +880,50 @@ static void parent_esp_reset(ESPState *s, int irq, int level)
 
 uint64_t esp_reg_read(ESPState *s, uint32_t saddr)
 {
-    uint32_t old_val;
+    uint32_t val;
 
-    trace_esp_mem_readb(saddr, s->rregs[saddr]);
     switch (saddr) {
     case ESP_FIFO:
-        if ((s->rregs[ESP_RSTAT] & STAT_PIO_MASK) == 0) {
+        if (s->dma_memory_read && s->dma_memory_write &&
+                (s->rregs[ESP_RSTAT] & STAT_PIO_MASK) == 0) {
             /* Data out.  */
             qemu_log_mask(LOG_UNIMP, "esp: PIO data read not implemented\n");
             s->rregs[ESP_FIFO] = 0;
-        } else if (s->ti_rptr < s->ti_wptr) {
-            s->ti_size--;
-            s->rregs[ESP_FIFO] = s->ti_buf[s->ti_rptr++];
-        }
-        if (s->ti_rptr == s->ti_wptr) {
-            s->ti_rptr = 0;
-            s->ti_wptr = 0;
+        } else {
+            s->rregs[ESP_FIFO] = esp_fifo_pop(s);
         }
+        val = s->rregs[ESP_FIFO];
         break;
     case ESP_RINTR:
-        /* Clear sequence step, interrupt register and all status bits
-           except TC */
-        old_val = s->rregs[ESP_RINTR];
+        /*
+         * Clear sequence step, interrupt register and all status bits
+         * except TC
+         */
+        val = s->rregs[ESP_RINTR];
         s->rregs[ESP_RINTR] = 0;
         s->rregs[ESP_RSTAT] &= ~STAT_TC;
-        s->rregs[ESP_RSEQ] = SEQ_CD;
+        s->rregs[ESP_RSEQ] = SEQ_0;
         esp_lower_irq(s);
-        if (s->deferred_complete) {
-            esp_report_command_complete(s, s->deferred_status);
-            s->deferred_complete = false;
-        }
-        return old_val;
+        break;
     case ESP_TCHI:
         /* Return the unique id if the value has never been written */
         if (!s->tchi_written) {
-            return s->chip_id;
+            val = s->chip_id;
+        } else {
+            val = s->rregs[saddr];
         }
+        break;
+    case ESP_RFLAGS:
+        /* Bottom 5 bits indicate number of bytes in FIFO */
+        val = fifo8_num_used(&s->fifo);
+        break;
     default:
+        val = s->rregs[saddr];
         break;
     }
-    return s->rregs[saddr];
+
+    trace_esp_mem_readb(saddr, val);
+    return val;
 }
 
 void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val)
@@ -641,16 +939,15 @@ void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val)
         break;
     case ESP_FIFO:
         if (s->do_cmd) {
-            if (s->cmdlen < ESP_CMDBUF_SZ) {
-                s->cmdbuf[s->cmdlen++] = val & 0xff;
-            } else {
-                trace_esp_error_fifo_overrun();
-            }
-        } else if (s->ti_wptr == TI_BUFSZ - 1) {
-            trace_esp_error_fifo_overrun();
+            esp_cmdfifo_push(s, val);
         } else {
-            s->ti_size++;
-            s->ti_buf[s->ti_wptr++] = val & 0xff;
+            esp_fifo_push(s, val);
+        }
+
+        /* Non-DMA transfers raise an interrupt after every byte */
+        if (s->rregs[ESP_CMD] == CMD_TI) {
+            s->rregs[ESP_RINTR] |= INTR_FC | INTR_BS;
+            esp_raise_irq(s);
         }
         break;
     case ESP_CMD:
@@ -658,22 +955,21 @@ void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val)
         if (val & CMD_DMA) {
             s->dma = 1;
             /* Reload DMA counter.  */
-            s->rregs[ESP_TCLO] = s->wregs[ESP_TCLO];
-            s->rregs[ESP_TCMID] = s->wregs[ESP_TCMID];
-            s->rregs[ESP_TCHI] = s->wregs[ESP_TCHI];
+            if (esp_get_stc(s) == 0) {
+                esp_set_tc(s, 0x10000);
+            } else {
+                esp_set_tc(s, esp_get_stc(s));
+            }
         } else {
             s->dma = 0;
         }
-        switch(val & CMD_CMD) {
+        switch (val & CMD_CMD) {
         case CMD_NOP:
             trace_esp_mem_writeb_cmd_nop(val);
             break;
         case CMD_FLUSH:
             trace_esp_mem_writeb_cmd_flush(val);
-            //s->ti_size = 0;
-            s->rregs[ESP_RINTR] = INTR_FC;
-            s->rregs[ESP_RSEQ] = 0;
-            s->rregs[ESP_RFLAGS] = 0;
+            fifo8_reset(&s->fifo);
             break;
         case CMD_RESET:
             trace_esp_mem_writeb_cmd_reset(val);
@@ -681,23 +977,24 @@ void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val)
             break;
         case CMD_BUSRESET:
             trace_esp_mem_writeb_cmd_bus_reset(val);
-            s->rregs[ESP_RINTR] = INTR_RST;
             if (!(s->wregs[ESP_CFG1] & CFG1_RESREPT)) {
+                s->rregs[ESP_RINTR] |= INTR_RST;
                 esp_raise_irq(s);
             }
             break;
         case CMD_TI:
+            trace_esp_mem_writeb_cmd_ti(val);
             handle_ti(s);
             break;
         case CMD_ICCS:
             trace_esp_mem_writeb_cmd_iccs(val);
             write_response(s);
-            s->rregs[ESP_RINTR] = INTR_FC;
+            s->rregs[ESP_RINTR] |= INTR_FC;
             s->rregs[ESP_RSTAT] |= STAT_MI;
             break;
         case CMD_MSGACC:
             trace_esp_mem_writeb_cmd_msgacc(val);
-            s->rregs[ESP_RINTR] = INTR_DC;
+            s->rregs[ESP_RINTR] |= INTR_DC;
             s->rregs[ESP_RSEQ] = 0;
             s->rregs[ESP_RFLAGS] = 0;
             esp_raise_irq(s);
@@ -705,7 +1002,7 @@ void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val)
         case CMD_PAD:
             trace_esp_mem_writeb_cmd_pad(val);
             s->rregs[ESP_RSTAT] = STAT_TC;
-            s->rregs[ESP_RINTR] = INTR_FC;
+            s->rregs[ESP_RINTR] |= INTR_FC;
             s->rregs[ESP_RSEQ] = 0;
             break;
         case CMD_SATN:
@@ -763,74 +1060,112 @@ static bool esp_mem_accepts(void *opaque, hwaddr addr,
     return (size == 1) || (is_write && size == 4);
 }
 
-static bool esp_pdma_needed(void *opaque)
+static bool esp_is_before_version_5(void *opaque, int version_id)
 {
-    ESPState *s = opaque;
-    return s->dma_memory_read == NULL && s->dma_memory_write == NULL &&
-           s->dma_enabled;
+    ESPState *s = ESP(opaque);
+
+    version_id = MIN(version_id, s->mig_version_id);
+    return version_id < 5;
 }
 
-static const VMStateDescription vmstate_esp_pdma = {
-    .name = "esp/pdma",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .needed = esp_pdma_needed,
-    .fields = (VMStateField[]) {
-        VMSTATE_BUFFER(pdma_buf, ESPState),
-        VMSTATE_INT32(pdma_origin, ESPState),
-        VMSTATE_UINT32(pdma_len, ESPState),
-        VMSTATE_UINT32(pdma_start, ESPState),
-        VMSTATE_UINT32(pdma_cur, ESPState),
-        VMSTATE_END_OF_LIST()
+static bool esp_is_version_5(void *opaque, int version_id)
+{
+    ESPState *s = ESP(opaque);
+
+    version_id = MIN(version_id, s->mig_version_id);
+    return version_id == 5;
+}
+
+static int esp_pre_save(void *opaque)
+{
+    ESPState *s = ESP(opaque);
+
+    s->mig_version_id = vmstate_esp.version_id;
+    return 0;
+}
+
+static int esp_post_load(void *opaque, int version_id)
+{
+    ESPState *s = ESP(opaque);
+    int len, i;
+
+    version_id = MIN(version_id, s->mig_version_id);
+
+    if (version_id < 5) {
+        esp_set_tc(s, s->mig_dma_left);
+
+        /* Migrate ti_buf to fifo */
+        len = s->mig_ti_wptr - s->mig_ti_rptr;
+        for (i = 0; i < len; i++) {
+            fifo8_push(&s->fifo, s->mig_ti_buf[i]);
+        }
+
+        /* Migrate cmdbuf to cmdfifo */
+        for (i = 0; i < s->mig_cmdlen; i++) {
+            fifo8_push(&s->cmdfifo, s->mig_cmdbuf[i]);
+        }
     }
-};
+
+    s->mig_version_id = vmstate_esp.version_id;
+    return 0;
+}
 
 const VMStateDescription vmstate_esp = {
-    .name ="esp",
-    .version_id = 4,
+    .name = "esp",
+    .version_id = 5,
     .minimum_version_id = 3,
+    .pre_save = esp_pre_save,
+    .post_load = esp_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_BUFFER(rregs, ESPState),
         VMSTATE_BUFFER(wregs, ESPState),
         VMSTATE_INT32(ti_size, ESPState),
-        VMSTATE_UINT32(ti_rptr, ESPState),
-        VMSTATE_UINT32(ti_wptr, ESPState),
-        VMSTATE_BUFFER(ti_buf, ESPState),
+        VMSTATE_UINT32_TEST(mig_ti_rptr, ESPState, esp_is_before_version_5),
+        VMSTATE_UINT32_TEST(mig_ti_wptr, ESPState, esp_is_before_version_5),
+        VMSTATE_BUFFER_TEST(mig_ti_buf, ESPState, esp_is_before_version_5),
         VMSTATE_UINT32(status, ESPState),
-        VMSTATE_UINT32(deferred_status, ESPState),
-        VMSTATE_BOOL(deferred_complete, ESPState),
+        VMSTATE_UINT32_TEST(mig_deferred_status, ESPState,
+                            esp_is_before_version_5),
+        VMSTATE_BOOL_TEST(mig_deferred_complete, ESPState,
+                          esp_is_before_version_5),
         VMSTATE_UINT32(dma, ESPState),
-        VMSTATE_PARTIAL_BUFFER(cmdbuf, ESPState, 16),
-        VMSTATE_BUFFER_START_MIDDLE_V(cmdbuf, ESPState, 16, 4),
-        VMSTATE_UINT32(cmdlen, ESPState),
+        VMSTATE_STATIC_BUFFER(mig_cmdbuf, ESPState, 0,
+                              esp_is_before_version_5, 0, 16),
+        VMSTATE_STATIC_BUFFER(mig_cmdbuf, ESPState, 4,
+                              esp_is_before_version_5, 16,
+                              sizeof(typeof_field(ESPState, mig_cmdbuf))),
+        VMSTATE_UINT32_TEST(mig_cmdlen, ESPState, esp_is_before_version_5),
         VMSTATE_UINT32(do_cmd, ESPState),
-        VMSTATE_UINT32(dma_left, ESPState),
+        VMSTATE_UINT32_TEST(mig_dma_left, ESPState, esp_is_before_version_5),
+        VMSTATE_BOOL_TEST(data_in_ready, ESPState, esp_is_version_5),
+        VMSTATE_UINT8_TEST(cmdfifo_cdb_offset, ESPState, esp_is_version_5),
+        VMSTATE_FIFO8_TEST(fifo, ESPState, esp_is_version_5),
+        VMSTATE_FIFO8_TEST(cmdfifo, ESPState, esp_is_version_5),
+        VMSTATE_UINT8_TEST(ti_cmd, ESPState, esp_is_version_5),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (const VMStateDescription * []) {
-        &vmstate_esp_pdma,
-        NULL
-    }
 };
 
 static void sysbus_esp_mem_write(void *opaque, hwaddr addr,
                                  uint64_t val, unsigned int size)
 {
     SysBusESPState *sysbus = opaque;
+    ESPState *s = ESP(&sysbus->esp);
     uint32_t saddr;
 
     saddr = addr >> sysbus->it_shift;
-    esp_reg_write(&sysbus->esp, saddr, val);
+    esp_reg_write(s, saddr, val);
 }
 
 static uint64_t sysbus_esp_mem_read(void *opaque, hwaddr addr,
                                     unsigned int size)
 {
     SysBusESPState *sysbus = opaque;
+    ESPState *s = ESP(&sysbus->esp);
     uint32_t saddr;
 
     saddr = addr >> sysbus->it_shift;
-    return esp_reg_read(&sysbus->esp, saddr);
+    return esp_reg_read(s, saddr);
 }
 
 static const MemoryRegionOps sysbus_esp_mem_ops = {
@@ -844,36 +1179,23 @@ static void sysbus_esp_pdma_write(void *opaque, hwaddr addr,
                                   uint64_t val, unsigned int size)
 {
     SysBusESPState *sysbus = opaque;
-    ESPState *s = &sysbus->esp;
+    ESPState *s = ESP(&sysbus->esp);
     uint32_t dmalen;
-    uint8_t *buf = get_pdma_buf(s);
 
-    dmalen = s->rregs[ESP_TCLO];
-    dmalen |= s->rregs[ESP_TCMID] << 8;
-    dmalen |= s->rregs[ESP_TCHI] << 16;
-    if (dmalen == 0 || s->pdma_len == 0) {
-        return;
-    }
+    trace_esp_pdma_write(size);
+
     switch (size) {
     case 1:
-        buf[s->pdma_cur++] = val;
-        s->pdma_len--;
-        dmalen--;
+        esp_pdma_write(s, val);
         break;
     case 2:
-        buf[s->pdma_cur++] = val >> 8;
-        buf[s->pdma_cur++] = val;
-        s->pdma_len -= 2;
-        dmalen -= 2;
+        esp_pdma_write(s, val >> 8);
+        esp_pdma_write(s, val);
         break;
     }
-    s->rregs[ESP_TCLO] = dmalen & 0xff;
-    s->rregs[ESP_TCMID] = dmalen >> 8;
-    s->rregs[ESP_TCHI] = dmalen >> 16;
-    if (s->pdma_len == 0 && s->pdma_cb) {
-        esp_lower_drq(s);
+    dmalen = esp_get_tc(s);
+    if (dmalen == 0 || fifo8_num_free(&s->fifo) < 2) {
         s->pdma_cb(s);
-        s->pdma_cb = NULL;
     }
 }
 
@@ -881,29 +1203,22 @@ static uint64_t sysbus_esp_pdma_read(void *opaque, hwaddr addr,
                                      unsigned int size)
 {
     SysBusESPState *sysbus = opaque;
-    ESPState *s = &sysbus->esp;
-    uint8_t *buf = get_pdma_buf(s);
+    ESPState *s = ESP(&sysbus->esp);
     uint64_t val = 0;
 
-    if (s->pdma_len == 0) {
-        return 0;
-    }
+    trace_esp_pdma_read(size);
+
     switch (size) {
     case 1:
-        val = buf[s->pdma_cur++];
-        s->pdma_len--;
+        val = esp_pdma_read(s);
         break;
     case 2:
-        val = buf[s->pdma_cur++];
-        val = (val << 8) | buf[s->pdma_cur++];
-        s->pdma_len -= 2;
+        val = esp_pdma_read(s);
+        val = (val << 8) | esp_pdma_read(s);
         break;
     }
-
-    if (s->pdma_len == 0 && s->pdma_cb) {
-        esp_lower_drq(s);
+    if (fifo8_num_used(&s->fifo) < 2) {
         s->pdma_cb(s);
-        s->pdma_cb = NULL;
     }
     return val;
 }
@@ -913,7 +1228,9 @@ static const MemoryRegionOps sysbus_esp_pdma_ops = {
     .write = sysbus_esp_pdma_write,
     .endianness = DEVICE_NATIVE_ENDIAN,
     .valid.min_access_size = 1,
-    .valid.max_access_size = 2,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 2,
 };
 
 static const struct SCSIBusInfo esp_scsi_info = {
@@ -928,8 +1245,8 @@ static const struct SCSIBusInfo esp_scsi_info = {
 
 static void sysbus_esp_gpio_demux(void *opaque, int irq, int level)
 {
-    SysBusESPState *sysbus = ESP(opaque);
-    ESPState *s = &sysbus->esp;
+    SysBusESPState *sysbus = SYSBUS_ESP(opaque);
+    ESPState *s = ESP(&sysbus->esp);
 
     switch (irq) {
     case 0:
@@ -944,8 +1261,12 @@ static void sysbus_esp_gpio_demux(void *opaque, int irq, int level)
 static void sysbus_esp_realize(DeviceState *dev, Error **errp)
 {
     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
-    SysBusESPState *sysbus = ESP(dev);
-    ESPState *s = &sysbus->esp;
+    SysBusESPState *sysbus = SYSBUS_ESP(dev);
+    ESPState *s = ESP(&sysbus->esp);
+
+    if (!qdev_realize(DEVICE(s), NULL, errp)) {
+        return;
+    }
 
     sysbus_init_irq(sbd, &s->irq);
     sysbus_init_irq(sbd, &s->irq_data);
@@ -956,7 +1277,7 @@ static void sysbus_esp_realize(DeviceState *dev, Error **errp)
                           sysbus, "esp-regs", ESP_REGS << sysbus->it_shift);
     sysbus_init_mmio(sbd, &sysbus->iomem);
     memory_region_init_io(&sysbus->pdma, OBJECT(sysbus), &sysbus_esp_pdma_ops,
-                          sysbus, "esp-pdma", 2);
+                          sysbus, "esp-pdma", 4);
     sysbus_init_mmio(sbd, &sysbus->pdma);
 
     qdev_init_gpio_in(dev, sysbus_esp_gpio_demux, 2);
@@ -966,15 +1287,25 @@ static void sysbus_esp_realize(DeviceState *dev, Error **errp)
 
 static void sysbus_esp_hard_reset(DeviceState *dev)
 {
-    SysBusESPState *sysbus = ESP(dev);
-    esp_hard_reset(&sysbus->esp);
+    SysBusESPState *sysbus = SYSBUS_ESP(dev);
+    ESPState *s = ESP(&sysbus->esp);
+
+    esp_hard_reset(s);
+}
+
+static void sysbus_esp_init(Object *obj)
+{
+    SysBusESPState *sysbus = SYSBUS_ESP(obj);
+
+    object_initialize_child(obj, "esp", &sysbus->esp, TYPE_ESP);
 }
 
 static const VMStateDescription vmstate_sysbus_esp_scsi = {
     .name = "sysbusespscsi",
-    .version_id = 1,
+    .version_id = 2,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
+        VMSTATE_UINT8_V(esp.mig_version_id, SysBusESPState, 2),
         VMSTATE_STRUCT(esp, SysBusESPState, 0, vmstate_esp, ESPState),
         VMSTATE_END_OF_LIST()
     }
@@ -991,15 +1322,51 @@ static void sysbus_esp_class_init(ObjectClass *klass, void *data)
 }
 
 static const TypeInfo sysbus_esp_info = {
-    .name          = TYPE_ESP,
+    .name          = TYPE_SYSBUS_ESP,
     .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_init = sysbus_esp_init,
     .instance_size = sizeof(SysBusESPState),
     .class_init    = sysbus_esp_class_init,
 };
 
+static void esp_finalize(Object *obj)
+{
+    ESPState *s = ESP(obj);
+
+    fifo8_destroy(&s->fifo);
+    fifo8_destroy(&s->cmdfifo);
+}
+
+static void esp_init(Object *obj)
+{
+    ESPState *s = ESP(obj);
+
+    fifo8_create(&s->fifo, ESP_FIFO_SZ);
+    fifo8_create(&s->cmdfifo, ESP_CMDFIFO_SZ);
+}
+
+static void esp_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    /* internal device for sysbusesp/pciespscsi, not user-creatable */
+    dc->user_creatable = false;
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+}
+
+static const TypeInfo esp_info = {
+    .name = TYPE_ESP,
+    .parent = TYPE_DEVICE,
+    .instance_init = esp_init,
+    .instance_finalize = esp_finalize,
+    .instance_size = sizeof(ESPState),
+    .class_init = esp_class_init,
+};
+
 static void esp_register_types(void)
 {
     type_register_static(&sysbus_esp_info);
+    type_register_static(&esp_info);
 }
 
 type_init(esp_register_types)
diff --git a/hw/scsi/trace-events b/hw/scsi/trace-events
index 9788661bfd..1c331fb189 100644
--- a/hw/scsi/trace-events
+++ b/hw/scsi/trace-events
@@ -159,8 +159,12 @@ esp_error_unhandled_command(uint32_t val) "unhandled command (0x%2.2x)"
 esp_error_invalid_write(uint32_t val, uint32_t addr) "invalid write of 0x%02x at [0x%x]"
 esp_raise_irq(void) "Raise IRQ"
 esp_lower_irq(void) "Lower IRQ"
+esp_raise_drq(void) "Raise DREQ"
+esp_lower_drq(void) "Lower DREQ"
 esp_dma_enable(void) "Raise enable"
 esp_dma_disable(void) "Lower enable"
+esp_pdma_read(int size) "pDMA read %u bytes"
+esp_pdma_write(int size) "pDMA write %u bytes"
 esp_get_cmd(uint32_t dmalen, int target) "len %d target %d"
 esp_do_busid_cmd(uint8_t busid) "busid 0x%x"
 esp_handle_satn_stop(uint32_t cmdlen) "cmdlen %d"
@@ -189,6 +193,7 @@ esp_mem_writeb_cmd_selatn(uint32_t val) "Select with ATN (0x%2.2x)"
 esp_mem_writeb_cmd_selatns(uint32_t val) "Select with ATN & stop (0x%2.2x)"
 esp_mem_writeb_cmd_ensel(uint32_t val) "Enable selection (0x%2.2x)"
 esp_mem_writeb_cmd_dissel(uint32_t val) "Disable selection (0x%2.2x)"
+esp_mem_writeb_cmd_ti(uint32_t val) "Transfer Information (0x%2.2x)"
 
 # esp-pci.c
 esp_pci_error_invalid_dma_direction(void) "invalid DMA transfer direction"
diff --git a/hw/sh4/Kconfig b/hw/sh4/Kconfig
index 4cbce3a0ed..ab733a3f76 100644
--- a/hw/sh4/Kconfig
+++ b/hw/sh4/Kconfig
@@ -9,16 +9,16 @@ config R2D
     select USB_OHCI_PCI
     select PCI
     select SM501
-    select SH4
+    select SH7750
+    select SH_PCI
 
 config SHIX
     bool
     select SH7750
-    select SH4
+    select TC58128
 
 config SH7750
     bool
-
-config SH4
-    bool
-    select PTIMER
+    select SH_INTC
+    select SH_SCI
+    select SH_TIMER
diff --git a/hw/sh4/meson.build b/hw/sh4/meson.build
index 303c0f4287..424d5674de 100644
--- a/hw/sh4/meson.build
+++ b/hw/sh4/meson.build
@@ -2,7 +2,6 @@ sh4_ss = ss.source_set()
 sh4_ss.add(files(
   'sh7750.c',
   'sh7750_regnames.c',
-  'sh_pci.c'
 ))
 sh4_ss.add(when: 'CONFIG_R2D', if_true: files('r2d.c'))
 sh4_ss.add(when: 'CONFIG_SHIX', if_true: files('shix.c'))
diff --git a/hw/sh4/sh7750_regs.h b/hw/sh4/sh7750_regs.h
index 3e4554af31..ab073dadc7 100644
--- a/hw/sh4/sh7750_regs.h
+++ b/hw/sh4/sh7750_regs.h
@@ -10,8 +10,28 @@
  *         Victor V. Vengerov <vvv@oktet.ru>
  *
  * The license and distribution terms for this file may be
- * found in the file LICENSE in this distribution or at
- *  http://www.rtems.com/license/LICENSE.
+ * found in this file hereafter or at http://www.rtems.com/license/LICENSE.
+ *
+ *                       LICENSE INFORMATION
+ *
+ * RTEMS is free software; you can redistribute it and/or modify it under
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.  RTEMS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details. You should have received
+ * a copy of the GNU General Public License along with RTEMS; see
+ * file COPYING. If not, write to the Free Software Foundation, 675
+ * Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * As a special exception, including RTEMS header files in a file,
+ * instantiating RTEMS generics or templates, or linking other files
+ * with RTEMS objects to produce an executable application, does not
+ * by itself cause the resulting executable application to be covered
+ * by the GNU General Public License. This exception does not
+ * however invalidate any other reasons why the executable file might be
+ * covered by the GNU Public License.
  *
  * @(#) sh7750_regs.h,v 1.2.4.1 2003/09/04 18:46:00 joel Exp
  */
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index 38ca1e33c7..312e2afaf9 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -334,7 +334,7 @@ static void *sparc32_dma_init(hwaddr dma_base,
                                    OBJECT(dma), "espdma"));
     sysbus_connect_irq(SYS_BUS_DEVICE(espdma), 0, espdma_irq);
 
-    esp = ESP(object_resolve_path_component(OBJECT(espdma), "esp"));
+    esp = SYSBUS_ESP(object_resolve_path_component(OBJECT(espdma), "esp"));
 
     ledma = SPARC32_LEDMA_DEVICE(object_resolve_path_component(
                                  OBJECT(dma), "ledma"));
diff --git a/hw/timer/Kconfig b/hw/timer/Kconfig
index 726be4f82c..bac2511715 100644
--- a/hw/timer/Kconfig
+++ b/hw/timer/Kconfig
@@ -36,6 +36,10 @@ config CMSDK_APB_DUALTIMER
     bool
     select PTIMER
 
+config SH_TIMER
+    bool
+    select PTIMER
+
 config RENESAS_TMR
     bool
 
diff --git a/hw/timer/meson.build b/hw/timer/meson.build
index 91ab2aa803..a429792b08 100644
--- a/hw/timer/meson.build
+++ b/hw/timer/meson.build
@@ -30,7 +30,7 @@ softmmu_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_synctimer.c'))
 softmmu_ss.add(when: 'CONFIG_PUV3', if_true: files('puv3_ost.c'))
 softmmu_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx_timer.c'))
 softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_systmr.c'))
-softmmu_ss.add(when: 'CONFIG_SH4', if_true: files('sh_timer.c'))
+softmmu_ss.add(when: 'CONFIG_SH_TIMER', if_true: files('sh_timer.c'))
 softmmu_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_timer.c'))
 softmmu_ss.add(when: 'CONFIG_SSE_COUNTER', if_true: files('sse-counter.c'))
 softmmu_ss.add(when: 'CONFIG_SSE_TIMER', if_true: files('sse-timer.c'))
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 36e8da4fc2..f581cf9fd7 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -57,6 +57,8 @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter);
 uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap,
                                               uint64_t offset, uint64_t bytes);
 uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap);
+uint64_t bdrv_dirty_bitmap_serialization_coverage(int serialized_chunk_size,
+        const BdrvDirtyBitmap *bitmap);
 void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap,
                                       uint8_t *buf, uint64_t offset,
                                       uint64_t bytes);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index b7b3c0ef12..6b036cae8f 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -448,9 +448,6 @@ struct TranslationBlock {
     target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
     target_ulong cs_base; /* CS base for this block */
     uint32_t flags; /* flags defining in which context the code was generated */
-    uint16_t size;      /* size of target code for this block (1 <=
-                           size <= TARGET_PAGE_SIZE) */
-    uint16_t icount;
     uint32_t cflags;    /* compile flags */
 #define CF_COUNT_MASK  0x00007fff
 #define CF_LAST_IO     0x00008000 /* Last insn may be an IO access.  */
@@ -460,12 +457,18 @@ struct TranslationBlock {
 #define CF_PARALLEL    0x00080000 /* Generate code for a parallel context */
 #define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
 #define CF_CLUSTER_SHIFT 24
-/* cflags' mask for hashing/comparison, basically ignore CF_INVALID */
-#define CF_HASH_MASK   (~CF_INVALID)
 
     /* Per-vCPU dynamic tracing state used to generate this TB */
     uint32_t trace_vcpu_dstate;
 
+    /*
+     * Above fields used for comparing
+     */
+
+    /* size of target code for this block (1 <= size <= TARGET_PAGE_SIZE) */
+    uint16_t size;
+    uint16_t icount;
+
     struct tb_tc tc;
 
     /* first and second physical page containing code. The lower bit
@@ -510,8 +513,6 @@ struct TranslationBlock {
     uintptr_t jmp_dest[2];
 };
 
-extern bool parallel_cpus;
-
 /* Hide the qatomic_read to make code a little easier on the eyes */
 static inline uint32_t tb_cflags(const TranslationBlock *tb)
 {
@@ -519,10 +520,9 @@ static inline uint32_t tb_cflags(const TranslationBlock *tb)
 }
 
 /* current cflags for hashing/comparison */
-static inline uint32_t curr_cflags(void)
+static inline uint32_t curr_cflags(CPUState *cpu)
 {
-    return (parallel_cpus ? CF_PARALLEL : 0)
-         | (icount_enabled() ? CF_USE_ICOUNT : 0);
+    return cpu->tcg_cflags;
 }
 
 /* TranslationBlock invalidate API */
@@ -536,7 +536,7 @@ void tb_flush(CPUState *cpu);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                    target_ulong cs_base, uint32_t flags,
-                                   uint32_t cf_mask);
+                                   uint32_t cflags);
 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
 
 /* GETPC is the true target of the return instruction that we'll execute.  */
diff --git a/include/exec/poison.h b/include/exec/poison.h
index d7ae1f23e7..6bb86f6c2f 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -10,6 +10,7 @@
 #pragma GCC poison TARGET_ALPHA
 #pragma GCC poison TARGET_ARM
 #pragma GCC poison TARGET_CRIS
+#pragma GCC poison TARGET_HEXAGON
 #pragma GCC poison TARGET_HPPA
 #pragma GCC poison TARGET_LM32
 #pragma GCC poison TARGET_M68K
@@ -73,6 +74,7 @@
 #pragma GCC poison CONFIG_CRIS_DIS
 #pragma GCC poison CONFIG_HPPA_DIS
 #pragma GCC poison CONFIG_I386_DIS
+#pragma GCC poison CONFIG_HEXAGON_DIS
 #pragma GCC poison CONFIG_LM32_DIS
 #pragma GCC poison CONFIG_M68K_DIS
 #pragma GCC poison CONFIG_MICROBLAZE_DIS
diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h
index 9cf475bb03..29d61ceb34 100644
--- a/include/exec/tb-lookup.h
+++ b/include/exec/tb-lookup.h
@@ -17,30 +17,28 @@
 #include "exec/tb-hash.h"
 
 /* Might cause an exception, so have a longjmp destination ready */
-static inline TranslationBlock *
-tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base,
-                     uint32_t *flags, uint32_t cf_mask)
+static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
+                                          target_ulong cs_base,
+                                          uint32_t flags, uint32_t cflags)
 {
-    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     uint32_t hash;
 
-    cpu_get_tb_cpu_state(env, pc, cs_base, flags);
-    hash = tb_jmp_cache_hash_func(*pc);
-    tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
+    /* we should never be trying to look up an INVALID tb */
+    tcg_debug_assert(!(cflags & CF_INVALID));
 
-    cf_mask &= ~CF_CLUSTER_MASK;
-    cf_mask |= cpu->cluster_index << CF_CLUSTER_SHIFT;
+    hash = tb_jmp_cache_hash_func(pc);
+    tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
 
     if (likely(tb &&
-               tb->pc == *pc &&
-               tb->cs_base == *cs_base &&
-               tb->flags == *flags &&
+               tb->pc == pc &&
+               tb->cs_base == cs_base &&
+               tb->flags == flags &&
                tb->trace_vcpu_dstate == *cpu->trace_dstate &&
-               (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) {
+               tb_cflags(tb) == cflags)) {
         return tb;
     }
-    tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags, cf_mask);
+    tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
     if (tb == NULL) {
         return NULL;
     }
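
Illustrative sketch, not from the patch itself: tb_lookup() no longer derives pc/cs_base/flags on its own, and the cflags to match are passed in pre-computed. A caller is expected to look roughly like the following (variable names illustrative):

    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
    target_ulong pc, cs_base;
    uint32_t flags, cflags;
    TranslationBlock *tb;

    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
    cflags = curr_cflags(cpu);                 /* reads cpu->tcg_cflags */
    tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
    if (tb == NULL) {
        /* no cached translation; generate a new TB (outside this sketch) */
    }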
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index c005d3dc2d..c68bc3ba8a 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -282,6 +282,7 @@ struct qemu_work_item;
  *   to a cluster this will be UNASSIGNED_CLUSTER_INDEX; otherwise it will
  *   be the same as the cluster-id property of the CPU object's TYPE_CPU_CLUSTER
  *   QOM parent.
+ * @tcg_cflags: Pre-computed cflags for this cpu.
  * @nr_cores: Number of cores within this CPU package.
  * @nr_threads: Number of threads within this CPU.
  * @running: #true if CPU is currently running (lockless).
@@ -412,6 +413,7 @@ struct CPUState {
     /* TODO Move common fields from CPUArchState here. */
     int cpu_index;
     int cluster_index;
+    uint32_t tcg_cflags;
     uint32_t halted;
     uint32_t can_do_io;
     int32_t exception_index;
diff --git a/include/hw/scsi/esp.h b/include/hw/scsi/esp.h
index d8a6263c13..95088490aa 100644
--- a/include/hw/scsi/esp.h
+++ b/include/hw/scsi/esp.h
@@ -3,6 +3,7 @@
 
 #include "hw/scsi/scsi.h"
 #include "hw/sysbus.h"
+#include "qemu/fifo8.h"
 #include "qom/object.h"
 
 /* esp.c */
@@ -10,19 +11,17 @@
 typedef void (*ESPDMAMemoryReadWriteFunc)(void *opaque, uint8_t *buf, int len);
 
 #define ESP_REGS 16
-#define TI_BUFSZ 16
-#define ESP_CMDBUF_SZ 32
+#define ESP_FIFO_SZ 16
+#define ESP_CMDFIFO_SZ 32
 
 typedef struct ESPState ESPState;
 
-enum pdma_origin_id {
-    PDMA,
-    TI,
-    CMD,
-    ASYNC,
-};
+#define TYPE_ESP "esp"
+OBJECT_DECLARE_SIMPLE_TYPE(ESPState, ESP)
 
 struct ESPState {
+    DeviceState parent_obj;
+
     uint8_t rregs[ESP_REGS];
     uint8_t wregs[ESP_REGS];
     qemu_irq irq;
@@ -30,24 +29,18 @@ struct ESPState {
     uint8_t chip_id;
     bool tchi_written;
     int32_t ti_size;
-    uint32_t ti_rptr, ti_wptr;
     uint32_t status;
-    uint32_t deferred_status;
-    bool deferred_complete;
     uint32_t dma;
-    uint8_t ti_buf[TI_BUFSZ];
+    Fifo8 fifo;
     SCSIBus bus;
     SCSIDevice *current_dev;
     SCSIRequest *current_req;
-    uint8_t cmdbuf[ESP_CMDBUF_SZ];
-    uint32_t cmdlen;
+    Fifo8 cmdfifo;
+    uint8_t cmdfifo_cdb_offset;
     uint32_t do_cmd;
 
-    /* The amount of data left in the current DMA transfer.  */
-    uint32_t dma_left;
-    /* The size of the current DMA transfer.  Zero if no transfer is in
-       progress.  */
-    uint32_t dma_counter;
+    bool data_in_ready;
+    uint8_t ti_cmd;
     int dma_enabled;
 
     uint32_t async_len;
@@ -57,16 +50,22 @@ struct ESPState {
     ESPDMAMemoryReadWriteFunc dma_memory_write;
     void *dma_opaque;
     void (*dma_cb)(ESPState *s);
-    uint8_t pdma_buf[32];
-    int pdma_origin;
-    uint32_t pdma_len;
-    uint32_t pdma_start;
-    uint32_t pdma_cur;
     void (*pdma_cb)(ESPState *s);
+
+    uint8_t mig_version_id;
+
+    /* Legacy fields for vmstate_esp version < 5 */
+    uint32_t mig_dma_left;
+    uint32_t mig_deferred_status;
+    bool mig_deferred_complete;
+    uint32_t mig_ti_rptr, mig_ti_wptr;
+    uint8_t mig_ti_buf[ESP_FIFO_SZ];
+    uint8_t mig_cmdbuf[ESP_CMDFIFO_SZ];
+    uint32_t mig_cmdlen;
 };
 
-#define TYPE_ESP "esp"
-OBJECT_DECLARE_SIMPLE_TYPE(SysBusESPState, ESP)
+#define TYPE_SYSBUS_ESP "sysbus-esp"
+OBJECT_DECLARE_SIMPLE_TYPE(SysBusESPState, SYSBUS_ESP)
 
 struct SysBusESPState {
     /*< private >*/
@@ -142,6 +141,7 @@ struct SysBusESPState {
 #define INTR_RST 0x80
 
 #define SEQ_0 0x0
+#define SEQ_MO 0x1
 #define SEQ_CD 0x4
 
 #define CFG1_RESREPT 0x40
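
Illustrative sketch, not from the patch itself: the fixed ti_buf[]/cmdbuf[] arrays and their read/write pointers are replaced by the generic Fifo8 from "qemu/fifo8.h". Typical use of that API, with illustrative values:

    Fifo8 fifo;

    fifo8_create(&fifo, ESP_FIFO_SZ);      /* 16-byte FIFO, freed below */
    if (!fifo8_is_full(&fifo)) {
        fifo8_push(&fifo, 0xc0);           /* enqueue one byte */
    }
    if (!fifo8_is_empty(&fifo)) {
        uint8_t byte = fifo8_pop(&fifo);   /* dequeue in FIFO order */
        (void)byte;
    }
    fifo8_reset(&fifo);                    /* drop any queued bytes */
    fifo8_destroy(&fifo);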
diff --git a/include/hw/sh4/sh.h b/include/hw/sh4/sh.h
index 93f464bf4c..becb596979 100644
--- a/include/hw/sh4/sh.h
+++ b/include/hw/sh4/sh.h
@@ -1,6 +1,31 @@
-#ifndef QEMU_SH_H
-#define QEMU_SH_H
-/* Definitions for SH board emulation.  */
+/*
+ * Definitions for SH board emulation
+ *
+ * Copyright (c) 2005 Samuel Tardieu
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef QEMU_HW_SH_H
+#define QEMU_HW_SH_H
 
 #include "hw/sh4/sh_intc.h"
 #include "target/sh4/cpu-qom.h"
diff --git a/linux-user/main.c b/linux-user/main.c
index 81f48ff54e..4f4746dce8 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -205,6 +205,7 @@ CPUArchState *cpu_copy(CPUArchState *env)
     /* Reset non arch specific state */
     cpu_reset(new_cpu);
 
+    new_cpu->tcg_cflags = cpu->tcg_cflags;
     memcpy(new_env, env, sizeof(CPUArchState));
 
     /* Clone all break/watchpoints.
diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
index cc89a48ff8..29c1ee30e6 100644
--- a/linux-user/sh4/signal.c
+++ b/linux-user/sh4/signal.c
@@ -82,9 +82,11 @@ static abi_ulong get_sigframe(struct target_sigaction *ka,
     return (sp - frame_size) & -8ul;
 }
 
-/* Notice when we're in the middle of a gUSA region and reset.
-   Note that this will only occur for !parallel_cpus, as we will
-   translate such sequences differently in a parallel context.  */
+/*
+ * Notice when we're in the middle of a gUSA region and reset.
+ * Note that this will only occur when #CF_PARALLEL is unset, as we
+ * will translate such sequences differently in a parallel context.
+ */
 static void unwind_gusa(CPUSH4State *regs)
 {
     /* If the stack pointer is sufficiently negative, and we haven't
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 389ec09764..9522f603aa 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -6481,6 +6481,16 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         /* Grab a mutex so that thread setup appears atomic.  */
         pthread_mutex_lock(&clone_lock);
 
+        /*
+         * If this is our first additional thread, we need to ensure we
+         * generate code for parallel execution and flush old translations.
+         * Do this now so that the copy gets CF_PARALLEL too.
+         */
+        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
+            cpu->tcg_cflags |= CF_PARALLEL;
+            tb_flush(cpu);
+        }
+
         /* we create a new CPU instance. */
         new_env = cpu_copy(env);
         /* Init regs that differ from the parent.  */
@@ -6521,14 +6531,6 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         sigprocmask(SIG_BLOCK, &sigmask, &info.sigmask);
         cpu->random_seed = qemu_guest_random_seed_thread_part1();
 
-        /* If this is our first additional thread, we need to ensure we
-         * generate code for parallel execution and flush old translations.
-         */
-        if (!parallel_cpus) {
-            parallel_cpus = true;
-            tb_flush(cpu);
-        }
-
         ret = pthread_create(&info.thread, &attr, clone_func, &info);
         /* TODO: Free new CPU state if thread creation failed.  */
 
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 19e0aa9836..7e8b0fab89 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -937,7 +937,7 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
                     cpu_loop_exit_restore(cpu, ra);
                 } else {
                     /* Force execution of one insn next time.  */
-                    cpu->cflags_next_tb = 1 | curr_cflags();
+                    cpu->cflags_next_tb = 1 | curr_cflags(cpu);
                     mmap_unlock();
                     if (ra) {
                         cpu_restore_state(cpu, ra, true);
diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c
index 9021a46b3a..23756fc8e5 100644
--- a/storage-daemon/qemu-storage-daemon.c
+++ b/storage-daemon/qemu-storage-daemon.c
@@ -59,6 +59,7 @@
 #include "sysemu/runstate.h"
 #include "trace/control.h"
 
+static const char *pid_file;
 static volatile bool exit_requested = false;
 
 void qemu_system_killed(int signal, pid_t pid)
@@ -115,6 +116,8 @@ static void help(void)
 "                         See the qemu(1) man page for documentation of the\n"
 "                         objects that can be added.\n"
 "\n"
+"  --pidfile <path>       write process ID to a file after startup\n"
+"\n"
 QEMU_HELP_BOTTOM "\n",
     error_get_progname());
 }
@@ -126,6 +129,7 @@ enum {
     OPTION_MONITOR,
     OPTION_NBD_SERVER,
     OPTION_OBJECT,
+    OPTION_PIDFILE,
 };
 
 extern QemuOptsList qemu_chardev_opts;
@@ -152,6 +156,20 @@ static void init_qmp_commands(void)
                          qmp_marshal_qmp_capabilities, QCO_ALLOW_PRECONFIG);
 }
 
+static int getopt_set_loc(int argc, char **argv, const char *optstring,
+                          const struct option *longopts)
+{
+    int c, save_index;
+
+    optarg = NULL;
+    save_index = optind;
+    c = getopt_long(argc, argv, optstring, longopts, NULL);
+    if (optarg) {
+        loc_set_cmdline(argv, save_index, MAX(1, optind - save_index));
+    }
+    return c;
+}
+
 static void process_options(int argc, char *argv[])
 {
     int c;
@@ -164,6 +182,7 @@ static void process_options(int argc, char *argv[])
         {"monitor", required_argument, NULL, OPTION_MONITOR},
         {"nbd-server", required_argument, NULL, OPTION_NBD_SERVER},
         {"object", required_argument, NULL, OPTION_OBJECT},
+        {"pidfile", required_argument, NULL, OPTION_PIDFILE},
         {"trace", required_argument, NULL, 'T'},
         {"version", no_argument, NULL, 'V'},
         {0, 0, 0, 0}
@@ -174,7 +193,7 @@ static void process_options(int argc, char *argv[])
      * they are given on the command lines. This means that things must be
      * defined first before they can be referenced in another option.
      */
-    while ((c = getopt_long(argc, argv, "hT:V", long_options, NULL)) != -1) {
+    while ((c = getopt_set_loc(argc, argv, "-hT:V", long_options)) != -1) {
         switch (c) {
         case '?':
             exit(EXIT_FAILURE);
@@ -275,14 +294,38 @@ static void process_options(int argc, char *argv[])
                 qobject_unref(args);
                 break;
             }
+        case OPTION_PIDFILE:
+            pid_file = optarg;
+            break;
+        case 1:
+            error_report("Unexpected argument");
+            exit(EXIT_FAILURE);
         default:
             g_assert_not_reached();
         }
     }
-    if (optind != argc) {
-        error_report("Unexpected argument: %s", argv[optind]);
+    loc_set_none();
+}
+
+static void pid_file_cleanup(void)
+{
+    unlink(pid_file);
+}
+
+static void pid_file_init(void)
+{
+    Error *err = NULL;
+
+    if (!pid_file) {
+        return;
+    }
+
+    if (!qemu_write_pidfile(pid_file, &err)) {
+        error_reportf_err(err, "cannot create PID file: ");
         exit(EXIT_FAILURE);
     }
+
+    atexit(pid_file_cleanup);
 }
 
 int main(int argc, char *argv[])
@@ -312,6 +355,13 @@ int main(int argc, char *argv[])
     qemu_init_main_loop(&error_fatal);
     process_options(argc, argv);
 
+    /*
+     * Write the pid file after creating chardevs, exports, and NBD servers but
+     * before accepting connections. This ordering is documented. Do not change
+     * it.
+     */
+    pid_file_init();
+
     while (!exit_requested) {
         main_loop_wait(false);
     }
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index 78c4efb5cb..cfcb8173ba 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -459,7 +459,7 @@ static inline void gen_logical_not(TCGv dest, TCGv src)
                    : (fCAST##REGSTYPE##s(SRC) >> (SHAMT)))
 #define fASHIFTR(SRC, SHAMT, REGSTYPE) (fCAST##REGSTYPE##s(SRC) >> (SHAMT))
 #define fLSHIFTR(SRC, SHAMT, REGSTYPE) \
-    (((SHAMT) >= 64) ? 0 : (fCAST##REGSTYPE##u(SRC) >> (SHAMT)))
+    (((SHAMT) >= (sizeof(SRC) * 8)) ? 0 : (fCAST##REGSTYPE##u(SRC) >> (SHAMT)))
 #define fROTL(SRC, SHAMT, REGSTYPE) \
     (((SHAMT) == 0) ? (SRC) : ((fCAST##REGSTYPE##u(SRC) << (SHAMT)) | \
                               ((fCAST##REGSTYPE##u(SRC) >> \
@@ -469,7 +469,7 @@ static inline void gen_logical_not(TCGv dest, TCGv src)
                               ((fCAST##REGSTYPE##u(SRC) << \
                                  ((sizeof(SRC) * 8) - (SHAMT))))))
 #define fASHIFTL(SRC, SHAMT, REGSTYPE) \
-    (((SHAMT) >= 64) ? 0 : (fCAST##REGSTYPE##s(SRC) << (SHAMT)))
+    (((SHAMT) >= (sizeof(SRC) * 8)) ? 0 : (fCAST##REGSTYPE##s(SRC) << (SHAMT)))
 
 #ifdef QEMU_GENERATE
 #define fLOAD(NUM, SIZE, SIGN, EA, DST) MEM_LOAD##SIZE##SIGN(DST, EA)
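
Illustrative sketch, not from the patch itself: the shift guards now use the width of SRC because shifting an N-bit integer by N or more bits is undefined behaviour in C; the old ">= 64" bound only protected 64-bit operands. A minimal illustration:

    uint32_t src = 0x80000000u;
    unsigned shamt = 40;                       /* >= 32, < 64 */
    uint32_t res = (shamt >= sizeof(src) * 8) ? 0 : (src >> shamt);
    /* res == 0; with the old ">= 64" guard the shift itself would be UB */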
diff --git a/target/hexagon/opcodes.c b/target/hexagon/opcodes.c
index 4eef5fc40f..35d790cdd5 100644
--- a/target/hexagon/opcodes.c
+++ b/target/hexagon/opcodes.c
@@ -82,6 +82,7 @@ static void init_attribs(int tag, ...)
     while ((attr = va_arg(ap, int)) != 0) {
         set_bit(attr, opcode_attribs[tag]);
     }
+    va_end(ap);
 }
 
 const OpcodeEncoding opcode_encodings[] = {
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index 714e3b5641..01c4344082 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -271,17 +271,6 @@ typedef SuperHCPU ArchCPU;
 
 #include "exec/cpu-all.h"
 
-/* Memory access type */
-enum {
-    /* Privilege */
-    ACCESS_PRIV = 0x01,
-    /* Direction */
-    ACCESS_WRITE = 0x02,
-    /* Type of instruction */
-    ACCESS_CODE = 0x10,
-    ACCESS_INT = 0x20
-};
-
 /* MMU control register */
 #define MMUCR    0x1F000010
 #define MMUCR_AT (1<<0)
diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index 408478ce5d..bd8e034f17 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -330,22 +330,22 @@ static int find_utlb_entry(CPUSH4State * env, target_ulong address, int use_asid
    MMU_IADDR_ERROR, MMU_DADDR_ERROR_READ, MMU_DADDR_ERROR_WRITE.
 */
 static int get_mmu_address(CPUSH4State * env, target_ulong * physical,
-			   int *prot, target_ulong address,
-			   int rw, int access_type)
+                           int *prot, target_ulong address,
+                           MMUAccessType access_type)
 {
     int use_asid, n;
     tlb_t *matching = NULL;
 
     use_asid = !(env->mmucr & MMUCR_SV) || !(env->sr & (1u << SR_MD));
 
-    if (rw == 2) {
+    if (access_type == MMU_INST_FETCH) {
         n = find_itlb_entry(env, address, use_asid);
-	if (n >= 0) {
-	    matching = &env->itlb[n];
+        if (n >= 0) {
+            matching = &env->itlb[n];
             if (!(env->sr & (1u << SR_MD)) && !(matching->pr & 2)) {
-		n = MMU_ITLB_VIOLATION;
+                n = MMU_ITLB_VIOLATION;
             } else {
-		*prot = PAGE_EXEC;
+                *prot = PAGE_EXEC;
             }
         } else {
             n = find_utlb_entry(env, address, use_asid);
@@ -365,17 +365,17 @@ static int get_mmu_address(CPUSH4State * env, target_ulong * physical,
             } else if (n == MMU_DTLB_MISS) {
                 n = MMU_ITLB_MISS;
             }
-	}
+        }
     } else {
-	n = find_utlb_entry(env, address, use_asid);
-	if (n >= 0) {
-	    matching = &env->utlb[n];
+        n = find_utlb_entry(env, address, use_asid);
+        if (n >= 0) {
+            matching = &env->utlb[n];
             if (!(env->sr & (1u << SR_MD)) && !(matching->pr & 2)) {
-                n = (rw == 1) ? MMU_DTLB_VIOLATION_WRITE :
-                    MMU_DTLB_VIOLATION_READ;
-            } else if ((rw == 1) && !(matching->pr & 1)) {
+                n = (access_type == MMU_DATA_STORE)
+                    ? MMU_DTLB_VIOLATION_WRITE : MMU_DTLB_VIOLATION_READ;
+            } else if ((access_type == MMU_DATA_STORE) && !(matching->pr & 1)) {
                 n = MMU_DTLB_VIOLATION_WRITE;
-            } else if ((rw == 1) && !matching->d) {
+            } else if ((access_type == MMU_DATA_STORE) && !matching->d) {
                 n = MMU_DTLB_INITIAL_WRITE;
             } else {
                 *prot = PAGE_READ;
@@ -383,56 +383,56 @@ static int get_mmu_address(CPUSH4State * env, target_ulong * physical,
                     *prot |= PAGE_WRITE;
                 }
             }
-	} else if (n == MMU_DTLB_MISS) {
-	    n = (rw == 1) ? MMU_DTLB_MISS_WRITE :
-		MMU_DTLB_MISS_READ;
-	}
+        } else if (n == MMU_DTLB_MISS) {
+            n = (access_type == MMU_DATA_STORE)
+                ? MMU_DTLB_MISS_WRITE : MMU_DTLB_MISS_READ;
+        }
     }
     if (n >= 0) {
-	n = MMU_OK;
-	*physical = ((matching->ppn << 10) & ~(matching->size - 1)) |
-	    (address & (matching->size - 1));
+        n = MMU_OK;
+        *physical = ((matching->ppn << 10) & ~(matching->size - 1))
+                    | (address & (matching->size - 1));
     }
     return n;
 }
 
 static int get_physical_address(CPUSH4State * env, target_ulong * physical,
                                 int *prot, target_ulong address,
-                                int rw, int access_type)
+                                MMUAccessType access_type)
 {
     /* P1, P2 and P4 areas do not use translation */
-    if ((address >= 0x80000000 && address < 0xc0000000) ||
-	address >= 0xe0000000) {
+    if ((address >= 0x80000000 && address < 0xc0000000) || address >= 0xe0000000) {
         if (!(env->sr & (1u << SR_MD))
-	    && (address < 0xe0000000 || address >= 0xe4000000)) {
-	    /* Unauthorized access in user mode (only store queues are available) */
+                && (address < 0xe0000000 || address >= 0xe4000000)) {
+            /* Unauthorized access in user mode (only store queues are available) */
             qemu_log_mask(LOG_GUEST_ERROR, "Unauthorized access\n");
-	    if (rw == 0)
-		return MMU_DADDR_ERROR_READ;
-	    else if (rw == 1)
-		return MMU_DADDR_ERROR_WRITE;
-	    else
-		return MMU_IADDR_ERROR;
-	}
-	if (address >= 0x80000000 && address < 0xc0000000) {
-	    /* Mask upper 3 bits for P1 and P2 areas */
-	    *physical = address & 0x1fffffff;
-	} else {
-	    *physical = address;
-	}
-	*prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
-	return MMU_OK;
+            if (access_type == MMU_DATA_LOAD) {
+                return MMU_DADDR_ERROR_READ;
+            } else if (access_type == MMU_DATA_STORE) {
+                return MMU_DADDR_ERROR_WRITE;
+            } else {
+                return MMU_IADDR_ERROR;
+            }
+        }
+        if (address >= 0x80000000 && address < 0xc0000000) {
+            /* Mask upper 3 bits for P1 and P2 areas */
+            *physical = address & 0x1fffffff;
+        } else {
+            *physical = address;
+        }
+        *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+        return MMU_OK;
     }
 
     /* If MMU is disabled, return the corresponding physical page */
     if (!(env->mmucr & MMUCR_AT)) {
-	*physical = address & 0x1FFFFFFF;
-	*prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
-	return MMU_OK;
+        *physical = address & 0x1FFFFFFF;
+        *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+        return MMU_OK;
     }
 
     /* We need to resort to the MMU */
-    return get_mmu_address(env, physical, prot, address, rw, access_type);
+    return get_mmu_address(env, physical, prot, address, access_type);
 }
 
 hwaddr superh_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
@@ -441,7 +441,8 @@ hwaddr superh_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
     target_ulong physical;
     int prot;
 
-    get_physical_address(&cpu->env, &physical, &prot, addr, 0, 0);
+    get_physical_address(&cpu->env, &physical, &prot, addr, MMU_DATA_LOAD);
+
     return physical;
 }
 
@@ -813,11 +814,9 @@ bool superh_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
            MMU_DTLB_VIOLATION_READ);
 #else
     target_ulong physical;
-    int prot, sh_access_type;
+    int prot;
 
-    sh_access_type = ACCESS_INT;
-    ret = get_physical_address(env, &physical, &prot, address,
-                               access_type, sh_access_type);
+    ret = get_physical_address(env, &physical, &prot, address, access_type);
 
     if (ret == MMU_OK) {
         address &= TARGET_PAGE_MASK;
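
Illustrative sketch, not from the patch itself: the former rw integer (0 = read, 1 = write, 2 = instruction fetch) is replaced by the generic MMUAccessType enum from "hw/core/cpu.h". The mapping applied throughout the conversion, written out as a hypothetical helper:

    static MMUAccessType rw_to_access_type(int rw)   /* hypothetical */
    {
        switch (rw) {
        case 0:
            return MMU_DATA_LOAD;
        case 1:
            return MMU_DATA_STORE;
        default:
            return MMU_INST_FETCH;                   /* old rw == 2 */
        }
    }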
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 1376cdc404..fcaa5aface 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -519,6 +519,39 @@ typedef enum {
     I3606_BIC       = 0x2f001400,
     I3606_ORR       = 0x0f001400,
 
+    /* AdvSIMD scalar shift by immediate */
+    I3609_SSHR      = 0x5f000400,
+    I3609_SSRA      = 0x5f001400,
+    I3609_SHL       = 0x5f005400,
+    I3609_USHR      = 0x7f000400,
+    I3609_USRA      = 0x7f001400,
+    I3609_SLI       = 0x7f005400,
+
+    /* AdvSIMD scalar three same */
+    I3611_SQADD     = 0x5e200c00,
+    I3611_SQSUB     = 0x5e202c00,
+    I3611_CMGT      = 0x5e203400,
+    I3611_CMGE      = 0x5e203c00,
+    I3611_SSHL      = 0x5e204400,
+    I3611_ADD       = 0x5e208400,
+    I3611_CMTST     = 0x5e208c00,
+    I3611_UQADD     = 0x7e200c00,
+    I3611_UQSUB     = 0x7e202c00,
+    I3611_CMHI      = 0x7e203400,
+    I3611_CMHS      = 0x7e203c00,
+    I3611_USHL      = 0x7e204400,
+    I3611_SUB       = 0x7e208400,
+    I3611_CMEQ      = 0x7e208c00,
+
+    /* AdvSIMD scalar two-reg misc */
+    I3612_CMGT0     = 0x5e208800,
+    I3612_CMEQ0     = 0x5e209800,
+    I3612_CMLT0     = 0x5e20a800,
+    I3612_ABS       = 0x5e20b800,
+    I3612_CMGE0     = 0x7e208800,
+    I3612_CMLE0     = 0x7e209800,
+    I3612_NEG       = 0x7e20b800,
+
     /* AdvSIMD shift by immediate */
     I3614_SSHR      = 0x0f000400,
     I3614_SSRA      = 0x0f001400,
@@ -561,7 +594,7 @@ typedef enum {
     I3617_CMEQ0     = 0x0e209800,
     I3617_CMLT0     = 0x0e20a800,
     I3617_CMGE0     = 0x2e208800,
-    I3617_CMLE0     = 0x2e20a800,
+    I3617_CMLE0     = 0x2e209800,
     I3617_NOT       = 0x2e205800,
     I3617_ABS       = 0x0e20b800,
     I3617_NEG       = 0x2e20b800,
@@ -735,6 +768,25 @@ static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
               | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
 }
 
+static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
+                              TCGReg rd, TCGReg rn, unsigned immhb)
+{
+    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
+static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
+                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
+{
+    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
+              | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
+static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
+                              unsigned size, TCGReg rd, TCGReg rn)
+{
+    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                               TCGReg rd, TCGReg rn, unsigned immhb)
 {
@@ -1410,10 +1462,10 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
     }
 }
 
-static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
-                                   TCGReg rh, TCGReg al, TCGReg ah,
-                                   tcg_target_long bl, tcg_target_long bh,
-                                   bool const_bl, bool const_bh, bool sub)
+static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
+                            TCGReg rh, TCGReg al, TCGReg ah,
+                            tcg_target_long bl, tcg_target_long bh,
+                            bool const_bl, bool const_bh, bool sub)
 {
     TCGReg orig_rl = rl;
     AArch64Insn insn;
@@ -1423,11 +1475,13 @@ static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
     }
 
     if (const_bl) {
-        insn = I3401_ADDSI;
-        if ((bl < 0) ^ sub) {
-            insn = I3401_SUBSI;
+        if (bl < 0) {
             bl = -bl;
+            insn = sub ? I3401_ADDSI : I3401_SUBSI;
+        } else {
+            insn = sub ? I3401_SUBSI : I3401_ADDSI;
         }
+
         if (unlikely(al == TCG_REG_XZR)) {
             /* ??? We want to allow al to be zero for the benefit of
                negation via subtraction.  However, that leaves open the
@@ -2234,23 +2288,38 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                            unsigned vecl, unsigned vece,
                            const TCGArg *args, const int *const_args)
 {
-    static const AArch64Insn cmp_insn[16] = {
+    static const AArch64Insn cmp_vec_insn[16] = {
         [TCG_COND_EQ] = I3616_CMEQ,
         [TCG_COND_GT] = I3616_CMGT,
         [TCG_COND_GE] = I3616_CMGE,
         [TCG_COND_GTU] = I3616_CMHI,
         [TCG_COND_GEU] = I3616_CMHS,
     };
-    static const AArch64Insn cmp0_insn[16] = {
+    static const AArch64Insn cmp_scalar_insn[16] = {
+        [TCG_COND_EQ] = I3611_CMEQ,
+        [TCG_COND_GT] = I3611_CMGT,
+        [TCG_COND_GE] = I3611_CMGE,
+        [TCG_COND_GTU] = I3611_CMHI,
+        [TCG_COND_GEU] = I3611_CMHS,
+    };
+    static const AArch64Insn cmp0_vec_insn[16] = {
         [TCG_COND_EQ] = I3617_CMEQ0,
         [TCG_COND_GT] = I3617_CMGT0,
         [TCG_COND_GE] = I3617_CMGE0,
         [TCG_COND_LT] = I3617_CMLT0,
         [TCG_COND_LE] = I3617_CMLE0,
     };
+    static const AArch64Insn cmp0_scalar_insn[16] = {
+        [TCG_COND_EQ] = I3612_CMEQ0,
+        [TCG_COND_GT] = I3612_CMGT0,
+        [TCG_COND_GE] = I3612_CMGE0,
+        [TCG_COND_LT] = I3612_CMLT0,
+        [TCG_COND_LE] = I3612_CMLE0,
+    };
 
     TCGType type = vecl + TCG_TYPE_V64;
     unsigned is_q = vecl;
+    bool is_scalar = !is_q && vece == MO_64;
     TCGArg a0, a1, a2, a3;
     int cmode, imm8;
 
@@ -2269,19 +2338,35 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
         break;
     case INDEX_op_add_vec:
-        tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_sub_vec:
-        tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_mul_vec:
         tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
         break;
     case INDEX_op_neg_vec:
-        tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
+        if (is_scalar) {
+            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
+        } else {
+            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
+        }
         break;
     case INDEX_op_abs_vec:
-        tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
+        if (is_scalar) {
+            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
+        } else {
+            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
+        }
         break;
     case INDEX_op_and_vec:
         if (const_args[2]) {
@@ -2335,16 +2420,32 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
         break;
     case INDEX_op_ssadd_vec:
-        tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_sssub_vec:
-        tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_usadd_vec:
-        tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_ussub_vec:
-        tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_smax_vec:
         tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
@@ -2362,22 +2463,46 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
         break;
     case INDEX_op_shli_vec:
-        tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
+        if (is_scalar) {
+            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
+        } else {
+            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
+        }
         break;
     case INDEX_op_shri_vec:
-        tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
+        } else {
+            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
+        }
         break;
     case INDEX_op_sari_vec:
-        tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
+        } else {
+            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
+        }
         break;
     case INDEX_op_aa64_sli_vec:
-        tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
+        if (is_scalar) {
+            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
+        } else {
+            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
+        }
         break;
     case INDEX_op_shlv_vec:
-        tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_aa64_sshl_vec:
-        tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_cmp_vec:
         {
@@ -2386,30 +2511,58 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 
             if (cond == TCG_COND_NE) {
                 if (const_args[2]) {
-                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
+                    if (is_scalar) {
+                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
+                    } else {
+                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
+                    }
                 } else {
-                    tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
+                    if (is_scalar) {
+                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
+                    } else {
+                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
+                    }
                     tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
                 }
             } else {
                 if (const_args[2]) {
-                    insn = cmp0_insn[cond];
-                    if (insn) {
-                        tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
-                        break;
+                    if (is_scalar) {
+                        insn = cmp0_scalar_insn[cond];
+                        if (insn) {
+                            tcg_out_insn_3612(s, insn, vece, a0, a1);
+                            break;
+                        }
+                    } else {
+                        insn = cmp0_vec_insn[cond];
+                        if (insn) {
+                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
+                            break;
+                        }
                     }
                     tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
                     a2 = TCG_VEC_TMP;
                 }
-                insn = cmp_insn[cond];
-                if (insn == 0) {
-                    TCGArg t;
-                    t = a1, a1 = a2, a2 = t;
-                    cond = tcg_swap_cond(cond);
-                    insn = cmp_insn[cond];
-                    tcg_debug_assert(insn != 0);
+                if (is_scalar) {
+                    insn = cmp_scalar_insn[cond];
+                    if (insn == 0) {
+                        TCGArg t;
+                        t = a1, a1 = a2, a2 = t;
+                        cond = tcg_swap_cond(cond);
+                        insn = cmp_scalar_insn[cond];
+                        tcg_debug_assert(insn != 0);
+                    }
+                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
+                } else {
+                    insn = cmp_vec_insn[cond];
+                    if (insn == 0) {
+                        TCGArg t;
+                        t = a1, a1 = a2, a2 = t;
+                        cond = tcg_swap_cond(cond);
+                        insn = cmp_vec_insn[cond];
+                        tcg_debug_assert(insn != 0);
+                    }
+                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
                 }
-                tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
             }
         }
         break;
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 63a12b197b..2991112829 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -346,6 +346,12 @@ static void set_jmp_reset_offset(TCGContext *s, int which)
     s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
 }
 
+/* Signal overflow, starting over with fewer guest insns. */
+static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
+{
+    siglongjmp(s->jmp_trans, -2);
+}
+
 #define C_PFX1(P, A)                    P##A
 #define C_PFX2(P, A, B)                 P##A##_##B
 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
@@ -507,11 +513,21 @@ static void tcg_region_trees_init(void)
     }
 }
 
-static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
+static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
 {
-    void *p = tcg_splitwx_to_rw(cp);
     size_t region_idx;
 
+    /*
+     * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
+     * a signal handler over which the caller has no control.
+     */
+    if (!in_code_gen_buffer(p)) {
+        p -= tcg_splitwx_diff;
+        if (!in_code_gen_buffer(p)) {
+            return NULL;
+        }
+    }
+
     if (p < region.start_aligned) {
         region_idx = 0;
     } else {
@@ -530,6 +546,7 @@ void tcg_tb_insert(TranslationBlock *tb)
 {
     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
 
+    g_assert(rt != NULL);
     qemu_mutex_lock(&rt->lock);
     g_tree_insert(rt->tree, &tb->tc, tb);
     qemu_mutex_unlock(&rt->lock);
@@ -539,6 +556,7 @@ void tcg_tb_remove(TranslationBlock *tb)
 {
     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
 
+    g_assert(rt != NULL);
     qemu_mutex_lock(&rt->lock);
     g_tree_remove(rt->tree, &tb->tc);
     qemu_mutex_unlock(&rt->lock);
@@ -555,6 +573,10 @@ TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
     TranslationBlock *tb;
     struct tb_tc s = { .ptr = (void *)tc_ptr };
 
+    if (rt == NULL) {
+        return NULL;
+    }
+
     qemu_mutex_lock(&rt->lock);
     tb = g_tree_lookup(rt->tree, &s);
     qemu_mutex_unlock(&rt->lock);
@@ -1310,8 +1332,7 @@ static TCGTemp *tcg_temp_alloc(TCGContext *s)
     int n = s->nb_temps++;
 
     if (n >= TCG_MAX_TEMPS) {
-        /* Signal overflow, starting over with fewer guest insns. */
-        siglongjmp(s->jmp_trans, -2);
+        tcg_raise_tb_overflow(s);
     }
     return memset(&s->temps[n], 0, sizeof(TCGTemp));
 }
diff --git a/tcg/tci.c b/tcg/tci.c
index fb3c97aaf1..3ccd30c39c 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -57,49 +57,6 @@ static tcg_target_ulong tci_read_reg(const tcg_target_ulong *regs, TCGReg index)
     return regs[index];
 }
 
-#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
-static int8_t tci_read_reg8s(const tcg_target_ulong *regs, TCGReg index)
-{
-    return (int8_t)tci_read_reg(regs, index);
-}
-#endif
-
-#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
-static int16_t tci_read_reg16s(const tcg_target_ulong *regs, TCGReg index)
-{
-    return (int16_t)tci_read_reg(regs, index);
-}
-#endif
-
-#if TCG_TARGET_REG_BITS == 64
-static int32_t tci_read_reg32s(const tcg_target_ulong *regs, TCGReg index)
-{
-    return (int32_t)tci_read_reg(regs, index);
-}
-#endif
-
-static uint8_t tci_read_reg8(const tcg_target_ulong *regs, TCGReg index)
-{
-    return (uint8_t)tci_read_reg(regs, index);
-}
-
-static uint16_t tci_read_reg16(const tcg_target_ulong *regs, TCGReg index)
-{
-    return (uint16_t)tci_read_reg(regs, index);
-}
-
-static uint32_t tci_read_reg32(const tcg_target_ulong *regs, TCGReg index)
-{
-    return (uint32_t)tci_read_reg(regs, index);
-}
-
-#if TCG_TARGET_REG_BITS == 64
-static uint64_t tci_read_reg64(const tcg_target_ulong *regs, TCGReg index)
-{
-    return tci_read_reg(regs, index);
-}
-#endif
-
 static void
 tci_write_reg(tcg_target_ulong *regs, TCGReg index, tcg_target_ulong value)
 {
@@ -169,78 +126,20 @@ tci_read_r(const tcg_target_ulong *regs, const uint8_t **tb_ptr)
     return value;
 }
 
-/* Read indexed register (8 bit) from bytecode. */
-static uint8_t tci_read_r8(const tcg_target_ulong *regs, const uint8_t **tb_ptr)
-{
-    uint8_t value = tci_read_reg8(regs, **tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-
-#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
-/* Read indexed register (8 bit signed) from bytecode. */
-static int8_t tci_read_r8s(const tcg_target_ulong *regs, const uint8_t **tb_ptr)
-{
-    int8_t value = tci_read_reg8s(regs, **tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-#endif
-
-/* Read indexed register (16 bit) from bytecode. */
-static uint16_t tci_read_r16(const tcg_target_ulong *regs,
-                             const uint8_t **tb_ptr)
-{
-    uint16_t value = tci_read_reg16(regs, **tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-
-#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
-/* Read indexed register (16 bit signed) from bytecode. */
-static int16_t tci_read_r16s(const tcg_target_ulong *regs,
-                             const uint8_t **tb_ptr)
-{
-    int16_t value = tci_read_reg16s(regs, **tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-#endif
-
-/* Read indexed register (32 bit) from bytecode. */
-static uint32_t tci_read_r32(const tcg_target_ulong *regs,
-                             const uint8_t **tb_ptr)
-{
-    uint32_t value = tci_read_reg32(regs, **tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-
 #if TCG_TARGET_REG_BITS == 32
 /* Read two indexed registers (2 * 32 bit) from bytecode. */
 static uint64_t tci_read_r64(const tcg_target_ulong *regs,
                              const uint8_t **tb_ptr)
 {
-    uint32_t low = tci_read_r32(regs, tb_ptr);
-    return tci_uint64(tci_read_r32(regs, tb_ptr), low);
+    uint32_t low = tci_read_r(regs, tb_ptr);
+    return tci_uint64(tci_read_r(regs, tb_ptr), low);
 }
 #elif TCG_TARGET_REG_BITS == 64
-/* Read indexed register (32 bit signed) from bytecode. */
-static int32_t tci_read_r32s(const tcg_target_ulong *regs,
-                             const uint8_t **tb_ptr)
-{
-    int32_t value = tci_read_reg32s(regs, **tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-
 /* Read indexed register (64 bit) from bytecode. */
 static uint64_t tci_read_r64(const tcg_target_ulong *regs,
                              const uint8_t **tb_ptr)
 {
-    uint64_t value = tci_read_reg64(regs, **tb_ptr);
-    *tb_ptr += 1;
-    return value;
+    return tci_read_r(regs, tb_ptr);
 }
 #endif
 
@@ -346,51 +245,34 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
     return result;
 }
 
-#ifdef CONFIG_SOFTMMU
-# define qemu_ld_ub \
-    helper_ret_ldub_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
-# define qemu_ld_leuw \
-    helper_le_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
-# define qemu_ld_leul \
-    helper_le_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
-# define qemu_ld_leq \
-    helper_le_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
-# define qemu_ld_beuw \
-    helper_be_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
-# define qemu_ld_beul \
-    helper_be_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
-# define qemu_ld_beq \
-    helper_be_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
-# define qemu_st_b(X) \
-    helper_ret_stb_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
-# define qemu_st_lew(X) \
-    helper_le_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
-# define qemu_st_lel(X) \
-    helper_le_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
-# define qemu_st_leq(X) \
-    helper_le_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
-# define qemu_st_bew(X) \
-    helper_be_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
-# define qemu_st_bel(X) \
-    helper_be_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
-# define qemu_st_beq(X) \
-    helper_be_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
-#else
-# define qemu_ld_ub      ldub_p(g2h(taddr))
-# define qemu_ld_leuw    lduw_le_p(g2h(taddr))
-# define qemu_ld_leul    (uint32_t)ldl_le_p(g2h(taddr))
-# define qemu_ld_leq     ldq_le_p(g2h(taddr))
-# define qemu_ld_beuw    lduw_be_p(g2h(taddr))
-# define qemu_ld_beul    (uint32_t)ldl_be_p(g2h(taddr))
-# define qemu_ld_beq     ldq_be_p(g2h(taddr))
-# define qemu_st_b(X)    stb_p(g2h(taddr), X)
-# define qemu_st_lew(X)  stw_le_p(g2h(taddr), X)
-# define qemu_st_lel(X)  stl_le_p(g2h(taddr), X)
-# define qemu_st_leq(X)  stq_le_p(g2h(taddr), X)
-# define qemu_st_bew(X)  stw_be_p(g2h(taddr), X)
-# define qemu_st_bel(X)  stl_be_p(g2h(taddr), X)
-# define qemu_st_beq(X)  stq_be_p(g2h(taddr), X)
-#endif
+#define qemu_ld_ub \
+    cpu_ldub_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_ld_leuw \
+    cpu_lduw_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_ld_leul \
+    cpu_ldl_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_ld_leq \
+    cpu_ldq_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_ld_beuw \
+    cpu_lduw_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_ld_beul \
+    cpu_ldl_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_ld_beq \
+    cpu_ldq_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_st_b(X) \
+    cpu_stb_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_st_lew(X) \
+    cpu_stw_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_st_lel(X) \
+    cpu_stl_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_st_leq(X) \
+    cpu_stq_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_st_bew(X) \
+    cpu_stw_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_st_bel(X) \
+    cpu_stl_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_st_beq(X) \
+    cpu_stq_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
 
 #if TCG_TARGET_REG_BITS == 64
 # define CASE_32_64(x) \
@@ -483,8 +365,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
             continue;
         case INDEX_op_setcond_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             condition = *tb_ptr++;
             tci_write_reg(regs, t0, tci_compare32(t1, t2, condition));
             break;
@@ -499,15 +381,15 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
 #elif TCG_TARGET_REG_BITS == 64
         case INDEX_op_setcond_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             condition = *tb_ptr++;
             tci_write_reg(regs, t0, tci_compare64(t1, t2, condition));
             break;
 #endif
-        case INDEX_op_mov_i32:
+        CASE_32_64(mov)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, t1);
             break;
         case INDEX_op_tci_movi_i32:
@@ -550,127 +432,130 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
             tci_write_reg(regs, t0, *(uint32_t *)(t1 + t2));
             break;
         CASE_32_64(st8)
-            t0 = tci_read_r8(regs, &tb_ptr);
+            t0 = tci_read_r(regs, &tb_ptr);
             t1 = tci_read_r(regs, &tb_ptr);
             t2 = tci_read_s32(&tb_ptr);
             *(uint8_t *)(t1 + t2) = t0;
             break;
         CASE_32_64(st16)
-            t0 = tci_read_r16(regs, &tb_ptr);
+            t0 = tci_read_r(regs, &tb_ptr);
             t1 = tci_read_r(regs, &tb_ptr);
             t2 = tci_read_s32(&tb_ptr);
             *(uint16_t *)(t1 + t2) = t0;
             break;
         case INDEX_op_st_i32:
         CASE_64(st32)
-            t0 = tci_read_r32(regs, &tb_ptr);
+            t0 = tci_read_r(regs, &tb_ptr);
             t1 = tci_read_r(regs, &tb_ptr);
             t2 = tci_read_s32(&tb_ptr);
             *(uint32_t *)(t1 + t2) = t0;
             break;
 
-            /* Arithmetic operations (32 bit). */
+            /* Arithmetic operations (mixed 32/64 bit). */
 
-        case INDEX_op_add_i32:
+        CASE_32_64(add)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, t1 + t2);
             break;
-        case INDEX_op_sub_i32:
+        CASE_32_64(sub)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, t1 - t2);
             break;
-        case INDEX_op_mul_i32:
+        CASE_32_64(mul)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, t1 * t2);
             break;
-        case INDEX_op_div_i32:
+        CASE_32_64(and)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, (int32_t)t1 / (int32_t)t2);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, t1 & t2);
             break;
-        case INDEX_op_divu_i32:
+        CASE_32_64(or)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 / t2);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, t1 | t2);
             break;
-        case INDEX_op_rem_i32:
+        CASE_32_64(xor)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, (int32_t)t1 % (int32_t)t2);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, t1 ^ t2);
             break;
-        case INDEX_op_remu_i32:
+
+            /* Arithmetic operations (32 bit). */
+
+        case INDEX_op_div_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 % t2);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (int32_t)t1 / (int32_t)t2);
             break;
-        case INDEX_op_and_i32:
+        case INDEX_op_divu_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 & t2);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (uint32_t)t1 / (uint32_t)t2);
             break;
-        case INDEX_op_or_i32:
+        case INDEX_op_rem_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 | t2);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (int32_t)t1 % (int32_t)t2);
             break;
-        case INDEX_op_xor_i32:
+        case INDEX_op_remu_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 ^ t2);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (uint32_t)t1 % (uint32_t)t2);
             break;
 
             /* Shift/rotate operations (32 bit). */
 
         case INDEX_op_shl_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 << (t2 & 31));
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (uint32_t)t1 << (t2 & 31));
             break;
         case INDEX_op_shr_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 >> (t2 & 31));
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (uint32_t)t1 >> (t2 & 31));
             break;
         case INDEX_op_sar_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, ((int32_t)t1 >> (t2 & 31)));
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (int32_t)t1 >> (t2 & 31));
             break;
 #if TCG_TARGET_HAS_rot_i32
         case INDEX_op_rotl_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, rol32(t1, t2 & 31));
             break;
         case INDEX_op_rotr_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, ror32(t1, t2 & 31));
             break;
 #endif
 #if TCG_TARGET_HAS_deposit_i32
         case INDEX_op_deposit_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tmp16 = *tb_ptr++;
             tmp8 = *tb_ptr++;
             tmp32 = (((1 << tmp8) - 1) << tmp16);
@@ -678,8 +563,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
             break;
 #endif
         case INDEX_op_brcond_i32:
-            t0 = tci_read_r32(regs, &tb_ptr);
-            t1 = tci_read_r32(regs, &tb_ptr);
+            t0 = tci_read_r(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             condition = *tb_ptr++;
             label = tci_read_label(&tb_ptr);
             if (tci_compare32(t0, t1, condition)) {
@@ -717,73 +602,68 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
         case INDEX_op_mulu2_i32:
             t0 = *tb_ptr++;
             t1 = *tb_ptr++;
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tmp64 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg64(regs, t1, t0, t2 * tmp64);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tmp64 = (uint32_t)tci_read_r(regs, &tb_ptr);
+            tci_write_reg64(regs, t1, t0, (uint32_t)t2 * tmp64);
             break;
 #endif /* TCG_TARGET_REG_BITS == 32 */
-#if TCG_TARGET_HAS_ext8s_i32
-        case INDEX_op_ext8s_i32:
+#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
+        CASE_32_64(ext8s)
             t0 = *tb_ptr++;
-            t1 = tci_read_r8s(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
+            t1 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (int8_t)t1);
             break;
 #endif
-#if TCG_TARGET_HAS_ext16s_i32
-        case INDEX_op_ext16s_i32:
+#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
+        CASE_32_64(ext16s)
             t0 = *tb_ptr++;
-            t1 = tci_read_r16s(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
+            t1 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (int16_t)t1);
             break;
 #endif
-#if TCG_TARGET_HAS_ext8u_i32
-        case INDEX_op_ext8u_i32:
+#if TCG_TARGET_HAS_ext8u_i32 || TCG_TARGET_HAS_ext8u_i64
+        CASE_32_64(ext8u)
             t0 = *tb_ptr++;
-            t1 = tci_read_r8(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
+            t1 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (uint8_t)t1);
             break;
 #endif
-#if TCG_TARGET_HAS_ext16u_i32
-        case INDEX_op_ext16u_i32:
+#if TCG_TARGET_HAS_ext16u_i32 || TCG_TARGET_HAS_ext16u_i64
+        CASE_32_64(ext16u)
             t0 = *tb_ptr++;
-            t1 = tci_read_r16(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
+            t1 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (uint16_t)t1);
             break;
 #endif
-#if TCG_TARGET_HAS_bswap16_i32
-        case INDEX_op_bswap16_i32:
+#if TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64
+        CASE_32_64(bswap16)
             t0 = *tb_ptr++;
-            t1 = tci_read_r16(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, bswap16(t1));
             break;
 #endif
-#if TCG_TARGET_HAS_bswap32_i32
-        case INDEX_op_bswap32_i32:
+#if TCG_TARGET_HAS_bswap32_i32 || TCG_TARGET_HAS_bswap32_i64
+        CASE_32_64(bswap32)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, bswap32(t1));
             break;
 #endif
-#if TCG_TARGET_HAS_not_i32
-        case INDEX_op_not_i32:
+#if TCG_TARGET_HAS_not_i32 || TCG_TARGET_HAS_not_i64
+        CASE_32_64(not)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, ~t1);
             break;
 #endif
-#if TCG_TARGET_HAS_neg_i32
-        case INDEX_op_neg_i32:
+#if TCG_TARGET_HAS_neg_i32 || TCG_TARGET_HAS_neg_i64
+        CASE_32_64(neg)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, -t1);
             break;
 #endif
 #if TCG_TARGET_REG_BITS == 64
-        case INDEX_op_mov_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
-            break;
         case INDEX_op_tci_movi_i64:
             t0 = *tb_ptr++;
             t1 = tci_read_i64(&tb_ptr);
@@ -805,7 +685,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
             tci_write_reg(regs, t0, *(uint64_t *)(t1 + t2));
             break;
         case INDEX_op_st_i64:
-            t0 = tci_read_r64(regs, &tb_ptr);
+            t0 = tci_read_r(regs, &tb_ptr);
             t1 = tci_read_r(regs, &tb_ptr);
             t2 = tci_read_s32(&tb_ptr);
             *(uint64_t *)(t1 + t2) = t0;
@@ -813,106 +693,70 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
 
             /* Arithmetic operations (64 bit). */
 
-        case INDEX_op_add_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 + t2);
-            break;
-        case INDEX_op_sub_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 - t2);
-            break;
-        case INDEX_op_mul_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 * t2);
-            break;
         case INDEX_op_div_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, (int64_t)t1 / (int64_t)t2);
             break;
         case INDEX_op_divu_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, (uint64_t)t1 / (uint64_t)t2);
             break;
         case INDEX_op_rem_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, (int64_t)t1 % (int64_t)t2);
             break;
         case INDEX_op_remu_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, (uint64_t)t1 % (uint64_t)t2);
             break;
-        case INDEX_op_and_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 & t2);
-            break;
-        case INDEX_op_or_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 | t2);
-            break;
-        case INDEX_op_xor_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 ^ t2);
-            break;
 
             /* Shift/rotate operations (64 bit). */
 
         case INDEX_op_shl_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, t1 << (t2 & 63));
             break;
         case INDEX_op_shr_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, t1 >> (t2 & 63));
             break;
         case INDEX_op_sar_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, ((int64_t)t1 >> (t2 & 63)));
             break;
 #if TCG_TARGET_HAS_rot_i64
         case INDEX_op_rotl_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, rol64(t1, t2 & 63));
             break;
         case INDEX_op_rotr_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, ror64(t1, t2 & 63));
             break;
 #endif
 #if TCG_TARGET_HAS_deposit_i64
         case INDEX_op_deposit_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tmp16 = *tb_ptr++;
             tmp8 = *tb_ptr++;
             tmp64 = (((1ULL << tmp8) - 1) << tmp16);
@@ -920,8 +764,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
             break;
 #endif
         case INDEX_op_brcond_i64:
-            t0 = tci_read_r64(regs, &tb_ptr);
-            t1 = tci_read_r64(regs, &tb_ptr);
+            t0 = tci_read_r(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             condition = *tb_ptr++;
             label = tci_read_label(&tb_ptr);
             if (tci_compare64(t0, t1, condition)) {
@@ -930,85 +774,29 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
                 continue;
             }
             break;
-#if TCG_TARGET_HAS_ext8u_i64
-        case INDEX_op_ext8u_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r8(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_ext8s_i64
-        case INDEX_op_ext8s_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r8s(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_ext16s_i64
-        case INDEX_op_ext16s_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r16s(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_ext16u_i64
-        case INDEX_op_ext16u_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r16(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
-            break;
-#endif
 #if TCG_TARGET_HAS_ext32s_i64
         case INDEX_op_ext32s_i64:
 #endif
         case INDEX_op_ext_i32_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32s(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
+            t1 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (int32_t)t1);
             break;
 #if TCG_TARGET_HAS_ext32u_i64
         case INDEX_op_ext32u_i64:
 #endif
         case INDEX_op_extu_i32_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
-            break;
-#if TCG_TARGET_HAS_bswap16_i64
-        case INDEX_op_bswap16_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r16(regs, &tb_ptr);
-            tci_write_reg(regs, t0, bswap16(t1));
-            break;
-#endif
-#if TCG_TARGET_HAS_bswap32_i64
-        case INDEX_op_bswap32_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, bswap32(t1));
+            t1 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (uint32_t)t1);
             break;
-#endif
 #if TCG_TARGET_HAS_bswap64_i64
         case INDEX_op_bswap64_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, bswap64(t1));
             break;
 #endif
-#if TCG_TARGET_HAS_not_i64
-        case INDEX_op_not_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, ~t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_neg_i64
-        case INDEX_op_neg_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, -t1);
-            break;
-#endif
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
             /* QEMU specific operations. */
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index feac4659cc..c79f9c32d8 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -380,6 +380,18 @@ static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
     old_code_ptr[1] = s->code_ptr - old_code_ptr;
 }
 
+#if TCG_TARGET_REG_BITS == 64
+# define CASE_32_64(x) \
+        case glue(glue(INDEX_op_, x), _i64): \
+        case glue(glue(INDEX_op_, x), _i32):
+# define CASE_64(x) \
+        case glue(glue(INDEX_op_, x), _i64):
+#else
+# define CASE_32_64(x) \
+        case glue(glue(INDEX_op_, x), _i32):
+# define CASE_64(x)
+#endif
+
 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                        const int *const_args)
 {
@@ -391,6 +403,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_exit_tb:
         tcg_out64(s, args[0]);
         break;
+
     case INDEX_op_goto_tb:
         if (s->tb_jmp_insn_offset) {
             /* Direct jump method. */
@@ -404,15 +417,18 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         }
         set_jmp_reset_offset(s, args[0]);
         break;
+
     case INDEX_op_br:
         tci_out_label(s, arg_label(args[0]));
         break;
-    case INDEX_op_setcond_i32:
+
+    CASE_32_64(setcond)
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
         tcg_out_r(s, args[2]);
         tcg_out8(s, args[3]);   /* condition */
         break;
+
 #if TCG_TARGET_REG_BITS == 32
     case INDEX_op_setcond2_i32:
         /* setcond2_i32 cond, t0, t1_low, t1_high, t2_low, t2_high */
@@ -423,95 +439,54 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_r(s, args[4]);
         tcg_out8(s, args[5]);   /* condition */
         break;
-#elif TCG_TARGET_REG_BITS == 64
-    case INDEX_op_setcond_i64:
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_r(s, args[2]);
-        tcg_out8(s, args[3]);   /* condition */
-        break;
 #endif
-    case INDEX_op_ld8u_i32:
-    case INDEX_op_ld8s_i32:
-    case INDEX_op_ld16u_i32:
-    case INDEX_op_ld16s_i32:
+
+    CASE_32_64(ld8u)
+    CASE_32_64(ld8s)
+    CASE_32_64(ld16u)
+    CASE_32_64(ld16s)
     case INDEX_op_ld_i32:
-    case INDEX_op_st8_i32:
-    case INDEX_op_st16_i32:
+    CASE_64(ld32u)
+    CASE_64(ld32s)
+    CASE_64(ld)
+    CASE_32_64(st8)
+    CASE_32_64(st16)
     case INDEX_op_st_i32:
-    case INDEX_op_ld8u_i64:
-    case INDEX_op_ld8s_i64:
-    case INDEX_op_ld16u_i64:
-    case INDEX_op_ld16s_i64:
-    case INDEX_op_ld32u_i64:
-    case INDEX_op_ld32s_i64:
-    case INDEX_op_ld_i64:
-    case INDEX_op_st8_i64:
-    case INDEX_op_st16_i64:
-    case INDEX_op_st32_i64:
-    case INDEX_op_st_i64:
+    CASE_64(st32)
+    CASE_64(st)
         stack_bounds_check(args[1], args[2]);
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
         tcg_debug_assert(args[2] == (int32_t)args[2]);
         tcg_out32(s, args[2]);
         break;
-    case INDEX_op_add_i32:
-    case INDEX_op_sub_i32:
-    case INDEX_op_mul_i32:
-    case INDEX_op_and_i32:
-    case INDEX_op_andc_i32:     /* Optional (TCG_TARGET_HAS_andc_i32). */
-    case INDEX_op_eqv_i32:      /* Optional (TCG_TARGET_HAS_eqv_i32). */
-    case INDEX_op_nand_i32:     /* Optional (TCG_TARGET_HAS_nand_i32). */
-    case INDEX_op_nor_i32:      /* Optional (TCG_TARGET_HAS_nor_i32). */
-    case INDEX_op_or_i32:
-    case INDEX_op_orc_i32:      /* Optional (TCG_TARGET_HAS_orc_i32). */
-    case INDEX_op_xor_i32:
-    case INDEX_op_shl_i32:
-    case INDEX_op_shr_i32:
-    case INDEX_op_sar_i32:
-    case INDEX_op_rotl_i32:     /* Optional (TCG_TARGET_HAS_rot_i32). */
-    case INDEX_op_rotr_i32:     /* Optional (TCG_TARGET_HAS_rot_i32). */
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_r(s, args[2]);
-        break;
-    case INDEX_op_deposit_i32:  /* Optional (TCG_TARGET_HAS_deposit_i32). */
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_r(s, args[2]);
-        tcg_debug_assert(args[3] <= UINT8_MAX);
-        tcg_out8(s, args[3]);
-        tcg_debug_assert(args[4] <= UINT8_MAX);
-        tcg_out8(s, args[4]);
-        break;
 
-#if TCG_TARGET_REG_BITS == 64
-    case INDEX_op_add_i64:
-    case INDEX_op_sub_i64:
-    case INDEX_op_mul_i64:
-    case INDEX_op_and_i64:
-    case INDEX_op_andc_i64:     /* Optional (TCG_TARGET_HAS_andc_i64). */
-    case INDEX_op_eqv_i64:      /* Optional (TCG_TARGET_HAS_eqv_i64). */
-    case INDEX_op_nand_i64:     /* Optional (TCG_TARGET_HAS_nand_i64). */
-    case INDEX_op_nor_i64:      /* Optional (TCG_TARGET_HAS_nor_i64). */
-    case INDEX_op_or_i64:
-    case INDEX_op_orc_i64:      /* Optional (TCG_TARGET_HAS_orc_i64). */
-    case INDEX_op_xor_i64:
-    case INDEX_op_shl_i64:
-    case INDEX_op_shr_i64:
-    case INDEX_op_sar_i64:
-    case INDEX_op_rotl_i64:     /* Optional (TCG_TARGET_HAS_rot_i64). */
-    case INDEX_op_rotr_i64:     /* Optional (TCG_TARGET_HAS_rot_i64). */
-    case INDEX_op_div_i64:      /* Optional (TCG_TARGET_HAS_div_i64). */
-    case INDEX_op_divu_i64:     /* Optional (TCG_TARGET_HAS_div_i64). */
-    case INDEX_op_rem_i64:      /* Optional (TCG_TARGET_HAS_div_i64). */
-    case INDEX_op_remu_i64:     /* Optional (TCG_TARGET_HAS_div_i64). */
+    CASE_32_64(add)
+    CASE_32_64(sub)
+    CASE_32_64(mul)
+    CASE_32_64(and)
+    CASE_32_64(or)
+    CASE_32_64(xor)
+    CASE_32_64(andc)     /* Optional (TCG_TARGET_HAS_andc_*). */
+    CASE_32_64(orc)      /* Optional (TCG_TARGET_HAS_orc_*). */
+    CASE_32_64(eqv)      /* Optional (TCG_TARGET_HAS_eqv_*). */
+    CASE_32_64(nand)     /* Optional (TCG_TARGET_HAS_nand_*). */
+    CASE_32_64(nor)      /* Optional (TCG_TARGET_HAS_nor_*). */
+    CASE_32_64(shl)
+    CASE_32_64(shr)
+    CASE_32_64(sar)
+    CASE_32_64(rotl)     /* Optional (TCG_TARGET_HAS_rot_*). */
+    CASE_32_64(rotr)     /* Optional (TCG_TARGET_HAS_rot_*). */
+    CASE_32_64(div)      /* Optional (TCG_TARGET_HAS_div_*). */
+    CASE_32_64(divu)     /* Optional (TCG_TARGET_HAS_div_*). */
+    CASE_32_64(rem)      /* Optional (TCG_TARGET_HAS_div_*). */
+    CASE_32_64(remu)     /* Optional (TCG_TARGET_HAS_div_*). */
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
         tcg_out_r(s, args[2]);
         break;
-    case INDEX_op_deposit_i64:  /* Optional (TCG_TARGET_HAS_deposit_i64). */
+
+    CASE_32_64(deposit)  /* Optional (TCG_TARGET_HAS_deposit_*). */
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
         tcg_out_r(s, args[2]);
@@ -520,45 +495,31 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_debug_assert(args[4] <= UINT8_MAX);
         tcg_out8(s, args[4]);
         break;
-    case INDEX_op_brcond_i64:
+
+    CASE_32_64(brcond)
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
         tcg_out8(s, args[2]);           /* condition */
         tci_out_label(s, arg_label(args[3]));
         break;
-    case INDEX_op_bswap16_i64:  /* Optional (TCG_TARGET_HAS_bswap16_i64). */
-    case INDEX_op_bswap32_i64:  /* Optional (TCG_TARGET_HAS_bswap32_i64). */
-    case INDEX_op_bswap64_i64:  /* Optional (TCG_TARGET_HAS_bswap64_i64). */
-    case INDEX_op_not_i64:      /* Optional (TCG_TARGET_HAS_not_i64). */
-    case INDEX_op_neg_i64:      /* Optional (TCG_TARGET_HAS_neg_i64). */
-    case INDEX_op_ext8s_i64:    /* Optional (TCG_TARGET_HAS_ext8s_i64). */
-    case INDEX_op_ext8u_i64:    /* Optional (TCG_TARGET_HAS_ext8u_i64). */
-    case INDEX_op_ext16s_i64:   /* Optional (TCG_TARGET_HAS_ext16s_i64). */
-    case INDEX_op_ext16u_i64:   /* Optional (TCG_TARGET_HAS_ext16u_i64). */
-    case INDEX_op_ext32s_i64:   /* Optional (TCG_TARGET_HAS_ext32s_i64). */
-    case INDEX_op_ext32u_i64:   /* Optional (TCG_TARGET_HAS_ext32u_i64). */
-    case INDEX_op_ext_i32_i64:
-    case INDEX_op_extu_i32_i64:
-#endif /* TCG_TARGET_REG_BITS == 64 */
-    case INDEX_op_neg_i32:      /* Optional (TCG_TARGET_HAS_neg_i32). */
-    case INDEX_op_not_i32:      /* Optional (TCG_TARGET_HAS_not_i32). */
-    case INDEX_op_ext8s_i32:    /* Optional (TCG_TARGET_HAS_ext8s_i32). */
-    case INDEX_op_ext16s_i32:   /* Optional (TCG_TARGET_HAS_ext16s_i32). */
-    case INDEX_op_ext8u_i32:    /* Optional (TCG_TARGET_HAS_ext8u_i32). */
-    case INDEX_op_ext16u_i32:   /* Optional (TCG_TARGET_HAS_ext16u_i32). */
-    case INDEX_op_bswap16_i32:  /* Optional (TCG_TARGET_HAS_bswap16_i32). */
-    case INDEX_op_bswap32_i32:  /* Optional (TCG_TARGET_HAS_bswap32_i32). */
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        break;
-    case INDEX_op_div_i32:      /* Optional (TCG_TARGET_HAS_div_i32). */
-    case INDEX_op_divu_i32:     /* Optional (TCG_TARGET_HAS_div_i32). */
-    case INDEX_op_rem_i32:      /* Optional (TCG_TARGET_HAS_div_i32). */
-    case INDEX_op_remu_i32:     /* Optional (TCG_TARGET_HAS_div_i32). */
+
+    CASE_32_64(neg)      /* Optional (TCG_TARGET_HAS_neg_*). */
+    CASE_32_64(not)      /* Optional (TCG_TARGET_HAS_not_*). */
+    CASE_32_64(ext8s)    /* Optional (TCG_TARGET_HAS_ext8s_*). */
+    CASE_32_64(ext8u)    /* Optional (TCG_TARGET_HAS_ext8u_*). */
+    CASE_32_64(ext16s)   /* Optional (TCG_TARGET_HAS_ext16s_*). */
+    CASE_32_64(ext16u)   /* Optional (TCG_TARGET_HAS_ext16u_*). */
+    CASE_64(ext32s)      /* Optional (TCG_TARGET_HAS_ext32s_i64). */
+    CASE_64(ext32u)      /* Optional (TCG_TARGET_HAS_ext32u_i64). */
+    CASE_64(ext_i32)
+    CASE_64(extu_i32)
+    CASE_32_64(bswap16)  /* Optional (TCG_TARGET_HAS_bswap16_*). */
+    CASE_32_64(bswap32)  /* Optional (TCG_TARGET_HAS_bswap32_*). */
+    CASE_64(bswap64)     /* Optional (TCG_TARGET_HAS_bswap64_i64). */
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
-        tcg_out_r(s, args[2]);
         break;
+
 #if TCG_TARGET_REG_BITS == 32
     case INDEX_op_add2_i32:
     case INDEX_op_sub2_i32:
@@ -584,31 +545,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_r(s, args[3]);
         break;
 #endif
-    case INDEX_op_brcond_i32:
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out8(s, args[2]);           /* condition */
-        tci_out_label(s, arg_label(args[3]));
-        break;
+
     case INDEX_op_qemu_ld_i32:
-        tcg_out_r(s, *args++);
-        tcg_out_r(s, *args++);
-        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-            tcg_out_r(s, *args++);
-        }
-        tcg_out_i(s, *args++);
-        break;
-    case INDEX_op_qemu_ld_i64:
-        tcg_out_r(s, *args++);
-        if (TCG_TARGET_REG_BITS == 32) {
-            tcg_out_r(s, *args++);
-        }
-        tcg_out_r(s, *args++);
-        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-            tcg_out_r(s, *args++);
-        }
-        tcg_out_i(s, *args++);
-        break;
     case INDEX_op_qemu_st_i32:
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
@@ -617,6 +555,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         }
         tcg_out_i(s, *args++);
         break;
+
+    case INDEX_op_qemu_ld_i64:
     case INDEX_op_qemu_st_i64:
         tcg_out_r(s, *args++);
         if (TCG_TARGET_REG_BITS == 32) {
@@ -628,8 +568,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         }
         tcg_out_i(s, *args++);
         break;
+
     case INDEX_op_mb:
         break;
+
     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_mov_i64:
     case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 12aa9ed37e..5fb65b4bef 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -153,7 +153,7 @@ class TestSingleDrive(iotests.QMPTestCase):
     def test_device_not_found(self):
         result = self.vm.qmp('block-stream', device='nonexistent')
         self.assert_qmp(result, 'error/desc',
-            'Cannot find device=nonexistent nor node_name=nonexistent')
+            'Cannot find device=\'nonexistent\' nor node-name=\'nonexistent\'')
 
     def test_job_id_missing(self):
         result = self.vm.qmp('block-stream', device='mid')
@@ -507,7 +507,7 @@ class TestParallelOps(iotests.QMPTestCase):
         # Error: the base node does not exist
         result = self.vm.qmp('block-stream', device='node4', base_node='none', job_id='stream')
         self.assert_qmp(result, 'error/desc',
-            'Cannot find device= nor node_name=none')
+            'Cannot find device=\'\' nor node-name=\'none\'')
 
         # Error: the base node is not a backing file of the top node
         result = self.vm.qmp('block-stream', device='node4', base_node='node6', job_id='stream')
diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index 7ebc9ed825..336ff7c4f2 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -175,13 +175,13 @@ class TestSingleDrive(ImageCommitTestCase):
         self.assert_no_active_block_jobs()
         result = self.vm.qmp('block-commit', device='drive0', top_node='badfile', base_node='base')
         self.assert_qmp(result, 'error/class', 'GenericError')
-        self.assert_qmp(result, 'error/desc', "Cannot find device= nor node_name=badfile")
+        self.assert_qmp(result, 'error/desc', "Cannot find device='' nor node-name='badfile'")
 
     def test_base_node_invalid(self):
         self.assert_no_active_block_jobs()
         result = self.vm.qmp('block-commit', device='drive0', top_node='mid', base_node='badfile')
         self.assert_qmp(result, 'error/class', 'GenericError')
-        self.assert_qmp(result, 'error/desc', "Cannot find device= nor node_name=badfile")
+        self.assert_qmp(result, 'error/desc', "Cannot find device='' nor node-name='badfile'")
 
     def test_top_path_and_node(self):
         self.assert_no_active_block_jobs()
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
index f707471fb0..f570610f64 100644
--- a/tests/qemu-iotests/051.pc.out
+++ b/tests/qemu-iotests/051.pc.out
@@ -61,13 +61,13 @@ QEMU X.Y.Z monitor - type 'help' for more information
 (qemu) quit
 
 Testing: -drive file=TEST_DIR/t.qcow2,node-name=123foo
-QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=123foo: Invalid node name
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=123foo: Invalid node-name: '123foo'
 
 Testing: -drive file=TEST_DIR/t.qcow2,node-name=_foo
-QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=_foo: Invalid node name
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=_foo: Invalid node-name: '_foo'
 
 Testing: -drive file=TEST_DIR/t.qcow2,node-name=foo#12
-QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=foo#12: Invalid node name
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=foo#12: Invalid node-name: 'foo#12'
 
 
 === Device without drive ===
diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
index 1974262fac..615c083549 100644
--- a/tests/qemu-iotests/081.out
+++ b/tests/qemu-iotests/081.out
@@ -140,7 +140,7 @@ Testing:
 QMP_VERSION
 {"return": {}}
 {"error": {"class": "GenericError", "desc": "blkverify=on can only be set if there are exactly two files and vote-threshold is 2"}}
-{"error": {"class": "GenericError", "desc": "Cannot find device=drive0-quorum nor node_name=drive0-quorum"}}
+{"error": {"class": "GenericError", "desc": "Cannot find device='drive0-quorum' nor node-name='drive0-quorum'"}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 
diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out
index 32a193f2c2..1d4c565b6d 100644
--- a/tests/qemu-iotests/085.out
+++ b/tests/qemu-iotests/085.out
@@ -24,7 +24,7 @@ Formatting 'TEST_DIR/1-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 extended
 { 'execute': 'blockdev-snapshot-sync',
                          'arguments': { 'snapshot-file':'TEST_DIR/1-snapshot-v0.IMGFMT',
                                      'format': 'IMGFMT' } }
-{"error": {"class": "GenericError", "desc": "Cannot find device= nor node_name="}}
+{"error": {"class": "GenericError", "desc": "Cannot find device='' nor node-name=''"}}
 
 === Invalid command - missing snapshot-file ===
 
@@ -222,10 +222,10 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/
 { 'execute': 'blockdev-snapshot',
                       'arguments': { 'node': 'virtio0',
                                      'overlay':'snap_14' } }
-{"error": {"class": "GenericError", "desc": "Cannot find device=snap_14 nor node_name=snap_14"}}
+{"error": {"class": "GenericError", "desc": "Cannot find device='snap_14' nor node-name='snap_14'"}}
 { 'execute': 'blockdev-snapshot',
                      'arguments': { 'node':'nodevice',
                                     'overlay':'snap_13' }
                    }
-{"error": {"class": "GenericError", "desc": "Cannot find device=nodevice nor node_name=nodevice"}}
+{"error": {"class": "GenericError", "desc": "Cannot find device='nodevice' nor node-name='nodevice'"}}
 *** done
diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087
index edd43f1a28..d8e0e384cd 100755
--- a/tests/qemu-iotests/087
+++ b/tests/qemu-iotests/087
@@ -143,9 +143,7 @@ run_qemu <<EOF
   "arguments": {
       "qom-type": "secret",
       "id": "sec0",
-      "props": {
-          "data": "123456"
-      }
+      "data": "123456"
   }
 }
 { "execute": "blockdev-add",
@@ -176,9 +174,7 @@ run_qemu <<EOF
   "arguments": {
       "qom-type": "secret",
       "id": "sec0",
-      "props": {
-          "data": "123456"
-      }
+      "data": "123456"
   }
 }
 { "execute": "blockdev-add",
diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out
index b61ba638af..e1c23a6983 100644
--- a/tests/qemu-iotests/087.out
+++ b/tests/qemu-iotests/087.out
@@ -17,7 +17,7 @@ Testing: -drive driver=IMGFMT,id=disk,node-name=test-node,file=TEST_DIR/t.IMGFMT
 QMP_VERSION
 {"return": {}}
 {"error": {"class": "GenericError", "desc": "node-name=disk is conflicting with a device id"}}
-{"error": {"class": "GenericError", "desc": "Duplicate node name"}}
+{"error": {"class": "GenericError", "desc": "Duplicate nodes with node-name='test-node'"}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 
diff --git a/tests/qemu-iotests/184 b/tests/qemu-iotests/184
index 513d167098..e4cbcd8634 100755
--- a/tests/qemu-iotests/184
+++ b/tests/qemu-iotests/184
@@ -67,10 +67,8 @@ run_qemu <<EOF
   "arguments": {
     "qom-type": "throttle-group",
     "id": "group0",
-    "props": {
-      "limits" : {
-        "iops-total": 1000
-      }
+    "limits" : {
+      "iops-total": 1000
     }
   }
 }
@@ -96,10 +94,8 @@ run_qemu <<EOF
   "arguments": {
     "qom-type": "throttle-group",
     "id": "group0",
-    "props" : {
-      "limits": {
-          "iops-total": 1000
-      }
+    "limits": {
+        "iops-total": 1000
     }
   }
 }
@@ -136,10 +132,8 @@ run_qemu <<EOF
   "arguments": {
     "qom-type": "throttle-group",
     "id": "group0",
-    "props" : {
-      "limits": {
-          "iops-total": 1000
-      }
+    "limits": {
+        "iops-total": 1000
     }
   }
 }
diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out
index 5dd589d14e..b68c443867 100644
--- a/tests/qemu-iotests/206.out
+++ b/tests/qemu-iotests/206.out
@@ -155,7 +155,7 @@ Format specific information:
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "qcow2", "file": "this doesn't exist", "size": 33554432}}}
 {"return": {}}
-Job failed: Cannot find device=this doesn't exist nor node_name=this doesn't exist
+Job failed: Cannot find device='this doesn't exist' nor node-name='this doesn't exist'
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
 
diff --git a/tests/qemu-iotests/210.out b/tests/qemu-iotests/210.out
index 2e9fc596eb..55c0844370 100644
--- a/tests/qemu-iotests/210.out
+++ b/tests/qemu-iotests/210.out
@@ -108,7 +108,7 @@ Format specific information:
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "luks", "file": "this doesn't exist", "size": 67108864}}}
 {"return": {}}
-Job failed: Cannot find device=this doesn't exist nor node_name=this doesn't exist
+Job failed: Cannot find device='this doesn't exist' nor node-name='this doesn't exist'
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
 
diff --git a/tests/qemu-iotests/211.out b/tests/qemu-iotests/211.out
index b83384deea..3bc092a8a8 100644
--- a/tests/qemu-iotests/211.out
+++ b/tests/qemu-iotests/211.out
@@ -62,7 +62,7 @@ cluster_size: 1048576
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "vdi", "file": "this doesn't exist", "size": 33554432}}}
 {"return": {}}
-Job failed: Cannot find device=this doesn't exist nor node_name=this doesn't exist
+Job failed: Cannot find device='this doesn't exist' nor node-name='this doesn't exist'
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
 
diff --git a/tests/qemu-iotests/212.out b/tests/qemu-iotests/212.out
index 1538d679be..8102033488 100644
--- a/tests/qemu-iotests/212.out
+++ b/tests/qemu-iotests/212.out
@@ -52,7 +52,7 @@ virtual size: 32 MiB (33554432 bytes)
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "parallels", "file": "this doesn't exist", "size": 33554432}}}
 {"return": {}}
-Job failed: Cannot find device=this doesn't exist nor node_name=this doesn't exist
+Job failed: Cannot find device='this doesn't exist' nor node-name='this doesn't exist'
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
 
diff --git a/tests/qemu-iotests/213.out b/tests/qemu-iotests/213.out
index be4ae85180..3cdce4d790 100644
--- a/tests/qemu-iotests/213.out
+++ b/tests/qemu-iotests/213.out
@@ -55,7 +55,7 @@ cluster_size: 268435456
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "vhdx", "file": "this doesn't exist", "size": 33554432}}}
 {"return": {}}
-Job failed: Cannot find device=this doesn't exist nor node_name=this doesn't exist
+Job failed: Cannot find device='this doesn't exist' nor node-name='this doesn't exist'
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
 
diff --git a/tests/qemu-iotests/218 b/tests/qemu-iotests/218
index ae7c4fb187..325d8244fb 100755
--- a/tests/qemu-iotests/218
+++ b/tests/qemu-iotests/218
@@ -152,7 +152,7 @@ with iotests.VM() as vm, \
     vm.launch()
 
     ret = vm.qmp('object-add', qom_type='throttle-group', id='tg',
-                 props={'x-bps-read': 4096})
+                 limits={'bps-read': 4096})
     assert ret['return'] == {}
 
     ret = vm.qmp('blockdev-add',
diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out
index bbc85289e3..083b62d053 100644
--- a/tests/qemu-iotests/223.out
+++ b/tests/qemu-iotests/223.out
@@ -53,7 +53,7 @@ exports available: 0
 {"return": {}}
 {"execute":"nbd-server-add",
   "arguments":{"device":"nosuch"}}
-{"error": {"class": "GenericError", "desc": "Cannot find device=nosuch nor node_name=nosuch"}}
+{"error": {"class": "GenericError", "desc": "Cannot find device='nosuch' nor node-name='nosuch'"}}
 {"execute":"nbd-server-add",
   "arguments":{"device":"n"}}
 {"error": {"class": "GenericError", "desc": "Block export id 'n' is already in use"}}
@@ -154,7 +154,7 @@ exports available: 0
 {"return": {}}
 {"execute":"nbd-server-add",
   "arguments":{"device":"nosuch"}}
-{"error": {"class": "GenericError", "desc": "Cannot find device=nosuch nor node_name=nosuch"}}
+{"error": {"class": "GenericError", "desc": "Cannot find device='nosuch' nor node-name='nosuch'"}}
 {"execute":"nbd-server-add",
   "arguments":{"device":"n"}}
 {"error": {"class": "GenericError", "desc": "Block export id 'n' is already in use"}}
diff --git a/tests/qemu-iotests/235 b/tests/qemu-iotests/235
index 20d16dbf38..8aed45f9a7 100755
--- a/tests/qemu-iotests/235
+++ b/tests/qemu-iotests/235
@@ -57,7 +57,7 @@ vm.add_args('-drive', 'id=src,file=' + disk)
 vm.launch()
 
 log(vm.qmp('object-add', qom_type='throttle-group', id='tg0',
-           props={ 'x-bps-total': size }))
+           limits={'bps-total': size}))
 
 log(vm.qmp('blockdev-add',
            **{ 'node-name': 'target',
diff --git a/tests/qemu-iotests/237.out b/tests/qemu-iotests/237.out
index a8c800bfad..aa94986803 100644
--- a/tests/qemu-iotests/237.out
+++ b/tests/qemu-iotests/237.out
@@ -85,7 +85,7 @@ Format specific information:
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "vmdk", "file": "this doesn't exist", "size": 33554432}}}
 {"return": {}}
-Job failed: Cannot find device=this doesn't exist nor node_name=this doesn't exist
+Job failed: Cannot find device='this doesn't exist' nor node-name='this doesn't exist'
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
 
diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245
index cfdeb902be..11104b9208 100755
--- a/tests/qemu-iotests/245
+++ b/tests/qemu-iotests/245
@@ -140,8 +140,8 @@ class TestBlockdevReopen(iotests.QMPTestCase):
         self.reopen(opts, {'file': 'hd0-file'})
 
         # We cannot change any of these
-        self.reopen(opts, {'node-name': 'not-found'}, "Cannot find node named 'not-found'")
-        self.reopen(opts, {'node-name': ''}, "Cannot find node named ''")
+        self.reopen(opts, {'node-name': 'not-found'}, "Failed to find node with node-name='not-found'")
+        self.reopen(opts, {'node-name': ''}, "Failed to find node with node-name=''")
         self.reopen(opts, {'node-name': None}, "Invalid parameter type for 'node-name', expected: string")
         self.reopen(opts, {'driver': 'raw'}, "Cannot change the option 'driver'")
         self.reopen(opts, {'driver': ''}, "Invalid parameter ''")
@@ -158,7 +158,7 @@ class TestBlockdevReopen(iotests.QMPTestCase):
 
         # node-name is optional in BlockdevOptions, but x-blockdev-reopen needs it
         del opts['node-name']
-        self.reopen(opts, {}, "Node name not specified")
+        self.reopen(opts, {}, "node-name not specified")
 
         # Check that nothing has changed
         self.check_node_graph(original_graph)
@@ -187,8 +187,8 @@ class TestBlockdevReopen(iotests.QMPTestCase):
         self.reopen(opts, {'backing': backing_node_name})
 
         # We can't use a non-existing or empty (non-NULL) node as the backing image
-        self.reopen(opts, {'backing': 'not-found'}, "Cannot find device= nor node_name=not-found")
-        self.reopen(opts, {'backing': ''}, "Cannot find device= nor node_name=")
+        self.reopen(opts, {'backing': 'not-found'}, "Cannot find device=\'\' nor node-name=\'not-found\'")
+        self.reopen(opts, {'backing': ''}, "Cannot find device=\'\' nor node-name=\'\'")
 
         # We can reopen the image just fine if we specify the backing options
         opts['backing'] = {'driver': iotests.imgfmt,
@@ -644,12 +644,12 @@ class TestBlockdevReopen(iotests.QMPTestCase):
         ###### throttle ######
         ######################
         opts = { 'qom-type': 'throttle-group', 'id': 'group0',
-                 'props': { 'limits': { 'iops-total': 1000 } } }
+                 'limits': { 'iops-total': 1000 } }
         result = self.vm.qmp('object-add', conv_keys = False, **opts)
         self.assert_qmp(result, 'return', {})
 
         opts = { 'qom-type': 'throttle-group', 'id': 'group1',
-                 'props': { 'limits': { 'iops-total': 2000 } } }
+                 'limits': { 'iops-total': 2000 } }
         result = self.vm.qmp('object-add', conv_keys = False, **opts)
         self.assert_qmp(result, 'return', {})
 
diff --git a/tests/qemu-iotests/249.out b/tests/qemu-iotests/249.out
index 92ec81db03..d2bf9be85e 100644
--- a/tests/qemu-iotests/249.out
+++ b/tests/qemu-iotests/249.out
@@ -18,7 +18,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.
                      'filter-node-name': '1234'}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
-{"error": {"class": "GenericError", "desc": "Invalid node name"}}
+{"error": {"class": "GenericError", "desc": "Invalid node-name: '1234'"}}
 
 === Send a write command to a drive opened in read-only mode (2)
 
diff --git a/tests/qemu-iotests/258 b/tests/qemu-iotests/258
index 9a2d33ae5e..a6618208a8 100755
--- a/tests/qemu-iotests/258
+++ b/tests/qemu-iotests/258
@@ -103,9 +103,9 @@ def test_concurrent_finish(write_to_stream_node):
         vm.qmp_log('object-add',
                    qom_type='throttle-group',
                    id='tg',
-                   props={
-                       'x-iops-write': 1,
-                       'x-iops-write-max': 1
+                   limits={
+                       'iops-write': 1,
+                       'iops-write-max': 1
                    })
 
         vm.qmp_log('blockdev-add',
diff --git a/tests/qemu-iotests/258.out b/tests/qemu-iotests/258.out
index ce6e9ba3e5..c3a003d3e3 100644
--- a/tests/qemu-iotests/258.out
+++ b/tests/qemu-iotests/258.out
@@ -2,7 +2,7 @@ Running tests:
 
 === Commit and stream finish concurrently (letting stream write) ===
 
-{"execute": "object-add", "arguments": {"id": "tg", "props": {"x-iops-write": 1, "x-iops-write-max": 1}, "qom-type": "throttle-group"}}
+{"execute": "object-add", "arguments": {"id": "tg", "limits": {"iops-write": 1, "iops-write-max": 1}, "qom-type": "throttle-group"}}
 {"return": {}}
 {"execute": "blockdev-add", "arguments": {"backing": {"backing": {"backing": {"backing": {"driver": "raw", "file": {"driver": "file", "filename": "TEST_DIR/PID-node0.img"}, "node-name": "node0"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node1.img"}, "node-name": "node1"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node2.img"}, "node-name": "node2"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node3.img"}, "node-name": "node3"}, "driver": "IMGFMT", "file": {"driver": "throttle", "file": {"driver": "file", "filename": "TEST_DIR/PID-node4.img"}, "throttle-group": "tg"}, "node-name": "node4"}}
 {"return": {}}
@@ -18,7 +18,7 @@ Running tests:
 
 === Commit and stream finish concurrently (letting commit write) ===
 
-{"execute": "object-add", "arguments": {"id": "tg", "props": {"x-iops-write": 1, "x-iops-write-max": 1}, "qom-type": "throttle-group"}}
+{"execute": "object-add", "arguments": {"id": "tg", "limits": {"iops-write": 1, "iops-write-max": 1}, "qom-type": "throttle-group"}}
 {"return": {}}
 {"execute": "blockdev-add", "arguments": {"backing": {"backing": {"backing": {"backing": {"driver": "raw", "file": {"driver": "throttle", "file": {"driver": "file", "filename": "TEST_DIR/PID-node0.img"}, "throttle-group": "tg"}, "node-name": "node0"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node1.img"}, "node-name": "node1"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node2.img"}, "node-name": "node2"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node3.img"}, "node-name": "node3"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node4.img"}, "node-name": "node4"}}
 {"return": {}}
diff --git a/tests/qemu-iotests/283 b/tests/qemu-iotests/283
index 79643e375b..010c22f0a2 100755
--- a/tests/qemu-iotests/283
+++ b/tests/qemu-iotests/283
@@ -97,3 +97,56 @@ vm.qmp_log('blockdev-add', **{
 vm.qmp_log('blockdev-backup', sync='full', device='source', target='target')
 
 vm.shutdown()
+
+
+print('\n=== backup-top should be gone after job-finalize ===\n')
+
+# Check that the backup-top node is gone after job-finalize.
+#
+# During finalization, the node becomes inactive and can no longer
+# function.  If it is still present, new parents might be attached, and
+# there would be no meaningful way to handle their I/O requests.
+
+vm = iotests.VM()
+vm.launch()
+
+vm.qmp_log('blockdev-add', **{
+    'node-name': 'source',
+    'driver': 'null-co',
+})
+
+vm.qmp_log('blockdev-add', **{
+    'node-name': 'target',
+    'driver': 'null-co',
+})
+
+vm.qmp_log('blockdev-backup',
+           job_id='backup',
+           device='source',
+           target='target',
+           sync='full',
+           filter_node_name='backup-filter',
+           auto_finalize=False,
+           auto_dismiss=False)
+
+vm.event_wait('BLOCK_JOB_PENDING', 5.0)
+
+# The backup-top filter should still be present prior to finalization
+assert vm.node_info('backup-filter') is not None
+
+vm.qmp_log('job-finalize', id='backup')
+vm.event_wait('BLOCK_JOB_COMPLETED', 5.0)
+
+# The filter should be gone now.  Check that by trying to access it
+# with qemu-io (which will most likely crash qemu if it is still
+# there).
+vm.qmp_log('human-monitor-command',
+           command_line='qemu-io backup-filter "write 0 1M"')
+
+# (Also, do an explicit check.)
+assert vm.node_info('backup-filter') is None
+
+vm.qmp_log('job-dismiss', id='backup')
+vm.event_wait('JOB_STATUS_CHANGE', 5.0, {'data': {'status': 'null'}})
+
+vm.shutdown()
diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out
index d8cff22cc1..37c35058ae 100644
--- a/tests/qemu-iotests/283.out
+++ b/tests/qemu-iotests/283.out
@@ -6,3 +6,18 @@
 {"return": {}}
 {"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}}
 {"error": {"class": "GenericError", "desc": "Cannot set permissions for backup-top filter: Conflicts with use by other as 'image', which uses 'write' on base"}}
+
+=== backup-top should be gone after job-finalize ===
+
+{"execute": "blockdev-add", "arguments": {"driver": "null-co", "node-name": "source"}}
+{"return": {}}
+{"execute": "blockdev-add", "arguments": {"driver": "null-co", "node-name": "target"}}
+{"return": {}}
+{"execute": "blockdev-backup", "arguments": {"auto-dismiss": false, "auto-finalize": false, "device": "source", "filter-node-name": "backup-filter", "job-id": "backup", "sync": "full", "target": "target"}}
+{"return": {}}
+{"execute": "job-finalize", "arguments": {"id": "backup"}}
+{"return": {}}
+{"execute": "human-monitor-command", "arguments": {"command-line": "qemu-io backup-filter \"write 0 1M\""}}
+{"return": "Error: Cannot find device='' nor node-name='backup-filter'\r\n"}
+{"execute": "job-dismiss", "arguments": {"id": "backup"}}
+{"return": {}}
diff --git a/tests/qemu-iotests/295 b/tests/qemu-iotests/295
index 01a6c0b31f..270ad3999f 100755
--- a/tests/qemu-iotests/295
+++ b/tests/qemu-iotests/295
@@ -43,7 +43,7 @@ class Secret:
 
     def to_qmp_object(self):
         return { "qom_type" : "secret", "id": self.id(),
-                 "props": { "data": self.secret() } }
+                 "data": self.secret() }
 
 ################################################################################
 class EncryptionSetupTestCase(iotests.QMPTestCase):
diff --git a/tests/qemu-iotests/296 b/tests/qemu-iotests/296
index 0bc3c6c7d7..7c65e987a1 100755
--- a/tests/qemu-iotests/296
+++ b/tests/qemu-iotests/296
@@ -43,7 +43,7 @@ class Secret:
 
     def to_qmp_object(self):
         return { "qom_type" : "secret", "id": self.id(),
-                 "props": { "data": self.secret() } }
+                 "data": self.secret() }
 
 ################################################################################
 
diff --git a/tests/qemu-iotests/300 b/tests/qemu-iotests/300
index 63036f6a6e..b475a92c47 100755
--- a/tests/qemu-iotests/300
+++ b/tests/qemu-iotests/300
@@ -22,7 +22,7 @@
 import os
 import random
 import re
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional
 
 import iotests
 
@@ -30,7 +30,7 @@ import iotests
 # pylint: disable=wrong-import-order
 import qemu
 
-BlockBitmapMapping = List[Dict[str, Union[str, List[Dict[str, str]]]]]
+BlockBitmapMapping = List[Dict[str, object]]
 
 mig_sock = os.path.join(iotests.sock_dir, 'mig_sock')
 
@@ -189,8 +189,8 @@ class TestAliasMigration(TestDirtyBitmapMigration):
         # Check for error message on the destination
         if self.src_node_name != self.dst_node_name:
             self.verify_dest_error(f"Cannot find "
-                                   f"device={self.src_node_name} nor "
-                                   f"node_name={self.src_node_name}")
+                                   f"device='{self.src_node_name}' nor "
+                                   f"node-name='{self.src_node_name}'")
         else:
             self.verify_dest_error(None)
 
@@ -602,7 +602,8 @@ class TestCrossAliasMigration(TestDirtyBitmapMigration):
 
 class TestAliasTransformMigration(TestDirtyBitmapMigration):
     """
-    Tests the 'transform' option which modifies bitmap persistence on migration.
+    Tests the 'transform' option which modifies bitmap persistence on
+    migration.
     """
 
     src_node_name = 'node-a'
@@ -674,7 +675,8 @@ class TestAliasTransformMigration(TestDirtyBitmapMigration):
         bitmaps = self.vm_b.query_bitmaps()
 
         for node in bitmaps:
-            bitmaps[node] = sorted(((bmap['name'], bmap['persistent']) for bmap in bitmaps[node]))
+            bitmaps[node] = sorted(((bmap['name'], bmap['persistent'])
+                                    for bmap in bitmaps[node]))
 
         self.assertEqual(bitmaps,
                          {'node-a': [('bmap-a', True), ('bmap-b', False)],
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 4e758308f2..90d0b62523 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -17,6 +17,7 @@
 #
 
 import atexit
+import bz2
 from collections import OrderedDict
 import faulthandler
 import io
@@ -24,6 +25,7 @@ import json
 import logging
 import os
 import re
+import shutil
 import signal
 import struct
 import subprocess
@@ -96,6 +98,14 @@ luks_default_secret_object = 'secret,id=keysec0,data=' + \
                              os.environ.get('IMGKEYSECRET', '')
 luks_default_key_secret_opt = 'key-secret=keysec0'
 
+sample_img_dir = os.environ['SAMPLE_IMG_DIR']
+
+
+def unarchive_sample_image(sample, fname):
+    sample_fname = os.path.join(sample_img_dir, sample + '.bz2')
+    with bz2.open(sample_fname) as f_in, open(fname, 'wb') as f_out:
+        shutil.copyfileobj(f_in, f_out)
+
 
 def qemu_tool_pipe_and_status(tool: str, args: Sequence[str],
                               connect_stderr: bool = True) -> Tuple[str, int]:
diff --git a/tests/qemu-iotests/sample_images/parallels-with-bitmap.bz2 b/tests/qemu-iotests/sample_images/parallels-with-bitmap.bz2
new file mode 100644
index 0000000000..54892fd4d0
--- /dev/null
+++ b/tests/qemu-iotests/sample_images/parallels-with-bitmap.bz2
Binary files differ
diff --git a/tests/qemu-iotests/sample_images/parallels-with-bitmap.sh b/tests/qemu-iotests/sample_images/parallels-with-bitmap.sh
new file mode 100755
index 0000000000..30615aa6bd
--- /dev/null
+++ b/tests/qemu-iotests/sample_images/parallels-with-bitmap.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Test parallels load bitmap
+#
+# Copyright (c) 2021 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+CT=parallels-with-bitmap-ct
+DIR=$PWD/parallels-with-bitmap-dir
+IMG=$DIR/root.hds
+XML=$DIR/DiskDescriptor.xml
+TARGET=parallels-with-bitmap.bz2
+
+rm -rf $DIR
+
+prlctl create $CT --vmtype ct
+prlctl set $CT --device-add hdd --image $DIR --recreate --size 2G
+
+# recreate the image so it starts out as a clean, empty parallels image
+qemu-img create -f parallels $IMG 64G
+
+# create bitmap
+prlctl backup $CT
+
+prlctl set $CT --device-del hdd1
+prlctl destroy $CT
+
+dev=$(ploop mount $XML | sed -n 's/^Adding delta dev=\(\/dev\/ploop[0-9]\+\).*/\1/p')
+dd if=/dev/zero of=$dev bs=64K seek=5 count=2 oflag=direct
+dd if=/dev/zero of=$dev bs=64K seek=30 count=1 oflag=direct
+dd if=/dev/zero of=$dev bs=64K seek=10 count=3 oflag=direct
+ploop umount $XML  # bitmap name will be in the output
+
+bzip2 -z $IMG
+
+mv $IMG.bz2 $TARGET
+
+rm -rf $DIR
diff --git a/tests/qemu-iotests/tests/parallels-read-bitmap b/tests/qemu-iotests/tests/parallels-read-bitmap
new file mode 100755
index 0000000000..af6b9c5db3
--- /dev/null
+++ b/tests/qemu-iotests/tests/parallels-read-bitmap
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+#
+# Test parallels load bitmap
+#
+# Copyright (c) 2021 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import json
+import iotests
+from iotests import qemu_nbd_popen, qemu_img_pipe, log, file_path
+
+iotests.script_initialize(supported_fmts=['parallels'])
+
+nbd_sock = file_path('nbd-sock', base_dir=iotests.sock_dir)
+disk = iotests.file_path('disk')
+bitmap = 'e4f2eed0-37fe-4539-b50b-85d2e7fd235f'
+nbd_opts = f'driver=nbd,server.type=unix,server.path={nbd_sock}' \
+        f',x-dirty-bitmap=qemu:dirty-bitmap:{bitmap}'
+
+
+iotests.unarchive_sample_image('parallels-with-bitmap', disk)
+
+
+with qemu_nbd_popen('--read-only', f'--socket={nbd_sock}',
+                    f'--bitmap={bitmap}', '-f', iotests.imgfmt, disk):
+    out = qemu_img_pipe('map', '--output=json', '--image-opts', nbd_opts)
+    chunks = json.loads(out)
+    cluster = 64 * 1024
+
+    log('dirty clusters (cluster size is 64K):')
+    for c in chunks:
+        assert c['start'] % cluster == 0
+        assert c['length'] % cluster == 0
+        if c['data']:
+            continue
+
+        a = c['start'] // cluster
+        b = (c['start'] + c['length']) // cluster
+        if b - a > 1:
+            log(f'{a}-{b-1}')
+        else:
+            log(a)
diff --git a/tests/qemu-iotests/tests/parallels-read-bitmap.out b/tests/qemu-iotests/tests/parallels-read-bitmap.out
new file mode 100644
index 0000000000..e8f6bc9e96
--- /dev/null
+++ b/tests/qemu-iotests/tests/parallels-read-bitmap.out
@@ -0,0 +1,6 @@
+Start NBD server
+dirty clusters (cluster size is 64K):
+5-6
+10-12
+30
+Kill NBD server
diff --git a/tests/qtest/libqos/libqtest.h b/tests/qtest/libqos/libqtest.h
index 724f65aa94..a68dcd79d4 100644
--- a/tests/qtest/libqos/libqtest.h
+++ b/tests/qtest/libqos/libqtest.h
@@ -75,6 +75,17 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args);
 QTestState *qtest_init_with_serial(const char *extra_args, int *sock_fd);
 
 /**
+ * qtest_kill_qemu:
+ * @s: #QTestState instance to operate on.
+ *
+ * Kill the QEMU process and wait for it to terminate. It is safe to call this
+ * function multiple times. Normally qtest_quit() is used instead because it
+ * also frees the QTestState. Use qtest_kill_qemu() when you want to kill QEMU
+ * immediately and will call qtest_quit() later.
+ */
+void qtest_kill_qemu(QTestState *s);
+
+/**
  * qtest_quit:
  * @s: #QTestState instance to operate on.
  *
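The qtest_kill_qemu() documentation above implies a split teardown: kill and
reap QEMU somewhere in the test body, free the QTestState later. A minimal
sketch of that pattern, not part of this patch; the include path and the
"-machine none" arguments are assumptions that may differ per tree:

    #include "libqos/libqtest.h"

    static void test_early_teardown(void)
    {
        QTestState *qts = qtest_init("-machine none");

        /* ... exercise QEMU through qts ... */

        /* Kill and reap the QEMU process now; repeated calls are safe. */
        qtest_kill_qemu(qts);

        /* qtest_quit() still runs later to free qts and close its sockets. */
        qtest_quit(qts);
    }
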
@@ -133,6 +144,14 @@ void qtest_qmp_send_raw(QTestState *s, const char *fmt, ...)
     GCC_FMT_ATTR(2, 3);
 
 /**
+ * qtest_socket_server:
+ * @socket_path: the UNIX domain socket path
+ *
+ * Create and return a listen socket file descriptor, or abort on failure.
+ */
+int qtest_socket_server(const char *socket_path);
+
+/**
  * qtest_vqmp_fds:
  * @s: #QTestState instance to operate on.
  * @fds: array of file descriptors
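qtest_socket_server() only creates, binds, and listens on the UNIX socket; the
caller is still responsible for accepting the peer. A hedged sketch under that
assumption (the helper name is hypothetical; accept()/close() are POSIX and
g_assert_cmpint() is the GLib assertion already used by libqtest):

    #include <sys/socket.h>
    #include <unistd.h>
    #include <glib.h>
    #include "libqos/libqtest.h"

    static int accept_one_client(const char *sock_path)
    {
        int listen_fd = qtest_socket_server(sock_path); /* aborts on failure */
        int client_fd = accept(listen_fd, NULL, NULL);

        g_assert_cmpint(client_fd, !=, -1);
        close(listen_fd);
        return client_fd;
    }
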
@@ -630,9 +649,27 @@ void qtest_add_data_func_full(const char *str, void *data,
         g_free(path); \
     } while (0)
 
+/**
+ * qtest_add_abrt_handler:
+ * @fn: Handler function
+ * @data: Argument that is passed to the handler
+ *
+ * Add a handler function that is invoked on SIGABRT. This can be used to
+ * terminate processes and perform other cleanup. The handler can be removed
+ * with qtest_remove_abrt_handler().
+ */
 void qtest_add_abrt_handler(GHookFunc fn, const void *data);
 
 /**
+ * qtest_remove_abrt_handler:
+ * @data: Argument previously passed to qtest_add_abrt_handler()
+ *
+ * Remove an abrt handler that was previously added with
+ * qtest_add_abrt_handler().
+ */
+void qtest_remove_abrt_handler(void *data);
+
+/**
  * qtest_qmp_assert_success:
  * @qts: QTestState instance to operate on
  * @fmt: QMP message to send to qemu, formatted like
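The two hook functions above are meant to be paired: register a cleanup hook
when an external helper process is started, and remove it on the normal
teardown path so the SIGABRT handler is uninstalled together with the last
hook. A hedged sketch of that pairing; helper_pid, kill_helper() and the
start/stop wrappers are hypothetical names, not part of this patch:

    #include <signal.h>
    #include <sys/wait.h>
    #include "libqos/libqtest.h"

    static pid_t helper_pid = -1;

    /* GHookFunc callback: terminate and reap the helper if still running. */
    static void kill_helper(void *opaque)
    {
        pid_t *pid = opaque;

        if (*pid > 0) {
            kill(*pid, SIGTERM);
            waitpid(*pid, NULL, 0);
            *pid = -1;
        }
    }

    static void helper_start(pid_t pid)
    {
        helper_pid = pid;
        qtest_add_abrt_handler(kill_helper, &helper_pid);
    }

    static void helper_stop(void)
    {
        kill_helper(&helper_pid);               /* normal teardown path */
        qtest_remove_abrt_handler(&helper_pid); /* last hook uninstalls SIGABRT */
    }
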
diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c
index fd043b0570..71e359efcd 100644
--- a/tests/qtest/libqtest.c
+++ b/tests/qtest/libqtest.c
@@ -81,24 +81,8 @@ static void qtest_client_set_rx_handler(QTestState *s, QTestRecvFn recv);
 
 static int init_socket(const char *socket_path)
 {
-    struct sockaddr_un addr;
-    int sock;
-    int ret;
-
-    sock = socket(PF_UNIX, SOCK_STREAM, 0);
-    g_assert_cmpint(sock, !=, -1);
-
-    addr.sun_family = AF_UNIX;
-    snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", socket_path);
+    int sock = qtest_socket_server(socket_path);
     qemu_set_cloexec(sock);
-
-    do {
-        ret = bind(sock, (struct sockaddr *)&addr, sizeof(addr));
-    } while (ret == -1 && errno == EINTR);
-    g_assert_cmpint(ret, !=, -1);
-    ret = listen(sock, 1);
-    g_assert_cmpint(ret, !=, -1);
-
     return sock;
 }
 
@@ -149,7 +133,7 @@ void qtest_set_expected_status(QTestState *s, int status)
     s->expected_status = status;
 }
 
-static void kill_qemu(QTestState *s)
+void qtest_kill_qemu(QTestState *s)
 {
     pid_t pid = s->qemu_pid;
     int wstatus;
@@ -159,6 +143,7 @@ static void kill_qemu(QTestState *s)
         kill(pid, SIGTERM);
         TFR(pid = waitpid(s->qemu_pid, &s->wstatus, 0));
         assert(pid == s->qemu_pid);
+        s->qemu_pid = -1;
     }
 
     /*
@@ -185,7 +170,7 @@ static void kill_qemu(QTestState *s)
 
 static void kill_qemu_hook_func(void *s)
 {
-    kill_qemu(s);
+    qtest_kill_qemu(s);
 }
 
 static void sigabrt_handler(int signo)
@@ -211,15 +196,30 @@ static void cleanup_sigabrt_handler(void)
     sigaction(SIGABRT, &sigact_old, NULL);
 }
 
+static bool hook_list_is_empty(GHookList *hook_list)
+{
+    GHook *hook = g_hook_first_valid(hook_list, TRUE);
+
+    if (!hook) {
+        return true;
+    }
+
+    g_hook_unref(hook_list, hook);
+    return false;
+}
+
 void qtest_add_abrt_handler(GHookFunc fn, const void *data)
 {
     GHook *hook;
 
-    /* Only install SIGABRT handler once */
     if (!abrt_hooks.is_setup) {
         g_hook_list_init(&abrt_hooks, sizeof(GHook));
     }
-    setup_sigabrt_handler();
+
+    /* Only install SIGABRT handler once */
+    if (hook_list_is_empty(&abrt_hooks)) {
+        setup_sigabrt_handler();
+    }
 
     hook = g_hook_alloc(&abrt_hooks);
     hook->func = fn;
@@ -228,6 +228,17 @@ void qtest_add_abrt_handler(GHookFunc fn, const void *data)
     g_hook_prepend(&abrt_hooks, hook);
 }
 
+void qtest_remove_abrt_handler(void *data)
+{
+    GHook *hook = g_hook_find_data(&abrt_hooks, TRUE, data);
+    g_hook_destroy_link(&abrt_hooks, hook);
+
+    /* Uninstall SIGABRT handler on last instance */
+    if (hook_list_is_empty(&abrt_hooks)) {
+        cleanup_sigabrt_handler();
+    }
+}
+
 static const char *qtest_qemu_binary(void)
 {
     const char *qemu_bin;
@@ -384,12 +395,9 @@ QTestState *qtest_init_with_serial(const char *extra_args, int *sock_fd)
 
 void qtest_quit(QTestState *s)
 {
-    g_hook_destroy_link(&abrt_hooks, g_hook_find_data(&abrt_hooks, TRUE, s));
-
-    /* Uninstall SIGABRT handler on last instance */
-    cleanup_sigabrt_handler();
+    qtest_remove_abrt_handler(s);
 
-    kill_qemu(s);
+    qtest_kill_qemu(s);
     close(s->fd);
     close(s->qmp_fd);
     g_string_free(s->rx, true);
@@ -638,6 +646,28 @@ QDict *qtest_qmp_receive_dict(QTestState *s)
     return qmp_fd_receive(s->qmp_fd);
 }
 
+int qtest_socket_server(const char *socket_path)
+{
+    struct sockaddr_un addr;
+    int sock;
+    int ret;
+
+    sock = socket(PF_UNIX, SOCK_STREAM, 0);
+    g_assert_cmpint(sock, !=, -1);
+
+    addr.sun_family = AF_UNIX;
+    snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", socket_path);
+
+    do {
+        ret = bind(sock, (struct sockaddr *)&addr, sizeof(addr));
+    } while (ret == -1 && errno == EINTR);
+    g_assert_cmpint(ret, !=, -1);
+    ret = listen(sock, 1);
+    g_assert_cmpint(ret, !=, -1);
+
+    return sock;
+}
+
 /**
  * Allow users to send a message without waiting for the reply,
  * in the case that they choose to discard all replies up until