summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--CODING_STYLE17
-rw-r--r--MAINTAINERS2
-rw-r--r--accel/tcg/cputlb.c59
-rw-r--r--block.c1
-rw-r--r--block/block-backend.c1
-rw-r--r--block/crypto.c12
-rw-r--r--block/gluster.c1
-rw-r--r--block/iscsi.c24
-rw-r--r--block/nbd.c16
-rw-r--r--block/nfs.c8
-rw-r--r--block/parallels.c11
-rw-r--r--block/qcow.c11
-rw-r--r--block/qcow2.c11
-rw-r--r--block/qed.c11
-rw-r--r--block/quorum.c1
-rw-r--r--block/rbd.c85
-rw-r--r--block/sheepdog.c23
-rw-r--r--block/snapshot.c1
-rw-r--r--block/ssh.c16
-rw-r--r--block/vdi.c8
-rw-r--r--block/vhdx.c11
-rw-r--r--block/vpc.c11
-rw-r--r--block/vvfat.c1
-rw-r--r--block/vxhs.c1
-rw-r--r--blockdev.c111
-rw-r--r--device-hotplug.c4
-rw-r--r--docs/devel/loads-stores.rst15
-rw-r--r--exec.c263
-rw-r--r--hmp-commands.hx1
-rw-r--r--hw/alpha/typhoon.c3
-rw-r--r--hw/arm/armv7m.c28
-rw-r--r--hw/arm/mps2-tz.c32
-rw-r--r--hw/arm/smmuv3.c2
-rw-r--r--hw/arm/stellaris.c12
-rw-r--r--hw/block/block.c27
-rw-r--r--hw/block/m25p80.c1
-rw-r--r--hw/block/nvme.c1
-rw-r--r--hw/block/pflash_cfi02.c97
-rw-r--r--hw/block/virtio-blk.c1
-rw-r--r--hw/char/parallel.c50
-rw-r--r--hw/core/or-irq.c39
-rw-r--r--hw/dma/rc4030.c2
-rw-r--r--hw/i386/amd_iommu.c2
-rw-r--r--hw/i386/intel_iommu.c8
-rw-r--r--hw/ide/qdev.c1
-rw-r--r--hw/input/pckbd.c14
-rw-r--r--hw/intc/arm_gicv3_kvm.c18
-rw-r--r--hw/intc/armv7m_nvic.c6
-rw-r--r--hw/m68k/mcf5206.c48
-rw-r--r--hw/misc/aspeed_scu.c20
-rw-r--r--hw/ppc/spapr_iommu.c5
-rw-r--r--hw/s390x/s390-pci-bus.c2
-rw-r--r--hw/s390x/s390-pci-inst.c4
-rw-r--r--hw/scsi/scsi-disk.c1
-rw-r--r--hw/sh4/sh7750.c44
-rw-r--r--hw/sparc/sun4m_iommu.c3
-rw-r--r--hw/sparc64/sun4u_iommu.c2
-rw-r--r--hw/usb/dev-storage.c1
-rw-r--r--hw/vfio/common.c6
-rw-r--r--hw/virtio/vhost.c7
-rw-r--r--hw/watchdog/wdt_i6300esb.c48
-rw-r--r--include/block/qdict.h34
-rw-r--r--include/exec/cpu-all.h4
-rw-r--r--include/exec/cpu-defs.h9
-rw-r--r--include/exec/exec-all.h16
-rw-r--r--include/exec/memory.h65
-rw-r--r--include/hw/arm/arm.h8
-rw-r--r--include/hw/block/block.h1
-rw-r--r--include/hw/or-irq.h5
-rw-r--r--include/qapi/qmp/qdict.h17
-rw-r--r--include/qemu/bswap.h52
-rw-r--r--include/qom/cpu.h3
-rw-r--r--include/sysemu/blockdev.h3
-rw-r--r--memory.c33
-rw-r--r--qapi/block-core.json19
-rw-r--r--qapi/job.json23
-rw-r--r--qemu-doc.texi15
-rw-r--r--qemu-img.c2
-rw-r--r--qemu-options.hx14
-rw-r--r--qobject/Makefile.objs1
-rw-r--r--qobject/block-qdict.c722
-rw-r--r--qobject/qdict.c628
-rw-r--r--target/arm/cpu.c18
-rw-r--r--target/arm/helper-sve.h294
-rw-r--r--target/arm/helper.h19
-rw-r--r--target/arm/sve.decode248
-rw-r--r--target/arm/sve_helper.c1250
-rw-r--r--target/arm/translate-a64.h26
-rw-r--r--target/arm/translate-sve.c1458
-rw-r--r--target/arm/translate.c43
-rw-r--r--target/arm/vec_helper.c69
-rw-r--r--target/microblaze/mmu.c1
-rw-r--r--target/microblaze/translate.c15
-rw-r--r--tests/Makefile.include4
-rw-r--r--tests/ahci-test.c6
-rw-r--r--tests/check-block-qdict.c690
-rw-r--r--tests/check-qdict.c641
-rw-r--r--tests/check-qobject.c1
-rw-r--r--tests/hd-geo-test.c37
-rw-r--r--tests/ide-test.c8
-rwxr-xr-xtests/qemu-iotests/22160
-rw-r--r--tests/qemu-iotests/221.out16
-rw-r--r--tests/qemu-iotests/group1
-rw-r--r--tests/test-replication.c1
-rw-r--r--util/qemu-config.c1
105 files changed, 5810 insertions, 2043 deletions
diff --git a/CODING_STYLE b/CODING_STYLE
index 12ba58ee29..ec075dedc4 100644
--- a/CODING_STYLE
+++ b/CODING_STYLE
@@ -124,6 +124,23 @@ We use traditional C-style /* */ comments and avoid // comments.
 Rationale: The // form is valid in C99, so this is purely a matter of
 consistency of style. The checkpatch script will warn you about this.
 
+Multiline comment blocks should have a row of stars on the left,
+and the initial /* and terminating */ both on their own lines:
+    /*
+     * like
+     * this
+     */
+This is the same format required by the Linux kernel coding style.
+
+(Some of the existing comments in the codebase use the GNU Coding
+Standards form which does not have stars on the left, or other
+variations; avoid these when writing new comments, but don't worry
+about converting to the preferred form unless you're editing that
+comment anyway.)
+
+Rationale: Consistency, and ease of visually picking out a multiline
+comment from the surrounding code.
+
 8. trace-events style
 
 8.1 0x prefix
diff --git a/MAINTAINERS b/MAINTAINERS
index 8a94517e9e..0fb5f38f9f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1369,6 +1369,8 @@ F: qemu-img*
 F: qemu-io*
 F: tests/qemu-iotests/
 F: util/qemu-progress.c
+F: qobject/block-qdict.c
+F: test/check-block-qdict.c
 T: git git://repo.or.cz/qemu/kevin.git block
 
 Block I/O path
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 05439039e9..0a721bb9c4 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -632,7 +632,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
     }
 
     sz = size;
-    section = address_space_translate_for_iotlb(cpu, asidx, paddr, &xlat, &sz);
+    section = address_space_translate_for_iotlb(cpu, asidx, paddr, &xlat, &sz,
+                                                attrs, &prot);
     assert(sz >= TARGET_PAGE_SIZE);
 
     tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
@@ -664,6 +665,18 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
     env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
 
     /* refill the tlb */
+    /*
+     * At this point iotlb contains a physical section number in the lower
+     * TARGET_PAGE_BITS, and either
+     *  + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)
+     *  + the offset within section->mr of the page base (otherwise)
+     * We subtract the vaddr (which is page aligned and thus won't
+     * disturb the low bits) to give an offset which can be added to the
+     * (non-page-aligned) vaddr of the eventual memory access to get
+     * the MemoryRegion offset for the access. Note that the vaddr we
+     * subtract here is that of the page base, and not the same as the
+     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
+     */
     env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
     env->iotlb[mmu_idx][index].attrs = attrs;
 
@@ -765,13 +778,16 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                          target_ulong addr, uintptr_t retaddr, int size)
 {
     CPUState *cpu = ENV_GET_CPU(env);
-    hwaddr physaddr = iotlbentry->addr;
-    MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
+    hwaddr mr_offset;
+    MemoryRegionSection *section;
+    MemoryRegion *mr;
     uint64_t val;
     bool locked = false;
     MemTxResult r;
 
-    physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
+    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+    mr = section->mr;
+    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
     cpu->mem_io_pc = retaddr;
     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
         cpu_io_recompile(cpu, retaddr);
@@ -783,9 +799,13 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
         qemu_mutex_lock_iothread();
         locked = true;
     }
-    r = memory_region_dispatch_read(mr, physaddr,
+    r = memory_region_dispatch_read(mr, mr_offset,
                                     &val, size, iotlbentry->attrs);
     if (r != MEMTX_OK) {
+        hwaddr physaddr = mr_offset +
+            section->offset_within_address_space -
+            section->offset_within_region;
+
         cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_LOAD,
                                mmu_idx, iotlbentry->attrs, r, retaddr);
     }
@@ -802,12 +822,15 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                       uintptr_t retaddr, int size)
 {
     CPUState *cpu = ENV_GET_CPU(env);
-    hwaddr physaddr = iotlbentry->addr;
-    MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
+    hwaddr mr_offset;
+    MemoryRegionSection *section;
+    MemoryRegion *mr;
     bool locked = false;
     MemTxResult r;
 
-    physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
+    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+    mr = section->mr;
+    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
         cpu_io_recompile(cpu, retaddr);
     }
@@ -818,9 +841,13 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
         qemu_mutex_lock_iothread();
         locked = true;
     }
-    r = memory_region_dispatch_write(mr, physaddr,
+    r = memory_region_dispatch_write(mr, mr_offset,
                                      val, size, iotlbentry->attrs);
     if (r != MEMTX_OK) {
+        hwaddr physaddr = mr_offset +
+            section->offset_within_address_space -
+            section->offset_within_region;
+
         cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_STORE,
                                mmu_idx, iotlbentry->attrs, r, retaddr);
     }
@@ -868,12 +895,13 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
  */
 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
 {
-    int mmu_idx, index, pd;
+    int mmu_idx, index;
     void *p;
     MemoryRegion *mr;
+    MemoryRegionSection *section;
     CPUState *cpu = ENV_GET_CPU(env);
     CPUIOTLBEntry *iotlbentry;
-    hwaddr physaddr;
+    hwaddr physaddr, mr_offset;
 
     index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     mmu_idx = cpu_mmu_index(env, true);
@@ -884,8 +912,8 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
         }
     }
     iotlbentry = &env->iotlb[mmu_idx][index];
-    pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
-    mr = iotlb_to_region(cpu, pd, iotlbentry->attrs);
+    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+    mr = section->mr;
     if (memory_region_is_unassigned(mr)) {
         qemu_mutex_lock_iothread();
         if (memory_region_request_mmio_ptr(mr, addr)) {
@@ -906,7 +934,10 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
          * and use the MemTXResult it produced). However it is the
          * simplest place we have currently available for the check.
          */
-        physaddr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+        mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+        physaddr = mr_offset +
+            section->offset_within_address_space -
+            section->offset_within_region;
         cpu_transaction_failed(cpu, physaddr, addr, 0, MMU_INST_FETCH, mmu_idx,
                                iotlbentry->attrs, MEMTX_DECODE_ERROR, 0);
 
diff --git a/block.c b/block.c
index 50887087f3..afe30caac3 100644
--- a/block.c
+++ b/block.c
@@ -27,6 +27,7 @@
 #include "block/block_int.h"
 #include "block/blockjob.h"
 #include "block/nbd.h"
+#include "block/qdict.h"
 #include "qemu/error-report.h"
 #include "module_block.h"
 #include "qemu/module.h"
diff --git a/block/block-backend.c b/block/block-backend.c
index d55c328736..2d1a3463e8 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -419,7 +419,6 @@ static void drive_info_del(DriveInfo *dinfo)
         return;
     }
     qemu_opts_del(dinfo->opts);
-    g_free(dinfo->serial);
     g_free(dinfo);
 }
 
diff --git a/block/crypto.c b/block/crypto.c
index bc322b50f5..82091c5f70 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -21,11 +21,11 @@
 #include "qemu/osdep.h"
 
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "crypto/block.h"
 #include "qapi/opts-visitor.h"
 #include "qapi/qapi-visit-crypto.h"
-#include "qapi/qmp/qdict.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/error.h"
 #include "qemu/option.h"
@@ -159,7 +159,10 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
     ret = g_new0(QCryptoBlockOpenOptions, 1);
     ret->format = format;
 
-    v = qobject_input_visitor_new_keyval(QOBJECT(opts));
+    v = qobject_input_visitor_new_flat_confused(opts, &local_err);
+    if (local_err) {
+        goto out;
+    }
 
     visit_start_struct(v, NULL, NULL, 0, &local_err);
     if (local_err) {
@@ -210,7 +213,10 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
     ret = g_new0(QCryptoBlockCreateOptions, 1);
     ret->format = format;
 
-    v = qobject_input_visitor_new_keyval(QOBJECT(opts));
+    v = qobject_input_visitor_new_flat_confused(opts, &local_err);
+    if (local_err) {
+        goto out;
+    }
 
     visit_start_struct(v, NULL, NULL, 0, &local_err);
     if (local_err) {
diff --git a/block/gluster.c b/block/gluster.c
index 9900b6420c..b5fe7f3e87 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -11,6 +11,7 @@
 #include "qemu/osdep.h"
 #include <glusterfs/api/glfs.h>
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qerror.h"
diff --git a/block/iscsi.c b/block/iscsi.c
index c2fbd8a8aa..9f00fb47a5 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -33,6 +33,7 @@
 #include "qemu/bitops.h"
 #include "qemu/bitmap.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "scsi/constants.h"
 #include "qemu/iov.h"
 #include "qemu/option.h"
@@ -1713,10 +1714,6 @@ static QemuOptsList runtime_opts = {
             .name = "timeout",
             .type = QEMU_OPT_NUMBER,
         },
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-        },
         { /* end of list */ }
     },
 };
@@ -1756,27 +1753,12 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     char *initiator_name = NULL;
     QemuOpts *opts;
     Error *local_err = NULL;
-    const char *transport_name, *portal, *target, *filename;
+    const char *transport_name, *portal, *target;
 #if LIBISCSI_API_VERSION >= (20160603)
     enum iscsi_transport_type transport;
 #endif
     int i, ret = 0, timeout = 0, lun;
 
-    /* If we are given a filename, parse the filename, with precedence given to
-     * filename encoded options */
-    filename = qdict_get_try_str(options, "filename");
-    if (filename) {
-        warn_report("'filename' option specified. "
-                    "This is an unsupported option, and may be deprecated "
-                    "in the future");
-        iscsi_parse_filename(filename, options, &local_err);
-        if (local_err) {
-            ret = -EINVAL;
-            error_propagate(errp, local_err);
-            goto exit;
-        }
-    }
-
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
     if (local_err) {
@@ -2006,7 +1988,7 @@ out:
         }
         memset(iscsilun, 0, sizeof(IscsiLun));
     }
-exit:
+
     return ret;
 }
 
diff --git a/block/nbd.c b/block/nbd.c
index ff8333e3c1..13db4030e6 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -28,6 +28,7 @@
 
 #include "qemu/osdep.h"
 #include "nbd-client.h"
+#include "block/qdict.h"
 #include "qapi/error.h"
 #include "qemu/uri.h"
 #include "block/block_int.h"
@@ -262,7 +263,6 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options,
 {
     SocketAddress *saddr = NULL;
     QDict *addr = NULL;
-    QObject *crumpled_addr = NULL;
     Visitor *iv = NULL;
     Error *local_err = NULL;
 
@@ -272,20 +272,11 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options,
         goto done;
     }
 
-    crumpled_addr = qdict_crumple(addr, errp);
-    if (!crumpled_addr) {
+    iv = qobject_input_visitor_new_flat_confused(addr, errp);
+    if (!iv) {
         goto done;
     }
 
-    /*
-     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive
-     * server.type=inet.  .to doesn't matter, it's ignored anyway.
-     * That's because when @options come from -blockdev or
-     * blockdev_add, members are typed according to the QAPI schema,
-     * but when they come from -drive, they're all QString.  The
-     * visitor expects the former.
-     */
-    iv = qobject_input_visitor_new(crumpled_addr);
     visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -294,7 +285,6 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options,
 
 done:
     qobject_unref(addr);
-    qobject_unref(crumpled_addr);
     visit_free(iv);
     return saddr;
 }
diff --git a/block/nfs.c b/block/nfs.c
index 3349b67a76..743ca0450e 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -29,6 +29,7 @@
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "trace.h"
 #include "qemu/iov.h"
 #include "qemu/option.h"
@@ -555,20 +556,17 @@ static BlockdevOptionsNfs *nfs_options_qdict_to_qapi(QDict *options,
                                                      Error **errp)
 {
     BlockdevOptionsNfs *opts = NULL;
-    QObject *crumpled = NULL;
     Visitor *v;
     const QDictEntry *e;
     Error *local_err = NULL;
 
-    crumpled = qdict_crumple(options, errp);
-    if (crumpled == NULL) {
+    v = qobject_input_visitor_new_flat_confused(options, errp);
+    if (!v) {
         return NULL;
     }
 
-    v = qobject_input_visitor_new_keyval(crumpled);
     visit_type_BlockdevOptionsNfs(v, NULL, &opts, &local_err);
     visit_free(v);
-    qobject_unref(crumpled);
 
     if (local_err) {
         error_propagate(errp, local_err);
diff --git a/block/parallels.c b/block/parallels.c
index 6e9c37f44e..fd215e202a 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -31,6 +31,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
@@ -615,8 +616,7 @@ static int coroutine_fn parallels_co_create_opts(const char *filename,
     BlockdevCreateOptions *create_options = NULL;
     Error *local_err = NULL;
     BlockDriverState *bs = NULL;
-    QDict *qdict = NULL;
-    QObject *qobj;
+    QDict *qdict;
     Visitor *v;
     int ret;
 
@@ -652,15 +652,12 @@ static int coroutine_fn parallels_co_create_opts(const char *filename,
     qdict_put_str(qdict, "driver", "parallels");
     qdict_put_str(qdict, "file", bs->node_name);
 
-    qobj = qdict_crumple(qdict, errp);
-    qobject_unref(qdict);
-    qdict = qobject_to(QDict, qobj);
-    if (qdict == NULL) {
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
         ret = -EINVAL;
         goto done;
     }
 
-    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
     visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
     visit_free(v);
 
diff --git a/block/qcow.c b/block/qcow.c
index 1f866af0d3..5532731b9f 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -26,6 +26,7 @@
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
@@ -945,8 +946,7 @@ static int coroutine_fn qcow_co_create_opts(const char *filename,
 {
     BlockdevCreateOptions *create_options = NULL;
     BlockDriverState *bs = NULL;
-    QDict *qdict = NULL;
-    QObject *qobj;
+    QDict *qdict;
     Visitor *v;
     const char *val;
     Error *local_err = NULL;
@@ -996,15 +996,12 @@ static int coroutine_fn qcow_co_create_opts(const char *filename,
     qdict_put_str(qdict, "driver", "qcow");
     qdict_put_str(qdict, "file", bs->node_name);
 
-    qobj = qdict_crumple(qdict, errp);
-    qobject_unref(qdict);
-    qdict = qobject_to(QDict, qobj);
-    if (qdict == NULL) {
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
         ret = -EINVAL;
         goto fail;
     }
 
-    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
     visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
     visit_free(v);
 
diff --git a/block/qcow2.c b/block/qcow2.c
index 6fa5e1d71a..945132f692 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -24,6 +24,7 @@
 
 #include "qemu/osdep.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include <zlib.h>
@@ -3079,8 +3080,7 @@ static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opt
                                              Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
-    QDict *qdict = NULL;
-    QObject *qobj;
+    QDict *qdict;
     Visitor *v;
     BlockDriverState *bs = NULL;
     Error *local_err = NULL;
@@ -3151,15 +3151,12 @@ static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opt
     qdict_put_str(qdict, "file", bs->node_name);
 
     /* Now get the QAPI type BlockdevCreateOptions */
-    qobj = qdict_crumple(qdict, errp);
-    qobject_unref(qdict);
-    qdict = qobject_to(QDict, qobj);
-    if (qdict == NULL) {
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
         ret = -EINVAL;
         goto finish;
     }
 
-    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
     visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
     visit_free(v);
 
diff --git a/block/qed.c b/block/qed.c
index 65cfe92393..2363814538 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -13,6 +13,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "block/qdict.h"
 #include "qapi/error.h"
 #include "qemu/timer.h"
 #include "qemu/bswap.h"
@@ -721,8 +722,7 @@ static int coroutine_fn bdrv_qed_co_create_opts(const char *filename,
                                                 Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
-    QDict *qdict = NULL;
-    QObject *qobj;
+    QDict *qdict;
     Visitor *v;
     BlockDriverState *bs = NULL;
     Error *local_err = NULL;
@@ -762,15 +762,12 @@ static int coroutine_fn bdrv_qed_co_create_opts(const char *filename,
     qdict_put_str(qdict, "driver", "qed");
     qdict_put_str(qdict, "file", bs->node_name);
 
-    qobj = qdict_crumple(qdict, errp);
-    qobject_unref(qdict);
-    qdict = qobject_to(QDict, qobj);
-    if (qdict == NULL) {
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
         ret = -EINVAL;
         goto fail;
     }
 
-    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
     visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
     visit_free(v);
 
diff --git a/block/quorum.c b/block/quorum.c
index b6476c405a..9152da8c58 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -17,6 +17,7 @@
 #include "qemu/cutils.h"
 #include "qemu/option.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "qapi/error.h"
 #include "qapi/qapi-events-block.h"
 #include "qapi/qmp/qdict.h"
diff --git a/block/rbd.c b/block/rbd.c
index a16431e267..f2c6965418 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -18,6 +18,7 @@
 #include "qemu/error-report.h"
 #include "qemu/option.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "crypto/secret.h"
 #include "qemu/cutils.h"
 #include "qapi/qmp/qstring.h"
@@ -238,21 +239,44 @@ static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
 }
 
 
-static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
+static int qemu_rbd_set_auth(rados_t cluster, BlockdevOptionsRbd *opts,
                              Error **errp)
 {
-    if (secretid == 0) {
-        return 0;
-    }
+    char *key, *acr;
+    int r;
+    GString *accu;
+    RbdAuthModeList *auth;
 
-    gchar *secret = qcrypto_secret_lookup_as_base64(secretid,
-                                                    errp);
-    if (!secret) {
-        return -1;
+    if (opts->key_secret) {
+        key = qcrypto_secret_lookup_as_base64(opts->key_secret, errp);
+        if (!key) {
+            return -EIO;
+        }
+        r = rados_conf_set(cluster, "key", key);
+        g_free(key);
+        if (r < 0) {
+            error_setg_errno(errp, -r, "Could not set 'key'");
+            return r;
+        }
     }
 
-    rados_conf_set(cluster, "key", secret);
-    g_free(secret);
+    if (opts->has_auth_client_required) {
+        accu = g_string_new("");
+        for (auth = opts->auth_client_required; auth; auth = auth->next) {
+            if (accu->str[0]) {
+                g_string_append_c(accu, ';');
+            }
+            g_string_append(accu, RbdAuthMode_str(auth->value));
+        }
+        acr = g_string_free(accu, FALSE);
+        r = rados_conf_set(cluster, "auth_client_required", acr);
+        g_free(acr);
+        if (r < 0) {
+            error_setg_errno(errp, -r,
+                             "Could not set 'auth_client_required'");
+            return r;
+        }
+    }
 
     return 0;
 }
@@ -344,9 +368,7 @@ static QemuOptsList runtime_opts = {
     },
 };
 
-/* FIXME Deprecate and remove keypairs or make it available in QMP.
- * password_secret should eventually be configurable in opts->location. Support
- * for it in .bdrv_open will make it work here as well. */
+/* FIXME Deprecate and remove keypairs or make it available in QMP. */
 static int qemu_rbd_do_create(BlockdevCreateOptions *options,
                               const char *keypairs, const char *password_secret,
                               Error **errp)
@@ -552,6 +574,16 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
     Error *local_err = NULL;
     int r;
 
+    if (secretid) {
+        if (opts->key_secret) {
+            error_setg(errp,
+                       "Legacy 'password-secret' clashes with 'key-secret'");
+            return -EINVAL;
+        }
+        opts->key_secret = g_strdup(secretid);
+        opts->has_key_secret = true;
+    }
+
     mon_host = qemu_rbd_mon_host(opts, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -584,8 +616,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
         }
     }
 
-    if (qemu_rbd_set_auth(*cluster, secretid, errp) < 0) {
-        r = -EIO;
+    r = qemu_rbd_set_auth(*cluster, opts, errp);
+    if (r < 0) {
         goto failed_shutdown;
     }
 
@@ -629,28 +661,11 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
     BDRVRBDState *s = bs->opaque;
     BlockdevOptionsRbd *opts = NULL;
     Visitor *v;
-    QObject *crumpled = NULL;
     const QDictEntry *e;
     Error *local_err = NULL;
-    const char *filename;
     char *keypairs, *secretid;
     int r;
 
-    /* If we are given a filename, parse the filename, with precedence given to
-     * filename encoded options */
-    filename = qdict_get_try_str(options, "filename");
-    if (filename) {
-        warn_report("'filename' option specified. "
-                    "This is an unsupported option, and may be deprecated "
-                    "in the future");
-        qemu_rbd_parse_filename(filename, options, &local_err);
-        qdict_del(options, "filename");
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return -EINVAL;
-        }
-    }
-
     keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs"));
     if (keypairs) {
         qdict_del(options, "=keyvalue-pairs");
@@ -662,16 +677,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* Convert the remaining options into a QAPI object */
-    crumpled = qdict_crumple(options, errp);
-    if (crumpled == NULL) {
+    v = qobject_input_visitor_new_flat_confused(options, errp);
+    if (!v) {
         r = -EINVAL;
         goto out;
     }
 
-    v = qobject_input_visitor_new_keyval(crumpled);
     visit_type_BlockdevOptionsRbd(v, NULL, &opts, &local_err);
     visit_free(v);
-    qobject_unref(crumpled);
 
     if (local_err) {
         error_propagate(errp, local_err);
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 7b98725af7..665b1763eb 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -24,6 +24,7 @@
 #include "qemu/option.h"
 #include "qemu/sockets.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/bitops.h"
 #include "qemu/cutils.h"
@@ -538,27 +539,17 @@ static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
 static SocketAddress *sd_server_config(QDict *options, Error **errp)
 {
     QDict *server = NULL;
-    QObject *crumpled_server = NULL;
     Visitor *iv = NULL;
     SocketAddress *saddr = NULL;
     Error *local_err = NULL;
 
     qdict_extract_subqdict(options, &server, "server.");
 
-    crumpled_server = qdict_crumple(server, errp);
-    if (!crumpled_server) {
+    iv = qobject_input_visitor_new_flat_confused(server, errp);
+    if (!iv) {
         goto done;
     }
 
-    /*
-     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive
-     * server.type=inet.  .to doesn't matter, it's ignored anyway.
-     * That's because when @options come from -blockdev or
-     * blockdev_add, members are typed according to the QAPI schema,
-     * but when they come from -drive, they're all QString.  The
-     * visitor expects the former.
-     */
-    iv = qobject_input_visitor_new(crumpled_server);
     visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -567,7 +558,6 @@ static SocketAddress *sd_server_config(QDict *options, Error **errp)
 
 done:
     visit_free(iv);
-    qobject_unref(crumpled_server);
     qobject_unref(server);
     return saddr;
 }
@@ -2180,7 +2170,6 @@ static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts,
 {
     BlockdevCreateOptions *create_options = NULL;
     QDict *qdict, *location_qdict;
-    QObject *crumpled;
     Visitor *v;
     char *redundancy;
     Error *local_err = NULL;
@@ -2216,16 +2205,14 @@ static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts,
     }
 
     /* Get the QAPI object */
-    crumpled = qdict_crumple(qdict, errp);
-    if (crumpled == NULL) {
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
         ret = -EINVAL;
         goto fail;
     }
 
-    v = qobject_input_visitor_new_keyval(crumpled);
     visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
     visit_free(v);
-    qobject_unref(crumpled);
 
     if (local_err) {
         error_propagate(errp, local_err);
diff --git a/block/snapshot.c b/block/snapshot.c
index 2953d96c06..f9903bc94e 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 #include "block/snapshot.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qerror.h"
diff --git a/block/ssh.c b/block/ssh.c
index 4c4fa3ccfc..da7bbf73e2 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -28,6 +28,7 @@
 #include <libssh2_sftp.h>
 
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/option.h"
@@ -605,7 +606,6 @@ static BlockdevOptionsSsh *ssh_parse_options(QDict *options, Error **errp)
     BlockdevOptionsSsh *result = NULL;
     QemuOpts *opts = NULL;
     Error *local_err = NULL;
-    QObject *crumpled;
     const QDictEntry *e;
     Visitor *v;
 
@@ -622,23 +622,13 @@ static BlockdevOptionsSsh *ssh_parse_options(QDict *options, Error **errp)
     }
 
     /* Create the QAPI object */
-    crumpled = qdict_crumple(options, errp);
-    if (crumpled == NULL) {
+    v = qobject_input_visitor_new_flat_confused(options, errp);
+    if (!v) {
         goto fail;
     }
 
-    /*
-     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive.
-     * .to doesn't matter, it's ignored anyway.
-     * That's because when @options come from -blockdev or
-     * blockdev_add, members are typed according to the QAPI schema,
-     * but when they come from -drive, they're all QString.  The
-     * visitor expects the former.
-     */
-    v = qobject_input_visitor_new(crumpled);
     visit_type_BlockdevOptionsSsh(v, NULL, &result, &local_err);
     visit_free(v);
-    qobject_unref(crumpled);
 
     if (local_err) {
         error_propagate(errp, local_err);
diff --git a/block/vdi.c b/block/vdi.c
index 668af0a828..1d8ed67dbf 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -51,10 +51,10 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
-#include "qapi/qmp/qdict.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qapi-visit-block-core.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
@@ -934,7 +934,11 @@ static int coroutine_fn vdi_co_create_opts(const char *filename, QemuOpts *opts,
     }
 
     /* Get the QAPI object */
-    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
+        ret = -EINVAL;
+        goto done;
+    }
     visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
     visit_free(v);
 
diff --git a/block/vhdx.c b/block/vhdx.c
index 0831c5c5f4..a677703a9e 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -18,6 +18,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
@@ -1964,8 +1965,7 @@ static int coroutine_fn vhdx_co_create_opts(const char *filename,
                                             Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
-    QDict *qdict = NULL;
-    QObject *qobj;
+    QDict *qdict;
     Visitor *v;
     BlockDriverState *bs = NULL;
     Error *local_err = NULL;
@@ -2004,15 +2004,12 @@ static int coroutine_fn vhdx_co_create_opts(const char *filename,
     qdict_put_str(qdict, "driver", "vhdx");
     qdict_put_str(qdict, "file", bs->node_name);
 
-    qobj = qdict_crumple(qdict, errp);
-    qobject_unref(qdict);
-    qdict = qobject_to(QDict, qobj);
-    if (qdict == NULL) {
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
         ret = -EINVAL;
         goto fail;
     }
 
-    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
     visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
     visit_free(v);
 
diff --git a/block/vpc.c b/block/vpc.c
index 0ebfcd3cc8..bf294abfa7 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -26,6 +26,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
@@ -1080,8 +1081,7 @@ static int coroutine_fn vpc_co_create_opts(const char *filename,
                                            QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
-    QDict *qdict = NULL;
-    QObject *qobj;
+    QDict *qdict;
     Visitor *v;
     BlockDriverState *bs = NULL;
     Error *local_err = NULL;
@@ -1118,15 +1118,12 @@ static int coroutine_fn vpc_co_create_opts(const char *filename,
     qdict_put_str(qdict, "driver", "vpc");
     qdict_put_str(qdict, "file", bs->node_name);
 
-    qobj = qdict_crumple(qdict, errp);
-    qobject_unref(qdict);
-    qdict = qobject_to(QDict, qobj);
-    if (qdict == NULL) {
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
         ret = -EINVAL;
         goto fail;
     }
 
-    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
     visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
     visit_free(v);
 
diff --git a/block/vvfat.c b/block/vvfat.c
index 662dca0114..4595f335b8 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -27,6 +27,7 @@
 #include <dirent.h>
 #include "qapi/error.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
 #include "qemu/bswap.h"
diff --git a/block/vxhs.c b/block/vxhs.c
index 339e23218d..0cb0a007e9 100644
--- a/block/vxhs.c
+++ b/block/vxhs.c
@@ -12,6 +12,7 @@
 #include <qnio/qnio_api.h>
 #include <sys/param.h>
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qstring.h"
diff --git a/blockdev.c b/blockdev.c
index 4862323012..7f65cd7497 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -35,6 +35,7 @@
 #include "sysemu/blockdev.h"
 #include "hw/block/block.h"
 #include "block/blockjob.h"
+#include "block/qdict.h"
 #include "block/throttle-groups.h"
 #include "monitor/monitor.h"
 #include "qemu/error-report.h"
@@ -730,30 +731,6 @@ QemuOptsList qemu_legacy_drive_opts = {
             .type = QEMU_OPT_STRING,
             .help = "interface (ide, scsi, sd, mtd, floppy, pflash, virtio)",
         },{
-            .name = "cyls",
-            .type = QEMU_OPT_NUMBER,
-            .help = "number of cylinders (ide disk geometry)",
-        },{
-            .name = "heads",
-            .type = QEMU_OPT_NUMBER,
-            .help = "number of heads (ide disk geometry)",
-        },{
-            .name = "secs",
-            .type = QEMU_OPT_NUMBER,
-            .help = "number of sectors (ide disk geometry)",
-        },{
-            .name = "trans",
-            .type = QEMU_OPT_STRING,
-            .help = "chs translation (auto, lba, none)",
-        },{
-            .name = "addr",
-            .type = QEMU_OPT_STRING,
-            .help = "pci address (virtio only)",
-        },{
-            .name = "serial",
-            .type = QEMU_OPT_STRING,
-            .help = "disk serial number",
-        },{
             .name = "file",
             .type = QEMU_OPT_STRING,
             .help = "file name",
@@ -791,19 +768,13 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
     QemuOpts *legacy_opts;
     DriveMediaType media = MEDIA_DISK;
     BlockInterfaceType type;
-    int cyls, heads, secs, translation;
     int max_devs, bus_id, unit_id, index;
-    const char *devaddr;
     const char *werror, *rerror;
     bool read_only = false;
     bool copy_on_read;
-    const char *serial;
     const char *filename;
     Error *local_err = NULL;
     int i;
-    const char *deprecated[] = {
-        "serial", "trans", "secs", "heads", "cyls", "addr"
-    };
 
     /* Change legacy command line options into QMP ones */
     static const struct {
@@ -880,16 +851,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
         goto fail;
     }
 
-    /* Other deprecated options */
-    if (!qtest_enabled()) {
-        for (i = 0; i < ARRAY_SIZE(deprecated); i++) {
-            if (qemu_opt_get(legacy_opts, deprecated[i]) != NULL) {
-                error_report("'%s' is deprecated, please use the corresponding "
-                             "option of '-device' instead", deprecated[i]);
-            }
-        }
-    }
-
     /* Media type */
     value = qemu_opt_get(legacy_opts, "media");
     if (value) {
@@ -931,57 +892,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
         type = block_default_type;
     }
 
-    /* Geometry */
-    cyls  = qemu_opt_get_number(legacy_opts, "cyls", 0);
-    heads = qemu_opt_get_number(legacy_opts, "heads", 0);
-    secs  = qemu_opt_get_number(legacy_opts, "secs", 0);
-
-    if (cyls || heads || secs) {
-        if (cyls < 1) {
-            error_report("invalid physical cyls number");
-            goto fail;
-        }
-        if (heads < 1) {
-            error_report("invalid physical heads number");
-            goto fail;
-        }
-        if (secs < 1) {
-            error_report("invalid physical secs number");
-            goto fail;
-        }
-    }
-
-    translation = BIOS_ATA_TRANSLATION_AUTO;
-    value = qemu_opt_get(legacy_opts, "trans");
-    if (value != NULL) {
-        if (!cyls) {
-            error_report("'%s' trans must be used with cyls, heads and secs",
-                         value);
-            goto fail;
-        }
-        if (!strcmp(value, "none")) {
-            translation = BIOS_ATA_TRANSLATION_NONE;
-        } else if (!strcmp(value, "lba")) {
-            translation = BIOS_ATA_TRANSLATION_LBA;
-        } else if (!strcmp(value, "large")) {
-            translation = BIOS_ATA_TRANSLATION_LARGE;
-        } else if (!strcmp(value, "rechs")) {
-            translation = BIOS_ATA_TRANSLATION_RECHS;
-        } else if (!strcmp(value, "auto")) {
-            translation = BIOS_ATA_TRANSLATION_AUTO;
-        } else {
-            error_report("'%s' invalid translation type", value);
-            goto fail;
-        }
-    }
-
-    if (media == MEDIA_CDROM) {
-        if (cyls || secs || heads) {
-            error_report("CHS can't be set with media=cdrom");
-            goto fail;
-        }
-    }
-
     /* Device address specified by bus/unit or index.
      * If none was specified, try to find the first free one. */
     bus_id  = qemu_opt_get_number(legacy_opts, "bus", 0);
@@ -1021,9 +931,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
         goto fail;
     }
 
-    /* Serial number */
-    serial = qemu_opt_get(legacy_opts, "serial");
-
     /* no id supplied -> create one */
     if (qemu_opts_id(all_opts) == NULL) {
         char *new_id;
@@ -1043,12 +950,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
     }
 
     /* Add virtio block device */
-    devaddr = qemu_opt_get(legacy_opts, "addr");
-    if (devaddr && type != IF_VIRTIO) {
-        error_report("addr is not supported by this bus type");
-        goto fail;
-    }
-
     if (type == IF_VIRTIO) {
         QemuOpts *devopts;
         devopts = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
@@ -1060,9 +961,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
         }
         qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"),
                      &error_abort);
-        if (devaddr) {
-            qemu_opt_set(devopts, "addr", devaddr, &error_abort);
-        }
     }
 
     filename = qemu_opt_get(legacy_opts, "file");
@@ -1104,16 +1002,9 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
     dinfo = g_malloc0(sizeof(*dinfo));
     dinfo->opts = all_opts;
 
-    dinfo->cyls = cyls;
-    dinfo->heads = heads;
-    dinfo->secs = secs;
-    dinfo->trans = translation;
-
     dinfo->type = type;
     dinfo->bus = bus_id;
     dinfo->unit = unit_id;
-    dinfo->devaddr = devaddr;
-    dinfo->serial = g_strdup(serial);
 
     blk_set_legacy_dinfo(blk, dinfo);
 
diff --git a/device-hotplug.c b/device-hotplug.c
index 23fd6656f1..cd427e2c76 100644
--- a/device-hotplug.c
+++ b/device-hotplug.c
@@ -69,10 +69,6 @@ void hmp_drive_add(Monitor *mon, const QDict *qdict)
     if (!dinfo) {
         goto err;
     }
-    if (dinfo->devaddr) {
-        monitor_printf(mon, "Parameter addr not supported\n");
-        goto err;
-    }
 
     switch (dinfo->type) {
     case IF_NONE:
diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst
index 6a990cc243..57d8c524bf 100644
--- a/docs/devel/loads-stores.rst
+++ b/docs/devel/loads-stores.rst
@@ -53,9 +53,24 @@ The ``_{endian}`` infix is omitted for target-endian accesses.
 The target endian accessors are only available to source
 files which are built per-target.
 
+There are also functions which take the size as an argument:
+
+load: ``ldn{endian}_p(ptr, sz)``
+
+which performs an unsigned load of ``sz`` bytes from ``ptr``
+as an ``{endian}`` order value and returns it in a uint64_t.
+
+store: ``stn{endian}_p(ptr, sz, val)``
+
+which stores ``val`` to ``ptr`` as an ``{endian}`` order value
+of size ``sz`` bytes.
+
+
 Regexes for git grep
  - ``\<ldf\?[us]\?[bwlq]\(_[hbl]e\)\?_p\>``
  - ``\<stf\?[bwlq]\(_[hbl]e\)\?_p\>``
+ - ``\<ldn_\([hbl]e\)?_p\>``
+ - ``\<stn_\([hbl]e\)?_p\>``
 
 ``cpu_{ld,st}_*``
 ~~~~~~~~~~~~~~~~~
diff --git a/exec.c b/exec.c
index f6645ede0c..ebadc0e302 100644
--- a/exec.c
+++ b/exec.c
@@ -501,8 +501,15 @@ static MemoryRegionSection address_space_translate_iommu(IOMMUMemoryRegion *iomm
     do {
         hwaddr addr = *xlat;
         IOMMUMemoryRegionClass *imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
-        IOMMUTLBEntry iotlb = imrc->translate(iommu_mr, addr, is_write ?
-                                              IOMMU_WO : IOMMU_RO);
+        int iommu_idx = 0;
+        IOMMUTLBEntry iotlb;
+
+        if (imrc->attrs_to_index) {
+            iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
+        }
+
+        iotlb = imrc->translate(iommu_mr, addr, is_write ?
+                                IOMMU_WO : IOMMU_RO, iommu_idx);
 
         if (!(iotlb.perm & (1 << is_write))) {
             goto unassigned;
@@ -646,18 +653,144 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
     return mr;
 }
 
+typedef struct TCGIOMMUNotifier {
+    IOMMUNotifier n;
+    MemoryRegion *mr;
+    CPUState *cpu;
+    int iommu_idx;
+    bool active;
+} TCGIOMMUNotifier;
+
+static void tcg_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+    TCGIOMMUNotifier *notifier = container_of(n, TCGIOMMUNotifier, n);
+
+    if (!notifier->active) {
+        return;
+    }
+    tlb_flush(notifier->cpu);
+    notifier->active = false;
+    /* We leave the notifier struct on the list to avoid reallocating it later.
+     * Generally the number of IOMMUs a CPU deals with will be small.
+     * In any case we can't unregister the iommu notifier from a notify
+     * callback.
+     */
+}
+
+static void tcg_register_iommu_notifier(CPUState *cpu,
+                                        IOMMUMemoryRegion *iommu_mr,
+                                        int iommu_idx)
+{
+    /* Make sure this CPU has an IOMMU notifier registered for this
+     * IOMMU/IOMMU index combination, so that we can flush its TLB
+     * when the IOMMU tells us the mappings we've cached have changed.
+     */
+    MemoryRegion *mr = MEMORY_REGION(iommu_mr);
+    TCGIOMMUNotifier *notifier;
+    int i;
+
+    for (i = 0; i < cpu->iommu_notifiers->len; i++) {
+        notifier = &g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier, i);
+        if (notifier->mr == mr && notifier->iommu_idx == iommu_idx) {
+            break;
+        }
+    }
+    if (i == cpu->iommu_notifiers->len) {
+        /* Not found, add a new entry at the end of the array */
+        cpu->iommu_notifiers = g_array_set_size(cpu->iommu_notifiers, i + 1);
+        notifier = &g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier, i);
+
+        notifier->mr = mr;
+        notifier->iommu_idx = iommu_idx;
+        notifier->cpu = cpu;
+        /* Rather than trying to register interest in the specific part
+         * of the iommu's address space that we've accessed and then
+         * expand it later as subsequent accesses touch more of it, we
+         * just register interest in the whole thing, on the assumption
+         * that iommu reconfiguration will be rare.
+         */
+        iommu_notifier_init(&notifier->n,
+                            tcg_iommu_unmap_notify,
+                            IOMMU_NOTIFIER_UNMAP,
+                            0,
+                            HWADDR_MAX,
+                            iommu_idx);
+        memory_region_register_iommu_notifier(notifier->mr, &notifier->n);
+    }
+
+    if (!notifier->active) {
+        notifier->active = true;
+    }
+}
+
+static void tcg_iommu_free_notifier_list(CPUState *cpu)
+{
+    /* Destroy the CPU's notifier list */
+    int i;
+    TCGIOMMUNotifier *notifier;
+
+    for (i = 0; i < cpu->iommu_notifiers->len; i++) {
+        notifier = &g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier, i);
+        memory_region_unregister_iommu_notifier(notifier->mr, &notifier->n);
+    }
+    g_array_free(cpu->iommu_notifiers, true);
+}
+
 /* Called from RCU critical section */
 MemoryRegionSection *
 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
-                                  hwaddr *xlat, hwaddr *plen)
+                                  hwaddr *xlat, hwaddr *plen,
+                                  MemTxAttrs attrs, int *prot)
 {
     MemoryRegionSection *section;
+    IOMMUMemoryRegion *iommu_mr;
+    IOMMUMemoryRegionClass *imrc;
+    IOMMUTLBEntry iotlb;
+    int iommu_idx;
     AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
 
-    section = address_space_translate_internal(d, addr, xlat, plen, false);
+    for (;;) {
+        section = address_space_translate_internal(d, addr, &addr, plen, false);
+
+        iommu_mr = memory_region_get_iommu(section->mr);
+        if (!iommu_mr) {
+            break;
+        }
+
+        imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
+
+        iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
+        tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx);
+        /* We need all the permissions, so pass IOMMU_NONE so the IOMMU
+         * doesn't short-cut its translation table walk.
+         */
+        iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, iommu_idx);
+        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
+                | (addr & iotlb.addr_mask));
+        /* Update the caller's prot bits to remove permissions the IOMMU
+         * is giving us a failure response for. If we get down to no
+         * permissions left at all we can give up now.
+         */
+        if (!(iotlb.perm & IOMMU_RO)) {
+            *prot &= ~(PAGE_READ | PAGE_EXEC);
+        }
+        if (!(iotlb.perm & IOMMU_WO)) {
+            *prot &= ~PAGE_WRITE;
+        }
+
+        if (!*prot) {
+            goto translate_fail;
+        }
+
+        d = flatview_to_dispatch(address_space_to_flatview(iotlb.target_as));
+    }
 
     assert(!memory_region_is_iommu(section->mr));
+    *xlat = addr;
     return section;
+
+translate_fail:
+    return &d->map.sections[PHYS_SECTION_UNASSIGNED];
 }
 #endif
 
@@ -816,6 +949,9 @@ void cpu_exec_unrealizefn(CPUState *cpu)
     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
         vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
     }
+#ifndef CONFIG_USER_ONLY
+    tcg_iommu_free_notifier_list(cpu);
+#endif
 }
 
 Property cpu_common_props[] = {
@@ -863,6 +999,8 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
     if (cc->vmsd != NULL) {
         vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
     }
+
+    cpu->iommu_notifiers = g_array_new(false, true, sizeof(TCGIOMMUNotifier));
 #endif
 }
 
@@ -2544,22 +2682,7 @@ static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
     memory_notdirty_write_prepare(&ndi, current_cpu, current_cpu->mem_io_vaddr,
                          ram_addr, size);
 
-    switch (size) {
-    case 1:
-        stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
-        break;
-    case 2:
-        stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
-        break;
-    case 4:
-        stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
-        break;
-    case 8:
-        stq_p(qemu_map_ram_ptr(NULL, ram_addr), val);
-        break;
-    default:
-        abort();
-    }
+    stn_p(qemu_map_ram_ptr(NULL, ram_addr), size, val);
     memory_notdirty_write_complete(&ndi);
 }
 
@@ -2739,22 +2862,8 @@ static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
     if (res) {
         return res;
     }
-    switch (len) {
-    case 1:
-        *data = ldub_p(buf);
-        return MEMTX_OK;
-    case 2:
-        *data = lduw_p(buf);
-        return MEMTX_OK;
-    case 4:
-        *data = ldl_p(buf);
-        return MEMTX_OK;
-    case 8:
-        *data = ldq_p(buf);
-        return MEMTX_OK;
-    default:
-        abort();
-    }
+    *data = ldn_p(buf, len);
+    return MEMTX_OK;
 }
 
 static MemTxResult subpage_write(void *opaque, hwaddr addr,
@@ -2768,22 +2877,7 @@ static MemTxResult subpage_write(void *opaque, hwaddr addr,
            " value %"PRIx64"\n",
            __func__, subpage, len, addr, value);
 #endif
-    switch (len) {
-    case 1:
-        stb_p(buf, value);
-        break;
-    case 2:
-        stw_p(buf, value);
-        break;
-    case 4:
-        stl_p(buf, value);
-        break;
-    case 8:
-        stq_p(buf, value);
-        break;
-    default:
-        abort();
-    }
+    stn_p(buf, len, value);
     return flatview_write(subpage->fv, addr + subpage->base, attrs, buf, len);
 }
 
@@ -2897,14 +2991,15 @@ static const MemoryRegionOps readonly_mem_ops = {
     },
 };
 
-MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
+MemoryRegionSection *iotlb_to_section(CPUState *cpu,
+                                      hwaddr index, MemTxAttrs attrs)
 {
     int asidx = cpu_asidx_from_attrs(cpu, attrs);
     CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
     AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
     MemoryRegionSection *sections = d->map.sections;
 
-    return sections[index & ~TARGET_PAGE_MASK].mr;
+    return &sections[index & ~TARGET_PAGE_MASK];
 }
 
 static void io_mem_init(void)
@@ -3128,34 +3223,8 @@ static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr,
             l = memory_access_size(mr, l, addr1);
             /* XXX: could force current_cpu to NULL to avoid
                potential bugs */
-            switch (l) {
-            case 8:
-                /* 64 bit write access */
-                val = ldq_p(buf);
-                result |= memory_region_dispatch_write(mr, addr1, val, 8,
-                                                       attrs);
-                break;
-            case 4:
-                /* 32 bit write access */
-                val = (uint32_t)ldl_p(buf);
-                result |= memory_region_dispatch_write(mr, addr1, val, 4,
-                                                       attrs);
-                break;
-            case 2:
-                /* 16 bit write access */
-                val = lduw_p(buf);
-                result |= memory_region_dispatch_write(mr, addr1, val, 2,
-                                                       attrs);
-                break;
-            case 1:
-                /* 8 bit write access */
-                val = ldub_p(buf);
-                result |= memory_region_dispatch_write(mr, addr1, val, 1,
-                                                       attrs);
-                break;
-            default:
-                abort();
-            }
+            val = ldn_p(buf, l);
+            result |= memory_region_dispatch_write(mr, addr1, val, l, attrs);
         } else {
             /* RAM case */
             ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
@@ -3216,34 +3285,8 @@ MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr,
             /* I/O case */
             release_lock |= prepare_mmio_access(mr);
             l = memory_access_size(mr, l, addr1);
-            switch (l) {
-            case 8:
-                /* 64 bit read access */
-                result |= memory_region_dispatch_read(mr, addr1, &val, 8,
-                                                      attrs);
-                stq_p(buf, val);
-                break;
-            case 4:
-                /* 32 bit read access */
-                result |= memory_region_dispatch_read(mr, addr1, &val, 4,
-                                                      attrs);
-                stl_p(buf, val);
-                break;
-            case 2:
-                /* 16 bit read access */
-                result |= memory_region_dispatch_read(mr, addr1, &val, 2,
-                                                      attrs);
-                stw_p(buf, val);
-                break;
-            case 1:
-                /* 8 bit read access */
-                result |= memory_region_dispatch_read(mr, addr1, &val, 1,
-                                                      attrs);
-                stb_p(buf, val);
-                break;
-            default:
-                abort();
-            }
+            result |= memory_region_dispatch_read(mr, addr1, &val, l, attrs);
+            stn_p(buf, l, val);
         } else {
             /* RAM case */
             ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 0734fea931..0de7c4c29e 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1283,7 +1283,6 @@ ETEXI
         .params     = "[-n] [[<domain>:]<bus>:]<slot>\n"
                       "[file=file][,if=type][,bus=n]\n"
                       "[,unit=m][,media=d][,index=i]\n"
-                      "[,cyls=c,heads=h,secs=s[,trans=t]]\n"
                       "[,snapshot=on|off][,cache=on|off]\n"
                       "[,readonly=on|off][,copy-on-read=on|off]",
         .help       = "add drive to PCI storage controller",
diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c
index 6a40869488..d3ed7cdbe8 100644
--- a/hw/alpha/typhoon.c
+++ b/hw/alpha/typhoon.c
@@ -666,7 +666,8 @@ static bool window_translate(TyphoonWindow *win, hwaddr addr,
    Pchip and generate a machine check interrupt.  */
 static IOMMUTLBEntry typhoon_translate_iommu(IOMMUMemoryRegion *iommu,
                                              hwaddr addr,
-                                             IOMMUAccessFlags flag)
+                                             IOMMUAccessFlags flag,
+                                             int iommu_idx)
 {
     TyphoonPchip *pchip = container_of(iommu, TyphoonPchip, iommu);
     IOMMUTLBEntry ret;
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index f123cc7d3d..9e00d4037c 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -178,6 +178,12 @@ static void armv7m_realize(DeviceState *dev, Error **errp)
             return;
         }
     }
+
+    /* Tell the CPU where the NVIC is; it will fail realize if it doesn't
+     * have one.
+     */
+    s->cpu->env.nvic = &s->nvic;
+
     object_property_set_bool(OBJECT(s->cpu), true, "realized", &err);
     if (err != NULL) {
         error_propagate(errp, err);
@@ -202,7 +208,6 @@ static void armv7m_realize(DeviceState *dev, Error **errp)
     sbd = SYS_BUS_DEVICE(&s->nvic);
     sysbus_connect_irq(sbd, 0,
                        qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ));
-    s->cpu->env.nvic = &s->nvic;
 
     memory_region_add_subregion(&s->container, 0xe000e000,
                                 sysbus_mmio_get_region(sbd, 0));
@@ -261,27 +266,6 @@ static void armv7m_reset(void *opaque)
     cpu_reset(CPU(cpu));
 }
 
-/* Init CPU and memory for a v7-M based board.
-   mem_size is in bytes.
-   Returns the ARMv7M device.  */
-
-DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
-                         const char *kernel_filename, const char *cpu_type)
-{
-    DeviceState *armv7m;
-
-    armv7m = qdev_create(NULL, TYPE_ARMV7M);
-    qdev_prop_set_uint32(armv7m, "num-irq", num_irq);
-    qdev_prop_set_string(armv7m, "cpu-type", cpu_type);
-    object_property_set_link(OBJECT(armv7m), OBJECT(get_system_memory()),
-                                     "memory", &error_abort);
-    /* This will exit with an error if the user passed us a bad cpu_type */
-    qdev_init_nofail(armv7m);
-
-    armv7m_load_kernel(ARM_CPU(first_cpu), kernel_filename, mem_size);
-    return armv7m;
-}
-
 void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size)
 {
     int image_size;
diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c
index 8dc8bfd4ab..c5ef95e4cc 100644
--- a/hw/arm/mps2-tz.c
+++ b/hw/arm/mps2-tz.c
@@ -74,12 +74,13 @@ typedef struct {
     UnimplementedDeviceState spi[5];
     UnimplementedDeviceState i2c[4];
     UnimplementedDeviceState i2s_audio;
-    UnimplementedDeviceState gpio[5];
+    UnimplementedDeviceState gpio[4];
     UnimplementedDeviceState dma[4];
     UnimplementedDeviceState gfx;
     CMSDKAPBUART uart[5];
     SplitIRQ sec_resp_splitter;
     qemu_or_irq uart_irq_orgate;
+    DeviceState *lan9118;
 } MPS2TZMachineState;
 
 #define TYPE_MPS2TZ_MACHINE "mps2tz"
@@ -224,6 +225,26 @@ static MemoryRegion *make_fpgaio(MPS2TZMachineState *mms, void *opaque,
     return sysbus_mmio_get_region(SYS_BUS_DEVICE(fpgaio), 0);
 }
 
+static MemoryRegion *make_eth_dev(MPS2TZMachineState *mms, void *opaque,
+                                  const char *name, hwaddr size)
+{
+    SysBusDevice *s;
+    DeviceState *iotkitdev = DEVICE(&mms->iotkit);
+    NICInfo *nd = &nd_table[0];
+
+    /* In hardware this is a LAN9220; the LAN9118 is software compatible
+     * except that it doesn't support the checksum-offload feature.
+     */
+    qemu_check_nic_model(nd, "lan9118");
+    mms->lan9118 = qdev_create(NULL, "lan9118");
+    qdev_set_nic_properties(mms->lan9118, nd);
+    qdev_init_nofail(mms->lan9118);
+
+    s = SYS_BUS_DEVICE(mms->lan9118);
+    sysbus_connect_irq(s, 0, qdev_get_gpio_in_named(iotkitdev, "EXP_IRQ", 16));
+    return sysbus_mmio_get_region(s, 0);
+}
+
 static void mps2tz_common_init(MachineState *machine)
 {
     MPS2TZMachineState *mms = MPS2TZ_MACHINE(machine);
@@ -363,7 +384,7 @@ static void mps2tz_common_init(MachineState *machine)
                 { "gpio1", make_unimp_dev, &mms->gpio[1], 0x40101000, 0x1000 },
                 { "gpio2", make_unimp_dev, &mms->gpio[2], 0x40102000, 0x1000 },
                 { "gpio3", make_unimp_dev, &mms->gpio[3], 0x40103000, 0x1000 },
-                { "gpio4", make_unimp_dev, &mms->gpio[4], 0x40104000, 0x1000 },
+                { "eth", make_eth_dev, NULL, 0x42000000, 0x100000 },
             },
         }, {
             .name = "ahb_ppcexp1",
@@ -447,13 +468,6 @@ static void mps2tz_common_init(MachineState *machine)
                                                      "cfg_sec_resp", 0));
     }
 
-    /* In hardware this is a LAN9220; the LAN9118 is software compatible
-     * except that it doesn't support the checksum-offload feature.
-     * The ethernet controller is not behind a PPC.
-     */
-    lan9118_init(&nd_table[0], 0x42000000,
-                 qdev_get_gpio_in_named(iotkitdev, "EXP_IRQ", 16));
-
     create_unimplemented_device("FPGA NS PC", 0x48007000, 0x1000);
 
     armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, 0x400000);
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 42dc521c13..978330900d 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -538,7 +538,7 @@ static int smmuv3_decode_config(IOMMUMemoryRegion *mr, SMMUTransCfg *cfg,
 }
 
 static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
-                                      IOMMUAccessFlags flag)
+                                      IOMMUAccessFlags flag, int iommu_idx)
 {
     SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu);
     SMMUv3State *s = sdev->smmu;
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
index 502a20842c..a8f1f6a912 100644
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -20,6 +20,7 @@
 #include "qemu/log.h"
 #include "exec/address-spaces.h"
 #include "sysemu/sysemu.h"
+#include "hw/arm/armv7m.h"
 #include "hw/char/pl011.h"
 #include "hw/misc/unimp.h"
 #include "cpu.h"
@@ -1298,8 +1299,13 @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
                            &error_fatal);
     memory_region_add_subregion(system_memory, 0x20000000, sram);
 
-    nvic = armv7m_init(system_memory, flash_size, NUM_IRQ_LINES,
-                       ms->kernel_filename, ms->cpu_type);
+    nvic = qdev_create(NULL, TYPE_ARMV7M);
+    qdev_prop_set_uint32(nvic, "num-irq", NUM_IRQ_LINES);
+    qdev_prop_set_string(nvic, "cpu-type", ms->cpu_type);
+    object_property_set_link(OBJECT(nvic), OBJECT(get_system_memory()),
+                                     "memory", &error_abort);
+    /* This will exit with an error if the user passed us a bad cpu_type */
+    qdev_init_nofail(nvic);
 
     qdev_connect_gpio_out_named(nvic, "SYSRESETREQ", 0,
                                 qemu_allocate_irq(&do_sys_reset, NULL, 0));
@@ -1431,6 +1437,8 @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
     create_unimplemented_device("analogue-comparator", 0x4003c000, 0x1000);
     create_unimplemented_device("hibernation", 0x400fc000, 0x1000);
     create_unimplemented_device("flash-control", 0x400fd000, 0x1000);
+
+    armv7m_load_kernel(ARM_CPU(first_cpu), ms->kernel_filename, flash_size);
 }
 
 /* FIXME: Figure out how to generate these from stellaris_boards.  */
diff --git a/hw/block/block.c b/hw/block/block.c
index b91e2b6d7e..cf0eb826f1 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -15,19 +15,6 @@
 #include "qapi/qapi-types-block.h"
 #include "qemu/error-report.h"
 
-void blkconf_serial(BlockConf *conf, char **serial)
-{
-    DriveInfo *dinfo;
-
-    if (!*serial) {
-        /* try to fall back to value set with legacy -drive serial=... */
-        dinfo = blk_legacy_dinfo(conf->blk);
-        if (dinfo) {
-            *serial = g_strdup(dinfo->serial);
-        }
-    }
-}
-
 void blkconf_blocksizes(BlockConf *conf)
 {
     BlockBackend *blk = conf->blk;
@@ -108,20 +95,6 @@ bool blkconf_geometry(BlockConf *conf, int *ptrans,
                       unsigned cyls_max, unsigned heads_max, unsigned secs_max,
                       Error **errp)
 {
-    DriveInfo *dinfo;
-
-    if (!conf->cyls && !conf->heads && !conf->secs) {
-        /* try to fall back to value set with legacy -drive cyls=... */
-        dinfo = blk_legacy_dinfo(conf->blk);
-        if (dinfo) {
-            conf->cyls  = dinfo->cyls;
-            conf->heads = dinfo->heads;
-            conf->secs  = dinfo->secs;
-            if (ptrans) {
-                *ptrans = dinfo->trans;
-            }
-        }
-    }
     if (!conf->cyls && !conf->heads && !conf->secs) {
         hd_geometry_guess(conf->blk,
                           &conf->cyls, &conf->heads, &conf->secs,
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index a5ccffb4aa..b0ed8fa418 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -698,6 +698,7 @@ static void complete_collecting_data(Flash *s)
         case MAN_MACRONIX:
             s->quad_enable = extract32(s->data[0], 6, 1);
             if (s->len > 1) {
+                s->volatile_cfg = s->data[1];
                 s->four_bytes_address_mode = extract32(s->data[1], 5, 1);
             }
             break;
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 811084b6a7..d5bf95b79b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1215,7 +1215,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
         return;
     }
 
-    blkconf_serial(&n->conf, &n->serial);
     if (!n->serial) {
         error_setg(errp, "serial property not set");
         return;
diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c
index a8b3f7f978..6c18e5e578 100644
--- a/hw/block/pflash_cfi02.c
+++ b/hw/block/pflash_cfi02.c
@@ -493,102 +493,41 @@ static void pflash_write (pflash_t *pfl, hwaddr offset,
     pfl->cmd = 0;
 }
 
-
-static uint32_t pflash_readb_be(void *opaque, hwaddr addr)
-{
-    return pflash_read(opaque, addr, 1, 1);
-}
-
-static uint32_t pflash_readb_le(void *opaque, hwaddr addr)
+static uint64_t pflash_be_readfn(void *opaque, hwaddr addr, unsigned size)
 {
-    return pflash_read(opaque, addr, 1, 0);
+    return pflash_read(opaque, addr, size, 1);
 }
 
-static uint32_t pflash_readw_be(void *opaque, hwaddr addr)
+static void pflash_be_writefn(void *opaque, hwaddr addr,
+                              uint64_t value, unsigned size)
 {
-    pflash_t *pfl = opaque;
-
-    return pflash_read(pfl, addr, 2, 1);
-}
-
-static uint32_t pflash_readw_le(void *opaque, hwaddr addr)
-{
-    pflash_t *pfl = opaque;
-
-    return pflash_read(pfl, addr, 2, 0);
-}
-
-static uint32_t pflash_readl_be(void *opaque, hwaddr addr)
-{
-    pflash_t *pfl = opaque;
-
-    return pflash_read(pfl, addr, 4, 1);
-}
-
-static uint32_t pflash_readl_le(void *opaque, hwaddr addr)
-{
-    pflash_t *pfl = opaque;
-
-    return pflash_read(pfl, addr, 4, 0);
+    pflash_write(opaque, addr, value, size, 1);
 }
 
-static void pflash_writeb_be(void *opaque, hwaddr addr,
-                             uint32_t value)
+static uint64_t pflash_le_readfn(void *opaque, hwaddr addr, unsigned size)
 {
-    pflash_write(opaque, addr, value, 1, 1);
+    return pflash_read(opaque, addr, size, 0);
 }
 
-static void pflash_writeb_le(void *opaque, hwaddr addr,
-                             uint32_t value)
+static void pflash_le_writefn(void *opaque, hwaddr addr,
+                              uint64_t value, unsigned size)
 {
-    pflash_write(opaque, addr, value, 1, 0);
-}
-
-static void pflash_writew_be(void *opaque, hwaddr addr,
-                             uint32_t value)
-{
-    pflash_t *pfl = opaque;
-
-    pflash_write(pfl, addr, value, 2, 1);
-}
-
-static void pflash_writew_le(void *opaque, hwaddr addr,
-                             uint32_t value)
-{
-    pflash_t *pfl = opaque;
-
-    pflash_write(pfl, addr, value, 2, 0);
-}
-
-static void pflash_writel_be(void *opaque, hwaddr addr,
-                             uint32_t value)
-{
-    pflash_t *pfl = opaque;
-
-    pflash_write(pfl, addr, value, 4, 1);
-}
-
-static void pflash_writel_le(void *opaque, hwaddr addr,
-                             uint32_t value)
-{
-    pflash_t *pfl = opaque;
-
-    pflash_write(pfl, addr, value, 4, 0);
+    pflash_write(opaque, addr, value, size, 0);
 }
 
 static const MemoryRegionOps pflash_cfi02_ops_be = {
-    .old_mmio = {
-        .read = { pflash_readb_be, pflash_readw_be, pflash_readl_be, },
-        .write = { pflash_writeb_be, pflash_writew_be, pflash_writel_be, },
-    },
+    .read = pflash_be_readfn,
+    .write = pflash_be_writefn,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
 static const MemoryRegionOps pflash_cfi02_ops_le = {
-    .old_mmio = {
-        .read = { pflash_readb_le, pflash_readw_le, pflash_readl_le, },
-        .write = { pflash_writeb_le, pflash_writew_le, pflash_writel_le, },
-    },
+    .read = pflash_le_readfn,
+    .write = pflash_le_writefn,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 50b5c869e3..225fe44b7a 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -935,7 +935,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
         return;
     }
 
-    blkconf_serial(&conf->conf, &conf->serial);
     if (!blkconf_apply_backend_options(&conf->conf,
                                        blk_is_read_only(conf->conf.blk), true,
                                        errp)) {
diff --git a/hw/char/parallel.c b/hw/char/parallel.c
index 1542d62201..35748e6c1b 100644
--- a/hw/char/parallel.c
+++ b/hw/char/parallel.c
@@ -554,56 +554,28 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp)
 }
 
 /* Memory mapped interface */
-static uint32_t parallel_mm_readb (void *opaque, hwaddr addr)
+static uint64_t parallel_mm_readfn(void *opaque, hwaddr addr, unsigned size)
 {
     ParallelState *s = opaque;
 
-    return parallel_ioport_read_sw(s, addr >> s->it_shift) & 0xFF;
+    return parallel_ioport_read_sw(s, addr >> s->it_shift) &
+        MAKE_64BIT_MASK(0, size * 8);
 }
 
-static void parallel_mm_writeb (void *opaque,
-                                hwaddr addr, uint32_t value)
+static void parallel_mm_writefn(void *opaque, hwaddr addr,
+                                uint64_t value, unsigned size)
 {
     ParallelState *s = opaque;
 
-    parallel_ioport_write_sw(s, addr >> s->it_shift, value & 0xFF);
-}
-
-static uint32_t parallel_mm_readw (void *opaque, hwaddr addr)
-{
-    ParallelState *s = opaque;
-
-    return parallel_ioport_read_sw(s, addr >> s->it_shift) & 0xFFFF;
-}
-
-static void parallel_mm_writew (void *opaque,
-                                hwaddr addr, uint32_t value)
-{
-    ParallelState *s = opaque;
-
-    parallel_ioport_write_sw(s, addr >> s->it_shift, value & 0xFFFF);
-}
-
-static uint32_t parallel_mm_readl (void *opaque, hwaddr addr)
-{
-    ParallelState *s = opaque;
-
-    return parallel_ioport_read_sw(s, addr >> s->it_shift);
-}
-
-static void parallel_mm_writel (void *opaque,
-                                hwaddr addr, uint32_t value)
-{
-    ParallelState *s = opaque;
-
-    parallel_ioport_write_sw(s, addr >> s->it_shift, value);
+    parallel_ioport_write_sw(s, addr >> s->it_shift,
+                             value & MAKE_64BIT_MASK(0, size * 8));
 }
 
 static const MemoryRegionOps parallel_mm_ops = {
-    .old_mmio = {
-        .read = { parallel_mm_readb, parallel_mm_readw, parallel_mm_readl },
-        .write = { parallel_mm_writeb, parallel_mm_writew, parallel_mm_writel },
-    },
+    .read = parallel_mm_readfn,
+    .write = parallel_mm_writefn,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
diff --git a/hw/core/or-irq.c b/hw/core/or-irq.c
index f9d76c4641..a86901b673 100644
--- a/hw/core/or-irq.c
+++ b/hw/core/or-irq.c
@@ -66,14 +66,49 @@ static void or_irq_init(Object *obj)
     qdev_init_gpio_out(DEVICE(obj), &s->out_irq, 1);
 }
 
+/* The original version of this device had a fixed 16 entries in its
+ * VMState array; devices with more inputs than this need to
+ * migrate the extra lines via a subsection.
+ * The subsection migrates as much of the levels[] array as is needed
+ * (including repeating the first 16 elements), to avoid the awkwardness
+ * of splitting it in two to meet the requirements of VMSTATE_VARRAY_UINT16.
+ */
+#define OLD_MAX_OR_LINES 16
+#if MAX_OR_LINES < OLD_MAX_OR_LINES
+#error MAX_OR_LINES must be at least 16 for migration compatibility
+#endif
+
+static bool vmstate_extras_needed(void *opaque)
+{
+    qemu_or_irq *s = OR_IRQ(opaque);
+
+    return s->num_lines >= OLD_MAX_OR_LINES;
+}
+
+static const VMStateDescription vmstate_or_irq_extras = {
+    .name = "or-irq-extras",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = vmstate_extras_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_VARRAY_UINT16_UNSAFE(levels, qemu_or_irq, num_lines, 0,
+                                     vmstate_info_bool, bool),
+        VMSTATE_END_OF_LIST(),
+    },
+};
+
 static const VMStateDescription vmstate_or_irq = {
     .name = TYPE_OR_IRQ,
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_BOOL_ARRAY(levels, qemu_or_irq, MAX_OR_LINES),
+        VMSTATE_BOOL_SUB_ARRAY(levels, qemu_or_irq, 0, OLD_MAX_OR_LINES),
         VMSTATE_END_OF_LIST(),
-    }
+    },
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_or_irq_extras,
+        NULL
+    },
 };
 
 static Property or_irq_properties[] = {
diff --git a/hw/dma/rc4030.c b/hw/dma/rc4030.c
index 5d4833eeca..ccd8612888 100644
--- a/hw/dma/rc4030.c
+++ b/hw/dma/rc4030.c
@@ -491,7 +491,7 @@ static const MemoryRegionOps jazzio_ops = {
 };
 
 static IOMMUTLBEntry rc4030_dma_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
-                                          IOMMUAccessFlags flag)
+                                          IOMMUAccessFlags flag, int iommu_idx)
 {
     rc4030State *s = container_of(iommu, rc4030State, dma_mr);
     IOMMUTLBEntry ret = {
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 63d46ff6ee..1fd669fef8 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -991,7 +991,7 @@ static inline bool amdvi_is_interrupt_addr(hwaddr addr)
 }
 
 static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
-                                     IOMMUAccessFlags flag)
+                                     IOMMUAccessFlags flag, int iommu_idx)
 {
     AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
     AMDVIState *s = as->iommu_state;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b5a09b7908..0a8cd4e9cc 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1023,7 +1023,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
 static int vtd_sync_shadow_page_hook(IOMMUTLBEntry *entry,
                                      void *private)
 {
-    memory_region_notify_iommu((IOMMUMemoryRegion *)private, *entry);
+    memory_region_notify_iommu((IOMMUMemoryRegion *)private, 0, *entry);
     return 0;
 }
 
@@ -1581,7 +1581,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
                     .addr_mask = size - 1,
                     .perm = IOMMU_NONE,
                 };
-                memory_region_notify_iommu(&vtd_as->iommu, entry);
+                memory_region_notify_iommu(&vtd_as->iommu, 0, entry);
             }
         }
     }
@@ -2015,7 +2015,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
     entry.iova = addr;
     entry.perm = IOMMU_NONE;
     entry.translated_addr = 0;
-    memory_region_notify_iommu(&vtd_dev_as->iommu, entry);
+    memory_region_notify_iommu(&vtd_dev_as->iommu, 0, entry);
 
 done:
     return true;
@@ -2471,7 +2471,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
 }
 
 static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
-                                         IOMMUAccessFlags flag)
+                                         IOMMUAccessFlags flag, int iommu_idx)
 {
     VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
     IntelIOMMUState *s = vtd_as->iommu_state;
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index f395d24592..573b022e1e 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -188,7 +188,6 @@ static void ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind, Error **errp)
         return;
     }
 
-    blkconf_serial(&dev->conf, &dev->serial);
     if (kind != IDE_CD) {
         if (!blkconf_geometry(&dev->conf, &dev->chs_trans, 65535, 16, 255,
                               errp)) {
diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c
index f17f18e51b..f33e3fc63d 100644
--- a/hw/input/pckbd.c
+++ b/hw/input/pckbd.c
@@ -434,7 +434,7 @@ static const VMStateDescription vmstate_kbd = {
 };
 
 /* Memory mapped interface */
-static uint32_t kbd_mm_readb (void *opaque, hwaddr addr)
+static uint64_t kbd_mm_readfn(void *opaque, hwaddr addr, unsigned size)
 {
     KBDState *s = opaque;
 
@@ -444,7 +444,8 @@ static uint32_t kbd_mm_readb (void *opaque, hwaddr addr)
         return kbd_read_data(s, 0, 1) & 0xff;
 }
 
-static void kbd_mm_writeb (void *opaque, hwaddr addr, uint32_t value)
+static void kbd_mm_writefn(void *opaque, hwaddr addr,
+                           uint64_t value, unsigned size)
 {
     KBDState *s = opaque;
 
@@ -454,12 +455,13 @@ static void kbd_mm_writeb (void *opaque, hwaddr addr, uint32_t value)
         kbd_write_data(s, 0, value & 0xff, 1);
 }
 
+
 static const MemoryRegionOps i8042_mmio_ops = {
+    .read = kbd_mm_readfn,
+    .write = kbd_mm_writefn,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
     .endianness = DEVICE_NATIVE_ENDIAN,
-    .old_mmio = {
-        .read = { kbd_mm_readb, kbd_mm_readb, kbd_mm_readb },
-        .write = { kbd_mm_writeb, kbd_mm_writeb, kbd_mm_writeb },
-    },
 };
 
 void i8042_mm_init(qemu_irq kbd_irq, qemu_irq mouse_irq,
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 5649cac46e..d8d3b25403 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -135,7 +135,14 @@ static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
     uint32_t reg, *field;
     int irq;
 
-    field = (uint32_t *)bmp;
+    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
+     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
+     * functionality is replaced by GICR_IPRIORITYR<n>. It doesn't need to
+     * sync them. So it needs to skip the field of GIC_INTERNAL irqs in bmp and
+     * offset.
+     */
+    field = (uint32_t *)(bmp + GIC_INTERNAL);
+    offset += (GIC_INTERNAL * 8) / 8;
     for_each_dist_irq_reg(irq, s->num_irq, 8) {
         kvm_gicd_access(s, offset, &reg, false);
         *field = reg;
@@ -149,7 +156,14 @@ static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
     uint32_t reg, *field;
     int irq;
 
-    field = (uint32_t *)bmp;
+    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
+     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
+     * functionality is replaced by GICR_IPRIORITYR<n>. It doesn't need to
+     * sync them. So it needs to skip the field of GIC_INTERNAL irqs in bmp and
+     * offset.
+     */
+    field = (uint32_t *)(bmp + GIC_INTERNAL);
+    offset += (GIC_INTERNAL * 8) / 8;
     for_each_dist_irq_reg(irq, s->num_irq, 8) {
         reg = *field;
         kvm_gicd_access(s, offset, &reg, true);
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index c51151fa8a..661be8878a 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -2183,7 +2183,11 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp)
     int regionlen;
 
     s->cpu = ARM_CPU(qemu_get_cpu(0));
-    assert(s->cpu);
+
+    if (!s->cpu || !arm_feature(&s->cpu->env, ARM_FEATURE_M)) {
+        error_setg(errp, "The NVIC can only be used with a Cortex-M CPU");
+        return;
+    }
 
     if (s->num_irq > NVIC_MAX_IRQ) {
         error_setg(errp, "num-irq %d exceeds NVIC maximum", s->num_irq);
diff --git a/hw/m68k/mcf5206.c b/hw/m68k/mcf5206.c
index 7abd84ac47..d7f26d6810 100644
--- a/hw/m68k/mcf5206.c
+++ b/hw/m68k/mcf5206.c
@@ -512,19 +512,43 @@ static void m5206_mbar_writel(void *opaque, hwaddr offset,
     m5206_mbar_write(s, offset, value, 4);
 }
 
+static uint64_t m5206_mbar_readfn(void *opaque, hwaddr addr, unsigned size)
+{
+    switch (size) {
+    case 1:
+        return m5206_mbar_readb(opaque, addr);
+    case 2:
+        return m5206_mbar_readw(opaque, addr);
+    case 4:
+        return m5206_mbar_readl(opaque, addr);
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void m5206_mbar_writefn(void *opaque, hwaddr addr,
+                               uint64_t value, unsigned size)
+{
+    switch (size) {
+    case 1:
+        m5206_mbar_writeb(opaque, addr, value);
+        break;
+    case 2:
+        m5206_mbar_writew(opaque, addr, value);
+        break;
+    case 4:
+        m5206_mbar_writel(opaque, addr, value);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static const MemoryRegionOps m5206_mbar_ops = {
-    .old_mmio = {
-        .read = {
-            m5206_mbar_readb,
-            m5206_mbar_readw,
-            m5206_mbar_readl,
-        },
-        .write = {
-            m5206_mbar_writeb,
-            m5206_mbar_writew,
-            m5206_mbar_writel,
-        },
-    },
+    .read = m5206_mbar_readfn,
+    .write = m5206_mbar_writefn,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c
index 5e6d5744ee..59315010db 100644
--- a/hw/misc/aspeed_scu.c
+++ b/hw/misc/aspeed_scu.c
@@ -16,6 +16,7 @@
 #include "qapi/visitor.h"
 #include "qemu/bitops.h"
 #include "qemu/log.h"
+#include "crypto/random.h"
 #include "trace.h"
 
 #define TO_REG(offset) ((offset) >> 2)
@@ -154,6 +155,19 @@ static const uint32_t ast2500_a1_resets[ASPEED_SCU_NR_REGS] = {
      [BMC_DEV_ID]      = 0x00002402U
 };
 
+static uint32_t aspeed_scu_get_random(void)
+{
+    Error *err = NULL;
+    uint32_t num;
+
+    if (qcrypto_random_bytes((uint8_t *)&num, sizeof(num), &err)) {
+        error_report_err(err);
+        exit(1);
+    }
+
+    return num;
+}
+
 static uint64_t aspeed_scu_read(void *opaque, hwaddr offset, unsigned size)
 {
     AspeedSCUState *s = ASPEED_SCU(opaque);
@@ -167,6 +181,12 @@ static uint64_t aspeed_scu_read(void *opaque, hwaddr offset, unsigned size)
     }
 
     switch (reg) {
+    case RNG_DATA:
+        /* On hardware, RNG_DATA works regardless of
+         * the state of the enable bit in RNG_CTRL
+         */
+        s->regs[RNG_DATA] = aspeed_scu_get_random();
+        break;
     case WAKEUP_EN:
         qemu_log_mask(LOG_GUEST_ERROR,
                       "%s: Read of write-only offset 0x%" HWADDR_PRIx "\n",
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index aaa6010d5c..1b0880ac9e 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -112,7 +112,8 @@ static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table)
 /* Called from RCU critical section */
 static IOMMUTLBEntry spapr_tce_translate_iommu(IOMMUMemoryRegion *iommu,
                                                hwaddr addr,
-                                               IOMMUAccessFlags flag)
+                                               IOMMUAccessFlags flag,
+                                               int iommu_idx)
 {
     sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
     uint64_t tce;
@@ -428,7 +429,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
     entry.translated_addr = tce & page_mask;
     entry.addr_mask = ~page_mask;
     entry.perm = spapr_tce_iommu_access_flags(tce);
-    memory_region_notify_iommu(&tcet->iommu, entry);
+    memory_region_notify_iommu(&tcet->iommu, 0, entry);
 
     return H_SUCCESS;
 }
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 10da87458e..e3e0ebb7f6 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -484,7 +484,7 @@ uint16_t s390_guest_io_table_walk(uint64_t g_iota, hwaddr addr,
 }
 
 static IOMMUTLBEntry s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr,
-                                          IOMMUAccessFlags flag)
+                                          IOMMUAccessFlags flag, int iommu_idx)
 {
     S390PCIIOMMU *iommu = container_of(mr, S390PCIIOMMU, iommu_mr);
     S390IOTLBEntry *entry;
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index d1a5f79678..7b61367ee3 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -589,7 +589,7 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
             }
 
             notify.perm = IOMMU_NONE;
-            memory_region_notify_iommu(&iommu->iommu_mr, notify);
+            memory_region_notify_iommu(&iommu->iommu_mr, 0, notify);
             notify.perm = entry->perm;
         }
 
@@ -601,7 +601,7 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
         g_hash_table_replace(iommu->iotlb, &cache->iova, cache);
     }
 
-    memory_region_notify_iommu(&iommu->iommu_mr, notify);
+    memory_region_notify_iommu(&iommu->iommu_mr, 0, notify);
 }
 
 int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index ded23d36ca..aeaf611854 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2368,7 +2368,6 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
         return;
     }
 
-    blkconf_serial(&s->qdev.conf, &s->serial);
     blkconf_blocksizes(&s->qdev.conf);
 
     if (s->qdev.conf.logical_block_size >
diff --git a/hw/sh4/sh7750.c b/hw/sh4/sh7750.c
index 2dc07a904b..2fb6e618d9 100644
--- a/hw/sh4/sh7750.c
+++ b/hw/sh4/sh7750.c
@@ -450,15 +450,43 @@ static void sh7750_mem_writel(void *opaque, hwaddr addr,
     }
 }
 
+static uint64_t sh7750_mem_readfn(void *opaque, hwaddr addr, unsigned size)
+{
+    switch (size) {
+    case 1:
+        return sh7750_mem_readb(opaque, addr);
+    case 2:
+        return sh7750_mem_readw(opaque, addr);
+    case 4:
+        return sh7750_mem_readl(opaque, addr);
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void sh7750_mem_writefn(void *opaque, hwaddr addr,
+                               uint64_t value, unsigned size)
+{
+    switch (size) {
+    case 1:
+        sh7750_mem_writeb(opaque, addr, value);
+        break;
+    case 2:
+        sh7750_mem_writew(opaque, addr, value);
+        break;
+    case 4:
+        sh7750_mem_writel(opaque, addr, value);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static const MemoryRegionOps sh7750_mem_ops = {
-    .old_mmio = {
-        .read = {sh7750_mem_readb,
-                 sh7750_mem_readw,
-                 sh7750_mem_readl },
-        .write = {sh7750_mem_writeb,
-                  sh7750_mem_writew,
-                  sh7750_mem_writel },
-    },
+    .read = sh7750_mem_readfn,
+    .write = sh7750_mem_writefn,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
diff --git a/hw/sparc/sun4m_iommu.c b/hw/sparc/sun4m_iommu.c
index b677601fc6..7ca1e3fce4 100644
--- a/hw/sparc/sun4m_iommu.c
+++ b/hw/sparc/sun4m_iommu.c
@@ -282,7 +282,8 @@ static void iommu_bad_addr(IOMMUState *s, hwaddr addr,
 /* Called from RCU critical section */
 static IOMMUTLBEntry sun4m_translate_iommu(IOMMUMemoryRegion *iommu,
                                            hwaddr addr,
-                                           IOMMUAccessFlags flags)
+                                           IOMMUAccessFlags flags,
+                                           int iommu_idx)
 {
     IOMMUState *is = container_of(iommu, IOMMUState, iommu);
     hwaddr page, pa;
diff --git a/hw/sparc64/sun4u_iommu.c b/hw/sparc64/sun4u_iommu.c
index eb3aaa87e6..1ef7645ba5 100644
--- a/hw/sparc64/sun4u_iommu.c
+++ b/hw/sparc64/sun4u_iommu.c
@@ -73,7 +73,7 @@
 /* Called from RCU critical section */
 static IOMMUTLBEntry sun4u_translate_iommu(IOMMUMemoryRegion *iommu,
                                            hwaddr addr,
-                                           IOMMUAccessFlags flag)
+                                           IOMMUAccessFlags flag, int iommu_idx)
 {
     IOMMUState *is = container_of(iommu, IOMMUState, iommu);
     hwaddr baseaddr, offset;
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index 481694a473..47b992f403 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -606,7 +606,6 @@ static void usb_msd_storage_realize(USBDevice *dev, Error **errp)
         return;
     }
 
-    blkconf_serial(&s->conf, &dev->serial);
     blkconf_blocksizes(&s->conf);
     if (!blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true,
                                        errp)) {
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 8e57265edf..fb396cf00a 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -507,6 +507,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
     if (memory_region_is_iommu(section->mr)) {
         VFIOGuestIOMMU *giommu;
         IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
+        int iommu_idx;
 
         trace_vfio_listener_region_add_iommu(iova, end);
         /*
@@ -523,10 +524,13 @@ static void vfio_listener_region_add(MemoryListener *listener,
         llend = int128_add(int128_make64(section->offset_within_region),
                            section->size);
         llend = int128_sub(llend, int128_one());
+        iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
+                                                       MEMTXATTRS_UNSPECIFIED);
         iommu_notifier_init(&giommu->n, vfio_iommu_map_notify,
                             IOMMU_NOTIFIER_ALL,
                             section->offset_within_region,
-                            int128_get64(llend));
+                            int128_get64(llend),
+                            iommu_idx);
         QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
 
         memory_region_register_iommu_notifier(section->mr, &giommu->n);
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 96175b214d..b129cb9ddd 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -662,6 +662,8 @@ static void vhost_iommu_region_add(MemoryListener *listener,
                                          iommu_listener);
     struct vhost_iommu *iommu;
     Int128 end;
+    int iommu_idx;
+    IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
 
     if (!memory_region_is_iommu(section->mr)) {
         return;
@@ -671,10 +673,13 @@ static void vhost_iommu_region_add(MemoryListener *listener,
     end = int128_add(int128_make64(section->offset_within_region),
                      section->size);
     end = int128_sub(end, int128_one());
+    iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
+                                                   MEMTXATTRS_UNSPECIFIED);
     iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify,
                         IOMMU_NOTIFIER_UNMAP,
                         section->offset_within_region,
-                        int128_get64(end));
+                        int128_get64(end),
+                        iommu_idx);
     iommu->mr = section->mr;
     iommu->iommu_offset = section->offset_within_address_space -
                           section->offset_within_region;
diff --git a/hw/watchdog/wdt_i6300esb.c b/hw/watchdog/wdt_i6300esb.c
index e596b0804d..7b59469888 100644
--- a/hw/watchdog/wdt_i6300esb.c
+++ b/hw/watchdog/wdt_i6300esb.c
@@ -361,19 +361,43 @@ static void i6300esb_mem_writel(void *vp, hwaddr addr, uint32_t val)
     }
 }
 
+static uint64_t i6300esb_mem_readfn(void *opaque, hwaddr addr, unsigned size)
+{
+    switch (size) {
+    case 1:
+        return i6300esb_mem_readb(opaque, addr);
+    case 2:
+        return i6300esb_mem_readw(opaque, addr);
+    case 4:
+        return i6300esb_mem_readl(opaque, addr);
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void i6300esb_mem_writefn(void *opaque, hwaddr addr,
+                                 uint64_t value, unsigned size)
+{
+    switch (size) {
+    case 1:
+        i6300esb_mem_writeb(opaque, addr, value);
+        break;
+    case 2:
+        i6300esb_mem_writew(opaque, addr, value);
+        break;
+    case 4:
+        i6300esb_mem_writel(opaque, addr, value);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static const MemoryRegionOps i6300esb_ops = {
-    .old_mmio = {
-        .read = {
-            i6300esb_mem_readb,
-            i6300esb_mem_readw,
-            i6300esb_mem_readl,
-        },
-        .write = {
-            i6300esb_mem_writeb,
-            i6300esb_mem_writew,
-            i6300esb_mem_writel,
-        },
-    },
+    .read = i6300esb_mem_readfn,
+    .write = i6300esb_mem_writefn,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
diff --git a/include/block/qdict.h b/include/block/qdict.h
new file mode 100644
index 0000000000..d8cb502d7d
--- /dev/null
+++ b/include/block/qdict.h
@@ -0,0 +1,34 @@
+/*
+ * Special QDict functions used by the block layer
+ *
+ * Copyright (c) 2013-2018 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_QDICT_H
+#define BLOCK_QDICT_H
+
+#include "qapi/qmp/qdict.h"
+
+void qdict_copy_default(QDict *dst, QDict *src, const char *key);
+void qdict_set_default_str(QDict *dst, const char *key, const char *val);
+
+void qdict_join(QDict *dest, QDict *src, bool overwrite);
+
+void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start);
+void qdict_array_split(QDict *src, QList **dst);
+int qdict_array_entries(QDict *src, const char *subqdict);
+QObject *qdict_crumple(const QDict *src, Error **errp);
+void qdict_flatten(QDict *qdict);
+
+typedef struct QDictRenames {
+    const char *from;
+    const char *to;
+} QDictRenames;
+bool qdict_rename_keys(QDict *qdict, const QDictRenames *renames, Error **errp);
+
+Visitor *qobject_input_visitor_new_flat_confused(QDict *qdict,
+                                                 Error **errp);
+#endif
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index a635f532f9..7fa726b8e3 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -133,6 +133,8 @@ static inline void tswap64s(uint64_t *s)
 #define stq_p(p, v) stq_be_p(p, v)
 #define stfl_p(p, v) stfl_be_p(p, v)
 #define stfq_p(p, v) stfq_be_p(p, v)
+#define ldn_p(p, sz) ldn_be_p(p, sz)
+#define stn_p(p, sz, v) stn_be_p(p, sz, v)
 #else
 #define lduw_p(p) lduw_le_p(p)
 #define ldsw_p(p) ldsw_le_p(p)
@@ -145,6 +147,8 @@ static inline void tswap64s(uint64_t *s)
 #define stq_p(p, v) stq_le_p(p, v)
 #define stfl_p(p, v) stfl_le_p(p, v)
 #define stfq_p(p, v) stfq_le_p(p, v)
+#define ldn_p(p, sz) ldn_le_p(p, sz)
+#define stn_p(p, sz, v) stn_le_p(p, sz, v)
 #endif
 
 /* MMU memory access macros */
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index e43ff8346b..a171ffc1a4 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -127,6 +127,15 @@ QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
  * structs into one.)
  */
 typedef struct CPUIOTLBEntry {
+    /*
+     * @addr contains:
+     *  - in the lower TARGET_PAGE_BITS, a physical section number
+     *  - with the lower TARGET_PAGE_BITS masked off, an offset which
+     *    must be added to the virtual address to obtain:
+     *     + the ram_addr_t of the target RAM (if the physical section
+     *       number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM)
+     *     + the offset within the target MemoryRegion (otherwise)
+     */
     hwaddr addr;
     MemTxAttrs attrs;
 } CPUIOTLBEntry;
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 4d09eaba72..8bbea787a9 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -437,8 +437,17 @@ void tb_lock_reset(void);
 
 #if !defined(CONFIG_USER_ONLY)
 
-struct MemoryRegion *iotlb_to_region(CPUState *cpu,
-                                     hwaddr index, MemTxAttrs attrs);
+/**
+ * iotlb_to_section:
+ * @cpu: CPU performing the access
+ * @index: TCG CPU IOTLB entry
+ *
+ * Given a TCG CPU IOTLB entry, return the MemoryRegionSection that
+ * it refers to. @index will have been initially created and returned
+ * by memory_region_section_get_iotlb().
+ */
+struct MemoryRegionSection *iotlb_to_section(CPUState *cpu,
+                                             hwaddr index, MemTxAttrs attrs);
 
 void tlb_fill(CPUState *cpu, target_ulong addr, int size,
               MMUAccessType access_type, int mmu_idx, uintptr_t retaddr);
@@ -469,7 +478,8 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr);
 
 MemoryRegionSection *
 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
-                                  hwaddr *xlat, hwaddr *plen);
+                                  hwaddr *xlat, hwaddr *plen,
+                                  MemTxAttrs attrs, int *prot);
 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                        MemoryRegionSection *section,
                                        target_ulong vaddr,
diff --git a/include/exec/memory.h b/include/exec/memory.h
index eb2ba06519..050323f532 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -98,18 +98,21 @@ struct IOMMUNotifier {
     /* Notify for address space range start <= addr <= end */
     hwaddr start;
     hwaddr end;
+    int iommu_idx;
     QLIST_ENTRY(IOMMUNotifier) node;
 };
 typedef struct IOMMUNotifier IOMMUNotifier;
 
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                        IOMMUNotifierFlag flags,
-                                       hwaddr start, hwaddr end)
+                                       hwaddr start, hwaddr end,
+                                       int iommu_idx)
 {
     n->notify = fn;
     n->notifier_flags = flags;
     n->start = start;
     n->end = end;
+    n->iommu_idx = iommu_idx;
 }
 
 /*
@@ -206,6 +209,20 @@ enum IOMMUMemoryRegionAttr {
  * to report whenever mappings are changed, by calling
  * memory_region_notify_iommu() (or, if necessary, by calling
  * memory_region_notify_one() for each registered notifier).
+ *
+ * Conceptually an IOMMU provides a mapping from input address
+ * to an output TLB entry. If the IOMMU is aware of memory transaction
+ * attributes and the output TLB entry depends on the transaction
+ * attributes, we represent this using IOMMU indexes. Each index
+ * selects a particular translation table that the IOMMU has:
+ *   @attrs_to_index returns the IOMMU index for a set of transaction attributes
+ *   @translate takes an input address and an IOMMU index
+ * and the mapping returned can only depend on the input address and the
+ * IOMMU index.
+ *
+ * Most IOMMUs don't care about the transaction attributes and support
+ * only a single IOMMU index. A more complex IOMMU might have one index
+ * for secure transactions and one for non-secure transactions.
  */
 typedef struct IOMMUMemoryRegionClass {
     /* private */
@@ -234,9 +251,10 @@ typedef struct IOMMUMemoryRegionClass {
      * @iommu: the IOMMUMemoryRegion
      * @hwaddr: address to be translated within the memory region
      * @flag: requested access permissions
+     * @iommu_idx: IOMMU index for the translation
      */
     IOMMUTLBEntry (*translate)(IOMMUMemoryRegion *iommu, hwaddr addr,
-                               IOMMUAccessFlags flag);
+                               IOMMUAccessFlags flag, int iommu_idx);
     /* Returns minimum supported page size in bytes.
      * If this method is not provided then the minimum is assumed to
      * be TARGET_PAGE_SIZE.
@@ -290,6 +308,29 @@ typedef struct IOMMUMemoryRegionClass {
      */
     int (*get_attr)(IOMMUMemoryRegion *iommu, enum IOMMUMemoryRegionAttr attr,
                     void *data);
+
+    /* Return the IOMMU index to use for a given set of transaction attributes.
+     *
+     * Optional method: if an IOMMU only supports a single IOMMU index then
+     * the default implementation of memory_region_iommu_attrs_to_index()
+     * will return 0.
+     *
+     * The indexes supported by an IOMMU must be contiguous, starting at 0.
+     *
+     * @iommu: the IOMMUMemoryRegion
+     * @attrs: memory transaction attributes
+     */
+    int (*attrs_to_index)(IOMMUMemoryRegion *iommu, MemTxAttrs attrs);
+
+    /* Return the number of IOMMU indexes this IOMMU supports.
+     *
+     * Optional method: if this method is not provided, then
+     * memory_region_iommu_num_indexes() will return 1, indicating that
+     * only a single IOMMU index is supported.
+     *
+     * @iommu: the IOMMUMemoryRegion
+     */
+    int (*num_indexes)(IOMMUMemoryRegion *iommu);
 } IOMMUMemoryRegionClass;
 
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
@@ -971,11 +1012,13 @@ uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *iommu_mr);
  * should be notified with an UNMAP followed by a MAP.
  *
  * @iommu_mr: the memory region that was changed
+ * @iommu_idx: the IOMMU index for the translation table which has changed
  * @entry: the new entry in the IOMMU translation table.  The entry
  *         replaces all old entries for the same virtual I/O address range.
  *         Deleted entries have .@perm == 0.
  */
 void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr,
+                                int iommu_idx,
                                 IOMMUTLBEntry entry);
 
 /**
@@ -1055,6 +1098,24 @@ int memory_region_iommu_get_attr(IOMMUMemoryRegion *iommu_mr,
                                  void *data);
 
 /**
+ * memory_region_iommu_attrs_to_index: return the IOMMU index to
+ * use for translations with the given memory transaction attributes.
+ *
+ * @iommu_mr: the memory region
+ * @attrs: the memory transaction attributes
+ */
+int memory_region_iommu_attrs_to_index(IOMMUMemoryRegion *iommu_mr,
+                                       MemTxAttrs attrs);
+
+/**
+ * memory_region_iommu_num_indexes: return the total number of IOMMU
+ * indexes that this IOMMU supports.
+ *
+ * @iommu_mr: the memory region
+ */
+int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr);
+
+/**
  * memory_region_name: get a memory region's name
  *
  * Returns the string that was used to initialize the memory region.
diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h
index 70fa2287e2..ffed39252d 100644
--- a/include/hw/arm/arm.h
+++ b/include/hw/arm/arm.h
@@ -23,9 +23,6 @@ typedef enum {
     ARM_ENDIANNESS_BE32,
 } arm_endianness;
 
-/* armv7m.c */
-DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
-                         const char *kernel_filename, const char *cpu_type);
 /**
  * armv7m_load_kernel:
  * @cpu: CPU
@@ -33,9 +30,8 @@ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
  * @mem_size: mem_size: maximum image size to load
  *
  * Load the guest image for an ARMv7M system. This must be called by
- * any ARMv7M board, either directly or via armv7m_init(). (This is
- * necessary to ensure that the CPU resets correctly on system reset,
- * as well as for kernel loading.)
+ * any ARMv7M board. (This is necessary to ensure that the CPU resets
+ * correctly on system reset, as well as for kernel loading.)
  */
 void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size);
 
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index d4f4dfffab..e9f9e2223f 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -72,7 +72,6 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
 
 /* Configuration helpers */
 
-void blkconf_serial(BlockConf *conf, char **serial);
 bool blkconf_geometry(BlockConf *conf, int *trans,
                       unsigned cyls_max, unsigned heads_max, unsigned secs_max,
                       Error **errp);
diff --git a/include/hw/or-irq.h b/include/hw/or-irq.h
index 3f6fc1b58a..5a31e5a188 100644
--- a/include/hw/or-irq.h
+++ b/include/hw/or-irq.h
@@ -31,7 +31,10 @@
 
 #define TYPE_OR_IRQ "or-irq"
 
-#define MAX_OR_LINES      16
+/* This can safely be increased if necessary without breaking
+ * migration compatibility (as long as it remains greater than 15).
+ */
+#define MAX_OR_LINES      32
 
 typedef struct OrIRQState qemu_or_irq;
 
diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h
index 921a28d2d3..7f3ec10a10 100644
--- a/include/qapi/qmp/qdict.h
+++ b/include/qapi/qmp/qdict.h
@@ -67,23 +67,6 @@ int64_t qdict_get_try_int(const QDict *qdict, const char *key,
 bool qdict_get_try_bool(const QDict *qdict, const char *key, bool def_value);
 const char *qdict_get_try_str(const QDict *qdict, const char *key);
 
-void qdict_copy_default(QDict *dst, QDict *src, const char *key);
-void qdict_set_default_str(QDict *dst, const char *key, const char *val);
-
 QDict *qdict_clone_shallow(const QDict *src);
-void qdict_flatten(QDict *qdict);
-
-void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start);
-void qdict_array_split(QDict *src, QList **dst);
-int qdict_array_entries(QDict *src, const char *subqdict);
-QObject *qdict_crumple(const QDict *src, Error **errp);
-
-void qdict_join(QDict *dest, QDict *src, bool overwrite);
-
-typedef struct QDictRenames {
-    const char *from;
-    const char *to;
-} QDictRenames;
-bool qdict_rename_keys(QDict *qdict, const QDictRenames *renames, Error **errp);
 
 #endif /* QDICT_H */
diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h
index 3f28f661b1..a684c1a7a2 100644
--- a/include/qemu/bswap.h
+++ b/include/qemu/bswap.h
@@ -290,6 +290,15 @@ typedef union {
  * For accessors that take a guest address rather than a
  * host address, see the cpu_{ld,st}_* accessors defined in
  * cpu_ldst.h.
+ *
+ * For cases where the size to be used is not fixed at compile time,
+ * there are
+ *  stn{endian}_p(ptr, sz, val)
+ * which stores @val to @ptr as an @endian-order number @sz bytes in size
+ * and
+ *  ldn{endian}_p(ptr, sz)
+ * which loads @sz bytes from @ptr as an unsigned @endian-order number
+ * and returns it in a uint64_t.
  */
 
 static inline int ldub_p(const void *ptr)
@@ -495,6 +504,49 @@ static inline unsigned long leul_to_cpu(unsigned long v)
 #endif
 }
 
+/* Store v to p as a sz byte value in host order */
+#define DO_STN_LDN_P(END) \
+    static inline void stn_## END ## _p(void *ptr, int sz, uint64_t v)  \
+    {                                                                   \
+        switch (sz) {                                                   \
+        case 1:                                                         \
+            stb_p(ptr, v);                                              \
+            break;                                                      \
+        case 2:                                                         \
+            stw_ ## END ## _p(ptr, v);                                  \
+            break;                                                      \
+        case 4:                                                         \
+            stl_ ## END ## _p(ptr, v);                                  \
+            break;                                                      \
+        case 8:                                                         \
+            stq_ ## END ## _p(ptr, v);                                  \
+            break;                                                      \
+        default:                                                        \
+            g_assert_not_reached();                                     \
+        }                                                               \
+    }                                                                   \
+    static inline uint64_t ldn_## END ## _p(const void *ptr, int sz)    \
+    {                                                                   \
+        switch (sz) {                                                   \
+        case 1:                                                         \
+            return ldub_p(ptr);                                         \
+        case 2:                                                         \
+            return lduw_ ## END ## _p(ptr);                             \
+        case 4:                                                         \
+            return (uint32_t)ldl_ ## END ## _p(ptr);                    \
+        case 8:                                                         \
+            return ldq_ ## END ## _p(ptr);                              \
+        default:                                                        \
+            g_assert_not_reached();                                     \
+        }                                                               \
+    }
+
+DO_STN_LDN_P(he)
+DO_STN_LDN_P(le)
+DO_STN_LDN_P(be)
+
+#undef DO_STN_LDN_P
+
 #undef le_bswap
 #undef be_bswap
 #undef le_bswaps
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 9d3afc6c75..cce2fd6acc 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -429,6 +429,9 @@ struct CPUState {
     uint16_t pending_tlb_flush;
 
     int hvf_fd;
+
+    /* track IOMMUs whose translations we've cached in the TCG TLB */
+    GArray *iommu_notifiers;
 };
 
 QTAILQ_HEAD(CPUTailQ, CPUState);
diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index ac22f2ae1f..24954b94e0 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -28,16 +28,13 @@ typedef enum {
 } BlockInterfaceType;
 
 struct DriveInfo {
-    const char *devaddr;
     BlockInterfaceType type;
     int bus;
     int unit;
     int auto_del;               /* see blockdev_mark_auto_del() */
     bool is_default;            /* Added by default_drive() ?  */
     int media_cd;
-    int cyls, heads, secs, trans;
     QemuOpts *opts;
-    char *serial;
     QTAILQ_ENTRY(DriveInfo) next;
 };
 
diff --git a/memory.c b/memory.c
index 3212acc7f4..21aa57d24c 100644
--- a/memory.c
+++ b/memory.c
@@ -1799,6 +1799,9 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr,
     iommu_mr = IOMMU_MEMORY_REGION(mr);
     assert(n->notifier_flags != IOMMU_NOTIFIER_NONE);
     assert(n->start <= n->end);
+    assert(n->iommu_idx >= 0 &&
+           n->iommu_idx < memory_region_iommu_num_indexes(iommu_mr));
+
     QLIST_INSERT_HEAD(&iommu_mr->iommu_notify, n, node);
     memory_region_update_iommu_notify_flags(iommu_mr);
 }
@@ -1829,7 +1832,7 @@ void memory_region_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
     granularity = memory_region_iommu_get_min_page_size(iommu_mr);
 
     for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
-        iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE);
+        iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, n->iommu_idx);
         if (iotlb.perm != IOMMU_NONE) {
             n->notify(n, &iotlb);
         }
@@ -1891,6 +1894,7 @@ void memory_region_notify_one(IOMMUNotifier *notifier,
 }
 
 void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr,
+                                int iommu_idx,
                                 IOMMUTLBEntry entry)
 {
     IOMMUNotifier *iommu_notifier;
@@ -1898,7 +1902,9 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr,
     assert(memory_region_is_iommu(MEMORY_REGION(iommu_mr)));
 
     IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) {
-        memory_region_notify_one(iommu_notifier, &entry);
+        if (iommu_notifier->iommu_idx == iommu_idx) {
+            memory_region_notify_one(iommu_notifier, &entry);
+        }
     }
 }
 
@@ -1915,6 +1921,29 @@ int memory_region_iommu_get_attr(IOMMUMemoryRegion *iommu_mr,
     return imrc->get_attr(iommu_mr, attr, data);
 }
 
+int memory_region_iommu_attrs_to_index(IOMMUMemoryRegion *iommu_mr,
+                                       MemTxAttrs attrs)
+{
+    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+
+    if (!imrc->attrs_to_index) {
+        return 0;
+    }
+
+    return imrc->attrs_to_index(iommu_mr, attrs);
+}
+
+int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr)
+{
+    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+
+    if (!imrc->num_indexes) {
+        return 1;
+    }
+
+    return imrc->num_indexes(iommu_mr);
+}
+
 void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
 {
     uint8_t mask = 1 << client;
diff --git a/qapi/block-core.json b/qapi/block-core.json
index fff23fc82b..ab629d1647 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3178,6 +3178,14 @@
 
 
 ##
+# @RbdAuthMode:
+#
+# Since: 3.0
+##
+{ 'enum': 'RbdAuthMode',
+  'data': [ 'cephx', 'none' ] }
+
+##
 # @BlockdevOptionsRbd:
 #
 # @pool:               Ceph pool name.
@@ -3192,6 +3200,15 @@
 #
 # @user:               Ceph id name.
 #
+# @auth-client-required: Acceptable authentication modes.
+#                      This maps to Ceph configuration option
+#                      "auth_client_required".  (Since 3.0)
+#
+# @key-secret:         ID of a QCryptoSecret object providing a key
+#                      for cephx authentication.
+#                      This maps to Ceph configuration option
+#                      "key".  (Since 3.0)
+#
 # @server:             Monitor host address and port.  This maps
 #                      to the "mon_host" Ceph option.
 #
@@ -3203,6 +3220,8 @@
             '*conf': 'str',
             '*snapshot': 'str',
             '*user': 'str',
+            '*auth-client-required': ['RbdAuthMode'],
+            '*key-secret': 'str',
             '*server': ['InetSocketAddressBase'] } }
 
 ##
diff --git a/qapi/job.json b/qapi/job.json
index 17d10037c4..9d074eb8d2 100644
--- a/qapi/job.json
+++ b/qapi/job.json
@@ -50,16 +50,17 @@
 #           the last job in a transaction.
 #
 # @pending: The job has finished its work, but has finalization steps that it
-#           needs to make prior to completing. These changes may require
-#           manual intervention by the management process if manual was set
-#           to true. These changes may still fail.
+#           needs to make prior to completing. These changes will require
+#           manual intervention via @job-finalize if auto-finalize was set to
+#           false. These pending changes may still fail.
 #
 # @aborting: The job is in the process of being aborted, and will finish with
 #            an error. The job will afterwards report that it is @concluded.
 #            This status may not be visible to the management process.
 #
-# @concluded: The job has finished all work. If manual was set to true, the job
-#             will remain in the query list until it is dismissed.
+# @concluded: The job has finished all work. If auto-dismiss was set to false,
+#             the job will remain in the query list until it is dismissed via
+#             @job-dismiss.
 #
 # @null: The job is in the process of being dismantled. This state should not
 #        ever be visible externally.
@@ -75,19 +76,19 @@
 #
 # Represents command verbs that can be applied to a job.
 #
-# @cancel: see @block-job-cancel
+# @cancel: see @job-cancel
 #
-# @pause: see @block-job-pause
+# @pause: see @job-pause
 #
-# @resume: see @block-job-resume
+# @resume: see @job-resume
 #
 # @set-speed: see @block-job-set-speed
 #
-# @complete: see @block-job-complete
+# @complete: see @job-complete
 #
-# @dismiss: see @block-job-dismiss
+# @dismiss: see @job-dismiss
 #
-# @finalize: see @block-job-finalize
+# @finalize: see @job-finalize
 #
 # Since: 2.12
 ##
diff --git a/qemu-doc.texi b/qemu-doc.texi
index cd05760cac..282bc3dc35 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -2850,21 +2850,6 @@ with ``-device ...,netdev=x''), or ``-nic user,smb=/some/dir''
 (for embedded NICs). The new syntax allows different settings to be
 provided per NIC.
 
-@subsection -drive cyls=...,heads=...,secs=...,trans=... (since 2.10.0)
-
-The drive geometry arguments are replaced by the the geometry arguments
-that can be specified with the ``-device'' parameter.
-
-@subsection -drive serial=... (since 2.10.0)
-
-The drive serial argument is replaced by the the serial argument
-that can be specified with the ``-device'' parameter.
-
-@subsection -drive addr=... (since 2.10.0)
-
-The drive addr argument is replaced by the the addr argument
-that can be specified with the ``-device'' parameter.
-
 @subsection -usbdevice (since 2.10.0)
 
 The ``-usbdevice DEV'' argument is now a synonym for setting
diff --git a/qemu-img.c b/qemu-img.c
index 1dcdd47254..e1a506f7f6 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2906,7 +2906,7 @@ static int img_map(int argc, char **argv)
         int64_t n;
 
         /* Probe up to 1 GiB at a time.  */
-        n = QEMU_ALIGN_DOWN(MIN(1 << 30, length - offset), BDRV_SECTOR_SIZE);
+        n = MIN(1 << 30, length - offset);
         ret = get_block_status(bs, offset, n, &next);
 
         if (ret < 0) {
diff --git a/qemu-options.hx b/qemu-options.hx
index c0d3951e9f..d5b0c26e8e 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -804,9 +804,8 @@ ETEXI
 
 DEF("drive", HAS_ARG, QEMU_OPTION_drive,
     "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
-    "       [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n"
     "       [,cache=writethrough|writeback|none|directsync|unsafe][,format=f]\n"
-    "       [,serial=s][,addr=A][,rerror=ignore|stop|report]\n"
+    "       [,snapshot=on|off][,rerror=ignore|stop|report]\n"
     "       [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
     "       [,readonly=on|off][,copy-on-read=on|off]\n"
     "       [,discard=ignore|unmap][,detect-zeroes=on|off|unmap]\n"
@@ -847,10 +846,6 @@ This option defines where is connected the drive by using an index in the list
 of available connectors of a given interface type.
 @item media=@var{media}
 This option defines the type of the media: disk or cdrom.
-@item cyls=@var{c},heads=@var{h},secs=@var{s}[,trans=@var{t}]
-Force disk physical geometry and the optional BIOS translation (trans=none or
-lba). These parameters are deprecated, use the corresponding parameters
-of @code{-device} instead.
 @item snapshot=@var{snapshot}
 @var{snapshot} is "on" or "off" and controls snapshot mode for the given drive
 (see @option{-snapshot}).
@@ -884,13 +879,6 @@ The default mode is @option{cache=writeback}.
 Specify which disk @var{format} will be used rather than detecting
 the format.  Can be used to specify format=raw to avoid interpreting
 an untrusted format header.
-@item serial=@var{serial}
-This option specifies the serial number to assign to the device. This
-parameter is deprecated, use the corresponding parameter of @code{-device}
-instead.
-@item addr=@var{addr}
-Specify the controller's PCI address (if=virtio only). This parameter is
-deprecated, use the corresponding parameter of @code{-device} instead.
 @item werror=@var{action},rerror=@var{action}
 Specify which @var{action} to take on write and read errors. Valid actions are:
 "ignore" (ignore the error and try to continue), "stop" (pause QEMU),
diff --git a/qobject/Makefile.objs b/qobject/Makefile.objs
index 002d25873a..7b12c9cacf 100644
--- a/qobject/Makefile.objs
+++ b/qobject/Makefile.objs
@@ -1,2 +1,3 @@
 util-obj-y = qnull.o qnum.o qstring.o qdict.o qlist.o qbool.o qlit.o
 util-obj-y += qjson.o qobject.o json-lexer.o json-streamer.o json-parser.o
+util-obj-y += block-qdict.o
diff --git a/qobject/block-qdict.c b/qobject/block-qdict.c
new file mode 100644
index 0000000000..df833083a7
--- /dev/null
+++ b/qobject/block-qdict.c
@@ -0,0 +1,722 @@
+/*
+ * Special QDict functions used by the block layer
+ *
+ * Copyright (c) 2013-2018 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "block/qdict.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+
+/**
+ * qdict_copy_default(): If no entry mapped by 'key' exists in 'dst' yet, the
+ * value of 'key' in 'src' is copied there (and the refcount increased
+ * accordingly).
+ */
+void qdict_copy_default(QDict *dst, QDict *src, const char *key)
+{
+    QObject *val;
+
+    if (qdict_haskey(dst, key)) {
+        return;
+    }
+
+    val = qdict_get(src, key);
+    if (val) {
+        qdict_put_obj(dst, key, qobject_ref(val));
+    }
+}
+
+/**
+ * qdict_set_default_str(): If no entry mapped by 'key' exists in 'dst' yet, a
+ * new QString initialised by 'val' is put there.
+ */
+void qdict_set_default_str(QDict *dst, const char *key, const char *val)
+{
+    if (qdict_haskey(dst, key)) {
+        return;
+    }
+
+    qdict_put_str(dst, key, val);
+}
+
+static void qdict_flatten_qdict(QDict *qdict, QDict *target,
+                                const char *prefix);
+
+static void qdict_flatten_qlist(QList *qlist, QDict *target, const char *prefix)
+{
+    QObject *value;
+    const QListEntry *entry;
+    QDict *dict_val;
+    QList *list_val;
+    char *new_key;
+    int i;
+
+    /* This function is never called with prefix == NULL, i.e., it is always
+     * called from within qdict_flatten_q(list|dict)(). Therefore, it does not
+     * need to remove list entries during the iteration (the whole list will be
+     * deleted eventually anyway from qdict_flatten_qdict()). */
+    assert(prefix);
+
+    entry = qlist_first(qlist);
+
+    for (i = 0; entry; entry = qlist_next(entry), i++) {
+        value = qlist_entry_obj(entry);
+        dict_val = qobject_to(QDict, value);
+        list_val = qobject_to(QList, value);
+        new_key = g_strdup_printf("%s.%i", prefix, i);
+
+        /*
+         * Flatten non-empty QDict and QList recursively into @target,
+         * copy other objects to @target
+         */
+        if (dict_val && qdict_size(dict_val)) {
+            qdict_flatten_qdict(dict_val, target, new_key);
+        } else if (list_val && !qlist_empty(list_val)) {
+            qdict_flatten_qlist(list_val, target, new_key);
+        } else {
+            qdict_put_obj(target, new_key, qobject_ref(value));
+        }
+
+        g_free(new_key);
+    }
+}
+
+static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix)
+{
+    QObject *value;
+    const QDictEntry *entry, *next;
+    QDict *dict_val;
+    QList *list_val;
+    char *new_key;
+
+    entry = qdict_first(qdict);
+
+    while (entry != NULL) {
+        next = qdict_next(qdict, entry);
+        value = qdict_entry_value(entry);
+        dict_val = qobject_to(QDict, value);
+        list_val = qobject_to(QList, value);
+        new_key = NULL;
+
+        if (prefix) {
+            new_key = g_strdup_printf("%s.%s", prefix, entry->key);
+        }
+
+        /*
+         * Flatten non-empty QDict and QList recursively into @target,
+         * copy other objects to @target
+         */
+        if (dict_val && qdict_size(dict_val)) {
+            qdict_flatten_qdict(dict_val, target,
+                                new_key ? new_key : entry->key);
+            qdict_del(qdict, entry->key);
+        } else if (list_val && !qlist_empty(list_val)) {
+            qdict_flatten_qlist(list_val, target,
+                                new_key ? new_key : entry->key);
+            qdict_del(qdict, entry->key);
+        } else if (target != qdict) {
+            qdict_put_obj(target, new_key, qobject_ref(value));
+            qdict_del(qdict, entry->key);
+        }
+
+        g_free(new_key);
+        entry = next;
+    }
+}
+
+/**
+ * qdict_flatten(): For each nested non-empty QDict with key x, all
+ * fields with key y are moved to this QDict and their key is renamed
+ * to "x.y". For each nested non-empty QList with key x, the field at
+ * index y is moved to this QDict with the key "x.y" (i.e., the
+ * reverse of what qdict_array_split() does).
+ * This operation is applied recursively for nested QDicts and QLists.
+ */
+void qdict_flatten(QDict *qdict)
+{
+    qdict_flatten_qdict(qdict, qdict, NULL);
+}
+
+/* extract all the src QDict entries starting by start into dst */
+void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start)
+
+{
+    const QDictEntry *entry, *next;
+    const char *p;
+
+    *dst = qdict_new();
+    entry = qdict_first(src);
+
+    while (entry != NULL) {
+        next = qdict_next(src, entry);
+        if (strstart(entry->key, start, &p)) {
+            qdict_put_obj(*dst, p, qobject_ref(entry->value));
+            qdict_del(src, entry->key);
+        }
+        entry = next;
+    }
+}
+
+static int qdict_count_prefixed_entries(const QDict *src, const char *start)
+{
+    const QDictEntry *entry;
+    int count = 0;
+
+    for (entry = qdict_first(src); entry; entry = qdict_next(src, entry)) {
+        if (strstart(entry->key, start, NULL)) {
+            if (count == INT_MAX) {
+                return -ERANGE;
+            }
+            count++;
+        }
+    }
+
+    return count;
+}
+
+/**
+ * qdict_array_split(): This function moves array-like elements of a QDict into
+ * a new QList. Every entry in the original QDict with a key "%u" or one
+ * prefixed "%u.", where %u designates an unsigned integer starting at 0 and
+ * incrementally counting up, will be moved to a new QDict at index %u in the
+ * output QList with the key prefix removed, if that prefix is "%u.". If the
+ * whole key is just "%u", the whole QObject will be moved unchanged without
+ * creating a new QDict. The function terminates when there is no entry in the
+ * QDict with a prefix directly (incrementally) following the last one; it also
+ * returns if there are both entries with "%u" and "%u." for the same index %u.
+ * Example: {"0.a": 42, "0.b": 23, "1.x": 0, "4.y": 1, "o.o": 7, "2": 66}
+ *      (or {"1.x": 0, "4.y": 1, "0.a": 42, "o.o": 7, "0.b": 23, "2": 66})
+ *       => [{"a": 42, "b": 23}, {"x": 0}, 66]
+ *      and {"4.y": 1, "o.o": 7} (remainder of the old QDict)
+ */
+void qdict_array_split(QDict *src, QList **dst)
+{
+    unsigned i;
+
+    *dst = qlist_new();
+
+    for (i = 0; i < UINT_MAX; i++) {
+        QObject *subqobj;
+        bool is_subqdict;
+        QDict *subqdict;
+        char indexstr[32], prefix[32];
+        size_t snprintf_ret;
+
+        snprintf_ret = snprintf(indexstr, 32, "%u", i);
+        assert(snprintf_ret < 32);
+
+        subqobj = qdict_get(src, indexstr);
+
+        snprintf_ret = snprintf(prefix, 32, "%u.", i);
+        assert(snprintf_ret < 32);
+
+        /* Overflow is the same as positive non-zero results */
+        is_subqdict = qdict_count_prefixed_entries(src, prefix);
+
+        /*
+         * There may be either a single subordinate object (named
+         * "%u") or multiple objects (each with a key prefixed "%u."),
+         * but not both.
+         */
+        if (!subqobj == !is_subqdict) {
+            break;
+        }
+
+        if (is_subqdict) {
+            qdict_extract_subqdict(src, &subqdict, prefix);
+            assert(qdict_size(subqdict) > 0);
+        } else {
+            qobject_ref(subqobj);
+            qdict_del(src, indexstr);
+        }
+
+        qlist_append_obj(*dst, subqobj ?: QOBJECT(subqdict));
+    }
+}
+
+/**
+ * qdict_split_flat_key:
+ * @key: the key string to split
+ * @prefix: non-NULL pointer to hold extracted prefix
+ * @suffix: non-NULL pointer to remaining suffix
+ *
+ * Given a flattened key such as 'foo.0.bar', split it into two parts
+ * at the first '.' separator. Allows double dot ('..') to escape the
+ * normal separator.
+ *
+ * e.g.
+ *    'foo.0.bar' -> prefix='foo' and suffix='0.bar'
+ *    'foo..0.bar' -> prefix='foo.0' and suffix='bar'
+ *
+ * The '..' sequence will be unescaped in the returned 'prefix'
+ * string. The 'suffix' string will be left in escaped format, so it
+ * can be fed back into the qdict_split_flat_key() key as the input
+ * later.
+ *
+ * The caller is responsible for freeing the string returned in @prefix
+ * using g_free().
+ */
+static void qdict_split_flat_key(const char *key, char **prefix,
+                                 const char **suffix)
+{
+    const char *separator;
+    size_t i, j;
+
+    /* Find first '.' separator, but if there is a pair '..'
+     * that acts as an escape, so skip over '..' */
+    separator = NULL;
+    do {
+        if (separator) {
+            separator += 2;
+        } else {
+            separator = key;
+        }
+        separator = strchr(separator, '.');
+    } while (separator && separator[1] == '.');
+
+    if (separator) {
+        *prefix = g_strndup(key, separator - key);
+        *suffix = separator + 1;
+    } else {
+        *prefix = g_strdup(key);
+        *suffix = NULL;
+    }
+
+    /* Unescape the '..' sequence into '.' */
+    for (i = 0, j = 0; (*prefix)[i] != '\0'; i++, j++) {
+        if ((*prefix)[i] == '.') {
+            assert((*prefix)[i + 1] == '.');
+            i++;
+        }
+        (*prefix)[j] = (*prefix)[i];
+    }
+    (*prefix)[j] = '\0';
+}
+
+/**
+ * qdict_is_list:
+ * @maybe_list: dict to check if keys represent list elements.
+ *
+ * Determine whether all keys in @maybe_list are valid list elements.
+ * If @maybe_list is non-zero in length and all the keys look like
+ * valid list indexes, this will return 1. If @maybe_list is zero
+ * length or all keys are non-numeric then it will return 0 to indicate
+ * it is a normal qdict. If there is a mix of numeric and non-numeric
+ * keys, or the list indexes are non-contiguous, an error is reported.
+ *
+ * Returns: 1 if a valid list, 0 if a dict, -1 on error
+ */
+static int qdict_is_list(QDict *maybe_list, Error **errp)
+{
+    const QDictEntry *ent;
+    ssize_t len = 0;
+    ssize_t max = -1;
+    int is_list = -1;
+    int64_t val;
+
+    for (ent = qdict_first(maybe_list); ent != NULL;
+         ent = qdict_next(maybe_list, ent)) {
+        int is_index = !qemu_strtoi64(ent->key, NULL, 10, &val);
+
+        if (is_list == -1) {
+            is_list = is_index;
+        }
+
+        if (is_index != is_list) {
+            error_setg(errp, "Cannot mix list and non-list keys");
+            return -1;
+        }
+
+        if (is_index) {
+            len++;
+            if (val > max) {
+                max = val;
+            }
+        }
+    }
+
+    if (is_list == -1) {
+        assert(!qdict_size(maybe_list));
+        is_list = 0;
+    }
+
+    /* NB this isn't a perfect check - e.g. it won't catch
+     * a list containing '1', '+1', '01', '3', but that
+     * does not matter - we've still proved that the
+     * input is a list. It is up the caller to do a
+     * stricter check if desired */
+    if (len != (max + 1)) {
+        error_setg(errp, "List indices are not contiguous, "
+                   "saw %zd elements but %zd largest index",
+                   len, max);
+        return -1;
+    }
+
+    return is_list;
+}
+
+/**
+ * qdict_crumple:
+ * @src: the original flat dictionary (only scalar values) to crumple
+ *
+ * Takes a flat dictionary whose keys use '.' separator to indicate
+ * nesting, and values are scalars, empty dictionaries or empty lists,
+ * and crumples it into a nested structure.
+ *
+ * To include a literal '.' in a key name, it must be escaped as '..'
+ *
+ * For example, an input of:
+ *
+ * { 'foo.0.bar': 'one', 'foo.0.wizz': '1',
+ *   'foo.1.bar': 'two', 'foo.1.wizz': '2' }
+ *
+ * will result in an output of:
+ *
+ * {
+ *   'foo': [
+ *      { 'bar': 'one', 'wizz': '1' },
+ *      { 'bar': 'two', 'wizz': '2' }
+ *   ],
+ * }
+ *
+ * The following scenarios in the input dict will result in an
+ * error being returned:
+ *
+ *  - Any values in @src are non-scalar types
+ *  - If keys in @src imply that a particular level is both a
+ *    list and a dict. e.g., "foo.0.bar" and "foo.eek.bar".
+ *  - If keys in @src imply that a particular level is a list,
+ *    but the indices are non-contiguous. e.g. "foo.0.bar" and
+ *    "foo.2.bar" without any "foo.1.bar" present.
+ *  - If keys in @src represent list indexes, but are not in
+ *    the "%zu" format. e.g. "foo.+0.bar"
+ *
+ * Returns: either a QDict or QList for the nested data structure, or NULL
+ * on error
+ */
+QObject *qdict_crumple(const QDict *src, Error **errp)
+{
+    const QDictEntry *ent;
+    QDict *two_level, *multi_level = NULL, *child_dict;
+    QDict *dict_val;
+    QList *list_val;
+    QObject *dst = NULL, *child;
+    size_t i;
+    char *prefix = NULL;
+    const char *suffix = NULL;
+    int is_list;
+
+    two_level = qdict_new();
+
+    /* Step 1: split our totally flat dict into a two level dict */
+    for (ent = qdict_first(src); ent != NULL; ent = qdict_next(src, ent)) {
+        dict_val = qobject_to(QDict, ent->value);
+        list_val = qobject_to(QList, ent->value);
+        if ((dict_val && qdict_size(dict_val))
+            || (list_val && !qlist_empty(list_val))) {
+            error_setg(errp, "Value %s is not flat", ent->key);
+            goto error;
+        }
+
+        qdict_split_flat_key(ent->key, &prefix, &suffix);
+        child = qdict_get(two_level, prefix);
+        child_dict = qobject_to(QDict, child);
+
+        if (child) {
+            /*
+             * If @child_dict, then all previous keys with this prefix
+             * had a suffix.  If @suffix, this one has one as well,
+             * and we're good, else there's a clash.
+             */
+            if (!child_dict || !suffix) {
+                error_setg(errp, "Cannot mix scalar and non-scalar keys");
+                goto error;
+            }
+        }
+
+        if (suffix) {
+            if (!child_dict) {
+                child_dict = qdict_new();
+                qdict_put(two_level, prefix, child_dict);
+            }
+            qdict_put_obj(child_dict, suffix, qobject_ref(ent->value));
+        } else {
+            qdict_put_obj(two_level, prefix, qobject_ref(ent->value));
+        }
+
+        g_free(prefix);
+        prefix = NULL;
+    }
+
+    /* Step 2: optionally process the two level dict recursively
+     * into a multi-level dict */
+    multi_level = qdict_new();
+    for (ent = qdict_first(two_level); ent != NULL;
+         ent = qdict_next(two_level, ent)) {
+        dict_val = qobject_to(QDict, ent->value);
+        if (dict_val && qdict_size(dict_val)) {
+            child = qdict_crumple(dict_val, errp);
+            if (!child) {
+                goto error;
+            }
+
+            qdict_put_obj(multi_level, ent->key, child);
+        } else {
+            qdict_put_obj(multi_level, ent->key, qobject_ref(ent->value));
+        }
+    }
+    qobject_unref(two_level);
+    two_level = NULL;
+
+    /* Step 3: detect if we need to turn our dict into list */
+    is_list = qdict_is_list(multi_level, errp);
+    if (is_list < 0) {
+        goto error;
+    }
+
+    if (is_list) {
+        dst = QOBJECT(qlist_new());
+
+        for (i = 0; i < qdict_size(multi_level); i++) {
+            char *key = g_strdup_printf("%zu", i);
+
+            child = qdict_get(multi_level, key);
+            g_free(key);
+
+            if (!child) {
+                error_setg(errp, "Missing list index %zu", i);
+                goto error;
+            }
+
+            qlist_append_obj(qobject_to(QList, dst), qobject_ref(child));
+        }
+        qobject_unref(multi_level);
+        multi_level = NULL;
+    } else {
+        dst = QOBJECT(multi_level);
+    }
+
+    return dst;
+
+ error:
+    g_free(prefix);
+    qobject_unref(multi_level);
+    qobject_unref(two_level);
+    qobject_unref(dst);
+    return NULL;
+}
+
+/**
+ * qdict_crumple_for_keyval_qiv:
+ * @src: the flat dictionary (only scalar values) to crumple
+ * @errp: location to store error
+ *
+ * Like qdict_crumple(), but additionally transforms scalar values so
+ * the result can be passed to qobject_input_visitor_new_keyval().
+ *
+ * The block subsystem uses this function to prepare its flat QDict
+ * with possibly confused scalar types for a visit.  It should not be
+ * used for anything else, and it should go away once the block
+ * subsystem has been cleaned up.
+ */
+static QObject *qdict_crumple_for_keyval_qiv(QDict *src, Error **errp)
+{
+    QDict *tmp = NULL;
+    char *buf;
+    const char *s;
+    const QDictEntry *ent;
+    QObject *dst;
+
+    for (ent = qdict_first(src); ent; ent = qdict_next(src, ent)) {
+        buf = NULL;
+        switch (qobject_type(ent->value)) {
+        case QTYPE_QNULL:
+        case QTYPE_QSTRING:
+            continue;
+        case QTYPE_QNUM:
+            s = buf = qnum_to_string(qobject_to(QNum, ent->value));
+            break;
+        case QTYPE_QDICT:
+        case QTYPE_QLIST:
+            /* @src isn't flat; qdict_crumple() will fail */
+            continue;
+        case QTYPE_QBOOL:
+            s = qbool_get_bool(qobject_to(QBool, ent->value))
+                ? "on" : "off";
+            break;
+        default:
+            abort();
+        }
+
+        if (!tmp) {
+            tmp = qdict_clone_shallow(src);
+        }
+        qdict_put(tmp, ent->key, qstring_from_str(s));
+        g_free(buf);
+    }
+
+    dst = qdict_crumple(tmp ?: src, errp);
+    qobject_unref(tmp);
+    return dst;
+}
+
+/**
+ * qdict_array_entries(): Returns the number of direct array entries if the
+ * sub-QDict of src specified by the prefix in subqdict (or src itself for
+ * prefix == "") is valid as an array, i.e. the length of the created list if
+ * the sub-QDict would become empty after calling qdict_array_split() on it. If
+ * the array is not valid, -EINVAL is returned.
+ */
+int qdict_array_entries(QDict *src, const char *subqdict)
+{
+    const QDictEntry *entry;
+    unsigned i;
+    unsigned entries = 0;
+    size_t subqdict_len = strlen(subqdict);
+
+    assert(!subqdict_len || subqdict[subqdict_len - 1] == '.');
+
+    /* qdict_array_split() loops until UINT_MAX, but as we want to return
+     * negative errors, we only have a signed return value here. Any additional
+     * entries will lead to -EINVAL. */
+    for (i = 0; i < INT_MAX; i++) {
+        QObject *subqobj;
+        int subqdict_entries;
+        char *prefix = g_strdup_printf("%s%u.", subqdict, i);
+
+        subqdict_entries = qdict_count_prefixed_entries(src, prefix);
+
+        /* Remove ending "." */
+        prefix[strlen(prefix) - 1] = 0;
+        subqobj = qdict_get(src, prefix);
+
+        g_free(prefix);
+
+        if (subqdict_entries < 0) {
+            return subqdict_entries;
+        }
+
+        /* There may be either a single subordinate object (named "%u") or
+         * multiple objects (each with a key prefixed "%u."), but not both. */
+        if (subqobj && subqdict_entries) {
+            return -EINVAL;
+        } else if (!subqobj && !subqdict_entries) {
+            break;
+        }
+
+        entries += subqdict_entries ? subqdict_entries : 1;
+    }
+
+    /* Consider everything handled that isn't part of the given sub-QDict */
+    for (entry = qdict_first(src); entry; entry = qdict_next(src, entry)) {
+        if (!strstart(qdict_entry_key(entry), subqdict, NULL)) {
+            entries++;
+        }
+    }
+
+    /* Anything left in the sub-QDict that wasn't handled? */
+    if (qdict_size(src) != entries) {
+        return -EINVAL;
+    }
+
+    return i;
+}
+
+/**
+ * qdict_join(): Absorb the src QDict into the dest QDict, that is, move all
+ * elements from src to dest.
+ *
+ * If an element from src has a key already present in dest, it will not be
+ * moved unless overwrite is true.
+ *
+ * If overwrite is true, the conflicting values in dest will be discarded and
+ * replaced by the corresponding values from src.
+ *
+ * Therefore, with overwrite being true, the src QDict will always be empty when
+ * this function returns. If overwrite is false, the src QDict will be empty
+ * iff there were no conflicts.
+ */
+void qdict_join(QDict *dest, QDict *src, bool overwrite)
+{
+    const QDictEntry *entry, *next;
+
+    entry = qdict_first(src);
+    while (entry) {
+        next = qdict_next(src, entry);
+
+        if (overwrite || !qdict_haskey(dest, entry->key)) {
+            qdict_put_obj(dest, entry->key, qobject_ref(entry->value));
+            qdict_del(src, entry->key);
+        }
+
+        entry = next;
+    }
+}
+
+/**
+ * qdict_rename_keys(): Rename keys in qdict according to the replacements
+ * specified in the array renames. The array must be terminated by an entry
+ * with from = NULL.
+ *
+ * The renames are performed individually in the order of the array, so entries
+ * may be renamed multiple times and may or may not conflict depending on the
+ * order of the renames array.
+ *
+ * Returns true for success, false in error cases.
+ */
+bool qdict_rename_keys(QDict *qdict, const QDictRenames *renames, Error **errp)
+{
+    QObject *qobj;
+
+    while (renames->from) {
+        if (qdict_haskey(qdict, renames->from)) {
+            if (qdict_haskey(qdict, renames->to)) {
+                error_setg(errp, "'%s' and its alias '%s' can't be used at the "
+                           "same time", renames->to, renames->from);
+                return false;
+            }
+
+            qobj = qdict_get(qdict, renames->from);
+            qdict_put_obj(qdict, renames->to, qobject_ref(qobj));
+            qdict_del(qdict, renames->from);
+        }
+
+        renames++;
+    }
+    return true;
+}
+
+/*
+ * Create a QObject input visitor for flat @qdict with possibly
+ * confused scalar types.
+ *
+ * The block subsystem uses this function to visit its flat QDict with
+ * possibly confused scalar types.  It should not be used for anything
+ * else, and it should go away once the block subsystem has been
+ * cleaned up.
+ */
+Visitor *qobject_input_visitor_new_flat_confused(QDict *qdict,
+                                                 Error **errp)
+{
+    QObject *crumpled;
+    Visitor *v;
+
+    crumpled = qdict_crumple_for_keyval_qiv(qdict, errp);
+    if (!crumpled) {
+        return NULL;
+    }
+
+    v = qobject_input_visitor_new_keyval(crumpled);
+    qobject_unref(crumpled);
+    return v;
+}
diff --git a/qobject/qdict.c b/qobject/qdict.c
index 22800eeceb..3d8c2f7bbc 100644
--- a/qobject/qdict.c
+++ b/qobject/qdict.c
@@ -14,13 +14,8 @@
 #include "qapi/qmp/qnum.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qbool.h"
-#include "qapi/qmp/qlist.h"
 #include "qapi/qmp/qnull.h"
 #include "qapi/qmp/qstring.h"
-#include "qapi/error.h"
-#include "qemu/queue.h"
-#include "qemu-common.h"
-#include "qemu/cutils.h"
 
 /**
  * qdict_new(): Create a new QDict
@@ -463,626 +458,3 @@ void qdict_destroy_obj(QObject *obj)
 
     g_free(qdict);
 }
-
-/**
- * qdict_copy_default(): If no entry mapped by 'key' exists in 'dst' yet, the
- * value of 'key' in 'src' is copied there (and the refcount increased
- * accordingly).
- */
-void qdict_copy_default(QDict *dst, QDict *src, const char *key)
-{
-    QObject *val;
-
-    if (qdict_haskey(dst, key)) {
-        return;
-    }
-
-    val = qdict_get(src, key);
-    if (val) {
-        qdict_put_obj(dst, key, qobject_ref(val));
-    }
-}
-
-/**
- * qdict_set_default_str(): If no entry mapped by 'key' exists in 'dst' yet, a
- * new QString initialised by 'val' is put there.
- */
-void qdict_set_default_str(QDict *dst, const char *key, const char *val)
-{
-    if (qdict_haskey(dst, key)) {
-        return;
-    }
-
-    qdict_put_str(dst, key, val);
-}
-
-static void qdict_flatten_qdict(QDict *qdict, QDict *target,
-                                const char *prefix);
-
-static void qdict_flatten_qlist(QList *qlist, QDict *target, const char *prefix)
-{
-    QObject *value;
-    const QListEntry *entry;
-    char *new_key;
-    int i;
-
-    /* This function is never called with prefix == NULL, i.e., it is always
-     * called from within qdict_flatten_q(list|dict)(). Therefore, it does not
-     * need to remove list entries during the iteration (the whole list will be
-     * deleted eventually anyway from qdict_flatten_qdict()). */
-    assert(prefix);
-
-    entry = qlist_first(qlist);
-
-    for (i = 0; entry; entry = qlist_next(entry), i++) {
-        value = qlist_entry_obj(entry);
-        new_key = g_strdup_printf("%s.%i", prefix, i);
-
-        if (qobject_type(value) == QTYPE_QDICT) {
-            qdict_flatten_qdict(qobject_to(QDict, value), target, new_key);
-        } else if (qobject_type(value) == QTYPE_QLIST) {
-            qdict_flatten_qlist(qobject_to(QList, value), target, new_key);
-        } else {
-            /* All other types are moved to the target unchanged. */
-            qdict_put_obj(target, new_key, qobject_ref(value));
-        }
-
-        g_free(new_key);
-    }
-}
-
-static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix)
-{
-    QObject *value;
-    const QDictEntry *entry, *next;
-    char *new_key;
-    bool delete;
-
-    entry = qdict_first(qdict);
-
-    while (entry != NULL) {
-
-        next = qdict_next(qdict, entry);
-        value = qdict_entry_value(entry);
-        new_key = NULL;
-        delete = false;
-
-        if (prefix) {
-            new_key = g_strdup_printf("%s.%s", prefix, entry->key);
-        }
-
-        if (qobject_type(value) == QTYPE_QDICT) {
-            /* Entries of QDicts are processed recursively, the QDict object
-             * itself disappears. */
-            qdict_flatten_qdict(qobject_to(QDict, value), target,
-                                new_key ? new_key : entry->key);
-            delete = true;
-        } else if (qobject_type(value) == QTYPE_QLIST) {
-            qdict_flatten_qlist(qobject_to(QList, value), target,
-                                new_key ? new_key : entry->key);
-            delete = true;
-        } else if (prefix) {
-            /* All other objects are moved to the target unchanged. */
-            qdict_put_obj(target, new_key, qobject_ref(value));
-            delete = true;
-        }
-
-        g_free(new_key);
-
-        if (delete) {
-            qdict_del(qdict, entry->key);
-
-            /* Restart loop after modifying the iterated QDict */
-            entry = qdict_first(qdict);
-            continue;
-        }
-
-        entry = next;
-    }
-}
-
-/**
- * qdict_flatten(): For each nested QDict with key x, all fields with key y
- * are moved to this QDict and their key is renamed to "x.y". For each nested
- * QList with key x, the field at index y is moved to this QDict with the key
- * "x.y" (i.e., the reverse of what qdict_array_split() does).
- * This operation is applied recursively for nested QDicts and QLists.
- */
-void qdict_flatten(QDict *qdict)
-{
-    qdict_flatten_qdict(qdict, qdict, NULL);
-}
-
-/* extract all the src QDict entries starting by start into dst */
-void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start)
-
-{
-    const QDictEntry *entry, *next;
-    const char *p;
-
-    *dst = qdict_new();
-    entry = qdict_first(src);
-
-    while (entry != NULL) {
-        next = qdict_next(src, entry);
-        if (strstart(entry->key, start, &p)) {
-            qdict_put_obj(*dst, p, qobject_ref(entry->value));
-            qdict_del(src, entry->key);
-        }
-        entry = next;
-    }
-}
-
-static int qdict_count_prefixed_entries(const QDict *src, const char *start)
-{
-    const QDictEntry *entry;
-    int count = 0;
-
-    for (entry = qdict_first(src); entry; entry = qdict_next(src, entry)) {
-        if (strstart(entry->key, start, NULL)) {
-            if (count == INT_MAX) {
-                return -ERANGE;
-            }
-            count++;
-        }
-    }
-
-    return count;
-}
-
-/**
- * qdict_array_split(): This function moves array-like elements of a QDict into
- * a new QList. Every entry in the original QDict with a key "%u" or one
- * prefixed "%u.", where %u designates an unsigned integer starting at 0 and
- * incrementally counting up, will be moved to a new QDict at index %u in the
- * output QList with the key prefix removed, if that prefix is "%u.". If the
- * whole key is just "%u", the whole QObject will be moved unchanged without
- * creating a new QDict. The function terminates when there is no entry in the
- * QDict with a prefix directly (incrementally) following the last one; it also
- * returns if there are both entries with "%u" and "%u." for the same index %u.
- * Example: {"0.a": 42, "0.b": 23, "1.x": 0, "4.y": 1, "o.o": 7, "2": 66}
- *      (or {"1.x": 0, "4.y": 1, "0.a": 42, "o.o": 7, "0.b": 23, "2": 66})
- *       => [{"a": 42, "b": 23}, {"x": 0}, 66]
- *      and {"4.y": 1, "o.o": 7} (remainder of the old QDict)
- */
-void qdict_array_split(QDict *src, QList **dst)
-{
-    unsigned i;
-
-    *dst = qlist_new();
-
-    for (i = 0; i < UINT_MAX; i++) {
-        QObject *subqobj;
-        bool is_subqdict;
-        QDict *subqdict;
-        char indexstr[32], prefix[32];
-        size_t snprintf_ret;
-
-        snprintf_ret = snprintf(indexstr, 32, "%u", i);
-        assert(snprintf_ret < 32);
-
-        subqobj = qdict_get(src, indexstr);
-
-        snprintf_ret = snprintf(prefix, 32, "%u.", i);
-        assert(snprintf_ret < 32);
-
-        /* Overflow is the same as positive non-zero results */
-        is_subqdict = qdict_count_prefixed_entries(src, prefix);
-
-        // There may be either a single subordinate object (named "%u") or
-        // multiple objects (each with a key prefixed "%u."), but not both.
-        if (!subqobj == !is_subqdict) {
-            break;
-        }
-
-        if (is_subqdict) {
-            qdict_extract_subqdict(src, &subqdict, prefix);
-            assert(qdict_size(subqdict) > 0);
-        } else {
-            qobject_ref(subqobj);
-            qdict_del(src, indexstr);
-        }
-
-        qlist_append_obj(*dst, subqobj ?: QOBJECT(subqdict));
-    }
-}
-
-/**
- * qdict_split_flat_key:
- * @key: the key string to split
- * @prefix: non-NULL pointer to hold extracted prefix
- * @suffix: non-NULL pointer to remaining suffix
- *
- * Given a flattened key such as 'foo.0.bar', split it into two parts
- * at the first '.' separator. Allows double dot ('..') to escape the
- * normal separator.
- *
- * e.g.
- *    'foo.0.bar' -> prefix='foo' and suffix='0.bar'
- *    'foo..0.bar' -> prefix='foo.0' and suffix='bar'
- *
- * The '..' sequence will be unescaped in the returned 'prefix'
- * string. The 'suffix' string will be left in escaped format, so it
- * can be fed back into the qdict_split_flat_key() key as the input
- * later.
- *
- * The caller is responsible for freeing the string returned in @prefix
- * using g_free().
- */
-static void qdict_split_flat_key(const char *key, char **prefix,
-                                 const char **suffix)
-{
-    const char *separator;
-    size_t i, j;
-
-    /* Find first '.' separator, but if there is a pair '..'
-     * that acts as an escape, so skip over '..' */
-    separator = NULL;
-    do {
-        if (separator) {
-            separator += 2;
-        } else {
-            separator = key;
-        }
-        separator = strchr(separator, '.');
-    } while (separator && separator[1] == '.');
-
-    if (separator) {
-        *prefix = g_strndup(key, separator - key);
-        *suffix = separator + 1;
-    } else {
-        *prefix = g_strdup(key);
-        *suffix = NULL;
-    }
-
-    /* Unescape the '..' sequence into '.' */
-    for (i = 0, j = 0; (*prefix)[i] != '\0'; i++, j++) {
-        if ((*prefix)[i] == '.') {
-            assert((*prefix)[i + 1] == '.');
-            i++;
-        }
-        (*prefix)[j] = (*prefix)[i];
-    }
-    (*prefix)[j] = '\0';
-}
-
-/**
- * qdict_is_list:
- * @maybe_list: dict to check if keys represent list elements.
- *
- * Determine whether all keys in @maybe_list are valid list elements.
- * If @maybe_list is non-zero in length and all the keys look like
- * valid list indexes, this will return 1. If @maybe_list is zero
- * length or all keys are non-numeric then it will return 0 to indicate
- * it is a normal qdict. If there is a mix of numeric and non-numeric
- * keys, or the list indexes are non-contiguous, an error is reported.
- *
- * Returns: 1 if a valid list, 0 if a dict, -1 on error
- */
-static int qdict_is_list(QDict *maybe_list, Error **errp)
-{
-    const QDictEntry *ent;
-    ssize_t len = 0;
-    ssize_t max = -1;
-    int is_list = -1;
-    int64_t val;
-
-    for (ent = qdict_first(maybe_list); ent != NULL;
-         ent = qdict_next(maybe_list, ent)) {
-
-        if (qemu_strtoi64(ent->key, NULL, 10, &val) == 0) {
-            if (is_list == -1) {
-                is_list = 1;
-            } else if (!is_list) {
-                error_setg(errp,
-                           "Cannot mix list and non-list keys");
-                return -1;
-            }
-            len++;
-            if (val > max) {
-                max = val;
-            }
-        } else {
-            if (is_list == -1) {
-                is_list = 0;
-            } else if (is_list) {
-                error_setg(errp,
-                           "Cannot mix list and non-list keys");
-                return -1;
-            }
-        }
-    }
-
-    if (is_list == -1) {
-        assert(!qdict_size(maybe_list));
-        is_list = 0;
-    }
-
-    /* NB this isn't a perfect check - e.g. it won't catch
-     * a list containing '1', '+1', '01', '3', but that
-     * does not matter - we've still proved that the
-     * input is a list. It is up the caller to do a
-     * stricter check if desired */
-    if (len != (max + 1)) {
-        error_setg(errp, "List indices are not contiguous, "
-                   "saw %zd elements but %zd largest index",
-                   len, max);
-        return -1;
-    }
-
-    return is_list;
-}
-
-/**
- * qdict_crumple:
- * @src: the original flat dictionary (only scalar values) to crumple
- *
- * Takes a flat dictionary whose keys use '.' separator to indicate
- * nesting, and values are scalars, and crumples it into a nested
- * structure.
- *
- * To include a literal '.' in a key name, it must be escaped as '..'
- *
- * For example, an input of:
- *
- * { 'foo.0.bar': 'one', 'foo.0.wizz': '1',
- *   'foo.1.bar': 'two', 'foo.1.wizz': '2' }
- *
- * will result in an output of:
- *
- * {
- *   'foo': [
- *      { 'bar': 'one', 'wizz': '1' },
- *      { 'bar': 'two', 'wizz': '2' }
- *   ],
- * }
- *
- * The following scenarios in the input dict will result in an
- * error being returned:
- *
- *  - Any values in @src are non-scalar types
- *  - If keys in @src imply that a particular level is both a
- *    list and a dict. e.g., "foo.0.bar" and "foo.eek.bar".
- *  - If keys in @src imply that a particular level is a list,
- *    but the indices are non-contiguous. e.g. "foo.0.bar" and
- *    "foo.2.bar" without any "foo.1.bar" present.
- *  - If keys in @src represent list indexes, but are not in
- *    the "%zu" format. e.g. "foo.+0.bar"
- *
- * Returns: either a QDict or QList for the nested data structure, or NULL
- * on error
- */
-QObject *qdict_crumple(const QDict *src, Error **errp)
-{
-    const QDictEntry *ent;
-    QDict *two_level, *multi_level = NULL;
-    QObject *dst = NULL, *child;
-    size_t i;
-    char *prefix = NULL;
-    const char *suffix = NULL;
-    int is_list;
-
-    two_level = qdict_new();
-
-    /* Step 1: split our totally flat dict into a two level dict */
-    for (ent = qdict_first(src); ent != NULL; ent = qdict_next(src, ent)) {
-        if (qobject_type(ent->value) == QTYPE_QDICT ||
-            qobject_type(ent->value) == QTYPE_QLIST) {
-            error_setg(errp, "Value %s is not a scalar",
-                       ent->key);
-            goto error;
-        }
-
-        qdict_split_flat_key(ent->key, &prefix, &suffix);
-
-        child = qdict_get(two_level, prefix);
-        if (suffix) {
-            QDict *child_dict = qobject_to(QDict, child);
-            if (!child_dict) {
-                if (child) {
-                    error_setg(errp, "Key %s prefix is already set as a scalar",
-                               prefix);
-                    goto error;
-                }
-
-                child_dict = qdict_new();
-                qdict_put_obj(two_level, prefix, QOBJECT(child_dict));
-            }
-
-            qdict_put_obj(child_dict, suffix, qobject_ref(ent->value));
-        } else {
-            if (child) {
-                error_setg(errp, "Key %s prefix is already set as a dict",
-                           prefix);
-                goto error;
-            }
-            qdict_put_obj(two_level, prefix, qobject_ref(ent->value));
-        }
-
-        g_free(prefix);
-        prefix = NULL;
-    }
-
-    /* Step 2: optionally process the two level dict recursively
-     * into a multi-level dict */
-    multi_level = qdict_new();
-    for (ent = qdict_first(two_level); ent != NULL;
-         ent = qdict_next(two_level, ent)) {
-        QDict *dict = qobject_to(QDict, ent->value);
-        if (dict) {
-            child = qdict_crumple(dict, errp);
-            if (!child) {
-                goto error;
-            }
-
-            qdict_put_obj(multi_level, ent->key, child);
-        } else {
-            qdict_put_obj(multi_level, ent->key, qobject_ref(ent->value));
-        }
-    }
-    qobject_unref(two_level);
-    two_level = NULL;
-
-    /* Step 3: detect if we need to turn our dict into list */
-    is_list = qdict_is_list(multi_level, errp);
-    if (is_list < 0) {
-        goto error;
-    }
-
-    if (is_list) {
-        dst = QOBJECT(qlist_new());
-
-        for (i = 0; i < qdict_size(multi_level); i++) {
-            char *key = g_strdup_printf("%zu", i);
-
-            child = qdict_get(multi_level, key);
-            g_free(key);
-
-            if (!child) {
-                error_setg(errp, "Missing list index %zu", i);
-                goto error;
-            }
-
-            qlist_append_obj(qobject_to(QList, dst), qobject_ref(child));
-        }
-        qobject_unref(multi_level);
-        multi_level = NULL;
-    } else {
-        dst = QOBJECT(multi_level);
-    }
-
-    return dst;
-
- error:
-    g_free(prefix);
-    qobject_unref(multi_level);
-    qobject_unref(two_level);
-    qobject_unref(dst);
-    return NULL;
-}
-
-/**
- * qdict_array_entries(): Returns the number of direct array entries if the
- * sub-QDict of src specified by the prefix in subqdict (or src itself for
- * prefix == "") is valid as an array, i.e. the length of the created list if
- * the sub-QDict would become empty after calling qdict_array_split() on it. If
- * the array is not valid, -EINVAL is returned.
- */
-int qdict_array_entries(QDict *src, const char *subqdict)
-{
-    const QDictEntry *entry;
-    unsigned i;
-    unsigned entries = 0;
-    size_t subqdict_len = strlen(subqdict);
-
-    assert(!subqdict_len || subqdict[subqdict_len - 1] == '.');
-
-    /* qdict_array_split() loops until UINT_MAX, but as we want to return
-     * negative errors, we only have a signed return value here. Any additional
-     * entries will lead to -EINVAL. */
-    for (i = 0; i < INT_MAX; i++) {
-        QObject *subqobj;
-        int subqdict_entries;
-        char *prefix = g_strdup_printf("%s%u.", subqdict, i);
-
-        subqdict_entries = qdict_count_prefixed_entries(src, prefix);
-
-        /* Remove ending "." */
-        prefix[strlen(prefix) - 1] = 0;
-        subqobj = qdict_get(src, prefix);
-
-        g_free(prefix);
-
-        if (subqdict_entries < 0) {
-            return subqdict_entries;
-        }
-
-        /* There may be either a single subordinate object (named "%u") or
-         * multiple objects (each with a key prefixed "%u."), but not both. */
-        if (subqobj && subqdict_entries) {
-            return -EINVAL;
-        } else if (!subqobj && !subqdict_entries) {
-            break;
-        }
-
-        entries += subqdict_entries ? subqdict_entries : 1;
-    }
-
-    /* Consider everything handled that isn't part of the given sub-QDict */
-    for (entry = qdict_first(src); entry; entry = qdict_next(src, entry)) {
-        if (!strstart(qdict_entry_key(entry), subqdict, NULL)) {
-            entries++;
-        }
-    }
-
-    /* Anything left in the sub-QDict that wasn't handled? */
-    if (qdict_size(src) != entries) {
-        return -EINVAL;
-    }
-
-    return i;
-}
-
-/**
- * qdict_join(): Absorb the src QDict into the dest QDict, that is, move all
- * elements from src to dest.
- *
- * If an element from src has a key already present in dest, it will not be
- * moved unless overwrite is true.
- *
- * If overwrite is true, the conflicting values in dest will be discarded and
- * replaced by the corresponding values from src.
- *
- * Therefore, with overwrite being true, the src QDict will always be empty when
- * this function returns. If overwrite is false, the src QDict will be empty
- * iff there were no conflicts.
- */
-void qdict_join(QDict *dest, QDict *src, bool overwrite)
-{
-    const QDictEntry *entry, *next;
-
-    entry = qdict_first(src);
-    while (entry) {
-        next = qdict_next(src, entry);
-
-        if (overwrite || !qdict_haskey(dest, entry->key)) {
-            qdict_put_obj(dest, entry->key, qobject_ref(entry->value));
-            qdict_del(src, entry->key);
-        }
-
-        entry = next;
-    }
-}
-
-/**
- * qdict_rename_keys(): Rename keys in qdict according to the replacements
- * specified in the array renames. The array must be terminated by an entry
- * with from = NULL.
- *
- * The renames are performed individually in the order of the array, so entries
- * may be renamed multiple times and may or may not conflict depending on the
- * order of the renames array.
- *
- * Returns true for success, false in error cases.
- */
-bool qdict_rename_keys(QDict *qdict, const QDictRenames *renames, Error **errp)
-{
-    QObject *qobj;
-
-    while (renames->from) {
-        if (qdict_haskey(qdict, renames->from)) {
-            if (qdict_haskey(qdict, renames->to)) {
-                error_setg(errp, "'%s' and its alias '%s' can't be used at the "
-                           "same time", renames->to, renames->from);
-                return false;
-            }
-
-            qobj = qdict_get(qdict, renames->from);
-            qdict_put_obj(qdict, renames->to, qobject_ref(qobj));
-            qdict_del(qdict, renames->from);
-        }
-
-        renames++;
-    }
-    return true;
-}
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index ab047b9402..e1de45e904 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -767,6 +767,24 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         return;
     }
 
+#ifndef CONFIG_USER_ONLY
+    /* The NVIC and M-profile CPU are two halves of a single piece of
+     * hardware; trying to use one without the other is a command line
+     * error and will result in segfaults if not caught here.
+     */
+    if (arm_feature(env, ARM_FEATURE_M)) {
+        if (!env->nvic) {
+            error_setg(errp, "This board cannot be used with Cortex-M CPUs");
+            return;
+        }
+    } else {
+        if (env->nvic) {
+            error_setg(errp, "This board can only be used with Cortex-M CPUs");
+            return;
+        }
+    }
+#endif
+
     cpu_exec_realizefn(cs, &local_err);
     if (local_err != NULL) {
         error_propagate(errp, local_err);
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 94f4356ce9..2e76084992 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -195,6 +195,15 @@ DEF_HELPER_FLAGS_5(sve_lsl_zpzz_s, TCG_CALL_NO_RWG,
 DEF_HELPER_FLAGS_5(sve_lsl_zpzz_d, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_b, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_h, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_5(sve_asr_zpzw_b, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_asr_zpzw_h, TCG_CALL_NO_RWG,
@@ -416,6 +425,230 @@ DEF_HELPER_FLAGS_4(sve_cpy_z_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
 DEF_HELPER_FLAGS_4(sve_ext, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(sve_insr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_insr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_insr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_insr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_3(sve_rev_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_rev_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_rev_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_rev_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_tbl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_tbl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_tbl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_tbl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_sunpk_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_sunpk_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_sunpk_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_uunpk_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_uunpk_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_uunpk_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_zip_p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_uzp_p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_trn_p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_rev_p, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_punpk_p, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_zip_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_zip_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_zip_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_zip_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_uzp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_uzp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_uzp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_uzp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_trn_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_trn_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_trn_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_trn_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_compact_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_compact_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_2(sve_last_active_element, TCG_CALL_NO_RWG, s32, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_revb_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_revb_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_revb_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_revh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_revh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_revw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_rbit_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_splice, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_cmpeq_ppzz_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpne_ppzz_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpge_ppzz_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpgt_ppzz_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphi_ppzz_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphs_ppzz_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_cmpeq_ppzz_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpne_ppzz_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpge_ppzz_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpgt_ppzz_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphi_ppzz_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphs_ppzz_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_cmpeq_ppzz_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpne_ppzz_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpge_ppzz_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpgt_ppzz_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphi_ppzz_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphs_ppzz_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_cmpeq_ppzz_d, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpne_ppzz_d, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpge_ppzz_d, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpgt_ppzz_d, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphi_ppzz_d, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphs_ppzz_d, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_cmpeq_ppzw_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpne_ppzw_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpge_ppzw_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpgt_ppzw_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphi_ppzw_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphs_ppzw_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmple_ppzw_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmplt_ppzw_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmplo_ppzw_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpls_ppzw_b, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_cmpeq_ppzw_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpne_ppzw_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpge_ppzw_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpgt_ppzw_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphi_ppzw_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphs_ppzw_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmple_ppzw_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmplt_ppzw_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmplo_ppzw_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpls_ppzw_h, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_cmpeq_ppzw_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpne_ppzw_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpge_ppzw_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpgt_ppzw_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphi_ppzw_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmphs_ppzw_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmple_ppzw_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmplt_ppzw_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmplo_ppzw_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_cmpls_ppzw_s, TCG_CALL_NO_RWG,
+                   i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_cmpeq_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpne_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpgt_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpge_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplt_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmple_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphs_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphi_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplo_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpls_ppzi_b, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_cmpeq_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpne_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpgt_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpge_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplt_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmple_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphs_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphi_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplo_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpls_ppzi_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_cmpeq_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpne_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpgt_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpge_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplt_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmple_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphs_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphi_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplo_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpls_ppzi_s, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_cmpeq_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpne_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpgt_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpge_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplt_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmple_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphs_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmphi_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmplo_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cmpls_ppzi_d, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_5(sve_and_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_bic_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_eor_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
@@ -425,3 +658,64 @@ DEF_HELPER_FLAGS_5(sve_orn_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_nor_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_nand_pppp, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_brkpa, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_brkpb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_brkpas, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_brkpbs, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_brka_z, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_brkb_z, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_brka_m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_brkb_m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_brkas_z, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_brkbs_z, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_brkas_m, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_brkbs_m, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_brkn, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_while, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
+
+DEF_HELPER_FLAGS_4(sve_subri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_subri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_subri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_subri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(sve_smaxi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_smaxi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_smaxi_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_smaxi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(sve_smini_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_smini_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_smini_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_smini_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(sve_umaxi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_umaxi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_umaxi_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_umaxi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(sve_umini_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_umini_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_umini_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_umini_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_5(gvec_recps_h, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_recps_s, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_recps_d, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_rsqrts_h, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 0c6a144458..879a7229e9 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -601,6 +601,25 @@ DEF_HELPER_FLAGS_5(gvec_fcmlas_idx, TCG_CALL_NO_RWG,
 DEF_HELPER_FLAGS_5(gvec_fcmlad, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #include "helper-sve.h"
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 4761d1921e..6f436f9096 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -24,6 +24,7 @@
 
 %imm4_16_p1     16:4 !function=plus1
 %imm6_22_5      22:1 5:5
+%imm7_22_16     22:2 16:5
 %imm8_16_10     16:5 10:3
 %imm9_16_10     16:s6 10:3
 
@@ -41,6 +42,8 @@
 
 # Signed 8-bit immediate, optionally shifted left by 8.
 %sh8_i8s        5:9 !function=expand_imm_sh8s
+# Unsigned 8-bit immediate, optionally shifted left by 8.
+%sh8_i8u        5:9 !function=expand_imm_sh8u
 
 # Either a copy of rd (at bit 0), or a different source
 # as propagated via the MOVPRFX instruction.
@@ -58,6 +61,7 @@
 &rri_esz        rd rn imm esz
 &rrr_esz        rd rn rm esz
 &rpr_esz        rd pg rn esz
+&rpr_s          rd pg rn s
 &rprr_s         rd pg rn rm s
 &rprr_esz       rd pg rn rm esz
 &rprrr_esz      rd pg rn rm ra esz
@@ -65,6 +69,8 @@
 &ptrue          rd esz pat s
 &incdec_cnt     rd pat esz imm d u
 &incdec2_cnt    rd rn pat esz imm d u
+&incdec_pred    rd pg esz d u
+&incdec2_pred   rd rn pg esz d u
 
 ###########################################################################
 # Named instruction formats.  These are generally used to
@@ -77,6 +83,9 @@
 @pd_pn          ........ esz:2 .. .... ....... rn:4 . rd:4      &rr_esz
 @rd_rn          ........ esz:2 ...... ...... rn:5 rd:5          &rr_esz
 
+# Two operand with governing predicate, flags setting
+@pd_pg_pn_s     ........ . s:1 ...... .. pg:4 . rn:4 . rd:4     &rpr_s
+
 # Three operand with unused vector element size
 @rd_rn_rm_e0    ........ ... rm:5 ... ... rn:5 rd:5             &rrr_esz esz=0
 
@@ -85,6 +94,15 @@
 
 # Three operand, vector element size
 @rd_rn_rm       ........ esz:2 . rm:5 ... ... rn:5 rd:5         &rrr_esz
+@pd_pn_pm       ........ esz:2 .. rm:4 ....... rn:4 . rd:4      &rrr_esz
+@rdn_rm         ........ esz:2 ...... ...... rm:5 rd:5 \
+                &rrr_esz rn=%reg_movprfx
+@rdn_sh_i8u     ........ esz:2 ...... ...... ..... rd:5 \
+                &rri_esz rn=%reg_movprfx imm=%sh8_i8u
+@rdn_i8u        ........ esz:2 ...... ... imm:8 rd:5 \
+                &rri_esz rn=%reg_movprfx
+@rdn_i8s        ........ esz:2 ...... ... imm:s8 rd:5 \
+                &rri_esz rn=%reg_movprfx
 
 # Three operand with "memory" size, aka immediate left shift
 @rd_rn_msz_rm   ........ ... rm:5 .... imm:2 rn:5 rd:5          &rrri
@@ -94,6 +112,8 @@
                 &rprr_esz rn=%reg_movprfx
 @rdm_pg_rn      ........ esz:2 ... ... ... pg:3 rn:5 rd:5 \
                 &rprr_esz rm=%reg_movprfx
+@rd_pg4_rn_rm   ........ esz:2 . rm:5  .. pg:4  rn:5 rd:5       &rprr_esz
+@pd_pg_rn_rm    ........ esz:2 . rm:5 ... pg:3 rn:5 . rd:4      &rprr_esz
 
 # Three register operand, with governing predicate, vector element size
 @rda_pg_rn_rm   ........ esz:2 . rm:5  ... pg:3 rn:5 rd:5 \
@@ -103,6 +123,7 @@
 
 # One register operand, with governing predicate, vector element size
 @rd_pg_rn       ........ esz:2 ... ... ... pg:3 rn:5 rd:5       &rpr_esz
+@rd_pg4_pn      ........ esz:2 ... ... .. pg:4 . rn:4 rd:5      &rpr_esz
 
 # Two register operands with a 6-bit signed immediate.
 @rd_rn_i6       ........ ... rn:5 ..... imm:s6 rd:5             &rri
@@ -125,6 +146,11 @@
 @rdn_dbm        ........ .. .... dbm:13 rd:5 \
                 &rr_dbm rn=%reg_movprfx
 
+# Predicate output, vector and immediate input,
+# controlling predicate, element size.
+@pd_pg_rn_i7    ........ esz:2 . imm:7 . pg:3 rn:5 . rd:4       &rpri_esz
+@pd_pg_rn_i5    ........ esz:2 . imm:s5 ... pg:3 rn:5 . rd:4    &rpri_esz
+
 # Basic Load/Store with 9-bit immediate offset
 @pd_rn_i9       ........ ........ ...... rn:5 . rd:4    \
                 &rri imm=%imm9_16_10
@@ -138,6 +164,12 @@
 @incdec2_cnt    ........ esz:2 .. .... ...... pat:5 rd:5 \
                 &incdec2_cnt imm=%imm4_16_p1 rn=%reg_movprfx
 
+# One register, predicate.
+# User must fill in U and D.
+@incdec_pred    ........ esz:2 .... .. ..... .. pg:4 rd:5       &incdec_pred
+@incdec2_pred   ........ esz:2 .... .. ..... .. pg:4 rd:5 \
+                &incdec2_pred rn=%reg_movprfx
+
 ###########################################################################
 # Instruction patterns.  Grouped according to the SVE encodingindex.xhtml.
 
@@ -369,6 +401,145 @@ CPY_z_i         00000101 .. 01 .... 00 . ........ .....   @rdn_pg4 imm=%sh8_i8s
 EXT             00000101 001 ..... 000 ... rm:5 rd:5 \
                 &rrri rn=%reg_movprfx imm=%imm8_16_10
 
+### SVE Permute - Unpredicated Group
+
+# SVE broadcast general register
+DUP_s           00000101 .. 1 00000 001110 ..... .....          @rd_rn
+
+# SVE broadcast indexed element
+DUP_x           00000101 .. 1 ..... 001000 rn:5 rd:5 \
+                &rri imm=%imm7_22_16
+
+# SVE insert SIMD&FP scalar register
+INSR_f          00000101 .. 1 10100 001110 ..... .....          @rdn_rm
+
+# SVE insert general register
+INSR_r          00000101 .. 1 00100 001110 ..... .....          @rdn_rm
+
+# SVE reverse vector elements
+REV_v           00000101 .. 1 11000 001110 ..... .....          @rd_rn
+
+# SVE vector table lookup
+TBL             00000101 .. 1 ..... 001100 ..... .....          @rd_rn_rm
+
+# SVE unpack vector elements
+UNPK            00000101 esz:2 1100 u:1 h:1 001110 rn:5 rd:5
+
+### SVE Permute - Predicates Group
+
+# SVE permute predicate elements
+ZIP1_p          00000101 .. 10 .... 010 000 0 .... 0 ....       @pd_pn_pm
+ZIP2_p          00000101 .. 10 .... 010 001 0 .... 0 ....       @pd_pn_pm
+UZP1_p          00000101 .. 10 .... 010 010 0 .... 0 ....       @pd_pn_pm
+UZP2_p          00000101 .. 10 .... 010 011 0 .... 0 ....       @pd_pn_pm
+TRN1_p          00000101 .. 10 .... 010 100 0 .... 0 ....       @pd_pn_pm
+TRN2_p          00000101 .. 10 .... 010 101 0 .... 0 ....       @pd_pn_pm
+
+# SVE reverse predicate elements
+REV_p           00000101 .. 11 0100 010 000 0 .... 0 ....       @pd_pn
+
+# SVE unpack predicate elements
+PUNPKLO         00000101 00 11 0000 010 000 0 .... 0 ....       @pd_pn_e0
+PUNPKHI         00000101 00 11 0001 010 000 0 .... 0 ....       @pd_pn_e0
+
+### SVE Permute - Interleaving Group
+
+# SVE permute vector elements
+ZIP1_z          00000101 .. 1 ..... 011 000 ..... .....         @rd_rn_rm
+ZIP2_z          00000101 .. 1 ..... 011 001 ..... .....         @rd_rn_rm
+UZP1_z          00000101 .. 1 ..... 011 010 ..... .....         @rd_rn_rm
+UZP2_z          00000101 .. 1 ..... 011 011 ..... .....         @rd_rn_rm
+TRN1_z          00000101 .. 1 ..... 011 100 ..... .....         @rd_rn_rm
+TRN2_z          00000101 .. 1 ..... 011 101 ..... .....         @rd_rn_rm
+
+### SVE Permute - Predicated Group
+
+# SVE compress active elements
+# Note esz >= 2
+COMPACT         00000101 .. 100001 100 ... ..... .....          @rd_pg_rn
+
+# SVE conditionally broadcast element to vector
+CLASTA_z        00000101 .. 10100 0 100 ... ..... .....         @rdn_pg_rm
+CLASTB_z        00000101 .. 10100 1 100 ... ..... .....         @rdn_pg_rm
+
+# SVE conditionally copy element to SIMD&FP scalar
+CLASTA_v        00000101 .. 10101 0 100 ... ..... .....         @rd_pg_rn
+CLASTB_v        00000101 .. 10101 1 100 ... ..... .....         @rd_pg_rn
+
+# SVE conditionally copy element to general register
+CLASTA_r        00000101 .. 11000 0 101 ... ..... .....         @rd_pg_rn
+CLASTB_r        00000101 .. 11000 1 101 ... ..... .....         @rd_pg_rn
+
+# SVE copy element to SIMD&FP scalar register
+LASTA_v         00000101 .. 10001 0 100 ... ..... .....         @rd_pg_rn
+LASTB_v         00000101 .. 10001 1 100 ... ..... .....         @rd_pg_rn
+
+# SVE copy element to general register
+LASTA_r         00000101 .. 10000 0 101 ... ..... .....         @rd_pg_rn
+LASTB_r         00000101 .. 10000 1 101 ... ..... .....         @rd_pg_rn
+
+# SVE copy element from SIMD&FP scalar register
+CPY_m_v         00000101 .. 100000 100 ... ..... .....          @rd_pg_rn
+
+# SVE copy element from general register to vector (predicated)
+CPY_m_r         00000101 .. 101000 101 ... ..... .....          @rd_pg_rn
+
+# SVE reverse within elements
+# Note esz >= operation size
+REVB            00000101 .. 1001 00 100 ... ..... .....         @rd_pg_rn
+REVH            00000101 .. 1001 01 100 ... ..... .....         @rd_pg_rn
+REVW            00000101 .. 1001 10 100 ... ..... .....         @rd_pg_rn
+RBIT            00000101 .. 1001 11 100 ... ..... .....         @rd_pg_rn
+
+# SVE vector splice (predicated)
+SPLICE          00000101 .. 101 100 100 ... ..... .....         @rdn_pg_rm
+
+### SVE Select Vectors Group
+
+# SVE select vector elements (predicated)
+SEL_zpzz        00000101 .. 1 ..... 11 .... ..... .....         @rd_pg4_rn_rm
+
+### SVE Integer Compare - Vectors Group
+
+# SVE integer compare_vectors
+CMPHS_ppzz      00100100 .. 0 ..... 000 ... ..... 0 ....        @pd_pg_rn_rm
+CMPHI_ppzz      00100100 .. 0 ..... 000 ... ..... 1 ....        @pd_pg_rn_rm
+CMPGE_ppzz      00100100 .. 0 ..... 100 ... ..... 0 ....        @pd_pg_rn_rm
+CMPGT_ppzz      00100100 .. 0 ..... 100 ... ..... 1 ....        @pd_pg_rn_rm
+CMPEQ_ppzz      00100100 .. 0 ..... 101 ... ..... 0 ....        @pd_pg_rn_rm
+CMPNE_ppzz      00100100 .. 0 ..... 101 ... ..... 1 ....        @pd_pg_rn_rm
+
+# SVE integer compare with wide elements
+# Note these require esz != 3.
+CMPEQ_ppzw      00100100 .. 0 ..... 001 ... ..... 0 ....        @pd_pg_rn_rm
+CMPNE_ppzw      00100100 .. 0 ..... 001 ... ..... 1 ....        @pd_pg_rn_rm
+CMPGE_ppzw      00100100 .. 0 ..... 010 ... ..... 0 ....        @pd_pg_rn_rm
+CMPGT_ppzw      00100100 .. 0 ..... 010 ... ..... 1 ....        @pd_pg_rn_rm
+CMPLT_ppzw      00100100 .. 0 ..... 011 ... ..... 0 ....        @pd_pg_rn_rm
+CMPLE_ppzw      00100100 .. 0 ..... 011 ... ..... 1 ....        @pd_pg_rn_rm
+CMPHS_ppzw      00100100 .. 0 ..... 110 ... ..... 0 ....        @pd_pg_rn_rm
+CMPHI_ppzw      00100100 .. 0 ..... 110 ... ..... 1 ....        @pd_pg_rn_rm
+CMPLO_ppzw      00100100 .. 0 ..... 111 ... ..... 0 ....        @pd_pg_rn_rm
+CMPLS_ppzw      00100100 .. 0 ..... 111 ... ..... 1 ....        @pd_pg_rn_rm
+
+### SVE Integer Compare - Unsigned Immediate Group
+
+# SVE integer compare with unsigned immediate
+CMPHS_ppzi      00100100 .. 1 ....... 0 ... ..... 0 ....      @pd_pg_rn_i7
+CMPHI_ppzi      00100100 .. 1 ....... 0 ... ..... 1 ....      @pd_pg_rn_i7
+CMPLO_ppzi      00100100 .. 1 ....... 1 ... ..... 0 ....      @pd_pg_rn_i7
+CMPLS_ppzi      00100100 .. 1 ....... 1 ... ..... 1 ....      @pd_pg_rn_i7
+
+### SVE Integer Compare - Signed Immediate Group
+
+# SVE integer compare with signed immediate
+CMPGE_ppzi      00100101 .. 0 ..... 000 ... ..... 0 ....      @pd_pg_rn_i5
+CMPGT_ppzi      00100101 .. 0 ..... 000 ... ..... 1 ....      @pd_pg_rn_i5
+CMPLT_ppzi      00100101 .. 0 ..... 001 ... ..... 0 ....      @pd_pg_rn_i5
+CMPLE_ppzi      00100101 .. 0 ..... 001 ... ..... 1 ....      @pd_pg_rn_i5
+CMPEQ_ppzi      00100101 .. 0 ..... 100 ... ..... 0 ....      @pd_pg_rn_i5
+CMPNE_ppzi      00100101 .. 0 ..... 100 ... ..... 1 ....      @pd_pg_rn_i5
+
 ### SVE Predicate Logical Operations Group
 
 # SVE predicate logical operations
@@ -410,6 +581,83 @@ PFIRST          00100101 01 011 000 11000 00 .... 0 ....        @pd_pn_e0
 # SVE predicate next active
 PNEXT           00100101 .. 011 001 11000 10 .... 0 ....        @pd_pn
 
+### SVE Partition Break Group
+
+# SVE propagate break from previous partition
+BRKPA           00100101 0. 00 .... 11 .... 0 .... 0 ....       @pd_pg_pn_pm_s
+BRKPB           00100101 0. 00 .... 11 .... 0 .... 1 ....       @pd_pg_pn_pm_s
+
+# SVE partition break condition
+BRKA_z          00100101 0. 01000001 .... 0 .... 0 ....         @pd_pg_pn_s
+BRKB_z          00100101 1. 01000001 .... 0 .... 0 ....         @pd_pg_pn_s
+BRKA_m          00100101 0. 01000001 .... 0 .... 1 ....         @pd_pg_pn_s
+BRKB_m          00100101 1. 01000001 .... 0 .... 1 ....         @pd_pg_pn_s
+
+# SVE propagate break to next partition
+BRKN            00100101 0. 01100001 .... 0 .... 0 ....         @pd_pg_pn_s
+
+### SVE Predicate Count Group
+
+# SVE predicate count
+CNTP            00100101 .. 100 000 10 .... 0 .... .....        @rd_pg4_pn
+
+# SVE inc/dec register by predicate count
+INCDECP_r       00100101 .. 10110 d:1 10001 00 .... .....     @incdec_pred u=1
+
+# SVE inc/dec vector by predicate count
+INCDECP_z       00100101 .. 10110 d:1 10000 00 .... .....    @incdec2_pred u=1
+
+# SVE saturating inc/dec register by predicate count
+SINCDECP_r_32   00100101 .. 1010 d:1 u:1 10001 00 .... .....    @incdec_pred
+SINCDECP_r_64   00100101 .. 1010 d:1 u:1 10001 10 .... .....    @incdec_pred
+
+# SVE saturating inc/dec vector by predicate count
+SINCDECP_z      00100101 .. 1010 d:1 u:1 10000 00 .... .....    @incdec2_pred
+
+### SVE Integer Compare - Scalars Group
+
+# SVE conditionally terminate scalars
+CTERM           00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000
+
+# SVE integer compare scalar count and limit
+WHILE           00100101 esz:2 1 rm:5 000 sf:1 u:1 1 rn:5 eq:1 rd:4
+
+### SVE Integer Wide Immediate - Unpredicated Group
+
+# SVE broadcast floating-point immediate (unpredicated)
+FDUP            00100101 esz:2 111 00 1110 imm:8 rd:5
+
+# SVE broadcast integer immediate (unpredicated)
+DUP_i           00100101 esz:2 111 00 011 . ........ rd:5       imm=%sh8_i8s
+
+# SVE integer add/subtract immediate (unpredicated)
+ADD_zzi         00100101 .. 100 000 11 . ........ .....         @rdn_sh_i8u
+SUB_zzi         00100101 .. 100 001 11 . ........ .....         @rdn_sh_i8u
+SUBR_zzi        00100101 .. 100 011 11 . ........ .....         @rdn_sh_i8u
+SQADD_zzi       00100101 .. 100 100 11 . ........ .....         @rdn_sh_i8u
+UQADD_zzi       00100101 .. 100 101 11 . ........ .....         @rdn_sh_i8u
+SQSUB_zzi       00100101 .. 100 110 11 . ........ .....         @rdn_sh_i8u
+UQSUB_zzi       00100101 .. 100 111 11 . ........ .....         @rdn_sh_i8u
+
+# SVE integer min/max immediate (unpredicated)
+SMAX_zzi        00100101 .. 101 000 110 ........ .....          @rdn_i8s
+UMAX_zzi        00100101 .. 101 001 110 ........ .....          @rdn_i8u
+SMIN_zzi        00100101 .. 101 010 110 ........ .....          @rdn_i8s
+UMIN_zzi        00100101 .. 101 011 110 ........ .....          @rdn_i8u
+
+# SVE integer multiply immediate (unpredicated)
+MUL_zzi         00100101 .. 110 000 110 ........ .....          @rdn_i8s
+
+### SVE Floating Point Arithmetic - Unpredicated Group
+
+# SVE floating-point arithmetic (unpredicated)
+FADD_zzz        01100101 .. 0 ..... 000 000 ..... .....         @rd_rn_rm
+FSUB_zzz        01100101 .. 0 ..... 000 001 ..... .....         @rd_rn_rm
+FMUL_zzz        01100101 .. 0 ..... 000 010 ..... .....         @rd_rn_rm
+FTSMUL          01100101 .. 0 ..... 000 011 ..... .....         @rd_rn_rm
+FRECPS          01100101 .. 0 ..... 000 110 ..... .....         @rd_rn_rm
+FRSQRTS         01100101 .. 0 ..... 000 111 ..... .....         @rd_rn_rm
+
 ### SVE Memory - 32-bit Gather and Unsized Contiguous Group
 
 # SVE load predicate register
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index b825e44cb5..128bbf9b04 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -74,6 +74,28 @@ static uint32_t iter_predtest_fwd(uint64_t d, uint64_t g, uint32_t flags)
     return flags;
 }
 
+/* This is an iterative function, called for each Pd and Pg word
+ * moving backward.
+ */
+static uint32_t iter_predtest_bwd(uint64_t d, uint64_t g, uint32_t flags)
+{
+    if (likely(g)) {
+        /* Compute C from first (i.e last) !(D & G).
+           Use bit 2 to signal first G bit seen.  */
+        if (!(flags & 4)) {
+            flags += 4 - 1; /* add bit 2, subtract C from PREDTEST_INIT */
+            flags |= (d & pow2floor(g)) == 0;
+        }
+
+        /* Accumulate Z from each D & G.  */
+        flags |= ((d & g) != 0) << 1;
+
+        /* Compute N from last (i.e first) D & G.  Replace previous.  */
+        flags = deposit32(flags, 31, 1, (d & (g & -g)) != 0);
+    }
+    return flags;
+}
+
 /* The same for a single word predicate.  */
 uint32_t HELPER(sve_predtest1)(uint64_t d, uint64_t g)
 {
@@ -238,6 +260,26 @@ static inline uint64_t expand_pred_s(uint8_t byte)
     return word[byte & 0x11];
 }
 
+/* Swap 16-bit words within a 32-bit word.  */
+static inline uint32_t hswap32(uint32_t h)
+{
+    return rol32(h, 16);
+}
+
+/* Swap 16-bit words within a 64-bit word.  */
+static inline uint64_t hswap64(uint64_t h)
+{
+    uint64_t m = 0x0000ffff0000ffffull;
+    h = rol64(h, 32);
+    return ((h & m) << 16) | ((h >> 16) & m);
+}
+
+/* Swap 32-bit words within a 64-bit word.  */
+static inline uint64_t wswap64(uint64_t h)
+{
+    return rol64(h, 32);
+}
+
 #define LOGICAL_PPPP(NAME, FUNC) \
 void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc)  \
 {                                                                         \
@@ -616,6 +658,20 @@ DO_ZPZ(sve_neg_h, uint16_t, H1_2, DO_NEG)
 DO_ZPZ(sve_neg_s, uint32_t, H1_4, DO_NEG)
 DO_ZPZ_D(sve_neg_d, uint64_t, DO_NEG)
 
+DO_ZPZ(sve_revb_h, uint16_t, H1_2, bswap16)
+DO_ZPZ(sve_revb_s, uint32_t, H1_4, bswap32)
+DO_ZPZ_D(sve_revb_d, uint64_t, bswap64)
+
+DO_ZPZ(sve_revh_s, uint32_t, H1_4, hswap32)
+DO_ZPZ_D(sve_revh_d, uint64_t, hswap64)
+
+DO_ZPZ_D(sve_revw_d, uint64_t, wswap64)
+
+DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8)
+DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16)
+DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32)
+DO_ZPZ_D(sve_rbit_d, uint64_t, revbit64)
+
 /* Three-operand expander, unpredicated, in which the third operand is "wide".
  */
 #define DO_ZZW(NAME, TYPE, TYPEW, H, OP)                       \
@@ -748,6 +804,46 @@ DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN)
 #undef DO_VPZ
 #undef DO_VPZ_D
 
+/* Two vector operand, one scalar operand, unpredicated.  */
+#define DO_ZZI(NAME, TYPE, OP)                                       \
+void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc)   \
+{                                                                    \
+    intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(TYPE);            \
+    TYPE s = s64, *d = vd, *n = vn;                                  \
+    for (i = 0; i < opr_sz; ++i) {                                   \
+        d[i] = OP(n[i], s);                                          \
+    }                                                                \
+}
+
+#define DO_SUBR(X, Y)   (Y - X)
+
+DO_ZZI(sve_subri_b, uint8_t, DO_SUBR)
+DO_ZZI(sve_subri_h, uint16_t, DO_SUBR)
+DO_ZZI(sve_subri_s, uint32_t, DO_SUBR)
+DO_ZZI(sve_subri_d, uint64_t, DO_SUBR)
+
+DO_ZZI(sve_smaxi_b, int8_t, DO_MAX)
+DO_ZZI(sve_smaxi_h, int16_t, DO_MAX)
+DO_ZZI(sve_smaxi_s, int32_t, DO_MAX)
+DO_ZZI(sve_smaxi_d, int64_t, DO_MAX)
+
+DO_ZZI(sve_smini_b, int8_t, DO_MIN)
+DO_ZZI(sve_smini_h, int16_t, DO_MIN)
+DO_ZZI(sve_smini_s, int32_t, DO_MIN)
+DO_ZZI(sve_smini_d, int64_t, DO_MIN)
+
+DO_ZZI(sve_umaxi_b, uint8_t, DO_MAX)
+DO_ZZI(sve_umaxi_h, uint16_t, DO_MAX)
+DO_ZZI(sve_umaxi_s, uint32_t, DO_MAX)
+DO_ZZI(sve_umaxi_d, uint64_t, DO_MAX)
+
+DO_ZZI(sve_umini_b, uint8_t, DO_MIN)
+DO_ZZI(sve_umini_h, uint16_t, DO_MIN)
+DO_ZZI(sve_umini_s, uint32_t, DO_MIN)
+DO_ZZI(sve_umini_d, uint64_t, DO_MIN)
+
+#undef DO_ZZI
+
 #undef DO_AND
 #undef DO_ORR
 #undef DO_EOR
@@ -762,6 +858,7 @@ DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN)
 #undef DO_ASR
 #undef DO_LSR
 #undef DO_LSL
+#undef DO_SUBR
 
 /* Similar to the ARM LastActiveElement pseudocode function, except the
    result is multiplied by the element size.  This includes the not found
@@ -1560,3 +1657,1156 @@ void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc)
         memcpy(vd + n_siz, &tmp, n_ofs);
     }
 }
+
+#define DO_INSR(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \
+{                                                                  \
+    intptr_t opr_sz = simd_oprsz(desc);                            \
+    swap_memmove(vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE));    \
+    *(TYPE *)(vd + H(0)) = val;                                    \
+}
+
+DO_INSR(sve_insr_b, uint8_t, H1)
+DO_INSR(sve_insr_h, uint16_t, H1_2)
+DO_INSR(sve_insr_s, uint32_t, H1_4)
+DO_INSR(sve_insr_d, uint64_t, )
+
+#undef DO_INSR
+
+void HELPER(sve_rev_b)(void *vd, void *vn, uint32_t desc)
+{
+    intptr_t i, j, opr_sz = simd_oprsz(desc);
+    for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
+        uint64_t f = *(uint64_t *)(vn + i);
+        uint64_t b = *(uint64_t *)(vn + j);
+        *(uint64_t *)(vd + i) = bswap64(b);
+        *(uint64_t *)(vd + j) = bswap64(f);
+    }
+}
+
+void HELPER(sve_rev_h)(void *vd, void *vn, uint32_t desc)
+{
+    intptr_t i, j, opr_sz = simd_oprsz(desc);
+    for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
+        uint64_t f = *(uint64_t *)(vn + i);
+        uint64_t b = *(uint64_t *)(vn + j);
+        *(uint64_t *)(vd + i) = hswap64(b);
+        *(uint64_t *)(vd + j) = hswap64(f);
+    }
+}
+
+void HELPER(sve_rev_s)(void *vd, void *vn, uint32_t desc)
+{
+    intptr_t i, j, opr_sz = simd_oprsz(desc);
+    for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
+        uint64_t f = *(uint64_t *)(vn + i);
+        uint64_t b = *(uint64_t *)(vn + j);
+        *(uint64_t *)(vd + i) = rol64(b, 32);
+        *(uint64_t *)(vd + j) = rol64(f, 32);
+    }
+}
+
+void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t desc)
+{
+    intptr_t i, j, opr_sz = simd_oprsz(desc);
+    for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
+        uint64_t f = *(uint64_t *)(vn + i);
+        uint64_t b = *(uint64_t *)(vn + j);
+        *(uint64_t *)(vd + i) = b;
+        *(uint64_t *)(vd + j) = f;
+    }
+}
+
+#define DO_TBL(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{                                                              \
+    intptr_t i, opr_sz = simd_oprsz(desc);                     \
+    uintptr_t elem = opr_sz / sizeof(TYPE);                    \
+    TYPE *d = vd, *n = vn, *m = vm;                            \
+    ARMVectorReg tmp;                                          \
+    if (unlikely(vd == vn)) {                                  \
+        n = memcpy(&tmp, vn, opr_sz);                          \
+    }                                                          \
+    for (i = 0; i < elem; i++) {                               \
+        TYPE j = m[H(i)];                                      \
+        d[H(i)] = j < elem ? n[H(j)] : 0;                      \
+    }                                                          \
+}
+
+DO_TBL(sve_tbl_b, uint8_t, H1)
+DO_TBL(sve_tbl_h, uint16_t, H2)
+DO_TBL(sve_tbl_s, uint32_t, H4)
+DO_TBL(sve_tbl_d, uint64_t, )
+
+#undef TBL
+
+#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc)           \
+{                                                              \
+    intptr_t i, opr_sz = simd_oprsz(desc);                     \
+    TYPED *d = vd;                                             \
+    TYPES *n = vn;                                             \
+    ARMVectorReg tmp;                                          \
+    if (unlikely(vn - vd < opr_sz)) {                          \
+        n = memcpy(&tmp, n, opr_sz / 2);                       \
+    }                                                          \
+    for (i = 0; i < opr_sz / sizeof(TYPED); i++) {             \
+        d[HD(i)] = n[HS(i)];                                   \
+    }                                                          \
+}
+
+DO_UNPK(sve_sunpk_h, int16_t, int8_t, H2, H1)
+DO_UNPK(sve_sunpk_s, int32_t, int16_t, H4, H2)
+DO_UNPK(sve_sunpk_d, int64_t, int32_t, , H4)
+
+DO_UNPK(sve_uunpk_h, uint16_t, uint8_t, H2, H1)
+DO_UNPK(sve_uunpk_s, uint32_t, uint16_t, H4, H2)
+DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, , H4)
+
+#undef DO_UNPK
+
+/* Mask of bits included in the even numbered predicates of width esz.
+ * We also use this for expand_bits/compress_bits, and so extend the
+ * same pattern out to 16-bit units.
+ */
+static const uint64_t even_bit_esz_masks[5] = {
+    0x5555555555555555ull,
+    0x3333333333333333ull,
+    0x0f0f0f0f0f0f0f0full,
+    0x00ff00ff00ff00ffull,
+    0x0000ffff0000ffffull,
+};
+
+/* Zero-extend units of 2**N bits to units of 2**(N+1) bits.
+ * For N==0, this corresponds to the operation that in qemu/bitops.h
+ * we call half_shuffle64; this algorithm is from Hacker's Delight,
+ * section 7-2 Shuffling Bits.
+ */
+static uint64_t expand_bits(uint64_t x, int n)
+{
+    int i;
+
+    x &= 0xffffffffu;
+    for (i = 4; i >= n; i--) {
+        int sh = 1 << i;
+        x = ((x << sh) | x) & even_bit_esz_masks[i];
+    }
+    return x;
+}
+
+/* Compress units of 2**(N+1) bits to units of 2**N bits.
+ * For N==0, this corresponds to the operation that in qemu/bitops.h
+ * we call half_unshuffle64; this algorithm is from Hacker's Delight,
+ * section 7-2 Shuffling Bits, where it is called an inverse half shuffle.
+ */
+static uint64_t compress_bits(uint64_t x, int n)
+{
+    int i;
+
+    for (i = n; i <= 4; i++) {
+        int sh = 1 << i;
+        x &= even_bit_esz_masks[i];
+        x = (x >> sh) | x;
+    }
+    return x & 0xffffffffu;
+}
+
+void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+    intptr_t high = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1);
+    uint64_t *d = vd;
+    intptr_t i;
+
+    if (oprsz <= 8) {
+        uint64_t nn = *(uint64_t *)vn;
+        uint64_t mm = *(uint64_t *)vm;
+        int half = 4 * oprsz;
+
+        nn = extract64(nn, high * half, half);
+        mm = extract64(mm, high * half, half);
+        nn = expand_bits(nn, esz);
+        mm = expand_bits(mm, esz);
+        d[0] = nn + (mm << (1 << esz));
+    } else {
+        ARMPredicateReg tmp_n, tmp_m;
+
+        /* We produce output faster than we consume input.
+           Therefore we must be mindful of possible overlap.  */
+        if ((vn - vd) < (uintptr_t)oprsz) {
+            vn = memcpy(&tmp_n, vn, oprsz);
+        }
+        if ((vm - vd) < (uintptr_t)oprsz) {
+            vm = memcpy(&tmp_m, vm, oprsz);
+        }
+        if (high) {
+            high = oprsz >> 1;
+        }
+
+        if ((high & 3) == 0) {
+            uint32_t *n = vn, *m = vm;
+            high >>= 2;
+
+            for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
+                uint64_t nn = n[H4(high + i)];
+                uint64_t mm = m[H4(high + i)];
+
+                nn = expand_bits(nn, esz);
+                mm = expand_bits(mm, esz);
+                d[i] = nn + (mm << (1 << esz));
+            }
+        } else {
+            uint8_t *n = vn, *m = vm;
+            uint16_t *d16 = vd;
+
+            for (i = 0; i < oprsz / 2; i++) {
+                uint16_t nn = n[H1(high + i)];
+                uint16_t mm = m[H1(high + i)];
+
+                nn = expand_bits(nn, esz);
+                mm = expand_bits(mm, esz);
+                d16[H2(i)] = nn + (mm << (1 << esz));
+            }
+        }
+    }
+}
+
+void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+    int odd = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1) << esz;
+    uint64_t *d = vd, *n = vn, *m = vm;
+    uint64_t l, h;
+    intptr_t i;
+
+    if (oprsz <= 8) {
+        l = compress_bits(n[0] >> odd, esz);
+        h = compress_bits(m[0] >> odd, esz);
+        d[0] = extract64(l + (h << (4 * oprsz)), 0, 8 * oprsz);
+    } else {
+        ARMPredicateReg tmp_m;
+        intptr_t oprsz_16 = oprsz / 16;
+
+        if ((vm - vd) < (uintptr_t)oprsz) {
+            m = memcpy(&tmp_m, vm, oprsz);
+        }
+
+        for (i = 0; i < oprsz_16; i++) {
+            l = n[2 * i + 0];
+            h = n[2 * i + 1];
+            l = compress_bits(l >> odd, esz);
+            h = compress_bits(h >> odd, esz);
+            d[i] = l + (h << 32);
+        }
+
+        /* For VL which is not a power of 2, the results from M do not
+           align nicely with the uint64_t for D.  Put the aligned results
+           from M into TMP_M and then copy it into place afterward.  */
+        if (oprsz & 15) {
+            d[i] = compress_bits(n[2 * i] >> odd, esz);
+
+            for (i = 0; i < oprsz_16; i++) {
+                l = m[2 * i + 0];
+                h = m[2 * i + 1];
+                l = compress_bits(l >> odd, esz);
+                h = compress_bits(h >> odd, esz);
+                tmp_m.p[i] = l + (h << 32);
+            }
+            tmp_m.p[i] = compress_bits(m[2 * i] >> odd, esz);
+
+            swap_memmove(vd + oprsz / 2, &tmp_m, oprsz / 2);
+        } else {
+            for (i = 0; i < oprsz_16; i++) {
+                l = m[2 * i + 0];
+                h = m[2 * i + 1];
+                l = compress_bits(l >> odd, esz);
+                h = compress_bits(h >> odd, esz);
+                d[oprsz_16 + i] = l + (h << 32);
+            }
+        }
+    }
+}
+
+void HELPER(sve_trn_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    uintptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+    bool odd = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1);
+    uint64_t *d = vd, *n = vn, *m = vm;
+    uint64_t mask;
+    int shr, shl;
+    intptr_t i;
+
+    shl = 1 << esz;
+    shr = 0;
+    mask = even_bit_esz_masks[esz];
+    if (odd) {
+        mask <<= shl;
+        shr = shl;
+        shl = 0;
+    }
+
+    for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
+        uint64_t nn = (n[i] & mask) >> shr;
+        uint64_t mm = (m[i] & mask) << shl;
+        d[i] = nn + mm;
+    }
+}
+
+/* Reverse units of 2**N bits.  */
+static uint64_t reverse_bits_64(uint64_t x, int n)
+{
+    int i, sh;
+
+    x = bswap64(x);
+    for (i = 2, sh = 4; i >= n; i--, sh >>= 1) {
+        uint64_t mask = even_bit_esz_masks[i];
+        x = ((x & mask) << sh) | ((x >> sh) & mask);
+    }
+    return x;
+}
+
+static uint8_t reverse_bits_8(uint8_t x, int n)
+{
+    static const uint8_t mask[3] = { 0x55, 0x33, 0x0f };
+    int i, sh;
+
+    for (i = 2, sh = 4; i >= n; i--, sh >>= 1) {
+        x = ((x & mask[i]) << sh) | ((x >> sh) & mask[i]);
+    }
+    return x;
+}
+
+void HELPER(sve_rev_p)(void *vd, void *vn, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+    intptr_t i, oprsz_2 = oprsz / 2;
+
+    if (oprsz <= 8) {
+        uint64_t l = *(uint64_t *)vn;
+        l = reverse_bits_64(l << (64 - 8 * oprsz), esz);
+        *(uint64_t *)vd = l;
+    } else if ((oprsz & 15) == 0) {
+        for (i = 0; i < oprsz_2; i += 8) {
+            intptr_t ih = oprsz - 8 - i;
+            uint64_t l = reverse_bits_64(*(uint64_t *)(vn + i), esz);
+            uint64_t h = reverse_bits_64(*(uint64_t *)(vn + ih), esz);
+            *(uint64_t *)(vd + i) = h;
+            *(uint64_t *)(vd + ih) = l;
+        }
+    } else {
+        for (i = 0; i < oprsz_2; i += 1) {
+            intptr_t il = H1(i);
+            intptr_t ih = H1(oprsz - 1 - i);
+            uint8_t l = reverse_bits_8(*(uint8_t *)(vn + il), esz);
+            uint8_t h = reverse_bits_8(*(uint8_t *)(vn + ih), esz);
+            *(uint8_t *)(vd + il) = h;
+            *(uint8_t *)(vd + ih) = l;
+        }
+    }
+}
+
+void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t high = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1);
+    uint64_t *d = vd;
+    intptr_t i;
+
+    if (oprsz <= 8) {
+        uint64_t nn = *(uint64_t *)vn;
+        int half = 4 * oprsz;
+
+        nn = extract64(nn, high * half, half);
+        nn = expand_bits(nn, 0);
+        d[0] = nn;
+    } else {
+        ARMPredicateReg tmp_n;
+
+        /* We produce output faster than we consume input.
+           Therefore we must be mindful of possible overlap.  */
+        if ((vn - vd) < (uintptr_t)oprsz) {
+            vn = memcpy(&tmp_n, vn, oprsz);
+        }
+        if (high) {
+            high = oprsz >> 1;
+        }
+
+        if ((high & 3) == 0) {
+            uint32_t *n = vn;
+            high >>= 2;
+
+            for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
+                uint64_t nn = n[H4(high + i)];
+                d[i] = expand_bits(nn, 0);
+            }
+        } else {
+            uint16_t *d16 = vd;
+            uint8_t *n = vn;
+
+            for (i = 0; i < oprsz / 2; i++) {
+                uint16_t nn = n[H1(high + i)];
+                d16[H2(i)] = expand_bits(nn, 0);
+            }
+        }
+    }
+}
+
+#define DO_ZIP(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc)       \
+{                                                                    \
+    intptr_t oprsz = simd_oprsz(desc);                               \
+    intptr_t i, oprsz_2 = oprsz / 2;                                 \
+    ARMVectorReg tmp_n, tmp_m;                                       \
+    /* We produce output faster than we consume input.               \
+       Therefore we must be mindful of possible overlap.  */         \
+    if (unlikely((vn - vd) < (uintptr_t)oprsz)) {                    \
+        vn = memcpy(&tmp_n, vn, oprsz_2);                            \
+    }                                                                \
+    if (unlikely((vm - vd) < (uintptr_t)oprsz)) {                    \
+        vm = memcpy(&tmp_m, vm, oprsz_2);                            \
+    }                                                                \
+    for (i = 0; i < oprsz_2; i += sizeof(TYPE)) {                    \
+        *(TYPE *)(vd + H(2 * i + 0)) = *(TYPE *)(vn + H(i));         \
+        *(TYPE *)(vd + H(2 * i + sizeof(TYPE))) = *(TYPE *)(vm + H(i)); \
+    }                                                                \
+}
+
+DO_ZIP(sve_zip_b, uint8_t, H1)
+DO_ZIP(sve_zip_h, uint16_t, H1_2)
+DO_ZIP(sve_zip_s, uint32_t, H1_4)
+DO_ZIP(sve_zip_d, uint64_t, )
+
+#define DO_UZP(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc)         \
+{                                                                      \
+    intptr_t oprsz = simd_oprsz(desc);                                 \
+    intptr_t oprsz_2 = oprsz / 2;                                      \
+    intptr_t odd_ofs = simd_data(desc);                                \
+    intptr_t i;                                                        \
+    ARMVectorReg tmp_m;                                                \
+    if (unlikely((vm - vd) < (uintptr_t)oprsz)) {                      \
+        vm = memcpy(&tmp_m, vm, oprsz);                                \
+    }                                                                  \
+    for (i = 0; i < oprsz_2; i += sizeof(TYPE)) {                      \
+        *(TYPE *)(vd + H(i)) = *(TYPE *)(vn + H(2 * i + odd_ofs));     \
+    }                                                                  \
+    for (i = 0; i < oprsz_2; i += sizeof(TYPE)) {                      \
+        *(TYPE *)(vd + H(oprsz_2 + i)) = *(TYPE *)(vm + H(2 * i + odd_ofs)); \
+    }                                                                  \
+}
+
+DO_UZP(sve_uzp_b, uint8_t, H1)
+DO_UZP(sve_uzp_h, uint16_t, H1_2)
+DO_UZP(sve_uzp_s, uint32_t, H1_4)
+DO_UZP(sve_uzp_d, uint64_t, )
+
+#define DO_TRN(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc)         \
+{                                                                      \
+    intptr_t oprsz = simd_oprsz(desc);                                 \
+    intptr_t odd_ofs = simd_data(desc);                                \
+    intptr_t i;                                                        \
+    for (i = 0; i < oprsz; i += 2 * sizeof(TYPE)) {                    \
+        TYPE ae = *(TYPE *)(vn + H(i + odd_ofs));                      \
+        TYPE be = *(TYPE *)(vm + H(i + odd_ofs));                      \
+        *(TYPE *)(vd + H(i + 0)) = ae;                                 \
+        *(TYPE *)(vd + H(i + sizeof(TYPE))) = be;                      \
+    }                                                                  \
+}
+
+DO_TRN(sve_trn_b, uint8_t, H1)
+DO_TRN(sve_trn_h, uint16_t, H1_2)
+DO_TRN(sve_trn_s, uint32_t, H1_4)
+DO_TRN(sve_trn_d, uint64_t, )
+
+#undef DO_ZIP
+#undef DO_UZP
+#undef DO_TRN
+
+void HELPER(sve_compact_s)(void *vd, void *vn, void *vg, uint32_t desc)
+{
+    intptr_t i, j, opr_sz = simd_oprsz(desc) / 4;
+    uint32_t *d = vd, *n = vn;
+    uint8_t *pg = vg;
+
+    for (i = j = 0; i < opr_sz; i++) {
+        if (pg[H1(i / 2)] & (i & 1 ? 0x10 : 0x01)) {
+            d[H4(j)] = n[H4(i)];
+            j++;
+        }
+    }
+    for (; j < opr_sz; j++) {
+        d[H4(j)] = 0;
+    }
+}
+
+void HELPER(sve_compact_d)(void *vd, void *vn, void *vg, uint32_t desc)
+{
+    intptr_t i, j, opr_sz = simd_oprsz(desc) / 8;
+    uint64_t *d = vd, *n = vn;
+    uint8_t *pg = vg;
+
+    for (i = j = 0; i < opr_sz; i++) {
+        if (pg[H1(i)] & 1) {
+            d[j] = n[i];
+            j++;
+        }
+    }
+    for (; j < opr_sz; j++) {
+        d[j] = 0;
+    }
+}
+
+/* Similar to the ARM LastActiveElement pseudocode function, except the
+ * result is multiplied by the element size.  This includes the not found
+ * indication; e.g. not found for esz=3 is -8.
+ */
+int32_t HELPER(sve_last_active_element)(void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+
+    return last_active_element(vg, DIV_ROUND_UP(oprsz, 8), esz);
+}
+
+void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc)
+{
+    intptr_t opr_sz = simd_oprsz(desc) / 8;
+    int esz = simd_data(desc);
+    uint64_t pg, first_g, last_g, len, mask = pred_esz_masks[esz];
+    intptr_t i, first_i, last_i;
+    ARMVectorReg tmp;
+
+    first_i = last_i = 0;
+    first_g = last_g = 0;
+
+    /* Find the extent of the active elements within VG.  */
+    for (i = QEMU_ALIGN_UP(opr_sz, 8) - 8; i >= 0; i -= 8) {
+        pg = *(uint64_t *)(vg + i) & mask;
+        if (pg) {
+            if (last_g == 0) {
+                last_g = pg;
+                last_i = i;
+            }
+            first_g = pg;
+            first_i = i;
+        }
+    }
+
+    len = 0;
+    if (first_g != 0) {
+        first_i = first_i * 8 + ctz64(first_g);
+        last_i = last_i * 8 + 63 - clz64(last_g);
+        len = last_i - first_i + (1 << esz);
+        if (vd == vm) {
+            vm = memcpy(&tmp, vm, opr_sz * 8);
+        }
+        swap_memmove(vd, vn + first_i, len);
+    }
+    swap_memmove(vd + len, vm, opr_sz * 8 - len);
+}
+
+void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm,
+                            void *vg, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+    uint64_t *d = vd, *n = vn, *m = vm;
+    uint8_t *pg = vg;
+
+    for (i = 0; i < opr_sz; i += 1) {
+        uint64_t nn = n[i], mm = m[i];
+        uint64_t pp = expand_pred_b(pg[H1(i)]);
+        d[i] = (nn & pp) | (mm & ~pp);
+    }
+}
+
+void HELPER(sve_sel_zpzz_h)(void *vd, void *vn, void *vm,
+                            void *vg, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+    uint64_t *d = vd, *n = vn, *m = vm;
+    uint8_t *pg = vg;
+
+    for (i = 0; i < opr_sz; i += 1) {
+        uint64_t nn = n[i], mm = m[i];
+        uint64_t pp = expand_pred_h(pg[H1(i)]);
+        d[i] = (nn & pp) | (mm & ~pp);
+    }
+}
+
+void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm,
+                            void *vg, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+    uint64_t *d = vd, *n = vn, *m = vm;
+    uint8_t *pg = vg;
+
+    for (i = 0; i < opr_sz; i += 1) {
+        uint64_t nn = n[i], mm = m[i];
+        uint64_t pp = expand_pred_s(pg[H1(i)]);
+        d[i] = (nn & pp) | (mm & ~pp);
+    }
+}
+
+void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm,
+                            void *vg, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+    uint64_t *d = vd, *n = vn, *m = vm;
+    uint8_t *pg = vg;
+
+    for (i = 0; i < opr_sz; i += 1) {
+        uint64_t nn = n[i], mm = m[i];
+        d[i] = (pg[H1(i)] & 1 ? nn : mm);
+    }
+}
+
+/* Two operand comparison controlled by a predicate.
+ * ??? It is very tempting to want to be able to expand this inline
+ * with x86 instructions, e.g.
+ *
+ *    vcmpeqw    zm, zn, %ymm0
+ *    vpmovmskb  %ymm0, %eax
+ *    and        $0x5555, %eax
+ *    and        pg, %eax
+ *
+ * or even aarch64, e.g.
+ *
+ *    // mask = 4000 1000 0400 0100 0040 0010 0004 0001
+ *    cmeq       v0.8h, zn, zm
+ *    and        v0.8h, v0.8h, mask
+ *    addv       h0, v0.8h
+ *    and        v0.8b, pg
+ *
+ * However, coming up with an abstraction that allows vector inputs and
+ * a scalar output, and also handles the byte-ordering of sub-uint64_t
+ * scalar outputs, is tricky.
+ */
+#define DO_CMP_PPZZ(NAME, TYPE, OP, H, MASK)                                 \
+uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{                                                                            \
+    intptr_t opr_sz = simd_oprsz(desc);                                      \
+    uint32_t flags = PREDTEST_INIT;                                          \
+    intptr_t i = opr_sz;                                                     \
+    do {                                                                     \
+        uint64_t out = 0, pg;                                                \
+        do {                                                                 \
+            i -= sizeof(TYPE), out <<= sizeof(TYPE);                         \
+            TYPE nn = *(TYPE *)(vn + H(i));                                  \
+            TYPE mm = *(TYPE *)(vm + H(i));                                  \
+            out |= nn OP mm;                                                 \
+        } while (i & 63);                                                    \
+        pg = *(uint64_t *)(vg + (i >> 3)) & MASK;                            \
+        out &= pg;                                                           \
+        *(uint64_t *)(vd + (i >> 3)) = out;                                  \
+        flags = iter_predtest_bwd(out, pg, flags);                           \
+    } while (i > 0);                                                         \
+    return flags;                                                            \
+}
+
+#define DO_CMP_PPZZ_B(NAME, TYPE, OP) \
+    DO_CMP_PPZZ(NAME, TYPE, OP, H1,   0xffffffffffffffffull)
+#define DO_CMP_PPZZ_H(NAME, TYPE, OP) \
+    DO_CMP_PPZZ(NAME, TYPE, OP, H1_2, 0x5555555555555555ull)
+#define DO_CMP_PPZZ_S(NAME, TYPE, OP) \
+    DO_CMP_PPZZ(NAME, TYPE, OP, H1_4, 0x1111111111111111ull)
+#define DO_CMP_PPZZ_D(NAME, TYPE, OP) \
+    DO_CMP_PPZZ(NAME, TYPE, OP,     , 0x0101010101010101ull)
+
+DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t,  ==)
+DO_CMP_PPZZ_H(sve_cmpeq_ppzz_h, uint16_t, ==)
+DO_CMP_PPZZ_S(sve_cmpeq_ppzz_s, uint32_t, ==)
+DO_CMP_PPZZ_D(sve_cmpeq_ppzz_d, uint64_t, ==)
+
+DO_CMP_PPZZ_B(sve_cmpne_ppzz_b, uint8_t,  !=)
+DO_CMP_PPZZ_H(sve_cmpne_ppzz_h, uint16_t, !=)
+DO_CMP_PPZZ_S(sve_cmpne_ppzz_s, uint32_t, !=)
+DO_CMP_PPZZ_D(sve_cmpne_ppzz_d, uint64_t, !=)
+
+DO_CMP_PPZZ_B(sve_cmpgt_ppzz_b, int8_t,  >)
+DO_CMP_PPZZ_H(sve_cmpgt_ppzz_h, int16_t, >)
+DO_CMP_PPZZ_S(sve_cmpgt_ppzz_s, int32_t, >)
+DO_CMP_PPZZ_D(sve_cmpgt_ppzz_d, int64_t, >)
+
+DO_CMP_PPZZ_B(sve_cmpge_ppzz_b, int8_t,  >=)
+DO_CMP_PPZZ_H(sve_cmpge_ppzz_h, int16_t, >=)
+DO_CMP_PPZZ_S(sve_cmpge_ppzz_s, int32_t, >=)
+DO_CMP_PPZZ_D(sve_cmpge_ppzz_d, int64_t, >=)
+
+DO_CMP_PPZZ_B(sve_cmphi_ppzz_b, uint8_t,  >)
+DO_CMP_PPZZ_H(sve_cmphi_ppzz_h, uint16_t, >)
+DO_CMP_PPZZ_S(sve_cmphi_ppzz_s, uint32_t, >)
+DO_CMP_PPZZ_D(sve_cmphi_ppzz_d, uint64_t, >)
+
+DO_CMP_PPZZ_B(sve_cmphs_ppzz_b, uint8_t,  >=)
+DO_CMP_PPZZ_H(sve_cmphs_ppzz_h, uint16_t, >=)
+DO_CMP_PPZZ_S(sve_cmphs_ppzz_s, uint32_t, >=)
+DO_CMP_PPZZ_D(sve_cmphs_ppzz_d, uint64_t, >=)
+
+#undef DO_CMP_PPZZ_B
+#undef DO_CMP_PPZZ_H
+#undef DO_CMP_PPZZ_S
+#undef DO_CMP_PPZZ_D
+#undef DO_CMP_PPZZ
+
+/* Similar, but the second source is "wide".  */
+#define DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H, MASK)                     \
+uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{                                                                            \
+    intptr_t opr_sz = simd_oprsz(desc);                                      \
+    uint32_t flags = PREDTEST_INIT;                                          \
+    intptr_t i = opr_sz;                                                     \
+    do {                                                                     \
+        uint64_t out = 0, pg;                                                \
+        do {                                                                 \
+            TYPEW mm = *(TYPEW *)(vm + i - 8);                               \
+            do {                                                             \
+                i -= sizeof(TYPE), out <<= sizeof(TYPE);                     \
+                TYPE nn = *(TYPE *)(vn + H(i));                              \
+                out |= nn OP mm;                                             \
+            } while (i & 7);                                                 \
+        } while (i & 63);                                                    \
+        pg = *(uint64_t *)(vg + (i >> 3)) & MASK;                            \
+        out &= pg;                                                           \
+        *(uint64_t *)(vd + (i >> 3)) = out;                                  \
+        flags = iter_predtest_bwd(out, pg, flags);                           \
+    } while (i > 0);                                                         \
+    return flags;                                                            \
+}
+
+#define DO_CMP_PPZW_B(NAME, TYPE, TYPEW, OP) \
+    DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1,   0xffffffffffffffffull)
+#define DO_CMP_PPZW_H(NAME, TYPE, TYPEW, OP) \
+    DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_2, 0x5555555555555555ull)
+#define DO_CMP_PPZW_S(NAME, TYPE, TYPEW, OP) \
+    DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_4, 0x1111111111111111ull)
+
+DO_CMP_PPZW_B(sve_cmpeq_ppzw_b, uint8_t,  uint64_t, ==)
+DO_CMP_PPZW_H(sve_cmpeq_ppzw_h, uint16_t, uint64_t, ==)
+DO_CMP_PPZW_S(sve_cmpeq_ppzw_s, uint32_t, uint64_t, ==)
+
+DO_CMP_PPZW_B(sve_cmpne_ppzw_b, uint8_t,  uint64_t, !=)
+DO_CMP_PPZW_H(sve_cmpne_ppzw_h, uint16_t, uint64_t, !=)
+DO_CMP_PPZW_S(sve_cmpne_ppzw_s, uint32_t, uint64_t, !=)
+
+DO_CMP_PPZW_B(sve_cmpgt_ppzw_b, int8_t,   int64_t, >)
+DO_CMP_PPZW_H(sve_cmpgt_ppzw_h, int16_t,  int64_t, >)
+DO_CMP_PPZW_S(sve_cmpgt_ppzw_s, int32_t,  int64_t, >)
+
+DO_CMP_PPZW_B(sve_cmpge_ppzw_b, int8_t,   int64_t, >=)
+DO_CMP_PPZW_H(sve_cmpge_ppzw_h, int16_t,  int64_t, >=)
+DO_CMP_PPZW_S(sve_cmpge_ppzw_s, int32_t,  int64_t, >=)
+
+DO_CMP_PPZW_B(sve_cmphi_ppzw_b, uint8_t,  uint64_t, >)
+DO_CMP_PPZW_H(sve_cmphi_ppzw_h, uint16_t, uint64_t, >)
+DO_CMP_PPZW_S(sve_cmphi_ppzw_s, uint32_t, uint64_t, >)
+
+DO_CMP_PPZW_B(sve_cmphs_ppzw_b, uint8_t,  uint64_t, >=)
+DO_CMP_PPZW_H(sve_cmphs_ppzw_h, uint16_t, uint64_t, >=)
+DO_CMP_PPZW_S(sve_cmphs_ppzw_s, uint32_t, uint64_t, >=)
+
+DO_CMP_PPZW_B(sve_cmplt_ppzw_b, int8_t,   int64_t, <)
+DO_CMP_PPZW_H(sve_cmplt_ppzw_h, int16_t,  int64_t, <)
+DO_CMP_PPZW_S(sve_cmplt_ppzw_s, int32_t,  int64_t, <)
+
+DO_CMP_PPZW_B(sve_cmple_ppzw_b, int8_t,   int64_t, <=)
+DO_CMP_PPZW_H(sve_cmple_ppzw_h, int16_t,  int64_t, <=)
+DO_CMP_PPZW_S(sve_cmple_ppzw_s, int32_t,  int64_t, <=)
+
+DO_CMP_PPZW_B(sve_cmplo_ppzw_b, uint8_t,  uint64_t, <)
+DO_CMP_PPZW_H(sve_cmplo_ppzw_h, uint16_t, uint64_t, <)
+DO_CMP_PPZW_S(sve_cmplo_ppzw_s, uint32_t, uint64_t, <)
+
+DO_CMP_PPZW_B(sve_cmpls_ppzw_b, uint8_t,  uint64_t, <=)
+DO_CMP_PPZW_H(sve_cmpls_ppzw_h, uint16_t, uint64_t, <=)
+DO_CMP_PPZW_S(sve_cmpls_ppzw_s, uint32_t, uint64_t, <=)
+
+#undef DO_CMP_PPZW_B
+#undef DO_CMP_PPZW_H
+#undef DO_CMP_PPZW_S
+#undef DO_CMP_PPZW
+
+/* Similar, but the second source is immediate.  */
+#define DO_CMP_PPZI(NAME, TYPE, OP, H, MASK)                         \
+uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc)   \
+{                                                                    \
+    intptr_t opr_sz = simd_oprsz(desc);                              \
+    uint32_t flags = PREDTEST_INIT;                                  \
+    TYPE mm = simd_data(desc);                                       \
+    intptr_t i = opr_sz;                                             \
+    do {                                                             \
+        uint64_t out = 0, pg;                                        \
+        do {                                                         \
+            i -= sizeof(TYPE), out <<= sizeof(TYPE);                 \
+            TYPE nn = *(TYPE *)(vn + H(i));                          \
+            out |= nn OP mm;                                         \
+        } while (i & 63);                                            \
+        pg = *(uint64_t *)(vg + (i >> 3)) & MASK;                    \
+        out &= pg;                                                   \
+        *(uint64_t *)(vd + (i >> 3)) = out;                          \
+        flags = iter_predtest_bwd(out, pg, flags);                   \
+    } while (i > 0);                                                 \
+    return flags;                                                    \
+}
+
+#define DO_CMP_PPZI_B(NAME, TYPE, OP) \
+    DO_CMP_PPZI(NAME, TYPE, OP, H1,   0xffffffffffffffffull)
+#define DO_CMP_PPZI_H(NAME, TYPE, OP) \
+    DO_CMP_PPZI(NAME, TYPE, OP, H1_2, 0x5555555555555555ull)
+#define DO_CMP_PPZI_S(NAME, TYPE, OP) \
+    DO_CMP_PPZI(NAME, TYPE, OP, H1_4, 0x1111111111111111ull)
+#define DO_CMP_PPZI_D(NAME, TYPE, OP) \
+    DO_CMP_PPZI(NAME, TYPE, OP,     , 0x0101010101010101ull)
+
+DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t,  ==)
+DO_CMP_PPZI_H(sve_cmpeq_ppzi_h, uint16_t, ==)
+DO_CMP_PPZI_S(sve_cmpeq_ppzi_s, uint32_t, ==)
+DO_CMP_PPZI_D(sve_cmpeq_ppzi_d, uint64_t, ==)
+
+DO_CMP_PPZI_B(sve_cmpne_ppzi_b, uint8_t,  !=)
+DO_CMP_PPZI_H(sve_cmpne_ppzi_h, uint16_t, !=)
+DO_CMP_PPZI_S(sve_cmpne_ppzi_s, uint32_t, !=)
+DO_CMP_PPZI_D(sve_cmpne_ppzi_d, uint64_t, !=)
+
+DO_CMP_PPZI_B(sve_cmpgt_ppzi_b, int8_t,  >)
+DO_CMP_PPZI_H(sve_cmpgt_ppzi_h, int16_t, >)
+DO_CMP_PPZI_S(sve_cmpgt_ppzi_s, int32_t, >)
+DO_CMP_PPZI_D(sve_cmpgt_ppzi_d, int64_t, >)
+
+DO_CMP_PPZI_B(sve_cmpge_ppzi_b, int8_t,  >=)
+DO_CMP_PPZI_H(sve_cmpge_ppzi_h, int16_t, >=)
+DO_CMP_PPZI_S(sve_cmpge_ppzi_s, int32_t, >=)
+DO_CMP_PPZI_D(sve_cmpge_ppzi_d, int64_t, >=)
+
+DO_CMP_PPZI_B(sve_cmphi_ppzi_b, uint8_t,  >)
+DO_CMP_PPZI_H(sve_cmphi_ppzi_h, uint16_t, >)
+DO_CMP_PPZI_S(sve_cmphi_ppzi_s, uint32_t, >)
+DO_CMP_PPZI_D(sve_cmphi_ppzi_d, uint64_t, >)
+
+DO_CMP_PPZI_B(sve_cmphs_ppzi_b, uint8_t,  >=)
+DO_CMP_PPZI_H(sve_cmphs_ppzi_h, uint16_t, >=)
+DO_CMP_PPZI_S(sve_cmphs_ppzi_s, uint32_t, >=)
+DO_CMP_PPZI_D(sve_cmphs_ppzi_d, uint64_t, >=)
+
+DO_CMP_PPZI_B(sve_cmplt_ppzi_b, int8_t,  <)
+DO_CMP_PPZI_H(sve_cmplt_ppzi_h, int16_t, <)
+DO_CMP_PPZI_S(sve_cmplt_ppzi_s, int32_t, <)
+DO_CMP_PPZI_D(sve_cmplt_ppzi_d, int64_t, <)
+
+DO_CMP_PPZI_B(sve_cmple_ppzi_b, int8_t,  <=)
+DO_CMP_PPZI_H(sve_cmple_ppzi_h, int16_t, <=)
+DO_CMP_PPZI_S(sve_cmple_ppzi_s, int32_t, <=)
+DO_CMP_PPZI_D(sve_cmple_ppzi_d, int64_t, <=)
+
+DO_CMP_PPZI_B(sve_cmplo_ppzi_b, uint8_t,  <)
+DO_CMP_PPZI_H(sve_cmplo_ppzi_h, uint16_t, <)
+DO_CMP_PPZI_S(sve_cmplo_ppzi_s, uint32_t, <)
+DO_CMP_PPZI_D(sve_cmplo_ppzi_d, uint64_t, <)
+
+DO_CMP_PPZI_B(sve_cmpls_ppzi_b, uint8_t,  <=)
+DO_CMP_PPZI_H(sve_cmpls_ppzi_h, uint16_t, <=)
+DO_CMP_PPZI_S(sve_cmpls_ppzi_s, uint32_t, <=)
+DO_CMP_PPZI_D(sve_cmpls_ppzi_d, uint64_t, <=)
+
+#undef DO_CMP_PPZI_B
+#undef DO_CMP_PPZI_H
+#undef DO_CMP_PPZI_S
+#undef DO_CMP_PPZI_D
+#undef DO_CMP_PPZI
+
+/* Similar to the ARM LastActive pseudocode function.  */
+static bool last_active_pred(void *vd, void *vg, intptr_t oprsz)
+{
+    intptr_t i;
+
+    for (i = QEMU_ALIGN_UP(oprsz, 8) - 8; i >= 0; i -= 8) {
+        uint64_t pg = *(uint64_t *)(vg + i);
+        if (pg) {
+            return (pow2floor(pg) & *(uint64_t *)(vd + i)) != 0;
+        }
+    }
+    return 0;
+}
+
+/* Compute a mask into RETB that is true for all G, up to and including
+ * (if after) or excluding (if !after) the first G & N.
+ * Return true if BRK found.
+ */
+static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g,
+                        bool brk, bool after)
+{
+    uint64_t b;
+
+    if (brk) {
+        b = 0;
+    } else if ((g & n) == 0) {
+        /* For all G, no N are set; break not found.  */
+        b = g;
+    } else {
+        /* Break somewhere in N.  Locate it.  */
+        b = g & n;            /* guard true, pred true */
+        b = b & -b;           /* first such */
+        if (after) {
+            b = b | (b - 1);  /* break after same */
+        } else {
+            b = b - 1;        /* break before same */
+        }
+        brk = true;
+    }
+
+    *retb = b;
+    return brk;
+}
+
+/* Compute a zeroing BRK.  */
+static void compute_brk_z(uint64_t *d, uint64_t *n, uint64_t *g,
+                          intptr_t oprsz, bool after)
+{
+    bool brk = false;
+    intptr_t i;
+
+    for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
+        uint64_t this_b, this_g = g[i];
+
+        brk = compute_brk(&this_b, n[i], this_g, brk, after);
+        d[i] = this_b & this_g;
+    }
+}
+
+/* Likewise, but also compute flags.  */
+static uint32_t compute_brks_z(uint64_t *d, uint64_t *n, uint64_t *g,
+                               intptr_t oprsz, bool after)
+{
+    uint32_t flags = PREDTEST_INIT;
+    bool brk = false;
+    intptr_t i;
+
+    for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
+        uint64_t this_b, this_d, this_g = g[i];
+
+        brk = compute_brk(&this_b, n[i], this_g, brk, after);
+        d[i] = this_d = this_b & this_g;
+        flags = iter_predtest_fwd(this_d, this_g, flags);
+    }
+    return flags;
+}
+
+/* Compute a merging BRK.  */
+static void compute_brk_m(uint64_t *d, uint64_t *n, uint64_t *g,
+                          intptr_t oprsz, bool after)
+{
+    bool brk = false;
+    intptr_t i;
+
+    for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
+        uint64_t this_b, this_g = g[i];
+
+        brk = compute_brk(&this_b, n[i], this_g, brk, after);
+        d[i] = (this_b & this_g) | (d[i] & ~this_g);
+    }
+}
+
+/* Likewise, but also compute flags.  */
+static uint32_t compute_brks_m(uint64_t *d, uint64_t *n, uint64_t *g,
+                               intptr_t oprsz, bool after)
+{
+    uint32_t flags = PREDTEST_INIT;
+    bool brk = false;
+    intptr_t i;
+
+    for (i = 0; i < oprsz / 8; ++i) {
+        uint64_t this_b, this_d = d[i], this_g = g[i];
+
+        brk = compute_brk(&this_b, n[i], this_g, brk, after);
+        d[i] = this_d = (this_b & this_g) | (this_d & ~this_g);
+        flags = iter_predtest_fwd(this_d, this_g, flags);
+    }
+    return flags;
+}
+
+static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz)
+{
+    /* It is quicker to zero the whole predicate than loop on OPRSZ.
+     * The compiler should turn this into 4 64-bit integer stores.
+     */
+    memset(d, 0, sizeof(ARMPredicateReg));
+    return PREDTEST_INIT;
+}
+
+void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg,
+                       uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    if (last_active_pred(vn, vg, oprsz)) {
+        compute_brk_z(vd, vm, vg, oprsz, true);
+    } else {
+        do_zero(vd, oprsz);
+    }
+}
+
+uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg,
+                            uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    if (last_active_pred(vn, vg, oprsz)) {
+        return compute_brks_z(vd, vm, vg, oprsz, true);
+    } else {
+        return do_zero(vd, oprsz);
+    }
+}
+
+void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg,
+                       uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    if (last_active_pred(vn, vg, oprsz)) {
+        compute_brk_z(vd, vm, vg, oprsz, false);
+    } else {
+        do_zero(vd, oprsz);
+    }
+}
+
+uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg,
+                            uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    if (last_active_pred(vn, vg, oprsz)) {
+        return compute_brks_z(vd, vm, vg, oprsz, false);
+    } else {
+        return do_zero(vd, oprsz);
+    }
+}
+
+void HELPER(sve_brka_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    compute_brk_z(vd, vn, vg, oprsz, true);
+}
+
+uint32_t HELPER(sve_brkas_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    return compute_brks_z(vd, vn, vg, oprsz, true);
+}
+
+void HELPER(sve_brkb_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    compute_brk_z(vd, vn, vg, oprsz, false);
+}
+
+uint32_t HELPER(sve_brkbs_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    return compute_brks_z(vd, vn, vg, oprsz, false);
+}
+
+void HELPER(sve_brka_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    compute_brk_m(vd, vn, vg, oprsz, true);
+}
+
+uint32_t HELPER(sve_brkas_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    return compute_brks_m(vd, vn, vg, oprsz, true);
+}
+
+void HELPER(sve_brkb_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    compute_brk_m(vd, vn, vg, oprsz, false);
+}
+
+uint32_t HELPER(sve_brkbs_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    return compute_brks_m(vd, vn, vg, oprsz, false);
+}
+
+void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+
+    if (!last_active_pred(vn, vg, oprsz)) {
+        do_zero(vd, oprsz);
+    }
+}
+
+/* As if PredTest(Ones(PL), D, esz).  */
+static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz,
+                              uint64_t esz_mask)
+{
+    uint32_t flags = PREDTEST_INIT;
+    intptr_t i;
+
+    for (i = 0; i < oprsz / 8; i++) {
+        flags = iter_predtest_fwd(d->p[i], esz_mask, flags);
+    }
+    if (oprsz & 7) {
+        uint64_t mask = ~(-1ULL << (8 * (oprsz & 7)));
+        flags = iter_predtest_fwd(d->p[i], esz_mask & mask, flags);
+    }
+    return flags;
+}
+
+uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+
+    if (last_active_pred(vn, vg, oprsz)) {
+        return predtest_ones(vd, oprsz, -1);
+    } else {
+        return do_zero(vd, oprsz);
+    }
+}
+
+uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
+{
+    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+    uint64_t *n = vn, *g = vg, sum = 0, mask = pred_esz_masks[esz];
+    intptr_t i;
+
+    for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
+        uint64_t t = n[i] & g[i] & mask;
+        sum += ctpop64(t);
+    }
+    return sum;
+}
+
+uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc)
+{
+    uintptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+    uint64_t esz_mask = pred_esz_masks[esz];
+    ARMPredicateReg *d = vd;
+    uint32_t flags;
+    intptr_t i;
+
+    /* Begin with a zero predicate register.  */
+    flags = do_zero(d, oprsz);
+    if (count == 0) {
+        return flags;
+    }
+
+    /* Scale from predicate element count to bits.  */
+    count <<= esz;
+    /* Bound to the bits in the predicate.  */
+    count = MIN(count, oprsz * 8);
+
+    /* Set all of the requested bits.  */
+    for (i = 0; i < count / 64; ++i) {
+        d->p[i] = esz_mask;
+    }
+    if (count & 63) {
+        d->p[i] = MAKE_64BIT_MASK(0, count & 63) & esz_mask;
+    }
+
+    return predtest_ones(d, oprsz, esz_mask);
+}
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
index dd9c09f89b..63d958cf50 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@@ -67,18 +67,26 @@ static inline void assert_fp_access_checked(DisasContext *s)
 static inline int vec_reg_offset(DisasContext *s, int regno,
                                  int element, TCGMemOp size)
 {
-    int offs = 0;
+    int element_size = 1 << size;
+    int offs = element * element_size;
 #ifdef HOST_WORDS_BIGENDIAN
     /* This is complicated slightly because vfp.zregs[n].d[0] is
-     * still the low half and vfp.zregs[n].d[1] the high half
-     * of the 128 bit vector, even on big endian systems.
-     * Calculate the offset assuming a fully bigendian 128 bits,
-     * then XOR to account for the order of the two 64 bit halves.
+     * still the lowest and vfp.zregs[n].d[15] the highest of the
+     * 256 byte vector, even on big endian systems.
+     *
+     * Calculate the offset assuming fully little-endian,
+     * then XOR to account for the order of the 8-byte units.
+     *
+     * For 16 byte elements, the two 8 byte halves will not form a
+     * host int128 if the host is bigendian, since they're in the
+     * wrong order.  However the only 16 byte operation we have is
+     * a move, so we can ignore this for the moment.  More complicated
+     * operations will have to special case loading and storing from
+     * the zregs array.
      */
-    offs += (16 - ((element + 1) * (1 << size)));
-    offs ^= 8;
-#else
-    offs += element * (1 << size);
+    if (element_size < 8) {
+        offs ^= 8 - element_size;
+    }
 #endif
     offs += offsetof(CPUARMState, vfp.zregs[regno]);
     assert_fp_access_checked(s);
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index c48d4b530a..226c97579c 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -33,6 +33,15 @@
 #include "trace-tcg.h"
 #include "translate-a64.h"
 
+
+typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
+                         TCGv_i64, uint32_t, uint32_t);
+
+typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
+                                     TCGv_ptr, TCGv_i32);
+typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
+                                     TCGv_ptr, TCGv_ptr, TCGv_i32);
+
 /*
  * Helpers for extracting complex instruction fields.
  */
@@ -68,6 +77,11 @@ static inline int expand_imm_sh8s(int x)
     return (int8_t)x << (x & 0x100 ? 8 : 0);
 }
 
+static inline int expand_imm_sh8u(int x)
+{
+    return (uint8_t)x << (x & 0x100 ? 8 : 0);
+}
+
 /*
  * Include the generated decoder.
  */
@@ -373,6 +387,8 @@ static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
     return do_zpzz_ool(s, a, fns[a->esz]);
 }
 
+DO_ZPZZ(SEL, sel)
+
 #undef DO_ZPZZ
 
 /*
@@ -1957,6 +1973,1448 @@ static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
 }
 
 /*
+ *** SVE Permute - Unpredicated Group
+ */
+
+static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
+                             vsz, vsz, cpu_reg_sp(s, a->rn));
+    }
+    return true;
+}
+
+static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
+{
+    if ((a->imm & 0x1f) == 0) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        unsigned dofs = vec_full_reg_offset(s, a->rd);
+        unsigned esz, index;
+
+        esz = ctz32(a->imm);
+        index = a->imm >> (esz + 1);
+
+        if ((index << esz) < vsz) {
+            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
+            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
+        } else {
+            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
+        }
+    }
+    return true;
+}
+
+static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
+{
+    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
+    static gen_insr * const fns[4] = {
+        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
+        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
+    };
+    unsigned vsz = vec_full_reg_size(s);
+    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
+    TCGv_ptr t_zd = tcg_temp_new_ptr();
+    TCGv_ptr t_zn = tcg_temp_new_ptr();
+
+    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
+
+    fns[a->esz](t_zd, t_zn, val, desc);
+
+    tcg_temp_free_ptr(t_zd);
+    tcg_temp_free_ptr(t_zn);
+    tcg_temp_free_i32(desc);
+}
+
+static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        TCGv_i64 t = tcg_temp_new_i64();
+        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
+        do_insr_i64(s, a, t);
+        tcg_temp_free_i64(t);
+    }
+    return true;
+}
+
+static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        do_insr_i64(s, a, cpu_reg(s, a->rm));
+    }
+    return true;
+}
+
+static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
+{
+    static gen_helper_gvec_2 * const fns[4] = {
+        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
+        gen_helper_sve_rev_s, gen_helper_sve_rev_d
+    };
+
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
+                           vec_full_reg_offset(s, a->rn),
+                           vsz, vsz, 0, fns[a->esz]);
+    }
+    return true;
+}
+
+static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    static gen_helper_gvec_3 * const fns[4] = {
+        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
+        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
+    };
+
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+                           vec_full_reg_offset(s, a->rn),
+                           vec_full_reg_offset(s, a->rm),
+                           vsz, vsz, 0, fns[a->esz]);
+    }
+    return true;
+}
+
+static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
+{
+    static gen_helper_gvec_2 * const fns[4][2] = {
+        { NULL, NULL },
+        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
+        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
+        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
+    };
+
+    if (a->esz == 0) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
+                           vec_full_reg_offset(s, a->rn)
+                           + (a->h ? vsz / 2 : 0),
+                           vsz, vsz, 0, fns[a->esz][a->u]);
+    }
+    return true;
+}
+
+/*
+ *** SVE Permute - Predicates Group
+ */
+
+static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
+                          gen_helper_gvec_3 *fn)
+{
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    unsigned vsz = pred_full_reg_size(s);
+
+    /* Predicate sizes may be smaller and cannot use simd_desc.
+       We cannot round up, as we do elsewhere, because we need
+       the exact size for ZIP2 and REV.  We retain the style for
+       the other helpers for consistency.  */
+    TCGv_ptr t_d = tcg_temp_new_ptr();
+    TCGv_ptr t_n = tcg_temp_new_ptr();
+    TCGv_ptr t_m = tcg_temp_new_ptr();
+    TCGv_i32 t_desc;
+    int desc;
+
+    desc = vsz - 2;
+    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
+    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
+
+    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
+    t_desc = tcg_const_i32(desc);
+
+    fn(t_d, t_n, t_m, t_desc);
+
+    tcg_temp_free_ptr(t_d);
+    tcg_temp_free_ptr(t_n);
+    tcg_temp_free_ptr(t_m);
+    tcg_temp_free_i32(t_desc);
+    return true;
+}
+
+static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
+                          gen_helper_gvec_2 *fn)
+{
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    unsigned vsz = pred_full_reg_size(s);
+    TCGv_ptr t_d = tcg_temp_new_ptr();
+    TCGv_ptr t_n = tcg_temp_new_ptr();
+    TCGv_i32 t_desc;
+    int desc;
+
+    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
+
+    /* Predicate sizes may be smaller and cannot use simd_desc.
+       We cannot round up, as we do elsewhere, because we need
+       the exact size for ZIP2 and REV.  We retain the style for
+       the other helpers for consistency.  */
+
+    desc = vsz - 2;
+    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
+    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
+    t_desc = tcg_const_i32(desc);
+
+    fn(t_d, t_n, t_desc);
+
+    tcg_temp_free_i32(t_desc);
+    tcg_temp_free_ptr(t_d);
+    tcg_temp_free_ptr(t_n);
+    return true;
+}
+
+static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
+}
+
+static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
+}
+
+static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
+}
+
+static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
+}
+
+static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
+}
+
+static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
+}
+
+static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
+{
+    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
+}
+
+static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
+{
+    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
+}
+
+static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
+{
+    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
+}
+
+/*
+ *** SVE Permute - Interleaving Group
+ */
+
+static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
+{
+    static gen_helper_gvec_3 * const fns[4] = {
+        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
+        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
+    };
+
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        unsigned high_ofs = high ? vsz / 2 : 0;
+        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+                           vec_full_reg_offset(s, a->rn) + high_ofs,
+                           vec_full_reg_offset(s, a->rm) + high_ofs,
+                           vsz, vsz, 0, fns[a->esz]);
+    }
+    return true;
+}
+
+static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
+                            gen_helper_gvec_3 *fn)
+{
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+                           vec_full_reg_offset(s, a->rn),
+                           vec_full_reg_offset(s, a->rm),
+                           vsz, vsz, data, fn);
+    }
+    return true;
+}
+
+static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_zip(s, a, false);
+}
+
+static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_zip(s, a, true);
+}
+
+static gen_helper_gvec_3 * const uzp_fns[4] = {
+    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
+    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
+};
+
+static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
+}
+
+static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
+}
+
+static gen_helper_gvec_3 * const trn_fns[4] = {
+    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
+    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
+};
+
+static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
+}
+
+static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
+}
+
+/*
+ *** SVE Permute Vector - Predicated Group
+ */
+
+static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    static gen_helper_gvec_3 * const fns[4] = {
+        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
+    };
+    return do_zpz_ool(s, a, fns[a->esz]);
+}
+
+/* Call the helper that computes the ARM LastActiveElement pseudocode
+ * function, scaled by the element size.  This includes the not found
+ * indication; e.g. not found for esz=3 is -8.
+ */
+static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
+{
+    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
+     * round up, as we do elsewhere, because we need the exact size.
+     */
+    TCGv_ptr t_p = tcg_temp_new_ptr();
+    TCGv_i32 t_desc;
+    unsigned vsz = pred_full_reg_size(s);
+    unsigned desc;
+
+    desc = vsz - 2;
+    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
+
+    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
+    t_desc = tcg_const_i32(desc);
+
+    gen_helper_sve_last_active_element(ret, t_p, t_desc);
+
+    tcg_temp_free_i32(t_desc);
+    tcg_temp_free_ptr(t_p);
+}
+
+/* Increment LAST to the offset of the next element in the vector,
+ * wrapping around to 0.
+ */
+static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
+{
+    unsigned vsz = vec_full_reg_size(s);
+
+    tcg_gen_addi_i32(last, last, 1 << esz);
+    if (is_power_of_2(vsz)) {
+        tcg_gen_andi_i32(last, last, vsz - 1);
+    } else {
+        TCGv_i32 max = tcg_const_i32(vsz);
+        TCGv_i32 zero = tcg_const_i32(0);
+        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
+        tcg_temp_free_i32(max);
+        tcg_temp_free_i32(zero);
+    }
+}
+
+/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
+static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
+{
+    unsigned vsz = vec_full_reg_size(s);
+
+    if (is_power_of_2(vsz)) {
+        tcg_gen_andi_i32(last, last, vsz - 1);
+    } else {
+        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
+        TCGv_i32 zero = tcg_const_i32(0);
+        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
+        tcg_temp_free_i32(max);
+        tcg_temp_free_i32(zero);
+    }
+}
+
+/* Load an unsigned element of ESZ from BASE+OFS.  */
+static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
+{
+    TCGv_i64 r = tcg_temp_new_i64();
+
+    switch (esz) {
+    case 0:
+        tcg_gen_ld8u_i64(r, base, ofs);
+        break;
+    case 1:
+        tcg_gen_ld16u_i64(r, base, ofs);
+        break;
+    case 2:
+        tcg_gen_ld32u_i64(r, base, ofs);
+        break;
+    case 3:
+        tcg_gen_ld_i64(r, base, ofs);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return r;
+}
+
+/* Load an unsigned element of ESZ from RM[LAST].  */
+static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
+                                 int rm, int esz)
+{
+    TCGv_ptr p = tcg_temp_new_ptr();
+    TCGv_i64 r;
+
+    /* Convert offset into vector into offset into ENV.
+     * The final adjustment for the vector register base
+     * is added via constant offset to the load.
+     */
+#ifdef HOST_WORDS_BIGENDIAN
+    /* Adjust for element ordering.  See vec_reg_offset.  */
+    if (esz < 3) {
+        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
+    }
+#endif
+    tcg_gen_ext_i32_ptr(p, last);
+    tcg_gen_add_ptr(p, p, cpu_env);
+
+    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
+    tcg_temp_free_ptr(p);
+
+    return r;
+}
+
+/* Compute CLAST for a Zreg.  */
+static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
+{
+    TCGv_i32 last;
+    TCGLabel *over;
+    TCGv_i64 ele;
+    unsigned vsz, esz = a->esz;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    last = tcg_temp_local_new_i32();
+    over = gen_new_label();
+
+    find_last_active(s, last, esz, a->pg);
+
+    /* There is of course no movcond for a 2048-bit vector,
+     * so we must branch over the actual store.
+     */
+    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
+
+    if (!before) {
+        incr_last_active(s, last, esz);
+    }
+
+    ele = load_last_active(s, last, a->rm, esz);
+    tcg_temp_free_i32(last);
+
+    vsz = vec_full_reg_size(s);
+    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
+    tcg_temp_free_i64(ele);
+
+    /* If this insn used MOVPRFX, we may need a second move.  */
+    if (a->rd != a->rn) {
+        TCGLabel *done = gen_new_label();
+        tcg_gen_br(done);
+
+        gen_set_label(over);
+        do_mov_z(s, a->rd, a->rn);
+
+        gen_set_label(done);
+    } else {
+        gen_set_label(over);
+    }
+    return true;
+}
+
+static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
+{
+    return do_clast_vector(s, a, false);
+}
+
+static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
+{
+    return do_clast_vector(s, a, true);
+}
+
+/* Compute CLAST for a scalar.  */
+static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
+                            bool before, TCGv_i64 reg_val)
+{
+    TCGv_i32 last = tcg_temp_new_i32();
+    TCGv_i64 ele, cmp, zero;
+
+    find_last_active(s, last, esz, pg);
+
+    /* Extend the original value of last prior to incrementing.  */
+    cmp = tcg_temp_new_i64();
+    tcg_gen_ext_i32_i64(cmp, last);
+
+    if (!before) {
+        incr_last_active(s, last, esz);
+    }
+
+    /* The conceit here is that while last < 0 indicates not found, after
+     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
+     * from which we can load garbage.  We then discard the garbage with
+     * a conditional move.
+     */
+    ele = load_last_active(s, last, rm, esz);
+    tcg_temp_free_i32(last);
+
+    zero = tcg_const_i64(0);
+    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
+
+    tcg_temp_free_i64(zero);
+    tcg_temp_free_i64(cmp);
+    tcg_temp_free_i64(ele);
+}
+
+/* Compute CLAST for a Vreg.  */
+static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
+{
+    if (sve_access_check(s)) {
+        int esz = a->esz;
+        int ofs = vec_reg_offset(s, a->rd, 0, esz);
+        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
+
+        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
+        write_fp_dreg(s, a->rd, reg);
+        tcg_temp_free_i64(reg);
+    }
+    return true;
+}
+
+static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    return do_clast_fp(s, a, false);
+}
+
+static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    return do_clast_fp(s, a, true);
+}
+
+/* Compute CLAST for a Xreg.  */
+static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
+{
+    TCGv_i64 reg;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    reg = cpu_reg(s, a->rd);
+    switch (a->esz) {
+    case 0:
+        tcg_gen_ext8u_i64(reg, reg);
+        break;
+    case 1:
+        tcg_gen_ext16u_i64(reg, reg);
+        break;
+    case 2:
+        tcg_gen_ext32u_i64(reg, reg);
+        break;
+    case 3:
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
+    return true;
+}
+
+static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    return do_clast_general(s, a, false);
+}
+
+static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    return do_clast_general(s, a, true);
+}
+
+/* Compute LAST for a scalar.  */
+static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
+                               int pg, int rm, bool before)
+{
+    TCGv_i32 last = tcg_temp_new_i32();
+    TCGv_i64 ret;
+
+    find_last_active(s, last, esz, pg);
+    if (before) {
+        wrap_last_active(s, last, esz);
+    } else {
+        incr_last_active(s, last, esz);
+    }
+
+    ret = load_last_active(s, last, rm, esz);
+    tcg_temp_free_i32(last);
+    return ret;
+}
+
+/* Compute LAST for a Vreg.  */
+static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
+{
+    if (sve_access_check(s)) {
+        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
+        write_fp_dreg(s, a->rd, val);
+        tcg_temp_free_i64(val);
+    }
+    return true;
+}
+
+static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    return do_last_fp(s, a, false);
+}
+
+static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    return do_last_fp(s, a, true);
+}
+
+/* Compute LAST for a Xreg.  */
+static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
+{
+    if (sve_access_check(s)) {
+        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
+        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
+        tcg_temp_free_i64(val);
+    }
+    return true;
+}
+
+static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    return do_last_general(s, a, false);
+}
+
+static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    return do_last_general(s, a, true);
+}
+
+static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
+    }
+    return true;
+}
+
+static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
+        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
+        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
+        tcg_temp_free_i64(t);
+    }
+    return true;
+}
+
+static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    static gen_helper_gvec_3 * const fns[4] = {
+        NULL,
+        gen_helper_sve_revb_h,
+        gen_helper_sve_revb_s,
+        gen_helper_sve_revb_d,
+    };
+    return do_zpz_ool(s, a, fns[a->esz]);
+}
+
+static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    static gen_helper_gvec_3 * const fns[4] = {
+        NULL,
+        NULL,
+        gen_helper_sve_revh_s,
+        gen_helper_sve_revh_d,
+    };
+    return do_zpz_ool(s, a, fns[a->esz]);
+}
+
+static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
+}
+
+static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+    static gen_helper_gvec_3 * const fns[4] = {
+        gen_helper_sve_rbit_b,
+        gen_helper_sve_rbit_h,
+        gen_helper_sve_rbit_s,
+        gen_helper_sve_rbit_d,
+    };
+    return do_zpz_ool(s, a, fns[a->esz]);
+}
+
+static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
+                           vec_full_reg_offset(s, a->rn),
+                           vec_full_reg_offset(s, a->rm),
+                           pred_full_reg_offset(s, a->pg),
+                           vsz, vsz, a->esz, gen_helper_sve_splice);
+    }
+    return true;
+}
+
+/*
+ *** SVE Integer Compare - Vectors Group
+ */
+
+static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
+                          gen_helper_gvec_flags_4 *gen_fn)
+{
+    TCGv_ptr pd, zn, zm, pg;
+    unsigned vsz;
+    TCGv_i32 t;
+
+    if (gen_fn == NULL) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
+    pd = tcg_temp_new_ptr();
+    zn = tcg_temp_new_ptr();
+    zm = tcg_temp_new_ptr();
+    pg = tcg_temp_new_ptr();
+
+    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
+    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
+
+    gen_fn(t, pd, zn, zm, pg, t);
+
+    tcg_temp_free_ptr(pd);
+    tcg_temp_free_ptr(zn);
+    tcg_temp_free_ptr(zm);
+    tcg_temp_free_ptr(pg);
+
+    do_pred_flags(t);
+
+    tcg_temp_free_i32(t);
+    return true;
+}
+
+#define DO_PPZZ(NAME, name) \
+static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,         \
+                                uint32_t insn)                            \
+{                                                                         \
+    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
+        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
+        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
+    };                                                                    \
+    return do_ppzz_flags(s, a, fns[a->esz]);                              \
+}
+
+DO_PPZZ(CMPEQ, cmpeq)
+DO_PPZZ(CMPNE, cmpne)
+DO_PPZZ(CMPGT, cmpgt)
+DO_PPZZ(CMPGE, cmpge)
+DO_PPZZ(CMPHI, cmphi)
+DO_PPZZ(CMPHS, cmphs)
+
+#undef DO_PPZZ
+
+#define DO_PPZW(NAME, name) \
+static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a,         \
+                                uint32_t insn)                            \
+{                                                                         \
+    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
+        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
+        gen_helper_sve_##name##_ppzw_s, NULL                              \
+    };                                                                    \
+    return do_ppzz_flags(s, a, fns[a->esz]);                              \
+}
+
+DO_PPZW(CMPEQ, cmpeq)
+DO_PPZW(CMPNE, cmpne)
+DO_PPZW(CMPGT, cmpgt)
+DO_PPZW(CMPGE, cmpge)
+DO_PPZW(CMPHI, cmphi)
+DO_PPZW(CMPHS, cmphs)
+DO_PPZW(CMPLT, cmplt)
+DO_PPZW(CMPLE, cmple)
+DO_PPZW(CMPLO, cmplo)
+DO_PPZW(CMPLS, cmpls)
+
+#undef DO_PPZW
+
+/*
+ *** SVE Integer Compare - Immediate Groups
+ */
+
+static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
+                          gen_helper_gvec_flags_3 *gen_fn)
+{
+    TCGv_ptr pd, zn, pg;
+    unsigned vsz;
+    TCGv_i32 t;
+
+    if (gen_fn == NULL) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
+    pd = tcg_temp_new_ptr();
+    zn = tcg_temp_new_ptr();
+    pg = tcg_temp_new_ptr();
+
+    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
+
+    gen_fn(t, pd, zn, pg, t);
+
+    tcg_temp_free_ptr(pd);
+    tcg_temp_free_ptr(zn);
+    tcg_temp_free_ptr(pg);
+
+    do_pred_flags(t);
+
+    tcg_temp_free_i32(t);
+    return true;
+}
+
+#define DO_PPZI(NAME, name) \
+static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a,         \
+                                uint32_t insn)                            \
+{                                                                         \
+    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
+        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
+        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
+    };                                                                    \
+    return do_ppzi_flags(s, a, fns[a->esz]);                              \
+}
+
+DO_PPZI(CMPEQ, cmpeq)
+DO_PPZI(CMPNE, cmpne)
+DO_PPZI(CMPGT, cmpgt)
+DO_PPZI(CMPGE, cmpge)
+DO_PPZI(CMPHI, cmphi)
+DO_PPZI(CMPHS, cmphs)
+DO_PPZI(CMPLT, cmplt)
+DO_PPZI(CMPLE, cmple)
+DO_PPZI(CMPLO, cmplo)
+DO_PPZI(CMPLS, cmpls)
+
+#undef DO_PPZI
+
+/*
+ *** SVE Partition Break Group
+ */
+
+static bool do_brk3(DisasContext *s, arg_rprr_s *a,
+                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
+{
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    unsigned vsz = pred_full_reg_size(s);
+
+    /* Predicate sizes may be smaller and cannot use simd_desc.  */
+    TCGv_ptr d = tcg_temp_new_ptr();
+    TCGv_ptr n = tcg_temp_new_ptr();
+    TCGv_ptr m = tcg_temp_new_ptr();
+    TCGv_ptr g = tcg_temp_new_ptr();
+    TCGv_i32 t = tcg_const_i32(vsz - 2);
+
+    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
+    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
+
+    if (a->s) {
+        fn_s(t, d, n, m, g, t);
+        do_pred_flags(t);
+    } else {
+        fn(d, n, m, g, t);
+    }
+    tcg_temp_free_ptr(d);
+    tcg_temp_free_ptr(n);
+    tcg_temp_free_ptr(m);
+    tcg_temp_free_ptr(g);
+    tcg_temp_free_i32(t);
+    return true;
+}
+
+static bool do_brk2(DisasContext *s, arg_rpr_s *a,
+                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
+{
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    unsigned vsz = pred_full_reg_size(s);
+
+    /* Predicate sizes may be smaller and cannot use simd_desc.  */
+    TCGv_ptr d = tcg_temp_new_ptr();
+    TCGv_ptr n = tcg_temp_new_ptr();
+    TCGv_ptr g = tcg_temp_new_ptr();
+    TCGv_i32 t = tcg_const_i32(vsz - 2);
+
+    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
+
+    if (a->s) {
+        fn_s(t, d, n, g, t);
+        do_pred_flags(t);
+    } else {
+        fn(d, n, g, t);
+    }
+    tcg_temp_free_ptr(d);
+    tcg_temp_free_ptr(n);
+    tcg_temp_free_ptr(g);
+    tcg_temp_free_i32(t);
+    return true;
+}
+
+static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
+{
+    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
+}
+
+static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
+{
+    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
+}
+
+static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
+{
+    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
+}
+
+static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
+{
+    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
+}
+
+static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
+{
+    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
+}
+
+static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
+{
+    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
+}
+
+static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
+{
+    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
+}
+
+/*
+ *** SVE Predicate Count Group
+ */
+
+static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
+{
+    unsigned psz = pred_full_reg_size(s);
+
+    if (psz <= 8) {
+        uint64_t psz_mask;
+
+        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
+        if (pn != pg) {
+            TCGv_i64 g = tcg_temp_new_i64();
+            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
+            tcg_gen_and_i64(val, val, g);
+            tcg_temp_free_i64(g);
+        }
+
+        /* Reduce the pred_esz_masks value simply to reduce the
+         * size of the code generated here.
+         */
+        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
+        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
+
+        tcg_gen_ctpop_i64(val, val);
+    } else {
+        TCGv_ptr t_pn = tcg_temp_new_ptr();
+        TCGv_ptr t_pg = tcg_temp_new_ptr();
+        unsigned desc;
+        TCGv_i32 t_desc;
+
+        desc = psz - 2;
+        desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
+
+        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
+        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
+        t_desc = tcg_const_i32(desc);
+
+        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
+        tcg_temp_free_ptr(t_pn);
+        tcg_temp_free_ptr(t_pg);
+        tcg_temp_free_i32(t_desc);
+    }
+}
+
+static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
+    }
+    return true;
+}
+
+static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
+                            uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        TCGv_i64 reg = cpu_reg(s, a->rd);
+        TCGv_i64 val = tcg_temp_new_i64();
+
+        do_cntp(s, val, a->esz, a->pg, a->pg);
+        if (a->d) {
+            tcg_gen_sub_i64(reg, reg, val);
+        } else {
+            tcg_gen_add_i64(reg, reg, val);
+        }
+        tcg_temp_free_i64(val);
+    }
+    return true;
+}
+
+static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
+                            uint32_t insn)
+{
+    if (a->esz == 0) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        TCGv_i64 val = tcg_temp_new_i64();
+        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
+
+        do_cntp(s, val, a->esz, a->pg, a->pg);
+        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
+                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
+    }
+    return true;
+}
+
+static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
+                                uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        TCGv_i64 reg = cpu_reg(s, a->rd);
+        TCGv_i64 val = tcg_temp_new_i64();
+
+        do_cntp(s, val, a->esz, a->pg, a->pg);
+        do_sat_addsub_32(reg, val, a->u, a->d);
+    }
+    return true;
+}
+
+static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
+                                uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        TCGv_i64 reg = cpu_reg(s, a->rd);
+        TCGv_i64 val = tcg_temp_new_i64();
+
+        do_cntp(s, val, a->esz, a->pg, a->pg);
+        do_sat_addsub_64(reg, val, a->u, a->d);
+    }
+    return true;
+}
+
+static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
+                             uint32_t insn)
+{
+    if (a->esz == 0) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        TCGv_i64 val = tcg_temp_new_i64();
+        do_cntp(s, val, a->esz, a->pg, a->pg);
+        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
+    }
+    return true;
+}
+
+/*
+ *** SVE Integer Compare Scalars Group
+ */
+
+static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
+{
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
+    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
+    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
+    TCGv_i64 cmp = tcg_temp_new_i64();
+
+    tcg_gen_setcond_i64(cond, cmp, rn, rm);
+    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
+    tcg_temp_free_i64(cmp);
+
+    /* VF = !NF & !CF.  */
+    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
+    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
+
+    /* Both NF and VF actually look at bit 31.  */
+    tcg_gen_neg_i32(cpu_NF, cpu_NF);
+    tcg_gen_neg_i32(cpu_VF, cpu_VF);
+    return true;
+}
+
+static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
+{
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
+    TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i32 t2, t3;
+    TCGv_ptr ptr;
+    unsigned desc, vsz = vec_full_reg_size(s);
+    TCGCond cond;
+
+    if (!a->sf) {
+        if (a->u) {
+            tcg_gen_ext32u_i64(op0, op0);
+            tcg_gen_ext32u_i64(op1, op1);
+        } else {
+            tcg_gen_ext32s_i64(op0, op0);
+            tcg_gen_ext32s_i64(op1, op1);
+        }
+    }
+
+    /* For the helper, compress the different conditions into a computation
+     * of how many iterations for which the condition is true.
+     *
+     * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
+     * 2**64 iterations, overflowing to 0.  Of course, predicate registers
+     * aren't that large, so any value >= predicate size is sufficient.
+     */
+    tcg_gen_sub_i64(t0, op1, op0);
+
+    /* t0 = MIN(op1 - op0, vsz).  */
+    tcg_gen_movi_i64(t1, vsz);
+    tcg_gen_umin_i64(t0, t0, t1);
+    if (a->eq) {
+        /* Equality means one more iteration.  */
+        tcg_gen_addi_i64(t0, t0, 1);
+    }
+
+    /* t0 = (condition true ? t0 : 0).  */
+    cond = (a->u
+            ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
+            : (a->eq ? TCG_COND_LE : TCG_COND_LT));
+    tcg_gen_movi_i64(t1, 0);
+    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
+
+    t2 = tcg_temp_new_i32();
+    tcg_gen_extrl_i64_i32(t2, t0);
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+
+    desc = (vsz / 8) - 2;
+    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
+    t3 = tcg_const_i32(desc);
+
+    ptr = tcg_temp_new_ptr();
+    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
+
+    gen_helper_sve_while(t2, ptr, t2, t3);
+    do_pred_flags(t2);
+
+    tcg_temp_free_ptr(ptr);
+    tcg_temp_free_i32(t2);
+    tcg_temp_free_i32(t3);
+    return true;
+}
+
+/*
+ *** SVE Integer Wide Immediate - Unpredicated Group
+ */
+
+static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
+{
+    if (a->esz == 0) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        int dofs = vec_full_reg_offset(s, a->rd);
+        uint64_t imm;
+
+        /* Decode the VFP immediate.  */
+        imm = vfp_expand_imm(a->esz, a->imm);
+        imm = dup_const(a->esz, imm);
+
+        tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
+    }
+    return true;
+}
+
+static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
+{
+    if (a->esz == 0 && extract32(insn, 13, 1)) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        int dofs = vec_full_reg_offset(s, a->rd);
+
+        tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
+    }
+    return true;
+}
+
+static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{
+    if (a->esz == 0 && extract32(insn, 13, 1)) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
+                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
+    }
+    return true;
+}
+
+static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{
+    a->imm = -a->imm;
+    return trans_ADD_zzi(s, a, insn);
+}
+
+static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{
+    static const GVecGen2s op[4] = {
+        { .fni8 = tcg_gen_vec_sub8_i64,
+          .fniv = tcg_gen_sub_vec,
+          .fno = gen_helper_sve_subri_b,
+          .opc = INDEX_op_sub_vec,
+          .vece = MO_8,
+          .scalar_first = true },
+        { .fni8 = tcg_gen_vec_sub16_i64,
+          .fniv = tcg_gen_sub_vec,
+          .fno = gen_helper_sve_subri_h,
+          .opc = INDEX_op_sub_vec,
+          .vece = MO_16,
+          .scalar_first = true },
+        { .fni4 = tcg_gen_sub_i32,
+          .fniv = tcg_gen_sub_vec,
+          .fno = gen_helper_sve_subri_s,
+          .opc = INDEX_op_sub_vec,
+          .vece = MO_32,
+          .scalar_first = true },
+        { .fni8 = tcg_gen_sub_i64,
+          .fniv = tcg_gen_sub_vec,
+          .fno = gen_helper_sve_subri_d,
+          .opc = INDEX_op_sub_vec,
+          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+          .vece = MO_64,
+          .scalar_first = true }
+    };
+
+    if (a->esz == 0 && extract32(insn, 13, 1)) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        TCGv_i64 c = tcg_const_i64(a->imm);
+        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
+                        vec_full_reg_offset(s, a->rn),
+                        vsz, vsz, c, &op[a->esz]);
+        tcg_temp_free_i64(c);
+    }
+    return true;
+}
+
+static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
+                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
+    }
+    return true;
+}
+
+static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
+                       bool u, bool d)
+{
+    if (a->esz == 0 && extract32(insn, 13, 1)) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        TCGv_i64 val = tcg_const_i64(a->imm);
+        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
+        tcg_temp_free_i64(val);
+    }
+    return true;
+}
+
+static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{
+    return do_zzi_sat(s, a, insn, false, false);
+}
+
+static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{
+    return do_zzi_sat(s, a, insn, true, false);
+}
+
+static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{
+    return do_zzi_sat(s, a, insn, false, true);
+}
+
+static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{
+    return do_zzi_sat(s, a, insn, true, true);
+}
+
+static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
+{
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        TCGv_i64 c = tcg_const_i64(a->imm);
+
+        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
+                            vec_full_reg_offset(s, a->rn),
+                            c, vsz, vsz, 0, fn);
+        tcg_temp_free_i64(c);
+    }
+    return true;
+}
+
+#define DO_ZZI(NAME, name) \
+static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a,         \
+                               uint32_t insn)                           \
+{                                                                       \
+    static gen_helper_gvec_2i * const fns[4] = {                        \
+        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
+        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
+    };                                                                  \
+    return do_zzi_ool(s, a, fns[a->esz]);                               \
+}
+
+DO_ZZI(SMAX, smax)
+DO_ZZI(UMAX, umax)
+DO_ZZI(SMIN, smin)
+DO_ZZI(UMIN, umin)
+
+#undef DO_ZZI
+
+/*
+ *** SVE Floating Point Arithmetic - Unpredicated Group
+ */
+
+static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
+                      gen_helper_gvec_3_ptr *fn)
+{
+    if (fn == NULL) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
+        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
+                           vec_full_reg_offset(s, a->rn),
+                           vec_full_reg_offset(s, a->rm),
+                           status, vsz, vsz, 0, fn);
+        tcg_temp_free_ptr(status);
+    }
+    return true;
+}
+
+
+#define DO_FP3(NAME, name) \
+static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
+{                                                                   \
+    static gen_helper_gvec_3_ptr * const fns[4] = {                 \
+        NULL, gen_helper_gvec_##name##_h,                           \
+        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
+    };                                                              \
+    return do_zzz_fp(s, a, fns[a->esz]);                            \
+}
+
+DO_FP3(FADD_zzz, fadd)
+DO_FP3(FSUB_zzz, fsub)
+DO_FP3(FMUL_zzz, fmul)
+DO_FP3(FTSMUL, ftsmul)
+DO_FP3(FRECPS, recps)
+DO_FP3(FRSQRTS, rsqrts)
+
+#undef DO_FP3
+
+/*
  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
  */
 
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 0ff5edf2ce..f405c82fb2 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -9965,7 +9965,8 @@ static bool thumb_insn_is_16bit(DisasContext *s, uint32_t insn)
      * end up actually treating this as two 16-bit insns, though,
      * if it's half of a bl/blx pair that might span a page boundary.
      */
-    if (arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
+    if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
+        arm_dc_feature(s, ARM_FEATURE_M)) {
         /* Thumb2 cores (including all M profile ones) always treat
          * 32-bit insns as 32-bit.
          */
@@ -10085,10 +10086,38 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
     int conds;
     int logic_cc;
 
-    /* The only 32 bit insn that's allowed for Thumb1 is the combined
-     * BL/BLX prefix and suffix.
+    /*
+     * ARMv6-M supports a limited subset of Thumb2 instructions.
+     * Other Thumb1 architectures allow only 32-bit
+     * combined BL/BLX prefix and suffix.
      */
-    if ((insn & 0xf800e800) != 0xf000e800) {
+    if (arm_dc_feature(s, ARM_FEATURE_M) &&
+        !arm_dc_feature(s, ARM_FEATURE_V7)) {
+        int i;
+        bool found = false;
+        const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
+                                        0xf3b08040 /* dsb */,
+                                        0xf3b08050 /* dmb */,
+                                        0xf3b08060 /* isb */,
+                                        0xf3e08000 /* mrs */,
+                                        0xf000d000 /* bl */};
+        const uint32_t armv6m_mask[] = {0xffe0d000,
+                                        0xfff0d0f0,
+                                        0xfff0d0f0,
+                                        0xfff0d0f0,
+                                        0xffe0d000,
+                                        0xf800d000};
+
+        for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
+            if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
+                found = true;
+                break;
+            }
+        }
+        if (!found) {
+            goto illegal_op;
+        }
+    } else if ((insn & 0xf800e800) != 0xf000e800)  {
         ARCH(6T2);
     }
 
@@ -11009,7 +11038,11 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
                         }
                         break;
                     case 3: /* Special control operations.  */
-                        ARCH(7);
+                        if (!arm_dc_feature(s, ARM_FEATURE_V7) &&
+                            !(arm_dc_feature(s, ARM_FEATURE_V6) &&
+                              arm_dc_feature(s, ARM_FEATURE_M))) {
+                            goto illegal_op;
+                        }
                         op = (insn >> 4) & 0xf;
                         switch (op) {
                         case 2: /* clrex */
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 25e209da31..f504dd53c8 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -426,3 +426,72 @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm,
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
+
+/* Floating-point trigonometric starting value.
+ * See the ARM ARM pseudocode function FPTrigSMul.
+ */
+static float16 float16_ftsmul(float16 op1, uint16_t op2, float_status *stat)
+{
+    float16 result = float16_mul(op1, op1, stat);
+    if (!float16_is_any_nan(result)) {
+        result = float16_set_sign(result, op2 & 1);
+    }
+    return result;
+}
+
+static float32 float32_ftsmul(float32 op1, uint32_t op2, float_status *stat)
+{
+    float32 result = float32_mul(op1, op1, stat);
+    if (!float32_is_any_nan(result)) {
+        result = float32_set_sign(result, op2 & 1);
+    }
+    return result;
+}
+
+static float64 float64_ftsmul(float64 op1, uint64_t op2, float_status *stat)
+{
+    float64 result = float64_mul(op1, op1, stat);
+    if (!float64_is_any_nan(result)) {
+        result = float64_set_sign(result, op2 & 1);
+    }
+    return result;
+}
+
+#define DO_3OP(NAME, FUNC, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+{                                                                          \
+    intptr_t i, oprsz = simd_oprsz(desc);                                  \
+    TYPE *d = vd, *n = vn, *m = vm;                                        \
+    for (i = 0; i < oprsz / sizeof(TYPE); i++) {                           \
+        d[i] = FUNC(n[i], m[i], stat);                                     \
+    }                                                                      \
+}
+
+DO_3OP(gvec_fadd_h, float16_add, float16)
+DO_3OP(gvec_fadd_s, float32_add, float32)
+DO_3OP(gvec_fadd_d, float64_add, float64)
+
+DO_3OP(gvec_fsub_h, float16_sub, float16)
+DO_3OP(gvec_fsub_s, float32_sub, float32)
+DO_3OP(gvec_fsub_d, float64_sub, float64)
+
+DO_3OP(gvec_fmul_h, float16_mul, float16)
+DO_3OP(gvec_fmul_s, float32_mul, float32)
+DO_3OP(gvec_fmul_d, float64_mul, float64)
+
+DO_3OP(gvec_ftsmul_h, float16_ftsmul, float16)
+DO_3OP(gvec_ftsmul_s, float32_ftsmul, float32)
+DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64)
+
+#ifdef TARGET_AARCH64
+
+DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)
+DO_3OP(gvec_recps_s, helper_recpsf_f32, float32)
+DO_3OP(gvec_recps_d, helper_recpsf_f64, float64)
+
+DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16)
+DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32)
+DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
+
+#endif
+#undef DO_3OP
diff --git a/target/microblaze/mmu.c b/target/microblaze/mmu.c
index f4ceaea520..fcf86b12d5 100644
--- a/target/microblaze/mmu.c
+++ b/target/microblaze/mmu.c
@@ -159,7 +159,6 @@ unsigned int mmu_translate(struct microblaze_mmu *mmu,
 
             lu->vaddr = tlb_tag;
             lu->paddr = tlb_rpn & mmu->c_addr_mask;
-            lu->paddr = tlb_rpn;
             lu->size = tlb_size;
             lu->err = ERR_HIT;
             lu->idx = i;
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
index 6c64946398..78ca265b04 100644
--- a/target/microblaze/translate.c
+++ b/target/microblaze/translate.c
@@ -90,7 +90,6 @@ typedef struct DisasContext {
     uint32_t jmp_pc;
 
     int abort_at_next_insn;
-    int nr_nops;
     struct TranslationBlock *tb;
     int singlestep_enabled;
 } DisasContext;
@@ -1576,17 +1575,12 @@ static inline void decode(DisasContext *dc, uint32_t ir)
     dc->ir = ir;
     LOG_DIS("%8.8x\t", dc->ir);
 
-    if (dc->ir)
-        dc->nr_nops = 0;
-    else {
+    if (ir == 0) {
         trap_illegal(dc, dc->cpu->env.pvr.regs[2] & PVR2_OPCODE_0x0_ILL_MASK);
-
-        LOG_DIS("nr_nops=%d\t", dc->nr_nops);
-        dc->nr_nops++;
-        if (dc->nr_nops > 4) {
-            cpu_abort(CPU(dc->cpu), "fetching nop sequence\n");
-        }
+        /* Don't decode nop/zero instructions any further.  */
+        return;
     }
+
     /* bit 2 seems to indicate insn type.  */
     dc->type_b = ir & (1 << 29);
 
@@ -1633,7 +1627,6 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb)
     dc->singlestep_enabled = cs->singlestep_enabled;
     dc->cpustate_changed = 0;
     dc->abort_at_next_insn = 0;
-    dc->nr_nops = 0;
 
     if (pc_start & 3) {
         cpu_abort(cs, "Microblaze: unaligned PC=%x\n", pc_start);
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 607afe5bed..ca91da26cb 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -39,6 +39,8 @@ SYSEMU_TARGET_LIST := $(subst -softmmu.mak,,$(notdir \
 
 check-unit-y = tests/check-qdict$(EXESUF)
 gcov-files-check-qdict-y = qobject/qdict.c
+check-unit-y = tests/check-block-qdict$(EXESUF)
+gcov-files-check-block-qdict-y = qobject/block-qdict.c
 check-unit-y += tests/test-char$(EXESUF)
 gcov-files-check-qdict-y = chardev/char.c
 check-unit-y += tests/check-qnum$(EXESUF)
@@ -584,6 +586,7 @@ GENERATED_FILES += tests/test-qapi-types.h tests/test-qapi-visit.h \
 test-obj-y = tests/check-qnum.o tests/check-qstring.o tests/check-qdict.o \
 	tests/check-qlist.o tests/check-qnull.o tests/check-qobject.o \
 	tests/check-qjson.o tests/check-qlit.o \
+	tests/check-block-qtest.o \
 	tests/test-coroutine.o tests/test-string-output-visitor.o \
 	tests/test-string-input-visitor.o tests/test-qobject-output-visitor.o \
 	tests/test-clone-visitor.o \
@@ -614,6 +617,7 @@ test-block-obj-y = $(block-obj-y) $(test-io-obj-y) tests/iothread.o
 tests/check-qnum$(EXESUF): tests/check-qnum.o $(test-util-obj-y)
 tests/check-qstring$(EXESUF): tests/check-qstring.o $(test-util-obj-y)
 tests/check-qdict$(EXESUF): tests/check-qdict.o $(test-util-obj-y)
+tests/check-block-qdict$(EXESUF): tests/check-block-qdict.o $(test-util-obj-y)
 tests/check-qlist$(EXESUF): tests/check-qlist.o $(test-util-obj-y)
 tests/check-qnull$(EXESUF): tests/check-qnull.o $(test-util-obj-y)
 tests/check-qobject$(EXESUF): tests/check-qobject.o $(test-util-obj-y)
diff --git a/tests/ahci-test.c b/tests/ahci-test.c
index 1a7b761304..937ed2f910 100644
--- a/tests/ahci-test.c
+++ b/tests/ahci-test.c
@@ -180,12 +180,12 @@ static AHCIQState *ahci_boot(const char *cli, ...)
         s = ahci_vboot(cli, ap);
         va_end(ap);
     } else {
-        cli = "-drive if=none,id=drive0,file=%s,cache=writeback,serial=%s"
-            ",format=%s"
+        cli = "-drive if=none,id=drive0,file=%s,cache=writeback,format=%s"
             " -M q35 "
             "-device ide-hd,drive=drive0 "
+            "-global ide-hd.serial=%s "
             "-global ide-hd.ver=%s";
-        s = ahci_boot(cli, tmp_path, "testdisk", imgfmt, "version");
+        s = ahci_boot(cli, tmp_path, imgfmt, "testdisk", "version");
     }
 
     return s;
diff --git a/tests/check-block-qdict.c b/tests/check-block-qdict.c
new file mode 100644
index 0000000000..1d20fccbd4
--- /dev/null
+++ b/tests/check-block-qdict.c
@@ -0,0 +1,690 @@
+/*
+ * Unit-tests for Block layer QDict extras
+ *
+ * Copyright (c) 2013-2018 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "block/qdict.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/error.h"
+
+static void qdict_defaults_test(void)
+{
+    QDict *dict, *copy;
+
+    dict = qdict_new();
+    copy = qdict_new();
+
+    qdict_set_default_str(dict, "foo", "abc");
+    qdict_set_default_str(dict, "foo", "def");
+    g_assert_cmpstr(qdict_get_str(dict, "foo"), ==, "abc");
+    qdict_set_default_str(dict, "bar", "ghi");
+
+    qdict_copy_default(copy, dict, "foo");
+    g_assert_cmpstr(qdict_get_str(copy, "foo"), ==, "abc");
+    qdict_set_default_str(copy, "bar", "xyz");
+    qdict_copy_default(copy, dict, "bar");
+    g_assert_cmpstr(qdict_get_str(copy, "bar"), ==, "xyz");
+
+    qobject_unref(copy);
+    qobject_unref(dict);
+}
+
+static void qdict_flatten_test(void)
+{
+    QList *e_1 = qlist_new();
+    QList *e = qlist_new();
+    QDict *e_1_2 = qdict_new();
+    QDict *f = qdict_new();
+    QList *y = qlist_new();
+    QDict *z = qdict_new();
+    QDict *root = qdict_new();
+
+    /*
+     * Test the flattening of
+     *
+     * {
+     *     "e": [
+     *         42,
+     *         [
+     *             23,
+     *             66,
+     *             {
+     *                 "a": 0,
+     *                 "b": 1
+     *             }
+     *         ]
+     *     ],
+     *     "f": {
+     *         "c": 2,
+     *         "d": 3,
+     *     },
+     *     "g": 4,
+     *     "y": [{}],
+     *     "z": {"a": []}
+     * }
+     *
+     * to
+     *
+     * {
+     *     "e.0": 42,
+     *     "e.1.0": 23,
+     *     "e.1.1": 66,
+     *     "e.1.2.a": 0,
+     *     "e.1.2.b": 1,
+     *     "f.c": 2,
+     *     "f.d": 3,
+     *     "g": 4,
+     *     "y.0": {},
+     *     "z.a": []
+     * }
+     */
+
+    qdict_put_int(e_1_2, "a", 0);
+    qdict_put_int(e_1_2, "b", 1);
+
+    qlist_append_int(e_1, 23);
+    qlist_append_int(e_1, 66);
+    qlist_append(e_1, e_1_2);
+    qlist_append_int(e, 42);
+    qlist_append(e, e_1);
+
+    qdict_put_int(f, "c", 2);
+    qdict_put_int(f, "d", 3);
+
+    qlist_append(y, qdict_new());
+
+    qdict_put(z, "a", qlist_new());
+
+    qdict_put(root, "e", e);
+    qdict_put(root, "f", f);
+    qdict_put_int(root, "g", 4);
+    qdict_put(root, "y", y);
+    qdict_put(root, "z", z);
+
+    qdict_flatten(root);
+
+    g_assert(qdict_get_int(root, "e.0") == 42);
+    g_assert(qdict_get_int(root, "e.1.0") == 23);
+    g_assert(qdict_get_int(root, "e.1.1") == 66);
+    g_assert(qdict_get_int(root, "e.1.2.a") == 0);
+    g_assert(qdict_get_int(root, "e.1.2.b") == 1);
+    g_assert(qdict_get_int(root, "f.c") == 2);
+    g_assert(qdict_get_int(root, "f.d") == 3);
+    g_assert(qdict_get_int(root, "g") == 4);
+    g_assert(!qdict_size(qdict_get_qdict(root, "y.0")));
+    g_assert(qlist_empty(qdict_get_qlist(root, "z.a")));
+
+    g_assert(qdict_size(root) == 10);
+
+    qobject_unref(root);
+}
+
+static void qdict_array_split_test(void)
+{
+    QDict *test_dict = qdict_new();
+    QDict *dict1, *dict2;
+    QNum *int1;
+    QList *test_list;
+
+    /*
+     * Test the split of
+     *
+     * {
+     *     "1.x": 0,
+     *     "4.y": 1,
+     *     "0.a": 42,
+     *     "o.o": 7,
+     *     "0.b": 23,
+     *     "2": 66
+     * }
+     *
+     * to
+     *
+     * [
+     *     {
+     *         "a": 42,
+     *         "b": 23
+     *     },
+     *     {
+     *         "x": 0
+     *     },
+     *     66
+     * ]
+     *
+     * and
+     *
+     * {
+     *     "4.y": 1,
+     *     "o.o": 7
+     * }
+     *
+     * (remaining in the old QDict)
+     *
+     * This example is given in the comment of qdict_array_split().
+     */
+
+    qdict_put_int(test_dict, "1.x", 0);
+    qdict_put_int(test_dict, "4.y", 1);
+    qdict_put_int(test_dict, "0.a", 42);
+    qdict_put_int(test_dict, "o.o", 7);
+    qdict_put_int(test_dict, "0.b", 23);
+    qdict_put_int(test_dict, "2", 66);
+
+    qdict_array_split(test_dict, &test_list);
+
+    dict1 = qobject_to(QDict, qlist_pop(test_list));
+    dict2 = qobject_to(QDict, qlist_pop(test_list));
+    int1 = qobject_to(QNum, qlist_pop(test_list));
+
+    g_assert(dict1);
+    g_assert(dict2);
+    g_assert(int1);
+    g_assert(qlist_empty(test_list));
+
+    qobject_unref(test_list);
+
+    g_assert(qdict_get_int(dict1, "a") == 42);
+    g_assert(qdict_get_int(dict1, "b") == 23);
+
+    g_assert(qdict_size(dict1) == 2);
+
+    qobject_unref(dict1);
+
+    g_assert(qdict_get_int(dict2, "x") == 0);
+
+    g_assert(qdict_size(dict2) == 1);
+
+    qobject_unref(dict2);
+
+    g_assert_cmpint(qnum_get_int(int1), ==, 66);
+
+    qobject_unref(int1);
+
+    g_assert(qdict_get_int(test_dict, "4.y") == 1);
+    g_assert(qdict_get_int(test_dict, "o.o") == 7);
+
+    g_assert(qdict_size(test_dict) == 2);
+
+    qobject_unref(test_dict);
+
+    /*
+     * Test the split of
+     *
+     * {
+     *     "0": 42,
+     *     "1": 23,
+     *     "1.x": 84
+     * }
+     *
+     * to
+     *
+     * [
+     *     42
+     * ]
+     *
+     * and
+     *
+     * {
+     *     "1": 23,
+     *     "1.x": 84
+     * }
+     *
+     * That is, test whether splitting stops if there is both an entry with key
+     * of "%u" and other entries with keys prefixed "%u." for the same index.
+     */
+
+    test_dict = qdict_new();
+
+    qdict_put_int(test_dict, "0", 42);
+    qdict_put_int(test_dict, "1", 23);
+    qdict_put_int(test_dict, "1.x", 84);
+
+    qdict_array_split(test_dict, &test_list);
+
+    int1 = qobject_to(QNum, qlist_pop(test_list));
+
+    g_assert(int1);
+    g_assert(qlist_empty(test_list));
+
+    qobject_unref(test_list);
+
+    g_assert_cmpint(qnum_get_int(int1), ==, 42);
+
+    qobject_unref(int1);
+
+    g_assert(qdict_get_int(test_dict, "1") == 23);
+    g_assert(qdict_get_int(test_dict, "1.x") == 84);
+
+    g_assert(qdict_size(test_dict) == 2);
+
+    qobject_unref(test_dict);
+}
+
+static void qdict_array_entries_test(void)
+{
+    QDict *dict = qdict_new();
+
+    g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 0);
+
+    qdict_put_int(dict, "bar", 0);
+    qdict_put_int(dict, "baz.0", 0);
+    g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 0);
+
+    qdict_put_int(dict, "foo.1", 0);
+    g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, -EINVAL);
+    qdict_put_int(dict, "foo.0", 0);
+    g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 2);
+    qdict_put_int(dict, "foo.bar", 0);
+    g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, -EINVAL);
+    qdict_del(dict, "foo.bar");
+
+    qdict_put_int(dict, "foo.2.a", 0);
+    qdict_put_int(dict, "foo.2.b", 0);
+    qdict_put_int(dict, "foo.2.c", 0);
+    g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 3);
+    g_assert_cmpint(qdict_array_entries(dict, ""), ==, -EINVAL);
+
+    qobject_unref(dict);
+
+    dict = qdict_new();
+    qdict_put_int(dict, "1", 0);
+    g_assert_cmpint(qdict_array_entries(dict, ""), ==, -EINVAL);
+    qdict_put_int(dict, "0", 0);
+    g_assert_cmpint(qdict_array_entries(dict, ""), ==, 2);
+    qdict_put_int(dict, "bar", 0);
+    g_assert_cmpint(qdict_array_entries(dict, ""), ==, -EINVAL);
+    qdict_del(dict, "bar");
+
+    qdict_put_int(dict, "2.a", 0);
+    qdict_put_int(dict, "2.b", 0);
+    qdict_put_int(dict, "2.c", 0);
+    g_assert_cmpint(qdict_array_entries(dict, ""), ==, 3);
+
+    qobject_unref(dict);
+}
+
+static void qdict_join_test(void)
+{
+    QDict *dict1, *dict2;
+    bool overwrite = false;
+    int i;
+
+    dict1 = qdict_new();
+    dict2 = qdict_new();
+
+    /* Test everything once without overwrite and once with */
+    do {
+        /* Test empty dicts */
+        qdict_join(dict1, dict2, overwrite);
+
+        g_assert(qdict_size(dict1) == 0);
+        g_assert(qdict_size(dict2) == 0);
+
+        /* First iteration: Test movement */
+        /* Second iteration: Test empty source and non-empty destination */
+        qdict_put_int(dict2, "foo", 42);
+
+        for (i = 0; i < 2; i++) {
+            qdict_join(dict1, dict2, overwrite);
+
+            g_assert(qdict_size(dict1) == 1);
+            g_assert(qdict_size(dict2) == 0);
+
+            g_assert(qdict_get_int(dict1, "foo") == 42);
+        }
+
+        /* Test non-empty source and destination without conflict */
+        qdict_put_int(dict2, "bar", 23);
+
+        qdict_join(dict1, dict2, overwrite);
+
+        g_assert(qdict_size(dict1) == 2);
+        g_assert(qdict_size(dict2) == 0);
+
+        g_assert(qdict_get_int(dict1, "foo") == 42);
+        g_assert(qdict_get_int(dict1, "bar") == 23);
+
+        /* Test conflict */
+        qdict_put_int(dict2, "foo", 84);
+
+        qdict_join(dict1, dict2, overwrite);
+
+        g_assert(qdict_size(dict1) == 2);
+        g_assert(qdict_size(dict2) == !overwrite);
+
+        g_assert(qdict_get_int(dict1, "foo") == (overwrite ? 84 : 42));
+        g_assert(qdict_get_int(dict1, "bar") == 23);
+
+        if (!overwrite) {
+            g_assert(qdict_get_int(dict2, "foo") == 84);
+        }
+
+        /* Check the references */
+        g_assert(qdict_get(dict1, "foo")->base.refcnt == 1);
+        g_assert(qdict_get(dict1, "bar")->base.refcnt == 1);
+
+        if (!overwrite) {
+            g_assert(qdict_get(dict2, "foo")->base.refcnt == 1);
+        }
+
+        /* Clean up */
+        qdict_del(dict1, "foo");
+        qdict_del(dict1, "bar");
+
+        if (!overwrite) {
+            qdict_del(dict2, "foo");
+        }
+    } while (overwrite ^= true);
+
+    qobject_unref(dict1);
+    qobject_unref(dict2);
+}
+
+static void qdict_crumple_test_recursive(void)
+{
+    QDict *src, *dst, *rule, *vnc, *acl, *listen;
+    QDict *empty, *empty_dict, *empty_list_0;
+    QList *rules, *empty_list, *empty_dict_a;
+
+    src = qdict_new();
+    qdict_put_str(src, "vnc.listen.addr", "127.0.0.1");
+    qdict_put_str(src, "vnc.listen.port", "5901");
+    qdict_put_str(src, "vnc.acl.rules.0.match", "fred");
+    qdict_put_str(src, "vnc.acl.rules.0.policy", "allow");
+    qdict_put_str(src, "vnc.acl.rules.1.match", "bob");
+    qdict_put_str(src, "vnc.acl.rules.1.policy", "deny");
+    qdict_put_str(src, "vnc.acl.default", "deny");
+    qdict_put_str(src, "vnc.acl..name", "acl0");
+    qdict_put_str(src, "vnc.acl.rule..name", "acl0");
+    qdict_put(src, "empty.dict.a", qlist_new());
+    qdict_put(src, "empty.list.0", qdict_new());
+
+    dst = qobject_to(QDict, qdict_crumple(src, &error_abort));
+    g_assert(dst);
+    g_assert_cmpint(qdict_size(dst), ==, 2);
+
+    vnc = qdict_get_qdict(dst, "vnc");
+    g_assert(vnc);
+    g_assert_cmpint(qdict_size(vnc), ==, 3);
+
+    listen = qdict_get_qdict(vnc, "listen");
+    g_assert(listen);
+    g_assert_cmpint(qdict_size(listen), ==, 2);
+    g_assert_cmpstr("127.0.0.1", ==, qdict_get_str(listen, "addr"));
+    g_assert_cmpstr("5901", ==, qdict_get_str(listen, "port"));
+
+    acl = qdict_get_qdict(vnc, "acl");
+    g_assert(acl);
+    g_assert_cmpint(qdict_size(acl), ==, 3);
+
+    rules = qdict_get_qlist(acl, "rules");
+    g_assert(rules);
+    g_assert_cmpint(qlist_size(rules), ==, 2);
+
+    rule = qobject_to(QDict, qlist_pop(rules));
+    g_assert(rule);
+    g_assert_cmpint(qdict_size(rule), ==, 2);
+    g_assert_cmpstr("fred", ==, qdict_get_str(rule, "match"));
+    g_assert_cmpstr("allow", ==, qdict_get_str(rule, "policy"));
+    qobject_unref(rule);
+
+    rule = qobject_to(QDict, qlist_pop(rules));
+    g_assert(rule);
+    g_assert_cmpint(qdict_size(rule), ==, 2);
+    g_assert_cmpstr("bob", ==, qdict_get_str(rule, "match"));
+    g_assert_cmpstr("deny", ==, qdict_get_str(rule, "policy"));
+    qobject_unref(rule);
+
+    /* With recursive crumpling, we should see all names unescaped */
+    g_assert_cmpstr("acl0", ==, qdict_get_str(vnc, "acl.name"));
+    g_assert_cmpstr("acl0", ==, qdict_get_str(acl, "rule.name"));
+
+    empty = qdict_get_qdict(dst, "empty");
+    g_assert(empty);
+    g_assert_cmpint(qdict_size(empty), ==, 2);
+    empty_dict = qdict_get_qdict(empty, "dict");
+    g_assert(empty_dict);
+    g_assert_cmpint(qdict_size(empty_dict), ==, 1);
+    empty_dict_a = qdict_get_qlist(empty_dict, "a");
+    g_assert(empty_dict_a && qlist_empty(empty_dict_a));
+    empty_list = qdict_get_qlist(empty, "list");
+    g_assert(empty_list);
+    g_assert_cmpint(qlist_size(empty_list), ==, 1);
+    empty_list_0 = qobject_to(QDict, qlist_pop(empty_list));
+    g_assert(empty_list_0);
+    g_assert_cmpint(qdict_size(empty_list_0), ==, 0);
+
+    qobject_unref(src);
+    qobject_unref(dst);
+}
+
+static void qdict_crumple_test_empty(void)
+{
+    QDict *src, *dst;
+
+    src = qdict_new();
+
+    dst = qobject_to(QDict, qdict_crumple(src, &error_abort));
+
+    g_assert_cmpint(qdict_size(dst), ==, 0);
+
+    qobject_unref(src);
+    qobject_unref(dst);
+}
+
+static int qdict_count_entries(QDict *dict)
+{
+    const QDictEntry *e;
+    int count = 0;
+
+    for (e = qdict_first(dict); e; e = qdict_next(dict, e)) {
+        count++;
+    }
+
+    return count;
+}
+
+static void qdict_rename_keys_test(void)
+{
+    QDict *dict = qdict_new();
+    QDict *copy;
+    QDictRenames *renames;
+    Error *local_err = NULL;
+
+    qdict_put_str(dict, "abc", "foo");
+    qdict_put_str(dict, "abcdef", "bar");
+    qdict_put_int(dict, "number", 42);
+    qdict_put_bool(dict, "flag", true);
+    qdict_put_null(dict, "nothing");
+
+    /* Empty rename list */
+    renames = (QDictRenames[]) {
+        { NULL, "this can be anything" }
+    };
+    copy = qdict_clone_shallow(dict);
+    qdict_rename_keys(copy, renames, &error_abort);
+
+    g_assert_cmpstr(qdict_get_str(copy, "abc"), ==, "foo");
+    g_assert_cmpstr(qdict_get_str(copy, "abcdef"), ==, "bar");
+    g_assert_cmpint(qdict_get_int(copy, "number"), ==, 42);
+    g_assert_cmpint(qdict_get_bool(copy, "flag"), ==, true);
+    g_assert(qobject_type(qdict_get(copy, "nothing")) == QTYPE_QNULL);
+    g_assert_cmpint(qdict_count_entries(copy), ==, 5);
+
+    qobject_unref(copy);
+
+    /* Simple rename of all entries */
+    renames = (QDictRenames[]) {
+        { "abc",        "str1" },
+        { "abcdef",     "str2" },
+        { "number",     "int" },
+        { "flag",       "bool" },
+        { "nothing",    "null" },
+        { NULL , NULL }
+    };
+    copy = qdict_clone_shallow(dict);
+    qdict_rename_keys(copy, renames, &error_abort);
+
+    g_assert(!qdict_haskey(copy, "abc"));
+    g_assert(!qdict_haskey(copy, "abcdef"));
+    g_assert(!qdict_haskey(copy, "number"));
+    g_assert(!qdict_haskey(copy, "flag"));
+    g_assert(!qdict_haskey(copy, "nothing"));
+
+    g_assert_cmpstr(qdict_get_str(copy, "str1"), ==, "foo");
+    g_assert_cmpstr(qdict_get_str(copy, "str2"), ==, "bar");
+    g_assert_cmpint(qdict_get_int(copy, "int"), ==, 42);
+    g_assert_cmpint(qdict_get_bool(copy, "bool"), ==, true);
+    g_assert(qobject_type(qdict_get(copy, "null")) == QTYPE_QNULL);
+    g_assert_cmpint(qdict_count_entries(copy), ==, 5);
+
+    qobject_unref(copy);
+
+    /* Renames are processed top to bottom */
+    renames = (QDictRenames[]) {
+        { "abc",        "tmp" },
+        { "abcdef",     "abc" },
+        { "number",     "abcdef" },
+        { "flag",       "number" },
+        { "nothing",    "flag" },
+        { "tmp",        "nothing" },
+        { NULL , NULL }
+    };
+    copy = qdict_clone_shallow(dict);
+    qdict_rename_keys(copy, renames, &error_abort);
+
+    g_assert_cmpstr(qdict_get_str(copy, "nothing"), ==, "foo");
+    g_assert_cmpstr(qdict_get_str(copy, "abc"), ==, "bar");
+    g_assert_cmpint(qdict_get_int(copy, "abcdef"), ==, 42);
+    g_assert_cmpint(qdict_get_bool(copy, "number"), ==, true);
+    g_assert(qobject_type(qdict_get(copy, "flag")) == QTYPE_QNULL);
+    g_assert(!qdict_haskey(copy, "tmp"));
+    g_assert_cmpint(qdict_count_entries(copy), ==, 5);
+
+    qobject_unref(copy);
+
+    /* Conflicting rename */
+    renames = (QDictRenames[]) {
+        { "abcdef",     "abc" },
+        { NULL , NULL }
+    };
+    copy = qdict_clone_shallow(dict);
+    qdict_rename_keys(copy, renames, &local_err);
+
+    g_assert(local_err != NULL);
+    error_free(local_err);
+    local_err = NULL;
+
+    g_assert_cmpstr(qdict_get_str(copy, "abc"), ==, "foo");
+    g_assert_cmpstr(qdict_get_str(copy, "abcdef"), ==, "bar");
+    g_assert_cmpint(qdict_get_int(copy, "number"), ==, 42);
+    g_assert_cmpint(qdict_get_bool(copy, "flag"), ==, true);
+    g_assert(qobject_type(qdict_get(copy, "nothing")) == QTYPE_QNULL);
+    g_assert_cmpint(qdict_count_entries(copy), ==, 5);
+
+    qobject_unref(copy);
+
+    /* Renames in an empty dict */
+    renames = (QDictRenames[]) {
+        { "abcdef",     "abc" },
+        { NULL , NULL }
+    };
+
+    qobject_unref(dict);
+    dict = qdict_new();
+
+    qdict_rename_keys(dict, renames, &error_abort);
+    g_assert(qdict_first(dict) == NULL);
+
+    qobject_unref(dict);
+}
+
+static void qdict_crumple_test_bad_inputs(void)
+{
+    QDict *src, *nested;
+    Error *error = NULL;
+
+    src = qdict_new();
+    /* rule.0 can't be both a string and a dict */
+    qdict_put_str(src, "rule.0", "fred");
+    qdict_put_str(src, "rule.0.policy", "allow");
+
+    g_assert(qdict_crumple(src, &error) == NULL);
+    g_assert(error != NULL);
+    error_free(error);
+    error = NULL;
+    qobject_unref(src);
+
+    src = qdict_new();
+    /* rule can't be both a list and a dict */
+    qdict_put_str(src, "rule.0", "fred");
+    qdict_put_str(src, "rule.a", "allow");
+
+    g_assert(qdict_crumple(src, &error) == NULL);
+    g_assert(error != NULL);
+    error_free(error);
+    error = NULL;
+    qobject_unref(src);
+
+    src = qdict_new();
+    /* The input should be flat, ie no dicts or lists */
+    nested = qdict_new();
+    qdict_put(nested, "x", qdict_new());
+    qdict_put(src, "rule.a", nested);
+    qdict_put_str(src, "rule.b", "allow");
+
+    g_assert(qdict_crumple(src, &error) == NULL);
+    g_assert(error != NULL);
+    error_free(error);
+    error = NULL;
+    qobject_unref(src);
+
+    src = qdict_new();
+    /* List indexes must not have gaps */
+    qdict_put_str(src, "rule.0", "deny");
+    qdict_put_str(src, "rule.3", "allow");
+
+    g_assert(qdict_crumple(src, &error) == NULL);
+    g_assert(error != NULL);
+    error_free(error);
+    error = NULL;
+    qobject_unref(src);
+
+    src = qdict_new();
+    /* List indexes must be in %zu format */
+    qdict_put_str(src, "rule.0", "deny");
+    qdict_put_str(src, "rule.+1", "allow");
+
+    g_assert(qdict_crumple(src, &error) == NULL);
+    g_assert(error != NULL);
+    error_free(error);
+    error = NULL;
+    qobject_unref(src);
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+
+    g_test_add_func("/public/defaults", qdict_defaults_test);
+    g_test_add_func("/public/flatten", qdict_flatten_test);
+    g_test_add_func("/public/array_split", qdict_array_split_test);
+    g_test_add_func("/public/array_entries", qdict_array_entries_test);
+    g_test_add_func("/public/join", qdict_join_test);
+    g_test_add_func("/public/crumple/recursive",
+                    qdict_crumple_test_recursive);
+    g_test_add_func("/public/crumple/empty",
+                    qdict_crumple_test_empty);
+    g_test_add_func("/public/crumple/bad_inputs",
+                    qdict_crumple_test_bad_inputs);
+
+    g_test_add_func("/public/rename_keys", qdict_rename_keys_test);
+
+    return g_test_run();
+}
diff --git a/tests/check-qdict.c b/tests/check-qdict.c
index eba5d3528e..86e9fe7dc4 100644
--- a/tests/check-qdict.c
+++ b/tests/check-qdict.c
@@ -12,11 +12,6 @@
 
 #include "qemu/osdep.h"
 #include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qlist.h"
-#include "qapi/qmp/qnum.h"
-#include "qapi/qmp/qstring.h"
-#include "qapi/error.h"
-#include "qemu-common.h"
 
 /*
  * Public Interface test-cases
@@ -156,28 +151,6 @@ static void qdict_get_try_str_test(void)
     qobject_unref(tests_dict);
 }
 
-static void qdict_defaults_test(void)
-{
-    QDict *dict, *copy;
-
-    dict = qdict_new();
-    copy = qdict_new();
-
-    qdict_set_default_str(dict, "foo", "abc");
-    qdict_set_default_str(dict, "foo", "def");
-    g_assert_cmpstr(qdict_get_str(dict, "foo"), ==, "abc");
-    qdict_set_default_str(dict, "bar", "ghi");
-
-    qdict_copy_default(copy, dict, "foo");
-    g_assert_cmpstr(qdict_get_str(copy, "foo"), ==, "abc");
-    qdict_set_default_str(copy, "bar", "xyz");
-    qdict_copy_default(copy, dict, "bar");
-    g_assert_cmpstr(qdict_get_str(copy, "bar"), ==, "xyz");
-
-    qobject_unref(copy);
-    qobject_unref(dict);
-}
-
 static void qdict_haskey_not_test(void)
 {
     QDict *tests_dict = qdict_new();
@@ -253,606 +226,6 @@ static void qdict_iterapi_test(void)
     qobject_unref(tests_dict);
 }
 
-static void qdict_flatten_test(void)
-{
-    QList *list1 = qlist_new();
-    QList *list2 = qlist_new();
-    QDict *dict1 = qdict_new();
-    QDict *dict2 = qdict_new();
-    QDict *dict3 = qdict_new();
-
-    /*
-     * Test the flattening of
-     *
-     * {
-     *     "e": [
-     *         42,
-     *         [
-     *             23,
-     *             66,
-     *             {
-     *                 "a": 0,
-     *                 "b": 1
-     *             }
-     *         ]
-     *     ],
-     *     "f": {
-     *         "c": 2,
-     *         "d": 3,
-     *     },
-     *     "g": 4
-     * }
-     *
-     * to
-     *
-     * {
-     *     "e.0": 42,
-     *     "e.1.0": 23,
-     *     "e.1.1": 66,
-     *     "e.1.2.a": 0,
-     *     "e.1.2.b": 1,
-     *     "f.c": 2,
-     *     "f.d": 3,
-     *     "g": 4
-     * }
-     */
-
-    qdict_put_int(dict1, "a", 0);
-    qdict_put_int(dict1, "b", 1);
-
-    qlist_append_int(list1, 23);
-    qlist_append_int(list1, 66);
-    qlist_append(list1, dict1);
-    qlist_append_int(list2, 42);
-    qlist_append(list2, list1);
-
-    qdict_put_int(dict2, "c", 2);
-    qdict_put_int(dict2, "d", 3);
-    qdict_put(dict3, "e", list2);
-    qdict_put(dict3, "f", dict2);
-    qdict_put_int(dict3, "g", 4);
-
-    qdict_flatten(dict3);
-
-    g_assert(qdict_get_int(dict3, "e.0") == 42);
-    g_assert(qdict_get_int(dict3, "e.1.0") == 23);
-    g_assert(qdict_get_int(dict3, "e.1.1") == 66);
-    g_assert(qdict_get_int(dict3, "e.1.2.a") == 0);
-    g_assert(qdict_get_int(dict3, "e.1.2.b") == 1);
-    g_assert(qdict_get_int(dict3, "f.c") == 2);
-    g_assert(qdict_get_int(dict3, "f.d") == 3);
-    g_assert(qdict_get_int(dict3, "g") == 4);
-
-    g_assert(qdict_size(dict3) == 8);
-
-    qobject_unref(dict3);
-}
-
-static void qdict_array_split_test(void)
-{
-    QDict *test_dict = qdict_new();
-    QDict *dict1, *dict2;
-    QNum *int1;
-    QList *test_list;
-
-    /*
-     * Test the split of
-     *
-     * {
-     *     "1.x": 0,
-     *     "4.y": 1,
-     *     "0.a": 42,
-     *     "o.o": 7,
-     *     "0.b": 23,
-     *     "2": 66
-     * }
-     *
-     * to
-     *
-     * [
-     *     {
-     *         "a": 42,
-     *         "b": 23
-     *     },
-     *     {
-     *         "x": 0
-     *     },
-     *     66
-     * ]
-     *
-     * and
-     *
-     * {
-     *     "4.y": 1,
-     *     "o.o": 7
-     * }
-     *
-     * (remaining in the old QDict)
-     *
-     * This example is given in the comment of qdict_array_split().
-     */
-
-    qdict_put_int(test_dict, "1.x", 0);
-    qdict_put_int(test_dict, "4.y", 1);
-    qdict_put_int(test_dict, "0.a", 42);
-    qdict_put_int(test_dict, "o.o", 7);
-    qdict_put_int(test_dict, "0.b", 23);
-    qdict_put_int(test_dict, "2", 66);
-
-    qdict_array_split(test_dict, &test_list);
-
-    dict1 = qobject_to(QDict, qlist_pop(test_list));
-    dict2 = qobject_to(QDict, qlist_pop(test_list));
-    int1 = qobject_to(QNum, qlist_pop(test_list));
-
-    g_assert(dict1);
-    g_assert(dict2);
-    g_assert(int1);
-    g_assert(qlist_empty(test_list));
-
-    qobject_unref(test_list);
-
-    g_assert(qdict_get_int(dict1, "a") == 42);
-    g_assert(qdict_get_int(dict1, "b") == 23);
-
-    g_assert(qdict_size(dict1) == 2);
-
-    qobject_unref(dict1);
-
-    g_assert(qdict_get_int(dict2, "x") == 0);
-
-    g_assert(qdict_size(dict2) == 1);
-
-    qobject_unref(dict2);
-
-    g_assert_cmpint(qnum_get_int(int1), ==, 66);
-
-    qobject_unref(int1);
-
-    g_assert(qdict_get_int(test_dict, "4.y") == 1);
-    g_assert(qdict_get_int(test_dict, "o.o") == 7);
-
-    g_assert(qdict_size(test_dict) == 2);
-
-    qobject_unref(test_dict);
-
-    /*
-     * Test the split of
-     *
-     * {
-     *     "0": 42,
-     *     "1": 23,
-     *     "1.x": 84
-     * }
-     *
-     * to
-     *
-     * [
-     *     42
-     * ]
-     *
-     * and
-     *
-     * {
-     *     "1": 23,
-     *     "1.x": 84
-     * }
-     *
-     * That is, test whether splitting stops if there is both an entry with key
-     * of "%u" and other entries with keys prefixed "%u." for the same index.
-     */
-
-    test_dict = qdict_new();
-
-    qdict_put_int(test_dict, "0", 42);
-    qdict_put_int(test_dict, "1", 23);
-    qdict_put_int(test_dict, "1.x", 84);
-
-    qdict_array_split(test_dict, &test_list);
-
-    int1 = qobject_to(QNum, qlist_pop(test_list));
-
-    g_assert(int1);
-    g_assert(qlist_empty(test_list));
-
-    qobject_unref(test_list);
-
-    g_assert_cmpint(qnum_get_int(int1), ==, 42);
-
-    qobject_unref(int1);
-
-    g_assert(qdict_get_int(test_dict, "1") == 23);
-    g_assert(qdict_get_int(test_dict, "1.x") == 84);
-
-    g_assert(qdict_size(test_dict) == 2);
-
-    qobject_unref(test_dict);
-}
-
-static void qdict_array_entries_test(void)
-{
-    QDict *dict = qdict_new();
-
-    g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 0);
-
-    qdict_put_int(dict, "bar", 0);
-    qdict_put_int(dict, "baz.0", 0);
-    g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 0);
-
-    qdict_put_int(dict, "foo.1", 0);
-    g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, -EINVAL);
-    qdict_put_int(dict, "foo.0", 0);
-    g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 2);
-    qdict_put_int(dict, "foo.bar", 0);
-    g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, -EINVAL);
-    qdict_del(dict, "foo.bar");
-
-    qdict_put_int(dict, "foo.2.a", 0);
-    qdict_put_int(dict, "foo.2.b", 0);
-    qdict_put_int(dict, "foo.2.c", 0);
-    g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 3);
-    g_assert_cmpint(qdict_array_entries(dict, ""), ==, -EINVAL);
-
-    qobject_unref(dict);
-
-    dict = qdict_new();
-    qdict_put_int(dict, "1", 0);
-    g_assert_cmpint(qdict_array_entries(dict, ""), ==, -EINVAL);
-    qdict_put_int(dict, "0", 0);
-    g_assert_cmpint(qdict_array_entries(dict, ""), ==, 2);
-    qdict_put_int(dict, "bar", 0);
-    g_assert_cmpint(qdict_array_entries(dict, ""), ==, -EINVAL);
-    qdict_del(dict, "bar");
-
-    qdict_put_int(dict, "2.a", 0);
-    qdict_put_int(dict, "2.b", 0);
-    qdict_put_int(dict, "2.c", 0);
-    g_assert_cmpint(qdict_array_entries(dict, ""), ==, 3);
-
-    qobject_unref(dict);
-}
-
-static void qdict_join_test(void)
-{
-    QDict *dict1, *dict2;
-    bool overwrite = false;
-    int i;
-
-    dict1 = qdict_new();
-    dict2 = qdict_new();
-
-    /* Test everything once without overwrite and once with */
-    do
-    {
-        /* Test empty dicts */
-        qdict_join(dict1, dict2, overwrite);
-
-        g_assert(qdict_size(dict1) == 0);
-        g_assert(qdict_size(dict2) == 0);
-
-        /* First iteration: Test movement */
-        /* Second iteration: Test empty source and non-empty destination */
-        qdict_put_int(dict2, "foo", 42);
-
-        for (i = 0; i < 2; i++) {
-            qdict_join(dict1, dict2, overwrite);
-
-            g_assert(qdict_size(dict1) == 1);
-            g_assert(qdict_size(dict2) == 0);
-
-            g_assert(qdict_get_int(dict1, "foo") == 42);
-        }
-
-        /* Test non-empty source and destination without conflict */
-        qdict_put_int(dict2, "bar", 23);
-
-        qdict_join(dict1, dict2, overwrite);
-
-        g_assert(qdict_size(dict1) == 2);
-        g_assert(qdict_size(dict2) == 0);
-
-        g_assert(qdict_get_int(dict1, "foo") == 42);
-        g_assert(qdict_get_int(dict1, "bar") == 23);
-
-        /* Test conflict */
-        qdict_put_int(dict2, "foo", 84);
-
-        qdict_join(dict1, dict2, overwrite);
-
-        g_assert(qdict_size(dict1) == 2);
-        g_assert(qdict_size(dict2) == !overwrite);
-
-        g_assert(qdict_get_int(dict1, "foo") == (overwrite ? 84 : 42));
-        g_assert(qdict_get_int(dict1, "bar") == 23);
-
-        if (!overwrite) {
-            g_assert(qdict_get_int(dict2, "foo") == 84);
-        }
-
-        /* Check the references */
-        g_assert(qdict_get(dict1, "foo")->base.refcnt == 1);
-        g_assert(qdict_get(dict1, "bar")->base.refcnt == 1);
-
-        if (!overwrite) {
-            g_assert(qdict_get(dict2, "foo")->base.refcnt == 1);
-        }
-
-        /* Clean up */
-        qdict_del(dict1, "foo");
-        qdict_del(dict1, "bar");
-
-        if (!overwrite) {
-            qdict_del(dict2, "foo");
-        }
-    }
-    while (overwrite ^= true);
-
-    qobject_unref(dict1);
-    qobject_unref(dict2);
-}
-
-static void qdict_crumple_test_recursive(void)
-{
-    QDict *src, *dst, *rule, *vnc, *acl, *listen;
-    QList *rules;
-
-    src = qdict_new();
-    qdict_put_str(src, "vnc.listen.addr", "127.0.0.1");
-    qdict_put_str(src, "vnc.listen.port", "5901");
-    qdict_put_str(src, "vnc.acl.rules.0.match", "fred");
-    qdict_put_str(src, "vnc.acl.rules.0.policy", "allow");
-    qdict_put_str(src, "vnc.acl.rules.1.match", "bob");
-    qdict_put_str(src, "vnc.acl.rules.1.policy", "deny");
-    qdict_put_str(src, "vnc.acl.default", "deny");
-    qdict_put_str(src, "vnc.acl..name", "acl0");
-    qdict_put_str(src, "vnc.acl.rule..name", "acl0");
-
-    dst = qobject_to(QDict, qdict_crumple(src, &error_abort));
-    g_assert(dst);
-    g_assert_cmpint(qdict_size(dst), ==, 1);
-
-    vnc = qdict_get_qdict(dst, "vnc");
-    g_assert(vnc);
-    g_assert_cmpint(qdict_size(vnc), ==, 3);
-
-    listen = qdict_get_qdict(vnc, "listen");
-    g_assert(listen);
-    g_assert_cmpint(qdict_size(listen), ==, 2);
-    g_assert_cmpstr("127.0.0.1", ==, qdict_get_str(listen, "addr"));
-    g_assert_cmpstr("5901", ==, qdict_get_str(listen, "port"));
-
-    acl = qdict_get_qdict(vnc, "acl");
-    g_assert(acl);
-    g_assert_cmpint(qdict_size(acl), ==, 3);
-
-    rules = qdict_get_qlist(acl, "rules");
-    g_assert(rules);
-    g_assert_cmpint(qlist_size(rules), ==, 2);
-
-    rule = qobject_to(QDict, qlist_pop(rules));
-    g_assert(rule);
-    g_assert_cmpint(qdict_size(rule), ==, 2);
-    g_assert_cmpstr("fred", ==, qdict_get_str(rule, "match"));
-    g_assert_cmpstr("allow", ==, qdict_get_str(rule, "policy"));
-    qobject_unref(rule);
-
-    rule = qobject_to(QDict, qlist_pop(rules));
-    g_assert(rule);
-    g_assert_cmpint(qdict_size(rule), ==, 2);
-    g_assert_cmpstr("bob", ==, qdict_get_str(rule, "match"));
-    g_assert_cmpstr("deny", ==, qdict_get_str(rule, "policy"));
-    qobject_unref(rule);
-
-    /* With recursive crumpling, we should see all names unescaped */
-    g_assert_cmpstr("acl0", ==, qdict_get_str(vnc, "acl.name"));
-    g_assert_cmpstr("acl0", ==, qdict_get_str(acl, "rule.name"));
-
-    qobject_unref(src);
-    qobject_unref(dst);
-}
-
-static void qdict_crumple_test_empty(void)
-{
-    QDict *src, *dst;
-
-    src = qdict_new();
-
-    dst = qobject_to(QDict, qdict_crumple(src, &error_abort));
-
-    g_assert_cmpint(qdict_size(dst), ==, 0);
-
-    qobject_unref(src);
-    qobject_unref(dst);
-}
-
-static int qdict_count_entries(QDict *dict)
-{
-    const QDictEntry *e;
-    int count = 0;
-
-    for (e = qdict_first(dict); e; e = qdict_next(dict, e)) {
-        count++;
-    }
-
-    return count;
-}
-
-static void qdict_rename_keys_test(void)
-{
-    QDict *dict = qdict_new();
-    QDict *copy;
-    QDictRenames *renames;
-    Error *local_err = NULL;
-
-    qdict_put_str(dict, "abc", "foo");
-    qdict_put_str(dict, "abcdef", "bar");
-    qdict_put_int(dict, "number", 42);
-    qdict_put_bool(dict, "flag", true);
-    qdict_put_null(dict, "nothing");
-
-    /* Empty rename list */
-    renames = (QDictRenames[]) {
-        { NULL, "this can be anything" }
-    };
-    copy = qdict_clone_shallow(dict);
-    qdict_rename_keys(copy, renames, &error_abort);
-
-    g_assert_cmpstr(qdict_get_str(copy, "abc"), ==, "foo");
-    g_assert_cmpstr(qdict_get_str(copy, "abcdef"), ==, "bar");
-    g_assert_cmpint(qdict_get_int(copy, "number"), ==, 42);
-    g_assert_cmpint(qdict_get_bool(copy, "flag"), ==, true);
-    g_assert(qobject_type(qdict_get(copy, "nothing")) == QTYPE_QNULL);
-    g_assert_cmpint(qdict_count_entries(copy), ==, 5);
-
-    qobject_unref(copy);
-
-    /* Simple rename of all entries */
-    renames = (QDictRenames[]) {
-        { "abc",        "str1" },
-        { "abcdef",     "str2" },
-        { "number",     "int" },
-        { "flag",       "bool" },
-        { "nothing",    "null" },
-        { NULL , NULL }
-    };
-    copy = qdict_clone_shallow(dict);
-    qdict_rename_keys(copy, renames, &error_abort);
-
-    g_assert(!qdict_haskey(copy, "abc"));
-    g_assert(!qdict_haskey(copy, "abcdef"));
-    g_assert(!qdict_haskey(copy, "number"));
-    g_assert(!qdict_haskey(copy, "flag"));
-    g_assert(!qdict_haskey(copy, "nothing"));
-
-    g_assert_cmpstr(qdict_get_str(copy, "str1"), ==, "foo");
-    g_assert_cmpstr(qdict_get_str(copy, "str2"), ==, "bar");
-    g_assert_cmpint(qdict_get_int(copy, "int"), ==, 42);
-    g_assert_cmpint(qdict_get_bool(copy, "bool"), ==, true);
-    g_assert(qobject_type(qdict_get(copy, "null")) == QTYPE_QNULL);
-    g_assert_cmpint(qdict_count_entries(copy), ==, 5);
-
-    qobject_unref(copy);
-
-    /* Renames are processed top to bottom */
-    renames = (QDictRenames[]) {
-        { "abc",        "tmp" },
-        { "abcdef",     "abc" },
-        { "number",     "abcdef" },
-        { "flag",       "number" },
-        { "nothing",    "flag" },
-        { "tmp",        "nothing" },
-        { NULL , NULL }
-    };
-    copy = qdict_clone_shallow(dict);
-    qdict_rename_keys(copy, renames, &error_abort);
-
-    g_assert_cmpstr(qdict_get_str(copy, "nothing"), ==, "foo");
-    g_assert_cmpstr(qdict_get_str(copy, "abc"), ==, "bar");
-    g_assert_cmpint(qdict_get_int(copy, "abcdef"), ==, 42);
-    g_assert_cmpint(qdict_get_bool(copy, "number"), ==, true);
-    g_assert(qobject_type(qdict_get(copy, "flag")) == QTYPE_QNULL);
-    g_assert(!qdict_haskey(copy, "tmp"));
-    g_assert_cmpint(qdict_count_entries(copy), ==, 5);
-
-    qobject_unref(copy);
-
-    /* Conflicting rename */
-    renames = (QDictRenames[]) {
-        { "abcdef",     "abc" },
-        { NULL , NULL }
-    };
-    copy = qdict_clone_shallow(dict);
-    qdict_rename_keys(copy, renames, &local_err);
-
-    g_assert(local_err != NULL);
-    error_free(local_err);
-    local_err = NULL;
-
-    g_assert_cmpstr(qdict_get_str(copy, "abc"), ==, "foo");
-    g_assert_cmpstr(qdict_get_str(copy, "abcdef"), ==, "bar");
-    g_assert_cmpint(qdict_get_int(copy, "number"), ==, 42);
-    g_assert_cmpint(qdict_get_bool(copy, "flag"), ==, true);
-    g_assert(qobject_type(qdict_get(copy, "nothing")) == QTYPE_QNULL);
-    g_assert_cmpint(qdict_count_entries(copy), ==, 5);
-
-    qobject_unref(copy);
-
-    /* Renames in an empty dict */
-    renames = (QDictRenames[]) {
-        { "abcdef",     "abc" },
-        { NULL , NULL }
-    };
-
-    qobject_unref(dict);
-    dict = qdict_new();
-
-    qdict_rename_keys(dict, renames, &error_abort);
-    g_assert(qdict_first(dict) == NULL);
-
-    qobject_unref(dict);
-}
-
-static void qdict_crumple_test_bad_inputs(void)
-{
-    QDict *src;
-    Error *error = NULL;
-
-    src = qdict_new();
-    /* rule.0 can't be both a string and a dict */
-    qdict_put_str(src, "rule.0", "fred");
-    qdict_put_str(src, "rule.0.policy", "allow");
-
-    g_assert(qdict_crumple(src, &error) == NULL);
-    g_assert(error != NULL);
-    error_free(error);
-    error = NULL;
-    qobject_unref(src);
-
-    src = qdict_new();
-    /* rule can't be both a list and a dict */
-    qdict_put_str(src, "rule.0", "fred");
-    qdict_put_str(src, "rule.a", "allow");
-
-    g_assert(qdict_crumple(src, &error) == NULL);
-    g_assert(error != NULL);
-    error_free(error);
-    error = NULL;
-    qobject_unref(src);
-
-    src = qdict_new();
-    /* The input should be flat, ie no dicts or lists */
-    qdict_put(src, "rule.a", qdict_new());
-    qdict_put_str(src, "rule.b", "allow");
-
-    g_assert(qdict_crumple(src, &error) == NULL);
-    g_assert(error != NULL);
-    error_free(error);
-    error = NULL;
-    qobject_unref(src);
-
-    src = qdict_new();
-    /* List indexes must not have gaps */
-    qdict_put_str(src, "rule.0", "deny");
-    qdict_put_str(src, "rule.3", "allow");
-
-    g_assert(qdict_crumple(src, &error) == NULL);
-    g_assert(error != NULL);
-    error_free(error);
-    error = NULL;
-    qobject_unref(src);
-
-    src = qdict_new();
-    /* List indexes must be in %zu format */
-    qdict_put_str(src, "rule.0", "deny");
-    qdict_put_str(src, "rule.+1", "allow");
-
-    g_assert(qdict_crumple(src, &error) == NULL);
-    g_assert(error != NULL);
-    error_free(error);
-    error = NULL;
-    qobject_unref(src);
-}
-
 /*
  * Errors test-cases
  */
@@ -986,29 +359,15 @@ int main(int argc, char **argv)
     g_test_add_func("/public/get_try_int", qdict_get_try_int_test);
     g_test_add_func("/public/get_str", qdict_get_str_test);
     g_test_add_func("/public/get_try_str", qdict_get_try_str_test);
-    g_test_add_func("/public/defaults", qdict_defaults_test);
     g_test_add_func("/public/haskey_not", qdict_haskey_not_test);
     g_test_add_func("/public/haskey", qdict_haskey_test);
     g_test_add_func("/public/del", qdict_del_test);
     g_test_add_func("/public/to_qdict", qobject_to_qdict_test);
     g_test_add_func("/public/iterapi", qdict_iterapi_test);
-    g_test_add_func("/public/flatten", qdict_flatten_test);
-    g_test_add_func("/public/array_split", qdict_array_split_test);
-    g_test_add_func("/public/array_entries", qdict_array_entries_test);
-    g_test_add_func("/public/join", qdict_join_test);
 
     g_test_add_func("/errors/put_exists", qdict_put_exists_test);
     g_test_add_func("/errors/get_not_exists", qdict_get_not_exists_test);
 
-    g_test_add_func("/public/crumple/recursive",
-                    qdict_crumple_test_recursive);
-    g_test_add_func("/public/crumple/empty",
-                    qdict_crumple_test_empty);
-    g_test_add_func("/public/crumple/bad_inputs",
-                    qdict_crumple_test_bad_inputs);
-
-    g_test_add_func("/public/rename_keys", qdict_rename_keys_test);
-
     /* The Big one */
     if (g_test_slow()) {
         g_test_add_func("/stress/test", qdict_stress_test);
diff --git a/tests/check-qobject.c b/tests/check-qobject.c
index 5cb08fcb63..16ccbde82c 100644
--- a/tests/check-qobject.c
+++ b/tests/check-qobject.c
@@ -8,6 +8,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "block/qdict.h"
 #include "qapi/qmp/qbool.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qlist.h"
diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c
index 24870b38f4..ce665f1f83 100644
--- a/tests/hd-geo-test.c
+++ b/tests/hd-geo-test.c
@@ -201,7 +201,7 @@ static void setup_mbr(int img_idx, MBRcontents mbr)
 
 static int setup_ide(int argc, char *argv[], int argv_sz,
                      int ide_idx, const char *dev, int img_idx,
-                     MBRcontents mbr, const char *opts)
+                     MBRcontents mbr)
 {
     char *s1, *s2, *s3;
 
@@ -216,7 +216,7 @@ static int setup_ide(int argc, char *argv[], int argv_sz,
         s3 = g_strdup(",media=cdrom");
     }
     argc = append_arg(argc, argv, argv_sz,
-                      g_strdup_printf("%s%s%s%s", s1, s2, s3, opts));
+                      g_strdup_printf("%s%s%s", s1, s2, s3));
     g_free(s1);
     g_free(s2);
     g_free(s3);
@@ -260,7 +260,7 @@ static void test_ide_mbr(bool use_device, MBRcontents mbr)
     for (i = 0; i < backend_last; i++) {
         cur_ide[i] = &hd_chst[i][mbr];
         dev = use_device ? (is_hd(cur_ide[i]) ? "ide-hd" : "ide-cd") : NULL;
-        argc = setup_ide(argc, argv, ARGV_SIZE, i, dev, i, mbr, "");
+        argc = setup_ide(argc, argv, ARGV_SIZE, i, dev, i, mbr);
     }
     args = g_strjoinv(" ", argv);
     qtest_start(args);
@@ -327,16 +327,12 @@ static void test_ide_drive_user(const char *dev, bool trans)
     const CHST expected_chst = { secs / (4 * 32) , 4, 32, trans };
 
     argc = setup_common(argv, ARGV_SIZE);
-    opts = g_strdup_printf("%s,%s%scyls=%d,heads=%d,secs=%d",
-                           dev ?: "",
-                           trans && dev ? "bios-chs-" : "",
-                           trans ? "trans=lba," : "",
+    opts = g_strdup_printf("%s,%scyls=%d,heads=%d,secs=%d",
+                           dev, trans ? "bios-chs-trans=lba," : "",
                            expected_chst.cyls, expected_chst.heads,
                            expected_chst.secs);
     cur_ide[0] = &expected_chst;
-    argc = setup_ide(argc, argv, ARGV_SIZE,
-                     0, dev ? opts : NULL, backend_small, mbr_chs,
-                     dev ? "" : opts);
+    argc = setup_ide(argc, argv, ARGV_SIZE, 0, opts, backend_small, mbr_chs);
     g_free(opts);
     args = g_strjoinv(" ", argv);
     qtest_start(args);
@@ -347,22 +343,6 @@ static void test_ide_drive_user(const char *dev, bool trans)
 }
 
 /*
- * Test case: IDE device (if=ide) with explicit CHS
- */
-static void test_ide_drive_user_chs(void)
-{
-    test_ide_drive_user(NULL, false);
-}
-
-/*
- * Test case: IDE device (if=ide) with explicit CHS and translation
- */
-static void test_ide_drive_user_chst(void)
-{
-    test_ide_drive_user(NULL, true);
-}
-
-/*
  * Test case: IDE device (if=none) with explicit CHS
  */
 static void test_ide_device_user_chs(void)
@@ -392,8 +372,7 @@ static void test_ide_drive_cd_0(void)
     for (i = 0; i <= backend_empty; i++) {
         ide_idx = backend_empty - i;
         cur_ide[ide_idx] = &hd_chst[i][mbr_blank];
-        argc = setup_ide(argc, argv, ARGV_SIZE,
-                         ide_idx, NULL, i, mbr_blank, "");
+        argc = setup_ide(argc, argv, ARGV_SIZE, ide_idx, NULL, i, mbr_blank);
     }
     args = g_strjoinv(" ", argv);
     qtest_start(args);
@@ -422,8 +401,6 @@ int main(int argc, char **argv)
     qtest_add_func("hd-geo/ide/drive/mbr/blank", test_ide_drive_mbr_blank);
     qtest_add_func("hd-geo/ide/drive/mbr/lba", test_ide_drive_mbr_lba);
     qtest_add_func("hd-geo/ide/drive/mbr/chs", test_ide_drive_mbr_chs);
-    qtest_add_func("hd-geo/ide/drive/user/chs", test_ide_drive_user_chs);
-    qtest_add_func("hd-geo/ide/drive/user/chst", test_ide_drive_user_chst);
     qtest_add_func("hd-geo/ide/drive/cd_0", test_ide_drive_cd_0);
     qtest_add_func("hd-geo/ide/device/mbr/blank", test_ide_device_mbr_blank);
     qtest_add_func("hd-geo/ide/device/mbr/lba", test_ide_device_mbr_lba);
diff --git a/tests/ide-test.c b/tests/ide-test.c
index 2384c2c3e2..f39431b1a9 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -529,8 +529,8 @@ static void test_bmdma_no_busmaster(void)
 static void test_bmdma_setup(void)
 {
     ide_test_start(
-        "-drive file=%s,if=ide,serial=%s,cache=writeback,format=raw "
-        "-global ide-hd.ver=%s",
+        "-drive file=%s,if=ide,cache=writeback,format=raw "
+        "-global ide-hd.serial=%s -global ide-hd.ver=%s",
         tmp_path, "testdisk", "version");
     qtest_irq_intercept_in(global_qtest, "ioapic");
 }
@@ -561,8 +561,8 @@ static void test_identify(void)
     int ret;
 
     ide_test_start(
-        "-drive file=%s,if=ide,serial=%s,cache=writeback,format=raw "
-        "-global ide-hd.ver=%s",
+        "-drive file=%s,if=ide,cache=writeback,format=raw "
+        "-global ide-hd.serial=%s -global ide-hd.ver=%s",
         tmp_path, "testdisk", "version");
 
     dev = get_pci_device(&bmdma_bar, &ide_bar);
diff --git a/tests/qemu-iotests/221 b/tests/qemu-iotests/221
new file mode 100755
index 0000000000..41c4e4bdf8
--- /dev/null
+++ b/tests/qemu-iotests/221
@@ -0,0 +1,60 @@
+#!/bin/bash
+#
+# Test qemu-img vs. unaligned images
+#
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+status=1 # failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt raw
+_supported_proto file
+_supported_os Linux
+
+echo
+echo "=== Check mapping of unaligned raw image ==="
+echo
+
+_make_test_img 43009 # qemu-img create rounds size up
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+truncate --size=43009 "$TEST_IMG" # so we resize it and check again
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+$QEMU_IO -c 'w 43008 1' "$TEST_IMG" | _filter_qemu_io # writing also rounds up
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+truncate --size=43009 "$TEST_IMG" # so we resize it and check again
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/221.out b/tests/qemu-iotests/221.out
new file mode 100644
index 0000000000..a9c0190aad
--- /dev/null
+++ b/tests/qemu-iotests/221.out
@@ -0,0 +1,16 @@
+QA output created by 221
+
+=== Check mapping of unaligned raw image ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=43009
+[{ "start": 0, "length": 43520, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
+[{ "start": 0, "length": 43520, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
+wrote 1/1 bytes at offset 43008
+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 40960, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
+{ "start": 40960, "length": 2049, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 43009, "length": 511, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
+[{ "start": 0, "length": 40960, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
+{ "start": 40960, "length": 2049, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 43009, "length": 511, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 0914c922d7..937a3d0a4d 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -218,3 +218,4 @@
 217 rw auto quick
 218 rw auto quick
 219 rw auto
+221 rw auto quick
diff --git a/tests/test-replication.c b/tests/test-replication.c
index 68c0d04f2a..c8165ae954 100644
--- a/tests/test-replication.c
+++ b/tests/test-replication.c
@@ -15,6 +15,7 @@
 #include "qemu/option.h"
 #include "replication.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 
 #define IMG_SIZE (64 * 1024 * 1024)
diff --git a/util/qemu-config.c b/util/qemu-config.c
index 14d84022dc..9d2e278e29 100644
--- a/util/qemu-config.c
+++ b/util/qemu-config.c
@@ -1,4 +1,5 @@
 #include "qemu/osdep.h"
+#include "block/qdict.h" /* for qdict_extract_subqdict() */
 #include "qapi/error.h"
 #include "qapi/qapi-commands-misc.h"
 #include "qapi/qmp/qdict.h"