summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Richard Henderson <richard.henderson@linaro.org> 2025-09-17 11:10:55 -0700
committer Richard Henderson <richard.henderson@linaro.org> 2025-09-17 11:10:55 -0700
commit f0007b7f03e2d7fc33e71c3a582f2364c51a226b (patch)
tree 44b34bb98c293bbfe5c839eb73762141633eec70
parent 6be998b9863b470ab3f399f4e37cf3a9c59c8fd9 (diff)
parent aaf042299acf83919862c7d7dd5fc36acf4e0671 (diff)
download focaccia-qemu-f0007b7f03e2d7fc33e71c3a582f2364c51a226b.tar.gz
focaccia-qemu-f0007b7f03e2d7fc33e71c3a582f2364c51a226b.zip
Merge tag 'pull-target-arm-20250916' of https://gitlab.com/pm215/qemu into staging
target-arm queue:
 * tests, scripts: Don't import print_function from __future__
 * Implement FEAT_ATS1A
 * Remove deprecated pxa CPU family
 * arm/kvm: report registers we failed to set
 * Expose SME registers to GDB via gdbstub
 * linux-user/aarch64: Generate ESR signal records
 * hw/arm/raspi4b: remove redundant check in raspi_add_memory_node
 * hw/arm/virt: Allow user-creatable SMMUv3 dev instantiation
 * system: drop the -old-param option

# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmjJpt8ZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3vRGEACO3VrePiMIA9N7egqlUiGn
# aRQVqIKeuPVj6TRVG7BSNWlAX8qvnOWOKg1yGVHDZv/nLvRje9UyfUAw7pf6jXod
# bzxWBCPJ0J0eOB64Tz87WRCLltKB5pEN+uIG00PtpBcXT1ixYCDgBZXyD3mwuJ4Q
# 5Yc5hEwQzpmh+EycLtfCHbmjKDw3x1ncpVlGceOG4h5fvzIvIhcNcZJXfAHhbhyO
# Y4c5PELrCkCLZaTtSSxd6VJ+vXQ9bNWyKaSZu2KRRnLcMeAqw2Ic7dLPlkzCVyxM
# PTOHy4TuDu+kqCbkxdnhpI6fvq5kcHyfTL6qX6tth8ZZS+qKGtvMEIXnYoy6q1kh
# 4jV5vizK8avx31fSiuTKVpttRv4dC+Aq5QrcgYtIVMeOwtkWHv610D8gcFPmXoG+
# uHX9WdzOjrYOzXVKzJaCZF6b7L31ptSEfOrx7asBC9k2wPRwonFXg4JGNq16Yann
# aAO5TM7NAUvM2IPgqS+Tf1Bk0iQqORxGfqzCyL76OO/QMMgfBy9elKH0UR0G+ePJ
# yjpub1oWIELSXsQGMrdFo1W4/NIpFMTu3DP9W+6XRPu1AvrAx/AsrTuvSvXoeFY9
# d/U3yWAXm5XxRzbCIUg7ke8I8zLwRz924M5PA8vophvSnfDLS3V8CJHLwbz/PqYc
# 0P2KCeI6d2NIhVik4mgEoQ==
# =5tK3
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 16 Sep 2025 11:05:19 AM PDT
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [unknown]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [unknown]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [unknown]
# gpg:                 aka "Peter Maydell <peter@archaic.org.uk>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* tag 'pull-target-arm-20250916' of https://gitlab.com/pm215/qemu: (36 commits)
  hw/usb/network: Remove hardcoded 0x40 prefix in STRING_ETHADDR response
  qtest/bios-tables-test: Update tables for smmuv3 tests
  qtest/bios-tables-test: Add tests for legacy smmuv3 and smmuv3 device
  bios-tables-test: Allow for smmuv3 test data.
  qemu-options.hx: Document the arm-smmuv3 device
  hw/arm/virt: Allow user-creatable SMMUv3 dev instantiation
  hw/pci: Introduce pci_setup_iommu_per_bus() for per-bus IOMMU ops retrieval
  hw/arm/virt: Add an SMMU_IO_LEN macro
  hw/arm/virt: Factor out common SMMUV3 dt bindings code
  hw/arm/virt-acpi-build: Update IORT for multiple smmuv3 devices
  hw/arm/virt-acpi-build: Re-arrange SMMUv3 IORT build
  hw/arm/smmu-common: Check SMMU has PCIe Root Complex association
  target/arm: Added test case for SME register exposure to GDB
  target/arm: Added support for SME register exposure to GDB
  target/arm: Increase MAX_PACKET_LENGTH for SME ZA remote gdb debugging
  arm/kvm: report registers we failed to set
  system: drop the -old-param option
  target/arm: Drop ARM_FEATURE_IWMMXT handling
  target/arm: Drop ARM_FEATURE_XSCALE handling
  target/arm: Remove iwmmxt helper functions
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r--bsd-user/arm/target_arch_elf.h1
-rwxr-xr-xconfigure6
-rw-r--r--docs/about/deprecated.rst34
-rw-r--r--docs/about/removed-features.rst26
-rw-r--r--docs/system/arm/emulation.rst1
-rw-r--r--gdbstub/internals.h22
-rw-r--r--hw/arm/boot.c81
-rw-r--r--hw/arm/raspi4b.c22
-rw-r--r--hw/arm/smmu-common.c37
-rw-r--r--hw/arm/smmuv3.c2
-rw-r--r--hw/arm/virt-acpi-build.c201
-rw-r--r--hw/arm/virt.c111
-rw-r--r--hw/core/sysbus-fdt.c3
-rw-r--r--hw/pci-bridge/pci_expander_bridge.c1
-rw-r--r--hw/pci/pci.c31
-rw-r--r--hw/usb/dev-network.c2
-rw-r--r--include/hw/arm/smmu-common.h1
-rw-r--r--include/hw/arm/virt.h1
-rw-r--r--include/hw/pci/pci.h2
-rw-r--r--include/hw/pci/pci_bridge.h1
-rw-r--r--include/hw/pci/pci_bus.h1
-rw-r--r--include/system/system.h1
-rw-r--r--linux-user/aarch64/cpu_loop.c162
-rw-r--r--linux-user/aarch64/signal.c34
-rw-r--r--linux-user/arm/elfload.c1
-rw-r--r--linux-user/arm/signal.c67
-rw-r--r--qemu-options.hx14
-rwxr-xr-xscripts/userfaultfd-wrlat.py1
-rw-r--r--system/globals.c1
-rw-r--r--system/vl.c4
-rw-r--r--target/arm/cpregs.h29
-rw-r--r--target/arm/cpu-features.h5
-rw-r--r--target/arm/cpu.c21
-rw-r--r--target/arm/cpu.h30
-rw-r--r--target/arm/gdbstub.c10
-rw-r--r--target/arm/gdbstub64.c119
-rw-r--r--target/arm/helper.c59
-rw-r--r--target/arm/internals.h28
-rw-r--r--target/arm/kvm.c86
-rw-r--r--target/arm/machine.c21
-rw-r--r--target/arm/ptw.c71
-rw-r--r--target/arm/tcg/cpregs-at.c69
-rw-r--r--target/arm/tcg/cpu32.c163
-rw-r--r--target/arm/tcg/cpu64.c1
-rw-r--r--target/arm/tcg/helper.h95
-rw-r--r--target/arm/tcg/hflags.c13
-rw-r--r--target/arm/tcg/iwmmxt_helper.c672
-rw-r--r--target/arm/tcg/m_helper.c4
-rw-r--r--target/arm/tcg/meson.build2
-rw-r--r--target/arm/tcg/op_helper.c6
-rw-r--r--target/arm/tcg/translate.c1324
-rw-r--r--target/arm/tcg/translate.h2
-rw-r--r--tests/data/acpi/aarch64/virt/DSDT.smmuv3-devbin0 -> 10230 bytes
-rw-r--r--tests/data/acpi/aarch64/virt/DSDT.smmuv3-legacybin0 -> 10230 bytes
-rw-r--r--tests/data/acpi/aarch64/virt/IORT.smmuv3-devbin0 -> 364 bytes
-rw-r--r--tests/data/acpi/aarch64/virt/IORT.smmuv3-legacybin0 -> 276 bytes
-rw-r--r--tests/guest-debug/test_gdbstub.py1
-rw-r--r--tests/qtest/bios-tables-test.c86
-rw-r--r--tests/tcg/aarch64/Makefile.target29
-rw-r--r--tests/tcg/aarch64/gdbstub/test-mte.py1
-rw-r--r--tests/tcg/aarch64/gdbstub/test-sme.py117
-rw-r--r--tests/tcg/aarch64/gdbstub/test-sve-ioctl.py1
-rw-r--r--tests/tcg/aarch64/gdbstub/test-sve.py1
-rw-r--r--tests/tcg/multiarch/gdbstub/interrupt.py1
-rw-r--r--tests/tcg/multiarch/gdbstub/memory.py1
-rw-r--r--tests/tcg/multiarch/gdbstub/sha1.py1
-rw-r--r--tests/tcg/multiarch/gdbstub/test-proc-mappings.py1
-rw-r--r--tests/tcg/multiarch/gdbstub/test-qxfer-auxv-read.py1
-rw-r--r--tests/tcg/multiarch/gdbstub/test-qxfer-siginfo-read.py1
-rw-r--r--tests/tcg/multiarch/gdbstub/test-thread-breakpoint.py1
-rw-r--r--tests/tcg/s390x/gdbstub/test-signals-s390x.py1
-rw-r--r--tests/tcg/s390x/gdbstub/test-svc.py1
72 files changed, 1152 insertions, 2795 deletions
diff --git a/bsd-user/arm/target_arch_elf.h b/bsd-user/arm/target_arch_elf.h
index b1c0fd2b32..b54bf5fbc6 100644
--- a/bsd-user/arm/target_arch_elf.h
+++ b/bsd-user/arm/target_arch_elf.h
@@ -86,7 +86,6 @@ static uint32_t get_elf_hwcap(void)
     /* probe for the extra features */
     /* EDSP is in v5TE and above */
     GET_FEATURE(ARM_FEATURE_V5, ARM_HWCAP_ARM_EDSP);
-    GET_FEATURE(ARM_FEATURE_IWMMXT, ARM_HWCAP_ARM_IWMMXT);
     GET_FEATURE(ARM_FEATURE_THUMB2EE, ARM_HWCAP_ARM_THUMBEE);
     GET_FEATURE(ARM_FEATURE_NEON, ARM_HWCAP_ARM_NEON);
     GET_FEATURE(ARM_FEATURE_V6K, ARM_HWCAP_ARM_TLS);
diff --git a/configure b/configure
index 274a778764..9aea02cf6a 100755
--- a/configure
+++ b/configure
@@ -1839,6 +1839,12 @@ for target in $target_list; do
           echo "GDB=$gdb_bin" >> $config_target_mak
       fi
 
+      if test "${gdb_arches#*$arch}" != "$gdb_arches" && version_ge $gdb_version 14.1; then
+          echo "GDB_HAS_SME_TILES=y" >> $config_target_mak
+      else
+          echo "GDB_HAS_SME_TILES=n" >> $config_target_mak
+      fi
+
       if test "${gdb_arches#*aarch64}" != "$gdb_arches" && version_ge $gdb_version 15.1; then
           echo "GDB_HAS_MTE=y" >> $config_target_mak
       fi
diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 2fa2c47b68..aa300bbd50 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -68,19 +68,6 @@ configurations (e.g. -smp drawers=1,books=1,clusters=1 for x86 PC machine) is
 marked deprecated since 9.0, users have to ensure that all the topology members
 described with -smp are supported by the target machine.
 
-``-old-param`` option for booting Arm kernels via param_struct (since 10.0)
-'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
-
-The ``-old-param`` command line option is specific to Arm targets:
-it is used when directly booting a guest kernel to pass it the
-command line and other information via the old ``param_struct`` ABI,
-rather than the newer ATAGS or DTB mechanisms. This option was only
-ever needed to support ancient kernels on some old board types
-like the ``akita`` or ``terrier``; it has been deprecated in the
-kernel since 2001. None of the board types QEMU supports need
-``param_struct`` support, so this option has been deprecated and will
-be removed in a future QEMU version.
-
 QEMU Machine Protocol (QMP) commands
 ------------------------------------
 
@@ -236,27 +223,6 @@ Keeping 32-bit host support alive is a substantial burden for the
 QEMU project.  Thus QEMU will in future drop the support for all
 32-bit host systems.
 
-linux-user mode CPUs
---------------------
-
-iwMMXt emulation and the ``pxa`` CPUs (since 10.0)
-''''''''''''''''''''''''''''''''''''''''''''''''''
-
-The ``pxa`` CPU family (``pxa250``, ``pxa255``, ``pxa260``,
-``pxa261``, ``pxa262``, ``pxa270-a0``, ``pxa270-a1``, ``pxa270``,
-``pxa270-b0``, ``pxa270-b1``, ``pxa270-c0``, ``pxa270-c5``) are no
-longer used in system emulation, because all the machine types which
-used these CPUs were removed in the QEMU 9.2 release. These CPUs can
-now only be used in linux-user mode, and to do that you would have to
-explicitly select one of these CPUs with the ``-cpu`` command line
-option or the ``QEMU_CPU`` environment variable.
-
-We don't believe that anybody is using the iwMMXt emulation, and we do
-not have any tests to validate it or any real hardware or similar
-known-good implementation to test against. GCC is in the process of
-dropping their support for iwMMXt codegen. These CPU types are
-therefore deprecated in QEMU, and will be removed in a future release.
-
 System emulator CPUs
 --------------------
 
diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst
index 2d3a684e53..a5338e44c2 100644
--- a/docs/about/removed-features.rst
+++ b/docs/about/removed-features.rst
@@ -560,6 +560,18 @@ the options along with the machine models they were intended for.
 
 Use ``-run-with user=..`` instead.
 
+``-old-param`` option for booting Arm kernels via param_struct (removed in 10.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The ``-old-param`` command line option was specific to Arm targets:
+it was used when directly booting a guest kernel to pass it the
+command line and other information via the old ``param_struct`` ABI,
+rather than the newer ATAGS or DTB mechanisms. This option was only
+ever needed to support ancient kernels on some old board types
+like the ``akita`` or ``terrier``; it has been deprecated in the
+kernel since 2001. None of the board types QEMU supports need
+``param_struct`` support, so this option has been removed.
+
 
 User-mode emulator command line arguments
 -----------------------------------------
@@ -1138,6 +1150,20 @@ reason the maintainers strongly suspected no one actually used it.
 QEMU Nios II architecture was orphan; Intel has EOL'ed the Nios II
 processor IP (see `Intel discontinuance notification`_).
 
+iwMMXt emulation and the ``pxa`` CPUs (removed in 10.2)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The ``pxa`` CPU family (``pxa250``, ``pxa255``, ``pxa260``,
+``pxa261``, ``pxa262``, ``pxa270-a0``, ``pxa270-a1``, ``pxa270``,
+``pxa270-b0``, ``pxa270-b1``, ``pxa270-c0``, ``pxa270-c5``) were
+not available in system emulation, because all the machine types which
+used these CPUs were removed in the QEMU 9.2 release. We don't
+believe that anybody was using the iwMMXt emulation (which you
+would have to explicitly enable on the command line), and we did
+not have any tests to validate it or any real hardware or similar
+known-good implementation to test against. These CPUs have
+therefore been removed in linux-user mode as well.
+
 TCG introspection features
 --------------------------
 
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
index 4e8aca8b5d..6b04c96c8c 100644
--- a/docs/system/arm/emulation.rst
+++ b/docs/system/arm/emulation.rst
@@ -23,6 +23,7 @@ the following architecture extensions:
 - FEAT_AFP (Alternate floating-point behavior)
 - FEAT_Armv9_Crypto (Armv9 Cryptographic Extension)
 - FEAT_ASID16 (16 bit ASID)
+- FEAT_ATS1A (Address Translation operations that ignore stage 1 permissions)
 - FEAT_BBM at level 2 (Translation table break-before-make levels)
 - FEAT_BF16 (AArch64 BFloat16 instructions)
 - FEAT_BTI (Branch Target Identification)
diff --git a/gdbstub/internals.h b/gdbstub/internals.h
index bf5a5c6302..92466b28c1 100644
--- a/gdbstub/internals.h
+++ b/gdbstub/internals.h
@@ -11,7 +11,27 @@
 
 #include "exec/cpu-common.h"
 
-#define MAX_PACKET_LENGTH 4096
+/*
+ * Most "large" transfers (e.g. memory reads, feature XML
+ * transfer) have mechanisms in the gdb protocol for splitting
+ * them. However, register values in particular cannot currently
+ * be split. This packet size must therefore be at least big enough
+ * for the worst-case register size. Currently that is Arm SME
+ * ZA storage with a 256x256 byte value. We also must account
+ * for the conversion from raw data to hex in gdb_memtohex(),
+ * which writes 2 * size bytes, and for other protocol overhead
+ * including command, register number and checksum which add
+ * another 4 bytes of overhead. However, to be consistent with
+ * the changes made in gdbserver to address this same requirement,
+ * we add a total of 32 bytes to account for protocol overhead
+ * (unclear why specifically 32 bytes), bringing the value of
+ * MAX_PACKET_LENGTH to 2 * 256 * 256 + 32 = 131104.
+ *
+ * The commit making this change for gdbserver can be found here:
+ * https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=
+ * b816042e88583f280ad186ff124ab84d31fb592b
+ */
+#define MAX_PACKET_LENGTH 131104
 
 /*
  * Shared structures and definitions
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index d0840308f5..e77d8679d8 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -337,81 +337,6 @@ static void set_kernel_args(const struct arm_boot_info *info, AddressSpace *as)
     WRITE_WORD(p, 0);
 }
 
-static void set_kernel_args_old(const struct arm_boot_info *info,
-                                AddressSpace *as)
-{
-    hwaddr p;
-    const char *s;
-    int initrd_size = info->initrd_size;
-    hwaddr base = info->loader_start;
-
-    /* see linux/include/asm-arm/setup.h */
-    p = base + KERNEL_ARGS_ADDR;
-    /* page_size */
-    WRITE_WORD(p, 4096);
-    /* nr_pages */
-    WRITE_WORD(p, info->ram_size / 4096);
-    /* ramdisk_size */
-    WRITE_WORD(p, 0);
-#define FLAG_READONLY 1
-#define FLAG_RDLOAD   4
-#define FLAG_RDPROMPT 8
-    /* flags */
-    WRITE_WORD(p, FLAG_READONLY | FLAG_RDLOAD | FLAG_RDPROMPT);
-    /* rootdev */
-    WRITE_WORD(p, (31 << 8) | 0); /* /dev/mtdblock0 */
-    /* video_num_cols */
-    WRITE_WORD(p, 0);
-    /* video_num_rows */
-    WRITE_WORD(p, 0);
-    /* video_x */
-    WRITE_WORD(p, 0);
-    /* video_y */
-    WRITE_WORD(p, 0);
-    /* memc_control_reg */
-    WRITE_WORD(p, 0);
-    /* unsigned char sounddefault */
-    /* unsigned char adfsdrives */
-    /* unsigned char bytes_per_char_h */
-    /* unsigned char bytes_per_char_v */
-    WRITE_WORD(p, 0);
-    /* pages_in_bank[4] */
-    WRITE_WORD(p, 0);
-    WRITE_WORD(p, 0);
-    WRITE_WORD(p, 0);
-    WRITE_WORD(p, 0);
-    /* pages_in_vram */
-    WRITE_WORD(p, 0);
-    /* initrd_start */
-    if (initrd_size) {
-        WRITE_WORD(p, info->initrd_start);
-    } else {
-        WRITE_WORD(p, 0);
-    }
-    /* initrd_size */
-    WRITE_WORD(p, initrd_size);
-    /* rd_start */
-    WRITE_WORD(p, 0);
-    /* system_rev */
-    WRITE_WORD(p, 0);
-    /* system_serial_low */
-    WRITE_WORD(p, 0);
-    /* system_serial_high */
-    WRITE_WORD(p, 0);
-    /* mem_fclk_21285 */
-    WRITE_WORD(p, 0);
-    /* zero unused fields */
-    while (p < base + KERNEL_ARGS_ADDR + 256 + 1024) {
-        WRITE_WORD(p, 0);
-    }
-    s = info->kernel_cmdline;
-    if (s) {
-        address_space_write(as, p, MEMTXATTRS_UNSPECIFIED, s, strlen(s) + 1);
-    } else {
-        WRITE_WORD(p, 0);
-    }
-}
-
 static int fdt_add_memory_node(void *fdt, uint32_t acells, hwaddr mem_base,
                                uint32_t scells, hwaddr mem_len,
                                int numa_node_id)
@@ -802,11 +727,7 @@ static void do_cpu_reset(void *opaque)
                 cpu_set_pc(cs, info->loader_start);
 
                 if (!have_dtb(info)) {
-                    if (old_param) {
-                        set_kernel_args_old(info, as);
-                    } else {
-                        set_kernel_args(info, as);
-                    }
+                    set_kernel_args(info, as);
                 }
             } else if (info->secondary_cpu_reset_hook) {
                 info->secondary_cpu_reset_hook(cpu, info);
diff --git a/hw/arm/raspi4b.c b/hw/arm/raspi4b.c
index 20082d5266..4df951a0d8 100644
--- a/hw/arm/raspi4b.c
+++ b/hw/arm/raspi4b.c
@@ -36,9 +36,8 @@ struct Raspi4bMachineState {
  * (see https://datasheets.raspberrypi.com/bcm2711/bcm2711-peripherals.pdf
  * 1.2 Address Map)
  */
-static int raspi_add_memory_node(void *fdt, hwaddr mem_base, hwaddr mem_len)
+static void raspi_add_memory_node(void *fdt, hwaddr mem_base, hwaddr mem_len)
 {
-    int ret;
     uint32_t acells, scells;
     char *nodename = g_strdup_printf("/memory@%" PRIx64, mem_base);
 
@@ -46,19 +45,16 @@ static int raspi_add_memory_node(void *fdt, hwaddr mem_base, hwaddr mem_len)
                                    NULL, &error_fatal);
     scells = qemu_fdt_getprop_cell(fdt, "/", "#size-cells",
                                    NULL, &error_fatal);
-    if (acells == 0 || scells == 0) {
-        fprintf(stderr, "dtb file invalid (#address-cells or #size-cells 0)\n");
-        ret = -1;
-    } else {
-        qemu_fdt_add_subnode(fdt, nodename);
-        qemu_fdt_setprop_string(fdt, nodename, "device_type", "memory");
-        ret = qemu_fdt_setprop_sized_cells(fdt, nodename, "reg",
-                                           acells, mem_base,
-                                           scells, mem_len);
-    }
+    /* validated by arm_load_dtb */
+    g_assert(acells && scells);
+
+    qemu_fdt_add_subnode(fdt, nodename);
+    qemu_fdt_setprop_string(fdt, nodename, "device_type", "memory");
+    qemu_fdt_setprop_sized_cells(fdt, nodename, "reg",
+                                        acells, mem_base,
+                                        scells, mem_len);
 
     g_free(nodename);
-    return ret;
 }
 
 static void raspi4_modify_dtb(const struct arm_boot_info *info, void *fdt)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 0dcaf2f589..62a7612184 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -20,6 +20,7 @@
 #include "trace.h"
 #include "exec/target_page.h"
 #include "hw/core/cpu.h"
+#include "hw/pci/pci_bridge.h"
 #include "hw/qdev-properties.h"
 #include "qapi/error.h"
 #include "qemu/jhash.h"
@@ -925,6 +926,7 @@ static void smmu_base_realize(DeviceState *dev, Error **errp)
 {
     SMMUState *s = ARM_SMMU(dev);
     SMMUBaseClass *sbc = ARM_SMMU_GET_CLASS(dev);
+    PCIBus *pci_bus = s->primary_bus;
     Error *local_err = NULL;
 
     sbc->parent_realize(dev, &local_err);
@@ -937,11 +939,39 @@ static void smmu_base_realize(DeviceState *dev, Error **errp)
                                      g_free, g_free);
     s->smmu_pcibus_by_busptr = g_hash_table_new(NULL, NULL);
 
-    if (s->primary_bus) {
-        pci_setup_iommu(s->primary_bus, &smmu_ops, s);
-    } else {
+    if (!pci_bus) {
         error_setg(errp, "SMMU is not attached to any PCI bus!");
+        return;
+    }
+
+    /*
+     * We only allow default PCIe Root Complex(pcie.0) or pxb-pcie based extra
+     * root complexes to be associated with SMMU.
+     */
+    if (pci_bus_is_express(pci_bus) && pci_bus_is_root(pci_bus) &&
+        object_dynamic_cast(OBJECT(pci_bus)->parent, TYPE_PCI_HOST_BRIDGE)) {
+        /*
+         * This condition matches either the default pcie.0, pxb-pcie, or
+         * pxb-cxl. For both pxb-pcie and pxb-cxl, parent_dev will be set.
+         * Currently, we don't allow pxb-cxl as it requires further
+         * verification. Therefore, make sure this is indeed pxb-pcie.
+         */
+        if (pci_bus->parent_dev) {
+            if (!object_dynamic_cast(OBJECT(pci_bus), TYPE_PXB_PCIE_BUS)) {
+                goto out_err;
+            }
+        }
+
+        if (s->smmu_per_bus) {
+            pci_setup_iommu_per_bus(pci_bus, &smmu_ops, s);
+        } else {
+            pci_setup_iommu(pci_bus, &smmu_ops, s);
+        }
+        return;
     }
+out_err:
+    error_setg(errp, "SMMU should be attached to a default PCIe root complex"
+               "(pcie.0) or a pxb-pcie based root complex");
 }
 
 /*
@@ -961,6 +991,7 @@ static void smmu_base_reset_exit(Object *obj, ResetType type)
 
 static const Property smmu_dev_properties[] = {
     DEFINE_PROP_UINT8("bus_num", SMMUState, bus_num, 0),
+    DEFINE_PROP_BOOL("smmu_per_bus", SMMUState, smmu_per_bus, false),
     DEFINE_PROP_LINK("primary-bus", SMMUState, primary_bus,
                      TYPE_PCI_BUS, PCIBus *),
 };
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index ab67972353..bcf8af8dc7 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1996,6 +1996,8 @@ static void smmuv3_class_init(ObjectClass *klass, const void *data)
     device_class_set_parent_realize(dc, smmu_realize,
                                     &c->parent_realize);
     device_class_set_props(dc, smmuv3_properties);
+    dc->hotpluggable = false;
+    dc->user_creatable = true;
 }
 
 static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index b01fc4f8ef..96830f7c4e 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -45,6 +45,7 @@
 #include "hw/acpi/generic_event_device.h"
 #include "hw/acpi/tpm.h"
 #include "hw/acpi/hmat.h"
+#include "hw/arm/smmuv3.h"
 #include "hw/cxl/cxl.h"
 #include "hw/pci/pcie_host.h"
 #include "hw/pci/pci.h"
@@ -305,29 +306,126 @@ static int iort_idmap_compare(gconstpointer a, gconstpointer b)
     return idmap_a->input_base - idmap_b->input_base;
 }
 
-/* Compute ID ranges (RIDs) from RC that are directed to the ITS Group node */
-static void create_rc_its_idmaps(GArray *its_idmaps, GArray *smmu_idmaps)
+typedef struct AcpiIortSMMUv3Dev {
+    int irq;
+    hwaddr base;
+    GArray *rc_smmu_idmaps;
+    /* Offset of the SMMUv3 IORT Node relative to the start of the IORT */
+    size_t offset;
+} AcpiIortSMMUv3Dev;
+
+/*
+ * Populate the struct AcpiIortSMMUv3Dev for the legacy SMMUv3 and
+ * return the total number of associated idmaps.
+ */
+static int populate_smmuv3_legacy_dev(GArray *sdev_blob)
 {
-    AcpiIortIdMapping *idmap;
-    AcpiIortIdMapping next_range = {0};
+    VirtMachineState *vms = VIRT_MACHINE(qdev_get_machine());
+    AcpiIortSMMUv3Dev sdev;
 
+    sdev.rc_smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
+    object_child_foreach_recursive(object_get_root(), iort_host_bridges,
+                                   sdev.rc_smmu_idmaps);
     /*
-     * Based on the RID ranges that are directed to the SMMU, determine the
-     * bypassed RID ranges, i.e., the ones that are directed to the ITS Group
-     * node and do not pass through the SMMU, by subtracting the SMMU-bound
-     * ranges from the full RID range (0x0000–0xFFFF).
+     * There can be only one legacy SMMUv3("iommu=smmuv3") as it is a machine
+     * wide one. Since it may cover multiple PCIe RCs(based on "bypass_iommu"
+     * property), may have multiple SMMUv3 idmaps. Sort it by input_base.
      */
-     for (int i = 0; i < smmu_idmaps->len; i++) {
-        idmap = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i);
+    g_array_sort(sdev.rc_smmu_idmaps, iort_idmap_compare);
 
-        if (next_range.input_base < idmap->input_base) {
-            next_range.id_count = idmap->input_base - next_range.input_base;
-            g_array_append_val(its_idmaps, next_range);
-        }
+    sdev.base = vms->memmap[VIRT_SMMU].base;
+    sdev.irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE;
+    g_array_append_val(sdev_blob, sdev);
+    return sdev.rc_smmu_idmaps->len;
+}
+
+static int smmuv3_dev_idmap_compare(gconstpointer a, gconstpointer b)
+{
+    AcpiIortSMMUv3Dev *sdev_a = (AcpiIortSMMUv3Dev *)a;
+    AcpiIortSMMUv3Dev *sdev_b = (AcpiIortSMMUv3Dev *)b;
+    AcpiIortIdMapping *map_a = &g_array_index(sdev_a->rc_smmu_idmaps,
+                                              AcpiIortIdMapping, 0);
+    AcpiIortIdMapping *map_b = &g_array_index(sdev_b->rc_smmu_idmaps,
+                                              AcpiIortIdMapping, 0);
+    return map_a->input_base - map_b->input_base;
+}
 
-        next_range.input_base = idmap->input_base + idmap->id_count;
+static int iort_smmuv3_devices(Object *obj, void *opaque)
+{
+    VirtMachineState *vms = VIRT_MACHINE(qdev_get_machine());
+    GArray *sdev_blob = opaque;
+    AcpiIortIdMapping idmap;
+    PlatformBusDevice *pbus;
+    AcpiIortSMMUv3Dev sdev;
+    int min_bus, max_bus;
+    SysBusDevice *sbdev;
+    PCIBus *bus;
+
+    if (!object_dynamic_cast(obj, TYPE_ARM_SMMUV3)) {
+        return 0;
     }
 
+    bus = PCI_BUS(object_property_get_link(obj, "primary-bus", &error_abort));
+    pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev);
+    sbdev = SYS_BUS_DEVICE(obj);
+    sdev.base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
+    sdev.base += vms->memmap[VIRT_PLATFORM_BUS].base;
+    sdev.irq = platform_bus_get_irqn(pbus, sbdev, 0);
+    sdev.irq += vms->irqmap[VIRT_PLATFORM_BUS];
+    sdev.irq += ARM_SPI_BASE;
+
+    pci_bus_range(bus, &min_bus, &max_bus);
+    sdev.rc_smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
+    idmap.input_base = min_bus << 8,
+    idmap.id_count = (max_bus - min_bus + 1) << 8,
+    g_array_append_val(sdev.rc_smmu_idmaps, idmap);
+    g_array_append_val(sdev_blob, sdev);
+    return 0;
+}
+
+/*
+ * Populate the struct AcpiIortSMMUv3Dev for all SMMUv3 devices and
+ * return the total number of idmaps.
+ */
+static int populate_smmuv3_dev(GArray *sdev_blob)
+{
+    object_child_foreach_recursive(object_get_root(),
+                                   iort_smmuv3_devices, sdev_blob);
+    /* Sort the smmuv3 devices(if any) by smmu idmap input_base */
+    g_array_sort(sdev_blob, smmuv3_dev_idmap_compare);
+    /*
+     * Since each SMMUv3 dev is associated with a specific host bridge,
+     * the total number of idmaps equals the total number of smmuv3 devices.
+     */
+    return sdev_blob->len;
+}
+
+/* Compute ID ranges (RIDs) from RC that are directed to the ITS Group node */
+static void create_rc_its_idmaps(GArray *its_idmaps, GArray *smmuv3_devs)
+{
+    AcpiIortIdMapping *idmap;
+    AcpiIortIdMapping next_range = {0};
+    AcpiIortSMMUv3Dev *sdev;
+
+    for (int i = 0; i < smmuv3_devs->len; i++) {
+        sdev = &g_array_index(smmuv3_devs, AcpiIortSMMUv3Dev, i);
+        /*
+         * Based on the RID ranges that are directed to the SMMU, determine the
+         * bypassed RID ranges, i.e., the ones that are directed to the ITS
+         * Group node and do not pass through the SMMU, by subtracting the
+         * SMMU-bound ranges from the full RID range (0x0000–0xFFFF).
+         */
+         for (int j = 0; j < sdev->rc_smmu_idmaps->len; j++) {
+            idmap = &g_array_index(sdev->rc_smmu_idmaps, AcpiIortIdMapping, j);
+
+            if (next_range.input_base < idmap->input_base) {
+                next_range.id_count = idmap->input_base - next_range.input_base;
+                g_array_append_val(its_idmaps, next_range);
+            }
+
+            next_range.input_base = idmap->input_base + idmap->id_count;
+        }
+    }
     /*
      * Append the last RC -> ITS ID mapping.
      *
@@ -341,7 +439,6 @@ static void create_rc_its_idmaps(GArray *its_idmaps, GArray *smmu_idmaps)
     }
 }
 
-
 /*
  * Input Output Remapping Table (IORT)
  * Conforms to "IO Remapping Table System Software on ARM Platforms",
@@ -351,9 +448,12 @@ static void
 build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
 {
     int i, nb_nodes, rc_mapping_count;
-    size_t node_size, smmu_offset = 0;
+    AcpiIortSMMUv3Dev *sdev;
+    size_t node_size;
+    int num_smmus = 0;
     uint32_t id = 0;
-    GArray *rc_smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
+    int rc_smmu_idmaps_len = 0;
+    GArray *smmuv3_devs = g_array_new(false, true, sizeof(AcpiIortSMMUv3Dev));
     GArray *rc_its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
 
     AcpiTable table = { .sig = "IORT", .rev = 3, .oem_id = vms->oem_id,
@@ -361,22 +461,23 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
     /* Table 2 The IORT */
     acpi_table_begin(&table, table_data);
 
-    if (vms->iommu == VIRT_IOMMU_SMMUV3) {
-        object_child_foreach_recursive(object_get_root(),
-                                       iort_host_bridges, rc_smmu_idmaps);
-
-        /* Sort the smmu idmap by input_base */
-        g_array_sort(rc_smmu_idmaps, iort_idmap_compare);
+    if (vms->legacy_smmuv3_present) {
+        rc_smmu_idmaps_len = populate_smmuv3_legacy_dev(smmuv3_devs);
+    } else {
+        rc_smmu_idmaps_len = populate_smmuv3_dev(smmuv3_devs);
+    }
 
-        nb_nodes = 2; /* RC and SMMUv3 */
-        rc_mapping_count = rc_smmu_idmaps->len;
+    num_smmus = smmuv3_devs->len;
+    if (num_smmus) {
+        nb_nodes = num_smmus + 1; /* RC and SMMUv3 */
+        rc_mapping_count = rc_smmu_idmaps_len;
 
         if (vms->its) {
             /*
              * Knowing the ID ranges from the RC to the SMMU, it's possible to
              * determine the ID ranges from RC that go directly to ITS.
              */
-            create_rc_its_idmaps(rc_its_idmaps, rc_smmu_idmaps);
+            create_rc_its_idmaps(rc_its_idmaps, smmuv3_devs);
 
             nb_nodes++; /* ITS */
             rc_mapping_count += rc_its_idmaps->len;
@@ -411,9 +512,10 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
         build_append_int_noprefix(table_data, 0 /* MADT translation_id */, 4);
     }
 
-    if (vms->iommu == VIRT_IOMMU_SMMUV3) {
-        int irq =  vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE;
+    for (i = 0; i < num_smmus; i++) {
+        sdev = &g_array_index(smmuv3_devs, AcpiIortSMMUv3Dev, i);
         int smmu_mapping_count, offset_to_id_array;
+        int irq = sdev->irq;
 
         if (vms->its) {
             smmu_mapping_count = 1; /* ITS Group node */
@@ -422,7 +524,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
             smmu_mapping_count = 0; /* No ID mappings */
             offset_to_id_array = 0; /* No ID mappings array */
         }
-        smmu_offset = table_data->len - table.table_offset;
+        sdev->offset = table_data->len - table.table_offset;
         /* Table 9 SMMUv3 Format */
         build_append_int_noprefix(table_data, 4 /* SMMUv3 */, 1); /* Type */
         node_size =  SMMU_V3_ENTRY_SIZE +
@@ -435,7 +537,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
         /* Reference to ID Array */
         build_append_int_noprefix(table_data, offset_to_id_array, 4);
         /* Base address */
-        build_append_int_noprefix(table_data, vms->memmap[VIRT_SMMU].base, 8);
+        build_append_int_noprefix(table_data, sdev->base, 8);
         /* Flags */
         build_append_int_noprefix(table_data, 1 /* COHACC Override */, 4);
         build_append_int_noprefix(table_data, 0, 4); /* Reserved */
@@ -486,21 +588,26 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
     build_append_int_noprefix(table_data, 0, 3); /* Reserved */
 
     /* Output Reference */
-    if (vms->iommu == VIRT_IOMMU_SMMUV3) {
+    if (num_smmus) {
         AcpiIortIdMapping *range;
 
-        /*
-         * Map RIDs (input) from RC to SMMUv3 nodes: RC -> SMMUv3.
-         *
-         * N.B.: The mapping from SMMUv3 to ITS Group node (SMMUv3 -> ITS) is
-         * defined in the SMMUv3 table, where all SMMUv3 IDs are mapped to the
-         * ITS Group node, if ITS is available.
-         */
-        for (i = 0; i < rc_smmu_idmaps->len; i++) {
-            range = &g_array_index(rc_smmu_idmaps, AcpiIortIdMapping, i);
-            /* Output IORT node is the SMMUv3 node. */
-            build_iort_id_mapping(table_data, range->input_base,
-                                  range->id_count, smmu_offset);
+        for (i = 0; i < num_smmus; i++) {
+            sdev = &g_array_index(smmuv3_devs, AcpiIortSMMUv3Dev, i);
+
+            /*
+             * Map RIDs (input) from RC to SMMUv3 nodes: RC -> SMMUv3.
+             *
+             * N.B.: The mapping from SMMUv3 to ITS Group node (SMMUv3 -> ITS)
+             * is defined in the SMMUv3 table, where all SMMUv3 IDs are mapped
+             * to the ITS Group node, if ITS is available.
+             */
+             for (int j = 0; j < sdev->rc_smmu_idmaps->len; j++) {
+                range = &g_array_index(sdev->rc_smmu_idmaps,
+                                       AcpiIortIdMapping, j);
+                /* Output IORT node is the SMMUv3 node. */
+                build_iort_id_mapping(table_data, range->input_base,
+                                      range->id_count, sdev->offset);
+            }
         }
 
         if (vms->its) {
@@ -525,8 +632,12 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
     }
 
     acpi_table_end(linker, &table);
-    g_array_free(rc_smmu_idmaps, true);
     g_array_free(rc_its_idmaps, true);
+    for (i = 0; i < num_smmus; i++) {
+        sdev = &g_array_index(smmuv3_devs, AcpiIortSMMUv3Dev, i);
+        g_array_free(sdev->rc_smmu_idmaps, true);
+    }
+    g_array_free(smmuv3_devs, true);
 }
 
 /*
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6f01746e74..02209fadcf 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -55,6 +55,7 @@
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
 #include "qemu/module.h"
+#include "hw/pci/pci_bus.h"
 #include "hw/pci-host/gpex.h"
 #include "hw/pci-bridge/pci_expander_bridge.h"
 #include "hw/virtio/virtio-pci.h"
@@ -149,6 +150,9 @@ static void arm_virt_compat_set(MachineClass *mc)
 #define LEGACY_RAMLIMIT_GB 255
 #define LEGACY_RAMLIMIT_BYTES (LEGACY_RAMLIMIT_GB * GiB)
 
+/* MMIO region size for SMMUv3 */
+#define SMMU_IO_LEN 0x20000
+
 /* Addresses and sizes of our components.
  * 0..128MB is space for a flash device so we can run bootrom code such as UEFI.
  * 128MB..256MB is used for miscellaneous device I/O.
@@ -180,7 +184,7 @@ static const MemMapEntry base_memmap[] = {
     [VIRT_FW_CFG] =             { 0x09020000, 0x00000018 },
     [VIRT_GPIO] =               { 0x09030000, 0x00001000 },
     [VIRT_UART1] =              { 0x09040000, 0x00001000 },
-    [VIRT_SMMU] =               { 0x09050000, 0x00020000 },
+    [VIRT_SMMU] =               { 0x09050000, SMMU_IO_LEN },
     [VIRT_PCDIMM_ACPI] =        { 0x09070000, MEMORY_HOTPLUG_IO_LEN },
     [VIRT_ACPI_GED] =           { 0x09080000, ACPI_GED_EVT_SEL_LEN },
     [VIRT_NVDIMM_ACPI] =        { 0x09090000, NVDIMM_ACPI_IO_LEN},
@@ -1442,19 +1446,66 @@ static void create_pcie_irq_map(const MachineState *ms,
                            0x7           /* PCI irq */);
 }
 
+static void create_smmuv3_dt_bindings(const VirtMachineState *vms, hwaddr base,
+                                      hwaddr size, int irq)
+{
+    char *node;
+    const char compat[] = "arm,smmu-v3";
+    const char irq_names[] = "eventq\0priq\0cmdq-sync\0gerror";
+    MachineState *ms = MACHINE(vms);
+
+    node = g_strdup_printf("/smmuv3@%" PRIx64, base);
+    qemu_fdt_add_subnode(ms->fdt, node);
+    qemu_fdt_setprop(ms->fdt, node, "compatible", compat, sizeof(compat));
+    qemu_fdt_setprop_sized_cells(ms->fdt, node, "reg", 2, base, 2, size);
+
+    qemu_fdt_setprop_cells(ms->fdt, node, "interrupts",
+            GIC_FDT_IRQ_TYPE_SPI, irq    , GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
+            GIC_FDT_IRQ_TYPE_SPI, irq + 1, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
+            GIC_FDT_IRQ_TYPE_SPI, irq + 2, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
+            GIC_FDT_IRQ_TYPE_SPI, irq + 3, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
+
+    qemu_fdt_setprop(ms->fdt, node, "interrupt-names", irq_names,
+                     sizeof(irq_names));
+
+    qemu_fdt_setprop(ms->fdt, node, "dma-coherent", NULL, 0);
+    qemu_fdt_setprop_cell(ms->fdt, node, "#iommu-cells", 1);
+    qemu_fdt_setprop_cell(ms->fdt, node, "phandle", vms->iommu_phandle);
+    g_free(node);
+}
+
+static void create_smmuv3_dev_dtb(VirtMachineState *vms,
+                                  DeviceState *dev, PCIBus *bus)
+{
+    PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev);
+    SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
+    int irq = platform_bus_get_irqn(pbus, sbdev, 0);
+    hwaddr base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
+    MachineState *ms = MACHINE(vms);
+
+    if (!(vms->bootinfo.firmware_loaded && virt_is_acpi_enabled(vms)) &&
+        strcmp("pcie.0", bus->qbus.name)) {
+        warn_report("SMMUv3 device only supported with pcie.0 for DT");
+        return;
+    }
+    base += vms->memmap[VIRT_PLATFORM_BUS].base;
+    irq += vms->irqmap[VIRT_PLATFORM_BUS];
+
+    vms->iommu_phandle = qemu_fdt_alloc_phandle(ms->fdt);
+    create_smmuv3_dt_bindings(vms, base, SMMU_IO_LEN, irq);
+    qemu_fdt_setprop_cells(ms->fdt, vms->pciehb_nodename, "iommu-map",
+                           0x0, vms->iommu_phandle, 0x0, 0x10000);
+}
+
 static void create_smmu(const VirtMachineState *vms,
                         PCIBus *bus)
 {
     VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
-    char *node;
-    const char compat[] = "arm,smmu-v3";
     int irq =  vms->irqmap[VIRT_SMMU];
     int i;
     hwaddr base = vms->memmap[VIRT_SMMU].base;
     hwaddr size = vms->memmap[VIRT_SMMU].size;
-    const char irq_names[] = "eventq\0priq\0cmdq-sync\0gerror";
     DeviceState *dev;
-    MachineState *ms = MACHINE(vms);
 
     if (vms->iommu != VIRT_IOMMU_SMMUV3 || !vms->iommu_phandle) {
         return;
@@ -1473,27 +1524,7 @@ static void create_smmu(const VirtMachineState *vms,
         sysbus_connect_irq(SYS_BUS_DEVICE(dev), i,
                            qdev_get_gpio_in(vms->gic, irq + i));
     }
-
-    node = g_strdup_printf("/smmuv3@%" PRIx64, base);
-    qemu_fdt_add_subnode(ms->fdt, node);
-    qemu_fdt_setprop(ms->fdt, node, "compatible", compat, sizeof(compat));
-    qemu_fdt_setprop_sized_cells(ms->fdt, node, "reg", 2, base, 2, size);
-
-    qemu_fdt_setprop_cells(ms->fdt, node, "interrupts",
-            GIC_FDT_IRQ_TYPE_SPI, irq    , GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
-            GIC_FDT_IRQ_TYPE_SPI, irq + 1, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
-            GIC_FDT_IRQ_TYPE_SPI, irq + 2, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
-            GIC_FDT_IRQ_TYPE_SPI, irq + 3, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
-
-    qemu_fdt_setprop(ms->fdt, node, "interrupt-names", irq_names,
-                     sizeof(irq_names));
-
-    qemu_fdt_setprop(ms->fdt, node, "dma-coherent", NULL, 0);
-
-    qemu_fdt_setprop_cell(ms->fdt, node, "#iommu-cells", 1);
-
-    qemu_fdt_setprop_cell(ms->fdt, node, "phandle", vms->iommu_phandle);
-    g_free(node);
+    create_smmuv3_dt_bindings(vms, base, size, irq);
 }
 
 static void create_virtio_iommu_dt_bindings(VirtMachineState *vms)
@@ -1649,6 +1680,7 @@ static void create_pcie(VirtMachineState *vms)
                 qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map",
                                        0x0, vms->iommu_phandle, 0x0, 0x10000);
             }
+            vms->legacy_smmuv3_present = true;
             break;
         default:
             g_assert_not_reached();
@@ -2996,6 +3028,16 @@ static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
         qlist_append_str(reserved_regions, resv_prop_str);
         qdev_prop_set_array(dev, "reserved-regions", reserved_regions);
         g_free(resv_prop_str);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3)) {
+        if (vms->legacy_smmuv3_present || vms->iommu == VIRT_IOMMU_VIRTIO) {
+            error_setg(errp, "virt machine already has %s set. "
+                       "Doesn't support incompatible iommus",
+                       (vms->legacy_smmuv3_present) ?
+                       "iommu=smmuv3" : "virtio-iommu");
+        } else if (vms->iommu == VIRT_IOMMU_NONE) {
+            /* The new SMMUv3 device is specific to the PCI bus */
+            object_property_set_bool(OBJECT(dev), "smmu_per_bus", true, NULL);
+        }
     }
 }
 
@@ -3019,6 +3061,22 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
         virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp);
     }
 
+    if (object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3)) {
+        if (!vms->legacy_smmuv3_present && vms->platform_bus_dev) {
+            PCIBus *bus;
+
+            bus = PCI_BUS(object_property_get_link(OBJECT(dev), "primary-bus",
+                                                   &error_abort));
+            if (pci_bus_bypass_iommu(bus)) {
+                error_setg(errp, "Bypass option cannot be set for SMMUv3 "
+                           "associated PCIe RC");
+                return;
+            }
+
+            create_smmuv3_dev_dtb(vms, dev, bus);
+        }
+    }
+
     if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
         PCIDevice *pdev = PCI_DEVICE(dev);
 
@@ -3218,6 +3276,7 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data)
     mc->max_cpus = 512;
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_UEFI_VARS_SYSBUS);
+    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ARM_SMMUV3);
 #ifdef CONFIG_TPM
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
 #endif
diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
index 07117363a6..59f1d17de1 100644
--- a/hw/core/sysbus-fdt.c
+++ b/hw/core/sysbus-fdt.c
@@ -31,6 +31,7 @@
 #include "qemu/error-report.h"
 #include "system/device_tree.h"
 #include "system/tpm.h"
+#include "hw/arm/smmuv3.h"
 #include "hw/platform-bus.h"
 #include "hw/display/ramfb.h"
 #include "hw/uefi/var-service-api.h"
@@ -135,6 +136,8 @@ static const BindingEntry bindings[] = {
 #ifdef CONFIG_TPM
     TYPE_BINDING(TYPE_TPM_TIS_SYSBUS, add_tpm_tis_fdt_node),
 #endif
+    /* No generic DT support for smmuv3 dev. Support added for arm virt only */
+    TYPE_BINDING(TYPE_ARM_SMMUV3, no_fdt_node),
     TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node),
     TYPE_BINDING(TYPE_UEFI_VARS_SYSBUS, add_uefi_vars_node),
     TYPE_BINDING("", NULL), /* last element */
diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
index 3a29dfefc2..1bcceddbc4 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -34,7 +34,6 @@ typedef struct PXBBus PXBBus;
 DECLARE_INSTANCE_CHECKER(PXBBus, PXB_BUS,
                          TYPE_PXB_BUS)
 
-#define TYPE_PXB_PCIE_BUS "pxb-pcie-bus"
 DECLARE_INSTANCE_CHECKER(PXBBus, PXB_PCIE_BUS,
                          TYPE_PXB_PCIE_BUS)
 
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 297196b242..c3df9d6656 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2912,6 +2912,19 @@ static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
             }
         }
 
+        /*
+         * When multiple PCI Express Root Buses are defined using pxb-pcie,
+         * the IOMMU configuration may be specific to each root bus. However,
+         * pxb-pcie acts as a special root complex whose parent is effectively
+         * the default root complex (pcie.0). Ensure that we retrieve the
+         * correct IOMMU ops (if any) in such cases.
+         */
+        if (pci_bus_is_express(iommu_bus) && pci_bus_is_root(iommu_bus)) {
+            if (parent_bus->iommu_per_bus) {
+                break;
+            }
+        }
+
         iommu_bus = parent_bus;
     }
 
@@ -3172,6 +3185,24 @@ void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
     bus->iommu_opaque = opaque;
 }
 
+/*
+ * Similar to pci_setup_iommu(), but sets iommu_per_bus to true,
+ * indicating that the IOMMU is specific to this bus. This is used by
+ * IOMMU implementations that are tied to a specific PCIe root complex.
+ *
+ * In QEMU, pxb-pcie behaves as a special root complex whose parent is
+ * effectively the default root complex (pcie.0). The iommu_per_bus
+ * is checked in pci_device_get_iommu_bus_devfn() to ensure the correct
+ * IOMMU ops are returned, avoiding the use of the parent’s IOMMU when
+ * it's not appropriate.
+ */
+void pci_setup_iommu_per_bus(PCIBus *bus, const PCIIOMMUOps *ops,
+                             void *opaque)
+{
+    pci_setup_iommu(bus, ops, opaque);
+    bus->iommu_per_bus = true;
+}
+
 static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
 {
     Range *range = opaque;
diff --git a/hw/usb/dev-network.c b/hw/usb/dev-network.c
index 81cc09dcac..1df2454181 100644
--- a/hw/usb/dev-network.c
+++ b/hw/usb/dev-network.c
@@ -1383,7 +1383,7 @@ static void usb_net_realize(USBDevice *dev, Error **errp)
     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
     snprintf(s->usbstring_mac, sizeof(s->usbstring_mac),
              "%02x%02x%02x%02x%02x%02x",
-             0x40,
+             s->conf.macaddr.a[0],
              s->conf.macaddr.a[1],
              s->conf.macaddr.a[2],
              s->conf.macaddr.a[3],
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index e5e2d09294..80d0fecfde 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -161,6 +161,7 @@ struct SMMUState {
     QLIST_HEAD(, SMMUDevice) devices_with_notifiers;
     uint8_t bus_num;
     PCIBus *primary_bus;
+    bool smmu_per_bus; /* SMMU is specific to the primary_bus */
 };
 
 struct SMMUBaseClass {
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 365a28b082..ea2cff05b0 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -179,6 +179,7 @@ struct VirtMachineState {
     char *oem_table_id;
     bool ns_el2_virt_timer_irq;
     CXLState cxl_devices_state;
+    bool legacy_smmuv3_present;
 };
 
 #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM)
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 6b7d3ac8a3..6bccb25ac2 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -773,6 +773,8 @@ int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
  */
 void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque);
 
+void pci_setup_iommu_per_bus(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque);
+
 pcibus_t pci_bar_address(PCIDevice *d,
                          int reg, uint8_t type, pcibus_t size);
 
diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
index 8cdacbc4e1..a055fd8d32 100644
--- a/include/hw/pci/pci_bridge.h
+++ b/include/hw/pci/pci_bridge.h
@@ -104,6 +104,7 @@ typedef struct PXBPCIEDev {
     PXBDev parent_obj;
 } PXBPCIEDev;
 
+#define TYPE_PXB_PCIE_BUS "pxb-pcie-bus"
 #define TYPE_PXB_CXL_BUS "pxb-cxl-bus"
 #define TYPE_PXB_DEV "pxb"
 OBJECT_DECLARE_SIMPLE_TYPE(PXBDev, PXB_DEV)
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 2261312546..c738446788 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -35,6 +35,7 @@ struct PCIBus {
     enum PCIBusFlags flags;
     const PCIIOMMUOps *iommu_ops;
     void *iommu_opaque;
+    bool iommu_per_bus;
     uint8_t devfn_min;
     uint32_t slot_reserved_mask;
     pci_set_irq_fn set_irq;
diff --git a/include/system/system.h b/include/system/system.h
index a7effe7dfd..03a2d0e900 100644
--- a/include/system/system.h
+++ b/include/system/system.h
@@ -42,7 +42,6 @@ extern int graphic_height;
 extern int graphic_depth;
 extern int display_opengl;
 extern const char *keyboard_layout;
-extern int old_param;
 extern uint8_t *boot_splash_filedata;
 extern bool enable_cpu_pm;
 extern QEMUClockType rtc_clock;
diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
index 4c4921152e..6060572eed 100644
--- a/linux-user/aarch64/cpu_loop.c
+++ b/linux-user/aarch64/cpu_loop.c
@@ -27,11 +27,132 @@
 #include "target/arm/syndrome.h"
 #include "target/arm/cpu-features.h"
 
+/* Use the exception syndrome to map a cpu exception to a signal. */
+static void signal_for_exception(CPUARMState *env, vaddr addr)
+{
+    uint32_t syn = env->exception.syndrome;
+    int si_code, si_signo;
+
+    /* Let signal delivery see that ESR is live. */
+    env->cp15.esr_el[1] = syn;
+
+    switch (syn_get_ec(syn)) {
+    case EC_DATAABORT:
+    case EC_INSNABORT:
+        /* Both EC have the same format for FSC, or close enough. */
+        switch (extract32(syn, 0, 6)) {
+        case 0x04 ... 0x07: /* Translation fault, level {0-3} */
+            si_signo = TARGET_SIGSEGV;
+            si_code = TARGET_SEGV_MAPERR;
+            break;
+        case 0x09 ... 0x0b: /* Access flag fault, level {1-3} */
+        case 0x0d ... 0x0f: /* Permission fault, level {1-3} */
+            si_signo = TARGET_SIGSEGV;
+            si_code = TARGET_SEGV_ACCERR;
+            break;
+        case 0x11: /* Synchronous Tag Check Fault */
+            si_signo = TARGET_SIGSEGV;
+            si_code = TARGET_SEGV_MTESERR;
+            break;
+        case 0x21: /* Alignment fault */
+            si_signo = TARGET_SIGBUS;
+            si_code = TARGET_BUS_ADRALN;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        break;
+
+    case EC_PCALIGNMENT:
+        si_signo = TARGET_SIGBUS;
+        si_code = TARGET_BUS_ADRALN;
+        break;
+
+    case EC_UNCATEGORIZED:         /* E.g. undefined instruction */
+    case EC_SYSTEMREGISTERTRAP:    /* E.g. inaccessible register */
+    case EC_SMETRAP:               /* E.g. invalid insn in streaming state */
+    case EC_BTITRAP:               /* E.g. invalid guarded branch target */
+    case EC_ILLEGALSTATE:
+        /*
+         * Illegal state happens via an ERET from a privileged mode,
+         * so is not normally possible from user-only.  However, gdbstub
+         * is not prevented from writing CPSR_IL, aka PSTATE.IL, which
+         * would generate a trap from the next translated block.
+         * In the kernel, default case -> el0_inv -> bad_el0_sync.
+         */
+        si_signo = TARGET_SIGILL;
+        si_code = TARGET_ILL_ILLOPC;
+        break;
+
+    case EC_PACFAIL:
+        si_signo = TARGET_SIGILL;
+        si_code = TARGET_ILL_ILLOPN;
+        break;
+
+    case EC_MOP:
+        /*
+         * FIXME: The kernel fixes up wrong-option exceptions.
+         * For QEMU linux-user mode, you can only get these if
+         * the process is doing something silly (not executing
+         * the MOPS instructions in the required P/M/E sequence),
+         * so it is not a problem in practice that we do not.
+         *
+         * We ought ideally to implement the same "rewind to the
+         * start of the sequence" logic that the kernel does in
+         * arm64_mops_reset_regs(). In the meantime, deliver
+         * the guest a SIGILL, with the same ILLOPN si_code
+         * we've always used for this.
+         */
+        si_signo = TARGET_SIGILL;
+        si_code = TARGET_ILL_ILLOPN;
+        break;
+
+    case EC_WFX_TRAP:              /* user-only WFI implemented as NOP */
+    case EC_CP15RTTRAP:            /* AArch32 */
+    case EC_CP15RRTTRAP:           /* AArch32 */
+    case EC_CP14RTTRAP:            /* AArch32 */
+    case EC_CP14DTTRAP:            /* AArch32 */
+    case EC_ADVSIMDFPACCESSTRAP:   /* user-only does not disable fpu */
+    case EC_FPIDTRAP:              /* AArch32 */
+    case EC_PACTRAP:               /* user-only does not disable pac regs */
+    case EC_BXJTRAP:               /* AArch32 */
+    case EC_CP14RRTTRAP:           /* AArch32 */
+    case EC_AA32_SVC:              /* AArch32 */
+    case EC_AA32_HVC:              /* AArch32 */
+    case EC_AA32_SMC:              /* AArch32 */
+    case EC_AA64_SVC:              /* generates EXCP_SWI */
+    case EC_AA64_HVC:              /* user-only generates EC_UNCATEGORIZED */
+    case EC_AA64_SMC:              /* user-only generates EC_UNCATEGORIZED */
+    case EC_SVEACCESSTRAP:         /* user-only does not disable sve */
+    case EC_ERETTRAP:              /* user-only generates EC_UNCATEGORIZED */
+    case EC_GPC:                   /* user-only has no EL3 gpc tables */
+    case EC_INSNABORT_SAME_EL:     /* el0 cannot trap to el0 */
+    case EC_DATAABORT_SAME_EL:     /* el0 cannot trap to el0 */
+    case EC_SPALIGNMENT:           /* sp alignment checks not implemented */
+    case EC_AA32_FPTRAP:           /* fp exceptions not implemented */
+    case EC_AA64_FPTRAP:           /* fp exceptions not implemented */
+    case EC_SERROR:                /* user-only does not have hw faults */
+    case EC_BREAKPOINT:            /* user-only does not have hw debug */
+    case EC_BREAKPOINT_SAME_EL:    /* user-only does not have hw debug */
+    case EC_SOFTWARESTEP:          /* user-only does not have hw debug */
+    case EC_SOFTWARESTEP_SAME_EL:  /* user-only does not have hw debug */
+    case EC_WATCHPOINT:            /* user-only does not have hw debug */
+    case EC_WATCHPOINT_SAME_EL:    /* user-only does not have hw debug */
+    case EC_AA32_BKPT:             /* AArch32 */
+    case EC_VECTORCATCH:           /* AArch32 */
+    case EC_AA64_BKPT:             /* generates EXCP_BKPT */
+    default:
+        g_assert_not_reached();
+    }
+
+    force_sig_fault(si_signo, si_code, addr);
+}
+
 /* AArch64 main loop */
 void cpu_loop(CPUARMState *env)
 {
     CPUState *cs = env_cpu(env);
-    int trapnr, ec, fsc, si_code, si_signo;
+    int trapnr;
     abi_long ret;
 
     for (;;) {
@@ -63,46 +184,11 @@ void cpu_loop(CPUARMState *env)
             /* just indicate that signals should be handled asap */
             break;
         case EXCP_UDEF:
-            force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->pc);
+            signal_for_exception(env, env->pc);
             break;
         case EXCP_PREFETCH_ABORT:
         case EXCP_DATA_ABORT:
-            ec = syn_get_ec(env->exception.syndrome);
-            switch (ec) {
-            case EC_DATAABORT:
-            case EC_INSNABORT:
-                /* Both EC have the same format for FSC, or close enough. */
-                fsc = extract32(env->exception.syndrome, 0, 6);
-                switch (fsc) {
-                case 0x04 ... 0x07: /* Translation fault, level {0-3} */
-                    si_signo = TARGET_SIGSEGV;
-                    si_code = TARGET_SEGV_MAPERR;
-                    break;
-                case 0x09 ... 0x0b: /* Access flag fault, level {1-3} */
-                case 0x0d ... 0x0f: /* Permission fault, level {1-3} */
-                    si_signo = TARGET_SIGSEGV;
-                    si_code = TARGET_SEGV_ACCERR;
-                    break;
-                case 0x11: /* Synchronous Tag Check Fault */
-                    si_signo = TARGET_SIGSEGV;
-                    si_code = TARGET_SEGV_MTESERR;
-                    break;
-                case 0x21: /* Alignment fault */
-                    si_signo = TARGET_SIGBUS;
-                    si_code = TARGET_BUS_ADRALN;
-                    break;
-                default:
-                    g_assert_not_reached();
-                }
-                break;
-            case EC_PCALIGNMENT:
-                si_signo = TARGET_SIGBUS;
-                si_code = TARGET_BUS_ADRALN;
-                break;
-            default:
-                g_assert_not_reached();
-            }
-            force_sig_fault(si_signo, si_code, env->exception.vaddress);
+            signal_for_exception(env, env->exception.vaddress);
             break;
         case EXCP_DEBUG:
         case EXCP_BKPT:
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
index 668353bbda..ef97be3ac7 100644
--- a/linux-user/aarch64/signal.c
+++ b/linux-user/aarch64/signal.c
@@ -65,6 +65,13 @@ struct target_fpsimd_context {
     uint64_t vregs[32 * 2]; /* really uint128_t vregs[32] */
 };
 
+#define TARGET_ESR_MAGIC    0x45535201
+
+struct target_esr_context {
+    struct target_aarch64_ctx head;
+    uint64_t esr;
+};
+
 #define TARGET_EXTRA_MAGIC  0x45585401
 
 struct target_extra_context {
@@ -201,6 +208,14 @@ static void target_setup_fpsimd_record(struct target_fpsimd_context *fpsimd,
     }
 }
 
+static void target_setup_esr_record(struct target_esr_context *ctx,
+                                    CPUARMState *env)
+{
+    __put_user(TARGET_ESR_MAGIC, &ctx->head.magic);
+    __put_user(sizeof(*ctx), &ctx->head.size);
+    __put_user(env->cp15.esr_el[1], &ctx->esr);
+}
+
 static void target_setup_extra_record(struct target_extra_context *extra,
                                       uint64_t datap, uint32_t extra_size)
 {
@@ -531,6 +546,9 @@ static int target_restore_sigframe(CPUARMState *env,
             fpsimd = (struct target_fpsimd_context *)ctx;
             break;
 
+        case TARGET_ESR_MAGIC:
+            break; /* ignore */
+
         case TARGET_SVE_MAGIC:
             if (sve || size < sizeof(struct target_sve_context)) {
                 goto err;
@@ -683,7 +701,7 @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
                                uc.tuc_mcontext.__reserved),
     };
     int fpsimd_ofs, fr_ofs, sve_ofs = 0, za_ofs = 0, tpidr2_ofs = 0;
-    int zt_ofs = 0;
+    int zt_ofs = 0, esr_ofs = 0;
     int sve_size = 0, za_size = 0, tpidr2_size = 0, zt_size = 0;
     struct target_rt_sigframe *frame;
     struct target_rt_frame_record *fr;
@@ -693,6 +711,15 @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
     fpsimd_ofs = alloc_sigframe_space(sizeof(struct target_fpsimd_context),
                                       &layout);
 
+    /*
+     * In user mode, ESR_EL1 is only set by cpu_loop while queueing the
+     * signal, and it's only valid for the one sync insn.
+     */
+    if (env->cp15.esr_el[1]) {
+        esr_ofs = alloc_sigframe_space(sizeof(struct target_esr_context),
+                                       &layout);
+    }
+
     /* SVE state needs saving only if it exists.  */
     if (cpu_isar_feature(aa64_sve, env_archcpu(env)) ||
         cpu_isar_feature(aa64_sme, env_archcpu(env))) {
@@ -754,6 +781,11 @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
 
     target_setup_general_frame(frame, env, set);
     target_setup_fpsimd_record((void *)frame + fpsimd_ofs, env);
+    if (esr_ofs) {
+        target_setup_esr_record((void *)frame + esr_ofs, env);
+        /* Leave ESR_EL1 clear while it's not relevant. */
+        env->cp15.esr_el[1] = 0;
+    }
     target_setup_end_record((void *)frame + layout.std_end_ofs);
     if (layout.extra_ofs) {
         target_setup_extra_record((void *)frame + layout.extra_ofs,
diff --git a/linux-user/arm/elfload.c b/linux-user/arm/elfload.c
index 308ed23fcb..b1a4db4466 100644
--- a/linux-user/arm/elfload.c
+++ b/linux-user/arm/elfload.c
@@ -76,7 +76,6 @@ abi_ulong get_elf_hwcap(CPUState *cs)
 
     /* EDSP is in v5TE and above, but all our v5 CPUs are v5TE */
     GET_FEATURE(ARM_FEATURE_V5, ARM_HWCAP_ARM_EDSP);
-    GET_FEATURE(ARM_FEATURE_IWMMXT, ARM_HWCAP_ARM_IWMMXT);
     GET_FEATURE(ARM_FEATURE_THUMB2EE, ARM_HWCAP_ARM_THUMBEE);
     GET_FEATURE(ARM_FEATURE_NEON, ARM_HWCAP_ARM_NEON);
     GET_FEATURE(ARM_FEATURE_V6K, ARM_HWCAP_ARM_TLS);
diff --git a/linux-user/arm/signal.c b/linux-user/arm/signal.c
index 8db1c4b233..3b387cd6d7 100644
--- a/linux-user/arm/signal.c
+++ b/linux-user/arm/signal.c
@@ -76,21 +76,7 @@ struct target_vfp_sigframe {
     struct target_user_vfp_exc ufp_exc;
 } __attribute__((__aligned__(8)));
 
-struct target_iwmmxt_sigframe {
-    abi_ulong magic;
-    abi_ulong size;
-    uint64_t regs[16];
-    /* Note that not all the coprocessor control registers are stored here */
-    uint32_t wcssf;
-    uint32_t wcasf;
-    uint32_t wcgr0;
-    uint32_t wcgr1;
-    uint32_t wcgr2;
-    uint32_t wcgr3;
-} __attribute__((__aligned__(8)));
-
 #define TARGET_VFP_MAGIC 0x56465001
-#define TARGET_IWMMXT_MAGIC 0x12ef842a
 
 struct sigframe
 {
@@ -267,25 +253,6 @@ static abi_ulong *setup_sigframe_vfp(abi_ulong *regspace, CPUARMState *env)
     return (abi_ulong*)(vfpframe+1);
 }
 
-static abi_ulong *setup_sigframe_iwmmxt(abi_ulong *regspace, CPUARMState *env)
-{
-    int i;
-    struct target_iwmmxt_sigframe *iwmmxtframe;
-    iwmmxtframe = (struct target_iwmmxt_sigframe *)regspace;
-    __put_user(TARGET_IWMMXT_MAGIC, &iwmmxtframe->magic);
-    __put_user(sizeof(*iwmmxtframe), &iwmmxtframe->size);
-    for (i = 0; i < 16; i++) {
-        __put_user(env->iwmmxt.regs[i], &iwmmxtframe->regs[i]);
-    }
-    __put_user(env->vfp.xregs[ARM_IWMMXT_wCSSF], &iwmmxtframe->wcssf);
-    __put_user(env->vfp.xregs[ARM_IWMMXT_wCASF], &iwmmxtframe->wcssf);
-    __put_user(env->vfp.xregs[ARM_IWMMXT_wCGR0], &iwmmxtframe->wcgr0);
-    __put_user(env->vfp.xregs[ARM_IWMMXT_wCGR1], &iwmmxtframe->wcgr1);
-    __put_user(env->vfp.xregs[ARM_IWMMXT_wCGR2], &iwmmxtframe->wcgr2);
-    __put_user(env->vfp.xregs[ARM_IWMMXT_wCGR3], &iwmmxtframe->wcgr3);
-    return (abi_ulong*)(iwmmxtframe+1);
-}
-
 static void setup_sigframe(struct target_ucontext *uc,
                            target_sigset_t *set, CPUARMState *env)
 {
@@ -306,9 +273,6 @@ static void setup_sigframe(struct target_ucontext *uc,
     if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))) {
         regspace = setup_sigframe_vfp(regspace, env);
     }
-    if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
-        regspace = setup_sigframe_iwmmxt(regspace, env);
-    }
 
     /* Write terminating magic word */
     __put_user(0, regspace);
@@ -435,31 +399,6 @@ static abi_ulong *restore_sigframe_vfp(CPUARMState *env, abi_ulong *regspace)
     return (abi_ulong*)(vfpframe + 1);
 }
 
-static abi_ulong *restore_sigframe_iwmmxt(CPUARMState *env,
-                                          abi_ulong *regspace)
-{
-    int i;
-    abi_ulong magic, sz;
-    struct target_iwmmxt_sigframe *iwmmxtframe;
-    iwmmxtframe = (struct target_iwmmxt_sigframe *)regspace;
-
-    __get_user(magic, &iwmmxtframe->magic);
-    __get_user(sz, &iwmmxtframe->size);
-    if (magic != TARGET_IWMMXT_MAGIC || sz != sizeof(*iwmmxtframe)) {
-        return 0;
-    }
-    for (i = 0; i < 16; i++) {
-        __get_user(env->iwmmxt.regs[i], &iwmmxtframe->regs[i]);
-    }
-    __get_user(env->vfp.xregs[ARM_IWMMXT_wCSSF], &iwmmxtframe->wcssf);
-    __get_user(env->vfp.xregs[ARM_IWMMXT_wCASF], &iwmmxtframe->wcssf);
-    __get_user(env->vfp.xregs[ARM_IWMMXT_wCGR0], &iwmmxtframe->wcgr0);
-    __get_user(env->vfp.xregs[ARM_IWMMXT_wCGR1], &iwmmxtframe->wcgr1);
-    __get_user(env->vfp.xregs[ARM_IWMMXT_wCGR2], &iwmmxtframe->wcgr2);
-    __get_user(env->vfp.xregs[ARM_IWMMXT_wCGR3], &iwmmxtframe->wcgr3);
-    return (abi_ulong*)(iwmmxtframe + 1);
-}
-
 static int do_sigframe_return(CPUARMState *env,
                               target_ulong context_addr,
                               struct target_ucontext *uc)
@@ -482,12 +421,6 @@ static int do_sigframe_return(CPUARMState *env,
             return 1;
         }
     }
-    if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
-        regspace = restore_sigframe_iwmmxt(env, regspace);
-        if (!regspace) {
-            return 1;
-        }
-    }
 
     target_restore_altstack(&uc->tuc_stack, env);
 
diff --git a/qemu-options.hx b/qemu-options.hx
index ab23f14d21..075f4be2e3 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1231,6 +1231,13 @@ SRST
     ``aw-bits=val`` (val between 32 and 64, default depends on machine)
         This decides the address width of the IOVA address space.
 
+``-device arm-smmuv3,primary-bus=id``
+    This is only supported by ``-machine virt`` (ARM).
+
+    ``primary-bus=id``
+        Accepts either the default root complex (pcie.0) or a
+        pxb-pcie based root complex.
+
 ERST
 
 DEF("name", HAS_ARG, QEMU_OPTION_name,
@@ -5347,13 +5354,6 @@ SRST
         specified, the former is passed to semihosting as it always
         takes precedence.
 ERST
-DEF("old-param", 0, QEMU_OPTION_old_param,
-    "-old-param      old param mode\n", QEMU_ARCH_ARM)
-SRST
-``-old-param``
-    Old param mode (ARM only).
-ERST
-
 DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
     "-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \
     "          [,spawn=allow|deny][,resourcecontrol=allow|deny]\n" \
diff --git a/scripts/userfaultfd-wrlat.py b/scripts/userfaultfd-wrlat.py
index 0684be4e04..a61a9abbfc 100755
--- a/scripts/userfaultfd-wrlat.py
+++ b/scripts/userfaultfd-wrlat.py
@@ -17,7 +17,6 @@
 # This work is licensed under the terms of the GNU GPL, version 2 or
 # later.  See the COPYING file in the top-level directory.
 
-from __future__ import print_function
 from bcc import BPF
 from ctypes import c_ushort, c_int, c_ulonglong
 from time import sleep
diff --git a/system/globals.c b/system/globals.c
index 9640c9511e..98f9876d5d 100644
--- a/system/globals.c
+++ b/system/globals.c
@@ -52,7 +52,6 @@ bool vga_interface_created;
 Chardev *parallel_hds[MAX_PARALLEL_PORTS];
 QEMUOptionRom option_rom[MAX_OPTION_ROMS];
 int nb_option_roms;
-int old_param;
 const char *qemu_name;
 unsigned int nb_prom_envs;
 const char *prom_envs[MAX_PROM_ENVS];
diff --git a/system/vl.c b/system/vl.c
index 3b7057e6c6..00f3694725 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -3524,10 +3524,6 @@ void qemu_init(int argc, char **argv)
                 prom_envs[nb_prom_envs] = optarg;
                 nb_prom_envs++;
                 break;
-            case QEMU_OPTION_old_param:
-                warn_report("-old-param is deprecated");
-                old_param = 1;
-                break;
             case QEMU_OPTION_rtc:
                 opts = qemu_opts_parse_noisily(qemu_find_opts("rtc"), optarg,
                                                false);
diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
index c9506aa6d5..2a4826f5c4 100644
--- a/target/arm/cpregs.h
+++ b/target/arm/cpregs.h
@@ -408,10 +408,19 @@ FIELD(HFGRTR_EL2, ERXPFGCTL_EL1, 47, 1)
 FIELD(HFGRTR_EL2, ERXPFGCDN_EL1, 48, 1)
 FIELD(HFGRTR_EL2, ERXADDR_EL1, 49, 1)
 FIELD(HFGRTR_EL2, NACCDATA_EL1, 50, 1)
-/* 51-53: RES0 */
+/* 51: RES0 */
+FIELD(HFGRTR_EL2, NGCS_EL0, 52, 1)
+FIELD(HFGRTR_EL2, NGCS_EL1, 53, 1)
 FIELD(HFGRTR_EL2, NSMPRI_EL1, 54, 1)
 FIELD(HFGRTR_EL2, NTPIDR2_EL0, 55, 1)
-/* 56-63: RES0 */
+FIELD(HFGRTR_EL2, NRCWMASK_EL1, 56, 1)
+FIELD(HFGRTR_EL2, NPIRE0_EL1, 57, 1)
+FIELD(HFGRTR_EL2, NPIR_EL1, 58, 1)
+FIELD(HFGRTR_EL2, NPOR_EL0, 59, 1)
+FIELD(HFGRTR_EL2, NPOR_EL1, 60, 1)
+FIELD(HFGRTR_EL2, NS2POR_EL1, 61, 1)
+FIELD(HFGRTR_EL2, NMAIR2_EL1, 62, 1)
+FIELD(HFGRTR_EL2, NAMAIR2_EL1, 63, 1)
 
 /* These match HFGRTR but bits for RO registers are RES0 */
 FIELD(HFGWTR_EL2, AFSR0_EL1, 0, 1)
@@ -452,8 +461,18 @@ FIELD(HFGWTR_EL2, ERXPFGCTL_EL1, 47, 1)
 FIELD(HFGWTR_EL2, ERXPFGCDN_EL1, 48, 1)
 FIELD(HFGWTR_EL2, ERXADDR_EL1, 49, 1)
 FIELD(HFGWTR_EL2, NACCDATA_EL1, 50, 1)
+FIELD(HFGWTR_EL2, NGCS_EL0, 52, 1)
+FIELD(HFGWTR_EL2, NGCS_EL1, 53, 1)
 FIELD(HFGWTR_EL2, NSMPRI_EL1, 54, 1)
 FIELD(HFGWTR_EL2, NTPIDR2_EL0, 55, 1)
+FIELD(HFGWTR_EL2, NRCWMASK_EL1, 56, 1)
+FIELD(HFGWTR_EL2, NPIRE0_EL1, 57, 1)
+FIELD(HFGWTR_EL2, NPIR_EL1, 58, 1)
+FIELD(HFGWTR_EL2, NPOR_EL0, 59, 1)
+FIELD(HFGWTR_EL2, NPOR_EL1, 60, 1)
+FIELD(HFGWTR_EL2, NS2POR_EL1, 61, 1)
+FIELD(HFGWTR_EL2, NMAIR2_EL1, 62, 1)
+FIELD(HFGWTR_EL2, NAMAIR2_EL1, 63, 1)
 
 FIELD(HFGITR_EL2, ICIALLUIS, 0, 1)
 FIELD(HFGITR_EL2, ICIALLU, 1, 1)
@@ -512,6 +531,11 @@ FIELD(HFGITR_EL2, SVC_EL1, 53, 1)
 FIELD(HFGITR_EL2, DCCVAC, 54, 1)
 FIELD(HFGITR_EL2, NBRBINJ, 55, 1)
 FIELD(HFGITR_EL2, NBRBIALL, 56, 1)
+FIELD(HFGITR_EL2, NGCSPUSHM_EL1, 57, 1)
+FIELD(HFGITR_EL2, NGCSSTR_EL1, 58, 1)
+FIELD(HFGITR_EL2, NGCSEPP, 59, 1)
+FIELD(HFGITR_EL2, COSPRCTX, 60, 1)
+FIELD(HFGITR_EL2, ATS1E1A, 62, 1)
 
 FIELD(HDFGRTR_EL2, DBGBCRN_EL1, 0, 1)
 FIELD(HDFGRTR_EL2, DBGBVRN_EL1, 1, 1)
@@ -830,6 +854,7 @@ typedef enum FGTBit {
     DO_BIT(HFGITR, DVPRCTX),
     DO_BIT(HFGITR, CPPRCTX),
     DO_BIT(HFGITR, DCCVAC),
+    DO_BIT(HFGITR, ATS1E1A),
 } FGTBit;
 
 #undef DO_BIT
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index e49e0ae3af..512eeaf551 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -619,6 +619,11 @@ static inline bool isar_feature_aa64_lut(const ARMISARegisters *id)
     return FIELD_EX64_IDREG(id, ID_AA64ISAR2, LUT);
 }
 
+/* Report whether the ATS1A field of ID_AA64ISAR2 is non-zero. */
+static inline bool isar_feature_aa64_ats1a(const ARMISARegisters *id)
+{
+    return FIELD_EX64_IDREG(id, ID_AA64ISAR2, ATS1A) != 0;
+}
+
 static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id)
 {
     /* We always set the AdvSIMD and FP fields identically.  */
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index d0f6fcdfce..02e2a31a86 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -247,10 +247,6 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
 
     cpu->power_state = cs->start_powered_off ? PSCI_OFF : PSCI_ON;
 
-    if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
-        env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q';
-    }
-
     if (arm_feature(env, ARM_FEATURE_AARCH64)) {
         /* 64 bit CPUs always start in 64 bit mode */
         env->aarch64 = true;
@@ -349,11 +345,6 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
     env->uncached_cpsr = ARM_CPU_MODE_USR;
     /* For user mode we must enable access to coprocessors */
     env->vfp.xregs[ARM_VFP_FPEXC] = 1 << 30;
-    if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
-        env->cp15.c15_cpar = 3;
-    } else if (arm_feature(env, ARM_FEATURE_XSCALE)) {
-        env->cp15.c15_cpar = 1;
-    }
 #else
 
     /*
@@ -2259,14 +2250,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     }
 
 
-    /*
-     * We rely on no XScale CPU having VFP so we can use the same bits in the
-     * TB flags field for VECSTRIDE and XSCALE_CPAR.
-     */
-    assert(arm_feature(env, ARM_FEATURE_AARCH64) ||
-           !cpu_isar_feature(aa32_vfp_simd, cpu) ||
-           !arm_feature(env, ARM_FEATURE_XSCALE));
-
 #ifndef CONFIG_USER_ONLY
     {
         int pagebits;
@@ -2623,14 +2606,10 @@ static const Property arm_cpu_properties[] = {
 static const gchar *arm_gdb_arch_name(CPUState *cs)
 {
     ARMCPU *cpu = ARM_CPU(cs);
-    CPUARMState *env = &cpu->env;
 
     if (arm_gdbstub_is_aarch64(cpu)) {
         return "aarch64";
     }
-    if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
-        return "iwmmxt";
-    }
     return "arm";
 }
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index c15d79a106..1c0deb723d 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -341,7 +341,6 @@ typedef struct CPUArchState {
         uint64_t vsctlr; /* Virtualization System control register. */
         uint64_t cpacr_el1; /* Architectural feature access control register */
         uint64_t cptr_el[4];  /* ARMv8 feature trap registers */
-        uint32_t c1_xscaleauxcr; /* XScale auxiliary control register.  */
         uint64_t sder; /* Secure debug enable register. */
         uint32_t nsacr; /* Non-secure access control register. */
         union { /* MMU translation table base 0. */
@@ -513,7 +512,6 @@ typedef struct CPUArchState {
         uint64_t cntvoff_el2; /* Counter Virtual Offset register */
         uint64_t cntpoff_el2; /* Counter Physical Offset register */
         ARMGenericTimer c14_timer[NUM_GTIMERS];
-        uint32_t c15_cpar; /* XScale Coprocessor Access Register */
         uint32_t c15_ticonfig; /* TI925T configuration byte.  */
         uint32_t c15_i_max; /* Maximum D-cache dirty line index.  */
         uint32_t c15_i_min; /* Minimum D-cache dirty line index.  */
@@ -699,14 +697,6 @@ typedef struct CPUArchState {
      */
     uint64_t exclusive_high;
 
-    /* iwMMXt coprocessor state.  */
-    struct {
-        uint64_t regs[16];
-        uint64_t val;
-
-        uint32_t cregs[16];
-    } iwmmxt;
-
     struct {
         ARMPACKey apia;
         ARMPACKey apib;
@@ -935,6 +925,7 @@ struct ArchCPU {
 
     DynamicGDBFeatureInfo dyn_sysreg_feature;
     DynamicGDBFeatureInfo dyn_svereg_feature;
+    DynamicGDBFeatureInfo dyn_smereg_feature;
     DynamicGDBFeatureInfo dyn_m_systemreg_feature;
     DynamicGDBFeatureInfo dyn_m_secextreg_feature;
 
@@ -1865,16 +1856,6 @@ enum arm_cpu_mode {
 /* QEMU-internal value meaning "FPSCR, but we care only about NZCV" */
 #define QEMU_VFP_FPSCR_NZCV 0xffff
 
-/* iwMMXt coprocessor control registers.  */
-#define ARM_IWMMXT_wCID  0
-#define ARM_IWMMXT_wCon  1
-#define ARM_IWMMXT_wCSSF 2
-#define ARM_IWMMXT_wCASF 3
-#define ARM_IWMMXT_wCGR0 8
-#define ARM_IWMMXT_wCGR1 9
-#define ARM_IWMMXT_wCGR2 10
-#define ARM_IWMMXT_wCGR3 11
-
 /* V7M CCR bits */
 FIELD(V7M_CCR, NONBASETHRDENA, 0, 1)
 FIELD(V7M_CCR, USERSETMPEND, 1, 1)
@@ -2444,8 +2425,6 @@ QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK);
  */
 enum arm_features {
     ARM_FEATURE_AUXCR,  /* ARM1026 Auxiliary control register.  */
-    ARM_FEATURE_XSCALE, /* Intel XScale extensions.  */
-    ARM_FEATURE_IWMMXT, /* Intel iwMMXt extension.  */
     ARM_FEATURE_V6,
     ARM_FEATURE_V6K,
     ARM_FEATURE_V7,
@@ -3025,13 +3004,6 @@ FIELD(TBFLAG_AM32, THUMB, 23, 1)         /* Not cached. */
  */
 FIELD(TBFLAG_A32, VECLEN, 0, 3)         /* Not cached. */
 FIELD(TBFLAG_A32, VECSTRIDE, 3, 2)     /* Not cached. */
-/*
- * We store the bottom two bits of the CPAR as TB flags and handle
- * checks on the other bits at runtime. This shares the same bits as
- * VECSTRIDE, which is OK as no XScale CPU has VFP.
- * Not cached, because VECLEN+VECSTRIDE are not cached.
- */
-FIELD(TBFLAG_A32, XSCALE_CPAR, 5, 2)
 FIELD(TBFLAG_A32, VFPEN, 7, 1)         /* Partially cached, minus FPEXC. */
 FIELD(TBFLAG_A32, SCTLR__B, 8, 1)      /* Cannot overlap with SCTLR_B */
 FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1)
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index ce4497ad7c..2d331fff44 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -527,7 +527,8 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
          * registers so we don't need to include both.
          */
 #ifdef TARGET_AARCH64
-        if (isar_feature_aa64_sve(&cpu->isar)) {
+        if (isar_feature_aa64_sve(&cpu->isar) ||
+            isar_feature_aa64_sme(&cpu->isar)) {
             GDBFeature *feature = arm_gen_dynamic_svereg_feature(cs, cs->gdb_num_regs);
             gdb_register_coprocessor(cs, aarch64_gdb_get_sve_reg,
                                      aarch64_gdb_set_sve_reg, feature, 0);
@@ -537,6 +538,13 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
                                      gdb_find_static_feature("aarch64-fpu.xml"),
                                      0);
         }
+
+        if (isar_feature_aa64_sme(&cpu->isar)) {
+            GDBFeature *sme_feature =
+                arm_gen_dynamic_smereg_feature(cs, cs->gdb_num_regs);
+            gdb_register_coprocessor(cs, aarch64_gdb_get_sme_reg,
+                                     aarch64_gdb_set_sme_reg, sme_feature, 0);
+        }
         /*
          * Note that we report pauth information via the feature name
          * org.gnu.gdb.aarch64.pauth_v2, not org.gnu.gdb.aarch64.pauth.
diff --git a/target/arm/gdbstub64.c b/target/arm/gdbstub64.c
index 08e2858539..3bccde2bf2 100644
--- a/target/arm/gdbstub64.c
+++ b/target/arm/gdbstub64.c
@@ -249,6 +249,90 @@ int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg)
     return 0;
 }
 
+/*
+ * gdbstub read accessor for the dynamically generated SME feature.
+ *
+ * @cs: CPU whose state is read
+ * @buf: byte array the register contents are appended to
+ * @reg: feature-relative register number: 0 = svg, 1 = svcr, 2 = za
+ *
+ * Returns what the gdb_get_reg*() helpers report (accumulated over all
+ * quadwords for za), or 0 if @reg is not one of the registers above.
+ */
+int aarch64_gdb_get_sme_reg(CPUState *cs, GByteArray *buf, int reg)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+
+    switch (reg) {
+    case 0: /* svg register */
+    {
+        /* svg reads as 0 while SVCR.SM is clear */
+        int vq = 0;
+        if (FIELD_EX64(env->svcr, SVCR, SM)) {
+            vq = sve_vqm1_for_el_sm(env, arm_current_el(env),
+                                    FIELD_EX64(env->svcr, SVCR, SM)) + 1;
+        }
+        /* svg = vector granules (2 * vector quadwords) in streaming mode */
+        return gdb_get_reg64(buf, vq * 2);
+    }
+    case 1: /* svcr register */
+        return gdb_get_reg64(buf, env->svcr);
+    case 2: /* za register */
+    {
+        /*
+         * Emit the whole array sized by sme_max_vq: vq * 16 rows of
+         * vq quadwords each, row by row.
+         */
+        int len = 0;
+        int vq = cpu->sme_max_vq;
+        int svl = vq * 16;
+        for (int i = 0; i < svl; i++) {
+            for (int q = 0; q < vq; q++) {
+                len += gdb_get_reg128(buf,
+                                      env->za_state.za[i].d[q * 2 + 1],
+                                      env->za_state.za[i].d[q * 2]);
+            }
+        }
+        return len;
+    }
+    default:
+        /* gdbstub asked for something out of range */
+        qemu_log_mask(LOG_UNIMP, "%s: out of range register %d\n",
+                      __func__, reg);
+        break;
+    }
+
+    return 0;
+}
+
+/*
+ * gdbstub write accessor for the dynamically generated SME feature.
+ *
+ * @cs: CPU whose state is updated
+ * @buf: raw register contents supplied by the debugger
+ * @reg: feature-relative register number: 0 = svg, 1 = svcr, 2 = za
+ *
+ * Returns 8 for svg and svcr, 16 per quadword stored for za, and 0 for
+ * any other register number.
+ */
+int aarch64_gdb_set_sme_reg(CPUState *cs, uint8_t *buf, int reg)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+
+    if (reg == 0) {
+        /* svg register: cannot set svg via gdbstub, nothing is stored */
+        return 8;
+    }
+
+    if (reg == 1) {
+        /* svcr register: SM and ZA are the bits passed as the mask */
+        uint64_t svcr = ldq_le_p(buf);
+
+        aarch64_set_svcr(env, svcr, R_SVCR_SM_MASK | R_SVCR_ZA_MASK);
+        return 8;
+    }
+
+    if (reg == 2) {
+        /* za register: vq * 16 rows, each made of vq quadwords */
+        int vq = cpu->sme_max_vq;
+        int rows = vq * 16;
+        int consumed = 0;
+
+        for (int row = 0; row < rows; row++) {
+            for (int q = 0; q < vq; q++) {
+                int lo = q * 2;
+                int hi = lo + 1;
+                uint64_t first = ldq_p(buf);
+                uint64_t second = ldq_p(buf + 8);
+
+                /* Which 64-bit half arrives first depends on endianness */
+                if (target_big_endian()) {
+                    env->za_state.za[row].d[hi] = first;
+                    env->za_state.za[row].d[lo] = second;
+                } else {
+                    env->za_state.za[row].d[lo] = first;
+                    env->za_state.za[row].d[hi] = second;
+                }
+                buf += 16;
+                consumed += 16;
+            }
+        }
+        return consumed;
+    }
+
+    /* gdbstub asked for something out of range */
+    return 0;
+}
+
 int aarch64_gdb_get_pauth_reg(CPUState *cs, GByteArray *buf, int reg)
 {
     ARMCPU *cpu = ARM_CPU(cs);
@@ -413,6 +497,41 @@ GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cs, int base_reg)
     return &cpu->dyn_svereg_feature.desc;
 }
 
+/*
+ * Build the dynamic GDB feature description for the SME registers.
+ *
+ * @cs: CPU to describe; the ZA dimensions are derived from sme_max_vq
+ * @base_reg: base register number handed to the feature builder
+ *
+ * Declares svg (0), svcr (1) and za (2), the numbering decoded by
+ * aarch64_gdb_get_sme_reg() and aarch64_gdb_set_sme_reg().
+ * Returns the descriptor stored in cpu->dyn_smereg_feature.
+ */
+GDBFeature *arm_gen_dynamic_smereg_feature(CPUState *cs, int base_reg)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    GDBFeature *desc = &cpu->dyn_smereg_feature.desc;
+    GDBFeatureBuilder builder;
+    /* 16 bytes per quadword: an "sme_bv" row holds this many uint8 */
+    int row_bytes = cpu->sme_max_vq * 16;
+    int regno = 0;
+
+    gdb_feature_builder_init(&builder, desc, "org.gnu.gdb.aarch64.sme",
+                             "sme-registers.xml", base_reg);
+
+    /* sme_bv: a vector of bytes. */
+    gdb_feature_builder_append_tag(
+        &builder, "<vector id=\"sme_bv\" type=\"uint8\" count=\"%d\"/>",
+        row_bytes);
+
+    /* sme_bvv: a vector of sme_bv, with the same element count. */
+    gdb_feature_builder_append_tag(
+        &builder, "<vector id=\"sme_bvv\" type=\"sme_bv\" count=\"%d\"/>",
+        row_bytes);
+
+    /* svg and svcr are 64 bits wide; za is row_bytes * row_bytes bytes. */
+    gdb_feature_builder_append_reg(&builder, "svg", 64, regno, "int", NULL);
+    regno++;
+    gdb_feature_builder_append_reg(&builder, "svcr", 64, regno, "int", NULL);
+    regno++;
+    gdb_feature_builder_append_reg(&builder, "za",
+                                   row_bytes * row_bytes * 8, regno,
+                                   "sme_bvv", NULL);
+
+    gdb_feature_builder_end(&builder);
+
+    return desc;
+}
+
 #ifdef CONFIG_USER_ONLY
 int aarch64_gdb_get_tag_ctl_reg(CPUState *cs, GByteArray *buf, int reg)
 {
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 19637e7301..fa8dfac299 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -2923,39 +2923,6 @@ static const ARMCPRegInfo omap_cp_reginfo[] = {
       .type = ARM_CP_CONST | ARM_CP_OVERRIDE, .resetvalue = 0 },
 };
 
-static void xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                              uint64_t value)
-{
-    env->cp15.c15_cpar = value & 0x3fff;
-}
-
-static const ARMCPRegInfo xscale_cp_reginfo[] = {
-    { .name = "XSCALE_CPAR",
-      .cp = 15, .crn = 15, .crm = 1, .opc1 = 0, .opc2 = 0, .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c15_cpar), .resetvalue = 0,
-      .writefn = xscale_cpar_write, },
-    { .name = "XSCALE_AUXCR",
-      .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 1, .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c1_xscaleauxcr),
-      .resetvalue = 0, },
-    /*
-     * XScale specific cache-lockdown: since we have no cache we NOP these
-     * and hope the guest does not really rely on cache behaviour.
-     */
-    { .name = "XSCALE_LOCK_ICACHE_LINE",
-      .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0,
-      .access = PL1_W, .type = ARM_CP_NOP },
-    { .name = "XSCALE_UNLOCK_ICACHE",
-      .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1,
-      .access = PL1_W, .type = ARM_CP_NOP },
-    { .name = "XSCALE_DCACHE_LOCK",
-      .cp = 15, .opc1 = 0, .crn = 9, .crm = 2, .opc2 = 0,
-      .access = PL1_RW, .type = ARM_CP_NOP },
-    { .name = "XSCALE_UNLOCK_DCACHE",
-      .cp = 15, .opc1 = 0, .crn = 9, .crm = 2, .opc2 = 1,
-      .access = PL1_W, .type = ARM_CP_NOP },
-};
-
 static const ARMCPRegInfo dummy_c15_cp_reginfo[] = {
     /*
      * RAZ/WI the whole crn=15 space, when we don't have a more specific
@@ -3346,16 +3313,6 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 
     /* This may enable/disable the MMU, so do a TLB flush.  */
     tlb_flush(CPU(cpu));
-
-    if (tcg_enabled() && ri->type & ARM_CP_SUPPRESS_TB_END) {
-        /*
-         * Normally we would always end the TB on an SCTLR write; see the
-         * comment in ARMCPRegInfo sctlr initialization below for why Xscale
-         * is special.  Setting ARM_CP_SUPPRESS_TB_END also stops the rebuild
-         * of hflags from the translator, so do it here.
-         */
-        arm_rebuild_hflags(env);
-    }
 }
 
 static void mdcr_el3_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -4563,11 +4520,6 @@ static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu)
         { K(3, 0, 14, 1, 0), K(3, 4, 14, 1, 0), K(3, 5, 14, 1, 0),
           "CNTKCTL", "CNTHCTL_EL2", "CNTKCTL_EL12" },
 
-        /*
-         * Note that redirection of ZCR is mentioned in the description
-         * of ZCR_EL2, and aliasing in the description of ZCR_EL1, but
-         * not in the summary table.
-         */
         { K(3, 0,  1, 2, 0), K(3, 4,  1, 2, 0), K(3, 5, 1, 2, 0),
           "ZCR_EL1", "ZCR_EL2", "ZCR_EL12", isar_feature_aa64_sve },
         { K(3, 0,  1, 2, 6), K(3, 4,  1, 2, 6), K(3, 5, 1, 2, 6),
@@ -6899,9 +6851,6 @@ void register_cp_regs_for_features(ARMCPU *cpu)
     if (arm_feature(env, ARM_FEATURE_STRONGARM)) {
         define_arm_cp_regs(cpu, strongarm_cp_reginfo);
     }
-    if (arm_feature(env, ARM_FEATURE_XSCALE)) {
-        define_arm_cp_regs(cpu, xscale_cp_reginfo);
-    }
     if (arm_feature(env, ARM_FEATURE_DUMMY_C15_REGS)) {
         define_arm_cp_regs(cpu, dummy_c15_cp_reginfo);
     }
@@ -7250,14 +7199,6 @@ void register_cp_regs_for_features(ARMCPU *cpu)
             .writefn = sctlr_write, .resetvalue = cpu->reset_sctlr,
             .raw_writefn = raw_write,
         };
-        if (arm_feature(env, ARM_FEATURE_XSCALE)) {
-            /*
-             * Normally we would always end the TB on an SCTLR write, but Linux
-             * arch/arm/mach-pxa/sleep.S expects two instructions following
-             * an MMU enable to execute from cache.  Imitate this behaviour.
-             */
-            sctlr.type |= ARM_CP_SUPPRESS_TB_END;
-        }
         define_one_arm_cp_reg(cpu, &sctlr);
 
         if (arm_feature(env, ARM_FEATURE_PMSA) &&
diff --git a/target/arm/internals.h b/target/arm/internals.h
index f5a1e75db3..532fabcafc 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1602,30 +1602,27 @@ bool get_phys_addr(CPUARMState *env, vaddr address,
     __attribute__((nonnull));
 
 /**
- * get_phys_addr_with_space_nogpc: get the physical address for a virtual
- *                                 address
+ * get_phys_addr_for_at:
  * @env: CPUARMState
  * @address: virtual address to get physical address for
- * @access_type: 0 for read, 1 for write, 2 for execute
- * @memop: memory operation feeding this access, or 0 for none
+ * @prot_check: PAGE_{READ,WRITE,EXEC}, or 0
  * @mmu_idx: MMU index indicating required translation regime
  * @space: security space for the access
  * @result: set on translation success.
  * @fi: set to fault info if the translation fails
  *
- * Similar to get_phys_addr, but use the given security space and don't perform
- * a Granule Protection Check on the resulting address.
- */
-bool get_phys_addr_with_space_nogpc(CPUARMState *env, vaddr address,
-                                    MMUAccessType access_type, MemOp memop,
-                                    ARMMMUIdx mmu_idx, ARMSecuritySpace space,
-                                    GetPhysAddrResult *result,
-                                    ARMMMUFaultInfo *fi)
+ * Similar to get_phys_addr, but for use by AccessType_AT, i.e.
+ * system instructions for address translation.
+ */
+bool get_phys_addr_for_at(CPUARMState *env, vaddr address, unsigned prot_check,
+                          ARMMMUIdx mmu_idx, ARMSecuritySpace space,
+                          GetPhysAddrResult *result, ARMMMUFaultInfo *fi)
     __attribute__((nonnull));
 
 bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
-                       MMUAccessType access_type, ARMMMUIdx mmu_idx,
-                       bool is_secure, GetPhysAddrResult *result,
+                       MMUAccessType access_type, unsigned prot_check,
+                       ARMMMUIdx mmu_idx, bool is_secure,
+                       GetPhysAddrResult *result,
                        ARMMMUFaultInfo *fi, uint32_t *mregion);
 
 void arm_log_exception(CPUState *cs);
@@ -1820,8 +1817,11 @@ static inline uint64_t pmu_counter_mask(CPUARMState *env)
 }
 
 GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cpu, int base_reg);
+GDBFeature *arm_gen_dynamic_smereg_feature(CPUState *cpu, int base_reg);
 int aarch64_gdb_get_sve_reg(CPUState *cs, GByteArray *buf, int reg);
 int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg);
+int aarch64_gdb_get_sme_reg(CPUState *cs, GByteArray *buf, int reg);
+int aarch64_gdb_set_sme_reg(CPUState *cs, uint8_t *buf, int reg);
 int aarch64_gdb_get_fpu_reg(CPUState *cs, GByteArray *buf, int reg);
 int aarch64_gdb_set_fpu_reg(CPUState *cs, uint8_t *buf, int reg);
 int aarch64_gdb_get_pauth_reg(CPUState *cs, GByteArray *buf, int reg);
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 6672344855..c1ec6654ca 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -900,6 +900,58 @@ bool write_kvmstate_to_list(ARMCPU *cpu)
     return ok;
 }
 
+/*
+ * Helpers for pretty-printing a KVM register.
+ *
+ * CP_REG_ARM64_SYSREG_OP extracts one encoding field from the register
+ * index _reg.  _op is the stem of a CP_REG_ARM64_SYSREG_<op>_MASK /
+ * CP_REG_ARM64_SYSREG_<op>_SHIFT pair (e.g. OP0, CRN).  _reg is
+ * parenthesized so that an expression argument is masked as a whole.
+ */
+#define CP_REG_ARM64_SYSREG_OP(_reg, _op)                         \
+    ((uint8_t)(((_reg) & CP_REG_ARM64_SYSREG_ ## _op ## _MASK) >> \
+               CP_REG_ARM64_SYSREG_ ## _op ## _SHIFT))
+
+/*
+ * Describe the KVM SVE register @regidx for use in error messages.
+ *
+ * The low 16 bits of the index select the register.  KVM places the
+ * Z registers at offset 0 (n in bits [9:5]), the P registers at 0x400
+ * (n in bits [8:5]) and FFR at 0x600, with the slice number in
+ * bits [4:0]; see KVM_REG_ARM64_SVE_{ZREG,PREG,FFR}_BASE in the Linux
+ * arm64 UAPI header.
+ *
+ * Returns a newly allocated string which the caller must g_free().
+ */
+static gchar *kvm_print_sve_register_name(uint64_t regidx)
+{
+    uint16_t sve_reg = regidx & 0x000000000000ffff;
+
+    if (regidx == KVM_REG_ARM64_SVE_VLS) {
+        return g_strdup_printf("SVE VLS");
+    }
+    /*
+     * zreg, preg, ffr: decode on bits [15:9].  Bit 9 has to be part of
+     * the selector to tell FFR (0x600) from the P registers (0x400);
+     * as a consequence Z registers 16..31 select 0x200 rather than 0.
+     */
+    switch (sve_reg & 0xfe00) {
+    case 0x0000:
+    case 0x0200:
+        return g_strdup_printf("SVE zreg n:%d slice:%d",
+                               (sve_reg & 0x03e0) >> 5, sve_reg & 0x001f);
+    case 0x0400:
+        return g_strdup_printf("SVE preg n:%d slice:%d",
+                               (sve_reg & 0x01e0) >> 5, sve_reg & 0x001f);
+    case 0x0600:
+        return g_strdup_printf("SVE ffr slice:%d", sve_reg & 0x001f);
+    default:
+        return g_strdup_printf("SVE ???");
+    }
+}
+
+/*
+ * Build a printable description of the KVM register @regidx, chosen by
+ * the coprocessor field of the index.  Unrecognised coprocessor values
+ * are printed as the raw index in hex.
+ *
+ * Returns a newly allocated string which the caller must g_free().
+ */
+static gchar *kvm_print_register_name(uint64_t regidx)
+{
+    uint64_t coproc = regidx & KVM_REG_ARM_COPROC_MASK;
+
+    switch (coproc) {
+    case KVM_REG_ARM_CORE:
+        return g_strdup_printf("core reg %"PRIx64, regidx);
+    case KVM_REG_ARM_DEMUX:
+        return g_strdup_printf("demuxed reg %"PRIx64, regidx);
+    case KVM_REG_ARM64_SYSREG: {
+        /* Split the system register encoding into its five fields */
+        int op0 = CP_REG_ARM64_SYSREG_OP(regidx, OP0);
+        int op1 = CP_REG_ARM64_SYSREG_OP(regidx, OP1);
+        int crn = CP_REG_ARM64_SYSREG_OP(regidx, CRN);
+        int crm = CP_REG_ARM64_SYSREG_OP(regidx, CRM);
+        int op2 = CP_REG_ARM64_SYSREG_OP(regidx, OP2);
+
+        return g_strdup_printf("op0:%d op1:%d crn:%d crm:%d op2:%d",
+                               op0, op1, crn, crm, op2);
+    }
+    case KVM_REG_ARM_FW: {
+        int fw_reg = (int)(regidx & 0xffff);
+
+        return g_strdup_printf("fw reg %d", fw_reg);
+    }
+    case KVM_REG_ARM64_SVE:
+        return kvm_print_sve_register_name(regidx);
+    case KVM_REG_ARM_FW_FEAT_BMAP: {
+        int feat_reg = (int)(regidx & 0xffff);
+
+        return g_strdup_printf("fw feat reg %d", feat_reg);
+    }
+    default:
+        return g_strdup_printf("%"PRIx64, regidx);
+    }
+}
+
 bool write_list_to_kvmstate(ARMCPU *cpu, int level)
 {
     CPUState *cs = CPU(cpu);
@@ -927,11 +979,45 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level)
             g_assert_not_reached();
         }
         if (ret) {
+            gchar *reg_str = kvm_print_register_name(regidx);
+
             /* We might fail for "unknown register" and also for
              * "you tried to set a register which is constant with
              * a different value from what it actually contains".
              */
             ok = false;
+            switch (ret) {
+            case -ENOENT:
+                error_report("Could not set register %s: unknown to KVM",
+                             reg_str);
+                break;
+            case -EINVAL:
+                if ((regidx & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32) {
+                    if (!kvm_get_one_reg(cs, regidx, &v32)) {
+                        error_report("Could not set register %s to %x (is %x)",
+                                     reg_str, (uint32_t)cpu->cpreg_values[i],
+                                     v32);
+                    } else {
+                        error_report("Could not set register %s to %x",
+                                     reg_str, (uint32_t)cpu->cpreg_values[i]);
+                    }
+                } else /* U64 */ {
+                    uint64_t v64;
+
+                    if (!kvm_get_one_reg(cs, regidx, &v64)) {
+                        error_report("Could not set register %s to %"PRIx64" (is %"PRIx64")",
+                                     reg_str, cpu->cpreg_values[i], v64);
+                    } else {
+                        error_report("Could not set register %s to %"PRIx64,
+                                     reg_str, cpu->cpreg_values[i]);
+                    }
+                }
+                break;
+            default:
+                error_report("Could not set register %s: %s",
+                             reg_str, strerror(-ret));
+            }
+            g_free(reg_str);
         }
     }
     return ok;
diff --git a/target/arm/machine.c b/target/arm/machine.c
index 6986915bee..6666a0c50c 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -221,26 +221,6 @@ static const VMStateDescription vmstate_vfp = {
     }
 };
 
-static bool iwmmxt_needed(void *opaque)
-{
-    ARMCPU *cpu = opaque;
-    CPUARMState *env = &cpu->env;
-
-    return arm_feature(env, ARM_FEATURE_IWMMXT);
-}
-
-static const VMStateDescription vmstate_iwmmxt = {
-    .name = "cpu/iwmmxt",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .needed = iwmmxt_needed,
-    .fields = (const VMStateField[]) {
-        VMSTATE_UINT64_ARRAY(env.iwmmxt.regs, ARMCPU, 16),
-        VMSTATE_UINT32_ARRAY(env.iwmmxt.cregs, ARMCPU, 16),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
 /* The expression ARM_MAX_VQ - 2 is 0 for pure AArch32 build,
  * and ARMPredicateReg is actively empty.  This triggers errors
  * in the expansion of the VMSTATE macros.
@@ -1102,7 +1082,6 @@ const VMStateDescription vmstate_arm_cpu = {
     },
     .subsections = (const VMStateDescription * const []) {
         &vmstate_vfp,
-        &vmstate_iwmmxt,
         &vmstate_m,
         &vmstate_thumb2ee,
         /* pmsav7_rnr must come before pmsav7 so that we have the
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index ed5c728eab..6344971fa6 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -59,11 +59,23 @@ typedef struct S1Translate {
      */
     bool in_debug;
     /*
+     * in_at: is this AccessType_AT?
+     * This is also set for debug, because at heart that is also
+     * an address translation, and simplifies a test.
+     */
+    bool in_at;
+    /*
      * If this is stage 2 of a stage 1+2 page table walk, then this must
      * be true if stage 1 is an EL0 access; otherwise this is ignored.
      * Stage 2 is indicated by in_mmu_idx set to ARMMMUIdx_Stage2{,_S}.
      */
     bool in_s1_is_el0;
+    /*
+     * The set of PAGE_* bits to be used in the permission check.
+     * This is normally directly related to the access_type, but
+     * may be suppressed for debug or AT insns.
+     */
+    uint8_t in_prot_check;
     bool out_rw;
     bool out_be;
     ARMSecuritySpace out_space;
@@ -581,6 +593,7 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw,
             .in_ptw_idx = ptw_idx_for_stage_2(env, s2_mmu_idx),
             .in_space = s2_space,
             .in_debug = true,
+            .in_prot_check = PAGE_READ,
         };
         GetPhysAddrResult s2 = { };
 
@@ -1061,11 +1074,10 @@ static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw,
             ap = (desc >> (4 + ((address >> 9) & 6))) & 3;
             result->f.lg_page_size = 12;
             break;
-        case 3: /* 1k page, or ARMv6/XScale "extended small (4k) page" */
+        case 3: /* 1k page, or ARMv6 "extended small (4k) page" */
             if (type == 1) {
-                /* ARMv6/XScale extended small page format */
-                if (arm_feature(env, ARM_FEATURE_XSCALE)
-                    || arm_feature(env, ARM_FEATURE_V6)) {
+                /* ARMv6 extended small page format */
+                if (arm_feature(env, ARM_FEATURE_V6)) {
                     phys_addr = (desc & 0xfffff000) | (address & 0xfff);
                     result->f.lg_page_size = 12;
                 } else {
@@ -1089,7 +1101,7 @@ static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw,
     }
     result->f.prot = ap_to_rw_prot(env, ptw->in_mmu_idx, ap, domain_prot);
     result->f.prot |= result->f.prot ? PAGE_EXEC : 0;
-    if (!(result->f.prot & (1 << access_type))) {
+    if (ptw->in_prot_check & ~result->f.prot) {
         /* Access permission fault.  */
         fi->type = ARMFault_Permission;
         goto do_fault;
@@ -1243,7 +1255,7 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw,
 
         result->f.prot = get_S1prot(env, mmu_idx, false, user_rw, prot_rw,
                                     xn, pxn, result->f.attrs.space, out_space);
-        if (!(result->f.prot & (1 << access_type))) {
+        if (ptw->in_prot_check & ~result->f.prot) {
             /* Access permission fault.  */
             fi->type = ARMFault_Permission;
             goto do_fault;
@@ -1922,7 +1934,12 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
     descaddr &= ~(hwaddr)(page_size - 1);
     descaddr |= (address & (page_size - 1));
 
-    if (likely(!ptw->in_debug)) {
+    /*
+     * For AccessType_AT, DB is not updated (AArch64.SetDirtyFlag),
+     * and it is IMPLEMENTATION DEFINED whether AF is updated
+     * (AArch64.SetAccessFlag; qemu chooses to not update).
+     */
+    if (likely(!ptw->in_at)) {
         /*
          * Access flag.
          * If HA is enabled, prepare to update the descriptor below.
@@ -2123,7 +2140,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
         result->f.tlb_fill_flags = 0;
     }
 
-    if (!(result->f.prot & (1 << access_type))) {
+    if (ptw->in_prot_check & ~result->f.prot) {
         fi->type = ARMFault_Permission;
         goto do_fault;
     }
@@ -2537,7 +2554,7 @@ static bool get_phys_addr_pmsav7(CPUARMState *env,
 
     fi->type = ARMFault_Permission;
     fi->level = 1;
-    return !(result->f.prot & (1 << access_type));
+    return (ptw->in_prot_check & ~result->f.prot) != 0;
 }
 
 static uint32_t *regime_rbar(CPUARMState *env, ARMMMUIdx mmu_idx,
@@ -2561,8 +2578,9 @@ static uint32_t *regime_rlar(CPUARMState *env, ARMMMUIdx mmu_idx,
 }
 
 bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
-                       MMUAccessType access_type, ARMMMUIdx mmu_idx,
-                       bool secure, GetPhysAddrResult *result,
+                       MMUAccessType access_type, unsigned prot_check,
+                       ARMMMUIdx mmu_idx, bool secure,
+                       GetPhysAddrResult *result,
                        ARMMMUFaultInfo *fi, uint32_t *mregion)
 {
     /*
@@ -2750,7 +2768,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
     if (arm_feature(env, ARM_FEATURE_M)) {
         fi->level = 1;
     }
-    return !(result->f.prot & (1 << access_type));
+    return (prot_check & ~result->f.prot) != 0;
 }
 
 static bool v8m_is_sau_exempt(CPUARMState *env,
@@ -2952,8 +2970,8 @@ static bool get_phys_addr_pmsav8(CPUARMState *env,
         }
     }
 
-    ret = pmsav8_mpu_lookup(env, address, access_type, mmu_idx, secure,
-                            result, fi, NULL);
+    ret = pmsav8_mpu_lookup(env, address, access_type, ptw->in_prot_check,
+                            mmu_idx, secure, result, fi, NULL);
     if (sattrs.subpage) {
         result->f.lg_page_size = 0;
     }
@@ -3537,18 +3555,26 @@ static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw,
     return false;
 }
 
-bool get_phys_addr_with_space_nogpc(CPUARMState *env, vaddr address,
-                                    MMUAccessType access_type, MemOp memop,
-                                    ARMMMUIdx mmu_idx, ARMSecuritySpace space,
-                                    GetPhysAddrResult *result,
-                                    ARMMMUFaultInfo *fi)
+bool get_phys_addr_for_at(CPUARMState *env, vaddr address,
+                          unsigned prot_check, ARMMMUIdx mmu_idx,
+                          ARMSecuritySpace space, GetPhysAddrResult *result,
+                          ARMMMUFaultInfo *fi)
 {
     S1Translate ptw = {
         .in_mmu_idx = mmu_idx,
         .in_space = space,
+        .in_at = true,
+        .in_prot_check = prot_check,
     };
-    return get_phys_addr_nogpc(env, &ptw, address, access_type,
-                               memop, result, fi);
+    /*
+     * I_MXTJT: Granule protection checks are not performed on the final
+     * address of a successful translation.  This is a translation not a
+     * memory reference, so MMU_DATA_LOAD is arbitrary (the exact protection
+     * check is handled or bypassed by .in_prot_check) and "memop = MO_8"
+     * bypasses any alignment check.
+     */
+    return get_phys_addr_nogpc(env, &ptw, address,
+                               MMU_DATA_LOAD, MO_8, result, fi);
 }
 
 static ARMSecuritySpace
@@ -3624,6 +3650,7 @@ bool get_phys_addr(CPUARMState *env, vaddr address,
     S1Translate ptw = {
         .in_mmu_idx = mmu_idx,
         .in_space = arm_mmu_idx_to_security_space(env, mmu_idx),
+        .in_prot_check = 1 << access_type,
     };
 
     return get_phys_addr_gpc(env, &ptw, address, access_type,
@@ -3637,6 +3664,8 @@ static hwaddr arm_cpu_get_phys_page(CPUARMState *env, vaddr addr,
         .in_mmu_idx = mmu_idx,
         .in_space = arm_mmu_idx_to_security_space(env, mmu_idx),
         .in_debug = true,
+        .in_at = true,
+        .in_prot_check = 0,
     };
     GetPhysAddrResult res = {};
     ARMMMUFaultInfo fi = {};
diff --git a/target/arm/tcg/cpregs-at.c b/target/arm/tcg/cpregs-at.c
index 398a61d398..0e8f229aa7 100644
--- a/target/arm/tcg/cpregs-at.c
+++ b/target/arm/tcg/cpregs-at.c
@@ -24,22 +24,15 @@ static int par_el1_shareability(GetPhysAddrResult *res)
 }
 
 static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
-                             MMUAccessType access_type, ARMMMUIdx mmu_idx,
+                             unsigned prot_check, ARMMMUIdx mmu_idx,
                              ARMSecuritySpace ss)
 {
-    bool ret;
     uint64_t par64;
     bool format64 = false;
     ARMMMUFaultInfo fi = {};
     GetPhysAddrResult res = {};
-
-    /*
-     * I_MXTJT: Granule protection checks are not performed on the final
-     * address of a successful translation.  This is a translation not a
-     * memory reference, so "memop = none = 0".
-     */
-    ret = get_phys_addr_with_space_nogpc(env, value, access_type, 0,
-                                         mmu_idx, ss, &res, &fi);
+    bool ret = get_phys_addr_for_at(env, value, prot_check,
+                                    mmu_idx, ss, &res, &fi);
 
     /*
      * ATS operations only do S1 or S1+S2 translations, so we never
@@ -198,7 +191,7 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
 
 static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
-    MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD;
+    unsigned access_perm = ri->opc2 & 1 ? PAGE_WRITE : PAGE_READ;
     uint64_t par64;
     ARMMMUIdx mmu_idx;
     int el = arm_current_el(env);
@@ -260,7 +253,7 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
         g_assert_not_reached();
     }
 
-    par64 = do_ats_write(env, value, access_type, mmu_idx, ss);
+    par64 = do_ats_write(env, value, access_perm, mmu_idx, ss);
 
     A32_BANKED_CURRENT_REG_SET(env, par, par64);
 }
@@ -268,11 +261,11 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 static void ats1h_write(CPUARMState *env, const ARMCPRegInfo *ri,
                         uint64_t value)
 {
-    MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD;
+    unsigned access_perm = ri->opc2 & 1 ? PAGE_WRITE : PAGE_READ;
     uint64_t par64;
 
     /* There is no SecureEL2 for AArch32. */
-    par64 = do_ats_write(env, value, access_type, ARMMMUIdx_E2,
+    par64 = do_ats_write(env, value, access_perm, ARMMMUIdx_E2,
                          ARMSS_NonSecure);
 
     A32_BANKED_CURRENT_REG_SET(env, par, par64);
@@ -316,7 +309,7 @@ static CPAccessResult at_s1e01_access(CPUARMState *env, const ARMCPRegInfo *ri,
 static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri,
                         uint64_t value)
 {
-    MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD;
+    unsigned access_perm = ri->opc2 & 1 ? PAGE_WRITE : PAGE_READ;
     ARMMMUIdx mmu_idx;
     uint64_t hcr_el2 = arm_hcr_el2_eff(env);
     bool regime_e20 = (hcr_el2 & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE);
@@ -359,7 +352,7 @@ static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri,
     }
 
     ss = for_el3 ? arm_security_space(env) : arm_security_space_below_el3(env);
-    env->cp15.par_el[1] = do_ats_write(env, value, access_type, mmu_idx, ss);
+    env->cp15.par_el[1] = do_ats_write(env, value, access_perm, mmu_idx, ss);
 }
 
 static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -495,6 +488,47 @@ static const ARMCPRegInfo ats1cp_reginfo[] = {
       .writefn = ats_write },
 };
 
+static void ats_s1e1a(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+{
+    uint64_t hcr_el2 = arm_hcr_el2_eff(env);
+    bool regime_e20 = (hcr_el2 & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE);
+    ARMMMUIdx mmu_idx = regime_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_Stage1_E1;
+    ARMSecuritySpace ss = arm_security_space_below_el3(env);
+
+    env->cp15.par_el[1] = do_ats_write(env, value, 0, mmu_idx, ss);
+}
+
+static void ats_s1e2a(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+{
+    uint64_t hcr_el2 = arm_hcr_el2_eff(env);
+    ARMMMUIdx mmu_idx = hcr_el2 & HCR_E2H ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
+    ARMSecuritySpace ss = arm_security_space_below_el3(env);
+
+    env->cp15.par_el[1] = do_ats_write(env, value, 0, mmu_idx, ss);
+}
+
+static void ats_s1e3a(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+{
+    env->cp15.par_el[1] = do_ats_write(env, value, 0, ARMMMUIdx_E3,
+                                       arm_security_space(env));
+}
+
+static const ARMCPRegInfo ats1a_reginfo[] = {
+    { .name = "AT_S1E1A", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 2,
+      .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+      .fgt = FGT_ATS1E1A,
+      .accessfn = at_s1e01_access, .writefn = ats_s1e1a },
+    { .name = "AT_S1E2A", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 9, .opc2 = 2,
+      .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+      .accessfn = at_s1e2_access, .writefn = ats_s1e2a },
+    { .name = "AT_S1E3A", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 9, .opc2 = 2,
+      .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+      .writefn = ats_s1e3a },
+};
+
 void define_at_insn_regs(ARMCPU *cpu)
 {
     CPUARMState *env = &cpu->env;
@@ -516,4 +550,7 @@ void define_at_insn_regs(ARMCPU *cpu)
     if (cpu_isar_feature(aa32_ats1e1, cpu)) {
         define_arm_cp_regs(cpu, ats1cp_reginfo);
     }
+    if (cpu_isar_feature(aa64_ats1a, cpu)) {
+        define_arm_cp_regs(cpu, ats1a_reginfo);
+    }
 }
diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c
index a2a23eae0d..f0761410ad 100644
--- a/target/arm/tcg/cpu32.c
+++ b/target/arm/tcg/cpu32.c
@@ -807,144 +807,6 @@ static void sa1110_initfn(Object *obj)
     cpu->reset_sctlr = 0x00000070;
 }
 
-static void pxa250_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "marvell,xscale";
-    set_feature(&cpu->env, ARM_FEATURE_V5);
-    set_feature(&cpu->env, ARM_FEATURE_XSCALE);
-    cpu->midr = 0x69052100;
-    cpu->ctr = 0xd172172;
-    cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa255_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "marvell,xscale";
-    set_feature(&cpu->env, ARM_FEATURE_V5);
-    set_feature(&cpu->env, ARM_FEATURE_XSCALE);
-    cpu->midr = 0x69052d00;
-    cpu->ctr = 0xd172172;
-    cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa260_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "marvell,xscale";
-    set_feature(&cpu->env, ARM_FEATURE_V5);
-    set_feature(&cpu->env, ARM_FEATURE_XSCALE);
-    cpu->midr = 0x69052903;
-    cpu->ctr = 0xd172172;
-    cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa261_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "marvell,xscale";
-    set_feature(&cpu->env, ARM_FEATURE_V5);
-    set_feature(&cpu->env, ARM_FEATURE_XSCALE);
-    cpu->midr = 0x69052d05;
-    cpu->ctr = 0xd172172;
-    cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa262_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "marvell,xscale";
-    set_feature(&cpu->env, ARM_FEATURE_V5);
-    set_feature(&cpu->env, ARM_FEATURE_XSCALE);
-    cpu->midr = 0x69052d06;
-    cpu->ctr = 0xd172172;
-    cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa270a0_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "marvell,xscale";
-    set_feature(&cpu->env, ARM_FEATURE_V5);
-    set_feature(&cpu->env, ARM_FEATURE_XSCALE);
-    set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
-    cpu->midr = 0x69054110;
-    cpu->ctr = 0xd172172;
-    cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa270a1_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "marvell,xscale";
-    set_feature(&cpu->env, ARM_FEATURE_V5);
-    set_feature(&cpu->env, ARM_FEATURE_XSCALE);
-    set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
-    cpu->midr = 0x69054111;
-    cpu->ctr = 0xd172172;
-    cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa270b0_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "marvell,xscale";
-    set_feature(&cpu->env, ARM_FEATURE_V5);
-    set_feature(&cpu->env, ARM_FEATURE_XSCALE);
-    set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
-    cpu->midr = 0x69054112;
-    cpu->ctr = 0xd172172;
-    cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa270b1_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "marvell,xscale";
-    set_feature(&cpu->env, ARM_FEATURE_V5);
-    set_feature(&cpu->env, ARM_FEATURE_XSCALE);
-    set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
-    cpu->midr = 0x69054113;
-    cpu->ctr = 0xd172172;
-    cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa270c0_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "marvell,xscale";
-    set_feature(&cpu->env, ARM_FEATURE_V5);
-    set_feature(&cpu->env, ARM_FEATURE_XSCALE);
-    set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
-    cpu->midr = 0x69054114;
-    cpu->ctr = 0xd172172;
-    cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa270c5_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "marvell,xscale";
-    set_feature(&cpu->env, ARM_FEATURE_V5);
-    set_feature(&cpu->env, ARM_FEATURE_XSCALE);
-    set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
-    cpu->midr = 0x69054117;
-    cpu->ctr = 0xd172172;
-    cpu->reset_sctlr = 0x00000078;
-}
-
 #ifndef TARGET_AARCH64
 /*
  * -cpu max: a CPU with as many features enabled as our emulation supports.
@@ -1032,31 +894,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
     { .name = "ti925t",      .initfn = ti925t_initfn },
     { .name = "sa1100",      .initfn = sa1100_initfn },
     { .name = "sa1110",      .initfn = sa1110_initfn },
-    { .name = "pxa250",      .initfn = pxa250_initfn,
-      .deprecation_note = "iwMMXt CPUs are no longer supported", },
-    { .name = "pxa255",      .initfn = pxa255_initfn,
-      .deprecation_note = "iwMMXt CPUs are no longer supported", },
-    { .name = "pxa260",      .initfn = pxa260_initfn,
-      .deprecation_note = "iwMMXt CPUs are no longer supported", },
-    { .name = "pxa261",      .initfn = pxa261_initfn,
-      .deprecation_note = "iwMMXt CPUs are no longer supported", },
-    { .name = "pxa262",      .initfn = pxa262_initfn,
-      .deprecation_note = "iwMMXt CPUs are no longer supported", },
-    /* "pxa270" is an alias for "pxa270-a0" */
-    { .name = "pxa270",      .initfn = pxa270a0_initfn,
-      .deprecation_note = "iwMMXt CPUs are no longer supported", },
-    { .name = "pxa270-a0",   .initfn = pxa270a0_initfn,
-      .deprecation_note = "iwMMXt CPUs are no longer supported", },
-    { .name = "pxa270-a1",   .initfn = pxa270a1_initfn,
-      .deprecation_note = "iwMMXt CPUs are no longer supported", },
-    { .name = "pxa270-b0",   .initfn = pxa270b0_initfn,
-      .deprecation_note = "iwMMXt CPUs are no longer supported", },
-    { .name = "pxa270-b1",   .initfn = pxa270b1_initfn,
-      .deprecation_note = "iwMMXt CPUs are no longer supported", },
-    { .name = "pxa270-c0",   .initfn = pxa270c0_initfn,
-      .deprecation_note = "iwMMXt CPUs are no longer supported", },
-    { .name = "pxa270-c5",   .initfn = pxa270c5_initfn,
-      .deprecation_note = "iwMMXt CPUs are no longer supported", },
 #ifndef TARGET_AARCH64
     { .name = "max",         .initfn = arm_max_initfn },
 #endif
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index b8b1981e70..abef6a246e 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -1179,6 +1179,7 @@ void aarch64_max_tcg_initfn(Object *obj)
     t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1);       /* FEAT_HBC */
     t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2);     /* FEAT_WFxT */
     t = FIELD_DP64(t, ID_AA64ISAR2, CSSC, 1);     /* FEAT_CSSC */
+    t = FIELD_DP64(t, ID_AA64ISAR2, ATS1A, 1);    /* FEAT_ATS1A */
     SET_IDREG(isar, ID_AA64ISAR2, t);
 
     t = GET_IDREG(isar, ID_AA64PFR0);
diff --git a/target/arm/tcg/helper.h b/target/arm/tcg/helper.h
index 4da32db902..4636d1bc03 100644
--- a/target/arm/tcg/helper.h
+++ b/target/arm/tcg/helper.h
@@ -444,101 +444,6 @@ DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, fpst)
 DEF_HELPER_3(neon_acge_f64, i64, i64, i64, fpst)
 DEF_HELPER_3(neon_acgt_f64, i64, i64, i64, fpst)
 
-/* iwmmxt_helper.c */
-DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_madduq, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_sadb, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_sadw, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_mulslw, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_mulshw, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_mululw, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_muluhw, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_macsw, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_macuw, i64, i64, i64)
-DEF_HELPER_1(iwmmxt_setpsr_nz, i32, i64)
-
-#define DEF_IWMMXT_HELPER_SIZE_ENV(name) \
-DEF_HELPER_3(iwmmxt_##name##b, i64, env, i64, i64) \
-DEF_HELPER_3(iwmmxt_##name##w, i64, env, i64, i64) \
-DEF_HELPER_3(iwmmxt_##name##l, i64, env, i64, i64) \
-
-DEF_IWMMXT_HELPER_SIZE_ENV(unpackl)
-DEF_IWMMXT_HELPER_SIZE_ENV(unpackh)
-
-DEF_HELPER_2(iwmmxt_unpacklub, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackluw, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpacklul, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhub, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhuw, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhul, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpacklsb, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpacklsw, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpacklsl, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhsb, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhsw, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhsl, i64, env, i64)
-
-DEF_IWMMXT_HELPER_SIZE_ENV(cmpeq)
-DEF_IWMMXT_HELPER_SIZE_ENV(cmpgtu)
-DEF_IWMMXT_HELPER_SIZE_ENV(cmpgts)
-
-DEF_IWMMXT_HELPER_SIZE_ENV(mins)
-DEF_IWMMXT_HELPER_SIZE_ENV(minu)
-DEF_IWMMXT_HELPER_SIZE_ENV(maxs)
-DEF_IWMMXT_HELPER_SIZE_ENV(maxu)
-
-DEF_IWMMXT_HELPER_SIZE_ENV(subn)
-DEF_IWMMXT_HELPER_SIZE_ENV(addn)
-DEF_IWMMXT_HELPER_SIZE_ENV(subu)
-DEF_IWMMXT_HELPER_SIZE_ENV(addu)
-DEF_IWMMXT_HELPER_SIZE_ENV(subs)
-DEF_IWMMXT_HELPER_SIZE_ENV(adds)
-
-DEF_HELPER_3(iwmmxt_avgb0, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64)
-
-DEF_HELPER_3(iwmmxt_align, i64, i64, i64, i32)
-DEF_HELPER_4(iwmmxt_insr, i64, i64, i32, i32, i32)
-
-DEF_HELPER_1(iwmmxt_bcstb, i64, i32)
-DEF_HELPER_1(iwmmxt_bcstw, i64, i32)
-DEF_HELPER_1(iwmmxt_bcstl, i64, i32)
-
-DEF_HELPER_1(iwmmxt_addcb, i64, i64)
-DEF_HELPER_1(iwmmxt_addcw, i64, i64)
-DEF_HELPER_1(iwmmxt_addcl, i64, i64)
-
-DEF_HELPER_1(iwmmxt_msbb, i32, i64)
-DEF_HELPER_1(iwmmxt_msbw, i32, i64)
-DEF_HELPER_1(iwmmxt_msbl, i32, i64)
-
-DEF_HELPER_3(iwmmxt_srlw, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_srll, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_srlq, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sllw, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_slll, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sllq, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sraw, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sral, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sraq, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_rorw, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_rorl, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_rorq, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_shufh, i64, env, i64, i32)
-
-DEF_HELPER_3(iwmmxt_packuw, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packul, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packuq, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packsw, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packsl, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packsq, i64, env, i64, i64)
-
-DEF_HELPER_3(iwmmxt_muladdsl, i64, i64, i32, i32)
-DEF_HELPER_3(iwmmxt_muladdsw, i64, i64, i32, i32)
-DEF_HELPER_3(iwmmxt_muladdswl, i64, i64, i32, i32)
-
 DEF_HELPER_FLAGS_2(neon_unzip8, TCG_CALL_NO_RWG, void, ptr, ptr)
 DEF_HELPER_FLAGS_2(neon_unzip16, TCG_CALL_NO_RWG, void, ptr, ptr)
 DEF_HELPER_FLAGS_2(neon_qunzip8, TCG_CALL_NO_RWG, void, ptr, ptr)
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index 59ab526375..01894226cc 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -624,16 +624,9 @@ TCGTBCPUState arm_get_tb_cpu_state(CPUState *cs)
                 DP_TBFLAG_M32(flags, MVE_NO_PRED, 1);
             }
         } else {
-            /*
-             * Note that XSCALE_CPAR shares bits with VECSTRIDE.
-             * Note that VECLEN+VECSTRIDE are RES0 for M-profile.
-             */
-            if (arm_feature(env, ARM_FEATURE_XSCALE)) {
-                DP_TBFLAG_A32(flags, XSCALE_CPAR, env->cp15.c15_cpar);
-            } else {
-                DP_TBFLAG_A32(flags, VECLEN, env->vfp.vec_len);
-                DP_TBFLAG_A32(flags, VECSTRIDE, env->vfp.vec_stride);
-            }
+            /* Note that VECLEN+VECSTRIDE are RES0 for M-profile. */
+            DP_TBFLAG_A32(flags, VECLEN, env->vfp.vec_len);
+            DP_TBFLAG_A32(flags, VECSTRIDE, env->vfp.vec_stride);
             if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
                 DP_TBFLAG_A32(flags, VFPEN, 1);
             }
diff --git a/target/arm/tcg/iwmmxt_helper.c b/target/arm/tcg/iwmmxt_helper.c
deleted file mode 100644
index ba054b6b4d..0000000000
--- a/target/arm/tcg/iwmmxt_helper.c
+++ /dev/null
@@ -1,672 +0,0 @@
-/*
- * iwMMXt micro operations for XScale.
- *
- * Copyright (c) 2007 OpenedHand, Ltd.
- * Written by Andrzej Zaborowski <andrew@openedhand.com>
- * Copyright (c) 2008 CodeSourcery
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu/osdep.h"
-
-#include "cpu.h"
-
-#define HELPER_H "tcg/helper.h"
-#include "exec/helper-proto.h.inc"
-
-/* iwMMXt macros extracted from GNU gdb.  */
-
-/* Set the SIMD wCASF flags for 8, 16, 32 or 64-bit operations.  */
-#define SIMD8_SET(v, n, b)      ((v != 0) << ((((b) + 1) * 4) + (n)))
-#define SIMD16_SET(v, n, h)     ((v != 0) << ((((h) + 1) * 8) + (n)))
-#define SIMD32_SET(v, n, w)     ((v != 0) << ((((w) + 1) * 16) + (n)))
-#define SIMD64_SET(v, n)        ((v != 0) << (32 + (n)))
-/* Flags to pass as "n" above.  */
-#define SIMD_NBIT       -1
-#define SIMD_ZBIT       -2
-#define SIMD_CBIT       -3
-#define SIMD_VBIT       -4
-/* Various status bit macros.  */
-#define NBIT8(x)        ((x) & 0x80)
-#define NBIT16(x)       ((x) & 0x8000)
-#define NBIT32(x)       ((x) & 0x80000000)
-#define NBIT64(x)       ((x) & 0x8000000000000000ULL)
-#define ZBIT8(x)        (((x) & 0xff) == 0)
-#define ZBIT16(x)       (((x) & 0xffff) == 0)
-#define ZBIT32(x)       (((x) & 0xffffffff) == 0)
-#define ZBIT64(x)       (x == 0)
-/* Sign extension macros.  */
-#define EXTEND8H(a)     ((uint16_t) (int8_t) (a))
-#define EXTEND8(a)      ((uint32_t) (int8_t) (a))
-#define EXTEND16(a)     ((uint32_t) (int16_t) (a))
-#define EXTEND16S(a)    ((int32_t) (int16_t) (a))
-#define EXTEND32(a)     ((uint64_t) (int32_t) (a))
-
-uint64_t HELPER(iwmmxt_maddsq)(uint64_t a, uint64_t b)
-{
-    a = ((
-            EXTEND16S((a >> 0) & 0xffff) * EXTEND16S((b >> 0) & 0xffff) +
-            EXTEND16S((a >> 16) & 0xffff) * EXTEND16S((b >> 16) & 0xffff)
-        ) & 0xffffffff) | ((uint64_t) (
-            EXTEND16S((a >> 32) & 0xffff) * EXTEND16S((b >> 32) & 0xffff) +
-            EXTEND16S((a >> 48) & 0xffff) * EXTEND16S((b >> 48) & 0xffff)
-        ) << 32);
-    return a;
-}
-
-uint64_t HELPER(iwmmxt_madduq)(uint64_t a, uint64_t b)
-{
-    a = ((
-            ((a >> 0) & 0xffff) * ((b >> 0) & 0xffff) +
-            ((a >> 16) & 0xffff) * ((b >> 16) & 0xffff)
-        ) & 0xffffffff) | ((
-            ((a >> 32) & 0xffff) * ((b >> 32) & 0xffff) +
-            ((a >> 48) & 0xffff) * ((b >> 48) & 0xffff)
-        ) << 32);
-    return a;
-}
-
-uint64_t HELPER(iwmmxt_sadb)(uint64_t a, uint64_t b)
-{
-#define abs(x) (((x) >= 0) ? x : -x)
-#define SADB(SHR) abs((int) ((a >> SHR) & 0xff) - (int) ((b >> SHR) & 0xff))
-    return
-        SADB(0) + SADB(8) + SADB(16) + SADB(24) +
-        SADB(32) + SADB(40) + SADB(48) + SADB(56);
-#undef SADB
-}
-
-uint64_t HELPER(iwmmxt_sadw)(uint64_t a, uint64_t b)
-{
-#define SADW(SHR) \
-    abs((int) ((a >> SHR) & 0xffff) - (int) ((b >> SHR) & 0xffff))
-    return SADW(0) + SADW(16) + SADW(32) + SADW(48);
-#undef SADW
-}
-
-uint64_t HELPER(iwmmxt_mulslw)(uint64_t a, uint64_t b)
-{
-#define MULS(SHR) ((uint64_t) ((( \
-        EXTEND16S((a >> SHR) & 0xffff) * EXTEND16S((b >> SHR) & 0xffff) \
-    ) >> 0) & 0xffff) << SHR)
-    return MULS(0) | MULS(16) | MULS(32) | MULS(48);
-#undef MULS
-}
-
-uint64_t HELPER(iwmmxt_mulshw)(uint64_t a, uint64_t b)
-{
-#define MULS(SHR) ((uint64_t) ((( \
-        EXTEND16S((a >> SHR) & 0xffff) * EXTEND16S((b >> SHR) & 0xffff) \
-    ) >> 16) & 0xffff) << SHR)
-    return MULS(0) | MULS(16) | MULS(32) | MULS(48);
-#undef MULS
-}
-
-uint64_t HELPER(iwmmxt_mululw)(uint64_t a, uint64_t b)
-{
-#define MULU(SHR) ((uint64_t) ((( \
-        ((a >> SHR) & 0xffff) * ((b >> SHR) & 0xffff) \
-    ) >> 0) & 0xffff) << SHR)
-    return MULU(0) | MULU(16) | MULU(32) | MULU(48);
-#undef MULU
-}
-
-uint64_t HELPER(iwmmxt_muluhw)(uint64_t a, uint64_t b)
-{
-#define MULU(SHR) ((uint64_t) ((( \
-        ((a >> SHR) & 0xffff) * ((b >> SHR) & 0xffff) \
-    ) >> 16) & 0xffff) << SHR)
-    return MULU(0) | MULU(16) | MULU(32) | MULU(48);
-#undef MULU
-}
-
-uint64_t HELPER(iwmmxt_macsw)(uint64_t a, uint64_t b)
-{
-#define MACS(SHR) ( \
-        EXTEND16((a >> SHR) & 0xffff) * EXTEND16S((b >> SHR) & 0xffff))
-    return (int64_t) (MACS(0) + MACS(16) + MACS(32) + MACS(48));
-#undef MACS
-}
-
-uint64_t HELPER(iwmmxt_macuw)(uint64_t a, uint64_t b)
-{
-#define MACU(SHR) ( \
-        (uint32_t) ((a >> SHR) & 0xffff) * \
-        (uint32_t) ((b >> SHR) & 0xffff))
-    return MACU(0) + MACU(16) + MACU(32) + MACU(48);
-#undef MACU
-}
-
-#define NZBIT8(x, i) \
-    SIMD8_SET(NBIT8((x) & 0xff), SIMD_NBIT, i) | \
-    SIMD8_SET(ZBIT8((x) & 0xff), SIMD_ZBIT, i)
-#define NZBIT16(x, i) \
-    SIMD16_SET(NBIT16((x) & 0xffff), SIMD_NBIT, i) | \
-    SIMD16_SET(ZBIT16((x) & 0xffff), SIMD_ZBIT, i)
-#define NZBIT32(x, i) \
-    SIMD32_SET(NBIT32((x) & 0xffffffff), SIMD_NBIT, i) | \
-    SIMD32_SET(ZBIT32((x) & 0xffffffff), SIMD_ZBIT, i)
-#define NZBIT64(x) \
-    SIMD64_SET(NBIT64(x), SIMD_NBIT) | \
-    SIMD64_SET(ZBIT64(x), SIMD_ZBIT)
-#define IWMMXT_OP_UNPACK(S, SH0, SH1, SH2, SH3)                         \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, b)))(CPUARMState *env, \
-                                                 uint64_t a, uint64_t b) \
-{                                                               \
-    a =                                                                 \
-        (((a >> SH0) & 0xff) << 0) | (((b >> SH0) & 0xff) << 8) |       \
-        (((a >> SH1) & 0xff) << 16) | (((b >> SH1) & 0xff) << 24) |     \
-        (((a >> SH2) & 0xff) << 32) | (((b >> SH2) & 0xff) << 40) |     \
-        (((a >> SH3) & 0xff) << 48) | (((b >> SH3) & 0xff) << 56);      \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
-        NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) |                         \
-        NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) |               \
-        NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) |               \
-        NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7);                \
-    return a;                                                   \
-}                                                               \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, w)))(CPUARMState *env, \
-                                        uint64_t a, uint64_t b) \
-{                                                               \
-    a =                                                                 \
-        (((a >> SH0) & 0xffff) << 0) |                          \
-        (((b >> SH0) & 0xffff) << 16) |                                 \
-        (((a >> SH2) & 0xffff) << 32) |                                 \
-        (((b >> SH2) & 0xffff) << 48);                          \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
-        NZBIT8(a >> 0, 0) | NZBIT8(a >> 16, 1) |                \
-        NZBIT8(a >> 32, 2) | NZBIT8(a >> 48, 3);                \
-    return a;                                                   \
-}                                                               \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, l)))(CPUARMState *env, \
-                                        uint64_t a, uint64_t b) \
-{                                                               \
-    a =                                                                 \
-        (((a >> SH0) & 0xffffffff) << 0) |                      \
-        (((b >> SH0) & 0xffffffff) << 32);                      \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
-        NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1);               \
-    return a;                                                   \
-}                                                               \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ub)))(CPUARMState *env, \
-                                                  uint64_t x)   \
-{                                                               \
-    x =                                                                 \
-        (((x >> SH0) & 0xff) << 0) |                            \
-        (((x >> SH1) & 0xff) << 16) |                           \
-        (((x >> SH2) & 0xff) << 32) |                           \
-        (((x >> SH3) & 0xff) << 48);                            \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
-        NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) |              \
-        NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3);              \
-    return x;                                                   \
-}                                                               \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, uw)))(CPUARMState *env, \
-                                                  uint64_t x)   \
-{                                                               \
-    x =                                                                 \
-        (((x >> SH0) & 0xffff) << 0) |                          \
-        (((x >> SH2) & 0xffff) << 32);                          \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
-        NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1);               \
-    return x;                                                   \
-}                                                               \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ul)))(CPUARMState *env, \
-                                                  uint64_t x)   \
-{                                                               \
-    x = (((x >> SH0) & 0xffffffff) << 0);                       \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0);      \
-    return x;                                                   \
-}                                                               \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sb)))(CPUARMState *env, \
-                                                  uint64_t x)   \
-{                                                               \
-    x =                                                                 \
-        ((uint64_t) EXTEND8H((x >> SH0) & 0xff) << 0) |                 \
-        ((uint64_t) EXTEND8H((x >> SH1) & 0xff) << 16) |        \
-        ((uint64_t) EXTEND8H((x >> SH2) & 0xff) << 32) |        \
-        ((uint64_t) EXTEND8H((x >> SH3) & 0xff) << 48);                 \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
-        NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) |              \
-        NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3);              \
-    return x;                                                   \
-}                                                               \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sw)))(CPUARMState *env, \
-                                                  uint64_t x)   \
-{                                                               \
-    x =                                                                 \
-        ((uint64_t) EXTEND16((x >> SH0) & 0xffff) << 0) |       \
-        ((uint64_t) EXTEND16((x >> SH2) & 0xffff) << 32);       \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
-        NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1);               \
-    return x;                                                   \
-}                                                               \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sl)))(CPUARMState *env, \
-                                                  uint64_t x)   \
-{                                                               \
-    x = EXTEND32((x >> SH0) & 0xffffffff);                      \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0);      \
-    return x;                                                   \
-}
-IWMMXT_OP_UNPACK(l, 0, 8, 16, 24)
-IWMMXT_OP_UNPACK(h, 32, 40, 48, 56)
-
-#define IWMMXT_OP_CMP(SUFF, Tb, Tw, Tl, O)                      \
-uint64_t HELPER(glue(iwmmxt_, glue(SUFF, b)))(CPUARMState *env,    \
-                                        uint64_t a, uint64_t b) \
-{                                                               \
-    a =                                                                 \
-        CMP(0, Tb, O, 0xff) | CMP(8, Tb, O, 0xff) |             \
-        CMP(16, Tb, O, 0xff) | CMP(24, Tb, O, 0xff) |           \
-        CMP(32, Tb, O, 0xff) | CMP(40, Tb, O, 0xff) |           \
-        CMP(48, Tb, O, 0xff) | CMP(56, Tb, O, 0xff);            \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
-        NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) |                         \
-        NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) |               \
-        NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) |               \
-        NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7);                \
-    return a;                                                   \
-}                                                               \
-uint64_t HELPER(glue(iwmmxt_, glue(SUFF, w)))(CPUARMState *env,    \
-                                        uint64_t a, uint64_t b) \
-{                                                               \
-    a = CMP(0, Tw, O, 0xffff) | CMP(16, Tw, O, 0xffff) |        \
-        CMP(32, Tw, O, 0xffff) | CMP(48, Tw, O, 0xffff);        \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
-        NZBIT16(a >> 0, 0) | NZBIT16(a >> 16, 1) |              \
-        NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3);              \
-    return a;                                                   \
-}                                                               \
-uint64_t HELPER(glue(iwmmxt_, glue(SUFF, l)))(CPUARMState *env,    \
-                                        uint64_t a, uint64_t b) \
-{                                                               \
-    a = CMP(0, Tl, O, 0xffffffff) |                             \
-        CMP(32, Tl, O, 0xffffffff);                             \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
-        NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1);               \
-    return a;                                                   \
-}
-#define CMP(SHR, TYPE, OPER, MASK) ((((TYPE) ((a >> SHR) & MASK) OPER \
-            (TYPE) ((b >> SHR) & MASK)) ? (uint64_t) MASK : 0) << SHR)
-IWMMXT_OP_CMP(cmpeq, uint8_t, uint16_t, uint32_t, ==)
-IWMMXT_OP_CMP(cmpgts, int8_t, int16_t, int32_t, >)
-IWMMXT_OP_CMP(cmpgtu, uint8_t, uint16_t, uint32_t, >)
-#undef CMP
-#define CMP(SHR, TYPE, OPER, MASK) ((((TYPE) ((a >> SHR) & MASK) OPER \
-            (TYPE) ((b >> SHR) & MASK)) ? a : b) & ((uint64_t) MASK << SHR))
-IWMMXT_OP_CMP(mins, int8_t, int16_t, int32_t, <)
-IWMMXT_OP_CMP(minu, uint8_t, uint16_t, uint32_t, <)
-IWMMXT_OP_CMP(maxs, int8_t, int16_t, int32_t, >)
-IWMMXT_OP_CMP(maxu, uint8_t, uint16_t, uint32_t, >)
-#undef CMP
-#define CMP(SHR, TYPE, OPER, MASK) ((uint64_t) (((TYPE) ((a >> SHR) & MASK) \
-            OPER (TYPE) ((b >> SHR) & MASK)) & MASK) << SHR)
-IWMMXT_OP_CMP(subn, uint8_t, uint16_t, uint32_t, -)
-IWMMXT_OP_CMP(addn, uint8_t, uint16_t, uint32_t, +)
-#undef CMP
-/* TODO Signed- and Unsigned-Saturation */
-#define CMP(SHR, TYPE, OPER, MASK) ((uint64_t) (((TYPE) ((a >> SHR) & MASK) \
-            OPER (TYPE) ((b >> SHR) & MASK)) & MASK) << SHR)
-IWMMXT_OP_CMP(subu, uint8_t, uint16_t, uint32_t, -)
-IWMMXT_OP_CMP(addu, uint8_t, uint16_t, uint32_t, +)
-IWMMXT_OP_CMP(subs, int8_t, int16_t, int32_t, -)
-IWMMXT_OP_CMP(adds, int8_t, int16_t, int32_t, +)
-#undef CMP
-#undef IWMMXT_OP_CMP
-
-#define AVGB(SHR) ((( \
-        ((a >> SHR) & 0xff) + ((b >> SHR) & 0xff) + round) >> 1) << SHR)
-#define IWMMXT_OP_AVGB(r)                                                 \
-uint64_t HELPER(iwmmxt_avgb##r)(CPUARMState *env, uint64_t a, uint64_t b)    \
-{                                                                         \
-    const int round = r;                                                  \
-    a = AVGB(0) | AVGB(8) | AVGB(16) | AVGB(24) |                         \
-        AVGB(32) | AVGB(40) | AVGB(48) | AVGB(56);                        \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                                 \
-        SIMD8_SET(ZBIT8((a >> 0) & 0xff), SIMD_ZBIT, 0) |                 \
-        SIMD8_SET(ZBIT8((a >> 8) & 0xff), SIMD_ZBIT, 1) |                 \
-        SIMD8_SET(ZBIT8((a >> 16) & 0xff), SIMD_ZBIT, 2) |                \
-        SIMD8_SET(ZBIT8((a >> 24) & 0xff), SIMD_ZBIT, 3) |                \
-        SIMD8_SET(ZBIT8((a >> 32) & 0xff), SIMD_ZBIT, 4) |                \
-        SIMD8_SET(ZBIT8((a >> 40) & 0xff), SIMD_ZBIT, 5) |                \
-        SIMD8_SET(ZBIT8((a >> 48) & 0xff), SIMD_ZBIT, 6) |                \
-        SIMD8_SET(ZBIT8((a >> 56) & 0xff), SIMD_ZBIT, 7);                 \
-    return a;                                                             \
-}
-IWMMXT_OP_AVGB(0)
-IWMMXT_OP_AVGB(1)
-#undef IWMMXT_OP_AVGB
-#undef AVGB
-
-#define AVGW(SHR) ((( \
-        ((a >> SHR) & 0xffff) + ((b >> SHR) & 0xffff) + round) >> 1) << SHR)
-#define IWMMXT_OP_AVGW(r)                                               \
-uint64_t HELPER(iwmmxt_avgw##r)(CPUARMState *env, uint64_t a, uint64_t b)  \
-{                                                                       \
-    const int round = r;                                                \
-    a = AVGW(0) | AVGW(16) | AVGW(32) | AVGW(48);                       \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                               \
-        SIMD16_SET(ZBIT16((a >> 0) & 0xffff), SIMD_ZBIT, 0) |           \
-        SIMD16_SET(ZBIT16((a >> 16) & 0xffff), SIMD_ZBIT, 1) |          \
-        SIMD16_SET(ZBIT16((a >> 32) & 0xffff), SIMD_ZBIT, 2) |          \
-        SIMD16_SET(ZBIT16((a >> 48) & 0xffff), SIMD_ZBIT, 3);           \
-    return a;                                                           \
-}
-IWMMXT_OP_AVGW(0)
-IWMMXT_OP_AVGW(1)
-#undef IWMMXT_OP_AVGW
-#undef AVGW
-
-uint64_t HELPER(iwmmxt_align)(uint64_t a, uint64_t b, uint32_t n)
-{
-    a >>= n << 3;
-    a |= b << (64 - (n << 3));
-    return a;
-}
-
-uint64_t HELPER(iwmmxt_insr)(uint64_t x, uint32_t a, uint32_t b, uint32_t n)
-{
-    x &= ~((uint64_t) b << n);
-    x |= (uint64_t) (a & b) << n;
-    return x;
-}
-
-uint32_t HELPER(iwmmxt_setpsr_nz)(uint64_t x)
-{
-    return SIMD64_SET((x == 0), SIMD_ZBIT) |
-           SIMD64_SET((x & (1ULL << 63)), SIMD_NBIT);
-}
-
-uint64_t HELPER(iwmmxt_bcstb)(uint32_t arg)
-{
-    arg &= 0xff;
-    return
-        ((uint64_t) arg << 0 ) | ((uint64_t) arg << 8 ) |
-        ((uint64_t) arg << 16) | ((uint64_t) arg << 24) |
-        ((uint64_t) arg << 32) | ((uint64_t) arg << 40) |
-        ((uint64_t) arg << 48) | ((uint64_t) arg << 56);
-}
-
-uint64_t HELPER(iwmmxt_bcstw)(uint32_t arg)
-{
-    arg &= 0xffff;
-    return
-        ((uint64_t) arg << 0 ) | ((uint64_t) arg << 16) |
-        ((uint64_t) arg << 32) | ((uint64_t) arg << 48);
-}
-
-uint64_t HELPER(iwmmxt_bcstl)(uint32_t arg)
-{
-    return arg | ((uint64_t) arg << 32);
-}
-
-uint64_t HELPER(iwmmxt_addcb)(uint64_t x)
-{
-    return
-        ((x >> 0) & 0xff) + ((x >> 8) & 0xff) +
-        ((x >> 16) & 0xff) + ((x >> 24) & 0xff) +
-        ((x >> 32) & 0xff) + ((x >> 40) & 0xff) +
-        ((x >> 48) & 0xff) + ((x >> 56) & 0xff);
-}
-
-uint64_t HELPER(iwmmxt_addcw)(uint64_t x)
-{
-    return
-        ((x >> 0) & 0xffff) + ((x >> 16) & 0xffff) +
-        ((x >> 32) & 0xffff) + ((x >> 48) & 0xffff);
-}
-
-uint64_t HELPER(iwmmxt_addcl)(uint64_t x)
-{
-    return (x & 0xffffffff) + (x >> 32);
-}
-
-uint32_t HELPER(iwmmxt_msbb)(uint64_t x)
-{
-    return
-        ((x >> 7) & 0x01) | ((x >> 14) & 0x02) |
-        ((x >> 21) & 0x04) | ((x >> 28) & 0x08) |
-        ((x >> 35) & 0x10) | ((x >> 42) & 0x20) |
-        ((x >> 49) & 0x40) | ((x >> 56) & 0x80);
-}
-
-uint32_t HELPER(iwmmxt_msbw)(uint64_t x)
-{
-    return
-        ((x >> 15) & 0x01) | ((x >> 30) & 0x02) |
-        ((x >> 45) & 0x04) | ((x >> 52) & 0x08);
-}
-
-uint32_t HELPER(iwmmxt_msbl)(uint64_t x)
-{
-    return ((x >> 31) & 0x01) | ((x >> 62) & 0x02);
-}
-
-/* FIXME: Split wCASF setting into a separate op to avoid env use.  */
-uint64_t HELPER(iwmmxt_srlw)(CPUARMState *env, uint64_t x, uint32_t n)
-{
-    x = (((x & (0xffffll << 0)) >> n) & (0xffffll << 0)) |
-        (((x & (0xffffll << 16)) >> n) & (0xffffll << 16)) |
-        (((x & (0xffffll << 32)) >> n) & (0xffffll << 32)) |
-        (((x & (0xffffll << 48)) >> n) & (0xffffll << 48));
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) |
-        NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3);
-    return x;
-}
-
-uint64_t HELPER(iwmmxt_srll)(CPUARMState *env, uint64_t x, uint32_t n)
-{
-    x = ((x & (0xffffffffll << 0)) >> n) |
-        ((x >> n) & (0xffffffffll << 32));
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1);
-    return x;
-}
-
-uint64_t HELPER(iwmmxt_srlq)(CPUARMState *env, uint64_t x, uint32_t n)
-{
-    x >>= n;
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x);
-    return x;
-}
-
-uint64_t HELPER(iwmmxt_sllw)(CPUARMState *env, uint64_t x, uint32_t n)
-{
-    x = (((x & (0xffffll << 0)) << n) & (0xffffll << 0)) |
-        (((x & (0xffffll << 16)) << n) & (0xffffll << 16)) |
-        (((x & (0xffffll << 32)) << n) & (0xffffll << 32)) |
-        (((x & (0xffffll << 48)) << n) & (0xffffll << 48));
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) |
-        NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3);
-    return x;
-}
-
-uint64_t HELPER(iwmmxt_slll)(CPUARMState *env, uint64_t x, uint32_t n)
-{
-    x = ((x << n) & (0xffffffffll << 0)) |
-        ((x & (0xffffffffll << 32)) << n);
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1);
-    return x;
-}
-
-uint64_t HELPER(iwmmxt_sllq)(CPUARMState *env, uint64_t x, uint32_t n)
-{
-    x <<= n;
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x);
-    return x;
-}
-
-uint64_t HELPER(iwmmxt_sraw)(CPUARMState *env, uint64_t x, uint32_t n)
-{
-    x = ((uint64_t) ((EXTEND16(x >> 0) >> n) & 0xffff) << 0) |
-        ((uint64_t) ((EXTEND16(x >> 16) >> n) & 0xffff) << 16) |
-        ((uint64_t) ((EXTEND16(x >> 32) >> n) & 0xffff) << 32) |
-        ((uint64_t) ((EXTEND16(x >> 48) >> n) & 0xffff) << 48);
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) |
-        NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3);
-    return x;
-}
-
-uint64_t HELPER(iwmmxt_sral)(CPUARMState *env, uint64_t x, uint32_t n)
-{
-    x = (((EXTEND32(x >> 0) >> n) & 0xffffffff) << 0) |
-        (((EXTEND32(x >> 32) >> n) & 0xffffffff) << 32);
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1);
-    return x;
-}
-
-uint64_t HELPER(iwmmxt_sraq)(CPUARMState *env, uint64_t x, uint32_t n)
-{
-    x = (int64_t) x >> n;
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x);
-    return x;
-}
-
-uint64_t HELPER(iwmmxt_rorw)(CPUARMState *env, uint64_t x, uint32_t n)
-{
-    x = ((((x & (0xffffll << 0)) >> n) |
-          ((x & (0xffffll << 0)) << (16 - n))) & (0xffffll << 0)) |
-        ((((x & (0xffffll << 16)) >> n) |
-          ((x & (0xffffll << 16)) << (16 - n))) & (0xffffll << 16)) |
-        ((((x & (0xffffll << 32)) >> n) |
-          ((x & (0xffffll << 32)) << (16 - n))) & (0xffffll << 32)) |
-        ((((x & (0xffffll << 48)) >> n) |
-          ((x & (0xffffll << 48)) << (16 - n))) & (0xffffll << 48));
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) |
-        NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3);
-    return x;
-}
-
-uint64_t HELPER(iwmmxt_rorl)(CPUARMState *env, uint64_t x, uint32_t n)
-{
-    x = ((x & (0xffffffffll << 0)) >> n) |
-        ((x >> n) & (0xffffffffll << 32)) |
-        ((x << (32 - n)) & (0xffffffffll << 0)) |
-        ((x & (0xffffffffll << 32)) << (32 - n));
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1);
-    return x;
-}
-
-uint64_t HELPER(iwmmxt_rorq)(CPUARMState *env, uint64_t x, uint32_t n)
-{
-    x = ror64(x, n);
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x);
-    return x;
-}
-
-uint64_t HELPER(iwmmxt_shufh)(CPUARMState *env, uint64_t x, uint32_t n)
-{
-    x = (((x >> ((n << 4) & 0x30)) & 0xffff) << 0) |
-        (((x >> ((n << 2) & 0x30)) & 0xffff) << 16) |
-        (((x >> ((n << 0) & 0x30)) & 0xffff) << 32) |
-        (((x >> ((n >> 2) & 0x30)) & 0xffff) << 48);
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) |
-        NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3);
-    return x;
-}
-
-/* TODO: Unsigned-Saturation */
-uint64_t HELPER(iwmmxt_packuw)(CPUARMState *env, uint64_t a, uint64_t b)
-{
-    a = (((a >> 0) & 0xff) << 0) | (((a >> 16) & 0xff) << 8) |
-        (((a >> 32) & 0xff) << 16) | (((a >> 48) & 0xff) << 24) |
-        (((b >> 0) & 0xff) << 32) | (((b >> 16) & 0xff) << 40) |
-        (((b >> 32) & 0xff) << 48) | (((b >> 48) & 0xff) << 56);
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) |
-        NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) |
-        NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) |
-        NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7);
-    return a;
-}
-
-uint64_t HELPER(iwmmxt_packul)(CPUARMState *env, uint64_t a, uint64_t b)
-{
-    a = (((a >> 0) & 0xffff) << 0) | (((a >> 32) & 0xffff) << 16) |
-        (((b >> 0) & 0xffff) << 32) | (((b >> 32) & 0xffff) << 48);
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT16(a >> 0, 0) | NZBIT16(a >> 16, 1) |
-        NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3);
-    return a;
-}
-
-uint64_t HELPER(iwmmxt_packuq)(CPUARMState *env, uint64_t a, uint64_t b)
-{
-    a = (a & 0xffffffff) | ((b & 0xffffffff) << 32);
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1);
-    return a;
-}
-
-/* TODO: Signed-Saturation */
-uint64_t HELPER(iwmmxt_packsw)(CPUARMState *env, uint64_t a, uint64_t b)
-{
-    a = (((a >> 0) & 0xff) << 0) | (((a >> 16) & 0xff) << 8) |
-        (((a >> 32) & 0xff) << 16) | (((a >> 48) & 0xff) << 24) |
-        (((b >> 0) & 0xff) << 32) | (((b >> 16) & 0xff) << 40) |
-        (((b >> 32) & 0xff) << 48) | (((b >> 48) & 0xff) << 56);
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) |
-        NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) |
-        NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) |
-        NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7);
-    return a;
-}
-
-uint64_t HELPER(iwmmxt_packsl)(CPUARMState *env, uint64_t a, uint64_t b)
-{
-    a = (((a >> 0) & 0xffff) << 0) | (((a >> 32) & 0xffff) << 16) |
-        (((b >> 0) & 0xffff) << 32) | (((b >> 32) & 0xffff) << 48);
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT16(a >> 0, 0) | NZBIT16(a >> 16, 1) |
-        NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3);
-    return a;
-}
-
-uint64_t HELPER(iwmmxt_packsq)(CPUARMState *env, uint64_t a, uint64_t b)
-{
-    a = (a & 0xffffffff) | ((b & 0xffffffff) << 32);
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
-        NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1);
-    return a;
-}
-
-uint64_t HELPER(iwmmxt_muladdsl)(uint64_t c, uint32_t a, uint32_t b)
-{
-    return c + ((int32_t) EXTEND32(a) * (int32_t) EXTEND32(b));
-}
-
-uint64_t HELPER(iwmmxt_muladdsw)(uint64_t c, uint32_t a, uint32_t b)
-{
-    c += EXTEND32(EXTEND16S((a >> 0) & 0xffff) *
-                  EXTEND16S((b >> 0) & 0xffff));
-    c += EXTEND32(EXTEND16S((a >> 16) & 0xffff) *
-                  EXTEND16S((b >> 16) & 0xffff));
-    return c;
-}
-
-uint64_t HELPER(iwmmxt_muladdswl)(uint64_t c, uint32_t a, uint32_t b)
-{
-    return c + (EXTEND32(EXTEND16S(a & 0xffff) *
-                         EXTEND16S(b & 0xffff)));
-}
diff --git a/target/arm/tcg/m_helper.c b/target/arm/tcg/m_helper.c
index 28307b5615..d856e3bc8e 100644
--- a/target/arm/tcg/m_helper.c
+++ b/target/arm/tcg/m_helper.c
@@ -2829,8 +2829,8 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op)
         ARMMMUFaultInfo fi = {};
 
         /* We can ignore the return value as prot is always set */
-        pmsav8_mpu_lookup(env, addr, MMU_DATA_LOAD, mmu_idx, targetsec,
-                          &res, &fi, &mregion);
+        pmsav8_mpu_lookup(env, addr, MMU_DATA_LOAD, PAGE_READ, mmu_idx,
+                          targetsec, &res, &fi, &mregion);
         if (mregion == -1) {
             mrvalid = false;
             mregion = 0;
diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build
index 895facdc30..1b115656c4 100644
--- a/target/arm/tcg/meson.build
+++ b/target/arm/tcg/meson.build
@@ -66,7 +66,6 @@ arm_common_ss.add(files(
 arm_common_system_ss.add(files(
   'cpregs-at.c',
   'hflags.c',
-  'iwmmxt_helper.c',
   'neon_helper.c',
   'tlb_helper.c',
   'tlb-insns.c',
@@ -74,7 +73,6 @@ arm_common_system_ss.add(files(
 ))
 arm_user_ss.add(files(
   'hflags.c',
-  'iwmmxt_helper.c',
   'neon_helper.c',
   'tlb_helper.c',
   'vfp_helper.c',
diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c
index 575e566280..5373e0e998 100644
--- a/target/arm/tcg/op_helper.c
+++ b/target/arm/tcg/op_helper.c
@@ -768,12 +768,6 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key,
 
     assert(ri != NULL);
 
-    if (arm_feature(env, ARM_FEATURE_XSCALE) && ri->cp < 14
-        && extract32(env->cp15.c15_cpar, ri->cp, 1) == 0) {
-        res = CP_ACCESS_UNDEFINED;
-        goto fail;
-    }
-
     if (ri->accessfn) {
         res = ri->accessfn(env, ri, isread);
     }
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index f7d6d8ce19..e62dcc5d85 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -44,8 +44,6 @@
 #define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
 #define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
 
-/* These are TCG temporaries used only by the legacy iwMMXt decoder */
-static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
 /* These are TCG globals which alias CPUARMState fields */
 static TCGv_i32 cpu_R[16];
 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
@@ -1252,1263 +1250,6 @@ void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
     }
 }
 
-#define ARM_CP_RW_BIT   (1 << 20)
-
-static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
-{
-    tcg_gen_ld_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
-}
-
-static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
-{
-    tcg_gen_st_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
-}
-
-static inline TCGv_i32 iwmmxt_load_creg(int reg)
-{
-    TCGv_i32 var = tcg_temp_new_i32();
-    tcg_gen_ld_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
-    return var;
-}
-
-static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
-{
-    tcg_gen_st_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
-}
-
-static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
-{
-    iwmmxt_store_reg(cpu_M0, rn);
-}
-
-static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
-{
-    iwmmxt_load_reg(cpu_M0, rn);
-}
-
-static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
-{
-    iwmmxt_load_reg(cpu_V1, rn);
-    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
-}
-
-static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
-{
-    iwmmxt_load_reg(cpu_V1, rn);
-    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
-}
-
-static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
-{
-    iwmmxt_load_reg(cpu_V1, rn);
-    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
-}
-
-#define IWMMXT_OP(name) \
-static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
-{ \
-    iwmmxt_load_reg(cpu_V1, rn); \
-    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
-}
-
-#define IWMMXT_OP_ENV(name) \
-static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
-{ \
-    iwmmxt_load_reg(cpu_V1, rn); \
-    gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0, cpu_V1); \
-}
-
-#define IWMMXT_OP_ENV_SIZE(name) \
-IWMMXT_OP_ENV(name##b) \
-IWMMXT_OP_ENV(name##w) \
-IWMMXT_OP_ENV(name##l)
-
-#define IWMMXT_OP_ENV1(name) \
-static inline void gen_op_iwmmxt_##name##_M0(void) \
-{ \
-    gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0); \
-}
-
-IWMMXT_OP(maddsq)
-IWMMXT_OP(madduq)
-IWMMXT_OP(sadb)
-IWMMXT_OP(sadw)
-IWMMXT_OP(mulslw)
-IWMMXT_OP(mulshw)
-IWMMXT_OP(mululw)
-IWMMXT_OP(muluhw)
-IWMMXT_OP(macsw)
-IWMMXT_OP(macuw)
-
-IWMMXT_OP_ENV_SIZE(unpackl)
-IWMMXT_OP_ENV_SIZE(unpackh)
-
-IWMMXT_OP_ENV1(unpacklub)
-IWMMXT_OP_ENV1(unpackluw)
-IWMMXT_OP_ENV1(unpacklul)
-IWMMXT_OP_ENV1(unpackhub)
-IWMMXT_OP_ENV1(unpackhuw)
-IWMMXT_OP_ENV1(unpackhul)
-IWMMXT_OP_ENV1(unpacklsb)
-IWMMXT_OP_ENV1(unpacklsw)
-IWMMXT_OP_ENV1(unpacklsl)
-IWMMXT_OP_ENV1(unpackhsb)
-IWMMXT_OP_ENV1(unpackhsw)
-IWMMXT_OP_ENV1(unpackhsl)
-
-IWMMXT_OP_ENV_SIZE(cmpeq)
-IWMMXT_OP_ENV_SIZE(cmpgtu)
-IWMMXT_OP_ENV_SIZE(cmpgts)
-
-IWMMXT_OP_ENV_SIZE(mins)
-IWMMXT_OP_ENV_SIZE(minu)
-IWMMXT_OP_ENV_SIZE(maxs)
-IWMMXT_OP_ENV_SIZE(maxu)
-
-IWMMXT_OP_ENV_SIZE(subn)
-IWMMXT_OP_ENV_SIZE(addn)
-IWMMXT_OP_ENV_SIZE(subu)
-IWMMXT_OP_ENV_SIZE(addu)
-IWMMXT_OP_ENV_SIZE(subs)
-IWMMXT_OP_ENV_SIZE(adds)
-
-IWMMXT_OP_ENV(avgb0)
-IWMMXT_OP_ENV(avgb1)
-IWMMXT_OP_ENV(avgw0)
-IWMMXT_OP_ENV(avgw1)
-
-IWMMXT_OP_ENV(packuw)
-IWMMXT_OP_ENV(packul)
-IWMMXT_OP_ENV(packuq)
-IWMMXT_OP_ENV(packsw)
-IWMMXT_OP_ENV(packsl)
-IWMMXT_OP_ENV(packsq)
-
-static void gen_op_iwmmxt_set_mup(void)
-{
-    TCGv_i32 tmp;
-    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
-    tcg_gen_ori_i32(tmp, tmp, 2);
-    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
-}
-
-static void gen_op_iwmmxt_set_cup(void)
-{
-    TCGv_i32 tmp;
-    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
-    tcg_gen_ori_i32(tmp, tmp, 1);
-    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
-}
-
-static void gen_op_iwmmxt_setpsr_nz(void)
-{
-    TCGv_i32 tmp = tcg_temp_new_i32();
-    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
-    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
-}
-
-static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
-{
-    iwmmxt_load_reg(cpu_V1, rn);
-    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
-    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
-}
-
-static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
-                                     TCGv_i32 dest)
-{
-    int rd;
-    uint32_t offset;
-    TCGv_i32 tmp;
-
-    rd = (insn >> 16) & 0xf;
-    tmp = load_reg(s, rd);
-
-    offset = (insn & 0xff) << ((insn >> 7) & 2);
-    if (insn & (1 << 24)) {
-        /* Pre indexed */
-        if (insn & (1 << 23))
-            tcg_gen_addi_i32(tmp, tmp, offset);
-        else
-            tcg_gen_addi_i32(tmp, tmp, -offset);
-        tcg_gen_mov_i32(dest, tmp);
-        if (insn & (1 << 21)) {
-            store_reg(s, rd, tmp);
-        }
-    } else if (insn & (1 << 21)) {
-        /* Post indexed */
-        tcg_gen_mov_i32(dest, tmp);
-        if (insn & (1 << 23))
-            tcg_gen_addi_i32(tmp, tmp, offset);
-        else
-            tcg_gen_addi_i32(tmp, tmp, -offset);
-        store_reg(s, rd, tmp);
-    } else if (!(insn & (1 << 23)))
-        return 1;
-    return 0;
-}
-
-static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
-{
-    int rd = (insn >> 0) & 0xf;
-    TCGv_i32 tmp;
-
-    if (insn & (1 << 8)) {
-        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
-            return 1;
-        } else {
-            tmp = iwmmxt_load_creg(rd);
-        }
-    } else {
-        tmp = tcg_temp_new_i32();
-        iwmmxt_load_reg(cpu_V0, rd);
-        tcg_gen_extrl_i64_i32(tmp, cpu_V0);
-    }
-    tcg_gen_andi_i32(tmp, tmp, mask);
-    tcg_gen_mov_i32(dest, tmp);
-    return 0;
-}
-
-/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
-   (ie. an undefined instruction).  */
-static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
-{
-    int rd, wrd;
-    int rdhi, rdlo, rd0, rd1, i;
-    TCGv_i32 addr;
-    TCGv_i32 tmp, tmp2, tmp3;
-
-    if ((insn & 0x0e000e00) == 0x0c000000) {
-        if ((insn & 0x0fe00ff0) == 0x0c400000) {
-            wrd = insn & 0xf;
-            rdlo = (insn >> 12) & 0xf;
-            rdhi = (insn >> 16) & 0xf;
-            if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
-                iwmmxt_load_reg(cpu_V0, wrd);
-                tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
-                tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
-            } else {                                    /* TMCRR */
-                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
-                iwmmxt_store_reg(cpu_V0, wrd);
-                gen_op_iwmmxt_set_mup();
-            }
-            return 0;
-        }
-
-        wrd = (insn >> 12) & 0xf;
-        addr = tcg_temp_new_i32();
-        if (gen_iwmmxt_address(s, insn, addr)) {
-            return 1;
-        }
-        if (insn & ARM_CP_RW_BIT) {
-            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
-                tmp = tcg_temp_new_i32();
-                gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
-                iwmmxt_store_creg(wrd, tmp);
-            } else {
-                i = 1;
-                if (insn & (1 << 8)) {
-                    if (insn & (1 << 22)) {             /* WLDRD */
-                        gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
-                        i = 0;
-                    } else {                            /* WLDRW wRd */
-                        tmp = tcg_temp_new_i32();
-                        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
-                    }
-                } else {
-                    tmp = tcg_temp_new_i32();
-                    if (insn & (1 << 22)) {             /* WLDRH */
-                        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
-                    } else {                            /* WLDRB */
-                        gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
-                    }
-                }
-                if (i) {
-                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
-                }
-                gen_op_iwmmxt_movq_wRn_M0(wrd);
-            }
-        } else {
-            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
-                tmp = iwmmxt_load_creg(wrd);
-                gen_aa32_st32(s, tmp, addr, get_mem_index(s));
-            } else {
-                gen_op_iwmmxt_movq_M0_wRn(wrd);
-                tmp = tcg_temp_new_i32();
-                if (insn & (1 << 8)) {
-                    if (insn & (1 << 22)) {             /* WSTRD */
-                        gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
-                    } else {                            /* WSTRW wRd */
-                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
-                        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
-                    }
-                } else {
-                    if (insn & (1 << 22)) {             /* WSTRH */
-                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
-                        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
-                    } else {                            /* WSTRB */
-                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
-                        gen_aa32_st8(s, tmp, addr, get_mem_index(s));
-                    }
-                }
-            }
-        }
-        return 0;
-    }
-
-    if ((insn & 0x0f000000) != 0x0e000000)
-        return 1;
-
-    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
-    case 0x000:                                                 /* WOR */
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 0) & 0xf;
-        rd1 = (insn >> 16) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        gen_op_iwmmxt_orq_M0_wRn(rd1);
-        gen_op_iwmmxt_setpsr_nz();
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x011:                                                 /* TMCR */
-        if (insn & 0xf)
-            return 1;
-        rd = (insn >> 12) & 0xf;
-        wrd = (insn >> 16) & 0xf;
-        switch (wrd) {
-        case ARM_IWMMXT_wCID:
-        case ARM_IWMMXT_wCASF:
-            break;
-        case ARM_IWMMXT_wCon:
-            gen_op_iwmmxt_set_cup();
-            /* Fall through.  */
-        case ARM_IWMMXT_wCSSF:
-            tmp = iwmmxt_load_creg(wrd);
-            tmp2 = load_reg(s, rd);
-            tcg_gen_andc_i32(tmp, tmp, tmp2);
-            iwmmxt_store_creg(wrd, tmp);
-            break;
-        case ARM_IWMMXT_wCGR0:
-        case ARM_IWMMXT_wCGR1:
-        case ARM_IWMMXT_wCGR2:
-        case ARM_IWMMXT_wCGR3:
-            gen_op_iwmmxt_set_cup();
-            tmp = load_reg(s, rd);
-            iwmmxt_store_creg(wrd, tmp);
-            break;
-        default:
-            return 1;
-        }
-        break;
-    case 0x100:                                                 /* WXOR */
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 0) & 0xf;
-        rd1 = (insn >> 16) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        gen_op_iwmmxt_xorq_M0_wRn(rd1);
-        gen_op_iwmmxt_setpsr_nz();
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x111:                                                 /* TMRC */
-        if (insn & 0xf)
-            return 1;
-        rd = (insn >> 12) & 0xf;
-        wrd = (insn >> 16) & 0xf;
-        tmp = iwmmxt_load_creg(wrd);
-        store_reg(s, rd, tmp);
-        break;
-    case 0x300:                                                 /* WANDN */
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 0) & 0xf;
-        rd1 = (insn >> 16) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        tcg_gen_neg_i64(cpu_M0, cpu_M0);
-        gen_op_iwmmxt_andq_M0_wRn(rd1);
-        gen_op_iwmmxt_setpsr_nz();
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x200:                                                 /* WAND */
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 0) & 0xf;
-        rd1 = (insn >> 16) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        gen_op_iwmmxt_andq_M0_wRn(rd1);
-        gen_op_iwmmxt_setpsr_nz();
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x810: case 0xa10:                             /* WMADD */
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 0) & 0xf;
-        rd1 = (insn >> 16) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        if (insn & (1 << 21))
-            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
-        else
-            gen_op_iwmmxt_madduq_M0_wRn(rd1);
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        break;
-    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        switch ((insn >> 22) & 3) {
-        case 0:
-            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
-            break;
-        case 1:
-            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
-            break;
-        case 2:
-            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
-            break;
-        case 3:
-            return 1;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        switch ((insn >> 22) & 3) {
-        case 0:
-            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
-            break;
-        case 1:
-            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
-            break;
-        case 2:
-            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
-            break;
-        case 3:
-            return 1;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        if (insn & (1 << 22))
-            gen_op_iwmmxt_sadw_M0_wRn(rd1);
-        else
-            gen_op_iwmmxt_sadb_M0_wRn(rd1);
-        if (!(insn & (1 << 20)))
-            gen_op_iwmmxt_addl_M0_wRn(wrd);
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        break;
-    case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        if (insn & (1 << 21)) {
-            if (insn & (1 << 20))
-                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
-        } else {
-            if (insn & (1 << 20))
-                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_mululw_M0_wRn(rd1);
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        break;
-    case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        if (insn & (1 << 21))
-            gen_op_iwmmxt_macsw_M0_wRn(rd1);
-        else
-            gen_op_iwmmxt_macuw_M0_wRn(rd1);
-        if (!(insn & (1 << 20))) {
-            iwmmxt_load_reg(cpu_V1, wrd);
-            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        break;
-    case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        switch ((insn >> 22) & 3) {
-        case 0:
-            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
-            break;
-        case 1:
-            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
-            break;
-        case 2:
-            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
-            break;
-        case 3:
-            return 1;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        if (insn & (1 << 22)) {
-            if (insn & (1 << 20))
-                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
-        } else {
-            if (insn & (1 << 20))
-                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
-        tcg_gen_andi_i32(tmp, tmp, 7);
-        iwmmxt_load_reg(cpu_V1, rd1);
-        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        break;
-    case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
-        if (((insn >> 6) & 3) == 3)
-            return 1;
-        rd = (insn >> 12) & 0xf;
-        wrd = (insn >> 16) & 0xf;
-        tmp = load_reg(s, rd);
-        gen_op_iwmmxt_movq_M0_wRn(wrd);
-        switch ((insn >> 6) & 3) {
-        case 0:
-            tmp2 = tcg_constant_i32(0xff);
-            tmp3 = tcg_constant_i32((insn & 7) << 3);
-            break;
-        case 1:
-            tmp2 = tcg_constant_i32(0xffff);
-            tmp3 = tcg_constant_i32((insn & 3) << 4);
-            break;
-        case 2:
-            tmp2 = tcg_constant_i32(0xffffffff);
-            tmp3 = tcg_constant_i32((insn & 1) << 5);
-            break;
-        default:
-            g_assert_not_reached();
-        }
-        gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        break;
-    case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
-        rd = (insn >> 12) & 0xf;
-        wrd = (insn >> 16) & 0xf;
-        if (rd == 15 || ((insn >> 22) & 3) == 3)
-            return 1;
-        gen_op_iwmmxt_movq_M0_wRn(wrd);
-        tmp = tcg_temp_new_i32();
-        switch ((insn >> 22) & 3) {
-        case 0:
-            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
-            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
-            if (insn & 8) {
-                tcg_gen_ext8s_i32(tmp, tmp);
-            } else {
-                tcg_gen_andi_i32(tmp, tmp, 0xff);
-            }
-            break;
-        case 1:
-            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
-            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
-            if (insn & 8) {
-                tcg_gen_ext16s_i32(tmp, tmp);
-            } else {
-                tcg_gen_andi_i32(tmp, tmp, 0xffff);
-            }
-            break;
-        case 2:
-            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
-            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
-            break;
-        }
-        store_reg(s, rd, tmp);
-        break;
-    case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
-        if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
-            return 1;
-        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
-        switch ((insn >> 22) & 3) {
-        case 0:
-            tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
-            break;
-        case 1:
-            tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
-            break;
-        case 2:
-            tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
-            break;
-        }
-        tcg_gen_shli_i32(tmp, tmp, 28);
-        gen_set_nzcv(tmp);
-        break;
-    case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
-        if (((insn >> 6) & 3) == 3)
-            return 1;
-        rd = (insn >> 12) & 0xf;
-        wrd = (insn >> 16) & 0xf;
-        tmp = load_reg(s, rd);
-        switch ((insn >> 6) & 3) {
-        case 0:
-            gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
-            break;
-        case 1:
-            gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
-            break;
-        case 2:
-            gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
-            break;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        break;
-    case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
-        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
-            return 1;
-        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
-        tmp2 = tcg_temp_new_i32();
-        tcg_gen_mov_i32(tmp2, tmp);
-        switch ((insn >> 22) & 3) {
-        case 0:
-            for (i = 0; i < 7; i ++) {
-                tcg_gen_shli_i32(tmp2, tmp2, 4);
-                tcg_gen_and_i32(tmp, tmp, tmp2);
-            }
-            break;
-        case 1:
-            for (i = 0; i < 3; i ++) {
-                tcg_gen_shli_i32(tmp2, tmp2, 8);
-                tcg_gen_and_i32(tmp, tmp, tmp2);
-            }
-            break;
-        case 2:
-            tcg_gen_shli_i32(tmp2, tmp2, 16);
-            tcg_gen_and_i32(tmp, tmp, tmp2);
-            break;
-        }
-        gen_set_nzcv(tmp);
-        break;
-    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        switch ((insn >> 22) & 3) {
-        case 0:
-            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
-            break;
-        case 1:
-            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
-            break;
-        case 2:
-            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
-            break;
-        case 3:
-            return 1;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        break;
-    case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
-        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
-            return 1;
-        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
-        tmp2 = tcg_temp_new_i32();
-        tcg_gen_mov_i32(tmp2, tmp);
-        switch ((insn >> 22) & 3) {
-        case 0:
-            for (i = 0; i < 7; i ++) {
-                tcg_gen_shli_i32(tmp2, tmp2, 4);
-                tcg_gen_or_i32(tmp, tmp, tmp2);
-            }
-            break;
-        case 1:
-            for (i = 0; i < 3; i ++) {
-                tcg_gen_shli_i32(tmp2, tmp2, 8);
-                tcg_gen_or_i32(tmp, tmp, tmp2);
-            }
-            break;
-        case 2:
-            tcg_gen_shli_i32(tmp2, tmp2, 16);
-            tcg_gen_or_i32(tmp, tmp, tmp2);
-            break;
-        }
-        gen_set_nzcv(tmp);
-        break;
-    case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
-        rd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
-            return 1;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        tmp = tcg_temp_new_i32();
-        switch ((insn >> 22) & 3) {
-        case 0:
-            gen_helper_iwmmxt_msbb(tmp, cpu_M0);
-            break;
-        case 1:
-            gen_helper_iwmmxt_msbw(tmp, cpu_M0);
-            break;
-        case 2:
-            gen_helper_iwmmxt_msbl(tmp, cpu_M0);
-            break;
-        }
-        store_reg(s, rd, tmp);
-        break;
-    case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
-    case 0x906: case 0xb06: case 0xd06: case 0xf06:
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        switch ((insn >> 22) & 3) {
-        case 0:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
-            break;
-        case 1:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
-            break;
-        case 2:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
-            break;
-        case 3:
-            return 1;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
-    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        switch ((insn >> 22) & 3) {
-        case 0:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_unpacklsb_M0();
-            else
-                gen_op_iwmmxt_unpacklub_M0();
-            break;
-        case 1:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_unpacklsw_M0();
-            else
-                gen_op_iwmmxt_unpackluw_M0();
-            break;
-        case 2:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_unpacklsl_M0();
-            else
-                gen_op_iwmmxt_unpacklul_M0();
-            break;
-        case 3:
-            return 1;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
-    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        switch ((insn >> 22) & 3) {
-        case 0:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_unpackhsb_M0();
-            else
-                gen_op_iwmmxt_unpackhub_M0();
-            break;
-        case 1:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_unpackhsw_M0();
-            else
-                gen_op_iwmmxt_unpackhuw_M0();
-            break;
-        case 2:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_unpackhsl_M0();
-            else
-                gen_op_iwmmxt_unpackhul_M0();
-            break;
-        case 3:
-            return 1;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
-    case 0x214: case 0x614: case 0xa14: case 0xe14:
-        if (((insn >> 22) & 3) == 0)
-            return 1;
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        tmp = tcg_temp_new_i32();
-        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
-            return 1;
-        }
-        switch ((insn >> 22) & 3) {
-        case 1:
-            gen_helper_iwmmxt_srlw(cpu_M0, tcg_env, cpu_M0, tmp);
-            break;
-        case 2:
-            gen_helper_iwmmxt_srll(cpu_M0, tcg_env, cpu_M0, tmp);
-            break;
-        case 3:
-            gen_helper_iwmmxt_srlq(cpu_M0, tcg_env, cpu_M0, tmp);
-            break;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
-    case 0x014: case 0x414: case 0x814: case 0xc14:
-        if (((insn >> 22) & 3) == 0)
-            return 1;
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        tmp = tcg_temp_new_i32();
-        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
-            return 1;
-        }
-        switch ((insn >> 22) & 3) {
-        case 1:
-            gen_helper_iwmmxt_sraw(cpu_M0, tcg_env, cpu_M0, tmp);
-            break;
-        case 2:
-            gen_helper_iwmmxt_sral(cpu_M0, tcg_env, cpu_M0, tmp);
-            break;
-        case 3:
-            gen_helper_iwmmxt_sraq(cpu_M0, tcg_env, cpu_M0, tmp);
-            break;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
-    case 0x114: case 0x514: case 0x914: case 0xd14:
-        if (((insn >> 22) & 3) == 0)
-            return 1;
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        tmp = tcg_temp_new_i32();
-        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
-            return 1;
-        }
-        switch ((insn >> 22) & 3) {
-        case 1:
-            gen_helper_iwmmxt_sllw(cpu_M0, tcg_env, cpu_M0, tmp);
-            break;
-        case 2:
-            gen_helper_iwmmxt_slll(cpu_M0, tcg_env, cpu_M0, tmp);
-            break;
-        case 3:
-            gen_helper_iwmmxt_sllq(cpu_M0, tcg_env, cpu_M0, tmp);
-            break;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
-    case 0x314: case 0x714: case 0xb14: case 0xf14:
-        if (((insn >> 22) & 3) == 0)
-            return 1;
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        tmp = tcg_temp_new_i32();
-        switch ((insn >> 22) & 3) {
-        case 1:
-            if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
-                return 1;
-            }
-            gen_helper_iwmmxt_rorw(cpu_M0, tcg_env, cpu_M0, tmp);
-            break;
-        case 2:
-            if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
-                return 1;
-            }
-            gen_helper_iwmmxt_rorl(cpu_M0, tcg_env, cpu_M0, tmp);
-            break;
-        case 3:
-            if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
-                return 1;
-            }
-            gen_helper_iwmmxt_rorq(cpu_M0, tcg_env, cpu_M0, tmp);
-            break;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
-    case 0x916: case 0xb16: case 0xd16: case 0xf16:
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        switch ((insn >> 22) & 3) {
-        case 0:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_minsb_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_minub_M0_wRn(rd1);
-            break;
-        case 1:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_minsw_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_minuw_M0_wRn(rd1);
-            break;
-        case 2:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_minsl_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_minul_M0_wRn(rd1);
-            break;
-        case 3:
-            return 1;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        break;
-    case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
-    case 0x816: case 0xa16: case 0xc16: case 0xe16:
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        switch ((insn >> 22) & 3) {
-        case 0:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_maxub_M0_wRn(rd1);
-            break;
-        case 1:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
-            break;
-        case 2:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_maxul_M0_wRn(rd1);
-            break;
-        case 3:
-            return 1;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        break;
-    case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
-    case 0x402: case 0x502: case 0x602: case 0x702:
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        iwmmxt_load_reg(cpu_V1, rd1);
-        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
-                                tcg_constant_i32((insn >> 20) & 3));
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        break;
-    case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
-    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
-    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
-    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        switch ((insn >> 20) & 0xf) {
-        case 0x0:
-            gen_op_iwmmxt_subnb_M0_wRn(rd1);
-            break;
-        case 0x1:
-            gen_op_iwmmxt_subub_M0_wRn(rd1);
-            break;
-        case 0x3:
-            gen_op_iwmmxt_subsb_M0_wRn(rd1);
-            break;
-        case 0x4:
-            gen_op_iwmmxt_subnw_M0_wRn(rd1);
-            break;
-        case 0x5:
-            gen_op_iwmmxt_subuw_M0_wRn(rd1);
-            break;
-        case 0x7:
-            gen_op_iwmmxt_subsw_M0_wRn(rd1);
-            break;
-        case 0x8:
-            gen_op_iwmmxt_subnl_M0_wRn(rd1);
-            break;
-        case 0x9:
-            gen_op_iwmmxt_subul_M0_wRn(rd1);
-            break;
-        case 0xb:
-            gen_op_iwmmxt_subsl_M0_wRn(rd1);
-            break;
-        default:
-            return 1;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
-    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
-    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
-    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
-        gen_helper_iwmmxt_shufh(cpu_M0, tcg_env, cpu_M0, tmp);
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
-    case 0x418: case 0x518: case 0x618: case 0x718:
-    case 0x818: case 0x918: case 0xa18: case 0xb18:
-    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        switch ((insn >> 20) & 0xf) {
-        case 0x0:
-            gen_op_iwmmxt_addnb_M0_wRn(rd1);
-            break;
-        case 0x1:
-            gen_op_iwmmxt_addub_M0_wRn(rd1);
-            break;
-        case 0x3:
-            gen_op_iwmmxt_addsb_M0_wRn(rd1);
-            break;
-        case 0x4:
-            gen_op_iwmmxt_addnw_M0_wRn(rd1);
-            break;
-        case 0x5:
-            gen_op_iwmmxt_adduw_M0_wRn(rd1);
-            break;
-        case 0x7:
-            gen_op_iwmmxt_addsw_M0_wRn(rd1);
-            break;
-        case 0x8:
-            gen_op_iwmmxt_addnl_M0_wRn(rd1);
-            break;
-        case 0x9:
-            gen_op_iwmmxt_addul_M0_wRn(rd1);
-            break;
-        case 0xb:
-            gen_op_iwmmxt_addsl_M0_wRn(rd1);
-            break;
-        default:
-            return 1;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
-    case 0x408: case 0x508: case 0x608: case 0x708:
-    case 0x808: case 0x908: case 0xa08: case 0xb08:
-    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
-        if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
-            return 1;
-        wrd = (insn >> 12) & 0xf;
-        rd0 = (insn >> 16) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        gen_op_iwmmxt_movq_M0_wRn(rd0);
-        switch ((insn >> 22) & 3) {
-        case 1:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_packsw_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_packuw_M0_wRn(rd1);
-            break;
-        case 2:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_packsl_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_packul_M0_wRn(rd1);
-            break;
-        case 3:
-            if (insn & (1 << 21))
-                gen_op_iwmmxt_packsq_M0_wRn(rd1);
-            else
-                gen_op_iwmmxt_packuq_M0_wRn(rd1);
-            break;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        gen_op_iwmmxt_set_cup();
-        break;
-    case 0x201: case 0x203: case 0x205: case 0x207:
-    case 0x209: case 0x20b: case 0x20d: case 0x20f:
-    case 0x211: case 0x213: case 0x215: case 0x217:
-    case 0x219: case 0x21b: case 0x21d: case 0x21f:
-        wrd = (insn >> 5) & 0xf;
-        rd0 = (insn >> 12) & 0xf;
-        rd1 = (insn >> 0) & 0xf;
-        if (rd0 == 0xf || rd1 == 0xf)
-            return 1;
-        gen_op_iwmmxt_movq_M0_wRn(wrd);
-        tmp = load_reg(s, rd0);
-        tmp2 = load_reg(s, rd1);
-        switch ((insn >> 16) & 0xf) {
-        case 0x0:                                       /* TMIA */
-            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
-            break;
-        case 0x8:                                       /* TMIAPH */
-            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
-            break;
-        case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
-            if (insn & (1 << 16))
-                tcg_gen_shri_i32(tmp, tmp, 16);
-            if (insn & (1 << 17))
-                tcg_gen_shri_i32(tmp2, tmp2, 16);
-            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
-            break;
-        default:
-            return 1;
-        }
-        gen_op_iwmmxt_movq_wRn_M0(wrd);
-        gen_op_iwmmxt_set_mup();
-        break;
-    default:
-        return 1;
-    }
-
-    return 0;
-}
-
-/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
-   (ie. an undefined instruction).  */
-static int disas_dsp_insn(DisasContext *s, uint32_t insn)
-{
-    int acc, rd0, rd1, rdhi, rdlo;
-    TCGv_i32 tmp, tmp2;
-
-    if ((insn & 0x0ff00f10) == 0x0e200010) {
-        /* Multiply with Internal Accumulate Format */
-        rd0 = (insn >> 12) & 0xf;
-        rd1 = insn & 0xf;
-        acc = (insn >> 5) & 7;
-
-        if (acc != 0)
-            return 1;
-
-        tmp = load_reg(s, rd0);
-        tmp2 = load_reg(s, rd1);
-        switch ((insn >> 16) & 0xf) {
-        case 0x0:                                       /* MIA */
-            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
-            break;
-        case 0x8:                                       /* MIAPH */
-            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
-            break;
-        case 0xc:                                       /* MIABB */
-        case 0xd:                                       /* MIABT */
-        case 0xe:                                       /* MIATB */
-        case 0xf:                                       /* MIATT */
-            if (insn & (1 << 16))
-                tcg_gen_shri_i32(tmp, tmp, 16);
-            if (insn & (1 << 17))
-                tcg_gen_shri_i32(tmp2, tmp2, 16);
-            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
-            break;
-        default:
-            return 1;
-        }
-
-        gen_op_iwmmxt_movq_wRn_M0(acc);
-        return 0;
-    }
-
-    if ((insn & 0x0fe00ff8) == 0x0c400000) {
-        /* Internal Accumulator Access Format */
-        rdhi = (insn >> 16) & 0xf;
-        rdlo = (insn >> 12) & 0xf;
-        acc = insn & 7;
-
-        if (acc != 0)
-            return 1;
-
-        if (insn & ARM_CP_RW_BIT) {                     /* MRA */
-            iwmmxt_load_reg(cpu_V0, acc);
-            tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
-            tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
-            tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
-        } else {                                        /* MAR */
-            tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
-            iwmmxt_store_reg(cpu_V0, acc);
-        }
-        return 0;
-    }
-
-    return 1;
-}
-
 static void gen_goto_ptr(void)
 {
     tcg_gen_lookup_and_goto_ptr();
@@ -3048,13 +1789,10 @@ static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
     }
 
     if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
-        (ri->fgt && s->fgt_active) ||
-        (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
+        (ri->fgt && s->fgt_active)) {
         /*
          * Emit code to perform further access permissions checks at
          * runtime; this may result in an exception.
-         * Note that on XScale all cp0..c13 registers do an access check
-         * call in order to handle c15_cpar.
          */
         gen_set_condexec(s);
         gen_update_pc(s, 0);
@@ -3192,24 +1930,6 @@ static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
     }
 }
 
-/* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
-static void disas_xscale_insn(DisasContext *s, uint32_t insn)
-{
-    int cpnum = (insn >> 8) & 0xf;
-
-    if (extract32(s->c15_cpar, cpnum, 1) == 0) {
-        unallocated_encoding(s);
-    } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
-        if (disas_iwmmxt_insn(s, insn)) {
-            unallocated_encoding(s);
-        }
-    } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
-        if (disas_dsp_insn(s, insn)) {
-            unallocated_encoding(s);
-        }
-    }
-}
-
 /* Store a 64-bit value to a register pair.  Clobbers val.  */
 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
 {
@@ -3569,14 +2289,7 @@ static bool valid_cp(DisasContext *s, int cp)
      * only cp14 and cp15 are valid, and other values aren't considered
      * to be in the coprocessor-instruction space at all. v8M still
      * permits coprocessors 0..7.
-     * For XScale, we must not decode the XScale cp0, cp1 space as
-     * a standard coprocessor insn, because we want to fall through to
-     * the legacy disas_xscale_insn() decoder after decodetree is done.
      */
-    if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
-        return false;
-    }
-
     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
         !arm_dc_feature(s, ARM_FEATURE_M)) {
         return cp >= 14;
@@ -7343,18 +6056,6 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
             disas_neon_shared(s, insn)) {
             return;
         }
-        /* fall back to legacy decoder */
-
-        if ((insn & 0x0e000f00) == 0x0c000100) {
-            if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
-                /* iWMMXt register transfer.  */
-                if (extract32(s->c15_cpar, 1, 1)) {
-                    if (!disas_iwmmxt_insn(s, insn)) {
-                        return;
-                    }
-                }
-            }
-        }
         goto illegal_op;
     }
     if (cond != 0xe) {
@@ -7368,16 +6069,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
         disas_vfp(s, insn)) {
         return;
     }
-    /* fall back to legacy decoder */
-    /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
-    if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
-        if (((insn & 0x0c000e00) == 0x0c000000)
-            && ((insn & 0x03000000) != 0x03000000)) {
-            /* Coprocessor insn, coprocessor 0 or 1 */
-            disas_xscale_insn(s, insn);
-            return;
-        }
-    }
+    /* We didn't match anything in the decoder: UNDEF */
 
 illegal_op:
     unallocated_encoding(s);
@@ -7606,12 +6298,8 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
-        if (arm_feature(env, ARM_FEATURE_XSCALE)) {
-            dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
-        } else {
-            dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
-            dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
-        }
+        dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
+        dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
         dc->sme_trap_nonstreaming =
             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
     }
@@ -7651,10 +6339,6 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
         dc->base.max_insns = MIN(dc->base.max_insns, bound);
     }
-
-    cpu_V0 = tcg_temp_new_i64();
-    cpu_V1 = tcg_temp_new_i64();
-    cpu_M0 = tcg_temp_new_i64();
 }
 
 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index f974996f3f..ec4755ae3f 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -175,8 +175,6 @@ typedef struct DisasContext {
     uint8_t gm_blocksize;
     /* True if the current insn_start has been updated. */
     bool insn_start_updated;
-    /* Bottom two bits of XScale c15_cpar coprocessor access control reg */
-    int c15_cpar;
     /* Offset from VNCR_EL2 when FEAT_NV2 redirects this reg to memory */
     uint32_t nv2_redirect_offset;
 } DisasContext;
diff --git a/tests/data/acpi/aarch64/virt/DSDT.smmuv3-dev b/tests/data/acpi/aarch64/virt/DSDT.smmuv3-dev
new file mode 100644
index 0000000000..53d4c07f42
--- /dev/null
+++ b/tests/data/acpi/aarch64/virt/DSDT.smmuv3-dev
Binary files differ
diff --git a/tests/data/acpi/aarch64/virt/DSDT.smmuv3-legacy b/tests/data/acpi/aarch64/virt/DSDT.smmuv3-legacy
new file mode 100644
index 0000000000..53d4c07f42
--- /dev/null
+++ b/tests/data/acpi/aarch64/virt/DSDT.smmuv3-legacy
Binary files differ
diff --git a/tests/data/acpi/aarch64/virt/IORT.smmuv3-dev b/tests/data/acpi/aarch64/virt/IORT.smmuv3-dev
new file mode 100644
index 0000000000..67be268f62
--- /dev/null
+++ b/tests/data/acpi/aarch64/virt/IORT.smmuv3-dev
Binary files differ
diff --git a/tests/data/acpi/aarch64/virt/IORT.smmuv3-legacy b/tests/data/acpi/aarch64/virt/IORT.smmuv3-legacy
new file mode 100644
index 0000000000..41981a449f
--- /dev/null
+++ b/tests/data/acpi/aarch64/virt/IORT.smmuv3-legacy
Binary files differ
diff --git a/tests/guest-debug/test_gdbstub.py b/tests/guest-debug/test_gdbstub.py
index 4f08089e6a..e017ccb55d 100644
--- a/tests/guest-debug/test_gdbstub.py
+++ b/tests/guest-debug/test_gdbstub.py
@@ -1,7 +1,6 @@
 """Helper functions for gdbstub testing
 
 """
-from __future__ import print_function
 import argparse
 import gdb
 import os
diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index e7e6926c81..4fa8ac5096 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -2337,6 +2337,86 @@ static void test_acpi_aarch64_virt_viot(void)
     free_test_data(&data);
 }
 
+static void test_acpi_aarch64_virt_smmuv3_legacy(void)
+{
+    test_data data = {
+        .machine = "virt",
+        .arch = "aarch64",
+        .tcg_only = true,
+        .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd",
+        .uefi_fl2 = "pc-bios/edk2-arm-vars.fd",
+        .ram_start = 0x40000000ULL,
+        .scan_len = 128ULL * MiB,
+    };
+
+    /*
+     * cdrom is plugged into scsi controller to avoid conflict
+     * with pxb-pcie. See comments in test_acpi_aarch64_virt_tcg_pxb() for
+     * details.
+     *
+     * The setup includes three PCIe root complexes, one of which has
+     * bypass_iommu enabled. The generated IORT table contains a single
+     * SMMUv3 node and a Root Complex node with three ID mappings. Two
+     * of the ID mappings have output references pointing to the SMMUv3
+     * node and the remaining one points to ITS.
+     */
+    data.variant = ".smmuv3-legacy";
+    test_acpi_one(" -device pcie-root-port,chassis=1,id=pci.1"
+                  " -device virtio-scsi-pci,id=scsi0,bus=pci.1"
+                  " -drive file="
+                  "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2,"
+                  "if=none,media=cdrom,id=drive-scsi0-0-0-1,readonly=on"
+                  " -device scsi-cd,bus=scsi0.0,scsi-id=0,"
+                  "drive=drive-scsi0-0-0-1,id=scsi0-0-0-1,bootindex=1"
+                  " -cpu cortex-a57"
+                  " -M iommu=smmuv3"
+                  " -device pxb-pcie,id=pcie.1,bus=pcie.0,bus_nr=0x10"
+                  " -device pxb-pcie,id=pcie.2,bus=pcie.0,bus_nr=0x20,bypass_iommu=on",
+                  &data);
+    free_test_data(&data);
+}
+
+static void test_acpi_aarch64_virt_smmuv3_dev(void)
+{
+    test_data data = {
+        .machine = "virt",
+        .arch = "aarch64",
+        .tcg_only = true,
+        .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd",
+        .uefi_fl2 = "pc-bios/edk2-arm-vars.fd",
+        .ram_start = 0x40000000ULL,
+        .scan_len = 128ULL * MiB,
+    };
+
+    /*
+     * cdrom is plugged into scsi controller to avoid conflict
+     * with pxb-pcie. See comments in test_acpi_aarch64_virt_tcg_pxb()
+     * for details.
+     *
+     * The setup includes three PCIe root complexes, two of which are
+     * connected to separate SMMUv3 devices. The resulting IORT table
+     * contains two SMMUv3 nodes and a Root Complex node with ID mappings
+     * of which two of the ID mappings have output references pointing
+     * to two different SMMUv3 nodes and the remaining ones pointing to
+     * ITS.
+     */
+    data.variant = ".smmuv3-dev";
+    test_acpi_one(" -device pcie-root-port,chassis=1,id=pci.1"
+                  " -device virtio-scsi-pci,id=scsi0,bus=pci.1"
+                  " -drive file="
+                  "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2,"
+                  "if=none,media=cdrom,id=drive-scsi0-0-0-1,readonly=on"
+                  " -device scsi-cd,bus=scsi0.0,scsi-id=0,"
+                  "drive=drive-scsi0-0-0-1,id=scsi0-0-0-1,bootindex=1"
+                  " -cpu cortex-a57"
+                  " -device arm-smmuv3,primary-bus=pcie.0,id=smmuv3.0"
+                  " -device pxb-pcie,id=pcie.1,bus=pcie.0,bus_nr=0x10"
+                  " -device arm-smmuv3,primary-bus=pcie.1,id=smmuv3.1"
+                  " -device pxb-pcie,id=pcie.2,bus=pcie.0,bus_nr=0x20",
+                  &data);
+    free_test_data(&data);
+}
+
 #ifndef _WIN32
 # define DEV_NULL "/dev/null"
 #else
@@ -2768,6 +2848,12 @@ int main(int argc, char *argv[])
             if (qtest_has_device("virtio-iommu-pci")) {
                 qtest_add_func("acpi/virt/viot", test_acpi_aarch64_virt_viot);
             }
+            qtest_add_func("acpi/virt/smmuv3-legacy",
+                           test_acpi_aarch64_virt_smmuv3_legacy);
+            if (qtest_has_device("arm-smmuv3")) {
+                qtest_add_func("acpi/virt/smmuv3-dev",
+                               test_acpi_aarch64_virt_smmuv3_dev);
+            }
         }
     } else if (strcmp(arch, "riscv64") == 0) {
         if (has_tcg && qtest_has_device("virtio-blk-pci")) {
diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
index 16ddcf4f88..1755874bee 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -134,6 +134,35 @@ run-gdbstub-sve-ioctls: sve-ioctls
 
 EXTRA_RUNS += run-gdbstub-sysregs run-gdbstub-sve-ioctls
 
+ifneq ($(CROSS_AS_HAS_ARMV9_SME),)
+# SME gdbstub tests
+
+run-gdbstub-sysregs-sme: sysregs
+	$(call run-test, $@, $(GDB_SCRIPT) \
+		--gdb $(GDB) \
+		--qemu $(QEMU) --qargs "$(QEMU_OPTS)" \
+		--bin $< --test $(AARCH64_SRC)/gdbstub/test-sme.py \
+		-- test_sme --gdb_basic_za_test, \
+	basic gdbstub SME support)
+
+ifeq ($(GDB_HAS_SME_TILES),y)
+run-gdbstub-sysregs-sme-tile-slice: sysregs
+	$(call run-test, $@, $(GDB_SCRIPT) \
+		--gdb $(GDB) \
+		--qemu $(QEMU) --qargs "$(QEMU_OPTS)" \
+		--bin $< --test $(AARCH64_SRC)/gdbstub/test-sme.py \
+		-- test_sme --gdb_tile_slice_test, \
+	gdbstub SME ZA tile slice support)
+else
+run-gdbstub-sysregs-sme-tile-slice: sysregs
+	$(call skip-test,"gdbstub SME ZA tile slice support", \
+	"selected gdb ($(GDB)) does not support SME ZA tile slices")
+endif
+
+EXTRA_RUNS += run-gdbstub-sysregs-sme run-gdbstub-sysregs-sme-tile-slice
+
+endif
+
 ifeq ($(GDB_HAS_MTE),y)
 run-gdbstub-mte: mte-8
 	$(call run-test, $@, $(GDB_SCRIPT) \
diff --git a/tests/tcg/aarch64/gdbstub/test-mte.py b/tests/tcg/aarch64/gdbstub/test-mte.py
index 9ad98e7a54..f4a7d7b446 100644
--- a/tests/tcg/aarch64/gdbstub/test-mte.py
+++ b/tests/tcg/aarch64/gdbstub/test-mte.py
@@ -1,4 +1,3 @@
-from __future__ import print_function
 #
 # Test GDB memory-tag commands that exercise the stubs for the qIsAddressTagged,
 # qMemTag, and QMemTag packets, which are used for manipulating allocation tags.
diff --git a/tests/tcg/aarch64/gdbstub/test-sme.py b/tests/tcg/aarch64/gdbstub/test-sme.py
new file mode 100644
index 0000000000..ec03189642
--- /dev/null
+++ b/tests/tcg/aarch64/gdbstub/test-sme.py
@@ -0,0 +1,117 @@
+#
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+#
+# Test the SME registers are visible and changeable via gdbstub
+#
+# This is launched via tests/guest-debug/run-test.py
+#
+
+import argparse
+import gdb
+from test_gdbstub import main, report
+
+MAGIC = 0x01020304
+BASIC_ZA_TEST = 0
+TILE_SLICE_TEST = 0
+
+
+def run_test():
+    """Run the requested test(s) for SME ZA gdbstub support"""
+
+    if BASIC_ZA_TEST:
+        run_basic_sme_za_gdbstub_support_test()
+    if TILE_SLICE_TEST:
+        run_basic_sme_za_tile_slice_gdbstub_support_test()
+
+
+def run_basic_sme_za_gdbstub_support_test():
+    """Test reads and writes to the SME ZA register at the byte level"""
+
+    frame = gdb.selected_frame()
+    rname = "za"
+    za = frame.read_register(rname)
+    report(True, "Reading %s" % rname)
+
+    # Writing to the ZA register, byte by byte.
+    for i in range(0, 16):
+        for j in range(0, 16):
+            cmd = "set $za[%d][%d] = 0x01" % (i, j)
+            gdb.execute(cmd)
+            report(True, "%s" % cmd)
+
+    # Reading from the ZA register, byte by byte.
+    for i in range(0, 16):
+        for j in range(0, 16):
+            reg = "$za[%d][%d]" % (i, j)
+            v = gdb.parse_and_eval(reg)
+            report(str(v.type) == "uint8_t", "size of %s" % (reg))
+            report(v == 0x1, "%s is 0x%x" % (reg, 0x1))
+
+
+def run_basic_sme_za_tile_slice_gdbstub_support_test():
+    """Test reads and writes of SME ZA horizontal and vertical tile slices
+
+    Test if SME ZA tile slices, both horizontal and vertical,
+    can be correctly read and written to. The sizes to test
+    are quadwords and doublewords.
+    """
+
+    sizes = {}
+    sizes["q"] = "uint128_t"
+    sizes["d"] = "uint64_t"
+
+    # Accessing requested sizes of elements of ZA
+    for size in sizes:
+
+        # Accessing various ZA tiles
+        for i in range(0, 4):
+
+            # Accessing various horizontal slices for each ZA tile
+            for j in range(0, 4):
+                # Writing to various elements in each tile slice
+                for k in range(0, 4):
+                    cmd = "set $za%dh%c%d[%d] = 0x%x" % (i, size, j, k, MAGIC)
+                    gdb.execute(cmd)
+                    report(True, "%s" % cmd)
+
+                # Reading from the written elements in each tile slice
+                for k in range(0, 4):
+                    reg = "$za%dh%c%d[%d]" % (i, size, j, k)
+                    v = gdb.parse_and_eval(reg)
+                    report(str(v.type) == sizes[size], "size of %s" % (reg))
+                    report(v == MAGIC, "%s is 0x%x" % (reg, MAGIC))
+
+            # Accessing various vertical slices for each ZA tile
+            for j in range(0, 4):
+                # Writing to various elements in each tile slice
+                for k in range(0, 4):
+                    cmd = "set $za%dv%c%d[%d] = 0x%x" % (i, size, j, k, MAGIC)
+                    gdb.execute(cmd)
+                    report(True, "%s" % cmd)
+
+                # Reading from the written elements in each tile slice
+                for k in range(0, 4):
+                    reg = "$za%dv%c%d[%d]" % (i, size, j, k)
+                    v = gdb.parse_and_eval(reg)
+                    report(str(v.type) == sizes[size], "size of %s" % (reg))
+                    report(v == MAGIC, "%s is 0x%x" % (reg, MAGIC))
+
+
+parser = argparse.ArgumentParser(description="A gdbstub test for SME support")
+parser.add_argument("--gdb_basic_za_test",
+                    help="Enable test for basic SME ZA support",
+                    action="store_true")
+parser.add_argument("--gdb_tile_slice_test",
+                    help="Enable test for ZA tile slice support",
+                    action="store_true")
+args = parser.parse_args()
+
+if args.gdb_basic_za_test:
+    BASIC_ZA_TEST = 1
+if args.gdb_tile_slice_test:
+    TILE_SLICE_TEST = 1
+
+main(run_test, expected_arch="aarch64")
diff --git a/tests/tcg/aarch64/gdbstub/test-sve-ioctl.py b/tests/tcg/aarch64/gdbstub/test-sve-ioctl.py
index a78a3a2514..2c5c218031 100644
--- a/tests/tcg/aarch64/gdbstub/test-sve-ioctl.py
+++ b/tests/tcg/aarch64/gdbstub/test-sve-ioctl.py
@@ -1,4 +1,3 @@
-from __future__ import print_function
 #
 # Test the SVE ZReg reports the right amount of data. It uses the
 # sve-ioctl test and examines the register data each time the
diff --git a/tests/tcg/aarch64/gdbstub/test-sve.py b/tests/tcg/aarch64/gdbstub/test-sve.py
index 84cdcd4a32..7b0489a622 100644
--- a/tests/tcg/aarch64/gdbstub/test-sve.py
+++ b/tests/tcg/aarch64/gdbstub/test-sve.py
@@ -1,4 +1,3 @@
-from __future__ import print_function
 #
 # Test the SVE registers are visible and changeable via gdbstub
 #
diff --git a/tests/tcg/multiarch/gdbstub/interrupt.py b/tests/tcg/multiarch/gdbstub/interrupt.py
index 2d5654d154..4eccdb41b9 100644
--- a/tests/tcg/multiarch/gdbstub/interrupt.py
+++ b/tests/tcg/multiarch/gdbstub/interrupt.py
@@ -1,4 +1,3 @@
-from __future__ import print_function
 #
 # Test some of the system debug features with the multiarch memory
 # test. It is a port of the original vmlinux focused test case but
diff --git a/tests/tcg/multiarch/gdbstub/memory.py b/tests/tcg/multiarch/gdbstub/memory.py
index 532b92e7fb..76d75e5251 100644
--- a/tests/tcg/multiarch/gdbstub/memory.py
+++ b/tests/tcg/multiarch/gdbstub/memory.py
@@ -1,4 +1,3 @@
-from __future__ import print_function
 #
 # Test some of the system debug features with the multiarch memory
 # test. It is a port of the original vmlinux focused test case but
diff --git a/tests/tcg/multiarch/gdbstub/sha1.py b/tests/tcg/multiarch/gdbstub/sha1.py
index 1ce711a402..3403b82fd4 100644
--- a/tests/tcg/multiarch/gdbstub/sha1.py
+++ b/tests/tcg/multiarch/gdbstub/sha1.py
@@ -1,4 +1,3 @@
-from __future__ import print_function
 #
 # A very simple smoke test for debugging the SHA1 userspace test on
 # each target.
diff --git a/tests/tcg/multiarch/gdbstub/test-proc-mappings.py b/tests/tcg/multiarch/gdbstub/test-proc-mappings.py
index 6eb6ebf7b1..796dca75f0 100644
--- a/tests/tcg/multiarch/gdbstub/test-proc-mappings.py
+++ b/tests/tcg/multiarch/gdbstub/test-proc-mappings.py
@@ -1,7 +1,6 @@
 """Test that gdbstub has access to proc mappings.
 
 This runs as a sourced script (via -x, via run-test.py)."""
-from __future__ import print_function
 import gdb
 from test_gdbstub import gdb_exit, main, report
 
diff --git a/tests/tcg/multiarch/gdbstub/test-qxfer-auxv-read.py b/tests/tcg/multiarch/gdbstub/test-qxfer-auxv-read.py
index 00c26ab4a9..fa36c943d6 100644
--- a/tests/tcg/multiarch/gdbstub/test-qxfer-auxv-read.py
+++ b/tests/tcg/multiarch/gdbstub/test-qxfer-auxv-read.py
@@ -1,4 +1,3 @@
-from __future__ import print_function
 #
 # Test auxiliary vector is loaded via gdbstub
 #
diff --git a/tests/tcg/multiarch/gdbstub/test-qxfer-siginfo-read.py b/tests/tcg/multiarch/gdbstub/test-qxfer-siginfo-read.py
index 862596b07a..b18fa1234f 100644
--- a/tests/tcg/multiarch/gdbstub/test-qxfer-siginfo-read.py
+++ b/tests/tcg/multiarch/gdbstub/test-qxfer-siginfo-read.py
@@ -1,4 +1,3 @@
-from __future__ import print_function
 #
 # Test gdbstub Xfer:siginfo:read stub.
 #
diff --git a/tests/tcg/multiarch/gdbstub/test-thread-breakpoint.py b/tests/tcg/multiarch/gdbstub/test-thread-breakpoint.py
index 4d6b6b9fbe..49cbc3548f 100644
--- a/tests/tcg/multiarch/gdbstub/test-thread-breakpoint.py
+++ b/tests/tcg/multiarch/gdbstub/test-thread-breakpoint.py
@@ -1,4 +1,3 @@
-from __future__ import print_function
 #
 # Test auxiliary vector is loaded via gdbstub
 #
diff --git a/tests/tcg/s390x/gdbstub/test-signals-s390x.py b/tests/tcg/s390x/gdbstub/test-signals-s390x.py
index b6b7b39fc4..398ad534eb 100644
--- a/tests/tcg/s390x/gdbstub/test-signals-s390x.py
+++ b/tests/tcg/s390x/gdbstub/test-signals-s390x.py
@@ -1,4 +1,3 @@
-from __future__ import print_function
 
 #
 # Test that signals and debugging mix well together on s390x.
diff --git a/tests/tcg/s390x/gdbstub/test-svc.py b/tests/tcg/s390x/gdbstub/test-svc.py
index 17210b4e02..29a0aa0ede 100644
--- a/tests/tcg/s390x/gdbstub/test-svc.py
+++ b/tests/tcg/s390x/gdbstub/test-svc.py
@@ -1,7 +1,6 @@
 """Test single-stepping SVC.
 
 This runs as a sourced script (via -x, via run-test.py)."""
-from __future__ import print_function
 import gdb
 from test_gdbstub import main, report