summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--.gitlab-ci.yml95
-rw-r--r--MAINTAINERS93
-rw-r--r--README.rst11
-rw-r--r--accel/kvm/kvm-all.c2
-rw-r--r--docs/system/arm/nuvoton.rst2
-rw-r--r--docs/system/arm/xlnx-versal-virt.rst1
-rw-r--r--hw/arm/npcm7xx.c45
-rw-r--r--hw/arm/npcm7xx_boards.c99
-rw-r--r--hw/arm/smmu-common.c30
-rw-r--r--hw/arm/smmu-internal.h5
-rw-r--r--hw/arm/smmuv3.c58
-rw-r--r--hw/arm/trace-events24
-rw-r--r--hw/arm/virt.c23
-rw-r--r--hw/arm/xlnx-versal.c36
-rw-r--r--hw/display/pl110.c123
-rw-r--r--hw/display/pl110_template.h120
-rw-r--r--hw/display/pxa2xx_lcd.c520
-rw-r--r--hw/display/pxa2xx_template.h447
-rw-r--r--hw/i386/intel_iommu.c32
-rw-r--r--hw/mips/gt64xxx_pci.c59
-rw-r--r--hw/mips/trace-events6
-rw-r--r--hw/misc/meson.build2
-rw-r--r--hw/misc/npcm7xx_mft.c540
-rw-r--r--hw/misc/npcm7xx_pwm.c4
-rw-r--r--hw/misc/trace-events8
-rw-r--r--hw/misc/xlnx-versal-xramc.c253
-rw-r--r--hw/net/allwinner-sun8i-emac.c58
-rw-r--r--hw/timer/sse-timer.c1
-rw-r--r--hw/virtio/virtio-iommu.c19
-rw-r--r--include/hw/arm/npcm7xx.h13
-rw-r--r--include/hw/arm/xlnx-versal.h13
-rw-r--r--include/hw/boards.h1
-rw-r--r--include/hw/misc/npcm7xx_mft.h70
-rw-r--r--include/hw/misc/npcm7xx_pwm.h4
-rw-r--r--include/hw/misc/xlnx-versal-xramc.h97
-rw-r--r--include/sysemu/dma.h12
-rw-r--r--linux-user/elfload.c8
-rw-r--r--linux-user/main.c24
-rw-r--r--linux-user/syscall.c6
-rwxr-xr-xscripts/qemu-binfmt-conf.sh44
-rw-r--r--softmmu/dma-helpers.c26
-rw-r--r--target/arm/kvm.c4
-rw-r--r--target/arm/kvm_arm.h6
-rw-r--r--target/arm/sve_helper.c107
-rw-r--r--target/arm/translate-sve.c26
-rw-r--r--target/mips/meson.build18
-rw-r--r--target/mips/mxu_translate.c1609
-rw-r--r--target/mips/translate.c3663
-rw-r--r--target/mips/translate.h10
-rw-r--r--target/mips/tx79.decode39
-rw-r--r--target/mips/tx79_translate.c303
-rw-r--r--target/mips/txx9_translate.c20
-rw-r--r--tests/Makefile.include8
-rw-r--r--tests/acceptance/boot_linux_console.py120
-rw-r--r--tests/acceptance/replay_kernel.py10
-rw-r--r--tests/bench/atomic64-bench.c (renamed from tests/atomic64-bench.c)0
-rw-r--r--tests/bench/atomic_add-bench.c (renamed from tests/atomic_add-bench.c)0
-rw-r--r--tests/bench/benchmark-crypto-cipher.c (renamed from tests/benchmark-crypto-cipher.c)0
-rw-r--r--tests/bench/benchmark-crypto-hash.c (renamed from tests/benchmark-crypto-hash.c)0
-rw-r--r--tests/bench/benchmark-crypto-hmac.c (renamed from tests/benchmark-crypto-hmac.c)0
-rw-r--r--tests/bench/meson.build34
-rw-r--r--tests/bench/qht-bench.c (renamed from tests/qht-bench.c)0
-rw-r--r--tests/meson.build216
-rw-r--r--tests/qtest/npcm7xx_pwm-test.c205
-rw-r--r--tests/unit/check-block-qdict.c (renamed from tests/check-block-qdict.c)0
-rw-r--r--tests/unit/check-qdict.c (renamed from tests/check-qdict.c)0
-rw-r--r--tests/unit/check-qjson.c (renamed from tests/check-qjson.c)0
-rw-r--r--tests/unit/check-qlist.c (renamed from tests/check-qlist.c)0
-rw-r--r--tests/unit/check-qlit.c (renamed from tests/check-qlit.c)0
-rw-r--r--tests/unit/check-qnull.c (renamed from tests/check-qnull.c)0
-rw-r--r--tests/unit/check-qnum.c (renamed from tests/check-qnum.c)0
-rw-r--r--tests/unit/check-qobject.c (renamed from tests/check-qobject.c)0
-rw-r--r--tests/unit/check-qom-interface.c (renamed from tests/check-qom-interface.c)0
-rw-r--r--tests/unit/check-qom-proplist.c (renamed from tests/check-qom-proplist.c)0
-rw-r--r--tests/unit/check-qstring.c (renamed from tests/check-qstring.c)0
-rw-r--r--tests/unit/crypto-tls-psk-helpers.c (renamed from tests/crypto-tls-psk-helpers.c)0
-rw-r--r--tests/unit/crypto-tls-psk-helpers.h (renamed from tests/crypto-tls-psk-helpers.h)0
-rw-r--r--tests/unit/crypto-tls-x509-helpers.c (renamed from tests/crypto-tls-x509-helpers.c)0
-rw-r--r--tests/unit/crypto-tls-x509-helpers.h (renamed from tests/crypto-tls-x509-helpers.h)0
-rw-r--r--tests/unit/io-channel-helpers.c (renamed from tests/io-channel-helpers.c)0
-rw-r--r--tests/unit/io-channel-helpers.h (renamed from tests/io-channel-helpers.h)0
-rw-r--r--tests/unit/iothread.c (renamed from tests/iothread.c)0
-rw-r--r--tests/unit/iothread.h (renamed from tests/iothread.h)0
-rw-r--r--tests/unit/meson.build184
-rw-r--r--tests/unit/pkix_asn1_tab.c (renamed from tests/pkix_asn1_tab.c)2
-rw-r--r--tests/unit/ptimer-test-stubs.c (renamed from tests/ptimer-test-stubs.c)0
-rw-r--r--tests/unit/ptimer-test.c (renamed from tests/ptimer-test.c)0
-rw-r--r--tests/unit/ptimer-test.h (renamed from tests/ptimer-test.h)0
-rw-r--r--tests/unit/rcutorture.c (renamed from tests/rcutorture.c)0
-rw-r--r--tests/unit/socket-helpers.c (renamed from tests/socket-helpers.c)0
-rw-r--r--tests/unit/socket-helpers.h (renamed from tests/socket-helpers.h)0
-rw-r--r--tests/unit/test-aio-multithread.c (renamed from tests/test-aio-multithread.c)0
-rw-r--r--tests/unit/test-aio.c (renamed from tests/test-aio.c)0
-rw-r--r--tests/unit/test-authz-list.c (renamed from tests/test-authz-list.c)0
-rw-r--r--tests/unit/test-authz-listfile.c (renamed from tests/test-authz-listfile.c)0
-rw-r--r--tests/unit/test-authz-pam.c (renamed from tests/test-authz-pam.c)0
-rw-r--r--tests/unit/test-authz-simple.c (renamed from tests/test-authz-simple.c)0
-rw-r--r--tests/unit/test-base64.c (renamed from tests/test-base64.c)0
-rw-r--r--tests/unit/test-bdrv-drain.c (renamed from tests/test-bdrv-drain.c)0
-rw-r--r--tests/unit/test-bdrv-graph-mod.c (renamed from tests/test-bdrv-graph-mod.c)0
-rw-r--r--tests/unit/test-bitcnt.c (renamed from tests/test-bitcnt.c)0
-rw-r--r--tests/unit/test-bitmap.c (renamed from tests/test-bitmap.c)0
-rw-r--r--tests/unit/test-bitops.c (renamed from tests/test-bitops.c)0
-rw-r--r--tests/unit/test-block-backend.c (renamed from tests/test-block-backend.c)0
-rw-r--r--tests/unit/test-block-iothread.c (renamed from tests/test-block-iothread.c)0
-rw-r--r--tests/unit/test-blockjob-txn.c (renamed from tests/test-blockjob-txn.c)0
-rw-r--r--tests/unit/test-blockjob.c (renamed from tests/test-blockjob.c)0
-rw-r--r--tests/unit/test-bufferiszero.c (renamed from tests/test-bufferiszero.c)0
-rw-r--r--tests/unit/test-char.c (renamed from tests/test-char.c)0
-rw-r--r--tests/unit/test-clone-visitor.c (renamed from tests/test-clone-visitor.c)0
-rw-r--r--tests/unit/test-coroutine.c (renamed from tests/test-coroutine.c)0
-rw-r--r--tests/unit/test-crypto-afsplit.c (renamed from tests/test-crypto-afsplit.c)0
-rw-r--r--tests/unit/test-crypto-block.c (renamed from tests/test-crypto-block.c)0
-rw-r--r--tests/unit/test-crypto-cipher.c (renamed from tests/test-crypto-cipher.c)0
-rw-r--r--tests/unit/test-crypto-hash.c (renamed from tests/test-crypto-hash.c)0
-rw-r--r--tests/unit/test-crypto-hmac.c (renamed from tests/test-crypto-hmac.c)0
-rw-r--r--tests/unit/test-crypto-ivgen.c (renamed from tests/test-crypto-ivgen.c)0
-rw-r--r--tests/unit/test-crypto-pbkdf.c (renamed from tests/test-crypto-pbkdf.c)0
-rw-r--r--tests/unit/test-crypto-secret.c (renamed from tests/test-crypto-secret.c)0
-rw-r--r--tests/unit/test-crypto-tlscredsx509.c (renamed from tests/test-crypto-tlscredsx509.c)0
-rw-r--r--tests/unit/test-crypto-tlssession.c (renamed from tests/test-crypto-tlssession.c)0
-rw-r--r--tests/unit/test-crypto-xts.c (renamed from tests/test-crypto-xts.c)0
-rw-r--r--tests/unit/test-cutils.c (renamed from tests/test-cutils.c)0
-rw-r--r--tests/unit/test-fdmon-epoll.c (renamed from tests/test-fdmon-epoll.c)0
-rw-r--r--tests/unit/test-hbitmap.c (renamed from tests/test-hbitmap.c)0
-rw-r--r--tests/unit/test-image-locking.c (renamed from tests/test-image-locking.c)0
-rw-r--r--tests/unit/test-int128.c (renamed from tests/test-int128.c)0
-rw-r--r--tests/unit/test-io-channel-buffer.c (renamed from tests/test-io-channel-buffer.c)0
-rw-r--r--tests/unit/test-io-channel-command.c (renamed from tests/test-io-channel-command.c)0
-rw-r--r--tests/unit/test-io-channel-file.c (renamed from tests/test-io-channel-file.c)0
-rw-r--r--tests/unit/test-io-channel-socket.c (renamed from tests/test-io-channel-socket.c)0
-rw-r--r--tests/unit/test-io-channel-tls.c (renamed from tests/test-io-channel-tls.c)0
-rw-r--r--tests/unit/test-io-task.c (renamed from tests/test-io-task.c)0
-rw-r--r--tests/unit/test-iov.c (renamed from tests/test-iov.c)0
-rw-r--r--tests/unit/test-keyval.c (renamed from tests/test-keyval.c)0
-rw-r--r--tests/unit/test-logging.c (renamed from tests/test-logging.c)0
-rw-r--r--tests/unit/test-mul64.c (renamed from tests/test-mul64.c)0
-rw-r--r--tests/unit/test-opts-visitor.c (renamed from tests/test-opts-visitor.c)0
-rw-r--r--tests/unit/test-qapi-util.c (renamed from tests/test-qapi-util.c)0
-rw-r--r--tests/unit/test-qdev-global-props.c (renamed from tests/test-qdev-global-props.c)0
-rw-r--r--tests/unit/test-qdist.c (renamed from tests/test-qdist.c)0
-rw-r--r--tests/unit/test-qemu-opts.c (renamed from tests/test-qemu-opts.c)0
-rw-r--r--tests/unit/test-qga.c (renamed from tests/test-qga.c)2
-rw-r--r--tests/unit/test-qgraph.c (renamed from tests/test-qgraph.c)4
-rw-r--r--tests/unit/test-qht.c (renamed from tests/test-qht.c)0
-rw-r--r--tests/unit/test-qmp-cmds.c (renamed from tests/test-qmp-cmds.c)0
-rw-r--r--tests/unit/test-qmp-event.c (renamed from tests/test-qmp-event.c)0
-rw-r--r--tests/unit/test-qobject-input-visitor.c (renamed from tests/test-qobject-input-visitor.c)0
-rw-r--r--tests/unit/test-qobject-output-visitor.c (renamed from tests/test-qobject-output-visitor.c)0
-rw-r--r--tests/unit/test-rcu-list.c (renamed from tests/test-rcu-list.c)0
-rw-r--r--tests/unit/test-rcu-simpleq.c (renamed from tests/test-rcu-simpleq.c)0
-rw-r--r--tests/unit/test-rcu-slist.c (renamed from tests/test-rcu-slist.c)0
-rw-r--r--tests/unit/test-rcu-tailq.c (renamed from tests/test-rcu-tailq.c)0
-rw-r--r--tests/unit/test-replication.c (renamed from tests/test-replication.c)0
-rw-r--r--tests/unit/test-shift128.c (renamed from tests/test-shift128.c)0
-rw-r--r--tests/unit/test-string-input-visitor.c (renamed from tests/test-string-input-visitor.c)0
-rw-r--r--tests/unit/test-string-output-visitor.c (renamed from tests/test-string-output-visitor.c)0
-rw-r--r--tests/unit/test-thread-pool.c (renamed from tests/test-thread-pool.c)0
-rw-r--r--tests/unit/test-throttle.c (renamed from tests/test-throttle.c)0
-rw-r--r--tests/unit/test-timed-average.c (renamed from tests/test-timed-average.c)0
-rw-r--r--tests/unit/test-util-filemonitor.c (renamed from tests/test-util-filemonitor.c)0
-rw-r--r--tests/unit/test-util-sockets.c (renamed from tests/test-util-sockets.c)0
-rw-r--r--tests/unit/test-uuid.c (renamed from tests/test-uuid.c)0
-rw-r--r--tests/unit/test-visitor-serialization.c (renamed from tests/test-visitor-serialization.c)0
-rw-r--r--tests/unit/test-vmstate.c (renamed from tests/test-vmstate.c)0
-rw-r--r--tests/unit/test-write-threshold.c (renamed from tests/test-write-threshold.c)0
-rw-r--r--tests/unit/test-x86-cpuid.c (renamed from tests/test-x86-cpuid.c)0
-rw-r--r--tests/unit/test-xbzrle.c (renamed from tests/test-xbzrle.c)0
168 files changed, 4657 insertions, 4967 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 293a810656..f65cb11c4d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -171,19 +171,6 @@ check-system-debian:
     IMAGE: debian-amd64
     MAKE_CHECK_ARGS: check
 
-# No targets are built here, just tools, docs, and unit tests. This
-# also feeds into the eventual documentation deployment steps later
-build-tools-and-docs-debian:
-  <<: *native_build_job_definition
-  variables:
-    IMAGE: debian-amd64
-    MAKE_CHECK_ARGS: check-unit check-softfloat ctags TAGS cscope
-    CONFIGURE_ARGS: --disable-system --disable-user --enable-docs --enable-tools
-  artifacts:
-    expire_in: 2 days
-    paths:
-      - build
-
 acceptance-system-debian:
   <<: *native_test_job_definition
   needs:
@@ -383,6 +370,8 @@ build-disabled:
 # Xen accelerator is not detected / selected. As result it build the
 # i386-softmmu and x86_64-softmmu with KVM being the single accelerator
 # available.
+# Also use a different coroutine implementation (which is only really of
+# interest to KVM users, i.e. with TCG disabled)
 build-tcg-disabled:
   <<: *native_build_job_definition
   needs:
@@ -392,7 +381,8 @@ build-tcg-disabled:
   script:
     - mkdir build
     - cd build
-    - ../configure --disable-tcg --audio-drv-list="" || { cat config.log meson-logs/meson-log.txt && exit 1; }
+    - ../configure --disable-tcg --audio-drv-list="" --with-coroutine=ucontext
+      || { cat config.log meson-logs/meson-log.txt && exit 1; }
     - make -j"$JOBS"
     - make check-unit
     - make check-qapi-schema
@@ -449,6 +439,8 @@ build-user-plugins:
 
 build-user-centos7:
   <<: *native_build_job_definition
+  needs:
+    job: amd64-centos7-container
   variables:
     IMAGE: centos7
     CONFIGURE_ARGS: --disable-system --disable-tools --disable-docs
@@ -478,6 +470,8 @@ clang-system:
 
 clang-user:
   <<: *native_build_job_definition
+  needs:
+    job: amd64-debian-user-cross-container
   variables:
     IMAGE: debian-all-test-cross
     CONFIGURE_ARGS: --cc=clang --cxx=clang++ --disable-system
@@ -606,6 +600,8 @@ acceptance-cfi-x86_64:
 
 tsan-build:
   <<: *native_build_job_definition
+  needs:
+    job: amd64-ubuntu2004-container
   variables:
     IMAGE: ubuntu2004
     CONFIGURE_ARGS: --enable-tsan --cc=clang-10 --cxx=clang++-10 --disable-docs
@@ -643,6 +639,8 @@ check-deprecated:
 # gprof/gcov are GCC features
 gprof-gcov:
   <<: *native_build_job_definition
+  needs:
+    job: amd64-ubuntu2004-container
   variables:
     IMAGE: ubuntu2004
     CONFIGURE_ARGS: --enable-gprof --enable-gcov
@@ -697,15 +695,6 @@ build-tci:
 
 # Alternate coroutines implementations are only really of interest to KVM users
 # However we can't test against KVM on Gitlab-CI so we can only run unit tests
-build-coroutine-ucontext:
-  <<: *native_build_job_definition
-  needs:
-    job: amd64-ubuntu2004-container
-  variables:
-    IMAGE: ubuntu2004
-    CONFIGURE_ARGS: --with-coroutine=ucontext --disable-tcg
-    MAKE_CHECK_ARGS: check-unit
-
 build-coroutine-sigaltstack:
   <<: *native_build_job_definition
   needs:
@@ -719,7 +708,7 @@ build-coroutine-sigaltstack:
 #
 # These jobs test old gcrypt and nettle from RHEL7
 # which had some API differences.
-build-crypto-old-nettle:
+crypto-old-nettle:
   <<: *native_build_job_definition
   needs:
     job: amd64-centos7-container
@@ -727,22 +716,9 @@ build-crypto-old-nettle:
     IMAGE: centos7
     TARGETS: x86_64-softmmu x86_64-linux-user
     CONFIGURE_ARGS: --disable-gcrypt --enable-nettle
-    MAKE_CHECK_ARGS: check-build
-  artifacts:
-    paths:
-      - build
-
-check-crypto-old-nettle:
-  <<: *native_test_job_definition
-  needs:
-    - job: build-crypto-old-nettle
-      artifacts: true
-  variables:
-    IMAGE: centos7
     MAKE_CHECK_ARGS: check
 
-
-build-crypto-old-gcrypt:
+crypto-old-gcrypt:
   <<: *native_build_job_definition
   needs:
     job: amd64-centos7-container
@@ -750,22 +726,9 @@ build-crypto-old-gcrypt:
     IMAGE: centos7
     TARGETS: x86_64-softmmu x86_64-linux-user
     CONFIGURE_ARGS: --disable-nettle --enable-gcrypt
-    MAKE_CHECK_ARGS: check-build
-  artifacts:
-    paths:
-      - build
-
-check-crypto-old-gcrypt:
-  <<: *native_test_job_definition
-  needs:
-    - job: build-crypto-old-gcrypt
-      artifacts: true
-  variables:
-    IMAGE: centos7
     MAKE_CHECK_ARGS: check
 
-
-build-crypto-only-gnutls:
+crypto-only-gnutls:
   <<: *native_build_job_definition
   needs:
     job: amd64-centos7-container
@@ -773,20 +736,9 @@ build-crypto-only-gnutls:
     IMAGE: centos7
     TARGETS: x86_64-softmmu x86_64-linux-user
     CONFIGURE_ARGS: --disable-nettle --disable-gcrypt --enable-gnutls
-    MAKE_CHECK_ARGS: check-build
-  artifacts:
-    paths:
-      - build
-
-check-crypto-only-gnutls:
-  <<: *native_test_job_definition
-  needs:
-    - job: build-crypto-only-gnutls
-      artifacts: true
-  variables:
-    IMAGE: centos7
     MAKE_CHECK_ARGS: check
 
+
 # We don't need to exercise every backend with every front-end
 build-trace-multi-user:
   <<: *native_build_job_definition
@@ -869,6 +821,21 @@ build-libvhost-user:
     - meson
     - ninja
 
+# No targets are built here, just tools, docs, and unit tests. This
+# also feeds into the eventual documentation deployment steps later
+build-tools-and-docs-debian:
+  <<: *native_build_job_definition
+  needs:
+    job: amd64-debian-container
+  variables:
+    IMAGE: debian-amd64
+    MAKE_CHECK_ARGS: check-unit check-softfloat ctags TAGS cscope
+    CONFIGURE_ARGS: --disable-system --disable-user --enable-docs --enable-tools
+  artifacts:
+    expire_in: 2 days
+    paths:
+      - build
+
 # Prepare for GitLab pages deployment. Anything copied into the
 # "public" directory will be deployed to $USER.gitlab.io/$PROJECT
 pages:
diff --git a/MAINTAINERS b/MAINTAINERS
index 8e9f0d591e..5ca3c9f851 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1537,7 +1537,7 @@ F: include/hw/southbridge/piix.h
 F: hw/misc/sga.c
 F: hw/isa/apm.c
 F: include/hw/isa/apm.h
-F: tests/test-x86-cpuid.c
+F: tests/unit/test-x86-cpuid.c
 F: tests/qtest/test-x86-cpuid-compat.c
 
 PC Chipset
@@ -2219,7 +2219,7 @@ F: qemu-io*
 F: tests/qemu-iotests/
 F: util/qemu-progress.c
 F: qobject/block-qdict.c
-F: tests/check-block-qdict.c
+F: tests/unit/check-block-qdict.c
 T: git https://repo.or.cz/qemu/kevin.git block
 
 Storage daemon
@@ -2245,7 +2245,7 @@ F: migration/block*
 F: include/block/aio.h
 F: include/block/aio-wait.h
 F: scripts/qemugdb/aio.py
-F: tests/test-fdmon-epoll.c
+F: tests/unit/test-fdmon-epoll.c
 T: git https://github.com/stefanha/qemu.git block
 
 Block SCSI subsystem
@@ -2305,7 +2305,7 @@ F: block/dirty-bitmap.c
 F: block/qcow2-bitmap.c
 F: migration/block-dirty-bitmap.c
 F: util/hbitmap.c
-F: tests/test-hbitmap.c
+F: tests/unit/test-hbitmap.c
 F: docs/interop/bitmaps.rst
 T: git https://repo.or.cz/qemu/ericb.git bitmaps
 
@@ -2326,8 +2326,8 @@ Command line option argument parsing
 M: Markus Armbruster <armbru@redhat.com>
 S: Supported
 F: include/qemu/option.h
-F: tests/test-keyval.c
-F: tests/test-qemu-opts.c
+F: tests/unit/test-keyval.c
+F: tests/unit/test-qemu-opts.c
 F: util/keyval.c
 F: util/qemu-option.c
 
@@ -2445,8 +2445,8 @@ Read, Copy, Update (RCU)
 M: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
 F: include/qemu/rcu*.h
-F: tests/rcutorture.c
-F: tests/test-rcu-*.c
+F: tests/unit/rcutorture.c
+F: tests/unit/test-rcu-*.c
 F: util/rcu.c
 
 Human Monitor (HMP)
@@ -2524,10 +2524,10 @@ F: include/qapi/
 X: include/qapi/qmp/
 F: include/qapi/qmp/dispatch.h
 F: tests/qapi-schema/
-F: tests/test-*-visitor.c
-F: tests/test-qapi-*.c
-F: tests/test-qmp-*.c
-F: tests/test-visitor-serialization.c
+F: tests/unit/test-*-visitor.c
+F: tests/unit/test-qapi-*.c
+F: tests/unit/test-qmp-*.c
+F: tests/unit/test-visitor-serialization.c
 F: scripts/qapi-gen.py
 F: scripts/qapi/*
 F: docs/sphinx/qapidoc.py
@@ -2548,14 +2548,14 @@ F: qobject/
 F: include/qapi/qmp/
 X: include/qapi/qmp/dispatch.h
 F: scripts/coccinelle/qobject.cocci
-F: tests/check-qdict.c
-F: tests/check-qjson.c
-F: tests/check-qlist.c
-F: tests/check-qlit.c
-F: tests/check-qnull.c
-F: tests/check-qnum.c
-F: tests/check-qobject.c
-F: tests/check-qstring.c
+F: tests/unit/check-qdict.c
+F: tests/unit/check-qjson.c
+F: tests/unit/check-qlist.c
+F: tests/unit/check-qlit.c
+F: tests/unit/check-qnull.c
+F: tests/unit/check-qnum.c
+F: tests/unit/check-qobject.c
+F: tests/unit/check-qstring.c
 F: tests/data/qobject/qdict.txt
 T: git https://repo.or.cz/qemu/armbru.git qapi-next
 
@@ -2566,7 +2566,7 @@ F: qga/
 F: docs/interop/qemu-ga.rst
 F: docs/interop/qemu-ga-ref.rst
 F: scripts/qemu-guest-agent/
-F: tests/test-qga.c
+F: tests/unit/test-qga.c
 T: git https://github.com/mdroth/qemu.git qga
 
 QOM
@@ -2587,9 +2587,9 @@ F: scripts/coccinelle/qom-parent-type.cocci
 F: softmmu/qdev-monitor.c
 F: stubs/qdev.c
 F: qom/
-F: tests/check-qom-interface.c
-F: tests/check-qom-proplist.c
-F: tests/test-qdev-global-props.c
+F: tests/unit/check-qom-interface.c
+F: tests/unit/check-qom-proplist.c
+F: tests/unit/test-qdev-global-props.c
 
 QOM boilerplate conversion script
 M: Eduardo Habkost <ehabkost@redhat.com>
@@ -2728,10 +2728,10 @@ S: Maintained
 F: crypto/
 F: include/crypto/
 F: qapi/crypto.json
-F: tests/test-crypto-*
-F: tests/benchmark-crypto-*
-F: tests/crypto-tls-*
-F: tests/pkix_asn1_tab.c
+F: tests/unit/test-crypto-*
+F: tests/bench/benchmark-crypto-*
+F: tests/unit/crypto-tls-*
+F: tests/unit/pkix_asn1_tab.c
 F: qemu.sasl
 
 Coroutines
@@ -2740,7 +2740,7 @@ M: Kevin Wolf <kwolf@redhat.com>
 S: Maintained
 F: util/*coroutine*
 F: include/qemu/coroutine*
-F: tests/test-coroutine.c
+F: tests/unit/test-coroutine.c
 
 Buffers
 M: Daniel P. Berrange <berrange@redhat.com>
@@ -2753,7 +2753,7 @@ M: Daniel P. Berrange <berrange@redhat.com>
 S: Maintained
 F: io/
 F: include/io/
-F: tests/test-io-*
+F: tests/unit/test-io-*
 
 User authorization
 M: Daniel P. Berrange <berrange@redhat.com>
@@ -2761,7 +2761,7 @@ S: Maintained
 F: authz/
 F: qapi/authz.json
 F: include/authz/
-F: tests/test-authz-*
+F: tests/unit/test-authz-*
 
 Sockets
 M: Daniel P. Berrange <berrange@redhat.com>
@@ -2776,7 +2776,7 @@ M: Daniel P. Berrange <berrange@redhat.com>
 S: Odd Fixes
 F: util/filemonitor*.c
 F: include/qemu/filemonitor.h
-F: tests/test-util-filemonitor.c
+F: tests/unit/test-util-filemonitor.c
 
 Throttling infrastructure
 M: Alberto Garcia <berto@igalia.com>
@@ -2786,7 +2786,7 @@ F: include/block/throttle-groups.h
 F: include/qemu/throttle*.h
 F: util/throttle.c
 F: docs/throttle.txt
-F: tests/test-throttle.c
+F: tests/unit/test-throttle.c
 L: qemu-block@nongnu.org
 
 UUID
@@ -2794,7 +2794,7 @@ M: Fam Zheng <fam@euphon.net>
 S: Supported
 F: util/uuid.c
 F: include/qemu/uuid.h
-F: tests/test-uuid.c
+F: tests/unit/test-uuid.c
 
 Yank feature
 M: Lukas Straub <lukasstraub2@web.de>
@@ -3246,7 +3246,7 @@ M: Xie Changlong <xiechanglong.d@gmail.com>
 S: Supported
 F: replication*
 F: block/replication.c
-F: tests/test-replication.c
+F: tests/unit/test-replication.c
 F: docs/block-replication.txt
 
 PVRDMA
@@ -3291,19 +3291,23 @@ F: include/hw/remote/iohub.h
 
 Build and test automation
 -------------------------
-Build and test automation
+Build and test automation, general continuous integration
 M: Alex Bennée <alex.bennee@linaro.org>
-M: Fam Zheng <fam@euphon.net>
-R: Philippe Mathieu-Daudé <philmd@redhat.com>
+M: Philippe Mathieu-Daudé <f4bug@amsat.org>
+M: Thomas Huth <thuth@redhat.com>
+R: Wainer dos Santos Moschetta <wainersm@redhat.com>
+R: Willian Rampazzo <willianr@redhat.com>
 S: Maintained
 F: .github/lockdown.yml
+F: .gitlab-ci.yml
+F: .gitlab-ci.d/
 F: .travis.yml
 F: scripts/ci/
 F: tests/docker/
 F: tests/vm/
 F: scripts/archive-source.sh
+W: https://gitlab.com/qemu-project/qemu/pipelines
 W: https://travis-ci.org/qemu/qemu
-W: http://patchew.org/QEMU/
 
 FreeBSD Hosted Continuous Integration
 M: Ed Maste <emaste@freebsd.org>
@@ -3318,17 +3322,6 @@ S: Maintained
 F: .cirrus.yml
 W: https://cirrus-ci.com/github/qemu/qemu
 
-GitLab Continuous Integration
-M: Thomas Huth <thuth@redhat.com>
-M: Philippe Mathieu-Daudé <philmd@redhat.com>
-M: Alex Bennée <alex.bennee@linaro.org>
-R: Wainer dos Santos Moschetta <wainersm@redhat.com>
-S: Maintained
-F: .gitlab-ci.yml
-F: .gitlab-ci.d/crossbuilds.yml
-F: .gitlab-ci.d/*py
-F: scripts/ci/gitlab-pipeline-status
-
 Guest Test Compilation Support
 M: Alex Bennée <alex.bennee@linaro.org>
 R: Philippe Mathieu-Daudé <f4bug@amsat.org>
diff --git a/README.rst b/README.rst
index 91aa1e314c..a92c7394b7 100644
--- a/README.rst
+++ b/README.rst
@@ -31,6 +31,17 @@ QEMU as a whole is released under the GNU General Public License,
 version 2. For full licensing details, consult the LICENSE file.
 
 
+Documentation
+=============
+
+Documentation can be found hosted online at
+`<https://www.qemu.org/documentation/>`_. The documentation for the
+current development version that is available at
+`<https://www.qemu.org/docs/master/>`_ is generated from the ``docs/``
+folder in the source tree, and is built by `Sphinx
+<https://www.sphinx-doc.org/en/master/>_`.
+
+
 Building
 ========
 
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f88a52393f..37b0a1861e 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2068,6 +2068,8 @@ static int kvm_init(MachineState *ms)
                                                             "kvm-type",
                                                             &error_abort);
         type = mc->kvm_type(ms, kvm_type);
+    } else if (mc->kvm_type) {
+        type = mc->kvm_type(ms, NULL);
     }
 
     do {
diff --git a/docs/system/arm/nuvoton.rst b/docs/system/arm/nuvoton.rst
index f9fb9224da..d3cf2d9cd7 100644
--- a/docs/system/arm/nuvoton.rst
+++ b/docs/system/arm/nuvoton.rst
@@ -45,6 +45,7 @@ Supported devices
  * Pulse Width Modulation (PWM)
  * SMBus controller (SMBF)
  * Ethernet controller (EMC)
+ * Tachometer
 
 Missing devices
 ---------------
@@ -63,7 +64,6 @@ Missing devices
  * Peripheral SPI controller (PSPI)
  * SD/MMC host
  * PECI interface
- * Tachometer
  * PCI and PCIe root complex and bridges
  * VDM and MCTP support
  * Serial I/O expansion
diff --git a/docs/system/arm/xlnx-versal-virt.rst b/docs/system/arm/xlnx-versal-virt.rst
index 2602d0f995..27f73500d9 100644
--- a/docs/system/arm/xlnx-versal-virt.rst
+++ b/docs/system/arm/xlnx-versal-virt.rst
@@ -30,6 +30,7 @@ Implemented devices:
 - 8 ADMA (Xilinx zDMA) channels
 - 2 SD Controllers
 - OCM (256KB of On Chip Memory)
+- XRAM (4MB of on chip Accelerator RAM)
 - DDR memory
 
 QEMU does not yet model any other devices, including the PL and the AI Engine.
diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c
index 9bd1e83f02..495b0f8e91 100644
--- a/hw/arm/npcm7xx.c
+++ b/hw/arm/npcm7xx.c
@@ -122,6 +122,14 @@ enum NPCM7xxInterrupt {
     NPCM7XX_SMBUS15_IRQ,
     NPCM7XX_PWM0_IRQ            = 93,   /* PWM module 0 */
     NPCM7XX_PWM1_IRQ,                   /* PWM module 1 */
+    NPCM7XX_MFT0_IRQ            = 96,   /* MFT module 0 */
+    NPCM7XX_MFT1_IRQ,                   /* MFT module 1 */
+    NPCM7XX_MFT2_IRQ,                   /* MFT module 2 */
+    NPCM7XX_MFT3_IRQ,                   /* MFT module 3 */
+    NPCM7XX_MFT4_IRQ,                   /* MFT module 4 */
+    NPCM7XX_MFT5_IRQ,                   /* MFT module 5 */
+    NPCM7XX_MFT6_IRQ,                   /* MFT module 6 */
+    NPCM7XX_MFT7_IRQ,                   /* MFT module 7 */
     NPCM7XX_EMC2RX_IRQ          = 114,
     NPCM7XX_EMC2TX_IRQ,
     NPCM7XX_GPIO0_IRQ           = 116,
@@ -172,6 +180,18 @@ static const hwaddr npcm7xx_pwm_addr[] = {
     0xf0104000,
 };
 
+/* Register base address for each MFT Module */
+static const hwaddr npcm7xx_mft_addr[] = {
+    0xf0180000,
+    0xf0181000,
+    0xf0182000,
+    0xf0183000,
+    0xf0184000,
+    0xf0185000,
+    0xf0186000,
+    0xf0187000,
+};
+
 /* Direct memory-mapped access to each SMBus Module. */
 static const hwaddr npcm7xx_smbus_addr[] = {
     0xf0080000,
@@ -417,6 +437,10 @@ static void npcm7xx_init(Object *obj)
         object_initialize_child(obj, "pwm[*]", &s->pwm[i], TYPE_NPCM7XX_PWM);
     }
 
+    for (i = 0; i < ARRAY_SIZE(s->mft); i++) {
+        object_initialize_child(obj, "mft[*]", &s->mft[i], TYPE_NPCM7XX_MFT);
+    }
+
     for (i = 0; i < ARRAY_SIZE(s->emc); i++) {
         object_initialize_child(obj, "emc[*]", &s->emc[i], TYPE_NPCM7XX_EMC);
     }
@@ -603,6 +627,19 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp)
         sysbus_connect_irq(sbd, i, npcm7xx_irq(s, NPCM7XX_PWM0_IRQ + i));
     }
 
+    /* MFT Modules. Cannot fail. */
+    QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm7xx_mft_addr) != ARRAY_SIZE(s->mft));
+    for (i = 0; i < ARRAY_SIZE(s->mft); i++) {
+        SysBusDevice *sbd = SYS_BUS_DEVICE(&s->mft[i]);
+
+        qdev_connect_clock_in(DEVICE(&s->mft[i]), "clock-in",
+                              qdev_get_clock_out(DEVICE(&s->clk),
+                                                 "apb4-clock"));
+        sysbus_realize(sbd, &error_abort);
+        sysbus_mmio_map(sbd, 0, npcm7xx_mft_addr[i]);
+        sysbus_connect_irq(sbd, 0, npcm7xx_irq(s, NPCM7XX_MFT0_IRQ + i));
+    }
+
     /*
      * EMC Modules. Cannot fail.
      * The mapping of the device to its netdev backend works as follows:
@@ -680,14 +717,6 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp)
     create_unimplemented_device("npcm7xx.peci",         0xf0100000,   4 * KiB);
     create_unimplemented_device("npcm7xx.siox[1]",      0xf0101000,   4 * KiB);
     create_unimplemented_device("npcm7xx.siox[2]",      0xf0102000,   4 * KiB);
-    create_unimplemented_device("npcm7xx.mft[0]",       0xf0180000,   4 * KiB);
-    create_unimplemented_device("npcm7xx.mft[1]",       0xf0181000,   4 * KiB);
-    create_unimplemented_device("npcm7xx.mft[2]",       0xf0182000,   4 * KiB);
-    create_unimplemented_device("npcm7xx.mft[3]",       0xf0183000,   4 * KiB);
-    create_unimplemented_device("npcm7xx.mft[4]",       0xf0184000,   4 * KiB);
-    create_unimplemented_device("npcm7xx.mft[5]",       0xf0185000,   4 * KiB);
-    create_unimplemented_device("npcm7xx.mft[6]",       0xf0186000,   4 * KiB);
-    create_unimplemented_device("npcm7xx.mft[7]",       0xf0187000,   4 * KiB);
     create_unimplemented_device("npcm7xx.pspi1",        0xf0200000,   4 * KiB);
     create_unimplemented_device("npcm7xx.pspi2",        0xf0201000,   4 * KiB);
     create_unimplemented_device("npcm7xx.ahbpci",       0xf0400000,   1 * MiB);
diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
index fbf6ce8e02..e22fe4bf8f 100644
--- a/hw/arm/npcm7xx_boards.c
+++ b/hw/arm/npcm7xx_boards.c
@@ -21,6 +21,7 @@
 #include "hw/core/cpu.h"
 #include "hw/i2c/smbus_eeprom.h"
 #include "hw/loader.h"
+#include "hw/qdev-core.h"
 #include "hw/qdev-properties.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
@@ -116,6 +117,64 @@ static void at24c_eeprom_init(NPCM7xxState *soc, int bus, uint8_t addr,
     i2c_slave_realize_and_unref(i2c_dev, i2c_bus, &error_abort);
 }
 
+static void npcm7xx_init_pwm_splitter(NPCM7xxMachine *machine,
+                                      NPCM7xxState *soc, const int *fan_counts)
+{
+    SplitIRQ *splitters = machine->fan_splitter;
+
+    /*
+     * PWM 0~3 belong to module 0 output 0~3.
+     * PWM 4~7 belong to module 1 output 0~3.
+     */
+    for (int i = 0; i < NPCM7XX_NR_PWM_MODULES; ++i) {
+        for (int j = 0; j < NPCM7XX_PWM_PER_MODULE; ++j) {
+            int splitter_no = i * NPCM7XX_PWM_PER_MODULE + j;
+            DeviceState *splitter;
+
+            if (fan_counts[splitter_no] < 1) {
+                continue;
+            }
+            object_initialize_child(OBJECT(machine), "fan-splitter[*]",
+                                    &splitters[splitter_no], TYPE_SPLIT_IRQ);
+            splitter = DEVICE(&splitters[splitter_no]);
+            qdev_prop_set_uint16(splitter, "num-lines",
+                                 fan_counts[splitter_no]);
+            qdev_realize(splitter, NULL, &error_abort);
+            qdev_connect_gpio_out_named(DEVICE(&soc->pwm[i]), "duty-gpio-out",
+                                        j, qdev_get_gpio_in(splitter, 0));
+        }
+    }
+}
+
+static void npcm7xx_connect_pwm_fan(NPCM7xxState *soc, SplitIRQ *splitter,
+                                    int fan_no, int output_no)
+{
+    DeviceState *fan;
+    int fan_input;
+    qemu_irq fan_duty_gpio;
+
+    g_assert(fan_no >= 0 && fan_no <= NPCM7XX_MFT_MAX_FAN_INPUT);
+    /*
+     * Fan 0~1 belong to module 0 input 0~1.
+     * Fan 2~3 belong to module 1 input 0~1.
+     * ...
+     * Fan 14~15 belong to module 7 input 0~1.
+     * Fan 16~17 belong to module 0 input 2~3.
+     * Fan 18~19 belong to module 1 input 2~3.
+     */
+    if (fan_no < 16) {
+        fan = DEVICE(&soc->mft[fan_no / 2]);
+        fan_input = fan_no % 2;
+    } else {
+        fan = DEVICE(&soc->mft[(fan_no - 16) / 2]);
+        fan_input = fan_no % 2 + 2;
+    }
+
+    /* Connect the Fan to PWM module */
+    fan_duty_gpio = qdev_get_gpio_in_named(fan, "duty", fan_input);
+    qdev_connect_gpio_out(DEVICE(splitter), output_no, fan_duty_gpio);
+}
+
 static void npcm750_evb_i2c_init(NPCM7xxState *soc)
 {
     /* lm75 temperature sensor on SVB, tmp105 is compatible */
@@ -128,6 +187,30 @@ static void npcm750_evb_i2c_init(NPCM7xxState *soc)
     i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 6), "tmp105", 0x48);
 }
 
+static void npcm750_evb_fan_init(NPCM7xxMachine *machine, NPCM7xxState *soc)
+{
+    SplitIRQ *splitter = machine->fan_splitter;
+    static const int fan_counts[] = {2, 2, 2, 2, 2, 2, 2, 2};
+
+    npcm7xx_init_pwm_splitter(machine, soc, fan_counts);
+    npcm7xx_connect_pwm_fan(soc, &splitter[0], 0x00, 0);
+    npcm7xx_connect_pwm_fan(soc, &splitter[0], 0x01, 1);
+    npcm7xx_connect_pwm_fan(soc, &splitter[1], 0x02, 0);
+    npcm7xx_connect_pwm_fan(soc, &splitter[1], 0x03, 1);
+    npcm7xx_connect_pwm_fan(soc, &splitter[2], 0x04, 0);
+    npcm7xx_connect_pwm_fan(soc, &splitter[2], 0x05, 1);
+    npcm7xx_connect_pwm_fan(soc, &splitter[3], 0x06, 0);
+    npcm7xx_connect_pwm_fan(soc, &splitter[3], 0x07, 1);
+    npcm7xx_connect_pwm_fan(soc, &splitter[4], 0x08, 0);
+    npcm7xx_connect_pwm_fan(soc, &splitter[4], 0x09, 1);
+    npcm7xx_connect_pwm_fan(soc, &splitter[5], 0x0a, 0);
+    npcm7xx_connect_pwm_fan(soc, &splitter[5], 0x0b, 1);
+    npcm7xx_connect_pwm_fan(soc, &splitter[6], 0x0c, 0);
+    npcm7xx_connect_pwm_fan(soc, &splitter[6], 0x0d, 1);
+    npcm7xx_connect_pwm_fan(soc, &splitter[7], 0x0e, 0);
+    npcm7xx_connect_pwm_fan(soc, &splitter[7], 0x0f, 1);
+}
+
 static void quanta_gsj_i2c_init(NPCM7xxState *soc)
 {
     /* GSJ machine have 4 max31725 temperature sensors, tmp105 is compatible. */
@@ -142,6 +225,20 @@ static void quanta_gsj_i2c_init(NPCM7xxState *soc)
     /* TODO: Add additional i2c devices. */
 }
 
+static void quanta_gsj_fan_init(NPCM7xxMachine *machine, NPCM7xxState *soc)
+{
+    SplitIRQ *splitter = machine->fan_splitter;
+    static const int fan_counts[] = {2, 2, 2, 0, 0, 0, 0, 0};
+
+    npcm7xx_init_pwm_splitter(machine, soc, fan_counts);
+    npcm7xx_connect_pwm_fan(soc, &splitter[0], 0x00, 0);
+    npcm7xx_connect_pwm_fan(soc, &splitter[0], 0x01, 1);
+    npcm7xx_connect_pwm_fan(soc, &splitter[1], 0x02, 0);
+    npcm7xx_connect_pwm_fan(soc, &splitter[1], 0x03, 1);
+    npcm7xx_connect_pwm_fan(soc, &splitter[2], 0x04, 0);
+    npcm7xx_connect_pwm_fan(soc, &splitter[2], 0x05, 1);
+}
+
 static void npcm750_evb_init(MachineState *machine)
 {
     NPCM7xxState *soc;
@@ -153,6 +250,7 @@ static void npcm750_evb_init(MachineState *machine)
     npcm7xx_load_bootrom(machine, soc);
     npcm7xx_connect_flash(&soc->fiu[0], 0, "w25q256", drive_get(IF_MTD, 0, 0));
     npcm750_evb_i2c_init(soc);
+    npcm750_evb_fan_init(NPCM7XX_MACHINE(machine), soc);
     npcm7xx_load_kernel(machine, soc);
 }
 
@@ -168,6 +266,7 @@ static void quanta_gsj_init(MachineState *machine)
     npcm7xx_connect_flash(&soc->fiu[0], 0, "mx25l25635e",
                           drive_get(IF_MTD, 0, 0));
     quanta_gsj_i2c_init(soc);
+    quanta_gsj_fan_init(NPCM7XX_MACHINE(machine), soc);
     npcm7xx_load_kernel(machine, soc);
 }
 
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 405d5c5325..84d2c62c26 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -151,22 +151,28 @@ inline void
 smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova,
                     uint8_t tg, uint64_t num_pages, uint8_t ttl)
 {
-    if (ttl && (num_pages == 1)) {
+    /* if tg is not set we use 4KB range invalidation */
+    uint8_t granule = tg ? tg * 2 + 10 : 12;
+
+    if (ttl && (num_pages == 1) && (asid >= 0)) {
         SMMUIOTLBKey key = smmu_get_iotlb_key(asid, iova, tg, ttl);
 
-        g_hash_table_remove(s->iotlb, &key);
-    } else {
-        /* if tg is not set we use 4KB range invalidation */
-        uint8_t granule = tg ? tg * 2 + 10 : 12;
+        if (g_hash_table_remove(s->iotlb, &key)) {
+            return;
+        }
+        /*
+         * if the entry is not found, let's see if it does not
+         * belong to a larger IOTLB entry
+         */
+    }
 
-        SMMUIOTLBPageInvInfo info = {
-            .asid = asid, .iova = iova,
-            .mask = (num_pages * 1 << granule) - 1};
+    SMMUIOTLBPageInvInfo info = {
+        .asid = asid, .iova = iova,
+        .mask = (num_pages * 1 << granule) - 1};
 
-        g_hash_table_foreach_remove(s->iotlb,
-                                    smmu_hash_remove_by_asid_iova,
-                                    &info);
-    }
+    g_hash_table_foreach_remove(s->iotlb,
+                                smmu_hash_remove_by_asid_iova,
+                                &info);
 }
 
 inline void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid)
diff --git a/hw/arm/smmu-internal.h b/hw/arm/smmu-internal.h
index 55147f29be..2d75b31953 100644
--- a/hw/arm/smmu-internal.h
+++ b/hw/arm/smmu-internal.h
@@ -104,4 +104,9 @@ typedef struct SMMUIOTLBPageInvInfo {
     uint64_t mask;
 } SMMUIOTLBPageInvInfo;
 
+typedef struct SMMUSIDRange {
+    uint32_t start;
+    uint32_t end;
+} SMMUSIDRange;
+
 #endif
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index bd1f97000d..3b87324ce2 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -32,6 +32,7 @@
 
 #include "hw/arm/smmuv3.h"
 #include "smmuv3-internal.h"
+#include "smmu-internal.h"
 
 /**
  * smmuv3_trigger_irq - pulse @irq if enabled and update
@@ -861,7 +862,8 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
     uint16_t vmid = CMD_VMID(cmd);
     bool leaf = CMD_LEAF(cmd);
     uint8_t tg = CMD_TG(cmd);
-    hwaddr num_pages = 1;
+    uint64_t first_page = 0, last_page;
+    uint64_t num_pages = 1;
     int asid = -1;
 
     if (tg) {
@@ -874,9 +876,38 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
     if (type == SMMU_CMD_TLBI_NH_VA) {
         asid = CMD_ASID(cmd);
     }
-    trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf);
-    smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages);
-    smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl);
+
+    /* Split invalidations into ^2 range invalidations */
+    last_page = num_pages - 1;
+    while (num_pages) {
+        uint8_t granule = tg * 2 + 10;
+        uint64_t mask, count;
+
+        mask = dma_aligned_pow2_mask(first_page, last_page, 64 - granule);
+        count = mask + 1;
+
+        trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, count, ttl, leaf);
+        smmuv3_inv_notifiers_iova(s, asid, addr, tg, count);
+        smmu_iotlb_inv_iova(s, asid, addr, tg, count, ttl);
+
+        num_pages -= count;
+        first_page += count;
+        addr += count * BIT_ULL(granule);
+    }
+}
+
+static gboolean
+smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data)
+{
+    SMMUDevice *sdev = (SMMUDevice *)key;
+    uint32_t sid = smmu_get_sid(sdev);
+    SMMUSIDRange *sid_range = (SMMUSIDRange *)user_data;
+
+    if (sid < sid_range->start || sid > sid_range->end) {
+        return false;
+    }
+    trace_smmuv3_config_cache_inv(sid);
+    return true;
 }
 
 static int smmuv3_cmdq_consume(SMMUv3State *s)
@@ -949,27 +980,18 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
         }
         case SMMU_CMD_CFGI_STE_RANGE: /* same as SMMU_CMD_CFGI_ALL */
         {
-            uint32_t start = CMD_SID(&cmd), end, i;
+            uint32_t start = CMD_SID(&cmd);
             uint8_t range = CMD_STE_RANGE(&cmd);
+            uint64_t end = start + (1ULL << (range + 1)) - 1;
+            SMMUSIDRange sid_range = {start, end};
 
             if (CMD_SSEC(&cmd)) {
                 cmd_error = SMMU_CERROR_ILL;
                 break;
             }
-
-            end = start + (1 << (range + 1)) - 1;
             trace_smmuv3_cmdq_cfgi_ste_range(start, end);
-
-            for (i = start; i <= end; i++) {
-                IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, i);
-                SMMUDevice *sdev;
-
-                if (!mr) {
-                    continue;
-                }
-                sdev = container_of(mr, SMMUDevice, iommu);
-                smmuv3_flush_config(sdev);
-            }
+            g_hash_table_foreach_remove(bs->configs, smmuv3_invalidate_ste,
+                                        &sid_range);
             break;
         }
         case SMMU_CMD_CFGI_CD:
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index a335ee891d..b79a91af5f 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -29,26 +29,26 @@ smmuv3_cmdq_opcode(const char *opcode) "<--- %s"
 smmuv3_cmdq_consume_out(uint32_t prod, uint32_t cons, uint8_t prod_wrap, uint8_t cons_wrap) "prod:%d, cons:%d, prod_wrap:%d, cons_wrap:%d "
 smmuv3_cmdq_consume_error(const char *cmd_name, uint8_t cmd_error) "Error on %s command execution: %d"
 smmuv3_write_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)"
-smmuv3_record_event(const char *type, uint32_t sid) "%s sid=%d"
-smmuv3_find_ste(uint16_t sid, uint32_t features, uint16_t sid_split) "SID:0x%x features:0x%x, sid_split:0x%x"
+smmuv3_record_event(const char *type, uint32_t sid) "%s sid=0x%x"
+smmuv3_find_ste(uint16_t sid, uint32_t features, uint16_t sid_split) "sid=0x%x features:0x%x, sid_split:0x%x"
 smmuv3_find_ste_2lvl(uint64_t strtab_base, uint64_t l1ptr, int l1_ste_offset, uint64_t l2ptr, int l2_ste_offset, int max_l2_ste) "strtab_base:0x%"PRIx64" l1ptr:0x%"PRIx64" l1_off:0x%x, l2ptr:0x%"PRIx64" l2_off:0x%x max_l2_ste:%d"
 smmuv3_get_ste(uint64_t addr) "STE addr: 0x%"PRIx64
-smmuv3_translate_disable(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=%d bypass (smmu disabled) iova:0x%"PRIx64" is_write=%d"
-smmuv3_translate_bypass(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=%d STE bypass iova:0x%"PRIx64" is_write=%d"
-smmuv3_translate_abort(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=%d abort on iova:0x%"PRIx64" is_write=%d"
-smmuv3_translate_success(const char *n, uint16_t sid, uint64_t iova, uint64_t translated, int perm) "%s sid=%d iova=0x%"PRIx64" translated=0x%"PRIx64" perm=0x%x"
+smmuv3_translate_disable(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=0x%x bypass (smmu disabled) iova:0x%"PRIx64" is_write=%d"
+smmuv3_translate_bypass(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=0x%x STE bypass iova:0x%"PRIx64" is_write=%d"
+smmuv3_translate_abort(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=0x%x abort on iova:0x%"PRIx64" is_write=%d"
+smmuv3_translate_success(const char *n, uint16_t sid, uint64_t iova, uint64_t translated, int perm) "%s sid=0x%x iova=0x%"PRIx64" translated=0x%"PRIx64" perm=0x%x"
 smmuv3_get_cd(uint64_t addr) "CD addr: 0x%"PRIx64
 smmuv3_decode_cd(uint32_t oas) "oas=%d"
 smmuv3_decode_cd_tt(int i, uint32_t tsz, uint64_t ttb, uint32_t granule_sz, bool had) "TT[%d]:tsz:%d ttb:0x%"PRIx64" granule_sz:%d had:%d"
-smmuv3_cmdq_cfgi_ste(int streamid) "streamid =%d"
+smmuv3_cmdq_cfgi_ste(int streamid) "streamid= 0x%x"
 smmuv3_cmdq_cfgi_ste_range(int start, int end) "start=0x%x - end=0x%x"
-smmuv3_cmdq_cfgi_cd(uint32_t sid) "streamid = %d"
-smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache HIT for sid %d (hits=%d, misses=%d, hit rate=%d)"
-smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid %d (hits=%d, misses=%d, hit rate=%d)"
-smmuv3_s1_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf) "vmid =%d asid =%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d"
+smmuv3_cmdq_cfgi_cd(uint32_t sid) "sid=0x%x"
+smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache HIT for sid=0x%x (hits=%d, misses=%d, hit rate=%d)"
+smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid=0x%x (hits=%d, misses=%d, hit rate=%d)"
+smmuv3_s1_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf) "vmid=%d asid=%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d"
 smmuv3_cmdq_tlbi_nh(void) ""
 smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d"
-smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid %d"
+smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x"
 smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s"
 smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s"
 smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index c08bf11297..aa2bbd14e0 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2548,27 +2548,36 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
 static int virt_kvm_type(MachineState *ms, const char *type_str)
 {
     VirtMachineState *vms = VIRT_MACHINE(ms);
-    int max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms);
-    int requested_pa_size;
+    int max_vm_pa_size, requested_pa_size;
+    bool fixed_ipa;
+
+    max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa);
 
     /* we freeze the memory map to compute the highest gpa */
     virt_set_memmap(vms);
 
     requested_pa_size = 64 - clz64(vms->highest_gpa);
 
+    /*
+     * KVM requires the IPA size to be at least 32 bits.
+     */
+    if (requested_pa_size < 32) {
+        requested_pa_size = 32;
+    }
+
     if (requested_pa_size > max_vm_pa_size) {
         error_report("-m and ,maxmem option values "
                      "require an IPA range (%d bits) larger than "
                      "the one supported by the host (%d bits)",
                      requested_pa_size, max_vm_pa_size);
-       exit(1);
+        exit(1);
     }
     /*
-     * By default we return 0 which corresponds to an implicit legacy
-     * 40b IPA setting. Otherwise we return the actual requested PA
-     * logsize
+     * We return the requested PA log size, unless KVM only supports
+     * the implicit legacy 40b IPA setting, in which case the kvm_type
+     * must be 0.
      */
-    return requested_pa_size > 40 ? requested_pa_size : 0;
+    return fixed_ipa ? 0 : requested_pa_size;
 }
 
 static void virt_machine_class_init(ObjectClass *oc, void *data)
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
index 628e77ef66..79609692e4 100644
--- a/hw/arm/xlnx-versal.c
+++ b/hw/arm/xlnx-versal.c
@@ -10,6 +10,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu/log.h"
 #include "qemu/module.h"
@@ -278,6 +279,40 @@ static void versal_create_rtc(Versal *s, qemu_irq *pic)
     sysbus_connect_irq(sbd, 1, pic[VERSAL_RTC_APB_ERR_IRQ]);
 }
 
+static void versal_create_xrams(Versal *s, qemu_irq *pic)
+{
+    int nr_xrams = ARRAY_SIZE(s->lpd.xram.ctrl);
+    DeviceState *orgate;
+    int i;
+
+    /* XRAM IRQs get ORed into a single line.  */
+    object_initialize_child(OBJECT(s), "xram-irq-orgate",
+                            &s->lpd.xram.irq_orgate, TYPE_OR_IRQ);
+    orgate = DEVICE(&s->lpd.xram.irq_orgate);
+    object_property_set_int(OBJECT(orgate),
+                            "num-lines", nr_xrams, &error_fatal);
+    qdev_realize(orgate, NULL, &error_fatal);
+    qdev_connect_gpio_out(orgate, 0, pic[VERSAL_XRAM_IRQ_0]);
+
+    for (i = 0; i < ARRAY_SIZE(s->lpd.xram.ctrl); i++) {
+        SysBusDevice *sbd;
+        MemoryRegion *mr;
+
+        object_initialize_child(OBJECT(s), "xram[*]", &s->lpd.xram.ctrl[i],
+                                TYPE_XLNX_XRAM_CTRL);
+        sbd = SYS_BUS_DEVICE(&s->lpd.xram.ctrl[i]);
+        sysbus_realize(sbd, &error_fatal);
+
+        mr = sysbus_mmio_get_region(sbd, 0);
+        memory_region_add_subregion(&s->mr_ps,
+                                    MM_XRAMC + i * MM_XRAMC_SIZE, mr);
+        mr = sysbus_mmio_get_region(sbd, 1);
+        memory_region_add_subregion(&s->mr_ps, MM_XRAM + i * MiB, mr);
+
+        sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(orgate, i));
+    }
+}
+
 /* This takes the board allocated linear DDR memory and creates aliases
  * for each split DDR range/aperture on the Versal address map.
  */
@@ -363,6 +398,7 @@ static void versal_realize(DeviceState *dev, Error **errp)
     versal_create_admas(s, pic);
     versal_create_sds(s, pic);
     versal_create_rtc(s, pic);
+    versal_create_xrams(s, pic);
     versal_map_ddr(s);
     versal_unimp(s);
 
diff --git a/hw/display/pl110.c b/hw/display/pl110.c
index 02b0d45f06..4bf15c1da5 100644
--- a/hw/display/pl110.c
+++ b/hw/display/pl110.c
@@ -123,16 +123,84 @@ static const unsigned char *idregs[] = {
     pl111_id
 };
 
-#define BITS 8
+#define COPY_PIXEL(to, from) do { *(uint32_t *)to = from; to += 4; } while (0)
+
+#undef RGB
+#define BORDER bgr
+#define ORDER 0
+#include "pl110_template.h"
+#define ORDER 1
 #include "pl110_template.h"
-#define BITS 15
+#define ORDER 2
 #include "pl110_template.h"
-#define BITS 16
+#undef BORDER
+#define RGB
+#define BORDER rgb
+#define ORDER 0
 #include "pl110_template.h"
-#define BITS 24
+#define ORDER 1
 #include "pl110_template.h"
-#define BITS 32
+#define ORDER 2
 #include "pl110_template.h"
+#undef BORDER
+
+#undef COPY_PIXEL
+
+static drawfn pl110_draw_fn_32[48] = {
+    pl110_draw_line1_lblp_bgr,
+    pl110_draw_line2_lblp_bgr,
+    pl110_draw_line4_lblp_bgr,
+    pl110_draw_line8_lblp_bgr,
+    pl110_draw_line16_555_lblp_bgr,
+    pl110_draw_line32_lblp_bgr,
+    pl110_draw_line16_lblp_bgr,
+    pl110_draw_line12_lblp_bgr,
+
+    pl110_draw_line1_bbbp_bgr,
+    pl110_draw_line2_bbbp_bgr,
+    pl110_draw_line4_bbbp_bgr,
+    pl110_draw_line8_bbbp_bgr,
+    pl110_draw_line16_555_bbbp_bgr,
+    pl110_draw_line32_bbbp_bgr,
+    pl110_draw_line16_bbbp_bgr,
+    pl110_draw_line12_bbbp_bgr,
+
+    pl110_draw_line1_lbbp_bgr,
+    pl110_draw_line2_lbbp_bgr,
+    pl110_draw_line4_lbbp_bgr,
+    pl110_draw_line8_lbbp_bgr,
+    pl110_draw_line16_555_lbbp_bgr,
+    pl110_draw_line32_lbbp_bgr,
+    pl110_draw_line16_lbbp_bgr,
+    pl110_draw_line12_lbbp_bgr,
+
+    pl110_draw_line1_lblp_rgb,
+    pl110_draw_line2_lblp_rgb,
+    pl110_draw_line4_lblp_rgb,
+    pl110_draw_line8_lblp_rgb,
+    pl110_draw_line16_555_lblp_rgb,
+    pl110_draw_line32_lblp_rgb,
+    pl110_draw_line16_lblp_rgb,
+    pl110_draw_line12_lblp_rgb,
+
+    pl110_draw_line1_bbbp_rgb,
+    pl110_draw_line2_bbbp_rgb,
+    pl110_draw_line4_bbbp_rgb,
+    pl110_draw_line8_bbbp_rgb,
+    pl110_draw_line16_555_bbbp_rgb,
+    pl110_draw_line32_bbbp_rgb,
+    pl110_draw_line16_bbbp_rgb,
+    pl110_draw_line12_bbbp_rgb,
+
+    pl110_draw_line1_lbbp_rgb,
+    pl110_draw_line2_lbbp_rgb,
+    pl110_draw_line4_lbbp_rgb,
+    pl110_draw_line8_lbbp_rgb,
+    pl110_draw_line16_555_lbbp_rgb,
+    pl110_draw_line32_lbbp_rgb,
+    pl110_draw_line16_lbbp_rgb,
+    pl110_draw_line12_lbbp_rgb,
+};
 
 static int pl110_enabled(PL110State *s)
 {
@@ -144,9 +212,7 @@ static void pl110_update_display(void *opaque)
     PL110State *s = (PL110State *)opaque;
     SysBusDevice *sbd;
     DisplaySurface *surface = qemu_console_surface(s->con);
-    drawfn* fntable;
     drawfn fn;
-    int dest_width;
     int src_width;
     int bpp_offset;
     int first;
@@ -158,33 +224,6 @@ static void pl110_update_display(void *opaque)
 
     sbd = SYS_BUS_DEVICE(s);
 
-    switch (surface_bits_per_pixel(surface)) {
-    case 0:
-        return;
-    case 8:
-        fntable = pl110_draw_fn_8;
-        dest_width = 1;
-        break;
-    case 15:
-        fntable = pl110_draw_fn_15;
-        dest_width = 2;
-        break;
-    case 16:
-        fntable = pl110_draw_fn_16;
-        dest_width = 2;
-        break;
-    case 24:
-        fntable = pl110_draw_fn_24;
-        dest_width = 3;
-        break;
-    case 32:
-        fntable = pl110_draw_fn_32;
-        dest_width = 4;
-        break;
-    default:
-        fprintf(stderr, "pl110: Bad color depth\n");
-        exit(1);
-    }
     if (s->cr & PL110_CR_BGR)
         bpp_offset = 0;
     else
@@ -218,12 +257,13 @@ static void pl110_update_display(void *opaque)
         }
     }
 
-    if (s->cr & PL110_CR_BEBO)
-        fn = fntable[s->bpp + 8 + bpp_offset];
-    else if (s->cr & PL110_CR_BEPO)
-        fn = fntable[s->bpp + 16 + bpp_offset];
-    else
-        fn = fntable[s->bpp + bpp_offset];
+    if (s->cr & PL110_CR_BEBO) {
+        fn = pl110_draw_fn_32[s->bpp + 8 + bpp_offset];
+    } else if (s->cr & PL110_CR_BEPO) {
+        fn = pl110_draw_fn_32[s->bpp + 16 + bpp_offset];
+    } else {
+        fn = pl110_draw_fn_32[s->bpp + bpp_offset];
+    }
 
     src_width = s->cols;
     switch (s->bpp) {
@@ -247,7 +287,6 @@ static void pl110_update_display(void *opaque)
         src_width <<= 2;
         break;
     }
-    dest_width *= s->cols;
     first = 0;
     if (s->invalidate) {
         framebuffer_update_memory_section(&s->fbsection,
@@ -258,7 +297,7 @@ static void pl110_update_display(void *opaque)
 
     framebuffer_update_display(surface, &s->fbsection,
                                s->cols, s->rows,
-                               src_width, dest_width, 0,
+                               src_width, s->cols * 4, 0,
                                s->invalidate,
                                fn, s->palette,
                                &first, &last);
diff --git a/hw/display/pl110_template.h b/hw/display/pl110_template.h
index 36ba791c6f..877419aa81 100644
--- a/hw/display/pl110_template.h
+++ b/hw/display/pl110_template.h
@@ -10,118 +10,22 @@
  */
 
 #ifndef ORDER
-
-#if BITS == 8
-#define COPY_PIXEL(to, from) *(to++) = from
-#elif BITS == 15 || BITS == 16
-#define COPY_PIXEL(to, from) do { *(uint16_t *)to = from; to += 2; } while (0)
-#elif BITS == 24
-#define COPY_PIXEL(to, from)    \
-    do {                        \
-        *(to++) = from;         \
-        *(to++) = (from) >> 8;  \
-        *(to++) = (from) >> 16; \
-    } while (0)
-#elif BITS == 32
-#define COPY_PIXEL(to, from) do { *(uint32_t *)to = from; to += 4; } while (0)
-#else
-#error unknown bit depth
+#error "pl110_template.h is only for inclusion by pl110.c"
 #endif
 
-#undef RGB
-#define BORDER bgr
-#define ORDER 0
-#include "pl110_template.h"
-#define ORDER 1
-#include "pl110_template.h"
-#define ORDER 2
-#include "pl110_template.h"
-#undef BORDER
-#define RGB
-#define BORDER rgb
-#define ORDER 0
-#include "pl110_template.h"
-#define ORDER 1
-#include "pl110_template.h"
-#define ORDER 2
-#include "pl110_template.h"
-#undef BORDER
-
-static drawfn glue(pl110_draw_fn_,BITS)[48] =
-{
-    glue(pl110_draw_line1_lblp_bgr,BITS),
-    glue(pl110_draw_line2_lblp_bgr,BITS),
-    glue(pl110_draw_line4_lblp_bgr,BITS),
-    glue(pl110_draw_line8_lblp_bgr,BITS),
-    glue(pl110_draw_line16_555_lblp_bgr,BITS),
-    glue(pl110_draw_line32_lblp_bgr,BITS),
-    glue(pl110_draw_line16_lblp_bgr,BITS),
-    glue(pl110_draw_line12_lblp_bgr,BITS),
-
-    glue(pl110_draw_line1_bbbp_bgr,BITS),
-    glue(pl110_draw_line2_bbbp_bgr,BITS),
-    glue(pl110_draw_line4_bbbp_bgr,BITS),
-    glue(pl110_draw_line8_bbbp_bgr,BITS),
-    glue(pl110_draw_line16_555_bbbp_bgr,BITS),
-    glue(pl110_draw_line32_bbbp_bgr,BITS),
-    glue(pl110_draw_line16_bbbp_bgr,BITS),
-    glue(pl110_draw_line12_bbbp_bgr,BITS),
-
-    glue(pl110_draw_line1_lbbp_bgr,BITS),
-    glue(pl110_draw_line2_lbbp_bgr,BITS),
-    glue(pl110_draw_line4_lbbp_bgr,BITS),
-    glue(pl110_draw_line8_lbbp_bgr,BITS),
-    glue(pl110_draw_line16_555_lbbp_bgr,BITS),
-    glue(pl110_draw_line32_lbbp_bgr,BITS),
-    glue(pl110_draw_line16_lbbp_bgr,BITS),
-    glue(pl110_draw_line12_lbbp_bgr,BITS),
-
-    glue(pl110_draw_line1_lblp_rgb,BITS),
-    glue(pl110_draw_line2_lblp_rgb,BITS),
-    glue(pl110_draw_line4_lblp_rgb,BITS),
-    glue(pl110_draw_line8_lblp_rgb,BITS),
-    glue(pl110_draw_line16_555_lblp_rgb,BITS),
-    glue(pl110_draw_line32_lblp_rgb,BITS),
-    glue(pl110_draw_line16_lblp_rgb,BITS),
-    glue(pl110_draw_line12_lblp_rgb,BITS),
-
-    glue(pl110_draw_line1_bbbp_rgb,BITS),
-    glue(pl110_draw_line2_bbbp_rgb,BITS),
-    glue(pl110_draw_line4_bbbp_rgb,BITS),
-    glue(pl110_draw_line8_bbbp_rgb,BITS),
-    glue(pl110_draw_line16_555_bbbp_rgb,BITS),
-    glue(pl110_draw_line32_bbbp_rgb,BITS),
-    glue(pl110_draw_line16_bbbp_rgb,BITS),
-    glue(pl110_draw_line12_bbbp_rgb,BITS),
-
-    glue(pl110_draw_line1_lbbp_rgb,BITS),
-    glue(pl110_draw_line2_lbbp_rgb,BITS),
-    glue(pl110_draw_line4_lbbp_rgb,BITS),
-    glue(pl110_draw_line8_lbbp_rgb,BITS),
-    glue(pl110_draw_line16_555_lbbp_rgb,BITS),
-    glue(pl110_draw_line32_lbbp_rgb,BITS),
-    glue(pl110_draw_line16_lbbp_rgb,BITS),
-    glue(pl110_draw_line12_lbbp_rgb,BITS),
-};
-
-#undef BITS
-#undef COPY_PIXEL
-
-#else
-
 #if ORDER == 0
-#define NAME glue(glue(lblp_, BORDER), BITS)
+#define NAME glue(lblp_, BORDER)
 #ifdef HOST_WORDS_BIGENDIAN
 #define SWAP_WORDS 1
 #endif
 #elif ORDER == 1
-#define NAME glue(glue(bbbp_, BORDER), BITS)
+#define NAME glue(bbbp_, BORDER)
 #ifndef HOST_WORDS_BIGENDIAN
 #define SWAP_WORDS 1
 #endif
 #else
 #define SWAP_PIXELS 1
-#define NAME glue(glue(lbbp_, BORDER), BITS)
+#define NAME glue(lbbp_, BORDER)
 #ifdef HOST_WORDS_BIGENDIAN
 #define SWAP_WORDS 1
 #endif
@@ -270,14 +174,14 @@ static void glue(pl110_draw_line16_,NAME)(void *opaque, uint8_t *d, const uint8_
         MSB = (data & 0x1f) << 3;
         data >>= 5;
 #endif
-        COPY_PIXEL(d, glue(rgb_to_pixel,BITS)(r, g, b));
+        COPY_PIXEL(d, rgb_to_pixel32(r, g, b));
         LSB = (data & 0x1f) << 3;
         data >>= 5;
         g = (data & 0x3f) << 2;
         data >>= 6;
         MSB = (data & 0x1f) << 3;
         data >>= 5;
-        COPY_PIXEL(d, glue(rgb_to_pixel,BITS)(r, g, b));
+        COPY_PIXEL(d, rgb_to_pixel32(r, g, b));
 #undef MSB
 #undef LSB
         width -= 2;
@@ -307,7 +211,7 @@ static void glue(pl110_draw_line32_,NAME)(void *opaque, uint8_t *d, const uint8_
         g = (data >> 16) & 0xff;
         MSB = (data >> 8) & 0xff;
 #endif
-        COPY_PIXEL(d, glue(rgb_to_pixel,BITS)(r, g, b));
+        COPY_PIXEL(d, rgb_to_pixel32(r, g, b));
 #undef MSB
 #undef LSB
         width--;
@@ -338,14 +242,14 @@ static void glue(pl110_draw_line16_555_,NAME)(void *opaque, uint8_t *d, const ui
         data >>= 5;
         MSB = (data & 0x1f) << 3;
         data >>= 5;
-        COPY_PIXEL(d, glue(rgb_to_pixel,BITS)(r, g, b));
+        COPY_PIXEL(d, rgb_to_pixel32(r, g, b));
         LSB = (data & 0x1f) << 3;
         data >>= 5;
         g = (data & 0x1f) << 3;
         data >>= 5;
         MSB = (data & 0x1f) << 3;
         data >>= 6;
-        COPY_PIXEL(d, glue(rgb_to_pixel,BITS)(r, g, b));
+        COPY_PIXEL(d, rgb_to_pixel32(r, g, b));
 #undef MSB
 #undef LSB
         width -= 2;
@@ -376,14 +280,14 @@ static void glue(pl110_draw_line12_,NAME)(void *opaque, uint8_t *d, const uint8_
         data >>= 4;
         MSB = (data & 0xf) << 4;
         data >>= 8;
-        COPY_PIXEL(d, glue(rgb_to_pixel,BITS)(r, g, b));
+        COPY_PIXEL(d, rgb_to_pixel32(r, g, b));
         LSB = (data & 0xf) << 4;
         data >>= 4;
         g = (data & 0xf) << 4;
         data >>= 4;
         MSB = (data & 0xf) << 4;
         data >>= 8;
-        COPY_PIXEL(d, glue(rgb_to_pixel,BITS)(r, g, b));
+        COPY_PIXEL(d, rgb_to_pixel32(r, g, b));
 #undef MSB
 #undef LSB
         width -= 2;
@@ -395,5 +299,3 @@ static void glue(pl110_draw_line12_,NAME)(void *opaque, uint8_t *d, const uint8_
 #undef NAME
 #undef SWAP_WORDS
 #undef ORDER
-
-#endif
diff --git a/hw/display/pxa2xx_lcd.c b/hw/display/pxa2xx_lcd.c
index dfff994962..2887ce496b 100644
--- a/hw/display/pxa2xx_lcd.c
+++ b/hw/display/pxa2xx_lcd.c
@@ -45,7 +45,6 @@ struct PXA2xxLCDState {
 
     int invalidated;
     QemuConsole *con;
-    drawfn *line_fn[2];
     int dest_width;
     int xres, yres;
     int pal_for;
@@ -188,6 +187,435 @@ typedef struct QEMU_PACKED {
 #define LDCMD_SOFINT	(1 << 22)
 #define LDCMD_PAL	(1 << 26)
 
+/* Size of a pixel in the QEMU UI output surface, in bytes */
+#define DEST_PIXEL_WIDTH 4
+
+/* Line drawing code to handle the various possible guest pixel formats */
+
+# define SKIP_PIXEL(to) do { to += deststep; } while (0)
+# define COPY_PIXEL(to, from)    \
+    do {                         \
+        *(uint32_t *) to = from; \
+        SKIP_PIXEL(to);          \
+    } while (0)
+
+#ifdef HOST_WORDS_BIGENDIAN
+# define SWAP_WORDS 1
+#endif
+
+#define FN_2(x) FN(x + 1) FN(x)
+#define FN_4(x) FN_2(x + 2) FN_2(x)
+
+static void pxa2xx_draw_line2(void *opaque, uint8_t *dest, const uint8_t *src,
+                              int width, int deststep)
+{
+    uint32_t *palette = opaque;
+    uint32_t data;
+    while (width > 0) {
+        data = *(uint32_t *) src;
+#define FN(x) COPY_PIXEL(dest, palette[(data >> ((x) * 2)) & 3]);
+#ifdef SWAP_WORDS
+        FN_4(12)
+        FN_4(8)
+        FN_4(4)
+        FN_4(0)
+#else
+        FN_4(0)
+        FN_4(4)
+        FN_4(8)
+        FN_4(12)
+#endif
+#undef FN
+        width -= 16;
+        src += 4;
+    }
+}
+
+static void pxa2xx_draw_line4(void *opaque, uint8_t *dest, const uint8_t *src,
+                              int width, int deststep)
+{
+    uint32_t *palette = opaque;
+    uint32_t data;
+    while (width > 0) {
+        data = *(uint32_t *) src;
+#define FN(x) COPY_PIXEL(dest, palette[(data >> ((x) * 4)) & 0xf]);
+#ifdef SWAP_WORDS
+        FN_2(6)
+        FN_2(4)
+        FN_2(2)
+        FN_2(0)
+#else
+        FN_2(0)
+        FN_2(2)
+        FN_2(4)
+        FN_2(6)
+#endif
+#undef FN
+        width -= 8;
+        src += 4;
+    }
+}
+
+static void pxa2xx_draw_line8(void *opaque, uint8_t *dest, const uint8_t *src,
+                              int width, int deststep)
+{
+    uint32_t *palette = opaque;
+    uint32_t data;
+    while (width > 0) {
+        data = *(uint32_t *) src;
+#define FN(x) COPY_PIXEL(dest, palette[(data >> (x)) & 0xff]);
+#ifdef SWAP_WORDS
+        FN(24)
+        FN(16)
+        FN(8)
+        FN(0)
+#else
+        FN(0)
+        FN(8)
+        FN(16)
+        FN(24)
+#endif
+#undef FN
+        width -= 4;
+        src += 4;
+    }
+}
+
+static void pxa2xx_draw_line16(void *opaque, uint8_t *dest, const uint8_t *src,
+                               int width, int deststep)
+{
+    uint32_t data;
+    unsigned int r, g, b;
+    while (width > 0) {
+        data = *(uint32_t *) src;
+#ifdef SWAP_WORDS
+        data = bswap32(data);
+#endif
+        b = (data & 0x1f) << 3;
+        data >>= 5;
+        g = (data & 0x3f) << 2;
+        data >>= 6;
+        r = (data & 0x1f) << 3;
+        data >>= 5;
+        COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        b = (data & 0x1f) << 3;
+        data >>= 5;
+        g = (data & 0x3f) << 2;
+        data >>= 6;
+        r = (data & 0x1f) << 3;
+        COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        width -= 2;
+        src += 4;
+    }
+}
+
+static void pxa2xx_draw_line16t(void *opaque, uint8_t *dest, const uint8_t *src,
+                                int width, int deststep)
+{
+    uint32_t data;
+    unsigned int r, g, b;
+    while (width > 0) {
+        data = *(uint32_t *) src;
+#ifdef SWAP_WORDS
+        data = bswap32(data);
+#endif
+        b = (data & 0x1f) << 3;
+        data >>= 5;
+        g = (data & 0x1f) << 3;
+        data >>= 5;
+        r = (data & 0x1f) << 3;
+        data >>= 5;
+        if (data & 1) {
+            SKIP_PIXEL(dest);
+        } else {
+            COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        }
+        data >>= 1;
+        b = (data & 0x1f) << 3;
+        data >>= 5;
+        g = (data & 0x1f) << 3;
+        data >>= 5;
+        r = (data & 0x1f) << 3;
+        data >>= 5;
+        if (data & 1) {
+            SKIP_PIXEL(dest);
+        } else {
+            COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        }
+        width -= 2;
+        src += 4;
+    }
+}
+
+static void pxa2xx_draw_line18(void *opaque, uint8_t *dest, const uint8_t *src,
+                               int width, int deststep)
+{
+    uint32_t data;
+    unsigned int r, g, b;
+    while (width > 0) {
+        data = *(uint32_t *) src;
+#ifdef SWAP_WORDS
+        data = bswap32(data);
+#endif
+        b = (data & 0x3f) << 2;
+        data >>= 6;
+        g = (data & 0x3f) << 2;
+        data >>= 6;
+        r = (data & 0x3f) << 2;
+        COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        width -= 1;
+        src += 4;
+    }
+}
+
+/* The wicked packed format */
+static void pxa2xx_draw_line18p(void *opaque, uint8_t *dest, const uint8_t *src,
+                                int width, int deststep)
+{
+    uint32_t data[3];
+    unsigned int r, g, b;
+    while (width > 0) {
+        data[0] = *(uint32_t *) src;
+        src += 4;
+        data[1] = *(uint32_t *) src;
+        src += 4;
+        data[2] = *(uint32_t *) src;
+        src += 4;
+#ifdef SWAP_WORDS
+        data[0] = bswap32(data[0]);
+        data[1] = bswap32(data[1]);
+        data[2] = bswap32(data[2]);
+#endif
+        b = (data[0] & 0x3f) << 2;
+        data[0] >>= 6;
+        g = (data[0] & 0x3f) << 2;
+        data[0] >>= 6;
+        r = (data[0] & 0x3f) << 2;
+        data[0] >>= 12;
+        COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        b = (data[0] & 0x3f) << 2;
+        data[0] >>= 6;
+        g = ((data[1] & 0xf) << 4) | (data[0] << 2);
+        data[1] >>= 4;
+        r = (data[1] & 0x3f) << 2;
+        data[1] >>= 12;
+        COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        b = (data[1] & 0x3f) << 2;
+        data[1] >>= 6;
+        g = (data[1] & 0x3f) << 2;
+        data[1] >>= 6;
+        r = ((data[2] & 0x3) << 6) | (data[1] << 2);
+        data[2] >>= 8;
+        COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        b = (data[2] & 0x3f) << 2;
+        data[2] >>= 6;
+        g = (data[2] & 0x3f) << 2;
+        data[2] >>= 6;
+        r = data[2] << 2;
+        COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        width -= 4;
+    }
+}
+
+static void pxa2xx_draw_line19(void *opaque, uint8_t *dest, const uint8_t *src,
+                               int width, int deststep)
+{
+    uint32_t data;
+    unsigned int r, g, b;
+    while (width > 0) {
+        data = *(uint32_t *) src;
+#ifdef SWAP_WORDS
+        data = bswap32(data);
+#endif
+        b = (data & 0x3f) << 2;
+        data >>= 6;
+        g = (data & 0x3f) << 2;
+        data >>= 6;
+        r = (data & 0x3f) << 2;
+        data >>= 6;
+        if (data & 1) {
+            SKIP_PIXEL(dest);
+        } else {
+            COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        }
+        width -= 1;
+        src += 4;
+    }
+}
+
+/* The wicked packed format */
+static void pxa2xx_draw_line19p(void *opaque, uint8_t *dest, const uint8_t *src,
+                                int width, int deststep)
+{
+    uint32_t data[3];
+    unsigned int r, g, b;
+    while (width > 0) {
+        data[0] = *(uint32_t *) src;
+        src += 4;
+        data[1] = *(uint32_t *) src;
+        src += 4;
+        data[2] = *(uint32_t *) src;
+        src += 4;
+# ifdef SWAP_WORDS
+        data[0] = bswap32(data[0]);
+        data[1] = bswap32(data[1]);
+        data[2] = bswap32(data[2]);
+# endif
+        b = (data[0] & 0x3f) << 2;
+        data[0] >>= 6;
+        g = (data[0] & 0x3f) << 2;
+        data[0] >>= 6;
+        r = (data[0] & 0x3f) << 2;
+        data[0] >>= 6;
+        if (data[0] & 1) {
+            SKIP_PIXEL(dest);
+        } else {
+            COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        }
+        data[0] >>= 6;
+        b = (data[0] & 0x3f) << 2;
+        data[0] >>= 6;
+        g = ((data[1] & 0xf) << 4) | (data[0] << 2);
+        data[1] >>= 4;
+        r = (data[1] & 0x3f) << 2;
+        data[1] >>= 6;
+        if (data[1] & 1) {
+            SKIP_PIXEL(dest);
+        } else {
+            COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        }
+        data[1] >>= 6;
+        b = (data[1] & 0x3f) << 2;
+        data[1] >>= 6;
+        g = (data[1] & 0x3f) << 2;
+        data[1] >>= 6;
+        r = ((data[2] & 0x3) << 6) | (data[1] << 2);
+        data[2] >>= 2;
+        if (data[2] & 1) {
+            SKIP_PIXEL(dest);
+        } else {
+            COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        }
+        data[2] >>= 6;
+        b = (data[2] & 0x3f) << 2;
+        data[2] >>= 6;
+        g = (data[2] & 0x3f) << 2;
+        data[2] >>= 6;
+        r = data[2] << 2;
+        data[2] >>= 6;
+        if (data[2] & 1) {
+            SKIP_PIXEL(dest);
+        } else {
+            COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        }
+        width -= 4;
+    }
+}
+
+static void pxa2xx_draw_line24(void *opaque, uint8_t *dest, const uint8_t *src,
+                               int width, int deststep)
+{
+    uint32_t data;
+    unsigned int r, g, b;
+    while (width > 0) {
+        data = *(uint32_t *) src;
+#ifdef SWAP_WORDS
+        data = bswap32(data);
+#endif
+        b = data & 0xff;
+        data >>= 8;
+        g = data & 0xff;
+        data >>= 8;
+        r = data & 0xff;
+        COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        width -= 1;
+        src += 4;
+    }
+}
+
+static void pxa2xx_draw_line24t(void *opaque, uint8_t *dest, const uint8_t *src,
+                                int width, int deststep)
+{
+    uint32_t data;
+    unsigned int r, g, b;
+    while (width > 0) {
+        data = *(uint32_t *) src;
+#ifdef SWAP_WORDS
+        data = bswap32(data);
+#endif
+        b = (data & 0x7f) << 1;
+        data >>= 7;
+        g = data & 0xff;
+        data >>= 8;
+        r = data & 0xff;
+        data >>= 8;
+        if (data & 1) {
+            SKIP_PIXEL(dest);
+        } else {
+            COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        }
+        width -= 1;
+        src += 4;
+    }
+}
+
+static void pxa2xx_draw_line25(void *opaque, uint8_t *dest, const uint8_t *src,
+                               int width, int deststep)
+{
+    uint32_t data;
+    unsigned int r, g, b;
+    while (width > 0) {
+        data = *(uint32_t *) src;
+#ifdef SWAP_WORDS
+        data = bswap32(data);
+#endif
+        b = data & 0xff;
+        data >>= 8;
+        g = data & 0xff;
+        data >>= 8;
+        r = data & 0xff;
+        data >>= 8;
+        if (data & 1) {
+            SKIP_PIXEL(dest);
+        } else {
+            COPY_PIXEL(dest, rgb_to_pixel32(r, g, b));
+        }
+        width -= 1;
+        src += 4;
+    }
+}
+
+/* Overlay planes disabled, no transparency */
+static drawfn pxa2xx_draw_fn_32[16] = {
+    [0 ... 0xf]       = NULL,
+    [pxa_lcdc_2bpp]   = pxa2xx_draw_line2,
+    [pxa_lcdc_4bpp]   = pxa2xx_draw_line4,
+    [pxa_lcdc_8bpp]   = pxa2xx_draw_line8,
+    [pxa_lcdc_16bpp]  = pxa2xx_draw_line16,
+    [pxa_lcdc_18bpp]  = pxa2xx_draw_line18,
+    [pxa_lcdc_18pbpp] = pxa2xx_draw_line18p,
+    [pxa_lcdc_24bpp]  = pxa2xx_draw_line24,
+};
+
+/* Overlay planes enabled, transparency used */
+static drawfn pxa2xx_draw_fn_32t[16] = {
+    [0 ... 0xf]       = NULL,
+    [pxa_lcdc_4bpp]   = pxa2xx_draw_line4,
+    [pxa_lcdc_8bpp]   = pxa2xx_draw_line8,
+    [pxa_lcdc_16bpp]  = pxa2xx_draw_line16t,
+    [pxa_lcdc_19bpp]  = pxa2xx_draw_line19,
+    [pxa_lcdc_19pbpp] = pxa2xx_draw_line19p,
+    [pxa_lcdc_24bpp]  = pxa2xx_draw_line24t,
+    [pxa_lcdc_25bpp]  = pxa2xx_draw_line25,
+};
+
+#undef COPY_PIXEL
+#undef SKIP_PIXEL
+
+#ifdef SWAP_WORDS
+# undef SWAP_WORDS
+#endif
+
 /* Route internal interrupt lines to the global IC */
 static void pxa2xx_lcdc_int_update(PXA2xxLCDState *s)
 {
@@ -674,14 +1102,21 @@ static void pxa2xx_palette_parse(PXA2xxLCDState *s, int ch, int bpp)
     }
 }
 
+static inline drawfn pxa2xx_drawfn(PXA2xxLCDState *s)
+{
+    if (s->transp) {
+        return pxa2xx_draw_fn_32t[s->bpp];
+    } else {
+        return pxa2xx_draw_fn_32[s->bpp];
+    }
+}
+
 static void pxa2xx_lcdc_dma0_redraw_rot0(PXA2xxLCDState *s,
                 hwaddr addr, int *miny, int *maxy)
 {
     DisplaySurface *surface = qemu_console_surface(s->con);
     int src_width, dest_width;
-    drawfn fn = NULL;
-    if (s->dest_width)
-        fn = s->line_fn[s->transp][s->bpp];
+    drawfn fn = pxa2xx_drawfn(s);
     if (!fn)
         return;
 
@@ -693,14 +1128,14 @@ static void pxa2xx_lcdc_dma0_redraw_rot0(PXA2xxLCDState *s,
     else if (s->bpp > pxa_lcdc_8bpp)
         src_width *= 2;
 
-    dest_width = s->xres * s->dest_width;
+    dest_width = s->xres * DEST_PIXEL_WIDTH;
     *miny = 0;
     if (s->invalidated) {
         framebuffer_update_memory_section(&s->fbsection, s->sysmem,
                                           addr, s->yres, src_width);
     }
     framebuffer_update_display(surface, &s->fbsection, s->xres, s->yres,
-                               src_width, dest_width, s->dest_width,
+                               src_width, dest_width, DEST_PIXEL_WIDTH,
                                s->invalidated,
                                fn, s->dma_ch[0].palette, miny, maxy);
 }
@@ -710,9 +1145,7 @@ static void pxa2xx_lcdc_dma0_redraw_rot90(PXA2xxLCDState *s,
 {
     DisplaySurface *surface = qemu_console_surface(s->con);
     int src_width, dest_width;
-    drawfn fn = NULL;
-    if (s->dest_width)
-        fn = s->line_fn[s->transp][s->bpp];
+    drawfn fn = pxa2xx_drawfn(s);
     if (!fn)
         return;
 
@@ -724,14 +1157,14 @@ static void pxa2xx_lcdc_dma0_redraw_rot90(PXA2xxLCDState *s,
     else if (s->bpp > pxa_lcdc_8bpp)
         src_width *= 2;
 
-    dest_width = s->yres * s->dest_width;
+    dest_width = s->yres * DEST_PIXEL_WIDTH;
     *miny = 0;
     if (s->invalidated) {
         framebuffer_update_memory_section(&s->fbsection, s->sysmem,
                                           addr, s->yres, src_width);
     }
     framebuffer_update_display(surface, &s->fbsection, s->xres, s->yres,
-                               src_width, s->dest_width, -dest_width,
+                               src_width, DEST_PIXEL_WIDTH, -dest_width,
                                s->invalidated,
                                fn, s->dma_ch[0].palette,
                                miny, maxy);
@@ -742,10 +1175,7 @@ static void pxa2xx_lcdc_dma0_redraw_rot180(PXA2xxLCDState *s,
 {
     DisplaySurface *surface = qemu_console_surface(s->con);
     int src_width, dest_width;
-    drawfn fn = NULL;
-    if (s->dest_width) {
-        fn = s->line_fn[s->transp][s->bpp];
-    }
+    drawfn fn = pxa2xx_drawfn(s);
     if (!fn) {
         return;
     }
@@ -759,14 +1189,14 @@ static void pxa2xx_lcdc_dma0_redraw_rot180(PXA2xxLCDState *s,
         src_width *= 2;
     }
 
-    dest_width = s->xres * s->dest_width;
+    dest_width = s->xres * DEST_PIXEL_WIDTH;
     *miny = 0;
     if (s->invalidated) {
         framebuffer_update_memory_section(&s->fbsection, s->sysmem,
                                           addr, s->yres, src_width);
     }
     framebuffer_update_display(surface, &s->fbsection, s->xres, s->yres,
-                               src_width, -dest_width, -s->dest_width,
+                               src_width, -dest_width, -DEST_PIXEL_WIDTH,
                                s->invalidated,
                                fn, s->dma_ch[0].palette, miny, maxy);
 }
@@ -776,10 +1206,7 @@ static void pxa2xx_lcdc_dma0_redraw_rot270(PXA2xxLCDState *s,
 {
     DisplaySurface *surface = qemu_console_surface(s->con);
     int src_width, dest_width;
-    drawfn fn = NULL;
-    if (s->dest_width) {
-        fn = s->line_fn[s->transp][s->bpp];
-    }
+    drawfn fn = pxa2xx_drawfn(s);
     if (!fn) {
         return;
     }
@@ -793,14 +1220,14 @@ static void pxa2xx_lcdc_dma0_redraw_rot270(PXA2xxLCDState *s,
         src_width *= 2;
     }
 
-    dest_width = s->yres * s->dest_width;
+    dest_width = s->yres * DEST_PIXEL_WIDTH;
     *miny = 0;
     if (s->invalidated) {
         framebuffer_update_memory_section(&s->fbsection, s->sysmem,
                                           addr, s->yres, src_width);
     }
     framebuffer_update_display(surface, &s->fbsection, s->xres, s->yres,
-                               src_width, -s->dest_width, dest_width,
+                               src_width, -DEST_PIXEL_WIDTH, dest_width,
                                s->invalidated,
                                fn, s->dma_ch[0].palette,
                                miny, maxy);
@@ -990,17 +1417,6 @@ static const VMStateDescription vmstate_pxa2xx_lcdc = {
     }
 };
 
-#define BITS 8
-#include "pxa2xx_template.h"
-#define BITS 15
-#include "pxa2xx_template.h"
-#define BITS 16
-#include "pxa2xx_template.h"
-#define BITS 24
-#include "pxa2xx_template.h"
-#define BITS 32
-#include "pxa2xx_template.h"
-
 static const GraphicHwOps pxa2xx_ops = {
     .invalidate  = pxa2xx_invalidate_display,
     .gfx_update  = pxa2xx_update_display,
@@ -1010,7 +1426,6 @@ PXA2xxLCDState *pxa2xx_lcdc_init(MemoryRegion *sysmem,
                                  hwaddr base, qemu_irq irq)
 {
     PXA2xxLCDState *s;
-    DisplaySurface *surface;
 
     s = (PXA2xxLCDState *) g_malloc0(sizeof(PXA2xxLCDState));
     s->invalidated = 1;
@@ -1024,41 +1439,6 @@ PXA2xxLCDState *pxa2xx_lcdc_init(MemoryRegion *sysmem,
     memory_region_add_subregion(sysmem, base, &s->iomem);
 
     s->con = graphic_console_init(NULL, 0, &pxa2xx_ops, s);
-    surface = qemu_console_surface(s->con);
-
-    switch (surface_bits_per_pixel(surface)) {
-    case 0:
-        s->dest_width = 0;
-        break;
-    case 8:
-        s->line_fn[0] = pxa2xx_draw_fn_8;
-        s->line_fn[1] = pxa2xx_draw_fn_8t;
-        s->dest_width = 1;
-        break;
-    case 15:
-        s->line_fn[0] = pxa2xx_draw_fn_15;
-        s->line_fn[1] = pxa2xx_draw_fn_15t;
-        s->dest_width = 2;
-        break;
-    case 16:
-        s->line_fn[0] = pxa2xx_draw_fn_16;
-        s->line_fn[1] = pxa2xx_draw_fn_16t;
-        s->dest_width = 2;
-        break;
-    case 24:
-        s->line_fn[0] = pxa2xx_draw_fn_24;
-        s->line_fn[1] = pxa2xx_draw_fn_24t;
-        s->dest_width = 3;
-        break;
-    case 32:
-        s->line_fn[0] = pxa2xx_draw_fn_32;
-        s->line_fn[1] = pxa2xx_draw_fn_32t;
-        s->dest_width = 4;
-        break;
-    default:
-        fprintf(stderr, "%s: Bad color depth\n", __func__);
-        exit(1);
-    }
 
     vmstate_register(NULL, 0, &vmstate_pxa2xx_lcdc, s);
 
diff --git a/hw/display/pxa2xx_template.h b/hw/display/pxa2xx_template.h
deleted file mode 100644
index c64eebc4b6..0000000000
--- a/hw/display/pxa2xx_template.h
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * Intel XScale PXA255/270 LCDC emulation.
- *
- * Copyright (c) 2006 Openedhand Ltd.
- * Written by Andrzej Zaborowski <balrog@zabor.org>
- *
- * This code is licensed under the GPLv2.
- *
- * Framebuffer format conversion routines.
- */
-
-# define SKIP_PIXEL(to)		to += deststep
-#if BITS == 8
-# define COPY_PIXEL(to, from)  do { *to = from; SKIP_PIXEL(to); } while (0)
-#elif BITS == 15 || BITS == 16
-# define COPY_PIXEL(to, from)    \
-    do {                         \
-        *(uint16_t *) to = from; \
-        SKIP_PIXEL(to);          \
-    } while (0)
-#elif BITS == 24
-# define COPY_PIXEL(to, from)     \
-    do {                          \
-        *(uint16_t *) to = from;  \
-        *(to + 2) = (from) >> 16; \
-        SKIP_PIXEL(to);           \
-    } while (0)
-#elif BITS == 32
-# define COPY_PIXEL(to, from)    \
-    do {                         \
-        *(uint32_t *) to = from; \
-        SKIP_PIXEL(to);          \
-    } while (0)
-#else
-# error unknown bit depth
-#endif
-
-#ifdef HOST_WORDS_BIGENDIAN
-# define SWAP_WORDS	1
-#endif
-
-#define FN_2(x)		FN(x + 1) FN(x)
-#define FN_4(x)		FN_2(x + 2) FN_2(x)
-
-static void glue(pxa2xx_draw_line2_, BITS)(void *opaque,
-                uint8_t *dest, const uint8_t *src, int width, int deststep)
-{
-    uint32_t *palette = opaque;
-    uint32_t data;
-    while (width > 0) {
-        data = *(uint32_t *) src;
-#define FN(x)		COPY_PIXEL(dest, palette[(data >> ((x) * 2)) & 3]);
-#ifdef SWAP_WORDS
-        FN_4(12)
-        FN_4(8)
-        FN_4(4)
-        FN_4(0)
-#else
-        FN_4(0)
-        FN_4(4)
-        FN_4(8)
-        FN_4(12)
-#endif
-#undef FN
-        width -= 16;
-        src += 4;
-    }
-}
-
-static void glue(pxa2xx_draw_line4_, BITS)(void *opaque,
-                uint8_t *dest, const uint8_t *src, int width, int deststep)
-{
-    uint32_t *palette = opaque;
-    uint32_t data;
-    while (width > 0) {
-        data = *(uint32_t *) src;
-#define FN(x)		COPY_PIXEL(dest, palette[(data >> ((x) * 4)) & 0xf]);
-#ifdef SWAP_WORDS
-        FN_2(6)
-        FN_2(4)
-        FN_2(2)
-        FN_2(0)
-#else
-        FN_2(0)
-        FN_2(2)
-        FN_2(4)
-        FN_2(6)
-#endif
-#undef FN
-        width -= 8;
-        src += 4;
-    }
-}
-
-static void glue(pxa2xx_draw_line8_, BITS)(void *opaque,
-                uint8_t *dest, const uint8_t *src, int width, int deststep)
-{
-    uint32_t *palette = opaque;
-    uint32_t data;
-    while (width > 0) {
-        data = *(uint32_t *) src;
-#define FN(x)		COPY_PIXEL(dest, palette[(data >> (x)) & 0xff]);
-#ifdef SWAP_WORDS
-        FN(24)
-        FN(16)
-        FN(8)
-        FN(0)
-#else
-        FN(0)
-        FN(8)
-        FN(16)
-        FN(24)
-#endif
-#undef FN
-        width -= 4;
-        src += 4;
-    }
-}
-
-static void glue(pxa2xx_draw_line16_, BITS)(void *opaque,
-                uint8_t *dest, const uint8_t *src, int width, int deststep)
-{
-    uint32_t data;
-    unsigned int r, g, b;
-    while (width > 0) {
-        data = *(uint32_t *) src;
-#ifdef SWAP_WORDS
-        data = bswap32(data);
-#endif
-        b = (data & 0x1f) << 3;
-        data >>= 5;
-        g = (data & 0x3f) << 2;
-        data >>= 6;
-        r = (data & 0x1f) << 3;
-        data >>= 5;
-        COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        b = (data & 0x1f) << 3;
-        data >>= 5;
-        g = (data & 0x3f) << 2;
-        data >>= 6;
-        r = (data & 0x1f) << 3;
-        COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        width -= 2;
-        src += 4;
-    }
-}
-
-static void glue(pxa2xx_draw_line16t_, BITS)(void *opaque,
-                uint8_t *dest, const uint8_t *src, int width, int deststep)
-{
-    uint32_t data;
-    unsigned int r, g, b;
-    while (width > 0) {
-        data = *(uint32_t *) src;
-#ifdef SWAP_WORDS
-        data = bswap32(data);
-#endif
-        b = (data & 0x1f) << 3;
-        data >>= 5;
-        g = (data & 0x1f) << 3;
-        data >>= 5;
-        r = (data & 0x1f) << 3;
-        data >>= 5;
-        if (data & 1)
-            SKIP_PIXEL(dest);
-        else
-            COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        data >>= 1;
-        b = (data & 0x1f) << 3;
-        data >>= 5;
-        g = (data & 0x1f) << 3;
-        data >>= 5;
-        r = (data & 0x1f) << 3;
-        data >>= 5;
-        if (data & 1)
-            SKIP_PIXEL(dest);
-        else
-            COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        width -= 2;
-        src += 4;
-    }
-}
-
-static void glue(pxa2xx_draw_line18_, BITS)(void *opaque,
-                uint8_t *dest, const uint8_t *src, int width, int deststep)
-{
-    uint32_t data;
-    unsigned int r, g, b;
-    while (width > 0) {
-        data = *(uint32_t *) src;
-#ifdef SWAP_WORDS
-        data = bswap32(data);
-#endif
-        b = (data & 0x3f) << 2;
-        data >>= 6;
-        g = (data & 0x3f) << 2;
-        data >>= 6;
-        r = (data & 0x3f) << 2;
-        COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        width -= 1;
-        src += 4;
-    }
-}
-
-/* The wicked packed format */
-static void glue(pxa2xx_draw_line18p_, BITS)(void *opaque,
-                uint8_t *dest, const uint8_t *src, int width, int deststep)
-{
-    uint32_t data[3];
-    unsigned int r, g, b;
-    while (width > 0) {
-        data[0] = *(uint32_t *) src;
-        src += 4;
-        data[1] = *(uint32_t *) src;
-        src += 4;
-        data[2] = *(uint32_t *) src;
-        src += 4;
-#ifdef SWAP_WORDS
-        data[0] = bswap32(data[0]);
-        data[1] = bswap32(data[1]);
-        data[2] = bswap32(data[2]);
-#endif
-        b = (data[0] & 0x3f) << 2;
-        data[0] >>= 6;
-        g = (data[0] & 0x3f) << 2;
-        data[0] >>= 6;
-        r = (data[0] & 0x3f) << 2;
-        data[0] >>= 12;
-        COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        b = (data[0] & 0x3f) << 2;
-        data[0] >>= 6;
-        g = ((data[1] & 0xf) << 4) | (data[0] << 2);
-        data[1] >>= 4;
-        r = (data[1] & 0x3f) << 2;
-        data[1] >>= 12;
-        COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        b = (data[1] & 0x3f) << 2;
-        data[1] >>= 6;
-        g = (data[1] & 0x3f) << 2;
-        data[1] >>= 6;
-        r = ((data[2] & 0x3) << 6) | (data[1] << 2);
-        data[2] >>= 8;
-        COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        b = (data[2] & 0x3f) << 2;
-        data[2] >>= 6;
-        g = (data[2] & 0x3f) << 2;
-        data[2] >>= 6;
-        r = data[2] << 2;
-        COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        width -= 4;
-    }
-}
-
-static void glue(pxa2xx_draw_line19_, BITS)(void *opaque,
-                uint8_t *dest, const uint8_t *src, int width, int deststep)
-{
-    uint32_t data;
-    unsigned int r, g, b;
-    while (width > 0) {
-        data = *(uint32_t *) src;
-#ifdef SWAP_WORDS
-        data = bswap32(data);
-#endif
-        b = (data & 0x3f) << 2;
-        data >>= 6;
-        g = (data & 0x3f) << 2;
-        data >>= 6;
-        r = (data & 0x3f) << 2;
-        data >>= 6;
-        if (data & 1)
-            SKIP_PIXEL(dest);
-        else
-            COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        width -= 1;
-        src += 4;
-    }
-}
-
-/* The wicked packed format */
-static void glue(pxa2xx_draw_line19p_, BITS)(void *opaque,
-                uint8_t *dest, const uint8_t *src, int width, int deststep)
-{
-    uint32_t data[3];
-    unsigned int r, g, b;
-    while (width > 0) {
-        data[0] = *(uint32_t *) src;
-        src += 4;
-        data[1] = *(uint32_t *) src;
-        src += 4;
-        data[2] = *(uint32_t *) src;
-        src += 4;
-# ifdef SWAP_WORDS
-        data[0] = bswap32(data[0]);
-        data[1] = bswap32(data[1]);
-        data[2] = bswap32(data[2]);
-# endif
-        b = (data[0] & 0x3f) << 2;
-        data[0] >>= 6;
-        g = (data[0] & 0x3f) << 2;
-        data[0] >>= 6;
-        r = (data[0] & 0x3f) << 2;
-        data[0] >>= 6;
-        if (data[0] & 1)
-            SKIP_PIXEL(dest);
-        else
-            COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        data[0] >>= 6;
-        b = (data[0] & 0x3f) << 2;
-        data[0] >>= 6;
-        g = ((data[1] & 0xf) << 4) | (data[0] << 2);
-        data[1] >>= 4;
-        r = (data[1] & 0x3f) << 2;
-        data[1] >>= 6;
-        if (data[1] & 1)
-            SKIP_PIXEL(dest);
-        else
-            COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        data[1] >>= 6;
-        b = (data[1] & 0x3f) << 2;
-        data[1] >>= 6;
-        g = (data[1] & 0x3f) << 2;
-        data[1] >>= 6;
-        r = ((data[2] & 0x3) << 6) | (data[1] << 2);
-        data[2] >>= 2;
-        if (data[2] & 1)
-            SKIP_PIXEL(dest);
-        else
-            COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        data[2] >>= 6;
-        b = (data[2] & 0x3f) << 2;
-        data[2] >>= 6;
-        g = (data[2] & 0x3f) << 2;
-        data[2] >>= 6;
-        r = data[2] << 2;
-        data[2] >>= 6;
-        if (data[2] & 1)
-            SKIP_PIXEL(dest);
-        else
-            COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        width -= 4;
-    }
-}
-
-static void glue(pxa2xx_draw_line24_, BITS)(void *opaque,
-                uint8_t *dest, const uint8_t *src, int width, int deststep)
-{
-    uint32_t data;
-    unsigned int r, g, b;
-    while (width > 0) {
-        data = *(uint32_t *) src;
-#ifdef SWAP_WORDS
-        data = bswap32(data);
-#endif
-        b = data & 0xff;
-        data >>= 8;
-        g = data & 0xff;
-        data >>= 8;
-        r = data & 0xff;
-        COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        width -= 1;
-        src += 4;
-    }
-}
-
-static void glue(pxa2xx_draw_line24t_, BITS)(void *opaque,
-                uint8_t *dest, const uint8_t *src, int width, int deststep)
-{
-    uint32_t data;
-    unsigned int r, g, b;
-    while (width > 0) {
-        data = *(uint32_t *) src;
-#ifdef SWAP_WORDS
-        data = bswap32(data);
-#endif
-        b = (data & 0x7f) << 1;
-        data >>= 7;
-        g = data & 0xff;
-        data >>= 8;
-        r = data & 0xff;
-        data >>= 8;
-        if (data & 1)
-            SKIP_PIXEL(dest);
-        else
-            COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        width -= 1;
-        src += 4;
-    }
-}
-
-static void glue(pxa2xx_draw_line25_, BITS)(void *opaque,
-                uint8_t *dest, const uint8_t *src, int width, int deststep)
-{
-    uint32_t data;
-    unsigned int r, g, b;
-    while (width > 0) {
-        data = *(uint32_t *) src;
-#ifdef SWAP_WORDS
-        data = bswap32(data);
-#endif
-        b = data & 0xff;
-        data >>= 8;
-        g = data & 0xff;
-        data >>= 8;
-        r = data & 0xff;
-        data >>= 8;
-        if (data & 1)
-            SKIP_PIXEL(dest);
-        else
-            COPY_PIXEL(dest, glue(rgb_to_pixel, BITS)(r, g, b));
-        width -= 1;
-        src += 4;
-    }
-}
-
-/* Overlay planes disabled, no transparency */
-static drawfn glue(pxa2xx_draw_fn_, BITS)[16] =
-{
-    [0 ... 0xf]       = NULL,
-    [pxa_lcdc_2bpp]   = glue(pxa2xx_draw_line2_, BITS),
-    [pxa_lcdc_4bpp]   = glue(pxa2xx_draw_line4_, BITS),
-    [pxa_lcdc_8bpp]   = glue(pxa2xx_draw_line8_, BITS),
-    [pxa_lcdc_16bpp]  = glue(pxa2xx_draw_line16_, BITS),
-    [pxa_lcdc_18bpp]  = glue(pxa2xx_draw_line18_, BITS),
-    [pxa_lcdc_18pbpp] = glue(pxa2xx_draw_line18p_, BITS),
-    [pxa_lcdc_24bpp]  = glue(pxa2xx_draw_line24_, BITS),
-};
-
-/* Overlay planes enabled, transparency used */
-static drawfn glue(glue(pxa2xx_draw_fn_, BITS), t)[16] =
-{
-    [0 ... 0xf]       = NULL,
-    [pxa_lcdc_4bpp]   = glue(pxa2xx_draw_line4_, BITS),
-    [pxa_lcdc_8bpp]   = glue(pxa2xx_draw_line8_, BITS),
-    [pxa_lcdc_16bpp]  = glue(pxa2xx_draw_line16t_, BITS),
-    [pxa_lcdc_19bpp]  = glue(pxa2xx_draw_line19_, BITS),
-    [pxa_lcdc_19pbpp] = glue(pxa2xx_draw_line19p_, BITS),
-    [pxa_lcdc_24bpp]  = glue(pxa2xx_draw_line24t_, BITS),
-    [pxa_lcdc_25bpp]  = glue(pxa2xx_draw_line25_, BITS),
-};
-
-#undef BITS
-#undef COPY_PIXEL
-#undef SKIP_PIXEL
-
-#ifdef SWAP_WORDS
-# undef SWAP_WORDS
-#endif
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b4f5094259..6be8f32918 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -35,6 +35,7 @@
 #include "hw/i386/x86-iommu.h"
 #include "hw/pci-host/q35.h"
 #include "sysemu/kvm.h"
+#include "sysemu/dma.h"
 #include "sysemu/sysemu.h"
 #include "hw/i386/apic_internal.h"
 #include "kvm/kvm_i386.h"
@@ -1884,6 +1885,8 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
     case 3:
         mask = 7;   /* Mask bit 2:0 in the SID field */
         break;
+    default:
+        g_assert_not_reached();
     }
     mask = ~mask;
 
@@ -3453,24 +3456,6 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
     return vtd_dev_as;
 }
 
-static uint64_t get_naturally_aligned_size(uint64_t start,
-                                           uint64_t size, int gaw)
-{
-    uint64_t max_mask = 1ULL << gaw;
-    uint64_t alignment = start ? start & -start : max_mask;
-
-    alignment = MIN(alignment, max_mask);
-    size = MIN(size, max_mask);
-
-    if (alignment <= size) {
-        /* Increase the alignment of start */
-        return alignment;
-    } else {
-        /* Find the largest page mask from size */
-        return 1ULL << (63 - clz64(size));
-    }
-}
-
 /* Unmap the whole range in the notifier's scope. */
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
 {
@@ -3499,13 +3484,14 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
 
     while (remain >= VTD_PAGE_SIZE) {
         IOMMUTLBEvent event;
-        uint64_t mask = get_naturally_aligned_size(start, remain, s->aw_bits);
+        uint64_t mask = dma_aligned_pow2_mask(start, end, s->aw_bits);
+        uint64_t size = mask + 1;
 
-        assert(mask);
+        assert(size);
 
         event.type = IOMMU_NOTIFIER_UNMAP;
         event.entry.iova = start;
-        event.entry.addr_mask = mask - 1;
+        event.entry.addr_mask = mask;
         event.entry.target_as = &address_space_memory;
         event.entry.perm = IOMMU_NONE;
         /* This field is meaningless for unmap */
@@ -3513,8 +3499,8 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
 
         memory_region_notify_iommu_one(n, &event);
 
-        start += mask;
-        remain -= mask;
+        start += size;
+        remain -= size;
     }
 
     assert(!remain);
diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c
index 588e6f9930..43349d6837 100644
--- a/hw/mips/gt64xxx_pci.c
+++ b/hw/mips/gt64xxx_pci.c
@@ -385,13 +385,13 @@ static void gt64120_writel(void *opaque, hwaddr addr,
 {
     GT64120State *s = opaque;
     PCIHostState *phb = PCI_HOST_BRIDGE(s);
-    uint32_t saddr;
+    uint32_t saddr = addr >> 2;
 
+    trace_gt64120_write(addr, val);
     if (!(s->regs[GT_CPU] & 0x00001000)) {
         val = bswap32(val);
     }
 
-    saddr = (addr & 0xfff) >> 2;
     switch (saddr) {
 
     /* CPU Configuration */
@@ -464,7 +464,7 @@ static void gt64120_writel(void *opaque, hwaddr addr,
         /* Read-only registers, do nothing */
         qemu_log_mask(LOG_GUEST_ERROR,
                       "gt64120: Read-only register write "
-                      "reg:0x03%x size:%u value:0x%0*" PRIx64 "\n",
+                      "reg:0x%03x size:%u value:0x%0*" PRIx64 "\n",
                       saddr << 2, size, size << 1, val);
         break;
 
@@ -474,7 +474,7 @@ static void gt64120_writel(void *opaque, hwaddr addr,
         /* Read-only registers, do nothing */
         qemu_log_mask(LOG_GUEST_ERROR,
                       "gt64120: Read-only register write "
-                      "reg:0x03%x size:%u value:0x%0*" PRIx64 "\n",
+                      "reg:0x%03x size:%u value:0x%0*" PRIx64 "\n",
                       saddr << 2, size, size << 1, val);
         break;
 
@@ -516,7 +516,7 @@ static void gt64120_writel(void *opaque, hwaddr addr,
         /* Not implemented */
         qemu_log_mask(LOG_UNIMP,
                       "gt64120: Unimplemented device register write "
-                      "reg:0x03%x size:%u value:0x%0*" PRIx64 "\n",
+                      "reg:0x%03x size:%u value:0x%0*" PRIx64 "\n",
                       saddr << 2, size, size << 1, val);
         break;
 
@@ -529,7 +529,7 @@ static void gt64120_writel(void *opaque, hwaddr addr,
         /* Read-only registers, do nothing */
         qemu_log_mask(LOG_GUEST_ERROR,
                       "gt64120: Read-only register write "
-                      "reg:0x03%x size:%u value:0x%0*" PRIx64 "\n",
+                      "reg:0x%03x size:%u value:0x%0*" PRIx64 "\n",
                       saddr << 2, size, size << 1, val);
         break;
 
@@ -566,7 +566,7 @@ static void gt64120_writel(void *opaque, hwaddr addr,
         /* Not implemented */
         qemu_log_mask(LOG_UNIMP,
                       "gt64120: Unimplemented DMA register write "
-                      "reg:0x03%x size:%u value:0x%0*" PRIx64 "\n",
+                      "reg:0x%03x size:%u value:0x%0*" PRIx64 "\n",
                       saddr << 2, size, size << 1, val);
         break;
 
@@ -579,7 +579,7 @@ static void gt64120_writel(void *opaque, hwaddr addr,
         /* Not implemented */
         qemu_log_mask(LOG_UNIMP,
                       "gt64120: Unimplemented timer register write "
-                      "reg:0x03%x size:%u value:0x%0*" PRIx64 "\n",
+                      "reg:0x%03x size:%u value:0x%0*" PRIx64 "\n",
                       saddr << 2, size, size << 1, val);
         break;
 
@@ -622,8 +622,8 @@ static void gt64120_writel(void *opaque, hwaddr addr,
     case GT_PCI1_CFGDATA:
         /* not implemented */
         qemu_log_mask(LOG_UNIMP,
-                      "gt64120: Unimplemented timer register write "
-                      "reg:0x03%x size:%u value:0x%0*" PRIx64 "\n",
+                      "gt64120: Unimplemented PCI register write "
+                      "reg:0x%03x size:%u value:0x%0*" PRIx64 "\n",
                       saddr << 2, size, size << 1, val);
         break;
     case GT_PCI0_CFGADDR:
@@ -643,19 +643,19 @@ static void gt64120_writel(void *opaque, hwaddr addr,
         /* not really implemented */
         s->regs[saddr] = ~(~(s->regs[saddr]) | ~(val & 0xfffffffe));
         s->regs[saddr] |= !!(s->regs[saddr] & 0xfffffffe);
-        trace_gt64120_write("INTRCAUSE", size, val);
+        trace_gt64120_write_intreg("INTRCAUSE", size, val);
         break;
     case GT_INTRMASK:
         s->regs[saddr] = val & 0x3c3ffffe;
-        trace_gt64120_write("INTRMASK", size, val);
+        trace_gt64120_write_intreg("INTRMASK", size, val);
         break;
     case GT_PCI0_ICMASK:
         s->regs[saddr] = val & 0x03fffffe;
-        trace_gt64120_write("ICMASK", size, val);
+        trace_gt64120_write_intreg("ICMASK", size, val);
         break;
     case GT_PCI0_SERR0MASK:
         s->regs[saddr] = val & 0x0000003f;
-        trace_gt64120_write("SERR0MASK", size, val);
+        trace_gt64120_write_intreg("SERR0MASK", size, val);
         break;
 
     /* Reserved when only PCI_0 is configured. */
@@ -683,7 +683,7 @@ static void gt64120_writel(void *opaque, hwaddr addr,
     default:
         qemu_log_mask(LOG_GUEST_ERROR,
                       "gt64120: Illegal register write "
-                      "reg:0x03%x size:%u value:0x%0*" PRIx64 "\n",
+                      "reg:0x%03x size:%u value:0x%0*" PRIx64 "\n",
                       saddr << 2, size, size << 1, val);
         break;
     }
@@ -695,9 +695,8 @@ static uint64_t gt64120_readl(void *opaque,
     GT64120State *s = opaque;
     PCIHostState *phb = PCI_HOST_BRIDGE(s);
     uint32_t val;
-    uint32_t saddr;
+    uint32_t saddr = addr >> 2;
 
-    saddr = (addr & 0xfff) >> 2;
     switch (saddr) {
 
     /* CPU Configuration */
@@ -931,19 +930,19 @@ static uint64_t gt64120_readl(void *opaque,
     /* Interrupts */
     case GT_INTRCAUSE:
         val = s->regs[saddr];
-        trace_gt64120_read("INTRCAUSE", size, val);
+        trace_gt64120_read_intreg("INTRCAUSE", size, val);
         break;
     case GT_INTRMASK:
         val = s->regs[saddr];
-        trace_gt64120_read("INTRMASK", size, val);
+        trace_gt64120_read_intreg("INTRMASK", size, val);
         break;
     case GT_PCI0_ICMASK:
         val = s->regs[saddr];
-        trace_gt64120_read("ICMASK", size, val);
+        trace_gt64120_read_intreg("ICMASK", size, val);
         break;
     case GT_PCI0_SERR0MASK:
         val = s->regs[saddr];
-        trace_gt64120_read("SERR0MASK", size, val);
+        trace_gt64120_read_intreg("SERR0MASK", size, val);
         break;
 
     /* Reserved when only PCI_0 is configured. */
@@ -960,7 +959,7 @@ static uint64_t gt64120_readl(void *opaque,
         val = s->regs[saddr];
         qemu_log_mask(LOG_GUEST_ERROR,
                       "gt64120: Illegal register read "
-                      "reg:0x03%x size:%u value:0x%0*x\n",
+                      "reg:0x%03x size:%u value:0x%0*x\n",
                       saddr << 2, size, size << 1, val);
         break;
     }
@@ -968,6 +967,7 @@ static uint64_t gt64120_readl(void *opaque,
     if (!(s->regs[GT_CPU] & 0x00001000)) {
         val = bswap32(val);
     }
+    trace_gt64120_read(addr, val);
 
     return val;
 }
@@ -976,6 +976,10 @@ static const MemoryRegionOps isd_mem_ops = {
     .read = gt64120_readl,
     .write = gt64120_writel,
     .endianness = DEVICE_NATIVE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
 };
 
 static int gt64120_pci_map_irq(PCIDevice *pci_dev, int irq_num)
@@ -1196,6 +1200,14 @@ static void gt64120_reset(DeviceState *dev)
     gt64120_pci_mapping(s);
 }
 
+static void gt64120_realize(DeviceState *dev, Error **errp)
+{
+    GT64120State *s = GT64120_PCI_HOST_BRIDGE(dev);
+
+    memory_region_init_io(&s->ISD_mem, OBJECT(dev), &isd_mem_ops, s,
+                          "gt64120-isd", 0x1000);
+}
+
 PCIBus *gt64120_register(qemu_irq *pic)
 {
     GT64120State *d;
@@ -1214,8 +1226,6 @@ PCIBus *gt64120_register(qemu_irq *pic)
                                      get_system_io(),
                                      PCI_DEVFN(18, 0), 4, TYPE_PCI_BUS);
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
-    memory_region_init_io(&d->ISD_mem, OBJECT(dev), &isd_mem_ops, d,
-                          "isd-mem", 0x1000);
 
     pci_create_simple(phb->bus, PCI_DEVFN(0, 0), "gt64120_pci");
     return phb->bus;
@@ -1270,6 +1280,7 @@ static void gt64120_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+    dc->realize = gt64120_realize;
     dc->reset = gt64120_reset;
     dc->vmsd = &vmstate_gt64120;
 }
diff --git a/hw/mips/trace-events b/hw/mips/trace-events
index 915139d981..13ee731a48 100644
--- a/hw/mips/trace-events
+++ b/hw/mips/trace-events
@@ -1,4 +1,6 @@
 # gt64xxx_pci.c
-gt64120_read(const char *regname, unsigned size, uint64_t value) "gt64120 read %s size:%u value:0x%08" PRIx64
-gt64120_write(const char *regname, unsigned size, uint64_t value) "gt64120 write %s size:%u value:0x%08" PRIx64
+gt64120_read(uint64_t addr, uint64_t value) "gt64120 read 0x%03"PRIx64" value:0x%08" PRIx64
+gt64120_write(uint64_t addr, uint64_t value) "gt64120 write 0x%03"PRIx64" value:0x%08" PRIx64
+gt64120_read_intreg(const char *regname, unsigned size, uint64_t value) "gt64120 read %s size:%u value:0x%08" PRIx64
+gt64120_write_intreg(const char *regname, unsigned size, uint64_t value) "gt64120 write %s size:%u value:0x%08" PRIx64
 gt64120_isd_remap(uint64_t from_length, uint64_t from_addr, uint64_t to_length, uint64_t to_addr) "ISD: 0x%08" PRIx64 "@0x%08" PRIx64 " -> 0x%08" PRIx64 "@0x%08" PRIx64
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index 00356cf12e..7a2b0d031a 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -65,6 +65,7 @@ softmmu_ss.add(when: 'CONFIG_MAINSTONE', if_true: files('mst_fpga.c'))
 softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files(
   'npcm7xx_clk.c',
   'npcm7xx_gcr.c',
+  'npcm7xx_mft.c',
   'npcm7xx_pwm.c',
   'npcm7xx_rng.c',
 ))
@@ -85,6 +86,7 @@ softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files(
 ))
 softmmu_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_misc.c'))
 softmmu_ss.add(when: 'CONFIG_ZYNQ', if_true: files('zynq_slcr.c', 'zynq-xadc.c'))
+softmmu_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files('xlnx-versal-xramc.c'))
 softmmu_ss.add(when: 'CONFIG_STM32F2XX_SYSCFG', if_true: files('stm32f2xx_syscfg.c'))
 softmmu_ss.add(when: 'CONFIG_STM32F4XX_SYSCFG', if_true: files('stm32f4xx_syscfg.c'))
 softmmu_ss.add(when: 'CONFIG_STM32F4XX_EXTI', if_true: files('stm32f4xx_exti.c'))
diff --git a/hw/misc/npcm7xx_mft.c b/hw/misc/npcm7xx_mft.c
new file mode 100644
index 0000000000..a30583a1b0
--- /dev/null
+++ b/hw/misc/npcm7xx_mft.c
@@ -0,0 +1,540 @@
+/*
+ * Nuvoton NPCM7xx MFT Module
+ *
+ * Copyright 2021 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/irq.h"
+#include "hw/qdev-clock.h"
+#include "hw/qdev-properties.h"
+#include "hw/misc/npcm7xx_mft.h"
+#include "hw/misc/npcm7xx_pwm.h"
+#include "hw/registerfields.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qapi/visitor.h"
+#include "qemu/bitops.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/timer.h"
+#include "qemu/units.h"
+#include "trace.h"
+
+/*
+ * Some of the registers can only accessed via 16-bit ops and some can only
+ * be accessed via 8-bit ops. However we mark all of them using REG16 to
+ * simplify implementation. npcm7xx_mft_check_mem_op checks the access length
+ * of memory operations.
+ */
+REG16(NPCM7XX_MFT_CNT1, 0x00);
+REG16(NPCM7XX_MFT_CRA, 0x02);
+REG16(NPCM7XX_MFT_CRB, 0x04);
+REG16(NPCM7XX_MFT_CNT2, 0x06);
+REG16(NPCM7XX_MFT_PRSC, 0x08);
+REG16(NPCM7XX_MFT_CKC, 0x0a);
+REG16(NPCM7XX_MFT_MCTRL, 0x0c);
+REG16(NPCM7XX_MFT_ICTRL, 0x0e);
+REG16(NPCM7XX_MFT_ICLR, 0x10);
+REG16(NPCM7XX_MFT_IEN, 0x12);
+REG16(NPCM7XX_MFT_CPA, 0x14);
+REG16(NPCM7XX_MFT_CPB, 0x16);
+REG16(NPCM7XX_MFT_CPCFG, 0x18);
+REG16(NPCM7XX_MFT_INASEL, 0x1a);
+REG16(NPCM7XX_MFT_INBSEL, 0x1c);
+
+/* Register Fields */
+#define NPCM7XX_MFT_CKC_C2CSEL          BIT(3)
+#define NPCM7XX_MFT_CKC_C1CSEL          BIT(0)
+
+#define NPCM7XX_MFT_MCTRL_TBEN          BIT(6)
+#define NPCM7XX_MFT_MCTRL_TAEN          BIT(5)
+#define NPCM7XX_MFT_MCTRL_TBEDG         BIT(4)
+#define NPCM7XX_MFT_MCTRL_TAEDG         BIT(3)
+#define NPCM7XX_MFT_MCTRL_MODE5         BIT(2)
+
+#define NPCM7XX_MFT_ICTRL_TFPND         BIT(5)
+#define NPCM7XX_MFT_ICTRL_TEPND         BIT(4)
+#define NPCM7XX_MFT_ICTRL_TDPND         BIT(3)
+#define NPCM7XX_MFT_ICTRL_TCPND         BIT(2)
+#define NPCM7XX_MFT_ICTRL_TBPND         BIT(1)
+#define NPCM7XX_MFT_ICTRL_TAPND         BIT(0)
+
+#define NPCM7XX_MFT_ICLR_TFCLR          BIT(5)
+#define NPCM7XX_MFT_ICLR_TECLR          BIT(4)
+#define NPCM7XX_MFT_ICLR_TDCLR          BIT(3)
+#define NPCM7XX_MFT_ICLR_TCCLR          BIT(2)
+#define NPCM7XX_MFT_ICLR_TBCLR          BIT(1)
+#define NPCM7XX_MFT_ICLR_TACLR          BIT(0)
+
+#define NPCM7XX_MFT_IEN_TFIEN           BIT(5)
+#define NPCM7XX_MFT_IEN_TEIEN           BIT(4)
+#define NPCM7XX_MFT_IEN_TDIEN           BIT(3)
+#define NPCM7XX_MFT_IEN_TCIEN           BIT(2)
+#define NPCM7XX_MFT_IEN_TBIEN           BIT(1)
+#define NPCM7XX_MFT_IEN_TAIEN           BIT(0)
+
+#define NPCM7XX_MFT_CPCFG_GET_B(rv)     extract8((rv), 4, 4)
+#define NPCM7XX_MFT_CPCFG_GET_A(rv)     extract8((rv), 0, 4)
+#define NPCM7XX_MFT_CPCFG_HIEN          BIT(3)
+#define NPCM7XX_MFT_CPCFG_EQEN          BIT(2)
+#define NPCM7XX_MFT_CPCFG_LOEN          BIT(1)
+#define NPCM7XX_MFT_CPCFG_CPSEL         BIT(0)
+
+#define NPCM7XX_MFT_INASEL_SELA         BIT(0)
+#define NPCM7XX_MFT_INBSEL_SELB         BIT(0)
+
+/* Max CNT values of the module. The CNT value is a countdown from it. */
+#define NPCM7XX_MFT_MAX_CNT             0xFFFF
+
+/* Each fan revolution should generated 2 pulses */
+#define NPCM7XX_MFT_PULSE_PER_REVOLUTION 2
+
+typedef enum NPCM7xxMFTCaptureState {
+    /* capture succeeded with a valid CNT value. */
+    NPCM7XX_CAPTURE_SUCCEED,
+    /* capture stopped prematurely due to reaching CPCFG condition. */
+    NPCM7XX_CAPTURE_COMPARE_HIT,
+    /* capture fails since it reaches underflow condition for CNT. */
+    NPCM7XX_CAPTURE_UNDERFLOW,
+} NPCM7xxMFTCaptureState;
+
+static void npcm7xx_mft_reset(NPCM7xxMFTState *s)
+{
+    int i;
+
+    /* Only registers PRSC ~ INBSEL need to be reset. */
+    for (i = R_NPCM7XX_MFT_PRSC; i <= R_NPCM7XX_MFT_INBSEL; ++i) {
+        s->regs[i] = 0;
+    }
+}
+
+static void npcm7xx_mft_clear_interrupt(NPCM7xxMFTState *s, uint8_t iclr)
+{
+    /*
+     * Clear bits in ICTRL where corresponding bits in iclr is 1.
+     * Both iclr and ictrl are 8-bit regs. (See npcm7xx_mft_check_mem_op)
+     */
+    s->regs[R_NPCM7XX_MFT_ICTRL] &= ~iclr;
+}
+
+/*
+ * If the CPCFG's condition should be triggered during count down from
+ * NPCM7XX_MFT_MAX_CNT to src if compared to tgt, return the count when
+ * the condition is triggered.
+ * Otherwise return -1.
+ * Since tgt is uint16_t it must always <= NPCM7XX_MFT_MAX_CNT.
+ */
+static int npcm7xx_mft_compare(int32_t src, uint16_t tgt, uint8_t cpcfg)
+{
+    if (cpcfg & NPCM7XX_MFT_CPCFG_HIEN) {
+        return NPCM7XX_MFT_MAX_CNT;
+    }
+    if ((cpcfg & NPCM7XX_MFT_CPCFG_EQEN) && (src <= tgt)) {
+        return tgt;
+    }
+    if ((cpcfg & NPCM7XX_MFT_CPCFG_LOEN) && (tgt > 0) && (src < tgt)) {
+        return tgt - 1;
+    }
+
+    return -1;
+}
+
+/* Compute CNT according to corresponding fan's RPM. */
+static NPCM7xxMFTCaptureState npcm7xx_mft_compute_cnt(
+    Clock *clock, uint32_t max_rpm, uint32_t duty, uint16_t tgt,
+    uint8_t cpcfg, uint16_t *cnt)
+{
+    uint32_t rpm = (uint64_t)max_rpm * (uint64_t)duty / NPCM7XX_PWM_MAX_DUTY;
+    int32_t count;
+    int stopped;
+    NPCM7xxMFTCaptureState state;
+
+    if (rpm == 0) {
+        /*
+         * If RPM = 0, capture won't happen. CNT will continue count down.
+         * So it's effective equivalent to have a cnt > NPCM7XX_MFT_MAX_CNT
+         */
+        count = NPCM7XX_MFT_MAX_CNT + 1;
+    } else {
+        /*
+         * RPM = revolution/min. The time for one revlution (in ns) is
+         * MINUTE_TO_NANOSECOND / RPM.
+         */
+        count = clock_ns_to_ticks(clock, (60 * NANOSECONDS_PER_SECOND) /
+            (rpm * NPCM7XX_MFT_PULSE_PER_REVOLUTION));
+    }
+
+    if (count > NPCM7XX_MFT_MAX_CNT) {
+        count = -1;
+    } else {
+        /* The CNT is a countdown value from NPCM7XX_MFT_MAX_CNT. */
+        count = NPCM7XX_MFT_MAX_CNT - count;
+    }
+    stopped = npcm7xx_mft_compare(count, tgt, cpcfg);
+    if (stopped == -1) {
+        if (count == -1) {
+            /* Underflow */
+            state = NPCM7XX_CAPTURE_UNDERFLOW;
+        } else {
+            state = NPCM7XX_CAPTURE_SUCCEED;
+        }
+    } else {
+        count = stopped;
+        state = NPCM7XX_CAPTURE_COMPARE_HIT;
+    }
+
+    if (count != -1) {
+        *cnt = count;
+    }
+    trace_npcm7xx_mft_rpm(clock->canonical_path, clock_get_hz(clock),
+                          state, count, rpm, duty);
+    return state;
+}
+
+/*
+ * Capture Fan RPM and update CNT and CR registers accordingly.
+ * Raise IRQ if certain contidions are met in IEN.
+ */
+static void npcm7xx_mft_capture(NPCM7xxMFTState *s)
+{
+    int irq_level = 0;
+    NPCM7xxMFTCaptureState state;
+    int sel;
+    uint8_t cpcfg;
+
+    /*
+     * If not mode 5, the behavior is undefined. We just do nothing in this
+     * case.
+     */
+    if (!(s->regs[R_NPCM7XX_MFT_MCTRL] & NPCM7XX_MFT_MCTRL_MODE5)) {
+        return;
+    }
+
+    /* Capture input A. */
+    if (s->regs[R_NPCM7XX_MFT_MCTRL] & NPCM7XX_MFT_MCTRL_TAEN &&
+        s->regs[R_NPCM7XX_MFT_CKC] & NPCM7XX_MFT_CKC_C1CSEL) {
+        sel = s->regs[R_NPCM7XX_MFT_INASEL] & NPCM7XX_MFT_INASEL_SELA;
+        cpcfg = NPCM7XX_MFT_CPCFG_GET_A(s->regs[R_NPCM7XX_MFT_CPCFG]);
+        state = npcm7xx_mft_compute_cnt(s->clock_1,
+                                        sel ? s->max_rpm[2] : s->max_rpm[0],
+                                        sel ? s->duty[2] : s->duty[0],
+                                        s->regs[R_NPCM7XX_MFT_CPA],
+                                        cpcfg,
+                                        &s->regs[R_NPCM7XX_MFT_CNT1]);
+        switch (state) {
+        case NPCM7XX_CAPTURE_SUCCEED:
+            /* Interrupt on input capture on TAn transition - TAPND */
+            s->regs[R_NPCM7XX_MFT_CRA] = s->regs[R_NPCM7XX_MFT_CNT1];
+            s->regs[R_NPCM7XX_MFT_ICTRL] |= NPCM7XX_MFT_ICTRL_TAPND;
+            if (s->regs[R_NPCM7XX_MFT_IEN] & NPCM7XX_MFT_IEN_TAIEN) {
+                irq_level = 1;
+            }
+            break;
+
+        case NPCM7XX_CAPTURE_COMPARE_HIT:
+            /* Compare Hit - TEPND */
+            s->regs[R_NPCM7XX_MFT_ICTRL] |= NPCM7XX_MFT_ICTRL_TEPND;
+            if (s->regs[R_NPCM7XX_MFT_IEN] & NPCM7XX_MFT_IEN_TEIEN) {
+                irq_level = 1;
+            }
+            break;
+
+        case NPCM7XX_CAPTURE_UNDERFLOW:
+            /* Underflow - TCPND */
+            s->regs[R_NPCM7XX_MFT_ICTRL] |= NPCM7XX_MFT_ICTRL_TCPND;
+            if (s->regs[R_NPCM7XX_MFT_IEN] & NPCM7XX_MFT_IEN_TCIEN) {
+                irq_level = 1;
+            }
+            break;
+
+        default:
+            g_assert_not_reached();
+        }
+    }
+
+    /* Capture input B. */
+    if (s->regs[R_NPCM7XX_MFT_MCTRL] & NPCM7XX_MFT_MCTRL_TBEN &&
+        s->regs[R_NPCM7XX_MFT_CKC] & NPCM7XX_MFT_CKC_C2CSEL) {
+        sel = s->regs[R_NPCM7XX_MFT_INBSEL] & NPCM7XX_MFT_INBSEL_SELB;
+        cpcfg = NPCM7XX_MFT_CPCFG_GET_B(s->regs[R_NPCM7XX_MFT_CPCFG]);
+        state = npcm7xx_mft_compute_cnt(s->clock_2,
+                                        sel ? s->max_rpm[3] : s->max_rpm[1],
+                                        sel ? s->duty[3] : s->duty[1],
+                                        s->regs[R_NPCM7XX_MFT_CPB],
+                                        cpcfg,
+                                        &s->regs[R_NPCM7XX_MFT_CNT2]);
+        switch (state) {
+        case NPCM7XX_CAPTURE_SUCCEED:
+            /* Interrupt on input capture on TBn transition - TBPND */
+            s->regs[R_NPCM7XX_MFT_CRB] = s->regs[R_NPCM7XX_MFT_CNT2];
+            s->regs[R_NPCM7XX_MFT_ICTRL] |= NPCM7XX_MFT_ICTRL_TBPND;
+            if (s->regs[R_NPCM7XX_MFT_IEN] & NPCM7XX_MFT_IEN_TBIEN) {
+                irq_level = 1;
+            }
+            break;
+
+        case NPCM7XX_CAPTURE_COMPARE_HIT:
+            /* Compare Hit - TFPND */
+            s->regs[R_NPCM7XX_MFT_ICTRL] |= NPCM7XX_MFT_ICTRL_TFPND;
+            if (s->regs[R_NPCM7XX_MFT_IEN] & NPCM7XX_MFT_IEN_TFIEN) {
+                irq_level = 1;
+            }
+            break;
+
+        case NPCM7XX_CAPTURE_UNDERFLOW:
+            /* Underflow - TDPND */
+            s->regs[R_NPCM7XX_MFT_ICTRL] |= NPCM7XX_MFT_ICTRL_TDPND;
+            if (s->regs[R_NPCM7XX_MFT_IEN] & NPCM7XX_MFT_IEN_TDIEN) {
+                irq_level = 1;
+            }
+            break;
+
+        default:
+            g_assert_not_reached();
+        }
+    }
+
+    trace_npcm7xx_mft_capture(DEVICE(s)->canonical_path, irq_level);
+    qemu_set_irq(s->irq, irq_level);
+}
+
+/* Update clock for counters. */
+static void npcm7xx_mft_update_clock(void *opaque, ClockEvent event)
+{
+    NPCM7xxMFTState *s = NPCM7XX_MFT(opaque);
+    uint64_t prescaled_clock_period;
+
+    prescaled_clock_period = clock_get(s->clock_in) *
+        (s->regs[R_NPCM7XX_MFT_PRSC] + 1ULL);
+    trace_npcm7xx_mft_update_clock(s->clock_in->canonical_path,
+                                   s->regs[R_NPCM7XX_MFT_CKC],
+                                   clock_get(s->clock_in),
+                                   prescaled_clock_period);
+    /* Update clock 1 */
+    if (s->regs[R_NPCM7XX_MFT_CKC] & NPCM7XX_MFT_CKC_C1CSEL) {
+        /* Clock is prescaled. */
+        clock_update(s->clock_1, prescaled_clock_period);
+    } else {
+        /* Clock stopped. */
+        clock_update(s->clock_1, 0);
+    }
+    /* Update clock 2 */
+    if (s->regs[R_NPCM7XX_MFT_CKC] & NPCM7XX_MFT_CKC_C2CSEL) {
+        /* Clock is prescaled. */
+        clock_update(s->clock_2, prescaled_clock_period);
+    } else {
+        /* Clock stopped. */
+        clock_update(s->clock_2, 0);
+    }
+
+    npcm7xx_mft_capture(s);
+}
+
+static uint64_t npcm7xx_mft_read(void *opaque, hwaddr offset, unsigned size)
+{
+    NPCM7xxMFTState *s = NPCM7XX_MFT(opaque);
+    uint16_t value = 0;
+
+    switch (offset) {
+    case A_NPCM7XX_MFT_ICLR:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: register @ 0x%04" HWADDR_PRIx " is write-only\n",
+                      __func__, offset);
+        break;
+
+    default:
+        value = s->regs[offset / 2];
+    }
+
+    trace_npcm7xx_mft_read(DEVICE(s)->canonical_path, offset, value);
+    return value;
+}
+
+static void npcm7xx_mft_write(void *opaque, hwaddr offset,
+                              uint64_t v, unsigned size)
+{
+    NPCM7xxMFTState *s = NPCM7XX_MFT(opaque);
+
+    trace_npcm7xx_mft_write(DEVICE(s)->canonical_path, offset, v);
+    switch (offset) {
+    case A_NPCM7XX_MFT_ICLR:
+        npcm7xx_mft_clear_interrupt(s, v);
+        break;
+
+    case A_NPCM7XX_MFT_CKC:
+    case A_NPCM7XX_MFT_PRSC:
+        s->regs[offset / 2] = v;
+        npcm7xx_mft_update_clock(s, ClockUpdate);
+        break;
+
+    default:
+        s->regs[offset / 2] = v;
+        npcm7xx_mft_capture(s);
+        break;
+    }
+}
+
+static bool npcm7xx_mft_check_mem_op(void *opaque, hwaddr offset,
+                                     unsigned size, bool is_write,
+                                     MemTxAttrs attrs)
+{
+    switch (offset) {
+    /* 16-bit registers. Must be accessed with 16-bit read/write.*/
+    case A_NPCM7XX_MFT_CNT1:
+    case A_NPCM7XX_MFT_CRA:
+    case A_NPCM7XX_MFT_CRB:
+    case A_NPCM7XX_MFT_CNT2:
+    case A_NPCM7XX_MFT_CPA:
+    case A_NPCM7XX_MFT_CPB:
+        return size == 2;
+
+    /* 8-bit registers. Must be accessed with 8-bit read/write.*/
+    case A_NPCM7XX_MFT_PRSC:
+    case A_NPCM7XX_MFT_CKC:
+    case A_NPCM7XX_MFT_MCTRL:
+    case A_NPCM7XX_MFT_ICTRL:
+    case A_NPCM7XX_MFT_ICLR:
+    case A_NPCM7XX_MFT_IEN:
+    case A_NPCM7XX_MFT_CPCFG:
+    case A_NPCM7XX_MFT_INASEL:
+    case A_NPCM7XX_MFT_INBSEL:
+        return size == 1;
+
+    default:
+        /* Invalid registers. */
+        return false;
+    }
+}
+
+static void npcm7xx_mft_get_max_rpm(Object *obj, Visitor *v, const char *name,
+                                    void *opaque, Error **errp)
+{
+    visit_type_uint32(v, name, (uint32_t *)opaque, errp);
+}
+
+static void npcm7xx_mft_set_max_rpm(Object *obj, Visitor *v, const char *name,
+                                    void *opaque, Error **errp)
+{
+    NPCM7xxMFTState *s = NPCM7XX_MFT(obj);
+    uint32_t *max_rpm = opaque;
+    uint32_t value;
+
+    if (!visit_type_uint32(v, name, &value, errp)) {
+        return;
+    }
+
+    *max_rpm = value;
+    npcm7xx_mft_capture(s);
+}
+
+static void npcm7xx_mft_duty_handler(void *opaque, int n, int value)
+{
+    NPCM7xxMFTState *s = NPCM7XX_MFT(opaque);
+
+    trace_npcm7xx_mft_set_duty(DEVICE(s)->canonical_path, n, value);
+    s->duty[n] = value;
+    npcm7xx_mft_capture(s);
+}
+
+static const struct MemoryRegionOps npcm7xx_mft_ops = {
+    .read       = npcm7xx_mft_read,
+    .write      = npcm7xx_mft_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid      = {
+        .min_access_size        = 1,
+        .max_access_size        = 2,
+        .unaligned              = false,
+        .accepts                = npcm7xx_mft_check_mem_op,
+    },
+};
+
+static void npcm7xx_mft_enter_reset(Object *obj, ResetType type)
+{
+    NPCM7xxMFTState *s = NPCM7XX_MFT(obj);
+
+    npcm7xx_mft_reset(s);
+}
+
+static void npcm7xx_mft_hold_reset(Object *obj)
+{
+    NPCM7xxMFTState *s = NPCM7XX_MFT(obj);
+
+    qemu_irq_lower(s->irq);
+}
+
+static void npcm7xx_mft_init(Object *obj)
+{
+    NPCM7xxMFTState *s = NPCM7XX_MFT(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+    DeviceState *dev = DEVICE(obj);
+
+    memory_region_init_io(&s->iomem, obj, &npcm7xx_mft_ops, s,
+                          TYPE_NPCM7XX_MFT, 4 * KiB);
+    sysbus_init_mmio(sbd, &s->iomem);
+    sysbus_init_irq(sbd, &s->irq);
+    s->clock_in = qdev_init_clock_in(dev, "clock-in", npcm7xx_mft_update_clock,
+                                     s, ClockUpdate);
+    s->clock_1 = qdev_init_clock_out(dev, "clock1");
+    s->clock_2 = qdev_init_clock_out(dev, "clock2");
+
+    for (int i = 0; i < NPCM7XX_PWM_PER_MODULE; ++i) {
+        object_property_add(obj, "max_rpm[*]", "uint32",
+                            npcm7xx_mft_get_max_rpm,
+                            npcm7xx_mft_set_max_rpm,
+                            NULL, &s->max_rpm[i]);
+    }
+    qdev_init_gpio_in_named(dev, npcm7xx_mft_duty_handler, "duty",
+                            NPCM7XX_MFT_FANIN_COUNT);
+}
+
+static const VMStateDescription vmstate_npcm7xx_mft = {
+    .name = "npcm7xx-mft-module",
+    .version_id = 0,
+    .minimum_version_id = 0,
+    .fields = (VMStateField[]) {
+        VMSTATE_CLOCK(clock_in, NPCM7xxMFTState),
+        VMSTATE_CLOCK(clock_1, NPCM7xxMFTState),
+        VMSTATE_CLOCK(clock_2, NPCM7xxMFTState),
+        VMSTATE_UINT16_ARRAY(regs, NPCM7xxMFTState, NPCM7XX_MFT_NR_REGS),
+        VMSTATE_UINT32_ARRAY(max_rpm, NPCM7xxMFTState, NPCM7XX_MFT_FANIN_COUNT),
+        VMSTATE_UINT32_ARRAY(duty, NPCM7xxMFTState, NPCM7XX_MFT_FANIN_COUNT),
+        VMSTATE_END_OF_LIST(),
+    },
+};
+
+static void npcm7xx_mft_class_init(ObjectClass *klass, void *data)
+{
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->desc = "NPCM7xx MFT Controller";
+    dc->vmsd = &vmstate_npcm7xx_mft;
+    rc->phases.enter = npcm7xx_mft_enter_reset;
+    rc->phases.hold = npcm7xx_mft_hold_reset;
+}
+
+static const TypeInfo npcm7xx_mft_info = {
+    .name               = TYPE_NPCM7XX_MFT,
+    .parent             = TYPE_SYS_BUS_DEVICE,
+    .instance_size      = sizeof(NPCM7xxMFTState),
+    .class_init         = npcm7xx_mft_class_init,
+    .instance_init      = npcm7xx_mft_init,
+};
+
+static void npcm7xx_mft_register_type(void)
+{
+    type_register_static(&npcm7xx_mft_info);
+}
+type_init(npcm7xx_mft_register_type);
diff --git a/hw/misc/npcm7xx_pwm.c b/hw/misc/npcm7xx_pwm.c
index ce192bb274..2be5bd25c6 100644
--- a/hw/misc/npcm7xx_pwm.c
+++ b/hw/misc/npcm7xx_pwm.c
@@ -139,6 +139,7 @@ static void npcm7xx_pwm_update_duty(NPCM7xxPWM *p)
         trace_npcm7xx_pwm_update_duty(DEVICE(p->module)->canonical_path,
                                       p->index, p->duty, duty);
         p->duty = duty;
+        qemu_set_irq(p->module->duty_gpio_out[p->index], p->duty);
     }
 }
 
@@ -483,6 +484,7 @@ static void npcm7xx_pwm_init(Object *obj)
     SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
     int i;
 
+    QEMU_BUILD_BUG_ON(ARRAY_SIZE(s->pwm) != NPCM7XX_PWM_PER_MODULE);
     for (i = 0; i < NPCM7XX_PWM_PER_MODULE; i++) {
         NPCM7xxPWM *p = &s->pwm[i];
         p->module = s;
@@ -501,6 +503,8 @@ static void npcm7xx_pwm_init(Object *obj)
         object_property_add_uint32_ptr(obj, "duty[*]",
                 &s->pwm[i].duty, OBJ_PROP_FLAG_READ);
     }
+    qdev_init_gpio_out_named(DEVICE(s), s->duty_gpio_out,
+                             "duty-gpio-out", NPCM7XX_PWM_PER_MODULE);
 }
 
 static const VMStateDescription vmstate_npcm7xx_pwm = {
diff --git a/hw/misc/trace-events b/hw/misc/trace-events
index cae005549e..b87d0b4c90 100644
--- a/hw/misc/trace-events
+++ b/hw/misc/trace-events
@@ -116,6 +116,14 @@ npcm7xx_clk_write(uint64_t offset, uint32_t value) "offset: 0x%04" PRIx64 " valu
 npcm7xx_gcr_read(uint64_t offset, uint32_t value) " offset: 0x%04" PRIx64 " value: 0x%08" PRIx32
 npcm7xx_gcr_write(uint64_t offset, uint32_t value) "offset: 0x%04" PRIx64 " value: 0x%08" PRIx32
 
+# npcm7xx_mft.c
+npcm7xx_mft_read(const char *name, uint64_t offset, uint16_t value) "%s: offset: 0x%04" PRIx64 " value: 0x%04" PRIx16
+npcm7xx_mft_write(const char *name, uint64_t offset, uint16_t value) "%s: offset: 0x%04" PRIx64 " value: 0x%04" PRIx16
+npcm7xx_mft_rpm(const char *clock, uint32_t clock_hz, int state, int32_t cnt, uint32_t rpm, uint32_t duty) " fan clk: %s clock_hz: %" PRIu32 ", state: %d, cnt: %" PRIi32 ", rpm: %" PRIu32 ", duty: %" PRIu32
+npcm7xx_mft_capture(const char *name, int irq_level) "%s: level: %d"
+npcm7xx_mft_update_clock(const char *name, uint16_t sel, uint64_t clock_period, uint64_t prescaled_clock_period) "%s: sel: 0x%02" PRIx16 ", period: %" PRIu64 ", prescaled: %" PRIu64
+npcm7xx_mft_set_duty(const char *name, int n, int value) "%s[%d]: %d"
+
 # npcm7xx_rng.c
 npcm7xx_rng_read(uint64_t offset, uint64_t value, unsigned size) "offset: 0x%04" PRIx64 " value: 0x%02" PRIx64 " size: %u"
 npcm7xx_rng_write(uint64_t offset, uint64_t value, unsigned size) "offset: 0x%04" PRIx64 " value: 0x%02" PRIx64 " size: %u"
diff --git a/hw/misc/xlnx-versal-xramc.c b/hw/misc/xlnx-versal-xramc.c
new file mode 100644
index 0000000000..e5b719a0ed
--- /dev/null
+++ b/hw/misc/xlnx-versal-xramc.c
@@ -0,0 +1,253 @@
+/*
+ * QEMU model of the Xilinx XRAM Controller.
+ *
+ * Copyright (c) 2021 Xilinx Inc.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Written by Edgar E. Iglesias <edgar.iglesias@xilinx.com>
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qapi/error.h"
+#include "migration/vmstate.h"
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "hw/qdev-properties.h"
+#include "hw/irq.h"
+#include "hw/misc/xlnx-versal-xramc.h"
+
+#ifndef XLNX_XRAM_CTRL_ERR_DEBUG
+#define XLNX_XRAM_CTRL_ERR_DEBUG 0
+#endif
+
+static void xram_update_irq(XlnxXramCtrl *s)
+{
+    bool pending = s->regs[R_XRAM_ISR] & ~s->regs[R_XRAM_IMR];
+    qemu_set_irq(s->irq, pending);
+}
+
+static void xram_isr_postw(RegisterInfo *reg, uint64_t val64)
+{
+    XlnxXramCtrl *s = XLNX_XRAM_CTRL(reg->opaque);
+    xram_update_irq(s);
+}
+
+static uint64_t xram_ien_prew(RegisterInfo *reg, uint64_t val64)
+{
+    XlnxXramCtrl *s = XLNX_XRAM_CTRL(reg->opaque);
+    uint32_t val = val64;
+
+    s->regs[R_XRAM_IMR] &= ~val;
+    xram_update_irq(s);
+    return 0;
+}
+
+static uint64_t xram_ids_prew(RegisterInfo *reg, uint64_t val64)
+{
+    XlnxXramCtrl *s = XLNX_XRAM_CTRL(reg->opaque);
+    uint32_t val = val64;
+
+    s->regs[R_XRAM_IMR] |= val;
+    xram_update_irq(s);
+    return 0;
+}
+
+static const RegisterAccessInfo xram_ctrl_regs_info[] = {
+    {   .name = "XRAM_ERR_CTRL",  .addr = A_XRAM_ERR_CTRL,
+        .reset = 0xf,
+        .rsvd = 0xfffffff0,
+    },{ .name = "XRAM_ISR",  .addr = A_XRAM_ISR,
+        .rsvd = 0xfffff800,
+        .w1c = 0x7ff,
+        .post_write = xram_isr_postw,
+    },{ .name = "XRAM_IMR",  .addr = A_XRAM_IMR,
+        .reset = 0x7ff,
+        .rsvd = 0xfffff800,
+        .ro = 0x7ff,
+    },{ .name = "XRAM_IEN",  .addr = A_XRAM_IEN,
+        .rsvd = 0xfffff800,
+        .pre_write = xram_ien_prew,
+    },{ .name = "XRAM_IDS",  .addr = A_XRAM_IDS,
+        .rsvd = 0xfffff800,
+        .pre_write = xram_ids_prew,
+    },{ .name = "XRAM_ECC_CNTL",  .addr = A_XRAM_ECC_CNTL,
+        .rsvd = 0xfffffff8,
+    },{ .name = "XRAM_CLR_EXE",  .addr = A_XRAM_CLR_EXE,
+        .rsvd = 0xffffff00,
+    },{ .name = "XRAM_CE_FFA",  .addr = A_XRAM_CE_FFA,
+        .rsvd = 0xfff00000,
+        .ro = 0xfffff,
+    },{ .name = "XRAM_CE_FFD0",  .addr = A_XRAM_CE_FFD0,
+        .ro = 0xffffffff,
+    },{ .name = "XRAM_CE_FFD1",  .addr = A_XRAM_CE_FFD1,
+        .ro = 0xffffffff,
+    },{ .name = "XRAM_CE_FFD2",  .addr = A_XRAM_CE_FFD2,
+        .ro = 0xffffffff,
+    },{ .name = "XRAM_CE_FFD3",  .addr = A_XRAM_CE_FFD3,
+        .ro = 0xffffffff,
+    },{ .name = "XRAM_CE_FFE",  .addr = A_XRAM_CE_FFE,
+        .rsvd = 0xffff0000,
+        .ro = 0xffff,
+    },{ .name = "XRAM_UE_FFA",  .addr = A_XRAM_UE_FFA,
+        .rsvd = 0xfff00000,
+        .ro = 0xfffff,
+    },{ .name = "XRAM_UE_FFD0",  .addr = A_XRAM_UE_FFD0,
+        .ro = 0xffffffff,
+    },{ .name = "XRAM_UE_FFD1",  .addr = A_XRAM_UE_FFD1,
+        .ro = 0xffffffff,
+    },{ .name = "XRAM_UE_FFD2",  .addr = A_XRAM_UE_FFD2,
+        .ro = 0xffffffff,
+    },{ .name = "XRAM_UE_FFD3",  .addr = A_XRAM_UE_FFD3,
+        .ro = 0xffffffff,
+    },{ .name = "XRAM_UE_FFE",  .addr = A_XRAM_UE_FFE,
+        .rsvd = 0xffff0000,
+        .ro = 0xffff,
+    },{ .name = "XRAM_FI_D0",  .addr = A_XRAM_FI_D0,
+    },{ .name = "XRAM_FI_D1",  .addr = A_XRAM_FI_D1,
+    },{ .name = "XRAM_FI_D2",  .addr = A_XRAM_FI_D2,
+    },{ .name = "XRAM_FI_D3",  .addr = A_XRAM_FI_D3,
+    },{ .name = "XRAM_FI_SY",  .addr = A_XRAM_FI_SY,
+        .rsvd = 0xffff0000,
+    },{ .name = "XRAM_RMW_UE_FFA",  .addr = A_XRAM_RMW_UE_FFA,
+        .rsvd = 0xfff00000,
+        .ro = 0xfffff,
+    },{ .name = "XRAM_FI_CNTR",  .addr = A_XRAM_FI_CNTR,
+        .rsvd = 0xff000000,
+    },{ .name = "XRAM_IMP",  .addr = A_XRAM_IMP,
+        .reset = 0x4,
+        .rsvd = 0xfffffff0,
+        .ro = 0xf,
+    },{ .name = "XRAM_PRDY_DBG",  .addr = A_XRAM_PRDY_DBG,
+        .reset = 0xffff,
+        .rsvd = 0xffff0000,
+        .ro = 0xffff,
+    },{ .name = "XRAM_SAFETY_CHK",  .addr = A_XRAM_SAFETY_CHK,
+    }
+};
+
+static void xram_ctrl_reset_enter(Object *obj, ResetType type)
+{
+    XlnxXramCtrl *s = XLNX_XRAM_CTRL(obj);
+    unsigned int i;
+
+    for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) {
+        register_reset(&s->regs_info[i]);
+    }
+
+    ARRAY_FIELD_DP32(s->regs, XRAM_IMP, SIZE, s->cfg.encoded_size);
+}
+
+static void xram_ctrl_reset_hold(Object *obj)
+{
+    XlnxXramCtrl *s = XLNX_XRAM_CTRL(obj);
+
+    xram_update_irq(s);
+}
+
+static const MemoryRegionOps xram_ctrl_ops = {
+    .read = register_read_memory,
+    .write = register_write_memory,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static void xram_ctrl_realize(DeviceState *dev, Error **errp)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+    XlnxXramCtrl *s = XLNX_XRAM_CTRL(dev);
+
+    switch (s->cfg.size) {
+    case 64 * KiB:
+        s->cfg.encoded_size = 0;
+        break;
+    case 128 * KiB:
+        s->cfg.encoded_size = 1;
+        break;
+    case 256 * KiB:
+        s->cfg.encoded_size = 2;
+        break;
+    case 512 * KiB:
+        s->cfg.encoded_size = 3;
+        break;
+    case 1 * MiB:
+        s->cfg.encoded_size = 4;
+        break;
+    default:
+        error_setg(errp, "Unsupported XRAM size %" PRId64, s->cfg.size);
+        return;
+    }
+
+    memory_region_init_ram(&s->ram, OBJECT(s),
+                           object_get_canonical_path_component(OBJECT(s)),
+                           s->cfg.size, &error_fatal);
+    sysbus_init_mmio(sbd, &s->ram);
+}
+
+static void xram_ctrl_init(Object *obj)
+{
+    XlnxXramCtrl *s = XLNX_XRAM_CTRL(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+
+    s->reg_array =
+        register_init_block32(DEVICE(obj), xram_ctrl_regs_info,
+                              ARRAY_SIZE(xram_ctrl_regs_info),
+                              s->regs_info, s->regs,
+                              &xram_ctrl_ops,
+                              XLNX_XRAM_CTRL_ERR_DEBUG,
+                              XRAM_CTRL_R_MAX * 4);
+    sysbus_init_mmio(sbd, &s->reg_array->mem);
+    sysbus_init_irq(sbd, &s->irq);
+}
+
+static void xram_ctrl_finalize(Object *obj)
+{
+    XlnxXramCtrl *s = XLNX_XRAM_CTRL(obj);
+    register_finalize_block(s->reg_array);
+}
+
+static const VMStateDescription vmstate_xram_ctrl = {
+    .name = TYPE_XLNX_XRAM_CTRL,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(regs, XlnxXramCtrl, XRAM_CTRL_R_MAX),
+        VMSTATE_END_OF_LIST(),
+    }
+};
+
+static Property xram_ctrl_properties[] = {
+    DEFINE_PROP_UINT64("size", XlnxXramCtrl, cfg.size, 1 * MiB),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xram_ctrl_class_init(ObjectClass *klass, void *data)
+{
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = xram_ctrl_realize;
+    dc->vmsd = &vmstate_xram_ctrl;
+    device_class_set_props(dc, xram_ctrl_properties);
+
+    rc->phases.enter = xram_ctrl_reset_enter;
+    rc->phases.hold = xram_ctrl_reset_hold;
+}
+
+static const TypeInfo xram_ctrl_info = {
+    .name              = TYPE_XLNX_XRAM_CTRL,
+    .parent            = TYPE_SYS_BUS_DEVICE,
+    .instance_size     = sizeof(XlnxXramCtrl),
+    .class_init        = xram_ctrl_class_init,
+    .instance_init     = xram_ctrl_init,
+    .instance_finalize = xram_ctrl_finalize,
+};
+
+static void xram_ctrl_register_types(void)
+{
+    type_register_static(&xram_ctrl_info);
+}
+
+type_init(xram_ctrl_register_types)
diff --git a/hw/net/allwinner-sun8i-emac.c b/hw/net/allwinner-sun8i-emac.c
index bf91803d65..ff611f18fb 100644
--- a/hw/net/allwinner-sun8i-emac.c
+++ b/hw/net/allwinner-sun8i-emac.c
@@ -339,35 +339,40 @@ static void allwinner_sun8i_emac_update_irq(AwSun8iEmacState *s)
     qemu_set_irq(s->irq, (s->int_sta & s->int_en) != 0);
 }
 
-static uint32_t allwinner_sun8i_emac_next_desc(AwSun8iEmacState *s,
-                                               FrameDescriptor *desc,
-                                               size_t min_size)
+static bool allwinner_sun8i_emac_desc_owned(FrameDescriptor *desc,
+                                            size_t min_buf_size)
 {
-    uint32_t paddr = desc->next;
+    return (desc->status & DESC_STATUS_CTL) && (min_buf_size == 0 ||
+           (desc->status2 & DESC_STATUS2_BUF_SIZE_MASK) >= min_buf_size);
+}
 
-    dma_memory_read(&s->dma_as, paddr, desc, sizeof(*desc));
+static void allwinner_sun8i_emac_get_desc(AwSun8iEmacState *s,
+                                          FrameDescriptor *desc,
+                                          uint32_t phys_addr)
+{
+    dma_memory_read(&s->dma_as, phys_addr, desc, sizeof(*desc));
+}
 
-    if ((desc->status & DESC_STATUS_CTL) &&
-        (desc->status2 & DESC_STATUS2_BUF_SIZE_MASK) >= min_size) {
-        return paddr;
-    } else {
-        return 0;
-    }
+static uint32_t allwinner_sun8i_emac_next_desc(AwSun8iEmacState *s,
+                                               FrameDescriptor *desc)
+{
+    const uint32_t nxt = desc->next;
+    allwinner_sun8i_emac_get_desc(s, desc, nxt);
+    return nxt;
 }
 
-static uint32_t allwinner_sun8i_emac_get_desc(AwSun8iEmacState *s,
-                                              FrameDescriptor *desc,
-                                              uint32_t start_addr,
-                                              size_t min_size)
+static uint32_t allwinner_sun8i_emac_find_desc(AwSun8iEmacState *s,
+                                               FrameDescriptor *desc,
+                                               uint32_t start_addr,
+                                               size_t min_size)
 {
     uint32_t desc_addr = start_addr;
 
     /* Note that the list is a cycle. Last entry points back to the head. */
     while (desc_addr != 0) {
-        dma_memory_read(&s->dma_as, desc_addr, desc, sizeof(*desc));
+        allwinner_sun8i_emac_get_desc(s, desc, desc_addr);
 
-        if ((desc->status & DESC_STATUS_CTL) &&
-            (desc->status2 & DESC_STATUS2_BUF_SIZE_MASK) >= min_size) {
+        if (allwinner_sun8i_emac_desc_owned(desc, min_size)) {
             return desc_addr;
         } else if (desc->next == start_addr) {
             break;
@@ -383,14 +388,14 @@ static uint32_t allwinner_sun8i_emac_rx_desc(AwSun8iEmacState *s,
                                              FrameDescriptor *desc,
                                              size_t min_size)
 {
-    return allwinner_sun8i_emac_get_desc(s, desc, s->rx_desc_curr, min_size);
+    return allwinner_sun8i_emac_find_desc(s, desc, s->rx_desc_curr, min_size);
 }
 
 static uint32_t allwinner_sun8i_emac_tx_desc(AwSun8iEmacState *s,
-                                             FrameDescriptor *desc,
-                                             size_t min_size)
+                                             FrameDescriptor *desc)
 {
-    return allwinner_sun8i_emac_get_desc(s, desc, s->tx_desc_head, min_size);
+    allwinner_sun8i_emac_get_desc(s, desc, s->tx_desc_curr);
+    return s->tx_desc_curr;
 }
 
 static void allwinner_sun8i_emac_flush_desc(AwSun8iEmacState *s,
@@ -470,7 +475,8 @@ static ssize_t allwinner_sun8i_emac_receive(NetClientState *nc,
         bytes_left -= desc_bytes;
 
         /* Move to the next descriptor */
-        s->rx_desc_curr = allwinner_sun8i_emac_next_desc(s, &desc, 64);
+        s->rx_desc_curr = allwinner_sun8i_emac_find_desc(s, &desc, desc.next,
+                                                         AW_SUN8I_EMAC_MIN_PKT_SZ);
         if (!s->rx_desc_curr) {
             /* Not enough buffer space available */
             s->int_sta |= INT_STA_RX_BUF_UA;
@@ -495,10 +501,10 @@ static void allwinner_sun8i_emac_transmit(AwSun8iEmacState *s)
     size_t transmitted = 0;
     static uint8_t packet_buf[2048];
 
-    s->tx_desc_curr = allwinner_sun8i_emac_tx_desc(s, &desc, 0);
+    s->tx_desc_curr = allwinner_sun8i_emac_tx_desc(s, &desc);
 
     /* Read all transmit descriptors */
-    while (s->tx_desc_curr != 0) {
+    while (allwinner_sun8i_emac_desc_owned(&desc, 0)) {
 
         /* Read from physical memory into packet buffer */
         bytes = desc.status2 & DESC_STATUS2_BUF_SIZE_MASK;
@@ -524,7 +530,7 @@ static void allwinner_sun8i_emac_transmit(AwSun8iEmacState *s)
             packet_bytes = 0;
             transmitted++;
         }
-        s->tx_desc_curr = allwinner_sun8i_emac_next_desc(s, &desc, 0);
+        s->tx_desc_curr = allwinner_sun8i_emac_next_desc(s, &desc);
     }
 
     /* Raise transmit completed interrupt */
diff --git a/hw/timer/sse-timer.c b/hw/timer/sse-timer.c
index 8dbe6ac651..f959cb9d60 100644
--- a/hw/timer/sse-timer.c
+++ b/hw/timer/sse-timer.c
@@ -415,6 +415,7 @@ static void sse_timer_realize(DeviceState *dev, Error **errp)
 
     if (!s->counter) {
         error_setg(errp, "counter property was not set");
+        return;
     }
 
     s->counter_notifier.notify = sse_timer_counter_callback;
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index c2883a2f6c..1b23e8e18c 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -155,6 +155,7 @@ static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                       hwaddr virt_end)
 {
     IOMMUTLBEvent event;
+    uint64_t delta = virt_end - virt_start;
 
     if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
         return;
@@ -164,12 +165,24 @@ static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
 
     event.type = IOMMU_NOTIFIER_UNMAP;
     event.entry.target_as = &address_space_memory;
-    event.entry.addr_mask = virt_end - virt_start;
-    event.entry.iova = virt_start;
     event.entry.perm = IOMMU_NONE;
     event.entry.translated_addr = 0;
+    event.entry.addr_mask = delta;
+    event.entry.iova = virt_start;
 
-    memory_region_notify_iommu(mr, 0, event);
+    if (delta == UINT64_MAX) {
+        memory_region_notify_iommu(mr, 0, event);
+    }
+
+
+    while (virt_start != virt_end + 1) {
+        uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64);
+
+        event.entry.addr_mask = mask;
+        event.entry.iova = virt_start;
+        memory_region_notify_iommu(mr, 0, event);
+        virt_start += mask + 1;
+    }
 }
 
 static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
diff --git a/include/hw/arm/npcm7xx.h b/include/hw/arm/npcm7xx.h
index d32849a456..61ecc57ab9 100644
--- a/include/hw/arm/npcm7xx.h
+++ b/include/hw/arm/npcm7xx.h
@@ -18,12 +18,14 @@
 
 #include "hw/boards.h"
 #include "hw/adc/npcm7xx_adc.h"
+#include "hw/core/split-irq.h"
 #include "hw/cpu/a9mpcore.h"
 #include "hw/gpio/npcm7xx_gpio.h"
 #include "hw/i2c/npcm7xx_smbus.h"
 #include "hw/mem/npcm7xx_mc.h"
 #include "hw/misc/npcm7xx_clk.h"
 #include "hw/misc/npcm7xx_gcr.h"
+#include "hw/misc/npcm7xx_mft.h"
 #include "hw/misc/npcm7xx_pwm.h"
 #include "hw/misc/npcm7xx_rng.h"
 #include "hw/net/npcm7xx_emc.h"
@@ -47,8 +49,16 @@
 #define NPCM7XX_GIC_CPU_IF_ADDR         (0xf03fe100)  /* GIC within A9 */
 #define NPCM7XX_BOARD_SETUP_ADDR        (0xffff1000)  /* Boot ROM */
 
+#define NPCM7XX_NR_PWM_MODULES 2
+
 typedef struct NPCM7xxMachine {
     MachineState        parent;
+    /*
+     * PWM fan splitter. each splitter connects to one PWM output and
+     * multiple MFT inputs.
+     */
+    SplitIRQ            fan_splitter[NPCM7XX_NR_PWM_MODULES *
+                                     NPCM7XX_PWM_PER_MODULE];
 } NPCM7xxMachine;
 
 #define TYPE_NPCM7XX_MACHINE MACHINE_TYPE_NAME("npcm7xx")
@@ -81,7 +91,8 @@ typedef struct NPCM7xxState {
     NPCM7xxCLKState     clk;
     NPCM7xxTimerCtrlState tim[3];
     NPCM7xxADCState     adc;
-    NPCM7xxPWMState     pwm[2];
+    NPCM7xxPWMState     pwm[NPCM7XX_NR_PWM_MODULES];
+    NPCM7xxMFTState     mft[8];
     NPCM7xxOTPState     key_storage;
     NPCM7xxOTPState     fuse_array;
     NPCM7xxMCState      mc;
diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
index 2b76885afd..22a8fa5d11 100644
--- a/include/hw/arm/xlnx-versal.h
+++ b/include/hw/arm/xlnx-versal.h
@@ -14,6 +14,7 @@
 
 #include "hw/sysbus.h"
 #include "hw/arm/boot.h"
+#include "hw/or-irq.h"
 #include "hw/sd/sdhci.h"
 #include "hw/intc/arm_gicv3.h"
 #include "hw/char/pl011.h"
@@ -22,6 +23,7 @@
 #include "hw/rtc/xlnx-zynqmp-rtc.h"
 #include "qom/object.h"
 #include "hw/usb/xlnx-usb-subsystem.h"
+#include "hw/misc/xlnx-versal-xramc.h"
 
 #define TYPE_XLNX_VERSAL "xlnx-versal"
 OBJECT_DECLARE_SIMPLE_TYPE(Versal, XLNX_VERSAL)
@@ -31,6 +33,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(Versal, XLNX_VERSAL)
 #define XLNX_VERSAL_NR_GEMS    2
 #define XLNX_VERSAL_NR_ADMAS   8
 #define XLNX_VERSAL_NR_SDS     2
+#define XLNX_VERSAL_NR_XRAM    4
 #define XLNX_VERSAL_NR_IRQS    192
 
 struct Versal {
@@ -62,6 +65,11 @@ struct Versal {
             XlnxZDMA adma[XLNX_VERSAL_NR_ADMAS];
             VersalUsb2 usb;
         } iou;
+
+        struct {
+            qemu_or_irq irq_orgate;
+            XlnxXramCtrl ctrl[XLNX_VERSAL_NR_XRAM];
+        } xram;
     } lpd;
 
     /* The Platform Management Controller subsystem.  */
@@ -96,6 +104,7 @@ struct Versal {
 #define VERSAL_GEM1_IRQ_0          58
 #define VERSAL_GEM1_WAKE_IRQ_0     59
 #define VERSAL_ADMA_IRQ_0          60
+#define VERSAL_XRAM_IRQ_0          79
 #define VERSAL_RTC_APB_ERR_IRQ     121
 #define VERSAL_SD0_IRQ_0           126
 #define VERSAL_RTC_ALARM_IRQ       142
@@ -128,6 +137,10 @@ struct Versal {
 #define MM_OCM                      0xfffc0000U
 #define MM_OCM_SIZE                 0x40000
 
+#define MM_XRAM                     0xfe800000
+#define MM_XRAMC                    0xff8e0000
+#define MM_XRAMC_SIZE               0x10000
+
 #define MM_USB2_CTRL_REGS           0xFF9D0000
 #define MM_USB2_CTRL_REGS_SIZE      0x10000
 
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 5fda5fd128..4a90549ad8 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -128,6 +128,7 @@ typedef struct {
  * @kvm_type:
  *    Return the type of KVM corresponding to the kvm-type string option or
  *    computed based on other criteria such as the host kernel capabilities.
+ *    kvm-type may be NULL if it is not needed.
  * @numa_mem_supported:
  *    true if '--numa node.mem' option is supported and false otherwise
  * @smp_parse:
diff --git a/include/hw/misc/npcm7xx_mft.h b/include/hw/misc/npcm7xx_mft.h
new file mode 100644
index 0000000000..36785e3ba8
--- /dev/null
+++ b/include/hw/misc/npcm7xx_mft.h
@@ -0,0 +1,70 @@
+/*
+ * Nuvoton NPCM7xx MFT Module
+ *
+ * Copyright 2021 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM7XX_MFT_H
+#define NPCM7XX_MFT_H
+
+#include "exec/memory.h"
+#include "hw/clock.h"
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+/* Max Fan input number. */
+#define NPCM7XX_MFT_MAX_FAN_INPUT 19
+
+/*
+ * Number of registers in one MFT module. Don't change this without increasing
+ * the version_id in vmstate.
+ */
+#define NPCM7XX_MFT_NR_REGS (0x20 / sizeof(uint16_t))
+
+/*
+ * The MFT can take up to 4 inputs: A0, B0, A1, B1. It can measure one A and one
+ * B simultaneously. NPCM7XX_MFT_INASEL and NPCM7XX_MFT_INBSEL are used to
+ * select which A or B input are used.
+ */
+#define NPCM7XX_MFT_FANIN_COUNT 4
+
+/**
+ * struct NPCM7xxMFTState - Multi Functional Tachometer device state.
+ * @parent: System bus device.
+ * @iomem: Memory region through which registers are accessed.
+ * @clock_in: The input clock for MFT from CLK module.
+ * @clock_{1,2}: The counter clocks for NPCM7XX_MFT_CNT{1,2}
+ * @irq: The IRQ for this MFT state.
+ * @regs: The MMIO registers.
+ * @max_rpm: The maximum rpm for fans. Order: A0, B0, A1, B1.
+ * @duty: The duty cycles for fans, relative to NPCM7XX_PWM_MAX_DUTY.
+ */
+typedef struct NPCM7xxMFTState {
+    SysBusDevice parent;
+
+    MemoryRegion iomem;
+
+    Clock       *clock_in;
+    Clock       *clock_1, *clock_2;
+    qemu_irq    irq;
+    uint16_t    regs[NPCM7XX_MFT_NR_REGS];
+
+    uint32_t    max_rpm[NPCM7XX_MFT_FANIN_COUNT];
+    uint32_t    duty[NPCM7XX_MFT_FANIN_COUNT];
+} NPCM7xxMFTState;
+
+#define TYPE_NPCM7XX_MFT "npcm7xx-mft"
+#define NPCM7XX_MFT(obj) \
+    OBJECT_CHECK(NPCM7xxMFTState, (obj), TYPE_NPCM7XX_MFT)
+
+#endif /* NPCM7XX_MFT_H */
diff --git a/include/hw/misc/npcm7xx_pwm.h b/include/hw/misc/npcm7xx_pwm.h
index 5a689d3f66..7ad632a93a 100644
--- a/include/hw/misc/npcm7xx_pwm.h
+++ b/include/hw/misc/npcm7xx_pwm.h
@@ -77,6 +77,7 @@ typedef struct NPCM7xxPWM {
  * @iomem: Memory region through which registers are accessed.
  * @clock: The PWM clock.
  * @pwm: The PWM channels owned by this module.
+ * @duty_gpio_out: The duty cycle of each PWM channels as a output GPIO.
  * @ppr: The prescaler register.
  * @csr: The clock selector register.
  * @pcr: The control register.
@@ -89,7 +90,8 @@ struct NPCM7xxPWMState {
     MemoryRegion iomem;
 
     Clock       *clock;
-    NPCM7xxPWM pwm[NPCM7XX_PWM_PER_MODULE];
+    NPCM7xxPWM  pwm[NPCM7XX_PWM_PER_MODULE];
+    qemu_irq    duty_gpio_out[NPCM7XX_PWM_PER_MODULE];
 
     uint32_t    ppr;
     uint32_t    csr;
diff --git a/include/hw/misc/xlnx-versal-xramc.h b/include/hw/misc/xlnx-versal-xramc.h
new file mode 100644
index 0000000000..d3d1862676
--- /dev/null
+++ b/include/hw/misc/xlnx-versal-xramc.h
@@ -0,0 +1,97 @@
+/*
+ * QEMU model of the Xilinx XRAM Controller.
+ *
+ * Copyright (c) 2021 Xilinx Inc.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Written by Edgar E. Iglesias <edgar.iglesias@xilinx.com>
+ */
+
+#ifndef XLNX_VERSAL_XRAMC_H
+#define XLNX_VERSAL_XRAMC_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+
+#define TYPE_XLNX_XRAM_CTRL "xlnx.versal-xramc"
+
+#define XLNX_XRAM_CTRL(obj) \
+     OBJECT_CHECK(XlnxXramCtrl, (obj), TYPE_XLNX_XRAM_CTRL)
+
+REG32(XRAM_ERR_CTRL, 0x0)
+    FIELD(XRAM_ERR_CTRL, UE_RES, 3, 1)
+    FIELD(XRAM_ERR_CTRL, PWR_ERR_RES, 2, 1)
+    FIELD(XRAM_ERR_CTRL, PZ_ERR_RES, 1, 1)
+    FIELD(XRAM_ERR_CTRL, APB_ERR_RES, 0, 1)
+REG32(XRAM_ISR, 0x4)
+    FIELD(XRAM_ISR, INV_APB, 0, 1)
+REG32(XRAM_IMR, 0x8)
+    FIELD(XRAM_IMR, INV_APB, 0, 1)
+REG32(XRAM_IEN, 0xc)
+    FIELD(XRAM_IEN, INV_APB, 0, 1)
+REG32(XRAM_IDS, 0x10)
+    FIELD(XRAM_IDS, INV_APB, 0, 1)
+REG32(XRAM_ECC_CNTL, 0x14)
+    FIELD(XRAM_ECC_CNTL, FI_MODE, 2, 1)
+    FIELD(XRAM_ECC_CNTL, DET_ONLY, 1, 1)
+    FIELD(XRAM_ECC_CNTL, ECC_ON_OFF, 0, 1)
+REG32(XRAM_CLR_EXE, 0x18)
+    FIELD(XRAM_CLR_EXE, MON_7, 7, 1)
+    FIELD(XRAM_CLR_EXE, MON_6, 6, 1)
+    FIELD(XRAM_CLR_EXE, MON_5, 5, 1)
+    FIELD(XRAM_CLR_EXE, MON_4, 4, 1)
+    FIELD(XRAM_CLR_EXE, MON_3, 3, 1)
+    FIELD(XRAM_CLR_EXE, MON_2, 2, 1)
+    FIELD(XRAM_CLR_EXE, MON_1, 1, 1)
+    FIELD(XRAM_CLR_EXE, MON_0, 0, 1)
+REG32(XRAM_CE_FFA, 0x1c)
+    FIELD(XRAM_CE_FFA, ADDR, 0, 20)
+REG32(XRAM_CE_FFD0, 0x20)
+REG32(XRAM_CE_FFD1, 0x24)
+REG32(XRAM_CE_FFD2, 0x28)
+REG32(XRAM_CE_FFD3, 0x2c)
+REG32(XRAM_CE_FFE, 0x30)
+    FIELD(XRAM_CE_FFE, SYNDROME, 0, 16)
+REG32(XRAM_UE_FFA, 0x34)
+    FIELD(XRAM_UE_FFA, ADDR, 0, 20)
+REG32(XRAM_UE_FFD0, 0x38)
+REG32(XRAM_UE_FFD1, 0x3c)
+REG32(XRAM_UE_FFD2, 0x40)
+REG32(XRAM_UE_FFD3, 0x44)
+REG32(XRAM_UE_FFE, 0x48)
+    FIELD(XRAM_UE_FFE, SYNDROME, 0, 16)
+REG32(XRAM_FI_D0, 0x4c)
+REG32(XRAM_FI_D1, 0x50)
+REG32(XRAM_FI_D2, 0x54)
+REG32(XRAM_FI_D3, 0x58)
+REG32(XRAM_FI_SY, 0x5c)
+    FIELD(XRAM_FI_SY, DATA, 0, 16)
+REG32(XRAM_RMW_UE_FFA, 0x70)
+    FIELD(XRAM_RMW_UE_FFA, ADDR, 0, 20)
+REG32(XRAM_FI_CNTR, 0x74)
+    FIELD(XRAM_FI_CNTR, COUNT, 0, 24)
+REG32(XRAM_IMP, 0x80)
+    FIELD(XRAM_IMP, SIZE, 0, 4)
+REG32(XRAM_PRDY_DBG, 0x84)
+    FIELD(XRAM_PRDY_DBG, ISLAND3, 12, 4)
+    FIELD(XRAM_PRDY_DBG, ISLAND2, 8, 4)
+    FIELD(XRAM_PRDY_DBG, ISLAND1, 4, 4)
+    FIELD(XRAM_PRDY_DBG, ISLAND0, 0, 4)
+REG32(XRAM_SAFETY_CHK, 0xff8)
+
+#define XRAM_CTRL_R_MAX (R_XRAM_SAFETY_CHK + 1)
+
+typedef struct XlnxXramCtrl {
+    SysBusDevice parent_obj;
+    MemoryRegion ram;
+    qemu_irq irq;
+
+    struct {
+        uint64_t size;
+        unsigned int encoded_size;
+    } cfg;
+
+    RegisterInfoArray *reg_array;
+    uint32_t regs[XRAM_CTRL_R_MAX];
+    RegisterInfo regs_info[XRAM_CTRL_R_MAX];
+} XlnxXramCtrl;
+#endif
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index a052f7bca3..3201e7901d 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -296,4 +296,16 @@ uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg);
 void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
                     QEMUSGList *sg, enum BlockAcctType type);
 
+/**
+ * dma_aligned_pow2_mask: Return the address bit mask of the largest
+ * power of 2 size less or equal than @end - @start + 1, aligned with @start,
+ * and bounded by 1 << @max_addr_bits bits.
+ *
+ * @start: range start address
+ * @end: range end address (greater than @start)
+ * @max_addr_bits: max address bits (<= 64)
+ */
+uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end,
+                               int max_addr_bits);
+
 #endif
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 140a971632..c6731013fd 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -2209,9 +2209,8 @@ static uintptr_t pgd_find_hole_fallback(uintptr_t guest_size, uintptr_t brk,
             void * mmap_start = mmap((void *) align_start, guest_size,
                                      PROT_NONE, flags, -1, 0);
             if (mmap_start != MAP_FAILED) {
-                munmap((void *) align_start, guest_size);
-                if (MAP_FIXED_NOREPLACE != 0 ||
-                    mmap_start == (void *) align_start) {
+                munmap(mmap_start, guest_size);
+                if (mmap_start == (void *) align_start) {
                     return (uintptr_t) mmap_start + offset;
                 }
             }
@@ -2236,7 +2235,8 @@ static uintptr_t pgb_find_hole(uintptr_t guest_loaddr, uintptr_t guest_size,
     brk = (uintptr_t)sbrk(0);
 
     if (!maps) {
-        return pgd_find_hole_fallback(guest_size, brk, align, offset);
+        ret = pgd_find_hole_fallback(guest_size, brk, align, offset);
+        return ret == -1 ? -1 : ret - guest_loaddr;
     }
 
     /* The first hole is before the first map entry. */
diff --git a/linux-user/main.c b/linux-user/main.c
index 4f4746dce8..f956afccab 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -26,6 +26,7 @@
 #include <sys/syscall.h>
 #include <sys/resource.h>
 #include <sys/shm.h>
+#include <linux/binfmts.h>
 
 #include "qapi/error.h"
 #include "qemu.h"
@@ -49,6 +50,11 @@
 #include "cpu_loop-common.h"
 #include "crypto/init.h"
 
+#ifndef AT_FLAGS_PRESERVE_ARGV0
+#define AT_FLAGS_PRESERVE_ARGV0_BIT 0
+#define AT_FLAGS_PRESERVE_ARGV0 (1 << AT_FLAGS_PRESERVE_ARGV0_BIT)
+#endif
+
 char *exec_path;
 
 int singlestep;
@@ -632,6 +638,7 @@ int main(int argc, char **argv, char **envp)
     int execfd;
     int log_mask;
     unsigned long max_reserved_va;
+    bool preserve_argv0;
 
     error_init(argv[0]);
     module_call_init(MODULE_INIT_TRACE);
@@ -688,6 +695,9 @@ int main(int argc, char **argv, char **envp)
 
     init_qemu_uname_release();
 
+    /*
+     * Manage binfmt-misc open-binary flag
+     */
     execfd = qemu_getauxval(AT_EXECFD);
     if (execfd == 0) {
         execfd = open(exec_path, O_RDONLY);
@@ -697,6 +707,20 @@ int main(int argc, char **argv, char **envp)
         }
     }
 
+    /*
+     * get binfmt_misc flags
+     */
+    preserve_argv0 = !!(qemu_getauxval(AT_FLAGS) & AT_FLAGS_PRESERVE_ARGV0);
+
+    /*
+     * Manage binfmt-misc preserve-arg[0] flag
+     *    argv[optind]     full path to the binary
+     *    argv[optind + 1] original argv[0]
+     */
+    if (optind + 1 < argc && preserve_argv0) {
+        optind++;
+    }
+
     if (cpu_model == NULL) {
         cpu_model = cpu_get_model(get_elf_eflags(execfd));
     }
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 9522f603aa..1e508576c7 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -7890,9 +7890,9 @@ static int open_self_maps(void *cpu_env, int fd)
             count = dprintf(fd, TARGET_ABI_FMT_ptr "-" TARGET_ABI_FMT_ptr
                             " %c%c%c%c %08" PRIx64 " %s %"PRId64,
                             h2g(min), h2g(max - 1) + 1,
-                            e->is_read ? 'r' : '-',
-                            e->is_write ? 'w' : '-',
-                            e->is_exec ? 'x' : '-',
+                            (flags & PAGE_READ) ? 'r' : '-',
+                            (flags & PAGE_WRITE_ORG) ? 'w' : '-',
+                            (flags & PAGE_EXEC) ? 'x' : '-',
                             e->is_priv ? 'p' : '-',
                             (uint64_t) e->offset, e->dev, e->inode);
             if (path) {
diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh
index 7b5d54b887..573b5dc6ac 100755
--- a/scripts/qemu-binfmt-conf.sh
+++ b/scripts/qemu-binfmt-conf.sh
@@ -178,25 +178,27 @@ usage() {
 Usage: qemu-binfmt-conf.sh [--qemu-path PATH][--debian][--systemd CPU]
                            [--help][--credential yes|no][--exportdir PATH]
                            [--persistent yes|no][--qemu-suffix SUFFIX]
+                           [--preserve-argv0 yes|no]
 
        Configure binfmt_misc to use qemu interpreter
 
-       --help:        display this usage
-       --qemu-path:   set path to qemu interpreter ($QEMU_PATH)
-       --qemu-suffix: add a suffix to the default interpreter name
-       --debian:      don't write into /proc,
-                      instead generate update-binfmts templates
-       --systemd:     don't write into /proc,
-                      instead generate file for systemd-binfmt.service
-                      for the given CPU. If CPU is "ALL", generate a
-                      file for all known cpus
-       --exportdir:   define where to write configuration files
-                      (default: $SYSTEMDDIR or $DEBIANDIR)
-       --credential:  if yes, credential and security tokens are
-                      calculated according to the binary to interpret
-       --persistent:  if yes, the interpreter is loaded when binfmt is
-                      configured and remains in memory. All future uses
-                      are cloned from the open file.
+       --help:          display this usage
+       --qemu-path:     set path to qemu interpreter ($QEMU_PATH)
+       --qemu-suffix:   add a suffix to the default interpreter name
+       --debian:        don't write into /proc,
+                        instead generate update-binfmts templates
+       --systemd:       don't write into /proc,
+                        instead generate file for systemd-binfmt.service
+                        for the given CPU. If CPU is "ALL", generate a
+                        file for all known cpus
+       --exportdir:     define where to write configuration files
+                        (default: $SYSTEMDDIR or $DEBIANDIR)
+       --credential:    if yes, credential and security tokens are
+                        calculated according to the binary to interpret
+       --persistent:    if yes, the interpreter is loaded when binfmt is
+                        configured and remains in memory. All future uses
+                        are cloned from the open file.
+       --preserve-argv0 preserve argv[0]
 
     To import templates with update-binfmts, use :
 
@@ -269,6 +271,9 @@ qemu_generate_register() {
     if [ "$PERSISTENT" = "yes" ] ; then
         flags="${flags}F"
     fi
+    if [ "$PRESERVE_ARG0" = "yes" ] ; then
+        flags="${flags}P"
+    fi
 
     echo ":qemu-$cpu:M::$magic:$mask:$qemu:$flags"
 }
@@ -330,9 +335,10 @@ DEBIANDIR="/usr/share/binfmts"
 QEMU_PATH=/usr/local/bin
 CREDENTIAL=no
 PERSISTENT=no
+PRESERVE_ARG0=no
 QEMU_SUFFIX=""
 
-options=$(getopt -o ds:Q:S:e:hc:p: -l debian,systemd:,qemu-path:,qemu-suffix:,exportdir:,help,credential:,persistent: -- "$@")
+options=$(getopt -o ds:Q:S:e:hc:p:g: -l debian,systemd:,qemu-path:,qemu-suffix:,exportdir:,help,credential:,persistent:,preserve-argv0: -- "$@")
 eval set -- "$options"
 
 while true ; do
@@ -388,6 +394,10 @@ while true ; do
         shift
         PERSISTENT="$1"
         ;;
+    -g|--preserve-argv0)
+        shift
+        PRESERVE_ARG0="$1"
+        ;;
     *)
         break
         ;;
diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
index 29001b5459..7d766a5e89 100644
--- a/softmmu/dma-helpers.c
+++ b/softmmu/dma-helpers.c
@@ -330,3 +330,29 @@ void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
 {
     block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
 }
+
+uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
+{
+    uint64_t max_mask = UINT64_MAX, addr_mask = end - start;
+    uint64_t alignment_mask, size_mask;
+
+    if (max_addr_bits != 64) {
+        max_mask = (1ULL << max_addr_bits) - 1;
+    }
+
+    alignment_mask = start ? (start & -start) - 1 : max_mask;
+    alignment_mask = MIN(alignment_mask, max_mask);
+    size_mask = MIN(addr_mask, max_mask);
+
+    if (alignment_mask <= size_mask) {
+        /* Increase the alignment of start */
+        return alignment_mask;
+    } else {
+        /* Find the largest page mask from size */
+        if (addr_mask == UINT64_MAX) {
+            return UINT64_MAX;
+        }
+        return (1ULL << (63 - clz64(addr_mask + 1))) - 1;
+    }
+}
+
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index bebea90122..d8381ba224 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -230,12 +230,14 @@ bool kvm_arm_pmu_supported(void)
     return kvm_check_extension(kvm_state, KVM_CAP_ARM_PMU_V3);
 }
 
-int kvm_arm_get_max_vm_ipa_size(MachineState *ms)
+int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa)
 {
     KVMState *s = KVM_STATE(ms->accelerator);
     int ret;
 
     ret = kvm_check_extension(s, KVM_CAP_ARM_VM_IPA_SIZE);
+    *fixed_ipa = ret <= 0;
+
     return ret > 0 ? ret : 40;
 }
 
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index 68ec970c4f..34f8daa377 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -311,10 +311,12 @@ bool kvm_arm_sve_supported(void);
 /**
  * kvm_arm_get_max_vm_ipa_size:
  * @ms: Machine state handle
+ * @fixed_ipa: True when the IPA limit is fixed at 40. This is the case
+ * for legacy KVM.
  *
  * Returns the number of bits in the IPA address space supported by KVM
  */
-int kvm_arm_get_max_vm_ipa_size(MachineState *ms);
+int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa);
 
 /**
  * kvm_arm_sync_mpstate_to_kvm:
@@ -409,7 +411,7 @@ static inline void kvm_arm_add_vcpu_properties(Object *obj)
     g_assert_not_reached();
 }
 
-static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms)
+static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa)
 {
     g_assert_not_reached();
 }
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 844db08bd5..fd6c58f96a 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -1871,6 +1871,7 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
     intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     int esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
     intptr_t high = FIELD_EX32(pred_desc, PREDDESC, DATA);
+    int esize = 1 << esz;
     uint64_t *d = vd;
     intptr_t i;
 
@@ -1883,33 +1884,35 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
         mm = extract64(mm, high * half, half);
         nn = expand_bits(nn, esz);
         mm = expand_bits(mm, esz);
-        d[0] = nn + (mm << (1 << esz));
+        d[0] = nn | (mm << esize);
     } else {
-        ARMPredicateReg tmp_n, tmp_m;
+        ARMPredicateReg tmp;
 
         /* We produce output faster than we consume input.
            Therefore we must be mindful of possible overlap.  */
-        if ((vn - vd) < (uintptr_t)oprsz) {
-            vn = memcpy(&tmp_n, vn, oprsz);
-        }
-        if ((vm - vd) < (uintptr_t)oprsz) {
-            vm = memcpy(&tmp_m, vm, oprsz);
+        if (vd == vn) {
+            vn = memcpy(&tmp, vn, oprsz);
+            if (vd == vm) {
+                vm = vn;
+            }
+        } else if (vd == vm) {
+            vm = memcpy(&tmp, vm, oprsz);
         }
         if (high) {
             high = oprsz >> 1;
         }
 
-        if ((high & 3) == 0) {
+        if ((oprsz & 7) == 0) {
             uint32_t *n = vn, *m = vm;
             high >>= 2;
 
-            for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
+            for (i = 0; i < oprsz / 8; i++) {
                 uint64_t nn = n[H4(high + i)];
                 uint64_t mm = m[H4(high + i)];
 
                 nn = expand_bits(nn, esz);
                 mm = expand_bits(mm, esz);
-                d[i] = nn + (mm << (1 << esz));
+                d[i] = nn | (mm << esize);
             }
         } else {
             uint8_t *n = vn, *m = vm;
@@ -1921,7 +1924,7 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
 
                 nn = expand_bits(nn, esz);
                 mm = expand_bits(mm, esz);
-                d16[H2(i)] = nn + (mm << (1 << esz));
+                d16[H2(i)] = nn | (mm << esize);
             }
         }
     }
@@ -1939,7 +1942,7 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
     if (oprsz <= 8) {
         l = compress_bits(n[0] >> odd, esz);
         h = compress_bits(m[0] >> odd, esz);
-        d[0] = extract64(l + (h << (4 * oprsz)), 0, 8 * oprsz);
+        d[0] = l | (h << (4 * oprsz));
     } else {
         ARMPredicateReg tmp_m;
         intptr_t oprsz_16 = oprsz / 16;
@@ -1953,23 +1956,35 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
             h = n[2 * i + 1];
             l = compress_bits(l >> odd, esz);
             h = compress_bits(h >> odd, esz);
-            d[i] = l + (h << 32);
+            d[i] = l | (h << 32);
         }
 
-        /* For VL which is not a power of 2, the results from M do not
-           align nicely with the uint64_t for D.  Put the aligned results
-           from M into TMP_M and then copy it into place afterward.  */
+        /*
+         * For VL which is not a multiple of 512, the results from M do not
+         * align nicely with the uint64_t for D.  Put the aligned results
+         * from M into TMP_M and then copy it into place afterward.
+         */
         if (oprsz & 15) {
-            d[i] = compress_bits(n[2 * i] >> odd, esz);
+            int final_shift = (oprsz & 15) * 2;
+
+            l = n[2 * i + 0];
+            h = n[2 * i + 1];
+            l = compress_bits(l >> odd, esz);
+            h = compress_bits(h >> odd, esz);
+            d[i] = l | (h << final_shift);
 
             for (i = 0; i < oprsz_16; i++) {
                 l = m[2 * i + 0];
                 h = m[2 * i + 1];
                 l = compress_bits(l >> odd, esz);
                 h = compress_bits(h >> odd, esz);
-                tmp_m.p[i] = l + (h << 32);
+                tmp_m.p[i] = l | (h << 32);
             }
-            tmp_m.p[i] = compress_bits(m[2 * i] >> odd, esz);
+            l = m[2 * i + 0];
+            h = m[2 * i + 1];
+            l = compress_bits(l >> odd, esz);
+            h = compress_bits(h >> odd, esz);
+            tmp_m.p[i] = l | (h << final_shift);
 
             swap_memmove(vd + oprsz / 2, &tmp_m, oprsz / 2);
         } else {
@@ -1978,7 +1993,7 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
                 h = m[2 * i + 1];
                 l = compress_bits(l >> odd, esz);
                 h = compress_bits(h >> odd, esz);
-                d[oprsz_16 + i] = l + (h << 32);
+                d[oprsz_16 + i] = l | (h << 32);
             }
         }
     }
@@ -2090,11 +2105,11 @@ void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc)
             high = oprsz >> 1;
         }
 
-        if ((high & 3) == 0) {
+        if ((oprsz & 7) == 0) {
             uint32_t *n = vn;
             high >>= 2;
 
-            for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
+            for (i = 0; i < oprsz / 8; i++) {
                 uint64_t nn = n[H4(high + i)];
                 d[i] = expand_bits(nn, 0);
             }
@@ -2222,10 +2237,10 @@ void HELPER(sve_compact_d)(void *vd, void *vn, void *vg, uint32_t desc)
  */
 int32_t HELPER(sve_last_active_element)(void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
-    intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+    intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
+    intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
 
-    return last_active_element(vg, DIV_ROUND_UP(oprsz, 8), esz);
+    return last_active_element(vg, words, esz);
 }
 
 void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc)
@@ -2695,7 +2710,7 @@ static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz)
 void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg,
                        uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     if (last_active_pred(vn, vg, oprsz)) {
         compute_brk_z(vd, vm, vg, oprsz, true);
     } else {
@@ -2706,7 +2721,7 @@ void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg,
 uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg,
                             uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     if (last_active_pred(vn, vg, oprsz)) {
         return compute_brks_z(vd, vm, vg, oprsz, true);
     } else {
@@ -2717,7 +2732,7 @@ uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg,
 void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg,
                        uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     if (last_active_pred(vn, vg, oprsz)) {
         compute_brk_z(vd, vm, vg, oprsz, false);
     } else {
@@ -2728,7 +2743,7 @@ void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg,
 uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg,
                             uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     if (last_active_pred(vn, vg, oprsz)) {
         return compute_brks_z(vd, vm, vg, oprsz, false);
     } else {
@@ -2738,56 +2753,55 @@ uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg,
 
 void HELPER(sve_brka_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     compute_brk_z(vd, vn, vg, oprsz, true);
 }
 
 uint32_t HELPER(sve_brkas_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     return compute_brks_z(vd, vn, vg, oprsz, true);
 }
 
 void HELPER(sve_brkb_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     compute_brk_z(vd, vn, vg, oprsz, false);
 }
 
 uint32_t HELPER(sve_brkbs_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     return compute_brks_z(vd, vn, vg, oprsz, false);
 }
 
 void HELPER(sve_brka_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     compute_brk_m(vd, vn, vg, oprsz, true);
 }
 
 uint32_t HELPER(sve_brkas_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     return compute_brks_m(vd, vn, vg, oprsz, true);
 }
 
 void HELPER(sve_brkb_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     compute_brk_m(vd, vn, vg, oprsz, false);
 }
 
 uint32_t HELPER(sve_brkbs_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     return compute_brks_m(vd, vn, vg, oprsz, false);
 }
 
 void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
-
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     if (!last_active_pred(vn, vg, oprsz)) {
         do_zero(vd, oprsz);
     }
@@ -2812,8 +2826,7 @@ static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz,
 
 uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
-
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
     if (last_active_pred(vn, vg, oprsz)) {
         return predtest_ones(vd, oprsz, -1);
     } else {
@@ -2823,12 +2836,12 @@ uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc)
 
 uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
 {
-    intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
-    intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+    intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
+    intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
     uint64_t *n = vn, *g = vg, sum = 0, mask = pred_esz_masks[esz];
     intptr_t i;
 
-    for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
+    for (i = 0; i < words; ++i) {
         uint64_t t = n[i] & g[i] & mask;
         sum += ctpop64(t);
     }
@@ -2837,8 +2850,8 @@ uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
 
 uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc)
 {
-    uintptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
-    intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+    intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+    intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
     uint64_t esz_mask = pred_esz_masks[esz];
     ARMPredicateReg *d = vd;
     uint32_t flags;
@@ -2883,7 +2896,7 @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \
 }                                                                     \
 uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc)    \
 {                                                                     \
-    uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc);  \
+    uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_data(desc);   \
     TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)];                   \
     for (i = 0; i < oprsz; ) {                                        \
         uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));               \
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 27402af23c..0eefb61214 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -2302,11 +2302,10 @@ static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
      */
     TCGv_ptr t_p = tcg_temp_new_ptr();
     TCGv_i32 t_desc;
-    unsigned vsz = pred_full_reg_size(s);
-    unsigned desc;
+    unsigned desc = 0;
 
-    desc = vsz - 2;
-    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
+    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
+    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
 
     tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
     t_desc = tcg_const_i32(desc);
@@ -2851,7 +2850,7 @@ static bool do_brk3(DisasContext *s, arg_rprr_s *a,
     TCGv_ptr n = tcg_temp_new_ptr();
     TCGv_ptr m = tcg_temp_new_ptr();
     TCGv_ptr g = tcg_temp_new_ptr();
-    TCGv_i32 t = tcg_const_i32(vsz - 2);
+    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
 
     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
@@ -2885,7 +2884,7 @@ static bool do_brk2(DisasContext *s, arg_rpr_s *a,
     TCGv_ptr d = tcg_temp_new_ptr();
     TCGv_ptr n = tcg_temp_new_ptr();
     TCGv_ptr g = tcg_temp_new_ptr();
-    TCGv_i32 t = tcg_const_i32(vsz - 2);
+    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
 
     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
@@ -2968,11 +2967,11 @@ static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
     } else {
         TCGv_ptr t_pn = tcg_temp_new_ptr();
         TCGv_ptr t_pg = tcg_temp_new_ptr();
-        unsigned desc;
+        unsigned desc = 0;
         TCGv_i32 t_desc;
 
-        desc = psz - 2;
-        desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
+        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
+        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
 
         tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
         tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
@@ -3098,7 +3097,8 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
     TCGv_i64 op0, op1, t0, t1, tmax;
     TCGv_i32 t2, t3;
     TCGv_ptr ptr;
-    unsigned desc, vsz = vec_full_reg_size(s);
+    unsigned vsz = vec_full_reg_size(s);
+    unsigned desc = 0;
     TCGCond cond;
 
     if (!sve_access_check(s)) {
@@ -3162,8 +3162,8 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
     /* Scale elements to bits.  */
     tcg_gen_shli_i32(t2, t2, a->esz);
 
-    desc = (vsz / 8) - 2;
-    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
+    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
+    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
     t3 = tcg_const_i32(desc);
 
     ptr = tcg_temp_new_ptr();
@@ -3440,7 +3440,7 @@ static void do_reduce(DisasContext *s, arg_rpr_esz *a,
 {
     unsigned vsz = vec_full_reg_size(s);
     unsigned p2vsz = pow2ceil(vsz);
-    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
+    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz));
     TCGv_ptr t_zn, t_pg, status;
     TCGv_i64 temp;
 
diff --git a/target/mips/meson.build b/target/mips/meson.build
index 9741545440..3b131c4a7f 100644
--- a/target/mips/meson.build
+++ b/target/mips/meson.build
@@ -3,15 +3,17 @@ gen = [
   decodetree.process('mips64r6.decode', extra_args: '--static-decode=decode_mips64r6'),
   decodetree.process('msa32.decode', extra_args: '--static-decode=decode_msa32'),
   decodetree.process('msa64.decode', extra_args: '--static-decode=decode_msa64'),
+  decodetree.process('tx79.decode', extra_args: '--static-decode=decode_tx79'),
 ]
 
 mips_ss = ss.source_set()
-mips_ss.add(gen)
 mips_ss.add(files(
   'cpu.c',
   'gdbstub.c',
 ))
-mips_ss.add(when: 'CONFIG_TCG', if_true: files(
+mips_tcg_ss = ss.source_set()
+mips_tcg_ss.add(gen)
+mips_tcg_ss.add(files(
   'dsp_helper.c',
   'fpu_helper.c',
   'lmmi_helper.c',
@@ -22,7 +24,15 @@ mips_ss.add(when: 'CONFIG_TCG', if_true: files(
   'tlb_helper.c',
   'translate.c',
   'translate_addr_const.c',
+  'txx9_translate.c',
 ))
+mips_ss.add(when: ['CONFIG_TCG', 'TARGET_MIPS64'], if_true: files(
+  'tx79_translate.c',
+))
+mips_tcg_ss.add(when: 'TARGET_MIPS64', if_false: files(
+  'mxu_translate.c',
+))
+
 mips_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'))
 
 mips_softmmu_ss = ss.source_set()
@@ -30,11 +40,13 @@ mips_softmmu_ss.add(files(
   'addr.c',
   'cp0_timer.c',
   'machine.c',
-  'mips-semi.c',
 ))
 mips_softmmu_ss.add(when: 'CONFIG_TCG', if_true: files(
   'cp0_helper.c',
+  'mips-semi.c',
 ))
 
+mips_ss.add_all(when: 'CONFIG_TCG', if_true: [mips_tcg_ss])
+
 target_arch += {'mips': mips_ss}
 target_softmmu_arch += {'mips': mips_softmmu_ss}
diff --git a/target/mips/mxu_translate.c b/target/mips/mxu_translate.c
new file mode 100644
index 0000000000..afc008eeee
--- /dev/null
+++ b/target/mips/mxu_translate.c
@@ -0,0 +1,1609 @@
+/*
+ *  Ingenic XBurst Media eXtension Unit (MXU) translation routines.
+ *
+ *  Copyright (c) 2004-2005 Jocelyn Mayer
+ *  Copyright (c) 2006 Marius Groeger (FPU operations)
+ *  Copyright (c) 2006 Thiemo Seufer (MIPS32R2 support)
+ *  Copyright (c) 2009 CodeSourcery (MIPS16 and microMIPS support)
+ *  Copyright (c) 2012 Jia Liu & Dongxue Zhang (MIPS ASE DSP support)
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * Datasheet:
+ *
+ *   "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
+ *   Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
+ */
+
+#include "qemu/osdep.h"
+#include "tcg/tcg-op.h"
+#include "exec/helper-gen.h"
+#include "translate.h"
+
+/*
+ *
+ *       AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET
+ *       ============================================
+ *
+ *
+ * MXU (full name: MIPS eXtension/enhanced Unit) is a SIMD extension of MIPS32
+ * instructions set. It is designed to fit the needs of signal, graphical and
+ * video processing applications. MXU instruction set is used in Xburst family
+ * of microprocessors by Ingenic.
+ *
+ * MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is
+ * the control register.
+ *
+ *
+ *     The notation used in MXU assembler mnemonics
+ *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  Register operands:
+ *
+ *   XRa, XRb, XRc, XRd - MXU registers
+ *   Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers
+ *
+ *  Non-register operands:
+ *
+ *   aptn1 - 1-bit accumulate add/subtract pattern
+ *   aptn2 - 2-bit accumulate add/subtract pattern
+ *   eptn2 - 2-bit execute add/subtract pattern
+ *   optn2 - 2-bit operand pattern
+ *   optn3 - 3-bit operand pattern
+ *   sft4  - 4-bit shift amount
+ *   strd2 - 2-bit stride amount
+ *
+ *  Prefixes:
+ *
+ *   Level of parallelism:                Operand size:
+ *    S - single operation at a time       32 - word
+ *    D - two operations in parallel       16 - half word
+ *    Q - four operations in parallel       8 - byte
+ *
+ *  Operations:
+ *
+ *   ADD   - Add or subtract
+ *   ADDC  - Add with carry-in
+ *   ACC   - Accumulate
+ *   ASUM  - Sum together then accumulate (add or subtract)
+ *   ASUMC - Sum together then accumulate (add or subtract) with carry-in
+ *   AVG   - Average between 2 operands
+ *   ABD   - Absolute difference
+ *   ALN   - Align data
+ *   AND   - Logical bitwise 'and' operation
+ *   CPS   - Copy sign
+ *   EXTR  - Extract bits
+ *   I2M   - Move from GPR register to MXU register
+ *   LDD   - Load data from memory to XRF
+ *   LDI   - Load data from memory to XRF (and increase the address base)
+ *   LUI   - Load unsigned immediate
+ *   MUL   - Multiply
+ *   MULU  - Unsigned multiply
+ *   MADD  - 64-bit operand add 32x32 product
+ *   MSUB  - 64-bit operand subtract 32x32 product
+ *   MAC   - Multiply and accumulate (add or subtract)
+ *   MAD   - Multiply and add or subtract
+ *   MAX   - Maximum between 2 operands
+ *   MIN   - Minimum between 2 operands
+ *   M2I   - Move from MXU register to GPR register
+ *   MOVZ  - Move if zero
+ *   MOVN  - Move if non-zero
+ *   NOR   - Logical bitwise 'nor' operation
+ *   OR    - Logical bitwise 'or' operation
+ *   STD   - Store data from XRF to memory
+ *   SDI   - Store data from XRF to memory (and increase the address base)
+ *   SLT   - Set of less than comparison
+ *   SAD   - Sum of absolute differences
+ *   SLL   - Logical shift left
+ *   SLR   - Logical shift right
+ *   SAR   - Arithmetic shift right
+ *   SAT   - Saturation
+ *   SFL   - Shuffle
+ *   SCOP  - Calculate x’s scope (-1, means x<0; 0, means x==0; 1, means x>0)
+ *   XOR   - Logical bitwise 'exclusive or' operation
+ *
+ *  Suffixes:
+ *
+ *   E - Expand results
+ *   F - Fixed point multiplication
+ *   L - Low part result
+ *   R - Doing rounding
+ *   V - Variable instead of immediate
+ *   W - Combine above L and V
+ *
+ *
+ *     The list of MXU instructions grouped by functionality
+ *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Load/Store instructions           Multiplication instructions
+ * -----------------------           ---------------------------
+ *
+ *  S32LDD XRa, Rb, s12               S32MADD XRa, XRd, Rs, Rt
+ *  S32STD XRa, Rb, s12               S32MADDU XRa, XRd, Rs, Rt
+ *  S32LDDV XRa, Rb, rc, strd2        S32MSUB XRa, XRd, Rs, Rt
+ *  S32STDV XRa, Rb, rc, strd2        S32MSUBU XRa, XRd, Rs, Rt
+ *  S32LDI XRa, Rb, s12               S32MUL XRa, XRd, Rs, Rt
+ *  S32SDI XRa, Rb, s12               S32MULU XRa, XRd, Rs, Rt
+ *  S32LDIV XRa, Rb, rc, strd2        D16MUL XRa, XRb, XRc, XRd, optn2
+ *  S32SDIV XRa, Rb, rc, strd2        D16MULE XRa, XRb, XRc, optn2
+ *  S32LDDR XRa, Rb, s12              D16MULF XRa, XRb, XRc, optn2
+ *  S32STDR XRa, Rb, s12              D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
+ *  S32LDDVR XRa, Rb, rc, strd2       D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
+ *  S32STDVR XRa, Rb, rc, strd2       D16MACF XRa, XRb, XRc, XRd, aptn2, optn2
+ *  S32LDIR XRa, Rb, s12              D16MADL XRa, XRb, XRc, XRd, aptn2, optn2
+ *  S32SDIR XRa, Rb, s12              S16MAD XRa, XRb, XRc, XRd, aptn1, optn2
+ *  S32LDIVR XRa, Rb, rc, strd2       Q8MUL XRa, XRb, XRc, XRd
+ *  S32SDIVR XRa, Rb, rc, strd2       Q8MULSU XRa, XRb, XRc, XRd
+ *  S16LDD XRa, Rb, s10, eptn2        Q8MAC XRa, XRb, XRc, XRd, aptn2
+ *  S16STD XRa, Rb, s10, eptn2        Q8MACSU XRa, XRb, XRc, XRd, aptn2
+ *  S16LDI XRa, Rb, s10, eptn2        Q8MADL XRa, XRb, XRc, XRd, aptn2
+ *  S16SDI XRa, Rb, s10, eptn2
+ *  S8LDD XRa, Rb, s8, eptn3
+ *  S8STD XRa, Rb, s8, eptn3         Addition and subtraction instructions
+ *  S8LDI XRa, Rb, s8, eptn3         -------------------------------------
+ *  S8SDI XRa, Rb, s8, eptn3
+ *  LXW Rd, Rs, Rt, strd2             D32ADD XRa, XRb, XRc, XRd, eptn2
+ *  LXH Rd, Rs, Rt, strd2             D32ADDC XRa, XRb, XRc, XRd
+ *  LXHU Rd, Rs, Rt, strd2            D32ACC XRa, XRb, XRc, XRd, eptn2
+ *  LXB Rd, Rs, Rt, strd2             D32ACCM XRa, XRb, XRc, XRd, eptn2
+ *  LXBU Rd, Rs, Rt, strd2            D32ASUM XRa, XRb, XRc, XRd, eptn2
+ *                                    S32CPS XRa, XRb, XRc
+ *                                    Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2
+ * Comparison instructions            Q16ACC XRa, XRb, XRc, XRd, eptn2
+ * -----------------------            Q16ACCM XRa, XRb, XRc, XRd, eptn2
+ *                                    D16ASUM XRa, XRb, XRc, XRd, eptn2
+ *  S32MAX XRa, XRb, XRc              D16CPS XRa, XRb,
+ *  S32MIN XRa, XRb, XRc              D16AVG XRa, XRb, XRc
+ *  S32SLT XRa, XRb, XRc              D16AVGR XRa, XRb, XRc
+ *  S32MOVZ XRa, XRb, XRc             Q8ADD XRa, XRb, XRc, eptn2
+ *  S32MOVN XRa, XRb, XRc             Q8ADDE XRa, XRb, XRc, XRd, eptn2
+ *  D16MAX XRa, XRb, XRc              Q8ACCE XRa, XRb, XRc, XRd, eptn2
+ *  D16MIN XRa, XRb, XRc              Q8ABD XRa, XRb, XRc
+ *  D16SLT XRa, XRb, XRc              Q8SAD XRa, XRb, XRc, XRd
+ *  D16MOVZ XRa, XRb, XRc             Q8AVG XRa, XRb, XRc
+ *  D16MOVN XRa, XRb, XRc             Q8AVGR XRa, XRb, XRc
+ *  Q8MAX XRa, XRb, XRc               D8SUM XRa, XRb, XRc, XRd
+ *  Q8MIN XRa, XRb, XRc               D8SUMC XRa, XRb, XRc, XRd
+ *  Q8SLT XRa, XRb, XRc
+ *  Q8SLTU XRa, XRb, XRc
+ *  Q8MOVZ XRa, XRb, XRc             Shift instructions
+ *  Q8MOVN XRa, XRb, XRc             ------------------
+ *
+ *                                    D32SLL XRa, XRb, XRc, XRd, sft4
+ * Bitwise instructions               D32SLR XRa, XRb, XRc, XRd, sft4
+ * --------------------               D32SAR XRa, XRb, XRc, XRd, sft4
+ *                                    D32SARL XRa, XRb, XRc, sft4
+ *  S32NOR XRa, XRb, XRc              D32SLLV XRa, XRb, Rb
+ *  S32AND XRa, XRb, XRc              D32SLRV XRa, XRb, Rb
+ *  S32XOR XRa, XRb, XRc              D32SARV XRa, XRb, Rb
+ *  S32OR XRa, XRb, XRc               D32SARW XRa, XRb, XRc, Rb
+ *                                    Q16SLL XRa, XRb, XRc, XRd, sft4
+ *                                    Q16SLR XRa, XRb, XRc, XRd, sft4
+ * Miscellaneous instructions         Q16SAR XRa, XRb, XRc, XRd, sft4
+ * -------------------------          Q16SLLV XRa, XRb, Rb
+ *                                    Q16SLRV XRa, XRb, Rb
+ *  S32SFL XRa, XRb, XRc, XRd, optn2  Q16SARV XRa, XRb, Rb
+ *  S32ALN XRa, XRb, XRc, Rb
+ *  S32ALNI XRa, XRb, XRc, s3
+ *  S32LUI XRa, s8, optn3            Move instructions
+ *  S32EXTR XRa, XRb, Rb, bits5      -----------------
+ *  S32EXTRV XRa, XRb, Rs, Rt
+ *  Q16SCOP XRa, XRb, XRc, XRd        S32M2I XRa, Rb
+ *  Q16SAT XRa, XRb, XRc              S32I2M XRa, Rb
+ *
+ *
+ *     The opcode organization of MXU instructions
+ *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * The bits 31..26 of all MXU instructions are equal to 0x1C (also referred
+ * as opcode SPECIAL2 in the base MIPS ISA). The organization and meaning of
+ * other bits up to the instruction level is as follows:
+ *
+ *              bits
+ *             05..00
+ *
+ *          ┌─ 000000 ─ OPC_MXU_S32MADD
+ *          ├─ 000001 ─ OPC_MXU_S32MADDU
+ *          ├─ 000010 ─ <not assigned>   (non-MXU OPC_MUL)
+ *          │
+ *          │                               20..18
+ *          ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX
+ *          │                            ├─ 001 ─ OPC_MXU_S32MIN
+ *          │                            ├─ 010 ─ OPC_MXU_D16MAX
+ *          │                            ├─ 011 ─ OPC_MXU_D16MIN
+ *          │                            ├─ 100 ─ OPC_MXU_Q8MAX
+ *          │                            ├─ 101 ─ OPC_MXU_Q8MIN
+ *          │                            ├─ 110 ─ OPC_MXU_Q8SLT
+ *          │                            └─ 111 ─ OPC_MXU_Q8SLTU
+ *          ├─ 000100 ─ OPC_MXU_S32MSUB
+ *          ├─ 000101 ─ OPC_MXU_S32MSUBU    20..18
+ *          ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT
+ *          │                            ├─ 001 ─ OPC_MXU_D16SLT
+ *          │                            ├─ 010 ─ OPC_MXU_D16AVG
+ *          │                            ├─ 011 ─ OPC_MXU_D16AVGR
+ *          │                            ├─ 100 ─ OPC_MXU_Q8AVG
+ *          │                            ├─ 101 ─ OPC_MXU_Q8AVGR
+ *          │                            └─ 111 ─ OPC_MXU_Q8ADD
+ *          │
+ *          │                               20..18
+ *          ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS
+ *          │                            ├─ 010 ─ OPC_MXU_D16CPS
+ *          │                            ├─ 100 ─ OPC_MXU_Q8ABD
+ *          │                            └─ 110 ─ OPC_MXU_Q16SAT
+ *          ├─ 001000 ─ OPC_MXU_D16MUL
+ *          │                               25..24
+ *          ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF
+ *          │                            └─ 01 ─ OPC_MXU_D16MULE
+ *          ├─ 001010 ─ OPC_MXU_D16MAC
+ *          ├─ 001011 ─ OPC_MXU_D16MACF
+ *          ├─ 001100 ─ OPC_MXU_D16MADL
+ *          ├─ 001101 ─ OPC_MXU_S16MAD
+ *          ├─ 001110 ─ OPC_MXU_Q16ADD
+ *          ├─ 001111 ─ OPC_MXU_D16MACE     23
+ *          │                            ┌─ 0 ─ OPC_MXU_S32LDD
+ *          ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR
+ *          │
+ *          │                               23
+ *          ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD
+ *          │                            └─ 1 ─ OPC_MXU_S32STDR
+ *          │
+ *          │                               13..10
+ *          ├─ 010010 ─ OPC_MXU__POOL06 ─┬─ 0000 ─ OPC_MXU_S32LDDV
+ *          │                            └─ 0001 ─ OPC_MXU_S32LDDVR
+ *          │
+ *          │                               13..10
+ *          ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV
+ *          │                            └─ 0001 ─ OPC_MXU_S32STDVR
+ *          │
+ *          │                               23
+ *          ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI
+ *          │                            └─ 1 ─ OPC_MXU_S32LDIR
+ *          │
+ *          │                               23
+ *          ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI
+ *          │                            └─ 1 ─ OPC_MXU_S32SDIR
+ *          │
+ *          │                               13..10
+ *          ├─ 010110 ─ OPC_MXU__POOL10 ─┬─ 0000 ─ OPC_MXU_S32LDIV
+ *          │                            └─ 0001 ─ OPC_MXU_S32LDIVR
+ *          │
+ *          │                               13..10
+ *          ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV
+ *          │                            └─ 0001 ─ OPC_MXU_S32SDIVR
+ *          ├─ 011000 ─ OPC_MXU_D32ADD
+ *          │                               23..22
+ *   MXU    ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC
+ * opcodes ─┤                            ├─ 01 ─ OPC_MXU_D32ACCM
+ *          │                            └─ 10 ─ OPC_MXU_D32ASUM
+ *          ├─ 011010 ─ <not assigned>
+ *          │                               23..22
+ *          ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC
+ *          │                            ├─ 01 ─ OPC_MXU_Q16ACCM
+ *          │                            └─ 10 ─ OPC_MXU_Q16ASUM
+ *          │
+ *          │                               23..22
+ *          ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE
+ *          │                            ├─ 01 ─ OPC_MXU_D8SUM
+ *          ├─ 011101 ─ OPC_MXU_Q8ACCE   └─ 10 ─ OPC_MXU_D8SUMC
+ *          ├─ 011110 ─ <not assigned>
+ *          ├─ 011111 ─ <not assigned>
+ *          ├─ 100000 ─ <not assigned>   (overlaps with CLZ)
+ *          ├─ 100001 ─ <not assigned>   (overlaps with CLO)
+ *          ├─ 100010 ─ OPC_MXU_S8LDD
+ *          ├─ 100011 ─ OPC_MXU_S8STD       15..14
+ *          ├─ 100100 ─ OPC_MXU_S8LDI    ┌─ 00 ─ OPC_MXU_S32MUL
+ *          ├─ 100101 ─ OPC_MXU_S8SDI    ├─ 00 ─ OPC_MXU_S32MULU
+ *          │                            ├─ 00 ─ OPC_MXU_S32EXTR
+ *          ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 00 ─ OPC_MXU_S32EXTRV
+ *          │
+ *          │                               20..18
+ *          ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW
+ *          │                            ├─ 001 ─ OPC_MXU_S32ALN
+ *          │                            ├─ 010 ─ OPC_MXU_S32ALNI
+ *          │                            ├─ 011 ─ OPC_MXU_S32LUI
+ *          │                            ├─ 100 ─ OPC_MXU_S32NOR
+ *          │                            ├─ 101 ─ OPC_MXU_S32AND
+ *          │                            ├─ 110 ─ OPC_MXU_S32OR
+ *          │                            └─ 111 ─ OPC_MXU_S32XOR
+ *          │
+ *          │                               7..5
+ *          ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB
+ *          │                            ├─ 001 ─ OPC_MXU_LXH
+ *          ├─ 101001 ─ <not assigned>   ├─ 011 ─ OPC_MXU_LXW
+ *          ├─ 101010 ─ OPC_MXU_S16LDD   ├─ 100 ─ OPC_MXU_LXBU
+ *          ├─ 101011 ─ OPC_MXU_S16STD   └─ 101 ─ OPC_MXU_LXHU
+ *          ├─ 101100 ─ OPC_MXU_S16LDI
+ *          ├─ 101101 ─ OPC_MXU_S16SDI
+ *          ├─ 101110 ─ OPC_MXU_S32M2I
+ *          ├─ 101111 ─ OPC_MXU_S32I2M
+ *          ├─ 110000 ─ OPC_MXU_D32SLL
+ *          ├─ 110001 ─ OPC_MXU_D32SLR      20..18
+ *          ├─ 110010 ─ OPC_MXU_D32SARL  ┌─ 000 ─ OPC_MXU_D32SLLV
+ *          ├─ 110011 ─ OPC_MXU_D32SAR   ├─ 001 ─ OPC_MXU_D32SLRV
+ *          ├─ 110100 ─ OPC_MXU_Q16SLL   ├─ 010 ─ OPC_MXU_D32SARV
+ *          ├─ 110101 ─ OPC_MXU_Q16SLR   ├─ 011 ─ OPC_MXU_Q16SLLV
+ *          │                            ├─ 100 ─ OPC_MXU_Q16SLRV
+ *          ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 101 ─ OPC_MXU_Q16SARV
+ *          │
+ *          ├─ 110111 ─ OPC_MXU_Q16SAR
+ *          │                               23..22
+ *          ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
+ *          │                            └─ 01 ─ OPC_MXU_Q8MULSU
+ *          │
+ *          │                               20..18
+ *          ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
+ *          │                            ├─ 001 ─ OPC_MXU_Q8MOVN
+ *          │                            ├─ 010 ─ OPC_MXU_D16MOVZ
+ *          │                            ├─ 011 ─ OPC_MXU_D16MOVN
+ *          │                            ├─ 100 ─ OPC_MXU_S32MOVZ
+ *          │                            └─ 101 ─ OPC_MXU_S32MOVN
+ *          │
+ *          │                               23..22
+ *          ├─ 111010 ─ OPC_MXU__POOL21 ─┬─ 00 ─ OPC_MXU_Q8MAC
+ *          │                            └─ 10 ─ OPC_MXU_Q8MACSU
+ *          ├─ 111011 ─ OPC_MXU_Q16SCOP
+ *          ├─ 111100 ─ OPC_MXU_Q8MADL
+ *          ├─ 111101 ─ OPC_MXU_S32SFL
+ *          ├─ 111110 ─ OPC_MXU_Q8SAD
+ *          └─ 111111 ─ <not assigned>   (overlaps with SDBBP)
+ *
+ *
+ * Compiled after:
+ *
+ *   "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
+ *   Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
+ */
+
+enum {
+    OPC_MXU__POOL00  = 0x03,
+    OPC_MXU_D16MUL   = 0x08,
+    OPC_MXU_D16MAC   = 0x0A,
+    OPC_MXU__POOL04  = 0x10,
+    OPC_MXU_S8LDD    = 0x22,
+    OPC_MXU__POOL16  = 0x27,
+    OPC_MXU_S32M2I   = 0x2E,
+    OPC_MXU_S32I2M   = 0x2F,
+    OPC_MXU__POOL19  = 0x38,
+};
+
+
+/*
+ * MXU pool 00
+ */
+enum {
+    OPC_MXU_S32MAX   = 0x00,
+    OPC_MXU_S32MIN   = 0x01,
+    OPC_MXU_D16MAX   = 0x02,
+    OPC_MXU_D16MIN   = 0x03,
+    OPC_MXU_Q8MAX    = 0x04,
+    OPC_MXU_Q8MIN    = 0x05,
+};
+
+/*
+ * MXU pool 04
+ */
+enum {
+    OPC_MXU_S32LDD   = 0x00,
+    OPC_MXU_S32LDDR  = 0x01,
+};
+
+/*
+ * MXU pool 16
+ */
+enum {
+    OPC_MXU_S32ALNI  = 0x02,
+    OPC_MXU_S32NOR   = 0x04,
+    OPC_MXU_S32AND   = 0x05,
+    OPC_MXU_S32OR    = 0x06,
+    OPC_MXU_S32XOR   = 0x07,
+};
+
+/*
+ * MXU pool 19
+ */
+enum {
+    OPC_MXU_Q8MUL    = 0x00,
+    OPC_MXU_Q8MULSU  = 0x01,
+};
+
+/* MXU accumulate add/subtract 1-bit pattern 'aptn1' */
+#define MXU_APTN1_A    0
+#define MXU_APTN1_S    1
+
+/* MXU accumulate add/subtract 2-bit pattern 'aptn2' */
+#define MXU_APTN2_AA    0
+#define MXU_APTN2_AS    1
+#define MXU_APTN2_SA    2
+#define MXU_APTN2_SS    3
+
+/* MXU execute add/subtract 2-bit pattern 'eptn2' */
+#define MXU_EPTN2_AA    0
+#define MXU_EPTN2_AS    1
+#define MXU_EPTN2_SA    2
+#define MXU_EPTN2_SS    3
+
+/* MXU operand getting pattern 'optn2' */
+#define MXU_OPTN2_PTN0  0
+#define MXU_OPTN2_PTN1  1
+#define MXU_OPTN2_PTN2  2
+#define MXU_OPTN2_PTN3  3
+/* alternative naming scheme for 'optn2' */
+#define MXU_OPTN2_WW    0
+#define MXU_OPTN2_LW    1
+#define MXU_OPTN2_HW    2
+#define MXU_OPTN2_XW    3
+
+/* MXU operand getting pattern 'optn3' */
+#define MXU_OPTN3_PTN0  0
+#define MXU_OPTN3_PTN1  1
+#define MXU_OPTN3_PTN2  2
+#define MXU_OPTN3_PTN3  3
+#define MXU_OPTN3_PTN4  4
+#define MXU_OPTN3_PTN5  5
+#define MXU_OPTN3_PTN6  6
+#define MXU_OPTN3_PTN7  7
+
+/* MXU registers */
+static TCGv mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1];
+static TCGv mxu_CR;
+
+static const char * const mxuregnames[] = {
+    "XR1",  "XR2",  "XR3",  "XR4",  "XR5",  "XR6",  "XR7",  "XR8",
+    "XR9",  "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "MXU_CR",
+};
+
+void mxu_translate_init(void)
+{
+    for (unsigned i = 0; i < NUMBER_OF_MXU_REGISTERS - 1; i++) {
+        mxu_gpr[i] = tcg_global_mem_new(cpu_env,
+                                        offsetof(CPUMIPSState, active_tc.mxu_gpr[i]),
+                                        mxuregnames[i]);
+    }
+
+    mxu_CR = tcg_global_mem_new(cpu_env,
+                                offsetof(CPUMIPSState, active_tc.mxu_cr),
+                                mxuregnames[NUMBER_OF_MXU_REGISTERS - 1]);
+}
+
+/* MXU General purpose registers moves. */
+static inline void gen_load_mxu_gpr(TCGv t, unsigned int reg)
+{
+    if (reg == 0) {
+        tcg_gen_movi_tl(t, 0);
+    } else if (reg <= 15) {
+        tcg_gen_mov_tl(t, mxu_gpr[reg - 1]);
+    }
+}
+
+static inline void gen_store_mxu_gpr(TCGv t, unsigned int reg)
+{
+    if (reg > 0 && reg <= 15) {
+        tcg_gen_mov_tl(mxu_gpr[reg - 1], t);
+    }
+}
+
+/* MXU control register moves. */
+static inline void gen_load_mxu_cr(TCGv t)
+{
+    tcg_gen_mov_tl(t, mxu_CR);
+}
+
+static inline void gen_store_mxu_cr(TCGv t)
+{
+    /* TODO: Add handling of RW rules for MXU_CR. */
+    tcg_gen_mov_tl(mxu_CR, t);
+}
+
+/*
+ * S32I2M XRa, rb - Register move from GRF to XRF
+ */
+static void gen_mxu_s32i2m(DisasContext *ctx)
+{
+    TCGv t0;
+    uint32_t XRa, Rb;
+
+    t0 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 5);
+    Rb = extract32(ctx->opcode, 16, 5);
+
+    gen_load_gpr(t0, Rb);
+    if (XRa <= 15) {
+        gen_store_mxu_gpr(t0, XRa);
+    } else if (XRa == 16) {
+        gen_store_mxu_cr(t0);
+    }
+
+    tcg_temp_free(t0);
+}
+
+/*
+ * S32M2I XRa, rb - Register move from XRF to GRF
+ */
+static void gen_mxu_s32m2i(DisasContext *ctx)
+{
+    TCGv t0;
+    uint32_t XRa, Rb;
+
+    t0 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 5);
+    Rb = extract32(ctx->opcode, 16, 5);
+
+    if (XRa <= 15) {
+        gen_load_mxu_gpr(t0, XRa);
+    } else if (XRa == 16) {
+        gen_load_mxu_cr(t0);
+    }
+
+    gen_store_gpr(t0, Rb);
+
+    tcg_temp_free(t0);
+}
+
+/*
+ * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
+ */
+static void gen_mxu_s8ldd(DisasContext *ctx)
+{
+    TCGv t0, t1;
+    uint32_t XRa, Rb, s8, optn3;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    s8 = extract32(ctx->opcode, 10, 8);
+    optn3 = extract32(ctx->opcode, 18, 3);
+    Rb = extract32(ctx->opcode, 21, 5);
+
+    gen_load_gpr(t0, Rb);
+    tcg_gen_addi_tl(t0, t0, (int8_t)s8);
+
+    switch (optn3) {
+    /* XRa[7:0] = tmp8 */
+    case MXU_OPTN3_PTN0:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
+        gen_load_mxu_gpr(t0, XRa);
+        tcg_gen_deposit_tl(t0, t0, t1, 0, 8);
+        break;
+    /* XRa[15:8] = tmp8 */
+    case MXU_OPTN3_PTN1:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
+        gen_load_mxu_gpr(t0, XRa);
+        tcg_gen_deposit_tl(t0, t0, t1, 8, 8);
+        break;
+    /* XRa[23:16] = tmp8 */
+    case MXU_OPTN3_PTN2:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
+        gen_load_mxu_gpr(t0, XRa);
+        tcg_gen_deposit_tl(t0, t0, t1, 16, 8);
+        break;
+    /* XRa[31:24] = tmp8 */
+    case MXU_OPTN3_PTN3:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
+        gen_load_mxu_gpr(t0, XRa);
+        tcg_gen_deposit_tl(t0, t0, t1, 24, 8);
+        break;
+    /* XRa = {8'b0, tmp8, 8'b0, tmp8} */
+    case MXU_OPTN3_PTN4:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
+        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
+        break;
+    /* XRa = {tmp8, 8'b0, tmp8, 8'b0} */
+    case MXU_OPTN3_PTN5:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
+        tcg_gen_shli_tl(t1, t1, 8);
+        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
+        break;
+    /* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */
+    case MXU_OPTN3_PTN6:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_SB);
+        tcg_gen_mov_tl(t0, t1);
+        tcg_gen_andi_tl(t0, t0, 0xFF00FFFF);
+        tcg_gen_shli_tl(t1, t1, 16);
+        tcg_gen_or_tl(t0, t0, t1);
+        break;
+    /* XRa = {tmp8, tmp8, tmp8, tmp8} */
+    case MXU_OPTN3_PTN7:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
+        tcg_gen_deposit_tl(t1, t1, t1, 8, 8);
+        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
+        break;
+    }
+
+    gen_store_mxu_gpr(t0, XRa);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+
+/*
+ * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
+ */
+static void gen_mxu_d16mul(DisasContext *ctx)
+{
+    TCGv t0, t1, t2, t3;
+    uint32_t XRa, XRb, XRc, XRd, optn2;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    t2 = tcg_temp_new();
+    t3 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRd = extract32(ctx->opcode, 18, 4);
+    optn2 = extract32(ctx->opcode, 22, 2);
+
+    gen_load_mxu_gpr(t1, XRb);
+    tcg_gen_sextract_tl(t0, t1, 0, 16);
+    tcg_gen_sextract_tl(t1, t1, 16, 16);
+    gen_load_mxu_gpr(t3, XRc);
+    tcg_gen_sextract_tl(t2, t3, 0, 16);
+    tcg_gen_sextract_tl(t3, t3, 16, 16);
+
+    switch (optn2) {
+    case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t1, t3);
+        tcg_gen_mul_tl(t2, t0, t2);
+        break;
+    case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t0, t3);
+        tcg_gen_mul_tl(t2, t0, t2);
+        break;
+    case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t1, t3);
+        tcg_gen_mul_tl(t2, t1, t2);
+        break;
+    case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t0, t3);
+        tcg_gen_mul_tl(t2, t1, t2);
+        break;
+    }
+    gen_store_mxu_gpr(t3, XRa);
+    gen_store_mxu_gpr(t2, XRd);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+    tcg_temp_free(t3);
+}
+
+/*
+ * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 - Signed 16 bit pattern multiply
+ *                                           and accumulate
+ */
+static void gen_mxu_d16mac(DisasContext *ctx)
+{
+    TCGv t0, t1, t2, t3;
+    uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    t2 = tcg_temp_new();
+    t3 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRd = extract32(ctx->opcode, 18, 4);
+    optn2 = extract32(ctx->opcode, 22, 2);
+    aptn2 = extract32(ctx->opcode, 24, 2);
+
+    gen_load_mxu_gpr(t1, XRb);
+    tcg_gen_sextract_tl(t0, t1, 0, 16);
+    tcg_gen_sextract_tl(t1, t1, 16, 16);
+
+    gen_load_mxu_gpr(t3, XRc);
+    tcg_gen_sextract_tl(t2, t3, 0, 16);
+    tcg_gen_sextract_tl(t3, t3, 16, 16);
+
+    switch (optn2) {
+    case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t1, t3);
+        tcg_gen_mul_tl(t2, t0, t2);
+        break;
+    case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t0, t3);
+        tcg_gen_mul_tl(t2, t0, t2);
+        break;
+    case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t1, t3);
+        tcg_gen_mul_tl(t2, t1, t2);
+        break;
+    case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t0, t3);
+        tcg_gen_mul_tl(t2, t1, t2);
+        break;
+    }
+    gen_load_mxu_gpr(t0, XRa);
+    gen_load_mxu_gpr(t1, XRd);
+
+    switch (aptn2) {
+    case MXU_APTN2_AA:
+        tcg_gen_add_tl(t3, t0, t3);
+        tcg_gen_add_tl(t2, t1, t2);
+        break;
+    case MXU_APTN2_AS:
+        tcg_gen_add_tl(t3, t0, t3);
+        tcg_gen_sub_tl(t2, t1, t2);
+        break;
+    case MXU_APTN2_SA:
+        tcg_gen_sub_tl(t3, t0, t3);
+        tcg_gen_add_tl(t2, t1, t2);
+        break;
+    case MXU_APTN2_SS:
+        tcg_gen_sub_tl(t3, t0, t3);
+        tcg_gen_sub_tl(t2, t1, t2);
+        break;
+    }
+    gen_store_mxu_gpr(t3, XRa);
+    gen_store_mxu_gpr(t2, XRd);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+    tcg_temp_free(t3);
+}
+
+/*
+ * Q8MUL   XRa, XRb, XRc, XRd - Parallel unsigned 8 bit pattern multiply
+ * Q8MULSU XRa, XRb, XRc, XRd - Parallel signed 8 bit pattern multiply
+ */
+static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx)
+{
+    TCGv t0, t1, t2, t3, t4, t5, t6, t7;
+    uint32_t XRa, XRb, XRc, XRd, sel;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    t2 = tcg_temp_new();
+    t3 = tcg_temp_new();
+    t4 = tcg_temp_new();
+    t5 = tcg_temp_new();
+    t6 = tcg_temp_new();
+    t7 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRd = extract32(ctx->opcode, 18, 4);
+    sel = extract32(ctx->opcode, 22, 2);
+
+    gen_load_mxu_gpr(t3, XRb);
+    gen_load_mxu_gpr(t7, XRc);
+
+    if (sel == 0x2) {
+        /* Q8MULSU */
+        tcg_gen_ext8s_tl(t0, t3);
+        tcg_gen_shri_tl(t3, t3, 8);
+        tcg_gen_ext8s_tl(t1, t3);
+        tcg_gen_shri_tl(t3, t3, 8);
+        tcg_gen_ext8s_tl(t2, t3);
+        tcg_gen_shri_tl(t3, t3, 8);
+        tcg_gen_ext8s_tl(t3, t3);
+    } else {
+        /* Q8MUL */
+        tcg_gen_ext8u_tl(t0, t3);
+        tcg_gen_shri_tl(t3, t3, 8);
+        tcg_gen_ext8u_tl(t1, t3);
+        tcg_gen_shri_tl(t3, t3, 8);
+        tcg_gen_ext8u_tl(t2, t3);
+        tcg_gen_shri_tl(t3, t3, 8);
+        tcg_gen_ext8u_tl(t3, t3);
+    }
+
+    tcg_gen_ext8u_tl(t4, t7);
+    tcg_gen_shri_tl(t7, t7, 8);
+    tcg_gen_ext8u_tl(t5, t7);
+    tcg_gen_shri_tl(t7, t7, 8);
+    tcg_gen_ext8u_tl(t6, t7);
+    tcg_gen_shri_tl(t7, t7, 8);
+    tcg_gen_ext8u_tl(t7, t7);
+
+    tcg_gen_mul_tl(t0, t0, t4);
+    tcg_gen_mul_tl(t1, t1, t5);
+    tcg_gen_mul_tl(t2, t2, t6);
+    tcg_gen_mul_tl(t3, t3, t7);
+
+    tcg_gen_andi_tl(t0, t0, 0xFFFF);
+    tcg_gen_andi_tl(t1, t1, 0xFFFF);
+    tcg_gen_andi_tl(t2, t2, 0xFFFF);
+    tcg_gen_andi_tl(t3, t3, 0xFFFF);
+
+    tcg_gen_shli_tl(t1, t1, 16);
+    tcg_gen_shli_tl(t3, t3, 16);
+
+    tcg_gen_or_tl(t0, t0, t1);
+    tcg_gen_or_tl(t1, t2, t3);
+
+    gen_store_mxu_gpr(t0, XRd);
+    gen_store_mxu_gpr(t1, XRa);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+    tcg_temp_free(t3);
+    tcg_temp_free(t4);
+    tcg_temp_free(t5);
+    tcg_temp_free(t6);
+    tcg_temp_free(t7);
+}
+
+/*
+ * S32LDD  XRa, Rb, S12 - Load a word from memory to XRF
+ * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF, reversed byte seq.
+ */
+static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx)
+{
+    TCGv t0, t1;
+    uint32_t XRa, Rb, s12, sel;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    s12 = extract32(ctx->opcode, 10, 10);
+    sel = extract32(ctx->opcode, 20, 1);
+    Rb = extract32(ctx->opcode, 21, 5);
+
+    gen_load_gpr(t0, Rb);
+
+    tcg_gen_movi_tl(t1, s12);
+    tcg_gen_shli_tl(t1, t1, 2);
+    if (s12 & 0x200) {
+        tcg_gen_ori_tl(t1, t1, 0xFFFFF000);
+    }
+    tcg_gen_add_tl(t1, t0, t1);
+    tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_SL);
+
+    if (sel == 1) {
+        /* S32LDDR */
+        tcg_gen_bswap32_tl(t1, t1);
+    }
+    gen_store_mxu_gpr(t1, XRa);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+
+
+/*
+ *                 MXU instruction category: logic
+ *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *               S32NOR    S32AND    S32OR    S32XOR
+ */
+
+/*
+ *  S32NOR XRa, XRb, XRc
+ *    Update XRa with the result of logical bitwise 'nor' operation
+ *    applied to the content of XRb and XRc.
+ */
+static void gen_mxu_S32NOR(DisasContext *ctx)
+{
+    uint32_t pad, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 5);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to all 1s */
+        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0xFFFFFFFF);
+    } else if (unlikely(XRb == 0)) {
+        /* XRb zero register -> just set destination to the negation of XRc */
+        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
+    } else if (unlikely(XRc == 0)) {
+        /* XRa zero register -> just set destination to the negation of XRb */
+        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+    } else if (unlikely(XRb == XRc)) {
+        /* both operands same -> just set destination to the negation of XRb */
+        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+    } else {
+        /* the most general case */
+        tcg_gen_nor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
+    }
+}
+
+/*
+ *  S32AND XRa, XRb, XRc
+ *    Update XRa with the result of logical bitwise 'and' operation
+ *    applied to the content of XRb and XRc.
+ */
+static void gen_mxu_S32AND(DisasContext *ctx)
+{
+    uint32_t pad, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 5);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) || (XRc == 0))) {
+        /* one of operands zero register -> just set destination to all 0s */
+        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
+    } else if (unlikely(XRb == XRc)) {
+        /* both operands same -> just set destination to one of them */
+        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+    } else {
+        /* the most general case */
+        tcg_gen_and_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
+    }
+}
+
+/*
+ *  S32OR XRa, XRb, XRc
+ *    Update XRa with the result of logical bitwise 'or' operation
+ *    applied to the content of XRb and XRc.
+ */
+static void gen_mxu_S32OR(DisasContext *ctx)
+{
+    uint32_t pad, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 5);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to all 0s */
+        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
+    } else if (unlikely(XRb == 0)) {
+        /* XRb zero register -> just set destination to the content of XRc */
+        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
+    } else if (unlikely(XRc == 0)) {
+        /* XRc zero register -> just set destination to the content of XRb */
+        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+    } else if (unlikely(XRb == XRc)) {
+        /* both operands same -> just set destination to one of them */
+        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+    } else {
+        /* the most general case */
+        tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
+    }
+}
+
+/*
+ *  S32XOR XRa, XRb, XRc
+ *    Update XRa with the result of logical bitwise 'xor' operation
+ *    applied to the content of XRb and XRc.
+ */
+static void gen_mxu_S32XOR(DisasContext *ctx)
+{
+    uint32_t pad, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 5);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to all 0s */
+        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
+    } else if (unlikely(XRb == 0)) {
+        /* XRb zero register -> just set destination to the content of XRc */
+        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
+    } else if (unlikely(XRc == 0)) {
+        /* XRc zero register -> just set destination to the content of XRb */
+        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+    } else if (unlikely(XRb == XRc)) {
+        /* both operands same -> just set destination to all 0s */
+        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
+    } else {
+        /* the most general case */
+        tcg_gen_xor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
+    }
+}
+
+
+/*
+ *                   MXU instruction category max/min
+ *                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *                     S32MAX     D16MAX     Q8MAX
+ *                     S32MIN     D16MIN     Q8MIN
+ */
+
+/*
+ *  S32MAX XRa, XRb, XRc
+ *    Update XRa with the maximum of signed 32-bit integers contained
+ *    in XRb and XRc.
+ *
+ *  S32MIN XRa, XRb, XRc
+ *    Update XRa with the minimum of signed 32-bit integers contained
+ *    in XRb and XRc.
+ */
+static void gen_mxu_S32MAX_S32MIN(DisasContext *ctx)
+{
+    uint32_t pad, opc, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 5);
+    opc = extract32(ctx->opcode, 18, 3);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to zero */
+        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
+    } else if (unlikely((XRb == 0) || (XRc == 0))) {
+        /* exactly one operand is zero register - find which one is not...*/
+        uint32_t XRx = XRb ? XRb : XRc;
+        /* ...and do max/min operation with one operand 0 */
+        if (opc == OPC_MXU_S32MAX) {
+            tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
+        } else {
+            tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
+        }
+    } else if (unlikely(XRb == XRc)) {
+        /* both operands same -> just set destination to one of them */
+        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+    } else {
+        /* the most general case */
+        if (opc == OPC_MXU_S32MAX) {
+            tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
+                                               mxu_gpr[XRc - 1]);
+        } else {
+            tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
+                                               mxu_gpr[XRc - 1]);
+        }
+    }
+}
+
+/*
+ *  D16MAX
+ *    Update XRa with the 16-bit-wise maximums of signed integers
+ *    contained in XRb and XRc.
+ *
+ *  D16MIN
+ *    Update XRa with the 16-bit-wise minimums of signed integers
+ *    contained in XRb and XRc.
+ */
+static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
+{
+    uint32_t pad, opc, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 5);
+    opc = extract32(ctx->opcode, 18, 3);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRc == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRa == 0))) {
+        /* both operands zero registers -> just set destination to zero */
+        tcg_gen_movi_i32(mxu_gpr[XRc - 1], 0);
+    } else if (unlikely((XRb == 0) || (XRa == 0))) {
+        /* exactly one operand is zero register - find which one is not...*/
+        uint32_t XRx = XRb ? XRb : XRc;
+        /* ...and do half-word-wise max/min with one operand 0 */
+        TCGv_i32 t0 = tcg_temp_new();
+        TCGv_i32 t1 = tcg_const_i32(0);
+
+        /* the left half-word first */
+        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
+        if (opc == OPC_MXU_D16MAX) {
+            tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+        } else {
+            tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+        }
+
+        /* the right half-word */
+        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0x0000FFFF);
+        /* move half-words to the leftmost position */
+        tcg_gen_shli_i32(t0, t0, 16);
+        /* t0 will be max/min of t0 and t1 */
+        if (opc == OPC_MXU_D16MAX) {
+            tcg_gen_smax_i32(t0, t0, t1);
+        } else {
+            tcg_gen_smin_i32(t0, t0, t1);
+        }
+        /* return resulting half-words to its original position */
+        tcg_gen_shri_i32(t0, t0, 16);
+        /* finally update the destination */
+        tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+
+        tcg_temp_free(t1);
+        tcg_temp_free(t0);
+    } else if (unlikely(XRb == XRc)) {
+        /* both operands same -> just set destination to one of them */
+        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+    } else {
+        /* the most general case */
+        TCGv_i32 t0 = tcg_temp_new();
+        TCGv_i32 t1 = tcg_temp_new();
+
+        /* the left half-word first */
+        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
+        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
+        if (opc == OPC_MXU_D16MAX) {
+            tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+        } else {
+            tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+        }
+
+        /* the right half-word */
+        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
+        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0x0000FFFF);
+        /* move half-words to the leftmost position */
+        tcg_gen_shli_i32(t0, t0, 16);
+        tcg_gen_shli_i32(t1, t1, 16);
+        /* t0 will be max/min of t0 and t1 */
+        if (opc == OPC_MXU_D16MAX) {
+            tcg_gen_smax_i32(t0, t0, t1);
+        } else {
+            tcg_gen_smin_i32(t0, t0, t1);
+        }
+        /* return resulting half-words to its original position */
+        tcg_gen_shri_i32(t0, t0, 16);
+        /* finally update the destination */
+        tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+
+        tcg_temp_free(t1);
+        tcg_temp_free(t0);
+    }
+}
+
+/*
+ *  Q8MAX
+ *    Update XRa with the 8-bit-wise maximums of signed integers
+ *    contained in XRb and XRc.
+ *
+ *  Q8MIN
+ *    Update XRa with the 8-bit-wise minimums of signed integers
+ *    contained in XRb and XRc.
+ */
+static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
+{
+    uint32_t pad, opc, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 5);
+    opc = extract32(ctx->opcode, 18, 3);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to zero */
+        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
+    } else if (unlikely((XRb == 0) || (XRc == 0))) {
+        /* exactly one operand is zero register - make it be the first...*/
+        uint32_t XRx = XRb ? XRb : XRc;
+        /* ...and do byte-wise max/min with one operand 0 */
+        TCGv_i32 t0 = tcg_temp_new();
+        TCGv_i32 t1 = tcg_const_i32(0);
+        int32_t i;
+
+        /* the leftmost byte (byte 3) first */
+        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000);
+        if (opc == OPC_MXU_Q8MAX) {
+            tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+        } else {
+            tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+        }
+
+        /* bytes 2, 1, 0 */
+        for (i = 2; i >= 0; i--) {
+            /* extract the byte */
+            tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF << (8 * i));
+            /* move the byte to the leftmost position */
+            tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
+            /* t0 will be max/min of t0 and t1 */
+            if (opc == OPC_MXU_Q8MAX) {
+                tcg_gen_smax_i32(t0, t0, t1);
+            } else {
+                tcg_gen_smin_i32(t0, t0, t1);
+            }
+            /* return resulting byte to its original position */
+            tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
+            /* finally update the destination */
+            tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+        }
+
+        tcg_temp_free(t1);
+        tcg_temp_free(t0);
+    } else if (unlikely(XRb == XRc)) {
+        /* both operands same -> just set destination to one of them */
+        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+    } else {
+        /* the most general case */
+        TCGv_i32 t0 = tcg_temp_new();
+        TCGv_i32 t1 = tcg_temp_new();
+        int32_t i;
+
+        /* the leftmost bytes (bytes 3) first */
+        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000);
+        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
+        if (opc == OPC_MXU_Q8MAX) {
+            tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+        } else {
+            tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+        }
+
+        /* bytes 2, 1, 0 */
+        for (i = 2; i >= 0; i--) {
+            /* extract corresponding bytes */
+            tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF << (8 * i));
+            tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF << (8 * i));
+            /* move the bytes to the leftmost position */
+            tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
+            tcg_gen_shli_i32(t1, t1, 8 * (3 - i));
+            /* t0 will be max/min of t0 and t1 */
+            if (opc == OPC_MXU_Q8MAX) {
+                tcg_gen_smax_i32(t0, t0, t1);
+            } else {
+                tcg_gen_smin_i32(t0, t0, t1);
+            }
+            /* return resulting byte to its original position */
+            tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
+            /* finally update the destination */
+            tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+        }
+
+        tcg_temp_free(t1);
+        tcg_temp_free(t0);
+    }
+}
+
+
+/*
+ *                 MXU instruction category: align
+ *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *                       S32ALN     S32ALNI
+ */
+
+/*
+ *  S32ALNI XRc, XRb, XRa, optn3
+ *    Arrange bytes from XRb and XRc according to one of five sets of
+ *    rules determined by optn3, and place the result in XRa.
+ */
+static void gen_mxu_S32ALNI(DisasContext *ctx)
+{
+    uint32_t optn3, pad, XRc, XRb, XRa;
+
+    optn3 = extract32(ctx->opcode,  23, 3);
+    pad   = extract32(ctx->opcode,  21, 2);
+    XRc   = extract32(ctx->opcode, 14, 4);
+    XRb   = extract32(ctx->opcode, 10, 4);
+    XRa   = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to all 0s */
+        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
+    } else if (unlikely(XRb == 0)) {
+        /* XRb zero register -> just appropriatelly shift XRc into XRa */
+        switch (optn3) {
+        case MXU_OPTN3_PTN0:
+            tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
+            break;
+        case MXU_OPTN3_PTN1:
+        case MXU_OPTN3_PTN2:
+        case MXU_OPTN3_PTN3:
+            tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1],
+                             8 * (4 - optn3));
+            break;
+        case MXU_OPTN3_PTN4:
+            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
+            break;
+        }
+    } else if (unlikely(XRc == 0)) {
+        /* XRc zero register -> just appropriatelly shift XRb into XRa */
+        switch (optn3) {
+        case MXU_OPTN3_PTN0:
+            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+            break;
+        case MXU_OPTN3_PTN1:
+        case MXU_OPTN3_PTN2:
+        case MXU_OPTN3_PTN3:
+            tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
+            break;
+        case MXU_OPTN3_PTN4:
+            tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
+            break;
+        }
+    } else if (unlikely(XRb == XRc)) {
+        /* both operands same -> just rotation or moving from any of them */
+        switch (optn3) {
+        case MXU_OPTN3_PTN0:
+        case MXU_OPTN3_PTN4:
+            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+            break;
+        case MXU_OPTN3_PTN1:
+        case MXU_OPTN3_PTN2:
+        case MXU_OPTN3_PTN3:
+            tcg_gen_rotli_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
+            break;
+        }
+    } else {
+        /* the most general case */
+        switch (optn3) {
+        case MXU_OPTN3_PTN0:
+            {
+                /*                                         */
+                /*         XRb                XRc          */
+                /*  +---------------+                      */
+                /*  | A   B   C   D |    E   F   G   H     */
+                /*  +-------+-------+                      */
+                /*          |                              */
+                /*         XRa                             */
+                /*                                         */
+
+                tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+            }
+            break;
+        case MXU_OPTN3_PTN1:
+            {
+                /*                                         */
+                /*         XRb                 XRc         */
+                /*      +-------------------+              */
+                /*    A | B   C   D       E | F   G   H    */
+                /*      +---------+---------+              */
+                /*                |                        */
+                /*               XRa                       */
+                /*                                         */
+
+                TCGv_i32 t0 = tcg_temp_new();
+                TCGv_i32 t1 = tcg_temp_new();
+
+                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x00FFFFFF);
+                tcg_gen_shli_i32(t0, t0, 8);
+
+                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
+                tcg_gen_shri_i32(t1, t1, 24);
+
+                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
+
+                tcg_temp_free(t1);
+                tcg_temp_free(t0);
+            }
+            break;
+        case MXU_OPTN3_PTN2:
+            {
+                /*                                         */
+                /*         XRb                 XRc         */
+                /*          +-------------------+          */
+                /*    A   B | C   D       E   F | G   H    */
+                /*          +---------+---------+          */
+                /*                    |                    */
+                /*                   XRa                   */
+                /*                                         */
+
+                TCGv_i32 t0 = tcg_temp_new();
+                TCGv_i32 t1 = tcg_temp_new();
+
+                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
+                tcg_gen_shli_i32(t0, t0, 16);
+
+                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
+                tcg_gen_shri_i32(t1, t1, 16);
+
+                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
+
+                tcg_temp_free(t1);
+                tcg_temp_free(t0);
+            }
+            break;
+        case MXU_OPTN3_PTN3:
+            {
+                /*                                         */
+                /*         XRb                 XRc         */
+                /*              +-------------------+      */
+                /*    A   B   C | D       E   F   G | H    */
+                /*              +---------+---------+      */
+                /*                        |                */
+                /*                       XRa               */
+                /*                                         */
+
+                TCGv_i32 t0 = tcg_temp_new();
+                TCGv_i32 t1 = tcg_temp_new();
+
+                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x000000FF);
+                tcg_gen_shli_i32(t0, t0, 24);
+
+                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFFFF00);
+                tcg_gen_shri_i32(t1, t1, 8);
+
+                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
+
+                tcg_temp_free(t1);
+                tcg_temp_free(t0);
+            }
+            break;
+        case MXU_OPTN3_PTN4:
+            {
+                /*                                         */
+                /*         XRb                 XRc         */
+                /*                     +---------------+   */
+                /*    A   B   C   D    | E   F   G   H |   */
+                /*                     +-------+-------+   */
+                /*                             |           */
+                /*                            XRa          */
+                /*                                         */
+
+                tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
+            }
+            break;
+        }
+    }
+}
+
+
+/*
+ * Decoding engine for MXU
+ * =======================
+ */
+
+static void decode_opc_mxu__pool00(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+    switch (opcode) {
+    case OPC_MXU_S32MAX:
+    case OPC_MXU_S32MIN:
+        gen_mxu_S32MAX_S32MIN(ctx);
+        break;
+    case OPC_MXU_D16MAX:
+    case OPC_MXU_D16MIN:
+        gen_mxu_D16MAX_D16MIN(ctx);
+        break;
+    case OPC_MXU_Q8MAX:
+    case OPC_MXU_Q8MIN:
+        gen_mxu_Q8MAX_Q8MIN(ctx);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool04(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 20, 1);
+
+    switch (opcode) {
+    case OPC_MXU_S32LDD:
+    case OPC_MXU_S32LDDR:
+        gen_mxu_s32ldd_s32lddr(ctx);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool16(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+    switch (opcode) {
+    case OPC_MXU_S32ALNI:
+        gen_mxu_S32ALNI(ctx);
+        break;
+    case OPC_MXU_S32NOR:
+        gen_mxu_S32NOR(ctx);
+        break;
+    case OPC_MXU_S32AND:
+        gen_mxu_S32AND(ctx);
+        break;
+    case OPC_MXU_S32OR:
+        gen_mxu_S32OR(ctx);
+        break;
+    case OPC_MXU_S32XOR:
+        gen_mxu_S32XOR(ctx);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool19(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+    switch (opcode) {
+    case OPC_MXU_Q8MUL:
+    case OPC_MXU_Q8MULSU:
+        gen_mxu_q8mul_q8mulsu(ctx);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
+{
+    uint32_t opcode = extract32(insn, 0, 6);
+
+    if (opcode == OPC_MXU_S32M2I) {
+        gen_mxu_s32m2i(ctx);
+        return true;
+    }
+
+    if (opcode == OPC_MXU_S32I2M) {
+        gen_mxu_s32i2m(ctx);
+        return true;
+    }
+
+    {
+        TCGv t_mxu_cr = tcg_temp_new();
+        TCGLabel *l_exit = gen_new_label();
+
+        gen_load_mxu_cr(t_mxu_cr);
+        tcg_gen_andi_tl(t_mxu_cr, t_mxu_cr, MXU_CR_MXU_EN);
+        tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit);
+
+        switch (opcode) {
+        case OPC_MXU__POOL00:
+            decode_opc_mxu__pool00(ctx);
+            break;
+        case OPC_MXU_D16MUL:
+            gen_mxu_d16mul(ctx);
+            break;
+        case OPC_MXU_D16MAC:
+            gen_mxu_d16mac(ctx);
+            break;
+        case OPC_MXU__POOL04:
+            decode_opc_mxu__pool04(ctx);
+            break;
+        case OPC_MXU_S8LDD:
+            gen_mxu_s8ldd(ctx);
+            break;
+        case OPC_MXU__POOL16:
+            decode_opc_mxu__pool16(ctx);
+            break;
+        case OPC_MXU__POOL19:
+            decode_opc_mxu__pool19(ctx);
+            break;
+        default:
+            MIPS_INVAL("decode_opc_mxu");
+            gen_reserved_instruction(ctx);
+        }
+
+        gen_set_label(l_exit);
+        tcg_temp_free(t_mxu_cr);
+    }
+
+    return true;
+}
diff --git a/target/mips/translate.c b/target/mips/translate.c
index 0b6d82d228..c518bf3963 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -1130,779 +1130,6 @@ enum {
 };
 
 /*
- *
- *       AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET
- *       ============================================
- *
- *
- * MXU (full name: MIPS eXtension/enhanced Unit) is a SIMD extension of MIPS32
- * instructions set. It is designed to fit the needs of signal, graphical and
- * video processing applications. MXU instruction set is used in Xburst family
- * of microprocessors by Ingenic.
- *
- * MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is
- * the control register.
- *
- *
- *     The notation used in MXU assembler mnemonics
- *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  Register operands:
- *
- *   XRa, XRb, XRc, XRd - MXU registers
- *   Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers
- *
- *  Non-register operands:
- *
- *   aptn1 - 1-bit accumulate add/subtract pattern
- *   aptn2 - 2-bit accumulate add/subtract pattern
- *   eptn2 - 2-bit execute add/subtract pattern
- *   optn2 - 2-bit operand pattern
- *   optn3 - 3-bit operand pattern
- *   sft4  - 4-bit shift amount
- *   strd2 - 2-bit stride amount
- *
- *  Prefixes:
- *
- *   Level of parallelism:                Operand size:
- *    S - single operation at a time       32 - word
- *    D - two operations in parallel       16 - half word
- *    Q - four operations in parallel       8 - byte
- *
- *  Operations:
- *
- *   ADD   - Add or subtract
- *   ADDC  - Add with carry-in
- *   ACC   - Accumulate
- *   ASUM  - Sum together then accumulate (add or subtract)
- *   ASUMC - Sum together then accumulate (add or subtract) with carry-in
- *   AVG   - Average between 2 operands
- *   ABD   - Absolute difference
- *   ALN   - Align data
- *   AND   - Logical bitwise 'and' operation
- *   CPS   - Copy sign
- *   EXTR  - Extract bits
- *   I2M   - Move from GPR register to MXU register
- *   LDD   - Load data from memory to XRF
- *   LDI   - Load data from memory to XRF (and increase the address base)
- *   LUI   - Load unsigned immediate
- *   MUL   - Multiply
- *   MULU  - Unsigned multiply
- *   MADD  - 64-bit operand add 32x32 product
- *   MSUB  - 64-bit operand subtract 32x32 product
- *   MAC   - Multiply and accumulate (add or subtract)
- *   MAD   - Multiply and add or subtract
- *   MAX   - Maximum between 2 operands
- *   MIN   - Minimum between 2 operands
- *   M2I   - Move from MXU register to GPR register
- *   MOVZ  - Move if zero
- *   MOVN  - Move if non-zero
- *   NOR   - Logical bitwise 'nor' operation
- *   OR    - Logical bitwise 'or' operation
- *   STD   - Store data from XRF to memory
- *   SDI   - Store data from XRF to memory (and increase the address base)
- *   SLT   - Set of less than comparison
- *   SAD   - Sum of absolute differences
- *   SLL   - Logical shift left
- *   SLR   - Logical shift right
- *   SAR   - Arithmetic shift right
- *   SAT   - Saturation
- *   SFL   - Shuffle
- *   SCOP  - Calculate x’s scope (-1, means x<0; 0, means x==0; 1, means x>0)
- *   XOR   - Logical bitwise 'exclusive or' operation
- *
- *  Suffixes:
- *
- *   E - Expand results
- *   F - Fixed point multiplication
- *   L - Low part result
- *   R - Doing rounding
- *   V - Variable instead of immediate
- *   W - Combine above L and V
- *
- *
- *     The list of MXU instructions grouped by functionality
- *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * Load/Store instructions           Multiplication instructions
- * -----------------------           ---------------------------
- *
- *  S32LDD XRa, Rb, s12               S32MADD XRa, XRd, Rs, Rt
- *  S32STD XRa, Rb, s12               S32MADDU XRa, XRd, Rs, Rt
- *  S32LDDV XRa, Rb, rc, strd2        S32MSUB XRa, XRd, Rs, Rt
- *  S32STDV XRa, Rb, rc, strd2        S32MSUBU XRa, XRd, Rs, Rt
- *  S32LDI XRa, Rb, s12               S32MUL XRa, XRd, Rs, Rt
- *  S32SDI XRa, Rb, s12               S32MULU XRa, XRd, Rs, Rt
- *  S32LDIV XRa, Rb, rc, strd2        D16MUL XRa, XRb, XRc, XRd, optn2
- *  S32SDIV XRa, Rb, rc, strd2        D16MULE XRa, XRb, XRc, optn2
- *  S32LDDR XRa, Rb, s12              D16MULF XRa, XRb, XRc, optn2
- *  S32STDR XRa, Rb, s12              D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
- *  S32LDDVR XRa, Rb, rc, strd2       D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
- *  S32STDVR XRa, Rb, rc, strd2       D16MACF XRa, XRb, XRc, XRd, aptn2, optn2
- *  S32LDIR XRa, Rb, s12              D16MADL XRa, XRb, XRc, XRd, aptn2, optn2
- *  S32SDIR XRa, Rb, s12              S16MAD XRa, XRb, XRc, XRd, aptn1, optn2
- *  S32LDIVR XRa, Rb, rc, strd2       Q8MUL XRa, XRb, XRc, XRd
- *  S32SDIVR XRa, Rb, rc, strd2       Q8MULSU XRa, XRb, XRc, XRd
- *  S16LDD XRa, Rb, s10, eptn2        Q8MAC XRa, XRb, XRc, XRd, aptn2
- *  S16STD XRa, Rb, s10, eptn2        Q8MACSU XRa, XRb, XRc, XRd, aptn2
- *  S16LDI XRa, Rb, s10, eptn2        Q8MADL XRa, XRb, XRc, XRd, aptn2
- *  S16SDI XRa, Rb, s10, eptn2
- *  S8LDD XRa, Rb, s8, eptn3
- *  S8STD XRa, Rb, s8, eptn3         Addition and subtraction instructions
- *  S8LDI XRa, Rb, s8, eptn3         -------------------------------------
- *  S8SDI XRa, Rb, s8, eptn3
- *  LXW Rd, Rs, Rt, strd2             D32ADD XRa, XRb, XRc, XRd, eptn2
- *  LXH Rd, Rs, Rt, strd2             D32ADDC XRa, XRb, XRc, XRd
- *  LXHU Rd, Rs, Rt, strd2            D32ACC XRa, XRb, XRc, XRd, eptn2
- *  LXB Rd, Rs, Rt, strd2             D32ACCM XRa, XRb, XRc, XRd, eptn2
- *  LXBU Rd, Rs, Rt, strd2            D32ASUM XRa, XRb, XRc, XRd, eptn2
- *                                    S32CPS XRa, XRb, XRc
- *                                    Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2
- * Comparison instructions            Q16ACC XRa, XRb, XRc, XRd, eptn2
- * -----------------------            Q16ACCM XRa, XRb, XRc, XRd, eptn2
- *                                    D16ASUM XRa, XRb, XRc, XRd, eptn2
- *  S32MAX XRa, XRb, XRc              D16CPS XRa, XRb,
- *  S32MIN XRa, XRb, XRc              D16AVG XRa, XRb, XRc
- *  S32SLT XRa, XRb, XRc              D16AVGR XRa, XRb, XRc
- *  S32MOVZ XRa, XRb, XRc             Q8ADD XRa, XRb, XRc, eptn2
- *  S32MOVN XRa, XRb, XRc             Q8ADDE XRa, XRb, XRc, XRd, eptn2
- *  D16MAX XRa, XRb, XRc              Q8ACCE XRa, XRb, XRc, XRd, eptn2
- *  D16MIN XRa, XRb, XRc              Q8ABD XRa, XRb, XRc
- *  D16SLT XRa, XRb, XRc              Q8SAD XRa, XRb, XRc, XRd
- *  D16MOVZ XRa, XRb, XRc             Q8AVG XRa, XRb, XRc
- *  D16MOVN XRa, XRb, XRc             Q8AVGR XRa, XRb, XRc
- *  Q8MAX XRa, XRb, XRc               D8SUM XRa, XRb, XRc, XRd
- *  Q8MIN XRa, XRb, XRc               D8SUMC XRa, XRb, XRc, XRd
- *  Q8SLT XRa, XRb, XRc
- *  Q8SLTU XRa, XRb, XRc
- *  Q8MOVZ XRa, XRb, XRc             Shift instructions
- *  Q8MOVN XRa, XRb, XRc             ------------------
- *
- *                                    D32SLL XRa, XRb, XRc, XRd, sft4
- * Bitwise instructions               D32SLR XRa, XRb, XRc, XRd, sft4
- * --------------------               D32SAR XRa, XRb, XRc, XRd, sft4
- *                                    D32SARL XRa, XRb, XRc, sft4
- *  S32NOR XRa, XRb, XRc              D32SLLV XRa, XRb, Rb
- *  S32AND XRa, XRb, XRc              D32SLRV XRa, XRb, Rb
- *  S32XOR XRa, XRb, XRc              D32SARV XRa, XRb, Rb
- *  S32OR XRa, XRb, XRc               D32SARW XRa, XRb, XRc, Rb
- *                                    Q16SLL XRa, XRb, XRc, XRd, sft4
- *                                    Q16SLR XRa, XRb, XRc, XRd, sft4
- * Miscellaneous instructions         Q16SAR XRa, XRb, XRc, XRd, sft4
- * -------------------------          Q16SLLV XRa, XRb, Rb
- *                                    Q16SLRV XRa, XRb, Rb
- *  S32SFL XRa, XRb, XRc, XRd, optn2  Q16SARV XRa, XRb, Rb
- *  S32ALN XRa, XRb, XRc, Rb
- *  S32ALNI XRa, XRb, XRc, s3
- *  S32LUI XRa, s8, optn3            Move instructions
- *  S32EXTR XRa, XRb, Rb, bits5      -----------------
- *  S32EXTRV XRa, XRb, Rs, Rt
- *  Q16SCOP XRa, XRb, XRc, XRd        S32M2I XRa, Rb
- *  Q16SAT XRa, XRb, XRc              S32I2M XRa, Rb
- *
- *
- *     The opcode organization of MXU instructions
- *     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * The bits 31..26 of all MXU instructions are equal to 0x1C (also referred
- * as opcode SPECIAL2 in the base MIPS ISA). The organization and meaning of
- * other bits up to the instruction level is as follows:
- *
- *              bits
- *             05..00
- *
- *          ┌─ 000000 ─ OPC_MXU_S32MADD
- *          ├─ 000001 ─ OPC_MXU_S32MADDU
- *          ├─ 000010 ─ <not assigned>   (non-MXU OPC_MUL)
- *          │
- *          │                               20..18
- *          ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX
- *          │                            ├─ 001 ─ OPC_MXU_S32MIN
- *          │                            ├─ 010 ─ OPC_MXU_D16MAX
- *          │                            ├─ 011 ─ OPC_MXU_D16MIN
- *          │                            ├─ 100 ─ OPC_MXU_Q8MAX
- *          │                            ├─ 101 ─ OPC_MXU_Q8MIN
- *          │                            ├─ 110 ─ OPC_MXU_Q8SLT
- *          │                            └─ 111 ─ OPC_MXU_Q8SLTU
- *          ├─ 000100 ─ OPC_MXU_S32MSUB
- *          ├─ 000101 ─ OPC_MXU_S32MSUBU    20..18
- *          ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT
- *          │                            ├─ 001 ─ OPC_MXU_D16SLT
- *          │                            ├─ 010 ─ OPC_MXU_D16AVG
- *          │                            ├─ 011 ─ OPC_MXU_D16AVGR
- *          │                            ├─ 100 ─ OPC_MXU_Q8AVG
- *          │                            ├─ 101 ─ OPC_MXU_Q8AVGR
- *          │                            └─ 111 ─ OPC_MXU_Q8ADD
- *          │
- *          │                               20..18
- *          ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS
- *          │                            ├─ 010 ─ OPC_MXU_D16CPS
- *          │                            ├─ 100 ─ OPC_MXU_Q8ABD
- *          │                            └─ 110 ─ OPC_MXU_Q16SAT
- *          ├─ 001000 ─ OPC_MXU_D16MUL
- *          │                               25..24
- *          ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF
- *          │                            └─ 01 ─ OPC_MXU_D16MULE
- *          ├─ 001010 ─ OPC_MXU_D16MAC
- *          ├─ 001011 ─ OPC_MXU_D16MACF
- *          ├─ 001100 ─ OPC_MXU_D16MADL
- *          ├─ 001101 ─ OPC_MXU_S16MAD
- *          ├─ 001110 ─ OPC_MXU_Q16ADD
- *          ├─ 001111 ─ OPC_MXU_D16MACE     23
- *          │                            ┌─ 0 ─ OPC_MXU_S32LDD
- *          ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR
- *          │
- *          │                               23
- *          ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD
- *          │                            └─ 1 ─ OPC_MXU_S32STDR
- *          │
- *          │                               13..10
- *          ├─ 010010 ─ OPC_MXU__POOL06 ─┬─ 0000 ─ OPC_MXU_S32LDDV
- *          │                            └─ 0001 ─ OPC_MXU_S32LDDVR
- *          │
- *          │                               13..10
- *          ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV
- *          │                            └─ 0001 ─ OPC_MXU_S32STDVR
- *          │
- *          │                               23
- *          ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI
- *          │                            └─ 1 ─ OPC_MXU_S32LDIR
- *          │
- *          │                               23
- *          ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI
- *          │                            └─ 1 ─ OPC_MXU_S32SDIR
- *          │
- *          │                               13..10
- *          ├─ 010110 ─ OPC_MXU__POOL10 ─┬─ 0000 ─ OPC_MXU_S32LDIV
- *          │                            └─ 0001 ─ OPC_MXU_S32LDIVR
- *          │
- *          │                               13..10
- *          ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV
- *          │                            └─ 0001 ─ OPC_MXU_S32SDIVR
- *          ├─ 011000 ─ OPC_MXU_D32ADD
- *          │                               23..22
- *   MXU    ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC
- * opcodes ─┤                            ├─ 01 ─ OPC_MXU_D32ACCM
- *          │                            └─ 10 ─ OPC_MXU_D32ASUM
- *          ├─ 011010 ─ <not assigned>
- *          │                               23..22
- *          ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC
- *          │                            ├─ 01 ─ OPC_MXU_Q16ACCM
- *          │                            └─ 10 ─ OPC_MXU_Q16ASUM
- *          │
- *          │                               23..22
- *          ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE
- *          │                            ├─ 01 ─ OPC_MXU_D8SUM
- *          ├─ 011101 ─ OPC_MXU_Q8ACCE   └─ 10 ─ OPC_MXU_D8SUMC
- *          ├─ 011110 ─ <not assigned>
- *          ├─ 011111 ─ <not assigned>
- *          ├─ 100000 ─ <not assigned>   (overlaps with CLZ)
- *          ├─ 100001 ─ <not assigned>   (overlaps with CLO)
- *          ├─ 100010 ─ OPC_MXU_S8LDD
- *          ├─ 100011 ─ OPC_MXU_S8STD       15..14
- *          ├─ 100100 ─ OPC_MXU_S8LDI    ┌─ 00 ─ OPC_MXU_S32MUL
- *          ├─ 100101 ─ OPC_MXU_S8SDI    ├─ 00 ─ OPC_MXU_S32MULU
- *          │                            ├─ 00 ─ OPC_MXU_S32EXTR
- *          ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 00 ─ OPC_MXU_S32EXTRV
- *          │
- *          │                               20..18
- *          ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW
- *          │                            ├─ 001 ─ OPC_MXU_S32ALN
- *          │                            ├─ 010 ─ OPC_MXU_S32ALNI
- *          │                            ├─ 011 ─ OPC_MXU_S32LUI
- *          │                            ├─ 100 ─ OPC_MXU_S32NOR
- *          │                            ├─ 101 ─ OPC_MXU_S32AND
- *          │                            ├─ 110 ─ OPC_MXU_S32OR
- *          │                            └─ 111 ─ OPC_MXU_S32XOR
- *          │
- *          │                               7..5
- *          ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB
- *          │                            ├─ 001 ─ OPC_MXU_LXH
- *          ├─ 101001 ─ <not assigned>   ├─ 011 ─ OPC_MXU_LXW
- *          ├─ 101010 ─ OPC_MXU_S16LDD   ├─ 100 ─ OPC_MXU_LXBU
- *          ├─ 101011 ─ OPC_MXU_S16STD   └─ 101 ─ OPC_MXU_LXHU
- *          ├─ 101100 ─ OPC_MXU_S16LDI
- *          ├─ 101101 ─ OPC_MXU_S16SDI
- *          ├─ 101110 ─ OPC_MXU_S32M2I
- *          ├─ 101111 ─ OPC_MXU_S32I2M
- *          ├─ 110000 ─ OPC_MXU_D32SLL
- *          ├─ 110001 ─ OPC_MXU_D32SLR      20..18
- *          ├─ 110010 ─ OPC_MXU_D32SARL  ┌─ 000 ─ OPC_MXU_D32SLLV
- *          ├─ 110011 ─ OPC_MXU_D32SAR   ├─ 001 ─ OPC_MXU_D32SLRV
- *          ├─ 110100 ─ OPC_MXU_Q16SLL   ├─ 010 ─ OPC_MXU_D32SARV
- *          ├─ 110101 ─ OPC_MXU_Q16SLR   ├─ 011 ─ OPC_MXU_Q16SLLV
- *          │                            ├─ 100 ─ OPC_MXU_Q16SLRV
- *          ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 101 ─ OPC_MXU_Q16SARV
- *          │
- *          ├─ 110111 ─ OPC_MXU_Q16SAR
- *          │                               23..22
- *          ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
- *          │                            └─ 01 ─ OPC_MXU_Q8MULSU
- *          │
- *          │                               20..18
- *          ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
- *          │                            ├─ 001 ─ OPC_MXU_Q8MOVN
- *          │                            ├─ 010 ─ OPC_MXU_D16MOVZ
- *          │                            ├─ 011 ─ OPC_MXU_D16MOVN
- *          │                            ├─ 100 ─ OPC_MXU_S32MOVZ
- *          │                            └─ 101 ─ OPC_MXU_S32MOVN
- *          │
- *          │                               23..22
- *          ├─ 111010 ─ OPC_MXU__POOL21 ─┬─ 00 ─ OPC_MXU_Q8MAC
- *          │                            └─ 10 ─ OPC_MXU_Q8MACSU
- *          ├─ 111011 ─ OPC_MXU_Q16SCOP
- *          ├─ 111100 ─ OPC_MXU_Q8MADL
- *          ├─ 111101 ─ OPC_MXU_S32SFL
- *          ├─ 111110 ─ OPC_MXU_Q8SAD
- *          └─ 111111 ─ <not assigned>   (overlaps with SDBBP)
- *
- *
- * Compiled after:
- *
- *   "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
- *   Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
- */
-
-enum {
-    OPC_MXU_S32MADD  = 0x00,
-    OPC_MXU_S32MADDU = 0x01,
-    OPC__MXU_MUL     = 0x02,
-    OPC_MXU__POOL00  = 0x03,
-    OPC_MXU_S32MSUB  = 0x04,
-    OPC_MXU_S32MSUBU = 0x05,
-    OPC_MXU__POOL01  = 0x06,
-    OPC_MXU__POOL02  = 0x07,
-    OPC_MXU_D16MUL   = 0x08,
-    OPC_MXU__POOL03  = 0x09,
-    OPC_MXU_D16MAC   = 0x0A,
-    OPC_MXU_D16MACF  = 0x0B,
-    OPC_MXU_D16MADL  = 0x0C,
-    OPC_MXU_S16MAD   = 0x0D,
-    OPC_MXU_Q16ADD   = 0x0E,
-    OPC_MXU_D16MACE  = 0x0F,
-    OPC_MXU__POOL04  = 0x10,
-    OPC_MXU__POOL05  = 0x11,
-    OPC_MXU__POOL06  = 0x12,
-    OPC_MXU__POOL07  = 0x13,
-    OPC_MXU__POOL08  = 0x14,
-    OPC_MXU__POOL09  = 0x15,
-    OPC_MXU__POOL10  = 0x16,
-    OPC_MXU__POOL11  = 0x17,
-    OPC_MXU_D32ADD   = 0x18,
-    OPC_MXU__POOL12  = 0x19,
-    /* not assigned 0x1A */
-    OPC_MXU__POOL13  = 0x1B,
-    OPC_MXU__POOL14  = 0x1C,
-    OPC_MXU_Q8ACCE   = 0x1D,
-    /* not assigned 0x1E */
-    /* not assigned 0x1F */
-    /* not assigned 0x20 */
-    /* not assigned 0x21 */
-    OPC_MXU_S8LDD    = 0x22,
-    OPC_MXU_S8STD    = 0x23,
-    OPC_MXU_S8LDI    = 0x24,
-    OPC_MXU_S8SDI    = 0x25,
-    OPC_MXU__POOL15  = 0x26,
-    OPC_MXU__POOL16  = 0x27,
-    OPC_MXU__POOL17  = 0x28,
-    /* not assigned 0x29 */
-    OPC_MXU_S16LDD   = 0x2A,
-    OPC_MXU_S16STD   = 0x2B,
-    OPC_MXU_S16LDI   = 0x2C,
-    OPC_MXU_S16SDI   = 0x2D,
-    OPC_MXU_S32M2I   = 0x2E,
-    OPC_MXU_S32I2M   = 0x2F,
-    OPC_MXU_D32SLL   = 0x30,
-    OPC_MXU_D32SLR   = 0x31,
-    OPC_MXU_D32SARL  = 0x32,
-    OPC_MXU_D32SAR   = 0x33,
-    OPC_MXU_Q16SLL   = 0x34,
-    OPC_MXU_Q16SLR   = 0x35,
-    OPC_MXU__POOL18  = 0x36,
-    OPC_MXU_Q16SAR   = 0x37,
-    OPC_MXU__POOL19  = 0x38,
-    OPC_MXU__POOL20  = 0x39,
-    OPC_MXU__POOL21  = 0x3A,
-    OPC_MXU_Q16SCOP  = 0x3B,
-    OPC_MXU_Q8MADL   = 0x3C,
-    OPC_MXU_S32SFL   = 0x3D,
-    OPC_MXU_Q8SAD    = 0x3E,
-    /* not assigned 0x3F */
-};
-
-
-/*
- * MXU pool 00
- */
-enum {
-    OPC_MXU_S32MAX   = 0x00,
-    OPC_MXU_S32MIN   = 0x01,
-    OPC_MXU_D16MAX   = 0x02,
-    OPC_MXU_D16MIN   = 0x03,
-    OPC_MXU_Q8MAX    = 0x04,
-    OPC_MXU_Q8MIN    = 0x05,
-    OPC_MXU_Q8SLT    = 0x06,
-    OPC_MXU_Q8SLTU   = 0x07,
-};
-
-/*
- * MXU pool 01
- */
-enum {
-    OPC_MXU_S32SLT   = 0x00,
-    OPC_MXU_D16SLT   = 0x01,
-    OPC_MXU_D16AVG   = 0x02,
-    OPC_MXU_D16AVGR  = 0x03,
-    OPC_MXU_Q8AVG    = 0x04,
-    OPC_MXU_Q8AVGR   = 0x05,
-    OPC_MXU_Q8ADD    = 0x07,
-};
-
-/*
- * MXU pool 02
- */
-enum {
-    OPC_MXU_S32CPS   = 0x00,
-    OPC_MXU_D16CPS   = 0x02,
-    OPC_MXU_Q8ABD    = 0x04,
-    OPC_MXU_Q16SAT   = 0x06,
-};
-
-/*
- * MXU pool 03
- */
-enum {
-    OPC_MXU_D16MULF  = 0x00,
-    OPC_MXU_D16MULE  = 0x01,
-};
-
-/*
- * MXU pool 04
- */
-enum {
-    OPC_MXU_S32LDD   = 0x00,
-    OPC_MXU_S32LDDR  = 0x01,
-};
-
-/*
- * MXU pool 05
- */
-enum {
-    OPC_MXU_S32STD   = 0x00,
-    OPC_MXU_S32STDR  = 0x01,
-};
-
-/*
- * MXU pool 06
- */
-enum {
-    OPC_MXU_S32LDDV  = 0x00,
-    OPC_MXU_S32LDDVR = 0x01,
-};
-
-/*
- * MXU pool 07
- */
-enum {
-    OPC_MXU_S32STDV  = 0x00,
-    OPC_MXU_S32STDVR = 0x01,
-};
-
-/*
- * MXU pool 08
- */
-enum {
-    OPC_MXU_S32LDI   = 0x00,
-    OPC_MXU_S32LDIR  = 0x01,
-};
-
-/*
- * MXU pool 09
- */
-enum {
-    OPC_MXU_S32SDI   = 0x00,
-    OPC_MXU_S32SDIR  = 0x01,
-};
-
-/*
- * MXU pool 10
- */
-enum {
-    OPC_MXU_S32LDIV  = 0x00,
-    OPC_MXU_S32LDIVR = 0x01,
-};
-
-/*
- * MXU pool 11
- */
-enum {
-    OPC_MXU_S32SDIV  = 0x00,
-    OPC_MXU_S32SDIVR = 0x01,
-};
-
-/*
- * MXU pool 12
- */
-enum {
-    OPC_MXU_D32ACC   = 0x00,
-    OPC_MXU_D32ACCM  = 0x01,
-    OPC_MXU_D32ASUM  = 0x02,
-};
-
-/*
- * MXU pool 13
- */
-enum {
-    OPC_MXU_Q16ACC   = 0x00,
-    OPC_MXU_Q16ACCM  = 0x01,
-    OPC_MXU_Q16ASUM  = 0x02,
-};
-
-/*
- * MXU pool 14
- */
-enum {
-    OPC_MXU_Q8ADDE   = 0x00,
-    OPC_MXU_D8SUM    = 0x01,
-    OPC_MXU_D8SUMC   = 0x02,
-};
-
-/*
- * MXU pool 15
- */
-enum {
-    OPC_MXU_S32MUL   = 0x00,
-    OPC_MXU_S32MULU  = 0x01,
-    OPC_MXU_S32EXTR  = 0x02,
-    OPC_MXU_S32EXTRV = 0x03,
-};
-
-/*
- * MXU pool 16
- */
-enum {
-    OPC_MXU_D32SARW  = 0x00,
-    OPC_MXU_S32ALN   = 0x01,
-    OPC_MXU_S32ALNI  = 0x02,
-    OPC_MXU_S32LUI   = 0x03,
-    OPC_MXU_S32NOR   = 0x04,
-    OPC_MXU_S32AND   = 0x05,
-    OPC_MXU_S32OR    = 0x06,
-    OPC_MXU_S32XOR   = 0x07,
-};
-
-/*
- * MXU pool 17
- */
-enum {
-    OPC_MXU_LXB      = 0x00,
-    OPC_MXU_LXH      = 0x01,
-    OPC_MXU_LXW      = 0x03,
-    OPC_MXU_LXBU     = 0x04,
-    OPC_MXU_LXHU     = 0x05,
-};
-
-/*
- * MXU pool 18
- */
-enum {
-    OPC_MXU_D32SLLV  = 0x00,
-    OPC_MXU_D32SLRV  = 0x01,
-    OPC_MXU_D32SARV  = 0x03,
-    OPC_MXU_Q16SLLV  = 0x04,
-    OPC_MXU_Q16SLRV  = 0x05,
-    OPC_MXU_Q16SARV  = 0x07,
-};
-
-/*
- * MXU pool 19
- */
-enum {
-    OPC_MXU_Q8MUL    = 0x00,
-    OPC_MXU_Q8MULSU  = 0x01,
-};
-
-/*
- * MXU pool 20
- */
-enum {
-    OPC_MXU_Q8MOVZ   = 0x00,
-    OPC_MXU_Q8MOVN   = 0x01,
-    OPC_MXU_D16MOVZ  = 0x02,
-    OPC_MXU_D16MOVN  = 0x03,
-    OPC_MXU_S32MOVZ  = 0x04,
-    OPC_MXU_S32MOVN  = 0x05,
-};
-
-/*
- * MXU pool 21
- */
-enum {
-    OPC_MXU_Q8MAC    = 0x00,
-    OPC_MXU_Q8MACSU  = 0x01,
-};
-
-/*
- *     Overview of the TX79-specific instruction set
- *     =============================================
- *
- * The R5900 and the C790 have 128-bit wide GPRs, where the upper 64 bits
- * are only used by the specific quadword (128-bit) LQ/SQ load/store
- * instructions and certain multimedia instructions (MMIs). These MMIs
- * configure the 128-bit data path as two 64-bit, four 32-bit, eight 16-bit
- * or sixteen 8-bit paths.
- *
- * Reference:
- *
- * The Toshiba TX System RISC TX79 Core Architecture manual,
- * https://wiki.qemu.org/File:C790.pdf
- *
- *     Three-Operand Multiply and Multiply-Add (4 instructions)
- *     --------------------------------------------------------
- * MADD    [rd,] rs, rt      Multiply/Add
- * MADDU   [rd,] rs, rt      Multiply/Add Unsigned
- * MULT    [rd,] rs, rt      Multiply (3-operand)
- * MULTU   [rd,] rs, rt      Multiply Unsigned (3-operand)
- *
- *     Multiply Instructions for Pipeline 1 (10 instructions)
- *     ------------------------------------------------------
- * MULT1   [rd,] rs, rt      Multiply Pipeline 1
- * MULTU1  [rd,] rs, rt      Multiply Unsigned Pipeline 1
- * DIV1    rs, rt            Divide Pipeline 1
- * DIVU1   rs, rt            Divide Unsigned Pipeline 1
- * MADD1   [rd,] rs, rt      Multiply-Add Pipeline 1
- * MADDU1  [rd,] rs, rt      Multiply-Add Unsigned Pipeline 1
- * MFHI1   rd                Move From HI1 Register
- * MFLO1   rd                Move From LO1 Register
- * MTHI1   rs                Move To HI1 Register
- * MTLO1   rs                Move To LO1 Register
- *
- *     Arithmetic (19 instructions)
- *     ----------------------------
- * PADDB   rd, rs, rt        Parallel Add Byte
- * PSUBB   rd, rs, rt        Parallel Subtract Byte
- * PADDH   rd, rs, rt        Parallel Add Halfword
- * PSUBH   rd, rs, rt        Parallel Subtract Halfword
- * PADDW   rd, rs, rt        Parallel Add Word
- * PSUBW   rd, rs, rt        Parallel Subtract Word
- * PADSBH  rd, rs, rt        Parallel Add/Subtract Halfword
- * PADDSB  rd, rs, rt        Parallel Add with Signed Saturation Byte
- * PSUBSB  rd, rs, rt        Parallel Subtract with Signed Saturation Byte
- * PADDSH  rd, rs, rt        Parallel Add with Signed Saturation Halfword
- * PSUBSH  rd, rs, rt        Parallel Subtract with Signed Saturation Halfword
- * PADDSW  rd, rs, rt        Parallel Add with Signed Saturation Word
- * PSUBSW  rd, rs, rt        Parallel Subtract with Signed Saturation Word
- * PADDUB  rd, rs, rt        Parallel Add with Unsigned saturation Byte
- * PSUBUB  rd, rs, rt        Parallel Subtract with Unsigned saturation Byte
- * PADDUH  rd, rs, rt        Parallel Add with Unsigned saturation Halfword
- * PSUBUH  rd, rs, rt        Parallel Subtract with Unsigned saturation Halfword
- * PADDUW  rd, rs, rt        Parallel Add with Unsigned saturation Word
- * PSUBUW  rd, rs, rt        Parallel Subtract with Unsigned saturation Word
- *
- *     Min/Max (4 instructions)
- *     ------------------------
- * PMAXH   rd, rs, rt        Parallel Maximum Halfword
- * PMINH   rd, rs, rt        Parallel Minimum Halfword
- * PMAXW   rd, rs, rt        Parallel Maximum Word
- * PMINW   rd, rs, rt        Parallel Minimum Word
- *
- *     Absolute (2 instructions)
- *     -------------------------
- * PABSH   rd, rt            Parallel Absolute Halfword
- * PABSW   rd, rt            Parallel Absolute Word
- *
- *     Logical (4 instructions)
- *     ------------------------
- * PAND    rd, rs, rt        Parallel AND
- * POR     rd, rs, rt        Parallel OR
- * PXOR    rd, rs, rt        Parallel XOR
- * PNOR    rd, rs, rt        Parallel NOR
- *
- *     Shift (9 instructions)
- *     ----------------------
- * PSLLH   rd, rt, sa        Parallel Shift Left Logical Halfword
- * PSRLH   rd, rt, sa        Parallel Shift Right Logical Halfword
- * PSRAH   rd, rt, sa        Parallel Shift Right Arithmetic Halfword
- * PSLLW   rd, rt, sa        Parallel Shift Left Logical Word
- * PSRLW   rd, rt, sa        Parallel Shift Right Logical Word
- * PSRAW   rd, rt, sa        Parallel Shift Right Arithmetic Word
- * PSLLVW  rd, rt, rs        Parallel Shift Left Logical Variable Word
- * PSRLVW  rd, rt, rs        Parallel Shift Right Logical Variable Word
- * PSRAVW  rd, rt, rs        Parallel Shift Right Arithmetic Variable Word
- *
- *     Compare (6 instructions)
- *     ------------------------
- * PCGTB   rd, rs, rt        Parallel Compare for Greater Than Byte
- * PCEQB   rd, rs, rt        Parallel Compare for Equal Byte
- * PCGTH   rd, rs, rt        Parallel Compare for Greater Than Halfword
- * PCEQH   rd, rs, rt        Parallel Compare for Equal Halfword
- * PCGTW   rd, rs, rt        Parallel Compare for Greater Than Word
- * PCEQW   rd, rs, rt        Parallel Compare for Equal Word
- *
- *     LZC (1 instruction)
- *     -------------------
- * PLZCW   rd, rs            Parallel Leading Zero or One Count Word
- *
- *     Quadword Load and Store (2 instructions)
- *     ----------------------------------------
- * LQ      rt, offset(base)  Load Quadword
- * SQ      rt, offset(base)  Store Quadword
- *
- *     Multiply and Divide (19 instructions)
- *     -------------------------------------
- * PMULTW  rd, rs, rt        Parallel Multiply Word
- * PMULTUW rd, rs, rt        Parallel Multiply Unsigned Word
- * PDIVW   rs, rt            Parallel Divide Word
- * PDIVUW  rs, rt            Parallel Divide Unsigned Word
- * PMADDW  rd, rs, rt        Parallel Multiply-Add Word
- * PMADDUW rd, rs, rt        Parallel Multiply-Add Unsigned Word
- * PMSUBW  rd, rs, rt        Parallel Multiply-Subtract Word
- * PMULTH  rd, rs, rt        Parallel Multiply Halfword
- * PMADDH  rd, rs, rt        Parallel Multiply-Add Halfword
- * PMSUBH  rd, rs, rt        Parallel Multiply-Subtract Halfword
- * PHMADH  rd, rs, rt        Parallel Horizontal Multiply-Add Halfword
- * PHMSBH  rd, rs, rt        Parallel Horizontal Multiply-Subtract Halfword
- * PDIVBW  rs, rt            Parallel Divide Broadcast Word
- * PMFHI   rd                Parallel Move From HI Register
- * PMFLO   rd                Parallel Move From LO Register
- * PMTHI   rs                Parallel Move To HI Register
- * PMTLO   rs                Parallel Move To LO Register
- * PMFHL   rd                Parallel Move From HI/LO Register
- * PMTHL   rs                Parallel Move To HI/LO Register
- *
- *     Pack/Extend (11 instructions)
- *     -----------------------------
- * PPAC5   rd, rt            Parallel Pack to 5 bits
- * PPACB   rd, rs, rt        Parallel Pack to Byte
- * PPACH   rd, rs, rt        Parallel Pack to Halfword
- * PPACW   rd, rs, rt        Parallel Pack to Word
- * PEXT5   rd, rt            Parallel Extend Upper from 5 bits
- * PEXTUB  rd, rs, rt        Parallel Extend Upper from Byte
- * PEXTLB  rd, rs, rt        Parallel Extend Lower from Byte
- * PEXTUH  rd, rs, rt        Parallel Extend Upper from Halfword
- * PEXTLH  rd, rs, rt        Parallel Extend Lower from Halfword
- * PEXTUW  rd, rs, rt        Parallel Extend Upper from Word
- * PEXTLW  rd, rs, rt        Parallel Extend Lower from Word
- *
- *     Others (16 instructions)
- *     ------------------------
- * PCPYH   rd, rt            Parallel Copy Halfword
- * PCPYLD  rd, rs, rt        Parallel Copy Lower Doubleword
- * PCPYUD  rd, rs, rt        Parallel Copy Upper Doubleword
- * PREVH   rd, rt            Parallel Reverse Halfword
- * PINTH   rd, rs, rt        Parallel Interleave Halfword
- * PINTEH  rd, rs, rt        Parallel Interleave Even Halfword
- * PEXEH   rd, rt            Parallel Exchange Even Halfword
- * PEXCH   rd, rt            Parallel Exchange Center Halfword
- * PEXEW   rd, rt            Parallel Exchange Even Word
- * PEXCW   rd, rt            Parallel Exchange Center Word
- * QFSRV   rd, rs, rt        Quadword Funnel Shift Right Variable
- * MFSA    rd                Move from Shift Amount Register
- * MTSA    rs                Move to Shift Amount Register
- * MTSAB   rs, immediate     Move Byte Count to Shift Amount Register
- * MTSAH   rs, immediate     Move Halfword Count to Shift Amount Register
- * PROT3W  rd, rt            Parallel Rotate 3 Words
- *
  *     MMI (MultiMedia Instruction) encodings
  *     ======================================
  *
@@ -1970,211 +1197,12 @@ enum {
 enum {
     MMI_OPC_MADD       = 0x00 | MMI_OPC_CLASS_MMI, /* Same as OPC_MADD */
     MMI_OPC_MADDU      = 0x01 | MMI_OPC_CLASS_MMI, /* Same as OPC_MADDU */
-    MMI_OPC_PLZCW      = 0x04 | MMI_OPC_CLASS_MMI,
-    MMI_OPC_CLASS_MMI0 = 0x08 | MMI_OPC_CLASS_MMI,
-    MMI_OPC_CLASS_MMI2 = 0x09 | MMI_OPC_CLASS_MMI,
-    MMI_OPC_MFHI1      = 0x10 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MFHI */
-    MMI_OPC_MTHI1      = 0x11 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MTHI */
-    MMI_OPC_MFLO1      = 0x12 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MFLO */
-    MMI_OPC_MTLO1      = 0x13 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MTLO */
     MMI_OPC_MULT1      = 0x18 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MULT */
     MMI_OPC_MULTU1     = 0x19 | MMI_OPC_CLASS_MMI, /* Same min. as OPC_MULTU */
     MMI_OPC_DIV1       = 0x1A | MMI_OPC_CLASS_MMI, /* Same minor as OPC_DIV  */
     MMI_OPC_DIVU1      = 0x1B | MMI_OPC_CLASS_MMI, /* Same minor as OPC_DIVU */
     MMI_OPC_MADD1      = 0x20 | MMI_OPC_CLASS_MMI,
     MMI_OPC_MADDU1     = 0x21 | MMI_OPC_CLASS_MMI,
-    MMI_OPC_CLASS_MMI1 = 0x28 | MMI_OPC_CLASS_MMI,
-    MMI_OPC_CLASS_MMI3 = 0x29 | MMI_OPC_CLASS_MMI,
-    MMI_OPC_PMFHL      = 0x30 | MMI_OPC_CLASS_MMI,
-    MMI_OPC_PMTHL      = 0x31 | MMI_OPC_CLASS_MMI,
-    MMI_OPC_PSLLH      = 0x34 | MMI_OPC_CLASS_MMI,
-    MMI_OPC_PSRLH      = 0x36 | MMI_OPC_CLASS_MMI,
-    MMI_OPC_PSRAH      = 0x37 | MMI_OPC_CLASS_MMI,
-    MMI_OPC_PSLLW      = 0x3C | MMI_OPC_CLASS_MMI,
-    MMI_OPC_PSRLW      = 0x3E | MMI_OPC_CLASS_MMI,
-    MMI_OPC_PSRAW      = 0x3F | MMI_OPC_CLASS_MMI,
-};
-
-/*
- * MMI instructions with opcode field = MMI and bits 5..0 = MMI0:
- *
- *  31    26                        10     6 5      0
- * +--------+----------------------+--------+--------+
- * |   MMI  |                      |function|  MMI0  |
- * +--------+----------------------+--------+--------+
- *
- * function  bits 7..6
- *     bits |   0   |   1   |   2   |   3
- *    10..8 |   00  |   01  |   10  |   11
- *   -------+-------+-------+-------+-------
- *    0 000 | PADDW | PSUBW | PCGTW | PMAXW
- *    1 001 | PADDH | PSUBH | PCGTH | PMAXH
- *    2 010 | PADDB | PSUBB | PCGTB |   *
- *    3 011 |   *   |   *   |   *   |   *
- *    4 100 | PADDSW| PSUBSW| PEXTLW| PPACW
- *    5 101 | PADDSH| PSUBSH| PEXTLH| PPACH
- *    6 110 | PADDSB| PSUBSB| PEXTLB| PPACB
- *    7 111 |   *   |   *   | PEXT5 | PPAC5
- */
-
-#define MASK_MMI0(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
-enum {
-    MMI_OPC_0_PADDW  = (0x00 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PSUBW  = (0x01 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PCGTW  = (0x02 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PMAXW  = (0x03 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PADDH  = (0x04 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PSUBH  = (0x05 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PCGTH  = (0x06 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PMAXH  = (0x07 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PADDB  = (0x08 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PSUBB  = (0x09 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PCGTB  = (0x0A << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PADDSW = (0x10 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PSUBSW = (0x11 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PEXTLW = (0x12 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PPACW  = (0x13 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PADDSH = (0x14 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PSUBSH = (0x15 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PEXTLH = (0x16 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PPACH  = (0x17 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PADDSB = (0x18 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PSUBSB = (0x19 << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PEXTLB = (0x1A << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PPACB  = (0x1B << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PEXT5  = (0x1E << 6) | MMI_OPC_CLASS_MMI0,
-    MMI_OPC_0_PPAC5  = (0x1F << 6) | MMI_OPC_CLASS_MMI0,
-};
-
-/*
- * MMI instructions with opcode field = MMI and bits 5..0 = MMI1:
- *
- *  31    26                        10     6 5      0
- * +--------+----------------------+--------+--------+
- * |   MMI  |                      |function|  MMI1  |
- * +--------+----------------------+--------+--------+
- *
- * function  bits 7..6
- *     bits |   0   |   1   |   2   |   3
- *    10..8 |   00  |   01  |   10  |   11
- *   -------+-------+-------+-------+-------
- *    0 000 |   *   | PABSW | PCEQW | PMINW
- *    1 001 | PADSBH| PABSH | PCEQH | PMINH
- *    2 010 |   *   |   *   | PCEQB |   *
- *    3 011 |   *   |   *   |   *   |   *
- *    4 100 | PADDUW| PSUBUW| PEXTUW|   *
- *    5 101 | PADDUH| PSUBUH| PEXTUH|   *
- *    6 110 | PADDUB| PSUBUB| PEXTUB| QFSRV
- *    7 111 |   *   |   *   |   *   |   *
- */
-
-#define MASK_MMI1(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
-enum {
-    MMI_OPC_1_PABSW  = (0x01 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PCEQW  = (0x02 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PMINW  = (0x03 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PADSBH = (0x04 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PABSH  = (0x05 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PCEQH  = (0x06 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PMINH  = (0x07 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PCEQB  = (0x0A << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PADDUW = (0x10 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PSUBUW = (0x11 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PEXTUW = (0x12 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PADDUH = (0x14 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PSUBUH = (0x15 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PEXTUH = (0x16 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PADDUB = (0x18 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PSUBUB = (0x19 << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_PEXTUB = (0x1A << 6) | MMI_OPC_CLASS_MMI1,
-    MMI_OPC_1_QFSRV  = (0x1B << 6) | MMI_OPC_CLASS_MMI1,
-};
-
-/*
- * MMI instructions with opcode field = MMI and bits 5..0 = MMI2:
- *
- *  31    26                        10     6 5      0
- * +--------+----------------------+--------+--------+
- * |   MMI  |                      |function|  MMI2  |
- * +--------+----------------------+--------+--------+
- *
- * function  bits 7..6
- *     bits |   0   |   1   |   2   |   3
- *    10..8 |   00  |   01  |   10  |   11
- *   -------+-------+-------+-------+-------
- *    0 000 | PMADDW|   *   | PSLLVW| PSRLVW
- *    1 001 | PMSUBW|   *   |   *   |   *
- *    2 010 | PMFHI | PMFLO | PINTH |   *
- *    3 011 | PMULTW| PDIVW | PCPYLD|   *
- *    4 100 | PMADDH| PHMADH|  PAND |  PXOR
- *    5 101 | PMSUBH| PHMSBH|   *   |   *
- *    6 110 |   *   |   *   | PEXEH | PREVH
- *    7 111 | PMULTH| PDIVBW| PEXEW | PROT3W
- */
-
-#define MASK_MMI2(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
-enum {
-    MMI_OPC_2_PMADDW = (0x00 << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PSLLVW = (0x02 << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PSRLVW = (0x03 << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PMSUBW = (0x04 << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PMFHI  = (0x08 << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PMFLO  = (0x09 << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PINTH  = (0x0A << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PMULTW = (0x0C << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PDIVW  = (0x0D << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PCPYLD = (0x0E << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PMADDH = (0x10 << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PHMADH = (0x11 << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PAND   = (0x12 << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PXOR   = (0x13 << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PMSUBH = (0x14 << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PHMSBH = (0x15 << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PEXEH  = (0x1A << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PREVH  = (0x1B << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PMULTH = (0x1C << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PDIVBW = (0x1D << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PEXEW  = (0x1E << 6) | MMI_OPC_CLASS_MMI2,
-    MMI_OPC_2_PROT3W = (0x1F << 6) | MMI_OPC_CLASS_MMI2,
-};
-
-/*
- * MMI instructions with opcode field = MMI and bits 5..0 = MMI3:
- *
- *  31    26                        10     6 5      0
- * +--------+----------------------+--------+--------+
- * |   MMI  |                      |function|  MMI3  |
- * +--------+----------------------+--------+--------+
- *
- * function  bits 7..6
- *     bits |   0   |   1   |   2   |   3
- *    10..8 |   00  |   01  |   10  |   11
- *   -------+-------+-------+-------+-------
- *    0 000 |PMADDUW|   *   |   *   | PSRAVW
- *    1 001 |   *   |   *   |   *   |   *
- *    2 010 | PMTHI | PMTLO | PINTEH|   *
- *    3 011 |PMULTUW| PDIVUW| PCPYUD|   *
- *    4 100 |   *   |   *   |  POR  |  PNOR
- *    5 101 |   *   |   *   |   *   |   *
- *    6 110 |   *   |   *   | PEXCH | PCPYH
- *    7 111 |   *   |   *   | PEXCW |   *
- */
-
-#define MASK_MMI3(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
-enum {
-    MMI_OPC_3_PMADDUW = (0x00 << 6) | MMI_OPC_CLASS_MMI3,
-    MMI_OPC_3_PSRAVW  = (0x03 << 6) | MMI_OPC_CLASS_MMI3,
-    MMI_OPC_3_PMTHI   = (0x08 << 6) | MMI_OPC_CLASS_MMI3,
-    MMI_OPC_3_PMTLO   = (0x09 << 6) | MMI_OPC_CLASS_MMI3,
-    MMI_OPC_3_PINTEH  = (0x0A << 6) | MMI_OPC_CLASS_MMI3,
-    MMI_OPC_3_PMULTUW = (0x0C << 6) | MMI_OPC_CLASS_MMI3,
-    MMI_OPC_3_PDIVUW  = (0x0D << 6) | MMI_OPC_CLASS_MMI3,
-    MMI_OPC_3_PCPYUD  = (0x0E << 6) | MMI_OPC_CLASS_MMI3,
-    MMI_OPC_3_POR     = (0x12 << 6) | MMI_OPC_CLASS_MMI3,
-    MMI_OPC_3_PNOR    = (0x13 << 6) | MMI_OPC_CLASS_MMI3,
-    MMI_OPC_3_PEXCH   = (0x1A << 6) | MMI_OPC_CLASS_MMI3,
-    MMI_OPC_3_PCPYH   = (0x1B << 6) | MMI_OPC_CLASS_MMI3,
-    MMI_OPC_3_PEXCW   = (0x1E << 6) | MMI_OPC_CLASS_MMI3,
 };
 
 /* global register indices */
@@ -2192,12 +1220,6 @@ static TCGv_i32 hflags;
 TCGv_i32 fpu_fcr0, fpu_fcr31;
 TCGv_i64 fpu_f64[32];
 
-#if !defined(TARGET_MIPS64)
-/* MXU registers */
-static TCGv mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1];
-static TCGv mxu_CR;
-#endif
-
 #include "exec/gen-icount.h"
 
 #define gen_helper_0e0i(name, arg) do {                           \
@@ -2267,13 +1289,6 @@ static const char * const fregnames[] = {
     "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
 };
 
-#if !defined(TARGET_MIPS64)
-static const char * const mxuregnames[] = {
-    "XR1",  "XR2",  "XR3",  "XR4",  "XR5",  "XR6",  "XR7",  "XR8",
-    "XR9",  "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "MXU_CR",
-};
-#endif
-
 /* General purpose registers moves. */
 void gen_load_gpr(TCGv t, int reg)
 {
@@ -2357,38 +1372,6 @@ static inline void gen_store_srsgpr(int from, int to)
     }
 }
 
-#if !defined(TARGET_MIPS64)
-/* MXU General purpose registers moves. */
-static inline void gen_load_mxu_gpr(TCGv t, unsigned int reg)
-{
-    if (reg == 0) {
-        tcg_gen_movi_tl(t, 0);
-    } else if (reg <= 15) {
-        tcg_gen_mov_tl(t, mxu_gpr[reg - 1]);
-    }
-}
-
-static inline void gen_store_mxu_gpr(TCGv t, unsigned int reg)
-{
-    if (reg > 0 && reg <= 15) {
-        tcg_gen_mov_tl(mxu_gpr[reg - 1], t);
-    }
-}
-
-/* MXU control register moves. */
-static inline void gen_load_mxu_cr(TCGv t)
-{
-    tcg_gen_mov_tl(t, mxu_CR);
-}
-
-static inline void gen_store_mxu_cr(TCGv t)
-{
-    /* TODO: Add handling of RW rules for MXU_CR. */
-    tcg_gen_mov_tl(mxu_CR, t);
-}
-#endif
-
-
 /* Tests */
 static inline void gen_save_pc(target_ulong pc)
 {
@@ -4122,31 +3105,6 @@ static void gen_shift(DisasContext *ctx, uint32_t opc,
     tcg_temp_free(t1);
 }
 
-#if defined(TARGET_MIPS64)
-/* Copy GPR to and from TX79 HI1/LO1 register. */
-static void gen_HILO1_tx79(DisasContext *ctx, uint32_t opc, int reg)
-{
-    switch (opc) {
-    case MMI_OPC_MFHI1:
-        gen_store_gpr(cpu_HI[1], reg);
-        break;
-    case MMI_OPC_MFLO1:
-        gen_store_gpr(cpu_LO[1], reg);
-        break;
-    case MMI_OPC_MTHI1:
-        gen_load_gpr(cpu_HI[1], reg);
-        break;
-    case MMI_OPC_MTLO1:
-        gen_load_gpr(cpu_LO[1], reg);
-        break;
-    default:
-        MIPS_INVAL("mfthilo1 TX79");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-#endif
-
 /* Arithmetic on HI/LO registers */
 static void gen_HILO(DisasContext *ctx, uint32_t opc, int acc, int reg)
 {
@@ -11118,11 +10076,7 @@ static void gen_movci(DisasContext *ctx, int rd, int rs, int cc, int tf)
     tcg_gen_andi_i32(t0, fpu_fcr31, 1 << get_fp_bit(cc));
     tcg_gen_brcondi_i32(cond, t0, 0, l1);
     tcg_temp_free_i32(t0);
-    if (rs == 0) {
-        tcg_gen_movi_tl(cpu_gpr[rd], 0);
-    } else {
-        tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
-    }
+    gen_load_gpr(cpu_gpr[rd], rs);
     gen_set_label(l1);
 }
 
@@ -13040,7 +11994,7 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
     }
 }
 
-static void gen_rdhwr(DisasContext *ctx, int rt, int rd, int sel)
+void gen_rdhwr(DisasContext *ctx, int rt, int rd, int sel)
 {
     TCGv t0;
 
@@ -15452,24 +14406,15 @@ static void gen_pool16c_insn(DisasContext *ctx)
 static inline void gen_movep(DisasContext *ctx, int enc_dest, int enc_rt,
                              int enc_rs)
 {
-    int rd, rs, re, rt;
+    int rd, re;
     static const int rd_enc[] = { 5, 5, 6, 4, 4, 4, 4, 4 };
     static const int re_enc[] = { 6, 7, 7, 21, 22, 5, 6, 7 };
     static const int rs_rt_enc[] = { 0, 17, 2, 3, 16, 18, 19, 20 };
+
     rd = rd_enc[enc_dest];
     re = re_enc[enc_dest];
-    rs = rs_rt_enc[enc_rs];
-    rt = rs_rt_enc[enc_rt];
-    if (rs) {
-        tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
-    } else {
-        tcg_gen_movi_tl(cpu_gpr[rd], 0);
-    }
-    if (rt) {
-        tcg_gen_mov_tl(cpu_gpr[re], cpu_gpr[rt]);
-    } else {
-        tcg_gen_movi_tl(cpu_gpr[re], 0);
-    }
+    gen_load_gpr(cpu_gpr[rd], rs_rt_enc[enc_rs]);
+    gen_load_gpr(cpu_gpr[re], rs_rt_enc[enc_rt]);
 }
 
 static void gen_pool16c_r6_insn(DisasContext *ctx)
@@ -24742,2400 +23687,6 @@ static void decode_opc_special(CPUMIPSState *env, DisasContext *ctx)
 }
 
 
-#if defined(TARGET_MIPS64)
-
-/*
- *
- *           MMI (MultiMedia Interface) ASE instructions
- *           ===========================================
- */
-
-/*
- *          MMI instructions category: data communication
- *          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *   PCPYH    PEXCH    PEXTLB   PINTH    PPACB    PEXT5    PREVH
- *   PCPYLD   PEXCW    PEXTLH   PINTEH   PPACH    PPAC5    PROT3W
- *   PCPYUD   PEXEH    PEXTLW            PPACW
- *            PEXEW    PEXTUB
- *                     PEXTUH
- *                     PEXTUW
- */
-
-/*
- *  PCPYH rd, rt
- *
- *    Parallel Copy Halfword
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+---------+---------+---------+-----------+
- *  |    MMI    |0 0 0 0 0|   rt    |   rd    |  PCPYH  |    MMI3   |
- *  +-----------+---------+---------+---------+---------+-----------+
- */
-static void gen_mmi_pcpyh(DisasContext *ctx)
-{
-    uint32_t pd, rt, rd;
-    uint32_t opcode;
-
-    opcode = ctx->opcode;
-
-    pd = extract32(opcode, 21, 5);
-    rt = extract32(opcode, 16, 5);
-    rd = extract32(opcode, 11, 5);
-
-    if (unlikely(pd != 0)) {
-        gen_reserved_instruction(ctx);
-    } else if (rd == 0) {
-        /* nop */
-    } else if (rt == 0) {
-        tcg_gen_movi_i64(cpu_gpr[rd], 0);
-        tcg_gen_movi_i64(cpu_gpr_hi[rd], 0);
-    } else {
-        TCGv_i64 t0 = tcg_temp_new();
-        TCGv_i64 t1 = tcg_temp_new();
-        uint64_t mask = (1ULL << 16) - 1;
-
-        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask);
-        tcg_gen_movi_i64(t1, 0);
-        tcg_gen_or_i64(t1, t0, t1);
-        tcg_gen_shli_i64(t0, t0, 16);
-        tcg_gen_or_i64(t1, t0, t1);
-        tcg_gen_shli_i64(t0, t0, 16);
-        tcg_gen_or_i64(t1, t0, t1);
-        tcg_gen_shli_i64(t0, t0, 16);
-        tcg_gen_or_i64(t1, t0, t1);
-
-        tcg_gen_mov_i64(cpu_gpr[rd], t1);
-
-        tcg_gen_andi_i64(t0, cpu_gpr_hi[rt], mask);
-        tcg_gen_movi_i64(t1, 0);
-        tcg_gen_or_i64(t1, t0, t1);
-        tcg_gen_shli_i64(t0, t0, 16);
-        tcg_gen_or_i64(t1, t0, t1);
-        tcg_gen_shli_i64(t0, t0, 16);
-        tcg_gen_or_i64(t1, t0, t1);
-        tcg_gen_shli_i64(t0, t0, 16);
-        tcg_gen_or_i64(t1, t0, t1);
-
-        tcg_gen_mov_i64(cpu_gpr_hi[rd], t1);
-
-        tcg_temp_free(t0);
-        tcg_temp_free(t1);
-    }
-}
-
-/*
- *  PCPYLD rd, rs, rt
- *
- *    Parallel Copy Lower Doubleword
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+---------+---------+---------+-----------+
- *  |    MMI    |   rs    |   rt    |   rd    | PCPYLD  |    MMI2   |
- *  +-----------+---------+---------+---------+---------+-----------+
- */
-static void gen_mmi_pcpyld(DisasContext *ctx)
-{
-    uint32_t rs, rt, rd;
-    uint32_t opcode;
-
-    opcode = ctx->opcode;
-
-    rs = extract32(opcode, 21, 5);
-    rt = extract32(opcode, 16, 5);
-    rd = extract32(opcode, 11, 5);
-
-    if (rd == 0) {
-        /* nop */
-    } else {
-        if (rs == 0) {
-            tcg_gen_movi_i64(cpu_gpr_hi[rd], 0);
-        } else {
-            tcg_gen_mov_i64(cpu_gpr_hi[rd], cpu_gpr[rs]);
-        }
-        if (rt == 0) {
-            tcg_gen_movi_i64(cpu_gpr[rd], 0);
-        } else {
-            if (rd != rt) {
-                tcg_gen_mov_i64(cpu_gpr[rd], cpu_gpr[rt]);
-            }
-        }
-    }
-}
-
-/*
- *  PCPYUD rd, rs, rt
- *
- *    Parallel Copy Upper Doubleword
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+---------+---------+---------+-----------+
- *  |    MMI    |   rs    |   rt    |   rd    | PCPYUD  |    MMI3   |
- *  +-----------+---------+---------+---------+---------+-----------+
- */
-static void gen_mmi_pcpyud(DisasContext *ctx)
-{
-    uint32_t rs, rt, rd;
-    uint32_t opcode;
-
-    opcode = ctx->opcode;
-
-    rs = extract32(opcode, 21, 5);
-    rt = extract32(opcode, 16, 5);
-    rd = extract32(opcode, 11, 5);
-
-    if (rd == 0) {
-        /* nop */
-    } else {
-        if (rs == 0) {
-            tcg_gen_movi_i64(cpu_gpr[rd], 0);
-        } else {
-            tcg_gen_mov_i64(cpu_gpr[rd], cpu_gpr_hi[rs]);
-        }
-        if (rt == 0) {
-            tcg_gen_movi_i64(cpu_gpr_hi[rd], 0);
-        } else {
-            if (rd != rt) {
-                tcg_gen_mov_i64(cpu_gpr_hi[rd], cpu_gpr_hi[rt]);
-            }
-        }
-    }
-}
-
-#endif
-
-
-#if !defined(TARGET_MIPS64)
-
-/* MXU accumulate add/subtract 1-bit pattern 'aptn1' */
-#define MXU_APTN1_A    0
-#define MXU_APTN1_S    1
-
-/* MXU accumulate add/subtract 2-bit pattern 'aptn2' */
-#define MXU_APTN2_AA    0
-#define MXU_APTN2_AS    1
-#define MXU_APTN2_SA    2
-#define MXU_APTN2_SS    3
-
-/* MXU execute add/subtract 2-bit pattern 'eptn2' */
-#define MXU_EPTN2_AA    0
-#define MXU_EPTN2_AS    1
-#define MXU_EPTN2_SA    2
-#define MXU_EPTN2_SS    3
-
-/* MXU operand getting pattern 'optn2' */
-#define MXU_OPTN2_PTN0  0
-#define MXU_OPTN2_PTN1  1
-#define MXU_OPTN2_PTN2  2
-#define MXU_OPTN2_PTN3  3
-/* alternative naming scheme for 'optn2' */
-#define MXU_OPTN2_WW    0
-#define MXU_OPTN2_LW    1
-#define MXU_OPTN2_HW    2
-#define MXU_OPTN2_XW    3
-
-/* MXU operand getting pattern 'optn3' */
-#define MXU_OPTN3_PTN0  0
-#define MXU_OPTN3_PTN1  1
-#define MXU_OPTN3_PTN2  2
-#define MXU_OPTN3_PTN3  3
-#define MXU_OPTN3_PTN4  4
-#define MXU_OPTN3_PTN5  5
-#define MXU_OPTN3_PTN6  6
-#define MXU_OPTN3_PTN7  7
-
-
-/*
- * S32I2M XRa, rb - Register move from GRF to XRF
- */
-static void gen_mxu_s32i2m(DisasContext *ctx)
-{
-    TCGv t0;
-    uint32_t XRa, Rb;
-
-    t0 = tcg_temp_new();
-
-    XRa = extract32(ctx->opcode, 6, 5);
-    Rb = extract32(ctx->opcode, 16, 5);
-
-    gen_load_gpr(t0, Rb);
-    if (XRa <= 15) {
-        gen_store_mxu_gpr(t0, XRa);
-    } else if (XRa == 16) {
-        gen_store_mxu_cr(t0);
-    }
-
-    tcg_temp_free(t0);
-}
-
-/*
- * S32M2I XRa, rb - Register move from XRF to GRF
- */
-static void gen_mxu_s32m2i(DisasContext *ctx)
-{
-    TCGv t0;
-    uint32_t XRa, Rb;
-
-    t0 = tcg_temp_new();
-
-    XRa = extract32(ctx->opcode, 6, 5);
-    Rb = extract32(ctx->opcode, 16, 5);
-
-    if (XRa <= 15) {
-        gen_load_mxu_gpr(t0, XRa);
-    } else if (XRa == 16) {
-        gen_load_mxu_cr(t0);
-    }
-
-    gen_store_gpr(t0, Rb);
-
-    tcg_temp_free(t0);
-}
-
-/*
- * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
- */
-static void gen_mxu_s8ldd(DisasContext *ctx)
-{
-    TCGv t0, t1;
-    uint32_t XRa, Rb, s8, optn3;
-
-    t0 = tcg_temp_new();
-    t1 = tcg_temp_new();
-
-    XRa = extract32(ctx->opcode, 6, 4);
-    s8 = extract32(ctx->opcode, 10, 8);
-    optn3 = extract32(ctx->opcode, 18, 3);
-    Rb = extract32(ctx->opcode, 21, 5);
-
-    gen_load_gpr(t0, Rb);
-    tcg_gen_addi_tl(t0, t0, (int8_t)s8);
-
-    switch (optn3) {
-    /* XRa[7:0] = tmp8 */
-    case MXU_OPTN3_PTN0:
-        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
-        gen_load_mxu_gpr(t0, XRa);
-        tcg_gen_deposit_tl(t0, t0, t1, 0, 8);
-        break;
-    /* XRa[15:8] = tmp8 */
-    case MXU_OPTN3_PTN1:
-        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
-        gen_load_mxu_gpr(t0, XRa);
-        tcg_gen_deposit_tl(t0, t0, t1, 8, 8);
-        break;
-    /* XRa[23:16] = tmp8 */
-    case MXU_OPTN3_PTN2:
-        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
-        gen_load_mxu_gpr(t0, XRa);
-        tcg_gen_deposit_tl(t0, t0, t1, 16, 8);
-        break;
-    /* XRa[31:24] = tmp8 */
-    case MXU_OPTN3_PTN3:
-        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
-        gen_load_mxu_gpr(t0, XRa);
-        tcg_gen_deposit_tl(t0, t0, t1, 24, 8);
-        break;
-    /* XRa = {8'b0, tmp8, 8'b0, tmp8} */
-    case MXU_OPTN3_PTN4:
-        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
-        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
-        break;
-    /* XRa = {tmp8, 8'b0, tmp8, 8'b0} */
-    case MXU_OPTN3_PTN5:
-        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
-        tcg_gen_shli_tl(t1, t1, 8);
-        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
-        break;
-    /* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */
-    case MXU_OPTN3_PTN6:
-        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_SB);
-        tcg_gen_mov_tl(t0, t1);
-        tcg_gen_andi_tl(t0, t0, 0xFF00FFFF);
-        tcg_gen_shli_tl(t1, t1, 16);
-        tcg_gen_or_tl(t0, t0, t1);
-        break;
-    /* XRa = {tmp8, tmp8, tmp8, tmp8} */
-    case MXU_OPTN3_PTN7:
-        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
-        tcg_gen_deposit_tl(t1, t1, t1, 8, 8);
-        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
-        break;
-    }
-
-    gen_store_mxu_gpr(t0, XRa);
-
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-}
-
-/*
- * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
- */
-static void gen_mxu_d16mul(DisasContext *ctx)
-{
-    TCGv t0, t1, t2, t3;
-    uint32_t XRa, XRb, XRc, XRd, optn2;
-
-    t0 = tcg_temp_new();
-    t1 = tcg_temp_new();
-    t2 = tcg_temp_new();
-    t3 = tcg_temp_new();
-
-    XRa = extract32(ctx->opcode, 6, 4);
-    XRb = extract32(ctx->opcode, 10, 4);
-    XRc = extract32(ctx->opcode, 14, 4);
-    XRd = extract32(ctx->opcode, 18, 4);
-    optn2 = extract32(ctx->opcode, 22, 2);
-
-    gen_load_mxu_gpr(t1, XRb);
-    tcg_gen_sextract_tl(t0, t1, 0, 16);
-    tcg_gen_sextract_tl(t1, t1, 16, 16);
-    gen_load_mxu_gpr(t3, XRc);
-    tcg_gen_sextract_tl(t2, t3, 0, 16);
-    tcg_gen_sextract_tl(t3, t3, 16, 16);
-
-    switch (optn2) {
-    case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
-        tcg_gen_mul_tl(t3, t1, t3);
-        tcg_gen_mul_tl(t2, t0, t2);
-        break;
-    case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
-        tcg_gen_mul_tl(t3, t0, t3);
-        tcg_gen_mul_tl(t2, t0, t2);
-        break;
-    case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
-        tcg_gen_mul_tl(t3, t1, t3);
-        tcg_gen_mul_tl(t2, t1, t2);
-        break;
-    case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
-        tcg_gen_mul_tl(t3, t0, t3);
-        tcg_gen_mul_tl(t2, t1, t2);
-        break;
-    }
-    gen_store_mxu_gpr(t3, XRa);
-    gen_store_mxu_gpr(t2, XRd);
-
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-    tcg_temp_free(t2);
-    tcg_temp_free(t3);
-}
-
-/*
- * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 - Signed 16 bit pattern multiply
- *                                           and accumulate
- */
-static void gen_mxu_d16mac(DisasContext *ctx)
-{
-    TCGv t0, t1, t2, t3;
-    uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
-
-    t0 = tcg_temp_new();
-    t1 = tcg_temp_new();
-    t2 = tcg_temp_new();
-    t3 = tcg_temp_new();
-
-    XRa = extract32(ctx->opcode, 6, 4);
-    XRb = extract32(ctx->opcode, 10, 4);
-    XRc = extract32(ctx->opcode, 14, 4);
-    XRd = extract32(ctx->opcode, 18, 4);
-    optn2 = extract32(ctx->opcode, 22, 2);
-    aptn2 = extract32(ctx->opcode, 24, 2);
-
-    gen_load_mxu_gpr(t1, XRb);
-    tcg_gen_sextract_tl(t0, t1, 0, 16);
-    tcg_gen_sextract_tl(t1, t1, 16, 16);
-
-    gen_load_mxu_gpr(t3, XRc);
-    tcg_gen_sextract_tl(t2, t3, 0, 16);
-    tcg_gen_sextract_tl(t3, t3, 16, 16);
-
-    switch (optn2) {
-    case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
-        tcg_gen_mul_tl(t3, t1, t3);
-        tcg_gen_mul_tl(t2, t0, t2);
-        break;
-    case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
-        tcg_gen_mul_tl(t3, t0, t3);
-        tcg_gen_mul_tl(t2, t0, t2);
-        break;
-    case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
-        tcg_gen_mul_tl(t3, t1, t3);
-        tcg_gen_mul_tl(t2, t1, t2);
-        break;
-    case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
-        tcg_gen_mul_tl(t3, t0, t3);
-        tcg_gen_mul_tl(t2, t1, t2);
-        break;
-    }
-    gen_load_mxu_gpr(t0, XRa);
-    gen_load_mxu_gpr(t1, XRd);
-
-    switch (aptn2) {
-    case MXU_APTN2_AA:
-        tcg_gen_add_tl(t3, t0, t3);
-        tcg_gen_add_tl(t2, t1, t2);
-        break;
-    case MXU_APTN2_AS:
-        tcg_gen_add_tl(t3, t0, t3);
-        tcg_gen_sub_tl(t2, t1, t2);
-        break;
-    case MXU_APTN2_SA:
-        tcg_gen_sub_tl(t3, t0, t3);
-        tcg_gen_add_tl(t2, t1, t2);
-        break;
-    case MXU_APTN2_SS:
-        tcg_gen_sub_tl(t3, t0, t3);
-        tcg_gen_sub_tl(t2, t1, t2);
-        break;
-    }
-    gen_store_mxu_gpr(t3, XRa);
-    gen_store_mxu_gpr(t2, XRd);
-
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-    tcg_temp_free(t2);
-    tcg_temp_free(t3);
-}
-
-/*
- * Q8MUL   XRa, XRb, XRc, XRd - Parallel unsigned 8 bit pattern multiply
- * Q8MULSU XRa, XRb, XRc, XRd - Parallel signed 8 bit pattern multiply
- */
-static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx)
-{
-    TCGv t0, t1, t2, t3, t4, t5, t6, t7;
-    uint32_t XRa, XRb, XRc, XRd, sel;
-
-    t0 = tcg_temp_new();
-    t1 = tcg_temp_new();
-    t2 = tcg_temp_new();
-    t3 = tcg_temp_new();
-    t4 = tcg_temp_new();
-    t5 = tcg_temp_new();
-    t6 = tcg_temp_new();
-    t7 = tcg_temp_new();
-
-    XRa = extract32(ctx->opcode, 6, 4);
-    XRb = extract32(ctx->opcode, 10, 4);
-    XRc = extract32(ctx->opcode, 14, 4);
-    XRd = extract32(ctx->opcode, 18, 4);
-    sel = extract32(ctx->opcode, 22, 2);
-
-    gen_load_mxu_gpr(t3, XRb);
-    gen_load_mxu_gpr(t7, XRc);
-
-    if (sel == 0x2) {
-        /* Q8MULSU */
-        tcg_gen_ext8s_tl(t0, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8s_tl(t1, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8s_tl(t2, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8s_tl(t3, t3);
-    } else {
-        /* Q8MUL */
-        tcg_gen_ext8u_tl(t0, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8u_tl(t1, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8u_tl(t2, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8u_tl(t3, t3);
-    }
-
-    tcg_gen_ext8u_tl(t4, t7);
-    tcg_gen_shri_tl(t7, t7, 8);
-    tcg_gen_ext8u_tl(t5, t7);
-    tcg_gen_shri_tl(t7, t7, 8);
-    tcg_gen_ext8u_tl(t6, t7);
-    tcg_gen_shri_tl(t7, t7, 8);
-    tcg_gen_ext8u_tl(t7, t7);
-
-    tcg_gen_mul_tl(t0, t0, t4);
-    tcg_gen_mul_tl(t1, t1, t5);
-    tcg_gen_mul_tl(t2, t2, t6);
-    tcg_gen_mul_tl(t3, t3, t7);
-
-    tcg_gen_andi_tl(t0, t0, 0xFFFF);
-    tcg_gen_andi_tl(t1, t1, 0xFFFF);
-    tcg_gen_andi_tl(t2, t2, 0xFFFF);
-    tcg_gen_andi_tl(t3, t3, 0xFFFF);
-
-    tcg_gen_shli_tl(t1, t1, 16);
-    tcg_gen_shli_tl(t3, t3, 16);
-
-    tcg_gen_or_tl(t0, t0, t1);
-    tcg_gen_or_tl(t1, t2, t3);
-
-    gen_store_mxu_gpr(t0, XRd);
-    gen_store_mxu_gpr(t1, XRa);
-
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-    tcg_temp_free(t2);
-    tcg_temp_free(t3);
-    tcg_temp_free(t4);
-    tcg_temp_free(t5);
-    tcg_temp_free(t6);
-    tcg_temp_free(t7);
-}
-
-/*
- * S32LDD  XRa, Rb, S12 - Load a word from memory to XRF
- * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF, reversed byte seq.
- */
-static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx)
-{
-    TCGv t0, t1;
-    uint32_t XRa, Rb, s12, sel;
-
-    t0 = tcg_temp_new();
-    t1 = tcg_temp_new();
-
-    XRa = extract32(ctx->opcode, 6, 4);
-    s12 = extract32(ctx->opcode, 10, 10);
-    sel = extract32(ctx->opcode, 20, 1);
-    Rb = extract32(ctx->opcode, 21, 5);
-
-    gen_load_gpr(t0, Rb);
-
-    tcg_gen_movi_tl(t1, s12);
-    tcg_gen_shli_tl(t1, t1, 2);
-    if (s12 & 0x200) {
-        tcg_gen_ori_tl(t1, t1, 0xFFFFF000);
-    }
-    tcg_gen_add_tl(t1, t0, t1);
-    tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_SL);
-
-    if (sel == 1) {
-        /* S32LDDR */
-        tcg_gen_bswap32_tl(t1, t1);
-    }
-    gen_store_mxu_gpr(t1, XRa);
-
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-}
-
-
-/*
- *                 MXU instruction category: logic
- *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *               S32NOR    S32AND    S32OR    S32XOR
- */
-
-/*
- *  S32NOR XRa, XRb, XRc
- *    Update XRa with the result of logical bitwise 'nor' operation
- *    applied to the content of XRb and XRc.
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |0 0 0 0 0| opc |  XRc  |  XRb  |  XRa  |MXU__POOL16|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- */
-static void gen_mxu_S32NOR(DisasContext *ctx)
-{
-    uint32_t pad, XRc, XRb, XRa;
-
-    pad = extract32(ctx->opcode, 21, 5);
-    XRc = extract32(ctx->opcode, 14, 4);
-    XRb = extract32(ctx->opcode, 10, 4);
-    XRa = extract32(ctx->opcode,  6, 4);
-
-    if (unlikely(pad != 0)) {
-        /* opcode padding incorrect -> do nothing */
-    } else if (unlikely(XRa == 0)) {
-        /* destination is zero register -> do nothing */
-    } else if (unlikely((XRb == 0) && (XRc == 0))) {
-        /* both operands zero registers -> just set destination to all 1s */
-        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0xFFFFFFFF);
-    } else if (unlikely(XRb == 0)) {
-        /* XRb zero register -> just set destination to the negation of XRc */
-        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
-    } else if (unlikely(XRc == 0)) {
-        /* XRa zero register -> just set destination to the negation of XRb */
-        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
-    } else if (unlikely(XRb == XRc)) {
-        /* both operands same -> just set destination to the negation of XRb */
-        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
-    } else {
-        /* the most general case */
-        tcg_gen_nor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
-    }
-}
-
-/*
- *  S32AND XRa, XRb, XRc
- *    Update XRa with the result of logical bitwise 'and' operation
- *    applied to the content of XRb and XRc.
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |0 0 0 0 0| opc |  XRc  |  XRb  |  XRa  |MXU__POOL16|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- */
-static void gen_mxu_S32AND(DisasContext *ctx)
-{
-    uint32_t pad, XRc, XRb, XRa;
-
-    pad = extract32(ctx->opcode, 21, 5);
-    XRc = extract32(ctx->opcode, 14, 4);
-    XRb = extract32(ctx->opcode, 10, 4);
-    XRa = extract32(ctx->opcode,  6, 4);
-
-    if (unlikely(pad != 0)) {
-        /* opcode padding incorrect -> do nothing */
-    } else if (unlikely(XRa == 0)) {
-        /* destination is zero register -> do nothing */
-    } else if (unlikely((XRb == 0) || (XRc == 0))) {
-        /* one of operands zero register -> just set destination to all 0s */
-        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
-    } else if (unlikely(XRb == XRc)) {
-        /* both operands same -> just set destination to one of them */
-        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
-    } else {
-        /* the most general case */
-        tcg_gen_and_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
-    }
-}
-
-/*
- *  S32OR XRa, XRb, XRc
- *    Update XRa with the result of logical bitwise 'or' operation
- *    applied to the content of XRb and XRc.
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |0 0 0 0 0| opc |  XRc  |  XRb  |  XRa  |MXU__POOL16|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- */
-static void gen_mxu_S32OR(DisasContext *ctx)
-{
-    uint32_t pad, XRc, XRb, XRa;
-
-    pad = extract32(ctx->opcode, 21, 5);
-    XRc = extract32(ctx->opcode, 14, 4);
-    XRb = extract32(ctx->opcode, 10, 4);
-    XRa = extract32(ctx->opcode,  6, 4);
-
-    if (unlikely(pad != 0)) {
-        /* opcode padding incorrect -> do nothing */
-    } else if (unlikely(XRa == 0)) {
-        /* destination is zero register -> do nothing */
-    } else if (unlikely((XRb == 0) && (XRc == 0))) {
-        /* both operands zero registers -> just set destination to all 0s */
-        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
-    } else if (unlikely(XRb == 0)) {
-        /* XRb zero register -> just set destination to the content of XRc */
-        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
-    } else if (unlikely(XRc == 0)) {
-        /* XRc zero register -> just set destination to the content of XRb */
-        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
-    } else if (unlikely(XRb == XRc)) {
-        /* both operands same -> just set destination to one of them */
-        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
-    } else {
-        /* the most general case */
-        tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
-    }
-}
-
-/*
- *  S32XOR XRa, XRb, XRc
- *    Update XRa with the result of logical bitwise 'xor' operation
- *    applied to the content of XRb and XRc.
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |0 0 0 0 0| opc |  XRc  |  XRb  |  XRa  |MXU__POOL16|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- */
-static void gen_mxu_S32XOR(DisasContext *ctx)
-{
-    uint32_t pad, XRc, XRb, XRa;
-
-    pad = extract32(ctx->opcode, 21, 5);
-    XRc = extract32(ctx->opcode, 14, 4);
-    XRb = extract32(ctx->opcode, 10, 4);
-    XRa = extract32(ctx->opcode,  6, 4);
-
-    if (unlikely(pad != 0)) {
-        /* opcode padding incorrect -> do nothing */
-    } else if (unlikely(XRa == 0)) {
-        /* destination is zero register -> do nothing */
-    } else if (unlikely((XRb == 0) && (XRc == 0))) {
-        /* both operands zero registers -> just set destination to all 0s */
-        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
-    } else if (unlikely(XRb == 0)) {
-        /* XRb zero register -> just set destination to the content of XRc */
-        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
-    } else if (unlikely(XRc == 0)) {
-        /* XRc zero register -> just set destination to the content of XRb */
-        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
-    } else if (unlikely(XRb == XRc)) {
-        /* both operands same -> just set destination to all 0s */
-        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
-    } else {
-        /* the most general case */
-        tcg_gen_xor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
-    }
-}
-
-
-/*
- *                   MXU instruction category max/min
- *                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *                     S32MAX     D16MAX     Q8MAX
- *                     S32MIN     D16MIN     Q8MIN
- */
-
-/*
- *  S32MAX XRa, XRb, XRc
- *    Update XRa with the maximum of signed 32-bit integers contained
- *    in XRb and XRc.
- *
- *  S32MIN XRa, XRb, XRc
- *    Update XRa with the minimum of signed 32-bit integers contained
- *    in XRb and XRc.
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |0 0 0 0 0| opc |  XRc  |  XRb  |  XRa  |MXU__POOL00|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- */
-static void gen_mxu_S32MAX_S32MIN(DisasContext *ctx)
-{
-    uint32_t pad, opc, XRc, XRb, XRa;
-
-    pad = extract32(ctx->opcode, 21, 5);
-    opc = extract32(ctx->opcode, 18, 3);
-    XRc = extract32(ctx->opcode, 14, 4);
-    XRb = extract32(ctx->opcode, 10, 4);
-    XRa = extract32(ctx->opcode,  6, 4);
-
-    if (unlikely(pad != 0)) {
-        /* opcode padding incorrect -> do nothing */
-    } else if (unlikely(XRa == 0)) {
-        /* destination is zero register -> do nothing */
-    } else if (unlikely((XRb == 0) && (XRc == 0))) {
-        /* both operands zero registers -> just set destination to zero */
-        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
-    } else if (unlikely((XRb == 0) || (XRc == 0))) {
-        /* exactly one operand is zero register - find which one is not...*/
-        uint32_t XRx = XRb ? XRb : XRc;
-        /* ...and do max/min operation with one operand 0 */
-        if (opc == OPC_MXU_S32MAX) {
-            tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
-        } else {
-            tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
-        }
-    } else if (unlikely(XRb == XRc)) {
-        /* both operands same -> just set destination to one of them */
-        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
-    } else {
-        /* the most general case */
-        if (opc == OPC_MXU_S32MAX) {
-            tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
-                                               mxu_gpr[XRc - 1]);
-        } else {
-            tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
-                                               mxu_gpr[XRc - 1]);
-        }
-    }
-}
-
-/*
- *  D16MAX
- *    Update XRa with the 16-bit-wise maximums of signed integers
- *    contained in XRb and XRc.
- *
- *  D16MIN
- *    Update XRa with the 16-bit-wise minimums of signed integers
- *    contained in XRb and XRc.
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |0 0 0 0 0| opc |  XRc  |  XRb  |  XRa  |MXU__POOL00|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- */
-static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
-{
-    uint32_t pad, opc, XRc, XRb, XRa;
-
-    pad = extract32(ctx->opcode, 21, 5);
-    opc = extract32(ctx->opcode, 18, 3);
-    XRc = extract32(ctx->opcode, 14, 4);
-    XRb = extract32(ctx->opcode, 10, 4);
-    XRa = extract32(ctx->opcode,  6, 4);
-
-    if (unlikely(pad != 0)) {
-        /* opcode padding incorrect -> do nothing */
-    } else if (unlikely(XRc == 0)) {
-        /* destination is zero register -> do nothing */
-    } else if (unlikely((XRb == 0) && (XRa == 0))) {
-        /* both operands zero registers -> just set destination to zero */
-        tcg_gen_movi_i32(mxu_gpr[XRc - 1], 0);
-    } else if (unlikely((XRb == 0) || (XRa == 0))) {
-        /* exactly one operand is zero register - find which one is not...*/
-        uint32_t XRx = XRb ? XRb : XRc;
-        /* ...and do half-word-wise max/min with one operand 0 */
-        TCGv_i32 t0 = tcg_temp_new();
-        TCGv_i32 t1 = tcg_const_i32(0);
-
-        /* the left half-word first */
-        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
-        if (opc == OPC_MXU_D16MAX) {
-            tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
-        } else {
-            tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
-        }
-
-        /* the right half-word */
-        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0x0000FFFF);
-        /* move half-words to the leftmost position */
-        tcg_gen_shli_i32(t0, t0, 16);
-        /* t0 will be max/min of t0 and t1 */
-        if (opc == OPC_MXU_D16MAX) {
-            tcg_gen_smax_i32(t0, t0, t1);
-        } else {
-            tcg_gen_smin_i32(t0, t0, t1);
-        }
-        /* return resulting half-words to its original position */
-        tcg_gen_shri_i32(t0, t0, 16);
-        /* finally update the destination */
-        tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
-
-        tcg_temp_free(t1);
-        tcg_temp_free(t0);
-    } else if (unlikely(XRb == XRc)) {
-        /* both operands same -> just set destination to one of them */
-        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
-    } else {
-        /* the most general case */
-        TCGv_i32 t0 = tcg_temp_new();
-        TCGv_i32 t1 = tcg_temp_new();
-
-        /* the left half-word first */
-        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
-        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
-        if (opc == OPC_MXU_D16MAX) {
-            tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
-        } else {
-            tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
-        }
-
-        /* the right half-word */
-        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
-        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0x0000FFFF);
-        /* move half-words to the leftmost position */
-        tcg_gen_shli_i32(t0, t0, 16);
-        tcg_gen_shli_i32(t1, t1, 16);
-        /* t0 will be max/min of t0 and t1 */
-        if (opc == OPC_MXU_D16MAX) {
-            tcg_gen_smax_i32(t0, t0, t1);
-        } else {
-            tcg_gen_smin_i32(t0, t0, t1);
-        }
-        /* return resulting half-words to its original position */
-        tcg_gen_shri_i32(t0, t0, 16);
-        /* finally update the destination */
-        tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
-
-        tcg_temp_free(t1);
-        tcg_temp_free(t0);
-    }
-}
-
-/*
- *  Q8MAX
- *    Update XRa with the 8-bit-wise maximums of signed integers
- *    contained in XRb and XRc.
- *
- *  Q8MIN
- *    Update XRa with the 8-bit-wise minimums of signed integers
- *    contained in XRb and XRc.
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |0 0 0 0 0| opc |  XRc  |  XRb  |  XRa  |MXU__POOL00|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- */
-static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
-{
-    uint32_t pad, opc, XRc, XRb, XRa;
-
-    pad = extract32(ctx->opcode, 21, 5);
-    opc = extract32(ctx->opcode, 18, 3);
-    XRc = extract32(ctx->opcode, 14, 4);
-    XRb = extract32(ctx->opcode, 10, 4);
-    XRa = extract32(ctx->opcode,  6, 4);
-
-    if (unlikely(pad != 0)) {
-        /* opcode padding incorrect -> do nothing */
-    } else if (unlikely(XRa == 0)) {
-        /* destination is zero register -> do nothing */
-    } else if (unlikely((XRb == 0) && (XRc == 0))) {
-        /* both operands zero registers -> just set destination to zero */
-        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
-    } else if (unlikely((XRb == 0) || (XRc == 0))) {
-        /* exactly one operand is zero register - make it be the first...*/
-        uint32_t XRx = XRb ? XRb : XRc;
-        /* ...and do byte-wise max/min with one operand 0 */
-        TCGv_i32 t0 = tcg_temp_new();
-        TCGv_i32 t1 = tcg_const_i32(0);
-        int32_t i;
-
-        /* the leftmost byte (byte 3) first */
-        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000);
-        if (opc == OPC_MXU_Q8MAX) {
-            tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
-        } else {
-            tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
-        }
-
-        /* bytes 2, 1, 0 */
-        for (i = 2; i >= 0; i--) {
-            /* extract the byte */
-            tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF << (8 * i));
-            /* move the byte to the leftmost position */
-            tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
-            /* t0 will be max/min of t0 and t1 */
-            if (opc == OPC_MXU_Q8MAX) {
-                tcg_gen_smax_i32(t0, t0, t1);
-            } else {
-                tcg_gen_smin_i32(t0, t0, t1);
-            }
-            /* return resulting byte to its original position */
-            tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
-            /* finally update the destination */
-            tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
-        }
-
-        tcg_temp_free(t1);
-        tcg_temp_free(t0);
-    } else if (unlikely(XRb == XRc)) {
-        /* both operands same -> just set destination to one of them */
-        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
-    } else {
-        /* the most general case */
-        TCGv_i32 t0 = tcg_temp_new();
-        TCGv_i32 t1 = tcg_temp_new();
-        int32_t i;
-
-        /* the leftmost bytes (bytes 3) first */
-        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000);
-        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
-        if (opc == OPC_MXU_Q8MAX) {
-            tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
-        } else {
-            tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
-        }
-
-        /* bytes 2, 1, 0 */
-        for (i = 2; i >= 0; i--) {
-            /* extract corresponding bytes */
-            tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF << (8 * i));
-            tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF << (8 * i));
-            /* move the bytes to the leftmost position */
-            tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
-            tcg_gen_shli_i32(t1, t1, 8 * (3 - i));
-            /* t0 will be max/min of t0 and t1 */
-            if (opc == OPC_MXU_Q8MAX) {
-                tcg_gen_smax_i32(t0, t0, t1);
-            } else {
-                tcg_gen_smin_i32(t0, t0, t1);
-            }
-            /* return resulting byte to its original position */
-            tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
-            /* finally update the destination */
-            tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
-        }
-
-        tcg_temp_free(t1);
-        tcg_temp_free(t0);
-    }
-}
-
-
-/*
- *                 MXU instruction category: align
- *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *                       S32ALN     S32ALNI
- */
-
-/*
- *  S32ALNI XRc, XRb, XRa, optn3
- *    Arrange bytes from XRb and XRc according to one of five sets of
- *    rules determined by optn3, and place the result in XRa.
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+-----+---+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |optn3|0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL16|
- *  +-----------+-----+---+-----+-------+-------+-------+-----------+
- *
- */
-static void gen_mxu_S32ALNI(DisasContext *ctx)
-{
-    uint32_t optn3, pad, XRc, XRb, XRa;
-
-    optn3 = extract32(ctx->opcode,  23, 3);
-    pad   = extract32(ctx->opcode,  21, 2);
-    XRc   = extract32(ctx->opcode, 14, 4);
-    XRb   = extract32(ctx->opcode, 10, 4);
-    XRa   = extract32(ctx->opcode,  6, 4);
-
-    if (unlikely(pad != 0)) {
-        /* opcode padding incorrect -> do nothing */
-    } else if (unlikely(XRa == 0)) {
-        /* destination is zero register -> do nothing */
-    } else if (unlikely((XRb == 0) && (XRc == 0))) {
-        /* both operands zero registers -> just set destination to all 0s */
-        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
-    } else if (unlikely(XRb == 0)) {
-        /* XRb zero register -> just appropriatelly shift XRc into XRa */
-        switch (optn3) {
-        case MXU_OPTN3_PTN0:
-            tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
-            break;
-        case MXU_OPTN3_PTN1:
-        case MXU_OPTN3_PTN2:
-        case MXU_OPTN3_PTN3:
-            tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1],
-                             8 * (4 - optn3));
-            break;
-        case MXU_OPTN3_PTN4:
-            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
-            break;
-        }
-    } else if (unlikely(XRc == 0)) {
-        /* XRc zero register -> just appropriatelly shift XRb into XRa */
-        switch (optn3) {
-        case MXU_OPTN3_PTN0:
-            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
-            break;
-        case MXU_OPTN3_PTN1:
-        case MXU_OPTN3_PTN2:
-        case MXU_OPTN3_PTN3:
-            tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
-            break;
-        case MXU_OPTN3_PTN4:
-            tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
-            break;
-        }
-    } else if (unlikely(XRb == XRc)) {
-        /* both operands same -> just rotation or moving from any of them */
-        switch (optn3) {
-        case MXU_OPTN3_PTN0:
-        case MXU_OPTN3_PTN4:
-            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
-            break;
-        case MXU_OPTN3_PTN1:
-        case MXU_OPTN3_PTN2:
-        case MXU_OPTN3_PTN3:
-            tcg_gen_rotli_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
-            break;
-        }
-    } else {
-        /* the most general case */
-        switch (optn3) {
-        case MXU_OPTN3_PTN0:
-            {
-                /*                                         */
-                /*         XRb                XRc          */
-                /*  +---------------+                      */
-                /*  | A   B   C   D |    E   F   G   H     */
-                /*  +-------+-------+                      */
-                /*          |                              */
-                /*         XRa                             */
-                /*                                         */
-
-                tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
-            }
-            break;
-        case MXU_OPTN3_PTN1:
-            {
-                /*                                         */
-                /*         XRb                 XRc         */
-                /*      +-------------------+              */
-                /*    A | B   C   D       E | F   G   H    */
-                /*      +---------+---------+              */
-                /*                |                        */
-                /*               XRa                       */
-                /*                                         */
-
-                TCGv_i32 t0 = tcg_temp_new();
-                TCGv_i32 t1 = tcg_temp_new();
-
-                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x00FFFFFF);
-                tcg_gen_shli_i32(t0, t0, 8);
-
-                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
-                tcg_gen_shri_i32(t1, t1, 24);
-
-                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
-
-                tcg_temp_free(t1);
-                tcg_temp_free(t0);
-            }
-            break;
-        case MXU_OPTN3_PTN2:
-            {
-                /*                                         */
-                /*         XRb                 XRc         */
-                /*          +-------------------+          */
-                /*    A   B | C   D       E   F | G   H    */
-                /*          +---------+---------+          */
-                /*                    |                    */
-                /*                   XRa                   */
-                /*                                         */
-
-                TCGv_i32 t0 = tcg_temp_new();
-                TCGv_i32 t1 = tcg_temp_new();
-
-                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
-                tcg_gen_shli_i32(t0, t0, 16);
-
-                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
-                tcg_gen_shri_i32(t1, t1, 16);
-
-                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
-
-                tcg_temp_free(t1);
-                tcg_temp_free(t0);
-            }
-            break;
-        case MXU_OPTN3_PTN3:
-            {
-                /*                                         */
-                /*         XRb                 XRc         */
-                /*              +-------------------+      */
-                /*    A   B   C | D       E   F   G | H    */
-                /*              +---------+---------+      */
-                /*                        |                */
-                /*                       XRa               */
-                /*                                         */
-
-                TCGv_i32 t0 = tcg_temp_new();
-                TCGv_i32 t1 = tcg_temp_new();
-
-                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x000000FF);
-                tcg_gen_shli_i32(t0, t0, 24);
-
-                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFFFF00);
-                tcg_gen_shri_i32(t1, t1, 8);
-
-                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
-
-                tcg_temp_free(t1);
-                tcg_temp_free(t0);
-            }
-            break;
-        case MXU_OPTN3_PTN4:
-            {
-                /*                                         */
-                /*         XRb                 XRc         */
-                /*                     +---------------+   */
-                /*    A   B   C   D    | E   F   G   H |   */
-                /*                     +-------+-------+   */
-                /*                             |           */
-                /*                            XRa          */
-                /*                                         */
-
-                tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
-            }
-            break;
-        }
-    }
-}
-
-
-/*
- * Decoding engine for MXU
- * =======================
- */
-
-/*
- *
- * Decode MXU pool00
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |0 0 0 0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL00|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool00(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 18, 3);
-
-    switch (opcode) {
-    case OPC_MXU_S32MAX:
-    case OPC_MXU_S32MIN:
-        gen_mxu_S32MAX_S32MIN(ctx);
-        break;
-    case OPC_MXU_D16MAX:
-    case OPC_MXU_D16MIN:
-        gen_mxu_D16MAX_D16MIN(ctx);
-        break;
-    case OPC_MXU_Q8MAX:
-    case OPC_MXU_Q8MIN:
-        gen_mxu_Q8MAX_Q8MIN(ctx);
-        break;
-    case OPC_MXU_Q8SLT:
-        /* TODO: Implement emulation of Q8SLT instruction. */
-        MIPS_INVAL("OPC_MXU_Q8SLT");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_Q8SLTU:
-        /* TODO: Implement emulation of Q8SLTU instruction. */
-        MIPS_INVAL("OPC_MXU_Q8SLTU");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool01
- *
- *  S32SLT, D16SLT, D16AVG, D16AVGR, Q8AVG, Q8AVGR:
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |0 0 0 0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL01|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *
- *  Q8ADD:
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---+-----+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |en2|0 0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL01|
- *  +-----------+---+-----+-----+-------+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool01(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 18, 3);
-
-    switch (opcode) {
-    case OPC_MXU_S32SLT:
-        /* TODO: Implement emulation of S32SLT instruction. */
-        MIPS_INVAL("OPC_MXU_S32SLT");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_D16SLT:
-        /* TODO: Implement emulation of D16SLT instruction. */
-        MIPS_INVAL("OPC_MXU_D16SLT");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_D16AVG:
-        /* TODO: Implement emulation of D16AVG instruction. */
-        MIPS_INVAL("OPC_MXU_D16AVG");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_D16AVGR:
-        /* TODO: Implement emulation of D16AVGR instruction. */
-        MIPS_INVAL("OPC_MXU_D16AVGR");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_Q8AVG:
-        /* TODO: Implement emulation of Q8AVG instruction. */
-        MIPS_INVAL("OPC_MXU_Q8AVG");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_Q8AVGR:
-        /* TODO: Implement emulation of Q8AVGR instruction. */
-        MIPS_INVAL("OPC_MXU_Q8AVGR");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_Q8ADD:
-        /* TODO: Implement emulation of Q8ADD instruction. */
-        MIPS_INVAL("OPC_MXU_Q8ADD");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool02
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |0 0 0 0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL02|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool02(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 18, 3);
-
-    switch (opcode) {
-    case OPC_MXU_S32CPS:
-        /* TODO: Implement emulation of S32CPS instruction. */
-        MIPS_INVAL("OPC_MXU_S32CPS");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_D16CPS:
-        /* TODO: Implement emulation of D16CPS instruction. */
-        MIPS_INVAL("OPC_MXU_D16CPS");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_Q8ABD:
-        /* TODO: Implement emulation of Q8ABD instruction. */
-        MIPS_INVAL("OPC_MXU_Q8ABD");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_Q16SAT:
-        /* TODO: Implement emulation of Q16SAT instruction. */
-        MIPS_INVAL("OPC_MXU_Q16SAT");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool03
- *
- *  D16MULF:
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *  |  SPECIAL2 |x x|on2|0 0 0 0|  XRc  |  XRb  |  XRa  |MXU__POOL03|
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *
- *  D16MULE:
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *  |  SPECIAL2 |x x|on2|   Xd  |  XRc  |  XRb  |  XRa  |MXU__POOL03|
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool03(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 24, 2);
-
-    switch (opcode) {
-    case OPC_MXU_D16MULF:
-        /* TODO: Implement emulation of D16MULF instruction. */
-        MIPS_INVAL("OPC_MXU_D16MULF");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_D16MULE:
-        /* TODO: Implement emulation of D16MULE instruction. */
-        MIPS_INVAL("OPC_MXU_D16MULE");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool04
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-+-------------------+-------+-----------+
- *  |  SPECIAL2 |    rb   |x|        s12        |  XRa  |MXU__POOL04|
- *  +-----------+---------+-+-------------------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool04(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 20, 1);
-
-    switch (opcode) {
-    case OPC_MXU_S32LDD:
-    case OPC_MXU_S32LDDR:
-        gen_mxu_s32ldd_s32lddr(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool05
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-+-------------------+-------+-----------+
- *  |  SPECIAL2 |    rb   |x|        s12        |  XRa  |MXU__POOL05|
- *  +-----------+---------+-+-------------------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool05(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 20, 1);
-
-    switch (opcode) {
-    case OPC_MXU_S32STD:
-        /* TODO: Implement emulation of S32STD instruction. */
-        MIPS_INVAL("OPC_MXU_S32STD");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32STDR:
-        /* TODO: Implement emulation of S32STDR instruction. */
-        MIPS_INVAL("OPC_MXU_S32STDR");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool06
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+---------+---+-------+-------+-----------+
- *  |  SPECIAL2 |    rb   |    rc   |st2|x x x x|  XRa  |MXU__POOL06|
- *  +-----------+---------+---------+---+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool06(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 10, 4);
-
-    switch (opcode) {
-    case OPC_MXU_S32LDDV:
-        /* TODO: Implement emulation of S32LDDV instruction. */
-        MIPS_INVAL("OPC_MXU_S32LDDV");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32LDDVR:
-        /* TODO: Implement emulation of S32LDDVR instruction. */
-        MIPS_INVAL("OPC_MXU_S32LDDVR");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool07
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+---------+---+-------+-------+-----------+
- *  |  SPECIAL2 |    rb   |    rc   |st2|x x x x|  XRa  |MXU__POOL07|
- *  +-----------+---------+---------+---+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool07(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 10, 4);
-
-    switch (opcode) {
-    case OPC_MXU_S32STDV:
-        /* TODO: Implement emulation of S32TDV instruction. */
-        MIPS_INVAL("OPC_MXU_S32TDV");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32STDVR:
-        /* TODO: Implement emulation of S32TDVR instruction. */
-        MIPS_INVAL("OPC_MXU_S32TDVR");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool08
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-+-------------------+-------+-----------+
- *  |  SPECIAL2 |    rb   |x|        s12        |  XRa  |MXU__POOL08|
- *  +-----------+---------+-+-------------------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool08(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 20, 1);
-
-    switch (opcode) {
-    case OPC_MXU_S32LDI:
-        /* TODO: Implement emulation of S32LDI instruction. */
-        MIPS_INVAL("OPC_MXU_S32LDI");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32LDIR:
-        /* TODO: Implement emulation of S32LDIR instruction. */
-        MIPS_INVAL("OPC_MXU_S32LDIR");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool09
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-+-------------------+-------+-----------+
- *  |  SPECIAL2 |    rb   |x|        s12        |  XRa  |MXU__POOL09|
- *  +-----------+---------+-+-------------------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool09(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 5, 0);
-
-    switch (opcode) {
-    case OPC_MXU_S32SDI:
-        /* TODO: Implement emulation of S32SDI instruction. */
-        MIPS_INVAL("OPC_MXU_S32SDI");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32SDIR:
-        /* TODO: Implement emulation of S32SDIR instruction. */
-        MIPS_INVAL("OPC_MXU_S32SDIR");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool10
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+---------+---+-------+-------+-----------+
- *  |  SPECIAL2 |    rb   |    rc   |st2|x x x x|  XRa  |MXU__POOL10|
- *  +-----------+---------+---------+---+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool10(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 5, 0);
-
-    switch (opcode) {
-    case OPC_MXU_S32LDIV:
-        /* TODO: Implement emulation of S32LDIV instruction. */
-        MIPS_INVAL("OPC_MXU_S32LDIV");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32LDIVR:
-        /* TODO: Implement emulation of S32LDIVR instruction. */
-        MIPS_INVAL("OPC_MXU_S32LDIVR");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool11
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+---------+---+-------+-------+-----------+
- *  |  SPECIAL2 |    rb   |    rc   |st2|x x x x|  XRa  |MXU__POOL11|
- *  +-----------+---------+---------+---+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool11(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 10, 4);
-
-    switch (opcode) {
-    case OPC_MXU_S32SDIV:
-        /* TODO: Implement emulation of S32SDIV instruction. */
-        MIPS_INVAL("OPC_MXU_S32SDIV");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32SDIVR:
-        /* TODO: Implement emulation of S32SDIVR instruction. */
-        MIPS_INVAL("OPC_MXU_S32SDIVR");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool12
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *  |  SPECIAL2 |an2|x x|   Xd  |  XRc  |  XRb  |  XRa  |MXU__POOL12|
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool12(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 22, 2);
-
-    switch (opcode) {
-    case OPC_MXU_D32ACC:
-        /* TODO: Implement emulation of D32ACC instruction. */
-        MIPS_INVAL("OPC_MXU_D32ACC");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_D32ACCM:
-        /* TODO: Implement emulation of D32ACCM instruction. */
-        MIPS_INVAL("OPC_MXU_D32ACCM");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_D32ASUM:
-        /* TODO: Implement emulation of D32ASUM instruction. */
-        MIPS_INVAL("OPC_MXU_D32ASUM");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool13
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *  |  SPECIAL2 |en2|x x|0 0 0 0|  XRc  |  XRb  |  XRa  |MXU__POOL13|
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool13(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 22, 2);
-
-    switch (opcode) {
-    case OPC_MXU_Q16ACC:
-        /* TODO: Implement emulation of Q16ACC instruction. */
-        MIPS_INVAL("OPC_MXU_Q16ACC");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_Q16ACCM:
-        /* TODO: Implement emulation of Q16ACCM instruction. */
-        MIPS_INVAL("OPC_MXU_Q16ACCM");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_Q16ASUM:
-        /* TODO: Implement emulation of Q16ASUM instruction. */
-        MIPS_INVAL("OPC_MXU_Q16ASUM");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool14
- *
- *  Q8ADDE, Q8ACCE:
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *  |  SPECIAL2 |0 0|x x|  XRd  |  XRc  |  XRb  |  XRa  |MXU__POOL14|
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *
- *  D8SUM, D8SUMC:
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *  |  SPECIAL2 |en2|x x|0 0 0 0|  XRc  |  XRb  |  XRa  |MXU__POOL14|
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool14(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 22, 2);
-
-    switch (opcode) {
-    case OPC_MXU_Q8ADDE:
-        /* TODO: Implement emulation of Q8ADDE instruction. */
-        MIPS_INVAL("OPC_MXU_Q8ADDE");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_D8SUM:
-        /* TODO: Implement emulation of D8SUM instruction. */
-        MIPS_INVAL("OPC_MXU_D8SUM");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_D8SUMC:
-        /* TODO: Implement emulation of D8SUMC instruction. */
-        MIPS_INVAL("OPC_MXU_D8SUMC");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool15
- *
- *  S32MUL, S32MULU, S32EXTRV:
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+---------+---+-------+-------+-----------+
- *  |  SPECIAL2 |    rs   |    rt   |x x|  XRd  |  XRa  |MXU__POOL15|
- *  +-----------+---------+---------+---+-------+-------+-----------+
- *
- *  S32EXTR:
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+---------+---+-------+-------+-----------+
- *  |  SPECIAL2 |    rb   |   sft5  |x x|  XRd  |  XRa  |MXU__POOL15|
- *  +-----------+---------+---------+---+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool15(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 14, 2);
-
-    switch (opcode) {
-    case OPC_MXU_S32MUL:
-        /* TODO: Implement emulation of S32MUL instruction. */
-        MIPS_INVAL("OPC_MXU_S32MUL");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32MULU:
-        /* TODO: Implement emulation of S32MULU instruction. */
-        MIPS_INVAL("OPC_MXU_S32MULU");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32EXTR:
-        /* TODO: Implement emulation of S32EXTR instruction. */
-        MIPS_INVAL("OPC_MXU_S32EXTR");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32EXTRV:
-        /* TODO: Implement emulation of S32EXTRV instruction. */
-        MIPS_INVAL("OPC_MXU_S32EXTRV");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool16
- *
- *  D32SARW:
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |    rb   |x x x|  XRc  |  XRb  |  XRa  |MXU__POOL16|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *
- *  S32ALN:
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |    rs   |x x x|  XRc  |  XRb  |  XRa  |MXU__POOL16|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *
- *  S32ALNI:
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+-----+---+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |  s3 |0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL16|
- *  +-----------+-----+---+-----+-------+-------+-------+-----------+
- *
- *  S32LUI:
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+-----+---+-----+-------+---------------+-----------+
- *  |  SPECIAL2 |optn3|0 0|x x x|  XRc  |       s8      |MXU__POOL16|
- *  +-----------+-----+---+-----+-------+---------------+-----------+
- *
- *  S32NOR, S32AND, S32OR, S32XOR:
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |0 0 0 0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL16|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool16(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 18, 3);
-
-    switch (opcode) {
-    case OPC_MXU_D32SARW:
-        /* TODO: Implement emulation of D32SARW instruction. */
-        MIPS_INVAL("OPC_MXU_D32SARW");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32ALN:
-        /* TODO: Implement emulation of S32ALN instruction. */
-        MIPS_INVAL("OPC_MXU_S32ALN");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32ALNI:
-        gen_mxu_S32ALNI(ctx);
-        break;
-    case OPC_MXU_S32LUI:
-        /* TODO: Implement emulation of S32LUI instruction. */
-        MIPS_INVAL("OPC_MXU_S32LUI");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32NOR:
-        gen_mxu_S32NOR(ctx);
-        break;
-    case OPC_MXU_S32AND:
-        gen_mxu_S32AND(ctx);
-        break;
-    case OPC_MXU_S32OR:
-        gen_mxu_S32OR(ctx);
-        break;
-    case OPC_MXU_S32XOR:
-        gen_mxu_S32XOR(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool17
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+---------+---+---------+-----+-----------+
- *  |  SPECIAL2 |    rs   |    rt   |0 0|    rd   |x x x|MXU__POOL15|
- *  +-----------+---------+---------+---+---------+-----+-----------+
- *
- */
-static void decode_opc_mxu__pool17(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 6, 2);
-
-    switch (opcode) {
-    case OPC_MXU_LXW:
-        /* TODO: Implement emulation of LXW instruction. */
-        MIPS_INVAL("OPC_MXU_LXW");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_LXH:
-        /* TODO: Implement emulation of LXH instruction. */
-        MIPS_INVAL("OPC_MXU_LXH");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_LXHU:
-        /* TODO: Implement emulation of LXHU instruction. */
-        MIPS_INVAL("OPC_MXU_LXHU");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_LXB:
-        /* TODO: Implement emulation of LXB instruction. */
-        MIPS_INVAL("OPC_MXU_LXB");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_LXBU:
-        /* TODO: Implement emulation of LXBU instruction. */
-        MIPS_INVAL("OPC_MXU_LXBU");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-/*
- *
- * Decode MXU pool18
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |    rb   |x x x|  XRd  |  XRa  |0 0 0 0|MXU__POOL18|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool18(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 18, 3);
-
-    switch (opcode) {
-    case OPC_MXU_D32SLLV:
-        /* TODO: Implement emulation of D32SLLV instruction. */
-        MIPS_INVAL("OPC_MXU_D32SLLV");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_D32SLRV:
-        /* TODO: Implement emulation of D32SLRV instruction. */
-        MIPS_INVAL("OPC_MXU_D32SLRV");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_D32SARV:
-        /* TODO: Implement emulation of D32SARV instruction. */
-        MIPS_INVAL("OPC_MXU_D32SARV");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_Q16SLLV:
-        /* TODO: Implement emulation of Q16SLLV instruction. */
-        MIPS_INVAL("OPC_MXU_Q16SLLV");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_Q16SLRV:
-        /* TODO: Implement emulation of Q16SLRV instruction. */
-        MIPS_INVAL("OPC_MXU_Q16SLRV");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_Q16SARV:
-        /* TODO: Implement emulation of Q16SARV instruction. */
-        MIPS_INVAL("OPC_MXU_Q16SARV");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool19
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *  |  SPECIAL2 |0 0|x x|  XRd  |  XRc  |  XRb  |  XRa  |MXU__POOL19|
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool19(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 22, 2);
-
-    switch (opcode) {
-    case OPC_MXU_Q8MUL:
-    case OPC_MXU_Q8MULSU:
-        gen_mxu_q8mul_q8mulsu(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool20
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *  |  SPECIAL2 |0 0 0 0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL20|
- *  +-----------+---------+-----+-------+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool20(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 18, 3);
-
-    switch (opcode) {
-    case OPC_MXU_Q8MOVZ:
-        /* TODO: Implement emulation of Q8MOVZ instruction. */
-        MIPS_INVAL("OPC_MXU_Q8MOVZ");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_Q8MOVN:
-        /* TODO: Implement emulation of Q8MOVN instruction. */
-        MIPS_INVAL("OPC_MXU_Q8MOVN");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_D16MOVZ:
-        /* TODO: Implement emulation of D16MOVZ instruction. */
-        MIPS_INVAL("OPC_MXU_D16MOVZ");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_D16MOVN:
-        /* TODO: Implement emulation of D16MOVN instruction. */
-        MIPS_INVAL("OPC_MXU_D16MOVN");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32MOVZ:
-        /* TODO: Implement emulation of S32MOVZ instruction. */
-        MIPS_INVAL("OPC_MXU_S32MOVZ");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_S32MOVN:
-        /* TODO: Implement emulation of S32MOVN instruction. */
-        MIPS_INVAL("OPC_MXU_S32MOVN");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-/*
- *
- * Decode MXU pool21
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *  |  SPECIAL2 |an2|x x|  XRd  |  XRc  |  XRb  |  XRa  |MXU__POOL21|
- *  +-----------+---+---+-------+-------+-------+-------+-----------+
- *
- */
-static void decode_opc_mxu__pool21(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opcode = extract32(ctx->opcode, 22, 2);
-
-    switch (opcode) {
-    case OPC_MXU_Q8MAC:
-        /* TODO: Implement emulation of Q8MAC instruction. */
-        MIPS_INVAL("OPC_MXU_Q8MAC");
-        gen_reserved_instruction(ctx);
-        break;
-    case OPC_MXU_Q8MACSU:
-        /* TODO: Implement emulation of Q8MACSU instruction. */
-        MIPS_INVAL("OPC_MXU_Q8MACSU");
-        gen_reserved_instruction(ctx);
-        break;
-    default:
-        MIPS_INVAL("decode_opc_mxu");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-
-/*
- * Main MXU decoding function
- *
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *  +-----------+---------------------------------------+-----------+
- *  |  SPECIAL2 |                                       |x x x x x x|
- *  +-----------+---------------------------------------+-----------+
- *
- */
-static void decode_opc_mxu(CPUMIPSState *env, DisasContext *ctx)
-{
-    /*
-     * TODO: Investigate necessity of including handling of
-     * CLZ, CLO, SDBB in this function, as they belong to
-     * SPECIAL2 opcode space for regular pre-R6 MIPS ISAs.
-     */
-    uint32_t opcode = extract32(ctx->opcode, 0, 6);
-
-    if (opcode == OPC__MXU_MUL) {
-        uint32_t  rs, rt, rd, op1;
-
-        rs = extract32(ctx->opcode, 21, 5);
-        rt = extract32(ctx->opcode, 16, 5);
-        rd = extract32(ctx->opcode, 11, 5);
-        op1 = MASK_SPECIAL2(ctx->opcode);
-
-        gen_arith(ctx, op1, rd, rs, rt);
-
-        return;
-    }
-
-    if (opcode == OPC_MXU_S32M2I) {
-        gen_mxu_s32m2i(ctx);
-        return;
-    }
-
-    if (opcode == OPC_MXU_S32I2M) {
-        gen_mxu_s32i2m(ctx);
-        return;
-    }
-
-    {
-        TCGv t_mxu_cr = tcg_temp_new();
-        TCGLabel *l_exit = gen_new_label();
-
-        gen_load_mxu_cr(t_mxu_cr);
-        tcg_gen_andi_tl(t_mxu_cr, t_mxu_cr, MXU_CR_MXU_EN);
-        tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit);
-
-        switch (opcode) {
-        case OPC_MXU_S32MADD:
-            /* TODO: Implement emulation of S32MADD instruction. */
-            MIPS_INVAL("OPC_MXU_S32MADD");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_S32MADDU:
-            /* TODO: Implement emulation of S32MADDU instruction. */
-            MIPS_INVAL("OPC_MXU_S32MADDU");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU__POOL00:
-            decode_opc_mxu__pool00(env, ctx);
-            break;
-        case OPC_MXU_S32MSUB:
-            /* TODO: Implement emulation of S32MSUB instruction. */
-            MIPS_INVAL("OPC_MXU_S32MSUB");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_S32MSUBU:
-            /* TODO: Implement emulation of S32MSUBU instruction. */
-            MIPS_INVAL("OPC_MXU_S32MSUBU");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU__POOL01:
-            decode_opc_mxu__pool01(env, ctx);
-            break;
-        case OPC_MXU__POOL02:
-            decode_opc_mxu__pool02(env, ctx);
-            break;
-        case OPC_MXU_D16MUL:
-            gen_mxu_d16mul(ctx);
-            break;
-        case OPC_MXU__POOL03:
-            decode_opc_mxu__pool03(env, ctx);
-            break;
-        case OPC_MXU_D16MAC:
-            gen_mxu_d16mac(ctx);
-            break;
-        case OPC_MXU_D16MACF:
-            /* TODO: Implement emulation of D16MACF instruction. */
-            MIPS_INVAL("OPC_MXU_D16MACF");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_D16MADL:
-            /* TODO: Implement emulation of D16MADL instruction. */
-            MIPS_INVAL("OPC_MXU_D16MADL");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_S16MAD:
-            /* TODO: Implement emulation of S16MAD instruction. */
-            MIPS_INVAL("OPC_MXU_S16MAD");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_Q16ADD:
-            /* TODO: Implement emulation of Q16ADD instruction. */
-            MIPS_INVAL("OPC_MXU_Q16ADD");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_D16MACE:
-            /* TODO: Implement emulation of D16MACE instruction. */
-            MIPS_INVAL("OPC_MXU_D16MACE");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU__POOL04:
-            decode_opc_mxu__pool04(env, ctx);
-            break;
-        case OPC_MXU__POOL05:
-            decode_opc_mxu__pool05(env, ctx);
-            break;
-        case OPC_MXU__POOL06:
-            decode_opc_mxu__pool06(env, ctx);
-            break;
-        case OPC_MXU__POOL07:
-            decode_opc_mxu__pool07(env, ctx);
-            break;
-        case OPC_MXU__POOL08:
-            decode_opc_mxu__pool08(env, ctx);
-            break;
-        case OPC_MXU__POOL09:
-            decode_opc_mxu__pool09(env, ctx);
-            break;
-        case OPC_MXU__POOL10:
-            decode_opc_mxu__pool10(env, ctx);
-            break;
-        case OPC_MXU__POOL11:
-            decode_opc_mxu__pool11(env, ctx);
-            break;
-        case OPC_MXU_D32ADD:
-            /* TODO: Implement emulation of D32ADD instruction. */
-            MIPS_INVAL("OPC_MXU_D32ADD");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU__POOL12:
-            decode_opc_mxu__pool12(env, ctx);
-            break;
-        case OPC_MXU__POOL13:
-            decode_opc_mxu__pool13(env, ctx);
-            break;
-        case OPC_MXU__POOL14:
-            decode_opc_mxu__pool14(env, ctx);
-            break;
-        case OPC_MXU_Q8ACCE:
-            /* TODO: Implement emulation of Q8ACCE instruction. */
-            MIPS_INVAL("OPC_MXU_Q8ACCE");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_S8LDD:
-            gen_mxu_s8ldd(ctx);
-            break;
-        case OPC_MXU_S8STD:
-            /* TODO: Implement emulation of S8STD instruction. */
-            MIPS_INVAL("OPC_MXU_S8STD");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_S8LDI:
-            /* TODO: Implement emulation of S8LDI instruction. */
-            MIPS_INVAL("OPC_MXU_S8LDI");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_S8SDI:
-            /* TODO: Implement emulation of S8SDI instruction. */
-            MIPS_INVAL("OPC_MXU_S8SDI");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU__POOL15:
-            decode_opc_mxu__pool15(env, ctx);
-            break;
-        case OPC_MXU__POOL16:
-            decode_opc_mxu__pool16(env, ctx);
-            break;
-        case OPC_MXU__POOL17:
-            decode_opc_mxu__pool17(env, ctx);
-            break;
-        case OPC_MXU_S16LDD:
-            /* TODO: Implement emulation of S16LDD instruction. */
-            MIPS_INVAL("OPC_MXU_S16LDD");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_S16STD:
-            /* TODO: Implement emulation of S16STD instruction. */
-            MIPS_INVAL("OPC_MXU_S16STD");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_S16LDI:
-            /* TODO: Implement emulation of S16LDI instruction. */
-            MIPS_INVAL("OPC_MXU_S16LDI");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_S16SDI:
-            /* TODO: Implement emulation of S16SDI instruction. */
-            MIPS_INVAL("OPC_MXU_S16SDI");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_D32SLL:
-            /* TODO: Implement emulation of D32SLL instruction. */
-            MIPS_INVAL("OPC_MXU_D32SLL");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_D32SLR:
-            /* TODO: Implement emulation of D32SLR instruction. */
-            MIPS_INVAL("OPC_MXU_D32SLR");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_D32SARL:
-            /* TODO: Implement emulation of D32SARL instruction. */
-            MIPS_INVAL("OPC_MXU_D32SARL");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_D32SAR:
-            /* TODO: Implement emulation of D32SAR instruction. */
-            MIPS_INVAL("OPC_MXU_D32SAR");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_Q16SLL:
-            /* TODO: Implement emulation of Q16SLL instruction. */
-            MIPS_INVAL("OPC_MXU_Q16SLL");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_Q16SLR:
-            /* TODO: Implement emulation of Q16SLR instruction. */
-            MIPS_INVAL("OPC_MXU_Q16SLR");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU__POOL18:
-            decode_opc_mxu__pool18(env, ctx);
-            break;
-        case OPC_MXU_Q16SAR:
-            /* TODO: Implement emulation of Q16SAR instruction. */
-            MIPS_INVAL("OPC_MXU_Q16SAR");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU__POOL19:
-            decode_opc_mxu__pool19(env, ctx);
-            break;
-        case OPC_MXU__POOL20:
-            decode_opc_mxu__pool20(env, ctx);
-            break;
-        case OPC_MXU__POOL21:
-            decode_opc_mxu__pool21(env, ctx);
-            break;
-        case OPC_MXU_Q16SCOP:
-            /* TODO: Implement emulation of Q16SCOP instruction. */
-            MIPS_INVAL("OPC_MXU_Q16SCOP");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_Q8MADL:
-            /* TODO: Implement emulation of Q8MADL instruction. */
-            MIPS_INVAL("OPC_MXU_Q8MADL");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_S32SFL:
-            /* TODO: Implement emulation of S32SFL instruction. */
-            MIPS_INVAL("OPC_MXU_S32SFL");
-            gen_reserved_instruction(ctx);
-            break;
-        case OPC_MXU_Q8SAD:
-            /* TODO: Implement emulation of Q8SAD instruction. */
-            MIPS_INVAL("OPC_MXU_Q8SAD");
-            gen_reserved_instruction(ctx);
-            break;
-        default:
-            MIPS_INVAL("decode_opc_mxu");
-            gen_reserved_instruction(ctx);
-        }
-
-        gen_set_label(l_exit);
-        tcg_temp_free(t_mxu_cr);
-    }
-}
-
-#endif /* !defined(TARGET_MIPS64) */
-
-
 static void decode_opc_special2_legacy(CPUMIPSState *env, DisasContext *ctx)
 {
     int rs, rt, rd;
@@ -27851,146 +24402,6 @@ static void decode_opc_special3_legacy(CPUMIPSState *env, DisasContext *ctx)
 
 #if defined(TARGET_MIPS64)
 
-static void decode_mmi0(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opc = MASK_MMI0(ctx->opcode);
-
-    switch (opc) {
-    case MMI_OPC_0_PADDW:     /* TODO: MMI_OPC_0_PADDW */
-    case MMI_OPC_0_PSUBW:     /* TODO: MMI_OPC_0_PSUBW */
-    case MMI_OPC_0_PCGTW:     /* TODO: MMI_OPC_0_PCGTW */
-    case MMI_OPC_0_PMAXW:     /* TODO: MMI_OPC_0_PMAXW */
-    case MMI_OPC_0_PADDH:     /* TODO: MMI_OPC_0_PADDH */
-    case MMI_OPC_0_PSUBH:     /* TODO: MMI_OPC_0_PSUBH */
-    case MMI_OPC_0_PCGTH:     /* TODO: MMI_OPC_0_PCGTH */
-    case MMI_OPC_0_PMAXH:     /* TODO: MMI_OPC_0_PMAXH */
-    case MMI_OPC_0_PADDB:     /* TODO: MMI_OPC_0_PADDB */
-    case MMI_OPC_0_PSUBB:     /* TODO: MMI_OPC_0_PSUBB */
-    case MMI_OPC_0_PCGTB:     /* TODO: MMI_OPC_0_PCGTB */
-    case MMI_OPC_0_PADDSW:    /* TODO: MMI_OPC_0_PADDSW */
-    case MMI_OPC_0_PSUBSW:    /* TODO: MMI_OPC_0_PSUBSW */
-    case MMI_OPC_0_PEXTLW:    /* TODO: MMI_OPC_0_PEXTLW */
-    case MMI_OPC_0_PPACW:     /* TODO: MMI_OPC_0_PPACW */
-    case MMI_OPC_0_PADDSH:    /* TODO: MMI_OPC_0_PADDSH */
-    case MMI_OPC_0_PSUBSH:    /* TODO: MMI_OPC_0_PSUBSH */
-    case MMI_OPC_0_PEXTLH:    /* TODO: MMI_OPC_0_PEXTLH */
-    case MMI_OPC_0_PPACH:     /* TODO: MMI_OPC_0_PPACH */
-    case MMI_OPC_0_PADDSB:    /* TODO: MMI_OPC_0_PADDSB */
-    case MMI_OPC_0_PSUBSB:    /* TODO: MMI_OPC_0_PSUBSB */
-    case MMI_OPC_0_PEXTLB:    /* TODO: MMI_OPC_0_PEXTLB */
-    case MMI_OPC_0_PPACB:     /* TODO: MMI_OPC_0_PPACB */
-    case MMI_OPC_0_PEXT5:     /* TODO: MMI_OPC_0_PEXT5 */
-    case MMI_OPC_0_PPAC5:     /* TODO: MMI_OPC_0_PPAC5 */
-        gen_reserved_instruction(ctx); /* TODO: MMI_OPC_CLASS_MMI0 */
-        break;
-    default:
-        MIPS_INVAL("TX79 MMI class MMI0");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-static void decode_mmi1(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opc = MASK_MMI1(ctx->opcode);
-
-    switch (opc) {
-    case MMI_OPC_1_PABSW:     /* TODO: MMI_OPC_1_PABSW */
-    case MMI_OPC_1_PCEQW:     /* TODO: MMI_OPC_1_PCEQW */
-    case MMI_OPC_1_PMINW:     /* TODO: MMI_OPC_1_PMINW */
-    case MMI_OPC_1_PADSBH:    /* TODO: MMI_OPC_1_PADSBH */
-    case MMI_OPC_1_PABSH:     /* TODO: MMI_OPC_1_PABSH */
-    case MMI_OPC_1_PCEQH:     /* TODO: MMI_OPC_1_PCEQH */
-    case MMI_OPC_1_PMINH:     /* TODO: MMI_OPC_1_PMINH */
-    case MMI_OPC_1_PCEQB:     /* TODO: MMI_OPC_1_PCEQB */
-    case MMI_OPC_1_PADDUW:    /* TODO: MMI_OPC_1_PADDUW */
-    case MMI_OPC_1_PSUBUW:    /* TODO: MMI_OPC_1_PSUBUW */
-    case MMI_OPC_1_PEXTUW:    /* TODO: MMI_OPC_1_PEXTUW */
-    case MMI_OPC_1_PADDUH:    /* TODO: MMI_OPC_1_PADDUH */
-    case MMI_OPC_1_PSUBUH:    /* TODO: MMI_OPC_1_PSUBUH */
-    case MMI_OPC_1_PEXTUH:    /* TODO: MMI_OPC_1_PEXTUH */
-    case MMI_OPC_1_PADDUB:    /* TODO: MMI_OPC_1_PADDUB */
-    case MMI_OPC_1_PSUBUB:    /* TODO: MMI_OPC_1_PSUBUB */
-    case MMI_OPC_1_PEXTUB:    /* TODO: MMI_OPC_1_PEXTUB */
-    case MMI_OPC_1_QFSRV:     /* TODO: MMI_OPC_1_QFSRV */
-        gen_reserved_instruction(ctx); /* TODO: MMI_OPC_CLASS_MMI1 */
-        break;
-    default:
-        MIPS_INVAL("TX79 MMI class MMI1");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opc = MASK_MMI2(ctx->opcode);
-
-    switch (opc) {
-    case MMI_OPC_2_PMADDW:    /* TODO: MMI_OPC_2_PMADDW */
-    case MMI_OPC_2_PSLLVW:    /* TODO: MMI_OPC_2_PSLLVW */
-    case MMI_OPC_2_PSRLVW:    /* TODO: MMI_OPC_2_PSRLVW */
-    case MMI_OPC_2_PMSUBW:    /* TODO: MMI_OPC_2_PMSUBW */
-    case MMI_OPC_2_PMFHI:     /* TODO: MMI_OPC_2_PMFHI */
-    case MMI_OPC_2_PMFLO:     /* TODO: MMI_OPC_2_PMFLO */
-    case MMI_OPC_2_PINTH:     /* TODO: MMI_OPC_2_PINTH */
-    case MMI_OPC_2_PMULTW:    /* TODO: MMI_OPC_2_PMULTW */
-    case MMI_OPC_2_PDIVW:     /* TODO: MMI_OPC_2_PDIVW */
-    case MMI_OPC_2_PMADDH:    /* TODO: MMI_OPC_2_PMADDH */
-    case MMI_OPC_2_PHMADH:    /* TODO: MMI_OPC_2_PHMADH */
-    case MMI_OPC_2_PAND:      /* TODO: MMI_OPC_2_PAND */
-    case MMI_OPC_2_PXOR:      /* TODO: MMI_OPC_2_PXOR */
-    case MMI_OPC_2_PMSUBH:    /* TODO: MMI_OPC_2_PMSUBH */
-    case MMI_OPC_2_PHMSBH:    /* TODO: MMI_OPC_2_PHMSBH */
-    case MMI_OPC_2_PEXEH:     /* TODO: MMI_OPC_2_PEXEH */
-    case MMI_OPC_2_PREVH:     /* TODO: MMI_OPC_2_PREVH */
-    case MMI_OPC_2_PMULTH:    /* TODO: MMI_OPC_2_PMULTH */
-    case MMI_OPC_2_PDIVBW:    /* TODO: MMI_OPC_2_PDIVBW */
-    case MMI_OPC_2_PEXEW:     /* TODO: MMI_OPC_2_PEXEW */
-    case MMI_OPC_2_PROT3W:    /* TODO: MMI_OPC_2_PROT3W */
-        gen_reserved_instruction(ctx); /* TODO: MMI_OPC_CLASS_MMI2 */
-        break;
-    case MMI_OPC_2_PCPYLD:
-        gen_mmi_pcpyld(ctx);
-        break;
-    default:
-        MIPS_INVAL("TX79 MMI class MMI2");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
-static void decode_mmi3(CPUMIPSState *env, DisasContext *ctx)
-{
-    uint32_t opc = MASK_MMI3(ctx->opcode);
-
-    switch (opc) {
-    case MMI_OPC_3_PMADDUW:    /* TODO: MMI_OPC_3_PMADDUW */
-    case MMI_OPC_3_PSRAVW:     /* TODO: MMI_OPC_3_PSRAVW */
-    case MMI_OPC_3_PMTHI:      /* TODO: MMI_OPC_3_PMTHI */
-    case MMI_OPC_3_PMTLO:      /* TODO: MMI_OPC_3_PMTLO */
-    case MMI_OPC_3_PINTEH:     /* TODO: MMI_OPC_3_PINTEH */
-    case MMI_OPC_3_PMULTUW:    /* TODO: MMI_OPC_3_PMULTUW */
-    case MMI_OPC_3_PDIVUW:     /* TODO: MMI_OPC_3_PDIVUW */
-    case MMI_OPC_3_POR:        /* TODO: MMI_OPC_3_POR */
-    case MMI_OPC_3_PNOR:       /* TODO: MMI_OPC_3_PNOR */
-    case MMI_OPC_3_PEXCH:      /* TODO: MMI_OPC_3_PEXCH */
-    case MMI_OPC_3_PEXCW:      /* TODO: MMI_OPC_3_PEXCW */
-        gen_reserved_instruction(ctx); /* TODO: MMI_OPC_CLASS_MMI3 */
-        break;
-    case MMI_OPC_3_PCPYH:
-        gen_mmi_pcpyh(ctx);
-        break;
-    case MMI_OPC_3_PCPYUD:
-        gen_mmi_pcpyud(ctx);
-        break;
-    default:
-        MIPS_INVAL("TX79 MMI class MMI3");
-        gen_reserved_instruction(ctx);
-        break;
-    }
-}
-
 static void decode_mmi(CPUMIPSState *env, DisasContext *ctx)
 {
     uint32_t opc = MASK_MMI(ctx->opcode);
@@ -27999,18 +24410,6 @@ static void decode_mmi(CPUMIPSState *env, DisasContext *ctx)
     int rd = extract32(ctx->opcode, 11, 5);
 
     switch (opc) {
-    case MMI_OPC_CLASS_MMI0:
-        decode_mmi0(env, ctx);
-        break;
-    case MMI_OPC_CLASS_MMI1:
-        decode_mmi1(env, ctx);
-        break;
-    case MMI_OPC_CLASS_MMI2:
-        decode_mmi2(env, ctx);
-        break;
-    case MMI_OPC_CLASS_MMI3:
-        decode_mmi3(env, ctx);
-        break;
     case MMI_OPC_MULT1:
     case MMI_OPC_MULTU1:
     case MMI_OPC_MADD:
@@ -28023,25 +24422,6 @@ static void decode_mmi(CPUMIPSState *env, DisasContext *ctx)
     case MMI_OPC_DIVU1:
         gen_div1_tx79(ctx, opc, rs, rt);
         break;
-    case MMI_OPC_MTLO1:
-    case MMI_OPC_MTHI1:
-        gen_HILO1_tx79(ctx, opc, rs);
-        break;
-    case MMI_OPC_MFLO1:
-    case MMI_OPC_MFHI1:
-        gen_HILO1_tx79(ctx, opc, rd);
-        break;
-    case MMI_OPC_PLZCW:         /* TODO: MMI_OPC_PLZCW */
-    case MMI_OPC_PMFHL:         /* TODO: MMI_OPC_PMFHL */
-    case MMI_OPC_PMTHL:         /* TODO: MMI_OPC_PMTHL */
-    case MMI_OPC_PSLLH:         /* TODO: MMI_OPC_PSLLH */
-    case MMI_OPC_PSRLH:         /* TODO: MMI_OPC_PSRLH */
-    case MMI_OPC_PSRAH:         /* TODO: MMI_OPC_PSRAH */
-    case MMI_OPC_PSLLW:         /* TODO: MMI_OPC_PSLLW */
-    case MMI_OPC_PSRLW:         /* TODO: MMI_OPC_PSRLW */
-    case MMI_OPC_PSRAW:         /* TODO: MMI_OPC_PSRAW */
-        gen_reserved_instruction(ctx);    /* TODO: MMI_OPC_CLASS_MMI */
-        break;
     default:
         MIPS_INVAL("TX79 MMI class");
         gen_reserved_instruction(ctx);
@@ -28276,13 +24656,18 @@ static bool decode_opc_legacy(CPUMIPSState *env, DisasContext *ctx)
 #if defined(TARGET_MIPS64)
         if ((ctx->insn_flags & INSN_R5900) && (ctx->insn_flags & ASE_MMI)) {
             decode_mmi(env, ctx);
-#else
-        if (ctx->insn_flags & ASE_MXU) {
-            decode_opc_mxu(env, ctx);
+            break;
+        }
 #endif
-        } else {
-            decode_opc_special2_legacy(env, ctx);
+        if (TARGET_LONG_BITS == 32 && (ctx->insn_flags & ASE_MXU)) {
+            if (MASK_SPECIAL2(ctx->opcode) == OPC_MUL) {
+                gen_arith(ctx, OPC_MUL, rd, rs, rt);
+            } else {
+                decode_ase_mxu(ctx, ctx->opcode);
+            }
+            break;
         }
+        decode_opc_special2_legacy(env, ctx);
         break;
     case OPC_SPECIAL3:
 #if defined(TARGET_MIPS64)
@@ -28997,6 +25382,9 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
     if (cpu_supports_isa(env, ISA_MIPS_R6) && decode_isa_rel6(ctx, ctx->opcode)) {
         return;
     }
+    if (cpu_supports_isa(env, INSN_R5900) && decode_ext_txx9(ctx, ctx->opcode)) {
+        return;
+    }
 
     if (decode_opc_legacy(env, ctx)) {
         return;
@@ -29340,18 +25728,9 @@ void mips_tcg_init(void)
     cpu_llval = tcg_global_mem_new(cpu_env, offsetof(CPUMIPSState, llval),
                                    "llval");
 
-#if !defined(TARGET_MIPS64)
-    for (i = 0; i < NUMBER_OF_MXU_REGISTERS - 1; i++) {
-        mxu_gpr[i] = tcg_global_mem_new(cpu_env,
-                                        offsetof(CPUMIPSState,
-                                                 active_tc.mxu_gpr[i]),
-                                        mxuregnames[i]);
+    if (TARGET_LONG_BITS == 32) {
+        mxu_translate_init();
     }
-
-    mxu_CR = tcg_global_mem_new(cpu_env,
-                                offsetof(CPUMIPSState, active_tc.mxu_cr),
-                                mxuregnames[NUMBER_OF_MXU_REGISTERS - 1]);
-#endif /* !TARGET_MIPS64 */
 }
 
 void restore_state_to_opc(CPUMIPSState *env, TranslationBlock *tb,
diff --git a/target/mips/translate.h b/target/mips/translate.h
index 468e29d757..2b3c7a69ec 100644
--- a/target/mips/translate.h
+++ b/target/mips/translate.h
@@ -148,6 +148,8 @@ void gen_op_addr_add(DisasContext *ctx, TCGv ret, TCGv arg0, TCGv arg1);
 bool gen_lsa(DisasContext *ctx, int rd, int rt, int rs, int sa);
 bool gen_dlsa(DisasContext *ctx, int rd, int rt, int rs, int sa);
 
+void gen_rdhwr(DisasContext *ctx, int rt, int rd, int sel);
+
 extern TCGv cpu_gpr[32], cpu_PC;
 #if defined(TARGET_MIPS64)
 extern TCGv_i64 cpu_gpr_hi[32];
@@ -178,8 +180,16 @@ extern TCGv bcond;
 /* MSA */
 void msa_translate_init(void);
 
+/* MXU */
+void mxu_translate_init(void);
+bool decode_ase_mxu(DisasContext *ctx, uint32_t insn);
+
 /* decodetree generated */
 bool decode_isa_rel6(DisasContext *ctx, uint32_t insn);
 bool decode_ase_msa(DisasContext *ctx, uint32_t insn);
+bool decode_ext_txx9(DisasContext *ctx, uint32_t insn);
+#if defined(TARGET_MIPS64)
+bool decode_ext_tx79(DisasContext *ctx, uint32_t insn);
+#endif
 
 #endif
diff --git a/target/mips/tx79.decode b/target/mips/tx79.decode
new file mode 100644
index 0000000000..0f748b53a6
--- /dev/null
+++ b/target/mips/tx79.decode
@@ -0,0 +1,39 @@
+# Toshiba C790's instruction set
+#
+# Copyright (C) 2021  Philippe Mathieu-Daudé
+#
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# Toshiba Appendix B  C790-Specific Instruction Set Details
+
+###########################################################################
+# Named attribute sets.  These are used to make nice(er) names
+# when creating helpers common to those for the individual
+# instruction patterns.
+
+&rtype           rs rt rd sa
+
+###########################################################################
+# Named instruction formats.  These are generally used to
+# reduce the amount of duplication between instruction patterns.
+
+@rs_rt_rd       ...... rs:5  rt:5  rd:5  ..... ......   &rtype sa=0
+@rt_rd          ...... ..... rt:5  rd:5  ..... ......   &rtype rs=0 sa=0
+@rs             ...... rs:5  ..... ..........  ......   &rtype rt=0 rd=0 sa=0
+@rd             ...... ..........  rd:5  ..... ......   &rtype rs=0 rt=0 sa=0
+
+###########################################################################
+
+MFHI1           011100 0000000000  ..... 00000 010000   @rd
+MTHI1           011100 .....  0000000000 00000 010001   @rs
+MFLO1           011100 0000000000  ..... 00000 010010   @rd
+MTLO1           011100 .....  0000000000 00000 010011   @rs
+
+# MMI2
+
+PCPYLD          011100 ..... ..... ..... 01110 001001   @rs_rt_rd
+
+# MMI3
+
+PCPYUD          011100 ..... ..... ..... 01110 101001   @rs_rt_rd
+PCPYH           011100 00000 ..... ..... 11011 101001   @rt_rd
diff --git a/target/mips/tx79_translate.c b/target/mips/tx79_translate.c
new file mode 100644
index 0000000000..ad83774b97
--- /dev/null
+++ b/target/mips/tx79_translate.c
@@ -0,0 +1,303 @@
+/*
+ * Toshiba TX79-specific instructions translation routines
+ *
+ *  Copyright (c) 2018 Fredrik Noring
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "tcg/tcg-op.h"
+#include "exec/helper-gen.h"
+#include "translate.h"
+
+/* Include the auto-generated decoder.  */
+#include "decode-tx79.c.inc"
+
+/*
+ *     Overview of the TX79-specific instruction set
+ *     =============================================
+ *
+ * The R5900 and the C790 have 128-bit wide GPRs, where the upper 64 bits
+ * are only used by the specific quadword (128-bit) LQ/SQ load/store
+ * instructions and certain multimedia instructions (MMIs). These MMIs
+ * configure the 128-bit data path as two 64-bit, four 32-bit, eight 16-bit
+ * or sixteen 8-bit paths.
+ *
+ * Reference:
+ *
+ * The Toshiba TX System RISC TX79 Core Architecture manual,
+ * https://wiki.qemu.org/File:C790.pdf
+ */
+
+bool decode_ext_tx79(DisasContext *ctx, uint32_t insn)
+{
+    if (TARGET_LONG_BITS == 64 && decode_tx79(ctx, insn)) {
+        return true;
+    }
+    return false;
+}
+
+/*
+ *     Three-Operand Multiply and Multiply-Add (4 instructions)
+ *     --------------------------------------------------------
+ * MADD    [rd,] rs, rt      Multiply/Add
+ * MADDU   [rd,] rs, rt      Multiply/Add Unsigned
+ * MULT    [rd,] rs, rt      Multiply (3-operand)
+ * MULTU   [rd,] rs, rt      Multiply Unsigned (3-operand)
+ */
+
+/*
+ *     Multiply Instructions for Pipeline 1 (10 instructions)
+ *     ------------------------------------------------------
+ * MULT1   [rd,] rs, rt      Multiply Pipeline 1
+ * MULTU1  [rd,] rs, rt      Multiply Unsigned Pipeline 1
+ * DIV1    rs, rt            Divide Pipeline 1
+ * DIVU1   rs, rt            Divide Unsigned Pipeline 1
+ * MADD1   [rd,] rs, rt      Multiply-Add Pipeline 1
+ * MADDU1  [rd,] rs, rt      Multiply-Add Unsigned Pipeline 1
+ * MFHI1   rd                Move From HI1 Register
+ * MFLO1   rd                Move From LO1 Register
+ * MTHI1   rs                Move To HI1 Register
+ * MTLO1   rs                Move To LO1 Register
+ */
+
+static bool trans_MFHI1(DisasContext *ctx, arg_rtype *a)
+{
+    gen_store_gpr(cpu_HI[1], a->rd);
+
+    return true;
+}
+
+static bool trans_MFLO1(DisasContext *ctx, arg_rtype *a)
+{
+    gen_store_gpr(cpu_LO[1], a->rd);
+
+    return true;
+}
+
+static bool trans_MTHI1(DisasContext *ctx, arg_rtype *a)
+{
+    gen_load_gpr(cpu_HI[1], a->rs);
+
+    return true;
+}
+
+static bool trans_MTLO1(DisasContext *ctx, arg_rtype *a)
+{
+    gen_load_gpr(cpu_LO[1], a->rs);
+
+    return true;
+}
+
+/*
+ *     Arithmetic (19 instructions)
+ *     ----------------------------
+ * PADDB   rd, rs, rt        Parallel Add Byte
+ * PSUBB   rd, rs, rt        Parallel Subtract Byte
+ * PADDH   rd, rs, rt        Parallel Add Halfword
+ * PSUBH   rd, rs, rt        Parallel Subtract Halfword
+ * PADDW   rd, rs, rt        Parallel Add Word
+ * PSUBW   rd, rs, rt        Parallel Subtract Word
+ * PADSBH  rd, rs, rt        Parallel Add/Subtract Halfword
+ * PADDSB  rd, rs, rt        Parallel Add with Signed Saturation Byte
+ * PSUBSB  rd, rs, rt        Parallel Subtract with Signed Saturation Byte
+ * PADDSH  rd, rs, rt        Parallel Add with Signed Saturation Halfword
+ * PSUBSH  rd, rs, rt        Parallel Subtract with Signed Saturation Halfword
+ * PADDSW  rd, rs, rt        Parallel Add with Signed Saturation Word
+ * PSUBSW  rd, rs, rt        Parallel Subtract with Signed Saturation Word
+ * PADDUB  rd, rs, rt        Parallel Add with Unsigned saturation Byte
+ * PSUBUB  rd, rs, rt        Parallel Subtract with Unsigned saturation Byte
+ * PADDUH  rd, rs, rt        Parallel Add with Unsigned saturation Halfword
+ * PSUBUH  rd, rs, rt        Parallel Subtract with Unsigned saturation Halfword
+ * PADDUW  rd, rs, rt        Parallel Add with Unsigned saturation Word
+ * PSUBUW  rd, rs, rt        Parallel Subtract with Unsigned saturation Word
+ */
+
+/*
+ *     Min/Max (4 instructions)
+ *     ------------------------
+ * PMAXH   rd, rs, rt        Parallel Maximum Halfword
+ * PMINH   rd, rs, rt        Parallel Minimum Halfword
+ * PMAXW   rd, rs, rt        Parallel Maximum Word
+ * PMINW   rd, rs, rt        Parallel Minimum Word
+ */
+
+/*
+ *     Absolute (2 instructions)
+ *     -------------------------
+ * PABSH   rd, rt            Parallel Absolute Halfword
+ * PABSW   rd, rt            Parallel Absolute Word
+ */
+
+/*
+ *     Logical (4 instructions)
+ *     ------------------------
+ * PAND    rd, rs, rt        Parallel AND
+ * POR     rd, rs, rt        Parallel OR
+ * PXOR    rd, rs, rt        Parallel XOR
+ * PNOR    rd, rs, rt        Parallel NOR
+ */
+
+/*
+ *     Shift (9 instructions)
+ *     ----------------------
+ * PSLLH   rd, rt, sa        Parallel Shift Left Logical Halfword
+ * PSRLH   rd, rt, sa        Parallel Shift Right Logical Halfword
+ * PSRAH   rd, rt, sa        Parallel Shift Right Arithmetic Halfword
+ * PSLLW   rd, rt, sa        Parallel Shift Left Logical Word
+ * PSRLW   rd, rt, sa        Parallel Shift Right Logical Word
+ * PSRAW   rd, rt, sa        Parallel Shift Right Arithmetic Word
+ * PSLLVW  rd, rt, rs        Parallel Shift Left Logical Variable Word
+ * PSRLVW  rd, rt, rs        Parallel Shift Right Logical Variable Word
+ * PSRAVW  rd, rt, rs        Parallel Shift Right Arithmetic Variable Word
+ */
+
+/*
+ *     Compare (6 instructions)
+ *     ------------------------
+ * PCGTB   rd, rs, rt        Parallel Compare for Greater Than Byte
+ * PCEQB   rd, rs, rt        Parallel Compare for Equal Byte
+ * PCGTH   rd, rs, rt        Parallel Compare for Greater Than Halfword
+ * PCEQH   rd, rs, rt        Parallel Compare for Equal Halfword
+ * PCGTW   rd, rs, rt        Parallel Compare for Greater Than Word
+ * PCEQW   rd, rs, rt        Parallel Compare for Equal Word
+ */
+
+/*
+ *     LZC (1 instruction)
+ *     -------------------
+ * PLZCW   rd, rs            Parallel Leading Zero or One Count Word
+ */
+
+/*
+ *     Quadword Load and Store (2 instructions)
+ *     ----------------------------------------
+ * LQ      rt, offset(base)  Load Quadword
+ * SQ      rt, offset(base)  Store Quadword
+ */
+
+/*
+ *     Multiply and Divide (19 instructions)
+ *     -------------------------------------
+ * PMULTW  rd, rs, rt        Parallel Multiply Word
+ * PMULTUW rd, rs, rt        Parallel Multiply Unsigned Word
+ * PDIVW   rs, rt            Parallel Divide Word
+ * PDIVUW  rs, rt            Parallel Divide Unsigned Word
+ * PMADDW  rd, rs, rt        Parallel Multiply-Add Word
+ * PMADDUW rd, rs, rt        Parallel Multiply-Add Unsigned Word
+ * PMSUBW  rd, rs, rt        Parallel Multiply-Subtract Word
+ * PMULTH  rd, rs, rt        Parallel Multiply Halfword
+ * PMADDH  rd, rs, rt        Parallel Multiply-Add Halfword
+ * PMSUBH  rd, rs, rt        Parallel Multiply-Subtract Halfword
+ * PHMADH  rd, rs, rt        Parallel Horizontal Multiply-Add Halfword
+ * PHMSBH  rd, rs, rt        Parallel Horizontal Multiply-Subtract Halfword
+ * PDIVBW  rs, rt            Parallel Divide Broadcast Word
+ * PMFHI   rd                Parallel Move From HI Register
+ * PMFLO   rd                Parallel Move From LO Register
+ * PMTHI   rs                Parallel Move To HI Register
+ * PMTLO   rs                Parallel Move To LO Register
+ * PMFHL   rd                Parallel Move From HI/LO Register
+ * PMTHL   rs                Parallel Move To HI/LO Register
+ */
+
+/*
+ *     Pack/Extend (11 instructions)
+ *     -----------------------------
+ * PPAC5   rd, rt            Parallel Pack to 5 bits
+ * PPACB   rd, rs, rt        Parallel Pack to Byte
+ * PPACH   rd, rs, rt        Parallel Pack to Halfword
+ * PPACW   rd, rs, rt        Parallel Pack to Word
+ * PEXT5   rd, rt            Parallel Extend Upper from 5 bits
+ * PEXTUB  rd, rs, rt        Parallel Extend Upper from Byte
+ * PEXTLB  rd, rs, rt        Parallel Extend Lower from Byte
+ * PEXTUH  rd, rs, rt        Parallel Extend Upper from Halfword
+ * PEXTLH  rd, rs, rt        Parallel Extend Lower from Halfword
+ * PEXTUW  rd, rs, rt        Parallel Extend Upper from Word
+ * PEXTLW  rd, rs, rt        Parallel Extend Lower from Word
+ */
+
+/*
+ *     Others (16 instructions)
+ *     ------------------------
+ * PCPYH   rd, rt            Parallel Copy Halfword
+ * PCPYLD  rd, rs, rt        Parallel Copy Lower Doubleword
+ * PCPYUD  rd, rs, rt        Parallel Copy Upper Doubleword
+ * PREVH   rd, rt            Parallel Reverse Halfword
+ * PINTH   rd, rs, rt        Parallel Interleave Halfword
+ * PINTEH  rd, rs, rt        Parallel Interleave Even Halfword
+ * PEXEH   rd, rt            Parallel Exchange Even Halfword
+ * PEXCH   rd, rt            Parallel Exchange Center Halfword
+ * PEXEW   rd, rt            Parallel Exchange Even Word
+ * PEXCW   rd, rt            Parallel Exchange Center Word
+ * QFSRV   rd, rs, rt        Quadword Funnel Shift Right Variable
+ * MFSA    rd                Move from Shift Amount Register
+ * MTSA    rs                Move to Shift Amount Register
+ * MTSAB   rs, immediate     Move Byte Count to Shift Amount Register
+ * MTSAH   rs, immediate     Move Halfword Count to Shift Amount Register
+ * PROT3W  rd, rt            Parallel Rotate 3 Words
+ */
+
+/* Parallel Copy Halfword */
+static bool trans_PCPYH(DisasContext *s, arg_rtype *a)
+{
+    if (a->rd == 0) {
+        /* nop */
+        return true;
+    }
+
+    if (a->rt == 0) {
+        tcg_gen_movi_i64(cpu_gpr[a->rd], 0);
+        tcg_gen_movi_i64(cpu_gpr_hi[a->rd], 0);
+        return true;
+    }
+
+    tcg_gen_deposit_i64(cpu_gpr[a->rd], cpu_gpr[a->rt], cpu_gpr[a->rt], 16, 16);
+    tcg_gen_deposit_i64(cpu_gpr[a->rd], cpu_gpr[a->rd], cpu_gpr[a->rd], 32, 32);
+    tcg_gen_deposit_i64(cpu_gpr_hi[a->rd], cpu_gpr_hi[a->rt], cpu_gpr_hi[a->rt], 16, 16);
+    tcg_gen_deposit_i64(cpu_gpr_hi[a->rd], cpu_gpr_hi[a->rd], cpu_gpr_hi[a->rd], 32, 32);
+
+    return true;
+}
+
+/* Parallel Copy Lower Doubleword */
+static bool trans_PCPYLD(DisasContext *s, arg_rtype *a)
+{
+    if (a->rd == 0) {
+        /* nop */
+        return true;
+    }
+
+    if (a->rs == 0) {
+        tcg_gen_movi_i64(cpu_gpr_hi[a->rd], 0);
+    } else {
+        tcg_gen_mov_i64(cpu_gpr_hi[a->rd], cpu_gpr[a->rs]);
+    }
+
+    if (a->rt == 0) {
+        tcg_gen_movi_i64(cpu_gpr[a->rd], 0);
+    } else if (a->rd != a->rt) {
+        tcg_gen_mov_i64(cpu_gpr[a->rd], cpu_gpr[a->rt]);
+    }
+
+    return true;
+}
+
+/* Parallel Copy Upper Doubleword */
+static bool trans_PCPYUD(DisasContext *s, arg_rtype *a)
+{
+    if (a->rd == 0) {
+        /* nop */
+        return true;
+    }
+
+    gen_load_gpr_hi(cpu_gpr[a->rd], a->rs);
+
+    if (a->rt == 0) {
+        tcg_gen_movi_i64(cpu_gpr_hi[a->rd], 0);
+    } else if (a->rd != a->rt) {
+        tcg_gen_mov_i64(cpu_gpr_hi[a->rd], cpu_gpr_hi[a->rt]);
+    }
+
+    return true;
+}
diff --git a/target/mips/txx9_translate.c b/target/mips/txx9_translate.c
new file mode 100644
index 0000000000..8a2c0b766b
--- /dev/null
+++ b/target/mips/txx9_translate.c
@@ -0,0 +1,20 @@
+/*
+ * Toshiba TXx9 instructions translation routines
+ *
+ *  Copyright (c) 2021 Philippe Mathieu-Daudé
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "translate.h"
+
+bool decode_ext_txx9(DisasContext *ctx, uint32_t insn)
+{
+#if defined(TARGET_MIPS64)
+    if (decode_ext_tx79(ctx, insn)) {
+        return true;
+    }
+#endif
+    return false;
+}
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 799e47169c..8f220e15d1 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -3,13 +3,13 @@
 .PHONY: check-help
 check-help:
 	@echo "Regression testing targets:"
-	@echo
 	@echo " $(MAKE) check                Run block, qapi-schema, unit, softfloat, qtest and decodetree tests"
+	@echo " $(MAKE) bench                Run speed tests"
 	@echo
+	@echo "Individual test suites:"
 	@echo " $(MAKE) check-qtest-TARGET   Run qtest tests for given target"
 	@echo " $(MAKE) check-qtest          Run qtest tests"
 	@echo " $(MAKE) check-unit           Run qobject tests"
-	@echo " $(MAKE) check-speed          Run qobject speed tests"
 	@echo " $(MAKE) check-qapi-schema    Run QAPI schema tests"
 	@echo " $(MAKE) check-block          Run block tests"
 ifneq ($(filter $(all-check-targets), check-softfloat),)
@@ -155,8 +155,4 @@ check-clean:
 
 clean: check-clean
 
-# For backwards compatibility
-
-check-speed: bench-speed
-
 endif
diff --git a/tests/acceptance/boot_linux_console.py b/tests/acceptance/boot_linux_console.py
index eb01286799..1ca32ecf25 100644
--- a/tests/acceptance/boot_linux_console.py
+++ b/tests/acceptance/boot_linux_console.py
@@ -507,20 +507,18 @@ class BootLinuxConsole(LinuxKernelTest):
         self.wait_for_console_pattern('Boot successful.')
         # TODO user command, for now the uart is stuck
 
-    @skipUnless(os.getenv('ARMBIAN_ARTIFACTS_CACHED'),
-                'Test artifacts fetched from unreliable apt.armbian.com')
     def test_arm_cubieboard_initrd(self):
         """
         :avocado: tags=arch:arm
         :avocado: tags=machine:cubieboard
         """
         deb_url = ('https://apt.armbian.com/pool/main/l/'
-                   'linux-4.20.7-sunxi/linux-image-dev-sunxi_5.75_armhf.deb')
-        deb_hash = '1334c29c44d984ffa05ed10de8c3361f33d78315'
+                   'linux-5.10.16-sunxi/linux-image-current-sunxi_21.02.2_armhf.deb')
+        deb_hash = '9fa84beda245cabf0b4fa84cf6eaa7738ead1da0'
         deb_path = self.fetch_asset(deb_url, asset_hash=deb_hash)
         kernel_path = self.extract_from_deb(deb_path,
-                                            '/boot/vmlinuz-4.20.7-sunxi')
-        dtb_path = '/usr/lib/linux-image-dev-sunxi/sun4i-a10-cubieboard.dtb'
+                                            '/boot/vmlinuz-5.10.16-sunxi')
+        dtb_path = '/usr/lib/linux-image-current-sunxi/sun4i-a10-cubieboard.dtb'
         dtb_path = self.extract_from_deb(deb_path, dtb_path)
         initrd_url = ('https://github.com/groeck/linux-build-test/raw/'
                       '2eb0a73b5d5a28df3170c546ddaaa9757e1e0848/rootfs/'
@@ -549,20 +547,18 @@ class BootLinuxConsole(LinuxKernelTest):
                                                 'system-control@1c00000')
         # cubieboard's reboot is not functioning; omit reboot test.
 
-    @skipUnless(os.getenv('ARMBIAN_ARTIFACTS_CACHED'),
-                'Test artifacts fetched from unreliable apt.armbian.com')
     def test_arm_cubieboard_sata(self):
         """
         :avocado: tags=arch:arm
         :avocado: tags=machine:cubieboard
         """
         deb_url = ('https://apt.armbian.com/pool/main/l/'
-                   'linux-4.20.7-sunxi/linux-image-dev-sunxi_5.75_armhf.deb')
-        deb_hash = '1334c29c44d984ffa05ed10de8c3361f33d78315'
+                   'linux-5.10.16-sunxi/linux-image-current-sunxi_21.02.2_armhf.deb')
+        deb_hash = '9fa84beda245cabf0b4fa84cf6eaa7738ead1da0'
         deb_path = self.fetch_asset(deb_url, asset_hash=deb_hash)
         kernel_path = self.extract_from_deb(deb_path,
-                                            '/boot/vmlinuz-4.20.7-sunxi')
-        dtb_path = '/usr/lib/linux-image-dev-sunxi/sun4i-a10-cubieboard.dtb'
+                                            '/boot/vmlinuz-5.10.16-sunxi')
+        dtb_path = '/usr/lib/linux-image-current-sunxi/sun4i-a10-cubieboard.dtb'
         dtb_path = self.extract_from_deb(deb_path, dtb_path)
         rootfs_url = ('https://github.com/groeck/linux-build-test/raw/'
                       '2eb0a73b5d5a28df3170c546ddaaa9757e1e0848/rootfs/'
@@ -678,20 +674,18 @@ class BootLinuxConsole(LinuxKernelTest):
         self.wait_for_console_pattern(
                 'Give root password for system maintenance')
 
-    @skipUnless(os.getenv('ARMBIAN_ARTIFACTS_CACHED'),
-                'Test artifacts fetched from unreliable apt.armbian.com')
     def test_arm_orangepi(self):
         """
         :avocado: tags=arch:arm
         :avocado: tags=machine:orangepi-pc
         """
         deb_url = ('https://apt.armbian.com/pool/main/l/'
-                   'linux-4.20.7-sunxi/linux-image-dev-sunxi_5.75_armhf.deb')
-        deb_hash = '1334c29c44d984ffa05ed10de8c3361f33d78315'
+                   'linux-5.10.16-sunxi/linux-image-current-sunxi_21.02.2_armhf.deb')
+        deb_hash = '9fa84beda245cabf0b4fa84cf6eaa7738ead1da0'
         deb_path = self.fetch_asset(deb_url, asset_hash=deb_hash)
         kernel_path = self.extract_from_deb(deb_path,
-                                            '/boot/vmlinuz-4.20.7-sunxi')
-        dtb_path = '/usr/lib/linux-image-dev-sunxi/sun8i-h3-orangepi-pc.dtb'
+                                            '/boot/vmlinuz-5.10.16-sunxi')
+        dtb_path = '/usr/lib/linux-image-current-sunxi/sun8i-h3-orangepi-pc.dtb'
         dtb_path = self.extract_from_deb(deb_path, dtb_path)
 
         self.vm.set_console()
@@ -705,20 +699,18 @@ class BootLinuxConsole(LinuxKernelTest):
         console_pattern = 'Kernel command line: %s' % kernel_command_line
         self.wait_for_console_pattern(console_pattern)
 
-    @skipUnless(os.getenv('ARMBIAN_ARTIFACTS_CACHED'),
-                'Test artifacts fetched from unreliable apt.armbian.com')
     def test_arm_orangepi_initrd(self):
         """
         :avocado: tags=arch:arm
         :avocado: tags=machine:orangepi-pc
         """
         deb_url = ('https://apt.armbian.com/pool/main/l/'
-                   'linux-4.20.7-sunxi/linux-image-dev-sunxi_5.75_armhf.deb')
-        deb_hash = '1334c29c44d984ffa05ed10de8c3361f33d78315'
+                   'linux-5.10.16-sunxi/linux-image-current-sunxi_21.02.2_armhf.deb')
+        deb_hash = '9fa84beda245cabf0b4fa84cf6eaa7738ead1da0'
         deb_path = self.fetch_asset(deb_url, asset_hash=deb_hash)
         kernel_path = self.extract_from_deb(deb_path,
-                                            '/boot/vmlinuz-4.20.7-sunxi')
-        dtb_path = '/usr/lib/linux-image-dev-sunxi/sun8i-h3-orangepi-pc.dtb'
+                                            '/boot/vmlinuz-5.10.16-sunxi')
+        dtb_path = '/usr/lib/linux-image-current-sunxi/sun8i-h3-orangepi-pc.dtb'
         dtb_path = self.extract_from_deb(deb_path, dtb_path)
         initrd_url = ('https://github.com/groeck/linux-build-test/raw/'
                       '2eb0a73b5d5a28df3170c546ddaaa9757e1e0848/rootfs/'
@@ -749,8 +741,6 @@ class BootLinuxConsole(LinuxKernelTest):
         # Wait for VM to shut down gracefully
         self.vm.wait()
 
-    @skipUnless(os.getenv('ARMBIAN_ARTIFACTS_CACHED'),
-                'Test artifacts fetched from unreliable apt.armbian.com')
     def test_arm_orangepi_sd(self):
         """
         :avocado: tags=arch:arm
@@ -758,12 +748,12 @@ class BootLinuxConsole(LinuxKernelTest):
         :avocado: tags=device:sd
         """
         deb_url = ('https://apt.armbian.com/pool/main/l/'
-                   'linux-4.20.7-sunxi/linux-image-dev-sunxi_5.75_armhf.deb')
-        deb_hash = '1334c29c44d984ffa05ed10de8c3361f33d78315'
+                   'linux-5.10.16-sunxi/linux-image-current-sunxi_21.02.2_armhf.deb')
+        deb_hash = '9fa84beda245cabf0b4fa84cf6eaa7738ead1da0'
         deb_path = self.fetch_asset(deb_url, asset_hash=deb_hash)
         kernel_path = self.extract_from_deb(deb_path,
-                                            '/boot/vmlinuz-4.20.7-sunxi')
-        dtb_path = '/usr/lib/linux-image-dev-sunxi/sun8i-h3-orangepi-pc.dtb'
+                                            '/boot/vmlinuz-5.10.16-sunxi')
+        dtb_path = '/usr/lib/linux-image-current-sunxi/sun8i-h3-orangepi-pc.dtb'
         dtb_path = self.extract_from_deb(deb_path, dtb_path)
         rootfs_url = ('http://storage.kernelci.org/images/rootfs/buildroot/'
                       'kci-2019.02/armel/base/rootfs.ext2.xz')
@@ -802,7 +792,27 @@ class BootLinuxConsole(LinuxKernelTest):
         # Wait for VM to shut down gracefully
         self.vm.wait()
 
-    def do_test_arm_orangepi_uboot_armbian(self, image_path):
+    @skipUnless(os.getenv('AVOCADO_ALLOW_LARGE_STORAGE'), 'storage limited')
+    def test_arm_orangepi_bionic_20_08(self):
+        """
+        :avocado: tags=arch:arm
+        :avocado: tags=machine:orangepi-pc
+        :avocado: tags=device:sd
+        """
+
+        # This test download a 275 MiB compressed image and expand it
+        # to 1036 MiB, but the underlying filesystem is 1552 MiB...
+        # As we expand it to 2 GiB we are safe.
+
+        image_url = ('https://archive.armbian.com/orangepipc/archive/'
+                     'Armbian_20.08.1_Orangepipc_bionic_current_5.8.5.img.xz')
+        image_hash = ('b4d6775f5673486329e45a0586bf06b6'
+                      'dbe792199fd182ac6b9c7bb6c7d3e6dd')
+        image_path_xz = self.fetch_asset(image_url, asset_hash=image_hash,
+                                         algorithm='sha256')
+        image_path = archive.extract(image_path_xz, self.workdir)
+        image_pow2ceil_expand(image_path)
+
         self.vm.set_console()
         self.vm.add_args('-drive', 'file=' + image_path + ',if=sd,format=raw',
                          '-nic', 'user',
@@ -828,54 +838,6 @@ class BootLinuxConsole(LinuxKernelTest):
                                       'to <orangepipc>')
         self.wait_for_console_pattern('Starting Load Kernel Modules...')
 
-    @skipUnless(os.getenv('ARMBIAN_ARTIFACTS_CACHED'),
-                'Test artifacts fetched from unreliable apt.armbian.com')
-    @skipUnless(os.getenv('AVOCADO_ALLOW_LARGE_STORAGE'), 'storage limited')
-    @skipUnless(P7ZIP_AVAILABLE, '7z not installed')
-    def test_arm_orangepi_bionic_19_11(self):
-        """
-        :avocado: tags=arch:arm
-        :avocado: tags=machine:orangepi-pc
-        :avocado: tags=device:sd
-        """
-
-        # This test download a 196MB compressed image and expand it to 1GB
-        image_url = ('https://dl.armbian.com/orangepipc/archive/'
-                     'Armbian_19.11.3_Orangepipc_bionic_current_5.3.9.7z')
-        image_hash = '196a8ffb72b0123d92cea4a070894813d305c71e'
-        image_path_7z = self.fetch_asset(image_url, asset_hash=image_hash)
-        image_name = 'Armbian_19.11.3_Orangepipc_bionic_current_5.3.9.img'
-        image_path = os.path.join(self.workdir, image_name)
-        process.run("7z e -o%s %s" % (self.workdir, image_path_7z))
-        image_pow2ceil_expand(image_path)
-
-        self.do_test_arm_orangepi_uboot_armbian(image_path)
-
-    @skipUnless(os.getenv('ARMBIAN_ARTIFACTS_CACHED'),
-                'Test artifacts fetched from unreliable apt.armbian.com')
-    @skipUnless(os.getenv('AVOCADO_ALLOW_LARGE_STORAGE'), 'storage limited')
-    def test_arm_orangepi_bionic_20_08(self):
-        """
-        :avocado: tags=arch:arm
-        :avocado: tags=machine:orangepi-pc
-        :avocado: tags=device:sd
-        """
-
-        # This test download a 275 MiB compressed image and expand it
-        # to 1036 MiB, but the underlying filesystem is 1552 MiB...
-        # As we expand it to 2 GiB we are safe.
-
-        image_url = ('https://dl.armbian.com/orangepipc/archive/'
-                     'Armbian_20.08.1_Orangepipc_bionic_current_5.8.5.img.xz')
-        image_hash = ('b4d6775f5673486329e45a0586bf06b6'
-                      'dbe792199fd182ac6b9c7bb6c7d3e6dd')
-        image_path_xz = self.fetch_asset(image_url, asset_hash=image_hash,
-                                         algorithm='sha256')
-        image_path = archive.extract(image_path_xz, self.workdir)
-        image_pow2ceil_expand(image_path)
-
-        self.do_test_arm_orangepi_uboot_armbian(image_path)
-
     @skipUnless(os.getenv('AVOCADO_ALLOW_LARGE_STORAGE'), 'storage limited')
     def test_arm_orangepi_uboot_netbsd9(self):
         """
diff --git a/tests/acceptance/replay_kernel.py b/tests/acceptance/replay_kernel.py
index c1cb862468..71facdaa75 100644
--- a/tests/acceptance/replay_kernel.py
+++ b/tests/acceptance/replay_kernel.py
@@ -177,20 +177,18 @@ class ReplayKernelNormal(ReplayKernelBase):
         self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=1)
 
     @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
-    @skipUnless(os.getenv('ARMBIAN_ARTIFACTS_CACHED'),
-                'Test artifacts fetched from unreliable apt.armbian.com')
     def test_arm_cubieboard_initrd(self):
         """
         :avocado: tags=arch:arm
         :avocado: tags=machine:cubieboard
         """
         deb_url = ('https://apt.armbian.com/pool/main/l/'
-                   'linux-4.20.7-sunxi/linux-image-dev-sunxi_5.75_armhf.deb')
-        deb_hash = '1334c29c44d984ffa05ed10de8c3361f33d78315'
+                   'linux-5.10.16-sunxi/linux-image-current-sunxi_21.02.2_armhf.deb')
+        deb_hash = '9fa84beda245cabf0b4fa84cf6eaa7738ead1da0'
         deb_path = self.fetch_asset(deb_url, asset_hash=deb_hash)
         kernel_path = self.extract_from_deb(deb_path,
-                                            '/boot/vmlinuz-4.20.7-sunxi')
-        dtb_path = '/usr/lib/linux-image-dev-sunxi/sun4i-a10-cubieboard.dtb'
+                                            '/boot/vmlinuz-5.10.16-sunxi')
+        dtb_path = '/usr/lib/linux-image-current-sunxi/sun4i-a10-cubieboard.dtb'
         dtb_path = self.extract_from_deb(deb_path, dtb_path)
         initrd_url = ('https://github.com/groeck/linux-build-test/raw/'
                       '2eb0a73b5d5a28df3170c546ddaaa9757e1e0848/rootfs/'
diff --git a/tests/atomic64-bench.c b/tests/bench/atomic64-bench.c
index e474753d34..e474753d34 100644
--- a/tests/atomic64-bench.c
+++ b/tests/bench/atomic64-bench.c
diff --git a/tests/atomic_add-bench.c b/tests/bench/atomic_add-bench.c
index f05471ab45..f05471ab45 100644
--- a/tests/atomic_add-bench.c
+++ b/tests/bench/atomic_add-bench.c
diff --git a/tests/benchmark-crypto-cipher.c b/tests/bench/benchmark-crypto-cipher.c
index c04f0a0fba..c04f0a0fba 100644
--- a/tests/benchmark-crypto-cipher.c
+++ b/tests/bench/benchmark-crypto-cipher.c
diff --git a/tests/benchmark-crypto-hash.c b/tests/bench/benchmark-crypto-hash.c
index 927b00bb4d..927b00bb4d 100644
--- a/tests/benchmark-crypto-hash.c
+++ b/tests/bench/benchmark-crypto-hash.c
diff --git a/tests/benchmark-crypto-hmac.c b/tests/bench/benchmark-crypto-hmac.c
index 5cca636789..5cca636789 100644
--- a/tests/benchmark-crypto-hmac.c
+++ b/tests/bench/benchmark-crypto-hmac.c
diff --git a/tests/bench/meson.build b/tests/bench/meson.build
new file mode 100644
index 0000000000..00b3c209dc
--- /dev/null
+++ b/tests/bench/meson.build
@@ -0,0 +1,34 @@
+
+qht_bench = executable('qht-bench',
+                       sources: 'qht-bench.c',
+                       dependencies: [qemuutil])
+
+executable('atomic_add-bench',
+           sources: files('atomic_add-bench.c'),
+           dependencies: [qemuutil],
+           build_by_default: false)
+
+executable('atomic64-bench',
+           sources: files('atomic64-bench.c'),
+           dependencies: [qemuutil],
+           build_by_default: false)
+
+benchs = {}
+
+if have_block
+  benchs += {
+     'benchmark-crypto-hash': [crypto],
+     'benchmark-crypto-hmac': [crypto],
+     'benchmark-crypto-cipher': [crypto],
+  }
+endif
+
+foreach bench_name, deps: benchs
+  exe = executable(bench_name, bench_name + '.c',
+                   dependencies: [qemuutil] + deps)
+  benchmark(bench_name, exe,
+            args: ['--tap', '-k'],
+            protocol: 'tap',
+            timeout: 0,
+            suite: ['speed'])
+endforeach
diff --git a/tests/qht-bench.c b/tests/bench/qht-bench.c
index 2e5b70ccd0..2e5b70ccd0 100644
--- a/tests/qht-bench.c
+++ b/tests/bench/qht-bench.c
diff --git a/tests/meson.build b/tests/meson.build
index 656d211e25..55a7b08275 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -1,18 +1,6 @@
 py3 = import('python').find_installation()
 
-qht_bench = executable('qht-bench',
-                       sources: 'qht-bench.c',
-                       dependencies: [qemuutil])
-
-executable('atomic_add-bench',
-           sources: files('atomic_add-bench.c'),
-           dependencies: [qemuutil],
-           build_by_default: false)
-
-executable('atomic64-bench',
-           sources: files('atomic64-bench.c'),
-           dependencies: [qemuutil],
-           build_by_default: false)
+subdir('bench')
 
 test_qapi_outputs = [
   'qapi-builtin-types.c',
@@ -69,211 +57,10 @@ endforeach
 libtestqapi = static_library('testqapi', sources: [genh, test_qapi_sources])
 testqapi = declare_dependency(link_with: libtestqapi, sources: [genh, test_qapi_headers])
 
-testblock = declare_dependency(dependencies: [block], sources: 'iothread.c')
-
-tests = {
-  'check-block-qdict': [],
-  'check-qdict': [],
-  'check-qnum': [],
-  'check-qstring': [],
-  'check-qlist': [],
-  'check-qnull': [],
-  'check-qobject': [],
-  'check-qjson': [],
-  'check-qlit': [],
-  'test-qobject-output-visitor': [testqapi],
-  'test-clone-visitor': [testqapi],
-  'test-qobject-input-visitor': [testqapi],
-  'test-string-input-visitor': [testqapi],
-  'test-string-output-visitor': [testqapi],
-  'test-opts-visitor': [testqapi],
-  'test-visitor-serialization': [testqapi],
-  'test-bitmap': [],
-  # all code tested by test-x86-cpuid is inside topology.h
-  'test-x86-cpuid': [],
-  'test-cutils': [],
-  'test-shift128': [],
-  'test-mul64': [],
-  # all code tested by test-int128 is inside int128.h
-  'test-int128': [],
-  'rcutorture': [],
-  'test-rcu-list': [],
-  'test-rcu-simpleq': [],
-  'test-rcu-tailq': [],
-  'test-rcu-slist': [],
-  'test-qdist': [],
-  'test-qht': [],
-  'test-bitops': [],
-  'test-bitcnt': [],
-  'test-qgraph': ['qtest/libqos/qgraph.c'],
-  'check-qom-interface': [qom],
-  'check-qom-proplist': [qom],
-  'test-qemu-opts': [],
-  'test-keyval': [testqapi],
-  'test-logging': [],
-  'test-uuid': [],
-  'ptimer-test': ['ptimer-test-stubs.c', meson.source_root() / 'hw/core/ptimer.c'],
-  'test-qapi-util': [],
-}
-
-if have_system or have_tools
-  tests += {
-    'test-qmp-event': [testqapi],
-  }
-endif
-
 test_deps = {
   'test-qht-par': qht_bench,
 }
 
-benchs = {}
-
-if have_block
-  tests += {
-    'test-coroutine': [testblock],
-    'test-aio': [testblock],
-    'test-aio-multithread': [testblock],
-    'test-throttle': [testblock],
-    'test-thread-pool': [testblock],
-    'test-hbitmap': [testblock],
-    'test-bdrv-drain': [testblock],
-    'test-bdrv-graph-mod': [testblock],
-    'test-blockjob': [testblock],
-    'test-blockjob-txn': [testblock],
-    'test-block-backend': [testblock],
-    'test-block-iothread': [testblock],
-    'test-write-threshold': [testblock],
-    'test-crypto-hash': [crypto],
-    'test-crypto-hmac': [crypto],
-    'test-crypto-cipher': [crypto],
-    'test-crypto-secret': [crypto, keyutils],
-    'test-authz-simple': [authz],
-    'test-authz-list': [authz],
-    'test-authz-listfile': [authz],
-    'test-io-task': [testblock],
-    'test-io-channel-socket': ['socket-helpers.c', 'io-channel-helpers.c', io],
-    'test-io-channel-file': ['io-channel-helpers.c', io],
-    'test-io-channel-command': ['io-channel-helpers.c', io],
-    'test-io-channel-buffer': ['io-channel-helpers.c', io],
-    'test-crypto-ivgen': [io],
-    'test-crypto-afsplit': [io],
-    'test-crypto-block': [io],
-  }
-  if 'CONFIG_GNUTLS' in config_host and \
-     'CONFIG_TASN1' in config_host and \
-     'CONFIG_POSIX' in config_host
-    tests += {
-      'test-crypto-tlscredsx509': ['crypto-tls-x509-helpers.c', 'pkix_asn1_tab.c',
-                                   tasn1, crypto, gnutls],
-      'test-crypto-tlssession': ['crypto-tls-x509-helpers.c', 'pkix_asn1_tab.c', 'crypto-tls-psk-helpers.c',
-                                 tasn1, crypto, gnutls],
-      'test-io-channel-tls': ['io-channel-helpers.c', 'crypto-tls-x509-helpers.c', 'pkix_asn1_tab.c',
-                              tasn1, io, crypto, gnutls]}
-  endif
-  if 'CONFIG_AUTH_PAM' in config_host
-    tests += {'test-authz-pam': [authz]}
-  endif
-  if 'CONFIG_QEMU_PRIVATE_XTS' in config_host
-    tests += {'test-crypto-xts': [crypto, io]}
-  endif
-  if 'CONFIG_POSIX' in config_host
-    tests += {'test-image-locking': [testblock]}
-  endif
-  if 'CONFIG_REPLICATION' in config_host
-    tests += {'test-replication': [testblock]}
-  endif
-  if 'CONFIG_NETTLE' in config_host or 'CONFIG_GCRYPT' in config_host
-    tests += {'test-crypto-pbkdf': [io]}
-  endif
-  if 'CONFIG_EPOLL_CREATE1' in config_host
-    tests += {'test-fdmon-epoll': [testblock]}
-  endif
-  benchs += {
-     'benchmark-crypto-hash': [crypto],
-     'benchmark-crypto-hmac': [crypto],
-     'benchmark-crypto-cipher': [crypto],
-  }
-endif
-
-if have_system
-  tests += {
-    'test-iov': [],
-    'test-qmp-cmds': [testqapi],
-    'test-xbzrle': [migration],
-    'test-timed-average': [],
-    'test-util-sockets': ['socket-helpers.c'],
-    'test-base64': [],
-    'test-bufferiszero': [],
-    'test-vmstate': [migration, io]
-  }
-  if 'CONFIG_INOTIFY1' in config_host
-    tests += {'test-util-filemonitor': []}
-  endif
-
-  # Some tests: test-char, test-qdev-global-props, and test-qga,
-  # are not runnable under TSan due to a known issue.
-  # https://github.com/google/sanitizers/issues/1116
-  if 'CONFIG_TSAN' not in config_host
-    if 'CONFIG_POSIX' in config_host
-        tests += {
-          'test-char': ['socket-helpers.c', qom, io, chardev]
-        }
-    endif
-
-    tests += {
-      'test-qdev-global-props': [qom, hwcore, testqapi]
-    }
-  endif
-endif
-
-if 'CONFIG_TSAN' not in config_host and \
-   'CONFIG_GUEST_AGENT' in config_host and \
-   'CONFIG_LINUX' in config_host
-  tests += {'test-qga': ['qtest/libqtest.c']}
-  test_deps += {'test-qga': qga}
-endif
-
-test_env = environment()
-test_env.set('G_TEST_SRCDIR', meson.current_source_dir())
-test_env.set('G_TEST_BUILDDIR', meson.current_build_dir())
-
-slow_tests = {
-  'test-crypto-tlscredsx509': 45,
-  'test-crypto-tlssession': 45
-}
-
-foreach test_name, extra: tests
-  src = [test_name + '.c']
-  deps = [qemuutil]
-  if extra.length() > 0
-    # use a sourceset to quickly separate sources and deps
-    test_ss = ss.source_set()
-    test_ss.add(extra)
-    src += test_ss.all_sources()
-    deps += test_ss.all_dependencies()
-  endif
-  exe = executable(test_name, src, genh, dependencies: deps)
-
-  test(test_name, exe,
-       depends: test_deps.get(test_name, []),
-       env: test_env,
-       args: ['--tap', '-k'],
-       protocol: 'tap',
-       timeout: slow_tests.get(test_name, 30),
-       priority: slow_tests.get(test_name, 30),
-       suite: ['unit'])
-endforeach
-
-foreach bench_name, deps: benchs
-  exe = executable(bench_name, bench_name + '.c',
-                   dependencies: [qemuutil] + deps)
-  benchmark(bench_name, exe,
-            args: ['--tap', '-k'],
-            protocol: 'tap',
-            timeout: 0,
-            suite: ['speed'])
-endforeach
-
 if have_tools and 'CONFIG_VHOST_USER' in config_host and 'CONFIG_LINUX' in config_host
   executable('vhost-user-bridge',
              sources: files('vhost-user-bridge.c'),
@@ -299,6 +86,7 @@ if not get_option('tcg').disabled()
   endif
 endif
 
+subdir('unit')
 subdir('qapi-schema')
 subdir('qtest')
 subdir('migration')
diff --git a/tests/qtest/npcm7xx_pwm-test.c b/tests/qtest/npcm7xx_pwm-test.c
index 3d82654b81..bd15a1c294 100644
--- a/tests/qtest/npcm7xx_pwm-test.c
+++ b/tests/qtest/npcm7xx_pwm-test.c
@@ -45,6 +45,7 @@
 #define PLL_FBDV(rv)    extract32((rv), 16, 12)
 #define PLL_OTDV1(rv)   extract32((rv), 8, 3)
 #define PLL_OTDV2(rv)   extract32((rv), 13, 3)
+#define APB4CKDIV(rv)   extract32((rv), 30, 2)
 #define APB3CKDIV(rv)   extract32((rv), 28, 2)
 #define CLK2CKDIV(rv)   extract32((rv), 0, 1)
 #define CLK4CKDIV(rv)   extract32((rv), 26, 2)
@@ -52,6 +53,49 @@
 
 #define MAX_DUTY        1000000
 
+/* MFT (PWM fan) related */
+#define MFT_BA(n)       (0xf0180000 + ((n) * 0x1000))
+#define MFT_IRQ(n)      (96 + (n))
+#define MFT_CNT1        0x00
+#define MFT_CRA         0x02
+#define MFT_CRB         0x04
+#define MFT_CNT2        0x06
+#define MFT_PRSC        0x08
+#define MFT_CKC         0x0a
+#define MFT_MCTRL       0x0c
+#define MFT_ICTRL       0x0e
+#define MFT_ICLR        0x10
+#define MFT_IEN         0x12
+#define MFT_CPA         0x14
+#define MFT_CPB         0x16
+#define MFT_CPCFG       0x18
+#define MFT_INASEL      0x1a
+#define MFT_INBSEL      0x1c
+
+#define MFT_MCTRL_ALL   0x64
+#define MFT_ICLR_ALL    0x3f
+#define MFT_IEN_ALL     0x3f
+#define MFT_CPCFG_EQ_MODE 0x44
+
+#define MFT_CKC_C2CSEL  BIT(3)
+#define MFT_CKC_C1CSEL  BIT(0)
+
+#define MFT_ICTRL_TFPND BIT(5)
+#define MFT_ICTRL_TEPND BIT(4)
+#define MFT_ICTRL_TDPND BIT(3)
+#define MFT_ICTRL_TCPND BIT(2)
+#define MFT_ICTRL_TBPND BIT(1)
+#define MFT_ICTRL_TAPND BIT(0)
+
+#define MFT_MAX_CNT     0xffff
+#define MFT_TIMEOUT     0x5000
+
+#define DEFAULT_RPM     19800
+#define DEFAULT_PRSC    255
+#define MFT_PULSE_PER_REVOLUTION 2
+
+#define MAX_ERROR       1
+
 typedef struct PWMModule {
     int irq;
     uint64_t base_addr;
@@ -210,19 +254,36 @@ static uint64_t pwm_get_duty(QTestState *qts, int module_index, int pwm_index)
     return pwm_qom_get(qts, path, name);
 }
 
+static void mft_qom_set(QTestState *qts, int index, const char *name,
+                        uint32_t value)
+{
+    QDict *response;
+    char *path = g_strdup_printf("/machine/soc/mft[%d]", index);
+
+    g_test_message("Setting properties %s of mft[%d] with value %u",
+                   name, index, value);
+    response = qtest_qmp(qts, "{ 'execute': 'qom-set',"
+            " 'arguments': { 'path': %s, "
+            " 'property': %s, 'value': %u}}",
+            path, name, value);
+    /* The qom set message returns successfully. */
+    g_assert_true(qdict_haskey(response, "return"));
+}
+
 static uint32_t get_pll(uint32_t con)
 {
     return REF_HZ * PLL_FBDV(con) / (PLL_INDV(con) * PLL_OTDV1(con)
             * PLL_OTDV2(con));
 }
 
-static uint64_t read_pclk(QTestState *qts)
+static uint64_t read_pclk(QTestState *qts, bool mft)
 {
     uint64_t freq = REF_HZ;
     uint32_t clksel = qtest_readl(qts, CLK_BA + CLKSEL);
     uint32_t pllcon;
     uint32_t clkdiv1 = qtest_readl(qts, CLK_BA + CLKDIV1);
     uint32_t clkdiv2 = qtest_readl(qts, CLK_BA + CLKDIV2);
+    uint32_t apbdiv = mft ? APB4CKDIV(clkdiv2) : APB3CKDIV(clkdiv2);
 
     switch (CPUCKSEL(clksel)) {
     case 0:
@@ -241,7 +302,7 @@ static uint64_t read_pclk(QTestState *qts)
         g_assert_not_reached();
     }
 
-    freq >>= (CLK2CKDIV(clkdiv1) + CLK4CKDIV(clkdiv1) + APB3CKDIV(clkdiv2));
+    freq >>= (CLK2CKDIV(clkdiv1) + CLK4CKDIV(clkdiv1) + apbdiv);
 
     return freq;
 }
@@ -267,7 +328,7 @@ static uint32_t pwm_selector(uint32_t csr)
 static uint64_t pwm_compute_freq(QTestState *qts, uint32_t ppr, uint32_t csr,
         uint32_t cnr)
 {
-    return read_pclk(qts) / ((ppr + 1) * pwm_selector(csr) * (cnr + 1));
+    return read_pclk(qts, false) / ((ppr + 1) * pwm_selector(csr) * (cnr + 1));
 }
 
 static uint64_t pwm_compute_duty(uint32_t cnr, uint32_t cmr, bool inverted)
@@ -301,6 +362,28 @@ static void pwm_write(QTestState *qts, const TestData *td, unsigned offset,
     qtest_writel(qts, td->module->base_addr + offset, value);
 }
 
+static uint8_t mft_readb(QTestState *qts, int index, unsigned offset)
+{
+    return qtest_readb(qts, MFT_BA(index) + offset);
+}
+
+static uint16_t mft_readw(QTestState *qts, int index, unsigned offset)
+{
+    return qtest_readw(qts, MFT_BA(index) + offset);
+}
+
+static void mft_writeb(QTestState *qts, int index, unsigned offset,
+                        uint8_t value)
+{
+    qtest_writeb(qts, MFT_BA(index) + offset, value);
+}
+
+static void mft_writew(QTestState *qts, int index, unsigned offset,
+                        uint16_t value)
+{
+    return qtest_writew(qts, MFT_BA(index) + offset, value);
+}
+
 static uint32_t pwm_read_ppr(QTestState *qts, const TestData *td)
 {
     return extract32(pwm_read(qts, td, PPR), ppr_base[pwm_index(td->pwm)], 8);
@@ -351,11 +434,116 @@ static void pwm_write_cmr(QTestState *qts, const TestData *td, uint32_t value)
     pwm_write(qts, td, td->pwm->cmr_offset, value);
 }
 
+static int mft_compute_index(const TestData *td)
+{
+    int index = pwm_module_index(td->module) * ARRAY_SIZE(pwm_list) +
+                pwm_index(td->pwm);
+
+    g_assert_cmpint(index, <,
+                    ARRAY_SIZE(pwm_module_list) * ARRAY_SIZE(pwm_list));
+
+    return index;
+}
+
+static void mft_reset_counters(QTestState *qts, int index)
+{
+    mft_writew(qts, index, MFT_CNT1, MFT_MAX_CNT);
+    mft_writew(qts, index, MFT_CNT2, MFT_MAX_CNT);
+    mft_writew(qts, index, MFT_CRA, MFT_MAX_CNT);
+    mft_writew(qts, index, MFT_CRB, MFT_MAX_CNT);
+    mft_writew(qts, index, MFT_CPA, MFT_MAX_CNT - MFT_TIMEOUT);
+    mft_writew(qts, index, MFT_CPB, MFT_MAX_CNT - MFT_TIMEOUT);
+}
+
+static void mft_init(QTestState *qts, const TestData *td)
+{
+    int index = mft_compute_index(td);
+
+    /* Enable everything */
+    mft_writeb(qts, index, MFT_CKC, 0);
+    mft_writeb(qts, index, MFT_ICLR, MFT_ICLR_ALL);
+    mft_writeb(qts, index, MFT_MCTRL, MFT_MCTRL_ALL);
+    mft_writeb(qts, index, MFT_IEN, MFT_IEN_ALL);
+    mft_writeb(qts, index, MFT_INASEL, 0);
+    mft_writeb(qts, index, MFT_INBSEL, 0);
+
+    /* Set cpcfg to use EQ mode, same as kernel driver */
+    mft_writeb(qts, index, MFT_CPCFG, MFT_CPCFG_EQ_MODE);
+
+    /* Write default counters, timeout and prescaler */
+    mft_reset_counters(qts, index);
+    mft_writeb(qts, index, MFT_PRSC, DEFAULT_PRSC);
+
+    /* Write default max rpm via QMP */
+    mft_qom_set(qts, index, "max_rpm[0]", DEFAULT_RPM);
+    mft_qom_set(qts, index, "max_rpm[1]", DEFAULT_RPM);
+}
+
+static int32_t mft_compute_cnt(uint32_t rpm, uint64_t clk)
+{
+    uint64_t cnt;
+
+    if (rpm == 0) {
+        return -1;
+    }
+
+    cnt = clk * 60 / ((DEFAULT_PRSC + 1) * rpm * MFT_PULSE_PER_REVOLUTION);
+    if (cnt >= MFT_TIMEOUT) {
+        return -1;
+    }
+    return MFT_MAX_CNT - cnt;
+}
+
+static void mft_verify_rpm(QTestState *qts, const TestData *td, uint64_t duty)
+{
+    int index = mft_compute_index(td);
+    uint16_t cnt, cr;
+    uint32_t rpm = DEFAULT_RPM * duty / MAX_DUTY;
+    uint64_t clk = read_pclk(qts, true);
+    int32_t expected_cnt = mft_compute_cnt(rpm, clk);
+
+    qtest_irq_intercept_in(qts, "/machine/soc/a9mpcore/gic");
+    g_test_message(
+        "verifying rpm for mft[%d]: clk: %" PRIu64 ", duty: %" PRIu64 ", rpm: %u, cnt: %d",
+        index, clk, duty, rpm, expected_cnt);
+
+    /* Verify rpm for fan A */
+    /* Stop capture */
+    mft_writeb(qts, index, MFT_CKC, 0);
+    mft_writeb(qts, index, MFT_ICLR, MFT_ICLR_ALL);
+    mft_reset_counters(qts, index);
+    g_assert_cmphex(mft_readw(qts, index, MFT_CNT1), ==, MFT_MAX_CNT);
+    g_assert_cmphex(mft_readw(qts, index, MFT_CRA), ==, MFT_MAX_CNT);
+    g_assert_cmphex(mft_readw(qts, index, MFT_CPA), ==,
+                    MFT_MAX_CNT - MFT_TIMEOUT);
+    /* Start capture */
+    mft_writeb(qts, index, MFT_CKC, MFT_CKC_C1CSEL);
+    g_assert_true(qtest_get_irq(qts, MFT_IRQ(index)));
+    if (expected_cnt == -1) {
+        g_assert_cmphex(mft_readb(qts, index, MFT_ICTRL), ==, MFT_ICTRL_TEPND);
+    } else {
+        g_assert_cmphex(mft_readb(qts, index, MFT_ICTRL), ==, MFT_ICTRL_TAPND);
+        cnt = mft_readw(qts, index, MFT_CNT1);
+        /*
+         * Due to error in clock measurement and rounding, we might have a small
+         * error in measuring RPM.
+         */
+        g_assert_cmphex(cnt + MAX_ERROR, >=, expected_cnt);
+        g_assert_cmphex(cnt, <=, expected_cnt + MAX_ERROR);
+        cr = mft_readw(qts, index, MFT_CRA);
+        g_assert_cmphex(cnt, ==, cr);
+    }
+
+    /* Verify rpm for fan B */
+
+    qtest_irq_intercept_out(qts, "/machine/soc/a9mpcore/gic");
+}
+
 /* Check pwm registers can be reset to default value */
 static void test_init(gconstpointer test_data)
 {
     const TestData *td = test_data;
-    QTestState *qts = qtest_init("-machine quanta-gsj");
+    QTestState *qts = qtest_init("-machine npcm750-evb");
     int module = pwm_module_index(td->module);
     int pwm = pwm_index(td->pwm);
 
@@ -369,7 +557,7 @@ static void test_init(gconstpointer test_data)
 static void test_oneshot(gconstpointer test_data)
 {
     const TestData *td = test_data;
-    QTestState *qts = qtest_init("-machine quanta-gsj");
+    QTestState *qts = qtest_init("-machine npcm750-evb");
     int module = pwm_module_index(td->module);
     int pwm = pwm_index(td->pwm);
     uint32_t ppr, csr, pcr;
@@ -400,13 +588,15 @@ static void test_oneshot(gconstpointer test_data)
 static void test_toggle(gconstpointer test_data)
 {
     const TestData *td = test_data;
-    QTestState *qts = qtest_init("-machine quanta-gsj");
+    QTestState *qts = qtest_init("-machine npcm750-evb");
     int module = pwm_module_index(td->module);
     int pwm = pwm_index(td->pwm);
     uint32_t ppr, csr, pcr, cnr, cmr;
     int i, j, k, l;
     uint64_t expected_freq, expected_duty;
 
+    mft_init(qts, td);
+
     pcr = CH_EN | CH_MOD;
     for (i = 0; i < ARRAY_SIZE(ppr_list); ++i) {
         ppr = ppr_list[i];
@@ -440,6 +630,9 @@ static void test_toggle(gconstpointer test_data)
                                 ==, expected_freq);
                     }
 
+                    /* Test MFT's RPM is correct. */
+                    mft_verify_rpm(qts, td, expected_duty);
+
                     /* Test inverted mode */
                     expected_duty = pwm_compute_duty(cnr, cmr, true);
                     pwm_write_pcr(qts, td, pcr | CH_INV);
diff --git a/tests/check-block-qdict.c b/tests/unit/check-block-qdict.c
index 5a25825093..5a25825093 100644
--- a/tests/check-block-qdict.c
+++ b/tests/unit/check-block-qdict.c
diff --git a/tests/check-qdict.c b/tests/unit/check-qdict.c
index b5efa859b0..b5efa859b0 100644
--- a/tests/check-qdict.c
+++ b/tests/unit/check-qdict.c
diff --git a/tests/check-qjson.c b/tests/unit/check-qjson.c
index c845f91d43..c845f91d43 100644
--- a/tests/check-qjson.c
+++ b/tests/unit/check-qjson.c
diff --git a/tests/check-qlist.c b/tests/unit/check-qlist.c
index 3cd0ccbf19..3cd0ccbf19 100644
--- a/tests/check-qlist.c
+++ b/tests/unit/check-qlist.c
diff --git a/tests/check-qlit.c b/tests/unit/check-qlit.c
index bd6798d912..bd6798d912 100644
--- a/tests/check-qlit.c
+++ b/tests/unit/check-qlit.c
diff --git a/tests/check-qnull.c b/tests/unit/check-qnull.c
index ebf21db83c..ebf21db83c 100644
--- a/tests/check-qnull.c
+++ b/tests/unit/check-qnull.c
diff --git a/tests/check-qnum.c b/tests/unit/check-qnum.c
index b85fca2302..b85fca2302 100644
--- a/tests/check-qnum.c
+++ b/tests/unit/check-qnum.c
diff --git a/tests/check-qobject.c b/tests/unit/check-qobject.c
index c1713d15af..c1713d15af 100644
--- a/tests/check-qobject.c
+++ b/tests/unit/check-qobject.c
diff --git a/tests/check-qom-interface.c b/tests/unit/check-qom-interface.c
index c99be97ed8..c99be97ed8 100644
--- a/tests/check-qom-interface.c
+++ b/tests/unit/check-qom-interface.c
diff --git a/tests/check-qom-proplist.c b/tests/unit/check-qom-proplist.c
index 1b76581980..1b76581980 100644
--- a/tests/check-qom-proplist.c
+++ b/tests/unit/check-qom-proplist.c
diff --git a/tests/check-qstring.c b/tests/unit/check-qstring.c
index 4bf9772093..4bf9772093 100644
--- a/tests/check-qstring.c
+++ b/tests/unit/check-qstring.c
diff --git a/tests/crypto-tls-psk-helpers.c b/tests/unit/crypto-tls-psk-helpers.c
index a8395477c3..a8395477c3 100644
--- a/tests/crypto-tls-psk-helpers.c
+++ b/tests/unit/crypto-tls-psk-helpers.c
diff --git a/tests/crypto-tls-psk-helpers.h b/tests/unit/crypto-tls-psk-helpers.h
index 5aa9951cb6..5aa9951cb6 100644
--- a/tests/crypto-tls-psk-helpers.h
+++ b/tests/unit/crypto-tls-psk-helpers.h
diff --git a/tests/crypto-tls-x509-helpers.c b/tests/unit/crypto-tls-x509-helpers.c
index 97658592a2..97658592a2 100644
--- a/tests/crypto-tls-x509-helpers.c
+++ b/tests/unit/crypto-tls-x509-helpers.c
diff --git a/tests/crypto-tls-x509-helpers.h b/tests/unit/crypto-tls-x509-helpers.h
index 8fcd7785ab..8fcd7785ab 100644
--- a/tests/crypto-tls-x509-helpers.h
+++ b/tests/unit/crypto-tls-x509-helpers.h
diff --git a/tests/io-channel-helpers.c b/tests/unit/io-channel-helpers.c
index ff156ed3c4..ff156ed3c4 100644
--- a/tests/io-channel-helpers.c
+++ b/tests/unit/io-channel-helpers.c
diff --git a/tests/io-channel-helpers.h b/tests/unit/io-channel-helpers.h
index 3d14043710..3d14043710 100644
--- a/tests/io-channel-helpers.h
+++ b/tests/unit/io-channel-helpers.h
diff --git a/tests/iothread.c b/tests/unit/iothread.c
index afde12b4ef..afde12b4ef 100644
--- a/tests/iothread.c
+++ b/tests/unit/iothread.c
diff --git a/tests/iothread.h b/tests/unit/iothread.h
index 4877cea6a3..4877cea6a3 100644
--- a/tests/iothread.h
+++ b/tests/unit/iothread.h
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
new file mode 100644
index 0000000000..4bfe4627ba
--- /dev/null
+++ b/tests/unit/meson.build
@@ -0,0 +1,184 @@
+
+testblock = declare_dependency(dependencies: [block], sources: 'iothread.c')
+
+tests = {
+  'check-block-qdict': [],
+  'check-qdict': [],
+  'check-qnum': [],
+  'check-qstring': [],
+  'check-qlist': [],
+  'check-qnull': [],
+  'check-qobject': [],
+  'check-qjson': [],
+  'check-qlit': [],
+  'test-qobject-output-visitor': [testqapi],
+  'test-clone-visitor': [testqapi],
+  'test-qobject-input-visitor': [testqapi],
+  'test-string-input-visitor': [testqapi],
+  'test-string-output-visitor': [testqapi],
+  'test-opts-visitor': [testqapi],
+  'test-visitor-serialization': [testqapi],
+  'test-bitmap': [],
+  # all code tested by test-x86-cpuid is inside topology.h
+  'test-x86-cpuid': [],
+  'test-cutils': [],
+  'test-shift128': [],
+  'test-mul64': [],
+  # all code tested by test-int128 is inside int128.h
+  'test-int128': [],
+  'rcutorture': [],
+  'test-rcu-list': [],
+  'test-rcu-simpleq': [],
+  'test-rcu-tailq': [],
+  'test-rcu-slist': [],
+  'test-qdist': [],
+  'test-qht': [],
+  'test-bitops': [],
+  'test-bitcnt': [],
+  'test-qgraph': ['../qtest/libqos/qgraph.c'],
+  'check-qom-interface': [qom],
+  'check-qom-proplist': [qom],
+  'test-qemu-opts': [],
+  'test-keyval': [testqapi],
+  'test-logging': [],
+  'test-uuid': [],
+  'ptimer-test': ['ptimer-test-stubs.c', meson.source_root() / 'hw/core/ptimer.c'],
+  'test-qapi-util': [],
+}
+
+if have_system or have_tools
+  tests += {
+    'test-qmp-event': [testqapi],
+  }
+endif
+
+if have_block
+  tests += {
+    'test-coroutine': [testblock],
+    'test-aio': [testblock],
+    'test-aio-multithread': [testblock],
+    'test-throttle': [testblock],
+    'test-thread-pool': [testblock],
+    'test-hbitmap': [testblock],
+    'test-bdrv-drain': [testblock],
+    'test-bdrv-graph-mod': [testblock],
+    'test-blockjob': [testblock],
+    'test-blockjob-txn': [testblock],
+    'test-block-backend': [testblock],
+    'test-block-iothread': [testblock],
+    'test-write-threshold': [testblock],
+    'test-crypto-hash': [crypto],
+    'test-crypto-hmac': [crypto],
+    'test-crypto-cipher': [crypto],
+    'test-crypto-secret': [crypto, keyutils],
+    'test-authz-simple': [authz],
+    'test-authz-list': [authz],
+    'test-authz-listfile': [authz],
+    'test-io-task': [testblock],
+    'test-io-channel-socket': ['socket-helpers.c', 'io-channel-helpers.c', io],
+    'test-io-channel-file': ['io-channel-helpers.c', io],
+    'test-io-channel-command': ['io-channel-helpers.c', io],
+    'test-io-channel-buffer': ['io-channel-helpers.c', io],
+    'test-crypto-ivgen': [io],
+    'test-crypto-afsplit': [io],
+    'test-crypto-block': [io],
+  }
+  if 'CONFIG_GNUTLS' in config_host and \
+     'CONFIG_TASN1' in config_host and \
+     'CONFIG_POSIX' in config_host
+    tests += {
+      'test-crypto-tlscredsx509': ['crypto-tls-x509-helpers.c', 'pkix_asn1_tab.c',
+                                   tasn1, crypto, gnutls],
+      'test-crypto-tlssession': ['crypto-tls-x509-helpers.c', 'pkix_asn1_tab.c', 'crypto-tls-psk-helpers.c',
+                                 tasn1, crypto, gnutls],
+      'test-io-channel-tls': ['io-channel-helpers.c', 'crypto-tls-x509-helpers.c', 'pkix_asn1_tab.c',
+                              tasn1, io, crypto, gnutls]}
+  endif
+  if 'CONFIG_AUTH_PAM' in config_host
+    tests += {'test-authz-pam': [authz]}
+  endif
+  if 'CONFIG_QEMU_PRIVATE_XTS' in config_host
+    tests += {'test-crypto-xts': [crypto, io]}
+  endif
+  if 'CONFIG_POSIX' in config_host
+    tests += {'test-image-locking': [testblock]}
+  endif
+  if 'CONFIG_REPLICATION' in config_host
+    tests += {'test-replication': [testblock]}
+  endif
+  if 'CONFIG_NETTLE' in config_host or 'CONFIG_GCRYPT' in config_host
+    tests += {'test-crypto-pbkdf': [io]}
+  endif
+  if 'CONFIG_EPOLL_CREATE1' in config_host
+    tests += {'test-fdmon-epoll': [testblock]}
+  endif
+endif
+
+if have_system
+  tests += {
+    'test-iov': [],
+    'test-qmp-cmds': [testqapi],
+    'test-xbzrle': [migration],
+    'test-timed-average': [],
+    'test-util-sockets': ['socket-helpers.c'],
+    'test-base64': [],
+    'test-bufferiszero': [],
+    'test-vmstate': [migration, io]
+  }
+  if 'CONFIG_INOTIFY1' in config_host
+    tests += {'test-util-filemonitor': []}
+  endif
+
+  # Some tests: test-char, test-qdev-global-props, and test-qga,
+  # are not runnable under TSan due to a known issue.
+  # https://github.com/google/sanitizers/issues/1116
+  if 'CONFIG_TSAN' not in config_host
+    if 'CONFIG_POSIX' in config_host
+        tests += {
+          'test-char': ['socket-helpers.c', qom, io, chardev]
+        }
+    endif
+
+    tests += {
+      'test-qdev-global-props': [qom, hwcore, testqapi]
+    }
+  endif
+endif
+
+if 'CONFIG_TSAN' not in config_host and \
+   'CONFIG_GUEST_AGENT' in config_host and \
+   'CONFIG_LINUX' in config_host
+  tests += {'test-qga': ['../qtest/libqtest.c']}
+  test_deps += {'test-qga': qga}
+endif
+
+test_env = environment()
+test_env.set('G_TEST_SRCDIR', meson.current_source_dir())
+test_env.set('G_TEST_BUILDDIR', meson.current_build_dir())
+
+slow_tests = {
+  'test-crypto-tlscredsx509': 45,
+  'test-crypto-tlssession': 45
+}
+
+foreach test_name, extra: tests
+  src = [test_name + '.c']
+  deps = [qemuutil]
+  if extra.length() > 0
+    # use a sourceset to quickly separate sources and deps
+    test_ss = ss.source_set()
+    test_ss.add(extra)
+    src += test_ss.all_sources()
+    deps += test_ss.all_dependencies()
+  endif
+  exe = executable(test_name, src, genh, dependencies: deps)
+
+  test(test_name, exe,
+       depends: test_deps.get(test_name, []),
+       env: test_env,
+       args: ['--tap', '-k'],
+       protocol: 'tap',
+       timeout: slow_tests.get(test_name, 30),
+       priority: slow_tests.get(test_name, 30),
+       suite: ['unit'])
+endforeach
diff --git a/tests/pkix_asn1_tab.c b/tests/unit/pkix_asn1_tab.c
index 4aaf736d3f..15397cf77a 100644
--- a/tests/pkix_asn1_tab.c
+++ b/tests/unit/pkix_asn1_tab.c
@@ -4,7 +4,7 @@
  */
 
 #include "qemu/osdep.h"
-#include "tests/crypto-tls-x509-helpers.h"
+#include "crypto-tls-x509-helpers.h"
 
 #ifdef QCRYPTO_HAVE_TLS_TEST_SUPPORT
 
diff --git a/tests/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c
index 7f801a4d09..7f801a4d09 100644
--- a/tests/ptimer-test-stubs.c
+++ b/tests/unit/ptimer-test-stubs.c
diff --git a/tests/ptimer-test.c b/tests/unit/ptimer-test.c
index 9176b96c1c..9176b96c1c 100644
--- a/tests/ptimer-test.c
+++ b/tests/unit/ptimer-test.c
diff --git a/tests/ptimer-test.h b/tests/unit/ptimer-test.h
index 09ac56da9e..09ac56da9e 100644
--- a/tests/ptimer-test.h
+++ b/tests/unit/ptimer-test.h
diff --git a/tests/rcutorture.c b/tests/unit/rcutorture.c
index de6f649058..de6f649058 100644
--- a/tests/rcutorture.c
+++ b/tests/unit/rcutorture.c
diff --git a/tests/socket-helpers.c b/tests/unit/socket-helpers.c
index f704fd1a69..f704fd1a69 100644
--- a/tests/socket-helpers.c
+++ b/tests/unit/socket-helpers.c
diff --git a/tests/socket-helpers.h b/tests/unit/socket-helpers.h
index 512a004811..512a004811 100644
--- a/tests/socket-helpers.h
+++ b/tests/unit/socket-helpers.h
diff --git a/tests/test-aio-multithread.c b/tests/unit/test-aio-multithread.c
index a555cc8835..a555cc8835 100644
--- a/tests/test-aio-multithread.c
+++ b/tests/unit/test-aio-multithread.c
diff --git a/tests/test-aio.c b/tests/unit/test-aio.c
index 8a46078463..8a46078463 100644
--- a/tests/test-aio.c
+++ b/tests/unit/test-aio.c
diff --git a/tests/test-authz-list.c b/tests/unit/test-authz-list.c
index 5351992a01..5351992a01 100644
--- a/tests/test-authz-list.c
+++ b/tests/unit/test-authz-list.c
diff --git a/tests/test-authz-listfile.c b/tests/unit/test-authz-listfile.c
index 64d0e1500f..64d0e1500f 100644
--- a/tests/test-authz-listfile.c
+++ b/tests/unit/test-authz-listfile.c
diff --git a/tests/test-authz-pam.c b/tests/unit/test-authz-pam.c
index 4fe1ef2603..4fe1ef2603 100644
--- a/tests/test-authz-pam.c
+++ b/tests/unit/test-authz-pam.c
diff --git a/tests/test-authz-simple.c b/tests/unit/test-authz-simple.c
index 6f9034d8ff..6f9034d8ff 100644
--- a/tests/test-authz-simple.c
+++ b/tests/unit/test-authz-simple.c
diff --git a/tests/test-base64.c b/tests/unit/test-base64.c
index 3012d7be26..3012d7be26 100644
--- a/tests/test-base64.c
+++ b/tests/unit/test-base64.c
diff --git a/tests/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 8a29e33e00..8a29e33e00 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
diff --git a/tests/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c
index c4f7d16039..c4f7d16039 100644
--- a/tests/test-bdrv-graph-mod.c
+++ b/tests/unit/test-bdrv-graph-mod.c
diff --git a/tests/test-bitcnt.c b/tests/unit/test-bitcnt.c
index e153dcb8a2..e153dcb8a2 100644
--- a/tests/test-bitcnt.c
+++ b/tests/unit/test-bitcnt.c
diff --git a/tests/test-bitmap.c b/tests/unit/test-bitmap.c
index 8db4f67883..8db4f67883 100644
--- a/tests/test-bitmap.c
+++ b/tests/unit/test-bitmap.c
diff --git a/tests/test-bitops.c b/tests/unit/test-bitops.c
index 5a791d2e17..5a791d2e17 100644
--- a/tests/test-bitops.c
+++ b/tests/unit/test-bitops.c
diff --git a/tests/test-block-backend.c b/tests/unit/test-block-backend.c
index 2fb1a444bd..2fb1a444bd 100644
--- a/tests/test-block-backend.c
+++ b/tests/unit/test-block-backend.c
diff --git a/tests/test-block-iothread.c b/tests/unit/test-block-iothread.c
index 3f866a35c6..3f866a35c6 100644
--- a/tests/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
diff --git a/tests/test-blockjob-txn.c b/tests/unit/test-blockjob-txn.c
index 8bd13b9949..8bd13b9949 100644
--- a/tests/test-blockjob-txn.c
+++ b/tests/unit/test-blockjob-txn.c
diff --git a/tests/test-blockjob.c b/tests/unit/test-blockjob.c
index 7519847912..7519847912 100644
--- a/tests/test-blockjob.c
+++ b/tests/unit/test-blockjob.c
diff --git a/tests/test-bufferiszero.c b/tests/unit/test-bufferiszero.c
index e45fd31804..e45fd31804 100644
--- a/tests/test-bufferiszero.c
+++ b/tests/unit/test-bufferiszero.c
diff --git a/tests/test-char.c b/tests/unit/test-char.c
index 755d54c15e..755d54c15e 100644
--- a/tests/test-char.c
+++ b/tests/unit/test-char.c
diff --git a/tests/test-clone-visitor.c b/tests/unit/test-clone-visitor.c
index 4944b3d857..4944b3d857 100644
--- a/tests/test-clone-visitor.c
+++ b/tests/unit/test-clone-visitor.c
diff --git a/tests/test-coroutine.c b/tests/unit/test-coroutine.c
index e946d93a65..e946d93a65 100644
--- a/tests/test-coroutine.c
+++ b/tests/unit/test-coroutine.c
diff --git a/tests/test-crypto-afsplit.c b/tests/unit/test-crypto-afsplit.c
index 00a7c180fd..00a7c180fd 100644
--- a/tests/test-crypto-afsplit.c
+++ b/tests/unit/test-crypto-afsplit.c
diff --git a/tests/test-crypto-block.c b/tests/unit/test-crypto-block.c
index 3b1f0d509f..3b1f0d509f 100644
--- a/tests/test-crypto-block.c
+++ b/tests/unit/test-crypto-block.c
diff --git a/tests/test-crypto-cipher.c b/tests/unit/test-crypto-cipher.c
index 280319a223..280319a223 100644
--- a/tests/test-crypto-cipher.c
+++ b/tests/unit/test-crypto-cipher.c
diff --git a/tests/test-crypto-hash.c b/tests/unit/test-crypto-hash.c
index ce7d0ab9b5..ce7d0ab9b5 100644
--- a/tests/test-crypto-hash.c
+++ b/tests/unit/test-crypto-hash.c
diff --git a/tests/test-crypto-hmac.c b/tests/unit/test-crypto-hmac.c
index ee55382a3c..ee55382a3c 100644
--- a/tests/test-crypto-hmac.c
+++ b/tests/unit/test-crypto-hmac.c
diff --git a/tests/test-crypto-ivgen.c b/tests/unit/test-crypto-ivgen.c
index f581e6aba7..f581e6aba7 100644
--- a/tests/test-crypto-ivgen.c
+++ b/tests/unit/test-crypto-ivgen.c
diff --git a/tests/test-crypto-pbkdf.c b/tests/unit/test-crypto-pbkdf.c
index c50fd639d2..c50fd639d2 100644
--- a/tests/test-crypto-pbkdf.c
+++ b/tests/unit/test-crypto-pbkdf.c
diff --git a/tests/test-crypto-secret.c b/tests/unit/test-crypto-secret.c
index 34a4aecc12..34a4aecc12 100644
--- a/tests/test-crypto-secret.c
+++ b/tests/unit/test-crypto-secret.c
diff --git a/tests/test-crypto-tlscredsx509.c b/tests/unit/test-crypto-tlscredsx509.c
index f487349c32..f487349c32 100644
--- a/tests/test-crypto-tlscredsx509.c
+++ b/tests/unit/test-crypto-tlscredsx509.c
diff --git a/tests/test-crypto-tlssession.c b/tests/unit/test-crypto-tlssession.c
index 8b2453fa79..8b2453fa79 100644
--- a/tests/test-crypto-tlssession.c
+++ b/tests/unit/test-crypto-tlssession.c
diff --git a/tests/test-crypto-xts.c b/tests/unit/test-crypto-xts.c
index 7acbc956fd..7acbc956fd 100644
--- a/tests/test-crypto-xts.c
+++ b/tests/unit/test-crypto-xts.c
diff --git a/tests/test-cutils.c b/tests/unit/test-cutils.c
index bad3a60993..bad3a60993 100644
--- a/tests/test-cutils.c
+++ b/tests/unit/test-cutils.c
diff --git a/tests/test-fdmon-epoll.c b/tests/unit/test-fdmon-epoll.c
index 11fd8a2fa9..11fd8a2fa9 100644
--- a/tests/test-fdmon-epoll.c
+++ b/tests/unit/test-fdmon-epoll.c
diff --git a/tests/test-hbitmap.c b/tests/unit/test-hbitmap.c
index b6726cf76b..b6726cf76b 100644
--- a/tests/test-hbitmap.c
+++ b/tests/unit/test-hbitmap.c
diff --git a/tests/test-image-locking.c b/tests/unit/test-image-locking.c
index ba057bd66c..ba057bd66c 100644
--- a/tests/test-image-locking.c
+++ b/tests/unit/test-image-locking.c
diff --git a/tests/test-int128.c b/tests/unit/test-int128.c
index b86a3c76e6..b86a3c76e6 100644
--- a/tests/test-int128.c
+++ b/tests/unit/test-int128.c
diff --git a/tests/test-io-channel-buffer.c b/tests/unit/test-io-channel-buffer.c
index 9c6724dea4..9c6724dea4 100644
--- a/tests/test-io-channel-buffer.c
+++ b/tests/unit/test-io-channel-buffer.c
diff --git a/tests/test-io-channel-command.c b/tests/unit/test-io-channel-command.c
index 99056e07c0..99056e07c0 100644
--- a/tests/test-io-channel-command.c
+++ b/tests/unit/test-io-channel-command.c
diff --git a/tests/test-io-channel-file.c b/tests/unit/test-io-channel-file.c
index 29038e67b6..29038e67b6 100644
--- a/tests/test-io-channel-file.c
+++ b/tests/unit/test-io-channel-file.c
diff --git a/tests/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c
index c49eec1f03..c49eec1f03 100644
--- a/tests/test-io-channel-socket.c
+++ b/tests/unit/test-io-channel-socket.c
diff --git a/tests/test-io-channel-tls.c b/tests/unit/test-io-channel-tls.c
index ad7554c534..ad7554c534 100644
--- a/tests/test-io-channel-tls.c
+++ b/tests/unit/test-io-channel-tls.c
diff --git a/tests/test-io-task.c b/tests/unit/test-io-task.c
index 953a50ae66..953a50ae66 100644
--- a/tests/test-io-task.c
+++ b/tests/unit/test-io-task.c
diff --git a/tests/test-iov.c b/tests/unit/test-iov.c
index 9c415e2f1f..9c415e2f1f 100644
--- a/tests/test-iov.c
+++ b/tests/unit/test-iov.c
diff --git a/tests/test-keyval.c b/tests/unit/test-keyval.c
index e20c07cf3e..e20c07cf3e 100644
--- a/tests/test-keyval.c
+++ b/tests/unit/test-keyval.c
diff --git a/tests/test-logging.c b/tests/unit/test-logging.c
index ccb819f193..ccb819f193 100644
--- a/tests/test-logging.c
+++ b/tests/unit/test-logging.c
diff --git a/tests/test-mul64.c b/tests/unit/test-mul64.c
index 9be775d084..9be775d084 100644
--- a/tests/test-mul64.c
+++ b/tests/unit/test-mul64.c
diff --git a/tests/test-opts-visitor.c b/tests/unit/test-opts-visitor.c
index 23e897061c..23e897061c 100644
--- a/tests/test-opts-visitor.c
+++ b/tests/unit/test-opts-visitor.c
diff --git a/tests/test-qapi-util.c b/tests/unit/test-qapi-util.c
index 847f305cff..847f305cff 100644
--- a/tests/test-qapi-util.c
+++ b/tests/unit/test-qapi-util.c
diff --git a/tests/test-qdev-global-props.c b/tests/unit/test-qdev-global-props.c
index c8862cac5f..c8862cac5f 100644
--- a/tests/test-qdev-global-props.c
+++ b/tests/unit/test-qdev-global-props.c
diff --git a/tests/test-qdist.c b/tests/unit/test-qdist.c
index 9541ce31eb..9541ce31eb 100644
--- a/tests/test-qdist.c
+++ b/tests/unit/test-qdist.c
diff --git a/tests/test-qemu-opts.c b/tests/unit/test-qemu-opts.c
index 6568e31a72..6568e31a72 100644
--- a/tests/test-qemu-opts.c
+++ b/tests/unit/test-qemu-opts.c
diff --git a/tests/test-qga.c b/tests/unit/test-qga.c
index eb33264e8e..5cb140d1b5 100644
--- a/tests/test-qga.c
+++ b/tests/unit/test-qga.c
@@ -4,7 +4,7 @@
 #include <sys/socket.h>
 #include <sys/un.h>
 
-#include "qtest/libqos/libqtest.h"
+#include "../qtest/libqos/libqtest.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qlist.h"
 
diff --git a/tests/test-qgraph.c b/tests/unit/test-qgraph.c
index ae2f7b2dd8..f819430e2c 100644
--- a/tests/test-qgraph.c
+++ b/tests/unit/test-qgraph.c
@@ -17,8 +17,8 @@
  */
 
 #include "qemu/osdep.h"
-#include "qtest/libqos/qgraph.h"
-#include "qtest/libqos/qgraph_internal.h"
+#include "../qtest/libqos/qgraph.h"
+#include "../qtest/libqos/qgraph_internal.h"
 
 #define MACHINE_PC "x86_64/pc"
 #define MACHINE_RASPI2 "arm/raspi2"
diff --git a/tests/test-qht.c b/tests/unit/test-qht.c
index 4d23cefab6..4d23cefab6 100644
--- a/tests/test-qht.c
+++ b/tests/unit/test-qht.c
diff --git a/tests/test-qmp-cmds.c b/tests/unit/test-qmp-cmds.c
index d3413bfef0..d3413bfef0 100644
--- a/tests/test-qmp-cmds.c
+++ b/tests/unit/test-qmp-cmds.c
diff --git a/tests/test-qmp-event.c b/tests/unit/test-qmp-event.c
index 7dd0053190..7dd0053190 100644
--- a/tests/test-qmp-event.c
+++ b/tests/unit/test-qmp-event.c
diff --git a/tests/test-qobject-input-visitor.c b/tests/unit/test-qobject-input-visitor.c
index e41b91a2a6..e41b91a2a6 100644
--- a/tests/test-qobject-input-visitor.c
+++ b/tests/unit/test-qobject-input-visitor.c
diff --git a/tests/test-qobject-output-visitor.c b/tests/unit/test-qobject-output-visitor.c
index 9dc1e075e7..9dc1e075e7 100644
--- a/tests/test-qobject-output-visitor.c
+++ b/tests/unit/test-qobject-output-visitor.c
diff --git a/tests/test-rcu-list.c b/tests/unit/test-rcu-list.c
index 49641e1936..49641e1936 100644
--- a/tests/test-rcu-list.c
+++ b/tests/unit/test-rcu-list.c
diff --git a/tests/test-rcu-simpleq.c b/tests/unit/test-rcu-simpleq.c
index 057f7d33f7..057f7d33f7 100644
--- a/tests/test-rcu-simpleq.c
+++ b/tests/unit/test-rcu-simpleq.c
diff --git a/tests/test-rcu-slist.c b/tests/unit/test-rcu-slist.c
index 868e1e472e..868e1e472e 100644
--- a/tests/test-rcu-slist.c
+++ b/tests/unit/test-rcu-slist.c
diff --git a/tests/test-rcu-tailq.c b/tests/unit/test-rcu-tailq.c
index 8d487e0ee0..8d487e0ee0 100644
--- a/tests/test-rcu-tailq.c
+++ b/tests/unit/test-rcu-tailq.c
diff --git a/tests/test-replication.c b/tests/unit/test-replication.c
index b067240add..b067240add 100644
--- a/tests/test-replication.c
+++ b/tests/unit/test-replication.c
diff --git a/tests/test-shift128.c b/tests/unit/test-shift128.c
index f3ff736e5c..f3ff736e5c 100644
--- a/tests/test-shift128.c
+++ b/tests/unit/test-shift128.c
diff --git a/tests/test-string-input-visitor.c b/tests/unit/test-string-input-visitor.c
index 249faafc9d..249faafc9d 100644
--- a/tests/test-string-input-visitor.c
+++ b/tests/unit/test-string-input-visitor.c
diff --git a/tests/test-string-output-visitor.c b/tests/unit/test-string-output-visitor.c
index e2bedc5c7c..e2bedc5c7c 100644
--- a/tests/test-string-output-visitor.c
+++ b/tests/unit/test-string-output-visitor.c
diff --git a/tests/test-thread-pool.c b/tests/unit/test-thread-pool.c
index 70dc6314a1..70dc6314a1 100644
--- a/tests/test-thread-pool.c
+++ b/tests/unit/test-thread-pool.c
diff --git a/tests/test-throttle.c b/tests/unit/test-throttle.c
index 7adb5e6652..7adb5e6652 100644
--- a/tests/test-throttle.c
+++ b/tests/unit/test-throttle.c
diff --git a/tests/test-timed-average.c b/tests/unit/test-timed-average.c
index 82c92500df..82c92500df 100644
--- a/tests/test-timed-average.c
+++ b/tests/unit/test-timed-average.c
diff --git a/tests/test-util-filemonitor.c b/tests/unit/test-util-filemonitor.c
index b629e10857..b629e10857 100644
--- a/tests/test-util-filemonitor.c
+++ b/tests/unit/test-util-filemonitor.c
diff --git a/tests/test-util-sockets.c b/tests/unit/test-util-sockets.c
index 67486055ed..67486055ed 100644
--- a/tests/test-util-sockets.c
+++ b/tests/unit/test-util-sockets.c
diff --git a/tests/test-uuid.c b/tests/unit/test-uuid.c
index c111de5fc1..c111de5fc1 100644
--- a/tests/test-uuid.c
+++ b/tests/unit/test-uuid.c
diff --git a/tests/test-visitor-serialization.c b/tests/unit/test-visitor-serialization.c
index 4629958647..4629958647 100644
--- a/tests/test-visitor-serialization.c
+++ b/tests/unit/test-visitor-serialization.c
diff --git a/tests/test-vmstate.c b/tests/unit/test-vmstate.c
index a001879585..a001879585 100644
--- a/tests/test-vmstate.c
+++ b/tests/unit/test-vmstate.c
diff --git a/tests/test-write-threshold.c b/tests/unit/test-write-threshold.c
index fc1c45a2eb..fc1c45a2eb 100644
--- a/tests/test-write-threshold.c
+++ b/tests/unit/test-write-threshold.c
diff --git a/tests/test-x86-cpuid.c b/tests/unit/test-x86-cpuid.c
index bfabc0403a..bfabc0403a 100644
--- a/tests/test-x86-cpuid.c
+++ b/tests/unit/test-x86-cpuid.c
diff --git a/tests/test-xbzrle.c b/tests/unit/test-xbzrle.c
index 795d6f1cba..795d6f1cba 100644
--- a/tests/test-xbzrle.c
+++ b/tests/unit/test-xbzrle.c