summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS2
-rw-r--r--disas/meson.build6
-rw-r--r--disas/riscv-xthead.c707
-rw-r--r--disas/riscv-xthead.h28
-rw-r--r--disas/riscv-xventana.c41
-rw-r--r--disas/riscv-xventana.h18
-rw-r--r--disas/riscv.c559
-rw-r--r--disas/riscv.h302
-rw-r--r--docs/about/deprecated.rst5
-rw-r--r--docs/devel/style.rst9
-rw-r--r--docs/interop/qemu-ga.rst5
-rw-r--r--docs/system/riscv/virt.rst1
-rw-r--r--hw/ide/cmd646.c3
-rw-r--r--hw/ide/pci.c16
-rw-r--r--hw/ide/piix.c8
-rw-r--r--hw/ide/sii3112.c7
-rw-r--r--hw/ide/via.c9
-rw-r--r--hw/isa/vt82c686.c11
-rw-r--r--hw/mips/loongson3_virt.c4
-rw-r--r--hw/misc/Kconfig3
-rw-r--r--hw/misc/meson.build1
-rw-r--r--hw/misc/sifive_e_aon.c319
-rw-r--r--hw/riscv/Kconfig1
-rw-r--r--hw/riscv/sifive_e.c17
-rw-r--r--hw/riscv/virt.c56
-rw-r--r--hw/s390x/ipl.c2
-rw-r--r--hw/s390x/meson.build1
-rw-r--r--hw/s390x/s390-pci-kvm.c2
-rw-r--r--hw/s390x/s390-virtio-ccw.c2
-rw-r--r--hw/s390x/tod-kvm.c2
-rw-r--r--include/hw/ide/pci.h1
-rw-r--r--include/hw/isa/vt82c686.h2
-rw-r--r--include/hw/misc/sifive_e_aon.h60
-rw-r--r--include/hw/riscv/sifive_e.h9
-rw-r--r--linux-user/elfload.c4
-rw-r--r--linux-user/riscv/syscall32_nr.h1
-rw-r--r--linux-user/riscv/syscall64_nr.h1
-rw-r--r--linux-user/syscall.c146
-rw-r--r--meson.build17
-rw-r--r--os-posix.c35
-rw-r--r--pc-bios/opensbi-riscv32-generic-fw_dynamic.binbin123072 -> 135344 bytes
-rw-r--r--pc-bios/opensbi-riscv64-generic-fw_dynamic.binbin121800 -> 138304 bytes
-rw-r--r--qemu-options.hx18
-rw-r--r--qga/main.c91
-rw-r--r--qga/vss-win32/install.cpp47
-rw-r--r--qga/vss-win32/meson.build2
-rw-r--r--qga/vss-win32/provider.cpp3
-rw-r--r--qga/vss-win32/requester.cpp51
-rw-r--r--qga/vss-win32/vss-debug.cpp39
-rw-r--r--qga/vss-win32/vss-debug.h25
m---------roms/opensbi0
-rw-r--r--target/mips/cpu-defs.c.inc59
-rw-r--r--target/mips/cpu.c18
-rw-r--r--target/mips/cpu.h45
-rw-r--r--target/mips/helper.h4
-rw-r--r--target/mips/internal.h2
-rw-r--r--target/mips/sysemu/cp0_timer.c35
-rw-r--r--target/mips/tcg/lcsr.decode17
-rw-r--r--target/mips/tcg/lcsr_translate.c75
-rw-r--r--target/mips/tcg/meson.build2
-rw-r--r--target/mips/tcg/mxu_translate.c3753
-rw-r--r--target/mips/tcg/op_helper.c16
-rw-r--r--target/mips/tcg/sysemu/lcsr_helper.c45
-rw-r--r--target/mips/tcg/sysemu/meson.build4
-rw-r--r--target/mips/tcg/sysemu_helper.h.inc8
-rw-r--r--target/mips/tcg/translate.c10
-rw-r--r--target/mips/tcg/translate.h1
-rw-r--r--target/riscv/cpu.c439
-rw-r--r--target/riscv/cpu.h56
-rw-r--r--target/riscv/cpu_cfg.h41
-rw-r--r--target/riscv/cpu_helper.c12
-rw-r--r--target/riscv/csr.c41
-rw-r--r--target/riscv/fpu_helper.c166
-rw-r--r--target/riscv/helper.h29
-rw-r--r--target/riscv/insn32.decode38
-rw-r--r--target/riscv/insn_trans/trans_rvbf16.c.inc175
-rw-r--r--target/riscv/insn_trans/trans_rvzfa.c.inc521
-rw-r--r--target/riscv/insn_trans/trans_rvzfh.c.inc12
-rw-r--r--target/riscv/kvm.c501
-rw-r--r--target/riscv/kvm_riscv.h1
-rw-r--r--target/riscv/op_helper.c3
-rw-r--r--target/riscv/translate.c42
-rw-r--r--target/riscv/vector_helper.c17
-rw-r--r--target/s390x/arch_dump.c2
-rw-r--r--target/s390x/cpu-sysemu.c2
-rw-r--r--target/s390x/cpu_features.c2
-rw-r--r--target/s390x/cpu_models.c2
-rw-r--r--target/s390x/diag.c2
-rw-r--r--target/s390x/helper.c2
-rw-r--r--target/s390x/helper.h2
-rw-r--r--target/s390x/ioinst.c2
-rw-r--r--target/s390x/kvm/kvm.c2
-rw-r--r--target/s390x/kvm/meson.build1
-rw-r--r--target/s390x/kvm/pv.c (renamed from hw/s390x/pv.c)2
-rw-r--r--target/s390x/kvm/pv.h (renamed from include/hw/s390x/pv.h)0
-rw-r--r--target/s390x/mmu_helper.c2
-rw-r--r--target/s390x/tcg/fpu_helper.c3
-rw-r--r--target/s390x/tcg/insn-data.h.inc4
-rw-r--r--target/s390x/tcg/mem_helper.c5
-rw-r--r--target/s390x/tcg/translate.c8
-rw-r--r--tests/avocado/riscv_opensbi.py2
-rw-r--r--tests/qtest/libqos/libqos.c49
-rw-r--r--tests/qtest/libqos/libqos.h2
-rw-r--r--tests/qtest/libqtest.c52
-rw-r--r--tests/qtest/libqtest.h20
-rw-r--r--tests/qtest/meson.build3
-rw-r--r--tests/qtest/migration-test.c143
-rw-r--r--tests/qtest/readconfig-test.c204
-rw-r--r--tests/qtest/sifive-e-aon-watchdog-test.c450
-rw-r--r--tests/tcg/riscv64/Makefile.target6
-rw-r--r--tests/tcg/riscv64/test-fcvtmod.c345
-rw-r--r--tests/tcg/s390x/Makefile.softmmu-target1
-rw-r--r--tests/tcg/s390x/Makefile.target3
-rw-r--r--tests/tcg/s390x/epsw.c23
-rw-r--r--tests/tcg/s390x/gdbstub/test-svc.py2
-rw-r--r--tests/tcg/s390x/hello-s390x-asm.S4
-rw-r--r--tests/tcg/s390x/larl.c21
-rw-r--r--tests/tcg/s390x/lra.S19
-rw-r--r--tests/tcg/s390x/mdeb.c30
-rw-r--r--tests/tcg/s390x/mie3-mvcrl.c46
-rw-r--r--tests/unit/test-qga.c31
-rw-r--r--util/async-teardown.c21
122 files changed, 9644 insertions, 763 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 8f642141d0..e158a25cfe 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -452,8 +452,6 @@ S: Supported
 F: target/s390x/kvm/
 F: target/s390x/machine.c
 F: target/s390x/sigp.c
-F: hw/s390x/pv.c
-F: include/hw/s390x/pv.h
 F: gdb-xml/s390*.xml
 T: git https://github.com/borntraeger/qemu.git s390-next
 L: qemu-s390x@nongnu.org
diff --git a/disas/meson.build b/disas/meson.build
index 3a480eb9f8..815523ab85 100644
--- a/disas/meson.build
+++ b/disas/meson.build
@@ -6,7 +6,11 @@ common_ss.add(when: 'CONFIG_M68K_DIS', if_true: files('m68k.c'))
 common_ss.add(when: 'CONFIG_MICROBLAZE_DIS', if_true: files('microblaze.c'))
 common_ss.add(when: 'CONFIG_MIPS_DIS', if_true: files('mips.c', 'nanomips.c'))
 common_ss.add(when: 'CONFIG_NIOS2_DIS', if_true: files('nios2.c'))
-common_ss.add(when: 'CONFIG_RISCV_DIS', if_true: files('riscv.c'))
+common_ss.add(when: 'CONFIG_RISCV_DIS', if_true: files(
+    'riscv.c',
+    'riscv-xthead.c',
+    'riscv-xventana.c'
+))
 common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c'))
 common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
 common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
diff --git a/disas/riscv-xthead.c b/disas/riscv-xthead.c
new file mode 100644
index 0000000000..99da679d16
--- /dev/null
+++ b/disas/riscv-xthead.c
@@ -0,0 +1,707 @@
+/*
+ * QEMU RISC-V Disassembler for xthead.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "disas/riscv.h"
+#include "disas/riscv-xthead.h"
+
+typedef enum {
+    /* 0 is reserved for rv_op_illegal. */
+    /* XTheadBa */
+    rv_op_th_addsl = 1,
+    /* XTheadBb */
+    rv_op_th_srri,
+    rv_op_th_srriw,
+    rv_op_th_ext,
+    rv_op_th_extu,
+    rv_op_th_ff0,
+    rv_op_th_ff1,
+    rv_op_th_rev,
+    rv_op_th_revw,
+    rv_op_th_tstnbz,
+    /* XTheadBs */
+    rv_op_th_tst,
+    /* XTheadCmo */
+    rv_op_th_dcache_call,
+    rv_op_th_dcache_ciall,
+    rv_op_th_dcache_iall,
+    rv_op_th_dcache_cpa,
+    rv_op_th_dcache_cipa,
+    rv_op_th_dcache_ipa,
+    rv_op_th_dcache_cva,
+    rv_op_th_dcache_civa,
+    rv_op_th_dcache_iva,
+    rv_op_th_dcache_csw,
+    rv_op_th_dcache_cisw,
+    rv_op_th_dcache_isw,
+    rv_op_th_dcache_cpal1,
+    rv_op_th_dcache_cval1,
+    rv_op_th_icache_iall,
+    rv_op_th_icache_ialls,
+    rv_op_th_icache_ipa,
+    rv_op_th_icache_iva,
+    rv_op_th_l2cache_call,
+    rv_op_th_l2cache_ciall,
+    rv_op_th_l2cache_iall,
+    /* XTheadCondMov */
+    rv_op_th_mveqz,
+    rv_op_th_mvnez,
+    /* XTheadFMemIdx */
+    rv_op_th_flrd,
+    rv_op_th_flrw,
+    rv_op_th_flurd,
+    rv_op_th_flurw,
+    rv_op_th_fsrd,
+    rv_op_th_fsrw,
+    rv_op_th_fsurd,
+    rv_op_th_fsurw,
+    /* XTheadFmv */
+    rv_op_th_fmv_hw_x,
+    rv_op_th_fmv_x_hw,
+    /* XTheadMac */
+    rv_op_th_mula,
+    rv_op_th_mulah,
+    rv_op_th_mulaw,
+    rv_op_th_muls,
+    rv_op_th_mulsw,
+    rv_op_th_mulsh,
+    /* XTheadMemIdx */
+    rv_op_th_lbia,
+    rv_op_th_lbib,
+    rv_op_th_lbuia,
+    rv_op_th_lbuib,
+    rv_op_th_lhia,
+    rv_op_th_lhib,
+    rv_op_th_lhuia,
+    rv_op_th_lhuib,
+    rv_op_th_lwia,
+    rv_op_th_lwib,
+    rv_op_th_lwuia,
+    rv_op_th_lwuib,
+    rv_op_th_ldia,
+    rv_op_th_ldib,
+    rv_op_th_sbia,
+    rv_op_th_sbib,
+    rv_op_th_shia,
+    rv_op_th_shib,
+    rv_op_th_swia,
+    rv_op_th_swib,
+    rv_op_th_sdia,
+    rv_op_th_sdib,
+    rv_op_th_lrb,
+    rv_op_th_lrbu,
+    rv_op_th_lrh,
+    rv_op_th_lrhu,
+    rv_op_th_lrw,
+    rv_op_th_lrwu,
+    rv_op_th_lrd,
+    rv_op_th_srb,
+    rv_op_th_srh,
+    rv_op_th_srw,
+    rv_op_th_srd,
+    rv_op_th_lurb,
+    rv_op_th_lurbu,
+    rv_op_th_lurh,
+    rv_op_th_lurhu,
+    rv_op_th_lurw,
+    rv_op_th_lurwu,
+    rv_op_th_lurd,
+    rv_op_th_surb,
+    rv_op_th_surh,
+    rv_op_th_surw,
+    rv_op_th_surd,
+    /* XTheadMemPair */
+    rv_op_th_ldd,
+    rv_op_th_lwd,
+    rv_op_th_lwud,
+    rv_op_th_sdd,
+    rv_op_th_swd,
+    /* XTheadSync */
+    rv_op_th_sfence_vmas,
+    rv_op_th_sync,
+    rv_op_th_sync_i,
+    rv_op_th_sync_is,
+    rv_op_th_sync_s,
+} rv_xthead_op;
+
+const rv_opcode_data xthead_opcode_data[] = {
+    { "th.illegal", rv_codec_illegal, rv_fmt_none, NULL, 0, 0, 0 },
+    /* XTheadBa */
+    { "th.addsl", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    /* XTheadBb */
+    { "th.srri", rv_codec_r2_imm6, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 },
+    { "th.srriw", rv_codec_r2_imm5, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 },
+    { "th.ext", rv_codec_r2_immhl, rv_fmt_rd_rs1_immh_imml, NULL, 0, 0, 0 },
+    { "th.extu", rv_codec_r2_immhl, rv_fmt_rd_rs1_immh_imml, NULL, 0, 0, 0 },
+    { "th.ff0", rv_codec_r2, rv_fmt_rd_rs1, NULL, 0, 0, 0 },
+    { "th.ff1", rv_codec_r2, rv_fmt_rd_rs1, NULL, 0, 0, 0 },
+    { "th.rev", rv_codec_r2, rv_fmt_rd_rs1, NULL, 0, 0, 0 },
+    { "th.revw", rv_codec_r2, rv_fmt_rd_rs1, NULL, 0, 0, 0 },
+    { "th.tstnbz", rv_codec_r2, rv_fmt_rd_rs1, NULL, 0, 0, 0 },
+    /* XTheadBs */
+    { "th.tst", rv_codec_r2_imm6, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 },
+    /* XTheadCmo */
+    { "th.dcache.call", rv_codec_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "th.dcache.ciall", rv_codec_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "th.dcache.iall", rv_codec_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "th.dcache.cpa", rv_codec_r, rv_fmt_rs1, NULL, 0, 0, 0 },
+    { "th.dcache.cipa", rv_codec_r, rv_fmt_rs1, NULL, 0, 0, 0 },
+    { "th.dcache.ipa", rv_codec_r, rv_fmt_rs1, NULL, 0, 0, 0 },
+    { "th.dcache.cva", rv_codec_r, rv_fmt_rs1, NULL, 0, 0, 0 },
+    { "th.dcache.civa", rv_codec_r, rv_fmt_rs1, NULL, 0, 0, 0 },
+    { "th.dcache.iva", rv_codec_r, rv_fmt_rs1, NULL, 0, 0, 0 },
+    { "th.dcache.csw", rv_codec_r, rv_fmt_rs1, NULL, 0, 0, 0 },
+    { "th.dcache.cisw", rv_codec_r, rv_fmt_rs1, NULL, 0, 0, 0 },
+    { "th.dcache.isw", rv_codec_r, rv_fmt_rs1, NULL, 0, 0, 0 },
+    { "th.dcache.cpal1", rv_codec_r, rv_fmt_rs1, NULL, 0, 0, 0 },
+    { "th.dcache.cval1", rv_codec_r, rv_fmt_rs1, NULL, 0, 0, 0 },
+    { "th.icache.iall", rv_codec_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "th.icache.ialls", rv_codec_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "th.icache.ipa", rv_codec_r, rv_fmt_rs1, NULL, 0, 0, 0 },
+    { "th.icache.iva", rv_codec_r, rv_fmt_rs1, NULL, 0, 0, 0 },
+    { "th.l2cache.call", rv_codec_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "th.l2cache.ciall", rv_codec_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "th.l2cache.iall", rv_codec_none, rv_fmt_none, NULL, 0, 0, 0 },
+    /* XTheadCondMov */
+    { "th.mveqz", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
+    { "th.mvnez", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
+    /* XTheadFMemIdx */
+    { "th.flrd", rv_codec_r_imm2, rv_fmt_frd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.flrw", rv_codec_r_imm2, rv_fmt_frd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.flurd", rv_codec_r_imm2, rv_fmt_frd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.flurw", rv_codec_r_imm2, rv_fmt_frd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.fsrd", rv_codec_r_imm2, rv_fmt_frd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.fsrw", rv_codec_r_imm2, rv_fmt_frd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.fsurd", rv_codec_r_imm2, rv_fmt_frd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.fsurw", rv_codec_r_imm2, rv_fmt_frd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    /* XTheadFmv */
+    { "th.fmv.hw.x", rv_codec_r, rv_fmt_rd_frs1, NULL, 0, 0, 0 },
+    { "th.fmv.x.hw", rv_codec_r, rv_fmt_rd_frs1, NULL, 0, 0, 0 },
+    /* XTheadMac */
+    { "th.mula", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
+    { "th.mulaw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
+    { "th.mulah", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
+    { "th.muls", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
+    { "th.mulsw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
+    { "th.mulsh", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
+    /* XTheadMemIdx */
+    { "th.lbia", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.lbib", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml, NULL, 0, 0, 0 },
+    { "th.lbuia", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.lbuib", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.lhia", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.lhib", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.lhuia", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.lhuib", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.lwia", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.lwib", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.lwuia", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.lwuib", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.ldia", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.ldib", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.sbia", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.sbib", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.shia", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.shib", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.swia", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.swib", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.sdia", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.sdib", rv_codec_r2_imm2_imm5, rv_fmt_rd_rs1_immh_imml_addr, NULL, 0, 0, 0 },
+    { "th.lrb", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.lrbu", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.lrh", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.lrhu", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.lrw", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.lrwu", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.lrd", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.srb", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.srh", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.srw", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.srd", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.lurb", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.lurbu", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.lurh", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.lurhu", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.lurw", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.lurwu", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.lurd", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.surb", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.surh", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.surw", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    { "th.surd", rv_codec_r_imm2, rv_fmt_rd_rs1_rs2_imm, NULL, 0, 0, 0 },
+    /* XTheadMemPair */
+    { "th.ldd", rv_codec_r_imm2, rv_fmt_rd2_imm, NULL, 0, 0, 0 },
+    { "th.lwd", rv_codec_r_imm2, rv_fmt_rd2_imm, NULL, 0, 0, 0 },
+    { "th.lwud", rv_codec_r_imm2, rv_fmt_rd2_imm, NULL, 0, 0, 0 },
+    { "th.sdd", rv_codec_r_imm2, rv_fmt_rd2_imm, NULL, 0, 0, 0 },
+    { "th.swd", rv_codec_r_imm2, rv_fmt_rd2_imm, NULL, 0, 0, 0 },
+    /* XTheadSync */
+    { "th.sfence.vmas", rv_codec_r, rv_fmt_rs1_rs2, NULL, 0, 0, 0 },
+    { "th.sync", rv_codec_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "th.sync.i", rv_codec_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "th.sync.is", rv_codec_none, rv_fmt_none, NULL, 0, 0, 0 },
+    { "th.sync.s", rv_codec_none, rv_fmt_none, NULL, 0, 0, 0 },
+};
+
+void decode_xtheadba(rv_decode *dec, rv_isa isa)
+{
+    rv_inst inst = dec->inst;
+    rv_opcode op = rv_op_illegal;
+
+    switch (((inst >> 0) & 0b11)) {
+    case 3:
+        switch (((inst >> 2) & 0b11111)) {
+        case 2:
+            /* custom-0 */
+            switch ((inst >> 12) & 0b111) {
+            case 1:
+                switch ((inst >> 25) & 0b1111111) {
+                case 0b0000000:
+                case 0b0000001:
+                case 0b0000010:
+                case 0b0000011: op = rv_op_th_addsl; break;
+                }
+                break;
+            }
+            break;
+            /* custom-0 */
+        }
+        break;
+    }
+
+    dec->op = op;
+}
+
+void decode_xtheadbb(rv_decode *dec, rv_isa isa)
+{
+    rv_inst inst = dec->inst;
+    rv_opcode op = rv_op_illegal;
+
+    switch (((inst >> 0) & 0b11)) {
+    case 3:
+        switch (((inst >> 2) & 0b11111)) {
+        case 2:
+            /* custom-0 */
+            switch ((inst >> 12) & 0b111) {
+            case 1:
+                switch ((inst >> 25) & 0b1111111) {
+                case 0b0001010: op = rv_op_th_srriw; break;
+                case 0b1000000:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_tstnbz;
+                    }
+                    break;
+                case 0b1000001:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_rev;
+                    }
+                    break;
+                case 0b1000010:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_ff0;
+                    }
+                    break;
+                case 0b1000011:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_ff1;
+                    }
+                    break;
+                case 0b1000100:
+                case 0b1001000:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_revw;
+                    }
+                    break;
+                case 0b0000100:
+                case 0b0000101: op = rv_op_th_srri; break;
+                }
+                break;
+            case 2: op = rv_op_th_ext; break;
+            case 3: op = rv_op_th_extu; break;
+            }
+            break;
+            /* custom-0 */
+        }
+        break;
+    }
+
+    dec->op = op;
+}
+
+void decode_xtheadbs(rv_decode *dec, rv_isa isa)
+{
+    rv_inst inst = dec->inst;
+    rv_opcode op = rv_op_illegal;
+
+    switch (((inst >> 0) & 0b11)) {
+    case 3:
+        switch (((inst >> 2) & 0b11111)) {
+        case 2:
+            /* custom-0 */
+            switch ((inst >> 12) & 0b111) {
+            case 1:
+                switch ((inst >> 26) & 0b111111) {
+                case 0b100010: op = rv_op_th_tst; break;
+                }
+                break;
+            }
+            break;
+            /* custom-0 */
+        }
+        break;
+    }
+
+    dec->op = op;
+}
+
+void decode_xtheadcmo(rv_decode *dec, rv_isa isa)
+{
+    rv_inst inst = dec->inst;
+    rv_opcode op = rv_op_illegal;
+
+    switch (((inst >> 0) & 0b11)) {
+    case 3:
+        switch (((inst >> 2) & 0b11111)) {
+        case 2:
+            /* custom-0 */
+            switch ((inst >> 12) & 0b111) {
+            case 0:
+                switch ((inst >> 20 & 0b111111111111)) {
+                case 0b000000000001:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_dcache_call;
+                    }
+                    break;
+                case 0b000000000011:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_dcache_ciall;
+                    }
+                    break;
+                case 0b000000000010:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_dcache_iall;
+                    }
+                    break;
+                case 0b000000101001: op = rv_op_th_dcache_cpa; break;
+                case 0b000000101011: op = rv_op_th_dcache_cipa; break;
+                case 0b000000101010: op = rv_op_th_dcache_ipa; break;
+                case 0b000000100101: op = rv_op_th_dcache_cva; break;
+                case 0b000000100111: op = rv_op_th_dcache_civa; break;
+                case 0b000000100110: op = rv_op_th_dcache_iva; break;
+                case 0b000000100001: op = rv_op_th_dcache_csw; break;
+                case 0b000000100011: op = rv_op_th_dcache_cisw; break;
+                case 0b000000100010: op = rv_op_th_dcache_isw; break;
+                case 0b000000101000: op = rv_op_th_dcache_cpal1; break;
+                case 0b000000100100: op = rv_op_th_dcache_cval1; break;
+                case 0b000000010000:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_icache_iall;
+                    }
+                    break;
+                case 0b000000010001:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_icache_ialls;
+                    }
+                    break;
+                case 0b000000111000: op = rv_op_th_icache_ipa; break;
+                case 0b000000110000: op = rv_op_th_icache_iva; break;
+                case 0b000000010101:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_l2cache_call;
+                    }
+                    break;
+                case 0b000000010111:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_l2cache_ciall;
+                    }
+                    break;
+                case 0b000000010110:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_l2cache_iall;
+                    }
+                    break;
+                }
+                break;
+            }
+            break;
+            /* custom-0 */
+        }
+        break;
+    }
+
+    dec->op = op;
+}
+
+void decode_xtheadcondmov(rv_decode *dec, rv_isa isa)
+{
+    rv_inst inst = dec->inst;
+    rv_opcode op = rv_op_illegal;
+
+    switch (((inst >> 0) & 0b11)) {
+    case 3:
+        switch (((inst >> 2) & 0b11111)) {
+        case 2:
+            /* custom-0 */
+            switch ((inst >> 12) & 0b111) {
+            case 1:
+                switch ((inst >> 25) & 0b1111111) {
+                case 0b0100000: op = rv_op_th_mveqz; break;
+                case 0b0100001: op = rv_op_th_mvnez; break;
+                }
+                break;
+            }
+            break;
+            /* custom-0 */
+        }
+        break;
+    }
+
+    dec->op = op;
+}
+
+void decode_xtheadfmemidx(rv_decode *dec, rv_isa isa)
+{
+    rv_inst inst = dec->inst;
+    rv_opcode op = rv_op_illegal;
+
+    switch (((inst >> 0) & 0b11)) {
+    case 3:
+        switch (((inst >> 2) & 0b11111)) {
+        case 2:
+            /* custom-0 */
+            switch ((inst >> 12) & 0b111) {
+            case 6:
+                switch ((inst >> 27) & 0b11111) {
+                case 8: op = rv_op_th_flrw; break;
+                case 10: op = rv_op_th_flurw; break;
+                case 12: op = rv_op_th_flrd; break;
+                case 14: op = rv_op_th_flurd; break;
+                }
+                break;
+            case 7:
+                switch ((inst >> 27) & 0b11111) {
+                case 8: op = rv_op_th_fsrw; break;
+                case 10: op = rv_op_th_fsurw; break;
+                case 12: op = rv_op_th_fsrd; break;
+                case 14: op = rv_op_th_fsurd; break;
+                }
+                break;
+            }
+            break;
+            /* custom-0 */
+        }
+        break;
+    }
+
+    dec->op = op;
+}
+
+void decode_xtheadfmv(rv_decode *dec, rv_isa isa)
+{
+    rv_inst inst = dec->inst;
+    rv_opcode op = rv_op_illegal;
+
+    switch (((inst >> 0) & 0b11)) {
+    case 3:
+        switch (((inst >> 2) & 0b11111)) {
+        case 2:
+            /* custom-0 */
+            switch ((inst >> 12) & 0b111) {
+            case 1:
+                switch ((inst >> 25) & 0b1111111) {
+                case 0b1010000:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_fmv_hw_x;
+                    }
+                    break;
+                case 0b1100000:
+                    if (((inst >> 20) & 0b11111) == 0) {
+                        op = rv_op_th_fmv_x_hw;
+                    }
+                    break;
+                }
+                break;
+            }
+            break;
+            /* custom-0 */
+        }
+        break;
+    }
+
+    dec->op = op;
+}
+
+void decode_xtheadmac(rv_decode *dec, rv_isa isa)
+{
+    rv_inst inst = dec->inst;
+    rv_opcode op = rv_op_illegal;
+
+    switch (((inst >> 0) & 0b11)) {
+    case 3:
+        switch (((inst >> 2) & 0b11111)) {
+        case 2:
+            /* custom-0 */
+            switch ((inst >> 12) & 0b111) {
+            case 1:
+                switch ((inst >> 25) & 0b1111111) {
+                case 0b0010000: op = rv_op_th_mula; break;
+                case 0b0010001: op = rv_op_th_muls; break;
+                case 0b0010010: op = rv_op_th_mulaw; break;
+                case 0b0010011: op = rv_op_th_mulsw; break;
+                case 0b0010100: op = rv_op_th_mulah; break;
+                case 0b0010101: op = rv_op_th_mulsh; break;
+                }
+                break;
+            }
+            break;
+            /* custom-0 */
+        }
+        break;
+    }
+
+    dec->op = op;
+}
+
+void decode_xtheadmemidx(rv_decode *dec, rv_isa isa)
+{
+    rv_inst inst = dec->inst;
+    rv_opcode op = rv_op_illegal;
+
+    switch (((inst >> 0) & 0b11)) {
+    case 3:
+        switch (((inst >> 2) & 0b11111)) {
+        case 2:
+            /* custom-0 */
+            switch ((inst >> 12) & 0b111) {
+            case 4:
+                switch ((inst >> 27) & 0b11111) {
+                case 0: op = rv_op_th_lrb; break;
+                case 1: op = rv_op_th_lbib; break;
+                case 2: op = rv_op_th_lurb; break;
+                case 3: op = rv_op_th_lbia; break;
+                case 4: op = rv_op_th_lrh; break;
+                case 5: op = rv_op_th_lhib; break;
+                case 6: op = rv_op_th_lurh; break;
+                case 7: op = rv_op_th_lhia; break;
+                case 8: op = rv_op_th_lrw; break;
+                case 9: op = rv_op_th_lwib; break;
+                case 10: op = rv_op_th_lurw; break;
+                case 11: op = rv_op_th_lwia; break;
+                case 12: op = rv_op_th_lrd; break;
+                case 13: op = rv_op_th_ldib; break;
+                case 14: op = rv_op_th_lurd; break;
+                case 15: op = rv_op_th_ldia; break;
+                case 16: op = rv_op_th_lrbu; break;
+                case 17: op = rv_op_th_lbuib; break;
+                case 18: op = rv_op_th_lurbu; break;
+                case 19: op = rv_op_th_lbuia; break;
+                case 20: op = rv_op_th_lrhu; break;
+                case 21: op = rv_op_th_lhuib; break;
+                case 22: op = rv_op_th_lurhu; break;
+                case 23: op = rv_op_th_lhuia; break;
+                case 24: op = rv_op_th_lrwu; break;
+                case 25: op = rv_op_th_lwuib; break;
+                case 26: op = rv_op_th_lurwu; break;
+                case 27: op = rv_op_th_lwuia; break;
+                }
+                break;
+            case 5:
+                switch ((inst >> 27) & 0b11111) {
+                case 0: op = rv_op_th_srb; break;
+                case 1: op = rv_op_th_sbib; break;
+                case 2: op = rv_op_th_surb; break;
+                case 3: op = rv_op_th_sbia; break;
+                case 4: op = rv_op_th_srh; break;
+                case 5: op = rv_op_th_shib; break;
+                case 6: op = rv_op_th_surh; break;
+                case 7: op = rv_op_th_shia; break;
+                case 8: op = rv_op_th_srw; break;
+                case 9: op = rv_op_th_swib; break;
+                case 10: op = rv_op_th_surw; break;
+                case 11: op = rv_op_th_swia; break;
+                case 12: op = rv_op_th_srd; break;
+                case 13: op = rv_op_th_sdib; break;
+                case 14: op = rv_op_th_surd; break;
+                case 15: op = rv_op_th_sdia; break;
+                }
+                break;
+                break;
+            }
+            break;
+            /* custom-0 */
+        }
+        break;
+    }
+
+    dec->op = op;
+}
+
+void decode_xtheadmempair(rv_decode *dec, rv_isa isa)
+{
+    rv_inst inst = dec->inst;
+    rv_opcode op = rv_op_illegal;
+
+    switch (((inst >> 0) & 0b11)) {
+    case 3:
+        switch (((inst >> 2) & 0b11111)) {
+        case 2:
+            /* custom-0 */
+            switch ((inst >> 12) & 0b111) {
+            case 4:
+                switch ((inst >> 27) & 0b11111) {
+                case 28: op = rv_op_th_lwd; break;
+                case 30: op = rv_op_th_lwud; break;
+                case 31: op = rv_op_th_ldd; break;
+                }
+                break;
+            case 5:
+                switch ((inst >> 27) & 0b11111) {
+                case 28: op = rv_op_th_swd; break;
+                case 31: op = rv_op_th_sdd; break;
+                }
+                break;
+            }
+            break;
+            /* custom-0 */
+        }
+        break;
+    }
+
+    dec->op = op;
+}
+
+void decode_xtheadsync(rv_decode *dec, rv_isa isa)
+{
+    rv_inst inst = dec->inst;
+    rv_opcode op = rv_op_illegal;
+
+    switch (((inst >> 0) & 0b11)) {
+    case 3:
+        switch (((inst >> 2) & 0b11111)) {
+        case 2:
+            /* custom-0 */
+            switch ((inst >> 12) & 0b111) {
+            case 0:
+                switch ((inst >> 25) & 0b1111111) {
+                case 0b0000010: op = rv_op_th_sfence_vmas; break;
+                case 0b0000000:
+                    switch ((inst >> 20) & 0b11111) {
+                    case 0b11000: op = rv_op_th_sync; break;
+                    case 0b11010: op = rv_op_th_sync_i; break;
+                    case 0b11011: op = rv_op_th_sync_is; break;
+                    case 0b11001: op = rv_op_th_sync_s; break;
+                    }
+                    break;
+                }
+                break;
+            }
+            break;
+            /* custom-0 */
+        }
+        break;
+    }
+
+    dec->op = op;
+}
diff --git a/disas/riscv-xthead.h b/disas/riscv-xthead.h
new file mode 100644
index 0000000000..fcd42746e7
--- /dev/null
+++ b/disas/riscv-xthead.h
@@ -0,0 +1,28 @@
+/*
+ * QEMU disassembler -- RISC-V specific header (xthead*).
+ *
+ * Copyright (c) 2023 VRULL GmbH
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef DISAS_RISCV_XTHEAD_H
+#define DISAS_RISCV_XTHEAD_H
+
+#include "disas/riscv.h"
+
+extern const rv_opcode_data xthead_opcode_data[];
+
+void decode_xtheadba(rv_decode *, rv_isa);
+void decode_xtheadbb(rv_decode *, rv_isa);
+void decode_xtheadbs(rv_decode *, rv_isa);
+void decode_xtheadcmo(rv_decode *, rv_isa);
+void decode_xtheadcondmov(rv_decode *, rv_isa);
+void decode_xtheadfmemidx(rv_decode *, rv_isa);
+void decode_xtheadfmv(rv_decode *, rv_isa);
+void decode_xtheadmac(rv_decode *, rv_isa);
+void decode_xtheadmemidx(rv_decode *, rv_isa);
+void decode_xtheadmempair(rv_decode *, rv_isa);
+void decode_xtheadsync(rv_decode *, rv_isa);
+
+#endif /* DISAS_RISCV_XTHEAD_H */
diff --git a/disas/riscv-xventana.c b/disas/riscv-xventana.c
new file mode 100644
index 0000000000..a0224d1fb3
--- /dev/null
+++ b/disas/riscv-xventana.c
@@ -0,0 +1,41 @@
+/*
+ * QEMU RISC-V Disassembler for xventana.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "disas/riscv.h"
+#include "disas/riscv-xventana.h"
+
+typedef enum {
+    /* 0 is reserved for rv_op_illegal. */
+    ventana_op_vt_maskc = 1,
+    ventana_op_vt_maskcn = 2,
+} rv_ventana_op;
+
+const rv_opcode_data ventana_opcode_data[] = {
+    { "vt.illegal", rv_codec_illegal, rv_fmt_none, NULL, 0, 0, 0 },
+    { "vt.maskc", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
+    { "vt.maskcn", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
+};
+
+void decode_xventanacondops(rv_decode *dec, rv_isa isa)
+{
+    rv_inst inst = dec->inst;
+    rv_opcode op = rv_op_illegal;
+
+    switch (((inst >> 0) & 0b11)) {
+    case 3:
+        switch (((inst >> 2) & 0b11111)) {
+        case 30:
+            switch (((inst >> 22) & 0b1111111000) | ((inst >> 12) & 0b0000000111)) {
+            case 6: op = ventana_op_vt_maskc; break;
+            case 7: op = ventana_op_vt_maskcn; break;
+            }
+            break;
+        }
+        break;
+    }
+
+    dec->op = op;
+}
diff --git a/disas/riscv-xventana.h b/disas/riscv-xventana.h
new file mode 100644
index 0000000000..72be9ffa16
--- /dev/null
+++ b/disas/riscv-xventana.h
@@ -0,0 +1,18 @@
+/*
+ * QEMU disassembler -- RISC-V specific header (xventana*).
+ *
+ * Copyright (c) 2023 VRULL GmbH
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef DISAS_RISCV_XVENTANA_H
+#define DISAS_RISCV_XVENTANA_H
+
+#include "disas/riscv.h"
+
+extern const rv_opcode_data ventana_opcode_data[];
+
+void decode_xventanacondops(rv_decode*, rv_isa);
+
+#endif /* DISAS_RISCV_XVENTANA_H */
diff --git a/disas/riscv.c b/disas/riscv.c
index 5005364aba..cd7b6e86a7 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -18,162 +18,17 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/bitops.h"
 #include "disas/dis-asm.h"
 #include "target/riscv/cpu_cfg.h"
+#include "disas/riscv.h"
 
-/* types */
-
-typedef uint64_t rv_inst;
-typedef uint16_t rv_opcode;
-
-/* enums */
-
-typedef enum {
-    rv32,
-    rv64,
-    rv128
-} rv_isa;
-
-typedef enum {
-    rv_rm_rne = 0,
-    rv_rm_rtz = 1,
-    rv_rm_rdn = 2,
-    rv_rm_rup = 3,
-    rv_rm_rmm = 4,
-    rv_rm_dyn = 7,
-} rv_rm;
+/* Vendor extensions */
+#include "disas/riscv-xthead.h"
+#include "disas/riscv-xventana.h"
 
 typedef enum {
-    rv_fence_i = 8,
-    rv_fence_o = 4,
-    rv_fence_r = 2,
-    rv_fence_w = 1,
-} rv_fence;
-
-typedef enum {
-    rv_ireg_zero,
-    rv_ireg_ra,
-    rv_ireg_sp,
-    rv_ireg_gp,
-    rv_ireg_tp,
-    rv_ireg_t0,
-    rv_ireg_t1,
-    rv_ireg_t2,
-    rv_ireg_s0,
-    rv_ireg_s1,
-    rv_ireg_a0,
-    rv_ireg_a1,
-    rv_ireg_a2,
-    rv_ireg_a3,
-    rv_ireg_a4,
-    rv_ireg_a5,
-    rv_ireg_a6,
-    rv_ireg_a7,
-    rv_ireg_s2,
-    rv_ireg_s3,
-    rv_ireg_s4,
-    rv_ireg_s5,
-    rv_ireg_s6,
-    rv_ireg_s7,
-    rv_ireg_s8,
-    rv_ireg_s9,
-    rv_ireg_s10,
-    rv_ireg_s11,
-    rv_ireg_t3,
-    rv_ireg_t4,
-    rv_ireg_t5,
-    rv_ireg_t6,
-} rv_ireg;
-
-typedef enum {
-    rvc_end,
-    rvc_rd_eq_ra,
-    rvc_rd_eq_x0,
-    rvc_rs1_eq_x0,
-    rvc_rs2_eq_x0,
-    rvc_rs2_eq_rs1,
-    rvc_rs1_eq_ra,
-    rvc_imm_eq_zero,
-    rvc_imm_eq_n1,
-    rvc_imm_eq_p1,
-    rvc_csr_eq_0x001,
-    rvc_csr_eq_0x002,
-    rvc_csr_eq_0x003,
-    rvc_csr_eq_0xc00,
-    rvc_csr_eq_0xc01,
-    rvc_csr_eq_0xc02,
-    rvc_csr_eq_0xc80,
-    rvc_csr_eq_0xc81,
-    rvc_csr_eq_0xc82,
-} rvc_constraint;
-
-typedef enum {
-    rv_codec_illegal,
-    rv_codec_none,
-    rv_codec_u,
-    rv_codec_uj,
-    rv_codec_i,
-    rv_codec_i_sh5,
-    rv_codec_i_sh6,
-    rv_codec_i_sh7,
-    rv_codec_i_csr,
-    rv_codec_s,
-    rv_codec_sb,
-    rv_codec_r,
-    rv_codec_r_m,
-    rv_codec_r4_m,
-    rv_codec_r_a,
-    rv_codec_r_l,
-    rv_codec_r_f,
-    rv_codec_cb,
-    rv_codec_cb_imm,
-    rv_codec_cb_sh5,
-    rv_codec_cb_sh6,
-    rv_codec_ci,
-    rv_codec_ci_sh5,
-    rv_codec_ci_sh6,
-    rv_codec_ci_16sp,
-    rv_codec_ci_lwsp,
-    rv_codec_ci_ldsp,
-    rv_codec_ci_lqsp,
-    rv_codec_ci_li,
-    rv_codec_ci_lui,
-    rv_codec_ci_none,
-    rv_codec_ciw_4spn,
-    rv_codec_cj,
-    rv_codec_cj_jal,
-    rv_codec_cl_lw,
-    rv_codec_cl_ld,
-    rv_codec_cl_lq,
-    rv_codec_cr,
-    rv_codec_cr_mv,
-    rv_codec_cr_jalr,
-    rv_codec_cr_jr,
-    rv_codec_cs,
-    rv_codec_cs_sw,
-    rv_codec_cs_sd,
-    rv_codec_cs_sq,
-    rv_codec_css_swsp,
-    rv_codec_css_sdsp,
-    rv_codec_css_sqsp,
-    rv_codec_k_bs,
-    rv_codec_k_rnum,
-    rv_codec_v_r,
-    rv_codec_v_ldst,
-    rv_codec_v_i,
-    rv_codec_vsetvli,
-    rv_codec_vsetivli,
-    rv_codec_zcb_ext,
-    rv_codec_zcb_mul,
-    rv_codec_zcb_lb,
-    rv_codec_zcb_lh,
-    rv_codec_zcmp_cm_pushpop,
-    rv_codec_zcmp_cm_mv,
-    rv_codec_zcmt_jt,
-} rv_codec;
-
-typedef enum {
-    rv_op_illegal = 0,
+    /* 0 is reserved for rv_op_illegal. */
     rv_op_lui = 1,
     rv_op_auipc = 2,
     rv_op_jal = 3,
@@ -964,53 +819,51 @@ typedef enum {
     rv_op_cm_jalt = 788,
     rv_op_czero_eqz = 789,
     rv_op_czero_nez = 790,
+    rv_op_fcvt_bf16_s = 791,
+    rv_op_fcvt_s_bf16 = 792,
+    rv_op_vfncvtbf16_f_f_w = 793,
+    rv_op_vfwcvtbf16_f_f_v = 794,
+    rv_op_vfwmaccbf16_vv = 795,
+    rv_op_vfwmaccbf16_vf = 796,
+    rv_op_flh = 797,
+    rv_op_fsh = 798,
+    rv_op_fmv_h_x = 799,
+    rv_op_fmv_x_h = 800,
+    rv_op_fli_s = 801,
+    rv_op_fli_d = 802,
+    rv_op_fli_q = 803,
+    rv_op_fli_h = 804,
+    rv_op_fminm_s = 805,
+    rv_op_fmaxm_s = 806,
+    rv_op_fminm_d = 807,
+    rv_op_fmaxm_d = 808,
+    rv_op_fminm_q = 809,
+    rv_op_fmaxm_q = 810,
+    rv_op_fminm_h = 811,
+    rv_op_fmaxm_h = 812,
+    rv_op_fround_s = 813,
+    rv_op_froundnx_s = 814,
+    rv_op_fround_d = 815,
+    rv_op_froundnx_d = 816,
+    rv_op_fround_q = 817,
+    rv_op_froundnx_q = 818,
+    rv_op_fround_h = 819,
+    rv_op_froundnx_h = 820,
+    rv_op_fcvtmod_w_d = 821,
+    rv_op_fmvh_x_d = 822,
+    rv_op_fmvp_d_x = 823,
+    rv_op_fmvh_x_q = 824,
+    rv_op_fmvp_q_x = 825,
+    rv_op_fleq_s = 826,
+    rv_op_fltq_s = 827,
+    rv_op_fleq_d = 828,
+    rv_op_fltq_d = 829,
+    rv_op_fleq_q = 830,
+    rv_op_fltq_q = 831,
+    rv_op_fleq_h = 832,
+    rv_op_fltq_h = 833,
 } rv_op;
 
-/* structures */
-
-typedef struct {
-    RISCVCPUConfig *cfg;
-    uint64_t  pc;
-    uint64_t  inst;
-    int32_t   imm;
-    uint16_t  op;
-    uint8_t   codec;
-    uint8_t   rd;
-    uint8_t   rs1;
-    uint8_t   rs2;
-    uint8_t   rs3;
-    uint8_t   rm;
-    uint8_t   pred;
-    uint8_t   succ;
-    uint8_t   aq;
-    uint8_t   rl;
-    uint8_t   bs;
-    uint8_t   rnum;
-    uint8_t   vm;
-    uint32_t  vzimm;
-    uint8_t   rlist;
-} rv_decode;
-
-typedef struct {
-    const int op;
-    const rvc_constraint *constraints;
-} rv_comp_data;
-
-enum {
-    rvcd_imm_nz = 0x1
-};
-
-typedef struct {
-    const char * const name;
-    const rv_codec codec;
-    const char * const format;
-    const rv_comp_data *pseudo;
-    const short decomp_rv32;
-    const short decomp_rv64;
-    const short decomp_rv128;
-    const short decomp_data;
-} rv_opcode_data;
-
 /* register names */
 
 static const char rv_ireg_name_sym[32][5] = {
@@ -1034,78 +887,22 @@ static const char rv_vreg_name_sym[32][4] = {
     "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
 };
 
-/* instruction formats */
-
-#define rv_fmt_none                   "O\t"
-#define rv_fmt_rs1                    "O\t1"
-#define rv_fmt_offset                 "O\to"
-#define rv_fmt_pred_succ              "O\tp,s"
-#define rv_fmt_rs1_rs2                "O\t1,2"
-#define rv_fmt_rd_imm                 "O\t0,i"
-#define rv_fmt_rd_offset              "O\t0,o"
-#define rv_fmt_rd_rs1_rs2             "O\t0,1,2"
-#define rv_fmt_frd_rs1                "O\t3,1"
-#define rv_fmt_frd_frs1               "O\t3,4"
-#define rv_fmt_rd_frs1                "O\t0,4"
-#define rv_fmt_rd_frs1_frs2           "O\t0,4,5"
-#define rv_fmt_frd_frs1_frs2          "O\t3,4,5"
-#define rv_fmt_rm_frd_frs1            "O\tr,3,4"
-#define rv_fmt_rm_frd_rs1             "O\tr,3,1"
-#define rv_fmt_rm_rd_frs1             "O\tr,0,4"
-#define rv_fmt_rm_frd_frs1_frs2       "O\tr,3,4,5"
-#define rv_fmt_rm_frd_frs1_frs2_frs3  "O\tr,3,4,5,6"
-#define rv_fmt_rd_rs1_imm             "O\t0,1,i"
-#define rv_fmt_rd_rs1_offset          "O\t0,1,i"
-#define rv_fmt_rd_offset_rs1          "O\t0,i(1)"
-#define rv_fmt_frd_offset_rs1         "O\t3,i(1)"
-#define rv_fmt_rd_csr_rs1             "O\t0,c,1"
-#define rv_fmt_rd_csr_zimm            "O\t0,c,7"
-#define rv_fmt_rs2_offset_rs1         "O\t2,i(1)"
-#define rv_fmt_frs2_offset_rs1        "O\t5,i(1)"
-#define rv_fmt_rs1_rs2_offset         "O\t1,2,o"
-#define rv_fmt_rs2_rs1_offset         "O\t2,1,o"
-#define rv_fmt_aqrl_rd_rs2_rs1        "OAR\t0,2,(1)"
-#define rv_fmt_aqrl_rd_rs1            "OAR\t0,(1)"
-#define rv_fmt_rd                     "O\t0"
-#define rv_fmt_rd_zimm                "O\t0,7"
-#define rv_fmt_rd_rs1                 "O\t0,1"
-#define rv_fmt_rd_rs2                 "O\t0,2"
-#define rv_fmt_rs1_offset             "O\t1,o"
-#define rv_fmt_rs2_offset             "O\t2,o"
-#define rv_fmt_rs1_rs2_bs             "O\t1,2,b"
-#define rv_fmt_rd_rs1_rnum            "O\t0,1,n"
-#define rv_fmt_ldst_vd_rs1_vm         "O\tD,(1)m"
-#define rv_fmt_ldst_vd_rs1_rs2_vm     "O\tD,(1),2m"
-#define rv_fmt_ldst_vd_rs1_vs2_vm     "O\tD,(1),Fm"
-#define rv_fmt_vd_vs2_vs1             "O\tD,F,E"
-#define rv_fmt_vd_vs2_vs1_vl          "O\tD,F,El"
-#define rv_fmt_vd_vs2_vs1_vm          "O\tD,F,Em"
-#define rv_fmt_vd_vs2_rs1_vl          "O\tD,F,1l"
-#define rv_fmt_vd_vs2_fs1_vl          "O\tD,F,4l"
-#define rv_fmt_vd_vs2_rs1_vm          "O\tD,F,1m"
-#define rv_fmt_vd_vs2_fs1_vm          "O\tD,F,4m"
-#define rv_fmt_vd_vs2_imm_vl          "O\tD,F,il"
-#define rv_fmt_vd_vs2_imm_vm          "O\tD,F,im"
-#define rv_fmt_vd_vs2_uimm_vm         "O\tD,F,um"
-#define rv_fmt_vd_vs1_vs2_vm          "O\tD,E,Fm"
-#define rv_fmt_vd_rs1_vs2_vm          "O\tD,1,Fm"
-#define rv_fmt_vd_fs1_vs2_vm          "O\tD,4,Fm"
-#define rv_fmt_vd_vs1                 "O\tD,E"
-#define rv_fmt_vd_rs1                 "O\tD,1"
-#define rv_fmt_vd_fs1                 "O\tD,4"
-#define rv_fmt_vd_imm                 "O\tD,i"
-#define rv_fmt_vd_vs2                 "O\tD,F"
-#define rv_fmt_vd_vs2_vm              "O\tD,Fm"
-#define rv_fmt_rd_vs2_vm              "O\t0,Fm"
-#define rv_fmt_rd_vs2                 "O\t0,F"
-#define rv_fmt_fd_vs2                 "O\t3,F"
-#define rv_fmt_vd_vm                  "O\tDm"
-#define rv_fmt_vsetvli                "O\t0,1,v"
-#define rv_fmt_vsetivli               "O\t0,u,v"
-#define rv_fmt_rs1_rs2_zce_ldst       "O\t2,i(1)"
-#define rv_fmt_push_rlist             "O\tx,-i"
-#define rv_fmt_pop_rlist              "O\tx,i"
-#define rv_fmt_zcmt_index             "O\ti"
+/* The FLI.[HSDQ] numeric constants (0.0 for symbolic constants).
+ * The constants use the hex floating-point literal representation
+ * that is printed when using the printf %a format specifier,
+ * which matches the output that is generated by the disassembler.
+ */
+static const char rv_fli_name_const[32][9] =
+{
+    "0x1p+0", "min", "0x1p-16", "0x1p-15",
+    "0x1p-8", "0x1p-7", "0x1p-4", "0x1p-3",
+    "0x1p-2", "0x1.4p-2", "0x1.8p-2", "0x1.cp-2",
+    "0x1p-1", "0x1.4p-1", "0x1.8p-1", "0x1.cp-1",
+    "0x1p+0", "0x1.4p+0", "0x1.8p+0", "0x1.cp+0",
+    "0x1p+1", "0x1.4p+1", "0x1.8p+1", "0x1p+2",
+    "0x1p+3", "0x1p+4", "0x1p+7", "0x1p+8",
+    "0x1p+15", "0x1p+16", "inf", "nan"
+};
 
 /* pseudo-instruction constraints */
 
@@ -1336,7 +1133,7 @@ static const rv_comp_data rvcp_fsgnjx_q[] = {
 
 /* instruction metadata */
 
-const rv_opcode_data opcode_data[] = {
+const rv_opcode_data rvi_opcode_data[] = {
     { "illegal", rv_codec_illegal, rv_fmt_none, NULL, 0, 0, 0 },
     { "lui", rv_codec_u, rv_fmt_rd_imm, NULL, 0, 0, 0 },
     { "auipc", rv_codec_u, rv_fmt_rd_offset, NULL, 0, 0, 0 },
@@ -2168,6 +1965,49 @@ const rv_opcode_data opcode_data[] = {
     { "cm.jalt", rv_codec_zcmt_jt, rv_fmt_zcmt_index, NULL, 0 },
     { "czero.eqz", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
     { "czero.nez", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
+    { "fcvt.bf16.s", rv_codec_r_m, rv_fmt_rm_frd_frs1, NULL, 0, 0, 0 },
+    { "fcvt.s.bf16", rv_codec_r_m, rv_fmt_rm_frd_frs1, NULL, 0, 0, 0 },
+    { "vfncvtbf16.f.f.w", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
+    { "vfwcvtbf16.f.f.v", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
+    { "vfwmaccbf16.vv", rv_codec_v_r, rv_fmt_vd_vs1_vs2_vm, NULL, 0, 0, 0 },
+    { "vfwmaccbf16.vf", rv_codec_v_r, rv_fmt_vd_fs1_vs2_vm, NULL, 0, 0, 0 },
+    { "flh", rv_codec_i, rv_fmt_frd_offset_rs1, NULL, 0, 0, 0 },
+    { "fsh", rv_codec_s, rv_fmt_frs2_offset_rs1, NULL, 0, 0, 0 },
+    { "fmv.h.x", rv_codec_r, rv_fmt_frd_rs1, NULL, 0, 0, 0 },
+    { "fmv.x.h", rv_codec_r, rv_fmt_rd_frs1, NULL, 0, 0, 0 },
+    { "fli.s", rv_codec_fli, rv_fmt_fli, NULL, 0, 0, 0 },
+    { "fli.d", rv_codec_fli, rv_fmt_fli, NULL, 0, 0, 0 },
+    { "fli.q", rv_codec_fli, rv_fmt_fli, NULL, 0, 0, 0 },
+    { "fli.h", rv_codec_fli, rv_fmt_fli, NULL, 0, 0, 0 },
+    { "fminm.s", rv_codec_r, rv_fmt_frd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fmaxm.s", rv_codec_r, rv_fmt_frd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fminm.d", rv_codec_r, rv_fmt_frd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fmaxm.d", rv_codec_r, rv_fmt_frd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fminm.q", rv_codec_r, rv_fmt_frd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fmaxm.q", rv_codec_r, rv_fmt_frd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fminm.h", rv_codec_r, rv_fmt_frd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fmaxm.h", rv_codec_r, rv_fmt_frd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fround.s", rv_codec_r_m, rv_fmt_rm_frd_frs1, NULL, 0, 0, 0 },
+    { "froundnx.s", rv_codec_r_m, rv_fmt_rm_frd_frs1, NULL, 0, 0, 0 },
+    { "fround.d", rv_codec_r_m, rv_fmt_rm_frd_frs1, NULL, 0, 0, 0 },
+    { "froundnx.d", rv_codec_r_m, rv_fmt_rm_frd_frs1, NULL, 0, 0, 0 },
+    { "fround.q", rv_codec_r_m, rv_fmt_rm_frd_frs1, NULL, 0, 0, 0 },
+    { "froundnx.q", rv_codec_r_m, rv_fmt_rm_frd_frs1, NULL, 0, 0, 0 },
+    { "fround.h", rv_codec_r_m, rv_fmt_rm_frd_frs1, NULL, 0, 0, 0 },
+    { "froundnx.h", rv_codec_r_m, rv_fmt_rm_frd_frs1, NULL, 0, 0, 0 },
+    { "fcvtmod.w.d", rv_codec_r_m, rv_fmt_rm_rd_frs1, NULL, 0, 0, 0 },
+    { "fmvh.x.d", rv_codec_r, rv_fmt_rd_frs1, NULL, 0, 0, 0 },
+    { "fmvp.d.x", rv_codec_r, rv_fmt_frd_rs1_rs2, NULL, 0, 0, 0 },
+    { "fmvh.x.q", rv_codec_r, rv_fmt_rd_frs1, NULL, 0, 0, 0 },
+    { "fmvp.q.x", rv_codec_r, rv_fmt_frd_rs1_rs2, NULL, 0, 0, 0 },
+    { "fleq.s", rv_codec_r, rv_fmt_rd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fltq.s", rv_codec_r, rv_fmt_rd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fleq.d", rv_codec_r, rv_fmt_rd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fltq.d", rv_codec_r, rv_fmt_rd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fleq.q", rv_codec_r, rv_fmt_rd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fltq.q", rv_codec_r, rv_fmt_rd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fleq.h", rv_codec_r, rv_fmt_rd_frs1_frs2, NULL, 0, 0, 0 },
+    { "fltq.h", rv_codec_r, rv_fmt_rd_frs1_frs2, NULL, 0, 0, 0 },
 };
 
 /* CSR names */
@@ -2643,6 +2483,7 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 case 3: op = rv_op_vloxei8_v; break;
                 }
                 break;
+            case 1: op = rv_op_flh; break;
             case 2: op = rv_op_flw; break;
             case 3: op = rv_op_fld; break;
             case 4: op = rv_op_flq; break;
@@ -2846,6 +2687,7 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 case 3: op = rv_op_vsoxei8_v; break;
                 }
                 break;
+            case 1: op = rv_op_fsh; break;
             case 2: op = rv_op_fsw; break;
             case 3: op = rv_op_fsd; break;
             case 4: op = rv_op_fsq; break;
@@ -3105,36 +2947,62 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 switch ((inst >> 12) & 0b111) {
                 case 0: op = rv_op_fmin_s; break;
                 case 1: op = rv_op_fmax_s; break;
+                case 2: op = rv_op_fminm_s; break;
+                case 3: op = rv_op_fmaxm_s; break;
                 }
                 break;
             case 21:
                 switch ((inst >> 12) & 0b111) {
                 case 0: op = rv_op_fmin_d; break;
                 case 1: op = rv_op_fmax_d; break;
+                case 2: op = rv_op_fminm_d; break;
+                case 3: op = rv_op_fmaxm_d; break;
+                }
+                break;
+            case 22:
+                switch (((inst >> 12) & 0b111)) {
+                case 2: op = rv_op_fminm_h; break;
+                case 3: op = rv_op_fmaxm_h; break;
                 }
                 break;
             case 23:
                 switch ((inst >> 12) & 0b111) {
                 case 0: op = rv_op_fmin_q; break;
                 case 1: op = rv_op_fmax_q; break;
+                case 2: op = rv_op_fminm_q; break;
+                case 3: op = rv_op_fmaxm_q; break;
                 }
                 break;
             case 32:
                 switch ((inst >> 20) & 0b11111) {
                 case 1: op = rv_op_fcvt_s_d; break;
                 case 3: op = rv_op_fcvt_s_q; break;
+                case 4: op = rv_op_fround_s; break;
+                case 5: op = rv_op_froundnx_s; break;
+                case 6: op = rv_op_fcvt_s_bf16; break;
                 }
                 break;
             case 33:
                 switch ((inst >> 20) & 0b11111) {
                 case 0: op = rv_op_fcvt_d_s; break;
                 case 3: op = rv_op_fcvt_d_q; break;
+                case 4: op = rv_op_fround_d; break;
+                case 5: op = rv_op_froundnx_d; break;
+                }
+                break;
+            case 34:
+                switch (((inst >> 20) & 0b11111)) {
+                case 4: op = rv_op_fround_h; break;
+                case 5: op = rv_op_froundnx_h; break;
+                case 8: op = rv_op_fcvt_bf16_s; break;
                 }
                 break;
             case 35:
                 switch ((inst >> 20) & 0b11111) {
                 case 0: op = rv_op_fcvt_q_s; break;
                 case 1: op = rv_op_fcvt_q_d; break;
+                case 4: op = rv_op_fround_q; break;
+                case 5: op = rv_op_froundnx_q; break;
                 }
                 break;
             case 44:
@@ -3157,6 +3025,8 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 case 0: op = rv_op_fle_s; break;
                 case 1: op = rv_op_flt_s; break;
                 case 2: op = rv_op_feq_s; break;
+                case 4: op = rv_op_fleq_s; break;
+                case 5: op = rv_op_fltq_s; break;
                 }
                 break;
             case 81:
@@ -3164,6 +3034,14 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 case 0: op = rv_op_fle_d; break;
                 case 1: op = rv_op_flt_d; break;
                 case 2: op = rv_op_feq_d; break;
+                case 4: op = rv_op_fleq_d; break;
+                case 5: op = rv_op_fltq_d; break;
+                }
+                break;
+            case 82:
+                switch (((inst >> 12) & 0b111)) {
+                case 4: op = rv_op_fleq_h; break;
+                case 5: op = rv_op_fltq_h; break;
                 }
                 break;
             case 83:
@@ -3171,6 +3049,18 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 case 0: op = rv_op_fle_q; break;
                 case 1: op = rv_op_flt_q; break;
                 case 2: op = rv_op_feq_q; break;
+                case 4: op = rv_op_fleq_q; break;
+                case 5: op = rv_op_fltq_q; break;
+                }
+                break;
+            case 89:
+		switch (((inst >> 12) & 0b111)) {
+                case 0: op = rv_op_fmvp_d_x; break;
+                }
+                break;
+            case 91:
+		switch (((inst >> 12) & 0b111)) {
+                case 0: op = rv_op_fmvp_q_x; break;
                 }
                 break;
             case 96:
@@ -3187,6 +3077,7 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 case 1: op = rv_op_fcvt_wu_d; break;
                 case 2: op = rv_op_fcvt_l_d; break;
                 case 3: op = rv_op_fcvt_lu_d; break;
+                case 8: op = rv_op_fcvtmod_w_d; break;
                 }
                 break;
             case 99:
@@ -3233,6 +3124,13 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                         ((inst >> 12) & 0b00000111)) {
                 case 0: op = rv_op_fmv_x_d; break;
                 case 1: op = rv_op_fclass_d; break;
+                case 8: op = rv_op_fmvh_x_d; break;
+                }
+                break;
+            case 114:
+                switch (((inst >> 17) & 0b11111000) |
+                        ((inst >> 12) & 0b00000111)) {
+                case 0: op = rv_op_fmv_x_h; break;
                 }
                 break;
             case 115:
@@ -3240,24 +3138,35 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                         ((inst >> 12) & 0b00000111)) {
                 case 0: op = rv_op_fmv_x_q; break;
                 case 1: op = rv_op_fclass_q; break;
+                case 8: op = rv_op_fmvh_x_q; break;
                 }
                 break;
             case 120:
                 switch (((inst >> 17) & 0b11111000) |
                         ((inst >> 12) & 0b00000111)) {
                 case 0: op = rv_op_fmv_s_x; break;
+                case 8: op = rv_op_fli_s; break;
                 }
                 break;
             case 121:
                 switch (((inst >> 17) & 0b11111000) |
                         ((inst >> 12) & 0b00000111)) {
                 case 0: op = rv_op_fmv_d_x; break;
+                case 8: op = rv_op_fli_d; break;
+                }
+                break;
+            case 122:
+                switch (((inst >> 17) & 0b11111000) |
+                        ((inst >> 12) & 0b00000111)) {
+                case 0: op = rv_op_fmv_h_x; break;
+                case 8: op = rv_op_fli_h; break;
                 }
                 break;
             case 123:
                 switch (((inst >> 17) & 0b11111000) |
                         ((inst >> 12) & 0b00000111)) {
                 case 0: op = rv_op_fmv_q_x; break;
+                case 8: op = rv_op_fli_q; break;
                 }
                 break;
             }
@@ -3350,6 +3259,7 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                     case 10: op = rv_op_vfwcvt_f_xu_v; break;
                     case 11: op = rv_op_vfwcvt_f_x_v; break;
                     case 12: op = rv_op_vfwcvt_f_f_v; break;
+                    case 13: op = rv_op_vfwcvtbf16_f_f_v; break;
                     case 14: op = rv_op_vfwcvt_rtz_xu_f_v; break;
                     case 15: op = rv_op_vfwcvt_rtz_x_f_v; break;
                     case 16: op = rv_op_vfncvt_xu_f_w; break;
@@ -3360,6 +3270,7 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                     case 21: op = rv_op_vfncvt_rod_f_f_w; break;
                     case 22: op = rv_op_vfncvt_rtz_xu_f_w; break;
                     case 23: op = rv_op_vfncvt_rtz_x_f_w; break;
+                    case 29: op = rv_op_vfncvtbf16_f_f_w; break;
                     }
                     break;
                 case 19:
@@ -3391,6 +3302,7 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 case 52: op = rv_op_vfwadd_wv; break;
                 case 54: op = rv_op_vfwsub_wv; break;
                 case 56: op = rv_op_vfwmul_vv; break;
+                case 59: op = rv_op_vfwmaccbf16_vv; break;
                 case 60: op = rv_op_vfwmacc_vv; break;
                 case 61: op = rv_op_vfwnmacc_vv; break;
                 case 62: op = rv_op_vfwmsac_vv; break;
@@ -3629,6 +3541,7 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 case 52: op = rv_op_vfwadd_wf; break;
                 case 54: op = rv_op_vfwsub_wf; break;
                 case 56: op = rv_op_vfwmul_vf; break;
+                case 59: op = rv_op_vfwmaccbf16_vf; break;
                 case 60: op = rv_op_vfwmacc_vf; break;
                 case 61: op = rv_op_vfwnmacc_vf; break;
                 case 62: op = rv_op_vfwmsac_vf; break;
@@ -4134,6 +4047,26 @@ static uint32_t operand_zcmp_rlist(rv_inst inst)
     return ((inst << 56) >> 60);
 }
 
+static uint32_t operand_imm6(rv_inst inst)
+{
+    return (inst << 38) >> 60;
+}
+
+static uint32_t operand_imm2(rv_inst inst)
+{
+    return (inst << 37) >> 62;
+}
+
+static uint32_t operand_immh(rv_inst inst)
+{
+    return (inst << 32) >> 58;
+}
+
+static uint32_t operand_imml(rv_inst inst)
+{
+    return (inst << 38) >> 58;
+}
+
 static uint32_t calculate_stack_adj(rv_isa isa, uint32_t rlist, uint32_t spimm)
 {
     int xlen_bytes_log2 = isa == rv64 ? 3 : 2;
@@ -4157,6 +4090,7 @@ static uint32_t operand_tbl_index(rv_inst inst)
 
 static void decode_inst_operands(rv_decode *dec, rv_isa isa)
 {
+    const rv_opcode_data *opcode_data = dec->opcode_data;
     rv_inst inst = dec->inst;
     dec->codec = opcode_data[dec->op].codec;
     switch (dec->codec) {
@@ -4496,6 +4430,42 @@ static void decode_inst_operands(rv_decode *dec, rv_isa isa)
         break;
     case rv_codec_zcmt_jt:
         dec->imm = operand_tbl_index(inst);
+	break;
+    case rv_codec_fli:
+        dec->rd = operand_rd(inst);
+        dec->imm = operand_rs1(inst);
+        break;
+    case rv_codec_r2_imm5:
+        dec->rd = operand_rd(inst);
+        dec->rs1 = operand_rs1(inst);
+        dec->imm = operand_rs2(inst);
+        break;
+    case rv_codec_r2:
+        dec->rd = operand_rd(inst);
+        dec->rs1 = operand_rs1(inst);
+        break;
+    case rv_codec_r2_imm6:
+        dec->rd = operand_rd(inst);
+        dec->rs1 = operand_rs1(inst);
+        dec->imm = operand_imm6(inst);
+        break;
+    case rv_codec_r_imm2:
+        dec->rd = operand_rd(inst);
+        dec->rs1 = operand_rs1(inst);
+        dec->rs2 = operand_rs2(inst);
+        dec->imm = operand_imm2(inst);
+        break;
+    case rv_codec_r2_immhl:
+        dec->rd = operand_rd(inst);
+        dec->rs1 = operand_rs1(inst);
+        dec->imm = operand_immh(inst);
+        dec->imm1 = operand_imml(inst);
+        break;
+    case rv_codec_r2_imm2_imm5:
+        dec->rd = operand_rd(inst);
+        dec->rs1 = operand_rs1(inst);
+        dec->imm = sextract32(operand_rs2(inst), 0, 5);
+        dec->imm1 = operand_imm2(inst);
         break;
     };
 }
@@ -4639,6 +4609,7 @@ static void append(char *s1, const char *s2, size_t n)
 
 static void format_inst(char *buf, size_t buflen, size_t tab, rv_decode *dec)
 {
+    const rv_opcode_data *opcode_data = dec->opcode_data;
     char tmp[64];
     const char *fmt;
 
@@ -4709,6 +4680,10 @@ static void format_inst(char *buf, size_t buflen, size_t tab, rv_decode *dec)
             snprintf(tmp, sizeof(tmp), "%u", ((uint32_t)dec->imm & 0b11111));
             append(buf, tmp, buflen);
             break;
+        case 'j':
+            snprintf(tmp, sizeof(tmp), "%d", dec->imm1);
+            append(buf, tmp, buflen);
+            break;
         case 'o':
             snprintf(tmp, sizeof(tmp), "%d", dec->imm);
             append(buf, tmp, buflen);
@@ -4869,6 +4844,9 @@ static void format_inst(char *buf, size_t buflen, size_t tab, rv_decode *dec)
             append(buf, tmp, buflen);
             break;
         }
+        case 'h':
+            append(buf, rv_fli_name_const[dec->imm], buflen);
+            break;
         default:
             break;
         }
@@ -4880,6 +4858,7 @@ static void format_inst(char *buf, size_t buflen, size_t tab, rv_decode *dec)
 
 static void decode_inst_lift_pseudo(rv_decode *dec)
 {
+    const rv_opcode_data *opcode_data = dec->opcode_data;
     const rv_comp_data *comp_data = opcode_data[dec->op].pseudo;
     if (!comp_data) {
         return;
@@ -4898,6 +4877,7 @@ static void decode_inst_lift_pseudo(rv_decode *dec)
 
 static void decode_inst_decompress_rv32(rv_decode *dec)
 {
+    const rv_opcode_data *opcode_data = dec->opcode_data;
     int decomp_op = opcode_data[dec->op].decomp_rv32;
     if (decomp_op != rv_op_illegal) {
         if ((opcode_data[dec->op].decomp_data & rvcd_imm_nz)
@@ -4912,6 +4892,7 @@ static void decode_inst_decompress_rv32(rv_decode *dec)
 
 static void decode_inst_decompress_rv64(rv_decode *dec)
 {
+    const rv_opcode_data *opcode_data = dec->opcode_data;
     int decomp_op = opcode_data[dec->op].decomp_rv64;
     if (decomp_op != rv_op_illegal) {
         if ((opcode_data[dec->op].decomp_data & rvcd_imm_nz)
@@ -4926,6 +4907,7 @@ static void decode_inst_decompress_rv64(rv_decode *dec)
 
 static void decode_inst_decompress_rv128(rv_decode *dec)
 {
+    const rv_opcode_data *opcode_data = dec->opcode_data;
     int decomp_op = opcode_data[dec->op].decomp_rv128;
     if (decomp_op != rv_op_illegal) {
         if ((opcode_data[dec->op].decomp_data & rvcd_imm_nz)
@@ -4963,7 +4945,44 @@ disasm_inst(char *buf, size_t buflen, rv_isa isa, uint64_t pc, rv_inst inst,
     dec.pc = pc;
     dec.inst = inst;
     dec.cfg = cfg;
-    decode_inst_opcode(&dec, isa);
+
+    static const struct {
+        bool (*guard_func)(const RISCVCPUConfig *);
+        const rv_opcode_data *opcode_data;
+        void (*decode_func)(rv_decode *, rv_isa);
+    } decoders[] = {
+        { always_true_p, rvi_opcode_data, decode_inst_opcode },
+        { has_xtheadba_p, xthead_opcode_data, decode_xtheadba },
+        { has_xtheadbb_p, xthead_opcode_data, decode_xtheadbb },
+        { has_xtheadbs_p, xthead_opcode_data, decode_xtheadbs },
+        { has_xtheadcmo_p, xthead_opcode_data, decode_xtheadcmo },
+        { has_xtheadcondmov_p, xthead_opcode_data, decode_xtheadcondmov },
+        { has_xtheadfmemidx_p, xthead_opcode_data, decode_xtheadfmemidx },
+        { has_xtheadfmv_p, xthead_opcode_data, decode_xtheadfmv },
+        { has_xtheadmac_p, xthead_opcode_data, decode_xtheadmac },
+        { has_xtheadmemidx_p, xthead_opcode_data, decode_xtheadmemidx },
+        { has_xtheadmempair_p, xthead_opcode_data, decode_xtheadmempair },
+        { has_xtheadsync_p, xthead_opcode_data, decode_xtheadsync },
+        { has_XVentanaCondOps_p, ventana_opcode_data, decode_xventanacondops },
+    };
+
+    for (size_t i = 0; i < ARRAY_SIZE(decoders); i++) {
+        bool (*guard_func)(const RISCVCPUConfig *) = decoders[i].guard_func;
+        const rv_opcode_data *opcode_data = decoders[i].opcode_data;
+        void (*decode_func)(rv_decode *, rv_isa) = decoders[i].decode_func;
+
+        if (guard_func(cfg)) {
+            dec.opcode_data = opcode_data;
+            decode_func(&dec, isa);
+            if (dec.op != rv_op_illegal)
+                break;
+        }
+    }
+
+    if (dec.op == rv_op_illegal) {
+        dec.opcode_data = rvi_opcode_data;
+    }
+
     decode_inst_operands(&dec, isa);
     decode_inst_decompress(&dec, isa);
     decode_inst_lift_pseudo(&dec);
diff --git a/disas/riscv.h b/disas/riscv.h
new file mode 100644
index 0000000000..9cf901fc1e
--- /dev/null
+++ b/disas/riscv.h
@@ -0,0 +1,302 @@
+/*
+ * QEMU disassembler -- RISC-V specific header.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef DISAS_RISCV_H
+#define DISAS_RISCV_H
+
+#include "qemu/osdep.h"
+#include "target/riscv/cpu_cfg.h"
+
+/* types */
+
+typedef uint64_t rv_inst;
+typedef uint16_t rv_opcode;
+
+/* enums */
+
+typedef enum {
+    rv32,
+    rv64,
+    rv128
+} rv_isa;
+
+typedef enum {
+    rv_rm_rne = 0,
+    rv_rm_rtz = 1,
+    rv_rm_rdn = 2,
+    rv_rm_rup = 3,
+    rv_rm_rmm = 4,
+    rv_rm_dyn = 7,
+} rv_rm;
+
+typedef enum {
+    rv_fence_i = 8,
+    rv_fence_o = 4,
+    rv_fence_r = 2,
+    rv_fence_w = 1,
+} rv_fence;
+
+typedef enum {
+    rv_ireg_zero,
+    rv_ireg_ra,
+    rv_ireg_sp,
+    rv_ireg_gp,
+    rv_ireg_tp,
+    rv_ireg_t0,
+    rv_ireg_t1,
+    rv_ireg_t2,
+    rv_ireg_s0,
+    rv_ireg_s1,
+    rv_ireg_a0,
+    rv_ireg_a1,
+    rv_ireg_a2,
+    rv_ireg_a3,
+    rv_ireg_a4,
+    rv_ireg_a5,
+    rv_ireg_a6,
+    rv_ireg_a7,
+    rv_ireg_s2,
+    rv_ireg_s3,
+    rv_ireg_s4,
+    rv_ireg_s5,
+    rv_ireg_s6,
+    rv_ireg_s7,
+    rv_ireg_s8,
+    rv_ireg_s9,
+    rv_ireg_s10,
+    rv_ireg_s11,
+    rv_ireg_t3,
+    rv_ireg_t4,
+    rv_ireg_t5,
+    rv_ireg_t6,
+} rv_ireg;
+
+typedef enum {
+    rvc_end,
+    rvc_rd_eq_ra,
+    rvc_rd_eq_x0,
+    rvc_rs1_eq_x0,
+    rvc_rs2_eq_x0,
+    rvc_rs2_eq_rs1,
+    rvc_rs1_eq_ra,
+    rvc_imm_eq_zero,
+    rvc_imm_eq_n1,
+    rvc_imm_eq_p1,
+    rvc_csr_eq_0x001,
+    rvc_csr_eq_0x002,
+    rvc_csr_eq_0x003,
+    rvc_csr_eq_0xc00,
+    rvc_csr_eq_0xc01,
+    rvc_csr_eq_0xc02,
+    rvc_csr_eq_0xc80,
+    rvc_csr_eq_0xc81,
+    rvc_csr_eq_0xc82,
+} rvc_constraint;
+
+typedef enum {
+    rv_codec_illegal,
+    rv_codec_none,
+    rv_codec_u,
+    rv_codec_uj,
+    rv_codec_i,
+    rv_codec_i_sh5,
+    rv_codec_i_sh6,
+    rv_codec_i_sh7,
+    rv_codec_i_csr,
+    rv_codec_s,
+    rv_codec_sb,
+    rv_codec_r,
+    rv_codec_r_m,
+    rv_codec_r4_m,
+    rv_codec_r_a,
+    rv_codec_r_l,
+    rv_codec_r_f,
+    rv_codec_cb,
+    rv_codec_cb_imm,
+    rv_codec_cb_sh5,
+    rv_codec_cb_sh6,
+    rv_codec_ci,
+    rv_codec_ci_sh5,
+    rv_codec_ci_sh6,
+    rv_codec_ci_16sp,
+    rv_codec_ci_lwsp,
+    rv_codec_ci_ldsp,
+    rv_codec_ci_lqsp,
+    rv_codec_ci_li,
+    rv_codec_ci_lui,
+    rv_codec_ci_none,
+    rv_codec_ciw_4spn,
+    rv_codec_cj,
+    rv_codec_cj_jal,
+    rv_codec_cl_lw,
+    rv_codec_cl_ld,
+    rv_codec_cl_lq,
+    rv_codec_cr,
+    rv_codec_cr_mv,
+    rv_codec_cr_jalr,
+    rv_codec_cr_jr,
+    rv_codec_cs,
+    rv_codec_cs_sw,
+    rv_codec_cs_sd,
+    rv_codec_cs_sq,
+    rv_codec_css_swsp,
+    rv_codec_css_sdsp,
+    rv_codec_css_sqsp,
+    rv_codec_k_bs,
+    rv_codec_k_rnum,
+    rv_codec_v_r,
+    rv_codec_v_ldst,
+    rv_codec_v_i,
+    rv_codec_vsetvli,
+    rv_codec_vsetivli,
+    rv_codec_zcb_ext,
+    rv_codec_zcb_mul,
+    rv_codec_zcb_lb,
+    rv_codec_zcb_lh,
+    rv_codec_zcmp_cm_pushpop,
+    rv_codec_zcmp_cm_mv,
+    rv_codec_zcmt_jt,
+    rv_codec_r2_imm5,
+    rv_codec_r2,
+    rv_codec_r2_imm6,
+    rv_codec_r_imm2,
+    rv_codec_r2_immhl,
+    rv_codec_r2_imm2_imm5,
+    rv_codec_fli,
+} rv_codec;
+
+/* structures */
+
+typedef struct {
+    const int op;
+    const rvc_constraint *constraints;
+} rv_comp_data;
+
+typedef struct {
+    const char * const name;
+    const rv_codec codec;
+    const char * const format;
+    const rv_comp_data *pseudo;
+    const short decomp_rv32;
+    const short decomp_rv64;
+    const short decomp_rv128;
+    const short decomp_data;
+} rv_opcode_data;
+
+typedef struct {
+    RISCVCPUConfig *cfg;
+    uint64_t  pc;
+    uint64_t  inst;
+    const rv_opcode_data *opcode_data;
+    int32_t   imm;
+    int32_t   imm1;
+    uint16_t  op;
+    uint8_t   codec;
+    uint8_t   rd;
+    uint8_t   rs1;
+    uint8_t   rs2;
+    uint8_t   rs3;
+    uint8_t   rm;
+    uint8_t   pred;
+    uint8_t   succ;
+    uint8_t   aq;
+    uint8_t   rl;
+    uint8_t   bs;
+    uint8_t   rnum;
+    uint8_t   vm;
+    uint32_t  vzimm;
+    uint8_t   rlist;
+} rv_decode;
+
+enum {
+    rv_op_illegal = 0
+};
+
+enum {
+    rvcd_imm_nz = 0x1
+};
+
+/* instruction formats */
+
+#define rv_fmt_none                   "O\t"
+#define rv_fmt_rs1                    "O\t1"
+#define rv_fmt_offset                 "O\to"
+#define rv_fmt_pred_succ              "O\tp,s"
+#define rv_fmt_rs1_rs2                "O\t1,2"
+#define rv_fmt_rd_imm                 "O\t0,i"
+#define rv_fmt_rd_offset              "O\t0,o"
+#define rv_fmt_rd_rs1_rs2             "O\t0,1,2"
+#define rv_fmt_frd_rs1                "O\t3,1"
+#define rv_fmt_frd_rs1_rs2            "O\t3,1,2"
+#define rv_fmt_frd_frs1               "O\t3,4"
+#define rv_fmt_rd_frs1                "O\t0,4"
+#define rv_fmt_rd_frs1_frs2           "O\t0,4,5"
+#define rv_fmt_frd_frs1_frs2          "O\t3,4,5"
+#define rv_fmt_rm_frd_frs1            "O\tr,3,4"
+#define rv_fmt_rm_frd_rs1             "O\tr,3,1"
+#define rv_fmt_rm_rd_frs1             "O\tr,0,4"
+#define rv_fmt_rm_frd_frs1_frs2       "O\tr,3,4,5"
+#define rv_fmt_rm_frd_frs1_frs2_frs3  "O\tr,3,4,5,6"
+#define rv_fmt_rd_rs1_imm             "O\t0,1,i"
+#define rv_fmt_rd_rs1_offset          "O\t0,1,i"
+#define rv_fmt_rd_offset_rs1          "O\t0,i(1)"
+#define rv_fmt_frd_offset_rs1         "O\t3,i(1)"
+#define rv_fmt_rd_csr_rs1             "O\t0,c,1"
+#define rv_fmt_rd_csr_zimm            "O\t0,c,7"
+#define rv_fmt_rs2_offset_rs1         "O\t2,i(1)"
+#define rv_fmt_frs2_offset_rs1        "O\t5,i(1)"
+#define rv_fmt_rs1_rs2_offset         "O\t1,2,o"
+#define rv_fmt_rs2_rs1_offset         "O\t2,1,o"
+#define rv_fmt_aqrl_rd_rs2_rs1        "OAR\t0,2,(1)"
+#define rv_fmt_aqrl_rd_rs1            "OAR\t0,(1)"
+#define rv_fmt_rd                     "O\t0"
+#define rv_fmt_rd_zimm                "O\t0,7"
+#define rv_fmt_rd_rs1                 "O\t0,1"
+#define rv_fmt_rd_rs2                 "O\t0,2"
+#define rv_fmt_rs1_offset             "O\t1,o"
+#define rv_fmt_rs2_offset             "O\t2,o"
+#define rv_fmt_rs1_rs2_bs             "O\t1,2,b"
+#define rv_fmt_rd_rs1_rnum            "O\t0,1,n"
+#define rv_fmt_ldst_vd_rs1_vm         "O\tD,(1)m"
+#define rv_fmt_ldst_vd_rs1_rs2_vm     "O\tD,(1),2m"
+#define rv_fmt_ldst_vd_rs1_vs2_vm     "O\tD,(1),Fm"
+#define rv_fmt_vd_vs2_vs1             "O\tD,F,E"
+#define rv_fmt_vd_vs2_vs1_vl          "O\tD,F,El"
+#define rv_fmt_vd_vs2_vs1_vm          "O\tD,F,Em"
+#define rv_fmt_vd_vs2_rs1_vl          "O\tD,F,1l"
+#define rv_fmt_vd_vs2_fs1_vl          "O\tD,F,4l"
+#define rv_fmt_vd_vs2_rs1_vm          "O\tD,F,1m"
+#define rv_fmt_vd_vs2_fs1_vm          "O\tD,F,4m"
+#define rv_fmt_vd_vs2_imm_vl          "O\tD,F,il"
+#define rv_fmt_vd_vs2_imm_vm          "O\tD,F,im"
+#define rv_fmt_vd_vs2_uimm_vm         "O\tD,F,um"
+#define rv_fmt_vd_vs1_vs2_vm          "O\tD,E,Fm"
+#define rv_fmt_vd_rs1_vs2_vm          "O\tD,1,Fm"
+#define rv_fmt_vd_fs1_vs2_vm          "O\tD,4,Fm"
+#define rv_fmt_vd_vs1                 "O\tD,E"
+#define rv_fmt_vd_rs1                 "O\tD,1"
+#define rv_fmt_vd_fs1                 "O\tD,4"
+#define rv_fmt_vd_imm                 "O\tD,i"
+#define rv_fmt_vd_vs2                 "O\tD,F"
+#define rv_fmt_vd_vs2_vm              "O\tD,Fm"
+#define rv_fmt_rd_vs2_vm              "O\t0,Fm"
+#define rv_fmt_rd_vs2                 "O\t0,F"
+#define rv_fmt_fd_vs2                 "O\t3,F"
+#define rv_fmt_vd_vm                  "O\tDm"
+#define rv_fmt_vsetvli                "O\t0,1,v"
+#define rv_fmt_vsetivli               "O\t0,u,v"
+#define rv_fmt_rs1_rs2_zce_ldst       "O\t2,i(1)"
+#define rv_fmt_push_rlist             "O\tx,-i"
+#define rv_fmt_pop_rlist              "O\tx,i"
+#define rv_fmt_zcmt_index             "O\ti"
+#define rv_fmt_rd_rs1_rs2_imm         "O\t0,1,2,i"
+#define rv_fmt_frd_rs1_rs2_imm        "O\t3,1,2,i"
+#define rv_fmt_rd_rs1_immh_imml       "O\t0,1,i,j"
+#define rv_fmt_rd_rs1_immh_imml_addr  "O\t0,(1),i,j"
+#define rv_fmt_rd2_imm                "O\t0,2,(1),i"
+#define rv_fmt_fli                    "O\t3,h"
+
+#endif /* DISAS_RISCV_H */
diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index ddc1e48460..02ea5a839f 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -116,6 +116,11 @@ Use "whpx" (on Windows) or "hvf" (on macOS) instead.
 
 Use ``-run-with async-teardown=on`` instead.
 
+``-chroot`` (since 8.1)
+'''''''''''''''''''''''
+
+Use ``-run-with chroot=dir`` instead.
+
 ``-singlestep`` (since 8.1)
 '''''''''''''''''''''''''''
 
diff --git a/docs/devel/style.rst b/docs/devel/style.rst
index aa5e083ff8..3cfcdeb9cd 100644
--- a/docs/devel/style.rst
+++ b/docs/devel/style.rst
@@ -567,7 +567,8 @@ For example, instead of
 
 .. code-block:: c
 
-    int somefunc(void) {
+    int somefunc(void)
+    {
         int ret = -1;
         char *foo = g_strdup_printf("foo%", "wibble");
         GList *bar = .....
@@ -588,7 +589,8 @@ Using g_autofree/g_autoptr enables the code to be written as:
 
 .. code-block:: c
 
-    int somefunc(void) {
+    int somefunc(void)
+    {
         g_autofree char *foo = g_strdup_printf("foo%", "wibble");
         g_autoptr (GList) bar = .....
 
@@ -613,7 +615,8 @@ are still some caveats to beware of
 
 .. code-block:: c
 
-    char *somefunc(void) {
+    char *somefunc(void)
+    {
         g_autofree char *foo = g_strdup_printf("foo%", "wibble");
         g_autoptr (GList) bar = .....
 
diff --git a/docs/interop/qemu-ga.rst b/docs/interop/qemu-ga.rst
index a9183802d1..461c5a35ee 100644
--- a/docs/interop/qemu-ga.rst
+++ b/docs/interop/qemu-ga.rst
@@ -84,6 +84,11 @@ Options
   Comma-separated list of RPCs to disable (no spaces, use ``help`` to
   list available RPCs).
 
+.. option:: -a, --allow-rpcs=LIST
+
+  Comma-separated list of RPCs to enable (no spaces, use ``help`` to
+  list available RPCs).
+
 .. option:: -D, --dump-conf
 
   Dump the configuration in a format compatible with ``qemu-ga.conf``
diff --git a/docs/system/riscv/virt.rst b/docs/system/riscv/virt.rst
index b33f45e5b3..f9a2eac544 100644
--- a/docs/system/riscv/virt.rst
+++ b/docs/system/riscv/virt.rst
@@ -93,6 +93,7 @@ The following machine-specific options are supported:
 
   When this option is "on", ACLINT devices will be emulated instead of
   SiFive CLINT. When not specified, this option is assumed to be "off".
+  This option is restricted to the TCG accelerator.
 
 - aia=[none|aplic|aplic-imsic]
 
diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c
index a68357c1c5..cabe9048b1 100644
--- a/hw/ide/cmd646.c
+++ b/hw/ide/cmd646.c
@@ -144,7 +144,7 @@ static void bmdma_write(void *opaque, hwaddr addr,
         cmd646_update_irq(pci_dev);
         break;
     case 2:
-        bm->status = (val & 0x60) | (bm->status & 1) | (bm->status & ~val & 0x06);
+        bmdma_status_writeb(bm, val);
         break;
     case 3:
         if (bm == &bm->pci_dev->bmdma[0]) {
@@ -297,7 +297,6 @@ static void pci_cmd646_ide_realize(PCIDevice *dev, Error **errp)
         ide_bus_init_output_irq(&d->bus[i], qdev_get_gpio_in(ds, i));
 
         bmdma_init(&d->bus[i], &d->bmdma[i], d);
-        d->bmdma[i].bus = &d->bus[i];
         ide_bus_register_restart_cb(&d->bus[i]);
     }
 }
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index fc9224bbc9..a25b352537 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -318,6 +318,12 @@ void bmdma_cmd_writeb(BMDMAState *bm, uint32_t val)
     bm->cmd = val & 0x09;
 }
 
+void bmdma_status_writeb(BMDMAState *bm, uint32_t val)
+{
+    bm->status = (val & 0x60) | (bm->status & BM_STATUS_DMAING)
+                 | (bm->status & ~val & (BM_STATUS_ERROR | BM_STATUS_INT));
+}
+
 static uint64_t bmdma_addr_read(void *opaque, hwaddr addr,
                                 unsigned width)
 {
@@ -519,13 +525,23 @@ void bmdma_init(IDEBus *bus, BMDMAState *bm, PCIIDEState *d)
     bus->dma = &bm->dma;
     bm->irq = bus->irq;
     bus->irq = qemu_allocate_irq(bmdma_irq, bm, 0);
+    bm->bus = bus;
     bm->pci_dev = d;
 }
 
+static void pci_ide_init(Object *obj)
+{
+    PCIIDEState *d = PCI_IDE(obj);
+
+    qdev_init_gpio_out_named(DEVICE(d), d->isa_irq, "isa-irq",
+                             ARRAY_SIZE(d->isa_irq));
+}
+
 static const TypeInfo pci_ide_type_info = {
     .name = TYPE_PCI_IDE,
     .parent = TYPE_PCI_DEVICE,
     .instance_size = sizeof(PCIIDEState),
+    .instance_init = pci_ide_init,
     .abstract = true,
     .interfaces = (InterfaceInfo[]) {
         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index 41d60921e3..151f206046 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -28,7 +28,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "migration/vmstate.h"
 #include "qapi/error.h"
 #include "hw/pci/pci.h"
 #include "hw/ide/piix.h"
@@ -76,7 +75,7 @@ static void bmdma_write(void *opaque, hwaddr addr,
         bmdma_cmd_writeb(bm, val);
         break;
     case 2:
-        bm->status = (val & 0x60) | (bm->status & 1) | (bm->status & ~val & 0x06);
+        bmdma_status_writeb(bm, val);
         break;
     }
 }
@@ -144,7 +143,6 @@ static bool pci_piix_init_bus(PCIIDEState *d, unsigned i, Error **errp)
     ide_bus_init_output_irq(&d->bus[i], isa_get_irq(NULL, port_info[i].isairq));
 
     bmdma_init(&d->bus[i], &d->bmdma[i], d);
-    d->bmdma[i].bus = &d->bus[i];
     ide_bus_register_restart_cb(&d->bus[i]);
 
     return true;
@@ -160,8 +158,6 @@ static void pci_piix_ide_realize(PCIDevice *dev, Error **errp)
     bmdma_setup_bar(d);
     pci_register_bar(dev, 4, PCI_BASE_ADDRESS_SPACE_IO, &d->bmdma_bar);
 
-    vmstate_register(VMSTATE_IF(dev), 0, &vmstate_ide_pci, d);
-
     for (unsigned i = 0; i < 2; i++) {
         if (!pci_piix_init_bus(d, i, errp)) {
             return;
@@ -187,6 +183,7 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data)
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
     dc->reset = piix_ide_reset;
+    dc->vmsd = &vmstate_ide_pci;
     k->realize = pci_piix_ide_realize;
     k->exit = pci_piix_ide_exitfn;
     k->vendor_id = PCI_VENDOR_ID_INTEL;
@@ -209,6 +206,7 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data)
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
     dc->reset = piix_ide_reset;
+    dc->vmsd = &vmstate_ide_pci;
     k->realize = pci_piix_ide_realize;
     k->exit = pci_piix_ide_exitfn;
     k->vendor_id = PCI_VENDOR_ID_INTEL;
diff --git a/hw/ide/sii3112.c b/hw/ide/sii3112.c
index f9becdff8e..63dc4a0494 100644
--- a/hw/ide/sii3112.c
+++ b/hw/ide/sii3112.c
@@ -149,8 +149,7 @@ static void sii3112_reg_write(void *opaque, hwaddr addr,
         break;
     case 0x02:
     case 0x12:
-        d->i.bmdma[0].status = (val & 0x60) | (d->i.bmdma[0].status & 1) |
-                               (d->i.bmdma[0].status & ~val & 6);
+        bmdma_status_writeb(&d->i.bmdma[0], val);
         break;
     case 0x04 ... 0x07:
         bmdma_addr_ioport_ops.write(&d->i.bmdma[0], addr - 4, val, size);
@@ -165,8 +164,7 @@ static void sii3112_reg_write(void *opaque, hwaddr addr,
         break;
     case 0x0a:
     case 0x1a:
-        d->i.bmdma[1].status = (val & 0x60) | (d->i.bmdma[1].status & 1) |
-                               (d->i.bmdma[1].status & ~val & 6);
+        bmdma_status_writeb(&d->i.bmdma[1], val);
         break;
     case 0x0c ... 0x0f:
         bmdma_addr_ioport_ops.write(&d->i.bmdma[1], addr - 12, val, size);
@@ -287,7 +285,6 @@ static void sii3112_pci_realize(PCIDevice *dev, Error **errp)
         ide_bus_init_output_irq(&s->bus[i], qdev_get_gpio_in(ds, i));
 
         bmdma_init(&s->bus[i], &s->bmdma[i], s);
-        s->bmdma[i].bus = &s->bus[i];
         ide_bus_register_restart_cb(&s->bus[i]);
     }
 }
diff --git a/hw/ide/via.c b/hw/ide/via.c
index 177baea9a7..fff23803a6 100644
--- a/hw/ide/via.c
+++ b/hw/ide/via.c
@@ -31,6 +31,7 @@
 #include "sysemu/dma.h"
 #include "hw/isa/vt82c686.h"
 #include "hw/ide/pci.h"
+#include "hw/irq.h"
 #include "trace.h"
 
 static uint64_t bmdma_read(void *opaque, hwaddr addr,
@@ -74,7 +75,7 @@ static void bmdma_write(void *opaque, hwaddr addr,
         bmdma_cmd_writeb(bm, val);
         break;
     case 2:
-        bm->status = (val & 0x60) | (bm->status & 1) | (bm->status & ~val & 0x06);
+        bmdma_status_writeb(bm, val);
         break;
     default:;
     }
@@ -104,7 +105,8 @@ static void bmdma_setup_bar(PCIIDEState *d)
 
 static void via_ide_set_irq(void *opaque, int n, int level)
 {
-    PCIDevice *d = PCI_DEVICE(opaque);
+    PCIIDEState *s = opaque;
+    PCIDevice *d = PCI_DEVICE(s);
 
     if (level) {
         d->config[0x70 + n * 8] |= 0x80;
@@ -112,7 +114,7 @@ static void via_ide_set_irq(void *opaque, int n, int level)
         d->config[0x70 + n * 8] &= ~0x80;
     }
 
-    via_isa_set_irq(pci_get_function_0(d), 14 + n, level);
+    qemu_set_irq(s->isa_irq[n], level);
 }
 
 static void via_ide_reset(DeviceState *dev)
@@ -194,7 +196,6 @@ static void via_ide_realize(PCIDevice *dev, Error **errp)
         ide_bus_init_output_irq(&d->bus[i], qdev_get_gpio_in(ds, i));
 
         bmdma_init(&d->bus[i], &d->bmdma[i], d);
-        d->bmdma[i].bus = &d->bus[i];
         ide_bus_register_restart_cb(&d->bus[i]);
     }
 }
diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c
index ca89119ce0..57bdfb4e78 100644
--- a/hw/isa/vt82c686.c
+++ b/hw/isa/vt82c686.c
@@ -592,12 +592,6 @@ static const TypeInfo via_isa_info = {
     },
 };
 
-void via_isa_set_irq(PCIDevice *d, int n, int level)
-{
-    ViaISAState *s = VIA_ISA(d);
-    qemu_set_irq(s->isa_irqs_in[n], level);
-}
-
 static void via_isa_request_i8259_irq(void *opaque, int irq, int level)
 {
     ViaISAState *s = opaque;
@@ -692,6 +686,10 @@ static void via_isa_realize(PCIDevice *d, Error **errp)
     if (!qdev_realize(DEVICE(&s->ide), BUS(pci_bus), errp)) {
         return;
     }
+    for (i = 0; i < 2; i++) {
+        qdev_connect_gpio_out_named(DEVICE(&s->ide), "isa-irq", i,
+                                    s->isa_irqs_in[14 + i]);
+    }
 
     /* Functions 2-3: USB Ports */
     for (i = 0; i < ARRAY_SIZE(s->uhci); i++) {
@@ -814,6 +812,7 @@ static void vt8231_isa_reset(DeviceState *dev)
                  PCI_COMMAND_MASTER | PCI_COMMAND_SPECIAL);
     pci_set_word(pci_conf + PCI_STATUS, PCI_STATUS_DEVSEL_MEDIUM);
 
+    pci_conf[0x4c] = 0x04; /* IDE interrupt Routing */
     pci_conf[0x58] = 0x40; /* Miscellaneous Control 0 */
     pci_conf[0x67] = 0x08; /* Fast IR Config */
     pci_conf[0x6b] = 0x01; /* Fast IR I/O Base */
diff --git a/hw/mips/loongson3_virt.c b/hw/mips/loongson3_virt.c
index 3dd91da7a6..4018b8c1d3 100644
--- a/hw/mips/loongson3_virt.c
+++ b/hw/mips/loongson3_virt.c
@@ -481,8 +481,8 @@ static void mips_loongson3_virt_init(MachineState *machine)
         if (!machine->cpu_type) {
             machine->cpu_type = MIPS_CPU_TYPE_NAME("Loongson-3A1000");
         }
-        if (!strstr(machine->cpu_type, "Loongson-3A1000")) {
-            error_report("Loongson-3/TCG needs cpu type Loongson-3A1000");
+        if (!cpu_type_supports_isa(machine->cpu_type, INSN_LOONGSON3A)) {
+            error_report("Loongson-3/TCG needs a Loongson-3 series cpu");
             exit(1);
         }
     } else {
diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig
index e4c2149175..6996d265e4 100644
--- a/hw/misc/Kconfig
+++ b/hw/misc/Kconfig
@@ -158,6 +158,9 @@ config SIFIVE_TEST
 config SIFIVE_E_PRCI
     bool
 
+config SIFIVE_E_AON
+    bool
+
 config SIFIVE_U_OTP
     bool
 
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index 05877f61cc..892f8b91c5 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -30,6 +30,7 @@ system_ss.add(when: 'CONFIG_MCHP_PFSOC_IOSCB', if_true: files('mchp_pfsoc_ioscb.
 system_ss.add(when: 'CONFIG_MCHP_PFSOC_SYSREG', if_true: files('mchp_pfsoc_sysreg.c'))
 system_ss.add(when: 'CONFIG_SIFIVE_TEST', if_true: files('sifive_test.c'))
 system_ss.add(when: 'CONFIG_SIFIVE_E_PRCI', if_true: files('sifive_e_prci.c'))
+system_ss.add(when: 'CONFIG_SIFIVE_E_AON', if_true: files('sifive_e_aon.c'))
 system_ss.add(when: 'CONFIG_SIFIVE_U_OTP', if_true: files('sifive_u_otp.c'))
 system_ss.add(when: 'CONFIG_SIFIVE_U_PRCI', if_true: files('sifive_u_prci.c'))
 
diff --git a/hw/misc/sifive_e_aon.c b/hw/misc/sifive_e_aon.c
new file mode 100644
index 0000000000..4656457d0b
--- /dev/null
+++ b/hw/misc/sifive_e_aon.c
@@ -0,0 +1,319 @@
+/*
+ * SiFive HiFive1 AON (Always On Domain) for QEMU.
+ *
+ * Copyright (c) 2022 SiFive, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/timer.h"
+#include "qemu/log.h"
+#include "hw/irq.h"
+#include "hw/registerfields.h"
+#include "hw/misc/sifive_e_aon.h"
+#include "qapi/visitor.h"
+#include "qapi/error.h"
+#include "sysemu/watchdog.h"
+#include "hw/qdev-properties.h"
+
+REG32(AON_WDT_WDOGCFG, 0x0)
+    FIELD(AON_WDT_WDOGCFG, SCALE, 0, 4)
+    FIELD(AON_WDT_WDOGCFG, RSVD0, 4, 4)
+    FIELD(AON_WDT_WDOGCFG, RSTEN, 8, 1)
+    FIELD(AON_WDT_WDOGCFG, ZEROCMP, 9, 1)
+    FIELD(AON_WDT_WDOGCFG, RSVD1, 10, 2)
+    FIELD(AON_WDT_WDOGCFG, EN_ALWAYS, 12, 1)
+    FIELD(AON_WDT_WDOGCFG, EN_CORE_AWAKE, 13, 1)
+    FIELD(AON_WDT_WDOGCFG, RSVD2, 14, 14)
+    FIELD(AON_WDT_WDOGCFG, IP0, 28, 1)
+    FIELD(AON_WDT_WDOGCFG, RSVD3, 29, 3)
+REG32(AON_WDT_WDOGCOUNT, 0x8)
+    FIELD(AON_WDT_WDOGCOUNT, VALUE, 0, 31)
+REG32(AON_WDT_WDOGS, 0x10)
+REG32(AON_WDT_WDOGFEED, 0x18)
+REG32(AON_WDT_WDOGKEY, 0x1c)
+REG32(AON_WDT_WDOGCMP0, 0x20)
+
+static void sifive_e_aon_wdt_update_wdogcount(SiFiveEAONState *r)
+{
+    int64_t now;
+    if (FIELD_EX32(r->wdogcfg, AON_WDT_WDOGCFG, EN_ALWAYS) == 0 &&
+        FIELD_EX32(r->wdogcfg, AON_WDT_WDOGCFG, EN_CORE_AWAKE) == 0) {
+        return;
+    }
+
+    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    r->wdogcount += muldiv64(now - r->wdog_restart_time,
+                             r->wdogclk_freq, NANOSECONDS_PER_SECOND);
+
+    /* Clean the most significant bit. */
+    r->wdogcount &= R_AON_WDT_WDOGCOUNT_VALUE_MASK;
+    r->wdog_restart_time = now;
+}
+
+static void sifive_e_aon_wdt_update_state(SiFiveEAONState *r)
+{
+    uint16_t wdogs;
+    bool cmp_signal = false;
+    sifive_e_aon_wdt_update_wdogcount(r);
+    wdogs = (uint16_t)(r->wdogcount >>
+                           FIELD_EX32(r->wdogcfg, AON_WDT_WDOGCFG, SCALE));
+
+    if (wdogs >= r->wdogcmp0) {
+        cmp_signal = true;
+        if (FIELD_EX32(r->wdogcfg, AON_WDT_WDOGCFG, ZEROCMP) == 1) {
+            r->wdogcount = 0;
+            wdogs = 0;
+        }
+    }
+
+    if (cmp_signal) {
+        if (FIELD_EX32(r->wdogcfg, AON_WDT_WDOGCFG, RSTEN) == 1) {
+            watchdog_perform_action();
+        }
+        r->wdogcfg = FIELD_DP32(r->wdogcfg, AON_WDT_WDOGCFG, IP0, 1);
+    }
+
+    qemu_set_irq(r->wdog_irq, FIELD_EX32(r->wdogcfg, AON_WDT_WDOGCFG, IP0));
+
+    if (wdogs < r->wdogcmp0 &&
+        (FIELD_EX32(r->wdogcfg, AON_WDT_WDOGCFG, EN_ALWAYS) == 1 ||
+         FIELD_EX32(r->wdogcfg, AON_WDT_WDOGCFG, EN_CORE_AWAKE) == 1)) {
+        int64_t next = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+        next += muldiv64((r->wdogcmp0 - wdogs) <<
+                         FIELD_EX32(r->wdogcfg, AON_WDT_WDOGCFG, SCALE),
+                         NANOSECONDS_PER_SECOND, r->wdogclk_freq);
+        timer_mod(r->wdog_timer, next);
+    } else {
+        timer_mod(r->wdog_timer, INT64_MAX);
+    }
+}
+
+/*
+ * Callback used when the timer set using timer_mod expires.
+ */
+static void sifive_e_aon_wdt_expired_cb(void *opaque)
+{
+    SiFiveEAONState *r = SIFIVE_E_AON(opaque);
+    sifive_e_aon_wdt_update_state(r);
+}
+
+static uint64_t
+sifive_e_aon_wdt_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    SiFiveEAONState *r = SIFIVE_E_AON(opaque);
+
+    switch (addr) {
+    case A_AON_WDT_WDOGCFG:
+        return r->wdogcfg;
+    case A_AON_WDT_WDOGCOUNT:
+        sifive_e_aon_wdt_update_wdogcount(r);
+        return r->wdogcount;
+    case A_AON_WDT_WDOGS:
+        sifive_e_aon_wdt_update_wdogcount(r);
+        return r->wdogcount >>
+               FIELD_EX32(r->wdogcfg,
+                          AON_WDT_WDOGCFG,
+                          SCALE);
+    case A_AON_WDT_WDOGFEED:
+        return 0;
+    case A_AON_WDT_WDOGKEY:
+        return r->wdogunlock;
+    case A_AON_WDT_WDOGCMP0:
+        return r->wdogcmp0;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: bad read: addr=0x%x\n",
+                      __func__, (int)addr);
+    }
+
+    return 0;
+}
+
+static void
+sifive_e_aon_wdt_write(void *opaque, hwaddr addr,
+                       uint64_t val64, unsigned int size)
+{
+    SiFiveEAONState *r = SIFIVE_E_AON(opaque);
+    uint32_t value = val64;
+
+    switch (addr) {
+    case A_AON_WDT_WDOGCFG: {
+        uint8_t new_en_always;
+        uint8_t new_en_core_awake;
+        uint8_t old_en_always;
+        uint8_t old_en_core_awake;
+        if (r->wdogunlock == 0) {
+            return;
+        }
+
+        new_en_always = FIELD_EX32(value, AON_WDT_WDOGCFG, EN_ALWAYS);
+        new_en_core_awake = FIELD_EX32(value, AON_WDT_WDOGCFG, EN_CORE_AWAKE);
+        old_en_always = FIELD_EX32(r->wdogcfg, AON_WDT_WDOGCFG, EN_ALWAYS);
+        old_en_core_awake = FIELD_EX32(r->wdogcfg, AON_WDT_WDOGCFG,
+                                       EN_CORE_AWAKE);
+
+        if ((old_en_always ||
+             old_en_core_awake) == 1 &&
+            (new_en_always ||
+             new_en_core_awake) == 0) {
+            sifive_e_aon_wdt_update_wdogcount(r);
+        } else if ((old_en_always ||
+                    old_en_core_awake) == 0 &&
+                   (new_en_always ||
+                    new_en_core_awake) == 1) {
+            r->wdog_restart_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+        }
+        r->wdogcfg = value;
+        r->wdogunlock = 0;
+        break;
+    }
+    case A_AON_WDT_WDOGCOUNT:
+        if (r->wdogunlock == 0) {
+            return;
+        }
+        r->wdogcount = value & R_AON_WDT_WDOGCOUNT_VALUE_MASK;
+        r->wdog_restart_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+        r->wdogunlock = 0;
+        break;
+    case A_AON_WDT_WDOGS:
+        return;
+    case A_AON_WDT_WDOGFEED:
+        if (r->wdogunlock == 0) {
+            return;
+        }
+        if (value == SIFIVE_E_AON_WDOGFEED) {
+            r->wdogcount = 0;
+            r->wdog_restart_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+        }
+        r->wdogunlock = 0;
+        break;
+    case A_AON_WDT_WDOGKEY:
+        if (value == SIFIVE_E_AON_WDOGKEY) {
+            r->wdogunlock = 1;
+        }
+        break;
+    case A_AON_WDT_WDOGCMP0:
+        if (r->wdogunlock == 0) {
+            return;
+        }
+        r->wdogcmp0 = (uint16_t) value;
+        r->wdogunlock = 0;
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: bad write: addr=0x%x v=0x%x\n",
+                      __func__, (int)addr, (int)value);
+    }
+    sifive_e_aon_wdt_update_state(r);
+}
+
+static uint64_t
+sifive_e_aon_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    if (addr < SIFIVE_E_AON_RTC) {
+        return sifive_e_aon_wdt_read(opaque, addr, size);
+    } else if (addr < SIFIVE_E_AON_MAX) {
+        qemu_log_mask(LOG_UNIMP, "%s: Unimplemented read: addr=0x%x\n",
+                      __func__, (int)addr);
+    } else {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: bad read: addr=0x%x\n",
+                      __func__, (int)addr);
+    }
+    return 0;
+}
+
+static void
+sifive_e_aon_write(void *opaque, hwaddr addr,
+                   uint64_t val64, unsigned int size)
+{
+    if (addr < SIFIVE_E_AON_RTC) {
+        sifive_e_aon_wdt_write(opaque, addr, val64, size);
+    } else if (addr < SIFIVE_E_AON_MAX) {
+        qemu_log_mask(LOG_UNIMP, "%s: Unimplemented write: addr=0x%x\n",
+                      __func__, (int)addr);
+    } else {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: bad write: addr=0x%x\n",
+                      __func__, (int)addr);
+    }
+}
+
+static const MemoryRegionOps sifive_e_aon_ops = {
+    .read = sifive_e_aon_read,
+    .write = sifive_e_aon_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4
+    },
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4
+    }
+};
+
+static void sifive_e_aon_reset(DeviceState *dev)
+{
+    SiFiveEAONState *r = SIFIVE_E_AON(dev);
+
+    r->wdogcfg = FIELD_DP32(r->wdogcfg, AON_WDT_WDOGCFG, RSTEN, 0);
+    r->wdogcfg = FIELD_DP32(r->wdogcfg, AON_WDT_WDOGCFG, EN_ALWAYS, 0);
+    r->wdogcfg = FIELD_DP32(r->wdogcfg, AON_WDT_WDOGCFG, EN_CORE_AWAKE, 0);
+    r->wdogcmp0 = 0xbeef;
+
+    sifive_e_aon_wdt_update_state(r);
+}
+
+static void sifive_e_aon_init(Object *obj)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+    SiFiveEAONState *r = SIFIVE_E_AON(obj);
+
+    memory_region_init_io(&r->mmio, OBJECT(r), &sifive_e_aon_ops, r,
+                          TYPE_SIFIVE_E_AON, SIFIVE_E_AON_MAX);
+    sysbus_init_mmio(sbd, &r->mmio);
+
+    /* watchdog timer */
+    r->wdog_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                 sifive_e_aon_wdt_expired_cb, r);
+    r->wdogclk_freq = SIFIVE_E_LFCLK_DEFAULT_FREQ;
+    sysbus_init_irq(sbd, &r->wdog_irq);
+}
+
+static Property sifive_e_aon_properties[] = {
+    DEFINE_PROP_UINT64("wdogclk-frequency", SiFiveEAONState, wdogclk_freq,
+                       SIFIVE_E_LFCLK_DEFAULT_FREQ),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void sifive_e_aon_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+
+    dc->reset = sifive_e_aon_reset;
+    device_class_set_props(dc, sifive_e_aon_properties);
+}
+
+static const TypeInfo sifive_e_aon_info = {
+    .name          = TYPE_SIFIVE_E_AON,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(SiFiveEAONState),
+    .instance_init = sifive_e_aon_init,
+    .class_init    = sifive_e_aon_class_init,
+};
+
+static void sifive_e_aon_register_types(void)
+{
+    type_register_static(&sifive_e_aon_info);
+}
+
+type_init(sifive_e_aon_register_types)
diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index 6528ebfa3a..b6a5eb4452 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -60,6 +60,7 @@ config SIFIVE_E
     select SIFIVE_PLIC
     select SIFIVE_UART
     select SIFIVE_E_PRCI
+    select SIFIVE_E_AON
     select UNIMP
 
 config SIFIVE_U
diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c
index 04939b60c3..0d37adc542 100644
--- a/hw/riscv/sifive_e.c
+++ b/hw/riscv/sifive_e.c
@@ -45,6 +45,7 @@
 #include "hw/intc/riscv_aclint.h"
 #include "hw/intc/sifive_plic.h"
 #include "hw/misc/sifive_e_prci.h"
+#include "hw/misc/sifive_e_aon.h"
 #include "chardev/char.h"
 #include "sysemu/sysemu.h"
 
@@ -185,6 +186,8 @@ static void sifive_e_soc_init(Object *obj)
     object_property_set_int(OBJECT(&s->cpus), "resetvec", 0x1004, &error_abort);
     object_initialize_child(obj, "riscv.sifive.e.gpio0", &s->gpio,
                             TYPE_SIFIVE_GPIO);
+    object_initialize_child(obj, "riscv.sifive.e.aon", &s->aon,
+                            TYPE_SIFIVE_E_AON);
 }
 
 static void sifive_e_soc_realize(DeviceState *dev, Error **errp)
@@ -223,10 +226,17 @@ static void sifive_e_soc_realize(DeviceState *dev, Error **errp)
         RISCV_ACLINT_DEFAULT_MTIMER_SIZE, 0, ms->smp.cpus,
         RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME,
         RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, false);
-    create_unimplemented_device("riscv.sifive.e.aon",
-        memmap[SIFIVE_E_DEV_AON].base, memmap[SIFIVE_E_DEV_AON].size);
     sifive_e_prci_create(memmap[SIFIVE_E_DEV_PRCI].base);
 
+    /* AON */
+
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->aon), errp)) {
+        return;
+    }
+
+    /* Map AON registers */
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->aon), 0, memmap[SIFIVE_E_DEV_AON].base);
+
     /* GPIO */
 
     if (!sysbus_realize(SYS_BUS_DEVICE(&s->gpio), errp)) {
@@ -245,6 +255,9 @@ static void sifive_e_soc_realize(DeviceState *dev, Error **errp)
                            qdev_get_gpio_in(DEVICE(s->plic),
                                             SIFIVE_E_GPIO0_IRQ0 + i));
     }
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->aon), 0,
+                       qdev_get_gpio_in(DEVICE(s->plic),
+                                        SIFIVE_E_AON_WDT_IRQ));
 
     sifive_uart_create(sys_mem, memmap[SIFIVE_E_DEV_UART0].base,
         serial_hd(0), qdev_get_gpio_in(DEVICE(s->plic), SIFIVE_E_UART0_IRQ));
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 68ccd0bde1..d90286dc46 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -44,6 +44,7 @@
 #include "chardev/char.h"
 #include "sysemu/device_tree.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/tcg.h"
 #include "sysemu/kvm.h"
 #include "sysemu/tpm.h"
 #include "hw/pci/pci.h"
@@ -243,13 +244,13 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket,
             s->soc[socket].hartid_base + cpu);
         qemu_fdt_add_subnode(ms->fdt, cpu_name);
 
-        satp_mode_max = satp_mode_max_from_map(
-            s->soc[socket].harts[cpu].cfg.satp_mode.map);
-        sv_name = g_strdup_printf("riscv,%s",
-                                  satp_mode_str(satp_mode_max, is_32_bit));
-        qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name);
-        g_free(sv_name);
-
+        if (cpu_ptr->cfg.satp_mode.supported != 0) {
+            satp_mode_max = satp_mode_max_from_map(cpu_ptr->cfg.satp_mode.map);
+            sv_name = g_strdup_printf("riscv,%s",
+                                      satp_mode_str(satp_mode_max, is_32_bit));
+            qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name);
+            g_free(sv_name);
+        }
 
         name = riscv_isa_string(cpu_ptr);
         qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name);
@@ -776,7 +777,7 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
 
         g_free(clust_name);
 
-        if (!kvm_enabled()) {
+        if (tcg_enabled()) {
             if (s->have_aclint) {
                 create_fdt_socket_aclint(s, memmap, socket,
                     &intc_phandles[phandle_pos]);
@@ -1244,10 +1245,21 @@ static void virt_machine_done(Notifier *notifier, void *data)
     target_ulong start_addr = memmap[VIRT_DRAM].base;
     target_ulong firmware_end_addr, kernel_start_addr;
     const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
-    uint32_t fdt_load_addr;
+    uint64_t fdt_load_addr;
     uint64_t kernel_entry = 0;
     BlockBackend *pflash_blk0;
 
+    /* load/create device tree */
+    if (machine->dtb) {
+        machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
+        if (!machine->fdt) {
+            error_report("load_device_tree() failed");
+            exit(1);
+        }
+    } else {
+        create_fdt(s, memmap);
+    }
+
     /*
      * Only direct boot kernel is currently supported for KVM VM,
      * so the "-bios" parameter is not supported when KVM is enabled.
@@ -1370,7 +1382,7 @@ static void virt_machine_init(MachineState *machine)
                                 hart_count, &error_abort);
         sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_fatal);
 
-        if (!kvm_enabled()) {
+        if (tcg_enabled()) {
             if (s->have_aclint) {
                 if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
                     /* Per-socket ACLINT MTIMER */
@@ -1508,17 +1520,6 @@ static void virt_machine_init(MachineState *machine)
     }
     virt_flash_map(s, system_memory);
 
-    /* load/create device tree */
-    if (machine->dtb) {
-        machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
-        if (!machine->fdt) {
-            error_report("load_device_tree() failed");
-            exit(1);
-        }
-    } else {
-        create_fdt(s, memmap);
-    }
-
     s->machine_done.notify = virt_machine_done;
     qemu_add_machine_init_done_notifier(&s->machine_done);
 }
@@ -1682,12 +1683,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
 #endif
 
-    object_class_property_add_bool(oc, "aclint", virt_get_aclint,
-                                   virt_set_aclint);
-    object_class_property_set_description(oc, "aclint",
-                                          "Set on/off to enable/disable "
-                                          "emulating ACLINT devices");
-
+    if (tcg_enabled()) {
+        object_class_property_add_bool(oc, "aclint", virt_get_aclint,
+                                       virt_set_aclint);
+        object_class_property_set_description(oc, "aclint",
+                                              "Set on/off to enable/disable "
+                                              "emulating ACLINT devices");
+    }
     object_class_property_add_str(oc, "aia", virt_get_aia,
                                   virt_set_aia);
     object_class_property_set_description(oc, "aia",
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 8612684d48..515dcf51b5 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -26,7 +26,7 @@
 #include "hw/s390x/vfio-ccw.h"
 #include "hw/s390x/css.h"
 #include "hw/s390x/ebcdic.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
 #include "hw/scsi/scsi.h"
 #include "hw/virtio/virtio-net.h"
 #include "ipl.h"
diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build
index f291016fee..6fd096813a 100644
--- a/hw/s390x/meson.build
+++ b/hw/s390x/meson.build
@@ -22,7 +22,6 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
   'tod-kvm.c',
   's390-skeys-kvm.c',
   's390-stattrib-kvm.c',
-  'pv.c',
   's390-pci-kvm.c',
 ))
 s390x_ss.add(when: 'CONFIG_TCG', if_true: files(
diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c
index 9134fe185f..ff41e4106d 100644
--- a/hw/s390x/s390-pci-kvm.c
+++ b/hw/s390x/s390-pci-kvm.c
@@ -14,7 +14,7 @@
 #include <linux/kvm.h>
 
 #include "kvm/kvm_s390x.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
 #include "hw/s390x/s390-pci-bus.h"
 #include "hw/s390x/s390-pci-kvm.h"
 #include "hw/s390x/s390-pci-inst.h"
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 2dece8eab8..4516d73ff5 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -42,7 +42,7 @@
 #include "hw/s390x/tod.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/cpus.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
 #include "migration/blocker.h"
 #include "qapi/visitor.h"
 
diff --git a/hw/s390x/tod-kvm.c b/hw/s390x/tod-kvm.c
index e2202dae2d..9588b90f2b 100644
--- a/hw/s390x/tod-kvm.c
+++ b/hw/s390x/tod-kvm.c
@@ -13,7 +13,7 @@
 #include "qemu/module.h"
 #include "sysemu/runstate.h"
 #include "hw/s390x/tod.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
 #include "kvm/kvm_s390x.h"
 
 static void kvm_s390_get_tod_raw(S390TOD *tod, Error **errp)
diff --git a/include/hw/ide/pci.h b/include/hw/ide/pci.h
index 74c127e32f..1ff469de87 100644
--- a/include/hw/ide/pci.h
+++ b/include/hw/ide/pci.h
@@ -58,6 +58,7 @@ struct PCIIDEState {
 
 void bmdma_init(IDEBus *bus, BMDMAState *bm, PCIIDEState *d);
 void bmdma_cmd_writeb(BMDMAState *bm, uint32_t val);
+void bmdma_status_writeb(BMDMAState *bm, uint32_t val);
 extern MemoryRegionOps bmdma_addr_ioport_ops;
 void pci_ide_create_devs(PCIDevice *dev);
 
diff --git a/include/hw/isa/vt82c686.h b/include/hw/isa/vt82c686.h
index da1722daf2..b6e95b2851 100644
--- a/include/hw/isa/vt82c686.h
+++ b/include/hw/isa/vt82c686.h
@@ -34,6 +34,4 @@ struct ViaAC97State {
     uint32_t ac97_cmd;
 };
 
-void via_isa_set_irq(PCIDevice *d, int n, int level);
-
 #endif
diff --git a/include/hw/misc/sifive_e_aon.h b/include/hw/misc/sifive_e_aon.h
new file mode 100644
index 0000000000..2ae1c4139c
--- /dev/null
+++ b/include/hw/misc/sifive_e_aon.h
@@ -0,0 +1,60 @@
+/*
+ * SiFive HiFive1 AON (Always On Domain) interface.
+ *
+ * Copyright (c) 2022 SiFive, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SIFIVE_AON_H
+#define HW_SIFIVE_AON_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_SIFIVE_E_AON "riscv.sifive.e.aon"
+OBJECT_DECLARE_SIMPLE_TYPE(SiFiveEAONState, SIFIVE_E_AON)
+
+#define SIFIVE_E_AON_WDOGKEY (0x51F15E)
+#define SIFIVE_E_AON_WDOGFEED (0xD09F00D)
+#define SIFIVE_E_LFCLK_DEFAULT_FREQ (32768)
+
+enum {
+    SIFIVE_E_AON_WDT    = 0x0,
+    SIFIVE_E_AON_RTC    = 0x40,
+    SIFIVE_E_AON_LFROSC = 0x70,
+    SIFIVE_E_AON_BACKUP = 0x80,
+    SIFIVE_E_AON_PMU    = 0x100,
+    SIFIVE_E_AON_MAX    = 0x150
+};
+
+struct SiFiveEAONState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion mmio;
+
+    /*< watchdog timer >*/
+    QEMUTimer *wdog_timer;
+    qemu_irq wdog_irq;
+    uint64_t wdog_restart_time;
+    uint64_t wdogclk_freq;
+
+    uint32_t wdogcfg;
+    uint16_t wdogcmp0;
+    uint32_t wdogcount;
+    uint8_t wdogunlock;
+};
+
+#endif
diff --git a/include/hw/riscv/sifive_e.h b/include/hw/riscv/sifive_e.h
index b824a79e2d..31180a680e 100644
--- a/include/hw/riscv/sifive_e.h
+++ b/include/hw/riscv/sifive_e.h
@@ -22,6 +22,7 @@
 #include "hw/riscv/riscv_hart.h"
 #include "hw/riscv/sifive_cpu.h"
 #include "hw/gpio/sifive_gpio.h"
+#include "hw/misc/sifive_e_aon.h"
 #include "hw/boards.h"
 
 #define TYPE_RISCV_E_SOC "riscv.sifive.e.soc"
@@ -35,6 +36,7 @@ typedef struct SiFiveESoCState {
     /*< public >*/
     RISCVHartArrayState cpus;
     DeviceState *plic;
+    SiFiveEAONState aon;
     SIFIVEGPIOState gpio;
     MemoryRegion xip_mem;
     MemoryRegion mask_rom;
@@ -76,9 +78,10 @@ enum {
 };
 
 enum {
-    SIFIVE_E_UART0_IRQ  = 3,
-    SIFIVE_E_UART1_IRQ  = 4,
-    SIFIVE_E_GPIO0_IRQ0 = 8
+    SIFIVE_E_AON_WDT_IRQ  = 1,
+    SIFIVE_E_UART0_IRQ    = 3,
+    SIFIVE_E_UART1_IRQ    = 4,
+    SIFIVE_E_GPIO0_IRQ0   = 8
 };
 
 #define SIFIVE_E_PLIC_HART_CONFIG "M"
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 9a2ec568b0..d3d1352c4e 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1635,7 +1635,9 @@ const char *elf_hwcap_str(uint32_t bit)
 static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop)
 {
     regs->psw.addr = infop->entry;
-    regs->psw.mask = PSW_MASK_64 | PSW_MASK_32;
+    regs->psw.mask = PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \
+                     PSW_MASK_MCHECK | PSW_MASK_PSTATE | PSW_MASK_64 | \
+                     PSW_MASK_32;
     regs->gprs[15] = infop->start_stack;
 }
 
diff --git a/linux-user/riscv/syscall32_nr.h b/linux-user/riscv/syscall32_nr.h
index 1327d7dffa..412e58e5b2 100644
--- a/linux-user/riscv/syscall32_nr.h
+++ b/linux-user/riscv/syscall32_nr.h
@@ -228,6 +228,7 @@
 #define TARGET_NR_accept4 242
 #define TARGET_NR_arch_specific_syscall 244
 #define TARGET_NR_riscv_flush_icache (TARGET_NR_arch_specific_syscall + 15)
+#define TARGET_NR_riscv_hwprobe (TARGET_NR_arch_specific_syscall + 14)
 #define TARGET_NR_prlimit64 261
 #define TARGET_NR_fanotify_init 262
 #define TARGET_NR_fanotify_mark 263
diff --git a/linux-user/riscv/syscall64_nr.h b/linux-user/riscv/syscall64_nr.h
index 6659751933..29e1eb2075 100644
--- a/linux-user/riscv/syscall64_nr.h
+++ b/linux-user/riscv/syscall64_nr.h
@@ -251,6 +251,7 @@
 #define TARGET_NR_recvmmsg 243
 #define TARGET_NR_arch_specific_syscall 244
 #define TARGET_NR_riscv_flush_icache (TARGET_NR_arch_specific_syscall + 15)
+#define TARGET_NR_riscv_hwprobe (TARGET_NR_arch_specific_syscall + 14)
 #define TARGET_NR_wait4 260
 #define TARGET_NR_prlimit64 261
 #define TARGET_NR_fanotify_init 262
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 9b9e3bd5e3..420bab7c68 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8983,6 +8983,147 @@ static int do_getdents64(abi_long dirfd, abi_long arg2, abi_long count)
 }
 #endif /* TARGET_NR_getdents64 */
 
+#if defined(TARGET_NR_riscv_hwprobe)
+
+#define RISCV_HWPROBE_KEY_MVENDORID     0
+#define RISCV_HWPROBE_KEY_MARCHID       1
+#define RISCV_HWPROBE_KEY_MIMPID        2
+
+#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3
+#define     RISCV_HWPROBE_BASE_BEHAVIOR_IMA (1 << 0)
+
+#define RISCV_HWPROBE_KEY_IMA_EXT_0     4
+#define     RISCV_HWPROBE_IMA_FD       (1 << 0)
+#define     RISCV_HWPROBE_IMA_C        (1 << 1)
+
+#define RISCV_HWPROBE_KEY_CPUPERF_0     5
+#define     RISCV_HWPROBE_MISALIGNED_UNKNOWN     (0 << 0)
+#define     RISCV_HWPROBE_MISALIGNED_EMULATED    (1 << 0)
+#define     RISCV_HWPROBE_MISALIGNED_SLOW        (2 << 0)
+#define     RISCV_HWPROBE_MISALIGNED_FAST        (3 << 0)
+#define     RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
+#define     RISCV_HWPROBE_MISALIGNED_MASK        (7 << 0)
+
+struct riscv_hwprobe {
+    abi_llong  key;
+    abi_ullong value;
+};
+
+static void risc_hwprobe_fill_pairs(CPURISCVState *env,
+                                    struct riscv_hwprobe *pair,
+                                    size_t pair_count)
+{
+    const RISCVCPUConfig *cfg = riscv_cpu_cfg(env);
+
+    for (; pair_count > 0; pair_count--, pair++) {
+        abi_llong key;
+        abi_ullong value;
+        __put_user(0, &pair->value);
+        __get_user(key, &pair->key);
+        switch (key) {
+        case RISCV_HWPROBE_KEY_MVENDORID:
+            __put_user(cfg->mvendorid, &pair->value);
+            break;
+        case RISCV_HWPROBE_KEY_MARCHID:
+            __put_user(cfg->marchid, &pair->value);
+            break;
+        case RISCV_HWPROBE_KEY_MIMPID:
+            __put_user(cfg->mimpid, &pair->value);
+            break;
+        case RISCV_HWPROBE_KEY_BASE_BEHAVIOR:
+            value = riscv_has_ext(env, RVI) &&
+                    riscv_has_ext(env, RVM) &&
+                    riscv_has_ext(env, RVA) ?
+                    RISCV_HWPROBE_BASE_BEHAVIOR_IMA : 0;
+            __put_user(value, &pair->value);
+            break;
+        case RISCV_HWPROBE_KEY_IMA_EXT_0:
+            value = riscv_has_ext(env, RVF) &&
+                    riscv_has_ext(env, RVD) ?
+                    RISCV_HWPROBE_IMA_FD : 0;
+            value |= riscv_has_ext(env, RVC) ?
+                     RISCV_HWPROBE_IMA_C : pair->value;
+            __put_user(value, &pair->value);
+            break;
+        case RISCV_HWPROBE_KEY_CPUPERF_0:
+            __put_user(RISCV_HWPROBE_MISALIGNED_FAST, &pair->value);
+            break;
+        default:
+            __put_user(-1, &pair->key);
+            break;
+        }
+    }
+}
+
+static int cpu_set_valid(abi_long arg3, abi_long arg4)
+{
+    int ret, i, tmp;
+    size_t host_mask_size, target_mask_size;
+    unsigned long *host_mask;
+
+    /*
+     * cpu_set_t represent CPU masks as bit masks of type unsigned long *.
+     * arg3 contains the cpu count.
+     */
+    tmp = (8 * sizeof(abi_ulong));
+    target_mask_size = ((arg3 + tmp - 1) / tmp) * sizeof(abi_ulong);
+    host_mask_size = (target_mask_size + (sizeof(*host_mask) - 1)) &
+                     ~(sizeof(*host_mask) - 1);
+
+    host_mask = alloca(host_mask_size);
+
+    ret = target_to_host_cpu_mask(host_mask, host_mask_size,
+                                  arg4, target_mask_size);
+    if (ret != 0) {
+        return ret;
+    }
+
+    for (i = 0 ; i < host_mask_size / sizeof(*host_mask); i++) {
+        if (host_mask[i] != 0) {
+            return 0;
+        }
+    }
+    return -TARGET_EINVAL;
+}
+
+static abi_long do_riscv_hwprobe(CPUArchState *cpu_env, abi_long arg1,
+                                 abi_long arg2, abi_long arg3,
+                                 abi_long arg4, abi_long arg5)
+{
+    int ret;
+    struct riscv_hwprobe *host_pairs;
+
+    /* flags must be 0 */
+    if (arg5 != 0) {
+        return -TARGET_EINVAL;
+    }
+
+    /* check cpu_set */
+    if (arg3 != 0) {
+        ret = cpu_set_valid(arg3, arg4);
+        if (ret != 0) {
+            return ret;
+        }
+    } else if (arg4 != 0) {
+        return -TARGET_EINVAL;
+    }
+
+    /* no pairs */
+    if (arg2 == 0) {
+        return 0;
+    }
+
+    host_pairs = lock_user(VERIFY_WRITE, arg1,
+                           sizeof(*host_pairs) * (size_t)arg2, 0);
+    if (host_pairs == NULL) {
+        return -TARGET_EFAULT;
+    }
+    risc_hwprobe_fill_pairs(cpu_env, host_pairs, arg2);
+    unlock_user(host_pairs, arg1, sizeof(*host_pairs) * (size_t)arg2);
+    return 0;
+}
+#endif /* TARGET_NR_riscv_hwprobe */
+
 #if defined(TARGET_NR_pivot_root) && defined(__NR_pivot_root)
 _syscall2(int, pivot_root, const char *, new_root, const char *, put_old)
 #endif
@@ -13665,6 +13806,11 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
         return ret;
 #endif
 
+#if defined(TARGET_NR_riscv_hwprobe)
+    case TARGET_NR_riscv_hwprobe:
+        return do_riscv_hwprobe(cpu_env, arg1, arg2, arg3, arg4, arg5);
+#endif
+
     default:
         qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num);
         return -TARGET_ENOSYS;
diff --git a/meson.build b/meson.build
index 58413d44a5..5fcdb37a71 100644
--- a/meson.build
+++ b/meson.build
@@ -20,7 +20,7 @@ config_host = keyval.load(meson.current_build_dir() / 'config-host.mak')
 
 cc = meson.get_compiler('c')
 all_languages = ['c']
-if add_languages('cpp', required: false, native: false)
+if targetos == 'windows' and add_languages('cpp', required: false, native: false)
   all_languages += ['cpp']
   cxx = meson.get_compiler('cpp')
 endif
@@ -55,16 +55,11 @@ qapi_trace_events = []
 
 bsd_oses = ['gnu/kfreebsd', 'freebsd', 'netbsd', 'openbsd', 'dragonfly', 'darwin']
 supported_oses = ['windows', 'freebsd', 'netbsd', 'openbsd', 'darwin', 'sunos', 'linux']
-supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv', 'x86', 'x86_64',
+supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv32', 'riscv64', 'x86', 'x86_64',
   'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc64']
 
 cpu = host_machine.cpu_family()
 
-# Unify riscv* to a single family.
-if cpu in ['riscv32', 'riscv64']
-  cpu = 'riscv'
-endif
-
 target_dirs = config_host['TARGET_DIRS'].split()
 have_linux_user = false
 have_bsd_user = false
@@ -99,6 +94,8 @@ elif cpu == 'x86'
   host_arch = 'i386'
 elif cpu == 'mips64'
   host_arch = 'mips'
+elif cpu in ['riscv32', 'riscv64']
+  host_arch = 'riscv'
 else
   host_arch = cpu
 endif
@@ -113,8 +110,10 @@ elif cpu in ['ppc', 'ppc64']
   kvm_targets = ['ppc-softmmu', 'ppc64-softmmu']
 elif cpu in ['mips', 'mips64']
   kvm_targets = ['mips-softmmu', 'mipsel-softmmu', 'mips64-softmmu', 'mips64el-softmmu']
-elif cpu in ['riscv']
-  kvm_targets = ['riscv32-softmmu', 'riscv64-softmmu']
+elif cpu in ['riscv32']
+  kvm_targets = ['riscv32-softmmu']
+elif cpu in ['riscv64']
+  kvm_targets = ['riscv64-softmmu']
 else
   kvm_targets = []
 endif
diff --git a/os-posix.c b/os-posix.c
index 90ea71725f..cfcb96533c 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -38,6 +38,7 @@
 #include "qemu/cutils.h"
 #include "qemu/config-file.h"
 #include "qemu/option.h"
+#include "qemu/module.h"
 
 #ifdef CONFIG_LINUX
 #include <sys/prctl.h>
@@ -148,6 +149,7 @@ int os_parse_cmd_args(int index, const char *optarg)
         }
         break;
     case QEMU_OPTION_chroot:
+        warn_report("option is deprecated, use '-run-with chroot=...' instead");
         chroot_dir = optarg;
         break;
     case QEMU_OPTION_daemonize:
@@ -158,18 +160,25 @@ int os_parse_cmd_args(int index, const char *optarg)
     case QEMU_OPTION_asyncteardown:
         init_async_teardown();
         break;
+#endif
     case QEMU_OPTION_run_with: {
+        const char *str;
         QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("run-with"),
                                                  optarg, false);
         if (!opts) {
             exit(1);
         }
+#if defined(CONFIG_LINUX)
         if (qemu_opt_get_bool(opts, "async-teardown", false)) {
             init_async_teardown();
         }
+#endif
+        str = qemu_opt_get(opts, "chroot");
+        if (str) {
+            chroot_dir = str;
+        }
         break;
     }
-#endif
     default:
         return -1;
     }
@@ -348,3 +357,27 @@ int os_mlock(void)
     return -ENOSYS;
 #endif
 }
+
+static QemuOptsList qemu_run_with_opts = {
+    .name = "run-with",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head),
+    .desc = {
+#if defined(CONFIG_LINUX)
+        {
+            .name = "async-teardown",
+            .type = QEMU_OPT_BOOL,
+        },
+#endif
+        {
+            .name = "chroot",
+            .type = QEMU_OPT_STRING,
+        },
+        { /* end of list */ }
+    },
+};
+
+static void register_runwith(void)
+{
+    qemu_add_opts(&qemu_run_with_opts);
+}
+opts_init(register_runwith);
diff --git a/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin b/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
index 6a8425885c..7b6c67e0ae 100644
--- a/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
+++ b/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
Binary files differdiff --git a/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin b/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin
index 80bdbf2170..1b831b412c 100644
--- a/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin
+++ b/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin
Binary files differdiff --git a/qemu-options.hx b/qemu-options.hx
index 96087505b2..f8f384e551 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4677,11 +4677,12 @@ ERST
 
 #ifndef _WIN32
 DEF("chroot", HAS_ARG, QEMU_OPTION_chroot, \
-    "-chroot dir     chroot to dir just before starting the VM\n",
+    "-chroot dir     chroot to dir just before starting the VM (deprecated)\n",
     QEMU_ARCH_ALL)
 #endif
 SRST
 ``-chroot dir``
+    Deprecated, use '-run-with chroot=...' instead.
     Immediately before starting guest execution, chroot to the specified
     directory. Especially useful in combination with -runas.
 ERST
@@ -4868,13 +4869,16 @@ SRST
     This option is deprecated and should no longer be used. The new option
     ``-run-with async-teardown=on`` is a replacement.
 ERST
+#endif
+#ifdef CONFIG_POSIX
 DEF("run-with", HAS_ARG, QEMU_OPTION_run_with,
-    "-run-with async-teardown[=on|off]\n"
-    "                misc QEMU process lifecycle options\n"
-    "                async-teardown=on enables asynchronous teardown\n",
+    "-run-with [async-teardown=on|off][,chroot=dir]\n"
+    "                Set miscellaneous QEMU process lifecycle options:\n"
+    "                async-teardown=on enables asynchronous teardown (Linux only)\n"
+    "                chroot=dir chroot to dir just before starting the VM\n",
     QEMU_ARCH_ALL)
 SRST
-``-run-with``
+``-run-with [async-teardown=on|off][,chroot=dir]``
     Set QEMU process lifecycle options.
 
     ``async-teardown=on`` enables asynchronous teardown. A new process called
@@ -4887,6 +4891,10 @@ SRST
     performed correctly. This only works if the cleanup process is not
     forcefully killed with SIGKILL before the main QEMU process has
     terminated completely.
+
+    ``chroot=dir`` can be used for doing a chroot to the specified directory
+    immediately before starting the guest execution. This is especially useful
+    in combination with -runas.
 ERST
 #endif
 
diff --git a/qga/main.c b/qga/main.c
index 2b992a55b3..002161a0cc 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -87,6 +87,7 @@ struct GAState {
     bool delimit_response;
     bool frozen;
     GList *blockedrpcs;
+    GList *allowedrpcs;
     char *state_filepath_isfrozen;
     struct {
         const char *log_filepath;
@@ -261,6 +262,8 @@ QEMU_COPYRIGHT "\n"
 #endif
 "  -b, --block-rpcs  comma-separated list of RPCs to disable (no spaces,\n"
 "                    use \"help\" to list available RPCs)\n"
+"  -a, --allow-rpcs  comma-separated list of RPCs to enable (no spaces,\n"
+"                    use \"help\" to list available RPCs)\n"
 "  -D, --dump-conf   dump a qemu-ga config file based on current config\n"
 "                    options / command-line parameters to stdout\n"
 "  -r, --retry-path  attempt re-opening path if it's unavailable or closed\n"
@@ -395,7 +398,7 @@ static gint ga_strcmp(gconstpointer str1, gconstpointer str2)
 }
 
 /* disable commands that aren't safe for fsfreeze */
-static void ga_disable_not_allowed(const QmpCommand *cmd, void *opaque)
+static void ga_disable_not_allowed_freeze(const QmpCommand *cmd, void *opaque)
 {
     bool allowed = false;
     int i = 0;
@@ -416,16 +419,38 @@ static void ga_disable_not_allowed(const QmpCommand *cmd, void *opaque)
 /* [re-]enable all commands, except those explicitly blocked by user */
 static void ga_enable_non_blocked(const QmpCommand *cmd, void *opaque)
 {
-    GList *blockedrpcs = opaque;
+    GAState *s = opaque;
+    GList *blockedrpcs = s->blockedrpcs;
+    GList *allowedrpcs = s->allowedrpcs;
     const char *name = qmp_command_name(cmd);
 
-    if (g_list_find_custom(blockedrpcs, name, ga_strcmp) == NULL &&
-        !qmp_command_is_enabled(cmd)) {
+    if (g_list_find_custom(blockedrpcs, name, ga_strcmp) == NULL) {
+        if (qmp_command_is_enabled(cmd)) {
+            return;
+        }
+
+        if (allowedrpcs &&
+            g_list_find_custom(allowedrpcs, name, ga_strcmp) == NULL) {
+            return;
+        }
+
         g_debug("enabling command: %s", name);
         qmp_enable_command(&ga_commands, name);
     }
 }
 
+/* disable commands that aren't allowed */
+static void ga_disable_not_allowed(const QmpCommand *cmd, void *opaque)
+{
+    GList *allowedrpcs = opaque;
+    const char *name = qmp_command_name(cmd);
+
+    if (g_list_find_custom(allowedrpcs, name, ga_strcmp) == NULL) {
+        g_debug("disabling command: %s", name);
+        qmp_disable_command(&ga_commands, name, "the command is not allowed");
+    }
+}
+
 static bool ga_create_file(const char *path)
 {
     int fd = open(path, O_CREAT | O_WRONLY, S_IWUSR | S_IRUSR);
@@ -459,7 +484,7 @@ void ga_set_frozen(GAState *s)
         return;
     }
     /* disable all forbidden (for frozen state) commands */
-    qmp_for_each_command(&ga_commands, ga_disable_not_allowed, NULL);
+    qmp_for_each_command(&ga_commands, ga_disable_not_allowed_freeze, NULL);
     g_warning("disabling logging due to filesystem freeze");
     ga_disable_logging(s);
     s->frozen = true;
@@ -497,8 +522,8 @@ void ga_unset_frozen(GAState *s)
         s->deferred_options.pid_filepath = NULL;
     }
 
-    /* enable all disabled, non-blocked commands */
-    qmp_for_each_command(&ga_commands, ga_enable_non_blocked, s->blockedrpcs);
+    /* enable all disabled, non-blocked and allowed commands */
+    qmp_for_each_command(&ga_commands, ga_enable_non_blocked, s);
     s->frozen = false;
     if (!ga_delete_file(s->state_filepath_isfrozen)) {
         g_warning("unable to delete %s, fsfreeze may not function properly",
@@ -984,7 +1009,9 @@ struct GAConfig {
     const char *service;
 #endif
     gchar *bliststr; /* blockedrpcs may point to this string */
+    gchar *aliststr; /* allowedrpcs may point to this string */
     GList *blockedrpcs;
+    GList *allowedrpcs;
     int daemonize;
     GLogLevelFlags log_level;
     int dumpconf;
@@ -1055,6 +1082,19 @@ static void config_load(GAConfig *config)
         config->blockedrpcs = g_list_concat(config->blockedrpcs,
                                           split_list(config->bliststr, ","));
     }
+    if (g_key_file_has_key(keyfile, "general", "allow-rpcs", NULL)) {
+        config->aliststr =
+            g_key_file_get_string(keyfile, "general", "allow-rpcs", &gerr);
+        config->allowedrpcs = g_list_concat(config->allowedrpcs,
+                                          split_list(config->aliststr, ","));
+    }
+
+    if (g_key_file_has_key(keyfile, "general", blockrpcs_key, NULL) &&
+        g_key_file_has_key(keyfile, "general", "allow-rpcs", NULL)) {
+        g_critical("wrong config, using 'block-rpcs' and 'allow-rpcs' keys at"
+                   " the same time is not allowed");
+        exit(EXIT_FAILURE);
+    }
 
 end:
     g_key_file_free(keyfile);
@@ -1115,6 +1155,9 @@ static void config_dump(GAConfig *config)
     tmp = list_join(config->blockedrpcs, ',');
     g_key_file_set_string(keyfile, "general", "block-rpcs", tmp);
     g_free(tmp);
+    tmp = list_join(config->allowedrpcs, ',');
+    g_key_file_set_string(keyfile, "general", "allow-rpcs", tmp);
+    g_free(tmp);
 
     tmp = g_key_file_to_data(keyfile, NULL, &error);
     if (error) {
@@ -1130,8 +1173,9 @@ static void config_dump(GAConfig *config)
 
 static void config_parse(GAConfig *config, int argc, char **argv)
 {
-    const char *sopt = "hVvdm:p:l:f:F::b:s:t:Dr";
+    const char *sopt = "hVvdm:p:l:f:F::b:a:s:t:Dr";
     int opt_ind = 0, ch;
+    bool block_rpcs = false, allow_rpcs = false;
     const struct option lopt[] = {
         { "help", 0, NULL, 'h' },
         { "version", 0, NULL, 'V' },
@@ -1147,6 +1191,7 @@ static void config_parse(GAConfig *config, int argc, char **argv)
         { "daemonize", 0, NULL, 'd' },
         { "block-rpcs", 1, NULL, 'b' },
         { "blacklist", 1, NULL, 'b' },  /* deprecated alias for 'block-rpcs' */
+        { "allow-rpcs", 1, NULL, 'a' },
 #ifdef _WIN32
         { "service", 1, NULL, 's' },
 #endif
@@ -1206,6 +1251,17 @@ static void config_parse(GAConfig *config, int argc, char **argv)
             }
             config->blockedrpcs = g_list_concat(config->blockedrpcs,
                                                 split_list(optarg, ","));
+            block_rpcs = true;
+            break;
+        }
+        case 'a': {
+            if (is_help_option(optarg)) {
+                qmp_for_each_command(&ga_commands, ga_print_cmd, NULL);
+                exit(EXIT_SUCCESS);
+            }
+            config->allowedrpcs = g_list_concat(config->allowedrpcs,
+                                                split_list(optarg, ","));
+            allow_rpcs = true;
             break;
         }
 #ifdef _WIN32
@@ -1246,6 +1302,12 @@ static void config_parse(GAConfig *config, int argc, char **argv)
             exit(EXIT_FAILURE);
         }
     }
+
+    if (block_rpcs && allow_rpcs) {
+        g_critical("wrong commandline, using --block-rpcs and --allow-rpcs at the"
+                   " same time is not allowed");
+        exit(EXIT_FAILURE);
+    }
 }
 
 static void config_free(GAConfig *config)
@@ -1256,10 +1318,12 @@ static void config_free(GAConfig *config)
     g_free(config->state_dir);
     g_free(config->channel_path);
     g_free(config->bliststr);
+    g_free(config->aliststr);
 #ifdef CONFIG_FSFREEZE
     g_free(config->fsfreeze_hook);
 #endif
     g_list_free_full(config->blockedrpcs, g_free);
+    g_list_free_full(config->allowedrpcs, g_free);
     g_free(config);
 }
 
@@ -1350,7 +1414,7 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation)
             s->deferred_options.log_filepath = config->log_filepath;
         }
         ga_disable_logging(s);
-        qmp_for_each_command(&ga_commands, ga_disable_not_allowed, NULL);
+        qmp_for_each_command(&ga_commands, ga_disable_not_allowed_freeze, NULL);
     } else {
         if (config->daemonize) {
             become_daemon(config->pid_filepath);
@@ -1374,6 +1438,15 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation)
         return NULL;
     }
 
+    if (config->allowedrpcs) {
+        qmp_for_each_command(&ga_commands, ga_disable_not_allowed, config->allowedrpcs);
+        s->allowedrpcs = config->allowedrpcs;
+    }
+
+    /*
+     * Some commands can be blocked due to system limitation.
+     * Initialize blockedrpcs list even if allowedrpcs specified.
+     */
     config->blockedrpcs = ga_command_init_blockedrpcs(config->blockedrpcs);
     if (config->blockedrpcs) {
         GList *l = config->blockedrpcs;
diff --git a/qga/vss-win32/install.cpp b/qga/vss-win32/install.cpp
index ff93b08a9e..ae38662a62 100644
--- a/qga/vss-win32/install.cpp
+++ b/qga/vss-win32/install.cpp
@@ -13,6 +13,7 @@
 #include "qemu/osdep.h"
 
 #include "vss-common.h"
+#include "vss-debug.h"
 #ifdef HAVE_VSS_SDK
 #include <vscoordint.h>
 #else
@@ -54,7 +55,7 @@ void errmsg(DWORD err, const char *text)
                   FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
                   NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
                   (char *)&msg, 0, NULL);
-    fprintf(stderr, "%.*s. (Error: %lx) %s\n", len, text, err, msg);
+    qga_debug("%.*s. (Error: %lx) %s", len, text, err, msg);
     LocalFree(msg);
 }
 
@@ -99,6 +100,8 @@ HRESULT put_Value(ICatalogObject *pObj, LPCWSTR name, T val)
 /* Lookup Administrators group name from winmgmt */
 static HRESULT GetAdminName(_bstr_t *name)
 {
+    qga_debug_begin;
+
     HRESULT hr;
     COMPointer<IWbemLocator> pLoc;
     COMPointer<IWbemServices> pSvc;
@@ -141,6 +144,7 @@ static HRESULT GetAdminName(_bstr_t *name)
     }
 
 out:
+    qga_debug_end;
     return hr;
 }
 
@@ -148,6 +152,8 @@ out:
 static HRESULT getNameByStringSID(
     const wchar_t *sid, LPWSTR buffer, LPDWORD bufferLen)
 {
+    qga_debug_begin;
+
     HRESULT hr = S_OK;
     PSID psid = NULL;
     SID_NAME_USE groupType;
@@ -167,6 +173,7 @@ static HRESULT getNameByStringSID(
     LocalFree(psid);
 
 out:
+    qga_debug_end;
     return hr;
 }
 
@@ -174,6 +181,8 @@ out:
 static HRESULT QGAProviderFind(
     HRESULT (*found)(ICatalogCollection *, int, void *), void *arg)
 {
+    qga_debug_begin;
+
     HRESULT hr;
     COMInitializer initializer;
     COMPointer<IUnknown> pUnknown;
@@ -204,41 +213,53 @@ static HRESULT QGAProviderFind(
     chk(pColl->SaveChanges(&n));
 
 out:
+    qga_debug_end;
     return hr;
 }
 
 /* Count QGA VSS provider in COM+ Application Catalog */
 static HRESULT QGAProviderCount(ICatalogCollection *coll, int i, void *arg)
 {
+    qga_debug_begin;
+
     (*(int *)arg)++;
+
+    qga_debug_end;
     return S_OK;
 }
 
 /* Remove QGA VSS provider from COM+ Application Catalog Collection */
 static HRESULT QGAProviderRemove(ICatalogCollection *coll, int i, void *arg)
 {
+    qga_debug_begin;
     HRESULT hr;
 
-    fprintf(stderr, "Removing COM+ Application: %s\n", QGA_PROVIDER_NAME);
+    qga_debug("Removing COM+ Application: %s", QGA_PROVIDER_NAME);
     chk(coll->Remove(i));
 out:
+    qga_debug_end;
     return hr;
 }
 
 /* Unregister this module from COM+ Applications Catalog */
 STDAPI COMUnregister(void)
 {
+    qga_debug_begin;
+
     HRESULT hr;
 
     DllUnregisterServer();
     chk(QGAProviderFind(QGAProviderRemove, NULL));
 out:
+    qga_debug_end;
     return hr;
 }
 
 /* Register this module to COM+ Applications Catalog */
 STDAPI COMRegister(void)
 {
+    qga_debug_begin;
+
     HRESULT hr;
     COMInitializer initializer;
     COMPointer<IUnknown> pUnknown;
@@ -258,12 +279,14 @@ STDAPI COMRegister(void)
 
     if (!g_hinstDll) {
         errmsg(E_FAIL, "Failed to initialize DLL");
+        qga_debug_end;
         return E_FAIL;
     }
 
     chk(QGAProviderFind(QGAProviderCount, (void *)&count));
     if (count) {
         errmsg(E_ABORT, "QGA VSS Provider is already installed");
+        qga_debug_end;
         return E_ABORT;
     }
 
@@ -304,9 +327,8 @@ STDAPI COMRegister(void)
     }
     strcpy(tlbPath, dllPath);
     strcpy(tlbPath+n-3, "tlb");
-    fprintf(stderr, "Registering " QGA_PROVIDER_NAME ":\n");
-    fprintf(stderr, "  %s\n", dllPath);
-    fprintf(stderr, "  %s\n", tlbPath);
+    qga_debug("Registering " QGA_PROVIDER_NAME ": %s %s",
+              dllPath, tlbPath);
     if (!PathFileExists(tlbPath)) {
         hr = HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND);
         errmsg(hr, "Failed to lookup tlb");
@@ -354,6 +376,7 @@ out:
         COMUnregister();
     }
 
+    qga_debug_end;
     return hr;
 }
 
@@ -369,6 +392,8 @@ STDAPI_(void) CALLBACK DLLCOMUnregister(HWND, HINSTANCE, LPSTR, int)
 
 static BOOL CreateRegistryKey(LPCTSTR key, LPCTSTR value, LPCTSTR data)
 {
+    qga_debug_begin;
+
     HKEY  hKey;
     LONG  ret;
     DWORD size;
@@ -389,6 +414,7 @@ static BOOL CreateRegistryKey(LPCTSTR key, LPCTSTR value, LPCTSTR data)
     RegCloseKey(hKey);
 
 out:
+    qga_debug_end;
     if (ret != ERROR_SUCCESS) {
         /* As we cannot printf within DllRegisterServer(), show a dialog. */
         errmsg_dialog(ret, "Cannot add registry", key);
@@ -400,6 +426,8 @@ out:
 /* Register this dll as a VSS provider */
 STDAPI DllRegisterServer(void)
 {
+    qga_debug_begin;
+
     COMInitializer initializer;
     COMPointer<IVssAdmin> pVssAdmin;
     HRESULT hr = E_FAIL;
@@ -478,12 +506,15 @@ out:
         DllUnregisterServer();
     }
 
+    qga_debug_end;
     return hr;
 }
 
 /* Unregister this VSS hardware provider from the system */
 STDAPI DllUnregisterServer(void)
 {
+    qga_debug_begin;
+
     TCHAR key[256];
     COMInitializer initializer;
     COMPointer<IVssAdmin> pVssAdmin;
@@ -501,6 +532,7 @@ STDAPI DllUnregisterServer(void)
     SHDeleteKey(HKEY_CLASSES_ROOT, key);
     SHDeleteKey(HKEY_CLASSES_ROOT, g_szProgid);
 
+    qga_debug_end;
     return S_OK; /* Uninstall should never fail */
 }
 
@@ -517,7 +549,7 @@ namespace _com_util
         }
 
         if (mbstowcs(bstr, ascii, len) == (size_t)-1) {
-            fprintf(stderr, "Failed to convert string '%s' into BSTR", ascii);
+            qga_debug("Failed to convert string '%s' into BSTR", ascii);
             bstr[0] = 0;
         }
         return bstr;
@@ -527,6 +559,8 @@ namespace _com_util
 /* Stop QGA VSS provider service using Winsvc API  */
 STDAPI StopService(void)
 {
+    qga_debug_begin;
+
     HRESULT hr = S_OK;
     SC_HANDLE manager = OpenSCManager(NULL, NULL, SC_MANAGER_ALL_ACCESS);
     SC_HANDLE service = NULL;
@@ -551,5 +585,6 @@ STDAPI StopService(void)
 out:
     CloseServiceHandle(service);
     CloseServiceHandle(manager);
+    qga_debug_end;
     return hr;
 }
diff --git a/qga/vss-win32/meson.build b/qga/vss-win32/meson.build
index 9483ccd3b8..0ac918910b 100644
--- a/qga/vss-win32/meson.build
+++ b/qga/vss-win32/meson.build
@@ -7,7 +7,7 @@ link_args = cc.get_supported_link_arguments([
 
 qga_vss = shared_module(
   'qga-vss',
-  ['requester.cpp', 'provider.cpp', 'install.cpp', genh],
+  ['requester.cpp', 'provider.cpp', 'install.cpp', 'vss-debug.cpp', genh],
   name_prefix: '',
   cpp_args: ['-Wno-unknown-pragmas', '-Wno-delete-non-virtual-dtor', '-Wno-non-virtual-dtor'],
   link_args: link_args,
diff --git a/qga/vss-win32/provider.cpp b/qga/vss-win32/provider.cpp
index 1b885e24ee..cc72e5ef1b 100644
--- a/qga/vss-win32/provider.cpp
+++ b/qga/vss-win32/provider.cpp
@@ -12,6 +12,7 @@
 
 #include "qemu/osdep.h"
 #include "vss-common.h"
+#include "vss-debug.h"
 #ifdef HAVE_VSS_SDK
 #include <vscoordint.h>
 #else
@@ -529,9 +530,11 @@ STDAPI DllCanUnloadNow()
 EXTERN_C
 BOOL WINAPI DllMain(HINSTANCE hinstDll, DWORD dwReason, LPVOID lpReserved)
 {
+    qga_debug("begin, reason = %lu", dwReason);
     if (dwReason == DLL_PROCESS_ATTACH) {
         g_hinstDll = hinstDll;
         DisableThreadLibraryCalls(hinstDll);
     }
+    qga_debug_end;
     return TRUE;
 }
diff --git a/qga/vss-win32/requester.cpp b/qga/vss-win32/requester.cpp
index 3e998af4a8..9884c65e70 100644
--- a/qga/vss-win32/requester.cpp
+++ b/qga/vss-win32/requester.cpp
@@ -12,6 +12,7 @@
 
 #include "qemu/osdep.h"
 #include "vss-common.h"
+#include "vss-debug.h"
 #include "requester.h"
 #include "install.h"
 #include <vswriter.h>
@@ -25,9 +26,11 @@
 
 #define DEFAULT_VSS_BACKUP_TYPE VSS_BT_FULL
 
-#define err_set(e, err, fmt, ...)                                           \
-    ((e)->error_setg_win32_wrapper((e)->errp, __FILE__, __LINE__, __func__, \
-                                   err, fmt, ## __VA_ARGS__))
+#define err_set(e, err, fmt, ...) {                                         \
+    (e)->error_setg_win32_wrapper((e)->errp, __FILE__, __LINE__, __func__,  \
+                                   err, fmt, ## __VA_ARGS__);               \
+    qga_debug(fmt, ## __VA_ARGS__);                                         \
+}
 /* Bad idea, works only when (e)->errp != NULL: */
 #define err_is_set(e) ((e)->errp && *(e)->errp)
 /* To lift this restriction, error_propagate(), like we do in QEMU code */
@@ -54,18 +57,20 @@ static struct QGAVSSContext {
 
 STDAPI requester_init(void)
 {
+    qga_debug_begin;
+
     COMInitializer initializer; /* to call CoInitializeSecurity */
     HRESULT hr = CoInitializeSecurity(
         NULL, -1, NULL, NULL, RPC_C_AUTHN_LEVEL_PKT_PRIVACY,
         RPC_C_IMP_LEVEL_IDENTIFY, NULL, EOAC_NONE, NULL);
     if (FAILED(hr)) {
-        fprintf(stderr, "failed to CoInitializeSecurity (error %lx)\n", hr);
+        qga_debug("failed to CoInitializeSecurity (error %lx)", hr);
         return hr;
     }
 
     hLib = LoadLibraryA("VSSAPI.DLL");
     if (!hLib) {
-        fprintf(stderr, "failed to load VSSAPI.DLL\n");
+        qga_debug("failed to load VSSAPI.DLL");
         return HRESULT_FROM_WIN32(GetLastError());
     }
 
@@ -78,22 +83,25 @@ STDAPI requester_init(void)
 #endif
         );
     if (!pCreateVssBackupComponents) {
-        fprintf(stderr, "failed to get proc address from VSSAPI.DLL\n");
+        qga_debug("failed to get proc address from VSSAPI.DLL");
         return HRESULT_FROM_WIN32(GetLastError());
     }
 
     pVssFreeSnapshotProperties = (t_VssFreeSnapshotProperties)
         GetProcAddress(hLib, "VssFreeSnapshotProperties");
     if (!pVssFreeSnapshotProperties) {
-        fprintf(stderr, "failed to get proc address from VSSAPI.DLL\n");
+        qga_debug("failed to get proc address from VSSAPI.DLL");
         return HRESULT_FROM_WIN32(GetLastError());
     }
 
+    qga_debug_end;
     return S_OK;
 }
 
 static void requester_cleanup(void)
 {
+    qga_debug_begin;
+
     if (vss_ctx.hEventFrozen) {
         CloseHandle(vss_ctx.hEventFrozen);
         vss_ctx.hEventFrozen = NULL;
@@ -115,10 +123,13 @@ static void requester_cleanup(void)
         vss_ctx.pVssbc = NULL;
     }
     vss_ctx.cFrozenVols = 0;
+    qga_debug_end;
 }
 
 STDAPI requester_deinit(void)
 {
+    qga_debug_begin;
+
     requester_cleanup();
 
     pCreateVssBackupComponents = NULL;
@@ -128,11 +139,14 @@ STDAPI requester_deinit(void)
         hLib = NULL;
     }
 
+    qga_debug_end;
     return S_OK;
 }
 
 static HRESULT WaitForAsync(IVssAsync *pAsync)
 {
+    qga_debug_begin;
+
     HRESULT ret, hr;
 
     do {
@@ -148,11 +162,14 @@ static HRESULT WaitForAsync(IVssAsync *pAsync)
         }
     } while (ret == VSS_S_ASYNC_PENDING);
 
+    qga_debug_end;
     return ret;
 }
 
 static void AddComponents(ErrorSet *errset)
 {
+    qga_debug_begin;
+
     unsigned int cWriters, i;
     VSS_ID id, idInstance, idWriter;
     BSTR bstrWriterName = NULL;
@@ -234,17 +251,21 @@ out:
     if (pComponent && info) {
         pComponent->FreeComponentInfo(info);
     }
+    qga_debug_end;
 }
 
 DWORD get_reg_dword_value(HKEY baseKey, LPCSTR subKey, LPCSTR valueName,
                           DWORD defaultData)
 {
+    qga_debug_begin;
+
     DWORD regGetValueError;
     DWORD dwordData;
     DWORD dataSize = sizeof(DWORD);
 
     regGetValueError = RegGetValue(baseKey, subKey, valueName, RRF_RT_DWORD,
                                    NULL, &dwordData, &dataSize);
+    qga_debug_end;
     if (regGetValueError  != ERROR_SUCCESS) {
         return defaultData;
     }
@@ -259,6 +280,8 @@ bool is_valid_vss_backup_type(VSS_BACKUP_TYPE vssBT)
 VSS_BACKUP_TYPE get_vss_backup_type(
     VSS_BACKUP_TYPE defaultVssBT = DEFAULT_VSS_BACKUP_TYPE)
 {
+    qga_debug_begin;
+
     VSS_BACKUP_TYPE vssBackupType;
 
     vssBackupType = static_cast<VSS_BACKUP_TYPE>(
@@ -266,6 +289,7 @@ VSS_BACKUP_TYPE get_vss_backup_type(
                                                 QGA_PROVIDER_REGISTRY_ADDRESS,
                                                 "VssOption",
                                                 defaultVssBT));
+    qga_debug_end;
     if (!is_valid_vss_backup_type(vssBackupType)) {
         return defaultVssBT;
     }
@@ -274,6 +298,8 @@ VSS_BACKUP_TYPE get_vss_backup_type(
 
 void requester_freeze(int *num_vols, void *mountpoints, ErrorSet *errset)
 {
+    qga_debug_begin;
+
     COMPointer<IVssAsync> pAsync;
     HANDLE volume;
     HRESULT hr;
@@ -289,6 +315,7 @@ void requester_freeze(int *num_vols, void *mountpoints, ErrorSet *errset)
 
     if (vss_ctx.pVssbc) { /* already frozen */
         *num_vols = 0;
+        qga_debug("finished, already frozen");
         return;
     }
 
@@ -446,6 +473,7 @@ void requester_freeze(int *num_vols, void *mountpoints, ErrorSet *errset)
         }
     }
 
+    qga_debug("preparing for backup");
     hr = vss_ctx.pVssbc->PrepareForBackup(pAsync.replace());
     if (SUCCEEDED(hr)) {
         hr = WaitForAsync(pAsync);
@@ -469,6 +497,7 @@ void requester_freeze(int *num_vols, void *mountpoints, ErrorSet *errset)
      * CQGAVssProvider::CommitSnapshots will kick vss_ctx.hEventFrozen
      * after the applications and filesystems are frozen.
      */
+    qga_debug("do snapshot set");
     hr = vss_ctx.pVssbc->DoSnapshotSet(&vss_ctx.pAsyncSnapshot);
     if (FAILED(hr)) {
         err_set(errset, hr, "failed to do snapshot set");
@@ -515,6 +544,7 @@ void requester_freeze(int *num_vols, void *mountpoints, ErrorSet *errset)
         *num_vols = vss_ctx.cFrozenVols = num_fixed_drives;
     }
 
+    qga_debug("end successful");
     return;
 
 out:
@@ -525,11 +555,14 @@ out:
 out1:
     requester_cleanup();
     CoUninitialize();
+
+    qga_debug_end;
 }
 
 
 void requester_thaw(int *num_vols, void *mountpints, ErrorSet *errset)
 {
+    qga_debug_begin;
     COMPointer<IVssAsync> pAsync;
 
     if (!vss_ctx.hEventThaw) {
@@ -538,6 +571,8 @@ void requester_thaw(int *num_vols, void *mountpints, ErrorSet *errset)
          * and no volumes must be frozen. We return without an error.
          */
         *num_vols = 0;
+        qga_debug("finished, no volumes were frozen");
+
         return;
     }
 
@@ -594,4 +629,6 @@ void requester_thaw(int *num_vols, void *mountpints, ErrorSet *errset)
 
     CoUninitialize();
     StopService();
+
+    qga_debug_end;
 }
diff --git a/qga/vss-win32/vss-debug.cpp b/qga/vss-win32/vss-debug.cpp
new file mode 100644
index 0000000000..820b1c6667
--- /dev/null
+++ b/qga/vss-win32/vss-debug.cpp
@@ -0,0 +1,39 @@
+/*
+ * QEMU Guest Agent VSS debug declarations
+ *
+ * Copyright (C) 2023 Red Hat Inc
+ *
+ * Authors:
+ *  Konstantin Kostiuk <kkostiuk@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "vss-debug.h"
+#include "vss-common.h"
+
+void qga_debug_internal(const char *funcname, const char *fmt, ...)
+{
+    char user_string[512] = {0};
+    char full_string[640] = {0};
+
+    va_list args;
+    va_start(args, fmt);
+    if (vsnprintf(user_string, _countof(user_string), fmt, args) <= 0) {
+        va_end(args);
+        return;
+    }
+
+    va_end(args);
+
+    if (snprintf(full_string, _countof(full_string),
+                 QGA_PROVIDER_NAME "[%lu]: %s %s\n",
+                 GetCurrentThreadId(), funcname, user_string) <= 0) {
+        return;
+    }
+
+    OutputDebugString(full_string);
+    fputs(full_string, stderr);
+}
diff --git a/qga/vss-win32/vss-debug.h b/qga/vss-win32/vss-debug.h
new file mode 100644
index 0000000000..7800457392
--- /dev/null
+++ b/qga/vss-win32/vss-debug.h
@@ -0,0 +1,25 @@
+/*
+ * QEMU Guest Agent VSS debug declarations
+ *
+ * Copyright (C) 2023 Red Hat Inc
+ *
+ * Authors:
+ *  Konstantin Kostiuk <kkostiuk@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include <vss-handles.h>
+
+#ifndef VSS_DEBUG_H
+#define VSS_DEBUG_H
+
+void qga_debug_internal(const char *funcname, const char *fmt, ...) G_GNUC_PRINTF(2, 3);
+
+#define qga_debug(fmt, ...) qga_debug_internal(__func__, fmt, ## __VA_ARGS__)
+#define qga_debug_begin qga_debug("begin")
+#define qga_debug_end qga_debug("end")
+
+#endif
diff --git a/roms/opensbi b/roms/opensbi
-Subproject 6b5188ca14e59ce7bf71afe4e7d3d557c3d31bf
+Subproject 2552799a1df30a3dcd2321a8b75d61d06f5fb9f
diff --git a/target/mips/cpu-defs.c.inc b/target/mips/cpu-defs.c.inc
index d45f245a67..03185d9aa0 100644
--- a/target/mips/cpu-defs.c.inc
+++ b/target/mips/cpu-defs.c.inc
@@ -118,6 +118,26 @@ const mips_def_t mips_defs[] =
         .mmu_type = MMU_TYPE_R4000,
     },
     {
+        .name = "XBurstR1",
+        .CP0_PRid = 0x1ed0024f,
+        .CP0_Config0 = MIPS_CONFIG0 | (MMU_TYPE_R4000 << CP0C0_MT),
+        .CP0_Config1 = MIPS_CONFIG1 | (15 << CP0C1_MMU) |
+                       (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (0 << CP0C1_CA),
+        .CP0_Config2 = MIPS_CONFIG2,
+        .CP0_Config3 = MIPS_CONFIG3,
+        .CP0_LLAddr_rw_bitmask = 0,
+        .CP0_LLAddr_shift = 4,
+        .SYNCI_Step = 32,
+        .CCRes = 2,
+        .CP0_Status_rw_bitmask = 0x1278FF17,
+        .SEGBITS = 32,
+        .PABITS = 32,
+        .insn_flags = CPU_MIPS32R1 | ASE_MXU,
+        .mmu_type = MMU_TYPE_R4000,
+    },
+    {
         .name = "4KEmR1",
         .CP0_PRid = 0x00018500,
         .CP0_Config0 = MIPS_CONFIG0 | (MMU_TYPE_FMT << CP0C0_MT),
@@ -324,6 +344,32 @@ const mips_def_t mips_defs[] =
         .mmu_type = MMU_TYPE_R4000,
     },
     {
+        .name = "XBurstR2",
+        .CP0_PRid = 0x2ed1024f,
+        .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) |
+                    (MMU_TYPE_R4000 << CP0C0_MT),
+        .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (15 << CP0C1_MMU) |
+                       (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (1 << CP0C1_CA),
+        .CP0_Config2 = MIPS_CONFIG2,
+        .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_DSP2P) | (1 << CP0C3_DSPP) |
+                       (1 << CP0C3_VInt),
+        .CP0_LLAddr_rw_bitmask = 0,
+        .CP0_LLAddr_shift = 4,
+        .SYNCI_Step = 32,
+        .CCRes = 2,
+        .CP0_Status_rw_bitmask = 0x3778FF1F,
+        .CP1_fcr0 = (1 << FCR0_F64) | (1 << FCR0_L) | (1 << FCR0_W) |
+                    (1 << FCR0_D) | (1 << FCR0_S) | (0x93 << FCR0_PRID),
+        .CP1_fcr31 = 0,
+        .CP1_fcr31_rw_bitmask = 0xFF83FFFF,
+        .SEGBITS = 32,
+        .PABITS = 32,
+        .insn_flags = CPU_MIPS32R2 | ASE_MXU,
+        .mmu_type = MMU_TYPE_R4000,
+    },
+    {
         .name = "M14K",
         .CP0_PRid = 0x00019b00,
         /* Config1 implemented, fixed mapping MMU,
@@ -709,7 +755,7 @@ const mips_def_t mips_defs[] =
         .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) |
                        (1 << CP0C4_AE) | (0xfc << CP0C4_KScrExist),
         .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_VP) |
-                       (1 << CP0C5_LLB) | (1 << CP0C5_MRP),
+                       (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | (3 << CP0C5_GI),
         .CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) |
                                   (1 << CP0C5_FRE) | (1 << CP0C5_UFE),
         .CP0_LLAddr_rw_bitmask = 0,
@@ -749,7 +795,7 @@ const mips_def_t mips_defs[] =
         .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) |
                        (1 << CP0C4_AE) | (0xfc << CP0C4_KScrExist),
         .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_VP) |
-                       (1 << CP0C5_LLB) | (1 << CP0C5_MRP),
+                       (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | (3 << CP0C5_GI),
         .CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) |
                                   (1 << CP0C5_FRE) | (1 << CP0C5_UFE),
         .CP0_LLAddr_rw_bitmask = 0,
@@ -895,6 +941,15 @@ const mips_def_t mips_defs[] =
         .CP1_fcr31 = 0,
         .CP1_fcr31_rw_bitmask = 0xFF83FFFF,
         .MSAIR = (0x01 << MSAIR_ProcID) | (0x40 << MSAIR_Rev),
+        .lcsr_cpucfg1 = (1 << CPUCFG1_FP) | (2 << CPUCFG1_FPREV) |
+                    (1 << CPUCFG1_MSA1) | (1 << CPUCFG1_LSLDR0) |
+                    (1 << CPUCFG1_LSPERF) | (1 << CPUCFG1_LSPERFX) |
+                    (1 << CPUCFG1_LSSYNCI) | (1 << CPUCFG1_LLEXC) |
+                    (1 << CPUCFG1_SCRAND) | (1 << CPUCFG1_MUALP) |
+                    (1 << CPUCFG1_KMUALEN) | (1 << CPUCFG1_ITLBT) |
+                    (1 << CPUCFG1_SFBP) | (1 << CPUCFG1_CDMAP),
+        .lcsr_cpucfg2 = (1 << CPUCFG2_LEXT1) | (1 << CPUCFG2_LCSRP) |
+                    (1 << CPUCFG2_LDISBLIKELY),
         .SEGBITS = 48,
         .PABITS = 48,
         .insn_flags = CPU_MIPS64R2 | INSN_LOONGSON3A |
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index 01e0fbe10d..63da1948fd 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -244,6 +244,8 @@ static void mips_cpu_reset_hold(Object *obj)
     env->CP0_PageGrain_rw_bitmask = env->cpu_model->CP0_PageGrain_rw_bitmask;
     env->CP0_PageGrain = env->cpu_model->CP0_PageGrain;
     env->CP0_EBaseWG_rw_bitmask = env->cpu_model->CP0_EBaseWG_rw_bitmask;
+    env->lcsr_cpucfg1 = env->cpu_model->lcsr_cpucfg1;
+    env->lcsr_cpucfg2 = env->cpu_model->lcsr_cpucfg2;
     env->active_fpu.fcr0 = env->cpu_model->CP1_fcr0;
     env->active_fpu.fcr31_rw_bitmask = env->cpu_model->CP1_fcr31_rw_bitmask;
     env->active_fpu.fcr31 = env->cpu_model->CP1_fcr31;
@@ -449,9 +451,9 @@ static void mips_cp0_period_set(MIPSCPU *cpu)
 {
     CPUMIPSState *env = &cpu->env;
 
-    env->cp0_count_ns = clock_ticks_to_ns(MIPS_CPU(cpu)->clock,
-                                          env->cpu_model->CCRes);
-    assert(env->cp0_count_ns);
+    clock_set_mul_div(cpu->count_div, env->cpu_model->CCRes, 1);
+    clock_set_source(cpu->count_div, cpu->clock);
+    clock_set_source(env->count_clock, cpu->count_div);
 }
 
 static void mips_cpu_realizefn(DeviceState *dev, Error **errp)
@@ -504,7 +506,17 @@ static void mips_cpu_initfn(Object *obj)
 
     cpu_set_cpustate_pointers(cpu);
     cpu->clock = qdev_init_clock_in(DEVICE(obj), "clk-in", NULL, cpu, 0);
+    cpu->count_div = clock_new(OBJECT(obj), "clk-div-count");
+    env->count_clock = clock_new(OBJECT(obj), "clk-count");
     env->cpu_model = mcc->cpu_def;
+#ifndef CONFIG_USER_ONLY
+    if (mcc->cpu_def->lcsr_cpucfg2 & (1 << CPUCFG2_LCSRP)) {
+        memory_region_init_io(&env->iocsr.mr, OBJECT(cpu), NULL,
+                                env, "iocsr", UINT64_MAX);
+        address_space_init(&env->iocsr.as,
+                            &env->iocsr.mr, "IOCSR");
+    }
+#endif
 }
 
 static char *mips_cpu_type_name(const char *cpu_model)
diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index a3bc646976..f81bd06f5e 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -3,6 +3,9 @@
 
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
+#ifndef CONFIG_USER_ONLY
+#include "exec/memory.h"
+#endif
 #include "fpu/softfloat-types.h"
 #include "hw/clock.h"
 #include "mips-defs.h"
@@ -1068,6 +1071,33 @@ typedef struct CPUArchState {
  */
     int32_t CP0_DESAVE;
     target_ulong CP0_KScratch[MIPS_KSCRATCH_NUM];
+/*
+ * Loongson CSR CPUCFG registers
+ */
+    uint32_t lcsr_cpucfg1;
+#define CPUCFG1_FP     0
+#define CPUCFG1_FPREV  1
+#define CPUCFG1_MMI    4
+#define CPUCFG1_MSA1   5
+#define CPUCFG1_MSA2   6
+#define CPUCFG1_LSLDR0 16
+#define CPUCFG1_LSPERF 17
+#define CPUCFG1_LSPERFX 18
+#define CPUCFG1_LSSYNCI 19
+#define CPUCFG1_LLEXC   20
+#define CPUCFG1_SCRAND  21
+#define CPUCFG1_MUALP   25
+#define CPUCFG1_KMUALEN 26
+#define CPUCFG1_ITLBT   27
+#define CPUCFG1_SFBP    29
+#define CPUCFG1_CDMAP   30
+    uint32_t lcsr_cpucfg2;
+#define CPUCFG2_LEXT1   0
+#define CPUCFG2_LEXT2   1
+#define CPUCFG2_LEXT3   2
+#define CPUCFG2_LSPW    3
+#define CPUCFG2_LCSRP   27
+#define CPUCFG2_LDISBLIKELY 28
 
     /* We waste some space so we can handle shadow registers like TCs. */
     TCState tcs[MIPS_SHADOW_SET_MAX];
@@ -1156,12 +1186,18 @@ typedef struct CPUArchState {
     void *irq[8];
     struct MIPSITUState *itu;
     MemoryRegion *itc_tag; /* ITC Configuration Tags */
+
+    /* Loongson IOCSR memory */
+    struct {
+        AddressSpace as;
+        MemoryRegion mr;
+    } iocsr;
 #endif
 
     const mips_def_t *cpu_model;
     QEMUTimer *timer; /* Internal timer */
+    Clock *count_clock; /* CP0_Count clock */
     target_ulong exception_base; /* ExceptionBase input to the core */
-    uint64_t cp0_count_ns; /* CP0_Count clock period (in nanoseconds) */
 } CPUMIPSState;
 
 /**
@@ -1178,6 +1214,7 @@ struct ArchCPU {
     /*< public >*/
 
     Clock *clock;
+    Clock *count_div; /* Divider for CP0_Count clock */
     CPUNegativeOffsetState neg;
     CPUMIPSState env;
 };
@@ -1280,6 +1317,12 @@ static inline bool ase_msa_available(CPUMIPSState *env)
     return env->CP0_Config3 & (1 << CP0C3_MSAP);
 }
 
+/* Check presence of Loongson CSR instructions */
+static inline bool ase_lcsr_available(CPUMIPSState *env)
+{
+    return env->lcsr_cpucfg2 & (1 << CPUCFG2_LCSRP);
+}
+
 /* Check presence of multi-threading ASE implementation */
 static inline bool ase_mt_available(CPUMIPSState *env)
 {
diff --git a/target/mips/helper.h b/target/mips/helper.h
index de32d82e98..0f8462febb 100644
--- a/target/mips/helper.h
+++ b/target/mips/helper.h
@@ -196,6 +196,10 @@ DEF_HELPER_1(rdhwr_xnp, tl, env)
 DEF_HELPER_2(pmon, void, env, int)
 DEF_HELPER_1(wait, void, env)
 
+#ifdef TARGET_MIPS64
+DEF_HELPER_FLAGS_2(lcsr_cpucfg, TCG_CALL_NO_RWG_SE, tl, env, tl)
+#endif
+
 /* Loongson multimedia functions.  */
 DEF_HELPER_FLAGS_2(paddsh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(paddush, TCG_CALL_NO_RWG_SE, i64, i64, i64)
diff --git a/target/mips/internal.h b/target/mips/internal.h
index 4b0031d10d..1d0c026c7d 100644
--- a/target/mips/internal.h
+++ b/target/mips/internal.h
@@ -79,6 +79,8 @@ struct mips_def_t {
     int32_t CP0_PageGrain_rw_bitmask;
     int32_t CP0_PageGrain;
     target_ulong CP0_EBaseWG_rw_bitmask;
+    uint32_t lcsr_cpucfg1;
+    uint32_t lcsr_cpucfg2;
     uint64_t insn_flags;
     enum mips_mmu_types mmu_type;
     int32_t SAARP;
diff --git a/target/mips/sysemu/cp0_timer.c b/target/mips/sysemu/cp0_timer.c
index 70de95d338..9d2bcb0dea 100644
--- a/target/mips/sysemu/cp0_timer.c
+++ b/target/mips/sysemu/cp0_timer.c
@@ -28,15 +28,26 @@
 #include "internal.h"
 
 /* MIPS R4K timer */
+static uint32_t cpu_mips_get_count_val(CPUMIPSState *env)
+{
+    int64_t now_ns;
+    now_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    return env->CP0_Count +
+            (uint32_t)clock_ns_to_ticks(env->count_clock, now_ns);
+}
+
 static void cpu_mips_timer_update(CPUMIPSState *env)
 {
     uint64_t now_ns, next_ns;
     uint32_t wait;
 
     now_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-    wait = env->CP0_Compare - env->CP0_Count -
-           (uint32_t)(now_ns / env->cp0_count_ns);
-    next_ns = now_ns + (uint64_t)wait * env->cp0_count_ns;
+    wait = env->CP0_Compare - cpu_mips_get_count_val(env);
+    /* Clamp interval to overflow if virtual time had not progressed */
+    if (!wait) {
+        wait = UINT32_MAX;
+    }
+    next_ns = now_ns + clock_ticks_to_ns(env->count_clock, wait);
     timer_mod(env->timer, next_ns);
 }
 
@@ -64,7 +75,7 @@ uint32_t cpu_mips_get_count(CPUMIPSState *env)
             cpu_mips_timer_expire(env);
         }
 
-        return env->CP0_Count + (uint32_t)(now_ns / env->cp0_count_ns);
+        return cpu_mips_get_count_val(env);
     }
 }
 
@@ -79,9 +90,8 @@ void cpu_mips_store_count(CPUMIPSState *env, uint32_t count)
         env->CP0_Count = count;
     } else {
         /* Store new count register */
-        env->CP0_Count = count -
-               (uint32_t)(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
-                          env->cp0_count_ns);
+        env->CP0_Count = count - (uint32_t)clock_ns_to_ticks(env->count_clock,
+                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
         /* Update timer timer */
         cpu_mips_timer_update(env);
     }
@@ -107,8 +117,8 @@ void cpu_mips_start_count(CPUMIPSState *env)
 void cpu_mips_stop_count(CPUMIPSState *env)
 {
     /* Store the current value */
-    env->CP0_Count += (uint32_t)(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
-                                 env->cp0_count_ns);
+    env->CP0_Count += (uint32_t)clock_ns_to_ticks(env->count_clock,
+                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
 }
 
 static void mips_timer_cb(void *opaque)
@@ -121,14 +131,7 @@ static void mips_timer_cb(void *opaque)
         return;
     }
 
-    /*
-     * ??? This callback should occur when the counter is exactly equal to
-     * the comparator value.  Offset the count by one to avoid immediately
-     * retriggering the callback before any virtual time has passed.
-     */
-    env->CP0_Count++;
     cpu_mips_timer_expire(env);
-    env->CP0_Count--;
 }
 
 void cpu_mips_clock_init(MIPSCPU *cpu)
diff --git a/target/mips/tcg/lcsr.decode b/target/mips/tcg/lcsr.decode
new file mode 100644
index 0000000000..960ef8b6f9
--- /dev/null
+++ b/target/mips/tcg/lcsr.decode
@@ -0,0 +1,17 @@
+# Loongson CSR instructions
+#
+# Copyright (C) 2023 Jiaxun Yang <jiaxun.yang@flygoat.com>
+#
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+
+&r           rs rt rd sa
+
+@rs_rd       ...... rs:5 ..... rd:5 ..... ...... &r rt=0 sa=0
+
+CPUCFG       110010 ..... 01000 ..... 00100 011000 @rs_rd
+
+RDCSR        110010 ..... 00000 ..... 00100 011000 @rs_rd
+WRCSR        110010 ..... 00001 ..... 00100 011000 @rs_rd
+DRDCSR       110010 ..... 00010 ..... 00100 011000 @rs_rd
+DWRCSR       110010 ..... 00011 ..... 00100 011000 @rs_rd
diff --git a/target/mips/tcg/lcsr_translate.c b/target/mips/tcg/lcsr_translate.c
new file mode 100644
index 0000000000..9f2a5f4a37
--- /dev/null
+++ b/target/mips/tcg/lcsr_translate.c
@@ -0,0 +1,75 @@
+/*
+ * Loongson CSR instructions translation routines
+ *
+ *  Copyright (c) 2023 Jiaxun Yang <jiaxun.yang@flygoat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "tcg/tcg-op.h"
+#include "tcg/tcg-op-gvec.h"
+#include "exec/helper-gen.h"
+#include "translate.h"
+
+/* Include the auto-generated decoder.  */
+#include "decode-lcsr.c.inc"
+
+static bool trans_CPUCFG(DisasContext *ctx, arg_CPUCFG *a)
+{
+    TCGv dest = tcg_temp_new();
+    TCGv src1 = tcg_temp_new();
+
+    gen_load_gpr(src1, a->rs);
+    gen_helper_lcsr_cpucfg(dest, cpu_env, src1);
+    gen_store_gpr(dest, a->rd);
+
+    return true;
+}
+
+#ifndef CONFIG_USER_ONLY
+static bool gen_rdcsr(DisasContext *ctx, arg_r *a,
+                        void (*func)(TCGv, TCGv_ptr, TCGv))
+{
+    TCGv dest = tcg_temp_new();
+    TCGv src1 = tcg_temp_new();
+
+    check_cp0_enabled(ctx);
+    gen_load_gpr(src1, a->rs);
+    func(dest, cpu_env, src1);
+    gen_store_gpr(dest, a->rd);
+
+    return true;
+}
+
+static bool gen_wrcsr(DisasContext *ctx, arg_r *a,
+                        void (*func)(TCGv_ptr, TCGv, TCGv))
+{
+    TCGv val = tcg_temp_new();
+    TCGv addr = tcg_temp_new();
+
+    check_cp0_enabled(ctx);
+    gen_load_gpr(addr, a->rs);
+    gen_load_gpr(val, a->rd);
+    func(cpu_env, addr, val);
+
+    return true;
+}
+
+TRANS(RDCSR, gen_rdcsr, gen_helper_lcsr_rdcsr)
+TRANS(DRDCSR, gen_rdcsr, gen_helper_lcsr_drdcsr)
+TRANS(WRCSR, gen_wrcsr, gen_helper_lcsr_wrcsr)
+TRANS(DWRCSR, gen_wrcsr, gen_helper_lcsr_dwrcsr)
+#else
+#define GEN_FALSE_TRANS(name)   \
+static bool trans_##name(DisasContext *ctx, arg_##name * a)  \
+{   \
+    return false;   \
+}
+
+GEN_FALSE_TRANS(RDCSR)
+GEN_FALSE_TRANS(DRDCSR)
+GEN_FALSE_TRANS(WRCSR)
+GEN_FALSE_TRANS(DWRCSR)
+#endif
diff --git a/target/mips/tcg/meson.build b/target/mips/tcg/meson.build
index 7ee969ec8f..ea7fb582f2 100644
--- a/target/mips/tcg/meson.build
+++ b/target/mips/tcg/meson.build
@@ -4,6 +4,7 @@ gen = [
   decodetree.process('tx79.decode', extra_args: '--static-decode=decode_tx79'),
   decodetree.process('vr54xx.decode', extra_args: '--decode=decode_ext_vr54xx'),
   decodetree.process('octeon.decode', extra_args: '--decode=decode_ext_octeon'),
+  decodetree.process('lcsr.decode', extra_args: '--decode=decode_ase_lcsr'),
 ]
 
 mips_ss.add(gen)
@@ -26,6 +27,7 @@ mips_ss.add(files(
 mips_ss.add(when: 'TARGET_MIPS64', if_true: files(
   'tx79_translate.c',
   'octeon_translate.c',
+  'lcsr_translate.c',
 ), if_false: files(
   'mxu_translate.c',
 ))
diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c
index 39348b3a91..deb8060a17 100644
--- a/target/mips/tcg/mxu_translate.c
+++ b/target/mips/tcg/mxu_translate.c
@@ -237,11 +237,11 @@
  *          ├─ 001100 ─ OPC_MXU_D16MADL
  *          ├─ 001101 ─ OPC_MXU_S16MAD
  *          ├─ 001110 ─ OPC_MXU_Q16ADD
- *          ├─ 001111 ─ OPC_MXU_D16MACE     23
+ *          ├─ 001111 ─ OPC_MXU_D16MACE     20 (13..10 don't care)
  *          │                            ┌─ 0 ─ OPC_MXU_S32LDD
  *          ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR
  *          │
- *          │                               23
+ *          │                               20 (13..10 don't care)
  *          ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD
  *          │                            └─ 1 ─ OPC_MXU_S32STDR
  *          │
@@ -253,11 +253,11 @@
  *          ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV
  *          │                            └─ 0001 ─ OPC_MXU_S32STDVR
  *          │
- *          │                               23
+ *          │                               20 (13..10 don't care)
  *          ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI
  *          │                            └─ 1 ─ OPC_MXU_S32LDIR
  *          │
- *          │                               23
+ *          │                               20 (13..10 don't care)
  *          ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI
  *          │                            └─ 1 ─ OPC_MXU_S32SDIR
  *          │
@@ -268,7 +268,7 @@
  *          │                               13..10
  *          ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV
  *          │                            └─ 0001 ─ OPC_MXU_S32SDIVR
- *          ├─ 011000 ─ OPC_MXU_D32ADD
+ *          ├─ 011000 ─ OPC_MXU_D32ADD  (catches D32ADDC too)
  *          │                               23..22
  *   MXU    ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC
  * opcodes ─┤                            ├─ 01 ─ OPC_MXU_D32ACCM
@@ -277,7 +277,7 @@
  *          │                               23..22
  *          ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC
  *          │                            ├─ 01 ─ OPC_MXU_Q16ACCM
- *          │                            └─ 10 ─ OPC_MXU_Q16ASUM
+ *          │                            └─ 10 ─ OPC_MXU_D16ASUM
  *          │
  *          │                               23..22
  *          ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE
@@ -290,9 +290,9 @@
  *          ├─ 100010 ─ OPC_MXU_S8LDD
  *          ├─ 100011 ─ OPC_MXU_S8STD       15..14
  *          ├─ 100100 ─ OPC_MXU_S8LDI    ┌─ 00 ─ OPC_MXU_S32MUL
- *          ├─ 100101 ─ OPC_MXU_S8SDI    ├─ 00 ─ OPC_MXU_S32MULU
- *          │                            ├─ 00 ─ OPC_MXU_S32EXTR
- *          ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 00 ─ OPC_MXU_S32EXTRV
+ *          ├─ 100101 ─ OPC_MXU_S8SDI    ├─ 01 ─ OPC_MXU_S32MULU
+ *          │                            ├─ 10 ─ OPC_MXU_S32EXTR
+ *          ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV
  *          │
  *          │                               20..18
  *          ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW
@@ -304,7 +304,7 @@
  *          │                            ├─ 110 ─ OPC_MXU_S32OR
  *          │                            └─ 111 ─ OPC_MXU_S32XOR
  *          │
- *          │                               7..5
+ *          │                               8..6
  *          ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB
  *          │                            ├─ 001 ─ OPC_MXU_LXH
  *          ├─ 101001 ─ <not assigned>   ├─ 011 ─ OPC_MXU_LXW
@@ -318,15 +318,15 @@
  *          ├─ 110001 ─ OPC_MXU_D32SLR      20..18
  *          ├─ 110010 ─ OPC_MXU_D32SARL  ┌─ 000 ─ OPC_MXU_D32SLLV
  *          ├─ 110011 ─ OPC_MXU_D32SAR   ├─ 001 ─ OPC_MXU_D32SLRV
- *          ├─ 110100 ─ OPC_MXU_Q16SLL   ├─ 010 ─ OPC_MXU_D32SARV
- *          ├─ 110101 ─ OPC_MXU_Q16SLR   ├─ 011 ─ OPC_MXU_Q16SLLV
- *          │                            ├─ 100 ─ OPC_MXU_Q16SLRV
- *          ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 101 ─ OPC_MXU_Q16SARV
+ *          ├─ 110100 ─ OPC_MXU_Q16SLL   ├─ 011 ─ OPC_MXU_D32SARV
+ *          ├─ 110101 ─ OPC_MXU_Q16SLR   ├─ 100 ─ OPC_MXU_Q16SLLV
+ *          │                            ├─ 101 ─ OPC_MXU_Q16SLRV
+ *          ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 111 ─ OPC_MXU_Q16SARV
  *          │
  *          ├─ 110111 ─ OPC_MXU_Q16SAR
  *          │                               23..22
  *          ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
- *          │                            └─ 01 ─ OPC_MXU_Q8MULSU
+ *          │                            └─ 10 ─ OPC_MXU_Q8MULSU
  *          │
  *          │                               20..18
  *          ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
@@ -353,15 +353,62 @@
  */
 
 enum {
+    OPC_MXU_S32MADD  = 0x00,
+    OPC_MXU_S32MADDU = 0x01,
     OPC_MXU__POOL00  = 0x03,
+    OPC_MXU_S32MSUB  = 0x04,
+    OPC_MXU_S32MSUBU = 0x05,
+    OPC_MXU__POOL01  = 0x06,
+    OPC_MXU__POOL02  = 0x07,
     OPC_MXU_D16MUL   = 0x08,
+    OPC_MXU__POOL03  = 0x09,
     OPC_MXU_D16MAC   = 0x0A,
+    OPC_MXU_D16MACF  = 0x0B,
+    OPC_MXU_D16MADL  = 0x0C,
+    OPC_MXU_S16MAD   = 0x0D,
+    OPC_MXU_Q16ADD   = 0x0E,
+    OPC_MXU_D16MACE  = 0x0F,
     OPC_MXU__POOL04  = 0x10,
+    OPC_MXU__POOL05  = 0x11,
+    OPC_MXU__POOL06  = 0x12,
+    OPC_MXU__POOL07  = 0x13,
+    OPC_MXU__POOL08  = 0x14,
+    OPC_MXU__POOL09  = 0x15,
+    OPC_MXU__POOL10  = 0x16,
+    OPC_MXU__POOL11  = 0x17,
+    OPC_MXU_D32ADD   = 0x18,
+    OPC_MXU__POOL12  = 0x19,
+    OPC_MXU__POOL13  = 0x1B,
+    OPC_MXU__POOL14  = 0x1C,
+    OPC_MXU_Q8ACCE   = 0x1D,
     OPC_MXU_S8LDD    = 0x22,
+    OPC_MXU_S8STD    = 0x23,
+    OPC_MXU_S8LDI    = 0x24,
+    OPC_MXU_S8SDI    = 0x25,
+    OPC_MXU__POOL15  = 0x26,
     OPC_MXU__POOL16  = 0x27,
+    OPC_MXU__POOL17  = 0x28,
+    OPC_MXU_S16LDD   = 0x2A,
+    OPC_MXU_S16STD   = 0x2B,
+    OPC_MXU_S16LDI   = 0x2C,
+    OPC_MXU_S16SDI   = 0x2D,
     OPC_MXU_S32M2I   = 0x2E,
     OPC_MXU_S32I2M   = 0x2F,
+    OPC_MXU_D32SLL   = 0x30,
+    OPC_MXU_D32SLR   = 0x31,
+    OPC_MXU_D32SARL  = 0x32,
+    OPC_MXU_D32SAR   = 0x33,
+    OPC_MXU_Q16SLL   = 0x34,
+    OPC_MXU_Q16SLR   = 0x35,
+    OPC_MXU__POOL18  = 0x36,
+    OPC_MXU_Q16SAR   = 0x37,
     OPC_MXU__POOL19  = 0x38,
+    OPC_MXU__POOL20  = 0x39,
+    OPC_MXU__POOL21  = 0x3A,
+    OPC_MXU_Q16SCOP  = 0x3B,
+    OPC_MXU_Q8MADL   = 0x3C,
+    OPC_MXU_S32SFL   = 0x3D,
+    OPC_MXU_Q8SAD    = 0x3E,
 };
 
 
@@ -375,21 +422,94 @@ enum {
     OPC_MXU_D16MIN   = 0x03,
     OPC_MXU_Q8MAX    = 0x04,
     OPC_MXU_Q8MIN    = 0x05,
+    OPC_MXU_Q8SLT    = 0x06,
+    OPC_MXU_Q8SLTU   = 0x07,
 };
 
 /*
- * MXU pool 04
+ * MXU pool 01
  */
 enum {
-    OPC_MXU_S32LDD   = 0x00,
-    OPC_MXU_S32LDDR  = 0x01,
+    OPC_MXU_S32SLT   = 0x00,
+    OPC_MXU_D16SLT   = 0x01,
+    OPC_MXU_D16AVG   = 0x02,
+    OPC_MXU_D16AVGR  = 0x03,
+    OPC_MXU_Q8AVG    = 0x04,
+    OPC_MXU_Q8AVGR   = 0x05,
+    OPC_MXU_Q8ADD    = 0x07,
+};
+
+/*
+ * MXU pool 02
+ */
+enum {
+    OPC_MXU_S32CPS   = 0x00,
+    OPC_MXU_D16CPS   = 0x02,
+    OPC_MXU_Q8ABD    = 0x04,
+    OPC_MXU_Q16SAT   = 0x06,
+};
+
+/*
+ * MXU pool 03
+ */
+enum {
+    OPC_MXU_D16MULF  = 0x00,
+    OPC_MXU_D16MULE  = 0x01,
+};
+
+/*
+ * MXU pool 04 05 06 07 08 09 10 11
+ */
+enum {
+    OPC_MXU_S32LDST  = 0x00,
+    OPC_MXU_S32LDSTR = 0x01,
+};
+
+/*
+ * MXU pool 12
+ */
+enum {
+    OPC_MXU_D32ACC    = 0x00,
+    OPC_MXU_D32ACCM   = 0x01,
+    OPC_MXU_D32ASUM   = 0x02,
+};
+
+/*
+ * MXU pool 13
+ */
+enum {
+    OPC_MXU_Q16ACC    = 0x00,
+    OPC_MXU_Q16ACCM   = 0x01,
+    OPC_MXU_D16ASUM   = 0x02,
+};
+
+/*
+ * MXU pool 14
+ */
+enum {
+    OPC_MXU_Q8ADDE    = 0x00,
+    OPC_MXU_D8SUM     = 0x01,
+    OPC_MXU_D8SUMC    = 0x02,
+};
+
+/*
+ * MXU pool 15
+ */
+enum {
+    OPC_MXU_S32MUL    = 0x00,
+    OPC_MXU_S32MULU   = 0x01,
+    OPC_MXU_S32EXTR   = 0x02,
+    OPC_MXU_S32EXTRV  = 0x03,
 };
 
 /*
  * MXU pool 16
  */
 enum {
+    OPC_MXU_D32SARW  = 0x00,
+    OPC_MXU_S32ALN   = 0x01,
     OPC_MXU_S32ALNI  = 0x02,
+    OPC_MXU_S32LUI   = 0x03,
     OPC_MXU_S32NOR   = 0x04,
     OPC_MXU_S32AND   = 0x05,
     OPC_MXU_S32OR    = 0x06,
@@ -397,13 +517,57 @@ enum {
 };
 
 /*
+ * MXU pool 17
+ */
+enum {
+    OPC_MXU_LXB      = 0x00,
+    OPC_MXU_LXH      = 0x01,
+    OPC_MXU_LXW      = 0x03,
+    OPC_MXU_LXBU     = 0x04,
+    OPC_MXU_LXHU     = 0x05,
+};
+
+/*
+ * MXU pool 18
+ */
+enum {
+    OPC_MXU_D32SLLV  = 0x00,
+    OPC_MXU_D32SLRV  = 0x01,
+    OPC_MXU_D32SARV  = 0x03,
+    OPC_MXU_Q16SLLV  = 0x04,
+    OPC_MXU_Q16SLRV  = 0x05,
+    OPC_MXU_Q16SARV  = 0x07,
+};
+
+/*
  * MXU pool 19
  */
 enum {
     OPC_MXU_Q8MUL    = 0x00,
-    OPC_MXU_Q8MULSU  = 0x01,
+    OPC_MXU_Q8MULSU  = 0x02,
 };
 
+/*
+ * MXU pool 20
+ */
+enum {
+    OPC_MXU_Q8MOVZ   = 0x00,
+    OPC_MXU_Q8MOVN   = 0x01,
+    OPC_MXU_D16MOVZ  = 0x02,
+    OPC_MXU_D16MOVN  = 0x03,
+    OPC_MXU_S32MOVZ  = 0x04,
+    OPC_MXU_S32MOVN  = 0x05,
+};
+
+/*
+ * MXU pool 21
+ */
+enum {
+    OPC_MXU_Q8MAC    = 0x00,
+    OPC_MXU_Q8MACSU  = 0x02,
+};
+
+
 /* MXU accumulate add/subtract 1-bit pattern 'aptn1' */
 #define MXU_APTN1_A    0
 #define MXU_APTN1_S    1
@@ -537,8 +701,11 @@ static void gen_mxu_s32m2i(DisasContext *ctx)
 
 /*
  * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
+ *
+ * S8LDI XRa, Rb, s8, optn3 - Load a byte from memory to XRF,
+ * post modify address register
  */
-static void gen_mxu_s8ldd(DisasContext *ctx)
+static void gen_mxu_s8ldd(DisasContext *ctx, bool postmodify)
 {
     TCGv t0, t1;
     uint32_t XRa, Rb, s8, optn3;
@@ -553,6 +720,9 @@ static void gen_mxu_s8ldd(DisasContext *ctx)
 
     gen_load_gpr(t0, Rb);
     tcg_gen_addi_tl(t0, t0, (int8_t)s8);
+    if (postmodify) {
+        gen_store_gpr(t0, Rb);
+    }
 
     switch (optn3) {
     /* XRa[7:0] = tmp8 */
@@ -610,9 +780,208 @@ static void gen_mxu_s8ldd(DisasContext *ctx)
 }
 
 /*
- * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
+ * S8STD XRa, Rb, s8, optn3 - Store a byte from XRF to memory
+ *
+ * S8SDI XRa, Rb, s8, optn3 - Store a byte from XRF to memory,
+ * post modify address register
  */
-static void gen_mxu_d16mul(DisasContext *ctx)
+static void gen_mxu_s8std(DisasContext *ctx, bool postmodify)
+{
+    TCGv t0, t1;
+    uint32_t XRa, Rb, s8, optn3;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    s8 = extract32(ctx->opcode, 10, 8);
+    optn3 = extract32(ctx->opcode, 18, 3);
+    Rb = extract32(ctx->opcode, 21, 5);
+
+    if (optn3 > 3) {
+        /* reserved, do nothing */
+        return;
+    }
+
+    gen_load_gpr(t0, Rb);
+    tcg_gen_addi_tl(t0, t0, (int8_t)s8);
+    if (postmodify) {
+        gen_store_gpr(t0, Rb);
+    }
+    gen_load_mxu_gpr(t1, XRa);
+
+    switch (optn3) {
+    /* XRa[7:0] => tmp8 */
+    case MXU_OPTN3_PTN0:
+        tcg_gen_extract_tl(t1, t1, 0, 8);
+        break;
+    /* XRa[15:8] => tmp8 */
+    case MXU_OPTN3_PTN1:
+        tcg_gen_extract_tl(t1, t1, 8, 8);
+        break;
+    /* XRa[23:16] => tmp8 */
+    case MXU_OPTN3_PTN2:
+        tcg_gen_extract_tl(t1, t1, 16, 8);
+        break;
+    /* XRa[31:24] => tmp8 */
+    case MXU_OPTN3_PTN3:
+        tcg_gen_extract_tl(t1, t1, 24, 8);
+        break;
+    }
+
+    tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UB);
+}
+
+/*
+ * S16LDD XRa, Rb, s10, optn2 - Load a halfword from memory to XRF
+ *
+ * S16LDI XRa, Rb, s10, optn2 - Load a halfword from memory to XRF,
+ * post modify address register
+ */
+static void gen_mxu_s16ldd(DisasContext *ctx, bool postmodify)
+{
+    TCGv t0, t1;
+    uint32_t XRa, Rb, optn2;
+    int32_t s10;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+
+    XRa   = extract32(ctx->opcode,   6, 4);
+    s10   = sextract32(ctx->opcode, 10, 9) * 2;
+    optn2 = extract32(ctx->opcode,  19, 2);
+    Rb    = extract32(ctx->opcode,  21, 5);
+
+    gen_load_gpr(t0, Rb);
+    tcg_gen_addi_tl(t0, t0, s10);
+    if (postmodify) {
+        gen_store_gpr(t0, Rb);
+    }
+
+    switch (optn2) {
+    /* XRa[15:0] = tmp16 */
+    case MXU_OPTN2_PTN0:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
+        gen_load_mxu_gpr(t0, XRa);
+        tcg_gen_deposit_tl(t0, t0, t1, 0, 16);
+        break;
+    /* XRa[31:16] = tmp16 */
+    case MXU_OPTN2_PTN1:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
+        gen_load_mxu_gpr(t0, XRa);
+        tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
+        break;
+    /* XRa = sign_extend(tmp16) */
+    case MXU_OPTN2_PTN2:
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SW);
+        break;
+    /* XRa = {tmp16, tmp16} */
+    case MXU_OPTN2_PTN3:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
+        tcg_gen_deposit_tl(t0, t1, t1,  0, 16);
+        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
+        break;
+    }
+
+    gen_store_mxu_gpr(t0, XRa);
+}
+
+/*
+ * S16STD XRa, Rb, s8, optn2 - Store a byte from XRF to memory
+ *
+ * S16SDI XRa, Rb, s8, optn2 - Store a byte from XRF to memory,
+ * post modify address register
+ */
+static void gen_mxu_s16std(DisasContext *ctx, bool postmodify)
+{
+    TCGv t0, t1;
+    uint32_t XRa, Rb, optn2;
+    int32_t s10;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    s10 = sextract32(ctx->opcode, 10, 9) * 2;
+    optn2 = extract32(ctx->opcode, 19, 2);
+    Rb = extract32(ctx->opcode, 21, 5);
+
+    if (optn2 > 1) {
+        /* reserved, do nothing */
+        return;
+    }
+
+    gen_load_gpr(t0, Rb);
+    tcg_gen_addi_tl(t0, t0, s10);
+    if (postmodify) {
+        gen_store_gpr(t0, Rb);
+    }
+    gen_load_mxu_gpr(t1, XRa);
+
+    switch (optn2) {
+    /* XRa[15:0] => tmp16 */
+    case MXU_OPTN2_PTN0:
+        tcg_gen_extract_tl(t1, t1, 0, 16);
+        break;
+    /* XRa[31:16] => tmp16 */
+    case MXU_OPTN2_PTN1:
+        tcg_gen_extract_tl(t1, t1, 16, 16);
+        break;
+    }
+
+    tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UW);
+}
+
+/*
+ * S32MUL  XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication
+ * of GPR's and stores result into pair of MXU registers.
+ * It strains HI and LO registers.
+ *
+ * S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication
+ * of GPR's and stores result into pair of MXU registers.
+ * It strains HI and LO registers.
+ */
+static void gen_mxu_s32mul(DisasContext *ctx, bool mulu)
+{
+    TCGv t0, t1;
+    uint32_t XRa, XRd, rs, rt;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode,  6, 4);
+    XRd = extract32(ctx->opcode, 10, 4);
+    rs  = extract32(ctx->opcode, 16, 5);
+    rt  = extract32(ctx->opcode, 21, 5);
+
+    if (unlikely(rs == 0 || rt == 0)) {
+        tcg_gen_movi_tl(t0, 0);
+        tcg_gen_movi_tl(t1, 0);
+    } else {
+        gen_load_gpr(t0, rs);
+        gen_load_gpr(t1, rt);
+
+        if (mulu) {
+            tcg_gen_mulu2_tl(t0, t1, t0, t1);
+        } else {
+            tcg_gen_muls2_tl(t0, t1, t0, t1);
+        }
+    }
+    tcg_gen_mov_tl(cpu_HI[0], t1);
+    tcg_gen_mov_tl(cpu_LO[0], t0);
+    gen_store_mxu_gpr(t1, XRa);
+    gen_store_mxu_gpr(t0, XRd);
+}
+
+/*
+ * D16MUL  XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
+ * D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication
+ *   with rounding and packing result
+ * D16MULE XRa, XRb, XRc, XRd, optn2 - Signed Q15 fraction pattern
+ *   multiplication with rounding
+ */
+static void gen_mxu_d16mul(DisasContext *ctx, bool fractional,
+                           bool packed_result)
 {
     TCGv t0, t1, t2, t3;
     uint32_t XRa, XRb, XRc, XRd, optn2;
@@ -628,6 +997,12 @@ static void gen_mxu_d16mul(DisasContext *ctx)
     XRd = extract32(ctx->opcode, 18, 4);
     optn2 = extract32(ctx->opcode, 22, 2);
 
+    /*
+     * TODO: XRd field isn't used for D16MULF
+     * There's no knowledge how this field affect
+     * instruction decoding/behavior
+     */
+
     gen_load_mxu_gpr(t1, XRb);
     tcg_gen_sextract_tl(t0, t1, 0, 16);
     tcg_gen_sextract_tl(t1, t1, 16, 16);
@@ -653,15 +1028,64 @@ static void gen_mxu_d16mul(DisasContext *ctx)
         tcg_gen_mul_tl(t2, t1, t2);
         break;
     }
-    gen_store_mxu_gpr(t3, XRa);
-    gen_store_mxu_gpr(t2, XRd);
+    if (fractional) {
+        TCGLabel *l_done = gen_new_label();
+        TCGv rounding = tcg_temp_new();
+
+        tcg_gen_shli_tl(t3, t3, 1);
+        tcg_gen_shli_tl(t2, t2, 1);
+        tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
+        tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
+        if (packed_result) {
+            TCGLabel *l_apply_bias_l = gen_new_label();
+            TCGLabel *l_apply_bias_r = gen_new_label();
+            TCGLabel *l_half_done = gen_new_label();
+            TCGv bias = tcg_temp_new();
+
+            /*
+             * D16MULF supports unbiased rounding aka "bankers rounding",
+             * "round to even", "convergent rounding"
+             */
+            tcg_gen_andi_tl(bias, mxu_CR, 0x4);
+            tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
+            tcg_gen_andi_tl(t0, t3, 0x1ffff);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
+            gen_set_label(l_apply_bias_l);
+            tcg_gen_addi_tl(t3, t3, 0x8000);
+            gen_set_label(l_half_done);
+            tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
+            tcg_gen_andi_tl(t0, t2, 0x1ffff);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
+            gen_set_label(l_apply_bias_r);
+            tcg_gen_addi_tl(t2, t2, 0x8000);
+        } else {
+            /* D16MULE doesn't support unbiased rounding */
+            tcg_gen_addi_tl(t3, t3, 0x8000);
+            tcg_gen_addi_tl(t2, t2, 0x8000);
+        }
+        gen_set_label(l_done);
+    }
+    if (!packed_result) {
+        gen_store_mxu_gpr(t3, XRa);
+        gen_store_mxu_gpr(t2, XRd);
+    } else {
+        tcg_gen_andi_tl(t3, t3, 0xffff0000);
+        tcg_gen_shri_tl(t2, t2, 16);
+        tcg_gen_or_tl(t3, t3, t2);
+        gen_store_mxu_gpr(t3, XRa);
+    }
 }
 
 /*
- * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 - Signed 16 bit pattern multiply
- *                                           and accumulate
+ * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
+ *   Signed 16 bit pattern multiply and accumulate
+ * D16MACF XRa, XRb, XRc, aptn2, optn2
+ *   Signed Q15 fraction pattern multiply accumulate and pack
+ * D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
+ *   Signed Q15 fraction pattern multiply and accumulate
  */
-static void gen_mxu_d16mac(DisasContext *ctx)
+static void gen_mxu_d16mac(DisasContext *ctx, bool fractional,
+                           bool packed_result)
 {
     TCGv t0, t1, t2, t3;
     uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
@@ -704,6 +1128,11 @@ static void gen_mxu_d16mac(DisasContext *ctx)
         tcg_gen_mul_tl(t2, t1, t2);
         break;
     }
+
+    if (fractional) {
+        tcg_gen_shli_tl(t3, t3, 1);
+        tcg_gen_shli_tl(t2, t2, 1);
+    }
     gen_load_mxu_gpr(t0, XRa);
     gen_load_mxu_gpr(t1, XRd);
 
@@ -725,18 +1154,205 @@ static void gen_mxu_d16mac(DisasContext *ctx)
         tcg_gen_sub_tl(t2, t1, t2);
         break;
     }
-    gen_store_mxu_gpr(t3, XRa);
-    gen_store_mxu_gpr(t2, XRd);
+
+    if (fractional) {
+        TCGLabel *l_done = gen_new_label();
+        TCGv rounding = tcg_temp_new();
+
+        tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
+        tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
+        if (packed_result) {
+            TCGLabel *l_apply_bias_l = gen_new_label();
+            TCGLabel *l_apply_bias_r = gen_new_label();
+            TCGLabel *l_half_done = gen_new_label();
+            TCGv bias = tcg_temp_new();
+
+            /*
+             * D16MACF supports unbiased rounding aka "bankers rounding",
+             * "round to even", "convergent rounding"
+             */
+            tcg_gen_andi_tl(bias, mxu_CR, 0x4);
+            tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
+            tcg_gen_andi_tl(t0, t3, 0x1ffff);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
+            gen_set_label(l_apply_bias_l);
+            tcg_gen_addi_tl(t3, t3, 0x8000);
+            gen_set_label(l_half_done);
+            tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
+            tcg_gen_andi_tl(t0, t2, 0x1ffff);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
+            gen_set_label(l_apply_bias_r);
+            tcg_gen_addi_tl(t2, t2, 0x8000);
+        } else {
+            /* D16MACE doesn't support unbiased rounding */
+            tcg_gen_addi_tl(t3, t3, 0x8000);
+            tcg_gen_addi_tl(t2, t2, 0x8000);
+        }
+        gen_set_label(l_done);
+    }
+
+    if (!packed_result) {
+        gen_store_mxu_gpr(t3, XRa);
+        gen_store_mxu_gpr(t2, XRd);
+    } else {
+        tcg_gen_andi_tl(t3, t3, 0xffff0000);
+        tcg_gen_shri_tl(t2, t2, 16);
+        tcg_gen_or_tl(t3, t3, t2);
+        gen_store_mxu_gpr(t3, XRa);
+    }
 }
 
 /*
- * Q8MUL   XRa, XRb, XRc, XRd - Parallel unsigned 8 bit pattern multiply
- * Q8MULSU XRa, XRb, XRc, XRd - Parallel signed 8 bit pattern multiply
+ * D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 - Double packed
+ * unsigned 16 bit pattern multiply and add/subtract.
  */
-static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx)
+static void gen_mxu_d16madl(DisasContext *ctx)
+{
+    TCGv t0, t1, t2, t3;
+    uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    t2 = tcg_temp_new();
+    t3 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRd = extract32(ctx->opcode, 18, 4);
+    optn2 = extract32(ctx->opcode, 22, 2);
+    aptn2 = extract32(ctx->opcode, 24, 2);
+
+    gen_load_mxu_gpr(t1, XRb);
+    tcg_gen_sextract_tl(t0, t1,  0, 16);
+    tcg_gen_sextract_tl(t1, t1, 16, 16);
+
+    gen_load_mxu_gpr(t3, XRc);
+    tcg_gen_sextract_tl(t2, t3,  0, 16);
+    tcg_gen_sextract_tl(t3, t3, 16, 16);
+
+    switch (optn2) {
+    case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t1, t3);
+        tcg_gen_mul_tl(t2, t0, t2);
+        break;
+    case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t0, t3);
+        tcg_gen_mul_tl(t2, t0, t2);
+        break;
+    case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t1, t3);
+        tcg_gen_mul_tl(t2, t1, t2);
+        break;
+    case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t0, t3);
+        tcg_gen_mul_tl(t2, t1, t2);
+        break;
+    }
+    tcg_gen_extract_tl(t2, t2, 0, 16);
+    tcg_gen_extract_tl(t3, t3, 0, 16);
+
+    gen_load_mxu_gpr(t1, XRa);
+    tcg_gen_extract_tl(t0, t1,  0, 16);
+    tcg_gen_extract_tl(t1, t1, 16, 16);
+
+    switch (aptn2) {
+    case MXU_APTN2_AA:
+        tcg_gen_add_tl(t3, t1, t3);
+        tcg_gen_add_tl(t2, t0, t2);
+        break;
+    case MXU_APTN2_AS:
+        tcg_gen_add_tl(t3, t1, t3);
+        tcg_gen_sub_tl(t2, t0, t2);
+        break;
+    case MXU_APTN2_SA:
+        tcg_gen_sub_tl(t3, t1, t3);
+        tcg_gen_add_tl(t2, t0, t2);
+        break;
+    case MXU_APTN2_SS:
+        tcg_gen_sub_tl(t3, t1, t3);
+        tcg_gen_sub_tl(t2, t0, t2);
+        break;
+    }
+
+    tcg_gen_andi_tl(t2, t2, 0xffff);
+    tcg_gen_shli_tl(t3, t3, 16);
+    tcg_gen_or_tl(mxu_gpr[XRd - 1], t3, t2);
+}
+
+/*
+ * S16MAD XRa, XRb, XRc, XRd, aptn2, optn2 - Single packed
+ * signed 16 bit pattern multiply and 32-bit add/subtract.
+ */
+static void gen_mxu_s16mad(DisasContext *ctx)
+{
+    TCGv t0, t1;
+    uint32_t XRa, XRb, XRc, XRd, optn2, aptn1, pad;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRd = extract32(ctx->opcode, 18, 4);
+    optn2 = extract32(ctx->opcode, 22, 2);
+    aptn1 = extract32(ctx->opcode, 24, 1);
+    pad = extract32(ctx->opcode, 25, 1);
+
+    if (pad) {
+        /* FIXME check if it influence the result */
+    }
+
+    gen_load_mxu_gpr(t0, XRb);
+    gen_load_mxu_gpr(t1, XRc);
+
+    switch (optn2) {
+    case MXU_OPTN2_WW: /* XRB.H*XRC.H */
+        tcg_gen_sextract_tl(t0, t0, 16, 16);
+        tcg_gen_sextract_tl(t1, t1, 16, 16);
+        break;
+    case MXU_OPTN2_LW: /* XRB.L*XRC.L */
+        tcg_gen_sextract_tl(t0, t0,  0, 16);
+        tcg_gen_sextract_tl(t1, t1,  0, 16);
+        break;
+    case MXU_OPTN2_HW: /* XRB.H*XRC.L */
+        tcg_gen_sextract_tl(t0, t0, 16, 16);
+        tcg_gen_sextract_tl(t1, t1,  0, 16);
+        break;
+    case MXU_OPTN2_XW: /* XRB.L*XRC.H */
+        tcg_gen_sextract_tl(t0, t0,  0, 16);
+        tcg_gen_sextract_tl(t1, t1, 16, 16);
+        break;
+    }
+    tcg_gen_mul_tl(t0, t0, t1);
+
+    gen_load_mxu_gpr(t1, XRa);
+
+    switch (aptn1) {
+    case MXU_APTN1_A:
+        tcg_gen_add_tl(t1, t1, t0);
+        break;
+    case MXU_APTN1_S:
+        tcg_gen_sub_tl(t1, t1, t0);
+        break;
+    }
+
+    gen_store_mxu_gpr(t1, XRd);
+}
+
+/*
+ * Q8MUL   XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
+ * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
+ * Q8MAC   XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
+ *   and accumulate
+ * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
+ *   and accumulate
+ */
+static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac)
 {
     TCGv t0, t1, t2, t3, t4, t5, t6, t7;
-    uint32_t XRa, XRb, XRc, XRd, sel;
+    uint32_t XRa, XRb, XRc, XRd, aptn2;
 
     t0 = tcg_temp_new();
     t1 = tcg_temp_new();
@@ -751,90 +1367,311 @@ static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx)
     XRb = extract32(ctx->opcode, 10, 4);
     XRc = extract32(ctx->opcode, 14, 4);
     XRd = extract32(ctx->opcode, 18, 4);
-    sel = extract32(ctx->opcode, 22, 2);
+    aptn2 = extract32(ctx->opcode, 24, 2);
 
     gen_load_mxu_gpr(t3, XRb);
     gen_load_mxu_gpr(t7, XRc);
 
-    if (sel == 0x2) {
-        /* Q8MULSU */
-        tcg_gen_ext8s_tl(t0, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8s_tl(t1, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8s_tl(t2, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8s_tl(t3, t3);
+    if (su) {
+        /* Q8MULSU / Q8MACSU */
+        tcg_gen_sextract_tl(t0, t3,  0, 8);
+        tcg_gen_sextract_tl(t1, t3,  8, 8);
+        tcg_gen_sextract_tl(t2, t3, 16, 8);
+        tcg_gen_sextract_tl(t3, t3, 24, 8);
     } else {
-        /* Q8MUL */
-        tcg_gen_ext8u_tl(t0, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8u_tl(t1, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8u_tl(t2, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8u_tl(t3, t3);
-    }
-
-    tcg_gen_ext8u_tl(t4, t7);
-    tcg_gen_shri_tl(t7, t7, 8);
-    tcg_gen_ext8u_tl(t5, t7);
-    tcg_gen_shri_tl(t7, t7, 8);
-    tcg_gen_ext8u_tl(t6, t7);
-    tcg_gen_shri_tl(t7, t7, 8);
-    tcg_gen_ext8u_tl(t7, t7);
+        /* Q8MUL / Q8MAC */
+        tcg_gen_extract_tl(t0, t3,  0, 8);
+        tcg_gen_extract_tl(t1, t3,  8, 8);
+        tcg_gen_extract_tl(t2, t3, 16, 8);
+        tcg_gen_extract_tl(t3, t3, 24, 8);
+    }
+
+    tcg_gen_extract_tl(t4, t7,  0, 8);
+    tcg_gen_extract_tl(t5, t7,  8, 8);
+    tcg_gen_extract_tl(t6, t7, 16, 8);
+    tcg_gen_extract_tl(t7, t7, 24, 8);
 
     tcg_gen_mul_tl(t0, t0, t4);
     tcg_gen_mul_tl(t1, t1, t5);
     tcg_gen_mul_tl(t2, t2, t6);
     tcg_gen_mul_tl(t3, t3, t7);
 
-    tcg_gen_andi_tl(t0, t0, 0xFFFF);
-    tcg_gen_andi_tl(t1, t1, 0xFFFF);
-    tcg_gen_andi_tl(t2, t2, 0xFFFF);
-    tcg_gen_andi_tl(t3, t3, 0xFFFF);
-
-    tcg_gen_shli_tl(t1, t1, 16);
-    tcg_gen_shli_tl(t3, t3, 16);
+    if (mac) {
+        gen_load_mxu_gpr(t4, XRd);
+        gen_load_mxu_gpr(t5, XRa);
+        tcg_gen_extract_tl(t6, t4,  0, 16);
+        tcg_gen_extract_tl(t7, t4, 16, 16);
+        if (aptn2 & 1) {
+            tcg_gen_sub_tl(t0, t6, t0);
+            tcg_gen_sub_tl(t1, t7, t1);
+        } else {
+            tcg_gen_add_tl(t0, t6, t0);
+            tcg_gen_add_tl(t1, t7, t1);
+        }
+        tcg_gen_extract_tl(t6, t5,  0, 16);
+        tcg_gen_extract_tl(t7, t5, 16, 16);
+        if (aptn2 & 2) {
+            tcg_gen_sub_tl(t2, t6, t2);
+            tcg_gen_sub_tl(t3, t7, t3);
+        } else {
+            tcg_gen_add_tl(t2, t6, t2);
+            tcg_gen_add_tl(t3, t7, t3);
+        }
+    }
 
-    tcg_gen_or_tl(t0, t0, t1);
-    tcg_gen_or_tl(t1, t2, t3);
+    tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
+    tcg_gen_deposit_tl(t1, t2, t3, 16, 16);
 
     gen_store_mxu_gpr(t0, XRd);
     gen_store_mxu_gpr(t1, XRa);
 }
 
 /*
+ * Q8MADL  XRd, XRa, XRb, XRc
+ *   Parallel quad unsigned 8 bit multiply and accumulate.
+ *   e.g. XRd[0..3] = XRa[0..3] + XRb[0..3] * XRc[0..3]
+ */
+static void gen_mxu_q8madl(DisasContext *ctx)
+{
+    TCGv t0, t1, t2, t3, t4, t5, t6, t7;
+    uint32_t XRa, XRb, XRc, XRd, aptn2;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    t2 = tcg_temp_new();
+    t3 = tcg_temp_new();
+    t4 = tcg_temp_new();
+    t5 = tcg_temp_new();
+    t6 = tcg_temp_new();
+    t7 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRd = extract32(ctx->opcode, 18, 4);
+    aptn2 = extract32(ctx->opcode, 24, 2);
+
+    gen_load_mxu_gpr(t3, XRb);
+    gen_load_mxu_gpr(t7, XRc);
+
+    tcg_gen_extract_tl(t0, t3,  0, 8);
+    tcg_gen_extract_tl(t1, t3,  8, 8);
+    tcg_gen_extract_tl(t2, t3, 16, 8);
+    tcg_gen_extract_tl(t3, t3, 24, 8);
+
+    tcg_gen_extract_tl(t4, t7,  0, 8);
+    tcg_gen_extract_tl(t5, t7,  8, 8);
+    tcg_gen_extract_tl(t6, t7, 16, 8);
+    tcg_gen_extract_tl(t7, t7, 24, 8);
+
+    tcg_gen_mul_tl(t0, t0, t4);
+    tcg_gen_mul_tl(t1, t1, t5);
+    tcg_gen_mul_tl(t2, t2, t6);
+    tcg_gen_mul_tl(t3, t3, t7);
+
+    gen_load_mxu_gpr(t4, XRa);
+    tcg_gen_extract_tl(t6, t4, 0, 8);
+    tcg_gen_extract_tl(t7, t4, 8, 8);
+    if (aptn2 & 1) {
+        tcg_gen_sub_tl(t0, t6, t0);
+        tcg_gen_sub_tl(t1, t7, t1);
+    } else {
+        tcg_gen_add_tl(t0, t6, t0);
+        tcg_gen_add_tl(t1, t7, t1);
+    }
+    tcg_gen_extract_tl(t6, t4, 16, 8);
+    tcg_gen_extract_tl(t7, t4, 24, 8);
+    if (aptn2 & 2) {
+        tcg_gen_sub_tl(t2, t6, t2);
+        tcg_gen_sub_tl(t3, t7, t3);
+    } else {
+        tcg_gen_add_tl(t2, t6, t2);
+        tcg_gen_add_tl(t3, t7, t3);
+    }
+
+    tcg_gen_andi_tl(t5, t0, 0xff);
+    tcg_gen_deposit_tl(t5, t5, t1,  8, 8);
+    tcg_gen_deposit_tl(t5, t5, t2, 16, 8);
+    tcg_gen_deposit_tl(t5, t5, t3, 24, 8);
+
+    gen_store_mxu_gpr(t5, XRd);
+}
+
+/*
  * S32LDD  XRa, Rb, S12 - Load a word from memory to XRF
- * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF, reversed byte seq.
+ * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF
+ *   in reversed byte seq.
+ * S32LDI  XRa, Rb, S12 - Load a word from memory to XRF,
+ *   post modify base address GPR.
+ * S32LDIR XRa, Rb, S12 - Load a word from memory to XRF,
+ *   post modify base address GPR and load in reversed byte seq.
  */
-static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx)
+static void gen_mxu_s32ldxx(DisasContext *ctx, bool reversed, bool postinc)
 {
     TCGv t0, t1;
-    uint32_t XRa, Rb, s12, sel;
+    uint32_t XRa, Rb, s12;
 
     t0 = tcg_temp_new();
     t1 = tcg_temp_new();
 
     XRa = extract32(ctx->opcode, 6, 4);
-    s12 = extract32(ctx->opcode, 10, 10);
-    sel = extract32(ctx->opcode, 20, 1);
+    s12 = sextract32(ctx->opcode, 10, 10);
     Rb = extract32(ctx->opcode, 21, 5);
 
     gen_load_gpr(t0, Rb);
+    tcg_gen_movi_tl(t1, s12 * 4);
+    tcg_gen_add_tl(t0, t0, t1);
+
+    tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
+                       (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
+                        ctx->default_tcg_memop_mask);
+    gen_store_mxu_gpr(t1, XRa);
 
-    tcg_gen_movi_tl(t1, s12);
-    tcg_gen_shli_tl(t1, t1, 2);
-    if (s12 & 0x200) {
-        tcg_gen_ori_tl(t1, t1, 0xFFFFF000);
+    if (postinc) {
+        gen_store_gpr(t0, Rb);
     }
-    tcg_gen_add_tl(t1, t0, t1);
-    tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, (MO_TESL ^ (sel * MO_BSWAP)) |
-                       ctx->default_tcg_memop_mask);
+}
+
+/*
+ * S32STD  XRa, Rb, S12 - Store a word from XRF to memory
+ * S32STDR XRa, Rb, S12 - Store a word from XRF to memory
+ *   in reversed byte seq.
+ * S32SDI  XRa, Rb, S12 - Store a word from XRF to memory,
+ *   post modify base address GPR.
+ * S32SDIR XRa, Rb, S12 - Store a word from XRF to memory,
+ *   post modify base address GPR and store in reversed byte seq.
+ */
+static void gen_mxu_s32stxx(DisasContext *ctx, bool reversed, bool postinc)
+{
+    TCGv t0, t1;
+    uint32_t XRa, Rb, s12;
 
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    s12 = sextract32(ctx->opcode, 10, 10);
+    Rb = extract32(ctx->opcode, 21, 5);
+
+    gen_load_gpr(t0, Rb);
+    tcg_gen_movi_tl(t1, s12 * 4);
+    tcg_gen_add_tl(t0, t0, t1);
+
+    gen_load_mxu_gpr(t1, XRa);
+    tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
+                       (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
+                        ctx->default_tcg_memop_mask);
+
+    if (postinc) {
+        gen_store_gpr(t0, Rb);
+    }
+}
+
+/*
+ * S32LDDV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
+ * S32LDDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
+ *   in reversed byte seq.
+ * S32LDIV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
+ *   post modify base address GPR.
+ * S32LDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
+ *   post modify base address GPR and load in reversed byte seq.
+ */
+static void gen_mxu_s32ldxvx(DisasContext *ctx, bool reversed,
+                             bool postinc, uint32_t strd2)
+{
+    TCGv t0, t1;
+    uint32_t XRa, Rb, Rc;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    Rc = extract32(ctx->opcode, 16, 5);
+    Rb = extract32(ctx->opcode, 21, 5);
+
+    gen_load_gpr(t0, Rb);
+    gen_load_gpr(t1, Rc);
+    tcg_gen_shli_tl(t1, t1, strd2);
+    tcg_gen_add_tl(t0, t0, t1);
+
+    tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
+                       (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
+                        ctx->default_tcg_memop_mask);
     gen_store_mxu_gpr(t1, XRa);
+
+    if (postinc) {
+        gen_store_gpr(t0, Rb);
+    }
 }
 
+/*
+ * LXW  Ra, Rb, Rc, STRD2 - Load a word from memory to GPR
+ * LXB  Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
+ *   sign extending to GPR size.
+ * LXH  Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
+ *   sign extending to GPR size.
+ * LXBU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
+ *   zero extending to GPR size.
+ * LXHU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
+ *   zero extending to GPR size.
+ */
+static void gen_mxu_lxx(DisasContext *ctx, uint32_t strd2, MemOp mop)
+{
+    TCGv t0, t1;
+    uint32_t Ra, Rb, Rc;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+
+    Ra = extract32(ctx->opcode, 11, 5);
+    Rc = extract32(ctx->opcode, 16, 5);
+    Rb = extract32(ctx->opcode, 21, 5);
+
+    gen_load_gpr(t0, Rb);
+    gen_load_gpr(t1, Rc);
+    tcg_gen_shli_tl(t1, t1, strd2);
+    tcg_gen_add_tl(t0, t0, t1);
+
+    tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, mop | ctx->default_tcg_memop_mask);
+    gen_store_gpr(t1, Ra);
+}
+
+/*
+ * S32STDV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
+ * S32STDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
+ *   in reversed byte seq.
+ * S32SDIV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
+ *   post modify base address GPR.
+ * S32SDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
+ *   post modify base address GPR and store in reversed byte seq.
+ */
+static void gen_mxu_s32stxvx(DisasContext *ctx, bool reversed,
+                             bool postinc, uint32_t strd2)
+{
+    TCGv t0, t1;
+    uint32_t XRa, Rb, Rc;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    Rc = extract32(ctx->opcode, 16, 5);
+    Rb = extract32(ctx->opcode, 21, 5);
+
+    gen_load_gpr(t0, Rb);
+    gen_load_gpr(t1, Rc);
+    tcg_gen_shli_tl(t1, t1, strd2);
+    tcg_gen_add_tl(t0, t0, t1);
+
+    gen_load_mxu_gpr(t1, XRa);
+    tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
+                       (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
+                        ctx->default_tcg_memop_mask);
+
+    if (postinc) {
+        gen_store_gpr(t0, Rb);
+    }
+}
 
 /*
  *                 MXU instruction category: logic
@@ -981,13 +1818,291 @@ static void gen_mxu_S32XOR(DisasContext *ctx)
     }
 }
 
+/*
+ *                 MXU instruction category: shift
+ *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *               D32SLL    D32SLR    D32SAR    D32SARL
+ *               D32SLLV   D32SLRV   D32SARV   D32SARW
+ *               Q16SLL    Q16SLR    Q16SAR
+ *               Q16SLLV   Q16SLRV   Q16SARV
+ */
+
+/*
+ *  D32SLL XRa, XRd, XRb, XRc, SFT4
+ *    Dual 32-bit shift left from XRb and XRc to SFT4
+ *    bits (0..15). Store to XRa and XRd respectively.
+ *  D32SLR XRa, XRd, XRb, XRc, SFT4
+ *    Dual 32-bit shift logic right from XRb and XRc
+ *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
+ *  D32SAR XRa, XRd, XRb, XRc, SFT4
+ *    Dual 32-bit shift arithmetic right from XRb and XRc
+ *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
+ */
+static void gen_mxu_d32sxx(DisasContext *ctx, bool right, bool arithmetic)
+{
+    uint32_t XRa, XRb, XRc, XRd, sft4;
+
+    XRa  = extract32(ctx->opcode,  6, 4);
+    XRb  = extract32(ctx->opcode, 10, 4);
+    XRc  = extract32(ctx->opcode, 14, 4);
+    XRd  = extract32(ctx->opcode, 18, 4);
+    sft4 = extract32(ctx->opcode, 22, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+
+    gen_load_mxu_gpr(t0, XRb);
+    gen_load_mxu_gpr(t1, XRc);
+
+    if (right) {
+        if (arithmetic) {
+            tcg_gen_sari_tl(t0, t0, sft4);
+            tcg_gen_sari_tl(t1, t1, sft4);
+        } else {
+            tcg_gen_shri_tl(t0, t0, sft4);
+            tcg_gen_shri_tl(t1, t1, sft4);
+        }
+    } else {
+        tcg_gen_shli_tl(t0, t0, sft4);
+        tcg_gen_shli_tl(t1, t1, sft4);
+    }
+    gen_store_mxu_gpr(t0, XRa);
+    gen_store_mxu_gpr(t1, XRd);
+}
 
 /*
- *                   MXU instruction category max/min
+ *  D32SLLV XRa, XRd, rs
+ *    Dual 32-bit shift left from XRa and XRd to rs[3:0]
+ *    bits. Store back to XRa and XRd respectively.
+ *  D32SLRV XRa, XRd, rs
+ *    Dual 32-bit shift logic right from XRa and XRd to rs[3:0]
+ *    bits. Store back to XRa and XRd respectively.
+ *  D32SARV XRa, XRd, rs
+ *    Dual 32-bit shift arithmetic right from XRa and XRd to rs[3:0]
+ *    bits. Store back to XRa and XRd respectively.
+ */
+static void gen_mxu_d32sxxv(DisasContext *ctx, bool right, bool arithmetic)
+{
+    uint32_t XRa, XRd, rs;
+
+    XRa = extract32(ctx->opcode, 10, 4);
+    XRd = extract32(ctx->opcode, 14, 4);
+    rs  = extract32(ctx->opcode, 21, 5);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+
+    gen_load_mxu_gpr(t0, XRa);
+    gen_load_mxu_gpr(t1, XRd);
+    gen_load_gpr(t2, rs);
+    tcg_gen_andi_tl(t2, t2, 0x0f);
+
+    if (right) {
+        if (arithmetic) {
+            tcg_gen_sar_tl(t0, t0, t2);
+            tcg_gen_sar_tl(t1, t1, t2);
+        } else {
+            tcg_gen_shr_tl(t0, t0, t2);
+            tcg_gen_shr_tl(t1, t1, t2);
+        }
+    } else {
+        tcg_gen_shl_tl(t0, t0, t2);
+        tcg_gen_shl_tl(t1, t1, t2);
+    }
+    gen_store_mxu_gpr(t0, XRa);
+    gen_store_mxu_gpr(t1, XRd);
+}
+
+/*
+ *  D32SARL XRa, XRb, XRc, SFT4
+ *    Dual shift arithmetic right 32-bit integers in XRb and XRc
+ *    to SFT4 bits (0..15). Pack 16 LSBs of each into XRa.
+ *
+ *  D32SARW XRa, XRb, XRc, rb
+ *    Dual shift arithmetic right 32-bit integers in XRb and XRc
+ *    to rb[3:0] bits. Pack 16 LSBs of each into XRa.
+ */
+static void gen_mxu_d32sarl(DisasContext *ctx, bool sarw)
+{
+    uint32_t XRa, XRb, XRc, rb;
+
+    XRa = extract32(ctx->opcode,  6, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRc = extract32(ctx->opcode, 14, 4);
+    rb  = extract32(ctx->opcode, 21, 5);
+
+    if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else {
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGv t2 = tcg_temp_new();
+
+        if (!sarw) {
+            /* Make SFT4 from rb field */
+            tcg_gen_movi_tl(t2, rb >> 1);
+        } else {
+            gen_load_gpr(t2, rb);
+            tcg_gen_andi_tl(t2, t2, 0x0f);
+        }
+        gen_load_mxu_gpr(t0, XRb);
+        gen_load_mxu_gpr(t1, XRc);
+        tcg_gen_sar_tl(t0, t0, t2);
+        tcg_gen_sar_tl(t1, t1, t2);
+        tcg_gen_extract_tl(t2, t1, 0, 16);
+        tcg_gen_deposit_tl(t2, t2, t0, 16, 16);
+        gen_store_mxu_gpr(t2, XRa);
+    }
+}
+
+/*
+ *  Q16SLL XRa, XRd, XRb, XRc, SFT4
+ *    Quad 16-bit shift left from XRb and XRc to SFT4
+ *    bits (0..15). Store to XRa and XRd respectively.
+ *  Q16SLR XRa, XRd, XRb, XRc, SFT4
+ *    Quad 16-bit shift logic right from XRb and XRc
+ *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
+ *  Q16SAR XRa, XRd, XRb, XRc, SFT4
+ *    Quad 16-bit shift arithmetic right from XRb and XRc
+ *    to SFT4 bits (0..15). Store to XRa and XRd respectively.
+ */
+static void gen_mxu_q16sxx(DisasContext *ctx, bool right, bool arithmetic)
+{
+    uint32_t XRa, XRb, XRc, XRd, sft4;
+
+    XRa  = extract32(ctx->opcode,  6, 4);
+    XRb  = extract32(ctx->opcode, 10, 4);
+    XRc  = extract32(ctx->opcode, 14, 4);
+    XRd  = extract32(ctx->opcode, 18, 4);
+    sft4 = extract32(ctx->opcode, 22, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    TCGv t3 = tcg_temp_new();
+
+    gen_load_mxu_gpr(t0, XRb);
+    gen_load_mxu_gpr(t2, XRc);
+
+    if (arithmetic) {
+        tcg_gen_sextract_tl(t1, t0, 16, 16);
+        tcg_gen_sextract_tl(t0, t0,  0, 16);
+        tcg_gen_sextract_tl(t3, t2, 16, 16);
+        tcg_gen_sextract_tl(t2, t2,  0, 16);
+    } else {
+        tcg_gen_extract_tl(t1, t0, 16, 16);
+        tcg_gen_extract_tl(t0, t0,  0, 16);
+        tcg_gen_extract_tl(t3, t2, 16, 16);
+        tcg_gen_extract_tl(t2, t2,  0, 16);
+    }
+
+    if (right) {
+        if (arithmetic) {
+            tcg_gen_sari_tl(t0, t0, sft4);
+            tcg_gen_sari_tl(t1, t1, sft4);
+            tcg_gen_sari_tl(t2, t2, sft4);
+            tcg_gen_sari_tl(t3, t3, sft4);
+        } else {
+            tcg_gen_shri_tl(t0, t0, sft4);
+            tcg_gen_shri_tl(t1, t1, sft4);
+            tcg_gen_shri_tl(t2, t2, sft4);
+            tcg_gen_shri_tl(t3, t3, sft4);
+        }
+    } else {
+        tcg_gen_shli_tl(t0, t0, sft4);
+        tcg_gen_shli_tl(t1, t1, sft4);
+        tcg_gen_shli_tl(t2, t2, sft4);
+        tcg_gen_shli_tl(t3, t3, sft4);
+    }
+    tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
+    tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
+
+    gen_store_mxu_gpr(t0, XRa);
+    gen_store_mxu_gpr(t2, XRd);
+}
+
+/*
+ *  Q16SLLV XRa, XRd, rs
+ *    Quad 16-bit shift left from XRa and XRd to rs[3:0]
+ *    bits. Store to XRa and XRd respectively.
+ *  Q16SLRV XRa, XRd, rs
+ *    Quad 16-bit shift logic right from XRa and XRd to rs[3:0]
+ *    bits. Store to XRa and XRd respectively.
+ *  Q16SARV XRa, XRd, rs
+ *    Quad 16-bit shift arithmetic right from XRa and XRd to rs[3:0]
+ *    bits. Store to XRa and XRd respectively.
+ */
+static void gen_mxu_q16sxxv(DisasContext *ctx, bool right, bool arithmetic)
+{
+    uint32_t XRa, XRd, rs;
+
+    XRa = extract32(ctx->opcode, 10, 4);
+    XRd = extract32(ctx->opcode, 14, 4);
+    rs  = extract32(ctx->opcode, 21, 5);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    TCGv t3 = tcg_temp_new();
+    TCGv t5 = tcg_temp_new();
+
+    gen_load_mxu_gpr(t0, XRa);
+    gen_load_mxu_gpr(t2, XRd);
+    gen_load_gpr(t5, rs);
+    tcg_gen_andi_tl(t5, t5, 0x0f);
+
+
+    if (arithmetic) {
+        tcg_gen_sextract_tl(t1, t0, 16, 16);
+        tcg_gen_sextract_tl(t0, t0,  0, 16);
+        tcg_gen_sextract_tl(t3, t2, 16, 16);
+        tcg_gen_sextract_tl(t2, t2,  0, 16);
+    } else {
+        tcg_gen_extract_tl(t1, t0, 16, 16);
+        tcg_gen_extract_tl(t0, t0,  0, 16);
+        tcg_gen_extract_tl(t3, t2, 16, 16);
+        tcg_gen_extract_tl(t2, t2,  0, 16);
+    }
+
+    if (right) {
+        if (arithmetic) {
+            tcg_gen_sar_tl(t0, t0, t5);
+            tcg_gen_sar_tl(t1, t1, t5);
+            tcg_gen_sar_tl(t2, t2, t5);
+            tcg_gen_sar_tl(t3, t3, t5);
+        } else {
+            tcg_gen_shr_tl(t0, t0, t5);
+            tcg_gen_shr_tl(t1, t1, t5);
+            tcg_gen_shr_tl(t2, t2, t5);
+            tcg_gen_shr_tl(t3, t3, t5);
+        }
+    } else {
+        tcg_gen_shl_tl(t0, t0, t5);
+        tcg_gen_shl_tl(t1, t1, t5);
+        tcg_gen_shl_tl(t2, t2, t5);
+        tcg_gen_shl_tl(t3, t3, t5);
+    }
+    tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
+    tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
+
+    gen_store_mxu_gpr(t0, XRa);
+    gen_store_mxu_gpr(t2, XRd);
+}
+
+/*
+ *                   MXU instruction category max/min/avg
  *                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
  *                     S32MAX     D16MAX     Q8MAX
  *                     S32MIN     D16MIN     Q8MIN
+ *                     S32SLT     D16SLT     Q8SLT
+ *                                           Q8SLTU
+ *                                D16AVG     Q8AVG
+ *                                D16AVGR    Q8AVGR
+ *                     S32MOVZ    D16MOVZ    Q8MOVZ
+ *                     S32MOVN    D16MOVN    Q8MOVN
  */
 
 /*
@@ -1072,13 +2187,14 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
         /* ...and do half-word-wise max/min with one operand 0 */
         TCGv_i32 t0 = tcg_temp_new();
         TCGv_i32 t1 = tcg_constant_i32(0);
+        TCGv_i32 t2 = tcg_temp_new();
 
         /* the left half-word first */
         tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
         if (opc == OPC_MXU_D16MAX) {
-            tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+            tcg_gen_smax_i32(t2, t0, t1);
         } else {
-            tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+            tcg_gen_smin_i32(t2, t0, t1);
         }
 
         /* the right half-word */
@@ -1094,7 +2210,7 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
         /* return resulting half-words to its original position */
         tcg_gen_shri_i32(t0, t0, 16);
         /* finally update the destination */
-        tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+        tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
     } else if (unlikely(XRb == XRc)) {
         /* both operands same -> just set destination to one of them */
         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
@@ -1102,14 +2218,15 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
         /* the most general case */
         TCGv_i32 t0 = tcg_temp_new();
         TCGv_i32 t1 = tcg_temp_new();
+        TCGv_i32 t2 = tcg_temp_new();
 
         /* the left half-word first */
         tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
         tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
         if (opc == OPC_MXU_D16MAX) {
-            tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+            tcg_gen_smax_i32(t2, t0, t1);
         } else {
-            tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+            tcg_gen_smin_i32(t2, t0, t1);
         }
 
         /* the right half-word */
@@ -1127,7 +2244,7 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
         /* return resulting half-words to its original position */
         tcg_gen_shri_i32(t0, t0, 16);
         /* finally update the destination */
-        tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+        tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
     }
 }
 
@@ -1163,14 +2280,15 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
         /* ...and do byte-wise max/min with one operand 0 */
         TCGv_i32 t0 = tcg_temp_new();
         TCGv_i32 t1 = tcg_constant_i32(0);
+        TCGv_i32 t2 = tcg_temp_new();
         int32_t i;
 
         /* the leftmost byte (byte 3) first */
         tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000);
         if (opc == OPC_MXU_Q8MAX) {
-            tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+            tcg_gen_smax_i32(t2, t0, t1);
         } else {
-            tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+            tcg_gen_smin_i32(t2, t0, t1);
         }
 
         /* bytes 2, 1, 0 */
@@ -1188,8 +2306,9 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
             /* return resulting byte to its original position */
             tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
             /* finally update the destination */
-            tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+            tcg_gen_or_i32(t2, t2, t0);
         }
+        gen_store_mxu_gpr(t2, XRa);
     } else if (unlikely(XRb == XRc)) {
         /* both operands same -> just set destination to one of them */
         tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
@@ -1197,15 +2316,16 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
         /* the most general case */
         TCGv_i32 t0 = tcg_temp_new();
         TCGv_i32 t1 = tcg_temp_new();
+        TCGv_i32 t2 = tcg_temp_new();
         int32_t i;
 
         /* the leftmost bytes (bytes 3) first */
         tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000);
         tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
         if (opc == OPC_MXU_Q8MAX) {
-            tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+            tcg_gen_smax_i32(t2, t0, t1);
         } else {
-            tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+            tcg_gen_smin_i32(t2, t0, t1);
         }
 
         /* bytes 2, 1, 0 */
@@ -1225,11 +2345,1741 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
             /* return resulting byte to its original position */
             tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
             /* finally update the destination */
-            tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+            tcg_gen_or_i32(t2, t2, t0);
         }
+        gen_store_mxu_gpr(t2, XRa);
+    }
+}
+
+/*
+ *  Q8SLT
+ *    Update XRa with the signed "set less than" comparison of XRb and XRc
+ *    on per-byte basis.
+ *    a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
+ *
+ *  Q8SLTU
+ *    Update XRa with the unsigned "set less than" comparison of XRb and XRc
+ *    on per-byte basis.
+ *    a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
+ */
+static void gen_mxu_q8slt(DisasContext *ctx, bool sltu)
+{
+    uint32_t pad, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 5);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to zero */
+        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+    } else if (unlikely(XRb == XRc)) {
+        /* both operands same registers -> just set destination to zero */
+        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+    } else {
+        /* the most general case */
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGv t2 = tcg_temp_new();
+        TCGv t3 = tcg_temp_new();
+        TCGv t4 = tcg_temp_new();
+
+        gen_load_mxu_gpr(t3, XRb);
+        gen_load_mxu_gpr(t4, XRc);
+        tcg_gen_movi_tl(t2, 0);
+
+        for (int i = 0; i < 4; i++) {
+            if (sltu) {
+                tcg_gen_extract_tl(t0, t3, 8 * i, 8);
+                tcg_gen_extract_tl(t1, t4, 8 * i, 8);
+            } else {
+                tcg_gen_sextract_tl(t0, t3, 8 * i, 8);
+                tcg_gen_sextract_tl(t1, t4, 8 * i, 8);
+            }
+            tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
+            tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
+        }
+        gen_store_mxu_gpr(t2, XRa);
+    }
+}
+
+/*
+ *  S32SLT
+ *    Update XRa with the signed "set less than" comparison of XRb and XRc.
+ *    a.k.a. XRa = XRb < XRc ? 1 : 0;
+ */
+static void gen_mxu_S32SLT(DisasContext *ctx)
+{
+    uint32_t pad, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 5);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to zero */
+        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+    } else if (unlikely(XRb == XRc)) {
+        /* both operands same registers -> just set destination to zero */
+        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+    } else {
+        /* the most general case */
+        tcg_gen_setcond_tl(TCG_COND_LT, mxu_gpr[XRa - 1],
+                           mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
     }
 }
 
+/*
+ *  D16SLT
+ *    Update XRa with the signed "set less than" comparison of XRb and XRc
+ *    on per-word basis.
+ *    a.k.a. XRa[0..1] = XRb[0..1] < XRc[0..1] ? 1 : 0;
+ */
+static void gen_mxu_D16SLT(DisasContext *ctx)
+{
+    uint32_t pad, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 5);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to zero */
+        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+    } else if (unlikely(XRb == XRc)) {
+        /* both operands same registers -> just set destination to zero */
+        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+    } else {
+        /* the most general case */
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGv t2 = tcg_temp_new();
+        TCGv t3 = tcg_temp_new();
+        TCGv t4 = tcg_temp_new();
+
+        gen_load_mxu_gpr(t3, XRb);
+        gen_load_mxu_gpr(t4, XRc);
+        tcg_gen_sextract_tl(t0, t3, 16, 16);
+        tcg_gen_sextract_tl(t1, t4, 16, 16);
+        tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
+        tcg_gen_shli_tl(t2, t0, 16);
+        tcg_gen_sextract_tl(t0, t3,  0, 16);
+        tcg_gen_sextract_tl(t1, t4,  0, 16);
+        tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
+        tcg_gen_or_tl(mxu_gpr[XRa - 1], t2, t0);
+    }
+}
+
+/*
+ *  D16AVG
+ *    Update XRa with the signed average of XRb and XRc
+ *    on per-word basis, rounding down.
+ *    a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1;
+ *
+ *  D16AVGR
+ *    Update XRa with the signed average of XRb and XRc
+ *    on per-word basis, math rounding 4/5.
+ *    a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1;
+ */
+static void gen_mxu_d16avg(DisasContext *ctx, bool round45)
+{
+    uint32_t pad, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 5);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to zero */
+        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+    } else if (unlikely(XRb == XRc)) {
+        /* both operands same registers -> just set destination to same */
+        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+    } else {
+        /* the most general case */
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGv t2 = tcg_temp_new();
+        TCGv t3 = tcg_temp_new();
+        TCGv t4 = tcg_temp_new();
+
+        gen_load_mxu_gpr(t3, XRb);
+        gen_load_mxu_gpr(t4, XRc);
+        tcg_gen_sextract_tl(t0, t3, 16, 16);
+        tcg_gen_sextract_tl(t1, t4, 16, 16);
+        tcg_gen_add_tl(t0, t0, t1);
+        if (round45) {
+            tcg_gen_addi_tl(t0, t0, 1);
+        }
+        tcg_gen_shli_tl(t2, t0, 15);
+        tcg_gen_andi_tl(t2, t2, 0xffff0000);
+        tcg_gen_sextract_tl(t0, t3,  0, 16);
+        tcg_gen_sextract_tl(t1, t4,  0, 16);
+        tcg_gen_add_tl(t0, t0, t1);
+        if (round45) {
+            tcg_gen_addi_tl(t0, t0, 1);
+        }
+        tcg_gen_shri_tl(t0, t0, 1);
+        tcg_gen_deposit_tl(t2, t2, t0, 0, 16);
+        gen_store_mxu_gpr(t2, XRa);
+    }
+}
+
+/*
+ *  Q8AVG
+ *    Update XRa with the signed average of XRb and XRc
+ *    on per-byte basis, rounding down.
+ *    a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1;
+ *
+ *  Q8AVGR
+ *    Update XRa with the signed average of XRb and XRc
+ *    on per-word basis, math rounding 4/5.
+ *    a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1;
+ */
+static void gen_mxu_q8avg(DisasContext *ctx, bool round45)
+{
+    uint32_t pad, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 5);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to zero */
+        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+    } else if (unlikely(XRb == XRc)) {
+        /* both operands same registers -> just set destination to same */
+        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+    } else {
+        /* the most general case */
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGv t2 = tcg_temp_new();
+        TCGv t3 = tcg_temp_new();
+        TCGv t4 = tcg_temp_new();
+
+        gen_load_mxu_gpr(t3, XRb);
+        gen_load_mxu_gpr(t4, XRc);
+        tcg_gen_movi_tl(t2, 0);
+
+        for (int i = 0; i < 4; i++) {
+            tcg_gen_extract_tl(t0, t3, 8 * i, 8);
+            tcg_gen_extract_tl(t1, t4, 8 * i, 8);
+            tcg_gen_add_tl(t0, t0, t1);
+            if (round45) {
+                tcg_gen_addi_tl(t0, t0, 1);
+            }
+            tcg_gen_shri_tl(t0, t0, 1);
+            tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
+        }
+        gen_store_mxu_gpr(t2, XRa);
+    }
+}
+
+/*
+ *  Q8MOVZ
+ *    Quadruple 8-bit packed conditional move where
+ *    XRb contains conditions, XRc what to move and
+ *    XRa is the destination.
+ *    a.k.a. if (XRb[0..3] == 0) { XRa[0..3] = XRc[0..3] }
+ *
+ *  Q8MOVN
+ *    Quadruple 8-bit packed conditional move where
+ *    XRb contains conditions, XRc what to move and
+ *    XRa is the destination.
+ *    a.k.a. if (XRb[0..3] != 0) { XRa[0..3] = XRc[0..3] }
+ */
+static void gen_mxu_q8movzn(DisasContext *ctx, TCGCond cond)
+{
+    uint32_t XRc, XRb, XRa;
+
+    XRa = extract32(ctx->opcode,  6, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRc = extract32(ctx->opcode, 14, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    TCGv t3 = tcg_temp_new();
+    TCGLabel *l_quarterdone = gen_new_label();
+    TCGLabel *l_halfdone = gen_new_label();
+    TCGLabel *l_quarterrest = gen_new_label();
+    TCGLabel *l_done = gen_new_label();
+
+    gen_load_mxu_gpr(t0, XRc);
+    gen_load_mxu_gpr(t1, XRb);
+    gen_load_mxu_gpr(t2, XRa);
+
+    tcg_gen_extract_tl(t3, t1, 24, 8);
+    tcg_gen_brcondi_tl(cond, t3, 0, l_quarterdone);
+    tcg_gen_extract_tl(t3, t0, 24, 8);
+    tcg_gen_deposit_tl(t2, t2, t3, 24, 8);
+
+    gen_set_label(l_quarterdone);
+    tcg_gen_extract_tl(t3, t1, 16, 8);
+    tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
+    tcg_gen_extract_tl(t3, t0, 16, 8);
+    tcg_gen_deposit_tl(t2, t2, t3, 16, 8);
+
+    gen_set_label(l_halfdone);
+    tcg_gen_extract_tl(t3, t1, 8, 8);
+    tcg_gen_brcondi_tl(cond, t3, 0, l_quarterrest);
+    tcg_gen_extract_tl(t3, t0, 8, 8);
+    tcg_gen_deposit_tl(t2, t2, t3, 8, 8);
+
+    gen_set_label(l_quarterrest);
+    tcg_gen_extract_tl(t3, t1, 0, 8);
+    tcg_gen_brcondi_tl(cond, t3, 0, l_done);
+    tcg_gen_extract_tl(t3, t0, 0, 8);
+    tcg_gen_deposit_tl(t2, t2, t3, 0, 8);
+
+    gen_set_label(l_done);
+    gen_store_mxu_gpr(t2, XRa);
+}
+
+/*
+ *  D16MOVZ
+ *    Double 16-bit packed conditional move where
+ *    XRb contains conditions, XRc what to move and
+ *    XRa is the destination.
+ *    a.k.a. if (XRb[0..1] == 0) { XRa[0..1] = XRc[0..1] }
+ *
+ *  D16MOVN
+ *    Double 16-bit packed conditional move where
+ *    XRb contains conditions, XRc what to move and
+ *    XRa is the destination.
+ *    a.k.a. if (XRb[0..3] != 0) { XRa[0..1] = XRc[0..1] }
+ */
+static void gen_mxu_d16movzn(DisasContext *ctx, TCGCond cond)
+{
+    uint32_t XRc, XRb, XRa;
+
+    XRa = extract32(ctx->opcode,  6, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRc = extract32(ctx->opcode, 14, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    TCGv t3 = tcg_temp_new();
+    TCGLabel *l_halfdone = gen_new_label();
+    TCGLabel *l_done = gen_new_label();
+
+    gen_load_mxu_gpr(t0, XRc);
+    gen_load_mxu_gpr(t1, XRb);
+    gen_load_mxu_gpr(t2, XRa);
+
+    tcg_gen_extract_tl(t3, t1, 16, 16);
+    tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
+    tcg_gen_extract_tl(t3, t0, 16, 16);
+    tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
+
+    gen_set_label(l_halfdone);
+    tcg_gen_extract_tl(t3, t1, 0, 16);
+    tcg_gen_brcondi_tl(cond, t3, 0, l_done);
+    tcg_gen_extract_tl(t3, t0, 0, 16);
+    tcg_gen_deposit_tl(t2, t2, t3, 0, 16);
+
+    gen_set_label(l_done);
+    gen_store_mxu_gpr(t2, XRa);
+}
+
+/*
+ *  S32MOVZ
+ *    Quadruple 32-bit conditional move where
+ *    XRb contains conditions, XRc what to move and
+ *    XRa is the destination.
+ *    a.k.a. if (XRb == 0) { XRa = XRc }
+ *
+ *  S32MOVN
+ *    Single 32-bit conditional move where
+ *    XRb contains conditions, XRc what to move and
+ *    XRa is the destination.
+ *    a.k.a. if (XRb != 0) { XRa = XRc }
+ */
+static void gen_mxu_s32movzn(DisasContext *ctx, TCGCond cond)
+{
+    uint32_t XRc, XRb, XRa;
+
+    XRa = extract32(ctx->opcode,  6, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRc = extract32(ctx->opcode, 14, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGLabel *l_done = gen_new_label();
+
+    gen_load_mxu_gpr(t0, XRc);
+    gen_load_mxu_gpr(t1, XRb);
+
+    tcg_gen_brcondi_tl(cond, t1, 0, l_done);
+    gen_store_mxu_gpr(t0, XRa);
+    gen_set_label(l_done);
+}
+
+/*
+ *      MXU instruction category: Addition and subtraction
+ *      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *              S32CPS      D16CPS
+ *                                       Q8ADD
+ */
+
+/*
+ *  S32CPS
+ *    Update XRa if XRc < 0 by value of 0 - XRb
+ *    else XRa = XRb
+ */
+static void gen_mxu_S32CPS(DisasContext *ctx)
+{
+    uint32_t pad, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 5);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely(XRb == 0)) {
+        /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
+        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+    } else if (unlikely(XRc == 0)) {
+        /* condition always false -> just move XRb to XRa */
+        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+    } else {
+        /* the most general case */
+        TCGv t0 = tcg_temp_new();
+        TCGLabel *l_not_less = gen_new_label();
+        TCGLabel *l_done = gen_new_label();
+
+        tcg_gen_brcondi_tl(TCG_COND_GE, mxu_gpr[XRc - 1], 0, l_not_less);
+        tcg_gen_neg_tl(t0, mxu_gpr[XRb - 1]);
+        tcg_gen_br(l_done);
+        gen_set_label(l_not_less);
+        gen_load_mxu_gpr(t0, XRb);
+        gen_set_label(l_done);
+        gen_store_mxu_gpr(t0, XRa);
+    }
+}
+
+/*
+ *  D16CPS
+ *    Update XRa[0..1] if XRc[0..1] < 0 by value of 0 - XRb[0..1]
+ *    else XRa[0..1] = XRb[0..1]
+ */
+static void gen_mxu_D16CPS(DisasContext *ctx)
+{
+    uint32_t pad, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 5);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely(XRb == 0)) {
+        /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
+        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+    } else if (unlikely(XRc == 0)) {
+        /* condition always false -> just move XRb to XRa */
+        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+    } else {
+        /* the most general case */
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGLabel *l_done_hi = gen_new_label();
+        TCGLabel *l_not_less_lo = gen_new_label();
+        TCGLabel *l_done_lo = gen_new_label();
+
+        tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 16, 16);
+        tcg_gen_sextract_tl(t1, mxu_gpr[XRb - 1], 16, 16);
+        tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_done_hi);
+        tcg_gen_subfi_tl(t1, 0, t1);
+
+        gen_set_label(l_done_hi);
+        tcg_gen_shli_i32(t1, t1, 16);
+
+        tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1],  0, 16);
+        tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_not_less_lo);
+        tcg_gen_sextract_tl(t0, mxu_gpr[XRb - 1],  0, 16);
+        tcg_gen_subfi_tl(t0, 0, t0);
+        tcg_gen_br(l_done_lo);
+
+        gen_set_label(l_not_less_lo);
+        tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1],  0, 16);
+
+        gen_set_label(l_done_lo);
+        tcg_gen_deposit_tl(mxu_gpr[XRa - 1], t1, t0, 0, 16);
+    }
+}
+
+/*
+ *  Q8ABD XRa, XRb, XRc
+ *  Gets absolute difference for quadruple of 8-bit
+ *  packed in XRb to another one in XRc,
+ *  put the result in XRa.
+ *  a.k.a. XRa[0..3] = abs(XRb[0..3] - XRc[0..3]);
+ */
+static void gen_mxu_Q8ABD(DisasContext *ctx)
+{
+    uint32_t pad, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 3);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to zero */
+        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+    } else {
+        /* the most general case */
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGv t2 = tcg_temp_new();
+        TCGv t3 = tcg_temp_new();
+        TCGv t4 = tcg_temp_new();
+
+        gen_load_mxu_gpr(t3, XRb);
+        gen_load_mxu_gpr(t4, XRc);
+        tcg_gen_movi_tl(t2, 0);
+
+        for (int i = 0; i < 4; i++) {
+            tcg_gen_extract_tl(t0, t3, 8 * i, 8);
+            tcg_gen_extract_tl(t1, t4, 8 * i, 8);
+
+            tcg_gen_sub_tl(t0, t0, t1);
+            tcg_gen_abs_tl(t0, t0);
+
+            tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
+        }
+        gen_store_mxu_gpr(t2, XRa);
+    }
+}
+
+/*
+ *  Q8ADD XRa, XRb, XRc, ptn2
+ *  Add/subtract quadruple of 8-bit packed in XRb
+ *  to another one in XRc, put the result in XRa.
+ */
+static void gen_mxu_Q8ADD(DisasContext *ctx)
+{
+    uint32_t aptn2, pad, XRc, XRb, XRa;
+
+    aptn2 = extract32(ctx->opcode, 24, 2);
+    pad   = extract32(ctx->opcode, 21, 3);
+    XRc   = extract32(ctx->opcode, 14, 4);
+    XRb   = extract32(ctx->opcode, 10, 4);
+    XRa   = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to zero */
+        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
+    } else {
+        /* the most general case */
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGv t2 = tcg_temp_new();
+        TCGv t3 = tcg_temp_new();
+        TCGv t4 = tcg_temp_new();
+
+        gen_load_mxu_gpr(t3, XRb);
+        gen_load_mxu_gpr(t4, XRc);
+
+        for (int i = 0; i < 4; i++) {
+            tcg_gen_andi_tl(t0, t3, 0xff);
+            tcg_gen_andi_tl(t1, t4, 0xff);
+
+            if (i < 2) {
+                if (aptn2 & 0x01) {
+                    tcg_gen_sub_tl(t0, t0, t1);
+                } else {
+                    tcg_gen_add_tl(t0, t0, t1);
+                }
+            } else {
+                if (aptn2 & 0x02) {
+                    tcg_gen_sub_tl(t0, t0, t1);
+                } else {
+                    tcg_gen_add_tl(t0, t0, t1);
+                }
+            }
+            if (i < 3) {
+                tcg_gen_shri_tl(t3, t3, 8);
+                tcg_gen_shri_tl(t4, t4, 8);
+            }
+            if (i > 0) {
+                tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
+            } else {
+                tcg_gen_andi_tl(t0, t0, 0xff);
+                tcg_gen_mov_tl(t2, t0);
+            }
+        }
+        gen_store_mxu_gpr(t2, XRa);
+    }
+}
+
+/*
+ *  Q8ADDE XRa, XRb, XRc, XRd, aptn2
+ *    Add/subtract quadruple of 8-bit packed in XRb
+ *    to another one in XRc, with zero extending
+ *    to 16-bit and put results as packed 16-bit data
+ *    into XRa and XRd.
+ *    aptn2 manages action add or subract of pairs of data.
+ *
+ *  Q8ACCE XRa, XRb, XRc, XRd, aptn2
+ *    Add/subtract quadruple of 8-bit packed in XRb
+ *    to another one in XRc, with zero extending
+ *    to 16-bit and accumulate results as packed 16-bit data
+ *    into XRa and XRd.
+ *    aptn2 manages action add or subract of pairs of data.
+ */
+static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate)
+{
+    uint32_t aptn2, XRd, XRc, XRb, XRa;
+
+    aptn2 = extract32(ctx->opcode, 24, 2);
+    XRd   = extract32(ctx->opcode, 18, 4);
+    XRc   = extract32(ctx->opcode, 14, 4);
+    XRb   = extract32(ctx->opcode, 10, 4);
+    XRa   = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to zero */
+        if (XRa != 0) {
+            tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+        }
+        if (XRd != 0) {
+            tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0);
+        }
+    } else {
+        /* the most general case */
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGv t2 = tcg_temp_new();
+        TCGv t3 = tcg_temp_new();
+        TCGv t4 = tcg_temp_new();
+        TCGv t5 = tcg_temp_new();
+
+        if (XRa != 0) {
+            tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 16, 8);
+            tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 16, 8);
+            tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 24, 8);
+            tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
+            if (aptn2 & 2) {
+                tcg_gen_sub_tl(t0, t0, t1);
+                tcg_gen_sub_tl(t2, t2, t3);
+            } else {
+                tcg_gen_add_tl(t0, t0, t1);
+                tcg_gen_add_tl(t2, t2, t3);
+            }
+            if (accumulate) {
+                gen_load_mxu_gpr(t5, XRa);
+                tcg_gen_extract_tl(t1, t5,  0, 16);
+                tcg_gen_extract_tl(t3, t5, 16, 16);
+                tcg_gen_add_tl(t0, t0, t1);
+                tcg_gen_add_tl(t2, t2, t3);
+            }
+            tcg_gen_shli_tl(t2, t2, 16);
+            tcg_gen_extract_tl(t0, t0, 0, 16);
+            tcg_gen_or_tl(t4, t2, t0);
+        }
+        if (XRd != 0) {
+            tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8);
+            tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 0, 8);
+            tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 8, 8);
+            tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 8, 8);
+            if (aptn2 & 1) {
+                tcg_gen_sub_tl(t0, t0, t1);
+                tcg_gen_sub_tl(t2, t2, t3);
+            } else {
+                tcg_gen_add_tl(t0, t0, t1);
+                tcg_gen_add_tl(t2, t2, t3);
+            }
+            if (accumulate) {
+                gen_load_mxu_gpr(t5, XRd);
+                tcg_gen_extract_tl(t1, t5,  0, 16);
+                tcg_gen_extract_tl(t3, t5, 16, 16);
+                tcg_gen_add_tl(t0, t0, t1);
+                tcg_gen_add_tl(t2, t2, t3);
+            }
+            tcg_gen_shli_tl(t2, t2, 16);
+            tcg_gen_extract_tl(t0, t0, 0, 16);
+            tcg_gen_or_tl(t5, t2, t0);
+        }
+
+        gen_store_mxu_gpr(t4, XRa);
+        gen_store_mxu_gpr(t5, XRd);
+    }
+}
+
+/*
+ *  D8SUM XRa, XRb, XRc
+ *    Double parallel add of quadruple unsigned 8-bit together
+ *    with zero extending to 16-bit data.
+ *  D8SUMC XRa, XRb, XRc
+ *    Double parallel add of quadruple unsigned 8-bit together
+ *    with zero extending to 16-bit data and adding 2 to each
+ *    parallel result.
+ */
+static void gen_mxu_d8sum(DisasContext *ctx, bool sumc)
+{
+    uint32_t pad, pad2, XRc, XRb, XRa;
+
+    pad  = extract32(ctx->opcode, 24, 2);
+    pad2 = extract32(ctx->opcode, 18, 4);
+    XRc  = extract32(ctx->opcode, 14, 4);
+    XRb  = extract32(ctx->opcode, 10, 4);
+    XRa  = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0 || pad2 != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to zero */
+        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+    } else {
+        /* the most general case */
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGv t2 = tcg_temp_new();
+        TCGv t3 = tcg_temp_new();
+        TCGv t4 = tcg_temp_new();
+        TCGv t5 = tcg_temp_new();
+
+        if (XRb != 0) {
+            tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1],  0, 8);
+            tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1],  8, 8);
+            tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8);
+            tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8);
+            tcg_gen_add_tl(t4, t0, t1);
+            tcg_gen_add_tl(t4, t4, t2);
+            tcg_gen_add_tl(t4, t4, t3);
+        } else {
+            tcg_gen_mov_tl(t4, 0);
+        }
+        if (XRc != 0) {
+            tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1],  0, 8);
+            tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1],  8, 8);
+            tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8);
+            tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
+            tcg_gen_add_tl(t5, t0, t1);
+            tcg_gen_add_tl(t5, t5, t2);
+            tcg_gen_add_tl(t5, t5, t3);
+        } else {
+            tcg_gen_mov_tl(t5, 0);
+        }
+
+        if (sumc) {
+            tcg_gen_addi_tl(t4, t4, 2);
+            tcg_gen_addi_tl(t5, t5, 2);
+        }
+        tcg_gen_shli_tl(t4, t4, 16);
+
+        tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
+    }
+}
+
+/*
+ * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed
+ * 16-bit pattern addition.
+ */
+static void gen_mxu_q16add(DisasContext *ctx)
+{
+    uint32_t aptn2, optn2, XRc, XRb, XRa, XRd;
+
+    aptn2 = extract32(ctx->opcode, 24, 2);
+    optn2 = extract32(ctx->opcode, 22, 2);
+    XRd   = extract32(ctx->opcode, 18, 4);
+    XRc   = extract32(ctx->opcode, 14, 4);
+    XRb   = extract32(ctx->opcode, 10, 4);
+    XRa   = extract32(ctx->opcode,  6, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    TCGv t3 = tcg_temp_new();
+    TCGv t4 = tcg_temp_new();
+    TCGv t5 = tcg_temp_new();
+
+    gen_load_mxu_gpr(t1, XRb);
+    tcg_gen_extract_tl(t0, t1,  0, 16);
+    tcg_gen_extract_tl(t1, t1, 16, 16);
+
+    gen_load_mxu_gpr(t3, XRc);
+    tcg_gen_extract_tl(t2, t3,  0, 16);
+    tcg_gen_extract_tl(t3, t3, 16, 16);
+
+    switch (optn2) {
+    case MXU_OPTN2_WW: /* XRB.H+XRC.H == lop, XRB.L+XRC.L == rop */
+        tcg_gen_mov_tl(t4, t1);
+        tcg_gen_mov_tl(t5, t0);
+        break;
+    case MXU_OPTN2_LW: /* XRB.L+XRC.H == lop, XRB.L+XRC.L == rop */
+        tcg_gen_mov_tl(t4, t0);
+        tcg_gen_mov_tl(t5, t0);
+        break;
+    case MXU_OPTN2_HW: /* XRB.H+XRC.H == lop, XRB.H+XRC.L == rop */
+        tcg_gen_mov_tl(t4, t1);
+        tcg_gen_mov_tl(t5, t1);
+        break;
+    case MXU_OPTN2_XW: /* XRB.L+XRC.H == lop, XRB.H+XRC.L == rop */
+        tcg_gen_mov_tl(t4, t0);
+        tcg_gen_mov_tl(t5, t1);
+        break;
+    }
+
+    switch (aptn2) {
+    case MXU_APTN2_AA: /* lop +, rop + */
+        tcg_gen_add_tl(t0, t4, t3);
+        tcg_gen_add_tl(t1, t5, t2);
+        tcg_gen_add_tl(t4, t4, t3);
+        tcg_gen_add_tl(t5, t5, t2);
+        break;
+    case MXU_APTN2_AS: /* lop +, rop + */
+        tcg_gen_sub_tl(t0, t4, t3);
+        tcg_gen_sub_tl(t1, t5, t2);
+        tcg_gen_add_tl(t4, t4, t3);
+        tcg_gen_add_tl(t5, t5, t2);
+        break;
+    case MXU_APTN2_SA: /* lop +, rop + */
+        tcg_gen_add_tl(t0, t4, t3);
+        tcg_gen_add_tl(t1, t5, t2);
+        tcg_gen_sub_tl(t4, t4, t3);
+        tcg_gen_sub_tl(t5, t5, t2);
+        break;
+    case MXU_APTN2_SS: /* lop +, rop + */
+        tcg_gen_sub_tl(t0, t4, t3);
+        tcg_gen_sub_tl(t1, t5, t2);
+        tcg_gen_sub_tl(t4, t4, t3);
+        tcg_gen_sub_tl(t5, t5, t2);
+        break;
+    }
+
+    tcg_gen_shli_tl(t0, t0, 16);
+    tcg_gen_extract_tl(t1, t1, 0, 16);
+    tcg_gen_shli_tl(t4, t4, 16);
+    tcg_gen_extract_tl(t5, t5, 0, 16);
+
+    tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
+    tcg_gen_or_tl(mxu_gpr[XRd - 1], t0, t1);
+}
+
+/*
+ * Q16ACC XRa, XRb, XRc, XRd, aptn2 - Quad packed
+ * 16-bit addition/subtraction with accumulate.
+ */
+static void gen_mxu_q16acc(DisasContext *ctx)
+{
+    uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+    aptn2 = extract32(ctx->opcode, 24, 2);
+    XRd   = extract32(ctx->opcode, 18, 4);
+    XRc   = extract32(ctx->opcode, 14, 4);
+    XRb   = extract32(ctx->opcode, 10, 4);
+    XRa   = extract32(ctx->opcode,  6, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    TCGv t3 = tcg_temp_new();
+    TCGv s3 = tcg_temp_new();
+    TCGv s2 = tcg_temp_new();
+    TCGv s1 = tcg_temp_new();
+    TCGv s0 = tcg_temp_new();
+
+    gen_load_mxu_gpr(t1, XRb);
+    tcg_gen_extract_tl(t0, t1,  0, 16);
+    tcg_gen_extract_tl(t1, t1, 16, 16);
+
+    gen_load_mxu_gpr(t3, XRc);
+    tcg_gen_extract_tl(t2, t3,  0, 16);
+    tcg_gen_extract_tl(t3, t3, 16, 16);
+
+    switch (aptn2) {
+    case MXU_APTN2_AA: /* lop +, rop + */
+        tcg_gen_add_tl(s3, t1, t3);
+        tcg_gen_add_tl(s2, t0, t2);
+        tcg_gen_add_tl(s1, t1, t3);
+        tcg_gen_add_tl(s0, t0, t2);
+        break;
+    case MXU_APTN2_AS: /* lop +, rop - */
+        tcg_gen_sub_tl(s3, t1, t3);
+        tcg_gen_sub_tl(s2, t0, t2);
+        tcg_gen_add_tl(s1, t1, t3);
+        tcg_gen_add_tl(s0, t0, t2);
+        break;
+    case MXU_APTN2_SA: /* lop -, rop + */
+        tcg_gen_add_tl(s3, t1, t3);
+        tcg_gen_add_tl(s2, t0, t2);
+        tcg_gen_sub_tl(s1, t1, t3);
+        tcg_gen_sub_tl(s0, t0, t2);
+        break;
+    case MXU_APTN2_SS: /* lop -, rop - */
+        tcg_gen_sub_tl(s3, t1, t3);
+        tcg_gen_sub_tl(s2, t0, t2);
+        tcg_gen_sub_tl(s1, t1, t3);
+        tcg_gen_sub_tl(s0, t0, t2);
+        break;
+    }
+
+    if (XRa != 0) {
+        tcg_gen_add_tl(t0, mxu_gpr[XRa - 1], s0);
+        tcg_gen_extract_tl(t0, t0, 0, 16);
+        tcg_gen_extract_tl(t1, mxu_gpr[XRa - 1], 16, 16);
+        tcg_gen_add_tl(t1, t1, s1);
+        tcg_gen_shli_tl(t1, t1, 16);
+        tcg_gen_or_tl(mxu_gpr[XRa - 1], t1, t0);
+    }
+
+    if (XRd != 0) {
+        tcg_gen_add_tl(t0, mxu_gpr[XRd - 1], s2);
+        tcg_gen_extract_tl(t0, t0, 0, 16);
+        tcg_gen_extract_tl(t1, mxu_gpr[XRd - 1], 16, 16);
+        tcg_gen_add_tl(t1, t1, s3);
+        tcg_gen_shli_tl(t1, t1, 16);
+        tcg_gen_or_tl(mxu_gpr[XRd - 1], t1, t0);
+    }
+}
+
+/*
+ * Q16ACCM XRa, XRb, XRc, XRd, aptn2 - Quad packed
+ * 16-bit accumulate.
+ */
+static void gen_mxu_q16accm(DisasContext *ctx)
+{
+    uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+    aptn2 = extract32(ctx->opcode, 24, 2);
+    XRd   = extract32(ctx->opcode, 18, 4);
+    XRc   = extract32(ctx->opcode, 14, 4);
+    XRb   = extract32(ctx->opcode, 10, 4);
+    XRa   = extract32(ctx->opcode,  6, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    TCGv t3 = tcg_temp_new();
+
+    gen_load_mxu_gpr(t2, XRb);
+    gen_load_mxu_gpr(t3, XRc);
+
+    if (XRa != 0) {
+        TCGv a0 = tcg_temp_new();
+        TCGv a1 = tcg_temp_new();
+
+        tcg_gen_extract_tl(t0, t2,  0, 16);
+        tcg_gen_extract_tl(t1, t2, 16, 16);
+
+        gen_load_mxu_gpr(a1, XRa);
+        tcg_gen_extract_tl(a0, a1,  0, 16);
+        tcg_gen_extract_tl(a1, a1, 16, 16);
+
+        if (aptn2 & 2) {
+            tcg_gen_sub_tl(a0, a0, t0);
+            tcg_gen_sub_tl(a1, a1, t1);
+        } else {
+            tcg_gen_add_tl(a0, a0, t0);
+            tcg_gen_add_tl(a1, a1, t1);
+        }
+        tcg_gen_extract_tl(a0, a0, 0, 16);
+        tcg_gen_shli_tl(a1, a1, 16);
+        tcg_gen_or_tl(mxu_gpr[XRa - 1], a1, a0);
+    }
+
+    if (XRd != 0) {
+        TCGv a0 = tcg_temp_new();
+        TCGv a1 = tcg_temp_new();
+
+        tcg_gen_extract_tl(t0, t3,  0, 16);
+        tcg_gen_extract_tl(t1, t3, 16, 16);
+
+        gen_load_mxu_gpr(a1, XRd);
+        tcg_gen_extract_tl(a0, a1,  0, 16);
+        tcg_gen_extract_tl(a1, a1, 16, 16);
+
+        if (aptn2 & 1) {
+            tcg_gen_sub_tl(a0, a0, t0);
+            tcg_gen_sub_tl(a1, a1, t1);
+        } else {
+            tcg_gen_add_tl(a0, a0, t0);
+            tcg_gen_add_tl(a1, a1, t1);
+        }
+        tcg_gen_extract_tl(a0, a0, 0, 16);
+        tcg_gen_shli_tl(a1, a1, 16);
+        tcg_gen_or_tl(mxu_gpr[XRd - 1], a1, a0);
+    }
+}
+
+
+/*
+ * D16ASUM XRa, XRb, XRc, XRd, aptn2 - Double packed
+ * 16-bit sign extended addition and accumulate.
+ */
+static void gen_mxu_d16asum(DisasContext *ctx)
+{
+    uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+    aptn2 = extract32(ctx->opcode, 24, 2);
+    XRd   = extract32(ctx->opcode, 18, 4);
+    XRc   = extract32(ctx->opcode, 14, 4);
+    XRb   = extract32(ctx->opcode, 10, 4);
+    XRa   = extract32(ctx->opcode,  6, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    TCGv t3 = tcg_temp_new();
+
+    gen_load_mxu_gpr(t2, XRb);
+    gen_load_mxu_gpr(t3, XRc);
+
+    if (XRa != 0) {
+        tcg_gen_sextract_tl(t0, t2,  0, 16);
+        tcg_gen_sextract_tl(t1, t2, 16, 16);
+        tcg_gen_add_tl(t0, t0, t1);
+        if (aptn2 & 2) {
+            tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+        } else {
+            tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+        }
+    }
+
+    if (XRd != 0) {
+        tcg_gen_sextract_tl(t0, t3,  0, 16);
+        tcg_gen_sextract_tl(t1, t3, 16, 16);
+        tcg_gen_add_tl(t0, t0, t1);
+        if (aptn2 & 1) {
+            tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
+        } else {
+            tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
+        }
+    }
+}
+
+/*
+ * D32ADD XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction, set carry.
+ *
+ * D32ADDC XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction with carry.
+ */
+static void gen_mxu_d32add(DisasContext *ctx)
+{
+    uint32_t aptn2, addc, XRc, XRb, XRa, XRd;
+
+    aptn2 = extract32(ctx->opcode, 24, 2);
+    addc  = extract32(ctx->opcode, 22, 2);
+    XRd   = extract32(ctx->opcode, 18, 4);
+    XRc   = extract32(ctx->opcode, 14, 4);
+    XRb   = extract32(ctx->opcode, 10, 4);
+    XRa   = extract32(ctx->opcode,  6, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    TCGv cr = tcg_temp_new();
+
+    if (unlikely(addc > 1)) {
+        /* opcode incorrect -> do nothing */
+    } else if (addc == 1) {
+        if (unlikely(XRa == 0 && XRd == 0)) {
+            /* destinations are zero register -> do nothing */
+        } else {
+            /* FIXME ??? What if XRa == XRd ??? */
+            /* aptn2 is unused here */
+            gen_load_mxu_gpr(t0, XRb);
+            gen_load_mxu_gpr(t1, XRc);
+            gen_load_mxu_cr(cr);
+            if (XRa != 0) {
+                tcg_gen_extract_tl(t2, cr, 31, 1);
+                tcg_gen_add_tl(t0, t0, t2);
+                tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+            }
+            if (XRd != 0) {
+                tcg_gen_extract_tl(t2, cr, 30, 1);
+                tcg_gen_add_tl(t1, t1, t2);
+                tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
+            }
+        }
+    } else if (unlikely(XRa == 0 && XRd == 0)) {
+        /* destinations are zero register -> do nothing */
+    } else {
+        /* common case */
+        /* FIXME ??? What if XRa == XRd ??? */
+        TCGv carry = tcg_temp_new();
+
+        gen_load_mxu_gpr(t0, XRb);
+        gen_load_mxu_gpr(t1, XRc);
+        gen_load_mxu_cr(cr);
+        if (XRa != 0) {
+            if (aptn2 & 2) {
+                tcg_gen_sub_i32(t2, t0, t1);
+                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
+            } else {
+                tcg_gen_add_i32(t2, t0, t1);
+                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
+            }
+            tcg_gen_andi_tl(cr, cr, 0x7fffffff);
+            tcg_gen_shli_tl(carry, carry, 31);
+            tcg_gen_or_tl(cr, cr, carry);
+            gen_store_mxu_gpr(t2, XRa);
+        }
+        if (XRd != 0) {
+            if (aptn2 & 1) {
+                tcg_gen_sub_i32(t2, t0, t1);
+                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
+            } else {
+                tcg_gen_add_i32(t2, t0, t1);
+                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
+            }
+            tcg_gen_andi_tl(cr, cr, 0xbfffffff);
+            tcg_gen_shli_tl(carry, carry, 30);
+            tcg_gen_or_tl(cr, cr, carry);
+            gen_store_mxu_gpr(t2, XRd);
+        }
+        gen_store_mxu_cr(cr);
+    }
+}
+
+/*
+ * D32ACC XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction and accumulate.
+ */
+static void gen_mxu_d32acc(DisasContext *ctx)
+{
+    uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+    aptn2 = extract32(ctx->opcode, 24, 2);
+    XRd   = extract32(ctx->opcode, 18, 4);
+    XRc   = extract32(ctx->opcode, 14, 4);
+    XRb   = extract32(ctx->opcode, 10, 4);
+    XRa   = extract32(ctx->opcode,  6, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+
+    if (unlikely(XRa == 0 && XRd == 0)) {
+        /* destinations are zero register -> do nothing */
+    } else {
+        /* common case */
+        gen_load_mxu_gpr(t0, XRb);
+        gen_load_mxu_gpr(t1, XRc);
+        if (XRa != 0) {
+            if (aptn2 & 2) {
+                tcg_gen_sub_tl(t2, t0, t1);
+            } else {
+                tcg_gen_add_tl(t2, t0, t1);
+            }
+            tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
+        }
+        if (XRd != 0) {
+            if (aptn2 & 1) {
+                tcg_gen_sub_tl(t2, t0, t1);
+            } else {
+                tcg_gen_add_tl(t2, t0, t1);
+            }
+            tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
+        }
+    }
+}
+
+/*
+ * D32ACCM XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction and accumulate.
+ */
+static void gen_mxu_d32accm(DisasContext *ctx)
+{
+    uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+    aptn2 = extract32(ctx->opcode, 24, 2);
+    XRd   = extract32(ctx->opcode, 18, 4);
+    XRc   = extract32(ctx->opcode, 14, 4);
+    XRb   = extract32(ctx->opcode, 10, 4);
+    XRa   = extract32(ctx->opcode,  6, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+
+    if (unlikely(XRa == 0 && XRd == 0)) {
+        /* destinations are zero register -> do nothing */
+    } else {
+        /* common case */
+        gen_load_mxu_gpr(t0, XRb);
+        gen_load_mxu_gpr(t1, XRc);
+        if (XRa != 0) {
+            tcg_gen_add_tl(t2, t0, t1);
+            if (aptn2 & 2) {
+                tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
+            } else {
+                tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
+            }
+        }
+        if (XRd != 0) {
+            tcg_gen_sub_tl(t2, t0, t1);
+            if (aptn2 & 1) {
+                tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
+            } else {
+                tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
+            }
+        }
+    }
+}
+
+/*
+ * D32ASUM XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction.
+ */
+static void gen_mxu_d32asum(DisasContext *ctx)
+{
+    uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+    aptn2 = extract32(ctx->opcode, 24, 2);
+    XRd   = extract32(ctx->opcode, 18, 4);
+    XRc   = extract32(ctx->opcode, 14, 4);
+    XRb   = extract32(ctx->opcode, 10, 4);
+    XRa   = extract32(ctx->opcode,  6, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+
+    if (unlikely(XRa == 0 && XRd == 0)) {
+        /* destinations are zero register -> do nothing */
+    } else {
+        /* common case */
+        gen_load_mxu_gpr(t0, XRb);
+        gen_load_mxu_gpr(t1, XRc);
+        if (XRa != 0) {
+            if (aptn2 & 2) {
+                tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+            } else {
+                tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+            }
+        }
+        if (XRd != 0) {
+            if (aptn2 & 1) {
+                tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
+            } else {
+                tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
+            }
+        }
+    }
+}
+
+/*
+ *                 MXU instruction category: Miscellaneous
+ *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *               S32EXTR      S32LUI
+ *               S32EXTRV
+ *                            Q16SAT
+ *                            Q16SCOP
+ */
+
+/*
+ *  S32EXTR XRa, XRd, rs, bits5
+ *    Extract bits5 bits from 64-bit pair {XRa:XRd}
+ *    starting from rs[4:0] offset and put to the XRa.
+ */
+static void gen_mxu_s32extr(DisasContext *ctx)
+{
+    TCGv t0, t1, t2, t3;
+    uint32_t XRa, XRd, rs, bits5;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    t2 = tcg_temp_new();
+    t3 = tcg_temp_new();
+
+    XRa   = extract32(ctx->opcode,  6, 4);
+    XRd   = extract32(ctx->opcode, 10, 4);
+    bits5 = extract32(ctx->opcode, 16, 5);
+    rs    = extract32(ctx->opcode, 21, 5);
+
+    /* {tmp} = {XRa:XRd} >> (64 - rt - bits5); */
+    /* {XRa} = extract({tmp}, 0, bits5); */
+    if (bits5 > 0) {
+        TCGLabel *l_xra_only = gen_new_label();
+        TCGLabel *l_done = gen_new_label();
+
+        gen_load_mxu_gpr(t0, XRd);
+        gen_load_mxu_gpr(t1, XRa);
+        gen_load_gpr(t2, rs);
+        tcg_gen_andi_tl(t2, t2, 0x1f);
+        tcg_gen_subfi_tl(t2, 32, t2);
+        tcg_gen_brcondi_tl(TCG_COND_GE, t2, bits5, l_xra_only);
+        tcg_gen_subfi_tl(t2, bits5, t2);
+        tcg_gen_subfi_tl(t3, 32, t2);
+        tcg_gen_shr_tl(t0, t0, t3);
+        tcg_gen_shl_tl(t1, t1, t2);
+        tcg_gen_or_tl(t0, t0, t1);
+        tcg_gen_br(l_done);
+        gen_set_label(l_xra_only);
+        tcg_gen_subi_tl(t2, t2, bits5);
+        tcg_gen_shr_tl(t0, t1, t2);
+        gen_set_label(l_done);
+        tcg_gen_extract_tl(t0, t0, 0, bits5);
+    } else {
+        /* unspecified behavior but matches tests on real hardware*/
+        tcg_gen_movi_tl(t0, 0);
+    }
+    gen_store_mxu_gpr(t0, XRa);
+}
+
+/*
+ *  S32EXTRV XRa, XRd, rs, rt
+ *    Extract rt[4:0] bits from 64-bit pair {XRa:XRd}
+ *    starting from rs[4:0] offset and put to the XRa.
+ */
+static void gen_mxu_s32extrv(DisasContext *ctx)
+{
+    TCGv t0, t1, t2, t3, t4;
+    uint32_t XRa, XRd, rs, rt;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    t2 = tcg_temp_new();
+    t3 = tcg_temp_new();
+    t4 = tcg_temp_new();
+    TCGLabel *l_xra_only = gen_new_label();
+    TCGLabel *l_done = gen_new_label();
+    TCGLabel *l_zero = gen_new_label();
+    TCGLabel *l_extract = gen_new_label();
+
+    XRa = extract32(ctx->opcode,  6, 4);
+    XRd = extract32(ctx->opcode, 10, 4);
+    rt  = extract32(ctx->opcode, 16, 5);
+    rs  = extract32(ctx->opcode, 21, 5);
+
+    /* {tmp} = {XRa:XRd} >> (64 - rs - rt) */
+    gen_load_mxu_gpr(t0, XRd);
+    gen_load_mxu_gpr(t1, XRa);
+    gen_load_gpr(t2, rs);
+    gen_load_gpr(t4, rt);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, t4, 0, l_zero);
+    tcg_gen_andi_tl(t2, t2, 0x1f);
+    tcg_gen_subfi_tl(t2, 32, t2);
+    tcg_gen_brcond_tl(TCG_COND_GE, t2, t4, l_xra_only);
+    tcg_gen_sub_tl(t2, t4, t2);
+    tcg_gen_subfi_tl(t3, 32, t2);
+    tcg_gen_shr_tl(t0, t0, t3);
+    tcg_gen_shl_tl(t1, t1, t2);
+    tcg_gen_or_tl(t0, t0, t1);
+    tcg_gen_br(l_extract);
+
+    gen_set_label(l_xra_only);
+    tcg_gen_sub_tl(t2, t2, t4);
+    tcg_gen_shr_tl(t0, t1, t2);
+    tcg_gen_br(l_extract);
+
+    /* unspecified behavior but matches tests on real hardware*/
+    gen_set_label(l_zero);
+    tcg_gen_movi_tl(t0, 0);
+    tcg_gen_br(l_done);
+
+    /* {XRa} = extract({tmp}, 0, rt) */
+    gen_set_label(l_extract);
+    tcg_gen_subfi_tl(t4, 32, t4);
+    tcg_gen_shl_tl(t0, t0, t4);
+    tcg_gen_shr_tl(t0, t0, t4);
+
+    gen_set_label(l_done);
+    gen_store_mxu_gpr(t0, XRa);
+}
+
+/*
+ *  S32LUI XRa, S8, optn3
+ *    Permutate the immediate S8 value to form a word
+ *    to update XRa.
+ */
+static void gen_mxu_s32lui(DisasContext *ctx)
+{
+    uint32_t XRa, s8, optn3, pad;
+
+    XRa   = extract32(ctx->opcode,  6, 4);
+    s8    = extract32(ctx->opcode, 10, 8);
+    pad   = extract32(ctx->opcode, 21, 2);
+    optn3 = extract32(ctx->opcode, 23, 3);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else {
+        uint32_t s16;
+        TCGv t0 = tcg_temp_new();
+
+        switch (optn3) {
+        case 0:
+            tcg_gen_movi_tl(t0, s8);
+            break;
+        case 1:
+            tcg_gen_movi_tl(t0, s8 << 8);
+            break;
+        case 2:
+            tcg_gen_movi_tl(t0, s8 << 16);
+            break;
+        case 3:
+            tcg_gen_movi_tl(t0, s8 << 24);
+            break;
+        case 4:
+            tcg_gen_movi_tl(t0, (s8 << 16) | s8);
+            break;
+        case 5:
+            tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 8));
+            break;
+        case 6:
+            s16 = (uint16_t)(int16_t)(int8_t)s8;
+            tcg_gen_movi_tl(t0, (s16 << 16) | s16);
+            break;
+        case 7:
+            tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 16) | (s8 << 8) | s8);
+            break;
+        }
+        gen_store_mxu_gpr(t0, XRa);
+    }
+}
+
+/*
+ *  Q16SAT XRa, XRb, XRc
+ *  Packs four 16-bit signed integers in XRb and XRc to
+ *  four saturated unsigned 8-bit into XRa.
+ *
+ */
+static void gen_mxu_Q16SAT(DisasContext *ctx)
+{
+    uint32_t pad, XRc, XRb, XRa;
+
+    pad = extract32(ctx->opcode, 21, 3);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else {
+        /* the most general case */
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGv t2 = tcg_temp_new();
+
+        tcg_gen_movi_tl(t2, 0);
+        if (XRb != 0) {
+            TCGLabel *l_less_hi = gen_new_label();
+            TCGLabel *l_less_lo = gen_new_label();
+            TCGLabel *l_lo = gen_new_label();
+            TCGLabel *l_greater_hi = gen_new_label();
+            TCGLabel *l_greater_lo = gen_new_label();
+            TCGLabel *l_done = gen_new_label();
+
+            tcg_gen_sari_tl(t0, mxu_gpr[XRb - 1], 16);
+            tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
+            tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
+            tcg_gen_br(l_lo);
+            gen_set_label(l_less_hi);
+            tcg_gen_movi_tl(t0, 0);
+            tcg_gen_br(l_lo);
+            gen_set_label(l_greater_hi);
+            tcg_gen_movi_tl(t0, 255);
+
+            gen_set_label(l_lo);
+            tcg_gen_shli_tl(t1, mxu_gpr[XRb - 1], 16);
+            tcg_gen_sari_tl(t1, t1, 16);
+            tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
+            tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
+            tcg_gen_br(l_done);
+            gen_set_label(l_less_lo);
+            tcg_gen_movi_tl(t1, 0);
+            tcg_gen_br(l_done);
+            gen_set_label(l_greater_lo);
+            tcg_gen_movi_tl(t1, 255);
+
+            gen_set_label(l_done);
+            tcg_gen_shli_tl(t2, t0, 24);
+            tcg_gen_shli_tl(t1, t1, 16);
+            tcg_gen_or_tl(t2, t2, t1);
+        }
+
+        if (XRc != 0) {
+            TCGLabel *l_less_hi = gen_new_label();
+            TCGLabel *l_less_lo = gen_new_label();
+            TCGLabel *l_lo = gen_new_label();
+            TCGLabel *l_greater_hi = gen_new_label();
+            TCGLabel *l_greater_lo = gen_new_label();
+            TCGLabel *l_done = gen_new_label();
+
+            tcg_gen_sari_tl(t0, mxu_gpr[XRc - 1], 16);
+            tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
+            tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
+            tcg_gen_br(l_lo);
+            gen_set_label(l_less_hi);
+            tcg_gen_movi_tl(t0, 0);
+            tcg_gen_br(l_lo);
+            gen_set_label(l_greater_hi);
+            tcg_gen_movi_tl(t0, 255);
+
+            gen_set_label(l_lo);
+            tcg_gen_shli_tl(t1, mxu_gpr[XRc - 1], 16);
+            tcg_gen_sari_tl(t1, t1, 16);
+            tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
+            tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
+            tcg_gen_br(l_done);
+            gen_set_label(l_less_lo);
+            tcg_gen_movi_tl(t1, 0);
+            tcg_gen_br(l_done);
+            gen_set_label(l_greater_lo);
+            tcg_gen_movi_tl(t1, 255);
+
+            gen_set_label(l_done);
+            tcg_gen_shli_tl(t0, t0, 8);
+            tcg_gen_or_tl(t2, t2, t0);
+            tcg_gen_or_tl(t2, t2, t1);
+        }
+        gen_store_mxu_gpr(t2, XRa);
+    }
+}
+
+/*
+ *  Q16SCOP XRa, XRd, XRb, XRc
+ *    Determine sign of quad packed 16-bit signed values
+ *    in XRb and XRc put result in XRa and XRd respectively.
+ */
+static void gen_mxu_q16scop(DisasContext *ctx)
+{
+    uint32_t XRd, XRc, XRb, XRa;
+
+    XRd  = extract32(ctx->opcode, 18, 4);
+    XRc  = extract32(ctx->opcode, 14, 4);
+    XRb  = extract32(ctx->opcode, 10, 4);
+    XRa  = extract32(ctx->opcode,  6, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    TCGv t3 = tcg_temp_new();
+    TCGv t4 = tcg_temp_new();
+
+    TCGLabel *l_b_hi_lt = gen_new_label();
+    TCGLabel *l_b_hi_gt = gen_new_label();
+    TCGLabel *l_b_lo = gen_new_label();
+    TCGLabel *l_b_lo_lt = gen_new_label();
+    TCGLabel *l_c_hi = gen_new_label();
+    TCGLabel *l_c_hi_lt = gen_new_label();
+    TCGLabel *l_c_hi_gt = gen_new_label();
+    TCGLabel *l_c_lo = gen_new_label();
+    TCGLabel *l_c_lo_lt = gen_new_label();
+    TCGLabel *l_done = gen_new_label();
+
+    gen_load_mxu_gpr(t0, XRb);
+    gen_load_mxu_gpr(t1, XRc);
+
+    tcg_gen_sextract_tl(t2, t0, 16, 16);
+    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_hi_lt);
+    tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_b_hi_gt);
+    tcg_gen_movi_tl(t3, 0);
+    tcg_gen_br(l_b_lo);
+    gen_set_label(l_b_hi_lt);
+    tcg_gen_movi_tl(t3, 0xffff0000);
+    tcg_gen_br(l_b_lo);
+    gen_set_label(l_b_hi_gt);
+    tcg_gen_movi_tl(t3, 0x00010000);
+
+    gen_set_label(l_b_lo);
+    tcg_gen_sextract_tl(t2, t0, 0, 16);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_c_hi);
+    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_lo_lt);
+    tcg_gen_ori_tl(t3, t3, 0x00000001);
+    tcg_gen_br(l_c_hi);
+    gen_set_label(l_b_lo_lt);
+    tcg_gen_ori_tl(t3, t3, 0x0000ffff);
+    tcg_gen_br(l_c_hi);
+
+    gen_set_label(l_c_hi);
+    tcg_gen_sextract_tl(t2, t1, 16, 16);
+    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_hi_lt);
+    tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_c_hi_gt);
+    tcg_gen_movi_tl(t4, 0);
+    tcg_gen_br(l_c_lo);
+    gen_set_label(l_c_hi_lt);
+    tcg_gen_movi_tl(t4, 0xffff0000);
+    tcg_gen_br(l_c_lo);
+    gen_set_label(l_c_hi_gt);
+    tcg_gen_movi_tl(t4, 0x00010000);
+
+    gen_set_label(l_c_lo);
+    tcg_gen_sextract_tl(t2, t1, 0, 16);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_done);
+    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_lo_lt);
+    tcg_gen_ori_tl(t4, t4, 0x00000001);
+    tcg_gen_br(l_done);
+    gen_set_label(l_c_lo_lt);
+    tcg_gen_ori_tl(t4, t4, 0x0000ffff);
+
+    gen_set_label(l_done);
+    gen_store_mxu_gpr(t3, XRa);
+    gen_store_mxu_gpr(t4, XRd);
+}
+
+/*
+ *  S32SFL XRa, XRd, XRb, XRc
+ *    Shuffle bytes according to one of four patterns.
+ */
+static void gen_mxu_s32sfl(DisasContext *ctx)
+{
+    uint32_t XRd, XRc, XRb, XRa, ptn2;
+
+    XRd  = extract32(ctx->opcode, 18, 4);
+    XRc  = extract32(ctx->opcode, 14, 4);
+    XRb  = extract32(ctx->opcode, 10, 4);
+    XRa  = extract32(ctx->opcode,  6, 4);
+    ptn2 = extract32(ctx->opcode, 24, 2);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    TCGv t3 = tcg_temp_new();
+
+    gen_load_mxu_gpr(t0, XRb);
+    gen_load_mxu_gpr(t1, XRc);
+
+    switch (ptn2) {
+    case 0:
+        tcg_gen_andi_tl(t2, t0, 0xff000000);
+        tcg_gen_andi_tl(t3, t1, 0x000000ff);
+        tcg_gen_deposit_tl(t3, t3, t0,  8, 8);
+        tcg_gen_shri_tl(t0, t0,  8);
+        tcg_gen_shri_tl(t1, t1,  8);
+        tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
+        tcg_gen_deposit_tl(t3, t3, t1, 16, 8);
+        tcg_gen_shri_tl(t0, t0,  8);
+        tcg_gen_shri_tl(t1, t1,  8);
+        tcg_gen_deposit_tl(t2, t2, t0,  8, 8);
+        tcg_gen_deposit_tl(t2, t2, t1,  0, 8);
+        tcg_gen_shri_tl(t1, t1,  8);
+        tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
+        break;
+    case 1:
+        tcg_gen_andi_tl(t2, t0, 0xff000000);
+        tcg_gen_andi_tl(t3, t1, 0x000000ff);
+        tcg_gen_deposit_tl(t3, t3, t0, 16, 8);
+        tcg_gen_shri_tl(t0, t0,  8);
+        tcg_gen_shri_tl(t1, t1,  8);
+        tcg_gen_deposit_tl(t2, t2, t0, 16, 8);
+        tcg_gen_deposit_tl(t2, t2, t1,  0, 8);
+        tcg_gen_shri_tl(t0, t0,  8);
+        tcg_gen_shri_tl(t1, t1,  8);
+        tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
+        tcg_gen_deposit_tl(t3, t3, t1,  8, 8);
+        tcg_gen_shri_tl(t1, t1,  8);
+        tcg_gen_deposit_tl(t2, t2, t1,  8, 8);
+        break;
+    case 2:
+        tcg_gen_andi_tl(t2, t0, 0xff00ff00);
+        tcg_gen_andi_tl(t3, t1, 0x00ff00ff);
+        tcg_gen_deposit_tl(t3, t3, t0,  8, 8);
+        tcg_gen_shri_tl(t0, t0, 16);
+        tcg_gen_shri_tl(t1, t1,  8);
+        tcg_gen_deposit_tl(t2, t2, t1,  0, 8);
+        tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
+        tcg_gen_shri_tl(t1, t1, 16);
+        tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
+        break;
+    case 3:
+        tcg_gen_andi_tl(t2, t0, 0xffff0000);
+        tcg_gen_andi_tl(t3, t1, 0x0000ffff);
+        tcg_gen_shri_tl(t1, t1, 16);
+        tcg_gen_deposit_tl(t2, t2, t1,  0, 16);
+        tcg_gen_deposit_tl(t3, t3, t0, 16, 16);
+        break;
+    }
+
+    gen_store_mxu_gpr(t2, XRa);
+    gen_store_mxu_gpr(t3, XRd);
+}
+
+/*
+ *  Q8SAD XRa, XRd, XRb, XRc
+ *    Typical SAD opration for motion estimation.
+ */
+static void gen_mxu_q8sad(DisasContext *ctx)
+{
+    uint32_t XRd, XRc, XRb, XRa;
+
+    XRd = extract32(ctx->opcode, 18, 4);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    TCGv t3 = tcg_temp_new();
+    TCGv t4 = tcg_temp_new();
+    TCGv t5 = tcg_temp_new();
+
+    gen_load_mxu_gpr(t2, XRb);
+    gen_load_mxu_gpr(t3, XRc);
+    gen_load_mxu_gpr(t5, XRd);
+    tcg_gen_movi_tl(t4, 0);
+
+    for (int i = 0; i < 4; i++) {
+        tcg_gen_andi_tl(t0, t2, 0xff);
+        tcg_gen_andi_tl(t1, t3, 0xff);
+        tcg_gen_sub_tl(t0, t0, t1);
+        tcg_gen_abs_tl(t0, t0);
+        tcg_gen_add_tl(t4, t4, t0);
+        if (i < 3) {
+            tcg_gen_shri_tl(t2, t2, 8);
+            tcg_gen_shri_tl(t3, t3, 8);
+        }
+    }
+    tcg_gen_add_tl(t5, t5, t4);
+    gen_store_mxu_gpr(t4, XRa);
+    gen_store_mxu_gpr(t5, XRd);
+}
 
 /*
  *                 MXU instruction category: align
@@ -1408,6 +4258,129 @@ static void gen_mxu_S32ALNI(DisasContext *ctx)
     }
 }
 
+/*
+ *  S32ALN XRc, XRb, XRa, rs
+ *    Arrange bytes from XRb and XRc according to one of five sets of
+ *    rules determined by rs[2:0], and place the result in XRa.
+ */
+static void gen_mxu_S32ALN(DisasContext *ctx)
+{
+    uint32_t rs, XRc, XRb, XRa;
+
+    rs  = extract32(ctx->opcode, 21, 5);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRa = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both operands zero registers -> just set destination to all 0s */
+        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+    } else {
+        /* the most general case */
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGv t2 = tcg_temp_new();
+        TCGv t3 = tcg_temp_new();
+        TCGLabel *l_exit = gen_new_label();
+        TCGLabel *l_b_only = gen_new_label();
+        TCGLabel *l_c_only = gen_new_label();
+
+        gen_load_mxu_gpr(t0, XRb);
+        gen_load_mxu_gpr(t1, XRc);
+        gen_load_gpr(t2, rs);
+        tcg_gen_andi_tl(t2, t2, 0x07);
+
+        /* do nothing for undefined cases */
+        tcg_gen_brcondi_tl(TCG_COND_GE, t2, 5, l_exit);
+
+        tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_b_only);
+        tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 4, l_c_only);
+
+        tcg_gen_shli_tl(t2, t2, 3);
+        tcg_gen_subfi_tl(t3, 32, t2);
+
+        tcg_gen_shl_tl(t0, t0, t2);
+        tcg_gen_shr_tl(t1, t1, t3);
+        tcg_gen_or_tl(mxu_gpr[XRa - 1], t0, t1);
+        tcg_gen_br(l_exit);
+
+        gen_set_label(l_b_only);
+        gen_store_mxu_gpr(t0, XRa);
+        tcg_gen_br(l_exit);
+
+        gen_set_label(l_c_only);
+        gen_store_mxu_gpr(t1, XRa);
+
+        gen_set_label(l_exit);
+    }
+}
+
+/*
+ *  S32MADD XRa, XRd, rb, rc
+ *    32 to 64 bit signed multiply with subsequent add
+ *    result stored in {XRa, XRd} pair, stain HI/LO.
+ *  S32MADDU XRa, XRd, rb, rc
+ *    32 to 64 bit unsigned multiply with subsequent add
+ *    result stored in {XRa, XRd} pair, stain HI/LO.
+ *  S32MSUB XRa, XRd, rb, rc
+ *    32 to 64 bit signed multiply with subsequent subtract
+ *    result stored in {XRa, XRd} pair, stain HI/LO.
+ *  S32MSUBU XRa, XRd, rb, rc
+ *    32 to 64 bit unsigned multiply with subsequent subtract
+ *    result stored in {XRa, XRd} pair, stain HI/LO.
+ */
+static void gen_mxu_s32madd_sub(DisasContext *ctx, bool sub, bool uns)
+{
+    uint32_t XRa, XRd, Rb, Rc;
+
+    XRa  = extract32(ctx->opcode,  6, 4);
+    XRd  = extract32(ctx->opcode, 10, 4);
+    Rb   = extract32(ctx->opcode, 16, 5);
+    Rc   = extract32(ctx->opcode, 21, 5);
+
+    if (unlikely(Rb == 0 || Rc == 0)) {
+        /* do nothing because x + 0 * y => x */
+    } else if (unlikely(XRa == 0 && XRd == 0)) {
+        /* do nothing because result just dropped */
+    } else {
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGv_i64 t2 = tcg_temp_new_i64();
+        TCGv_i64 t3 = tcg_temp_new_i64();
+
+        gen_load_gpr(t0, Rb);
+        gen_load_gpr(t1, Rc);
+
+        if (uns) {
+            tcg_gen_extu_tl_i64(t2, t0);
+            tcg_gen_extu_tl_i64(t3, t1);
+        } else {
+            tcg_gen_ext_tl_i64(t2, t0);
+            tcg_gen_ext_tl_i64(t3, t1);
+        }
+        tcg_gen_mul_i64(t2, t2, t3);
+
+        gen_load_mxu_gpr(t0, XRa);
+        gen_load_mxu_gpr(t1, XRd);
+
+        tcg_gen_concat_tl_i64(t3, t1, t0);
+        if (sub) {
+            tcg_gen_sub_i64(t3, t3, t2);
+        } else {
+            tcg_gen_add_i64(t3, t3, t2);
+        }
+        gen_move_low32(t1, t3);
+        gen_move_high32(t0, t3);
+
+        tcg_gen_mov_tl(cpu_HI[0], t0);
+        tcg_gen_mov_tl(cpu_LO[0], t1);
+
+        gen_store_mxu_gpr(t1, XRd);
+        gen_store_mxu_gpr(t0, XRa);
+    }
+}
 
 /*
  * Decoding engine for MXU
@@ -1431,6 +4404,116 @@ static void decode_opc_mxu__pool00(DisasContext *ctx)
     case OPC_MXU_Q8MIN:
         gen_mxu_Q8MAX_Q8MIN(ctx);
         break;
+    case OPC_MXU_Q8SLT:
+        gen_mxu_q8slt(ctx, false);
+        break;
+    case OPC_MXU_Q8SLTU:
+        gen_mxu_q8slt(ctx, true);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static bool decode_opc_mxu_s32madd_sub(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 0, 6);
+    uint32_t pad  = extract32(ctx->opcode, 14, 2);
+
+    if (pad != 2) {
+        /* MIPS32R1 MADD/MADDU/MSUB/MSUBU are on pad == 0 */
+        return false;
+    }
+
+    switch (opcode) {
+    case OPC_MXU_S32MADD:
+        gen_mxu_s32madd_sub(ctx, false, false);
+        break;
+    case OPC_MXU_S32MADDU:
+        gen_mxu_s32madd_sub(ctx, false, true);
+        break;
+    case OPC_MXU_S32MSUB:
+        gen_mxu_s32madd_sub(ctx, true, false);
+        break;
+    case OPC_MXU_S32MSUBU:
+        gen_mxu_s32madd_sub(ctx, true, true);
+        break;
+    default:
+        return false;
+    }
+    return true;
+}
+
+static void decode_opc_mxu__pool01(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+    switch (opcode) {
+    case OPC_MXU_S32SLT:
+        gen_mxu_S32SLT(ctx);
+        break;
+    case OPC_MXU_D16SLT:
+        gen_mxu_D16SLT(ctx);
+        break;
+    case OPC_MXU_D16AVG:
+        gen_mxu_d16avg(ctx, false);
+        break;
+    case OPC_MXU_D16AVGR:
+        gen_mxu_d16avg(ctx, true);
+        break;
+    case OPC_MXU_Q8AVG:
+        gen_mxu_q8avg(ctx, false);
+        break;
+    case OPC_MXU_Q8AVGR:
+        gen_mxu_q8avg(ctx, true);
+        break;
+    case OPC_MXU_Q8ADD:
+        gen_mxu_Q8ADD(ctx);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool02(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+    switch (opcode) {
+    case OPC_MXU_S32CPS:
+        gen_mxu_S32CPS(ctx);
+        break;
+    case OPC_MXU_D16CPS:
+        gen_mxu_D16CPS(ctx);
+        break;
+    case OPC_MXU_Q8ABD:
+        gen_mxu_Q8ABD(ctx);
+        break;
+    case OPC_MXU_Q16SAT:
+        gen_mxu_Q16SAT(ctx);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool03(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 24, 2);
+
+    switch (opcode) {
+    case OPC_MXU_D16MULF:
+        gen_mxu_d16mul(ctx, true, true);
+        break;
+    case OPC_MXU_D16MULE:
+        gen_mxu_d16mul(ctx, true, false);
+        break;
     default:
         MIPS_INVAL("decode_opc_mxu");
         gen_reserved_instruction(ctx);
@@ -1440,12 +4523,215 @@ static void decode_opc_mxu__pool00(DisasContext *ctx)
 
 static void decode_opc_mxu__pool04(DisasContext *ctx)
 {
-    uint32_t opcode = extract32(ctx->opcode, 20, 1);
+    uint32_t reversed = extract32(ctx->opcode, 20, 1);
+    uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+    /* Don't care about opcode bits as their meaning is unknown yet */
+    switch (opcode) {
+    default:
+        gen_mxu_s32ldxx(ctx, reversed, false);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool05(DisasContext *ctx)
+{
+    uint32_t reversed = extract32(ctx->opcode, 20, 1);
+    uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+    /* Don't care about opcode bits as their meaning is unknown yet */
+    switch (opcode) {
+    default:
+        gen_mxu_s32stxx(ctx, reversed, false);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool06(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 10, 4);
+    uint32_t strd2  = extract32(ctx->opcode, 14, 2);
 
     switch (opcode) {
-    case OPC_MXU_S32LDD:
-    case OPC_MXU_S32LDDR:
-        gen_mxu_s32ldd_s32lddr(ctx);
+    case OPC_MXU_S32LDST:
+    case OPC_MXU_S32LDSTR:
+        if (strd2 <= 2) {
+            gen_mxu_s32ldxvx(ctx, opcode, false, strd2);
+            break;
+        }
+        /* fallthrough */
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool07(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 10, 4);
+    uint32_t strd2  = extract32(ctx->opcode, 14, 2);
+
+    switch (opcode) {
+    case OPC_MXU_S32LDST:
+    case OPC_MXU_S32LDSTR:
+        if (strd2 <= 2) {
+            gen_mxu_s32stxvx(ctx, opcode, false, strd2);
+            break;
+        }
+        /* fallthrough */
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool08(DisasContext *ctx)
+{
+    uint32_t reversed = extract32(ctx->opcode, 20, 1);
+    uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+    /* Don't care about opcode bits as their meaning is unknown yet */
+    switch (opcode) {
+    default:
+        gen_mxu_s32ldxx(ctx, reversed, true);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool09(DisasContext *ctx)
+{
+    uint32_t reversed = extract32(ctx->opcode, 20, 1);
+    uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+    /* Don't care about opcode bits as their meaning is unknown yet */
+    switch (opcode) {
+    default:
+        gen_mxu_s32stxx(ctx, reversed, true);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool10(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 10, 4);
+    uint32_t strd2  = extract32(ctx->opcode, 14, 2);
+
+    switch (opcode) {
+    case OPC_MXU_S32LDST:
+    case OPC_MXU_S32LDSTR:
+        if (strd2 <= 2) {
+            gen_mxu_s32ldxvx(ctx, opcode, true, strd2);
+            break;
+        }
+        /* fallthrough */
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool11(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 10, 4);
+    uint32_t strd2  = extract32(ctx->opcode, 14, 2);
+
+    switch (opcode) {
+    case OPC_MXU_S32LDST:
+    case OPC_MXU_S32LDSTR:
+        if (strd2 <= 2) {
+            gen_mxu_s32stxvx(ctx, opcode, true, strd2);
+            break;
+        }
+        /* fallthrough */
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool12(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+    switch (opcode) {
+    case OPC_MXU_D32ACC:
+        gen_mxu_d32acc(ctx);
+        break;
+    case OPC_MXU_D32ACCM:
+        gen_mxu_d32accm(ctx);
+        break;
+    case OPC_MXU_D32ASUM:
+        gen_mxu_d32asum(ctx);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool13(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+    switch (opcode) {
+    case OPC_MXU_Q16ACC:
+        gen_mxu_q16acc(ctx);
+        break;
+    case OPC_MXU_Q16ACCM:
+        gen_mxu_q16accm(ctx);
+        break;
+    case OPC_MXU_D16ASUM:
+        gen_mxu_d16asum(ctx);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool14(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+    switch (opcode) {
+    case OPC_MXU_Q8ADDE:
+        gen_mxu_q8adde(ctx, false);
+        break;
+    case OPC_MXU_D8SUM:
+        gen_mxu_d8sum(ctx, false);
+        break;
+    case OPC_MXU_D8SUMC:
+        gen_mxu_d8sum(ctx, true);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool15(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 14, 2);
+
+    switch (opcode) {
+    case OPC_MXU_S32MUL:
+        gen_mxu_s32mul(ctx, false);
+        break;
+    case OPC_MXU_S32MULU:
+        gen_mxu_s32mul(ctx, true);
+        break;
+    case OPC_MXU_S32EXTR:
+        gen_mxu_s32extr(ctx);
+        break;
+    case OPC_MXU_S32EXTRV:
+        gen_mxu_s32extrv(ctx);
         break;
     default:
         MIPS_INVAL("decode_opc_mxu");
@@ -1459,9 +4745,18 @@ static void decode_opc_mxu__pool16(DisasContext *ctx)
     uint32_t opcode = extract32(ctx->opcode, 18, 3);
 
     switch (opcode) {
+    case OPC_MXU_D32SARW:
+        gen_mxu_d32sarl(ctx, true);
+        break;
+    case OPC_MXU_S32ALN:
+        gen_mxu_S32ALN(ctx);
+        break;
     case OPC_MXU_S32ALNI:
         gen_mxu_S32ALNI(ctx);
         break;
+    case OPC_MXU_S32LUI:
+        gen_mxu_s32lui(ctx);
+        break;
     case OPC_MXU_S32NOR:
         gen_mxu_S32NOR(ctx);
         break;
@@ -1481,14 +4776,128 @@ static void decode_opc_mxu__pool16(DisasContext *ctx)
     }
 }
 
+static void decode_opc_mxu__pool17(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 6, 3);
+    uint32_t strd2  = extract32(ctx->opcode, 9, 2);
+
+    if (strd2 > 2) {
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        return;
+    }
+
+    switch (opcode) {
+    case OPC_MXU_LXW:
+          gen_mxu_lxx(ctx, strd2, MO_TE | MO_UL);
+          break;
+    case OPC_MXU_LXB:
+          gen_mxu_lxx(ctx, strd2, MO_TE | MO_SB);
+          break;
+    case OPC_MXU_LXH:
+          gen_mxu_lxx(ctx, strd2, MO_TE | MO_SW);
+          break;
+    case OPC_MXU_LXBU:
+          gen_mxu_lxx(ctx, strd2, MO_TE | MO_UB);
+          break;
+    case OPC_MXU_LXHU:
+          gen_mxu_lxx(ctx, strd2, MO_TE | MO_UW);
+          break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool18(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+    switch (opcode) {
+    case OPC_MXU_D32SLLV:
+        gen_mxu_d32sxxv(ctx, false, false);
+        break;
+    case OPC_MXU_D32SLRV:
+        gen_mxu_d32sxxv(ctx, true, false);
+        break;
+    case OPC_MXU_D32SARV:
+        gen_mxu_d32sxxv(ctx, true, true);
+        break;
+    case OPC_MXU_Q16SLLV:
+        gen_mxu_q16sxxv(ctx, false, false);
+        break;
+    case OPC_MXU_Q16SLRV:
+        gen_mxu_q16sxxv(ctx, true, false);
+        break;
+    case OPC_MXU_Q16SARV:
+        gen_mxu_q16sxxv(ctx, true, true);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
 static void decode_opc_mxu__pool19(DisasContext *ctx)
 {
-    uint32_t opcode = extract32(ctx->opcode, 22, 2);
+    uint32_t opcode = extract32(ctx->opcode, 22, 4);
 
     switch (opcode) {
     case OPC_MXU_Q8MUL:
+        gen_mxu_q8mul_mac(ctx, false, false);
+        break;
     case OPC_MXU_Q8MULSU:
-        gen_mxu_q8mul_q8mulsu(ctx);
+        gen_mxu_q8mul_mac(ctx, true, false);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool20(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+    switch (opcode) {
+    case OPC_MXU_Q8MOVZ:
+        gen_mxu_q8movzn(ctx, TCG_COND_NE);
+        break;
+    case OPC_MXU_Q8MOVN:
+        gen_mxu_q8movzn(ctx, TCG_COND_EQ);
+        break;
+    case OPC_MXU_D16MOVZ:
+        gen_mxu_d16movzn(ctx, TCG_COND_NE);
+        break;
+    case OPC_MXU_D16MOVN:
+        gen_mxu_d16movzn(ctx, TCG_COND_EQ);
+        break;
+    case OPC_MXU_S32MOVZ:
+        gen_mxu_s32movzn(ctx, TCG_COND_NE);
+        break;
+    case OPC_MXU_S32MOVN:
+        gen_mxu_s32movzn(ctx, TCG_COND_EQ);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+static void decode_opc_mxu__pool21(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+    switch (opcode) {
+    case OPC_MXU_Q8MAC:
+        gen_mxu_q8mul_mac(ctx, false, true);
+        break;
+    case OPC_MXU_Q8MACSU:
+        gen_mxu_q8mul_mac(ctx, true, true);
         break;
     default:
         MIPS_INVAL("decode_opc_mxu");
@@ -1497,6 +4906,7 @@ static void decode_opc_mxu__pool19(DisasContext *ctx)
     }
 }
 
+
 bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
 {
     uint32_t opcode = extract32(insn, 0, 6);
@@ -1520,30 +4930,163 @@ bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
         tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit);
 
         switch (opcode) {
+        case OPC_MXU_S32MADD:
+        case OPC_MXU_S32MADDU:
+        case OPC_MXU_S32MSUB:
+        case OPC_MXU_S32MSUBU:
+            return decode_opc_mxu_s32madd_sub(ctx);
         case OPC_MXU__POOL00:
             decode_opc_mxu__pool00(ctx);
             break;
         case OPC_MXU_D16MUL:
-            gen_mxu_d16mul(ctx);
+            gen_mxu_d16mul(ctx, false, false);
             break;
         case OPC_MXU_D16MAC:
-            gen_mxu_d16mac(ctx);
+            gen_mxu_d16mac(ctx, false, false);
+            break;
+        case OPC_MXU_D16MACF:
+            gen_mxu_d16mac(ctx, true, true);
+            break;
+        case OPC_MXU_D16MADL:
+            gen_mxu_d16madl(ctx);
+            break;
+        case OPC_MXU_S16MAD:
+            gen_mxu_s16mad(ctx);
+            break;
+        case OPC_MXU_Q16ADD:
+            gen_mxu_q16add(ctx);
+            break;
+        case OPC_MXU_D16MACE:
+            gen_mxu_d16mac(ctx, true, false);
+            break;
+        case OPC_MXU__POOL01:
+            decode_opc_mxu__pool01(ctx);
+            break;
+        case OPC_MXU__POOL02:
+            decode_opc_mxu__pool02(ctx);
+            break;
+        case OPC_MXU__POOL03:
+            decode_opc_mxu__pool03(ctx);
             break;
         case OPC_MXU__POOL04:
             decode_opc_mxu__pool04(ctx);
             break;
+        case OPC_MXU__POOL05:
+            decode_opc_mxu__pool05(ctx);
+            break;
+        case OPC_MXU__POOL06:
+            decode_opc_mxu__pool06(ctx);
+            break;
+        case OPC_MXU__POOL07:
+            decode_opc_mxu__pool07(ctx);
+            break;
+        case OPC_MXU__POOL08:
+            decode_opc_mxu__pool08(ctx);
+            break;
+        case OPC_MXU__POOL09:
+            decode_opc_mxu__pool09(ctx);
+            break;
+        case OPC_MXU__POOL10:
+            decode_opc_mxu__pool10(ctx);
+            break;
+        case OPC_MXU__POOL11:
+            decode_opc_mxu__pool11(ctx);
+            break;
+        case OPC_MXU_D32ADD:
+            gen_mxu_d32add(ctx);
+            break;
+        case OPC_MXU__POOL12:
+            decode_opc_mxu__pool12(ctx);
+            break;
+        case OPC_MXU__POOL13:
+            decode_opc_mxu__pool13(ctx);
+            break;
+        case OPC_MXU__POOL14:
+            decode_opc_mxu__pool14(ctx);
+            break;
+        case OPC_MXU_Q8ACCE:
+            gen_mxu_q8adde(ctx, true);
+            break;
         case OPC_MXU_S8LDD:
-            gen_mxu_s8ldd(ctx);
+            gen_mxu_s8ldd(ctx, false);
+            break;
+        case OPC_MXU_S8STD:
+            gen_mxu_s8std(ctx, false);
+            break;
+        case OPC_MXU_S8LDI:
+            gen_mxu_s8ldd(ctx, true);
+            break;
+        case OPC_MXU_S8SDI:
+            gen_mxu_s8std(ctx, true);
+            break;
+        case OPC_MXU__POOL15:
+            decode_opc_mxu__pool15(ctx);
             break;
         case OPC_MXU__POOL16:
             decode_opc_mxu__pool16(ctx);
             break;
+        case OPC_MXU__POOL17:
+            decode_opc_mxu__pool17(ctx);
+            break;
+        case OPC_MXU_S16LDD:
+            gen_mxu_s16ldd(ctx, false);
+            break;
+        case OPC_MXU_S16STD:
+            gen_mxu_s16std(ctx, false);
+            break;
+        case OPC_MXU_S16LDI:
+            gen_mxu_s16ldd(ctx, true);
+            break;
+        case OPC_MXU_S16SDI:
+            gen_mxu_s16std(ctx, true);
+            break;
+        case OPC_MXU_D32SLL:
+            gen_mxu_d32sxx(ctx, false, false);
+            break;
+        case OPC_MXU_D32SLR:
+            gen_mxu_d32sxx(ctx, true, false);
+            break;
+        case OPC_MXU_D32SARL:
+            gen_mxu_d32sarl(ctx, false);
+            break;
+        case OPC_MXU_D32SAR:
+            gen_mxu_d32sxx(ctx, true, true);
+            break;
+        case OPC_MXU_Q16SLL:
+            gen_mxu_q16sxx(ctx, false, false);
+            break;
+        case OPC_MXU__POOL18:
+            decode_opc_mxu__pool18(ctx);
+            break;
+        case OPC_MXU_Q16SLR:
+            gen_mxu_q16sxx(ctx, true, false);
+            break;
+        case OPC_MXU_Q16SAR:
+            gen_mxu_q16sxx(ctx, true, true);
+            break;
         case OPC_MXU__POOL19:
             decode_opc_mxu__pool19(ctx);
             break;
+        case OPC_MXU__POOL20:
+            decode_opc_mxu__pool20(ctx);
+            break;
+        case OPC_MXU__POOL21:
+            decode_opc_mxu__pool21(ctx);
+            break;
+        case OPC_MXU_Q16SCOP:
+            gen_mxu_q16scop(ctx);
+            break;
+        case OPC_MXU_Q8MADL:
+            gen_mxu_q8madl(ctx);
+            break;
+        case OPC_MXU_S32SFL:
+            gen_mxu_s32sfl(ctx);
+            break;
+        case OPC_MXU_Q8SAD:
+            gen_mxu_q8sad(ctx);
+            break;
         default:
-            MIPS_INVAL("decode_opc_mxu");
-            gen_reserved_instruction(ctx);
+            return false;
         }
 
         gen_set_label(l_exit);
diff --git a/target/mips/tcg/op_helper.c b/target/mips/tcg/op_helper.c
index ef3dafcbb3..98935b5e64 100644
--- a/target/mips/tcg/op_helper.c
+++ b/target/mips/tcg/op_helper.c
@@ -257,6 +257,22 @@ void helper_pmon(CPUMIPSState *env, int function)
     }
 }
 
+#ifdef TARGET_MIPS64
+target_ulong helper_lcsr_cpucfg(CPUMIPSState *env, target_ulong rs)
+{
+    switch (rs) {
+    case 0:
+        return env->CP0_PRid;
+    case 1:
+        return env->lcsr_cpucfg1;
+    case 2:
+        return env->lcsr_cpucfg2;
+    default:
+        return 0;
+    }
+}
+#endif
+
 #if !defined(CONFIG_USER_ONLY)
 
 void mips_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
diff --git a/target/mips/tcg/sysemu/lcsr_helper.c b/target/mips/tcg/sysemu/lcsr_helper.c
new file mode 100644
index 0000000000..942143d209
--- /dev/null
+++ b/target/mips/tcg/sysemu/lcsr_helper.c
@@ -0,0 +1,45 @@
+/*
+ * Loongson CSR instructions translation routines
+ *
+ *  Copyright (c) 2023 Jiaxun Yang <jiaxun.yang@flygoat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "cpu.h"
+#include "internal.h"
+#include "qemu/host-utils.h"
+#include "exec/helper-proto.h"
+#include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
+
+#define GET_MEMTXATTRS(cas) \
+        ((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index})
+
+uint64_t helper_lcsr_rdcsr(CPUMIPSState *env, target_ulong r_addr)
+{
+    return address_space_ldl(&env->iocsr.as, r_addr,
+                             GET_MEMTXATTRS(env), NULL);
+}
+
+uint64_t helper_lcsr_drdcsr(CPUMIPSState *env, target_ulong r_addr)
+{
+    return address_space_ldq(&env->iocsr.as, r_addr,
+                             GET_MEMTXATTRS(env), NULL);
+}
+
+void helper_lcsr_wrcsr(CPUMIPSState *env, target_ulong w_addr,
+                      target_ulong val)
+{
+    address_space_stl(&env->iocsr.as, w_addr,
+                      val, GET_MEMTXATTRS(env), NULL);
+}
+
+void helper_lcsr_dwrcsr(CPUMIPSState *env, target_ulong w_addr,
+                      target_ulong val)
+{
+    address_space_stq(&env->iocsr.as, w_addr,
+                      val, GET_MEMTXATTRS(env), NULL);
+}
diff --git a/target/mips/tcg/sysemu/meson.build b/target/mips/tcg/sysemu/meson.build
index 43b35b3803..ec665a4b1e 100644
--- a/target/mips/tcg/sysemu/meson.build
+++ b/target/mips/tcg/sysemu/meson.build
@@ -4,3 +4,7 @@ mips_system_ss.add(files(
   'special_helper.c',
   'tlb_helper.c',
 ))
+
+mips_system_ss.add(when: 'TARGET_MIPS64', if_true: files(
+  'lcsr_helper.c',
+))
diff --git a/target/mips/tcg/sysemu_helper.h.inc b/target/mips/tcg/sysemu_helper.h.inc
index af585b5d9c..f163af1eac 100644
--- a/target/mips/tcg/sysemu_helper.h.inc
+++ b/target/mips/tcg/sysemu_helper.h.inc
@@ -181,3 +181,11 @@ DEF_HELPER_1(eret, void, env)
 DEF_HELPER_1(eretnc, void, env)
 DEF_HELPER_1(deret, void, env)
 DEF_HELPER_3(cache, void, env, tl, i32)
+
+#ifdef TARGET_MIPS64
+/* Longson CSR */
+DEF_HELPER_2(lcsr_rdcsr, i64, env, tl)
+DEF_HELPER_2(lcsr_drdcsr, i64, env, tl)
+DEF_HELPER_3(lcsr_wrcsr, void, env, tl, tl)
+DEF_HELPER_3(lcsr_dwrcsr, void, env, tl, tl)
+#endif
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
index 74af91e4f5..9bb40f1849 100644
--- a/target/mips/tcg/translate.c
+++ b/target/mips/tcg/translate.c
@@ -14644,12 +14644,9 @@ static bool decode_opc_legacy(CPUMIPSState *env, DisasContext *ctx)
         }
 #endif
         if (TARGET_LONG_BITS == 32 && (ctx->insn_flags & ASE_MXU)) {
-            if (MASK_SPECIAL2(ctx->opcode) == OPC_MUL) {
-                gen_arith(ctx, OPC_MUL, rd, rs, rt);
-            } else {
-                decode_ase_mxu(ctx, ctx->opcode);
+            if (decode_ase_mxu(ctx, ctx->opcode)) {
+                break;
             }
-            break;
         }
         decode_opc_special2_legacy(env, ctx);
         break;
@@ -15352,6 +15349,9 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
         return;
     }
 #if defined(TARGET_MIPS64)
+    if (ase_lcsr_available(env) && decode_ase_lcsr(ctx, ctx->opcode)) {
+        return;
+    }
     if (cpu_supports_isa(env, INSN_OCTEON) && decode_ext_octeon(ctx, ctx->opcode)) {
         return;
     }
diff --git a/target/mips/tcg/translate.h b/target/mips/tcg/translate.h
index 3b0498a47a..db3dc932c7 100644
--- a/target/mips/tcg/translate.h
+++ b/target/mips/tcg/translate.h
@@ -221,6 +221,7 @@ bool decode_isa_rel6(DisasContext *ctx, uint32_t insn);
 bool decode_ase_msa(DisasContext *ctx, uint32_t insn);
 bool decode_ext_txx9(DisasContext *ctx, uint32_t insn);
 #if defined(TARGET_MIPS64)
+bool decode_ase_lcsr(DisasContext *ctx, uint32_t insn);
 bool decode_ext_tx79(DisasContext *ctx, uint32_t insn);
 bool decode_ext_octeon(DisasContext *ctx, uint32_t insn);
 #endif
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 4035fe0e62..9339c0241d 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -34,16 +34,11 @@
 #include "migration/vmstate.h"
 #include "fpu/softfloat-helpers.h"
 #include "sysemu/kvm.h"
+#include "sysemu/tcg.h"
 #include "kvm_riscv.h"
 #include "tcg/tcg.h"
 
 /* RISC-V CPU definitions */
-
-#define RISCV_CPU_MARCHID   ((QEMU_VERSION_MAJOR << 16) | \
-                             (QEMU_VERSION_MINOR << 8)  | \
-                             (QEMU_VERSION_MICRO))
-#define RISCV_CPU_MIMPID    RISCV_CPU_MARCHID
-
 static const char riscv_single_letter_exts[] = "IEMAFDQCPVH";
 
 struct isa_ext_data {
@@ -56,6 +51,17 @@ struct isa_ext_data {
     {#_name, _min_ver, offsetof(struct RISCVCPUConfig, _prop)}
 
 /*
+ * From vector_helper.c
+ * Note that vector data is stored in host-endian 64-bit chunks,
+ * so addressing bytes needs a host-endian fixup.
+ */
+#if HOST_BIG_ENDIAN
+#define BYTE(x)   ((x) ^ 7)
+#else
+#define BYTE(x)   (x)
+#endif
+
+/*
  * Here are the ordering rules of extension naming defined by RISC-V
  * specification :
  * 1. All extensions should be separated from other multi-letter extensions
@@ -83,6 +89,8 @@ static const struct isa_ext_data isa_edata_arr[] = {
     ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_ifencei),
     ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, ext_zihintpause),
     ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs),
+    ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa),
+    ISA_EXT_DATA_ENTRY(zfbfmin, PRIV_VERSION_1_12_0, ext_zfbfmin),
     ISA_EXT_DATA_ENTRY(zfh, PRIV_VERSION_1_11_0, ext_zfh),
     ISA_EXT_DATA_ENTRY(zfhmin, PRIV_VERSION_1_11_0, ext_zfhmin),
     ISA_EXT_DATA_ENTRY(zfinx, PRIV_VERSION_1_12_0, ext_zfinx),
@@ -114,6 +122,8 @@ static const struct isa_ext_data isa_edata_arr[] = {
     ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f),
     ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f),
     ISA_EXT_DATA_ENTRY(zve64d, PRIV_VERSION_1_10_0, ext_zve64d),
+    ISA_EXT_DATA_ENTRY(zvfbfmin, PRIV_VERSION_1_12_0, ext_zvfbfmin),
+    ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma),
     ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh),
     ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin),
     ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
@@ -183,6 +193,14 @@ const char * const riscv_fpr_regnames[] = {
     "f30/ft10", "f31/ft11"
 };
 
+const char * const riscv_rvv_regnames[] = {
+  "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",
+  "v7",  "v8",  "v9",  "v10", "v11", "v12", "v13",
+  "v14", "v15", "v16", "v17", "v18", "v19", "v20",
+  "v21", "v22", "v23", "v24", "v25", "v26", "v27",
+  "v28", "v29", "v30", "v31"
+};
+
 static const char * const riscv_excp_names[] = {
     "misaligned_fetch",
     "fault_fetch",
@@ -412,6 +430,7 @@ static void rv64_thead_c906_cpu_init(Object *obj)
     set_misa(env, MXL_RV64, RVG | RVC | RVS | RVU);
     env->priv_ver = PRIV_VERSION_1_11_0;
 
+    cpu->cfg.ext_zfa = true;
     cpu->cfg.ext_zfh = true;
     cpu->cfg.mmu = true;
     cpu->cfg.ext_xtheadba = true;
@@ -444,6 +463,9 @@ static void rv64_veyron_v1_cpu_init(Object *obj)
 
     /* Enable ISA extensions */
     cpu->cfg.mmu = true;
+    cpu->cfg.ext_ifencei = true;
+    cpu->cfg.ext_icsr = true;
+    cpu->cfg.pmp = true;
     cpu->cfg.ext_icbom = true;
     cpu->cfg.cbom_blocksize = 64;
     cpu->cfg.cboz_blocksize = 64;
@@ -608,7 +630,8 @@ static void riscv_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 {
     RISCVCPU *cpu = RISCV_CPU(cs);
     CPURISCVState *env = &cpu->env;
-    int i;
+    int i, j;
+    uint8_t *p;
 
 #if !defined(CONFIG_USER_ONLY)
     if (riscv_has_ext(env, RVH)) {
@@ -692,6 +715,41 @@ static void riscv_cpu_dump_state(CPUState *cs, FILE *f, int flags)
             }
         }
     }
+    if (riscv_has_ext(env, RVV) && (flags & CPU_DUMP_VPU)) {
+        static const int dump_rvv_csrs[] = {
+                    CSR_VSTART,
+                    CSR_VXSAT,
+                    CSR_VXRM,
+                    CSR_VCSR,
+                    CSR_VL,
+                    CSR_VTYPE,
+                    CSR_VLENB,
+                };
+        for (int i = 0; i < ARRAY_SIZE(dump_rvv_csrs); ++i) {
+            int csrno = dump_rvv_csrs[i];
+            target_ulong val = 0;
+            RISCVException res = riscv_csrrw_debug(env, csrno, &val, 0, 0);
+
+            /*
+             * Rely on the smode, hmode, etc, predicates within csr.c
+             * to do the filtering of the registers that are present.
+             */
+            if (res == RISCV_EXCP_NONE) {
+                qemu_fprintf(f, " %-8s " TARGET_FMT_lx "\n",
+                             csr_ops[csrno].name, val);
+            }
+        }
+        uint16_t vlenb = cpu->cfg.vlen >> 3;
+
+        for (i = 0; i < 32; i++) {
+            qemu_fprintf(f, " %-8s ", riscv_rvv_regnames[i]);
+            p = (uint8_t *)env->vreg;
+            for (j = vlenb - 1 ; j >= 0; j--) {
+                qemu_fprintf(f, "%02x", *(p + i * vlenb + BYTE(j)));
+            }
+            qemu_fprintf(f, "\n");
+        }
+    }
 }
 
 static void riscv_cpu_set_pc(CPUState *cs, vaddr value)
@@ -858,9 +916,10 @@ static void riscv_cpu_reset_hold(Object *obj)
 static void riscv_cpu_disas_set_info(CPUState *s, disassemble_info *info)
 {
     RISCVCPU *cpu = RISCV_CPU(s);
+    CPURISCVState *env = &cpu->env;
     info->target_info = &cpu->cfg;
 
-    switch (riscv_cpu_mxl(&cpu->env)) {
+    switch (env->xl) {
     case MXL_RV32:
         info->print_insn = print_insn_riscv32;
         break;
@@ -1050,6 +1109,11 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
         return;
     }
 
+    if (cpu->cfg.ext_zfa && !riscv_has_ext(env, RVF)) {
+        error_setg(errp, "Zfa extension requires F extension");
+        return;
+    }
+
     if (cpu->cfg.ext_zfh) {
         cpu->cfg.ext_zfhmin = true;
     }
@@ -1059,6 +1123,11 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
         return;
     }
 
+    if (cpu->cfg.ext_zfbfmin && !riscv_has_ext(env, RVF)) {
+        error_setg(errp, "Zfbfmin extension depends on F extension");
+        return;
+    }
+
     if (riscv_has_ext(env, RVD) && !riscv_has_ext(env, RVF)) {
         error_setg(errp, "D extension requires F extension");
         return;
@@ -1109,6 +1178,21 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
         return;
     }
 
+    if (cpu->cfg.ext_zvfbfmin && !cpu->cfg.ext_zfbfmin) {
+        error_setg(errp, "Zvfbfmin extension depends on Zfbfmin extension");
+        return;
+    }
+
+    if (cpu->cfg.ext_zvfbfmin && !cpu->cfg.ext_zve32f) {
+        error_setg(errp, "Zvfbfmin extension depends on Zve32f extension");
+        return;
+    }
+
+    if (cpu->cfg.ext_zvfbfwma && !cpu->cfg.ext_zvfbfmin) {
+        error_setg(errp, "Zvfbfwma extension depends on Zvfbfmin extension");
+        return;
+    }
+
     /* Set the ISA extensions, checks should have happened above */
     if (cpu->cfg.ext_zhinx) {
         cpu->cfg.ext_zhinxmin = true;
@@ -1304,20 +1388,12 @@ static void riscv_cpu_validate_misa_priv(CPURISCVState *env, Error **errp)
     }
 }
 
-static void riscv_cpu_realize(DeviceState *dev, Error **errp)
+static void riscv_cpu_realize_tcg(DeviceState *dev, Error **errp)
 {
-    CPUState *cs = CPU(dev);
     RISCVCPU *cpu = RISCV_CPU(dev);
     CPURISCVState *env = &cpu->env;
-    RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev);
     Error *local_err = NULL;
 
-    cpu_exec_realizefn(cs, &local_err);
-    if (local_err != NULL) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
     riscv_cpu_validate_misa_mxl(cpu, &local_err);
     if (local_err != NULL) {
         error_propagate(errp, local_err);
@@ -1352,7 +1428,7 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
     }
 
 #ifndef CONFIG_USER_ONLY
-    cs->tcg_cflags |= CF_PCREL;
+    CPU(dev)->tcg_cflags |= CF_PCREL;
 
     if (cpu->cfg.ext_sstc) {
         riscv_timer_init(cpu);
@@ -1365,6 +1441,28 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
         }
      }
 #endif
+}
+
+static void riscv_cpu_realize(DeviceState *dev, Error **errp)
+{
+    CPUState *cs = CPU(dev);
+    RISCVCPU *cpu = RISCV_CPU(dev);
+    RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev);
+    Error *local_err = NULL;
+
+    cpu_exec_realizefn(cs, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (tcg_enabled()) {
+        riscv_cpu_realize_tcg(dev, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
 
     riscv_cpu_finalize_features(cpu, &local_err);
     if (local_err != NULL) {
@@ -1545,33 +1643,83 @@ static void cpu_get_misa_ext_cfg(Object *obj, Visitor *v, const char *name,
     visit_type_bool(v, name, &value, errp);
 }
 
-static const RISCVCPUMisaExtConfig misa_ext_cfgs[] = {
-    {.name = "a", .description = "Atomic instructions",
-     .misa_bit = RVA, .enabled = true},
-    {.name = "c", .description = "Compressed instructions",
-     .misa_bit = RVC, .enabled = true},
-    {.name = "d", .description = "Double-precision float point",
-     .misa_bit = RVD, .enabled = true},
-    {.name = "f", .description = "Single-precision float point",
-     .misa_bit = RVF, .enabled = true},
-    {.name = "i", .description = "Base integer instruction set",
-     .misa_bit = RVI, .enabled = true},
-    {.name = "e", .description = "Base integer instruction set (embedded)",
-     .misa_bit = RVE, .enabled = false},
-    {.name = "m", .description = "Integer multiplication and division",
-     .misa_bit = RVM, .enabled = true},
-    {.name = "s", .description = "Supervisor-level instructions",
-     .misa_bit = RVS, .enabled = true},
-    {.name = "u", .description = "User-level instructions",
-     .misa_bit = RVU, .enabled = true},
-    {.name = "h", .description = "Hypervisor",
-     .misa_bit = RVH, .enabled = true},
-    {.name = "x-j", .description = "Dynamic translated languages",
-     .misa_bit = RVJ, .enabled = false},
-    {.name = "v", .description = "Vector operations",
-     .misa_bit = RVV, .enabled = false},
-    {.name = "g", .description = "General purpose (IMAFD_Zicsr_Zifencei)",
-     .misa_bit = RVG, .enabled = false},
+typedef struct misa_ext_info {
+    const char *name;
+    const char *description;
+} MISAExtInfo;
+
+#define MISA_INFO_IDX(_bit) \
+    __builtin_ctz(_bit)
+
+#define MISA_EXT_INFO(_bit, _propname, _descr) \
+    [MISA_INFO_IDX(_bit)] = {.name = _propname, .description = _descr}
+
+static const MISAExtInfo misa_ext_info_arr[] = {
+    MISA_EXT_INFO(RVA, "a", "Atomic instructions"),
+    MISA_EXT_INFO(RVC, "c", "Compressed instructions"),
+    MISA_EXT_INFO(RVD, "d", "Double-precision float point"),
+    MISA_EXT_INFO(RVF, "f", "Single-precision float point"),
+    MISA_EXT_INFO(RVI, "i", "Base integer instruction set"),
+    MISA_EXT_INFO(RVE, "e", "Base integer instruction set (embedded)"),
+    MISA_EXT_INFO(RVM, "m", "Integer multiplication and division"),
+    MISA_EXT_INFO(RVS, "s", "Supervisor-level instructions"),
+    MISA_EXT_INFO(RVU, "u", "User-level instructions"),
+    MISA_EXT_INFO(RVH, "h", "Hypervisor"),
+    MISA_EXT_INFO(RVJ, "x-j", "Dynamic translated languages"),
+    MISA_EXT_INFO(RVV, "v", "Vector operations"),
+    MISA_EXT_INFO(RVG, "g", "General purpose (IMAFD_Zicsr_Zifencei)"),
+};
+
+static int riscv_validate_misa_info_idx(uint32_t bit)
+{
+    int idx;
+
+    /*
+     * Our lowest valid input (RVA) is 1 and
+     * __builtin_ctz() is UB with zero.
+     */
+    g_assert(bit != 0);
+    idx = MISA_INFO_IDX(bit);
+
+    g_assert(idx < ARRAY_SIZE(misa_ext_info_arr));
+    return idx;
+}
+
+const char *riscv_get_misa_ext_name(uint32_t bit)
+{
+    int idx = riscv_validate_misa_info_idx(bit);
+    const char *val = misa_ext_info_arr[idx].name;
+
+    g_assert(val != NULL);
+    return val;
+}
+
+const char *riscv_get_misa_ext_description(uint32_t bit)
+{
+    int idx = riscv_validate_misa_info_idx(bit);
+    const char *val = misa_ext_info_arr[idx].description;
+
+    g_assert(val != NULL);
+    return val;
+}
+
+#define MISA_CFG(_bit, _enabled) \
+    {.misa_bit = _bit, .enabled = _enabled}
+
+static RISCVCPUMisaExtConfig misa_ext_cfgs[] = {
+    MISA_CFG(RVA, true),
+    MISA_CFG(RVC, true),
+    MISA_CFG(RVD, true),
+    MISA_CFG(RVF, true),
+    MISA_CFG(RVI, true),
+    MISA_CFG(RVE, false),
+    MISA_CFG(RVM, true),
+    MISA_CFG(RVS, true),
+    MISA_CFG(RVU, true),
+    MISA_CFG(RVH, true),
+    MISA_CFG(RVJ, false),
+    MISA_CFG(RVV, false),
+    MISA_CFG(RVG, false),
 };
 
 static void riscv_cpu_add_misa_properties(Object *cpu_obj)
@@ -1579,7 +1727,16 @@ static void riscv_cpu_add_misa_properties(Object *cpu_obj)
     int i;
 
     for (i = 0; i < ARRAY_SIZE(misa_ext_cfgs); i++) {
-        const RISCVCPUMisaExtConfig *misa_cfg = &misa_ext_cfgs[i];
+        RISCVCPUMisaExtConfig *misa_cfg = &misa_ext_cfgs[i];
+        int bit = misa_cfg->misa_bit;
+
+        misa_cfg->name = riscv_get_misa_ext_name(bit);
+        misa_cfg->description = riscv_get_misa_ext_description(bit);
+
+        /* Check if KVM already created the property */
+        if (object_property_find(cpu_obj, misa_cfg->name)) {
+            continue;
+        }
 
         object_property_add(cpu_obj, misa_cfg->name, "bool",
                             cpu_get_misa_ext_cfg,
@@ -1600,6 +1757,7 @@ static Property riscv_cpu_extensions[] = {
     DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
     DEFINE_PROP_BOOL("Zihintpause", RISCVCPU, cfg.ext_zihintpause, true),
     DEFINE_PROP_BOOL("Zawrs", RISCVCPU, cfg.ext_zawrs, true),
+    DEFINE_PROP_BOOL("Zfa", RISCVCPU, cfg.ext_zfa, true),
     DEFINE_PROP_BOOL("Zfh", RISCVCPU, cfg.ext_zfh, false),
     DEFINE_PROP_BOOL("Zfhmin", RISCVCPU, cfg.ext_zfhmin, false),
     DEFINE_PROP_BOOL("Zve32f", RISCVCPU, cfg.ext_zve32f, false),
@@ -1683,9 +1841,33 @@ static Property riscv_cpu_extensions[] = {
     DEFINE_PROP_BOOL("x-zvfh", RISCVCPU, cfg.ext_zvfh, false),
     DEFINE_PROP_BOOL("x-zvfhmin", RISCVCPU, cfg.ext_zvfhmin, false),
 
+    DEFINE_PROP_BOOL("x-zfbfmin", RISCVCPU, cfg.ext_zfbfmin, false),
+    DEFINE_PROP_BOOL("x-zvfbfmin", RISCVCPU, cfg.ext_zvfbfmin, false),
+    DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false),
+
     DEFINE_PROP_END_OF_LIST(),
 };
 
+
+#ifndef CONFIG_USER_ONLY
+static void cpu_set_cfg_unavailable(Object *obj, Visitor *v,
+                                    const char *name,
+                                    void *opaque, Error **errp)
+{
+    const char *propname = opaque;
+    bool value;
+
+    if (!visit_type_bool(v, name, &value, errp)) {
+        return;
+    }
+
+    if (value) {
+        error_setg(errp, "extension %s is not available with KVM",
+                   propname);
+    }
+}
+#endif
+
 /*
  * Add CPU properties with user-facing flags.
  *
@@ -1697,24 +1879,48 @@ static void riscv_cpu_add_user_properties(Object *obj)
     Property *prop;
     DeviceState *dev = DEVICE(obj);
 
-    riscv_cpu_add_misa_properties(obj);
+#ifndef CONFIG_USER_ONLY
+    riscv_add_satp_mode_properties(obj);
 
-    for (prop = riscv_cpu_extensions; prop && prop->name; prop++) {
-        qdev_property_add_static(dev, prop);
+    if (kvm_enabled()) {
+        kvm_riscv_init_user_properties(obj);
     }
+#endif
 
+    riscv_cpu_add_misa_properties(obj);
+
+    for (prop = riscv_cpu_extensions; prop && prop->name; prop++) {
 #ifndef CONFIG_USER_ONLY
-    riscv_add_satp_mode_properties(obj);
+        if (kvm_enabled()) {
+            /* Check if KVM created the property already */
+            if (object_property_find(obj, prop->name)) {
+                continue;
+            }
+
+            /*
+             * Set the default to disabled for every extension
+             * unknown to KVM and error out if the user attempts
+             * to enable any of them.
+             *
+             * We're giving a pass for non-bool properties since they're
+             * not related to the availability of extensions and can be
+             * safely ignored as is.
+             */
+            if (prop->info == &qdev_prop_bool) {
+                object_property_add(obj, prop->name, "bool",
+                                    NULL, cpu_set_cfg_unavailable,
+                                    NULL, (void *)prop->name);
+                continue;
+            }
+        }
 #endif
+        qdev_property_add_static(dev, prop);
+    }
 }
 
 static Property riscv_cpu_properties[] = {
     DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true),
 
-    DEFINE_PROP_UINT32("mvendorid", RISCVCPU, cfg.mvendorid, 0),
-    DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID),
-    DEFINE_PROP_UINT64("mimpid", RISCVCPU, cfg.mimpid, RISCV_CPU_MIMPID),
-
 #ifndef CONFIG_USER_ONLY
     DEFINE_PROP_UINT64("resetvec", RISCVCPU, env.resetvec, DEFAULT_RSTVEC),
 #endif
@@ -1798,6 +2004,119 @@ static const struct TCGCPUOps riscv_tcg_ops = {
 #endif /* !CONFIG_USER_ONLY */
 };
 
+static bool riscv_cpu_is_dynamic(Object *cpu_obj)
+{
+    return object_dynamic_cast(cpu_obj, TYPE_RISCV_DYNAMIC_CPU) != NULL;
+}
+
+static void cpu_set_mvendorid(Object *obj, Visitor *v, const char *name,
+                              void *opaque, Error **errp)
+{
+    bool dynamic_cpu = riscv_cpu_is_dynamic(obj);
+    RISCVCPU *cpu = RISCV_CPU(obj);
+    uint32_t prev_val = cpu->cfg.mvendorid;
+    uint32_t value;
+
+    if (!visit_type_uint32(v, name, &value, errp)) {
+        return;
+    }
+
+    if (!dynamic_cpu && prev_val != value) {
+        error_setg(errp, "Unable to change %s mvendorid (0x%x)",
+                   object_get_typename(obj), prev_val);
+        return;
+    }
+
+    cpu->cfg.mvendorid = value;
+}
+
+static void cpu_get_mvendorid(Object *obj, Visitor *v, const char *name,
+                              void *opaque, Error **errp)
+{
+    bool value = RISCV_CPU(obj)->cfg.mvendorid;
+
+    visit_type_bool(v, name, &value, errp);
+}
+
+static void cpu_set_mimpid(Object *obj, Visitor *v, const char *name,
+                           void *opaque, Error **errp)
+{
+    bool dynamic_cpu = riscv_cpu_is_dynamic(obj);
+    RISCVCPU *cpu = RISCV_CPU(obj);
+    uint64_t prev_val = cpu->cfg.mimpid;
+    uint64_t value;
+
+    if (!visit_type_uint64(v, name, &value, errp)) {
+        return;
+    }
+
+    if (!dynamic_cpu && prev_val != value) {
+        error_setg(errp, "Unable to change %s mimpid (0x%" PRIu64 ")",
+                   object_get_typename(obj), prev_val);
+        return;
+    }
+
+    cpu->cfg.mimpid = value;
+}
+
+static void cpu_get_mimpid(Object *obj, Visitor *v, const char *name,
+                           void *opaque, Error **errp)
+{
+    bool value = RISCV_CPU(obj)->cfg.mimpid;
+
+    visit_type_bool(v, name, &value, errp);
+}
+
+static void cpu_set_marchid(Object *obj, Visitor *v, const char *name,
+                            void *opaque, Error **errp)
+{
+    bool dynamic_cpu = riscv_cpu_is_dynamic(obj);
+    RISCVCPU *cpu = RISCV_CPU(obj);
+    uint64_t prev_val = cpu->cfg.marchid;
+    uint64_t value, invalid_val;
+    uint32_t mxlen = 0;
+
+    if (!visit_type_uint64(v, name, &value, errp)) {
+        return;
+    }
+
+    if (!dynamic_cpu && prev_val != value) {
+        error_setg(errp, "Unable to change %s marchid (0x%" PRIu64 ")",
+                   object_get_typename(obj), prev_val);
+        return;
+    }
+
+    switch (riscv_cpu_mxl(&cpu->env)) {
+    case MXL_RV32:
+        mxlen = 32;
+        break;
+    case MXL_RV64:
+    case MXL_RV128:
+        mxlen = 64;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    invalid_val = 1LL << (mxlen - 1);
+
+    if (value == invalid_val) {
+        error_setg(errp, "Unable to set marchid with MSB (%u) bit set "
+                         "and the remaining bits zero", mxlen);
+        return;
+    }
+
+    cpu->cfg.marchid = value;
+}
+
+static void cpu_get_marchid(Object *obj, Visitor *v, const char *name,
+                           void *opaque, Error **errp)
+{
+    bool value = RISCV_CPU(obj)->cfg.marchid;
+
+    visit_type_bool(v, name, &value, errp);
+}
+
 static void riscv_cpu_class_init(ObjectClass *c, void *data)
 {
     RISCVCPUClass *mcc = RISCV_CPU_CLASS(c);
@@ -1829,6 +2148,15 @@ static void riscv_cpu_class_init(ObjectClass *c, void *data)
     cc->gdb_get_dynamic_xml = riscv_gdb_get_dynamic_xml;
     cc->tcg_ops = &riscv_tcg_ops;
 
+    object_class_property_add(c, "mvendorid", "uint32", cpu_get_mvendorid,
+                              cpu_set_mvendorid, NULL, NULL);
+
+    object_class_property_add(c, "mimpid", "uint64", cpu_get_mimpid,
+                              cpu_set_mimpid, NULL, NULL);
+
+    object_class_property_add(c, "marchid", "uint64", cpu_get_marchid,
+                              cpu_set_marchid, NULL, NULL);
+
     device_class_set_props(dc, riscv_cpu_properties);
 }
 
@@ -1840,8 +2168,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str,
     int i;
 
     for (i = 0; i < ARRAY_SIZE(isa_edata_arr); i++) {
-        if (cpu->env.priv_ver >= isa_edata_arr[i].min_version &&
-            isa_ext_is_enabled(cpu, &isa_edata_arr[i])) {
+        if (isa_ext_is_enabled(cpu, &isa_edata_arr[i])) {
             new = g_strconcat(old, "_", isa_edata_arr[i].name, NULL);
             g_free(old);
             old = new;
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 7adb8706ac..6ea22e0eea 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -41,7 +41,10 @@
 
 #define RV(x) ((target_ulong)1 << (x - 'A'))
 
-/* Consider updating misa_ext_cfgs[] when adding new MISA bits here */
+/*
+ * Consider updating misa_ext_info_arr[] and misa_ext_cfgs[]
+ * when adding new MISA bits here.
+ */
 #define RVI RV('I')
 #define RVE RV('E') /* E and I are mutually exclusive */
 #define RVM RV('M')
@@ -56,6 +59,8 @@
 #define RVJ RV('J')
 #define RVG RV('G')
 
+const char *riscv_get_misa_ext_name(uint32_t bit);
+const char *riscv_get_misa_ext_description(uint32_t bit);
 
 /* Privileged specification version */
 enum {
@@ -500,6 +505,7 @@ FIELD(TB_FLAGS, ITRIGGER, 22, 1)
 /* Virtual mode enabled */
 FIELD(TB_FLAGS, VIRT_ENABLED, 23, 1)
 FIELD(TB_FLAGS, PRIV, 24, 2)
+FIELD(TB_FLAGS, AXL, 26, 2)
 
 #ifdef TARGET_RISCV32
 #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
@@ -516,13 +522,20 @@ static inline const RISCVCPUConfig *riscv_cpu_cfg(CPURISCVState *env)
     return &env_archcpu(env)->cfg;
 }
 
-#if defined(TARGET_RISCV32)
-#define cpu_recompute_xl(env)  ((void)(env), MXL_RV32)
-#else
-static inline RISCVMXL cpu_recompute_xl(CPURISCVState *env)
+#if !defined(CONFIG_USER_ONLY)
+static inline int cpu_address_mode(CPURISCVState *env)
+{
+    int mode = env->priv;
+
+    if (mode == PRV_M && get_field(env->mstatus, MSTATUS_MPRV)) {
+        mode = get_field(env->mstatus, MSTATUS_MPP);
+    }
+    return mode;
+}
+
+static inline RISCVMXL cpu_get_xl(CPURISCVState *env, target_ulong mode)
 {
     RISCVMXL xl = env->misa_mxl;
-#if !defined(CONFIG_USER_ONLY)
     /*
      * When emulating a 32-bit-only cpu, use RV32.
      * When emulating a 64-bit cpu, and MXL has been reduced to RV32,
@@ -530,7 +543,7 @@ static inline RISCVMXL cpu_recompute_xl(CPURISCVState *env)
      * back to RV64 for lower privs.
      */
     if (xl != MXL_RV32) {
-        switch (env->priv) {
+        switch (mode) {
         case PRV_M:
             break;
         case PRV_U:
@@ -541,11 +554,38 @@ static inline RISCVMXL cpu_recompute_xl(CPURISCVState *env)
             break;
         }
     }
-#endif
     return xl;
 }
 #endif
 
+#if defined(TARGET_RISCV32)
+#define cpu_recompute_xl(env)  ((void)(env), MXL_RV32)
+#else
+static inline RISCVMXL cpu_recompute_xl(CPURISCVState *env)
+{
+#if !defined(CONFIG_USER_ONLY)
+    return cpu_get_xl(env, env->priv);
+#else
+    return env->misa_mxl;
+#endif
+}
+#endif
+
+#if defined(TARGET_RISCV32)
+#define cpu_address_xl(env)  ((void)(env), MXL_RV32)
+#else
+static inline RISCVMXL cpu_address_xl(CPURISCVState *env)
+{
+#ifdef CONFIG_USER_ONLY
+    return env->xl;
+#else
+    int mode = cpu_address_mode(env);
+
+    return cpu_get_xl(env, mode);
+#endif
+}
+#endif
+
 static inline int riscv_cpu_xlen(CPURISCVState *env)
 {
     return 16 << env->xl;
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index c4a627d335..2bd9510ba3 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -75,6 +75,8 @@ struct RISCVCPUConfig {
     bool ext_svpbmt;
     bool ext_zdinx;
     bool ext_zawrs;
+    bool ext_zfa;
+    bool ext_zfbfmin;
     bool ext_zfh;
     bool ext_zfhmin;
     bool ext_zfinx;
@@ -84,6 +86,8 @@ struct RISCVCPUConfig {
     bool ext_zve64f;
     bool ext_zve64d;
     bool ext_zmmul;
+    bool ext_zvfbfmin;
+    bool ext_zvfbfwma;
     bool ext_zvfh;
     bool ext_zvfhmin;
     bool ext_smaia;
@@ -133,4 +137,41 @@ struct RISCVCPUConfig {
 };
 
 typedef struct RISCVCPUConfig RISCVCPUConfig;
+
+/* Helper functions to test for extensions.  */
+
+static inline bool always_true_p(const RISCVCPUConfig *cfg __attribute__((__unused__)))
+{
+    return true;
+}
+
+static inline bool has_xthead_p(const RISCVCPUConfig *cfg)
+{
+    return cfg->ext_xtheadba || cfg->ext_xtheadbb ||
+           cfg->ext_xtheadbs || cfg->ext_xtheadcmo ||
+           cfg->ext_xtheadcondmov ||
+           cfg->ext_xtheadfmemidx || cfg->ext_xtheadfmv ||
+           cfg->ext_xtheadmac || cfg->ext_xtheadmemidx ||
+           cfg->ext_xtheadmempair || cfg->ext_xtheadsync;
+}
+
+#define MATERIALISE_EXT_PREDICATE(ext) \
+    static inline bool has_ ## ext ## _p(const RISCVCPUConfig *cfg) \
+    { \
+        return cfg->ext_ ## ext ; \
+    }
+
+MATERIALISE_EXT_PREDICATE(xtheadba)
+MATERIALISE_EXT_PREDICATE(xtheadbb)
+MATERIALISE_EXT_PREDICATE(xtheadbs)
+MATERIALISE_EXT_PREDICATE(xtheadcmo)
+MATERIALISE_EXT_PREDICATE(xtheadcondmov)
+MATERIALISE_EXT_PREDICATE(xtheadfmemidx)
+MATERIALISE_EXT_PREDICATE(xtheadfmv)
+MATERIALISE_EXT_PREDICATE(xtheadmac)
+MATERIALISE_EXT_PREDICATE(xtheadmemidx)
+MATERIALISE_EXT_PREDICATE(xtheadmempair)
+MATERIALISE_EXT_PREDICATE(xtheadsync)
+MATERIALISE_EXT_PREDICATE(XVentanaCondOps)
+
 #endif
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index a944f25694..9f611d89bb 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -47,7 +47,8 @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
 
         if (mode == PRV_M && get_field(status, MSTATUS_MPRV)) {
             mode = get_field(env->mstatus, MSTATUS_MPP);
-            virt = get_field(env->mstatus, MSTATUS_MPV);
+            virt = get_field(env->mstatus, MSTATUS_MPV) &&
+                   (mode != PRV_M);
             if (virt) {
                 status = env->vsstatus;
             }
@@ -134,6 +135,7 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc,
     flags = FIELD_DP32(flags, TB_FLAGS, FS, fs);
     flags = FIELD_DP32(flags, TB_FLAGS, VS, vs);
     flags = FIELD_DP32(flags, TB_FLAGS, XL, env->xl);
+    flags = FIELD_DP32(flags, TB_FLAGS, AXL, cpu_address_xl(env));
     if (env->cur_pmmask != 0) {
         flags = FIELD_DP32(flags, TB_FLAGS, PM_MASK_ENABLED, 1);
     }
@@ -147,13 +149,16 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc,
 void riscv_cpu_update_mask(CPURISCVState *env)
 {
     target_ulong mask = 0, base = 0;
+    RISCVMXL xl = env->xl;
     /*
      * TODO: Current RVJ spec does not specify
      * how the extension interacts with XLEN.
      */
 #ifndef CONFIG_USER_ONLY
+    int mode = cpu_address_mode(env);
+    xl = cpu_get_xl(env, mode);
     if (riscv_has_ext(env, RVJ)) {
-        switch (env->priv) {
+        switch (mode) {
         case PRV_M:
             if (env->mmte & M_PM_ENABLE) {
                 mask = env->mpmmask;
@@ -177,7 +182,7 @@ void riscv_cpu_update_mask(CPURISCVState *env)
         }
     }
 #endif
-    if (env->xl == MXL_RV32) {
+    if (xl == MXL_RV32) {
         env->cur_pmmask = mask & UINT32_MAX;
         env->cur_pmbase = base & UINT32_MAX;
     } else {
@@ -1277,7 +1282,6 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
         if (ret == TRANSLATE_G_STAGE_FAIL) {
             first_stage_error = false;
             two_stage_indirect_error = true;
-            access_type = MMU_DATA_LOAD;
         }
 
         qemu_log_mask(CPU_LOG_MMU,
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 58499b5afc..ea7585329e 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -1311,11 +1311,9 @@ static RISCVException write_mstatus(CPURISCVState *env, int csrno,
     }
 
     if (xl != MXL_RV32 || env->debugger) {
-        /*
-         * RV32: MPV and GVA are not in mstatus. The current plan is to
-         * add them to mstatush. For now, we just don't support it.
-         */
-        mask |= MSTATUS_MPV | MSTATUS_GVA;
+        if (riscv_has_ext(env, RVH)) {
+            mask |= MSTATUS_MPV | MSTATUS_GVA;
+        }
         if ((val & MSTATUS64_UXL) != 0) {
             mask |= MSTATUS64_UXL;
         }
@@ -1323,10 +1321,6 @@ static RISCVException write_mstatus(CPURISCVState *env, int csrno,
 
     mstatus = (mstatus & ~mask) | (val & mask);
 
-    if (xl > MXL_RV32) {
-        /* SXL field is for now read only */
-        mstatus = set_field(mstatus, MSTATUS64_SXL, xl);
-    }
     env->mstatus = mstatus;
 
     /*
@@ -1335,8 +1329,9 @@ static RISCVException write_mstatus(CPURISCVState *env, int csrno,
      */
     if (env->debugger) {
         env->xl = cpu_recompute_xl(env);
-        riscv_cpu_update_mask(env);
     }
+
+    riscv_cpu_update_mask(env);
     return RISCV_EXCP_NONE;
 }
 
@@ -1351,7 +1346,7 @@ static RISCVException write_mstatush(CPURISCVState *env, int csrno,
                                      target_ulong val)
 {
     uint64_t valh = (uint64_t)val << 32;
-    uint64_t mask = MSTATUS_MPV | MSTATUS_GVA;
+    uint64_t mask = riscv_has_ext(env, RVH) ? MSTATUS_MPV | MSTATUS_GVA : 0;
 
     env->mstatus = (env->mstatus & ~mask) | (valh & mask);
 
@@ -3639,7 +3634,7 @@ static RISCVException write_mpmmask(CPURISCVState *env, int csrno,
     uint64_t mstatus;
 
     env->mpmmask = val;
-    if ((env->priv == PRV_M) && (env->mmte & M_PM_ENABLE)) {
+    if ((cpu_address_mode(env) == PRV_M) && (env->mmte & M_PM_ENABLE)) {
         env->cur_pmmask = val;
     }
     env->mmte |= EXT_STATUS_DIRTY;
@@ -3667,8 +3662,11 @@ static RISCVException write_spmmask(CPURISCVState *env, int csrno,
         return RISCV_EXCP_NONE;
     }
     env->spmmask = val;
-    if ((env->priv == PRV_S) && (env->mmte & S_PM_ENABLE)) {
+    if ((cpu_address_mode(env) == PRV_S) && (env->mmte & S_PM_ENABLE)) {
         env->cur_pmmask = val;
+        if (cpu_get_xl(env, PRV_S) == MXL_RV32) {
+            env->cur_pmmask &= UINT32_MAX;
+        }
     }
     env->mmte |= EXT_STATUS_DIRTY;
 
@@ -3695,8 +3693,11 @@ static RISCVException write_upmmask(CPURISCVState *env, int csrno,
         return RISCV_EXCP_NONE;
     }
     env->upmmask = val;
-    if ((env->priv == PRV_U) && (env->mmte & U_PM_ENABLE)) {
+    if ((cpu_address_mode(env) == PRV_U) && (env->mmte & U_PM_ENABLE)) {
         env->cur_pmmask = val;
+        if (cpu_get_xl(env, PRV_U) == MXL_RV32) {
+            env->cur_pmmask &= UINT32_MAX;
+        }
     }
     env->mmte |= EXT_STATUS_DIRTY;
 
@@ -3719,7 +3720,7 @@ static RISCVException write_mpmbase(CPURISCVState *env, int csrno,
     uint64_t mstatus;
 
     env->mpmbase = val;
-    if ((env->priv == PRV_M) && (env->mmte & M_PM_ENABLE)) {
+    if ((cpu_address_mode(env) == PRV_M) && (env->mmte & M_PM_ENABLE)) {
         env->cur_pmbase = val;
     }
     env->mmte |= EXT_STATUS_DIRTY;
@@ -3747,8 +3748,11 @@ static RISCVException write_spmbase(CPURISCVState *env, int csrno,
         return RISCV_EXCP_NONE;
     }
     env->spmbase = val;
-    if ((env->priv == PRV_S) && (env->mmte & S_PM_ENABLE)) {
+    if ((cpu_address_mode(env) == PRV_S) && (env->mmte & S_PM_ENABLE)) {
         env->cur_pmbase = val;
+        if (cpu_get_xl(env, PRV_S) == MXL_RV32) {
+            env->cur_pmbase &= UINT32_MAX;
+        }
     }
     env->mmte |= EXT_STATUS_DIRTY;
 
@@ -3775,8 +3779,11 @@ static RISCVException write_upmbase(CPURISCVState *env, int csrno,
         return RISCV_EXCP_NONE;
     }
     env->upmbase = val;
-    if ((env->priv == PRV_U) && (env->mmte & U_PM_ENABLE)) {
+    if ((cpu_address_mode(env) == PRV_U) && (env->mmte & U_PM_ENABLE)) {
         env->cur_pmbase = val;
+        if (cpu_get_xl(env, PRV_U) == MXL_RV32) {
+            env->cur_pmbase &= UINT32_MAX;
+        }
     }
     env->mmte |= EXT_STATUS_DIRTY;
 
diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c
index 5dd14d8390..871a70a316 100644
--- a/target/riscv/fpu_helper.c
+++ b/target/riscv/fpu_helper.c
@@ -252,6 +252,14 @@ uint64_t helper_fmin_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
                          float32_minimum_number(frs1, frs2, &env->fp_status));
 }
 
+uint64_t helper_fminm_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float32 frs1 = check_nanbox_s(env, rs1);
+    float32 frs2 = check_nanbox_s(env, rs2);
+    float32 ret = float32_min(frs1, frs2, &env->fp_status);
+    return nanbox_s(env, ret);
+}
+
 uint64_t helper_fmax_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
 {
     float32 frs1 = check_nanbox_s(env, rs1);
@@ -261,6 +269,14 @@ uint64_t helper_fmax_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
                          float32_maximum_number(frs1, frs2, &env->fp_status));
 }
 
+uint64_t helper_fmaxm_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float32 frs1 = check_nanbox_s(env, rs1);
+    float32 frs2 = check_nanbox_s(env, rs2);
+    float32 ret = float32_max(frs1, frs2, &env->fp_status);
+    return nanbox_s(env, ret);
+}
+
 uint64_t helper_fsqrt_s(CPURISCVState *env, uint64_t rs1)
 {
     float32 frs1 = check_nanbox_s(env, rs1);
@@ -274,6 +290,13 @@ target_ulong helper_fle_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
     return float32_le(frs1, frs2, &env->fp_status);
 }
 
+target_ulong helper_fleq_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float32 frs1 = check_nanbox_s(env, rs1);
+    float32 frs2 = check_nanbox_s(env, rs2);
+    return float32_le_quiet(frs1, frs2, &env->fp_status);
+}
+
 target_ulong helper_flt_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
 {
     float32 frs1 = check_nanbox_s(env, rs1);
@@ -281,6 +304,13 @@ target_ulong helper_flt_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
     return float32_lt(frs1, frs2, &env->fp_status);
 }
 
+target_ulong helper_fltq_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float32 frs1 = check_nanbox_s(env, rs1);
+    float32 frs2 = check_nanbox_s(env, rs2);
+    return float32_lt_quiet(frs1, frs2, &env->fp_status);
+}
+
 target_ulong helper_feq_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
 {
     float32 frs1 = check_nanbox_s(env, rs1);
@@ -338,6 +368,30 @@ target_ulong helper_fclass_s(CPURISCVState *env, uint64_t rs1)
     return fclass_s(frs1);
 }
 
+uint64_t helper_fround_s(CPURISCVState *env, uint64_t rs1)
+{
+    float_status *fs = &env->fp_status;
+    uint16_t nx_old = get_float_exception_flags(fs) & float_flag_inexact;
+    float32 frs1 = check_nanbox_s(env, rs1);
+
+    frs1 = float32_round_to_int(frs1, fs);
+
+    /* Restore the original NX flag. */
+    uint16_t flags = get_float_exception_flags(fs);
+    flags &= ~float_flag_inexact;
+    flags |= nx_old;
+    set_float_exception_flags(flags, fs);
+
+    return nanbox_s(env, frs1);
+}
+
+uint64_t helper_froundnx_s(CPURISCVState *env, uint64_t rs1)
+{
+    float32 frs1 = check_nanbox_s(env, rs1);
+    frs1 = float32_round_to_int(frs1, &env->fp_status);
+    return nanbox_s(env, frs1);
+}
+
 uint64_t helper_fadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
 {
     return float64_add(frs1, frs2, &env->fp_status);
@@ -365,6 +419,11 @@ uint64_t helper_fmin_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
            float64_minimum_number(frs1, frs2, &env->fp_status);
 }
 
+uint64_t helper_fminm_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
+{
+    return float64_min(frs1, frs2, &env->fp_status);
+}
+
 uint64_t helper_fmax_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
 {
     return env->priv_ver < PRIV_VERSION_1_11_0 ?
@@ -372,6 +431,11 @@ uint64_t helper_fmax_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
            float64_maximum_number(frs1, frs2, &env->fp_status);
 }
 
+uint64_t helper_fmaxm_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
+{
+    return float64_max(frs1, frs2, &env->fp_status);
+}
+
 uint64_t helper_fcvt_s_d(CPURISCVState *env, uint64_t rs1)
 {
     return nanbox_s(env, float64_to_float32(rs1, &env->fp_status));
@@ -393,11 +457,21 @@ target_ulong helper_fle_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
     return float64_le(frs1, frs2, &env->fp_status);
 }
 
+target_ulong helper_fleq_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
+{
+    return float64_le_quiet(frs1, frs2, &env->fp_status);
+}
+
 target_ulong helper_flt_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
 {
     return float64_lt(frs1, frs2, &env->fp_status);
 }
 
+target_ulong helper_fltq_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
+{
+    return float64_lt_quiet(frs1, frs2, &env->fp_status);
+}
+
 target_ulong helper_feq_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
 {
     return float64_eq_quiet(frs1, frs2, &env->fp_status);
@@ -408,6 +482,11 @@ target_ulong helper_fcvt_w_d(CPURISCVState *env, uint64_t frs1)
     return float64_to_int32(frs1, &env->fp_status);
 }
 
+uint64_t helper_fcvtmod_w_d(CPURISCVState *env, uint64_t value)
+{
+    return float64_to_int32_modulo(value, float_round_to_zero, &env->fp_status);
+}
+
 target_ulong helper_fcvt_wu_d(CPURISCVState *env, uint64_t frs1)
 {
     return (int32_t)float64_to_uint32(frs1, &env->fp_status);
@@ -448,6 +527,27 @@ target_ulong helper_fclass_d(uint64_t frs1)
     return fclass_d(frs1);
 }
 
+uint64_t helper_fround_d(CPURISCVState *env, uint64_t frs1)
+{
+    float_status *fs = &env->fp_status;
+    uint16_t nx_old = get_float_exception_flags(fs) & float_flag_inexact;
+
+    frs1 = float64_round_to_int(frs1, fs);
+
+    /* Restore the original NX flag. */
+    uint16_t flags = get_float_exception_flags(fs);
+    flags &= ~float_flag_inexact;
+    flags |= nx_old;
+    set_float_exception_flags(flags, fs);
+
+    return frs1;
+}
+
+uint64_t helper_froundnx_d(CPURISCVState *env, uint64_t frs1)
+{
+    return float64_round_to_int(frs1, &env->fp_status);
+}
+
 uint64_t helper_fadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
 {
     float16 frs1 = check_nanbox_h(env, rs1);
@@ -485,6 +585,14 @@ uint64_t helper_fmin_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
                          float16_minimum_number(frs1, frs2, &env->fp_status));
 }
 
+uint64_t helper_fminm_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float16 frs1 = check_nanbox_h(env, rs1);
+    float16 frs2 = check_nanbox_h(env, rs2);
+    float16 ret = float16_min(frs1, frs2, &env->fp_status);
+    return nanbox_h(env, ret);
+}
+
 uint64_t helper_fmax_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
 {
     float16 frs1 = check_nanbox_h(env, rs1);
@@ -494,6 +602,14 @@ uint64_t helper_fmax_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
                          float16_maximum_number(frs1, frs2, &env->fp_status));
 }
 
+uint64_t helper_fmaxm_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float16 frs1 = check_nanbox_h(env, rs1);
+    float16 frs2 = check_nanbox_h(env, rs2);
+    float16 ret = float16_max(frs1, frs2, &env->fp_status);
+    return nanbox_h(env, ret);
+}
+
 uint64_t helper_fsqrt_h(CPURISCVState *env, uint64_t rs1)
 {
     float16 frs1 = check_nanbox_h(env, rs1);
@@ -507,6 +623,13 @@ target_ulong helper_fle_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
     return float16_le(frs1, frs2, &env->fp_status);
 }
 
+target_ulong helper_fleq_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float16 frs1 = check_nanbox_h(env, rs1);
+    float16 frs2 = check_nanbox_h(env, rs2);
+    return float16_le_quiet(frs1, frs2, &env->fp_status);
+}
+
 target_ulong helper_flt_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
 {
     float16 frs1 = check_nanbox_h(env, rs1);
@@ -514,6 +637,13 @@ target_ulong helper_flt_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
     return float16_lt(frs1, frs2, &env->fp_status);
 }
 
+target_ulong helper_fltq_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float16 frs1 = check_nanbox_h(env, rs1);
+    float16 frs2 = check_nanbox_h(env, rs2);
+    return float16_lt_quiet(frs1, frs2, &env->fp_status);
+}
+
 target_ulong helper_feq_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
 {
     float16 frs1 = check_nanbox_h(env, rs1);
@@ -527,6 +657,30 @@ target_ulong helper_fclass_h(CPURISCVState *env, uint64_t rs1)
     return fclass_h(frs1);
 }
 
+uint64_t helper_fround_h(CPURISCVState *env, uint64_t rs1)
+{
+    float_status *fs = &env->fp_status;
+    uint16_t nx_old = get_float_exception_flags(fs) & float_flag_inexact;
+    float16 frs1 = check_nanbox_h(env, rs1);
+
+    frs1 = float16_round_to_int(frs1, fs);
+
+    /* Restore the original NX flag. */
+    uint16_t flags = get_float_exception_flags(fs);
+    flags &= ~float_flag_inexact;
+    flags |= nx_old;
+    set_float_exception_flags(flags, fs);
+
+    return nanbox_h(env, frs1);
+}
+
+uint64_t helper_froundnx_h(CPURISCVState *env, uint64_t rs1)
+{
+    float16 frs1 = check_nanbox_s(env, rs1);
+    frs1 = float16_round_to_int(frs1, &env->fp_status);
+    return nanbox_h(env, frs1);
+}
+
 target_ulong helper_fcvt_w_h(CPURISCVState *env, uint64_t rs1)
 {
     float16 frs1 = check_nanbox_h(env, rs1);
@@ -593,3 +747,15 @@ uint64_t helper_fcvt_d_h(CPURISCVState *env, uint64_t rs1)
     float16 frs1 = check_nanbox_h(env, rs1);
     return float16_to_float64(frs1, true, &env->fp_status);
 }
+
+uint64_t helper_fcvt_bf16_s(CPURISCVState *env, uint64_t rs1)
+{
+    float32 frs1 = check_nanbox_s(env, rs1);
+    return nanbox_h(env, float32_to_bfloat16(frs1, &env->fp_status));
+}
+
+uint64_t helper_fcvt_s_bf16(CPURISCVState *env, uint64_t rs1)
+{
+    float16 frs1 = check_nanbox_h(env, rs1);
+    return nanbox_s(env, bfloat16_to_float32(frs1, &env->fp_status));
+}
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 98e97810fd..c95adaf08a 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -25,10 +25,14 @@ DEF_HELPER_FLAGS_3(fsub_s, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(fmul_s, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(fdiv_s, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(fmin_s, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fminm_s, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(fmax_s, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fmaxm_s, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_2(fsqrt_s, TCG_CALL_NO_RWG, i64, env, i64)
 DEF_HELPER_FLAGS_3(fle_s, TCG_CALL_NO_RWG, tl, env, i64, i64)
+DEF_HELPER_FLAGS_3(fleq_s, TCG_CALL_NO_RWG, tl, env, i64, i64)
 DEF_HELPER_FLAGS_3(flt_s, TCG_CALL_NO_RWG, tl, env, i64, i64)
+DEF_HELPER_FLAGS_3(fltq_s, TCG_CALL_NO_RWG, tl, env, i64, i64)
 DEF_HELPER_FLAGS_3(feq_s, TCG_CALL_NO_RWG, tl, env, i64, i64)
 DEF_HELPER_FLAGS_2(fcvt_w_s, TCG_CALL_NO_RWG, tl, env, i64)
 DEF_HELPER_FLAGS_2(fcvt_wu_s, TCG_CALL_NO_RWG, tl, env, i64)
@@ -39,6 +43,8 @@ DEF_HELPER_FLAGS_2(fcvt_s_wu, TCG_CALL_NO_RWG, i64, env, tl)
 DEF_HELPER_FLAGS_2(fcvt_s_l, TCG_CALL_NO_RWG, i64, env, tl)
 DEF_HELPER_FLAGS_2(fcvt_s_lu, TCG_CALL_NO_RWG, i64, env, tl)
 DEF_HELPER_FLAGS_2(fclass_s, TCG_CALL_NO_RWG_SE, tl, env, i64)
+DEF_HELPER_FLAGS_2(fround_s, TCG_CALL_NO_RWG_SE, i64, env, i64)
+DEF_HELPER_FLAGS_2(froundnx_s, TCG_CALL_NO_RWG_SE, i64, env, i64)
 
 /* Floating Point - Double Precision */
 DEF_HELPER_FLAGS_3(fadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64)
@@ -46,14 +52,19 @@ DEF_HELPER_FLAGS_3(fsub_d, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(fmul_d, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(fdiv_d, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(fmin_d, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fminm_d, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(fmax_d, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fmaxm_d, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_2(fcvt_s_d, TCG_CALL_NO_RWG, i64, env, i64)
 DEF_HELPER_FLAGS_2(fcvt_d_s, TCG_CALL_NO_RWG, i64, env, i64)
 DEF_HELPER_FLAGS_2(fsqrt_d, TCG_CALL_NO_RWG, i64, env, i64)
 DEF_HELPER_FLAGS_3(fle_d, TCG_CALL_NO_RWG, tl, env, i64, i64)
+DEF_HELPER_FLAGS_3(fleq_d, TCG_CALL_NO_RWG, tl, env, i64, i64)
 DEF_HELPER_FLAGS_3(flt_d, TCG_CALL_NO_RWG, tl, env, i64, i64)
+DEF_HELPER_FLAGS_3(fltq_d, TCG_CALL_NO_RWG, tl, env, i64, i64)
 DEF_HELPER_FLAGS_3(feq_d, TCG_CALL_NO_RWG, tl, env, i64, i64)
 DEF_HELPER_FLAGS_2(fcvt_w_d, TCG_CALL_NO_RWG, tl, env, i64)
+DEF_HELPER_FLAGS_2(fcvtmod_w_d, TCG_CALL_NO_RWG, i64, env, i64)
 DEF_HELPER_FLAGS_2(fcvt_wu_d, TCG_CALL_NO_RWG, tl, env, i64)
 DEF_HELPER_FLAGS_2(fcvt_l_d, TCG_CALL_NO_RWG, tl, env, i64)
 DEF_HELPER_FLAGS_2(fcvt_lu_d, TCG_CALL_NO_RWG, tl, env, i64)
@@ -62,6 +73,8 @@ DEF_HELPER_FLAGS_2(fcvt_d_wu, TCG_CALL_NO_RWG, i64, env, tl)
 DEF_HELPER_FLAGS_2(fcvt_d_l, TCG_CALL_NO_RWG, i64, env, tl)
 DEF_HELPER_FLAGS_2(fcvt_d_lu, TCG_CALL_NO_RWG, i64, env, tl)
 DEF_HELPER_FLAGS_1(fclass_d, TCG_CALL_NO_RWG_SE, tl, i64)
+DEF_HELPER_FLAGS_2(fround_d, TCG_CALL_NO_RWG_SE, i64, env, i64)
+DEF_HELPER_FLAGS_2(froundnx_d, TCG_CALL_NO_RWG_SE, i64, env, i64)
 
 /* Bitmanip */
 DEF_HELPER_FLAGS_2(clmul, TCG_CALL_NO_RWG_SE, tl, tl, tl)
@@ -78,10 +91,14 @@ DEF_HELPER_FLAGS_3(fsub_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(fmul_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(fdiv_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(fmin_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fminm_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(fmax_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fmaxm_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_2(fsqrt_h, TCG_CALL_NO_RWG, i64, env, i64)
 DEF_HELPER_FLAGS_3(fle_h, TCG_CALL_NO_RWG, tl, env, i64, i64)
+DEF_HELPER_FLAGS_3(fleq_h, TCG_CALL_NO_RWG, tl, env, i64, i64)
 DEF_HELPER_FLAGS_3(flt_h, TCG_CALL_NO_RWG, tl, env, i64, i64)
+DEF_HELPER_FLAGS_3(fltq_h, TCG_CALL_NO_RWG, tl, env, i64, i64)
 DEF_HELPER_FLAGS_3(feq_h, TCG_CALL_NO_RWG, tl, env, i64, i64)
 DEF_HELPER_FLAGS_2(fcvt_s_h, TCG_CALL_NO_RWG, i64, env, i64)
 DEF_HELPER_FLAGS_2(fcvt_h_s, TCG_CALL_NO_RWG, i64, env, i64)
@@ -96,6 +113,8 @@ DEF_HELPER_FLAGS_2(fcvt_h_wu, TCG_CALL_NO_RWG, i64, env, tl)
 DEF_HELPER_FLAGS_2(fcvt_h_l, TCG_CALL_NO_RWG, i64, env, tl)
 DEF_HELPER_FLAGS_2(fcvt_h_lu, TCG_CALL_NO_RWG, i64, env, tl)
 DEF_HELPER_FLAGS_2(fclass_h, TCG_CALL_NO_RWG_SE, tl, env, i64)
+DEF_HELPER_FLAGS_2(fround_h, TCG_CALL_NO_RWG_SE, i64, env, i64)
+DEF_HELPER_FLAGS_2(froundnx_h, TCG_CALL_NO_RWG_SE, i64, env, i64)
 
 /* Cache-block operations */
 DEF_HELPER_2(cbo_clean_flush, void, env, tl)
@@ -1153,3 +1172,13 @@ DEF_HELPER_FLAGS_3(sm4ks, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl)
 
 /* Zce helper */
 DEF_HELPER_FLAGS_2(cm_jalt, TCG_CALL_NO_WG, tl, env, i32)
+
+/* BF16 functions */
+DEF_HELPER_FLAGS_2(fcvt_bf16_s, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_s_bf16, TCG_CALL_NO_RWG, i64, env, i64)
+
+DEF_HELPER_5(vfncvtbf16_f_f_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfwcvtbf16_f_f_v, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_6(vfwmaccbf16_vv, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vfwmaccbf16_vf, void, ptr, ptr, i64, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 73d5d1b045..e341fa9213 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -821,6 +821,32 @@ binvi      01101. ........... 001 ..... 0010011 @sh
 bset       0010100 .......... 001 ..... 0110011 @r
 bseti      00101. ........... 001 ..... 0010011 @sh
 
+# *** Zfa Standard Extension ***
+fli_s       1111000 00001 ..... 000 ..... 1010011 @r2
+fli_d       1111001 00001 ..... 000 ..... 1010011 @r2
+fli_h       1111010 00001 ..... 000 ..... 1010011 @r2
+fminm_s     0010100 ..... ..... 010 ..... 1010011 @r
+fmaxm_s     0010100 ..... ..... 011 ..... 1010011 @r
+fminm_d     0010101 ..... ..... 010 ..... 1010011 @r
+fmaxm_d     0010101 ..... ..... 011 ..... 1010011 @r
+fminm_h     0010110 ..... ..... 010 ..... 1010011 @r
+fmaxm_h     0010110 ..... ..... 011 ..... 1010011 @r
+fround_s    0100000 00100 ..... ... ..... 1010011 @r2_rm
+froundnx_s  0100000 00101 ..... ... ..... 1010011 @r2_rm
+fround_d    0100001 00100 ..... ... ..... 1010011 @r2_rm
+froundnx_d  0100001 00101 ..... ... ..... 1010011 @r2_rm
+fround_h    0100010 00100 ..... ... ..... 1010011 @r2_rm
+froundnx_h  0100010 00101 ..... ... ..... 1010011 @r2_rm
+fcvtmod_w_d 1100001 01000 ..... 001 ..... 1010011 @r2
+fmvh_x_d    1110001 00001 ..... 000 ..... 1010011 @r2
+fmvp_d_x    1011001 ..... ..... 000 ..... 1010011 @r
+fleq_s      1010000 ..... ..... 100 ..... 1010011 @r
+fltq_s      1010000 ..... ..... 101 ..... 1010011 @r
+fleq_d      1010001 ..... ..... 100 ..... 1010011 @r
+fltq_d      1010001 ..... ..... 101 ..... 1010011 @r
+fleq_h      1010010 ..... ..... 100 ..... 1010011 @r
+fltq_h      1010010 ..... ..... 101 ..... 1010011 @r
+
 # *** RV32 Zfh Extension ***
 flh        ............   ..... 001 ..... 0000111 @i
 fsh        .......  ..... ..... 001 ..... 0100111 @s
@@ -908,3 +934,15 @@ sm4ks       .. 11010 ..... ..... 000 ..... 0110011 @k_aes
 # *** RV32 Zicond Standard Extension ***
 czero_eqz   0000111  ..... ..... 101 ..... 0110011 @r
 czero_nez   0000111  ..... ..... 111 ..... 0110011 @r
+
+# *** Zfbfmin Standard Extension ***
+fcvt_bf16_s       0100010  01000 ..... ... ..... 1010011 @r2_rm
+fcvt_s_bf16       0100000  00110 ..... ... ..... 1010011 @r2_rm
+
+# *** Zvfbfmin Standard Extension ***
+vfncvtbf16_f_f_w  010010 . ..... 11101 001 ..... 1010111 @r2_vm
+vfwcvtbf16_f_f_v  010010 . ..... 01101 001 ..... 1010111 @r2_vm
+
+# *** Zvfbfwma Standard Extension ***
+vfwmaccbf16_vv    111011 . ..... ..... 001 ..... 1010111 @r_vm
+vfwmaccbf16_vf    111011 . ..... ..... 101 ..... 1010111 @r_vm
diff --git a/target/riscv/insn_trans/trans_rvbf16.c.inc b/target/riscv/insn_trans/trans_rvbf16.c.inc
new file mode 100644
index 0000000000..911bc29908
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvbf16.c.inc
@@ -0,0 +1,175 @@
+/*
+ * RISC-V translation routines for the BF16 Standard Extensions.
+ *
+ * Copyright (c) 2020-2023 PLCT Lab
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define REQUIRE_ZFBFMIN(ctx) do { \
+    if (!ctx->cfg_ptr->ext_zfbfmin) { \
+        return false; \
+    } \
+} while (0)
+
+#define REQUIRE_ZVFBFMIN(ctx) do { \
+    if (!ctx->cfg_ptr->ext_zvfbfmin) { \
+        return false; \
+    } \
+} while (0)
+
+#define REQUIRE_ZVFBFWMA(ctx) do { \
+    if (!ctx->cfg_ptr->ext_zvfbfwma) { \
+        return false; \
+    } \
+} while (0)
+
+static bool trans_fcvt_bf16_s(DisasContext *ctx, arg_fcvt_bf16_s *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFBFMIN(ctx);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fcvt_bf16_s(dest, cpu_env, src1);
+    gen_set_fpr_hs(ctx, a->rd, dest);
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fcvt_s_bf16(DisasContext *ctx, arg_fcvt_s_bf16 *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFBFMIN(ctx);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fcvt_s_bf16(dest, cpu_env, src1);
+    gen_set_fpr_hs(ctx, a->rd, dest);
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_vfncvtbf16_f_f_w(DisasContext *ctx, arg_vfncvtbf16_f_f_w *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZVFBFMIN(ctx);
+
+    if (opfv_narrow_check(ctx, a) && (ctx->sew == MO_16)) {
+        uint32_t data = 0;
+        TCGLabel *over = gen_new_label();
+
+        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
+        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
+
+        data = FIELD_DP32(data, VDATA, VM, a->vm);
+        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
+        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
+        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
+        tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
+                           vreg_ofs(ctx, a->rs2), cpu_env,
+                           ctx->cfg_ptr->vlen / 8,
+                           ctx->cfg_ptr->vlen / 8, data,
+                           gen_helper_vfncvtbf16_f_f_w);
+        mark_vs_dirty(ctx);
+        gen_set_label(over);
+        return true;
+    }
+    return false;
+}
+
+static bool trans_vfwcvtbf16_f_f_v(DisasContext *ctx, arg_vfwcvtbf16_f_f_v *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZVFBFMIN(ctx);
+
+    if (opfv_widen_check(ctx, a) && (ctx->sew == MO_16)) {
+        uint32_t data = 0;
+        TCGLabel *over = gen_new_label();
+
+        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
+        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
+
+        data = FIELD_DP32(data, VDATA, VM, a->vm);
+        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
+        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
+        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
+        tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
+                           vreg_ofs(ctx, a->rs2), cpu_env,
+                           ctx->cfg_ptr->vlen / 8,
+                           ctx->cfg_ptr->vlen / 8, data,
+                           gen_helper_vfwcvtbf16_f_f_v);
+        mark_vs_dirty(ctx);
+        gen_set_label(over);
+        return true;
+    }
+    return false;
+}
+
+static bool trans_vfwmaccbf16_vv(DisasContext *ctx, arg_vfwmaccbf16_vv *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZVFBFWMA(ctx);
+
+    if (require_rvv(ctx) && vext_check_isa_ill(ctx) && (ctx->sew == MO_16) &&
+        vext_check_dss(ctx, a->rd, a->rs1, a->rs2, a->vm)) {
+        uint32_t data = 0;
+        TCGLabel *over = gen_new_label();
+
+        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
+        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
+
+        data = FIELD_DP32(data, VDATA, VM, a->vm);
+        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
+        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
+        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
+        tcg_gen_gvec_4_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
+                           vreg_ofs(ctx, a->rs1),
+                           vreg_ofs(ctx, a->rs2), cpu_env,
+                           ctx->cfg_ptr->vlen / 8,
+                           ctx->cfg_ptr->vlen / 8, data,
+                           gen_helper_vfwmaccbf16_vv);
+        mark_vs_dirty(ctx);
+        gen_set_label(over);
+        return true;
+    }
+    return false;
+}
+
+static bool trans_vfwmaccbf16_vf(DisasContext *ctx, arg_vfwmaccbf16_vf *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZVFBFWMA(ctx);
+
+    if (require_rvv(ctx) && (ctx->sew == MO_16) && vext_check_isa_ill(ctx) &&
+        vext_check_ds(ctx, a->rd, a->rs2, a->vm)) {
+        uint32_t data = 0;
+
+        gen_set_rm(ctx, RISCV_FRM_DYN);
+        data = FIELD_DP32(data, VDATA, VM, a->vm);
+        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
+        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
+        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
+        return opfvf_trans(a->rd, a->rs1, a->rs2, data,
+                           gen_helper_vfwmaccbf16_vf, ctx);
+    }
+
+    return false;
+}
diff --git a/target/riscv/insn_trans/trans_rvzfa.c.inc b/target/riscv/insn_trans/trans_rvzfa.c.inc
new file mode 100644
index 0000000000..2c715af3e5
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvzfa.c.inc
@@ -0,0 +1,521 @@
+/*
+ * RISC-V translation routines for the Zfa Standard Extension.
+ *
+ * Copyright (c) 2023 Christoph Müllner, christoph.muellner@vrull.eu
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define REQUIRE_ZFA(ctx) do {     \
+    if (!ctx->cfg_ptr->ext_zfa) { \
+        return false;             \
+    }                             \
+} while (0)
+
+#define REQUIRE_ZFH(ctx) do {     \
+    if (!ctx->cfg_ptr->ext_zfh) { \
+        return false;             \
+    }                             \
+} while (0)
+
+static bool trans_fli_s(DisasContext *ctx, arg_fli_s *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVF);
+
+    /* Values below are NaN-boxed to avoid a gen_nanbox_s(). */
+    static const uint64_t fli_s_table[] = {
+        0xffffffffbf800000,  /* -1.0 */
+        0xffffffff00800000,  /* minimum positive normal */
+        0xffffffff37800000,  /* 1.0 * 2^-16 */
+        0xffffffff38000000,  /* 1.0 * 2^-15 */
+        0xffffffff3b800000,  /* 1.0 * 2^-8  */
+        0xffffffff3c000000,  /* 1.0 * 2^-7  */
+        0xffffffff3d800000,  /* 1.0 * 2^-4  */
+        0xffffffff3e000000,  /* 1.0 * 2^-3  */
+        0xffffffff3e800000,  /* 0.25 */
+        0xffffffff3ea00000,  /* 0.3125 */
+        0xffffffff3ec00000,  /* 0.375 */
+        0xffffffff3ee00000,  /* 0.4375 */
+        0xffffffff3f000000,  /* 0.5 */
+        0xffffffff3f200000,  /* 0.625 */
+        0xffffffff3f400000,  /* 0.75 */
+        0xffffffff3f600000,  /* 0.875 */
+        0xffffffff3f800000,  /* 1.0 */
+        0xffffffff3fa00000,  /* 1.25 */
+        0xffffffff3fc00000,  /* 1.5 */
+        0xffffffff3fe00000,  /* 1.75 */
+        0xffffffff40000000,  /* 2.0 */
+        0xffffffff40200000,  /* 2.5 */
+        0xffffffff40400000,  /* 3 */
+        0xffffffff40800000,  /* 4 */
+        0xffffffff41000000,  /* 8 */
+        0xffffffff41800000,  /* 16 */
+        0xffffffff43000000,  /* 2^7 */
+        0xffffffff43800000,  /* 2^8 */
+        0xffffffff47000000,  /* 2^15 */
+        0xffffffff47800000,  /* 2^16 */
+        0xffffffff7f800000,  /* +inf */
+        0xffffffff7fc00000,  /* Canonical NaN */
+    };
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    tcg_gen_movi_i64(dest, fli_s_table[a->rs1]);
+    gen_set_fpr_hs(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fli_d(DisasContext *ctx, arg_fli_d *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVD);
+
+    static const uint64_t fli_d_table[] = {
+        0xbff0000000000000,  /* -1.0 */
+        0x0010000000000000,  /* minimum positive normal */
+        0x3ef0000000000000,  /* 1.0 * 2^-16 */
+        0x3f00000000000000,  /* 1.0 * 2^-15 */
+        0x3f70000000000000,  /* 1.0 * 2^-8  */
+        0x3f80000000000000,  /* 1.0 * 2^-7  */
+        0x3fb0000000000000,  /* 1.0 * 2^-4  */
+        0x3fc0000000000000,  /* 1.0 * 2^-3  */
+        0x3fd0000000000000,  /* 0.25 */
+        0x3fd4000000000000,  /* 0.3125 */
+        0x3fd8000000000000,  /* 0.375 */
+        0x3fdc000000000000,  /* 0.4375 */
+        0x3fe0000000000000,  /* 0.5 */
+        0x3fe4000000000000,  /* 0.625 */
+        0x3fe8000000000000,  /* 0.75 */
+        0x3fec000000000000,  /* 0.875 */
+        0x3ff0000000000000,  /* 1.0 */
+        0x3ff4000000000000,  /* 1.25 */
+        0x3ff8000000000000,  /* 1.5 */
+        0x3ffc000000000000,  /* 1.75 */
+        0x4000000000000000,  /* 2.0 */
+        0x4004000000000000,  /* 2.5 */
+        0x4008000000000000,  /* 3 */
+        0x4010000000000000,  /* 4 */
+        0x4020000000000000,  /* 8 */
+        0x4030000000000000,  /* 16 */
+        0x4060000000000000,  /* 2^7 */
+        0x4070000000000000,  /* 2^8 */
+        0x40e0000000000000,  /* 2^15 */
+        0x40f0000000000000,  /* 2^16 */
+        0x7ff0000000000000,  /* +inf */
+        0x7ff8000000000000,  /* Canonical NaN */
+    };
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    tcg_gen_movi_i64(dest, fli_d_table[a->rs1]);
+    gen_set_fpr_d(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fli_h(DisasContext *ctx, arg_fli_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_ZFH(ctx);
+
+    /* Values below are NaN-boxed to avoid a gen_nanbox_h(). */
+    static const uint64_t fli_h_table[] = {
+        0xffffffffffffbc00,  /* -1.0 */
+        0xffffffffffff0400,  /* minimum positive normal */
+        0xffffffffffff0100,  /* 1.0 * 2^-16 */
+        0xffffffffffff0200,  /* 1.0 * 2^-15 */
+        0xffffffffffff1c00,  /* 1.0 * 2^-8  */
+        0xffffffffffff2000,  /* 1.0 * 2^-7  */
+        0xffffffffffff2c00,  /* 1.0 * 2^-4  */
+        0xffffffffffff3000,  /* 1.0 * 2^-3  */
+        0xffffffffffff3400,  /* 0.25 */
+        0xffffffffffff3500,  /* 0.3125 */
+        0xffffffffffff3600,  /* 0.375 */
+        0xffffffffffff3700,  /* 0.4375 */
+        0xffffffffffff3800,  /* 0.5 */
+        0xffffffffffff3900,  /* 0.625 */
+        0xffffffffffff3a00,  /* 0.75 */
+        0xffffffffffff3b00,  /* 0.875 */
+        0xffffffffffff3c00,  /* 1.0 */
+        0xffffffffffff3d00,  /* 1.25 */
+        0xffffffffffff3e00,  /* 1.5 */
+        0xffffffffffff3f00,  /* 1.75 */
+        0xffffffffffff4000,  /* 2.0 */
+        0xffffffffffff4100,  /* 2.5 */
+        0xffffffffffff4200,  /* 3 */
+        0xffffffffffff4400,  /* 4 */
+        0xffffffffffff4800,  /* 8 */
+        0xffffffffffff4c00,  /* 16 */
+        0xffffffffffff5800,  /* 2^7 */
+        0xffffffffffff5c00,  /* 2^8 */
+        0xffffffffffff7800,  /* 2^15 */
+        0xffffffffffff7c00,  /* 2^16 */
+        0xffffffffffff7c00,  /* +inf */
+        0xffffffffffff7e00,  /* Canonical NaN */
+    };
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    tcg_gen_movi_i64(dest, fli_h_table[a->rs1]);
+    gen_set_fpr_hs(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fminm_s(DisasContext *ctx, arg_fminm_s *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVF);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+
+    gen_helper_fminm_s(dest, cpu_env, src1, src2);
+    gen_set_fpr_hs(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fmaxm_s(DisasContext *ctx, arg_fmaxm_s *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVF);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+
+    gen_helper_fmaxm_s(dest, cpu_env, src1, src2);
+    gen_set_fpr_hs(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fminm_d(DisasContext *ctx, arg_fminm_d *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVD);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+    TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+
+    gen_helper_fminm_d(dest, cpu_env, src1, src2);
+    gen_set_fpr_d(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fmaxm_d(DisasContext *ctx, arg_fmaxm_d *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVD);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+    TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+
+    gen_helper_fmaxm_d(dest, cpu_env, src1, src2);
+    gen_set_fpr_d(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fminm_h(DisasContext *ctx, arg_fminm_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_ZFH(ctx);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+
+    gen_helper_fminm_h(dest, cpu_env, src1, src2);
+    gen_set_fpr_hs(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fmaxm_h(DisasContext *ctx, arg_fmaxm_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_ZFH(ctx);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+
+    gen_helper_fmaxm_h(dest, cpu_env, src1, src2);
+    gen_set_fpr_hs(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fround_s(DisasContext *ctx, arg_fround_s *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVF);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fround_s(dest, cpu_env, src1);
+    gen_set_fpr_hs(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_froundnx_s(DisasContext *ctx, arg_froundnx_s *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVF);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_froundnx_s(dest, cpu_env, src1);
+    gen_set_fpr_hs(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fround_d(DisasContext *ctx, arg_fround_d *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVD);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fround_d(dest, cpu_env, src1);
+    gen_set_fpr_hs(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_froundnx_d(DisasContext *ctx, arg_froundnx_d *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVD);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_froundnx_d(dest, cpu_env, src1);
+    gen_set_fpr_hs(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fround_h(DisasContext *ctx, arg_fround_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_ZFH(ctx);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fround_h(dest, cpu_env, src1);
+    gen_set_fpr_hs(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_froundnx_h(DisasContext *ctx, arg_froundnx_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_ZFH(ctx);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_froundnx_h(dest, cpu_env, src1);
+    gen_set_fpr_hs(ctx, a->rd, dest);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+bool trans_fcvtmod_w_d(DisasContext *ctx, arg_fcvtmod_w_d *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVD);
+
+    TCGv dst = dest_gpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+    TCGv_i64 t1 = tcg_temp_new_i64();
+
+    /* Rounding mode is RTZ. */
+    gen_set_rm(ctx, RISCV_FRM_RTZ);
+    gen_helper_fcvtmod_w_d(t1, cpu_env, src1);
+    tcg_gen_trunc_i64_tl(dst, t1);
+    gen_set_gpr(ctx, a->rd, dst);
+
+    return true;
+}
+
+bool trans_fmvh_x_d(DisasContext *ctx, arg_fmvh_x_d *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVD);
+    REQUIRE_32BIT(ctx);
+
+    TCGv dst = dest_gpr(ctx, a->rd);
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    tcg_gen_sari_i64(t1, cpu_fpr[a->rs1], 32);
+    tcg_gen_trunc_i64_tl(dst, t1);
+    gen_set_gpr(ctx, a->rd, dst);
+    return true;
+}
+
+bool trans_fmvp_d_x(DisasContext *ctx, arg_fmvp_d_x *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVD);
+    REQUIRE_32BIT(ctx);
+
+    TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE);
+    TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE);
+    tcg_gen_concat_tl_i64(cpu_fpr[a->rd], src1, src2);
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+bool trans_fleq_s(DisasContext *ctx, arg_fleq_s *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVF);
+
+    TCGv dest = dest_gpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+
+    gen_helper_fleq_s(dest, cpu_env, src1, src2);
+    gen_set_gpr(ctx, a->rd, dest);
+    return true;
+}
+
+bool trans_fltq_s(DisasContext *ctx, arg_fltq_s *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVF);
+
+    TCGv dest = dest_gpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+
+    gen_helper_fltq_s(dest, cpu_env, src1, src2);
+    gen_set_gpr(ctx, a->rd, dest);
+    return true;
+}
+
+bool trans_fleq_d(DisasContext *ctx, arg_fleq_d *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVD);
+
+    TCGv dest = dest_gpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+
+    gen_helper_fltq_s(dest, cpu_env, src1, src2);
+    gen_set_gpr(ctx, a->rd, dest);
+    return true;
+}
+
+bool trans_fltq_d(DisasContext *ctx, arg_fltq_d *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_EXT(ctx, RVD);
+
+    TCGv dest = dest_gpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+
+    gen_helper_fltq_s(dest, cpu_env, src1, src2);
+    gen_set_gpr(ctx, a->rd, dest);
+    return true;
+}
+
+bool trans_fleq_h(DisasContext *ctx, arg_fleq_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_ZFH(ctx);
+
+    TCGv dest = dest_gpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+
+    gen_helper_fleq_h(dest, cpu_env, src1, src2);
+    gen_set_gpr(ctx, a->rd, dest);
+    return true;
+}
+
+bool trans_fltq_h(DisasContext *ctx, arg_fltq_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZFA(ctx);
+    REQUIRE_ZFH(ctx);
+
+    TCGv dest = dest_gpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+
+    gen_helper_fltq_h(dest, cpu_env, src1, src2);
+    gen_set_gpr(ctx, a->rd, dest);
+    return true;
+}
diff --git a/target/riscv/insn_trans/trans_rvzfh.c.inc b/target/riscv/insn_trans/trans_rvzfh.c.inc
index 74dde37ff7..8b1e2519bb 100644
--- a/target/riscv/insn_trans/trans_rvzfh.c.inc
+++ b/target/riscv/insn_trans/trans_rvzfh.c.inc
@@ -28,8 +28,8 @@
     }                                  \
 } while (0)
 
-#define REQUIRE_ZFHMIN(ctx) do {              \
-    if (!ctx->cfg_ptr->ext_zfhmin) {          \
+#define REQUIRE_ZFHMIN_OR_ZFBFMIN(ctx) do {   \
+    if (!ctx->cfg_ptr->ext_zfhmin && !ctx->cfg_ptr->ext_zfbfmin) { \
         return false;                         \
     }                                         \
 } while (0)
@@ -46,7 +46,7 @@ static bool trans_flh(DisasContext *ctx, arg_flh *a)
     TCGv t0;
 
     REQUIRE_FPU;
-    REQUIRE_ZFHMIN(ctx);
+    REQUIRE_ZFHMIN_OR_ZFBFMIN(ctx);
 
     decode_save_opc(ctx);
     t0 = get_gpr(ctx, a->rs1, EXT_NONE);
@@ -69,7 +69,7 @@ static bool trans_fsh(DisasContext *ctx, arg_fsh *a)
     TCGv t0;
 
     REQUIRE_FPU;
-    REQUIRE_ZFHMIN(ctx);
+    REQUIRE_ZFHMIN_OR_ZFBFMIN(ctx);
 
     decode_save_opc(ctx);
     t0 = get_gpr(ctx, a->rs1, EXT_NONE);
@@ -574,7 +574,7 @@ static bool trans_fcvt_h_wu(DisasContext *ctx, arg_fcvt_h_wu *a)
 static bool trans_fmv_x_h(DisasContext *ctx, arg_fmv_x_h *a)
 {
     REQUIRE_FPU;
-    REQUIRE_ZFHMIN(ctx);
+    REQUIRE_ZFHMIN_OR_ZFBFMIN(ctx);
 
     TCGv dest = dest_gpr(ctx, a->rd);
 
@@ -594,7 +594,7 @@ static bool trans_fmv_x_h(DisasContext *ctx, arg_fmv_x_h *a)
 static bool trans_fmv_h_x(DisasContext *ctx, arg_fmv_h_x *a)
 {
     REQUIRE_FPU;
-    REQUIRE_ZFHMIN(ctx);
+    REQUIRE_ZFHMIN_OR_ZFBFMIN(ctx);
 
     TCGv t0 = get_gpr(ctx, a->rs1, EXT_ZERO);
 
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 0f932a5b96..9d8a8982f9 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -22,8 +22,10 @@
 #include <linux/kvm.h>
 
 #include "qemu/timer.h"
+#include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
+#include "qapi/visitor.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
 #include "sysemu/kvm_int.h"
@@ -99,12 +101,280 @@ static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
 
 #define KVM_RISCV_SET_TIMER(cs, env, name, reg) \
     do { \
-        int ret = kvm_set_one_reg(cs, RISCV_TIMER_REG(env, time), &reg); \
+        int ret = kvm_set_one_reg(cs, RISCV_TIMER_REG(env, name), &reg); \
         if (ret) { \
             abort(); \
         } \
     } while (0)
 
+typedef struct KVMCPUConfig {
+    const char *name;
+    const char *description;
+    target_ulong offset;
+    int kvm_reg_id;
+    bool user_set;
+    bool supported;
+} KVMCPUConfig;
+
+#define KVM_MISA_CFG(_bit, _reg_id) \
+    {.offset = _bit, .kvm_reg_id = _reg_id}
+
+/* KVM ISA extensions */
+static KVMCPUConfig kvm_misa_ext_cfgs[] = {
+    KVM_MISA_CFG(RVA, KVM_RISCV_ISA_EXT_A),
+    KVM_MISA_CFG(RVC, KVM_RISCV_ISA_EXT_C),
+    KVM_MISA_CFG(RVD, KVM_RISCV_ISA_EXT_D),
+    KVM_MISA_CFG(RVF, KVM_RISCV_ISA_EXT_F),
+    KVM_MISA_CFG(RVH, KVM_RISCV_ISA_EXT_H),
+    KVM_MISA_CFG(RVI, KVM_RISCV_ISA_EXT_I),
+    KVM_MISA_CFG(RVM, KVM_RISCV_ISA_EXT_M),
+};
+
+static void kvm_cpu_set_misa_ext_cfg(Object *obj, Visitor *v,
+                                     const char *name,
+                                     void *opaque, Error **errp)
+{
+    KVMCPUConfig *misa_ext_cfg = opaque;
+    target_ulong misa_bit = misa_ext_cfg->offset;
+    RISCVCPU *cpu = RISCV_CPU(obj);
+    CPURISCVState *env = &cpu->env;
+    bool value, host_bit;
+
+    if (!visit_type_bool(v, name, &value, errp)) {
+        return;
+    }
+
+    host_bit = env->misa_ext_mask & misa_bit;
+
+    if (value == host_bit) {
+        return;
+    }
+
+    if (!value) {
+        misa_ext_cfg->user_set = true;
+        return;
+    }
+
+    /*
+     * Forbid users to enable extensions that aren't
+     * available in the hart.
+     */
+    error_setg(errp, "Enabling MISA bit '%s' is not allowed: it's not "
+               "enabled in the host", misa_ext_cfg->name);
+}
+
+static void kvm_riscv_update_cpu_misa_ext(RISCVCPU *cpu, CPUState *cs)
+{
+    CPURISCVState *env = &cpu->env;
+    uint64_t id, reg;
+    int i, ret;
+
+    for (i = 0; i < ARRAY_SIZE(kvm_misa_ext_cfgs); i++) {
+        KVMCPUConfig *misa_cfg = &kvm_misa_ext_cfgs[i];
+        target_ulong misa_bit = misa_cfg->offset;
+
+        if (!misa_cfg->user_set) {
+            continue;
+        }
+
+        /* If we're here we're going to disable the MISA bit */
+        reg = 0;
+        id = kvm_riscv_reg_id(env, KVM_REG_RISCV_ISA_EXT,
+                              misa_cfg->kvm_reg_id);
+        ret = kvm_set_one_reg(cs, id, &reg);
+        if (ret != 0) {
+            /*
+             * We're not checking for -EINVAL because if the bit is about
+             * to be disabled, it means that it was already enabled by
+             * KVM. We determined that by fetching the 'isa' register
+             * during init() time. Any error at this point is worth
+             * aborting.
+             */
+            error_report("Unable to set KVM reg %s, error %d",
+                         misa_cfg->name, ret);
+            exit(EXIT_FAILURE);
+        }
+        env->misa_ext &= ~misa_bit;
+    }
+}
+
+#define CPUCFG(_prop) offsetof(struct RISCVCPUConfig, _prop)
+
+#define KVM_EXT_CFG(_name, _prop, _reg_id) \
+    {.name = _name, .offset = CPUCFG(_prop), \
+     .kvm_reg_id = _reg_id}
+
+static KVMCPUConfig kvm_multi_ext_cfgs[] = {
+    KVM_EXT_CFG("zicbom", ext_icbom, KVM_RISCV_ISA_EXT_ZICBOM),
+    KVM_EXT_CFG("zicboz", ext_icboz, KVM_RISCV_ISA_EXT_ZICBOZ),
+    KVM_EXT_CFG("zihintpause", ext_zihintpause, KVM_RISCV_ISA_EXT_ZIHINTPAUSE),
+    KVM_EXT_CFG("zbb", ext_zbb, KVM_RISCV_ISA_EXT_ZBB),
+    KVM_EXT_CFG("ssaia", ext_ssaia, KVM_RISCV_ISA_EXT_SSAIA),
+    KVM_EXT_CFG("sstc", ext_sstc, KVM_RISCV_ISA_EXT_SSTC),
+    KVM_EXT_CFG("svinval", ext_svinval, KVM_RISCV_ISA_EXT_SVINVAL),
+    KVM_EXT_CFG("svpbmt", ext_svpbmt, KVM_RISCV_ISA_EXT_SVPBMT),
+};
+
+static void *kvmconfig_get_cfg_addr(RISCVCPU *cpu, KVMCPUConfig *kvmcfg)
+{
+    return (void *)&cpu->cfg + kvmcfg->offset;
+}
+
+static void kvm_cpu_cfg_set(RISCVCPU *cpu, KVMCPUConfig *multi_ext,
+                            uint32_t val)
+{
+    bool *ext_enabled = kvmconfig_get_cfg_addr(cpu, multi_ext);
+
+    *ext_enabled = val;
+}
+
+static uint32_t kvm_cpu_cfg_get(RISCVCPU *cpu,
+                                KVMCPUConfig *multi_ext)
+{
+    bool *ext_enabled = kvmconfig_get_cfg_addr(cpu, multi_ext);
+
+    return *ext_enabled;
+}
+
+static void kvm_cpu_set_multi_ext_cfg(Object *obj, Visitor *v,
+                                      const char *name,
+                                      void *opaque, Error **errp)
+{
+    KVMCPUConfig *multi_ext_cfg = opaque;
+    RISCVCPU *cpu = RISCV_CPU(obj);
+    bool value, host_val;
+
+    if (!visit_type_bool(v, name, &value, errp)) {
+        return;
+    }
+
+    host_val = kvm_cpu_cfg_get(cpu, multi_ext_cfg);
+
+    /*
+     * Ignore if the user is setting the same value
+     * as the host.
+     */
+    if (value == host_val) {
+        return;
+    }
+
+    if (!multi_ext_cfg->supported) {
+        /*
+         * Error out if the user is trying to enable an
+         * extension that KVM doesn't support. Ignore
+         * option otherwise.
+         */
+        if (value) {
+            error_setg(errp, "KVM does not support disabling extension %s",
+                       multi_ext_cfg->name);
+        }
+
+        return;
+    }
+
+    multi_ext_cfg->user_set = true;
+    kvm_cpu_cfg_set(cpu, multi_ext_cfg, value);
+}
+
+static KVMCPUConfig kvm_cbom_blocksize = {
+    .name = "cbom_blocksize",
+    .offset = CPUCFG(cbom_blocksize),
+    .kvm_reg_id = KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)
+};
+
+static KVMCPUConfig kvm_cboz_blocksize = {
+    .name = "cboz_blocksize",
+    .offset = CPUCFG(cboz_blocksize),
+    .kvm_reg_id = KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)
+};
+
+static void kvm_cpu_set_cbomz_blksize(Object *obj, Visitor *v,
+                                      const char *name,
+                                      void *opaque, Error **errp)
+{
+    KVMCPUConfig *cbomz_cfg = opaque;
+    RISCVCPU *cpu = RISCV_CPU(obj);
+    uint16_t value, *host_val;
+
+    if (!visit_type_uint16(v, name, &value, errp)) {
+        return;
+    }
+
+    host_val = kvmconfig_get_cfg_addr(cpu, cbomz_cfg);
+
+    if (value != *host_val) {
+        error_report("Unable to set %s to a different value than "
+                     "the host (%u)",
+                     cbomz_cfg->name, *host_val);
+        exit(EXIT_FAILURE);
+    }
+
+    cbomz_cfg->user_set = true;
+}
+
+static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs)
+{
+    CPURISCVState *env = &cpu->env;
+    uint64_t id, reg;
+    int i, ret;
+
+    for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) {
+        KVMCPUConfig *multi_ext_cfg = &kvm_multi_ext_cfgs[i];
+
+        if (!multi_ext_cfg->user_set) {
+            continue;
+        }
+
+        id = kvm_riscv_reg_id(env, KVM_REG_RISCV_ISA_EXT,
+                              multi_ext_cfg->kvm_reg_id);
+        reg = kvm_cpu_cfg_get(cpu, multi_ext_cfg);
+        ret = kvm_set_one_reg(cs, id, &reg);
+        if (ret != 0) {
+            error_report("Unable to %s extension %s in KVM, error %d",
+                         reg ? "enable" : "disable",
+                         multi_ext_cfg->name, ret);
+            exit(EXIT_FAILURE);
+        }
+    }
+}
+
+static void kvm_riscv_add_cpu_user_properties(Object *cpu_obj)
+{
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(kvm_misa_ext_cfgs); i++) {
+        KVMCPUConfig *misa_cfg = &kvm_misa_ext_cfgs[i];
+        int bit = misa_cfg->offset;
+
+        misa_cfg->name = riscv_get_misa_ext_name(bit);
+        misa_cfg->description = riscv_get_misa_ext_description(bit);
+
+        object_property_add(cpu_obj, misa_cfg->name, "bool",
+                            NULL,
+                            kvm_cpu_set_misa_ext_cfg,
+                            NULL, misa_cfg);
+        object_property_set_description(cpu_obj, misa_cfg->name,
+                                        misa_cfg->description);
+    }
+
+    for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) {
+        KVMCPUConfig *multi_cfg = &kvm_multi_ext_cfgs[i];
+
+        object_property_add(cpu_obj, multi_cfg->name, "bool",
+                            NULL,
+                            kvm_cpu_set_multi_ext_cfg,
+                            NULL, multi_cfg);
+    }
+
+    object_property_add(cpu_obj, "cbom_blocksize", "uint16",
+                        NULL, kvm_cpu_set_cbomz_blksize,
+                        NULL, &kvm_cbom_blocksize);
+
+    object_property_add(cpu_obj, "cboz_blocksize", "uint16",
+                        NULL, kvm_cpu_set_cbomz_blksize,
+                        NULL, &kvm_cboz_blocksize);
+}
+
 static int kvm_riscv_get_regs_core(CPUState *cs)
 {
     int ret = 0;
@@ -309,6 +579,191 @@ static void kvm_riscv_put_regs_timer(CPUState *cs)
     env->kvm_timer_dirty = false;
 }
 
+typedef struct KVMScratchCPU {
+    int kvmfd;
+    int vmfd;
+    int cpufd;
+} KVMScratchCPU;
+
+/*
+ * Heavily inspired by kvm_arm_create_scratch_host_vcpu()
+ * from target/arm/kvm.c.
+ */
+static bool kvm_riscv_create_scratch_vcpu(KVMScratchCPU *scratch)
+{
+    int kvmfd = -1, vmfd = -1, cpufd = -1;
+
+    kvmfd = qemu_open_old("/dev/kvm", O_RDWR);
+    if (kvmfd < 0) {
+        goto err;
+    }
+    do {
+        vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
+    } while (vmfd == -1 && errno == EINTR);
+    if (vmfd < 0) {
+        goto err;
+    }
+    cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
+    if (cpufd < 0) {
+        goto err;
+    }
+
+    scratch->kvmfd =  kvmfd;
+    scratch->vmfd = vmfd;
+    scratch->cpufd = cpufd;
+
+    return true;
+
+ err:
+    if (cpufd >= 0) {
+        close(cpufd);
+    }
+    if (vmfd >= 0) {
+        close(vmfd);
+    }
+    if (kvmfd >= 0) {
+        close(kvmfd);
+    }
+
+    return false;
+}
+
+static void kvm_riscv_destroy_scratch_vcpu(KVMScratchCPU *scratch)
+{
+    close(scratch->cpufd);
+    close(scratch->vmfd);
+    close(scratch->kvmfd);
+}
+
+static void kvm_riscv_init_machine_ids(RISCVCPU *cpu, KVMScratchCPU *kvmcpu)
+{
+    CPURISCVState *env = &cpu->env;
+    struct kvm_one_reg reg;
+    int ret;
+
+    reg.id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG,
+                              KVM_REG_RISCV_CONFIG_REG(mvendorid));
+    reg.addr = (uint64_t)&cpu->cfg.mvendorid;
+    ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
+    if (ret != 0) {
+        error_report("Unable to retrieve mvendorid from host, error %d", ret);
+    }
+
+    reg.id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG,
+                              KVM_REG_RISCV_CONFIG_REG(marchid));
+    reg.addr = (uint64_t)&cpu->cfg.marchid;
+    ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
+    if (ret != 0) {
+        error_report("Unable to retrieve marchid from host, error %d", ret);
+    }
+
+    reg.id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG,
+                              KVM_REG_RISCV_CONFIG_REG(mimpid));
+    reg.addr = (uint64_t)&cpu->cfg.mimpid;
+    ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
+    if (ret != 0) {
+        error_report("Unable to retrieve mimpid from host, error %d", ret);
+    }
+}
+
+static void kvm_riscv_init_misa_ext_mask(RISCVCPU *cpu,
+                                         KVMScratchCPU *kvmcpu)
+{
+    CPURISCVState *env = &cpu->env;
+    struct kvm_one_reg reg;
+    int ret;
+
+    reg.id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG,
+                              KVM_REG_RISCV_CONFIG_REG(isa));
+    reg.addr = (uint64_t)&env->misa_ext_mask;
+    ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
+
+    if (ret) {
+        error_report("Unable to fetch ISA register from KVM, "
+                     "error %d", ret);
+        kvm_riscv_destroy_scratch_vcpu(kvmcpu);
+        exit(EXIT_FAILURE);
+    }
+
+    env->misa_ext = env->misa_ext_mask;
+}
+
+static void kvm_riscv_read_cbomz_blksize(RISCVCPU *cpu, KVMScratchCPU *kvmcpu,
+                                         KVMCPUConfig *cbomz_cfg)
+{
+    CPURISCVState *env = &cpu->env;
+    struct kvm_one_reg reg;
+    int ret;
+
+    reg.id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG,
+                              cbomz_cfg->kvm_reg_id);
+    reg.addr = (uint64_t)kvmconfig_get_cfg_addr(cpu, cbomz_cfg);
+    ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
+    if (ret != 0) {
+        error_report("Unable to read KVM reg %s, error %d",
+                     cbomz_cfg->name, ret);
+        exit(EXIT_FAILURE);
+    }
+}
+
+static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu)
+{
+    CPURISCVState *env = &cpu->env;
+    uint64_t val;
+    int i, ret;
+
+    for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) {
+        KVMCPUConfig *multi_ext_cfg = &kvm_multi_ext_cfgs[i];
+        struct kvm_one_reg reg;
+
+        reg.id = kvm_riscv_reg_id(env, KVM_REG_RISCV_ISA_EXT,
+                                  multi_ext_cfg->kvm_reg_id);
+        reg.addr = (uint64_t)&val;
+        ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
+        if (ret != 0) {
+            if (errno == EINVAL) {
+                /* Silently default to 'false' if KVM does not support it. */
+                multi_ext_cfg->supported = false;
+                val = false;
+            } else {
+                error_report("Unable to read ISA_EXT KVM register %s, "
+                             "error %d", multi_ext_cfg->name, ret);
+                kvm_riscv_destroy_scratch_vcpu(kvmcpu);
+                exit(EXIT_FAILURE);
+            }
+        } else {
+            multi_ext_cfg->supported = true;
+        }
+
+        kvm_cpu_cfg_set(cpu, multi_ext_cfg, val);
+    }
+
+    if (cpu->cfg.ext_icbom) {
+        kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cbom_blocksize);
+    }
+
+    if (cpu->cfg.ext_icboz) {
+        kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cboz_blocksize);
+    }
+}
+
+void kvm_riscv_init_user_properties(Object *cpu_obj)
+{
+    RISCVCPU *cpu = RISCV_CPU(cpu_obj);
+    KVMScratchCPU kvmcpu;
+
+    if (!kvm_riscv_create_scratch_vcpu(&kvmcpu)) {
+        return;
+    }
+
+    kvm_riscv_add_cpu_user_properties(cpu_obj);
+    kvm_riscv_init_machine_ids(cpu, &kvmcpu);
+    kvm_riscv_init_misa_ext_mask(cpu, &kvmcpu);
+    kvm_riscv_init_multiext_cfg(cpu, &kvmcpu);
+
+    kvm_riscv_destroy_scratch_vcpu(&kvmcpu);
+}
+
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
 };
@@ -394,23 +849,49 @@ void kvm_arch_init_irq_routing(KVMState *s)
 {
 }
 
-int kvm_arch_init_vcpu(CPUState *cs)
+static int kvm_vcpu_set_machine_ids(RISCVCPU *cpu, CPUState *cs)
 {
-    int ret = 0;
-    target_ulong isa;
-    RISCVCPU *cpu = RISCV_CPU(cs);
     CPURISCVState *env = &cpu->env;
     uint64_t id;
+    int ret;
 
-    qemu_add_vm_change_state_handler(kvm_riscv_vm_state_change, cs);
+    id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG,
+                          KVM_REG_RISCV_CONFIG_REG(mvendorid));
+    ret = kvm_set_one_reg(cs, id, &cpu->cfg.mvendorid);
+    if (ret != 0) {
+        return ret;
+    }
 
     id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG,
-                          KVM_REG_RISCV_CONFIG_REG(isa));
-    ret = kvm_get_one_reg(cs, id, &isa);
-    if (ret) {
+                          KVM_REG_RISCV_CONFIG_REG(marchid));
+    ret = kvm_set_one_reg(cs, id, &cpu->cfg.marchid);
+    if (ret != 0) {
         return ret;
     }
-    env->misa_ext = isa;
+
+    id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG,
+                          KVM_REG_RISCV_CONFIG_REG(mimpid));
+    ret = kvm_set_one_reg(cs, id, &cpu->cfg.mimpid);
+
+    return ret;
+}
+
+int kvm_arch_init_vcpu(CPUState *cs)
+{
+    int ret = 0;
+    RISCVCPU *cpu = RISCV_CPU(cs);
+
+    qemu_add_vm_change_state_handler(kvm_riscv_vm_state_change, cs);
+
+    if (!object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_CPU_HOST)) {
+        ret = kvm_vcpu_set_machine_ids(cpu, cs);
+        if (ret != 0) {
+            return ret;
+        }
+    }
+
+    kvm_riscv_update_cpu_misa_ext(cpu, cs);
+    kvm_riscv_update_cpu_cfg_isa_ext(cpu, cs);
 
     return ret;
 }
diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
index ed281bdce0..e3ba935808 100644
--- a/target/riscv/kvm_riscv.h
+++ b/target/riscv/kvm_riscv.h
@@ -19,6 +19,7 @@
 #ifndef QEMU_KVM_RISCV_H
 #define QEMU_KVM_RISCV_H
 
+void kvm_riscv_init_user_properties(Object *cpu_obj);
 void kvm_riscv_reset_vcpu(RISCVCPU *cpu);
 void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level);
 
diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
index f563dc3981..9cdb9cdd06 100644
--- a/target/riscv/op_helper.c
+++ b/target/riscv/op_helper.c
@@ -335,7 +335,8 @@ target_ulong helper_mret(CPURISCVState *env)
         riscv_raise_exception(env, RISCV_EXCP_INST_ACCESS_FAULT, GETPC());
     }
 
-    target_ulong prev_virt = get_field(env->mstatus, MSTATUS_MPV);
+    target_ulong prev_virt = get_field(env->mstatus, MSTATUS_MPV) &&
+                             (prev_priv != PRV_M);
     mstatus = set_field(mstatus, MSTATUS_MIE,
                         get_field(mstatus, MSTATUS_MPIE));
     mstatus = set_field(mstatus, MSTATUS_MPIE, 1);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 8a33da811e..697df1be9e 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -64,6 +64,7 @@ typedef struct DisasContext {
     target_ulong priv_ver;
     RISCVMXL misa_mxl_max;
     RISCVMXL xl;
+    RISCVMXL address_xl;
     uint32_t misa_ext;
     uint32_t opcode;
     RISCVExtStatus mstatus_fs;
@@ -121,29 +122,6 @@ static inline bool has_ext(DisasContext *ctx, uint32_t ext)
     return ctx->misa_ext & ext;
 }
 
-static bool always_true_p(DisasContext *ctx  __attribute__((__unused__)))
-{
-    return true;
-}
-
-static bool has_xthead_p(DisasContext *ctx  __attribute__((__unused__)))
-{
-    return ctx->cfg_ptr->ext_xtheadba || ctx->cfg_ptr->ext_xtheadbb ||
-           ctx->cfg_ptr->ext_xtheadbs || ctx->cfg_ptr->ext_xtheadcmo ||
-           ctx->cfg_ptr->ext_xtheadcondmov ||
-           ctx->cfg_ptr->ext_xtheadfmemidx || ctx->cfg_ptr->ext_xtheadfmv ||
-           ctx->cfg_ptr->ext_xtheadmac || ctx->cfg_ptr->ext_xtheadmemidx ||
-           ctx->cfg_ptr->ext_xtheadmempair || ctx->cfg_ptr->ext_xtheadsync;
-}
-
-#define MATERIALISE_EXT_PREDICATE(ext)  \
-    static bool has_ ## ext ## _p(DisasContext *ctx)    \
-    { \
-        return ctx->cfg_ptr->ext_ ## ext ; \
-    }
-
-MATERIALISE_EXT_PREDICATE(XVentanaCondOps);
-
 #ifdef TARGET_RISCV32
 #define get_xl(ctx)    MXL_RV32
 #elif defined(CONFIG_USER_ONLY)
@@ -152,6 +130,14 @@ MATERIALISE_EXT_PREDICATE(XVentanaCondOps);
 #define get_xl(ctx)    ((ctx)->xl)
 #endif
 
+#ifdef TARGET_RISCV32
+#define get_address_xl(ctx)    MXL_RV32
+#elif defined(CONFIG_USER_ONLY)
+#define get_address_xl(ctx)    MXL_RV64
+#else
+#define get_address_xl(ctx)    ((ctx)->address_xl)
+#endif
+
 /* The word size for this machine mode. */
 static inline int __attribute__((unused)) get_xlen(DisasContext *ctx)
 {
@@ -598,12 +584,13 @@ static TCGv get_address(DisasContext *ctx, int rs1, int imm)
     tcg_gen_addi_tl(addr, src1, imm);
     if (ctx->pm_mask_enabled) {
         tcg_gen_andc_tl(addr, addr, pm_mask);
-    } else if (get_xl(ctx) == MXL_RV32) {
+    } else if (get_address_xl(ctx) == MXL_RV32) {
         tcg_gen_ext32u_tl(addr, addr);
     }
     if (ctx->pm_base_enabled) {
         tcg_gen_or_tl(addr, addr, pm_base);
     }
+
     return addr;
 }
 
@@ -1104,10 +1091,12 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
 #include "insn_trans/trans_rvzicond.c.inc"
 #include "insn_trans/trans_rvzawrs.c.inc"
 #include "insn_trans/trans_rvzicbo.c.inc"
+#include "insn_trans/trans_rvzfa.c.inc"
 #include "insn_trans/trans_rvzfh.c.inc"
 #include "insn_trans/trans_rvk.c.inc"
 #include "insn_trans/trans_privileged.c.inc"
 #include "insn_trans/trans_svinval.c.inc"
+#include "insn_trans/trans_rvbf16.c.inc"
 #include "decode-xthead.c.inc"
 #include "insn_trans/trans_xthead.c.inc"
 #include "insn_trans/trans_xventanacondops.c.inc"
@@ -1134,7 +1123,7 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
      * that are tested in-order until a decoder matches onto the opcode.
      */
     static const struct {
-        bool (*guard_func)(DisasContext *);
+        bool (*guard_func)(const RISCVCPUConfig *);
         bool (*decode_func)(DisasContext *, uint32_t);
     } decoders[] = {
         { always_true_p,  decode_insn32 },
@@ -1163,7 +1152,7 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
         ctx->opcode = opcode32;
 
         for (size_t i = 0; i < ARRAY_SIZE(decoders); ++i) {
-            if (decoders[i].guard_func(ctx) &&
+            if (decoders[i].guard_func(ctx->cfg_ptr) &&
                 decoders[i].decode_func(ctx, opcode32)) {
                 return;
             }
@@ -1200,6 +1189,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
     ctx->misa_mxl_max = env->misa_mxl_max;
     ctx->xl = FIELD_EX32(tb_flags, TB_FLAGS, XL);
+    ctx->address_xl = FIELD_EX32(tb_flags, TB_FLAGS, AXL);
     ctx->cs = cs;
     ctx->pm_mask_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_MASK_ENABLED);
     ctx->pm_base_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_BASE_ENABLED);
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 1e06e7447c..71bb9b4457 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -3554,6 +3554,17 @@ RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
 GEN_VEXT_VF(vfwmacc_vf_h, 4)
 GEN_VEXT_VF(vfwmacc_vf_w, 8)
 
+static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
+{
+    return float32_muladd(bfloat16_to_float32(a, s),
+                          bfloat16_to_float32(b, s), d, 0, s);
+}
+
+RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
+GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
+RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmacc16)
+GEN_VEXT_VF(vfwmaccbf16_vf, 4)
+
 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
 {
     return float32_muladd(float16_to_float32(a, true, s),
@@ -4535,6 +4546,9 @@ RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
 
+RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
+GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
+
 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
 /* (TD, T2, TX2) */
 #define NOP_UU_B uint8_t,  uint16_t, uint32_t
@@ -4581,6 +4595,9 @@ RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
 
+RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
+GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
+
 /*
  * Vector Reduction Operations
  */
diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c
index cb98f4894d..51a2116515 100644
--- a/target/s390x/arch_dump.c
+++ b/target/s390x/arch_dump.c
@@ -17,8 +17,8 @@
 #include "s390x-internal.h"
 #include "elf.h"
 #include "sysemu/dump.h"
-#include "hw/s390x/pv.h"
 #include "kvm/kvm_s390x.h"
+#include "target/s390x/kvm/pv.h"
 
 struct S390xUserRegsStruct {
     uint64_t psw[2];
diff --git a/target/s390x/cpu-sysemu.c b/target/s390x/cpu-sysemu.c
index 97d6c760a8..8112561e5e 100644
--- a/target/s390x/cpu-sysemu.c
+++ b/target/s390x/cpu-sysemu.c
@@ -33,7 +33,7 @@
 #include "qapi/qapi-visit-run-state.h"
 #include "sysemu/hw_accel.h"
 
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
 #include "hw/boards.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/tcg.h"
diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
index 2e4e11d264..ebb155ce1c 100644
--- a/target/s390x/cpu_features.c
+++ b/target/s390x/cpu_features.c
@@ -15,7 +15,7 @@
 #include "qemu/module.h"
 #include "cpu_features.h"
 #ifndef CONFIG_USER_ONLY
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
 #endif
 
 #define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index ae8880e81d..42b52afdb4 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -24,7 +24,7 @@
 #include "qemu/qemu-print.h"
 #ifndef CONFIG_USER_ONLY
 #include "sysemu/sysemu.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
 #endif
 
 #define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \
diff --git a/target/s390x/diag.c b/target/s390x/diag.c
index e5f0df19e7..8ce18e08f3 100644
--- a/target/s390x/diag.c
+++ b/target/s390x/diag.c
@@ -19,9 +19,9 @@
 #include "sysemu/cpus.h"
 #include "hw/s390x/ipl.h"
 #include "hw/s390x/s390-virtio-ccw.h"
-#include "hw/s390x/pv.h"
 #include "sysemu/kvm.h"
 #include "kvm/kvm_s390x.h"
+#include "target/s390x/kvm/pv.h"
 #include "qemu/error-report.h"
 
 
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index 2b363aa959..d76c06381b 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -24,7 +24,7 @@
 #include "gdbstub/helpers.h"
 #include "qemu/timer.h"
 #include "hw/s390x/ioinst.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
 #include "sysemu/hw_accel.h"
 #include "sysemu/runstate.h"
 
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 6bc01df73d..05102578fc 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -355,7 +355,7 @@ DEF_HELPER_FLAGS_4(idte, TCG_CALL_NO_RWG, void, env, i64, i64, i32)
 DEF_HELPER_FLAGS_4(ipte, TCG_CALL_NO_RWG, void, env, i64, i64, i32)
 DEF_HELPER_FLAGS_1(ptlb, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_FLAGS_1(purge, TCG_CALL_NO_RWG, void, env)
-DEF_HELPER_2(lra, i64, env, i64)
+DEF_HELPER_3(lra, i64, env, i64, i64)
 DEF_HELPER_1(per_check_exception, void, env)
 DEF_HELPER_FLAGS_3(per_branch, TCG_CALL_NO_RWG, void, env, i64, i64)
 DEF_HELPER_FLAGS_2(per_ifetch, TCG_CALL_NO_RWG, void, env, i64)
diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c
index 053aaabb5a..bbe45a497a 100644
--- a/target/s390x/ioinst.c
+++ b/target/s390x/ioinst.c
@@ -16,7 +16,7 @@
 #include "hw/s390x/ioinst.h"
 #include "trace.h"
 #include "hw/s390x/s390-pci-bus.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
 
 /* All I/O instructions but chsc use the s format */
 static uint64_t get_address_from_regs(CPUS390XState *env, uint32_t ipb,
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 3ac7ec9acf..a9e5880349 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -50,7 +50,7 @@
 #include "exec/memattrs.h"
 #include "hw/s390x/s390-virtio-ccw.h"
 #include "hw/s390x/s390-virtio-hcall.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
 
 #ifndef DEBUG_KVM
 #define DEBUG_KVM  0
diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build
index 37253f75bf..d6aca590ae 100644
--- a/target/s390x/kvm/meson.build
+++ b/target/s390x/kvm/meson.build
@@ -1,5 +1,6 @@
 
 s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
+  'pv.c',
   'kvm.c'
 ), if_false: files(
   'stubs.c'
diff --git a/hw/s390x/pv.c b/target/s390x/kvm/pv.c
index b63f3784c6..6a69be7e5c 100644
--- a/hw/s390x/pv.c
+++ b/target/s390x/kvm/pv.c
@@ -21,9 +21,9 @@
 #include "qom/object_interfaces.h"
 #include "exec/confidential-guest-support.h"
 #include "hw/s390x/ipl.h"
-#include "hw/s390x/pv.h"
 #include "hw/s390x/sclp.h"
 #include "target/s390x/kvm/kvm_s390x.h"
+#include "target/s390x/kvm/pv.h"
 
 static bool info_valid;
 static struct kvm_s390_pv_info_vm info_vm;
diff --git a/include/hw/s390x/pv.h b/target/s390x/kvm/pv.h
index 7b935e2246..7b935e2246 100644
--- a/include/hw/s390x/pv.h
+++ b/target/s390x/kvm/pv.h
diff --git a/target/s390x/mmu_helper.c b/target/s390x/mmu_helper.c
index b04b57c235..fbb2f1b4d4 100644
--- a/target/s390x/mmu_helper.c
+++ b/target/s390x/mmu_helper.c
@@ -417,7 +417,7 @@ int mmu_translate(CPUS390XState *env, target_ulong vaddr, int rw, uint64_t asc,
 
     vaddr &= TARGET_PAGE_MASK;
 
-    if (!(env->psw.mask & PSW_MASK_DAT)) {
+    if (rw != MMU_S390_LRA && !(env->psw.mask & PSW_MASK_DAT)) {
         *raddr = vaddr;
         goto nodat;
     }
diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c
index 57e5829283..4b7fa58af3 100644
--- a/target/s390x/tcg/fpu_helper.c
+++ b/target/s390x/tcg/fpu_helper.c
@@ -306,8 +306,9 @@ uint64_t HELPER(mdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 /* 64/32-bit FP multiplication */
 uint64_t HELPER(mdeb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
+    float64 f1_64 = float32_to_float64(f1, &env->fpu_status);
     float64 ret = float32_to_float64(f2, &env->fpu_status);
-    ret = float64_mul(f1, ret, &env->fpu_status);
+    ret = float64_mul(f1_64, ret, &env->fpu_status);
     handle_exceptions(env, false, GETPC());
     return ret;
 }
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
index 0a45dbbcda..457ed25d2f 100644
--- a/target/s390x/tcg/insn-data.h.inc
+++ b/target/s390x/tcg/insn-data.h.inc
@@ -667,11 +667,11 @@
     F(0xb317, MEEBR,   RRE,   Z,   e1, e2, new, e1, meeb, 0, IF_BFP)
     F(0xb31c, MDBR,    RRE,   Z,   f1, f2, new, f1, mdb, 0, IF_BFP)
     F(0xb34c, MXBR,    RRE,   Z,   x1, x2, new_x, x1, mxb, 0, IF_BFP)
-    F(0xb30c, MDEBR,   RRE,   Z,   f1, e2, new, f1, mdeb, 0, IF_BFP)
+    F(0xb30c, MDEBR,   RRE,   Z,   e1, e2, new, f1, mdeb, 0, IF_BFP)
     F(0xb307, MXDBR,   RRE,   Z,   f1, f2, new_x, x1, mxdb, 0, IF_BFP)
     F(0xed17, MEEB,    RXE,   Z,   e1, m2_32u, new, e1, meeb, 0, IF_BFP)
     F(0xed1c, MDB,     RXE,   Z,   f1, m2_64, new, f1, mdb, 0, IF_BFP)
-    F(0xed0c, MDEB,    RXE,   Z,   f1, m2_32u, new, f1, mdeb, 0, IF_BFP)
+    F(0xed0c, MDEB,    RXE,   Z,   e1, m2_32u, new, f1, mdeb, 0, IF_BFP)
     F(0xed07, MXDB,    RXE,   Z,   f1, m2_64, new_x, x1, mxdb, 0, IF_BFP)
 /* MULTIPLY HALFWORD */
     C(0x4c00, MH,      RX_a,  Z,   r1_o, m2_16s, new, r1_32, mul, 0)
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index d02ec861d8..f417fb1183 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -514,6 +514,7 @@ void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
     int32_t i;
 
     /* MVCRL always copies one more byte than specified - maximum is 256 */
+    l &= 0xff;
     l++;
 
     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
@@ -2355,7 +2356,7 @@ void HELPER(purge)(CPUS390XState *env)
 }
 
 /* load real address */
-uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
+uint64_t HELPER(lra)(CPUS390XState *env, uint64_t r1, uint64_t addr)
 {
     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
     uint64_t ret, tec;
@@ -2369,7 +2370,7 @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
     if (exc) {
         cc = 3;
-        ret = exc | 0x80000000;
+        ret = (r1 & 0xFFFFFFFF00000000ULL) | exc | 0x80000000;
     } else {
         cc = 0;
         ret |= addr & ~TARGET_PAGE_MASK;
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index a6ee2d4423..6661b27efa 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -2383,10 +2383,14 @@ static DisasJumpType op_epsw(DisasContext *s, DisasOps *o)
     int r1 = get_field(s, r1);
     int r2 = get_field(s, r2);
     TCGv_i64 t = tcg_temp_new_i64();
+    TCGv_i64 t_cc = tcg_temp_new_i64();
 
     /* Note the "subsequently" in the PoO, which implies a defined result
        if r1 == r2.  Thus we cannot defer these writes to an output hook.  */
+    gen_op_calc_cc(s);
+    tcg_gen_extu_i32_i64(t_cc, cc_op);
     tcg_gen_shri_i64(t, psw_mask, 32);
+    tcg_gen_deposit_i64(t, t, t_cc, 12, 2);
     store_reg32_i64(r1, t);
     if (r2 != 0) {
         store_reg32_i64(r2, psw_mask);
@@ -2928,7 +2932,7 @@ static DisasJumpType op_lctlg(DisasContext *s, DisasOps *o)
 
 static DisasJumpType op_lra(DisasContext *s, DisasOps *o)
 {
-    gen_helper_lra(o->out, cpu_env, o->in2);
+    gen_helper_lra(o->out, cpu_env, o->out, o->in2);
     set_cc_static(s);
     return DISAS_NEXT;
 }
@@ -5790,7 +5794,7 @@ static TCGv gen_ri2(DisasContext *s)
 
     disas_jdest(s, i2, is_imm, imm, ri2);
     if (is_imm) {
-        ri2 = tcg_constant_i64(s->base.pc_next + imm * 2);
+        ri2 = tcg_constant_i64(s->base.pc_next + (int64_t)imm * 2);
     }
 
     return ri2;
diff --git a/tests/avocado/riscv_opensbi.py b/tests/avocado/riscv_opensbi.py
index e02f0d404a..bfff9cc3c3 100644
--- a/tests/avocado/riscv_opensbi.py
+++ b/tests/avocado/riscv_opensbi.py
@@ -6,7 +6,6 @@
 # later.  See the COPYING file in the top-level directory.
 
 from avocado_qemu import QemuSystemTest
-from avocado import skip
 from avocado_qemu import wait_for_console_pattern
 
 class RiscvOpenSBI(QemuSystemTest):
@@ -21,7 +20,6 @@ class RiscvOpenSBI(QemuSystemTest):
         wait_for_console_pattern(self, 'Platform Name')
         wait_for_console_pattern(self, 'Boot HART MEDELEG')
 
-    @skip("requires OpenSBI fix to work")
     def test_riscv32_spike(self):
         """
         :avocado: tags=arch:riscv32
diff --git a/tests/qtest/libqos/libqos.c b/tests/qtest/libqos/libqos.c
index 5ffda080ec..5c0fa1f7c5 100644
--- a/tests/qtest/libqos/libqos.c
+++ b/tests/qtest/libqos/libqos.c
@@ -137,56 +137,9 @@ void migrate(QOSState *from, QOSState *to, const char *uri)
     migrate_allocator(&from->alloc, &to->alloc);
 }
 
-bool have_qemu_img(void)
-{
-    char *rpath;
-    const char *path = getenv("QTEST_QEMU_IMG");
-    if (!path) {
-        return false;
-    }
-
-    rpath = realpath(path, NULL);
-    if (!rpath) {
-        return false;
-    } else {
-        free(rpath);
-        return true;
-    }
-}
-
-void mkimg(const char *file, const char *fmt, unsigned size_mb)
-{
-    gchar *cli;
-    bool ret;
-    int rc;
-    GError *err = NULL;
-    char *qemu_img_path;
-    gchar *out, *out2;
-    char *qemu_img_abs_path;
-
-    qemu_img_path = getenv("QTEST_QEMU_IMG");
-    g_assert(qemu_img_path);
-    qemu_img_abs_path = realpath(qemu_img_path, NULL);
-    g_assert(qemu_img_abs_path);
-
-    cli = g_strdup_printf("%s create -f %s %s %uM", qemu_img_abs_path,
-                          fmt, file, size_mb);
-    ret = g_spawn_command_line_sync(cli, &out, &out2, &rc, &err);
-    if (err || !g_spawn_check_exit_status(rc, &err)) {
-        fprintf(stderr, "%s\n", err->message);
-        g_error_free(err);
-    }
-    g_assert(ret && !err);
-
-    g_free(out);
-    g_free(out2);
-    g_free(cli);
-    free(qemu_img_abs_path);
-}
-
 void mkqcow2(const char *file, unsigned size_mb)
 {
-    return mkimg(file, "qcow2", size_mb);
+    g_assert_true(mkimg(file, "qcow2", size_mb));
 }
 
 void prepare_blkdebug_script(const char *debug_fn, const char *event)
diff --git a/tests/qtest/libqos/libqos.h b/tests/qtest/libqos/libqos.h
index 12d05b2365..c04950e2b1 100644
--- a/tests/qtest/libqos/libqos.h
+++ b/tests/qtest/libqos/libqos.h
@@ -27,8 +27,6 @@ QOSState *qtest_boot(QOSOps *ops, const char *cmdline_fmt, ...)
     G_GNUC_PRINTF(2, 3);
 void qtest_common_shutdown(QOSState *qs);
 void qtest_shutdown(QOSState *qs);
-bool have_qemu_img(void);
-void mkimg(const char *file, const char *fmt, unsigned size_mb);
 void mkqcow2(const char *file, unsigned size_mb);
 void migrate(QOSState *from, QOSState *to, const char *uri);
 void prepare_blkdebug_script(const char *debug_fn, const char *event);
diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c
index 79152f0ec3..c22dfc30d3 100644
--- a/tests/qtest/libqtest.c
+++ b/tests/qtest/libqtest.c
@@ -1742,3 +1742,55 @@ bool qtest_qom_get_bool(QTestState *s, const char *path, const char *property)
 
     return b;
 }
+
+bool have_qemu_img(void)
+{
+    char *rpath;
+    const char *path = getenv("QTEST_QEMU_IMG");
+    if (!path) {
+        return false;
+    }
+
+    rpath = realpath(path, NULL);
+    if (!rpath) {
+        return false;
+    } else {
+        free(rpath);
+        return true;
+    }
+}
+
+bool mkimg(const char *file, const char *fmt, unsigned size_mb)
+{
+    gchar *cli;
+    bool ret;
+    int rc;
+    GError *err = NULL;
+    char *qemu_img_path;
+    gchar *out, *out2;
+    char *qemu_img_abs_path;
+
+    qemu_img_path = getenv("QTEST_QEMU_IMG");
+    if (!qemu_img_path) {
+        return false;
+    }
+    qemu_img_abs_path = realpath(qemu_img_path, NULL);
+    if (!qemu_img_abs_path) {
+        return false;
+    }
+
+    cli = g_strdup_printf("%s create -f %s %s %uM", qemu_img_abs_path,
+                          fmt, file, size_mb);
+    ret = g_spawn_command_line_sync(cli, &out, &out2, &rc, &err);
+    if (err || !g_spawn_check_exit_status(rc, &err)) {
+        fprintf(stderr, "%s\n", err->message);
+        g_error_free(err);
+    }
+
+    g_free(out);
+    g_free(out2);
+    g_free(cli);
+    free(qemu_img_abs_path);
+
+    return ret && !err;
+}
diff --git a/tests/qtest/libqtest.h b/tests/qtest/libqtest.h
index 913acc3d5c..3a71bc45fc 100644
--- a/tests/qtest/libqtest.h
+++ b/tests/qtest/libqtest.h
@@ -994,4 +994,24 @@ bool qtest_qom_get_bool(QTestState *s, const char *path, const char *property);
  */
 pid_t qtest_pid(QTestState *s);
 
+/**
+ * have_qemu_img:
+ *
+ * Returns: true if "qemu-img" is available.
+ */
+bool have_qemu_img(void);
+
+/**
+ * mkimg:
+ * @file: File name of the image that should be created
+ * @fmt: Format, e.g. "qcow2" or "raw"
+ * @size_mb: Size of the image in megabytes
+ *
+ * Create a disk image with qemu-img. Note that the QTEST_QEMU_IMG
+ * environment variable must point to the qemu-img file.
+ *
+ * Returns: true if the image has been created successfully.
+ */
+bool mkimg(const char *file, const char *fmt, unsigned size_mb);
+
 #endif
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 74630f6672..b071d400b3 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -234,6 +234,9 @@ qtests_s390x = \
    'cpu-plug-test',
    'migration-test']
 
+qtests_riscv32 = \
+  (config_all_devices.has_key('CONFIG_SIFIVE_E_AON') ? ['sifive-e-aon-watchdog-test'] : [])
+
 qos_test_ss = ss.source_set()
 qos_test_ss.add(
   'ac97-test.c',
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index b9cc194100..efa8c729db 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -47,6 +47,20 @@ static bool got_src_stop;
 static bool got_dst_resume;
 
 /*
+ * An initial 3 MB offset is used as that corresponds
+ * to ~1 sec of data transfer with our bandwidth setting.
+ */
+#define MAGIC_OFFSET_BASE (3 * 1024 * 1024)
+/*
+ * A further 1k is added to ensure we're not a multiple
+ * of TEST_MEM_PAGE_SIZE, thus avoid clash with writes
+ * from the migration guest workload.
+ */
+#define MAGIC_OFFSET_SHUFFLE 1024
+#define MAGIC_OFFSET (MAGIC_OFFSET_BASE + MAGIC_OFFSET_SHUFFLE)
+#define MAGIC_MARKER 0xFEED12345678CAFEULL
+
+/*
  * Dirtylimit stop working if dirty page rate error
  * value less than DIRTYLIMIT_TOLERANCE_RANGE
  */
@@ -445,6 +459,91 @@ static void migrate_ensure_converge(QTestState *who)
     migrate_set_parameter_int(who, "downtime-limit", 30 * 1000);
 }
 
+/*
+ * Our goal is to ensure that we run a single full migration
+ * iteration, and also dirty memory, ensuring that at least
+ * one further iteration is required.
+ *
+ * We can't directly synchronize with the start of a migration
+ * so we have to apply some tricks monitoring memory that is
+ * transferred.
+ *
+ * Initially we set the migration bandwidth to an insanely
+ * low value, with tiny max downtime too. This basically
+ * guarantees migration will never complete.
+ *
+ * This will result in a test that is unacceptably slow though,
+ * so we can't let the entire migration pass run at this speed.
+ * Our intent is to let it run just long enough that we can
+ * prove data prior to the marker has been transferred *AND*
+ * also prove this transferred data is dirty again.
+ *
+ * Before migration starts, we write a 64-bit magic marker
+ * into a fixed location in the src VM RAM.
+ *
+ * Then watch dst memory until the marker appears. This is
+ * proof that start_address -> MAGIC_OFFSET_BASE has been
+ * transferred.
+ *
+ * Finally we go back to the source and read a byte just
+ * before the marker untill we see it flip in value. This
+ * is proof that start_address -> MAGIC_OFFSET_BASE
+ * is now dirty again.
+ *
+ * IOW, we're guaranteed at least a 2nd migration pass
+ * at this point.
+ *
+ * We can now let migration run at full speed to finish
+ * the test
+ */
+static void migrate_prepare_for_dirty_mem(QTestState *from)
+{
+    /*
+     * The guest workflow iterates from start_address to
+     * end_address, writing 1 byte every TEST_MEM_PAGE_SIZE
+     * bytes.
+     *
+     * IOW, if we write to mem at a point which is NOT
+     * a multiple of TEST_MEM_PAGE_SIZE, our write won't
+     * conflict with the migration workflow.
+     *
+     * We put in a marker here, that we'll use to determine
+     * when the data has been transferred to the dst.
+     */
+    qtest_writeq(from, start_address + MAGIC_OFFSET, MAGIC_MARKER);
+}
+
+static void migrate_wait_for_dirty_mem(QTestState *from,
+                                       QTestState *to)
+{
+    uint64_t watch_address = start_address + MAGIC_OFFSET_BASE;
+    uint64_t marker_address = start_address + MAGIC_OFFSET;
+    uint8_t watch_byte;
+
+    /*
+     * Wait for the MAGIC_MARKER to get transferred, as an
+     * indicator that a migration pass has made some known
+     * amount of progress.
+     */
+    do {
+        usleep(1000 * 10);
+    } while (qtest_readq(to, marker_address) != MAGIC_MARKER);
+
+    /*
+     * Now ensure that already transferred bytes are
+     * dirty again from the guest workload. Note the
+     * guest byte value will wrap around and by chance
+     * match the original watch_byte. This is harmless
+     * as we'll eventually see a different value if we
+     * keep watching
+     */
+    watch_byte = qtest_readb(from, watch_address);
+    do {
+        usleep(1000 * 10);
+    } while (qtest_readb(from, watch_address) == watch_byte);
+}
+
+
 static void migrate_pause(QTestState *who)
 {
     qtest_qmp_assert_success(who, "{ 'execute': 'migrate-pause' }");
@@ -577,7 +676,10 @@ typedef struct {
         MIG_TEST_FAIL_DEST_QUIT_ERR,
     } result;
 
-    /* Optional: set number of migration passes to wait for, if live==true */
+    /*
+     * Optional: set number of migration passes to wait for, if live==true.
+     * If zero, then merely wait for a few MB of dirty data
+     */
     unsigned int iterations;
 
     /*
@@ -1165,12 +1267,14 @@ static int migrate_postcopy_prepare(QTestState **from_ptr,
 
     migrate_ensure_non_converge(from);
 
+    migrate_prepare_for_dirty_mem(from);
+
     /* Wait for the first serial output from the source */
     wait_for_serial("src_serial");
 
     migrate_qmp(from, uri, "{}");
 
-    wait_for_migration_pass(from);
+    migrate_wait_for_dirty_mem(from, to);
 
     *from_ptr = from;
     *to_ptr = to;
@@ -1405,14 +1509,8 @@ static void test_precopy_common(MigrateCommon *args)
     }
 
     if (args->live) {
-        /*
-         * Testing live migration, we want to ensure that some
-         * memory is re-dirtied after being transferred, so that
-         * we exercise logic for dirty page handling. We achieve
-         * this with a ridiculosly low bandwidth that guarantees
-         * non-convergance.
-         */
         migrate_ensure_non_converge(from);
+        migrate_prepare_for_dirty_mem(from);
     } else {
         /*
          * Testing non-live migration, we allow it to run at
@@ -1447,13 +1545,16 @@ static void test_precopy_common(MigrateCommon *args)
         }
     } else {
         if (args->live) {
-            if (args->iterations) {
-                while (args->iterations--) {
-                    wait_for_migration_pass(from);
-                }
-            } else {
+            /*
+             * For initial iteration(s) we must do a full pass,
+             * but for the final iteration, we need only wait
+             * for some dirty mem before switching to converge
+             */
+            while (args->iterations > 1) {
                 wait_for_migration_pass(from);
+                args->iterations--;
             }
+            migrate_wait_for_dirty_mem(from, to);
 
             migrate_ensure_converge(from);
 
@@ -1586,6 +1687,9 @@ static void test_ignore_shared(void)
         return;
     }
 
+    migrate_ensure_non_converge(from);
+    migrate_prepare_for_dirty_mem(from);
+
     migrate_set_capability(from, "x-ignore-shared", true);
     migrate_set_capability(to, "x-ignore-shared", true);
 
@@ -1594,7 +1698,7 @@ static void test_ignore_shared(void)
 
     migrate_qmp(from, uri, "{}");
 
-    wait_for_migration_pass(from);
+    migrate_wait_for_dirty_mem(from, to);
 
     if (!got_src_stop) {
         qtest_qmp_eventwait(from, "STOP");
@@ -2325,6 +2429,7 @@ static void test_multifd_tcp_cancel(void)
     }
 
     migrate_ensure_non_converge(from);
+    migrate_prepare_for_dirty_mem(from);
 
     migrate_set_parameter_int(from, "multifd-channels", 16);
     migrate_set_parameter_int(to, "multifd-channels", 16);
@@ -2343,7 +2448,7 @@ static void test_multifd_tcp_cancel(void)
 
     migrate_qmp(from, uri, "{}");
 
-    wait_for_migration_pass(from);
+    migrate_wait_for_dirty_mem(from, to);
 
     migrate_cancel(from);
 
@@ -2372,11 +2477,13 @@ static void test_multifd_tcp_cancel(void)
 
     wait_for_migration_status(from, "cancelled", NULL);
 
-    migrate_ensure_converge(from);
+    migrate_ensure_non_converge(from);
 
     migrate_qmp(from, uri, "{}");
 
-    wait_for_migration_pass(from);
+    migrate_wait_for_dirty_mem(from, to);
+
+    migrate_ensure_converge(from);
 
     if (!got_src_stop) {
         qtest_qmp_eventwait(from, "STOP");
diff --git a/tests/qtest/readconfig-test.c b/tests/qtest/readconfig-test.c
index ac7242451b..760f974e63 100644
--- a/tests/qtest/readconfig-test.c
+++ b/tests/qtest/readconfig-test.c
@@ -48,7 +48,7 @@ static QTestState *qtest_init_with_config(const char *cfgdata)
     return qts;
 }
 
-static void test_x86_memdev_resp(QObject *res)
+static void test_x86_memdev_resp(QObject *res, const char *mem_id, int size)
 {
     Visitor *v;
     g_autoptr(MemdevList) memdevs = NULL;
@@ -63,8 +63,8 @@ static void test_x86_memdev_resp(QObject *res)
     g_assert(!memdevs->next);
 
     memdev = memdevs->value;
-    g_assert_cmpstr(memdev->id, ==, "ram");
-    g_assert_cmpint(memdev->size, ==, 200 * MiB);
+    g_assert_cmpstr(memdev->id, ==, mem_id);
+    g_assert_cmpint(memdev->size, ==, size * MiB);
 
     visit_free(v);
 }
@@ -80,7 +80,7 @@ static void test_x86_memdev(void)
     qts = qtest_init_with_config(cfgdata);
     /* Test valid command */
     resp = qtest_qmp(qts, "{ 'execute': 'query-memdev' }");
-    test_x86_memdev_resp(qdict_get(resp, "return"));
+    test_x86_memdev_resp(qdict_get(resp, "return"), "ram", 200);
     qobject_unref(resp);
 
     qtest_quit(qts);
@@ -197,6 +197,189 @@ static void test_docs_config_ich9(void)
     qtest_quit(qts);
 }
 
+#if defined(CONFIG_POSIX) && defined(CONFIG_SLIRP)
+
+static char *make_temp_img(const char *template, const char *format, int size)
+{
+    GError *error = NULL;
+    char *temp_name;
+    int fd;
+
+    /* Create a temporary image names */
+    fd = g_file_open_tmp(template, &temp_name, &error);
+    if (fd == -1) {
+        fprintf(stderr, "unable to create file: %s\n", error->message);
+        g_error_free(error);
+        return NULL;
+    }
+    close(fd);
+
+    if (!mkimg(temp_name, format, size)) {
+        fprintf(stderr, "qemu-img failed to create %s\n", temp_name);
+        g_free(temp_name);
+        return NULL;
+    }
+
+    return temp_name;
+}
+
+struct device {
+    const char *name;
+    const char *type;
+};
+
+static void test_docs_q35(const char *input_file, struct device *devices)
+{
+    QTestState *qts;
+    QDict *resp;
+    QObject *qobj;
+    int ret, i;
+    g_autofree char *cfg_file = NULL, *sedcmd = NULL;
+    g_autofree char *hd_file = NULL, *cd_file = NULL;
+
+    /* Check that all the devices are available in the QEMU binary */
+    for (i = 0; devices[i].name; i++) {
+        if (!qtest_has_device(devices[i].type)) {
+            g_test_skip("one of the required devices is not available");
+            return;
+        }
+    }
+
+    hd_file = make_temp_img("qtest_disk_XXXXXX.qcow2", "qcow2", 1);
+    cd_file = make_temp_img("qtest_cdrom_XXXXXX.iso", "raw", 1);
+    if (!hd_file || !cd_file) {
+        g_test_skip("could not create disk images");
+        goto cleanup;
+    }
+
+    /* Create a temporary config file where we replace the disk image names */
+    ret = g_file_open_tmp("q35-emulated-XXXXXX.cfg", &cfg_file, NULL);
+    if (ret == -1) {
+        g_test_skip("could not create temporary config file");
+        goto cleanup;
+    }
+    close(ret);
+
+    sedcmd = g_strdup_printf("sed -e 's,guest.qcow2,%s,' -e 's,install.iso,%s,'"
+                             " %s %s > '%s'",
+                             hd_file, cd_file,
+                             !qtest_has_accel("kvm") ? "-e '/accel/d'" : "",
+                             input_file, cfg_file);
+    ret = system(sedcmd);
+    if (ret) {
+        g_test_skip("could not modify temporary config file");
+        goto cleanup;
+    }
+
+    qts = qtest_initf("-machine none -nodefaults -readconfig %s", cfg_file);
+
+    /* Check memory size */
+    resp = qtest_qmp(qts, "{ 'execute': 'query-memdev' }");
+    test_x86_memdev_resp(qdict_get(resp, "return"), "pc.ram", 1024);
+    qobject_unref(resp);
+
+    resp = qtest_qmp(qts, "{ 'execute': 'qom-list',"
+                          "  'arguments': {'path': '/machine/peripheral' }}");
+    qobj = qdict_get(resp, "return");
+
+    /* Check that all the devices have been created */
+    for (i = 0; devices[i].name; i++) {
+        test_object_available(qobj, devices[i].name, devices[i].type);
+    }
+
+    qobject_unref(resp);
+
+    qtest_quit(qts);
+
+cleanup:
+    if (hd_file) {
+        unlink(hd_file);
+    }
+    if (cd_file) {
+        unlink(cd_file);
+    }
+    if (cfg_file) {
+        unlink(cfg_file);
+    }
+}
+
+static void test_docs_q35_emulated(void)
+{
+    struct device devices[] = {
+        { "ich9-pcie-port-1", "ioh3420" },
+        { "ich9-pcie-port-2", "ioh3420" },
+        { "ich9-pcie-port-3", "ioh3420" },
+        { "ich9-pcie-port-4", "ioh3420" },
+        { "ich9-pci-bridge", "i82801b11-bridge" },
+        { "ich9-ehci-1", "ich9-usb-ehci1" },
+        { "ich9-ehci-2", "ich9-usb-ehci2" },
+        { "ich9-uhci-1", "ich9-usb-uhci1" },
+        { "ich9-uhci-2", "ich9-usb-uhci2" },
+        { "ich9-uhci-3", "ich9-usb-uhci3" },
+        { "ich9-uhci-4", "ich9-usb-uhci4" },
+        { "ich9-uhci-5", "ich9-usb-uhci5" },
+        { "ich9-uhci-6", "ich9-usb-uhci6" },
+        { "sata-disk", "ide-hd" },
+        { "sata-optical-disk", "ide-cd" },
+        { "net", "e1000" },
+        { "video", "VGA" },
+        { "ich9-hda-audio", "ich9-intel-hda" },
+        { "ich9-hda-duplex", "hda-duplex" },
+        { NULL, NULL }
+    };
+
+    test_docs_q35("docs/config/q35-emulated.cfg", devices);
+}
+
+static void test_docs_q35_virtio_graphical(void)
+{
+    struct device devices[] = {
+        { "pcie.1", "pcie-root-port" },
+        { "pcie.2", "pcie-root-port" },
+        { "pcie.3", "pcie-root-port" },
+        { "pcie.4", "pcie-root-port" },
+        { "pcie.5", "pcie-root-port" },
+        { "pcie.6", "pcie-root-port" },
+        { "pcie.7", "pcie-root-port" },
+        { "pcie.8", "pcie-root-port" },
+        { "scsi", "virtio-scsi-pci" },
+        { "scsi-disk", "scsi-hd" },
+        { "scsi-optical-disk", "scsi-cd" },
+        { "net", "virtio-net-pci" },
+        { "usb", "nec-usb-xhci" },
+        { "tablet", "usb-tablet" },
+        { "video", "qxl-vga" },
+        { "sound", "ich9-intel-hda" },
+        { "duplex", "hda-duplex" },
+        { NULL, NULL }
+    };
+
+    test_docs_q35("docs/config/q35-virtio-graphical.cfg", devices);
+}
+
+static void test_docs_q35_virtio_serial(void)
+{
+    struct device devices[] = {
+        { "pcie.1", "pcie-root-port" },
+        { "pcie.2", "pcie-root-port" },
+        { "pcie.3", "pcie-root-port" },
+        { "pcie.4", "pcie-root-port" },
+        { "pcie.5", "pcie-root-port" },
+        { "pcie.6", "pcie-root-port" },
+        { "pcie.7", "pcie-root-port" },
+        { "pcie.8", "pcie-root-port" },
+        { "scsi", "virtio-scsi-pci" },
+        { "scsi-disk", "scsi-hd" },
+        { "scsi-optical-disk", "scsi-cd" },
+        { "net", "virtio-net-pci" },
+        { NULL, NULL }
+    };
+
+    test_docs_q35("docs/config/q35-virtio-serial.cfg", devices);
+}
+
+#endif /* CONFIG_LINUX */
+
 int main(int argc, char *argv[])
 {
     const char *arch;
@@ -211,6 +394,19 @@ int main(int argc, char *argv[])
             qtest_has_device("ich9-usb-uhci1")) {
             qtest_add_func("readconfig/x86/ich9-ehci-uhci", test_docs_config_ich9);
         }
+#if defined(CONFIG_POSIX) && defined(CONFIG_SLIRP)
+        qtest_add_func("readconfig/x86/q35-emulated", test_docs_q35_emulated);
+        qtest_add_func("readconfig/x86/q35-virtio-graphical",
+                       test_docs_q35_virtio_graphical);
+        if (g_test_slow()) {
+            /*
+             * q35-virtio-serial.cfg is a subset of q35-virtio-graphical.cfg,
+             * so we can skip the test in quick mode
+             */
+            qtest_add_func("readconfig/x86/q35-virtio-serial",
+                           test_docs_q35_virtio_serial);
+        }
+#endif
     }
 #if defined(CONFIG_SPICE) && !defined(__FreeBSD__)
     qtest_add_func("readconfig/spice", test_spice);
diff --git a/tests/qtest/sifive-e-aon-watchdog-test.c b/tests/qtest/sifive-e-aon-watchdog-test.c
new file mode 100644
index 0000000000..1f313d16ad
--- /dev/null
+++ b/tests/qtest/sifive-e-aon-watchdog-test.c
@@ -0,0 +1,450 @@
+/*
+ * QTest testcase for the watchdog timer of HiFive 1 rev b.
+ *
+ * Copyright (c) 2023 SiFive, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/timer.h"
+#include "qemu/bitops.h"
+#include "libqtest.h"
+#include "hw/registerfields.h"
+#include "hw/misc/sifive_e_aon.h"
+
+FIELD(AON_WDT_WDOGCFG, SCALE, 0, 4)
+FIELD(AON_WDT_WDOGCFG, RSVD0, 4, 4)
+FIELD(AON_WDT_WDOGCFG, RSTEN, 8, 1)
+FIELD(AON_WDT_WDOGCFG, ZEROCMP, 9, 1)
+FIELD(AON_WDT_WDOGCFG, RSVD1, 10, 2)
+FIELD(AON_WDT_WDOGCFG, EN_ALWAYS, 12, 1)
+FIELD(AON_WDT_WDOGCFG, EN_CORE_AWAKE, 13, 1)
+FIELD(AON_WDT_WDOGCFG, RSVD2, 14, 14)
+FIELD(AON_WDT_WDOGCFG, IP0, 28, 1)
+FIELD(AON_WDT_WDOGCFG, RSVD3, 29, 3)
+
+#define WDOG_BASE (0x10000000)
+#define WDOGCFG (0x0)
+#define WDOGCOUNT (0x8)
+#define WDOGS (0x10)
+#define WDOGFEED (0x18)
+#define WDOGKEY (0x1c)
+#define WDOGCMP0 (0x20)
+
+#define SIFIVE_E_AON_WDOGKEY (0x51F15E)
+#define SIFIVE_E_AON_WDOGFEED (0xD09F00D)
+#define SIFIVE_E_LFCLK_DEFAULT_FREQ (32768)
+
+static void test_init(QTestState *qts)
+{
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCOUNT, 0);
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, 0);
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCMP0, 0xBEEF);
+}
+
+static void test_wdogcount(void)
+{
+    uint64_t tmp;
+    QTestState *qts = qtest_init("-machine sifive_e");
+
+    test_init(qts);
+
+    tmp = qtest_readl(qts, WDOG_BASE + WDOGCOUNT);
+    qtest_writel(qts, WDOG_BASE + WDOGCOUNT, 0xBEEF);
+    g_assert(qtest_readl(qts, WDOG_BASE + WDOGCOUNT) == tmp);
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCOUNT, 0xBEEF);
+    g_assert(0xBEEF == qtest_readl(qts, WDOG_BASE + WDOGCOUNT));
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCOUNT, 0xAAAAAAAA);
+    g_assert(0x2AAAAAAA == qtest_readl(qts, WDOG_BASE + WDOGCOUNT));
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGFEED, 0xAAAAAAAA);
+    g_assert(0x2AAAAAAA == qtest_readl(qts, WDOG_BASE + WDOGCOUNT));
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGFEED, SIFIVE_E_AON_WDOGFEED);
+    g_assert(0 == qtest_readl(qts, WDOG_BASE + WDOGCOUNT));
+
+    qtest_quit(qts);
+}
+
+static void test_wdogcfg(void)
+{
+    uint32_t tmp_cfg;
+    QTestState *qts = qtest_init("-machine sifive_e");
+
+    test_init(qts);
+
+    tmp_cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, 0xFFFFFFFF);
+    g_assert(qtest_readl(qts, WDOG_BASE + WDOGCFG) == tmp_cfg);
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, 0xFFFFFFFF);
+    g_assert(0xFFFFFFFF == qtest_readl(qts, WDOG_BASE + WDOGCFG));
+
+    tmp_cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    g_assert(15 == FIELD_EX32(tmp_cfg, AON_WDT_WDOGCFG, SCALE));
+    g_assert(1 == FIELD_EX32(tmp_cfg, AON_WDT_WDOGCFG, RSTEN));
+    g_assert(1 == FIELD_EX32(tmp_cfg, AON_WDT_WDOGCFG, ZEROCMP));
+    g_assert(1 == FIELD_EX32(tmp_cfg, AON_WDT_WDOGCFG, EN_ALWAYS));
+    g_assert(1 == FIELD_EX32(tmp_cfg, AON_WDT_WDOGCFG, EN_CORE_AWAKE));
+    g_assert(1 == FIELD_EX32(tmp_cfg, AON_WDT_WDOGCFG, IP0));
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, 0);
+    tmp_cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    g_assert(0 == FIELD_EX32(tmp_cfg, AON_WDT_WDOGCFG, SCALE));
+    g_assert(0 == FIELD_EX32(tmp_cfg, AON_WDT_WDOGCFG, RSTEN));
+    g_assert(0 == FIELD_EX32(tmp_cfg, AON_WDT_WDOGCFG, ZEROCMP));
+    g_assert(0 == FIELD_EX32(tmp_cfg, AON_WDT_WDOGCFG, EN_ALWAYS));
+    g_assert(0 == FIELD_EX32(tmp_cfg, AON_WDT_WDOGCFG, EN_CORE_AWAKE));
+    g_assert(0 == FIELD_EX32(tmp_cfg, AON_WDT_WDOGCFG, IP0));
+    g_assert(0 == qtest_readl(qts, WDOG_BASE + WDOGCFG));
+
+    qtest_quit(qts);
+}
+
+static void test_wdogcmp0(void)
+{
+    uint32_t tmp;
+    QTestState *qts = qtest_init("-machine sifive_e");
+
+    test_init(qts);
+
+    tmp = qtest_readl(qts, WDOG_BASE + WDOGCMP0);
+    qtest_writel(qts, WDOG_BASE + WDOGCMP0, 0xBEEF);
+    g_assert(qtest_readl(qts, WDOG_BASE + WDOGCMP0) == tmp);
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCMP0, 0xBEEF);
+    g_assert(0xBEEF == qtest_readl(qts, WDOG_BASE + WDOGCMP0));
+
+    qtest_quit(qts);
+}
+
+static void test_wdogkey(void)
+{
+    QTestState *qts = qtest_init("-machine sifive_e");
+
+    test_init(qts);
+
+    g_assert(0 == qtest_readl(qts, WDOG_BASE + WDOGKEY));
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, 0xFFFF);
+    g_assert(0 == qtest_readl(qts, WDOG_BASE + WDOGKEY));
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    g_assert(1 == qtest_readl(qts, WDOG_BASE + WDOGKEY));
+
+    qtest_writel(qts, WDOG_BASE + WDOGFEED, 0xAAAAAAAA);
+    g_assert(0 == qtest_readl(qts, WDOG_BASE + WDOGKEY));
+
+    qtest_quit(qts);
+}
+
+static void test_wdogfeed(void)
+{
+    QTestState *qts = qtest_init("-machine sifive_e");
+
+    test_init(qts);
+
+    g_assert(0 == qtest_readl(qts, WDOG_BASE + WDOGFEED));
+
+    qtest_writel(qts, WDOG_BASE + WDOGFEED, 0xFFFF);
+    g_assert(0 == qtest_readl(qts, WDOG_BASE + WDOGFEED));
+
+    qtest_quit(qts);
+}
+
+static void test_scaled_wdogs(void)
+{
+    uint32_t cfg;
+    uint32_t fake_count = 0x12345678;
+    QTestState *qts = qtest_init("-machine sifive_e");
+
+    test_init(qts);
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCOUNT, fake_count);
+    g_assert(qtest_readl(qts, WDOG_BASE + WDOGCOUNT) == fake_count);
+    g_assert((uint16_t)qtest_readl(qts, WDOG_BASE + WDOGS) ==
+             (uint16_t)fake_count);
+
+    for (int i = 0; i < 16; i++) {
+        cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+        cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, SCALE, i);
+        qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+        qtest_writel(qts, WDOG_BASE + WDOGCFG, cfg);
+        g_assert((uint16_t)qtest_readl(qts, WDOG_BASE + WDOGS) ==
+                 (uint16_t)(fake_count >>
+                            FIELD_EX32(cfg, AON_WDT_WDOGCFG, SCALE)));
+    }
+
+    qtest_quit(qts);
+}
+
+static void test_watchdog(void)
+{
+    uint32_t cfg;
+    QTestState *qts = qtest_init("-machine sifive_e");
+
+    test_init(qts);
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCMP0, SIFIVE_E_LFCLK_DEFAULT_FREQ);
+
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, SCALE, 0);
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, EN_ALWAYS, 1);
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, cfg);
+
+    qtest_clock_step(qts, NANOSECONDS_PER_SECOND);
+
+    g_assert(qtest_readl(qts, WDOG_BASE + WDOGCOUNT) ==
+             SIFIVE_E_LFCLK_DEFAULT_FREQ);
+    g_assert(qtest_readl(qts, WDOG_BASE + WDOGS) ==
+             SIFIVE_E_LFCLK_DEFAULT_FREQ);
+
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, SCALE));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, RSTEN));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, ZEROCMP));
+    g_assert(1 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, EN_ALWAYS));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, EN_CORE_AWAKE));
+    g_assert(1 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, IP0));
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCOUNT, 0);
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, IP0, 0);
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, cfg);
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, IP0));
+
+    qtest_quit(qts);
+}
+
+static void test_scaled_watchdog(void)
+{
+    uint32_t cfg;
+    QTestState *qts = qtest_init("-machine sifive_e");
+
+    test_init(qts);
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCMP0, 10);
+
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, SCALE, 15);
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, EN_ALWAYS, 1);
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, cfg);
+
+    qtest_clock_step(qts, NANOSECONDS_PER_SECOND * 10);
+
+    g_assert(qtest_readl(qts, WDOG_BASE + WDOGCOUNT) ==
+             SIFIVE_E_LFCLK_DEFAULT_FREQ * 10);
+
+    g_assert(10 == qtest_readl(qts, WDOG_BASE + WDOGS));
+
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    g_assert(15 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, SCALE));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, RSTEN));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, ZEROCMP));
+    g_assert(1 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, EN_ALWAYS));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, EN_CORE_AWAKE));
+    g_assert(1 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, IP0));
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCOUNT, 0);
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, IP0, 0);
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, cfg);
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, IP0));
+
+    qtest_quit(qts);
+}
+
+static void test_periodic_int(void)
+{
+    uint32_t cfg;
+    QTestState *qts = qtest_init("-machine sifive_e");
+
+    test_init(qts);
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCMP0, SIFIVE_E_LFCLK_DEFAULT_FREQ);
+
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, SCALE, 0);
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, ZEROCMP, 1);
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, EN_ALWAYS, 1);
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, cfg);
+
+    qtest_clock_step(qts, NANOSECONDS_PER_SECOND);
+
+    g_assert(0 == qtest_readl(qts, WDOG_BASE + WDOGCOUNT));
+    g_assert(0 == qtest_readl(qts, WDOG_BASE + WDOGS));
+
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, SCALE));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, RSTEN));
+    g_assert(1 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, ZEROCMP));
+    g_assert(1 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, EN_ALWAYS));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, EN_CORE_AWAKE));
+    g_assert(1 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, IP0));
+
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, IP0, 0);
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, cfg);
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, IP0));
+
+    qtest_clock_step(qts, NANOSECONDS_PER_SECOND);
+
+    g_assert(0 == qtest_readl(qts, WDOG_BASE + WDOGCOUNT));
+    g_assert(0 == qtest_readl(qts, WDOG_BASE + WDOGS));
+
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, SCALE));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, RSTEN));
+    g_assert(1 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, ZEROCMP));
+    g_assert(1 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, EN_ALWAYS));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, EN_CORE_AWAKE));
+    g_assert(1 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, IP0));
+
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, IP0, 0);
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, cfg);
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, IP0));
+
+    qtest_quit(qts);
+}
+
+static void test_enable_disable(void)
+{
+    uint32_t cfg;
+    QTestState *qts = qtest_init("-machine sifive_e");
+
+    test_init(qts);
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCMP0, 10);
+
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, SCALE, 15);
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, EN_ALWAYS, 1);
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, cfg);
+
+    qtest_clock_step(qts, NANOSECONDS_PER_SECOND * 2);
+
+    g_assert(qtest_readl(qts, WDOG_BASE + WDOGCOUNT) ==
+             SIFIVE_E_LFCLK_DEFAULT_FREQ * 2);
+    g_assert(2 == qtest_readl(qts, WDOG_BASE + WDOGS));
+
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    g_assert(15 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, SCALE));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, RSTEN));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, ZEROCMP));
+    g_assert(1 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, EN_ALWAYS));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, EN_CORE_AWAKE));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, IP0));
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, EN_ALWAYS, 0);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, cfg);
+
+    qtest_clock_step(qts, NANOSECONDS_PER_SECOND * 8);
+
+    g_assert(qtest_readl(qts, WDOG_BASE + WDOGCOUNT) ==
+             SIFIVE_E_LFCLK_DEFAULT_FREQ * 2);
+    g_assert(2 == qtest_readl(qts, WDOG_BASE + WDOGS));
+
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    g_assert(15 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, SCALE));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, RSTEN));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, ZEROCMP));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, EN_ALWAYS));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, EN_CORE_AWAKE));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, IP0));
+
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, EN_ALWAYS, 1);
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, cfg);
+
+    qtest_clock_step(qts, NANOSECONDS_PER_SECOND * 8);
+
+    g_assert(qtest_readl(qts, WDOG_BASE + WDOGCOUNT) ==
+             SIFIVE_E_LFCLK_DEFAULT_FREQ * 10);
+    g_assert(10 == qtest_readl(qts, WDOG_BASE + WDOGS));
+
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    g_assert(15 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, SCALE));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, RSTEN));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, ZEROCMP));
+    g_assert(1 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, EN_ALWAYS));
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, EN_CORE_AWAKE));
+    g_assert(1 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, IP0));
+
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCOUNT, 0);
+    cfg = FIELD_DP32(cfg, AON_WDT_WDOGCFG, IP0, 0);
+    qtest_writel(qts, WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+    qtest_writel(qts, WDOG_BASE + WDOGCFG, cfg);
+    cfg = qtest_readl(qts, WDOG_BASE + WDOGCFG);
+    g_assert(0 == FIELD_EX32(cfg, AON_WDT_WDOGCFG, IP0));
+
+    qtest_quit(qts);
+}
+
+int main(int argc, char *argv[])
+{
+    g_test_init(&argc, &argv, NULL);
+    qtest_add_func("/sifive-e-aon-watchdog-test/wdogcount",
+                   test_wdogcount);
+    qtest_add_func("/sifive-e-aon-watchdog-test/wdogcfg",
+                   test_wdogcfg);
+    qtest_add_func("/sifive-e-aon-watchdog-test/wdogcmp0",
+                   test_wdogcmp0);
+    qtest_add_func("/sifive-e-aon-watchdog-test/wdogkey",
+                   test_wdogkey);
+    qtest_add_func("/sifive-e-aon-watchdog-test/wdogfeed",
+                   test_wdogfeed);
+    qtest_add_func("/sifive-e-aon-watchdog-test/scaled_wdogs",
+                   test_scaled_wdogs);
+    qtest_add_func("/sifive-e-aon-watchdog-test/watchdog",
+                   test_watchdog);
+    qtest_add_func("/sifive-e-aon-watchdog-test/scaled_watchdog",
+                   test_scaled_watchdog);
+    qtest_add_func("/sifive-e-aon-watchdog-test/periodic_int",
+                   test_periodic_int);
+    qtest_add_func("/sifive-e-aon-watchdog-test/enable_disable",
+                   test_enable_disable);
+    return g_test_run();
+}
diff --git a/tests/tcg/riscv64/Makefile.target b/tests/tcg/riscv64/Makefile.target
index 42993549cb..a7e390c384 100644
--- a/tests/tcg/riscv64/Makefile.target
+++ b/tests/tcg/riscv64/Makefile.target
@@ -12,3 +12,9 @@ run-test-noc: QEMU_OPTS += -cpu rv64,c=false
 
 TESTS += test-aes
 run-test-aes: QEMU_OPTS += -cpu rv64,zk=on
+
+# Test for fcvtmod
+TESTS += test-fcvtmod
+test-fcvtmod: CFLAGS += -march=rv64imafdc
+test-fcvtmod: LDFLAGS += -static
+run-test-fcvtmod: QEMU_OPTS += -cpu rv64,d=true,Zfa=true
diff --git a/tests/tcg/riscv64/test-fcvtmod.c b/tests/tcg/riscv64/test-fcvtmod.c
new file mode 100644
index 0000000000..f050579974
--- /dev/null
+++ b/tests/tcg/riscv64/test-fcvtmod.c
@@ -0,0 +1,345 @@
+#include <stdio.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#define FFLAG_NX_SHIFT 0 /* inexact */
+#define FFLAG_UF_SHIFT 1 /* underflow */
+#define FFLAG_OF_SHIFT 2 /* overflow */
+#define FFLAG_DZ_SHIFT 3 /* divide by zero */
+#define FFLAG_NV_SHIFT 4 /* invalid operation */
+
+#define FFLAG_NV (1UL << FFLAG_NV_SHIFT)
+#define FFLAG_DZ (1UL << FFLAG_DZ_SHIFT)
+#define FFLAG_OF (1UL << FFLAG_OF_SHIFT)
+#define FFLAG_UF (1UL << FFLAG_UF_SHIFT)
+#define FFLAG_NX (1UL << FFLAG_NX_SHIFT)
+
+typedef struct fp64_fcvt_fcvtmod_testcase {
+    const char* name;
+    union {
+        uint64_t inp_lu;
+        double inp_lf;
+    };
+    uint64_t exp_fcvt;
+    uint8_t exp_fcvt_fflags;
+    uint64_t exp_fcvtmod;
+    uint8_t exp_fcvtmod_fflags;
+} fp64_fcvt_fcvtmod_testcase_t;
+
+void print_fflags(uint8_t fflags)
+{
+    int set = 0;
+
+    if (fflags == 0) {
+        printf("-");
+        return;
+    }
+
+    if (fflags & FFLAG_NV) {
+        printf("%sFFLAG_NV", set ? " | " : "");
+        set = 1;
+    }
+    if (fflags & FFLAG_DZ) {
+        printf("%sFFLAG_DZ", set ? " | " : "");
+        set = 1;
+    }
+    if (fflags & FFLAG_OF) {
+        printf("%sFFLAG_OF", set ? " | " : "");
+        set = 1;
+    }
+    if (fflags & FFLAG_UF) {
+        printf("%sFFLAG_UF", set ? " | " : "");
+        set = 1;
+    }
+    if (fflags & FFLAG_NX) {
+        printf("%sFFLAG_NX", set ? " | " : "");
+        set = 1;
+    }
+}
+
+/* Clear all FP flags. */
+static inline void clear_fflags()
+{
+    __asm__ __volatile__("fsflags zero");
+}
+
+/* Read all FP flags. */
+static inline uint8_t get_fflags()
+{
+    uint64_t v;
+    __asm__ __volatile__("frflags %0" : "=r"(v));
+    return (uint8_t)v;
+}
+
+/* Move input value (without conversations) into an FP register. */
+static inline double do_fmv_d_x(uint64_t inp)
+{
+    double fpr;
+    __asm__ __volatile__("fmv.d.x %0, %1" : "=f"(fpr) : "r"(inp));
+    return fpr;
+}
+
+static inline uint64_t do_fcvt_w_d(uint64_t inp, uint8_t *fflags)
+{
+    uint64_t ret;
+    double fpr = do_fmv_d_x(inp);
+
+    clear_fflags();
+
+    __asm__ __volatile__("fcvt.w.d %0, %1, rtz" : "=r"(ret) : "f"(fpr));
+
+    *fflags = get_fflags();
+
+    return ret;
+}
+
+static inline uint64_t do_fcvtmod_w_d(uint64_t inp, uint8_t *fflags)
+{
+    uint64_t ret;
+    double fpr = do_fmv_d_x(inp);
+
+    clear_fflags();
+
+    /* fcvtmod.w.d rd, rs1, rtz = 1100001 01000 rs1 001 rd 1010011 */
+    asm(".insn r  0x53, 0x1, 0x61, %0, %1, f8" : "=r"(ret) : "f"(fpr));
+
+    *fflags = get_fflags();
+
+    return ret;
+}
+
+static const fp64_fcvt_fcvtmod_testcase_t tests[] = {
+    /* Zero (exp=0, frac=0) */
+    { .name = "+0.0",
+      .inp_lf = 0x0p0,
+      .exp_fcvt = 0x0000000000000000,
+      .exp_fcvt_fflags = 0,
+      .exp_fcvtmod = 0x0000000000000000,
+      .exp_fcvtmod_fflags = 0 },
+    { .name = "-0.0",
+      .inp_lf = -0x0p0,
+      .exp_fcvt = 0x0000000000000000,
+      .exp_fcvt_fflags = 0,
+      .exp_fcvtmod = 0x0000000000000000,
+      .exp_fcvtmod_fflags = 0 },
+
+    /* Subnormal: exp=0 frac!=0 */
+    { .name = "Subnormal frac=1",
+      .inp_lu = 0x0000000000000001,
+      .exp_fcvt = 0x0000000000000000,
+      .exp_fcvt_fflags = FFLAG_NX,
+      .exp_fcvtmod = 0,
+      .exp_fcvtmod_fflags = FFLAG_NX },
+    { .name = "Subnormal frac=0xf..f",
+      .inp_lu = 0x0000ffffffffffff,
+      .exp_fcvt = 0x0000000000000000,
+      .exp_fcvt_fflags = FFLAG_NX,
+      .exp_fcvtmod = 0,
+      .exp_fcvtmod_fflags = FFLAG_NX },
+    { .name = "Neg subnormal frac=1",
+      .inp_lu = 0x0000000000000001,
+      .exp_fcvt = 0x0000000000000000,
+      .exp_fcvt_fflags = FFLAG_NX,
+      .exp_fcvtmod = 0,
+      .exp_fcvtmod_fflags = FFLAG_NX },
+    { .name = "Neg subnormal frac=0xf..f",
+      .inp_lu = 0x8000ffffffffffff,
+      .exp_fcvt = 0x0000000000000000,
+      .exp_fcvt_fflags = FFLAG_NX,
+      .exp_fcvtmod = 0,
+      .exp_fcvtmod_fflags = FFLAG_NX },
+
+    /* Infinity: exp=0x7ff, frac=0 */
+    { .name = "+INF",
+      .inp_lu = 0x7ff0000000000000,
+      .exp_fcvt = 0x000000007fffffff, /* int32 max */
+      .exp_fcvt_fflags = FFLAG_NV,
+      .exp_fcvtmod = 0,
+      .exp_fcvtmod_fflags = FFLAG_NV },
+    { .name = "-INF",
+      .inp_lu = 0xfff0000000000000,
+      .exp_fcvt = 0xffffffff80000000, /* int32 min */
+      .exp_fcvt_fflags = FFLAG_NV,
+      .exp_fcvtmod = 0,
+      .exp_fcvtmod_fflags = FFLAG_NV },
+
+    /* NaN: exp=7ff, frac!=0 */
+    { .name = "canonical NaN",
+      .inp_lu = 0x7ff8000000000000,
+      .exp_fcvt = 0x000000007fffffff, /* int32 max */
+      .exp_fcvt_fflags = FFLAG_NV,
+      .exp_fcvtmod = 0,
+      .exp_fcvtmod_fflags = FFLAG_NV },
+    { .name = "non-canonical NaN",
+      .inp_lu = 0x7ff8000000100000,
+      .exp_fcvt = 0x000000007fffffff, /* int32 min */
+      .exp_fcvt_fflags = FFLAG_NV,
+      .exp_fcvtmod = 0,
+      .exp_fcvtmod_fflags = FFLAG_NV },
+
+    /* Normal numbers: exp!=0, exp!=7ff */
+    { .name = "+smallest normal value",
+      .inp_lu = 0x0010000000000000,
+      .exp_fcvt = 0,
+      .exp_fcvt_fflags = FFLAG_NX,
+      .exp_fcvtmod = 0,
+      .exp_fcvtmod_fflags = FFLAG_NX },
+    { .name = "-smallest normal value",
+      .inp_lu = 0x8010000000000000,
+      .exp_fcvt = 0,
+      .exp_fcvt_fflags = FFLAG_NX,
+      .exp_fcvtmod = 0,
+      .exp_fcvtmod_fflags = FFLAG_NX },
+
+    { .name = "+0.5",
+      .inp_lf = 0x1p-1,
+      .exp_fcvt = 0,
+      .exp_fcvt_fflags = FFLAG_NX,
+      .exp_fcvtmod = 0,
+      .exp_fcvtmod_fflags = FFLAG_NX },
+    { .name = "-0.5",
+      .inp_lf = -0x1p-1,
+      .exp_fcvt = 0,
+      .exp_fcvt_fflags = FFLAG_NX,
+      .exp_fcvtmod = 0,
+      .exp_fcvtmod_fflags = FFLAG_NX },
+
+    { .name = "+value just below 1.0",
+      .inp_lu = 0x3fefffffffffffff,
+      .exp_fcvt = 0,
+      .exp_fcvt_fflags = FFLAG_NX,
+      .exp_fcvtmod = 0,
+      .exp_fcvtmod_fflags = FFLAG_NX },
+    { .name = "-value just above -1.0",
+      .inp_lu = 0xbfefffffffffffff,
+      .exp_fcvt = 0,
+      .exp_fcvt_fflags = FFLAG_NX,
+      .exp_fcvtmod = 0,
+      .exp_fcvtmod_fflags = FFLAG_NX },
+
+    { .name = "+1.0",
+      .inp_lf = 0x1p0,
+      .exp_fcvt = 0x0000000000000001,
+      .exp_fcvt_fflags = 0,
+      .exp_fcvtmod = 0x0000000000000001,
+      .exp_fcvtmod_fflags = 0 },
+    { .name = "-1.0",
+      .inp_lf = -0x1p0,
+      .exp_fcvt = 0xffffffffffffffff,
+      .exp_fcvt_fflags = 0,
+      .exp_fcvtmod = 0xffffffffffffffff,
+      .exp_fcvtmod_fflags = 0 },
+
+    { .name = "+1.5",
+      .inp_lu = 0x3ff8000000000000,
+      .exp_fcvt = 1,
+      .exp_fcvt_fflags = FFLAG_NX,
+      .exp_fcvtmod = 1,
+      .exp_fcvtmod_fflags = FFLAG_NX },
+    { .name = "-1.5",
+      .inp_lu = 0xbff8000000000000,
+      .exp_fcvt = 0xffffffffffffffff,
+      .exp_fcvt_fflags = FFLAG_NX,
+      .exp_fcvtmod = 0xffffffffffffffff,
+      .exp_fcvtmod_fflags = FFLAG_NX },
+
+    { .name = "+max int32 (2147483647)",
+      .inp_lu = 0x41dfffffffc00000,
+      .exp_fcvt = 0x000000007fffffff,
+      .exp_fcvt_fflags = 0,
+      .exp_fcvtmod = 0x000000007fffffff,
+      .exp_fcvtmod_fflags = 0 },
+    { .name = "+max int32 +1 (2147483648)",
+      .inp_lf = 0x1p31,
+      .exp_fcvt = 0x000000007fffffff,
+      .exp_fcvt_fflags = FFLAG_NV,
+      .exp_fcvtmod = (uint64_t)-2147483648l, /* int32 min */
+      .exp_fcvtmod_fflags = FFLAG_NV },
+    { .name = "+max int32 +2 (2147483649)",
+      .inp_lu = 0x41e0000000200000,
+      .exp_fcvt = 0x000000007fffffff,
+      .exp_fcvt_fflags = FFLAG_NV,
+      .exp_fcvtmod = (uint64_t)-2147483647l, /* int32 min +1 */
+      .exp_fcvtmod_fflags = FFLAG_NV },
+
+    { .name = "-max int32 (-2147483648)",
+      .inp_lf = -0x1p31,
+      .exp_fcvt = 0xffffffff80000000,
+      .exp_fcvt_fflags = 0,
+      .exp_fcvtmod = 0xffffffff80000000,
+      .exp_fcvtmod_fflags = 0 },
+    { .name = "-max int32 -1 (-2147483649)",
+      .inp_lf = -0x1.00000002p+31,
+      .exp_fcvt = 0xffffffff80000000,
+      .exp_fcvt_fflags = FFLAG_NV,
+      .exp_fcvtmod = 2147483647, /* int32 max */
+      .exp_fcvtmod_fflags = FFLAG_NV },
+    { .name = "-max int32 -2 (-2147483650)",
+      .inp_lf = -0x1.00000004p+31,
+      .exp_fcvt = 0xffffffff80000000,
+      .exp_fcvt_fflags = FFLAG_NV,
+      .exp_fcvtmod = 2147483646, /* int32 max -1 */
+      .exp_fcvtmod_fflags = FFLAG_NV },
+};
+
+int run_fcvtmod_tests()
+{
+    uint64_t act_fcvt;
+    uint8_t act_fcvt_fflags;
+    uint64_t act_fcvtmod;
+    uint8_t act_fcvtmod_fflags;
+
+    for (size_t i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
+        const fp64_fcvt_fcvtmod_testcase_t *t = &tests[i];
+
+        act_fcvt = do_fcvt_w_d(t->inp_lu, &act_fcvt_fflags);
+        int fcvt_correct = act_fcvt == t->exp_fcvt &&
+                    act_fcvt_fflags == t->exp_fcvt_fflags;
+        act_fcvtmod = do_fcvtmod_w_d(t->inp_lu, &act_fcvtmod_fflags);
+        int fcvtmod_correct = act_fcvtmod == t->exp_fcvtmod &&
+                       act_fcvtmod_fflags == t->exp_fcvtmod_fflags;
+
+        if (fcvt_correct && fcvtmod_correct) {
+            continue;
+        }
+
+        printf("Test %zu (%s) failed!\n", i, t->name);
+
+        double fpr = do_fmv_d_x(t->inp_lu);
+        printf("inp_lu: 0x%016lx == %lf\n", t->inp_lu, fpr);
+        printf("inp_lf: %lf\n", t->inp_lf);
+
+        uint32_t sign = (t->inp_lu >> 63);
+        uint32_t exp = (uint32_t)(t->inp_lu >> 52) & 0x7ff;
+        uint64_t frac = t->inp_lu & 0xfffffffffffffull; /* significand */
+        int true_exp = exp - 1023;
+        int shift = true_exp - 52;
+        uint64_t true_frac = frac | 1ull << 52;
+
+        printf("sign=%d, exp=0x%03x, frac=0x%012lx\n", sign, exp, frac);
+        printf("true_exp=%d, shift=%d, true_frac=0x%016lx\n", true_exp, shift, true_frac);
+
+        if (!fcvt_correct) {
+            printf("act_fcvt: 0x%016lx == %li\n", act_fcvt, act_fcvt);
+            printf("exp_fcvt: 0x%016lx == %li\n", t->exp_fcvt, t->exp_fcvt);
+            printf("act_fcvt_fflags: "); print_fflags(act_fcvt_fflags); printf("\n");
+            printf("exp_fcvt_fflags: "); print_fflags(t->exp_fcvt_fflags); printf("\n");
+        }
+
+        if (!fcvtmod_correct) {
+            printf("act_fcvtmod: 0x%016lx == %li\n", act_fcvtmod, act_fcvtmod);
+            printf("exp_fcvtmod: 0x%016lx == %li\n", t->exp_fcvtmod, t->exp_fcvtmod);
+            printf("act_fcvtmod_fflags: "); print_fflags(act_fcvtmod_fflags); printf("\n");
+            printf("exp_fcvtmod_fflags: "); print_fflags(t->exp_fcvtmod_fflags); printf("\n");
+        }
+
+        return 1;
+    }
+
+    return 0;
+}
+
+int main()
+{
+    return run_fcvtmod_tests();
+}
diff --git a/tests/tcg/s390x/Makefile.softmmu-target b/tests/tcg/s390x/Makefile.softmmu-target
index 44dfd71629..242c7b0f83 100644
--- a/tests/tcg/s390x/Makefile.softmmu-target
+++ b/tests/tcg/s390x/Makefile.softmmu-target
@@ -20,6 +20,7 @@ ASM_TESTS =                                                                    \
     sam                                                                        \
     lpsw                                                                       \
     lpswe-early                                                                \
+    lra                                                                        \
     ssm-early                                                                  \
     stosm-early                                                                \
     unaligned-lowcore
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 85abfbb98c..19fbbc6e53 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -36,6 +36,9 @@ TESTS+=rxsbg
 TESTS+=ex-relative-long
 TESTS+=ex-branch
 TESTS+=mxdb
+TESTS+=epsw
+TESTS+=larl
+TESTS+=mdeb
 
 cdsg: CFLAGS+=-pthread
 cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/epsw.c b/tests/tcg/s390x/epsw.c
new file mode 100644
index 0000000000..affb1a5e3a
--- /dev/null
+++ b/tests/tcg/s390x/epsw.c
@@ -0,0 +1,23 @@
+/*
+ * Test the EPSW instruction.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <assert.h>
+#include <stdlib.h>
+
+int main(void)
+{
+    unsigned long r1 = 0x1234567887654321UL, r2 = 0x8765432112345678UL;
+
+    asm("cr %[r1],%[r2]\n"  /* cc = 1 */
+        "epsw %[r1],%[r2]"
+        : [r1] "+r" (r1), [r2] "+r" (r2) : : "cc");
+
+    /* Do not check the R and RI bits. */
+    r1 &= ~0x40000008UL;
+    assert(r1 == 0x1234567807051001UL);
+    assert(r2 == 0x8765432180000000UL);
+
+    return EXIT_SUCCESS;
+}
diff --git a/tests/tcg/s390x/gdbstub/test-svc.py b/tests/tcg/s390x/gdbstub/test-svc.py
index 7851ca7284..18fad3f163 100644
--- a/tests/tcg/s390x/gdbstub/test-svc.py
+++ b/tests/tcg/s390x/gdbstub/test-svc.py
@@ -25,7 +25,7 @@ def run_test():
     gdb.execute("si")
     report("larl\t" in gdb.execute("x/i $pc", False, True), "insn #2")
     gdb.execute("si")
-    report("lghi\t" in gdb.execute("x/i $pc", False, True), "insn #3")
+    report("lgrl\t" in gdb.execute("x/i $pc", False, True), "insn #3")
     gdb.execute("si")
     report("svc\t" in gdb.execute("x/i $pc", False, True), "insn #4")
     gdb.execute("si")
diff --git a/tests/tcg/s390x/hello-s390x-asm.S b/tests/tcg/s390x/hello-s390x-asm.S
index 2e9faa1604..4dbda12d35 100644
--- a/tests/tcg/s390x/hello-s390x-asm.S
+++ b/tests/tcg/s390x/hello-s390x-asm.S
@@ -8,7 +8,7 @@ _start:
 /* puts("Hello, World!"); */
 lghi %r2,1
 larl %r3,foo
-lghi %r4,foo_end-foo
+lgrl %r4,foo_len
 svc 4
 
 /* exit(0); */
@@ -18,3 +18,5 @@ svc 1
 .align 2
 foo: .asciz "Hello, World!\n"
 foo_end:
+.align 8
+foo_len: .quad foo_end-foo
diff --git a/tests/tcg/s390x/larl.c b/tests/tcg/s390x/larl.c
new file mode 100644
index 0000000000..7c95f89be7
--- /dev/null
+++ b/tests/tcg/s390x/larl.c
@@ -0,0 +1,21 @@
+/*
+ * Test the LARL instruction.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <stdlib.h>
+
+int main(void)
+{
+    long algfi = (long)main;
+    long larl;
+
+    /*
+     * The compiler may emit larl for the C addition, so compute the expected
+     * value using algfi.
+     */
+    asm("algfi %[r],0xd0000000" : [r] "+r" (algfi) : : "cc");
+    asm("larl %[r],main+0xd0000000" : [r] "=r" (larl));
+
+    return algfi == larl ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/tests/tcg/s390x/lra.S b/tests/tcg/s390x/lra.S
new file mode 100644
index 0000000000..79ab86f36b
--- /dev/null
+++ b/tests/tcg/s390x/lra.S
@@ -0,0 +1,19 @@
+    .org 0x200                         /* lowcore padding */
+    .globl _start
+_start:
+    lgrl %r1,initial_r1
+    lra %r1,0(%r1)
+    cgrl %r1,expected_r1
+    jne 1f
+    lpswe success_psw
+1:
+    lpswe failure_psw
+    .align 8
+initial_r1:
+    .quad 0x8765432112345678
+expected_r1:
+    .quad 0x8765432180000038           /* ASCE type exception */
+success_psw:
+    .quad 0x2000000000000,0xfff        /* see is_special_wait_psw() */
+failure_psw:
+    .quad 0x2000000000000,0            /* disabled wait */
diff --git a/tests/tcg/s390x/mdeb.c b/tests/tcg/s390x/mdeb.c
new file mode 100644
index 0000000000..4897d28069
--- /dev/null
+++ b/tests/tcg/s390x/mdeb.c
@@ -0,0 +1,30 @@
+/*
+ * Test the MDEB and MDEBR instructions.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <assert.h>
+#include <stdlib.h>
+
+int main(void)
+{
+    union {
+        float f[2];
+        double d;
+    } a;
+    float b;
+
+    a.f[0] = 1.2345;
+    a.f[1] = 999;
+    b = 6.789;
+    asm("mdeb %[a],%[b]" : [a] "+f" (a.d) : [b] "R" (b));
+    assert(a.d > 8.38 && a.d < 8.39);
+
+    a.f[0] = 1.2345;
+    a.f[1] = 999;
+    b = 6.789;
+    asm("mdebr %[a],%[b]" : [a] "+f" (a.d) : [b] "f" (b));
+    assert(a.d > 8.38 && a.d < 8.39);
+
+    return EXIT_SUCCESS;
+}
diff --git a/tests/tcg/s390x/mie3-mvcrl.c b/tests/tcg/s390x/mie3-mvcrl.c
index 93c7b0a290..6d3d049f2c 100644
--- a/tests/tcg/s390x/mie3-mvcrl.c
+++ b/tests/tcg/s390x/mie3-mvcrl.c
@@ -1,29 +1,55 @@
+#include <stdbool.h>
 #include <stdint.h>
+#include <stdlib.h>
 #include <string.h>
 
-
-static inline void mvcrl_8(const char *dst, const char *src)
+static void mvcrl(const char *dst, const char *src, size_t len)
 {
+    register long r0 asm("r0") = len;
+
     asm volatile (
-        "llill %%r0, 8\n"
         ".insn sse, 0xE50A00000000, 0(%[dst]), 0(%[src])"
-        : : [dst] "d" (dst), [src] "d" (src)
-        : "r0", "memory");
+        : : [dst] "d" (dst), [src] "d" (src), "r" (r0)
+        : "memory");
 }
 
-
-int main(int argc, char *argv[])
+static bool test(void)
 {
     const char *alpha = "abcdefghijklmnop";
 
     /* array missing 'i' */
-    char tstr[17] = "abcdefghjklmnop\0" ;
+    char tstr[17] = "abcdefghjklmnop\0";
 
     /* mvcrl reference use: 'open a hole in an array' */
-    mvcrl_8(tstr + 9, tstr + 8);
+    mvcrl(tstr + 9, tstr + 8, 8);
 
     /* place missing 'i' */
     tstr[8] = 'i';
 
-    return strncmp(alpha, tstr, 16ul);
+    return strncmp(alpha, tstr, 16ul) == 0;
+}
+
+static bool test_bad_r0(void)
+{
+    char src[256] = { 0 };
+
+    /*
+     * PoP says: Bits 32-55 of general register 0 should contain zeros;
+     * otherwise, the program may not operate compatibly in the future.
+     *
+     * Try it anyway in order to check whether this would crash QEMU itself.
+     */
+    mvcrl(src, src, (size_t)-1);
+
+    return true;
+}
+
+int main(void)
+{
+    bool ok = true;
+
+    ok &= test();
+    ok &= test_bad_r0();
+
+    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
 }
diff --git a/tests/unit/test-qga.c b/tests/unit/test-qga.c
index 360b4cab23..671e83cb86 100644
--- a/tests/unit/test-qga.c
+++ b/tests/unit/test-qga.c
@@ -665,6 +665,36 @@ static void test_qga_blockedrpcs(gconstpointer data)
     fixture_tear_down(&fix, NULL);
 }
 
+static void test_qga_allowedrpcs(gconstpointer data)
+{
+    TestFixture fix;
+    QDict *ret, *error;
+    const gchar *class, *desc;
+
+    fixture_setup(&fix, "-a guest-ping,guest-get-time", NULL);
+
+    /* check allowed RPCs */
+    ret = qmp_fd(fix.fd, "{'execute': 'guest-ping'}");
+    qmp_assert_no_error(ret);
+    qobject_unref(ret);
+
+    ret = qmp_fd(fix.fd, "{'execute': 'guest-get-time'}");
+    qmp_assert_no_error(ret);
+    qobject_unref(ret);
+
+    /* check something else */
+    ret = qmp_fd(fix.fd, "{'execute': 'guest-get-fsinfo'}");
+    g_assert_nonnull(ret);
+    error = qdict_get_qdict(ret, "error");
+    class = qdict_get_try_str(error, "class");
+    desc = qdict_get_try_str(error, "desc");
+    g_assert_cmpstr(class, ==, "CommandNotFound");
+    g_assert_nonnull(g_strstr_len(desc, -1, "has been disabled"));
+    qobject_unref(ret);
+
+    fixture_tear_down(&fix, NULL);
+}
+
 static void test_qga_config(gconstpointer data)
 {
     GError *error = NULL;
@@ -1090,6 +1120,7 @@ int main(int argc, char **argv)
                          test_qga_fsfreeze_status);
 
     g_test_add_data_func("/qga/blockedrpcs", NULL, test_qga_blockedrpcs);
+    g_test_add_data_func("/qga/allowedrpcs", NULL, test_qga_allowedrpcs);
     g_test_add_data_func("/qga/config", NULL, test_qga_config);
     g_test_add_data_func("/qga/guest-exec", &fix, test_qga_guest_exec);
     g_test_add_data_func("/qga/guest-exec-separated", &fix,
diff --git a/util/async-teardown.c b/util/async-teardown.c
index 3ab19c8740..62cdeb0f20 100644
--- a/util/async-teardown.c
+++ b/util/async-teardown.c
@@ -12,9 +12,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/config-file.h"
-#include "qemu/option.h"
-#include "qemu/module.h"
 #include <dirent.h>
 #include <sys/prctl.h>
 #include <sched.h>
@@ -147,21 +144,3 @@ void init_async_teardown(void)
     clone(async_teardown_fn, new_stack_for_clone(), CLONE_VM, NULL);
     sigprocmask(SIG_SETMASK, &old_signals, NULL);
 }
-
-static QemuOptsList qemu_run_with_opts = {
-    .name = "run-with",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head),
-    .desc = {
-        {
-            .name = "async-teardown",
-            .type = QEMU_OPT_BOOL,
-        },
-        { /* end of list */ }
-    },
-};
-
-static void register_teardown(void)
-{
-    qemu_add_opts(&qemu_run_with_opts);
-}
-opts_init(register_teardown);