summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--.editorconfig5
-rw-r--r--.patchew.yml302
-rw-r--r--.travis.yml2
-rw-r--r--MAINTAINERS21
-rw-r--r--Makefile28
-rw-r--r--Makefile.objs2
-rw-r--r--Makefile.target16
-rw-r--r--block.c125
-rw-r--r--block/Makefile.objs2
-rw-r--r--block/backup.c246
-rw-r--r--block/block-backend.c50
-rw-r--r--block/commit.c13
-rw-r--r--block/crypto.c3
-rw-r--r--block/dirty-bitmap.c9
-rw-r--r--block/io.c28
-rw-r--r--block/linux-aio.c72
-rw-r--r--block/mirror.c4
-rw-r--r--block/parallels.c3
-rw-r--r--block/qcow.c3
-rw-r--r--block/qcow2-bitmap.c3
-rw-r--r--block/qcow2-cache.c1
-rw-r--r--block/qcow2-cluster.c10
-rw-r--r--block/qcow2-refcount.c40
-rw-r--r--block/qcow2-snapshot.c1
-rw-r--r--block/qcow2-threads.c268
-rw-r--r--block/qcow2.c337
-rw-r--r--block/qcow2.h30
-rw-r--r--block/qed.c3
-rw-r--r--block/quorum.c1
-rw-r--r--block/sheepdog.c3
-rw-r--r--block/trace-events4
-rw-r--r--block/vdi.c3
-rw-r--r--block/vhdx.c3
-rw-r--r--block/vmdk.c3
-rw-r--r--block/vpc.c3
-rw-r--r--blockdev.c118
-rw-r--r--blockjob.c14
-rwxr-xr-xconfigure76
-rw-r--r--contrib/libvhost-user/libvhost-user.c1
-rw-r--r--contrib/libvhost-user/libvhost-user.h1
-rw-r--r--contrib/vhost-user-gpu/50-qemu-gpu.json.in5
-rw-r--r--contrib/vhost-user-gpu/Makefile.objs10
-rw-r--r--contrib/vhost-user-gpu/main.c1185
-rw-r--r--contrib/vhost-user-gpu/virgl.c579
-rw-r--r--contrib/vhost-user-gpu/virgl.h25
-rw-r--r--contrib/vhost-user-gpu/vugbm.c328
-rw-r--r--contrib/vhost-user-gpu/vugbm.h67
-rw-r--r--contrib/vhost-user-gpu/vugpu.h177
-rw-r--r--default-configs/arm-softmmu.mak1
-rw-r--r--default-configs/i386-softmmu.mak1
-rw-r--r--default-configs/lm32-softmmu.mak2
-rw-r--r--default-configs/m68k-softmmu.mak2
-rw-r--r--default-configs/mips-softmmu-common.mak1
-rw-r--r--default-configs/nios2-softmmu.mak2
-rw-r--r--default-configs/xtensa-softmmu.mak2
-rw-r--r--docs/devel/build-system.txt19
-rw-r--r--docs/devel/multiple-iothreads.txt4
-rw-r--r--docs/index.rst1
-rw-r--r--docs/interop/index.rst1
-rw-r--r--docs/interop/vhost-user-gpu.rst242
-rw-r--r--docs/interop/vhost-user.rst9
-rw-r--r--docs/specs/index.rst13
-rw-r--r--docs/specs/ppc-spapr-xive.rst174
-rw-r--r--docs/specs/ppc-xive.rst199
-rw-r--r--gdbstub.c2
-rw-r--r--hmp.c3
-rw-r--r--hw/Kconfig1
-rw-r--r--hw/Makefile.objs1
-rw-r--r--hw/acpi/Kconfig4
-rw-r--r--hw/acpi/Makefile.objs1
-rw-r--r--hw/acpi/pci.c61
-rw-r--r--hw/arm/Kconfig1
-rw-r--r--hw/arm/aspeed.c6
-rw-r--r--hw/arm/aspeed_soc.c50
-rw-r--r--hw/arm/bcm2835_peripherals.c61
-rw-r--r--hw/arm/digic.c17
-rw-r--r--hw/arm/imx25_pdk.c14
-rw-r--r--hw/arm/kzm.c5
-rw-r--r--hw/arm/mps2-tz.c8
-rw-r--r--hw/arm/mps2.c8
-rw-r--r--hw/arm/raspi.c7
-rw-r--r--hw/arm/sabrelite.c5
-rw-r--r--hw/arm/virt-acpi-build.c17
-rw-r--r--hw/arm/xlnx-zcu102.c5
-rw-r--r--hw/arm/xlnx-zynqmp.c8
-rw-r--r--hw/audio/ac97.c2
-rw-r--r--hw/block/dataplane/virtio-blk.c12
-rw-r--r--hw/block/dataplane/xen-block.c6
-rw-r--r--hw/block/fdc.c2
-rw-r--r--hw/block/nvme.c106
-rw-r--r--hw/block/nvme.h2
-rw-r--r--hw/block/trace-events2
-rw-r--r--hw/block/xen-block.c2
-rw-r--r--hw/core/machine.c3
-rw-r--r--hw/core/qdev-properties-system.c41
-rw-r--r--hw/core/sysbus.c10
-rw-r--r--hw/display/Kconfig10
-rw-r--r--hw/display/Makefile.objs5
-rw-r--r--hw/display/vhost-user-gpu-pci.c51
-rw-r--r--hw/display/vhost-user-gpu.c607
-rw-r--r--hw/display/vhost-user-vga.c52
-rw-r--r--hw/display/virtio-gpu-3d.c49
-rw-r--r--hw/display/virtio-gpu-base.c268
-rw-r--r--hw/display/virtio-gpu-pci.c55
-rw-r--r--hw/display/virtio-gpu.c448
-rw-r--r--hw/display/virtio-vga.c138
-rw-r--r--hw/display/virtio-vga.h32
-rw-r--r--hw/i386/acpi-build.c18
-rw-r--r--hw/i386/pc.c3
-rw-r--r--hw/i386/pc_q35.c16
-rw-r--r--hw/ide/qdev.c2
-rw-r--r--hw/intc/Makefile.objs1
-rw-r--r--hw/intc/armv7m_nvic.c6
-rw-r--r--hw/intc/spapr_xive.c193
-rw-r--r--hw/intc/spapr_xive_kvm.c823
-rw-r--r--hw/intc/xics.c10
-rw-r--r--hw/intc/xics_kvm.c113
-rw-r--r--hw/intc/xics_spapr.c7
-rw-r--r--hw/intc/xive.c53
-rw-r--r--hw/isa/i82378.c4
-rw-r--r--hw/isa/lpc_ich9.c2
-rw-r--r--hw/isa/vt82c686.c2
-rw-r--r--hw/microblaze/xlnx-zynqmp-pmu.c45
-rw-r--r--hw/mips/boston.c25
-rw-r--r--hw/mips/cps.c20
-rw-r--r--hw/mips/mips_malta.c19
-rw-r--r--hw/misc/edu.c32
-rw-r--r--hw/misc/macio/macio.c8
-rw-r--r--hw/pci-bridge/dec.c4
-rw-r--r--hw/pci-bridge/i82801b11.c2
-rw-r--r--hw/pci-bridge/pci_bridge_dev.c2
-rw-r--r--hw/pci-bridge/pcie_pci_bridge.c2
-rw-r--r--hw/pci-bridge/pcie_root_port.c2
-rw-r--r--hw/pci-bridge/simba.c2
-rw-r--r--hw/pci-bridge/xio3130_downstream.c2
-rw-r--r--hw/pci-bridge/xio3130_upstream.c2
-rw-r--r--hw/pci/pci.c101
-rw-r--r--hw/pci/pci_bridge.c2
-rw-r--r--hw/pci/pci_host.c13
-rw-r--r--hw/ppc/Kconfig5
-rw-r--r--hw/ppc/pnv.c25
-rw-r--r--hw/ppc/pnv_xscom.c18
-rw-r--r--hw/ppc/prep.c7
-rw-r--r--hw/ppc/spapr.c38
-rw-r--r--hw/ppc/spapr_caps.c13
-rw-r--r--hw/ppc/spapr_cpu_core.c2
-rw-r--r--hw/ppc/spapr_hcall.c26
-rw-r--r--hw/ppc/spapr_irq.c140
-rw-r--r--hw/ppc/spapr_pci.c34
-rw-r--r--hw/ppc/spapr_rtas.c6
-rw-r--r--hw/rdma/rdma_backend.c7
-rw-r--r--hw/riscv/Makefile.objs1
-rw-r--r--hw/riscv/sifive_e.c28
-rw-r--r--hw/riscv/sifive_gpio.c388
-rw-r--r--hw/riscv/spike.c106
-rw-r--r--hw/riscv/trace-events7
-rw-r--r--hw/riscv/virt.c4
-rw-r--r--hw/s390x/event-facility.c4
-rw-r--r--hw/s390x/s390-virtio-ccw.c2
-rw-r--r--hw/scsi/lsi53c895a.c2
-rw-r--r--hw/scsi/mptsas.c4
-rw-r--r--hw/scsi/scsi-disk.c24
-rw-r--r--hw/scsi/vhost-scsi.c57
-rw-r--r--hw/scsi/vhost-user-scsi.c3
-rw-r--r--hw/scsi/virtio-scsi.c27
-rw-r--r--hw/scsi/vmw_pvscsi.c4
-rw-r--r--hw/sd/milkymist-memcard.c2
-rw-r--r--hw/sd/ssi-sd.c2
-rw-r--r--hw/semihosting/Kconfig3
-rw-r--r--hw/semihosting/Makefile.objs2
-rw-r--r--hw/semihosting/config.c186
-rw-r--r--hw/semihosting/console.c84
-rw-r--r--hw/usb/dev-storage.c2
-rw-r--r--hw/vfio/display.c1
-rw-r--r--hw/vfio/pci.c4
-rw-r--r--hw/virtio/vhost-user.c11
-rw-r--r--hw/virtio/vhost.c3
-rw-r--r--hw/virtio/virtio.c5
-rw-r--r--hw/watchdog/wdt_i6300esb.c2
-rw-r--r--include/block/block.h21
-rw-r--r--include/block/block_int.h1
-rw-r--r--include/block/nvme.h2
-rw-r--r--include/block/raw-aio.h3
-rw-r--r--include/elf.h1
-rw-r--r--include/exec/memory.h17
-rw-r--r--include/hw/acpi/acpi-defs.h18
-rw-r--r--include/hw/acpi/pci.h1
-rw-r--r--include/hw/arm/bcm2835_peripherals.h3
-rw-r--r--include/hw/block/block.h7
-rw-r--r--include/hw/boards.h3
-rw-r--r--include/hw/i386/pc.h3
-rw-r--r--include/hw/pci/pci.h3
-rw-r--r--include/hw/pci/pci_bus.h8
-rw-r--r--include/hw/ppc/spapr.h2
-rw-r--r--include/hw/ppc/spapr_irq.h2
-rw-r--r--include/hw/ppc/spapr_xive.h39
-rw-r--r--include/hw/ppc/xics.h1
-rw-r--r--include/hw/ppc/xics_spapr.h1
-rw-r--r--include/hw/ppc/xive.h14
-rw-r--r--include/hw/ppc/xive_regs.h6
-rw-r--r--include/hw/qdev-properties.h3
-rw-r--r--include/hw/riscv/sifive_e.h8
-rw-r--r--include/hw/riscv/sifive_gpio.h72
-rw-r--r--include/hw/riscv/virt.h4
-rw-r--r--include/hw/scsi/scsi.h1
-rw-r--r--include/hw/semihosting/console.h38
-rw-r--r--include/hw/semihosting/semihost.h (renamed from include/exec/semihost.h)17
-rw-r--r--include/hw/sysbus.h1
-rw-r--r--include/hw/virtio/vhost-backend.h2
-rw-r--r--include/hw/virtio/vhost-scsi-common.h1
-rw-r--r--include/hw/virtio/virtio-gpu-bswap.h61
-rw-r--r--include/hw/virtio/virtio-gpu-pci.h40
-rw-r--r--include/hw/virtio/virtio-gpu-pixman.h45
-rw-r--r--include/hw/virtio/virtio-gpu.h92
-rw-r--r--include/sysemu/accel.h1
-rw-r--r--include/sysemu/block-backend.h5
-rw-r--r--include/sysemu/sysemu.h1
-rw-r--r--include/ui/console.h1
-rw-r--r--include/ui/egl-helpers.h3
-rw-r--r--linux-user/Makefile.objs2
-rw-r--r--linux-user/arm/semihost.c24
-rw-r--r--linux-user/elfload.c28
-rw-r--r--linux-user/riscv/target_elf.h1
-rw-r--r--linux-user/s390x/signal.c4
-rw-r--r--linux-user/syscall.c24
-rw-r--r--memory.c8
-rw-r--r--migration/block-dirty-bitmap.c14
-rw-r--r--migration/block.c3
-rw-r--r--migration/fd.c8
-rw-r--r--migration/fd.h2
-rw-r--r--migration/ram.c29
-rw-r--r--nbd/server.c6
-rw-r--r--numa.c4
-rw-r--r--qapi/block-core.json26
-rw-r--r--qemu-deprecated.texi21
-rw-r--r--qemu-img.c89
-rw-r--r--qemu-options.hx8
-rw-r--r--qom/object.c7
-rw-r--r--rules.mak9
-rwxr-xr-xscripts/checkpatch.pl3
-rw-r--r--stubs/Makefile.objs1
-rw-r--r--stubs/semihost.c70
-rw-r--r--target/arm/arm-semi.c96
-rw-r--r--target/arm/helper.c2
-rw-r--r--target/arm/translate-a64.c2
-rw-r--r--target/arm/translate.c2
-rw-r--r--target/i386/cpu.c3
-rw-r--r--target/i386/cpu.h1
-rw-r--r--target/lm32/helper.c2
-rw-r--r--target/m68k/op_helper.c2
-rw-r--r--target/mips/Makefile.objs3
-rw-r--r--target/mips/cpu.h209
-rw-r--r--target/mips/dsp_helper.c40
-rw-r--r--target/mips/helper.c13
-rw-r--r--target/mips/helper.h18
-rw-r--r--target/mips/lmi_helper.c8
-rw-r--r--target/mips/mips-semi.c14
-rw-r--r--target/mips/msa_helper.c1262
-rw-r--r--target/mips/op_helper.c388
-rw-r--r--target/mips/translate.c221
-rw-r--r--target/nios2/helper.c2
-rw-r--r--target/ppc/helper.h12
-rw-r--r--target/ppc/int_helper.c53
-rw-r--r--target/ppc/kvm.c9
-rw-r--r--target/ppc/kvm_ppc.h6
-rw-r--r--target/ppc/trace-events2
-rw-r--r--target/ppc/translate/vmx-impl.inc.c24
-rw-r--r--target/ppc/translate/vsx-impl.inc.c86
-rw-r--r--target/riscv/Makefile.objs15
-rw-r--r--target/riscv/cpu.c79
-rw-r--r--target/riscv/cpu.h10
-rw-r--r--target/riscv/cpu_bits.h45
-rw-r--r--target/riscv/cpu_helper.c35
-rw-r--r--target/riscv/csr.c30
-rw-r--r--target/riscv/insn16-32.decode28
-rw-r--r--target/riscv/insn16-64.decode36
-rw-r--r--target/riscv/insn16.decode173
-rw-r--r--target/riscv/insn32.decode10
-rw-r--r--target/riscv/insn_trans/trans_privileged.inc.c8
-rw-r--r--target/riscv/insn_trans/trans_rvc.inc.c347
-rw-r--r--target/riscv/insn_trans/trans_rvi.inc.c26
-rw-r--r--target/riscv/op_helper.c7
-rw-r--r--target/riscv/translate.c77
-rw-r--r--target/s390x/Makefile.objs3
-rw-r--r--target/s390x/arch_dump.c8
-rw-r--r--target/s390x/cpu.c3
-rw-r--r--target/s390x/cpu.h5
-rw-r--r--target/s390x/cpu_models.c4
-rw-r--r--target/s390x/excp_helper.c21
-rw-r--r--target/s390x/fpu_helper.c4
-rw-r--r--target/s390x/gdbstub.c16
-rw-r--r--target/s390x/gen-features.c10
-rw-r--r--target/s390x/helper.c10
-rw-r--r--target/s390x/helper.h84
-rw-r--r--target/s390x/insn-data.def58
-rw-r--r--target/s390x/internal.h4
-rw-r--r--target/s390x/kvm.c16
-rw-r--r--target/s390x/machine.c128
-rw-r--r--target/s390x/misc_helper.c11
-rw-r--r--target/s390x/tcg_s390x.h2
-rw-r--r--target/s390x/translate.c2
-rw-r--r--target/s390x/translate_vx.inc.c507
-rw-r--r--target/s390x/vec.h40
-rw-r--r--target/s390x/vec_fpu_helper.c625
-rw-r--r--target/s390x/vec_string_helper.c473
-rw-r--r--target/xtensa/translate.c2
-rw-r--r--target/xtensa/xtensa-semi.c2
-rw-r--r--tests/Makefile.include24
-rw-r--r--tests/acceptance/linux_ssh_mips_malta.py230
-rw-r--r--tests/bios-tables-test-allowed-diff.h1
-rw-r--r--tests/bios-tables-test.c53
-rwxr-xr-xtests/data/acpi/rebuild-expected-aml.sh2
-rw-r--r--tests/data/acpi/virt/APICbin0 -> 168 bytes
-rw-r--r--tests/data/acpi/virt/DSDTbin0 -> 18476 bytes
-rw-r--r--tests/data/acpi/virt/FACPbin0 -> 268 bytes
-rw-r--r--tests/data/acpi/virt/GTDTbin0 -> 96 bytes
-rw-r--r--tests/data/acpi/virt/MCFGbin0 -> 60 bytes
-rw-r--r--tests/data/acpi/virt/SPCRbin0 -> 80 bytes
-rw-r--r--tests/docker/Makefile.include1
-rw-r--r--tests/docker/dockerfiles/debian-ppc64-cross.docker11
-rw-r--r--tests/docker/dockerfiles/fedora.docker7
-rw-r--r--tests/docker/dockerfiles/ubuntu1804.docker57
-rw-r--r--tests/ds1338-test.c45
-rw-r--r--tests/libqos/arm-imx25-pdk-machine.c92
-rw-r--r--tests/libqos/arm-n800-machine.c92
-rw-r--r--tests/libqos/i2c-imx.c40
-rw-r--r--tests/libqos/i2c-omap.c70
-rw-r--r--tests/libqos/i2c.c74
-rw-r--r--tests/libqos/i2c.h63
-rw-r--r--tests/libqos/qgraph.c12
-rw-r--r--tests/libqos/qgraph.h15
-rw-r--r--tests/libqtest.c99
-rw-r--r--tests/libqtest.h62
-rw-r--r--tests/migration-test.c101
-rw-r--r--tests/pca9552-test.c91
-rwxr-xr-xtests/perf/block/qcow2/convert-blockstatus71
-rwxr-xr-xtests/qemu-iotests/05124
-rw-r--r--tests/qemu-iotests/051.out3
-rw-r--r--tests/qemu-iotests/051.pc.out27
-rwxr-xr-xtests/qemu-iotests/0562
-rwxr-xr-xtests/qemu-iotests/0607
-rw-r--r--tests/qemu-iotests/060.out5
-rwxr-xr-xtests/qemu-iotests/1022
-rw-r--r--tests/qemu-iotests/102.out3
-rw-r--r--tests/qemu-iotests/141.out2
-rw-r--r--tests/qemu-iotests/144.out2
-rwxr-xr-xtests/qemu-iotests/24021
-rw-r--r--tests/qemu-iotests/240.out15
-rwxr-xr-xtests/qemu-iotests/25078
-rw-r--r--tests/qemu-iotests/250.out16
-rwxr-xr-xtests/qemu-iotests/25452
-rw-r--r--tests/qemu-iotests/254.out52
-rwxr-xr-xtests/qemu-iotests/255135
-rw-r--r--tests/qemu-iotests/255.out40
-rwxr-xr-xtests/qemu-iotests/check178
-rw-r--r--tests/qemu-iotests/group180
-rw-r--r--tests/qemu-iotests/iotests.py10
-rw-r--r--tests/qos-test.c17
-rw-r--r--tests/requirements.txt1
-rw-r--r--tests/tcg/Makefile1
-rw-r--r--tests/tcg/aarch64/Makefile.softmmu-target34
-rw-r--r--tests/tcg/aarch64/system/boot.S239
-rw-r--r--tests/tcg/aarch64/system/kernel.ld24
-rw-r--r--tests/tcg/alpha/Makefile.softmmu-target34
-rw-r--r--tests/tcg/alpha/system/boot.S511
-rw-r--r--tests/tcg/alpha/system/kernel.ld30
-rw-r--r--tests/tcg/i386/Makefile.softmmu-target4
-rw-r--r--tests/tcg/i386/system/memory.c243
-rw-r--r--tests/tcg/minilib/printf.c3
-rw-r--r--tests/tcg/mips/include/test_utils_128.h2
-rw-r--r--tests/tcg/mips/include/wrappers_msa.h479
-rw-r--r--tests/tcg/mips/user/ase/msa/README20
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_b.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_h.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_b.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_h.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_b.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_h.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/fixed-multiply/test_msa_mul_q_h.c (renamed from tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_h.c)14
-rw-r--r--tests/tcg/mips/user/ase/msa/fixed-multiply/test_msa_mul_q_w.c (renamed from tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_w.c)14
-rw-r--r--tests/tcg/mips/user/ase/msa/fixed-multiply/test_msa_mulr_q_h.c (renamed from tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_h.c)14
-rw-r--r--tests/tcg/mips/user/ase/msa/fixed-multiply/test_msa_mulr_q_w.c (renamed from tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_w.c)14
-rw-r--r--tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_a_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_a_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_a_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_a_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_b.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_d.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_h.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_w.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_b.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_d.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_h.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_w.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_b.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_d.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_h.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_w.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_b.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_d.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_h.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_w.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_b.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_d.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_h.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_w.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_d.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_h.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_w.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_d.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_h.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_w.c6
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_b.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_d.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_h.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_w.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_b.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_d.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_h.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_w.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_b.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_d.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_h.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_w.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_b.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_d.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_h.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_w.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_b.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_d.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_h.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_w.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_b.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_d.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_h.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_w.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_d.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_h.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_w.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_d.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_h.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_w.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_b.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_d.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_h.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_w.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_b.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_d.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_h.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_w.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_b.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_d.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_h.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_w.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_b.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_d.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_h.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_w.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_b.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_d.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_h.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_w.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_d.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_h.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_w.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_d.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_h.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_w.c157
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_b.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_d.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_h.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_w.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_b.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_d.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_h.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_w.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_b.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_d.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_h.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_w.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_b.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_d.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_h.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_w.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_b.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_d.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_h.c14
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/logic/test_msa_and_v.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/logic/test_msa_nor_v.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/logic/test_msa_or_v.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/logic/test_msa_xor_v.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_sll_b.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_sll_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_sll_h.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_sll_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_sra_b.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_sra_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_sra_h.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_sra_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_srar_b.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_srar_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_srar_h.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_srar_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_srl_b.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_srl_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_srl_h.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_srl_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_b.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_h.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_w.c155
-rwxr-xr-xtests/tcg/mips/user/ase/msa/test_msa_compile.sh558
-rwxr-xr-xtests/tcg/mips/user/ase/msa/test_msa_run.sh326
-rw-r--r--tests/tcg/multiarch/system/Makefile.softmmu-target14
-rw-r--r--tests/tcg/multiarch/system/hello.c (renamed from tests/tcg/i386/system/hello.c)0
-rw-r--r--tests/tcg/multiarch/system/memory.c449
-rw-r--r--tests/tcg/ppc/Makefile.include3
-rw-r--r--tests/test-bdrv-drain.c56
-rw-r--r--tests/test-bdrv-graph-mod.c6
-rw-r--r--tests/test-block-backend.c6
-rw-r--r--tests/test-block-iothread.c104
-rw-r--r--tests/test-blockjob.c2
-rw-r--r--tests/test-thread-pool.c32
-rw-r--r--tests/test-throttle.c6
-rw-r--r--tests/tmp105-test.c112
-rw-r--r--tests/virtio-scsi-test.c63
-rw-r--r--ui/curses.c8
-rw-r--r--ui/egl-helpers.c39
-rw-r--r--ui/spice-app.c3
-rw-r--r--ui/spice-display.c7
-rw-r--r--util/Makefile.objs2
-rw-r--r--util/guest-random.c2
-rw-r--r--vl.c160
628 files changed, 28888 insertions, 4083 deletions
diff --git a/.editorconfig b/.editorconfig
index 1582883393..df6db65531 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -26,6 +26,11 @@ file_type_emacs = makefile
 indent_style = space
 indent_size = 4
 
+[*.{s,S}]
+indent_style = tab
+indent_size = 8
+file_type_emacs = asm
+
 [*.{vert,frag}]
 file_type_emacs = glsl
 
diff --git a/.patchew.yml b/.patchew.yml
new file mode 100644
index 0000000000..988c29261f
--- /dev/null
+++ b/.patchew.yml
@@ -0,0 +1,302 @@
+---
+# Note: this file is still unused.  It serves as a documentation for the
+# Patchew configuration in case patchew.org disappears or has to be
+# reinstalled.
+#
+# Patchew configuration is available to project administrators at
+# https://patchew.org/api/v1/projects/1/config/ and can be configured
+# to YAML using the following Python script:
+#
+#     import json
+#     import sys
+#     import ruamel.yaml
+#
+#     json_str = sys.stdin.read()
+#     yaml = ruamel.yaml.YAML()
+#     yaml.explicit_start = True
+#     data = json.loads(json_str, object_pairs_hook=ruamel.yaml.comments.CommentedMap)
+#     ruamel.yaml.scalarstring.walk_tree(data)
+#     yaml.dump(data, sys.stdout)
+
+email:
+  notifications:
+    timeouts:
+      event: TestingReport
+      enabled: true
+      to_user: false
+      reply_subject: true
+      set_reply_to: true
+      in_reply_to: true
+      reply_to_all: false
+      subject_template: none
+      to: fam@euphon.net
+      cc: ''
+      body_template: |
+        {% if not is_timeout %} {{ cancel }} {% endif %}
+
+        Test '{{ test }}' timeout, log:
+
+        {{ log }}
+    ENOSPC:
+      event: TestingReport
+      enabled: true
+      to_user: false
+      reply_subject: false
+      set_reply_to: false
+      in_reply_to: true
+      reply_to_all: false
+      subject_template: Out of space error
+      to: fam@euphon.net
+      cc: ''
+      body_template: |
+        {% if passed %}
+          {{ cancel }}
+        {% endif %}
+
+        {% if 'No space left on device' in log %}
+        Tester {{ tester }} out of space when running {{ test }}
+
+          {{ log }}
+        {% else %}
+          {{ cancel }}
+        {% endif %}
+    FailureShort:
+      event: TestingReport
+      enabled: true
+      to_user: false
+      reply_subject: true
+      set_reply_to: true
+      in_reply_to: true
+      reply_to_all: true
+      subject_template: Testing failed
+      to: ''
+      cc: ''
+      body_template: |
+        {% if passed or not obj.message_id or is_timeout %}
+          {{ cancel }}
+        {% endif %}
+        {% if 'No space left on device' in log %}
+          {{ cancel }}
+        {% endif %}
+        Patchew URL: https://patchew.org/QEMU/{{ obj.message_id }}/
+
+        {% ansi2text log as logtext %}
+        {% if test == "checkpatch" %}
+        Hi,
+
+        This series seems to have some coding style problems. See output below for
+        more information:
+
+        {{ logtext }}
+        {% elif test == "docker-mingw@fedora" or test == "docker-quick@centos7" or test == "asan" %}
+        Hi,
+
+        This series failed the {{ test }} build test. Please find the testing commands and
+        their output below. If you have Docker installed, you can probably reproduce it
+        locally.
+
+        {% lines_between logtext start="^=== TEST SCRIPT BEGIN ===$" stop="^=== TEST SCRIPT END ===$" %}
+        {% lines_between logtext start="^=== OUTPUT BEGIN ===$" stop="=== OUTPUT END ===$" as output %}
+        {% grep_C output regex="\b(FAIL|XPASS|ERROR|WARN|error:|warning:)" n=3 %}
+        {% elif test == "s390x" or test == "FreeBSD" or test == "ppcle" or test == "ppcbe" %}
+        Hi,
+
+        This series failed build test on {{test}} host. Please find the details below.
+
+        {% lines_between logtext start="^=== TEST SCRIPT BEGIN ===$" stop="^=== TEST SCRIPT END ===$" %}
+        {% lines_between logtext start="^=== OUTPUT BEGIN ===$" stop="=== OUTPUT END ===$" as output %}
+        {% grep_C output regex="\b(FAIL|XPASS|ERROR|WARN|error:|warning:)" n=3 %}
+        {% else %}
+        {{ cancel }}
+        {% endif %}
+
+        The full log is available at
+        {{ log_url }}.
+        ---
+        Email generated automatically by Patchew [https://patchew.org/].
+        Please send your feedback to patchew-devel@redhat.com
+testing:
+  tests:
+    asan:
+      enabled: true
+      requirements: docker
+      timeout: 3600
+      script: |
+        #!/bin/bash
+        time make docker-test-debug@fedora TARGET_LIST=x86_64-softmmu J=14 NETWORK=1
+    docker-quick@centos7:
+      enabled: false
+      requirements: docker,x86_64
+      timeout: 3600
+      script: |
+        #!/bin/bash
+        time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1
+    checkpatch:
+      enabled: true
+      requirements: ''
+      timeout: 600
+      script: |
+        #!/bin/bash
+        git rev-parse base > /dev/null || exit 0
+        git config --local diff.renamelimit 0
+        git config --local diff.renames True
+        git config --local diff.algorithm histogram
+        ./scripts/checkpatch.pl --mailback base..
+    docker-mingw@fedora:
+      enabled: true
+      requirements: docker,x86_64
+      timeout: 3600
+      script: |
+        #! /bin/bash
+        test "$(uname -m)" = "x86_64"
+    ppcle:
+      enabled: false
+      requirements: ppcle
+      timeout: 3600
+      script: |
+        #!/bin/bash
+        # Testing script will be invoked under the git checkout with
+        # HEAD pointing to a commit that has the patches applied on top of "base"
+        # branch
+        set -e
+        CC=$HOME/bin/cc
+        INSTALL=$PWD/install
+        BUILD=$PWD/build
+        mkdir -p $BUILD $INSTALL
+        SRC=$PWD
+        cd $BUILD
+        $SRC/configure --cc=$CC --prefix=$INSTALL
+        make -j4
+        # XXX: we need reliable clean up
+        # make check -j4 V=1
+        make install
+
+        echo
+        echo "=== ENV ==="
+        env
+
+        echo
+        echo "=== PACKAGES ==="
+        rpm -qa
+    ppcbe:
+      enabled: false
+      requirements: ppcbe
+      timeout: 3600
+      script: |
+        #!/bin/bash
+        # Testing script will be invoked under the git checkout with
+        # HEAD pointing to a commit that has the patches applied on top of "base"
+        # branch
+        set -e
+        CC=$HOME/bin/cc
+        INSTALL=$PWD/install
+        BUILD=$PWD/build
+        mkdir -p $BUILD $INSTALL
+        SRC=$PWD
+        cd $BUILD
+        $SRC/configure --cc=$CC --prefix=$INSTALL
+        make -j4
+        # XXX: we need reliable clean up
+        # make check -j4 V=1
+        make install
+
+        echo
+        echo "=== ENV ==="
+        env
+
+        echo
+        echo "=== PACKAGES ==="
+        rpm -qa
+    FreeBSD:
+      enabled: true
+      requirements: qemu-x86,x86_64,git
+      timeout: 3600
+      script: |
+        #!/bin/bash
+        # Testing script will be invoked under the git checkout with
+        # HEAD pointing to a commit that has the patches applied on top of "base"
+        # branch
+        if qemu-system-x86_64 --help >/dev/null 2>&1; then
+          QEMU=qemu-system-x86_64
+        elif /usr/libexec/qemu-kvm --help >/dev/null 2>&1; then
+          QEMU=/usr/libexec/qemu-kvm
+        else
+          exit 1
+        fi
+        make vm-build-freebsd J=21 QEMU=$QEMU
+        exit 0
+    docker-clang@ubuntu:
+      enabled: true
+      requirements: docker,x86_64
+      timeout: 3600
+      script: |
+        #!/bin/bash
+        time make docker-test-clang@ubuntu SHOW_ENV=1 J=14 NETWORK=1
+    s390x:
+      enabled: true
+      requirements: s390x
+      timeout: 3600
+      script: |
+        #!/bin/bash
+        # Testing script will be invoked under the git checkout with
+        # HEAD pointing to a commit that has the patches applied on top of "base"
+        # branch
+        set -e
+        CC=$HOME/bin/cc
+        INSTALL=$PWD/install
+        BUILD=$PWD/build
+        mkdir -p $BUILD $INSTALL
+        SRC=$PWD
+        cd $BUILD
+        $SRC/configure --cc=$CC --prefix=$INSTALL
+        make -j4
+        # XXX: we need reliable clean up
+        # make check -j4 V=1
+        make install
+
+        echo
+        echo "=== ENV ==="
+        env
+
+        echo
+        echo "=== PACKAGES ==="
+        rpm -qa
+  requirements:
+    x86_64:
+      script: |
+        #! /bin/bash
+        test "$(uname -m)" = "x86_64"
+    qemu-x86:
+      script: |
+        #!/bin/bash
+        if qemu-system-x86_64 --help >/dev/null 2>&1; then
+          :
+        elif /usr/libexec/qemu-kvm --help >/dev/null 2>&1; then
+          :
+        else
+          exit 1
+        fi
+    ppcle:
+      script: |
+        #!/bin/bash
+        test "$(uname -m)" = "ppc64le"
+    ppcbe:
+      script: |
+        #!/bin/bash
+        test "$(uname -m)" = "ppc64"
+    git:
+      script: |
+        #! /bin/bash
+        git config user.name > /dev/null 2>&1
+    docker:
+      script: |
+        #!/bin/bash
+        docker ps || sudo -n docker ps
+    s390x:
+      script: |
+        #!/bin/bash
+        test "$(uname -m)" = "s390x"
+git:
+  push_to: git@github.com:patchew-project/qemu
+  public_repo: https://github.com/patchew-project/qemu
+  url_template: https://github.com/patchew-project/qemu/tree/%t
diff --git a/.travis.yml b/.travis.yml
index 6fd89b3d91..b053a836a3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -284,5 +284,5 @@ matrix:
 
     # Run check-tcg against softmmu targets
     - env:
-        - CONFIG="--target-list=xtensa-softmmu,arm-softmmu"
+        - CONFIG="--target-list=xtensa-softmmu,arm-softmmu,aarch64-softmmu,alpha-softmmu"
         - TEST_CMD="make -j3 check-tcg V=1"
diff --git a/MAINTAINERS b/MAINTAINERS
index 3cacd751bf..7be1225415 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -102,6 +102,7 @@ F: pc-bios/s390-ccw/
 F: pc-bios/s390-ccw.img
 F: target/s390x/
 F: docs/vfio-ap.txt
+F: tests/migration/s390x/
 K: ^Subject:.*(?i)s390x?
 T: git https://github.com/cohuck/qemu.git s390-next
 L: qemu-s390x@nongnu.org
@@ -470,6 +471,7 @@ M: Richard Henderson <rth@twiddle.net>
 S: Maintained
 F: hw/alpha/
 F: hw/isa/smc37c669-superio.c
+F: tests/tcg/alpha/system/
 
 ARM Machines
 ------------
@@ -934,6 +936,7 @@ M: Aurelien Jarno <aurelien@aurel32.net>
 R: Aleksandar Rikalo <arikalo@wavecomp.com>
 S: Maintained
 F: hw/mips/mips_malta.c
+F: tests/acceptance/linux_ssh_mips_malta.py
 
 Mipssim
 M: Aleksandar Markovic <amarkovic@wavecomp.com>
@@ -1673,9 +1676,17 @@ virtio-gpu
 M: Gerd Hoffmann <kraxel@redhat.com>
 S: Maintained
 F: hw/display/virtio-gpu*
-F: hw/display/virtio-vga.c
+F: hw/display/virtio-vga.*
 F: include/hw/virtio/virtio-gpu.h
 
+vhost-user-gpu
+M: Marc-André Lureau <marcandre.lureau@redhat.com>
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Maintained
+F: docs/interop/vhost-user-gpu.rst
+F: contrib/vhost-user-gpu
+F: hw/display/vhost-user-*
+
 Cirrus VGA
 M: Gerd Hoffmann <kraxel@redhat.com>
 S: Odd Fixes
@@ -1710,6 +1721,7 @@ L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/*/*xive*
 F: include/hw/*/*xive*
+F: docs/*/*xive*
 
 Subsystems
 ----------
@@ -2562,6 +2574,13 @@ F: docs/pvrdma.txt
 F: contrib/rdmacm-mux/*
 F: qapi/rdma.json
 
+Semihosting
+M: Alex Bennée <alex.bennee@linaro.org>
+L: qemu-devel@nongnu.org
+S: Maintained
+F: hw/semihosting/
+F: include/hw/semihosting/
+
 Build and test automation
 -------------------------
 Build and test automation
diff --git a/Makefile b/Makefile
index e02b88bcb1..8e2fc6624c 100644
--- a/Makefile
+++ b/Makefile
@@ -314,8 +314,20 @@ $(call set-vpath, $(SRC_PATH))
 
 LIBS+=-lz $(LIBS_TOOLS)
 
+vhost-user-json-y =
+HELPERS-y =
+
 HELPERS-$(call land,$(CONFIG_SOFTMMU),$(CONFIG_LINUX)) = qemu-bridge-helper$(EXESUF)
 
+ifdef CONFIG_LINUX
+ifdef CONFIG_VIRGL
+ifdef CONFIG_GBM
+HELPERS-y += vhost-user-gpu$(EXESUF)
+vhost-user-json-y += contrib/vhost-user-gpu/50-qemu-gpu.json
+endif
+endif
+endif
+
 ifdef BUILD_DOCS
 DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
 DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7
@@ -409,6 +421,7 @@ dummy := $(call unnest-vars,, \
                 vhost-user-scsi-obj-y \
                 vhost-user-blk-obj-y \
                 vhost-user-input-obj-y \
+                vhost-user-gpu-obj-y \
                 qga-vss-dll-obj-y \
                 block-obj-y \
                 block-obj-m \
@@ -426,7 +439,7 @@ dummy := $(call unnest-vars,, \
 
 include $(SRC_PATH)/tests/Makefile.include
 
-all: $(DOCS) $(if $(BUILD_DOCS),sphinxdocs) $(TOOLS) $(HELPERS-y) recurse-all modules
+all: $(DOCS) $(if $(BUILD_DOCS),sphinxdocs) $(TOOLS) $(HELPERS-y) recurse-all modules $(vhost-user-json-y)
 
 qemu-version.h: FORCE
 	$(call quiet-command, \
@@ -619,6 +632,9 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad"
 rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS)
 	$(call LINK, $^)
 
+vhost-user-gpu$(EXESUF): $(vhost-user-gpu-obj-y) $(libvhost-user-obj-y) libqemuutil.a libqemustub.a
+	$(call LINK, $^)
+
 ifdef CONFIG_VHOST_USER_INPUT
 ifdef CONFIG_LINUX
 vhost-user-input$(EXESUF): $(vhost-user-input-obj-y) libvhost-user.a libqemuutil.a
@@ -827,6 +843,12 @@ endif
 ifneq ($(HELPERS-y),)
 	$(call install-prog,$(HELPERS-y),$(DESTDIR)$(libexecdir))
 endif
+ifneq ($(vhost-user-json-y),)
+	$(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/vhost-user/"
+	for x in $(vhost-user-json-y); do \
+		$(INSTALL_DATA) $$x "$(DESTDIR)$(qemu_datadir)/vhost-user/"; \
+	done
+endif
 ifdef CONFIG_TRACE_SYSTEMTAP
 	$(INSTALL_PROG) "scripts/qemu-trace-stap" $(DESTDIR)$(bindir)
 endif
@@ -1009,7 +1031,9 @@ $(filter %.1 %.7 %.8,$(DOCS)): scripts/texi2pod.pl
 %/coverage-report.html:
 	@mkdir -p $*
 	$(call quiet-command,\
-		gcovr -r $(SRC_PATH) --object-directory $(BUILD_PATH) \
+		gcovr -r $(SRC_PATH) \
+		$(foreach t, $(TARGET_DIRS), --object-directory $(BUILD_DIR)/$(t)) \
+		 --object-directory $(BUILD_DIR) \
 		-p --html --html-details -o $@, \
 		"GEN", "coverage-report.html")
 
diff --git a/Makefile.objs b/Makefile.objs
index d7491413c1..c8337fa34b 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -123,6 +123,7 @@ vhost-user-scsi-obj-y = contrib/vhost-user-scsi/
 vhost-user-blk-obj-y = contrib/vhost-user-blk/
 rdmacm-mux-obj-y = contrib/rdmacm-mux/
 vhost-user-input-obj-y = contrib/vhost-user-input/
+vhost-user-gpu-obj-y = contrib/vhost-user-gpu/
 
 ######################################################################
 trace-events-subdirs =
@@ -182,6 +183,7 @@ trace-events-subdirs += hw/virtio
 trace-events-subdirs += hw/watchdog
 trace-events-subdirs += hw/xen
 trace-events-subdirs += hw/gpio
+trace-events-subdirs += hw/riscv
 trace-events-subdirs += migration
 trace-events-subdirs += net
 trace-events-subdirs += ui
diff --git a/Makefile.target b/Makefile.target
index 4ef4ce5996..ecd856e3a3 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -238,3 +238,19 @@ endif
 
 generated-files-y += config-target.h
 Makefile: $(generated-files-y)
+
+# Reports/Analysis
+#
+# The target specific coverage report only cares about target specific
+# blobs and not the shared code.
+#
+
+%/coverage-report.html:
+	@mkdir -p $*
+	$(call quiet-command,\
+		gcovr -r $(SRC_PATH) --object-directory $(CURDIR) \
+		-p --html --html-details -o $@, \
+		"GEN", "coverage-report.html")
+
+.PHONY: coverage-report
+coverage-report: $(CURDIR)/reports/coverage/coverage-report.html
diff --git a/block.c b/block.c
index cb11537029..e3e77feee0 100644
--- a/block.c
+++ b/block.c
@@ -2235,6 +2235,10 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
         bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm);
         bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL, &error_abort);
         bdrv_set_perm(old_bs, perm, shared_perm);
+
+        /* When the parent requiring a non-default AioContext is removed, the
+         * node moves back to the main AioContext */
+        bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL);
     }
 
     if (new_bs) {
@@ -2243,18 +2247,31 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
     }
 }
 
+/*
+ * This function steals the reference to child_bs from the caller.
+ * That reference is later dropped by bdrv_root_unref_child().
+ *
+ * On failure NULL is returned, errp is set and the reference to
+ * child_bs is also dropped.
+ *
+ * The caller must hold the AioContext lock @child_bs, but not that of @ctx
+ * (unless @child_bs is already in @ctx).
+ */
 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                   const char *child_name,
                                   const BdrvChildRole *child_role,
+                                  AioContext *ctx,
                                   uint64_t perm, uint64_t shared_perm,
                                   void *opaque, Error **errp)
 {
     BdrvChild *child;
+    Error *local_err = NULL;
     int ret;
 
     ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp);
     if (ret < 0) {
         bdrv_abort_perm_update(child_bs);
+        bdrv_unref(child_bs);
         return NULL;
     }
 
@@ -2268,12 +2285,48 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
         .opaque         = opaque,
     };
 
+    /* If the AioContexts don't match, first try to move the subtree of
+     * child_bs into the AioContext of the new parent. If this doesn't work,
+     * try moving the parent into the AioContext of child_bs instead. */
+    if (bdrv_get_aio_context(child_bs) != ctx) {
+        ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err);
+        if (ret < 0 && child_role->can_set_aio_ctx) {
+            GSList *ignore = g_slist_prepend(NULL, child);;
+            ctx = bdrv_get_aio_context(child_bs);
+            if (child_role->can_set_aio_ctx(child, ctx, &ignore, NULL)) {
+                error_free(local_err);
+                ret = 0;
+                g_slist_free(ignore);
+                ignore = g_slist_prepend(NULL, child);;
+                child_role->set_aio_ctx(child, ctx, &ignore);
+            }
+            g_slist_free(ignore);
+        }
+        if (ret < 0) {
+            error_propagate(errp, local_err);
+            g_free(child);
+            bdrv_abort_perm_update(child_bs);
+            return NULL;
+        }
+    }
+
     /* This performs the matching bdrv_set_perm() for the above check. */
     bdrv_replace_child(child, child_bs);
 
     return child;
 }
 
+/*
+ * This function transfers the reference to child_bs from the caller
+ * to parent_bs. That reference is later dropped by parent_bs on
+ * bdrv_close() or if someone calls bdrv_unref_child().
+ *
+ * On failure NULL is returned, errp is set and the reference to
+ * child_bs is also dropped.
+ *
+ * If @parent_bs and @child_bs are in different AioContexts, the caller must
+ * hold the AioContext lock for @child_bs, but not for @parent_bs.
+ */
 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
                              BlockDriverState *child_bs,
                              const char *child_name,
@@ -2286,11 +2339,11 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
     bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
 
     assert(parent_bs->drv);
-    assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs));
     bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
                     perm, shared_perm, &perm, &shared_perm);
 
     child = bdrv_root_attach_child(child_bs, child_name, child_role,
+                                   bdrv_get_aio_context(parent_bs),
                                    perm, shared_perm, parent_bs, errp);
     if (child == NULL) {
         return NULL;
@@ -2401,12 +2454,9 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
     /* If backing_hd was already part of bs's backing chain, and
      * inherits_from pointed recursively to bs then let's update it to
      * point directly to bs (else it will become NULL). */
-    if (update_inherits_from) {
+    if (bs->backing && update_inherits_from) {
         backing_hd->inherits_from = bs;
     }
-    if (!bs->backing) {
-        bdrv_unref(backing_hd);
-    }
 
 out:
     bdrv_refresh_limits(bs, NULL);
@@ -2504,7 +2554,6 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
         ret = -EINVAL;
         goto free_exit;
     }
-    bdrv_set_aio_context(backing_hd, bdrv_get_aio_context(bs));
 
     if (implicit_backing) {
         bdrv_refresh_filename(backing_hd);
@@ -2594,7 +2643,6 @@ BdrvChild *bdrv_open_child(const char *filename,
                            const BdrvChildRole *child_role,
                            bool allow_none, Error **errp)
 {
-    BdrvChild *c;
     BlockDriverState *bs;
 
     bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role,
@@ -2603,13 +2651,7 @@ BdrvChild *bdrv_open_child(const char *filename,
         return NULL;
     }
 
-    c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
-    if (!c) {
-        bdrv_unref(bs);
-        return NULL;
-    }
-
-    return c;
+    return bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
 }
 
 /* TODO Future callers may need to specify parent/child_role in order for
@@ -2878,7 +2920,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
             /* Not requesting BLK_PERM_CONSISTENT_READ because we're only
              * looking at the header to guess the image format. This works even
              * in cases where a guest would not see a consistent state. */
-            file = blk_new(0, BLK_PERM_ALL);
+            file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL);
             blk_insert_bs(file, file_bs, &local_err);
             bdrv_unref(file_bs);
             if (local_err) {
@@ -3877,22 +3919,12 @@ static void bdrv_close(BlockDriverState *bs)
         bs->drv = NULL;
     }
 
-    bdrv_set_backing_hd(bs, NULL, &error_abort);
-
-    if (bs->file != NULL) {
-        bdrv_unref_child(bs, bs->file);
-        bs->file = NULL;
-    }
-
     QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
-        /* TODO Remove bdrv_unref() from drivers' close function and use
-         * bdrv_unref_child() here */
-        if (child->bs->inherits_from == bs) {
-            child->bs->inherits_from = NULL;
-        }
-        bdrv_detach_child(child);
+        bdrv_unref_child(bs, child);
     }
 
+    bs->backing = NULL;
+    bs->file = NULL;
     g_free(bs->opaque);
     bs->opaque = NULL;
     atomic_set(&bs->copy_on_read, 0);
@@ -4021,13 +4053,13 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
     uint64_t perm = 0, shared = BLK_PERM_ALL;
     int ret;
 
-    assert(!atomic_read(&from->in_flight));
-    assert(!atomic_read(&to->in_flight));
-
     /* Make sure that @from doesn't go away until we have successfully attached
      * all of its parents to @to. */
     bdrv_ref(from);
 
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+    bdrv_drained_begin(from);
+
     /* Put all parents into @list and calculate their cumulative permissions */
     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
         assert(c->bs == from);
@@ -4068,6 +4100,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
 
 out:
     g_slist_free(list);
+    bdrv_drained_end(from);
     bdrv_unref(from);
 }
 
@@ -5756,8 +5789,17 @@ static void bdrv_attach_aio_context(BlockDriverState *bs,
     bs->walking_aio_notifiers = false;
 }
 
-/* @ignore will accumulate all visited BdrvChild object. The caller is
- * responsible for freeing the list afterwards. */
+/*
+ * Changes the AioContext used for fd handlers, timers, and BHs by this
+ * BlockDriverState and all its children and parents.
+ *
+ * The caller must own the AioContext lock for the old AioContext of bs, but it
+ * must not own the AioContext lock for new_context (unless new_context is the
+ * same as the current context of bs).
+ *
+ * @ignore will accumulate all visited BdrvChild object. The caller is
+ * responsible for freeing the list afterwards.
+ */
 void bdrv_set_aio_context_ignore(BlockDriverState *bs,
                                  AioContext *new_context, GSList **ignore)
 {
@@ -5780,10 +5822,9 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
         if (g_slist_find(*ignore, child)) {
             continue;
         }
-        if (child->role->set_aio_ctx) {
-            *ignore = g_slist_prepend(*ignore, child);
-            child->role->set_aio_ctx(child, new_context, ignore);
-        }
+        assert(child->role->set_aio_ctx);
+        *ignore = g_slist_prepend(*ignore, child);
+        child->role->set_aio_ctx(child, new_context, ignore);
     }
 
     bdrv_detach_aio_context(bs);
@@ -5797,16 +5838,6 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
     aio_context_release(new_context);
 }
 
-/* The caller must own the AioContext lock for the old AioContext of bs, but it
- * must not own the AioContext lock for new_context (unless new_context is
- * the same as the current context of bs). */
-void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
-{
-    GSList *ignore_list = NULL;
-    bdrv_set_aio_context_ignore(bs, new_context, &ignore_list);
-    g_slist_free(ignore_list);
-}
-
 static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx,
                                             GSList **ignore, Error **errp)
 {
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 7a81892a52..ae11605c9f 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -6,7 +6,7 @@ block-obj-$(CONFIG_BOCHS) += bochs.o
 block-obj-$(CONFIG_VVFAT) += vvfat.o
 block-obj-$(CONFIG_DMG) += dmg.o
 
-block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o
+block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o qcow2-threads.o
 block-obj-$(CONFIG_QED) += qed.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-$(CONFIG_QED) += qed-check.o
 block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
diff --git a/block/backup.c b/block/backup.c
index 916817d8b1..715e1d3be8 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -112,7 +112,8 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
     int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
     int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;
 
-    hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
+    assert(QEMU_IS_ALIGNED(start, job->cluster_size));
+    hbitmap_reset(job->copy_bitmap, start, job->cluster_size);
     nbytes = MIN(job->cluster_size, job->len - start);
     if (!*bounce_buffer) {
         *bounce_buffer = blk_blockalign(blk, job->cluster_size);
@@ -145,7 +146,7 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
 
     return nbytes;
 fail:
-    hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
+    hbitmap_set(job->copy_bitmap, start, job->cluster_size);
     return ret;
 
 }
@@ -165,16 +166,15 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
     int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;
 
     assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
+    assert(QEMU_IS_ALIGNED(start, job->cluster_size));
     nbytes = MIN(job->copy_range_size, end - start);
     nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
-    hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
-                  nr_clusters);
+    hbitmap_reset(job->copy_bitmap, start, job->cluster_size * nr_clusters);
     ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
                             read_flags, write_flags);
     if (ret < 0) {
         trace_backup_do_cow_copy_range_fail(job, start, ret);
-        hbitmap_set(job->copy_bitmap, start / job->cluster_size,
-                    nr_clusters);
+        hbitmap_set(job->copy_bitmap, start, job->cluster_size * nr_clusters);
         return ret;
     }
 
@@ -202,7 +202,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
     cow_request_begin(&cow_request, job, start, end);
 
     while (start < end) {
-        if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
+        if (!hbitmap_get(job->copy_bitmap, start)) {
             trace_backup_do_cow_skip(job, start);
             start += job->cluster_size;
             continue; /* already copied */
@@ -298,12 +298,16 @@ static void backup_clean(Job *job)
     assert(s->target);
     blk_unref(s->target);
     s->target = NULL;
+
+    if (s->copy_bitmap) {
+        hbitmap_free(s->copy_bitmap);
+        s->copy_bitmap = NULL;
+    }
 }
 
 void backup_do_checkpoint(BlockJob *job, Error **errp)
 {
     BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
-    int64_t len;
 
     assert(block_job_driver(job) == &backup_job_driver);
 
@@ -313,8 +317,7 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
         return;
     }
 
-    len = DIV_ROUND_UP(backup_job->len, backup_job->cluster_size);
-    hbitmap_set(backup_job->copy_bitmap, 0, len);
+    hbitmap_set(backup_job->copy_bitmap, 0, backup_job->len);
 }
 
 static void backup_drain(BlockJob *job)
@@ -365,20 +368,44 @@ static bool coroutine_fn yield_and_check(BackupBlockJob *job)
     return false;
 }
 
-static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
+static bool bdrv_is_unallocated_range(BlockDriverState *bs,
+                                      int64_t offset, int64_t bytes)
+{
+    int64_t end = offset + bytes;
+
+    while (offset < end && !bdrv_is_allocated(bs, offset, bytes, &bytes)) {
+        if (bytes == 0) {
+            return true;
+        }
+        offset += bytes;
+        bytes = end - offset;
+    }
+
+    return offset >= end;
+}
+
+static int coroutine_fn backup_loop(BackupBlockJob *job)
 {
     int ret;
     bool error_is_read;
-    int64_t cluster;
+    int64_t offset;
     HBitmapIter hbi;
+    BlockDriverState *bs = blk_bs(job->common.blk);
 
     hbitmap_iter_init(&hbi, job->copy_bitmap, 0);
-    while ((cluster = hbitmap_iter_next(&hbi)) != -1) {
+    while ((offset = hbitmap_iter_next(&hbi)) != -1) {
+        if (job->sync_mode == MIRROR_SYNC_MODE_TOP &&
+            bdrv_is_unallocated_range(bs, offset, job->cluster_size))
+        {
+            hbitmap_reset(job->copy_bitmap, offset, job->cluster_size);
+            continue;
+        }
+
         do {
             if (yield_and_check(job)) {
                 return 0;
             }
-            ret = backup_do_cow(job, cluster * job->cluster_size,
+            ret = backup_do_cow(job, offset,
                                 job->cluster_size, &error_is_read, false);
             if (ret < 0 && backup_error_action(job, error_is_read, -ret) ==
                            BLOCK_ERROR_ACTION_REPORT)
@@ -394,66 +421,43 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
 /* init copy_bitmap from sync_bitmap */
 static void backup_incremental_init_copy_bitmap(BackupBlockJob *job)
 {
-    BdrvDirtyBitmapIter *dbi;
-    int64_t offset;
-    int64_t end = DIV_ROUND_UP(bdrv_dirty_bitmap_size(job->sync_bitmap),
-                               job->cluster_size);
+    uint64_t offset = 0;
+    uint64_t bytes = job->len;
 
-    dbi = bdrv_dirty_iter_new(job->sync_bitmap);
-    while ((offset = bdrv_dirty_iter_next(dbi)) != -1) {
-        int64_t cluster = offset / job->cluster_size;
-        int64_t next_cluster;
-
-        offset += bdrv_dirty_bitmap_granularity(job->sync_bitmap);
-        if (offset >= bdrv_dirty_bitmap_size(job->sync_bitmap)) {
-            hbitmap_set(job->copy_bitmap, cluster, end - cluster);
-            break;
-        }
+    while (bdrv_dirty_bitmap_next_dirty_area(job->sync_bitmap,
+                                             &offset, &bytes))
+    {
+        hbitmap_set(job->copy_bitmap, offset, bytes);
 
-        offset = bdrv_dirty_bitmap_next_zero(job->sync_bitmap, offset,
-                                             UINT64_MAX);
-        if (offset == -1) {
-            hbitmap_set(job->copy_bitmap, cluster, end - cluster);
+        offset += bytes;
+        if (offset >= job->len) {
             break;
         }
-
-        next_cluster = DIV_ROUND_UP(offset, job->cluster_size);
-        hbitmap_set(job->copy_bitmap, cluster, next_cluster - cluster);
-        if (next_cluster >= end) {
-            break;
-        }
-
-        bdrv_set_dirty_iter(dbi, next_cluster * job->cluster_size);
+        bytes = job->len - offset;
     }
 
     /* TODO job_progress_set_remaining() would make more sense */
     job_progress_update(&job->common.job,
-        job->len - hbitmap_count(job->copy_bitmap) * job->cluster_size);
-
-    bdrv_dirty_iter_free(dbi);
+        job->len - hbitmap_count(job->copy_bitmap));
 }
 
 static int coroutine_fn backup_run(Job *job, Error **errp)
 {
     BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
     BlockDriverState *bs = blk_bs(s->common.blk);
-    int64_t offset, nb_clusters;
     int ret = 0;
 
     QLIST_INIT(&s->inflight_reqs);
     qemu_co_rwlock_init(&s->flush_rwlock);
 
-    nb_clusters = DIV_ROUND_UP(s->len, s->cluster_size);
     job_progress_set_remaining(job, s->len);
 
-    s->copy_bitmap = hbitmap_alloc(nb_clusters, 0);
     if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
         backup_incremental_init_copy_bitmap(s);
     } else {
-        hbitmap_set(s->copy_bitmap, 0, nb_clusters);
+        hbitmap_set(s->copy_bitmap, 0, s->len);
     }
 
-
     s->before_write.notify = backup_before_write_notify;
     bdrv_add_before_write_notifier(bs, &s->before_write);
 
@@ -465,68 +469,8 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
              * notify callback service CoW requests. */
             job_yield(job);
         }
-    } else if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
-        ret = backup_run_incremental(s);
     } else {
-        /* Both FULL and TOP SYNC_MODE's require copying.. */
-        for (offset = 0; offset < s->len;
-             offset += s->cluster_size) {
-            bool error_is_read;
-            int alloced = 0;
-
-            if (yield_and_check(s)) {
-                break;
-            }
-
-            if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
-                int i;
-                int64_t n;
-
-                /* Check to see if these blocks are already in the
-                 * backing file. */
-
-                for (i = 0; i < s->cluster_size;) {
-                    /* bdrv_is_allocated() only returns true/false based
-                     * on the first set of sectors it comes across that
-                     * are are all in the same state.
-                     * For that reason we must verify each sector in the
-                     * backup cluster length.  We end up copying more than
-                     * needed but at some point that is always the case. */
-                    alloced =
-                        bdrv_is_allocated(bs, offset + i,
-                                          s->cluster_size - i, &n);
-                    i += n;
-
-                    if (alloced || n == 0) {
-                        break;
-                    }
-                }
-
-                /* If the above loop never found any sectors that are in
-                 * the topmost image, skip this backup. */
-                if (alloced == 0) {
-                    continue;
-                }
-            }
-            /* FULL sync mode we copy the whole drive. */
-            if (alloced < 0) {
-                ret = alloced;
-            } else {
-                ret = backup_do_cow(s, offset, s->cluster_size,
-                                    &error_is_read, false);
-            }
-            if (ret < 0) {
-                /* Depending on error action, fail now or retry cluster */
-                BlockErrorAction action =
-                    backup_error_action(s, error_is_read, -ret);
-                if (action == BLOCK_ERROR_ACTION_REPORT) {
-                    break;
-                } else {
-                    offset -= s->cluster_size;
-                    continue;
-                }
-            }
-        }
+        ret = backup_loop(s);
     }
 
     notifier_with_return_remove(&s->before_write);
@@ -534,7 +478,6 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
     /* wait until pending backup_do_cow() calls have completed */
     qemu_co_rwlock_wrlock(&s->flush_rwlock);
     qemu_co_rwlock_unlock(&s->flush_rwlock);
-    hbitmap_free(s->copy_bitmap);
 
     return ret;
 }
@@ -554,6 +497,42 @@ static const BlockJobDriver backup_job_driver = {
     .drain                  = backup_drain,
 };
 
+static int64_t backup_calculate_cluster_size(BlockDriverState *target,
+                                             Error **errp)
+{
+    int ret;
+    BlockDriverInfo bdi;
+
+    /*
+     * If there is no backing file on the target, we cannot rely on COW if our
+     * backup cluster size is smaller than the target cluster size. Even for
+     * targets with a backing file, try to avoid COW if possible.
+     */
+    ret = bdrv_get_info(target, &bdi);
+    if (ret == -ENOTSUP && !target->backing) {
+        /* Cluster size is not defined */
+        warn_report("The target block device doesn't provide "
+                    "information about the block size and it doesn't have a "
+                    "backing file. The default block size of %u bytes is "
+                    "used. If the actual block size of the target exceeds "
+                    "this default, the backup may be unusable",
+                    BACKUP_CLUSTER_SIZE_DEFAULT);
+        return BACKUP_CLUSTER_SIZE_DEFAULT;
+    } else if (ret < 0 && !target->backing) {
+        error_setg_errno(errp, -ret,
+            "Couldn't determine the cluster size of the target image, "
+            "which has no backing file");
+        error_append_hint(errp,
+            "Aborting, since this may create an unusable destination image\n");
+        return ret;
+    } else if (ret < 0 && target->backing) {
+        /* Not fatal; just trudge on ahead. */
+        return BACKUP_CLUSTER_SIZE_DEFAULT;
+    }
+
+    return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+}
+
 BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *target, int64_t speed,
                   MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
@@ -565,9 +544,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                   JobTxn *txn, Error **errp)
 {
     int64_t len;
-    BlockDriverInfo bdi;
     BackupBlockJob *job = NULL;
     int ret;
+    int64_t cluster_size;
+    HBitmap *copy_bitmap = NULL;
 
     assert(bs);
     assert(target);
@@ -629,6 +609,13 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
         goto error;
     }
 
+    cluster_size = backup_calculate_cluster_size(target, errp);
+    if (cluster_size < 0) {
+        goto error;
+    }
+
+    copy_bitmap = hbitmap_alloc(len, ctz32(cluster_size));
+
     /* job->len is fixed, so we can't allow resize */
     job = block_job_create(job_id, &backup_job_driver, txn, bs,
                            BLK_PERM_CONSISTENT_READ,
@@ -640,7 +627,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     }
 
     /* The target must match the source in size, so no resize here either */
-    job->target = blk_new(BLK_PERM_WRITE,
+    job->target = blk_new(job->common.job.aio_context,
+                          BLK_PERM_WRITE,
                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
                           BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
     ret = blk_insert_bs(job->target, target, errp);
@@ -657,33 +645,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
 
     /* Detect image-fleecing (and similar) schemes */
     job->serialize_target_writes = bdrv_chain_contains(target, bs);
-
-    /* If there is no backing file on the target, we cannot rely on COW if our
-     * backup cluster size is smaller than the target cluster size. Even for
-     * targets with a backing file, try to avoid COW if possible. */
-    ret = bdrv_get_info(target, &bdi);
-    if (ret == -ENOTSUP && !target->backing) {
-        /* Cluster size is not defined */
-        warn_report("The target block device doesn't provide "
-                    "information about the block size and it doesn't have a "
-                    "backing file. The default block size of %u bytes is "
-                    "used. If the actual block size of the target exceeds "
-                    "this default, the backup may be unusable",
-                    BACKUP_CLUSTER_SIZE_DEFAULT);
-        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
-    } else if (ret < 0 && !target->backing) {
-        error_setg_errno(errp, -ret,
-            "Couldn't determine the cluster size of the target image, "
-            "which has no backing file");
-        error_append_hint(errp,
-            "Aborting, since this may create an unusable destination image\n");
-        goto error;
-    } else if (ret < 0 && target->backing) {
-        /* Not fatal; just trudge on ahead. */
-        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
-    } else {
-        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
-    }
+    job->cluster_size = cluster_size;
+    job->copy_bitmap = copy_bitmap;
+    copy_bitmap = NULL;
     job->use_copy_range = true;
     job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
                                         blk_get_max_transfer(job->target));
@@ -699,6 +663,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     return &job->common;
 
  error:
+    if (copy_bitmap) {
+        assert(!job || !job->copy_bitmap);
+        hbitmap_free(copy_bitmap);
+    }
     if (sync_bitmap) {
         bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
     }
diff --git a/block/block-backend.c b/block/block-backend.c
index 4c0a8ef88d..f5d9407d20 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -42,6 +42,7 @@ struct BlockBackend {
     char *name;
     int refcnt;
     BdrvChild *root;
+    AioContext *ctx;
     DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
     QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
     QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
@@ -322,12 +323,13 @@ static const BdrvChildRole child_root = {
  *
  * Return the new BlockBackend on success, null on failure.
  */
-BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
+BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
 {
     BlockBackend *blk;
 
     blk = g_new0(BlockBackend, 1);
     blk->refcnt = 1;
+    blk->ctx = ctx;
     blk->perm = perm;
     blk->shared_perm = shared_perm;
     blk_set_enable_write_cache(blk, true);
@@ -347,6 +349,7 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
 
 /*
  * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
+ * The new BlockBackend is in the main AioContext.
  *
  * Just as with bdrv_open(), after having called this function the reference to
  * @options belongs to the block layer (even on failure).
@@ -382,17 +385,16 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
         perm |= BLK_PERM_RESIZE;
     }
 
-    blk = blk_new(perm, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), perm, BLK_PERM_ALL);
     bs = bdrv_open(filename, reference, options, flags, errp);
     if (!bs) {
         blk_unref(blk);
         return NULL;
     }
 
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk->ctx,
                                        perm, BLK_PERM_ALL, blk, errp);
     if (!blk->root) {
-        bdrv_unref(bs);
         blk_unref(blk);
         return NULL;
     }
@@ -800,12 +802,12 @@ void blk_remove_bs(BlockBackend *blk)
 int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
 {
     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+    bdrv_ref(bs);
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk->ctx,
                                        blk->perm, blk->shared_perm, blk, errp);
     if (blk->root == NULL) {
         return -EPERM;
     }
-    bdrv_ref(bs);
 
     notifier_list_notify(&blk->insert_bs_notifiers, blk);
     if (tgm->throttle_state) {
@@ -1857,7 +1859,14 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
 
 AioContext *blk_get_aio_context(BlockBackend *blk)
 {
-    return bdrv_get_aio_context(blk_bs(blk));
+    BlockDriverState *bs = blk_bs(blk);
+
+    if (bs) {
+        AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
+        assert(ctx == blk->ctx);
+    }
+
+    return blk->ctx;
 }
 
 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
@@ -1866,30 +1875,37 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
     return blk_get_aio_context(blk_acb->blk);
 }
 
-static void blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
-                                   bool update_root_node)
+static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
+                                  bool update_root_node, Error **errp)
 {
     BlockDriverState *bs = blk_bs(blk);
     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
+    int ret;
 
     if (bs) {
+        if (update_root_node) {
+            ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root,
+                                                 errp);
+            if (ret < 0) {
+                return ret;
+            }
+        }
         if (tgm->throttle_state) {
             bdrv_drained_begin(bs);
             throttle_group_detach_aio_context(tgm);
             throttle_group_attach_aio_context(tgm, new_context);
             bdrv_drained_end(bs);
         }
-        if (update_root_node) {
-            GSList *ignore = g_slist_prepend(NULL, blk->root);
-            bdrv_set_aio_context_ignore(bs, new_context, &ignore);
-            g_slist_free(ignore);
-        }
     }
+
+    blk->ctx = new_context;
+    return 0;
 }
 
-void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
+int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
+                        Error **errp)
 {
-    blk_do_set_aio_context(blk, new_context, true);
+    return blk_do_set_aio_context(blk, new_context, true, errp);
 }
 
 static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
@@ -1916,7 +1932,7 @@ static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
                                  GSList **ignore)
 {
     BlockBackend *blk = child->opaque;
-    blk_do_set_aio_context(blk, ctx, false);
+    blk_do_set_aio_context(blk, ctx, false, &error_abort);
 }
 
 void blk_add_aio_context_notifier(BlockBackend *blk,
diff --git a/block/commit.c b/block/commit.c
index 14e5bb394c..c815def89a 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -301,7 +301,6 @@ void commit_start(const char *job_id, BlockDriverState *bs,
         commit_top_bs->implicit = true;
     }
     commit_top_bs->total_sectors = top->total_sectors;
-    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(top));
 
     bdrv_append(commit_top_bs, top, &local_err);
     if (local_err) {
@@ -338,7 +337,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
         goto fail;
     }
 
-    s->base = blk_new(BLK_PERM_CONSISTENT_READ
+    s->base = blk_new(s->common.job.aio_context,
+                      BLK_PERM_CONSISTENT_READ
                       | BLK_PERM_WRITE
                       | BLK_PERM_RESIZE,
                       BLK_PERM_CONSISTENT_READ
@@ -351,7 +351,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
     s->base_bs = base;
 
     /* Required permissions are already taken with block_job_add_bdrv() */
-    s->top = blk_new(0, BLK_PERM_ALL);
+    s->top = blk_new(s->common.job.aio_context, 0, BLK_PERM_ALL);
     ret = blk_insert_bs(s->top, top, errp);
     if (ret < 0) {
         goto fail;
@@ -395,6 +395,7 @@ int bdrv_commit(BlockDriverState *bs)
     BlockDriverState *backing_file_bs = NULL;
     BlockDriverState *commit_top_bs = NULL;
     BlockDriver *drv = bs->drv;
+    AioContext *ctx;
     int64_t offset, length, backing_length;
     int ro;
     int64_t n;
@@ -422,8 +423,9 @@ int bdrv_commit(BlockDriverState *bs)
         }
     }
 
-    src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
-    backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    ctx = bdrv_get_aio_context(bs);
+    src = blk_new(ctx, BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
+    backing = blk_new(ctx, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
 
     ret = blk_insert_bs(src, bs, &local_err);
     if (ret < 0) {
@@ -440,7 +442,6 @@ int bdrv_commit(BlockDriverState *bs)
         error_report_err(local_err);
         goto ro_cleanup;
     }
-    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(backing_file_bs));
 
     bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
     bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
diff --git a/block/crypto.c b/block/crypto.c
index 3af46b805f..7351fd479d 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -257,7 +257,8 @@ static int block_crypto_co_create_generic(BlockDriverState *bs,
     QCryptoBlock *crypto = NULL;
     struct BlockCryptoCreateData data;
 
-    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
 
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 59e6ebb861..49646a30e6 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -816,10 +816,10 @@ void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
 {
     bool ret;
 
-    /* only bitmaps from one bds are supported */
-    assert(dest->mutex == src->mutex);
-
     qemu_mutex_lock(dest->mutex);
+    if (src->mutex != dest->mutex) {
+        qemu_mutex_lock(src->mutex);
+    }
 
     if (bdrv_dirty_bitmap_check(dest, BDRV_BITMAP_DEFAULT, errp)) {
         goto out;
@@ -845,4 +845,7 @@ void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
 
 out:
     qemu_mutex_unlock(dest->mutex);
+    if (src->mutex != dest->mutex) {
+        qemu_mutex_unlock(src->mutex);
+    }
 }
diff --git a/block/io.c b/block/io.c
index 3134a60a48..9ba1bada36 100644
--- a/block/io.c
+++ b/block/io.c
@@ -422,11 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
         return;
     }
     assert(bs->quiesce_counter > 0);
-    old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
 
     /* Re-enable things in child-to-parent order */
     bdrv_drain_invoke(bs, false);
     bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
+
+    old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
     if (old_quiesce_counter == 1) {
         aio_enable_external(bdrv_get_aio_context(bs));
     }
@@ -2092,6 +2093,12 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
      */
     assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
            align > offset - aligned_offset);
+    if (ret & BDRV_BLOCK_RECURSE) {
+        assert(ret & BDRV_BLOCK_DATA);
+        assert(ret & BDRV_BLOCK_OFFSET_VALID);
+        assert(!(ret & BDRV_BLOCK_ZERO));
+    }
+
     *pnum -= offset - aligned_offset;
     if (*pnum > bytes) {
         *pnum = bytes;
@@ -2122,7 +2129,8 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
         }
     }
 
-    if (want_zero && local_file && local_file != bs &&
+    if (want_zero && ret & BDRV_BLOCK_RECURSE &&
+        local_file && local_file != bs &&
         (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
         (ret & BDRV_BLOCK_OFFSET_VALID)) {
         int64_t file_pnum;
@@ -2624,7 +2632,7 @@ int bdrv_flush(BlockDriverState *bs)
 typedef struct DiscardCo {
     BdrvChild *child;
     int64_t offset;
-    int bytes;
+    int64_t bytes;
     int ret;
 } DiscardCo;
 static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
@@ -2635,14 +2643,15 @@ static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
     aio_wait_kick();
 }
 
-int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes)
+int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
+                                  int64_t bytes)
 {
     BdrvTrackedRequest req;
     int max_pdiscard, ret;
     int head, tail, align;
     BlockDriverState *bs = child->bs;
 
-    if (!bs || !bs->drv) {
+    if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
         return -ENOMEDIUM;
     }
 
@@ -2650,9 +2659,8 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes)
         return -EPERM;
     }
 
-    ret = bdrv_check_byte_request(bs, offset, bytes);
-    if (ret < 0) {
-        return ret;
+    if (offset < 0 || bytes < 0 || bytes > INT64_MAX - offset) {
+        return -EIO;
     }
 
     /* Do nothing if disabled.  */
@@ -2687,7 +2695,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes)
     assert(max_pdiscard >= bs->bl.request_alignment);
 
     while (bytes > 0) {
-        int num = bytes;
+        int64_t num = bytes;
 
         if (head) {
             /* Make small requests to get to alignment boundaries. */
@@ -2749,7 +2757,7 @@ out:
     return ret;
 }
 
-int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes)
+int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes)
 {
     Coroutine *co;
     DiscardCo rwco = {
diff --git a/block/linux-aio.c b/block/linux-aio.c
index d4b61fb251..27100c2fd1 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -30,7 +30,6 @@
 #define MAX_EVENTS 128
 
 struct qemu_laiocb {
-    BlockAIOCB common;
     Coroutine *co;
     LinuxAioState *ctx;
     struct iocb iocb;
@@ -72,7 +71,7 @@ static inline ssize_t io_event_ret(struct io_event *ev)
 }
 
 /*
- * Completes an AIO request (calls the callback and frees the ACB).
+ * Completes an AIO request.
  */
 static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
 {
@@ -94,18 +93,15 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
     }
 
     laiocb->ret = ret;
-    if (laiocb->co) {
-        /* If the coroutine is already entered it must be in ioq_submit() and
-         * will notice laio->ret has been filled in when it eventually runs
-         * later.  Coroutines cannot be entered recursively so avoid doing
-         * that!
-         */
-        if (!qemu_coroutine_entered(laiocb->co)) {
-            aio_co_wake(laiocb->co);
-        }
-    } else {
-        laiocb->common.cb(laiocb->common.opaque, ret);
-        qemu_aio_unref(laiocb);
+
+    /*
+     * If the coroutine is already entered it must be in ioq_submit() and
+     * will notice laio->ret has been filled in when it eventually runs
+     * later.  Coroutines cannot be entered recursively so avoid doing
+     * that!
+     */
+    if (!qemu_coroutine_entered(laiocb->co)) {
+        aio_co_wake(laiocb->co);
     }
 }
 
@@ -273,30 +269,6 @@ static bool qemu_laio_poll_cb(void *opaque)
     return true;
 }
 
-static void laio_cancel(BlockAIOCB *blockacb)
-{
-    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
-    struct io_event event;
-    int ret;
-
-    if (laiocb->ret != -EINPROGRESS) {
-        return;
-    }
-    ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
-    laiocb->ret = -ECANCELED;
-    if (ret != 0) {
-        /* iocb is not cancelled, cb will be called by the event loop later */
-        return;
-    }
-
-    laiocb->common.cb(laiocb->common.opaque, laiocb->ret);
-}
-
-static const AIOCBInfo laio_aiocb_info = {
-    .aiocb_size         = sizeof(struct qemu_laiocb),
-    .cancel_async       = laio_cancel,
-};
-
 static void ioq_init(LaioQueue *io_q)
 {
     QSIMPLEQ_INIT(&io_q->pending);
@@ -431,30 +403,6 @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
     return laiocb.ret;
 }
 
-BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
-{
-    struct qemu_laiocb *laiocb;
-    off_t offset = sector_num * BDRV_SECTOR_SIZE;
-    int ret;
-
-    laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
-    laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE;
-    laiocb->ctx = s;
-    laiocb->ret = -EINPROGRESS;
-    laiocb->is_read = (type == QEMU_AIO_READ);
-    laiocb->qiov = qiov;
-
-    ret = laio_do_submit(fd, laiocb, offset, type);
-    if (ret < 0) {
-        qemu_aio_unref(laiocb);
-        return NULL;
-    }
-
-    return &laiocb->common;
-}
-
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
 {
     aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
diff --git a/block/mirror.c b/block/mirror.c
index ec4bd9f404..f8bdb5b21b 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1543,7 +1543,6 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                                           BDRV_REQ_NO_FALLBACK;
     bs_opaque = g_new0(MirrorBDSOpaque, 1);
     mirror_top_bs->opaque = bs_opaque;
-    bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));
 
     /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
      * it alive until block_job_create() succeeds even if bs has no parent. */
@@ -1584,7 +1583,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
      * We can allow anything except resize there.*/
     target_is_backing = bdrv_chain_contains(bs, target);
     target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
-    s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE |
+    s->target = blk_new(s->common.job.aio_context,
+                        BLK_PERM_WRITE | BLK_PERM_RESIZE |
                         (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
                         BLK_PERM_WRITE_UNCHANGED |
                         (target_is_backing ? BLK_PERM_CONSISTENT_READ |
diff --git a/block/parallels.c b/block/parallels.c
index 2747400577..00fae125d1 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -554,7 +554,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
         return -EIO;
     }
 
-    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto out;
diff --git a/block/qcow.c b/block/qcow.c
index 1bb8fd05e2..6dee5bb792 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -844,7 +844,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
         return -EIO;
     }
 
-    qcow_blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    qcow_blk = blk_new(bdrv_get_aio_context(bs),
+                       BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(qcow_blk, bs, errp);
     if (ret < 0) {
         goto exit;
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 8a75366c92..b2487101ed 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -29,7 +29,6 @@
 #include "qapi/error.h"
 #include "qemu/cutils.h"
 
-#include "block/block_int.h"
 #include "qcow2.h"
 
 /* NOTICE: BME here means Bitmaps Extension and used as a namespace for
@@ -754,7 +753,7 @@ static int bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list,
         dir_offset = *offset;
     }
 
-    dir = g_try_malloc(dir_size);
+    dir = g_try_malloc0(dir_size);
     if (dir == NULL) {
         return -ENOMEM;
     }
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index df02e7b20a..b33bcbc984 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -23,7 +23,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "block/block_int.h"
 #include "qemu-common.h"
 #include "qcow2.h"
 #include "trace.h"
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index b36f4aa84a..cf892f37a8 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -27,7 +27,6 @@
 
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "block/block_int.h"
 #include "qcow2.h"
 #include "qemu/bswap.h"
 #include "trace.h"
@@ -472,13 +471,12 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs,
 {
     if (bytes && bs->encrypted) {
         BDRVQcow2State *s = bs->opaque;
-        int64_t offset = (s->crypt_physical_offset ?
-                          (cluster_offset + offset_in_cluster) :
-                          (src_cluster_offset + offset_in_cluster));
         assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
         assert((bytes & ~BDRV_SECTOR_MASK) == 0);
         assert(s->crypto);
-        if (qcrypto_block_encrypt(s->crypto, offset, buffer, bytes, NULL) < 0) {
+        if (qcow2_co_encrypt(bs, cluster_offset,
+                             src_cluster_offset + offset_in_cluster,
+                             buffer, bytes) < 0) {
             return false;
         }
     }
@@ -833,7 +831,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
     assert(start->offset + start->nb_bytes <= end->offset);
     assert(!m->data_qiov || m->data_qiov->size == data_bytes);
 
-    if (start->nb_bytes == 0 && end->nb_bytes == 0) {
+    if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) {
         return 0;
     }
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 0b09d6838b..22cadd79d5 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -25,11 +25,11 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "block/block_int.h"
 #include "qcow2.h"
 #include "qemu/range.h"
 #include "qemu/bswap.h"
 #include "qemu/cutils.h"
+#include "trace.h"
 
 static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
                                     uint64_t max);
@@ -738,7 +738,11 @@ void qcow2_process_discards(BlockDriverState *bs, int ret)
 
         /* Discard is optional, ignore the return value */
         if (ret >= 0) {
-            bdrv_pdiscard(bs->file, d->offset, d->bytes);
+            int r2 = bdrv_pdiscard(bs->file, d->offset, d->bytes);
+            if (r2 < 0) {
+                trace_qcow2_process_discards_failed_region(d->offset, d->bytes,
+                                                           r2);
+            }
         }
 
         g_free(d);
@@ -3445,3 +3449,35 @@ int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
                             "There are no references in the refcount table.");
     return -EIO;
 }
+
+int qcow2_detect_metadata_preallocation(BlockDriverState *bs)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int64_t i, end_cluster, cluster_count = 0, threshold;
+    int64_t file_length, real_allocation, real_clusters;
+
+    file_length = bdrv_getlength(bs->file->bs);
+    if (file_length < 0) {
+        return file_length;
+    }
+
+    real_allocation = bdrv_get_allocated_file_size(bs->file->bs);
+    if (real_allocation < 0) {
+        return real_allocation;
+    }
+
+    real_clusters = real_allocation / s->cluster_size;
+    threshold = MAX(real_clusters * 10 / 9, real_clusters + 2);
+
+    end_cluster = size_to_clusters(s, file_length);
+    for (i = 0; i < end_cluster && cluster_count < threshold; i++) {
+        uint64_t refcount;
+        int ret = qcow2_get_refcount(bs, i, &refcount);
+        if (ret < 0) {
+            return ret;
+        }
+        cluster_count += !!refcount;
+    }
+
+    return cluster_count >= threshold;
+}
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index a6ffae89a6..d0e7fa9311 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -24,7 +24,6 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
-#include "block/block_int.h"
 #include "qcow2.h"
 #include "qemu/bswap.h"
 #include "qemu/error-report.h"
diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
new file mode 100644
index 0000000000..3b1e63fe41
--- /dev/null
+++ b/block/qcow2-threads.c
@@ -0,0 +1,268 @@
+/*
+ * Threaded data processing for Qcow2: compression, encryption
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ * Copyright (c) 2018 Virtuozzo International GmbH. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+
+#define ZLIB_CONST
+#include <zlib.h>
+
+#include "qcow2.h"
+#include "block/thread-pool.h"
+#include "crypto.h"
+
+static int coroutine_fn
+qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, void *arg)
+{
+    int ret;
+    BDRVQcow2State *s = bs->opaque;
+    ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+
+    qemu_co_mutex_lock(&s->lock);
+    while (s->nb_threads >= QCOW2_MAX_THREADS) {
+        qemu_co_queue_wait(&s->thread_task_queue, &s->lock);
+    }
+    s->nb_threads++;
+    qemu_co_mutex_unlock(&s->lock);
+
+    ret = thread_pool_submit_co(pool, func, arg);
+
+    qemu_co_mutex_lock(&s->lock);
+    s->nb_threads--;
+    qemu_co_queue_next(&s->thread_task_queue);
+    qemu_co_mutex_unlock(&s->lock);
+
+    return ret;
+}
+
+
+/*
+ * Compression
+ */
+
+typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size,
+                                     const void *src, size_t src_size);
+typedef struct Qcow2CompressData {
+    void *dest;
+    size_t dest_size;
+    const void *src;
+    size_t src_size;
+    ssize_t ret;
+
+    Qcow2CompressFunc func;
+} Qcow2CompressData;
+
+/*
+ * qcow2_compress()
+ *
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: compressed size on success
+ *          -ENOMEM destination buffer is not enough to store compressed data
+ *          -EIO    on any other error
+ */
+static ssize_t qcow2_compress(void *dest, size_t dest_size,
+                              const void *src, size_t src_size)
+{
+    ssize_t ret;
+    z_stream strm;
+
+    /* best compression, small window, no zlib header */
+    memset(&strm, 0, sizeof(strm));
+    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+                       -12, 9, Z_DEFAULT_STRATEGY);
+    if (ret != Z_OK) {
+        return -EIO;
+    }
+
+    /*
+     * strm.next_in is not const in old zlib versions, such as those used on
+     * OpenBSD/NetBSD, so cast the const away
+     */
+    strm.avail_in = src_size;
+    strm.next_in = (void *) src;
+    strm.avail_out = dest_size;
+    strm.next_out = dest;
+
+    ret = deflate(&strm, Z_FINISH);
+    if (ret == Z_STREAM_END) {
+        ret = dest_size - strm.avail_out;
+    } else {
+        ret = (ret == Z_OK ? -ENOMEM : -EIO);
+    }
+
+    deflateEnd(&strm);
+
+    return ret;
+}
+
+/*
+ * qcow2_decompress()
+ *
+ * Decompress some data (not more than @src_size bytes) to produce exactly
+ * @dest_size bytes.
+ *
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: 0 on success
+ *          -1 on fail
+ */
+static ssize_t qcow2_decompress(void *dest, size_t dest_size,
+                                const void *src, size_t src_size)
+{
+    int ret = 0;
+    z_stream strm;
+
+    memset(&strm, 0, sizeof(strm));
+    strm.avail_in = src_size;
+    strm.next_in = (void *) src;
+    strm.avail_out = dest_size;
+    strm.next_out = dest;
+
+    ret = inflateInit2(&strm, -12);
+    if (ret != Z_OK) {
+        return -1;
+    }
+
+    ret = inflate(&strm, Z_FINISH);
+    if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) {
+        /*
+         * We approve Z_BUF_ERROR because we need @dest buffer to be filled, but
+         * @src buffer may be processed partly (because in qcow2 we know size of
+         * compressed data with precision of one sector)
+         */
+        ret = -1;
+    }
+
+    inflateEnd(&strm);
+
+    return ret;
+}
+
+static int qcow2_compress_pool_func(void *opaque)
+{
+    Qcow2CompressData *data = opaque;
+
+    data->ret = data->func(data->dest, data->dest_size,
+                           data->src, data->src_size);
+
+    return 0;
+}
+
+static ssize_t coroutine_fn
+qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size,
+                     const void *src, size_t src_size, Qcow2CompressFunc func)
+{
+    Qcow2CompressData arg = {
+        .dest = dest,
+        .dest_size = dest_size,
+        .src = src,
+        .src_size = src_size,
+        .func = func,
+    };
+
+    qcow2_co_process(bs, qcow2_compress_pool_func, &arg);
+
+    return arg.ret;
+}
+
+ssize_t coroutine_fn
+qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
+                  const void *src, size_t src_size)
+{
+    return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
+                                qcow2_compress);
+}
+
+ssize_t coroutine_fn
+qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
+                    const void *src, size_t src_size)
+{
+    return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
+                                qcow2_decompress);
+}
+
+
+/*
+ * Cryptography
+ */
+
+/*
+ * Qcow2EncDecFunc: common prototype of qcrypto_block_encrypt() and
+ * qcrypto_block_decrypt() functions.
+ */
+typedef int (*Qcow2EncDecFunc)(QCryptoBlock *block, uint64_t offset,
+                               uint8_t *buf, size_t len, Error **errp);
+
+typedef struct Qcow2EncDecData {
+    QCryptoBlock *block;
+    uint64_t offset;
+    uint8_t *buf;
+    size_t len;
+
+    Qcow2EncDecFunc func;
+} Qcow2EncDecData;
+
+static int qcow2_encdec_pool_func(void *opaque)
+{
+    Qcow2EncDecData *data = opaque;
+
+    return data->func(data->block, data->offset, data->buf, data->len, NULL);
+}
+
+static int coroutine_fn
+qcow2_co_encdec(BlockDriverState *bs, uint64_t file_cluster_offset,
+                  uint64_t offset, void *buf, size_t len, Qcow2EncDecFunc func)
+{
+    BDRVQcow2State *s = bs->opaque;
+    Qcow2EncDecData arg = {
+        .block = s->crypto,
+        .offset = s->crypt_physical_offset ?
+                      file_cluster_offset + offset_into_cluster(s, offset) :
+                      offset,
+        .buf = buf,
+        .len = len,
+        .func = func,
+    };
+
+    return qcow2_co_process(bs, qcow2_encdec_pool_func, &arg);
+}
+
+int coroutine_fn
+qcow2_co_encrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
+                 uint64_t offset, void *buf, size_t len)
+{
+    return qcow2_co_encdec(bs, file_cluster_offset, offset, buf, len,
+                             qcrypto_block_encrypt);
+}
+
+int coroutine_fn
+qcow2_co_decrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
+                 uint64_t offset, void *buf, size_t len)
+{
+    return qcow2_co_encdec(bs, file_cluster_offset, offset, buf, len,
+                             qcrypto_block_decrypt);
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index d39882785d..9396d490d5 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -24,10 +24,6 @@
 
 #include "qemu/osdep.h"
 
-#define ZLIB_CONST
-#include <zlib.h>
-
-#include "block/block_int.h"
 #include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
@@ -44,7 +40,6 @@
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qapi-visit-block-core.h"
 #include "crypto.h"
-#include "block/thread-pool.h"
 
 /*
   Differences with QCOW:
@@ -302,7 +297,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
             }
             s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
                                            qcow2_crypto_hdr_read_func,
-                                           bs, cflags, 1, errp);
+                                           bs, cflags, QCOW2_MAX_THREADS, errp);
             if (!s->crypto) {
                 return -EINVAL;
             }
@@ -1543,7 +1538,8 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
                 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
             }
             s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
-                                           NULL, NULL, cflags, 1, errp);
+                                           NULL, NULL, cflags,
+                                           QCOW2_MAX_THREADS, errp);
             if (!s->crypto) {
                 ret = -EINVAL;
                 goto fail;
@@ -1698,7 +1694,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
     }
 #endif
 
-    qemu_co_queue_init(&s->compress_wait_queue);
+    qemu_co_queue_init(&s->thread_task_queue);
 
     return ret;
 
@@ -1899,6 +1895,12 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
     unsigned int bytes;
     int status = 0;
 
+    if (!s->metadata_preallocation_checked) {
+        ret = qcow2_detect_metadata_preallocation(bs);
+        s->metadata_preallocation = (ret == 1);
+        s->metadata_preallocation_checked = true;
+    }
+
     bytes = MIN(INT_MAX, count);
     qemu_co_mutex_lock(&s->lock);
     ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
@@ -1921,6 +1923,11 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
     } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
         status |= BDRV_BLOCK_DATA;
     }
+    if (s->metadata_preallocation && (status & BDRV_BLOCK_DATA) &&
+        (status & BDRV_BLOCK_OFFSET_VALID))
+    {
+        status |= BDRV_BLOCK_RECURSE;
+    }
     return status;
 }
 
@@ -1974,8 +1981,6 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
 
     qemu_iovec_init(&hd_qiov, qiov->niov);
 
-    qemu_co_mutex_lock(&s->lock);
-
     while (bytes != 0) {
 
         /* prepare next request */
@@ -1985,7 +1990,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
                             QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
         }
 
+        qemu_co_mutex_lock(&s->lock);
         ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
+        qemu_co_mutex_unlock(&s->lock);
         if (ret < 0) {
             goto fail;
         }
@@ -2000,10 +2007,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
 
             if (bs->backing) {
                 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
-                qemu_co_mutex_unlock(&s->lock);
                 ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
                                      &hd_qiov, 0);
-                qemu_co_mutex_lock(&s->lock);
                 if (ret < 0) {
                     goto fail;
                 }
@@ -2019,11 +2024,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
             break;
 
         case QCOW2_CLUSTER_COMPRESSED:
-            qemu_co_mutex_unlock(&s->lock);
             ret = qcow2_co_preadv_compressed(bs, cluster_offset,
                                              offset, cur_bytes,
                                              &hd_qiov);
-            qemu_co_mutex_lock(&s->lock);
             if (ret < 0) {
                 goto fail;
             }
@@ -2060,11 +2063,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
             }
 
             BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-            qemu_co_mutex_unlock(&s->lock);
             ret = bdrv_co_preadv(s->data_file,
                                  cluster_offset + offset_in_cluster,
                                  cur_bytes, &hd_qiov, 0);
-            qemu_co_mutex_lock(&s->lock);
             if (ret < 0) {
                 goto fail;
             }
@@ -2072,13 +2073,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
                 assert(s->crypto);
                 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
                 assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-                if (qcrypto_block_decrypt(s->crypto,
-                                          (s->crypt_physical_offset ?
-                                           cluster_offset + offset_in_cluster :
-                                           offset),
-                                          cluster_data,
-                                          cur_bytes,
-                                          NULL) < 0) {
+                if (qcow2_co_decrypt(bs, cluster_offset, offset,
+                                     cluster_data, cur_bytes) < 0) {
                     ret = -EIO;
                     goto fail;
                 }
@@ -2099,8 +2095,6 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
     ret = 0;
 
 fail:
-    qemu_co_mutex_unlock(&s->lock);
-
     qemu_iovec_destroy(&hd_qiov);
     qemu_vfree(cluster_data);
 
@@ -2120,6 +2114,11 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
             continue;
         }
 
+        /* If COW regions are handled already, skip this too */
+        if (m->skip_cow) {
+            continue;
+        }
+
         /* The data (middle) region must be immediately after the
          * start region */
         if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
@@ -2145,6 +2144,80 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
     return false;
 }
 
+static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+    int64_t nr;
+    return !bytes ||
+        (!bdrv_is_allocated_above(bs, NULL, offset, bytes, &nr) && nr == bytes);
+}
+
+static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
+{
+    /*
+     * This check is designed for optimization shortcut so it must be
+     * efficient.
+     * Instead of is_zero(), use is_unallocated() as it is faster (but not
+     * as accurate and can result in false negatives).
+     */
+    return is_unallocated(bs, m->offset + m->cow_start.offset,
+                          m->cow_start.nb_bytes) &&
+           is_unallocated(bs, m->offset + m->cow_end.offset,
+                          m->cow_end.nb_bytes);
+}
+
+static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
+{
+    BDRVQcow2State *s = bs->opaque;
+    QCowL2Meta *m;
+
+    if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) {
+        return 0;
+    }
+
+    if (bs->encrypted) {
+        return 0;
+    }
+
+    for (m = l2meta; m != NULL; m = m->next) {
+        int ret;
+
+        if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) {
+            continue;
+        }
+
+        if (!is_zero_cow(bs, m)) {
+            continue;
+        }
+
+        /*
+         * instead of writing zero COW buffers,
+         * efficiently zero out the whole clusters
+         */
+
+        ret = qcow2_pre_write_overlap_check(bs, 0, m->alloc_offset,
+                                            m->nb_clusters * s->cluster_size,
+                                            true);
+        if (ret < 0) {
+            return ret;
+        }
+
+        BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
+        ret = bdrv_co_pwrite_zeroes(s->data_file, m->alloc_offset,
+                                    m->nb_clusters * s->cluster_size,
+                                    BDRV_REQ_NO_FALLBACK);
+        if (ret < 0) {
+            if (ret != -ENOTSUP && ret != -EAGAIN) {
+                return ret;
+            }
+            continue;
+        }
+
+        trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, m->nb_clusters);
+        m->skip_cow = true;
+    }
+    return 0;
+}
+
 static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                          uint64_t bytes, QEMUIOVector *qiov,
                                          int flags)
@@ -2181,11 +2254,20 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
         ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
                                          &cluster_offset, &l2meta);
         if (ret < 0) {
-            goto fail;
+            goto out_locked;
         }
 
         assert((cluster_offset & 511) == 0);
 
+        ret = qcow2_pre_write_overlap_check(bs, 0,
+                                            cluster_offset + offset_in_cluster,
+                                            cur_bytes, true);
+        if (ret < 0) {
+            goto out_locked;
+        }
+
+        qemu_co_mutex_unlock(&s->lock);
+
         qemu_iovec_reset(&hd_qiov);
         qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
 
@@ -2197,7 +2279,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                                    * s->cluster_size);
                 if (cluster_data == NULL) {
                     ret = -ENOMEM;
-                    goto fail;
+                    goto out_unlocked;
                 }
             }
 
@@ -2205,24 +2287,20 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
             qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
 
-            if (qcrypto_block_encrypt(s->crypto,
-                                      (s->crypt_physical_offset ?
-                                       cluster_offset + offset_in_cluster :
-                                       offset),
-                                      cluster_data,
-                                      cur_bytes, NULL) < 0) {
+            if (qcow2_co_encrypt(bs, cluster_offset, offset,
+                                 cluster_data, cur_bytes) < 0) {
                 ret = -EIO;
-                goto fail;
+                goto out_unlocked;
             }
 
             qemu_iovec_reset(&hd_qiov);
             qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
         }
 
-        ret = qcow2_pre_write_overlap_check(bs, 0,
-                cluster_offset + offset_in_cluster, cur_bytes, true);
+        /* Try to efficiently initialize the physical space with zeroes */
+        ret = handle_alloc_space(bs, l2meta);
         if (ret < 0) {
-            goto fail;
+            goto out_unlocked;
         }
 
         /* If we need to do COW, check if it's possible to merge the
@@ -2230,22 +2308,22 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
          * If it's not possible (or not necessary) then write the
          * guest data now. */
         if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) {
-            qemu_co_mutex_unlock(&s->lock);
             BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
             trace_qcow2_writev_data(qemu_coroutine_self(),
                                     cluster_offset + offset_in_cluster);
             ret = bdrv_co_pwritev(s->data_file,
                                   cluster_offset + offset_in_cluster,
                                   cur_bytes, &hd_qiov, 0);
-            qemu_co_mutex_lock(&s->lock);
             if (ret < 0) {
-                goto fail;
+                goto out_unlocked;
             }
         }
 
+        qemu_co_mutex_lock(&s->lock);
+
         ret = qcow2_handle_l2meta(bs, &l2meta, true);
         if (ret) {
-            goto fail;
+            goto out_locked;
         }
 
         bytes -= cur_bytes;
@@ -2254,8 +2332,12 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
         trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
     }
     ret = 0;
+    goto out_locked;
 
-fail:
+out_unlocked:
+    qemu_co_mutex_lock(&s->lock);
+
+out_locked:
     qcow2_handle_l2meta(bs, &l2meta, false);
 
     qemu_co_mutex_unlock(&s->lock);
@@ -3092,7 +3174,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
     }
 
     /* Create BlockBackend to write to the image */
-    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto out;
@@ -3921,171 +4004,6 @@ fail:
     return ret;
 }
 
-/*
- * qcow2_compress()
- *
- * @dest - destination buffer, @dest_size bytes
- * @src - source buffer, @src_size bytes
- *
- * Returns: compressed size on success
- *          -ENOMEM destination buffer is not enough to store compressed data
- *          -EIO    on any other error
- */
-static ssize_t qcow2_compress(void *dest, size_t dest_size,
-                              const void *src, size_t src_size)
-{
-    ssize_t ret;
-    z_stream strm;
-
-    /* best compression, small window, no zlib header */
-    memset(&strm, 0, sizeof(strm));
-    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
-                       -12, 9, Z_DEFAULT_STRATEGY);
-    if (ret != Z_OK) {
-        return -EIO;
-    }
-
-    /* strm.next_in is not const in old zlib versions, such as those used on
-     * OpenBSD/NetBSD, so cast the const away */
-    strm.avail_in = src_size;
-    strm.next_in = (void *) src;
-    strm.avail_out = dest_size;
-    strm.next_out = dest;
-
-    ret = deflate(&strm, Z_FINISH);
-    if (ret == Z_STREAM_END) {
-        ret = dest_size - strm.avail_out;
-    } else {
-        ret = (ret == Z_OK ? -ENOMEM : -EIO);
-    }
-
-    deflateEnd(&strm);
-
-    return ret;
-}
-
-/*
- * qcow2_decompress()
- *
- * Decompress some data (not more than @src_size bytes) to produce exactly
- * @dest_size bytes.
- *
- * @dest - destination buffer, @dest_size bytes
- * @src - source buffer, @src_size bytes
- *
- * Returns: 0 on success
- *          -1 on fail
- */
-static ssize_t qcow2_decompress(void *dest, size_t dest_size,
-                                const void *src, size_t src_size)
-{
-    int ret = 0;
-    z_stream strm;
-
-    memset(&strm, 0, sizeof(strm));
-    strm.avail_in = src_size;
-    strm.next_in = (void *) src;
-    strm.avail_out = dest_size;
-    strm.next_out = dest;
-
-    ret = inflateInit2(&strm, -12);
-    if (ret != Z_OK) {
-        return -1;
-    }
-
-    ret = inflate(&strm, Z_FINISH);
-    if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) {
-        /* We approve Z_BUF_ERROR because we need @dest buffer to be filled, but
-         * @src buffer may be processed partly (because in qcow2 we know size of
-         * compressed data with precision of one sector) */
-        ret = -1;
-    }
-
-    inflateEnd(&strm);
-
-    return ret;
-}
-
-#define MAX_COMPRESS_THREADS 4
-
-typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size,
-                                     const void *src, size_t src_size);
-typedef struct Qcow2CompressData {
-    void *dest;
-    size_t dest_size;
-    const void *src;
-    size_t src_size;
-    ssize_t ret;
-
-    Qcow2CompressFunc func;
-} Qcow2CompressData;
-
-static int qcow2_compress_pool_func(void *opaque)
-{
-    Qcow2CompressData *data = opaque;
-
-    data->ret = data->func(data->dest, data->dest_size,
-                           data->src, data->src_size);
-
-    return 0;
-}
-
-static void qcow2_compress_complete(void *opaque, int ret)
-{
-    qemu_coroutine_enter(opaque);
-}
-
-static ssize_t coroutine_fn
-qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size,
-                     const void *src, size_t src_size, Qcow2CompressFunc func)
-{
-    BDRVQcow2State *s = bs->opaque;
-    BlockAIOCB *acb;
-    ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
-    Qcow2CompressData arg = {
-        .dest = dest,
-        .dest_size = dest_size,
-        .src = src,
-        .src_size = src_size,
-        .func = func,
-    };
-
-    while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) {
-        qemu_co_queue_wait(&s->compress_wait_queue, NULL);
-    }
-
-    s->nb_compress_threads++;
-    acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg,
-                                 qcow2_compress_complete,
-                                 qemu_coroutine_self());
-
-    if (!acb) {
-        s->nb_compress_threads--;
-        return -EINVAL;
-    }
-    qemu_coroutine_yield();
-    s->nb_compress_threads--;
-    qemu_co_queue_next(&s->compress_wait_queue);
-
-    return arg.ret;
-}
-
-static ssize_t coroutine_fn
-qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
-                  const void *src, size_t src_size)
-{
-    return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
-                                qcow2_compress);
-}
-
-static ssize_t coroutine_fn
-qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
-                    const void *src, size_t src_size)
-{
-    return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
-                                qcow2_decompress);
-}
-
 /* XXX: put compressed sectors first, then all the cluster aligned
    tables to avoid losing bytes in alignment */
 static coroutine_fn int
@@ -5089,7 +5007,8 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
     }
 
     if (new_size) {
-        BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
+        BlockBackend *blk = blk_new(bdrv_get_aio_context(bs),
+                                    BLK_PERM_RESIZE, BLK_PERM_ALL);
         ret = blk_insert_bs(blk, bs, errp);
         if (ret < 0) {
             blk_unref(blk);
diff --git a/block/qcow2.h b/block/qcow2.h
index 8d92ef1fee..fc1b0d3c1e 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -28,6 +28,7 @@
 #include "crypto/block.h"
 #include "qemu/coroutine.h"
 #include "qemu/units.h"
+#include "block/block_int.h"
 
 //#define DEBUG_ALLOC
 //#define DEBUG_ALLOC2
@@ -267,6 +268,8 @@ typedef struct Qcow2BitmapHeaderExt {
     uint64_t bitmap_directory_offset;
 } QEMU_PACKED Qcow2BitmapHeaderExt;
 
+#define QCOW2_MAX_THREADS 4
+
 typedef struct BDRVQcow2State {
     int cluster_bits;
     int cluster_size;
@@ -349,10 +352,13 @@ typedef struct BDRVQcow2State {
     char *image_backing_format;
     char *image_data_file;
 
-    CoQueue compress_wait_queue;
-    int nb_compress_threads;
+    CoQueue thread_task_queue;
+    int nb_threads;
 
     BdrvChild *data_file;
+
+    bool metadata_preallocation_checked;
+    bool metadata_preallocation;
 } BDRVQcow2State;
 
 typedef struct Qcow2COWRegion {
@@ -402,6 +408,12 @@ typedef struct QCowL2Meta
      */
     Qcow2COWRegion cow_end;
 
+    /*
+     * Indicates that COW regions are already handled and do not require
+     * any more processing.
+     */
+    bool skip_cow;
+
     /**
      * The I/O vector with the data from the actual guest write request.
      * If non-NULL, this is meant to be merged together with the data
@@ -646,6 +658,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
                                 void *cb_opaque, Error **errp);
 int qcow2_shrink_reftable(BlockDriverState *bs);
 int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
+int qcow2_detect_metadata_preallocation(BlockDriverState *bs);
 
 /* qcow2-cluster.c functions */
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
@@ -737,4 +750,17 @@ void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs,
                                           const char *name,
                                           Error **errp);
 
+ssize_t coroutine_fn
+qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
+                  const void *src, size_t src_size);
+ssize_t coroutine_fn
+qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
+                    const void *src, size_t src_size);
+int coroutine_fn
+qcow2_co_encrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
+                 uint64_t offset, void *buf, size_t len);
+int coroutine_fn
+qcow2_co_decrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
+                 uint64_t offset, void *buf, size_t len);
+
 #endif
diff --git a/block/qed.c b/block/qed.c
index dcdcd62b4a..bb4f5c9863 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -649,7 +649,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
         return -EIO;
     }
 
-    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto out;
diff --git a/block/quorum.c b/block/quorum.c
index 352f729136..133ee18204 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1019,7 +1019,6 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
     child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp);
     if (child == NULL) {
         s->next_child_index--;
-        bdrv_unref(child_bs);
         goto out;
     }
     s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
diff --git a/block/sheepdog.c b/block/sheepdog.c
index cbdfe9ab6e..f76d6ddbbc 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1800,7 +1800,8 @@ static int sd_prealloc(BlockDriverState *bs, int64_t old_size, int64_t new_size,
     void *buf = NULL;
     int ret;
 
-    blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
                   BLK_PERM_ALL);
 
     ret = blk_insert_bs(blk, bs, errp);
diff --git a/block/trace-events b/block/trace-events
index 79ccd8d824..eab51497fc 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -68,6 +68,7 @@ qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d"
 qcow2_writev_data(void *co, uint64_t offset) "co %p offset 0x%" PRIx64
 qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d"
 qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d"
+qcow2_skip_cow(void *co, uint64_t offset, int nb_clusters) "co %p offset 0x%" PRIx64 " nb_clusters %d"
 
 # qcow2-cluster.c
 qcow2_alloc_clusters_offset(void *co, uint64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d"
@@ -91,6 +92,9 @@ qcow2_cache_get_done(void *co, int c, int i) "co %p is_l2_cache %d index %d"
 qcow2_cache_flush(void *co, int c) "co %p is_l2_cache %d"
 qcow2_cache_entry_flush(void *co, int c, int i) "co %p is_l2_cache %d index %d"
 
+# qcow2-refcount.c
+qcow2_process_discards_failed_region(uint64_t offset, uint64_t bytes, int ret) "offset 0x%" PRIx64 " bytes 0x%" PRIx64 " ret %d"
+
 # qed-l2-cache.c
 qed_alloc_l2_cache_entry(void *l2_cache, void *entry) "l2_cache %p entry %p"
 qed_unref_l2_cache_entry(void *entry, int ref) "entry %p ref %d"
diff --git a/block/vdi.c b/block/vdi.c
index d7ef6628e7..b9845a4cbd 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -803,7 +803,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
         goto exit;
     }
 
-    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs_file),
+                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(blk, bs_file, errp);
     if (ret < 0) {
         goto exit;
diff --git a/block/vhdx.c b/block/vhdx.c
index a143a57657..d6070b6fa8 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1900,7 +1900,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,
         return -EIO;
     }
 
-    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto delete_and_exit;
diff --git a/block/vmdk.c b/block/vmdk.c
index de8cb859f8..51067c774f 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2356,7 +2356,8 @@ static BlockBackend *vmdk_co_create_cb(int64_t size, int idx,
     if (!bs) {
         return NULL;
     }
-    blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
                   BLK_PERM_ALL);
     if (blk_insert_bs(blk, bs, errp)) {
         bdrv_unref(bs);
diff --git a/block/vpc.c b/block/vpc.c
index 0c279b87c8..d4776ee8a5 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -1011,7 +1011,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
         return -EIO;
     }
 
-    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto out;
diff --git a/blockdev.c b/blockdev.c
index 79fbac8450..3f44b891eb 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -574,7 +574,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
     if ((!file || !*file) && !qdict_size(bs_opts)) {
         BlockBackendRootState *blk_rs;
 
-        blk = blk_new(0, BLK_PERM_ALL);
+        blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
         blk_rs = blk_get_root_state(blk);
         blk_rs->open_flags    = bdrv_flags;
         blk_rs->read_only     = read_only;
@@ -1535,6 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common,
                              DO_UPCAST(ExternalSnapshotState, common, common);
     TransactionAction *action = common->action;
     AioContext *aio_context;
+    int ret;
 
     /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar
      * purpose but a different set of parameters */
@@ -1674,7 +1675,10 @@ static void external_snapshot_prepare(BlkActionState *common,
         goto out;
     }
 
-    bdrv_set_aio_context(state->new_bs, aio_context);
+    ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp);
+    if (ret < 0) {
+        goto out;
+    }
 
     /* This removes our old bs and adds the new bs. This is an operation that
      * can fail, so we need to do it in .prepare; undoing it for abort is
@@ -2112,11 +2116,10 @@ static void block_dirty_bitmap_disable_abort(BlkActionState *common)
     }
 }
 
-static BdrvDirtyBitmap *do_block_dirty_bitmap_merge(const char *node,
-                                                    const char *target,
-                                                    strList *bitmaps,
-                                                    HBitmap **backup,
-                                                    Error **errp);
+static BdrvDirtyBitmap *do_block_dirty_bitmap_merge(
+        const char *node, const char *target,
+        BlockDirtyBitmapMergeSourceList *bitmaps,
+        HBitmap **backup, Error **errp);
 
 static void block_dirty_bitmap_merge_prepare(BlkActionState *common,
                                              Error **errp)
@@ -2965,15 +2968,14 @@ void qmp_block_dirty_bitmap_disable(const char *node, const char *name,
     bdrv_disable_dirty_bitmap(bitmap);
 }
 
-static BdrvDirtyBitmap *do_block_dirty_bitmap_merge(const char *node,
-                                                    const char *target,
-                                                    strList *bitmaps,
-                                                    HBitmap **backup,
-                                                    Error **errp)
+static BdrvDirtyBitmap *do_block_dirty_bitmap_merge(
+        const char *node, const char *target,
+        BlockDirtyBitmapMergeSourceList *bitmaps,
+        HBitmap **backup, Error **errp)
 {
     BlockDriverState *bs;
     BdrvDirtyBitmap *dst, *src, *anon;
-    strList *lst;
+    BlockDirtyBitmapMergeSourceList *lst;
     Error *local_err = NULL;
 
     dst = block_dirty_bitmap_lookup(node, target, &bs, errp);
@@ -2988,11 +2990,28 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_merge(const char *node,
     }
 
     for (lst = bitmaps; lst; lst = lst->next) {
-        src = bdrv_find_dirty_bitmap(bs, lst->value);
-        if (!src) {
-            error_setg(errp, "Dirty bitmap '%s' not found", lst->value);
-            dst = NULL;
-            goto out;
+        switch (lst->value->type) {
+            const char *name, *node;
+        case QTYPE_QSTRING:
+            name = lst->value->u.local;
+            src = bdrv_find_dirty_bitmap(bs, name);
+            if (!src) {
+                error_setg(errp, "Dirty bitmap '%s' not found", name);
+                dst = NULL;
+                goto out;
+            }
+            break;
+        case QTYPE_QDICT:
+            node = lst->value->u.external.node;
+            name = lst->value->u.external.name;
+            src = block_dirty_bitmap_lookup(node, name, NULL, errp);
+            if (!src) {
+                dst = NULL;
+                goto out;
+            }
+            break;
+        default:
+            abort();
         }
 
         bdrv_merge_dirty_bitmap(anon, src, NULL, &local_err);
@@ -3012,7 +3031,8 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_merge(const char *node,
 }
 
 void qmp_block_dirty_bitmap_merge(const char *node, const char *target,
-                                  strList *bitmaps, Error **errp)
+                                  BlockDirtyBitmapMergeSourceList *bitmaps,
+                                  Error **errp)
 {
     do_block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp);
 }
@@ -3138,7 +3158,7 @@ void qmp_block_resize(bool has_device, const char *device,
         goto out;
     }
 
-    blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs), BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto out;
@@ -3424,6 +3444,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
     int flags, job_flags = JOB_DEFAULT;
     int64_t size;
     bool set_backing_hd = false;
+    int ret;
 
     if (!backup->has_speed) {
         backup->speed = 0;
@@ -3450,11 +3471,16 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
         backup->compress = false;
     }
 
-    bs = qmp_get_root_bs(backup->device, errp);
+    bs = bdrv_lookup_bs(backup->device, backup->device, errp);
     if (!bs) {
         return NULL;
     }
 
+    if (!bs->drv) {
+        error_setg(errp, "Device has no medium");
+        return NULL;
+    }
+
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
@@ -3520,13 +3546,16 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
         goto out;
     }
 
-    bdrv_set_aio_context(target_bs, aio_context);
+    ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
+    if (ret < 0) {
+        bdrv_unref(target_bs);
+        goto out;
+    }
 
     if (set_backing_hd) {
         bdrv_set_backing_hd(target_bs, source, &local_err);
         if (local_err) {
-            bdrv_unref(target_bs);
-            goto out;
+            goto unref;
         }
     }
 
@@ -3534,11 +3563,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
         bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
         if (!bmap) {
             error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap);
-            bdrv_unref(target_bs);
-            goto out;
+            goto unref;
         }
         if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) {
-            goto out;
+            goto unref;
         }
     }
     if (!backup->auto_finalize) {
@@ -3552,12 +3580,13 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
                             backup->sync, bmap, backup->compress,
                             backup->on_source_error, backup->on_target_error,
                             job_flags, NULL, NULL, txn, &local_err);
-    bdrv_unref(target_bs);
     if (local_err != NULL) {
         error_propagate(errp, local_err);
-        goto out;
+        goto unref;
     }
 
+unref:
+    bdrv_unref(target_bs);
 out:
     aio_context_release(aio_context);
     return job;
@@ -3593,6 +3622,7 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn,
     AioContext *aio_context;
     BlockJob *job = NULL;
     int job_flags = JOB_DEFAULT;
+    int ret;
 
     if (!backup->has_speed) {
         backup->speed = 0;
@@ -3629,16 +3659,9 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn,
         goto out;
     }
 
-    if (bdrv_get_aio_context(target_bs) != aio_context) {
-        if (!bdrv_has_blk(target_bs)) {
-            /* The target BDS is not attached, we can safely move it to another
-             * AioContext. */
-            bdrv_set_aio_context(target_bs, aio_context);
-        } else {
-            error_setg(errp, "Target is attached to a different thread from "
-                             "source.");
-            goto out;
-        }
+    ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
+    if (ret < 0) {
+        goto out;
     }
 
     if (backup->has_bitmap) {
@@ -3811,6 +3834,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
     int flags;
     int64_t size;
     const char *format = arg->format;
+    int ret;
 
     bs = qmp_get_root_bs(arg->device, errp);
     if (!bs) {
@@ -3911,7 +3935,11 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
         goto out;
     }
 
-    bdrv_set_aio_context(target_bs, aio_context);
+    ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
+    if (ret < 0) {
+        bdrv_unref(target_bs);
+        goto out;
+    }
 
     blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
                            arg->has_replaces, arg->replaces, arg->sync,
@@ -3955,6 +3983,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
     AioContext *aio_context;
     BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
     Error *local_err = NULL;
+    int ret;
 
     bs = qmp_get_root_bs(device, errp);
     if (!bs) {
@@ -3969,7 +3998,10 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
-    bdrv_set_aio_context(target_bs, aio_context);
+    ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
+    if (ret < 0) {
+        goto out;
+    }
 
     blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs,
                            has_replaces, replaces, sync, backing_mode,
@@ -3985,7 +4017,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
                            has_auto_dismiss, auto_dismiss,
                            &local_err);
     error_propagate(errp, local_err);
-
+out:
     aio_context_release(aio_context);
 }
 
@@ -4475,7 +4507,7 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
     old_context = bdrv_get_aio_context(bs);
     aio_context_acquire(old_context);
 
-    bdrv_set_aio_context(bs, new_context);
+    bdrv_try_set_aio_context(bs, new_context, errp);
 
     aio_context_release(old_context);
 }
diff --git a/blockjob.c b/blockjob.c
index 9ca942ba01..931d675c0c 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -204,14 +204,20 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
 {
     BdrvChild *c;
 
-    c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
-                               job, errp);
+    bdrv_ref(bs);
+    if (job->job.aio_context != qemu_get_aio_context()) {
+        aio_context_release(job->job.aio_context);
+    }
+    c = bdrv_root_attach_child(bs, name, &child_job, job->job.aio_context,
+                               perm, shared_perm, job, errp);
+    if (job->job.aio_context != qemu_get_aio_context()) {
+        aio_context_acquire(job->job.aio_context);
+    }
     if (c == NULL) {
         return -EPERM;
     }
 
     job->nodes = g_slist_prepend(job->nodes, c);
-    bdrv_ref(bs);
     bdrv_op_block_all(bs, job->blocker);
 
     return 0;
@@ -392,7 +398,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
         job_id = bdrv_get_device_name(bs);
     }
 
-    blk = blk_new(perm, shared_perm);
+    blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         blk_unref(blk);
diff --git a/configure b/configure
index 528b9ff705..b091b82cb3 100755
--- a/configure
+++ b/configure
@@ -198,7 +198,7 @@ supported_kvm_target() {
         i386:i386 | i386:x86_64 | i386:x32 | \
         x86_64:i386 | x86_64:x86_64 | x86_64:x32 | \
         mips:mips | mipsel:mips | \
-        ppc:ppc | ppc64:ppc | ppc:ppc64 | ppc64:ppc64 | \
+        ppc:ppc | ppc64:ppc | ppc:ppc64 | ppc64:ppc64 | ppc64:ppc64le | \
         s390x:s390x)
             return 0
         ;;
@@ -502,8 +502,11 @@ cross_cc_arm="arm-linux-gnueabihf-gcc"
 cross_cc_cflags_armeb="-mbig-endian"
 cross_cc_i386="i386-pc-linux-gnu-gcc"
 cross_cc_cflags_i386=""
-cross_cc_powerpc="powerpc-linux-gnu-gcc"
-cross_cc_powerpc="powerpc-linux-gnu-gcc"
+cross_cc_ppc="powerpc-linux-gnu-gcc"
+cross_cc_cflags_ppc="-m32"
+cross_cc_ppc64="powerpc-linux-gnu-gcc"
+cross_cc_cflags_ppc64="-m64"
+cross_cc_ppc64le="powerpc64le-linux-gnu-gcc"
 
 enabled_cross_compilers=""
 
@@ -700,7 +703,11 @@ elif check_define __sparc__ ; then
   fi
 elif check_define _ARCH_PPC ; then
   if check_define _ARCH_PPC64 ; then
-    cpu="ppc64"
+    if check_define _LITTLE_ENDIAN ; then
+      cpu="ppc64le"
+    else
+      cpu="ppc64"
+    fi
   else
     cpu="ppc"
   fi
@@ -731,10 +738,14 @@ ARCH=
 # Note that this case should only have supported host CPUs, not guests.
 case "$cpu" in
   ppc|ppc64|s390|s390x|sparc64|x32|riscv32|riscv64)
-    cpu="$cpu"
     supported_cpu="yes"
     eval "cross_cc_${cpu}=\$host_cc"
   ;;
+  ppc64le)
+    ARCH="ppc64"
+    supported_cpu="yes"
+    cross_cc_ppc64le=$host_cc
+  ;;
   i386|i486|i586|i686|i86pc|BePC)
     cpu="i386"
     supported_cpu="yes"
@@ -1538,44 +1549,44 @@ case "$cpu" in
     ppc)
            CPU_CFLAGS="-m32"
            LDFLAGS="-m32 $LDFLAGS"
-           cross_cc_powerpc=$cc
-           cross_cc_cflags_powerpc=$CPU_CFLAGS
+           cross_cc_ppc=$cc
+           cross_cc_cflags_ppc="$CPU_CFLAGS"
            ;;
     ppc64)
            CPU_CFLAGS="-m64"
            LDFLAGS="-m64 $LDFLAGS"
            cross_cc_ppc64=$cc
-           cross_cc_cflags_ppc64=$CPU_CFLAGS
+           cross_cc_cflags_ppc64="$CPU_CFLAGS"
            ;;
     sparc)
            CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc"
            LDFLAGS="-m32 -mv8plus $LDFLAGS"
            cross_cc_sparc=$cc
-           cross_cc_cflags_sparc=$CPU_CFLAGS
+           cross_cc_cflags_sparc="$CPU_CFLAGS"
            ;;
     sparc64)
            CPU_CFLAGS="-m64 -mcpu=ultrasparc"
            LDFLAGS="-m64 $LDFLAGS"
            cross_cc_sparc64=$cc
-           cross_cc_cflags_sparc64=$CPU_CFLAGS
+           cross_cc_cflags_sparc64="$CPU_CFLAGS"
            ;;
     s390)
            CPU_CFLAGS="-m31"
            LDFLAGS="-m31 $LDFLAGS"
            cross_cc_s390=$cc
-           cross_cc_cflags_s390=$CPU_CFLAGS
+           cross_cc_cflags_s390="$CPU_CFLAGS"
            ;;
     s390x)
            CPU_CFLAGS="-m64"
            LDFLAGS="-m64 $LDFLAGS"
            cross_cc_s390x=$cc
-           cross_cc_cflags_s390x=$CPU_CFLAGS
+           cross_cc_cflags_s390x="$CPU_CFLAGS"
            ;;
     i386)
            CPU_CFLAGS="-m32"
            LDFLAGS="-m32 $LDFLAGS"
            cross_cc_i386=$cc
-           cross_cc_cflags_i386=$CPU_CFLAGS
+           cross_cc_cflags_i386="$CPU_CFLAGS"
            ;;
     x86_64)
            # ??? Only extremely old AMD cpus do not have cmpxchg16b.
@@ -1584,13 +1595,13 @@ case "$cpu" in
            CPU_CFLAGS="-m64 -mcx16"
            LDFLAGS="-m64 $LDFLAGS"
            cross_cc_x86_64=$cc
-           cross_cc_cflags_x86_64=$CPU_CFLAGS
+           cross_cc_cflags_x86_64="$CPU_CFLAGS"
            ;;
     x32)
            CPU_CFLAGS="-mx32"
            LDFLAGS="-mx32 $LDFLAGS"
            cross_cc_i386=$cc
-           cross_cc_cflags_i386=$CPU_CFLAGS
+           cross_cc_cflags_i386="$CPU_CFLAGS"
            ;;
     # No special flags required for other host CPUs
 esac
@@ -4099,6 +4110,13 @@ libs_softmmu="$libs_softmmu $fdt_libs"
 ##########################################
 # opengl probe (for sdl2, gtk, milkymist-tmu2)
 
+gbm="no"
+if $pkg_config gbm; then
+    gbm_cflags="$($pkg_config --cflags gbm)"
+    gbm_libs="$($pkg_config --libs gbm)"
+    gbm="yes"
+fi
+
 if test "$opengl" != "no" ; then
   opengl_pkgs="epoxy gbm"
   if $pkg_config $opengl_pkgs; then
@@ -6191,7 +6209,7 @@ if { test "$cpu" = "i386" || test "$cpu" = "x86_64"; } && \
         fi
     done
 fi
-if test "$cpu" = "ppc64" && test "$targetos" != "Darwin" ; then
+if test "$ARCH" = "ppc64" && test "$targetos" != "Darwin" ; then
   roms="$roms spapr-rtas"
 fi
 
@@ -6434,8 +6452,6 @@ echo "gcov              $gcov_tool"
 echo "gcov enabled      $gcov"
 echo "TPM support       $tpm"
 echo "libssh2 support   $libssh2"
-echo "TPM passthrough   $tpm_passthrough"
-echo "TPM emulator      $tpm_emulator"
 echo "QOM debugging     $qom_cast_debug"
 echo "Live block migration $live_block_migration"
 echo "lzo support       $lzo"
@@ -6964,6 +6980,13 @@ if test "$opengl" = "yes" ; then
   fi
 fi
 
+if test "$gbm" = "yes" ; then
+    echo "CONFIG_GBM=y" >> $config_host_mak
+    echo "GBM_LIBS=$gbm_libs" >> $config_host_mak
+    echo "GBM_CFLAGS=$gbm_cflags" >> $config_host_mak
+fi
+
+
 if test "$malloc_trim" = "yes" ; then
   echo "CONFIG_MALLOC_TRIM=y" >> $config_host_mak
 fi
@@ -7133,14 +7156,6 @@ fi
 
 if test "$tpm" = "yes"; then
   echo 'CONFIG_TPM=$(CONFIG_SOFTMMU)' >> $config_host_mak
-  # TPM passthrough support?
-  if test "$tpm_passthrough" = "yes"; then
-    echo "CONFIG_TPM_PASSTHROUGH=y" >> $config_host_mak
-  fi
-  # TPM emulator support?
-  if test "$tpm_emulator" = "yes"; then
-    echo "CONFIG_TPM_EMULATOR=y" >> $config_host_mak
-  fi
 fi
 
 echo "TRACE_BACKENDS=$trace_backends" >> $config_host_mak
@@ -7378,7 +7393,7 @@ if test "$linux" = "yes" ; then
   i386|x86_64|x32)
     linux_arch=x86
     ;;
-  ppc|ppc64)
+  ppc|ppc64|ppc64le)
     linux_arch=powerpc
     ;;
   s390x)
@@ -7539,7 +7554,8 @@ case "$target_name" in
   ;;
   ppc)
     gdb_xml_files="power-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
-    target_compiler=$cross_cc_powerpc
+    target_compiler=$cross_cc_ppc
+    target_compiler_cflags="$cross_cc_cflags_ppc"
   ;;
   ppc64)
     TARGET_BASE_ARCH=ppc
@@ -7547,6 +7563,7 @@ case "$target_name" in
     mttcg=yes
     gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml"
     target_compiler=$cross_cc_ppc64
+    target_compiler_cflags="$cross_cc_cflags_ppc64"
   ;;
   ppc64le)
     TARGET_ARCH=ppc64
@@ -7859,6 +7876,9 @@ echo "QEMU_CFLAGS+=$cflags" >> $config_target_mak
 
 done # for target in $targets
 
+echo "PIXMAN_CFLAGS=$pixman_cflags" >> $config_host_mak
+echo "PIXMAN_LIBS=$pixman_libs" >> $config_host_mak
+
 if test -n "$enabled_cross_compilers"; then
     echo
     echo "NOTE: cross-compilers enabled: $enabled_cross_compilers"
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
index c56f2dfe44..443b7e08c3 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -130,6 +130,7 @@ vu_request_to_string(unsigned int req)
         REQ(VHOST_USER_POSTCOPY_END),
         REQ(VHOST_USER_GET_INFLIGHT_FD),
         REQ(VHOST_USER_SET_INFLIGHT_FD),
+        REQ(VHOST_USER_GPU_SET_SOCKET),
         REQ(VHOST_USER_MAX),
     };
 #undef REQ
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
index 78b33306e8..3b888ff0a5 100644
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -94,6 +94,7 @@ typedef enum VhostUserRequest {
     VHOST_USER_POSTCOPY_END     = 30,
     VHOST_USER_GET_INFLIGHT_FD = 31,
     VHOST_USER_SET_INFLIGHT_FD = 32,
+    VHOST_USER_GPU_SET_SOCKET = 33,
     VHOST_USER_MAX
 } VhostUserRequest;
 
diff --git a/contrib/vhost-user-gpu/50-qemu-gpu.json.in b/contrib/vhost-user-gpu/50-qemu-gpu.json.in
new file mode 100644
index 0000000000..658b545864
--- /dev/null
+++ b/contrib/vhost-user-gpu/50-qemu-gpu.json.in
@@ -0,0 +1,5 @@
+{
+  "description": "QEMU vhost-user-gpu",
+  "type": "gpu",
+  "binary": "@libexecdir@/vhost-user-gpu",
+}
diff --git a/contrib/vhost-user-gpu/Makefile.objs b/contrib/vhost-user-gpu/Makefile.objs
new file mode 100644
index 0000000000..6170c919e4
--- /dev/null
+++ b/contrib/vhost-user-gpu/Makefile.objs
@@ -0,0 +1,10 @@
+vhost-user-gpu-obj-y = main.o virgl.o vugbm.o
+
+main.o-cflags := $(PIXMAN_CFLAGS) $(GBM_CFLAGS)
+main.o-libs := $(PIXMAN_LIBS)
+
+virgl.o-cflags := $(VIRGL_CFLAGS) $(GBM_CFLAGS)
+virgl.o-libs := $(VIRGL_LIBS)
+
+vugbm.o-cflags := $(GBM_CFLAGS)
+vugbm.o-libs := $(GBM_LIBS)
diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c
new file mode 100644
index 0000000000..f9e2146b69
--- /dev/null
+++ b/contrib/vhost-user-gpu/main.c
@@ -0,0 +1,1185 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2018
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *     Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/drm.h"
+#include "qapi/error.h"
+#include "qemu/sockets.h"
+
+#include <pixman.h>
+#include <glib-unix.h>
+
+#include "vugpu.h"
+#include "hw/virtio/virtio-gpu-bswap.h"
+#include "hw/virtio/virtio-gpu-pixman.h"
+#include "virgl.h"
+#include "vugbm.h"
+
+struct virtio_gpu_simple_resource {
+    uint32_t resource_id;
+    uint32_t width;
+    uint32_t height;
+    uint32_t format;
+    struct iovec *iov;
+    unsigned int iov_cnt;
+    uint32_t scanout_bitmask;
+    pixman_image_t *image;
+    struct vugbm_buffer buffer;
+    QTAILQ_ENTRY(virtio_gpu_simple_resource) next;
+};
+
+static gboolean opt_print_caps;
+static int opt_fdnum = -1;
+static char *opt_socket_path;
+static char *opt_render_node;
+static gboolean opt_virgl;
+
+static void vg_handle_ctrl(VuDev *dev, int qidx);
+
+static const char *
+vg_cmd_to_string(int cmd)
+{
+#define CMD(cmd) [cmd] = #cmd
+    static const char *vg_cmd_str[] = {
+        CMD(VIRTIO_GPU_UNDEFINED),
+
+        /* 2d commands */
+        CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO),
+        CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D),
+        CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF),
+        CMD(VIRTIO_GPU_CMD_SET_SCANOUT),
+        CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH),
+        CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D),
+        CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING),
+        CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING),
+        CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO),
+        CMD(VIRTIO_GPU_CMD_GET_CAPSET),
+
+        /* 3d commands */
+        CMD(VIRTIO_GPU_CMD_CTX_CREATE),
+        CMD(VIRTIO_GPU_CMD_CTX_DESTROY),
+        CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE),
+        CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE),
+        CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D),
+        CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D),
+        CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D),
+        CMD(VIRTIO_GPU_CMD_SUBMIT_3D),
+
+        /* cursor commands */
+        CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR),
+        CMD(VIRTIO_GPU_CMD_MOVE_CURSOR),
+    };
+#undef REQ
+
+    if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) {
+        return vg_cmd_str[cmd];
+    } else {
+        return "unknown";
+    }
+}
+
+static int
+vg_sock_fd_read(int sock, void *buf, ssize_t buflen)
+{
+    int ret;
+
+    do {
+        ret = read(sock, buf, buflen);
+    } while (ret < 0 && (errno == EINTR || errno == EAGAIN));
+
+    g_warn_if_fail(ret == buflen);
+    return ret;
+}
+
+static void
+vg_sock_fd_close(VuGpu *g)
+{
+    if (g->sock_fd >= 0) {
+        close(g->sock_fd);
+        g->sock_fd = -1;
+    }
+}
+
+static gboolean
+source_wait_cb(gint fd, GIOCondition condition, gpointer user_data)
+{
+    VuGpu *g = user_data;
+
+    if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) {
+        return G_SOURCE_CONTINUE;
+    }
+
+    /* resume */
+    g->wait_ok = 0;
+    vg_handle_ctrl(&g->dev.parent, 0);
+
+    return G_SOURCE_REMOVE;
+}
+
+void
+vg_wait_ok(VuGpu *g)
+{
+    assert(g->wait_ok == 0);
+    g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP,
+                               source_wait_cb, g);
+}
+
+static int
+vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd)
+{
+    ssize_t ret;
+    struct msghdr msg;
+    struct iovec iov;
+    union {
+        struct cmsghdr cmsghdr;
+        char control[CMSG_SPACE(sizeof(int))];
+    } cmsgu;
+    struct cmsghdr *cmsg;
+
+    iov.iov_base = (void *)buf;
+    iov.iov_len = buflen;
+
+    msg.msg_name = NULL;
+    msg.msg_namelen = 0;
+    msg.msg_iov = &iov;
+    msg.msg_iovlen = 1;
+
+    if (fd != -1) {
+        msg.msg_control = cmsgu.control;
+        msg.msg_controllen = sizeof(cmsgu.control);
+
+        cmsg = CMSG_FIRSTHDR(&msg);
+        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+        cmsg->cmsg_level = SOL_SOCKET;
+        cmsg->cmsg_type = SCM_RIGHTS;
+
+        *((int *)CMSG_DATA(cmsg)) = fd;
+    } else {
+        msg.msg_control = NULL;
+        msg.msg_controllen = 0;
+    }
+
+    do {
+        ret = sendmsg(sock, &msg, 0);
+    } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
+
+    g_warn_if_fail(ret == buflen);
+    return ret;
+}
+
+void
+vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd)
+{
+    if (vg_sock_fd_write(vg->sock_fd, msg,
+                         VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) {
+        vg_sock_fd_close(vg);
+    }
+}
+
+bool
+vg_recv_msg(VuGpu *g, uint32_t expect_req, uint32_t expect_size,
+            gpointer payload)
+{
+    uint32_t req, flags, size;
+
+    if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 ||
+        vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 ||
+        vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) {
+        goto err;
+    }
+
+    g_return_val_if_fail(req == expect_req, false);
+    g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false);
+    g_return_val_if_fail(size == expect_size, false);
+
+    if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) {
+        goto err;
+    }
+
+    return true;
+
+err:
+    vg_sock_fd_close(g);
+    return false;
+}
+
+static struct virtio_gpu_simple_resource *
+virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id)
+{
+    struct virtio_gpu_simple_resource *res;
+
+    QTAILQ_FOREACH(res, &g->reslist, next) {
+        if (res->resource_id == resource_id) {
+            return res;
+        }
+    }
+    return NULL;
+}
+
+void
+vg_ctrl_response(VuGpu *g,
+                 struct virtio_gpu_ctrl_command *cmd,
+                 struct virtio_gpu_ctrl_hdr *resp,
+                 size_t resp_len)
+{
+    size_t s;
+
+    if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) {
+        resp->flags |= VIRTIO_GPU_FLAG_FENCE;
+        resp->fence_id = cmd->cmd_hdr.fence_id;
+        resp->ctx_id = cmd->cmd_hdr.ctx_id;
+    }
+    virtio_gpu_ctrl_hdr_bswap(resp);
+    s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len);
+    if (s != resp_len) {
+        g_critical("%s: response size incorrect %zu vs %zu",
+                   __func__, s, resp_len);
+    }
+    vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s);
+    vu_queue_notify(&g->dev.parent, cmd->vq);
+    cmd->finished = true;
+}
+
+void
+vg_ctrl_response_nodata(VuGpu *g,
+                        struct virtio_gpu_ctrl_command *cmd,
+                        enum virtio_gpu_ctrl_type type)
+{
+    struct virtio_gpu_ctrl_hdr resp = {
+        .type = type,
+    };
+
+    vg_ctrl_response(g, cmd, &resp, sizeof(resp));
+}
+
+void
+vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resp_display_info dpy_info = { {} };
+    VhostUserGpuMsg msg = {
+        .request = VHOST_USER_GPU_GET_DISPLAY_INFO,
+        .size = 0,
+    };
+
+    assert(vg->wait_ok == 0);
+
+    vg_send_msg(vg, &msg, -1);
+    if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) {
+        return;
+    }
+
+    vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info));
+}
+
+static void
+vg_resource_create_2d(VuGpu *g,
+                      struct virtio_gpu_ctrl_command *cmd)
+{
+    pixman_format_code_t pformat;
+    struct virtio_gpu_simple_resource *res;
+    struct virtio_gpu_resource_create_2d c2d;
+
+    VUGPU_FILL_CMD(c2d);
+    virtio_gpu_bswap_32(&c2d, sizeof(c2d));
+
+    if (c2d.resource_id == 0) {
+        g_critical("%s: resource id 0 is not allowed", __func__);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+
+    res = virtio_gpu_find_resource(g, c2d.resource_id);
+    if (res) {
+        g_critical("%s: resource already exists %d", __func__, c2d.resource_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+
+    res = g_new0(struct virtio_gpu_simple_resource, 1);
+    res->width = c2d.width;
+    res->height = c2d.height;
+    res->format = c2d.format;
+    res->resource_id = c2d.resource_id;
+
+    pformat = virtio_gpu_get_pixman_format(c2d.format);
+    if (!pformat) {
+        g_critical("%s: host couldn't handle guest format %d",
+                   __func__, c2d.format);
+        g_free(res);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+        return;
+    }
+    vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height);
+    res->image = pixman_image_create_bits(pformat,
+                                          c2d.width,
+                                          c2d.height,
+                                          (uint32_t *)res->buffer.mmap,
+                                          res->buffer.stride);
+    if (!res->image) {
+        g_critical("%s: resource creation failed %d %d %d",
+                   __func__, c2d.resource_id, c2d.width, c2d.height);
+        g_free(res);
+        cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY;
+        return;
+    }
+
+    QTAILQ_INSERT_HEAD(&g->reslist, res, next);
+}
+
+static void
+vg_disable_scanout(VuGpu *g, int scanout_id)
+{
+    struct virtio_gpu_scanout *scanout = &g->scanout[scanout_id];
+    struct virtio_gpu_simple_resource *res;
+
+    if (scanout->resource_id == 0) {
+        return;
+    }
+
+    res = virtio_gpu_find_resource(g, scanout->resource_id);
+    if (res) {
+        res->scanout_bitmask &= ~(1 << scanout_id);
+    }
+
+    scanout->width = 0;
+    scanout->height = 0;
+
+    {
+        VhostUserGpuMsg msg = {
+            .request = VHOST_USER_GPU_SCANOUT,
+            .size = sizeof(VhostUserGpuScanout),
+            .payload.scanout.scanout_id = scanout_id,
+        };
+        vg_send_msg(g, &msg, -1);
+    }
+}
+
+static void
+vg_resource_destroy(VuGpu *g,
+                    struct virtio_gpu_simple_resource *res)
+{
+    int i;
+
+    if (res->scanout_bitmask) {
+        for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) {
+            if (res->scanout_bitmask & (1 << i)) {
+                vg_disable_scanout(g, i);
+            }
+        }
+    }
+
+    vugbm_buffer_destroy(&res->buffer);
+    pixman_image_unref(res->image);
+    QTAILQ_REMOVE(&g->reslist, res, next);
+    g_free(res);
+}
+
+static void
+vg_resource_unref(VuGpu *g,
+                  struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_simple_resource *res;
+    struct virtio_gpu_resource_unref unref;
+
+    VUGPU_FILL_CMD(unref);
+    virtio_gpu_bswap_32(&unref, sizeof(unref));
+
+    res = virtio_gpu_find_resource(g, unref.resource_id);
+    if (!res) {
+        g_critical("%s: illegal resource specified %d",
+                   __func__, unref.resource_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+    vg_resource_destroy(g, res);
+}
+
+int
+vg_create_mapping_iov(VuGpu *g,
+                      struct virtio_gpu_resource_attach_backing *ab,
+                      struct virtio_gpu_ctrl_command *cmd,
+                      struct iovec **iov)
+{
+    struct virtio_gpu_mem_entry *ents;
+    size_t esize, s;
+    int i;
+
+    if (ab->nr_entries > 16384) {
+        g_critical("%s: nr_entries is too big (%d > 16384)",
+                   __func__, ab->nr_entries);
+        return -1;
+    }
+
+    esize = sizeof(*ents) * ab->nr_entries;
+    ents = g_malloc(esize);
+    s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num,
+                   sizeof(*ab), ents, esize);
+    if (s != esize) {
+        g_critical("%s: command data size incorrect %zu vs %zu",
+                   __func__, s, esize);
+        g_free(ents);
+        return -1;
+    }
+
+    *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries);
+    for (i = 0; i < ab->nr_entries; i++) {
+        uint64_t len = ents[i].length;
+        (*iov)[i].iov_len = ents[i].length;
+        (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr);
+        if (!(*iov)[i].iov_base || len != ents[i].length) {
+            g_critical("%s: resource %d element %d",
+                       __func__, ab->resource_id, i);
+            g_free(*iov);
+            g_free(ents);
+            *iov = NULL;
+            return -1;
+        }
+    }
+    g_free(ents);
+    return 0;
+}
+
+static void
+vg_resource_attach_backing(VuGpu *g,
+                           struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_simple_resource *res;
+    struct virtio_gpu_resource_attach_backing ab;
+    int ret;
+
+    VUGPU_FILL_CMD(ab);
+    virtio_gpu_bswap_32(&ab, sizeof(ab));
+
+    res = virtio_gpu_find_resource(g, ab.resource_id);
+    if (!res) {
+        g_critical("%s: illegal resource specified %d",
+                   __func__, ab.resource_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+
+    ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov);
+    if (ret != 0) {
+        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+        return;
+    }
+
+    res->iov_cnt = ab.nr_entries;
+}
+
+static void
+vg_resource_detach_backing(VuGpu *g,
+                           struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_simple_resource *res;
+    struct virtio_gpu_resource_detach_backing detach;
+
+    VUGPU_FILL_CMD(detach);
+    virtio_gpu_bswap_32(&detach, sizeof(detach));
+
+    res = virtio_gpu_find_resource(g, detach.resource_id);
+    if (!res || !res->iov) {
+        g_critical("%s: illegal resource specified %d",
+                   __func__, detach.resource_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+
+    g_free(res->iov);
+    res->iov = NULL;
+    res->iov_cnt = 0;
+}
+
+static void
+vg_transfer_to_host_2d(VuGpu *g,
+                       struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_simple_resource *res;
+    int h;
+    uint32_t src_offset, dst_offset, stride;
+    int bpp;
+    pixman_format_code_t format;
+    struct virtio_gpu_transfer_to_host_2d t2d;
+
+    VUGPU_FILL_CMD(t2d);
+    virtio_gpu_t2d_bswap(&t2d);
+
+    res = virtio_gpu_find_resource(g, t2d.resource_id);
+    if (!res || !res->iov) {
+        g_critical("%s: illegal resource specified %d",
+                   __func__, t2d.resource_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+
+    if (t2d.r.x > res->width ||
+        t2d.r.y > res->height ||
+        t2d.r.width > res->width ||
+        t2d.r.height > res->height ||
+        t2d.r.x + t2d.r.width > res->width ||
+        t2d.r.y + t2d.r.height > res->height) {
+        g_critical("%s: transfer bounds outside resource"
+                   " bounds for resource %d: %d %d %d %d vs %d %d",
+                   __func__, t2d.resource_id, t2d.r.x, t2d.r.y,
+                   t2d.r.width, t2d.r.height, res->width, res->height);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+        return;
+    }
+
+    format = pixman_image_get_format(res->image);
+    bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8;
+    stride = pixman_image_get_stride(res->image);
+
+    if (t2d.offset || t2d.r.x || t2d.r.y ||
+        t2d.r.width != pixman_image_get_width(res->image)) {
+        void *img_data = pixman_image_get_data(res->image);
+        for (h = 0; h < t2d.r.height; h++) {
+            src_offset = t2d.offset + stride * h;
+            dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp);
+
+            iov_to_buf(res->iov, res->iov_cnt, src_offset,
+                       img_data
+                       + dst_offset, t2d.r.width * bpp);
+        }
+    } else {
+        iov_to_buf(res->iov, res->iov_cnt, 0,
+                   pixman_image_get_data(res->image),
+                   pixman_image_get_stride(res->image)
+                   * pixman_image_get_height(res->image));
+    }
+}
+
+static void
+vg_set_scanout(VuGpu *g,
+               struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_simple_resource *res, *ores;
+    struct virtio_gpu_scanout *scanout;
+    struct virtio_gpu_set_scanout ss;
+    int fd;
+
+    VUGPU_FILL_CMD(ss);
+    virtio_gpu_bswap_32(&ss, sizeof(ss));
+
+    if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) {
+        g_critical("%s: illegal scanout id specified %d",
+                   __func__, ss.scanout_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
+        return;
+    }
+
+    if (ss.resource_id == 0) {
+        vg_disable_scanout(g, ss.scanout_id);
+        return;
+    }
+
+    /* create a surface for this scanout */
+    res = virtio_gpu_find_resource(g, ss.resource_id);
+    if (!res) {
+        g_critical("%s: illegal resource specified %d",
+                      __func__, ss.resource_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+
+    if (ss.r.x > res->width ||
+        ss.r.y > res->height ||
+        ss.r.width > res->width ||
+        ss.r.height > res->height ||
+        ss.r.x + ss.r.width > res->width ||
+        ss.r.y + ss.r.height > res->height) {
+        g_critical("%s: illegal scanout %d bounds for"
+                   " resource %d, (%d,%d)+%d,%d vs %d %d",
+                   __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y,
+                   ss.r.width, ss.r.height, res->width, res->height);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+        return;
+    }
+
+    scanout = &g->scanout[ss.scanout_id];
+
+    ores = virtio_gpu_find_resource(g, scanout->resource_id);
+    if (ores) {
+        ores->scanout_bitmask &= ~(1 << ss.scanout_id);
+    }
+
+    res->scanout_bitmask |= (1 << ss.scanout_id);
+    scanout->resource_id = ss.resource_id;
+    scanout->x = ss.r.x;
+    scanout->y = ss.r.y;
+    scanout->width = ss.r.width;
+    scanout->height = ss.r.height;
+
+    struct vugbm_buffer *buffer = &res->buffer;
+
+    if (vugbm_buffer_can_get_dmabuf_fd(buffer)) {
+        VhostUserGpuMsg msg = {
+            .request = VHOST_USER_GPU_DMABUF_SCANOUT,
+            .size = sizeof(VhostUserGpuDMABUFScanout),
+            .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) {
+                .scanout_id = ss.scanout_id,
+                .x = ss.r.x,
+                .y = ss.r.y,
+                .width = ss.r.width,
+                .height = ss.r.height,
+                .fd_width = buffer->width,
+                .fd_height = buffer->height,
+                .fd_stride = buffer->stride,
+                .fd_drm_fourcc = buffer->format
+            }
+        };
+
+        if (vugbm_buffer_get_dmabuf_fd(buffer, &fd)) {
+            vg_send_msg(g, &msg, fd);
+            close(fd);
+        }
+    } else {
+        VhostUserGpuMsg msg = {
+            .request = VHOST_USER_GPU_SCANOUT,
+            .size = sizeof(VhostUserGpuScanout),
+            .payload.scanout = (VhostUserGpuScanout) {
+                .scanout_id = ss.scanout_id,
+                .width = scanout->width,
+                .height = scanout->height
+            }
+        };
+        vg_send_msg(g, &msg, -1);
+    }
+}
+
+static void
+vg_resource_flush(VuGpu *g,
+                  struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_simple_resource *res;
+    struct virtio_gpu_resource_flush rf;
+    pixman_region16_t flush_region;
+    int i;
+
+    VUGPU_FILL_CMD(rf);
+    virtio_gpu_bswap_32(&rf, sizeof(rf));
+
+    res = virtio_gpu_find_resource(g, rf.resource_id);
+    if (!res) {
+        g_critical("%s: illegal resource specified %d\n",
+                   __func__, rf.resource_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+
+    if (rf.r.x > res->width ||
+        rf.r.y > res->height ||
+        rf.r.width > res->width ||
+        rf.r.height > res->height ||
+        rf.r.x + rf.r.width > res->width ||
+        rf.r.y + rf.r.height > res->height) {
+        g_critical("%s: flush bounds outside resource"
+                   " bounds for resource %d: %d %d %d %d vs %d %d\n",
+                   __func__, rf.resource_id, rf.r.x, rf.r.y,
+                   rf.r.width, rf.r.height, res->width, res->height);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+        return;
+    }
+
+    pixman_region_init_rect(&flush_region,
+                            rf.r.x, rf.r.y, rf.r.width, rf.r.height);
+    for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) {
+        struct virtio_gpu_scanout *scanout;
+        pixman_region16_t region, finalregion;
+        pixman_box16_t *extents;
+
+        if (!(res->scanout_bitmask & (1 << i))) {
+            continue;
+        }
+        scanout = &g->scanout[i];
+
+        pixman_region_init(&finalregion);
+        pixman_region_init_rect(&region, scanout->x, scanout->y,
+                                scanout->width, scanout->height);
+
+        pixman_region_intersect(&finalregion, &flush_region, &region);
+
+        extents = pixman_region_extents(&finalregion);
+        size_t width = extents->x2 - extents->x1;
+        size_t height = extents->y2 - extents->y1;
+
+        if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) {
+            VhostUserGpuMsg vmsg = {
+                .request = VHOST_USER_GPU_DMABUF_UPDATE,
+                .size = sizeof(VhostUserGpuUpdate),
+                .payload.update = (VhostUserGpuUpdate) {
+                    .scanout_id = i,
+                    .x = extents->x1,
+                    .y = extents->y1,
+                    .width = width,
+                    .height = height,
+                }
+            };
+            vg_send_msg(g, &vmsg, -1);
+            vg_wait_ok(g);
+        } else {
+            size_t bpp =
+                PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8;
+            size_t size = width * height * bpp;
+
+            void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE +
+                               sizeof(VhostUserGpuUpdate) + size);
+            VhostUserGpuMsg *msg = p;
+            msg->request = VHOST_USER_GPU_UPDATE;
+            msg->size = sizeof(VhostUserGpuUpdate) + size;
+            msg->payload.update = (VhostUserGpuUpdate) {
+                .scanout_id = i,
+                .x = extents->x1,
+                .y = extents->y1,
+                .width = width,
+                .height = height,
+            };
+            pixman_image_t *i =
+                pixman_image_create_bits(pixman_image_get_format(res->image),
+                                         msg->payload.update.width,
+                                         msg->payload.update.height,
+                                         p + offsetof(VhostUserGpuMsg,
+                                                      payload.update.data),
+                                         width * bpp);
+            pixman_image_composite(PIXMAN_OP_SRC,
+                                   res->image, NULL, i,
+                                   extents->x1, extents->y1,
+                                   0, 0, 0, 0,
+                                   width, height);
+            pixman_image_unref(i);
+            vg_send_msg(g, msg, -1);
+            g_free(msg);
+        }
+        pixman_region_fini(&region);
+        pixman_region_fini(&finalregion);
+    }
+    pixman_region_fini(&flush_region);
+}
+
+static void
+vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd)
+{
+    switch (cmd->cmd_hdr.type) {
+    case VIRTIO_GPU_CMD_GET_DISPLAY_INFO:
+        vg_get_display_info(vg, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D:
+        vg_resource_create_2d(vg, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_UNREF:
+        vg_resource_unref(vg, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_FLUSH:
+        vg_resource_flush(vg, cmd);
+        break;
+    case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D:
+        vg_transfer_to_host_2d(vg, cmd);
+        break;
+    case VIRTIO_GPU_CMD_SET_SCANOUT:
+        vg_set_scanout(vg, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING:
+        vg_resource_attach_backing(vg, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
+        vg_resource_detach_backing(vg, cmd);
+        break;
+    /* case VIRTIO_GPU_CMD_GET_EDID: */
+    /*     break */
+    default:
+        g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type);
+        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+        break;
+    }
+    if (!cmd->finished) {
+        vg_ctrl_response_nodata(vg, cmd, cmd->error ? cmd->error :
+                                VIRTIO_GPU_RESP_OK_NODATA);
+    }
+}
+
+static void
+vg_handle_ctrl(VuDev *dev, int qidx)
+{
+    VuGpu *vg = container_of(dev, VuGpu, dev.parent);
+    VuVirtq *vq = vu_get_queue(dev, qidx);
+    struct virtio_gpu_ctrl_command *cmd = NULL;
+    size_t len;
+
+    for (;;) {
+        if (vg->wait_ok != 0) {
+            return;
+        }
+
+        cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command));
+        if (!cmd) {
+            break;
+        }
+        cmd->vq = vq;
+        cmd->error = 0;
+        cmd->finished = false;
+
+        len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num,
+                         0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr));
+        if (len != sizeof(cmd->cmd_hdr)) {
+            g_warning("%s: command size incorrect %zu vs %zu\n",
+                      __func__, len, sizeof(cmd->cmd_hdr));
+        }
+
+        virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr);
+        g_debug("%d %s\n", cmd->cmd_hdr.type,
+                vg_cmd_to_string(cmd->cmd_hdr.type));
+
+        if (vg->virgl) {
+            vg_virgl_process_cmd(vg, cmd);
+        } else {
+            vg_process_cmd(vg, cmd);
+        }
+
+        if (!cmd->finished) {
+            QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next);
+            vg->inflight++;
+        } else {
+            g_free(cmd);
+        }
+    }
+}
+
+static void
+update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data)
+{
+    struct virtio_gpu_simple_resource *res;
+
+    res = virtio_gpu_find_resource(g, resource_id);
+    g_return_if_fail(res != NULL);
+    g_return_if_fail(pixman_image_get_width(res->image) == 64);
+    g_return_if_fail(pixman_image_get_height(res->image) == 64);
+    g_return_if_fail(
+        PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32);
+
+    memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t));
+}
+
+static void
+vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor)
+{
+    bool move = cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR;
+
+    g_debug("%s move:%d\n", G_STRFUNC, move);
+
+    if (move) {
+        VhostUserGpuMsg msg = {
+            .request = cursor->resource_id ?
+                VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE,
+            .size = sizeof(VhostUserGpuCursorPos),
+            .payload.cursor_pos = {
+                .scanout_id = cursor->pos.scanout_id,
+                .x = cursor->pos.x,
+                .y = cursor->pos.y,
+            }
+        };
+        vg_send_msg(g, &msg, -1);
+    } else {
+        VhostUserGpuMsg msg = {
+            .request = VHOST_USER_GPU_CURSOR_UPDATE,
+            .size = sizeof(VhostUserGpuCursorUpdate),
+            .payload.cursor_update = {
+                .pos = {
+                    .scanout_id = cursor->pos.scanout_id,
+                    .x = cursor->pos.x,
+                    .y = cursor->pos.y,
+                },
+                .hot_x = cursor->hot_x,
+                .hot_y = cursor->hot_y,
+            }
+        };
+        if (g->virgl) {
+            vg_virgl_update_cursor_data(g, cursor->resource_id,
+                                        msg.payload.cursor_update.data);
+        } else {
+            update_cursor_data_simple(g, cursor->resource_id,
+                                      msg.payload.cursor_update.data);
+        }
+        vg_send_msg(g, &msg, -1);
+    }
+}
+
+static void
+vg_handle_cursor(VuDev *dev, int qidx)
+{
+    VuGpu *g = container_of(dev, VuGpu, dev.parent);
+    VuVirtq *vq = vu_get_queue(dev, qidx);
+    VuVirtqElement *elem;
+    size_t len;
+    struct virtio_gpu_update_cursor cursor;
+
+    for (;;) {
+        elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
+        if (!elem) {
+            break;
+        }
+        g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num);
+
+        len = iov_to_buf(elem->out_sg, elem->out_num,
+                         0, &cursor, sizeof(cursor));
+        if (len != sizeof(cursor)) {
+            g_warning("%s: cursor size incorrect %zu vs %zu\n",
+                      __func__, len, sizeof(cursor));
+        } else {
+            virtio_gpu_bswap_32(&cursor, sizeof(cursor));
+            vg_process_cursor_cmd(g, &cursor);
+        }
+        vu_queue_push(dev, vq, elem, 0);
+        vu_queue_notify(dev, vq);
+        g_free(elem);
+    }
+}
+
+static void
+vg_panic(VuDev *dev, const char *msg)
+{
+    g_critical("%s\n", msg);
+    exit(1);
+}
+
+static void
+vg_queue_set_started(VuDev *dev, int qidx, bool started)
+{
+    VuVirtq *vq = vu_get_queue(dev, qidx);
+
+    g_debug("queue started %d:%d\n", qidx, started);
+
+    switch (qidx) {
+    case 0:
+        vu_set_queue_handler(dev, vq, started ? vg_handle_ctrl : NULL);
+        break;
+    case 1:
+        vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL);
+        break;
+    default:
+        break;
+    }
+}
+
+static void
+set_gpu_protocol_features(VuGpu *g)
+{
+    uint64_t u64;
+    VhostUserGpuMsg msg = {
+        .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES
+    };
+
+    assert(g->wait_ok == 0);
+    vg_send_msg(g, &msg, -1);
+    if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) {
+        return;
+    }
+
+    msg = (VhostUserGpuMsg) {
+        .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES,
+        .size = sizeof(uint64_t),
+        .payload.u64 = 0
+    };
+    vg_send_msg(g, &msg, -1);
+}
+
+static int
+vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply)
+{
+    VuGpu *g = container_of(dev, VuGpu, dev.parent);
+
+    switch (msg->request) {
+    case VHOST_USER_GPU_SET_SOCKET: {
+        g_return_val_if_fail(msg->fd_num == 1, 1);
+        g_return_val_if_fail(g->sock_fd == -1, 1);
+        g->sock_fd = msg->fds[0];
+        set_gpu_protocol_features(g);
+        return 1;
+    }
+    default:
+        return 0;
+    }
+
+    return 0;
+}
+
+static uint64_t
+vg_get_features(VuDev *dev)
+{
+    uint64_t features = 0;
+
+    if (opt_virgl) {
+        features |= 1 << VIRTIO_GPU_F_VIRGL;
+    }
+
+    return features;
+}
+
+static void
+vg_set_features(VuDev *dev, uint64_t features)
+{
+    VuGpu *g = container_of(dev, VuGpu, dev.parent);
+    bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL);
+
+    if (virgl && !g->virgl_inited) {
+        if (!vg_virgl_init(g)) {
+            vg_panic(dev, "Failed to initialize virgl");
+        }
+        g->virgl_inited = true;
+    }
+
+    g->virgl = virgl;
+}
+
+static int
+vg_get_config(VuDev *dev, uint8_t *config, uint32_t len)
+{
+    VuGpu *g = container_of(dev, VuGpu, dev.parent);
+
+    g_return_val_if_fail(len <= sizeof(struct virtio_gpu_config), -1);
+
+    if (opt_virgl) {
+        g->virtio_config.num_capsets = vg_virgl_get_num_capsets();
+    }
+
+    memcpy(config, &g->virtio_config, len);
+
+    return 0;
+}
+
+static int
+vg_set_config(VuDev *dev, const uint8_t *data,
+              uint32_t offset, uint32_t size,
+              uint32_t flags)
+{
+    VuGpu *g = container_of(dev, VuGpu, dev.parent);
+    struct virtio_gpu_config *config = (struct virtio_gpu_config *)data;
+
+    if (config->events_clear) {
+        g->virtio_config.events_read &= ~config->events_clear;
+    }
+
+    return 0;
+}
+
+static const VuDevIface vuiface = {
+    .set_features = vg_set_features,
+    .get_features = vg_get_features,
+    .queue_set_started = vg_queue_set_started,
+    .process_msg = vg_process_msg,
+    .get_config = vg_get_config,
+    .set_config = vg_set_config,
+};
+
+static void
+vg_destroy(VuGpu *g)
+{
+    struct virtio_gpu_simple_resource *res, *tmp;
+
+    vug_deinit(&g->dev);
+
+    vg_sock_fd_close(g);
+
+    QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
+        vg_resource_destroy(g, res);
+    }
+
+    vugbm_device_destroy(&g->gdev);
+}
+
+static GOptionEntry entries[] = {
+    { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
+      "Print capabilities", NULL },
+    { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
+      "Use inherited fd socket", "FDNUM" },
+    { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
+      "Use UNIX socket path", "PATH" },
+    { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node,
+      "Specify DRM render node", "PATH" },
+    { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl,
+      "Turn virgl rendering on", NULL },
+    { NULL, }
+};
+
+int
+main(int argc, char *argv[])
+{
+    GOptionContext *context;
+    GError *error = NULL;
+    GMainLoop *loop = NULL;
+    int fd;
+    VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 };
+
+    QTAILQ_INIT(&g.reslist);
+    QTAILQ_INIT(&g.fenceq);
+
+    context = g_option_context_new("QEMU vhost-user-gpu");
+    g_option_context_add_main_entries(context, entries, NULL);
+    if (!g_option_context_parse(context, &argc, &argv, &error)) {
+        g_printerr("Option parsing failed: %s\n", error->message);
+        exit(EXIT_FAILURE);
+    }
+    g_option_context_free(context);
+
+    if (opt_print_caps) {
+        g_print("{\n");
+        g_print("  \"type\": \"gpu\",\n");
+        g_print("  \"features\": [\n");
+        g_print("    \"render-node\",\n");
+        g_print("    \"virgl\"\n");
+        g_print("  ]\n");
+        g_print("}\n");
+        exit(EXIT_SUCCESS);
+    }
+
+    g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node);
+    if (opt_render_node && g.drm_rnode_fd == -1) {
+        g_printerr("Failed to open DRM rendernode.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    if (g.drm_rnode_fd >= 0) {
+        if (!vugbm_device_init(&g.gdev, g.drm_rnode_fd)) {
+            g_warning("Failed to init DRM device, using fallback path");
+        }
+    }
+
+    if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) {
+        g_printerr("Please specify either --fd or --socket-path\n");
+        exit(EXIT_FAILURE);
+    }
+
+    if (opt_socket_path) {
+        int lsock = unix_listen(opt_socket_path, &error_fatal);
+        fd = accept(lsock, NULL, NULL);
+        close(lsock);
+    } else {
+        fd = opt_fdnum;
+    }
+    if (fd == -1) {
+        g_printerr("Invalid socket");
+        exit(EXIT_FAILURE);
+    }
+
+    vug_init(&g.dev, fd, vg_panic, &vuiface);
+
+    loop = g_main_loop_new(NULL, FALSE);
+    g_main_loop_run(loop);
+    g_main_loop_unref(loop);
+
+    vg_destroy(&g);
+    if (g.drm_rnode_fd >= 0) {
+        close(g.drm_rnode_fd);
+    }
+
+    return 0;
+}
diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c
new file mode 100644
index 0000000000..43413e29df
--- /dev/null
+++ b/contrib/vhost-user-gpu/virgl.c
@@ -0,0 +1,579 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2018
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *     Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <virglrenderer.h>
+#include "virgl.h"
+
+void
+vg_virgl_update_cursor_data(VuGpu *g, uint32_t resource_id,
+                            gpointer data)
+{
+    uint32_t width, height;
+    uint32_t *cursor;
+
+    cursor = virgl_renderer_get_cursor_data(resource_id, &width, &height);
+    g_return_if_fail(cursor != NULL);
+    g_return_if_fail(width == 64);
+    g_return_if_fail(height == 64);
+
+    memcpy(data, cursor, 64 * 64 * sizeof(uint32_t));
+    free(cursor);
+}
+
+static void
+virgl_cmd_context_create(VuGpu *g,
+                         struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_ctx_create cc;
+
+    VUGPU_FILL_CMD(cc);
+
+    virgl_renderer_context_create(cc.hdr.ctx_id, cc.nlen,
+                                  cc.debug_name);
+}
+
+static void
+virgl_cmd_context_destroy(VuGpu *g,
+                          struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_ctx_destroy cd;
+
+    VUGPU_FILL_CMD(cd);
+
+    virgl_renderer_context_destroy(cd.hdr.ctx_id);
+}
+
+static void
+virgl_cmd_create_resource_2d(VuGpu *g,
+                             struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_create_2d c2d;
+    struct virgl_renderer_resource_create_args args;
+
+    VUGPU_FILL_CMD(c2d);
+
+    args.handle = c2d.resource_id;
+    args.target = 2;
+    args.format = c2d.format;
+    args.bind = (1 << 1);
+    args.width = c2d.width;
+    args.height = c2d.height;
+    args.depth = 1;
+    args.array_size = 1;
+    args.last_level = 0;
+    args.nr_samples = 0;
+    args.flags = VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP;
+    virgl_renderer_resource_create(&args, NULL, 0);
+}
+
+static void
+virgl_cmd_create_resource_3d(VuGpu *g,
+                             struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_create_3d c3d;
+    struct virgl_renderer_resource_create_args args;
+
+    VUGPU_FILL_CMD(c3d);
+
+    args.handle = c3d.resource_id;
+    args.target = c3d.target;
+    args.format = c3d.format;
+    args.bind = c3d.bind;
+    args.width = c3d.width;
+    args.height = c3d.height;
+    args.depth = c3d.depth;
+    args.array_size = c3d.array_size;
+    args.last_level = c3d.last_level;
+    args.nr_samples = c3d.nr_samples;
+    args.flags = c3d.flags;
+    virgl_renderer_resource_create(&args, NULL, 0);
+}
+
+static void
+virgl_cmd_resource_unref(VuGpu *g,
+                         struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_unref unref;
+
+    VUGPU_FILL_CMD(unref);
+
+    virgl_renderer_resource_unref(unref.resource_id);
+}
+
+/* Not yet(?) defined in standard-headers, remove when possible */
+#ifndef VIRTIO_GPU_CAPSET_VIRGL2
+#define VIRTIO_GPU_CAPSET_VIRGL2 2
+#endif
+
+static void
+virgl_cmd_get_capset_info(VuGpu *g,
+                          struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_get_capset_info info;
+    struct virtio_gpu_resp_capset_info resp;
+
+    VUGPU_FILL_CMD(info);
+
+    if (info.capset_index == 0) {
+        resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL;
+        virgl_renderer_get_cap_set(resp.capset_id,
+                                   &resp.capset_max_version,
+                                   &resp.capset_max_size);
+    } else if (info.capset_index == 1) {
+        resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL2;
+        virgl_renderer_get_cap_set(resp.capset_id,
+                                   &resp.capset_max_version,
+                                   &resp.capset_max_size);
+    } else {
+        resp.capset_max_version = 0;
+        resp.capset_max_size = 0;
+    }
+    resp.hdr.type = VIRTIO_GPU_RESP_OK_CAPSET_INFO;
+    vg_ctrl_response(g, cmd, &resp.hdr, sizeof(resp));
+}
+
+uint32_t
+vg_virgl_get_num_capsets(void)
+{
+    uint32_t capset2_max_ver, capset2_max_size;
+    virgl_renderer_get_cap_set(VIRTIO_GPU_CAPSET_VIRGL2,
+                               &capset2_max_ver,
+                               &capset2_max_size);
+
+    return capset2_max_ver ? 2 : 1;
+}
+
+static void
+virgl_cmd_get_capset(VuGpu *g,
+                     struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_get_capset gc;
+    struct virtio_gpu_resp_capset *resp;
+    uint32_t max_ver, max_size;
+
+    VUGPU_FILL_CMD(gc);
+
+    virgl_renderer_get_cap_set(gc.capset_id, &max_ver,
+                               &max_size);
+    resp = g_malloc0(sizeof(*resp) + max_size);
+
+    resp->hdr.type = VIRTIO_GPU_RESP_OK_CAPSET;
+    virgl_renderer_fill_caps(gc.capset_id,
+                             gc.capset_version,
+                             (void *)resp->capset_data);
+    vg_ctrl_response(g, cmd, &resp->hdr, sizeof(*resp) + max_size);
+    g_free(resp);
+}
+
+static void
+virgl_cmd_submit_3d(VuGpu *g,
+                    struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_cmd_submit cs;
+    void *buf;
+    size_t s;
+
+    VUGPU_FILL_CMD(cs);
+
+    buf = g_malloc(cs.size);
+    s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num,
+                   sizeof(cs), buf, cs.size);
+    if (s != cs.size) {
+        g_critical("%s: size mismatch (%zd/%d)", __func__, s, cs.size);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+        goto out;
+    }
+
+    virgl_renderer_submit_cmd(buf, cs.hdr.ctx_id, cs.size / 4);
+
+out:
+    g_free(buf);
+}
+
+static void
+virgl_cmd_transfer_to_host_2d(VuGpu *g,
+                              struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_transfer_to_host_2d t2d;
+    struct virtio_gpu_box box;
+
+    VUGPU_FILL_CMD(t2d);
+
+    box.x = t2d.r.x;
+    box.y = t2d.r.y;
+    box.z = 0;
+    box.w = t2d.r.width;
+    box.h = t2d.r.height;
+    box.d = 1;
+
+    virgl_renderer_transfer_write_iov(t2d.resource_id,
+                                      0,
+                                      0,
+                                      0,
+                                      0,
+                                      (struct virgl_box *)&box,
+                                      t2d.offset, NULL, 0);
+}
+
+static void
+virgl_cmd_transfer_to_host_3d(VuGpu *g,
+                              struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_transfer_host_3d t3d;
+
+    VUGPU_FILL_CMD(t3d);
+
+    virgl_renderer_transfer_write_iov(t3d.resource_id,
+                                      t3d.hdr.ctx_id,
+                                      t3d.level,
+                                      t3d.stride,
+                                      t3d.layer_stride,
+                                      (struct virgl_box *)&t3d.box,
+                                      t3d.offset, NULL, 0);
+}
+
+static void
+virgl_cmd_transfer_from_host_3d(VuGpu *g,
+                                struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_transfer_host_3d tf3d;
+
+    VUGPU_FILL_CMD(tf3d);
+
+    virgl_renderer_transfer_read_iov(tf3d.resource_id,
+                                     tf3d.hdr.ctx_id,
+                                     tf3d.level,
+                                     tf3d.stride,
+                                     tf3d.layer_stride,
+                                     (struct virgl_box *)&tf3d.box,
+                                     tf3d.offset, NULL, 0);
+}
+
+static void
+virgl_resource_attach_backing(VuGpu *g,
+                              struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_attach_backing att_rb;
+    struct iovec *res_iovs;
+    int ret;
+
+    VUGPU_FILL_CMD(att_rb);
+
+    ret = vg_create_mapping_iov(g, &att_rb, cmd, &res_iovs);
+    if (ret != 0) {
+        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+        return;
+    }
+
+    virgl_renderer_resource_attach_iov(att_rb.resource_id,
+                                       res_iovs, att_rb.nr_entries);
+}
+
+static void
+virgl_resource_detach_backing(VuGpu *g,
+                              struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_detach_backing detach_rb;
+    struct iovec *res_iovs = NULL;
+    int num_iovs = 0;
+
+    VUGPU_FILL_CMD(detach_rb);
+
+    virgl_renderer_resource_detach_iov(detach_rb.resource_id,
+                                       &res_iovs,
+                                       &num_iovs);
+    if (res_iovs == NULL || num_iovs == 0) {
+        return;
+    }
+    g_free(res_iovs);
+}
+
+static void
+virgl_cmd_set_scanout(VuGpu *g,
+                      struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_set_scanout ss;
+    struct virgl_renderer_resource_info info;
+    int ret;
+
+    VUGPU_FILL_CMD(ss);
+
+    if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) {
+        g_critical("%s: illegal scanout id specified %d",
+                   __func__, ss.scanout_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
+        return;
+    }
+
+    memset(&info, 0, sizeof(info));
+
+    if (ss.resource_id && ss.r.width && ss.r.height) {
+        ret = virgl_renderer_resource_get_info(ss.resource_id, &info);
+        if (ret == -1) {
+            g_critical("%s: illegal resource specified %d\n",
+                       __func__, ss.resource_id);
+            cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+            return;
+        }
+
+        int fd = -1;
+        if (virgl_renderer_get_fd_for_texture(info.tex_id, &fd) < 0) {
+            g_critical("%s: failed to get fd for texture\n", __func__);
+            cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+            return;
+        }
+        assert(fd >= 0);
+        VhostUserGpuMsg msg = {
+            .request = VHOST_USER_GPU_DMABUF_SCANOUT,
+            .size = sizeof(VhostUserGpuDMABUFScanout),
+            .payload.dmabuf_scanout.scanout_id = ss.scanout_id,
+            .payload.dmabuf_scanout.x =  ss.r.x,
+            .payload.dmabuf_scanout.y =  ss.r.y,
+            .payload.dmabuf_scanout.width = ss.r.width,
+            .payload.dmabuf_scanout.height = ss.r.height,
+            .payload.dmabuf_scanout.fd_width = info.width,
+            .payload.dmabuf_scanout.fd_height = info.height,
+            .payload.dmabuf_scanout.fd_stride = info.stride,
+            .payload.dmabuf_scanout.fd_flags = info.flags,
+            .payload.dmabuf_scanout.fd_drm_fourcc = info.drm_fourcc
+        };
+        vg_send_msg(g, &msg, fd);
+        close(fd);
+    } else {
+        VhostUserGpuMsg msg = {
+            .request = VHOST_USER_GPU_DMABUF_SCANOUT,
+            .size = sizeof(VhostUserGpuDMABUFScanout),
+            .payload.dmabuf_scanout.scanout_id = ss.scanout_id,
+        };
+        g_debug("disable scanout");
+        vg_send_msg(g, &msg, -1);
+    }
+    g->scanout[ss.scanout_id].resource_id = ss.resource_id;
+}
+
+static void
+virgl_cmd_resource_flush(VuGpu *g,
+                         struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_flush rf;
+    int i;
+
+    VUGPU_FILL_CMD(rf);
+
+    if (!rf.resource_id) {
+        g_debug("bad resource id for flush..?");
+        return;
+    }
+    for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) {
+        if (g->scanout[i].resource_id != rf.resource_id) {
+            continue;
+        }
+        VhostUserGpuMsg msg = {
+            .request = VHOST_USER_GPU_DMABUF_UPDATE,
+            .size = sizeof(VhostUserGpuUpdate),
+            .payload.update.scanout_id = i,
+            .payload.update.x = rf.r.x,
+            .payload.update.y = rf.r.y,
+            .payload.update.width = rf.r.width,
+            .payload.update.height = rf.r.height
+        };
+        vg_send_msg(g, &msg, -1);
+        vg_wait_ok(g);
+    }
+}
+
+static void
+virgl_cmd_ctx_attach_resource(VuGpu *g,
+                              struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_ctx_resource att_res;
+
+    VUGPU_FILL_CMD(att_res);
+
+    virgl_renderer_ctx_attach_resource(att_res.hdr.ctx_id, att_res.resource_id);
+}
+
+static void
+virgl_cmd_ctx_detach_resource(VuGpu *g,
+                              struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_ctx_resource det_res;
+
+    VUGPU_FILL_CMD(det_res);
+
+    virgl_renderer_ctx_detach_resource(det_res.hdr.ctx_id, det_res.resource_id);
+}
+
+void vg_virgl_process_cmd(VuGpu *g, struct virtio_gpu_ctrl_command *cmd)
+{
+    virgl_renderer_force_ctx_0();
+    switch (cmd->cmd_hdr.type) {
+    case VIRTIO_GPU_CMD_CTX_CREATE:
+        virgl_cmd_context_create(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_CTX_DESTROY:
+        virgl_cmd_context_destroy(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D:
+        virgl_cmd_create_resource_2d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_CREATE_3D:
+        virgl_cmd_create_resource_3d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_SUBMIT_3D:
+        virgl_cmd_submit_3d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D:
+        virgl_cmd_transfer_to_host_2d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D:
+        virgl_cmd_transfer_to_host_3d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D:
+        virgl_cmd_transfer_from_host_3d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING:
+        virgl_resource_attach_backing(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
+        virgl_resource_detach_backing(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_SET_SCANOUT:
+        virgl_cmd_set_scanout(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_FLUSH:
+        virgl_cmd_resource_flush(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_UNREF:
+        virgl_cmd_resource_unref(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE:
+        /* TODO add security */
+        virgl_cmd_ctx_attach_resource(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE:
+        /* TODO add security */
+        virgl_cmd_ctx_detach_resource(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_GET_CAPSET_INFO:
+        virgl_cmd_get_capset_info(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_GET_CAPSET:
+        virgl_cmd_get_capset(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_GET_DISPLAY_INFO:
+        vg_get_display_info(g, cmd);
+        break;
+    default:
+        g_debug("TODO handle ctrl %x\n", cmd->cmd_hdr.type);
+        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+        break;
+    }
+
+    if (cmd->finished) {
+        return;
+    }
+
+    if (cmd->error) {
+        g_warning("%s: ctrl 0x%x, error 0x%x\n", __func__,
+                  cmd->cmd_hdr.type, cmd->error);
+        vg_ctrl_response_nodata(g, cmd, cmd->error);
+        return;
+    }
+
+    if (!(cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE)) {
+        vg_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
+        return;
+    }
+
+    g_debug("Creating fence id:%" PRId64 " type:%d",
+            cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
+    virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
+}
+
+static void
+virgl_write_fence(void *opaque, uint32_t fence)
+{
+    VuGpu *g = opaque;
+    struct virtio_gpu_ctrl_command *cmd, *tmp;
+
+    QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) {
+        /*
+         * the guest can end up emitting fences out of order
+         * so we should check all fenced cmds not just the first one.
+         */
+        if (cmd->cmd_hdr.fence_id > fence) {
+            continue;
+        }
+        g_debug("FENCE %" PRIu64, cmd->cmd_hdr.fence_id);
+        vg_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
+        QTAILQ_REMOVE(&g->fenceq, cmd, next);
+        g_free(cmd);
+        g->inflight--;
+    }
+}
+
+#if defined(VIRGL_RENDERER_CALLBACKS_VERSION) && \
+    VIRGL_RENDERER_CALLBACKS_VERSION >= 2
+static int
+virgl_get_drm_fd(void *opaque)
+{
+    VuGpu *g = opaque;
+
+    return g->drm_rnode_fd;
+}
+#endif
+
+static struct virgl_renderer_callbacks virgl_cbs = {
+#if defined(VIRGL_RENDERER_CALLBACKS_VERSION) &&    \
+    VIRGL_RENDERER_CALLBACKS_VERSION >= 2
+    .get_drm_fd  = virgl_get_drm_fd,
+    .version     = 2,
+#else
+    .version     = 1,
+#endif
+    .write_fence = virgl_write_fence,
+};
+
+static void
+vg_virgl_poll(VuDev *dev, int condition, void *data)
+{
+    virgl_renderer_poll();
+}
+
+bool
+vg_virgl_init(VuGpu *g)
+{
+    int ret;
+
+    if (g->drm_rnode_fd && virgl_cbs.version == 1) {
+        g_warning("virgl will use the default rendernode");
+    }
+
+    ret = virgl_renderer_init(g,
+                              VIRGL_RENDERER_USE_EGL |
+                              VIRGL_RENDERER_THREAD_SYNC,
+                              &virgl_cbs);
+    if (ret != 0) {
+        return false;
+    }
+
+    ret = virgl_renderer_get_poll_fd();
+    if (ret != -1) {
+        g->renderer_source =
+            vug_source_new(&g->dev, ret, G_IO_IN, vg_virgl_poll, g);
+    }
+
+    return true;
+}
diff --git a/contrib/vhost-user-gpu/virgl.h b/contrib/vhost-user-gpu/virgl.h
new file mode 100644
index 0000000000..f952bc9d4f
--- /dev/null
+++ b/contrib/vhost-user-gpu/virgl.h
@@ -0,0 +1,25 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2018
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *     Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef VUGPU_VIRGL_H_
+#define VUGPU_VIRGL_H_
+
+#include "vugpu.h"
+
+bool vg_virgl_init(VuGpu *g);
+uint32_t vg_virgl_get_num_capsets(void);
+void vg_virgl_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd);
+void vg_virgl_update_cursor_data(VuGpu *g, uint32_t resource_id,
+                                 gpointer data);
+
+#endif
diff --git a/contrib/vhost-user-gpu/vugbm.c b/contrib/vhost-user-gpu/vugbm.c
new file mode 100644
index 0000000000..d3bb82ff0e
--- /dev/null
+++ b/contrib/vhost-user-gpu/vugbm.c
@@ -0,0 +1,328 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * DRM helpers
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "vugbm.h"
+
+static bool
+mem_alloc_bo(struct vugbm_buffer *buf)
+{
+    buf->mmap = g_malloc(buf->width * buf->height * 4);
+    buf->stride = buf->width * 4;
+    return true;
+}
+
+static void
+mem_free_bo(struct vugbm_buffer *buf)
+{
+    g_free(buf->mmap);
+}
+
+static bool
+mem_map_bo(struct vugbm_buffer *buf)
+{
+    return buf->mmap != NULL;
+}
+
+static void
+mem_unmap_bo(struct vugbm_buffer *buf)
+{
+}
+
+static void
+mem_device_destroy(struct vugbm_device *dev)
+{
+}
+
+#ifdef CONFIG_MEMFD
+struct udmabuf_create {
+        uint32_t memfd;
+        uint32_t flags;
+        uint64_t offset;
+        uint64_t size;
+};
+
+#define UDMABUF_CREATE _IOW('u', 0x42, struct udmabuf_create)
+
+static size_t
+udmabuf_get_size(struct vugbm_buffer *buf)
+{
+    return ROUND_UP(buf->width * buf->height * 4, getpagesize());
+}
+
+static bool
+udmabuf_alloc_bo(struct vugbm_buffer *buf)
+{
+    int ret;
+
+    buf->memfd = memfd_create("udmabuf-bo", MFD_ALLOW_SEALING);
+    if (buf->memfd < 0) {
+        return false;
+    }
+
+    ret = ftruncate(buf->memfd, udmabuf_get_size(buf));
+    if (ret < 0) {
+        close(buf->memfd);
+        return false;
+    }
+
+    ret = fcntl(buf->memfd, F_ADD_SEALS, F_SEAL_SHRINK);
+    if (ret < 0) {
+        close(buf->memfd);
+        return false;
+    }
+
+    buf->stride = buf->width * 4;
+
+    return true;
+}
+
+static void
+udmabuf_free_bo(struct vugbm_buffer *buf)
+{
+    close(buf->memfd);
+}
+
+static bool
+udmabuf_map_bo(struct vugbm_buffer *buf)
+{
+    buf->mmap = mmap(NULL, udmabuf_get_size(buf),
+                     PROT_READ | PROT_WRITE, MAP_SHARED, buf->memfd, 0);
+    if (buf->mmap == MAP_FAILED) {
+        return false;
+    }
+
+    return true;
+}
+
+static bool
+udmabuf_get_fd(struct vugbm_buffer *buf, int *fd)
+{
+    struct udmabuf_create create = {
+        .memfd = buf->memfd,
+        .offset = 0,
+        .size = udmabuf_get_size(buf),
+    };
+
+    *fd = ioctl(buf->dev->fd, UDMABUF_CREATE, &create);
+
+    return *fd >= 0;
+}
+
+static void
+udmabuf_unmap_bo(struct vugbm_buffer *buf)
+{
+    munmap(buf->mmap, udmabuf_get_size(buf));
+}
+
+static void
+udmabuf_device_destroy(struct vugbm_device *dev)
+{
+    close(dev->fd);
+}
+#endif
+
+#ifdef CONFIG_GBM
+static bool
+alloc_bo(struct vugbm_buffer *buf)
+{
+    struct gbm_device *dev = buf->dev->dev;
+
+    assert(!buf->bo);
+
+    buf->bo = gbm_bo_create(dev, buf->width, buf->height,
+                            buf->format,
+                            GBM_BO_USE_RENDERING | GBM_BO_USE_LINEAR);
+
+    if (buf->bo) {
+        buf->stride = gbm_bo_get_stride(buf->bo);
+        return true;
+    }
+
+    return false;
+}
+
+static void
+free_bo(struct vugbm_buffer *buf)
+{
+    gbm_bo_destroy(buf->bo);
+}
+
+static bool
+map_bo(struct vugbm_buffer *buf)
+{
+    uint32_t stride;
+
+    buf->mmap = gbm_bo_map(buf->bo, 0, 0, buf->width, buf->height,
+                           GBM_BO_TRANSFER_READ_WRITE, &stride,
+                           &buf->mmap_data);
+
+    assert(stride == buf->stride);
+
+    return buf->mmap != NULL;
+}
+
+static void
+unmap_bo(struct vugbm_buffer *buf)
+{
+    gbm_bo_unmap(buf->bo, buf->mmap_data);
+}
+
+static bool
+get_fd(struct vugbm_buffer *buf, int *fd)
+{
+    *fd = gbm_bo_get_fd(buf->bo);
+
+    return *fd >= 0;
+}
+
+static void
+device_destroy(struct vugbm_device *dev)
+{
+    gbm_device_destroy(dev->dev);
+}
+#endif
+
+void
+vugbm_device_destroy(struct vugbm_device *dev)
+{
+    if (!dev->inited) {
+        return;
+    }
+
+    dev->device_destroy(dev);
+}
+
+bool
+vugbm_device_init(struct vugbm_device *dev, int fd)
+{
+    dev->fd = fd;
+
+#ifdef CONFIG_GBM
+    dev->dev = gbm_create_device(fd);
+#endif
+
+    if (0) {
+        /* nothing */
+    }
+#ifdef CONFIG_GBM
+    else if (dev->dev != NULL) {
+        dev->alloc_bo = alloc_bo;
+        dev->free_bo = free_bo;
+        dev->get_fd = get_fd;
+        dev->map_bo = map_bo;
+        dev->unmap_bo = unmap_bo;
+        dev->device_destroy = device_destroy;
+    }
+#endif
+#ifdef CONFIG_MEMFD
+    else if (g_file_test("/dev/udmabuf", G_FILE_TEST_EXISTS)) {
+        dev->fd = open("/dev/udmabuf", O_RDWR);
+        if (dev->fd < 0) {
+            return false;
+        }
+        g_debug("Using experimental udmabuf backend");
+        dev->alloc_bo = udmabuf_alloc_bo;
+        dev->free_bo = udmabuf_free_bo;
+        dev->get_fd = udmabuf_get_fd;
+        dev->map_bo = udmabuf_map_bo;
+        dev->unmap_bo = udmabuf_unmap_bo;
+        dev->device_destroy = udmabuf_device_destroy;
+    }
+#endif
+    else {
+        g_debug("Using mem fallback");
+        dev->alloc_bo = mem_alloc_bo;
+        dev->free_bo = mem_free_bo;
+        dev->map_bo = mem_map_bo;
+        dev->unmap_bo = mem_unmap_bo;
+        dev->device_destroy = mem_device_destroy;
+        return false;
+    }
+
+    dev->inited = true;
+    return true;
+}
+
+static bool
+vugbm_buffer_map(struct vugbm_buffer *buf)
+{
+    struct vugbm_device *dev = buf->dev;
+
+    return dev->map_bo(buf);
+}
+
+static void
+vugbm_buffer_unmap(struct vugbm_buffer *buf)
+{
+    struct vugbm_device *dev = buf->dev;
+
+    dev->unmap_bo(buf);
+}
+
+bool
+vugbm_buffer_can_get_dmabuf_fd(struct vugbm_buffer *buffer)
+{
+    if (!buffer->dev->get_fd) {
+        return false;
+    }
+
+    return true;
+}
+
+bool
+vugbm_buffer_get_dmabuf_fd(struct vugbm_buffer *buffer, int *fd)
+{
+    if (!vugbm_buffer_can_get_dmabuf_fd(buffer) ||
+        !buffer->dev->get_fd(buffer, fd)) {
+        g_warning("Failed to get dmabuf");
+        return false;
+    }
+
+    if (*fd < 0) {
+        g_warning("error: dmabuf_fd < 0");
+        return false;
+    }
+
+    return true;
+}
+
+bool
+vugbm_buffer_create(struct vugbm_buffer *buffer, struct vugbm_device *dev,
+                    uint32_t width, uint32_t height)
+{
+    buffer->dev = dev;
+    buffer->width = width;
+    buffer->height = height;
+    buffer->format = GBM_FORMAT_XRGB8888;
+    buffer->stride = 0; /* modified during alloc */
+    if (!dev->alloc_bo(buffer)) {
+        g_warning("alloc_bo failed");
+        return false;
+    }
+
+    if (!vugbm_buffer_map(buffer)) {
+        g_warning("map_bo failed");
+        goto err;
+    }
+
+    return true;
+
+err:
+    dev->free_bo(buffer);
+    return false;
+}
+
+void
+vugbm_buffer_destroy(struct vugbm_buffer *buffer)
+{
+    struct vugbm_device *dev = buffer->dev;
+
+    vugbm_buffer_unmap(buffer);
+    dev->free_bo(buffer);
+}
diff --git a/contrib/vhost-user-gpu/vugbm.h b/contrib/vhost-user-gpu/vugbm.h
new file mode 100644
index 0000000000..c0bf27af9b
--- /dev/null
+++ b/contrib/vhost-user-gpu/vugbm.h
@@ -0,0 +1,67 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * GBM helpers
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef VHOST_USER_GPU_GBM_H
+#define VHOST_USER_GPU_GBM_H
+
+#include "qemu/osdep.h"
+
+#ifdef CONFIG_MEMFD
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#endif
+
+#ifdef CONFIG_GBM
+#include <gbm.h>
+#endif
+
+struct vugbm_buffer;
+
+struct vugbm_device {
+    bool inited;
+    int fd;
+#ifdef CONFIG_GBM
+    struct gbm_device *dev;
+#endif
+
+    bool (*alloc_bo)(struct vugbm_buffer *buf);
+    void (*free_bo)(struct vugbm_buffer *buf);
+    bool (*get_fd)(struct vugbm_buffer *buf, int *fd);
+    bool (*map_bo)(struct vugbm_buffer *buf);
+    void (*unmap_bo)(struct vugbm_buffer *buf);
+    void (*device_destroy)(struct vugbm_device *dev);
+};
+
+struct vugbm_buffer {
+    struct vugbm_device *dev;
+
+#ifdef CONFIG_MEMFD
+    int memfd;
+#endif
+#ifdef CONFIG_GBM
+    struct gbm_bo *bo;
+    void *mmap_data;
+#endif
+
+    uint8_t *mmap;
+    uint32_t width;
+    uint32_t height;
+    uint32_t stride;
+    uint32_t format;
+};
+
+bool vugbm_device_init(struct vugbm_device *dev, int fd);
+void vugbm_device_destroy(struct vugbm_device *dev);
+
+bool vugbm_buffer_create(struct vugbm_buffer *buffer, struct vugbm_device *dev,
+                         uint32_t width, uint32_t height);
+bool vugbm_buffer_can_get_dmabuf_fd(struct vugbm_buffer *buffer);
+bool vugbm_buffer_get_dmabuf_fd(struct vugbm_buffer *buffer, int *fd);
+void vugbm_buffer_destroy(struct vugbm_buffer *buffer);
+
+#endif
diff --git a/contrib/vhost-user-gpu/vugpu.h b/contrib/vhost-user-gpu/vugpu.h
new file mode 100644
index 0000000000..458e92a1b3
--- /dev/null
+++ b/contrib/vhost-user-gpu/vugpu.h
@@ -0,0 +1,177 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2018
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *     Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef VUGPU_H_
+#define VUGPU_H_
+
+#include "qemu/osdep.h"
+
+#include "contrib/libvhost-user/libvhost-user-glib.h"
+#include "standard-headers/linux/virtio_gpu.h"
+
+#include "qemu/queue.h"
+#include "qemu/iov.h"
+#include "qemu/bswap.h"
+#include "vugbm.h"
+
+typedef enum VhostUserGpuRequest {
+    VHOST_USER_GPU_NONE = 0,
+    VHOST_USER_GPU_GET_PROTOCOL_FEATURES,
+    VHOST_USER_GPU_SET_PROTOCOL_FEATURES,
+    VHOST_USER_GPU_GET_DISPLAY_INFO,
+    VHOST_USER_GPU_CURSOR_POS,
+    VHOST_USER_GPU_CURSOR_POS_HIDE,
+    VHOST_USER_GPU_CURSOR_UPDATE,
+    VHOST_USER_GPU_SCANOUT,
+    VHOST_USER_GPU_UPDATE,
+    VHOST_USER_GPU_DMABUF_SCANOUT,
+    VHOST_USER_GPU_DMABUF_UPDATE,
+} VhostUserGpuRequest;
+
+typedef struct VhostUserGpuDisplayInfoReply {
+    struct virtio_gpu_resp_display_info info;
+} VhostUserGpuDisplayInfoReply;
+
+typedef struct VhostUserGpuCursorPos {
+    uint32_t scanout_id;
+    uint32_t x;
+    uint32_t y;
+} QEMU_PACKED VhostUserGpuCursorPos;
+
+typedef struct VhostUserGpuCursorUpdate {
+    VhostUserGpuCursorPos pos;
+    uint32_t hot_x;
+    uint32_t hot_y;
+    uint32_t data[64 * 64];
+} QEMU_PACKED VhostUserGpuCursorUpdate;
+
+typedef struct VhostUserGpuScanout {
+    uint32_t scanout_id;
+    uint32_t width;
+    uint32_t height;
+} QEMU_PACKED VhostUserGpuScanout;
+
+typedef struct VhostUserGpuUpdate {
+    uint32_t scanout_id;
+    uint32_t x;
+    uint32_t y;
+    uint32_t width;
+    uint32_t height;
+    uint8_t data[];
+} QEMU_PACKED VhostUserGpuUpdate;
+
+typedef struct VhostUserGpuDMABUFScanout {
+    uint32_t scanout_id;
+    uint32_t x;
+    uint32_t y;
+    uint32_t width;
+    uint32_t height;
+    uint32_t fd_width;
+    uint32_t fd_height;
+    uint32_t fd_stride;
+    uint32_t fd_flags;
+    int fd_drm_fourcc;
+} QEMU_PACKED VhostUserGpuDMABUFScanout;
+
+typedef struct VhostUserGpuMsg {
+    uint32_t request; /* VhostUserGpuRequest */
+    uint32_t flags;
+    uint32_t size; /* the following payload size */
+    union {
+        VhostUserGpuCursorPos cursor_pos;
+        VhostUserGpuCursorUpdate cursor_update;
+        VhostUserGpuScanout scanout;
+        VhostUserGpuUpdate update;
+        VhostUserGpuDMABUFScanout dmabuf_scanout;
+        struct virtio_gpu_resp_display_info display_info;
+        uint64_t u64;
+    } payload;
+} QEMU_PACKED VhostUserGpuMsg;
+
+static VhostUserGpuMsg m __attribute__ ((unused));
+#define VHOST_USER_GPU_HDR_SIZE \
+    (sizeof(m.request) + sizeof(m.flags) + sizeof(m.size))
+
+#define VHOST_USER_GPU_MSG_FLAG_REPLY 0x4
+
+struct virtio_gpu_scanout {
+    uint32_t width, height;
+    int x, y;
+    int invalidate;
+    uint32_t resource_id;
+};
+
+typedef struct VuGpu {
+    VugDev dev;
+    struct virtio_gpu_config virtio_config;
+    struct vugbm_device gdev;
+    int sock_fd;
+    int drm_rnode_fd;
+    GSource *renderer_source;
+    guint wait_ok;
+
+    bool virgl;
+    bool virgl_inited;
+    uint32_t inflight;
+
+    struct virtio_gpu_scanout scanout[VIRTIO_GPU_MAX_SCANOUTS];
+    QTAILQ_HEAD(, virtio_gpu_simple_resource) reslist;
+    QTAILQ_HEAD(, virtio_gpu_ctrl_command) fenceq;
+} VuGpu;
+
+struct virtio_gpu_ctrl_command {
+    VuVirtqElement elem;
+    VuVirtq *vq;
+    struct virtio_gpu_ctrl_hdr cmd_hdr;
+    uint32_t error;
+    bool finished;
+    QTAILQ_ENTRY(virtio_gpu_ctrl_command) next;
+};
+
+#define VUGPU_FILL_CMD(out) do {                                \
+        size_t s;                                               \
+        s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0,  \
+                       &out, sizeof(out));                      \
+        if (s != sizeof(out)) {                                 \
+            g_critical("%s: command size incorrect %zu vs %zu", \
+                       __func__, s, sizeof(out));               \
+            return;                                             \
+        }                                                       \
+    } while (0)
+
+
+void    vg_ctrl_response(VuGpu *g,
+                         struct virtio_gpu_ctrl_command *cmd,
+                         struct virtio_gpu_ctrl_hdr *resp,
+                         size_t resp_len);
+
+void    vg_ctrl_response_nodata(VuGpu *g,
+                                struct virtio_gpu_ctrl_command *cmd,
+                                enum virtio_gpu_ctrl_type type);
+
+int     vg_create_mapping_iov(VuGpu *g,
+                              struct virtio_gpu_resource_attach_backing *ab,
+                              struct virtio_gpu_ctrl_command *cmd,
+                              struct iovec **iov);
+
+void    vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd);
+
+void    vg_wait_ok(VuGpu *g);
+
+void    vg_send_msg(VuGpu *g, const VhostUserGpuMsg *msg, int fd);
+
+bool    vg_recv_msg(VuGpu *g, uint32_t expect_req, uint32_t expect_size,
+                    gpointer payload);
+
+
+#endif
diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index f23ecfd5c5..1f2e0e7fde 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -39,3 +39,4 @@ CONFIG_MICROBIT=y
 CONFIG_FSL_IMX25=y
 CONFIG_FSL_IMX7=y
 CONFIG_FSL_IMX6UL=y
+CONFIG_SEMIHOSTING=y
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index ba3fb3ff50..cd5ea391e8 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -25,3 +25,4 @@
 CONFIG_ISAPC=y
 CONFIG_I440FX=y
 CONFIG_Q35=y
+CONFIG_ACPI_PCI=y
diff --git a/default-configs/lm32-softmmu.mak b/default-configs/lm32-softmmu.mak
index 6d259665d6..115b3e34c9 100644
--- a/default-configs/lm32-softmmu.mak
+++ b/default-configs/lm32-softmmu.mak
@@ -4,6 +4,8 @@
 #
 #CONFIG_MILKYMIST_TMU2=n        # disabling it actually causes compile-time failures
 
+CONFIG_SEMIHOSTING=y
+
 # Boards:
 #
 CONFIG_LM32=y
diff --git a/default-configs/m68k-softmmu.mak b/default-configs/m68k-softmmu.mak
index e17495e2a0..4049a8f2ba 100644
--- a/default-configs/m68k-softmmu.mak
+++ b/default-configs/m68k-softmmu.mak
@@ -1,5 +1,7 @@
 # Default configuration for m68k-softmmu
 
+CONFIG_SEMIHOSTING=y
+
 # Boards:
 #
 CONFIG_AN5206=y
diff --git a/default-configs/mips-softmmu-common.mak b/default-configs/mips-softmmu-common.mak
index 8e54a74b7a..e10ac4b20c 100644
--- a/default-configs/mips-softmmu-common.mak
+++ b/default-configs/mips-softmmu-common.mak
@@ -35,6 +35,7 @@ CONFIG_MIPS_CPS=y
 CONFIG_MIPS_ITU=y
 CONFIG_R4K=y
 CONFIG_MALTA=y
+CONFIG_SEMIHOSTING=y
 CONFIG_PCNET_PCI=y
 CONFIG_MIPSSIM=y
 CONFIG_ACPI_SMBUS=y
diff --git a/default-configs/nios2-softmmu.mak b/default-configs/nios2-softmmu.mak
index e130d024e6..1bc4082ea9 100644
--- a/default-configs/nios2-softmmu.mak
+++ b/default-configs/nios2-softmmu.mak
@@ -1,5 +1,7 @@
 # Default configuration for nios2-softmmu
 
+CONFIG_SEMIHOSTING=y
+
 # Boards:
 #
 CONFIG_NIOS2_10M50=y
diff --git a/default-configs/xtensa-softmmu.mak b/default-configs/xtensa-softmmu.mak
index 7e4d1cc097..3aa20a47a7 100644
--- a/default-configs/xtensa-softmmu.mak
+++ b/default-configs/xtensa-softmmu.mak
@@ -1,5 +1,7 @@
 # Default configuration for Xtensa
 
+CONFIG_SEMIHOSTING=y
+
 # Boards:
 #
 CONFIG_XTENSA_SIM=y
diff --git a/docs/devel/build-system.txt b/docs/devel/build-system.txt
index addd274eeb..41bd08ea3a 100644
--- a/docs/devel/build-system.txt
+++ b/docs/devel/build-system.txt
@@ -413,18 +413,13 @@ context.
 - default-configs/*.mak
 
 The files under default-configs/ control what emulated hardware is built
-into each QEMU system and userspace emulator targets. They merely
-contain a long list of config variable definitions. For example,
-default-configs/x86_64-softmmu.mak has:
-
-  include sound.mak
-  include usb.mak
-  CONFIG_QXL=$(CONFIG_SPICE)
-  CONFIG_VGA_ISA=y
-  CONFIG_VGA_CIRRUS=y
-  CONFIG_VMWARE_VGA=y
-  CONFIG_VIRTIO_VGA=y
-  ...snip...
+into each QEMU system and userspace emulator targets. They merely contain
+a list of config variable definitions like the machines that should be
+included. For example, default-configs/aarch64-softmmu.mak has:
+
+  include arm-softmmu.mak
+  CONFIG_XLNX_ZYNQMP_ARM=y
+  CONFIG_XLNX_VERSAL=y
 
 These files rarely need changing unless new devices / hardware need to
 be enabled for a particular system/userspace emulation target
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
index 4f9012d154..aeb997bed5 100644
--- a/docs/devel/multiple-iothreads.txt
+++ b/docs/devel/multiple-iothreads.txt
@@ -109,7 +109,7 @@ The AioContext originates from the QEMU block layer, even though nowadays
 AioContext is a generic event loop that can be used by any QEMU subsystem.
 
 The block layer has support for AioContext integrated.  Each BlockDriverState
-is associated with an AioContext using bdrv_set_aio_context() and
+is associated with an AioContext using bdrv_try_set_aio_context() and
 bdrv_get_aio_context().  This allows block layer code to process I/O inside the
 right AioContext.  Other subsystems may wish to follow a similar approach.
 
@@ -134,5 +134,5 @@ Long-running jobs (usually in the form of coroutines) are best scheduled in
 the BlockDriverState's AioContext to avoid the need to acquire/release around
 each bdrv_*() call.  The functions bdrv_add/remove_aio_context_notifier,
 or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends,
-can be used to get a notification whenever bdrv_set_aio_context() moves a
+can be used to get a notification whenever bdrv_try_set_aio_context() moves a
 BlockDriverState to a different AioContext.
diff --git a/docs/index.rst b/docs/index.rst
index 3690955dd1..baa5791c17 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -12,4 +12,5 @@ Welcome to QEMU's documentation!
 
    interop/index
    devel/index
+   specs/index
 
diff --git a/docs/interop/index.rst b/docs/interop/index.rst
index a037bd67ec..b4bfcab417 100644
--- a/docs/interop/index.rst
+++ b/docs/interop/index.rst
@@ -16,3 +16,4 @@ Contents:
    live-block-operations
    pr-helper
    vhost-user
+   vhost-user-gpu
diff --git a/docs/interop/vhost-user-gpu.rst b/docs/interop/vhost-user-gpu.rst
new file mode 100644
index 0000000000..688f8b4259
--- /dev/null
+++ b/docs/interop/vhost-user-gpu.rst
@@ -0,0 +1,242 @@
+=======================
+Vhost-user-gpu Protocol
+=======================
+
+:Licence: This work is licensed under the terms of the GNU GPL,
+          version 2 or later. See the COPYING file in the top-level
+          directory.
+
+.. contents:: Table of Contents
+
+Introduction
+============
+
+The vhost-user-gpu protocol is aiming at sharing the rendering result
+of a virtio-gpu, done from a vhost-user slave process to a vhost-user
+master process (such as QEMU). It bears a resemblance to a display
+server protocol, if you consider QEMU as the display server and the
+slave as the client, but in a very limited way. Typically, it will
+work by setting a scanout/display configuration, before sending flush
+events for the display updates. It will also update the cursor shape
+and position.
+
+The protocol is sent over a UNIX domain stream socket, since it uses
+socket ancillary data to share opened file descriptors (DMABUF fds or
+shared memory). The socket is usually obtained via
+``VHOST_USER_GPU_SET_SOCKET``.
+
+Requests are sent by the *slave*, and the optional replies by the
+*master*.
+
+Wire format
+===========
+
+Unless specified differently, numbers are in the machine native byte
+order.
+
+A vhost-user-gpu message (request and reply) consists of 3 header
+fields and a payload.
+
++---------+-------+------+---------+
+| request | flags | size | payload |
++---------+-------+------+---------+
+
+Header
+------
+
+:request: ``u32``, type of the request
+
+:flags: ``u32``, 32-bit bit field:
+
+ - Bit 2 is the reply flag - needs to be set on each reply
+
+:size: ``u32``, size of the payload
+
+Payload types
+-------------
+
+Depending on the request type, **payload** can be:
+
+VhostUserGpuCursorPos
+^^^^^^^^^^^^^^^^^^^^^
+
++------------+---+---+
+| scanout-id | x | y |
++------------+---+---+
+
+:scanout-id: ``u32``, the scanout where the cursor is located
+
+:x/y: ``u32``, the cursor postion
+
+VhostUserGpuCursorUpdate
+^^^^^^^^^^^^^^^^^^^^^^^^
+
++-----+-------+-------+--------+
+| pos | hot_x | hot_y | cursor |
++-----+-------+-------+--------+
+
+:pos: a ``VhostUserGpuCursorPos``, the cursor location
+
+:hot_x/hot_y: ``u32``, the cursor hot location
+
+:cursor: ``[u32; 64 * 64]``, 64x64 RGBA cursor data (PIXMAN_a8r8g8b8 format)
+
+VhostUserGpuScanout
+^^^^^^^^^^^^^^^^^^^
+
++------------+---+---+
+| scanout-id | w | h |
++------------+---+---+
+
+:scanout-id: ``u32``, the scanout configuration to set
+
+:w/h: ``u32``, the scanout width/height size
+
+VhostUserGpuUpdate
+^^^^^^^^^^^^^^^^^^
+
++------------+---+---+---+---+------+
+| scanout-id | x | y | w | h | data |
++------------+---+---+---+---+------+
+
+:scanout-id: ``u32``, the scanout content to update
+
+:x/y/w/h: ``u32``, region of the update
+
+:data: RGB data (PIXMAN_x8r8g8b8 format)
+
+VhostUserGpuDMABUFScanout
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
++------------+---+---+---+---+-----+-----+--------+-------+--------+
+| scanout-id | x | y | w | h | fdw | fwh | stride | flags | fourcc |
++------------+---+---+---+---+-----+-----+--------+-------+--------+
+
+:scanout-id: ``u32``, the scanout configuration to set
+
+:x/y: ``u32``, the location of the scanout within the DMABUF
+
+:w/h: ``u32``, the scanout width/height size
+
+:fdw/fdh/stride/flags: ``u32``, the DMABUF width/height/stride/flags
+
+:fourcc: ``i32``, the DMABUF fourcc
+
+
+C structure
+-----------
+
+In QEMU the vhost-user-gpu message is implemented with the following struct:
+
+.. code:: c
+
+  typedef struct VhostUserGpuMsg {
+      uint32_t request; /* VhostUserGpuRequest */
+      uint32_t flags;
+      uint32_t size; /* the following payload size */
+      union {
+          VhostUserGpuCursorPos cursor_pos;
+          VhostUserGpuCursorUpdate cursor_update;
+          VhostUserGpuScanout scanout;
+          VhostUserGpuUpdate update;
+          VhostUserGpuDMABUFScanout dmabuf_scanout;
+          struct virtio_gpu_resp_display_info display_info;
+          uint64_t u64;
+      } payload;
+  } QEMU_PACKED VhostUserGpuMsg;
+
+Protocol features
+-----------------
+
+None yet.
+
+As the protocol may need to evolve, new messages and communication
+changes are negotiated thanks to preliminary
+``VHOST_USER_GPU_GET_PROTOCOL_FEATURES`` and
+``VHOST_USER_GPU_SET_PROTOCOL_FEATURES`` requests.
+
+Communication
+=============
+
+Message types
+-------------
+
+``VHOST_USER_GPU_GET_PROTOCOL_FEATURES``
+  :id: 1
+  :request payload: N/A
+  :reply payload: ``u64``
+
+  Get the supported protocol features bitmask.
+
+``VHOST_USER_GPU_SET_PROTOCOL_FEATURES``
+  :id: 2
+  :request payload: ``u64``
+  :reply payload: N/A
+
+  Enable protocol features using a bitmask.
+
+``VHOST_USER_GPU_GET_DISPLAY_INFO``
+  :id: 3
+  :request payload: N/A
+  :reply payload: ``struct virtio_gpu_resp_display_info`` (from virtio specification)
+
+  Get the preferred display configuration.
+
+``VHOST_USER_GPU_CURSOR_POS``
+  :id: 4
+  :request payload: ``VhostUserGpuCursorPos``
+  :reply payload: N/A
+
+  Set/show the cursor position.
+
+``VHOST_USER_GPU_CURSOR_POS_HIDE``
+  :id: 5
+  :request payload: ``VhostUserGpuCursorPos``
+  :reply payload: N/A
+
+  Set/hide the cursor.
+
+``VHOST_USER_GPU_CURSOR_UPDATE``
+  :id: 6
+  :request payload: ``VhostUserGpuCursorUpdate``
+  :reply payload: N/A
+
+  Update the cursor shape and location.
+
+``VHOST_USER_GPU_SCANOUT``
+  :id: 7
+  :request payload: ``VhostUserGpuScanout``
+  :reply payload: N/A
+
+  Set the scanout resolution. To disable a scanout, the dimensions
+  width/height are set to 0.
+
+``VHOST_USER_GPU_UPDATE``
+  :id: 8
+  :request payload: ``VhostUserGpuUpdate``
+  :reply payload: N/A
+
+  Update the scanout content. The data payload contains the graphical bits.
+  The display should be flushed and presented.
+
+``VHOST_USER_GPU_DMABUF_SCANOUT``
+  :id: 9
+  :request payload: ``VhostUserGpuDMABUFScanout``
+  :reply payload: N/A
+
+  Set the scanout resolution/configuration, and share a DMABUF file
+  descriptor for the scanout content, which is passed as ancillary
+  data. To disable a scanout, the dimensions width/height are set
+  to 0, there is no file descriptor passed.
+
+``VHOST_USER_GPU_DMABUF_UPDATE``
+  :id: 10
+  :request payload: ``VhostUserGpuUpdate``
+  :reply payload: empty payload
+
+  The display should be flushed and presented according to updated
+  region from ``VhostUserGpuUpdate``.
+
+  Note: there is no data payload, since the scanout is shared thanks
+  to DMABUF, that must have been set previously with
+  ``VHOST_USER_GPU_DMABUF_SCANOUT``.
diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index 7f3232c798..dc0ff9211f 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -1163,6 +1163,15 @@ Master message types
   send the shared inflight buffer back to slave so that slave could
   get inflight I/O after a crash or restart.
 
+``VHOST_USER_GPU_SET_SOCKET``
+  :id: 33
+  :equivalent ioctl: N/A
+  :master payload: N/A
+
+  Sets the GPU protocol socket file descriptor, which is passed as
+  ancillary data. The GPU protocol is used to inform the master of
+  rendering state and updates. See vhost-user-gpu.rst for details.
+
 Slave message types
 -------------------
 
diff --git a/docs/specs/index.rst b/docs/specs/index.rst
new file mode 100644
index 0000000000..2e927519c2
--- /dev/null
+++ b/docs/specs/index.rst
@@ -0,0 +1,13 @@
+. This is the top level page for the 'specs' manual
+
+
+QEMU full-system emulation guest hardware specifications
+========================================================
+
+
+Contents:
+
+.. toctree::
+   :maxdepth: 2
+
+   xive
diff --git a/docs/specs/ppc-spapr-xive.rst b/docs/specs/ppc-spapr-xive.rst
new file mode 100644
index 0000000000..539ce7ca4e
--- /dev/null
+++ b/docs/specs/ppc-spapr-xive.rst
@@ -0,0 +1,174 @@
+XIVE for sPAPR (pseries machines)
+=================================
+
+The POWER9 processor comes with a new interrupt controller
+architecture, called XIVE as "eXternal Interrupt Virtualization
+Engine". It supports a larger number of interrupt sources and offers
+virtualization features which enables the HW to deliver interrupts
+directly to virtual processors without hypervisor assistance.
+
+A QEMU ``pseries`` machine (which is PAPR compliant) using POWER9
+processors can run under two interrupt modes:
+
+- *Legacy Compatibility Mode*
+
+  the hypervisor provides identical interfaces and similar
+  functionality to PAPR+ Version 2.7.  This is the default mode
+
+  It is also referred as *XICS* in QEMU.
+
+- *XIVE native exploitation mode*
+
+  the hypervisor provides new interfaces to manage the XIVE control
+  structures, and provides direct control for interrupt management
+  through MMIO pages.
+
+Which interrupt modes can be used by the machine is negotiated with
+the guest O/S during the Client Architecture Support negotiation
+sequence. The two modes are mutually exclusive.
+
+Both interrupt mode share the same IRQ number space. See below for the
+layout.
+
+CAS Negotiation
+---------------
+
+QEMU advertises the supported interrupt modes in the device tree
+property "ibm,arch-vec-5-platform-support" in byte 23 and the OS
+Selection for XIVE is indicated in the "ibm,architecture-vec-5"
+property byte 23.
+
+The interrupt modes supported by the machine depend on the CPU type
+(POWER9 is required for XIVE) but also on the machine property
+``ic-mode`` which can be set on the command line. It can take the
+following values: ``xics``, ``xive``, ``dual`` and currently ``xics``
+is the default but it may change in the future.
+
+The choosen interrupt mode is activated after a reconfiguration done
+in a machine reset.
+
+XIVE Device tree properties
+---------------------------
+
+The properties for the PAPR interrupt controller node when the *XIVE
+native exploitation mode* is selected shoud contain:
+
+- ``device_type``
+
+  value should be "power-ivpe".
+
+- ``compatible``
+
+  value should be "ibm,power-ivpe".
+
+- ``reg``
+
+  contains the base address and size of the thread interrupt
+  managnement areas (TIMA), for the User level and for the Guest OS
+  level. Only the Guest OS level is taken into account today.
+
+- ``ibm,xive-eq-sizes``
+
+  the size of the event queues. One cell per size supported, contains
+  log2 of size, in ascending order.
+
+- ``ibm,xive-lisn-ranges``
+
+  the IRQ interrupt number ranges assigned to the guest for the IPIs.
+
+The root node also exports :
+
+- ``ibm,plat-res-int-priorities``
+
+  contains a list of priorities that the hypervisor has reserved for
+  its own use.
+
+IRQ number space
+----------------
+
+IRQ Number space of the ``pseries`` machine is 8K wide and is the same
+for both interrupt mode. The different ranges are defined as follow :
+
+- ``0x0000 .. 0x0FFF`` 4K CPU IPIs (only used under XIVE)
+- ``0x1000 .. 0x1000`` 1 EPOW
+- ``0x1001 .. 0x1001`` 1 HOTPLUG
+- ``0x1100 .. 0x11FF`` 256 VIO devices
+- ``0x1200 .. 0x127F`` 32 PHBs devices
+- ``0x1280 .. 0x12FF`` unused
+- ``0x1300 .. 0x1FFF`` PHB MSIs
+
+Monitoring XIVE
+---------------
+
+The state of the XIVE interrupt controller can be queried through the
+monitor commands ``info pic``. The output comes in two parts.
+
+First, the state of the thread interrupt context registers is dumped
+for each CPU :
+
+::
+
+   (qemu) info pic
+   CPU[0000]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR  W2
+   CPU[0000]: USER    00   00  00    00   00  00  00   00  00000000
+   CPU[0000]:   OS    00   ff  00    00   ff  00  ff   ff  80000400
+   CPU[0000]: POOL    00   00  00    00   00  00  00   00  00000000
+   CPU[0000]: PHYS    00   00  00    00   00  00  00   ff  00000000
+   ...
+
+In the case of a ``pseries`` machine, QEMU acts as the hypervisor and only
+the O/S and USER register rings make sense. ``W2`` contains the vCPU CAM
+line which is set to the VP identifier.
+
+Then comes the routing information which aggregates the EAS and the
+END configuration:
+
+::
+
+   ...
+   LISN         PQ    EISN     CPU/PRIO EQ
+   00000000 MSI --    00000010   0/6    380/16384 @1fe3e0000 ^1 [ 80000010 ... ]
+   00000001 MSI --    00000010   1/6    305/16384 @1fc230000 ^1 [ 80000010 ... ]
+   00000002 MSI --    00000010   2/6    220/16384 @1fc2f0000 ^1 [ 80000010 ... ]
+   00000003 MSI --    00000010   3/6    201/16384 @1fc390000 ^1 [ 80000010 ... ]
+   00000004 MSI -Q  M 00000000
+   00000005 MSI -Q  M 00000000
+   00000006 MSI -Q  M 00000000
+   00000007 MSI -Q  M 00000000
+   00001000 MSI --    00000012   0/6    380/16384 @1fe3e0000 ^1 [ 80000010 ... ]
+   00001001 MSI --    00000013   0/6    380/16384 @1fe3e0000 ^1 [ 80000010 ... ]
+   00001100 MSI --    00000100   1/6    305/16384 @1fc230000 ^1 [ 80000010 ... ]
+   00001101 MSI -Q  M 00000000
+   00001200 LSI -Q  M 00000000
+   00001201 LSI -Q  M 00000000
+   00001202 LSI -Q  M 00000000
+   00001203 LSI -Q  M 00000000
+   00001300 MSI --    00000102   1/6    305/16384 @1fc230000 ^1 [ 80000010 ... ]
+   00001301 MSI --    00000103   2/6    220/16384 @1fc2f0000 ^1 [ 80000010 ... ]
+   00001302 MSI --    00000104   3/6    201/16384 @1fc390000 ^1 [ 80000010 ... ]
+
+The source information and configuration:
+
+- The ``LISN`` column outputs the interrupt number of the source in
+  range ``[ 0x0 ... 0x1FFF ]`` and its type : ``MSI`` or ``LSI``
+- The ``PQ`` column reflects the state of the PQ bits of the source :
+
+  - ``--`` source is ready to take events
+  - ``P-`` an event was sent and an EOI is PENDING
+  - ``PQ`` an event was QUEUED
+  - ``-Q`` source is OFF
+
+  a ``M`` indicates that source is *MASKED* at the EAS level,
+
+The targeting configuration :
+
+- The ``EISN`` column is the event data that will be queued in the event
+  queue of the O/S.
+- The ``CPU/PRIO`` column is the tuple defining the CPU number and
+  priority queue serving the source.
+- The ``EQ`` column outputs :
+
+  - the current index of the event queue/ the max number of entries
+  - the O/S event queue address
+  - the toggle bit
+  - the last entries that were pushed in the event queue.
diff --git a/docs/specs/ppc-xive.rst b/docs/specs/ppc-xive.rst
new file mode 100644
index 0000000000..b997dc0629
--- /dev/null
+++ b/docs/specs/ppc-xive.rst
@@ -0,0 +1,199 @@
+================================
+POWER9 XIVE interrupt controller
+================================
+
+The POWER9 processor comes with a new interrupt controller
+architecture, called XIVE as "eXternal Interrupt Virtualization
+Engine".
+
+Compared to the previous architecture, the main characteristics of
+XIVE are to support a larger number of interrupt sources and to
+deliver interrupts directly to virtual processors without hypervisor
+assistance. This removes the context switches required for the
+delivery process.
+
+
+XIVE architecture
+=================
+
+The XIVE IC is composed of three sub-engines, each taking care of a
+processing layer of external interrupts:
+
+- Interrupt Virtualization Source Engine (IVSE), or Source Controller
+  (SC). These are found in PCI PHBs, in the PSI host bridge
+  controller, but also inside the main controller for the core IPIs
+  and other sub-chips (NX, CAP, NPU) of the chip/processor. They are
+  configured to feed the IVRE with events.
+- Interrupt Virtualization Routing Engine (IVRE) or Virtualization
+  Controller (VC). It handles event coalescing and perform interrupt
+  routing by matching an event source number with an Event
+  Notification Descriptor (END).
+- Interrupt Virtualization Presentation Engine (IVPE) or Presentation
+  Controller (PC). It maintains the interrupt context state of each
+  thread and handles the delivery of the external interrupt to the
+  thread.
+
+::
+
+                XIVE Interrupt Controller
+                +------------------------------------+      IPIs
+                | +---------+ +---------+ +--------+ |    +-------+
+                | |IVRE     | |Common Q | |IVPE    |----> | CORES |
+                | |     esb | |         | |        |----> |       |
+                | |     eas | |  Bridge | |   tctx |----> |       |
+                | |SC   end | |         | |    nvt | |    |       |
+    +------+    | +---------+ +----+----+ +--------+ |    +-+-+-+-+
+    | RAM  |    +------------------|-----------------+      | | |
+    |      |                       |                        | | |
+    |      |                       |                        | | |
+    |      |  +--------------------v------------------------v-v-v--+    other
+    |      <--+                     Power Bus                      +--> chips
+    |  esb |  +---------+-----------------------+------------------+
+    |  eas |            |                       |
+    |  end |         +--|------+                |
+    |  nvt |       +----+----+ |           +----+----+
+    +------+       |IVSE     | |           |IVSE     |
+                   |         | |           |         |
+                   | PQ-bits | |           | PQ-bits |
+                   | local   |-+           |  in VC  |
+                   +---------+             +---------+
+                      PCIe                 NX,NPU,CAPI
+
+
+    PQ-bits: 2 bits source state machine (P:pending Q:queued)
+    esb: Event State Buffer (Array of PQ bits in an IVSE)
+    eas: Event Assignment Structure
+    end: Event Notification Descriptor
+    nvt: Notification Virtual Target
+    tctx: Thread interrupt Context registers
+
+
+
+XIVE internal tables
+--------------------
+
+Each of the sub-engines uses a set of tables to redirect interrupts
+from event sources to CPU threads.
+
+::
+
+                                            +-------+
+    User or O/S                             |  EQ   |
+        or                          +------>|entries|
+    Hypervisor                      |       |  ..   |
+      Memory                        |       +-------+
+                                    |           ^
+                                    |           |
+               +-------------------------------------------------+
+                                    |           |
+    Hypervisor      +------+    +---+--+    +---+--+   +------+
+      Memory        | ESB  |    | EAT  |    | ENDT |   | NVTT |
+     (skiboot)      +----+-+    +----+-+    +----+-+   +------+
+                      ^  |        ^  |        ^  |       ^
+                      |  |        |  |        |  |       |
+               +-------------------------------------------------+
+                      |  |        |  |        |  |       |
+                      |  |        |  |        |  |       |
+                 +----|--|--------|--|--------|--|-+   +-|-----+    +------+
+                 |    |  |        |  |        |  | |   | | tctx|    |Thread|
+     IPI or   ---+    +  v        +  v        +  v |---| +  .. |----->     |
+    HW events    |                                 |   |       |    |      |
+                 |             IVRE                |   | IVPE  |    +------+
+                 +---------------------------------+   +-------+
+
+
+The IVSE have a 2-bits state machine, P for pending and Q for queued,
+for each source that allows events to be triggered. They are stored in
+an Event State Buffer (ESB) array and can be controlled by MMIOs.
+
+If the event is let through, the IVRE looks up in the Event Assignment
+Structure (EAS) table for an Event Notification Descriptor (END)
+configured for the source. Each Event Notification Descriptor defines
+a notification path to a CPU and an in-memory Event Queue, in which
+will be enqueued an EQ data for the O/S to pull.
+
+The IVPE determines if a Notification Virtual Target (NVT) can handle
+the event by scanning the thread contexts of the VCPUs dispatched on
+the processor HW threads. It maintains the interrupt context state of
+each thread in a NVT table.
+
+XIVE thread interrupt context
+-----------------------------
+
+The XIVE presenter can generate four different exceptions to its
+HW threads:
+
+- hypervisor exception
+- O/S exception
+- Event-Based Branch (user level)
+- msgsnd (doorbell)
+
+Each exception has a state independent from the others called a Thread
+Interrupt Management context. This context is a set of registers which
+lets the thread handle priority management and interrupt
+acknowledgment among other things. The most important ones being :
+
+- Interrupt Priority Register  (PIPR)
+- Interrupt Pending Buffer     (IPB)
+- Current Processor Priority   (CPPR)
+- Notification Source Register (NSR)
+
+TIMA
+~~~~
+
+The Thread Interrupt Management registers are accessible through a
+specific MMIO region, called the Thread Interrupt Management Area
+(TIMA), four aligned pages, each exposing a different view of the
+registers. First page (page address ending in ``0b00``) gives access
+to the entire context and is reserved for the ring 0 view for the
+physical thread context. The second (page address ending in ``0b01``)
+is for the hypervisor, ring 1 view. The third (page address ending in
+``0b10``) is for the operating system, ring 2 view. The fourth (page
+address ending in ``0b11``) is for user level, ring 3 view.
+
+Interrupt flow from an O/S perspective
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+After an event data has been enqueued in the O/S Event Queue, the IVPE
+raises the bit corresponding to the priority of the pending interrupt
+in the register IBP (Interrupt Pending Buffer) to indicate that an
+event is pending in one of the 8 priority queues. The Pending
+Interrupt Priority Register (PIPR) is also updated using the IPB. This
+register represent the priority of the most favored pending
+notification.
+
+The PIPR is then compared to the the Current Processor Priority
+Register (CPPR). If it is more favored (numerically less than), the
+CPU interrupt line is raised and the EO bit of the Notification Source
+Register (NSR) is updated to notify the presence of an exception for
+the O/S. The O/S acknowledges the interrupt with a special load in the
+Thread Interrupt Management Area.
+
+The O/S handles the interrupt and when done, performs an EOI using a
+MMIO operation on the ESB management page of the associate source.
+
+Overview of the QEMU models for XIVE
+====================================
+
+The XiveSource models the IVSE in general, internal and external. It
+handles the source ESBs and the MMIO interface to control them.
+
+The XiveNotifier is a small helper interface interconnecting the
+XiveSource to the XiveRouter.
+
+The XiveRouter is an abstract model acting as a combined IVRE and
+IVPE. It routes event notifications using the EAS and END tables to
+the IVPE sub-engine which does a CAM scan to find a CPU to deliver the
+exception. Storage should be provided by the inheriting classes.
+
+XiveEnDSource is a special source object. It exposes the END ESB MMIOs
+of the Event Queues which are used for coalescing event notifications
+and for escalation. Not used on the field, only to sync the EQ cache
+in OPAL.
+
+Finally, the XiveTCTX contains the interrupt state context of a thread,
+four sets of registers, one for each exception that can be delivered
+to a CPU. These contexts are scanned by the IVPE to find a matching VP
+when a notification is triggered. It also models the Thread Interrupt
+Management Area (TIMA), which exposes the thread context registers to
+the CPU for interrupt management.
diff --git a/gdbstub.c b/gdbstub.c
index b129df4e59..462f89edfe 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -37,7 +37,7 @@
 #include "qemu/sockets.h"
 #include "sysemu/hw_accel.h"
 #include "sysemu/kvm.h"
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
 #include "exec/exec-all.h"
 
 #ifdef CONFIG_USER_ONLY
diff --git a/hmp.c b/hmp.c
index 56a3ed7375..be5e345c6f 100644
--- a/hmp.c
+++ b/hmp.c
@@ -2560,7 +2560,8 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
     if (!blk) {
         BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err);
         if (bs) {
-            blk = local_blk = blk_new(0, BLK_PERM_ALL);
+            blk = local_blk = blk_new(bdrv_get_aio_context(bs),
+                                      0, BLK_PERM_ALL);
             ret = blk_insert_bs(blk, bs, &err);
             if (ret < 0) {
                 goto fail;
diff --git a/hw/Kconfig b/hw/Kconfig
index 88b9f15007..195f541e50 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -29,6 +29,7 @@ source pci/Kconfig
 source rdma/Kconfig
 source scsi/Kconfig
 source sd/Kconfig
+source semihosting/Kconfig
 source smbios/Kconfig
 source ssi/Kconfig
 source timer/Kconfig
diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index 82aa7fab8e..d770926ba9 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -36,6 +36,7 @@ devices-dirs-$(CONFIG_SOFTMMU) += watchdog/
 devices-dirs-$(CONFIG_SOFTMMU) += xen/
 devices-dirs-$(CONFIG_MEM_DEVICE) += mem/
 devices-dirs-$(CONFIG_SOFTMMU) += smbios/
+devices-dirs-y += semihosting/
 devices-dirs-y += core/
 common-obj-y += $(devices-dirs-y)
 obj-y += $(devices-dirs-y)
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index eca3beed75..7c59cf900b 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -23,6 +23,10 @@ config ACPI_NVDIMM
     bool
     depends on ACPI
 
+config ACPI_PCI
+    bool
+    depends on ACPI && PCI
+
 config ACPI_VMGENID
     bool
     default y
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 2d46e3789a..661a9b8c2f 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -11,6 +11,7 @@ common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
 common-obj-y += acpi_interface.o
 common-obj-y += bios-linker-loader.o
 common-obj-y += aml-build.o
+common-obj-$(CONFIG_ACPI_PCI) += pci.o
 common-obj-$(CONFIG_TPM) += tpm.o
 
 common-obj-$(CONFIG_IPMI) += ipmi.o
diff --git a/hw/acpi/pci.c b/hw/acpi/pci.c
new file mode 100644
index 0000000000..9510597a19
--- /dev/null
+++ b/hw/acpi/pci.c
@@ -0,0 +1,61 @@
+/*
+ * Support for generating PCI related ACPI tables and passing them to Guests
+ *
+ * Copyright (C) 2006 Fabrice Bellard
+ * Copyright (C) 2008-2010  Kevin O'Connor <kevin@koconnor.net>
+ * Copyright (C) 2013-2019 Red Hat Inc
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Author: Wei Yang <richardw.yang@linux.intel.com>
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/acpi/aml-build.h"
+#include "hw/acpi/pci.h"
+#include "hw/pci/pcie_host.h"
+
+void build_mcfg(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info)
+{
+    int mcfg_start = table_data->len;
+
+    /*
+     * PCI Firmware Specification, Revision 3.0
+     * 4.1.2 MCFG Table Description.
+     */
+    acpi_data_push(table_data, sizeof(AcpiTableHeader));
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 8);
+
+    /*
+     * Memory Mapped Enhanced Configuration Space Base Address Allocation
+     * Structure
+     */
+    /* Base address, processor-relative */
+    build_append_int_noprefix(table_data, info->base, 8);
+    /* PCI segment group number */
+    build_append_int_noprefix(table_data, 0, 2);
+    /* Starting PCI Bus number */
+    build_append_int_noprefix(table_data, 0, 1);
+    /* Final PCI Bus number */
+    build_append_int_noprefix(table_data, PCIE_MMCFG_BUS(info->size - 1), 1);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 4);
+
+    build_header(linker, table_data, (void *)(table_data->data + mcfg_start),
+                 "MCFG", table_data->len - mcfg_start, 1, NULL, NULL);
+}
+
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index af8cffde9c..9aced9d54d 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -19,6 +19,7 @@ config ARM_VIRT
     select PLATFORM_BUS
     select SMBIOS
     select VIRTIO_MMIO
+    select ACPI_PCI
 
 config CHEETAH
     bool
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 415cff7a01..33070a6df8 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -160,9 +160,9 @@ static void aspeed_board_init(MachineState *machine,
     ram_addr_t max_ram_size;
 
     bmc = g_new0(AspeedBoardState, 1);
-    object_initialize(&bmc->soc, (sizeof(bmc->soc)), cfg->soc_name);
-    object_property_add_child(OBJECT(machine), "soc", OBJECT(&bmc->soc),
-                              &error_abort);
+    object_initialize_child(OBJECT(machine), "soc", &bmc->soc,
+                            (sizeof(bmc->soc)), cfg->soc_name, &error_abort,
+                            NULL);
 
     sc = ASPEED_SOC_GET_CLASS(&bmc->soc);
 
diff --git a/hw/arm/aspeed_soc.c b/hw/arm/aspeed_soc.c
index a27233d487..faff42b84a 100644
--- a/hw/arm/aspeed_soc.c
+++ b/hw/arm/aspeed_soc.c
@@ -106,12 +106,11 @@ static void aspeed_soc_init(Object *obj)
     AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
     int i;
 
-    object_initialize(&s->cpu, sizeof(s->cpu), sc->info->cpu_type);
-    object_property_add_child(obj, "cpu", OBJECT(&s->cpu), NULL);
+    object_initialize_child(obj, "cpu", OBJECT(&s->cpu), sizeof(s->cpu),
+                            sc->info->cpu_type, &error_abort, NULL);
 
-    object_initialize(&s->scu, sizeof(s->scu), TYPE_ASPEED_SCU);
-    object_property_add_child(obj, "scu", OBJECT(&s->scu), NULL);
-    qdev_set_parent_bus(DEVICE(&s->scu), sysbus_get_default());
+    sysbus_init_child_obj(obj, "scu", OBJECT(&s->scu), sizeof(s->scu),
+                          TYPE_ASPEED_SCU);
     qdev_prop_set_uint32(DEVICE(&s->scu), "silicon-rev",
                          sc->info->silicon_rev);
     object_property_add_alias(obj, "hw-strap1", OBJECT(&s->scu),
@@ -121,36 +120,29 @@ static void aspeed_soc_init(Object *obj)
     object_property_add_alias(obj, "hw-prot-key", OBJECT(&s->scu),
                               "hw-prot-key", &error_abort);
 
-    object_initialize(&s->vic, sizeof(s->vic), TYPE_ASPEED_VIC);
-    object_property_add_child(obj, "vic", OBJECT(&s->vic), NULL);
-    qdev_set_parent_bus(DEVICE(&s->vic), sysbus_get_default());
+    sysbus_init_child_obj(obj, "vic", OBJECT(&s->vic), sizeof(s->vic),
+                          TYPE_ASPEED_VIC);
 
-    object_initialize(&s->timerctrl, sizeof(s->timerctrl), TYPE_ASPEED_TIMER);
-    object_property_add_child(obj, "timerctrl", OBJECT(&s->timerctrl), NULL);
+    sysbus_init_child_obj(obj, "timerctrl", OBJECT(&s->timerctrl),
+                          sizeof(s->timerctrl), TYPE_ASPEED_TIMER);
     object_property_add_const_link(OBJECT(&s->timerctrl), "scu",
                                    OBJECT(&s->scu), &error_abort);
-    qdev_set_parent_bus(DEVICE(&s->timerctrl), sysbus_get_default());
 
-    object_initialize(&s->i2c, sizeof(s->i2c), TYPE_ASPEED_I2C);
-    object_property_add_child(obj, "i2c", OBJECT(&s->i2c), NULL);
-    qdev_set_parent_bus(DEVICE(&s->i2c), sysbus_get_default());
+    sysbus_init_child_obj(obj, "i2c", OBJECT(&s->i2c), sizeof(s->i2c),
+                          TYPE_ASPEED_I2C);
 
-    object_initialize(&s->fmc, sizeof(s->fmc), sc->info->fmc_typename);
-    object_property_add_child(obj, "fmc", OBJECT(&s->fmc), NULL);
-    qdev_set_parent_bus(DEVICE(&s->fmc), sysbus_get_default());
+    sysbus_init_child_obj(obj, "fmc", OBJECT(&s->fmc), sizeof(s->fmc),
+                          sc->info->fmc_typename);
     object_property_add_alias(obj, "num-cs", OBJECT(&s->fmc), "num-cs",
                               &error_abort);
 
     for (i = 0; i < sc->info->spis_num; i++) {
-        object_initialize(&s->spi[i], sizeof(s->spi[i]),
-                          sc->info->spi_typename[i]);
-        object_property_add_child(obj, "spi[*]", OBJECT(&s->spi[i]), NULL);
-        qdev_set_parent_bus(DEVICE(&s->spi[i]), sysbus_get_default());
+        sysbus_init_child_obj(obj, "spi[*]", OBJECT(&s->spi[i]),
+                              sizeof(s->spi[i]), sc->info->spi_typename[i]);
     }
 
-    object_initialize(&s->sdmc, sizeof(s->sdmc), TYPE_ASPEED_SDMC);
-    object_property_add_child(obj, "sdmc", OBJECT(&s->sdmc), NULL);
-    qdev_set_parent_bus(DEVICE(&s->sdmc), sysbus_get_default());
+    sysbus_init_child_obj(obj, "sdmc", OBJECT(&s->sdmc), sizeof(s->sdmc),
+                          TYPE_ASPEED_SDMC);
     qdev_prop_set_uint32(DEVICE(&s->sdmc), "silicon-rev",
                          sc->info->silicon_rev);
     object_property_add_alias(obj, "ram-size", OBJECT(&s->sdmc),
@@ -159,16 +151,14 @@ static void aspeed_soc_init(Object *obj)
                               "max-ram-size", &error_abort);
 
     for (i = 0; i < sc->info->wdts_num; i++) {
-        object_initialize(&s->wdt[i], sizeof(s->wdt[i]), TYPE_ASPEED_WDT);
-        object_property_add_child(obj, "wdt[*]", OBJECT(&s->wdt[i]), NULL);
-        qdev_set_parent_bus(DEVICE(&s->wdt[i]), sysbus_get_default());
+        sysbus_init_child_obj(obj, "wdt[*]", OBJECT(&s->wdt[i]),
+                              sizeof(s->wdt[i]), TYPE_ASPEED_WDT);
         qdev_prop_set_uint32(DEVICE(&s->wdt[i]), "silicon-rev",
                                     sc->info->silicon_rev);
     }
 
-    object_initialize(&s->ftgmac100, sizeof(s->ftgmac100), TYPE_FTGMAC100);
-    object_property_add_child(obj, "ftgmac100", OBJECT(&s->ftgmac100), NULL);
-    qdev_set_parent_bus(DEVICE(&s->ftgmac100), sysbus_get_default());
+    sysbus_init_child_obj(obj, "ftgmac100", OBJECT(&s->ftgmac100),
+                          sizeof(s->ftgmac100), TYPE_FTGMAC100);
 }
 
 static void aspeed_soc_realize(DeviceState *dev, Error **errp)
diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c
index 6be7660e8c..0fb54c7964 100644
--- a/hw/arm/bcm2835_peripherals.c
+++ b/hw/arm/bcm2835_peripherals.c
@@ -41,44 +41,36 @@ static void bcm2835_peripherals_init(Object *obj)
                        MBOX_CHAN_COUNT << MBOX_AS_CHAN_SHIFT);
 
     /* Interrupt Controller */
-    object_initialize(&s->ic, sizeof(s->ic), TYPE_BCM2835_IC);
-    object_property_add_child(obj, "ic", OBJECT(&s->ic), NULL);
-    qdev_set_parent_bus(DEVICE(&s->ic), sysbus_get_default());
+    sysbus_init_child_obj(obj, "ic", &s->ic, sizeof(s->ic), TYPE_BCM2835_IC);
 
     /* UART0 */
-    s->uart0 = SYS_BUS_DEVICE(object_new("pl011"));
-    object_property_add_child(obj, "uart0", OBJECT(s->uart0), NULL);
-    qdev_set_parent_bus(DEVICE(s->uart0), sysbus_get_default());
+    sysbus_init_child_obj(obj, "uart0", &s->uart0, sizeof(s->uart0),
+                          TYPE_PL011);
 
     /* AUX / UART1 */
-    object_initialize(&s->aux, sizeof(s->aux), TYPE_BCM2835_AUX);
-    object_property_add_child(obj, "aux", OBJECT(&s->aux), NULL);
-    qdev_set_parent_bus(DEVICE(&s->aux), sysbus_get_default());
+    sysbus_init_child_obj(obj, "aux", &s->aux, sizeof(s->aux),
+                          TYPE_BCM2835_AUX);
 
     /* Mailboxes */
-    object_initialize(&s->mboxes, sizeof(s->mboxes), TYPE_BCM2835_MBOX);
-    object_property_add_child(obj, "mbox", OBJECT(&s->mboxes), NULL);
-    qdev_set_parent_bus(DEVICE(&s->mboxes), sysbus_get_default());
+    sysbus_init_child_obj(obj, "mbox", &s->mboxes, sizeof(s->mboxes),
+                          TYPE_BCM2835_MBOX);
 
     object_property_add_const_link(OBJECT(&s->mboxes), "mbox-mr",
                                    OBJECT(&s->mbox_mr), &error_abort);
 
     /* Framebuffer */
-    object_initialize(&s->fb, sizeof(s->fb), TYPE_BCM2835_FB);
-    object_property_add_child(obj, "fb", OBJECT(&s->fb), NULL);
+    sysbus_init_child_obj(obj, "fb", &s->fb, sizeof(s->fb), TYPE_BCM2835_FB);
     object_property_add_alias(obj, "vcram-size", OBJECT(&s->fb), "vcram-size",
                               &error_abort);
-    qdev_set_parent_bus(DEVICE(&s->fb), sysbus_get_default());
 
     object_property_add_const_link(OBJECT(&s->fb), "dma-mr",
                                    OBJECT(&s->gpu_bus_mr), &error_abort);
 
     /* Property channel */
-    object_initialize(&s->property, sizeof(s->property), TYPE_BCM2835_PROPERTY);
-    object_property_add_child(obj, "property", OBJECT(&s->property), NULL);
+    sysbus_init_child_obj(obj, "property", &s->property, sizeof(s->property),
+                          TYPE_BCM2835_PROPERTY);
     object_property_add_alias(obj, "board-rev", OBJECT(&s->property),
                               "board-rev", &error_abort);
-    qdev_set_parent_bus(DEVICE(&s->property), sysbus_get_default());
 
     object_property_add_const_link(OBJECT(&s->property), "fb",
                                    OBJECT(&s->fb), &error_abort);
@@ -86,32 +78,27 @@ static void bcm2835_peripherals_init(Object *obj)
                                    OBJECT(&s->gpu_bus_mr), &error_abort);
 
     /* Random Number Generator */
-    object_initialize(&s->rng, sizeof(s->rng), TYPE_BCM2835_RNG);
-    object_property_add_child(obj, "rng", OBJECT(&s->rng), NULL);
-    qdev_set_parent_bus(DEVICE(&s->rng), sysbus_get_default());
+    sysbus_init_child_obj(obj, "rng", &s->rng, sizeof(s->rng),
+                          TYPE_BCM2835_RNG);
 
     /* Extended Mass Media Controller */
-    object_initialize(&s->sdhci, sizeof(s->sdhci), TYPE_SYSBUS_SDHCI);
-    object_property_add_child(obj, "sdhci", OBJECT(&s->sdhci), NULL);
-    qdev_set_parent_bus(DEVICE(&s->sdhci), sysbus_get_default());
+    sysbus_init_child_obj(obj, "sdhci", &s->sdhci, sizeof(s->sdhci),
+                          TYPE_SYSBUS_SDHCI);
 
     /* SDHOST */
-    object_initialize(&s->sdhost, sizeof(s->sdhost), TYPE_BCM2835_SDHOST);
-    object_property_add_child(obj, "sdhost", OBJECT(&s->sdhost), NULL);
-    qdev_set_parent_bus(DEVICE(&s->sdhost), sysbus_get_default());
+    sysbus_init_child_obj(obj, "sdhost", &s->sdhost, sizeof(s->sdhost),
+                          TYPE_BCM2835_SDHOST);
 
     /* DMA Channels */
-    object_initialize(&s->dma, sizeof(s->dma), TYPE_BCM2835_DMA);
-    object_property_add_child(obj, "dma", OBJECT(&s->dma), NULL);
-    qdev_set_parent_bus(DEVICE(&s->dma), sysbus_get_default());
+    sysbus_init_child_obj(obj, "dma", &s->dma, sizeof(s->dma),
+                          TYPE_BCM2835_DMA);
 
     object_property_add_const_link(OBJECT(&s->dma), "dma-mr",
                                    OBJECT(&s->gpu_bus_mr), &error_abort);
 
     /* GPIO */
-    object_initialize(&s->gpio, sizeof(s->gpio), TYPE_BCM2835_GPIO);
-    object_property_add_child(obj, "gpio", OBJECT(&s->gpio), NULL);
-    qdev_set_parent_bus(DEVICE(&s->gpio), sysbus_get_default());
+    sysbus_init_child_obj(obj, "gpio", &s->gpio, sizeof(s->gpio),
+                          TYPE_BCM2835_GPIO);
 
     object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhci",
                                    OBJECT(&s->sdhci.sdbus), &error_abort);
@@ -166,16 +153,16 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
     sysbus_pass_irq(SYS_BUS_DEVICE(s), SYS_BUS_DEVICE(&s->ic));
 
     /* UART0 */
-    qdev_prop_set_chr(DEVICE(s->uart0), "chardev", serial_hd(0));
-    object_property_set_bool(OBJECT(s->uart0), true, "realized", &err);
+    qdev_prop_set_chr(DEVICE(&s->uart0), "chardev", serial_hd(0));
+    object_property_set_bool(OBJECT(&s->uart0), true, "realized", &err);
     if (err) {
         error_propagate(errp, err);
         return;
     }
 
     memory_region_add_subregion(&s->peri_mr, UART0_OFFSET,
-                                sysbus_mmio_get_region(s->uart0, 0));
-    sysbus_connect_irq(s->uart0, 0,
+                sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->uart0), 0));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart0), 0,
         qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
                                INTERRUPT_UART));
     /* AUX / UART1 */
diff --git a/hw/arm/digic.c b/hw/arm/digic.c
index 726abb9b48..6ef26c6bac 100644
--- a/hw/arm/digic.c
+++ b/hw/arm/digic.c
@@ -32,27 +32,22 @@
 static void digic_init(Object *obj)
 {
     DigicState *s = DIGIC(obj);
-    DeviceState *dev;
     int i;
 
-    object_initialize(&s->cpu, sizeof(s->cpu), "arm946-" TYPE_ARM_CPU);
-    object_property_add_child(obj, "cpu", OBJECT(&s->cpu), NULL);
+    object_initialize_child(obj, "cpu", &s->cpu, sizeof(s->cpu),
+                            "arm946-" TYPE_ARM_CPU, &error_abort, NULL);
 
     for (i = 0; i < DIGIC4_NB_TIMERS; i++) {
 #define DIGIC_TIMER_NAME_MLEN    11
         char name[DIGIC_TIMER_NAME_MLEN];
 
-        object_initialize(&s->timer[i], sizeof(s->timer[i]), TYPE_DIGIC_TIMER);
-        dev = DEVICE(&s->timer[i]);
-        qdev_set_parent_bus(dev, sysbus_get_default());
         snprintf(name, DIGIC_TIMER_NAME_MLEN, "timer[%d]", i);
-        object_property_add_child(obj, name, OBJECT(&s->timer[i]), NULL);
+        sysbus_init_child_obj(obj, name, &s->timer[i], sizeof(s->timer[i]),
+                              TYPE_DIGIC_TIMER);
     }
 
-    object_initialize(&s->uart, sizeof(s->uart), TYPE_DIGIC_UART);
-    dev = DEVICE(&s->uart);
-    qdev_set_parent_bus(dev, sysbus_get_default());
-    object_property_add_child(obj, "uart", OBJECT(&s->uart), NULL);
+    sysbus_init_child_obj(obj, "uart", &s->uart, sizeof(s->uart),
+                          TYPE_DIGIC_UART);
 }
 
 static void digic_realize(DeviceState *dev, Error **errp)
diff --git a/hw/arm/imx25_pdk.c b/hw/arm/imx25_pdk.c
index 9f3ee14739..a0423ffb67 100644
--- a/hw/arm/imx25_pdk.c
+++ b/hw/arm/imx25_pdk.c
@@ -72,9 +72,8 @@ static void imx25_pdk_init(MachineState *machine)
     unsigned int alias_offset;
     int i;
 
-    object_initialize(&s->soc, sizeof(s->soc), TYPE_FSL_IMX25);
-    object_property_add_child(OBJECT(machine), "soc", OBJECT(&s->soc),
-                              &error_abort);
+    object_initialize_child(OBJECT(machine), "soc", &s->soc, sizeof(s->soc),
+                            TYPE_FSL_IMX25, &error_abort, NULL);
 
     object_property_set_bool(OBJECT(&s->soc), true, "realized", &error_fatal);
 
@@ -132,15 +131,6 @@ static void imx25_pdk_init(MachineState *machine)
      */
     if (!qtest_enabled()) {
         arm_load_kernel(&s->soc.cpu, &imx25_pdk_binfo);
-    } else {
-        /*
-         * This I2C device doesn't exist on the real board.
-         * We add it here (only on qtest usage) to be able to do a bit
-         * of simple qtest. See "make check" for details.
-         */
-        i2c_create_slave((I2CBus *)qdev_get_child_bus(DEVICE(&s->soc.i2c[0]),
-                                                      "i2c-bus.0"),
-                         "ds1338", 0x68);
     }
 }
 
diff --git a/hw/arm/kzm.c b/hw/arm/kzm.c
index 139934c4ec..44cba8782b 100644
--- a/hw/arm/kzm.c
+++ b/hw/arm/kzm.c
@@ -71,9 +71,8 @@ static void kzm_init(MachineState *machine)
     unsigned int alias_offset;
     unsigned int i;
 
-    object_initialize(&s->soc, sizeof(s->soc), TYPE_FSL_IMX31);
-    object_property_add_child(OBJECT(machine), "soc", OBJECT(&s->soc),
-                              &error_abort);
+    object_initialize_child(OBJECT(machine), "soc", &s->soc, sizeof(s->soc),
+                            TYPE_FSL_IMX31, &error_abort, NULL);
 
     object_property_set_bool(OBJECT(&s->soc), true, "realized", &error_fatal);
 
diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c
index c167a5fa59..d85dc2c4bd 100644
--- a/hw/arm/mps2-tz.c
+++ b/hw/arm/mps2-tz.c
@@ -214,9 +214,9 @@ static MemoryRegion *make_scc(MPS2TZMachineState *mms, void *opaque,
     DeviceState *sccdev;
     MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_GET_CLASS(mms);
 
-    object_initialize(scc, sizeof(mms->scc), TYPE_MPS2_SCC);
+    sysbus_init_child_obj(OBJECT(mms), "scc", scc,
+                          sizeof(mms->scc), TYPE_MPS2_SCC);
     sccdev = DEVICE(scc);
-    qdev_set_parent_bus(sccdev, sysbus_get_default());
     qdev_prop_set_uint32(sccdev, "scc-cfg4", 0x2);
     qdev_prop_set_uint32(sccdev, "scc-aid", 0x00200008);
     qdev_prop_set_uint32(sccdev, "scc-id", mmc->scc_id);
@@ -229,8 +229,8 @@ static MemoryRegion *make_fpgaio(MPS2TZMachineState *mms, void *opaque,
 {
     MPS2FPGAIO *fpgaio = opaque;
 
-    object_initialize(fpgaio, sizeof(mms->fpgaio), TYPE_MPS2_FPGAIO);
-    qdev_set_parent_bus(DEVICE(fpgaio), sysbus_get_default());
+    sysbus_init_child_obj(OBJECT(mms), "fpgaio", fpgaio,
+                          sizeof(mms->fpgaio), TYPE_MPS2_FPGAIO);
     object_property_set_bool(OBJECT(fpgaio), true, "realized", &error_fatal);
     return sysbus_mmio_get_region(SYS_BUS_DEVICE(fpgaio), 0);
 }
diff --git a/hw/arm/mps2.c b/hw/arm/mps2.c
index b74f1378c9..10efff36b2 100644
--- a/hw/arm/mps2.c
+++ b/hw/arm/mps2.c
@@ -174,9 +174,9 @@ static void mps2_common_init(MachineState *machine)
         g_assert_not_reached();
     }
 
-    object_initialize(&mms->armv7m, sizeof(mms->armv7m), TYPE_ARMV7M);
+    sysbus_init_child_obj(OBJECT(mms), "armv7m", &mms->armv7m,
+                          sizeof(mms->armv7m), TYPE_ARMV7M);
     armv7m = DEVICE(&mms->armv7m);
-    qdev_set_parent_bus(armv7m, sysbus_get_default());
     switch (mmc->fpga_type) {
     case FPGA_AN385:
         qdev_prop_set_uint32(armv7m, "num-irq", 32);
@@ -308,9 +308,9 @@ static void mps2_common_init(MachineState *machine)
                        qdev_get_gpio_in(armv7m, 10));
     sysbus_mmio_map(SYS_BUS_DEVICE(&mms->dualtimer), 0, 0x40002000);
 
-    object_initialize(&mms->scc, sizeof(mms->scc), TYPE_MPS2_SCC);
+    sysbus_init_child_obj(OBJECT(mms), "scc", &mms->scc,
+                          sizeof(mms->scc), TYPE_MPS2_SCC);
     sccdev = DEVICE(&mms->scc);
-    qdev_set_parent_bus(sccdev, sysbus_get_default());
     qdev_prop_set_uint32(sccdev, "scc-cfg4", 0x2);
     qdev_prop_set_uint32(sccdev, "scc-aid", 0x00200008);
     qdev_prop_set_uint32(sccdev, "scc-id", mmc->scc_id);
diff --git a/hw/arm/raspi.c b/hw/arm/raspi.c
index 2b5fe10e2f..8c249fcabb 100644
--- a/hw/arm/raspi.c
+++ b/hw/arm/raspi.c
@@ -182,10 +182,9 @@ static void raspi_init(MachineState *machine, int version)
         exit(1);
     }
 
-    object_initialize(&s->soc, sizeof(s->soc),
-                      version == 3 ? TYPE_BCM2837 : TYPE_BCM2836);
-    object_property_add_child(OBJECT(machine), "soc", OBJECT(&s->soc),
-                              &error_abort);
+    object_initialize_child(OBJECT(machine), "soc", &s->soc, sizeof(s->soc),
+                            version == 3 ? TYPE_BCM2837 : TYPE_BCM2836,
+                            &error_abort, NULL);
 
     /* Allocate and map RAM */
     memory_region_allocate_system_memory(&s->ram, OBJECT(machine), "ram",
diff --git a/hw/arm/sabrelite.c b/hw/arm/sabrelite.c
index ee140e5d9e..f1b00de229 100644
--- a/hw/arm/sabrelite.c
+++ b/hw/arm/sabrelite.c
@@ -55,9 +55,8 @@ static void sabrelite_init(MachineState *machine)
         exit(1);
     }
 
-    object_initialize(&s->soc, sizeof(s->soc), TYPE_FSL_IMX6);
-    object_property_add_child(OBJECT(machine), "soc", OBJECT(&s->soc),
-                              &error_abort);
+    object_initialize_child(OBJECT(machine), "soc", &s->soc, sizeof(s->soc),
+                            TYPE_FSL_IMX6, &error_abort, NULL);
 
     object_property_set_bool(OBJECT(&s->soc), true, "realized", &err);
     if (err != NULL) {
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index e7c96d658e..4a64f9985c 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -546,23 +546,6 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
                  "SRAT", table_data->len - srat_start, 3, NULL, NULL);
 }
 
-static void
-build_mcfg(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info)
-{
-    AcpiTableMcfg *mcfg;
-    int len = sizeof(*mcfg) + sizeof(mcfg->allocation[0]);
-
-    mcfg = acpi_data_push(table_data, len);
-    mcfg->allocation[0].address = cpu_to_le64(info->base);
-
-    /* Only a single allocation so no need to play with segments */
-    mcfg->allocation[0].pci_segment = cpu_to_le16(0);
-    mcfg->allocation[0].start_bus_number = 0;
-    mcfg->allocation[0].end_bus_number = PCIE_MMCFG_BUS(info->size - 1);
-
-    build_header(linker, table_data, (void *)mcfg, "MCFG", len, 1, NULL, NULL);
-}
-
 /* GTDT */
 static void
 build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
diff --git a/hw/arm/xlnx-zcu102.c b/hw/arm/xlnx-zcu102.c
index b6bc6a93b8..c802f26fbd 100644
--- a/hw/arm/xlnx-zcu102.c
+++ b/hw/arm/xlnx-zcu102.c
@@ -91,9 +91,8 @@ static void xlnx_zcu102_init(MachineState *machine)
     memory_region_allocate_system_memory(&s->ddr_ram, NULL, "ddr-ram",
                                          ram_size);
 
-    object_initialize(&s->soc, sizeof(s->soc), TYPE_XLNX_ZYNQMP);
-    object_property_add_child(OBJECT(machine), "soc", OBJECT(&s->soc),
-                              &error_abort);
+    object_initialize_child(OBJECT(machine), "soc", &s->soc, sizeof(s->soc),
+                            TYPE_XLNX_ZYNQMP, &error_abort, NULL);
 
     object_property_set_link(OBJECT(&s->soc), OBJECT(&s->ddr_ram),
                          "ddr-ram", &error_abort);
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index 4f8bc41d9d..6e99190302 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -191,10 +191,10 @@ static void xlnx_zynqmp_create_rpu(XlnxZynqMPState *s, const char *boot_cpu,
     for (i = 0; i < num_rpus; i++) {
         char *name;
 
-        object_initialize(&s->rpu_cpu[i], sizeof(s->rpu_cpu[i]),
-                          "cortex-r5f-" TYPE_ARM_CPU);
-        object_property_add_child(OBJECT(&s->rpu_cluster), "rpu-cpu[*]",
-                                  OBJECT(&s->rpu_cpu[i]), &error_abort);
+        object_initialize_child(OBJECT(&s->rpu_cluster), "rpu-cpu[*]",
+                                &s->rpu_cpu[i], sizeof(s->rpu_cpu[i]),
+                                "cortex-r5f-" TYPE_ARM_CPU, &error_abort,
+                                NULL);
 
         name = object_get_canonical_path_component(OBJECT(&s->rpu_cpu[i]));
         if (strcmp(name, boot_cpu)) {
diff --git a/hw/audio/ac97.c b/hw/audio/ac97.c
index 2265622d44..a4e8d99e77 100644
--- a/hw/audio/ac97.c
+++ b/hw/audio/ac97.c
@@ -1388,7 +1388,7 @@ static void ac97_realize(PCIDevice *dev, Error **errp)
     pci_register_bar (&s->dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &s->io_nam);
     pci_register_bar (&s->dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &s->io_nabm);
     AUD_register_card ("ac97", &s->card);
-    ac97_on_reset (&s->dev.qdev);
+    ac97_on_reset(DEVICE(s));
 }
 
 static void ac97_exit(PCIDevice *dev)
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 8c37bd314a..158c78f852 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -173,6 +173,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
     unsigned i;
     unsigned nvqs = s->conf->num_queues;
+    Error *local_err = NULL;
     int r;
 
     if (vblk->dataplane_started || s->starting) {
@@ -212,7 +213,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
     vblk->dataplane_started = true;
     trace_virtio_blk_data_plane_start(s);
 
-    blk_set_aio_context(s->conf->conf.blk, s->ctx);
+    r = blk_set_aio_context(s->conf->conf.blk, s->ctx, &local_err);
+    if (r < 0) {
+        error_report_err(local_err);
+        goto fail_guest_notifiers;
+    }
 
     /* Kick right away to begin processing requests already in vring */
     for (i = 0; i < nvqs; i++) {
@@ -281,8 +286,9 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
     aio_context_acquire(s->ctx);
     aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
 
-    /* Drain and switch bs back to the QEMU main loop */
-    blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context());
+    /* Drain and try to switch bs back to the QEMU main loop. If other users
+     * keep the BlockBackend in the iothread, that's ok */
+    blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL);
 
     aio_context_release(s->ctx);
 
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index bb8f1186e4..f7ad452bbd 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -682,7 +682,8 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
     }
 
     aio_context_acquire(dataplane->ctx);
-    blk_set_aio_context(dataplane->blk, qemu_get_aio_context());
+    /* Xen doesn't have multiple users for nodes, so this can't fail */
+    blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort);
     aio_context_release(dataplane->ctx);
 
     xendev = dataplane->xendev;
@@ -811,7 +812,8 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
     }
 
     aio_context_acquire(dataplane->ctx);
-    blk_set_aio_context(dataplane->blk, dataplane->ctx);
+    /* If other users keep the BlockBackend in the iothread, that's ok */
+    blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL);
     aio_context_release(dataplane->ctx);
     return;
 
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 6f19f127a5..37ccedc9f7 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -538,7 +538,7 @@ static void floppy_drive_realize(DeviceState *qdev, Error **errp)
 
     if (!dev->conf.blk) {
         /* Anonymous BlockBackend for an empty drive */
-        dev->conf.blk = blk_new(0, BLK_PERM_ALL);
+        dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
         ret = blk_attach_dev(dev->conf.blk, qdev);
         assert(ret == 0);
     }
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 63a5b58849..30e50f7a38 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -219,6 +219,30 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
     return NVME_INVALID_FIELD | NVME_DNR;
 }
 
+static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+                                   uint64_t prp1, uint64_t prp2)
+{
+    QEMUSGList qsg;
+    QEMUIOVector iov;
+    uint16_t status = NVME_SUCCESS;
+
+    if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+    if (qsg.nsg > 0) {
+        if (dma_buf_write(ptr, len, &qsg)) {
+            status = NVME_INVALID_FIELD | NVME_DNR;
+        }
+        qemu_sglist_destroy(&qsg);
+    } else {
+        if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
+            status = NVME_INVALID_FIELD | NVME_DNR;
+        }
+        qemu_iovec_destroy(&iov);
+    }
+    return status;
+}
+
 static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
     uint64_t prp1, uint64_t prp2)
 {
@@ -678,7 +702,6 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
     return ret;
 }
 
-
 static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
 {
     NvmeIdentify *c = (NvmeIdentify *)cmd;
@@ -696,6 +719,57 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
     }
 }
 
+static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts)
+{
+    trace_nvme_setfeat_timestamp(ts);
+
+    n->host_timestamp = le64_to_cpu(ts);
+    n->timestamp_set_qemu_clock_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+}
+
+static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n)
+{
+    uint64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+    uint64_t elapsed_time = current_time - n->timestamp_set_qemu_clock_ms;
+
+    union nvme_timestamp {
+        struct {
+            uint64_t timestamp:48;
+            uint64_t sync:1;
+            uint64_t origin:3;
+            uint64_t rsvd1:12;
+        };
+        uint64_t all;
+    };
+
+    union nvme_timestamp ts;
+    ts.all = 0;
+
+    /*
+     * If the sum of the Timestamp value set by the host and the elapsed
+     * time exceeds 2^48, the value returned should be reduced modulo 2^48.
+     */
+    ts.timestamp = (n->host_timestamp + elapsed_time) & 0xffffffffffff;
+
+    /* If the host timestamp is non-zero, set the timestamp origin */
+    ts.origin = n->host_timestamp ? 0x01 : 0x00;
+
+    trace_nvme_getfeat_timestamp(ts.all);
+
+    return cpu_to_le64(ts.all);
+}
+
+static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
+{
+    uint64_t prp1 = le64_to_cpu(cmd->prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->prp2);
+
+    uint64_t timestamp = nvme_get_timestamp(n);
+
+    return nvme_dma_read_prp(n, (uint8_t *)&timestamp,
+                                 sizeof(timestamp), prp1, prp2);
+}
+
 static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
@@ -710,6 +784,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
         trace_nvme_getfeat_numq(result);
         break;
+    case NVME_TIMESTAMP:
+        return nvme_get_feature_timestamp(n, cmd);
+        break;
     default:
         trace_nvme_err_invalid_getfeat(dw10);
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -719,6 +796,24 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     return NVME_SUCCESS;
 }
 
+static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
+{
+    uint16_t ret;
+    uint64_t timestamp;
+    uint64_t prp1 = le64_to_cpu(cmd->prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->prp2);
+
+    ret = nvme_dma_write_prp(n, (uint8_t *)&timestamp,
+                                sizeof(timestamp), prp1, prp2);
+    if (ret != NVME_SUCCESS) {
+        return ret;
+    }
+
+    nvme_set_timestamp(n, timestamp);
+
+    return NVME_SUCCESS;
+}
+
 static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
@@ -735,6 +830,11 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         req->cqe.result =
             cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
         break;
+
+    case NVME_TIMESTAMP:
+        return nvme_set_feature_timestamp(n, cmd);
+        break;
+
     default:
         trace_nvme_err_invalid_setfeat(dw10);
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -907,6 +1007,8 @@ static int nvme_start_ctrl(NvmeCtrl *n)
     nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
         NVME_AQA_ASQS(n->bar.aqa) + 1);
 
+    nvme_set_timestamp(n, 0ULL);
+
     return 0;
 }
 
@@ -1270,7 +1372,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     id->sqes = (0x6 << 4) | 0x6;
     id->cqes = (0x4 << 4) | 0x4;
     id->nn = cpu_to_le32(n->num_namespaces);
-    id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS);
+    id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP);
     id->psd[0].mp = cpu_to_le16(0x9c4);
     id->psd[0].enlat = cpu_to_le32(0x10);
     id->psd[0].exlat = cpu_to_le32(0x4);
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 56c9d4b4b1..557194ee19 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -79,6 +79,8 @@ typedef struct NvmeCtrl {
     uint32_t    cmbloc;
     uint8_t     *cmbuf;
     uint64_t    irq_status;
+    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
+    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
 
     char            *serial;
     NvmeNamespace   *namespaces;
diff --git a/hw/block/trace-events b/hw/block/trace-events
index b92039a573..97a17838ed 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -46,6 +46,8 @@ nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
 nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s"
 nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
 nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
+nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64""
+nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64""
 nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
 nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
 nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index ef635be4c2..31b0f5ccc8 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -609,7 +609,7 @@ static void xen_cdrom_realize(XenBlockDevice *blockdev, Error **errp)
         int rc;
 
         /* Set up an empty drive */
-        conf->blk = blk_new(0, BLK_PERM_ALL);
+        conf->blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
 
         rc = blk_attach_dev(conf->blk, DEVICE(blockdev));
         if (!rc) {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 16ba667434..f1a0f45f9c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -24,6 +24,9 @@
 #include "hw/pci/pci.h"
 #include "hw/mem/nvdimm.h"
 
+GlobalProperty hw_compat_4_0_1[] = {};
+const size_t hw_compat_4_0_1_len = G_N_ELEMENTS(hw_compat_4_0_1);
+
 GlobalProperty hw_compat_4_0[] = {};
 const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0);
 
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index b45a7ef54b..ba412dd2ca 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -69,8 +69,8 @@ static void set_pointer(Object *obj, Visitor *v, Property *prop,
 
 /* --- drive --- */
 
-static void parse_drive(DeviceState *dev, const char *str, void **ptr,
-                        const char *propname, Error **errp)
+static void do_parse_drive(DeviceState *dev, const char *str, void **ptr,
+                           const char *propname, bool iothread, Error **errp)
 {
     BlockBackend *blk;
     bool blk_created = false;
@@ -80,7 +80,16 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr,
     if (!blk) {
         BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL);
         if (bs) {
-            blk = blk_new(0, BLK_PERM_ALL);
+            /*
+             * If the device supports iothreads, it will make sure to move the
+             * block node to the right AioContext if necessary (or fail if this
+             * isn't possible because of other users). Devices that are not
+             * aware of iothreads require their BlockBackends to be in the main
+             * AioContext.
+             */
+            AioContext *ctx = iothread ? bdrv_get_aio_context(bs) :
+                                         qemu_get_aio_context();
+            blk = blk_new(ctx, 0, BLK_PERM_ALL);
             blk_created = true;
 
             ret = blk_insert_bs(blk, bs, errp);
@@ -118,6 +127,18 @@ fail:
     }
 }
 
+static void parse_drive(DeviceState *dev, const char *str, void **ptr,
+                        const char *propname, Error **errp)
+{
+    do_parse_drive(dev, str, ptr, propname, false, errp);
+}
+
+static void parse_drive_iothread(DeviceState *dev, const char *str, void **ptr,
+                                 const char *propname, Error **errp)
+{
+    do_parse_drive(dev, str, ptr, propname, true, errp);
+}
+
 static void release_drive(Object *obj, const char *name, void *opaque)
 {
     DeviceState *dev = DEVICE(obj);
@@ -160,6 +181,12 @@ static void set_drive(Object *obj, Visitor *v, const char *name, void *opaque,
     set_pointer(obj, v, opaque, parse_drive, name, errp);
 }
 
+static void set_drive_iothread(Object *obj, Visitor *v, const char *name,
+                               void *opaque, Error **errp)
+{
+    set_pointer(obj, v, opaque, parse_drive_iothread, name, errp);
+}
+
 const PropertyInfo qdev_prop_drive = {
     .name  = "str",
     .description = "Node name or ID of a block device to use as a backend",
@@ -168,6 +195,14 @@ const PropertyInfo qdev_prop_drive = {
     .release = release_drive,
 };
 
+const PropertyInfo qdev_prop_drive_iothread = {
+    .name  = "str",
+    .description = "Node name or ID of a block device to use as a backend",
+    .get   = get_drive,
+    .set   = set_drive_iothread,
+    .release = release_drive,
+};
+
 /* --- character device --- */
 
 static void get_chr(Object *obj, Visitor *v, const char *name, void *opaque,
diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index 307cf90a51..689a867a22 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -153,6 +153,16 @@ static void sysbus_mmio_map_common(SysBusDevice *dev, int n, hwaddr addr,
     }
 }
 
+void sysbus_mmio_unmap(SysBusDevice *dev, int n)
+{
+    assert(n >= 0 && n < dev->num_mmio);
+
+    if (dev->mmio[n].addr != (hwaddr)-1) {
+        memory_region_del_subregion(get_system_memory(), dev->mmio[n].memory);
+        dev->mmio[n].addr = (hwaddr)-1;
+    }
+}
+
 void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr)
 {
     sysbus_mmio_map_common(dev, n, addr, false, 0);
diff --git a/hw/display/Kconfig b/hw/display/Kconfig
index dc1f113df2..910dccb2f7 100644
--- a/hw/display/Kconfig
+++ b/hw/display/Kconfig
@@ -111,6 +111,16 @@ config VIRTIO_VGA
     depends on VIRTIO_PCI
     select VGA
 
+config VHOST_USER_GPU
+    bool
+    default y
+    depends on VIRTIO_GPU && VHOST_USER
+
+config VHOST_USER_VGA
+    bool
+    default y
+    depends on VIRTIO_VGA && VHOST_USER_GPU
+
 config DPCD
     bool
     select AUX
diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs
index 650031f725..a64998fc7b 100644
--- a/hw/display/Makefile.objs
+++ b/hw/display/Makefile.objs
@@ -43,9 +43,12 @@ obj-$(CONFIG_VGA) += vga.o
 
 common-obj-$(CONFIG_QXL) += qxl.o qxl-logger.o qxl-render.o
 
-obj-$(CONFIG_VIRTIO_GPU) += virtio-gpu.o virtio-gpu-3d.o
+obj-$(CONFIG_VIRTIO_GPU) += virtio-gpu-base.o virtio-gpu.o virtio-gpu-3d.o
+obj-$(CONFIG_VHOST_USER_GPU) += vhost-user-gpu.o
 obj-$(call land,$(CONFIG_VIRTIO_GPU),$(CONFIG_VIRTIO_PCI)) += virtio-gpu-pci.o
+obj-$(call land,$(CONFIG_VHOST_USER_GPU),$(CONFIG_VIRTIO_PCI)) += vhost-user-gpu-pci.o
 obj-$(CONFIG_VIRTIO_VGA) += virtio-vga.o
+obj-$(CONFIG_VHOST_USER_VGA) += vhost-user-vga.o
 virtio-gpu.o-cflags := $(VIRGL_CFLAGS)
 virtio-gpu.o-libs += $(VIRGL_LIBS)
 virtio-gpu-3d.o-cflags := $(VIRGL_CFLAGS)
diff --git a/hw/display/vhost-user-gpu-pci.c b/hw/display/vhost-user-gpu-pci.c
new file mode 100644
index 0000000000..7d9b1f5a8c
--- /dev/null
+++ b/hw/display/vhost-user-gpu-pci.c
@@ -0,0 +1,51 @@
+/*
+ * vhost-user GPU PCI device
+ *
+ * Copyright Red Hat, Inc. 2018
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/virtio/virtio-gpu-pci.h"
+
+#define TYPE_VHOST_USER_GPU_PCI "vhost-user-gpu-pci"
+#define VHOST_USER_GPU_PCI(obj)                                     \
+    OBJECT_CHECK(VhostUserGPUPCI, (obj), TYPE_VHOST_USER_GPU_PCI)
+
+typedef struct VhostUserGPUPCI {
+    VirtIOGPUPCIBase parent_obj;
+
+    VhostUserGPU vdev;
+} VhostUserGPUPCI;
+
+static void vhost_user_gpu_pci_initfn(Object *obj)
+{
+    VhostUserGPUPCI *dev = VHOST_USER_GPU_PCI(obj);
+
+    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+                                TYPE_VHOST_USER_GPU);
+
+    VIRTIO_GPU_PCI_BASE(obj)->vgpu = VIRTIO_GPU_BASE(&dev->vdev);
+
+    object_property_add_alias(obj, "chardev",
+                              OBJECT(&dev->vdev), "chardev",
+                              &error_abort);
+}
+
+static const VirtioPCIDeviceTypeInfo vhost_user_gpu_pci_info = {
+    .generic_name = TYPE_VHOST_USER_GPU_PCI,
+    .parent = TYPE_VIRTIO_GPU_PCI_BASE,
+    .instance_size = sizeof(VhostUserGPUPCI),
+    .instance_init = vhost_user_gpu_pci_initfn,
+};
+
+static void vhost_user_gpu_pci_register_types(void)
+{
+    virtio_pci_types_register(&vhost_user_gpu_pci_info);
+}
+
+type_init(vhost_user_gpu_pci_register_types)
diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c
new file mode 100644
index 0000000000..7181d9cdba
--- /dev/null
+++ b/hw/display/vhost-user-gpu.c
@@ -0,0 +1,607 @@
+/*
+ * vhost-user GPU Device
+ *
+ * Copyright Red Hat, Inc. 2018
+ *
+ * Authors:
+ *     Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/virtio/virtio-gpu.h"
+#include "chardev/char-fe.h"
+#include "qapi/error.h"
+#include "migration/blocker.h"
+
+#define VHOST_USER_GPU(obj)                                    \
+    OBJECT_CHECK(VhostUserGPU, (obj), TYPE_VHOST_USER_GPU)
+
+typedef enum VhostUserGpuRequest {
+    VHOST_USER_GPU_NONE = 0,
+    VHOST_USER_GPU_GET_PROTOCOL_FEATURES,
+    VHOST_USER_GPU_SET_PROTOCOL_FEATURES,
+    VHOST_USER_GPU_GET_DISPLAY_INFO,
+    VHOST_USER_GPU_CURSOR_POS,
+    VHOST_USER_GPU_CURSOR_POS_HIDE,
+    VHOST_USER_GPU_CURSOR_UPDATE,
+    VHOST_USER_GPU_SCANOUT,
+    VHOST_USER_GPU_UPDATE,
+    VHOST_USER_GPU_DMABUF_SCANOUT,
+    VHOST_USER_GPU_DMABUF_UPDATE,
+} VhostUserGpuRequest;
+
+typedef struct VhostUserGpuDisplayInfoReply {
+    struct virtio_gpu_resp_display_info info;
+} VhostUserGpuDisplayInfoReply;
+
+typedef struct VhostUserGpuCursorPos {
+    uint32_t scanout_id;
+    uint32_t x;
+    uint32_t y;
+} QEMU_PACKED VhostUserGpuCursorPos;
+
+typedef struct VhostUserGpuCursorUpdate {
+    VhostUserGpuCursorPos pos;
+    uint32_t hot_x;
+    uint32_t hot_y;
+    uint32_t data[64 * 64];
+} QEMU_PACKED VhostUserGpuCursorUpdate;
+
+typedef struct VhostUserGpuScanout {
+    uint32_t scanout_id;
+    uint32_t width;
+    uint32_t height;
+} QEMU_PACKED VhostUserGpuScanout;
+
+typedef struct VhostUserGpuUpdate {
+    uint32_t scanout_id;
+    uint32_t x;
+    uint32_t y;
+    uint32_t width;
+    uint32_t height;
+    uint8_t data[];
+} QEMU_PACKED VhostUserGpuUpdate;
+
+typedef struct VhostUserGpuDMABUFScanout {
+    uint32_t scanout_id;
+    uint32_t x;
+    uint32_t y;
+    uint32_t width;
+    uint32_t height;
+    uint32_t fd_width;
+    uint32_t fd_height;
+    uint32_t fd_stride;
+    uint32_t fd_flags;
+    int fd_drm_fourcc;
+} QEMU_PACKED VhostUserGpuDMABUFScanout;
+
+typedef struct VhostUserGpuMsg {
+    uint32_t request; /* VhostUserGpuRequest */
+    uint32_t flags;
+    uint32_t size; /* the following payload size */
+    union {
+        VhostUserGpuCursorPos cursor_pos;
+        VhostUserGpuCursorUpdate cursor_update;
+        VhostUserGpuScanout scanout;
+        VhostUserGpuUpdate update;
+        VhostUserGpuDMABUFScanout dmabuf_scanout;
+        struct virtio_gpu_resp_display_info display_info;
+        uint64_t u64;
+    } payload;
+} QEMU_PACKED VhostUserGpuMsg;
+
+static VhostUserGpuMsg m __attribute__ ((unused));
+#define VHOST_USER_GPU_HDR_SIZE \
+    (sizeof(m.request) + sizeof(m.size) + sizeof(m.flags))
+
+#define VHOST_USER_GPU_MSG_FLAG_REPLY 0x4
+
+static void vhost_user_gpu_update_blocked(VhostUserGPU *g, bool blocked);
+
+static void
+vhost_user_gpu_handle_cursor(VhostUserGPU *g, VhostUserGpuMsg *msg)
+{
+    VhostUserGpuCursorPos *pos = &msg->payload.cursor_pos;
+    struct virtio_gpu_scanout *s;
+
+    if (pos->scanout_id >= g->parent_obj.conf.max_outputs) {
+        return;
+    }
+    s = &g->parent_obj.scanout[pos->scanout_id];
+
+    if (msg->request == VHOST_USER_GPU_CURSOR_UPDATE) {
+        VhostUserGpuCursorUpdate *up = &msg->payload.cursor_update;
+        if (!s->current_cursor) {
+            s->current_cursor = cursor_alloc(64, 64);
+        }
+
+        s->current_cursor->hot_x = up->hot_x;
+        s->current_cursor->hot_y = up->hot_y;
+
+        memcpy(s->current_cursor->data, up->data,
+               64 * 64 * sizeof(uint32_t));
+
+        dpy_cursor_define(s->con, s->current_cursor);
+    }
+
+    dpy_mouse_set(s->con, pos->x, pos->y,
+                  msg->request != VHOST_USER_GPU_CURSOR_POS_HIDE);
+}
+
+static void
+vhost_user_gpu_send_msg(VhostUserGPU *g, const VhostUserGpuMsg *msg)
+{
+    qemu_chr_fe_write(&g->vhost_chr, (uint8_t *)msg,
+                      VHOST_USER_GPU_HDR_SIZE + msg->size);
+}
+
+static void
+vhost_user_gpu_unblock(VhostUserGPU *g)
+{
+    VhostUserGpuMsg msg = {
+        .request = VHOST_USER_GPU_DMABUF_UPDATE,
+        .flags = VHOST_USER_GPU_MSG_FLAG_REPLY,
+    };
+
+    vhost_user_gpu_send_msg(g, &msg);
+}
+
+static void
+vhost_user_gpu_handle_display(VhostUserGPU *g, VhostUserGpuMsg *msg)
+{
+    QemuConsole *con = NULL;
+    struct virtio_gpu_scanout *s;
+
+    switch (msg->request) {
+    case VHOST_USER_GPU_GET_PROTOCOL_FEATURES: {
+        VhostUserGpuMsg reply = {
+            .request = msg->request,
+            .flags = VHOST_USER_GPU_MSG_FLAG_REPLY,
+            .size = sizeof(uint64_t),
+        };
+
+        vhost_user_gpu_send_msg(g, &reply);
+        break;
+    }
+    case VHOST_USER_GPU_SET_PROTOCOL_FEATURES: {
+        break;
+    }
+    case VHOST_USER_GPU_GET_DISPLAY_INFO: {
+        struct virtio_gpu_resp_display_info display_info = { {} };
+        VhostUserGpuMsg reply = {
+            .request = msg->request,
+            .flags = VHOST_USER_GPU_MSG_FLAG_REPLY,
+            .size = sizeof(struct virtio_gpu_resp_display_info),
+        };
+
+        display_info.hdr.type = VIRTIO_GPU_RESP_OK_DISPLAY_INFO;
+        virtio_gpu_base_fill_display_info(VIRTIO_GPU_BASE(g), &display_info);
+        memcpy(&reply.payload.display_info, &display_info,
+               sizeof(display_info));
+        vhost_user_gpu_send_msg(g, &reply);
+        break;
+    }
+    case VHOST_USER_GPU_SCANOUT: {
+        VhostUserGpuScanout *m = &msg->payload.scanout;
+
+        if (m->scanout_id >= g->parent_obj.conf.max_outputs) {
+            return;
+        }
+
+        g->parent_obj.enable = 1;
+        s = &g->parent_obj.scanout[m->scanout_id];
+        con = s->con;
+
+        if (m->scanout_id == 0 && m->width == 0) {
+            s->ds = qemu_create_message_surface(640, 480,
+                                                "Guest disabled display.");
+            dpy_gfx_replace_surface(con, s->ds);
+        } else {
+            s->ds = qemu_create_displaysurface(m->width, m->height);
+            /* replace surface on next update */
+        }
+
+        break;
+    }
+    case VHOST_USER_GPU_DMABUF_SCANOUT: {
+        VhostUserGpuDMABUFScanout *m = &msg->payload.dmabuf_scanout;
+        int fd = qemu_chr_fe_get_msgfd(&g->vhost_chr);
+        QemuDmaBuf *dmabuf;
+
+        if (m->scanout_id >= g->parent_obj.conf.max_outputs) {
+            error_report("invalid scanout: %d", m->scanout_id);
+            if (fd >= 0) {
+                close(fd);
+            }
+            break;
+        }
+
+        g->parent_obj.enable = 1;
+        con = g->parent_obj.scanout[m->scanout_id].con;
+        dmabuf = &g->dmabuf[m->scanout_id];
+        if (dmabuf->fd >= 0) {
+            close(dmabuf->fd);
+            dmabuf->fd = -1;
+        }
+        if (!console_has_gl_dmabuf(con)) {
+            /* it would be nice to report that error earlier */
+            error_report("console doesn't support dmabuf!");
+            break;
+        }
+        dpy_gl_release_dmabuf(con, dmabuf);
+        if (fd == -1) {
+            dpy_gl_scanout_disable(con);
+            break;
+        }
+        *dmabuf = (QemuDmaBuf) {
+            .fd = fd,
+            .width = m->fd_width,
+            .height = m->fd_height,
+            .stride = m->fd_stride,
+            .fourcc = m->fd_drm_fourcc,
+            .y0_top = m->fd_flags & VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP,
+        };
+        dpy_gl_scanout_dmabuf(con, dmabuf);
+        break;
+    }
+    case VHOST_USER_GPU_DMABUF_UPDATE: {
+        VhostUserGpuUpdate *m = &msg->payload.update;
+
+        if (m->scanout_id >= g->parent_obj.conf.max_outputs ||
+            !g->parent_obj.scanout[m->scanout_id].con) {
+            error_report("invalid scanout update: %d", m->scanout_id);
+            vhost_user_gpu_unblock(g);
+            break;
+        }
+
+        con = g->parent_obj.scanout[m->scanout_id].con;
+        if (!console_has_gl(con)) {
+            error_report("console doesn't support GL!");
+            vhost_user_gpu_unblock(g);
+            break;
+        }
+        dpy_gl_update(con, m->x, m->y, m->width, m->height);
+        g->backend_blocked = true;
+        break;
+    }
+    case VHOST_USER_GPU_UPDATE: {
+        VhostUserGpuUpdate *m = &msg->payload.update;
+
+        if (m->scanout_id >= g->parent_obj.conf.max_outputs) {
+            break;
+        }
+        s = &g->parent_obj.scanout[m->scanout_id];
+        con = s->con;
+        pixman_image_t *image =
+            pixman_image_create_bits(PIXMAN_x8r8g8b8,
+                                     m->width,
+                                     m->height,
+                                     (uint32_t *)m->data,
+                                     m->width * 4);
+
+        pixman_image_composite(PIXMAN_OP_SRC,
+                               image, NULL, s->ds->image,
+                               0, 0, 0, 0, m->x, m->y, m->width, m->height);
+
+        pixman_image_unref(image);
+        if (qemu_console_surface(con) != s->ds) {
+            dpy_gfx_replace_surface(con, s->ds);
+        } else {
+            dpy_gfx_update(con, m->x, m->y, m->width, m->height);
+        }
+        break;
+    }
+    default:
+        g_warning("unhandled message %d %d", msg->request, msg->size);
+    }
+
+    if (con && qemu_console_is_gl_blocked(con)) {
+        vhost_user_gpu_update_blocked(g, true);
+    }
+}
+
+static void
+vhost_user_gpu_chr_read(void *opaque)
+{
+    VhostUserGPU *g = opaque;
+    VhostUserGpuMsg *msg = NULL;
+    VhostUserGpuRequest request;
+    uint32_t size, flags;
+    int r;
+
+    r = qemu_chr_fe_read_all(&g->vhost_chr,
+                             (uint8_t *)&request, sizeof(uint32_t));
+    if (r != sizeof(uint32_t)) {
+        error_report("failed to read msg header: %d, %d", r, errno);
+        goto end;
+    }
+
+    r = qemu_chr_fe_read_all(&g->vhost_chr,
+                             (uint8_t *)&flags, sizeof(uint32_t));
+    if (r != sizeof(uint32_t)) {
+        error_report("failed to read msg flags");
+        goto end;
+    }
+
+    r = qemu_chr_fe_read_all(&g->vhost_chr,
+                             (uint8_t *)&size, sizeof(uint32_t));
+    if (r != sizeof(uint32_t)) {
+        error_report("failed to read msg size");
+        goto end;
+    }
+
+    msg = g_malloc(VHOST_USER_GPU_HDR_SIZE + size);
+    g_return_if_fail(msg != NULL);
+
+    r = qemu_chr_fe_read_all(&g->vhost_chr,
+                             (uint8_t *)&msg->payload, size);
+    if (r != size) {
+        error_report("failed to read msg payload %d != %d", r, size);
+        goto end;
+    }
+
+    msg->request = request;
+    msg->flags = size;
+    msg->size = size;
+
+    if (request == VHOST_USER_GPU_CURSOR_UPDATE ||
+        request == VHOST_USER_GPU_CURSOR_POS ||
+        request == VHOST_USER_GPU_CURSOR_POS_HIDE) {
+        vhost_user_gpu_handle_cursor(g, msg);
+    } else {
+        vhost_user_gpu_handle_display(g, msg);
+    }
+
+end:
+    g_free(msg);
+}
+
+static void
+vhost_user_gpu_update_blocked(VhostUserGPU *g, bool blocked)
+{
+    qemu_set_fd_handler(g->vhost_gpu_fd,
+                        blocked ? NULL : vhost_user_gpu_chr_read, NULL, g);
+}
+
+static void
+vhost_user_gpu_gl_unblock(VirtIOGPUBase *b)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(b);
+
+    if (g->backend_blocked) {
+        vhost_user_gpu_unblock(VHOST_USER_GPU(g));
+        g->backend_blocked = false;
+    }
+
+    vhost_user_gpu_update_blocked(VHOST_USER_GPU(g), false);
+}
+
+static bool
+vhost_user_gpu_do_set_socket(VhostUserGPU *g, Error **errp)
+{
+    Chardev *chr;
+    int sv[2];
+
+    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
+        error_setg_errno(errp, errno, "socketpair() failed");
+        return false;
+    }
+
+    chr = CHARDEV(object_new(TYPE_CHARDEV_SOCKET));
+    if (!chr || qemu_chr_add_client(chr, sv[0]) == -1) {
+        error_setg(errp, "Failed to make socket chardev");
+        goto err;
+    }
+    if (!qemu_chr_fe_init(&g->vhost_chr, chr, errp)) {
+        goto err;
+    }
+    if (vhost_user_gpu_set_socket(&g->vhost->dev, sv[1]) < 0) {
+        error_setg(errp, "Failed to set vhost-user-gpu socket");
+        qemu_chr_fe_deinit(&g->vhost_chr, false);
+        goto err;
+    }
+
+    g->vhost_gpu_fd = sv[0];
+    vhost_user_gpu_update_blocked(g, false);
+    close(sv[1]);
+    return true;
+
+err:
+    close(sv[0]);
+    close(sv[1]);
+    if (chr) {
+        object_unref(OBJECT(chr));
+    }
+    return false;
+}
+
+static void
+vhost_user_gpu_get_config(VirtIODevice *vdev, uint8_t *config_data)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(vdev);
+    VirtIOGPUBase *b = VIRTIO_GPU_BASE(vdev);
+    struct virtio_gpu_config *vgconfig =
+        (struct virtio_gpu_config *)config_data;
+    int ret;
+
+    memset(config_data, 0, sizeof(struct virtio_gpu_config));
+
+    ret = vhost_dev_get_config(&g->vhost->dev,
+                               config_data, sizeof(struct virtio_gpu_config));
+    if (ret) {
+        error_report("vhost-user-gpu: get device config space failed");
+        return;
+    }
+
+    /* those fields are managed by qemu */
+    vgconfig->num_scanouts = b->virtio_config.num_scanouts;
+    vgconfig->events_read = b->virtio_config.events_read;
+    vgconfig->events_clear = b->virtio_config.events_clear;
+}
+
+static void
+vhost_user_gpu_set_config(VirtIODevice *vdev,
+                          const uint8_t *config_data)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(vdev);
+    VirtIOGPUBase *b = VIRTIO_GPU_BASE(vdev);
+    const struct virtio_gpu_config *vgconfig =
+        (const struct virtio_gpu_config *)config_data;
+    int ret;
+
+    if (vgconfig->events_clear) {
+        b->virtio_config.events_read &= ~vgconfig->events_clear;
+    }
+
+    ret = vhost_dev_set_config(&g->vhost->dev, config_data,
+                               0, sizeof(struct virtio_gpu_config),
+                               VHOST_SET_CONFIG_TYPE_MASTER);
+    if (ret) {
+        error_report("vhost-user-gpu: set device config space failed");
+        return;
+    }
+}
+
+static void
+vhost_user_gpu_set_status(VirtIODevice *vdev, uint8_t val)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(vdev);
+    Error *err = NULL;
+
+    if (val & VIRTIO_CONFIG_S_DRIVER_OK && vdev->vm_running) {
+        if (!vhost_user_gpu_do_set_socket(g, &err)) {
+            error_report_err(err);
+            return;
+        }
+        vhost_user_backend_start(g->vhost);
+    } else {
+        /* unblock any wait and stop processing */
+        if (g->vhost_gpu_fd != -1) {
+            vhost_user_gpu_update_blocked(g, true);
+            qemu_chr_fe_deinit(&g->vhost_chr, true);
+            g->vhost_gpu_fd = -1;
+        }
+        vhost_user_backend_stop(g->vhost);
+    }
+}
+
+static bool
+vhost_user_gpu_guest_notifier_pending(VirtIODevice *vdev, int idx)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(vdev);
+
+    return vhost_virtqueue_pending(&g->vhost->dev, idx);
+}
+
+static void
+vhost_user_gpu_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(vdev);
+
+    vhost_virtqueue_mask(&g->vhost->dev, vdev, idx, mask);
+}
+
+static void
+vhost_user_gpu_instance_init(Object *obj)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(obj);
+
+    g->vhost = VHOST_USER_BACKEND(object_new(TYPE_VHOST_USER_BACKEND));
+    object_property_add_alias(obj, "chardev",
+                              OBJECT(g->vhost), "chardev", &error_abort);
+}
+
+static void
+vhost_user_gpu_instance_finalize(Object *obj)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(obj);
+
+    object_unref(OBJECT(g->vhost));
+}
+
+static void
+vhost_user_gpu_reset(VirtIODevice *vdev)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(vdev);
+
+    virtio_gpu_base_reset(VIRTIO_GPU_BASE(vdev));
+
+    vhost_user_backend_stop(g->vhost);
+}
+
+static int
+vhost_user_gpu_config_change(struct vhost_dev *dev)
+{
+    error_report("vhost-user-gpu: unhandled backend config change");
+    return -1;
+}
+
+static const VhostDevConfigOps config_ops = {
+    .vhost_dev_config_notifier = vhost_user_gpu_config_change,
+};
+
+static void
+vhost_user_gpu_device_realize(DeviceState *qdev, Error **errp)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(qdev);
+    VirtIODevice *vdev = VIRTIO_DEVICE(g);
+
+    vhost_dev_set_config_notifier(&g->vhost->dev, &config_ops);
+    if (vhost_user_backend_dev_init(g->vhost, vdev, 2, errp) < 0) {
+        return;
+    }
+
+    if (virtio_has_feature(g->vhost->dev.features, VIRTIO_GPU_F_VIRGL)) {
+        g->parent_obj.conf.flags |= 1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED;
+    }
+
+    if (!virtio_gpu_base_device_realize(qdev, NULL, NULL, errp)) {
+        return;
+    }
+
+    g->vhost_gpu_fd = -1;
+}
+
+static Property vhost_user_gpu_properties[] = {
+    VIRTIO_GPU_BASE_PROPERTIES(VhostUserGPU, parent_obj.conf),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void
+vhost_user_gpu_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    VirtIOGPUBaseClass *vgc = VIRTIO_GPU_BASE_CLASS(klass);
+
+    vgc->gl_unblock = vhost_user_gpu_gl_unblock;
+
+    vdc->realize = vhost_user_gpu_device_realize;
+    vdc->reset = vhost_user_gpu_reset;
+    vdc->set_status   = vhost_user_gpu_set_status;
+    vdc->guest_notifier_mask = vhost_user_gpu_guest_notifier_mask;
+    vdc->guest_notifier_pending = vhost_user_gpu_guest_notifier_pending;
+    vdc->get_config = vhost_user_gpu_get_config;
+    vdc->set_config = vhost_user_gpu_set_config;
+
+    dc->props = vhost_user_gpu_properties;
+}
+
+static const TypeInfo vhost_user_gpu_info = {
+    .name = TYPE_VHOST_USER_GPU,
+    .parent = TYPE_VIRTIO_GPU_BASE,
+    .instance_size = sizeof(VhostUserGPU),
+    .instance_init = vhost_user_gpu_instance_init,
+    .instance_finalize = vhost_user_gpu_instance_finalize,
+    .class_init = vhost_user_gpu_class_init,
+};
+
+static void vhost_user_gpu_register_types(void)
+{
+    type_register_static(&vhost_user_gpu_info);
+}
+
+type_init(vhost_user_gpu_register_types)
diff --git a/hw/display/vhost-user-vga.c b/hw/display/vhost-user-vga.c
new file mode 100644
index 0000000000..a7195276d9
--- /dev/null
+++ b/hw/display/vhost-user-vga.c
@@ -0,0 +1,52 @@
+/*
+ * vhost-user VGA device
+ *
+ * Copyright Red Hat, Inc. 2018
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "virtio-vga.h"
+
+#define TYPE_VHOST_USER_VGA "vhost-user-vga"
+
+#define VHOST_USER_VGA(obj)                                \
+    OBJECT_CHECK(VhostUserVGA, (obj), TYPE_VHOST_USER_VGA)
+
+typedef struct VhostUserVGA {
+    VirtIOVGABase parent_obj;
+
+    VhostUserGPU vdev;
+} VhostUserVGA;
+
+static void vhost_user_vga_inst_initfn(Object *obj)
+{
+    VhostUserVGA *dev = VHOST_USER_VGA(obj);
+
+    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+                                TYPE_VHOST_USER_GPU);
+
+    VIRTIO_VGA_BASE(dev)->vgpu = VIRTIO_GPU_BASE(&dev->vdev);
+
+    object_property_add_alias(obj, "chardev",
+                              OBJECT(&dev->vdev), "chardev",
+                              &error_abort);
+}
+
+static const VirtioPCIDeviceTypeInfo vhost_user_vga_info = {
+    .generic_name  = TYPE_VHOST_USER_VGA,
+    .parent        = TYPE_VIRTIO_VGA_BASE,
+    .instance_size = sizeof(struct VhostUserVGA),
+    .instance_init = vhost_user_vga_inst_initfn,
+};
+
+static void vhost_user_vga_register_types(void)
+{
+    virtio_pci_types_register(&vhost_user_vga_info);
+}
+
+type_init(vhost_user_vga_register_types)
diff --git a/hw/display/virtio-gpu-3d.c b/hw/display/virtio-gpu-3d.c
index 2d302526ab..b918167aec 100644
--- a/hw/display/virtio-gpu-3d.c
+++ b/hw/display/virtio-gpu-3d.c
@@ -118,11 +118,11 @@ static void virgl_cmd_context_destroy(VirtIOGPU *g,
 static void virtio_gpu_rect_update(VirtIOGPU *g, int idx, int x, int y,
                                 int width, int height)
 {
-    if (!g->scanout[idx].con) {
+    if (!g->parent_obj.scanout[idx].con) {
         return;
     }
 
-    dpy_gl_update(g->scanout[idx].con, x, y, width, height);
+    dpy_gl_update(g->parent_obj.scanout[idx].con, x, y, width, height);
 }
 
 static void virgl_cmd_resource_flush(VirtIOGPU *g,
@@ -135,8 +135,8 @@ static void virgl_cmd_resource_flush(VirtIOGPU *g,
     trace_virtio_gpu_cmd_res_flush(rf.resource_id,
                                    rf.r.width, rf.r.height, rf.r.x, rf.r.y);
 
-    for (i = 0; i < g->conf.max_outputs; i++) {
-        if (g->scanout[i].resource_id != rf.resource_id) {
+    for (i = 0; i < g->parent_obj.conf.max_outputs; i++) {
+        if (g->parent_obj.scanout[i].resource_id != rf.resource_id) {
             continue;
         }
         virtio_gpu_rect_update(g, i, rf.r.x, rf.r.y, rf.r.width, rf.r.height);
@@ -154,13 +154,13 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g,
     trace_virtio_gpu_cmd_set_scanout(ss.scanout_id, ss.resource_id,
                                      ss.r.width, ss.r.height, ss.r.x, ss.r.y);
 
-    if (ss.scanout_id >= g->conf.max_outputs) {
+    if (ss.scanout_id >= g->parent_obj.conf.max_outputs) {
         qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d",
                       __func__, ss.scanout_id);
         cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
         return;
     }
-    g->enable = 1;
+    g->parent_obj.enable = 1;
 
     memset(&info, 0, sizeof(info));
 
@@ -173,20 +173,22 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g,
             cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
             return;
         }
-        qemu_console_resize(g->scanout[ss.scanout_id].con,
+        qemu_console_resize(g->parent_obj.scanout[ss.scanout_id].con,
                             ss.r.width, ss.r.height);
         virgl_renderer_force_ctx_0();
-        dpy_gl_scanout_texture(g->scanout[ss.scanout_id].con, info.tex_id,
-                               info.flags & 1 /* FIXME: Y_0_TOP */,
-                               info.width, info.height,
-                               ss.r.x, ss.r.y, ss.r.width, ss.r.height);
+        dpy_gl_scanout_texture(
+            g->parent_obj.scanout[ss.scanout_id].con, info.tex_id,
+            info.flags & 1 /* FIXME: Y_0_TOP */,
+            info.width, info.height,
+            ss.r.x, ss.r.y, ss.r.width, ss.r.height);
     } else {
         if (ss.scanout_id != 0) {
-            dpy_gfx_replace_surface(g->scanout[ss.scanout_id].con, NULL);
+            dpy_gfx_replace_surface(
+                g->parent_obj.scanout[ss.scanout_id].con, NULL);
         }
-        dpy_gl_scanout_disable(g->scanout[ss.scanout_id].con);
+        dpy_gl_scanout_disable(g->parent_obj.scanout[ss.scanout_id].con);
     }
-    g->scanout[ss.scanout_id].resource_id = ss.resource_id;
+    g->parent_obj.scanout[ss.scanout_id].resource_id = ss.resource_id;
 }
 
 static void virgl_cmd_submit_3d(VirtIOGPU *g,
@@ -209,7 +211,7 @@ static void virgl_cmd_submit_3d(VirtIOGPU *g,
         goto out;
     }
 
-    if (virtio_gpu_stats_enabled(g->conf)) {
+    if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
         g->stats.req_3d++;
         g->stats.bytes_3d += cs.size;
     }
@@ -507,7 +509,7 @@ static void virgl_write_fence(void *opaque, uint32_t fence)
         QTAILQ_REMOVE(&g->fenceq, cmd, next);
         g_free(cmd);
         g->inflight--;
-        if (virtio_gpu_stats_enabled(g->conf)) {
+        if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
             fprintf(stderr, "inflight: %3d (-)\r", g->inflight);
         }
     }
@@ -524,7 +526,7 @@ virgl_create_context(void *opaque, int scanout_idx,
     qparams.major_ver = params->major_ver;
     qparams.minor_ver = params->minor_ver;
 
-    ctx = dpy_gl_ctx_create(g->scanout[scanout_idx].con, &qparams);
+    ctx = dpy_gl_ctx_create(g->parent_obj.scanout[scanout_idx].con, &qparams);
     return (virgl_renderer_gl_context)ctx;
 }
 
@@ -533,7 +535,7 @@ static void virgl_destroy_context(void *opaque, virgl_renderer_gl_context ctx)
     VirtIOGPU *g = opaque;
     QEMUGLContext qctx = (QEMUGLContext)ctx;
 
-    dpy_gl_ctx_destroy(g->scanout[0].con, qctx);
+    dpy_gl_ctx_destroy(g->parent_obj.scanout[0].con, qctx);
 }
 
 static int virgl_make_context_current(void *opaque, int scanout_idx,
@@ -542,7 +544,8 @@ static int virgl_make_context_current(void *opaque, int scanout_idx,
     VirtIOGPU *g = opaque;
     QEMUGLContext qctx = (QEMUGLContext)ctx;
 
-    return dpy_gl_ctx_make_current(g->scanout[scanout_idx].con, qctx);
+    return dpy_gl_ctx_make_current(g->parent_obj.scanout[scanout_idx].con,
+                                   qctx);
 }
 
 static struct virgl_renderer_callbacks virtio_gpu_3d_cbs = {
@@ -594,11 +597,11 @@ void virtio_gpu_virgl_reset(VirtIOGPU *g)
     int i;
 
     /* virgl_renderer_reset() ??? */
-    for (i = 0; i < g->conf.max_outputs; i++) {
+    for (i = 0; i < g->parent_obj.conf.max_outputs; i++) {
         if (i != 0) {
-            dpy_gfx_replace_surface(g->scanout[i].con, NULL);
+            dpy_gfx_replace_surface(g->parent_obj.scanout[i].con, NULL);
         }
-        dpy_gl_scanout_disable(g->scanout[i].con);
+        dpy_gl_scanout_disable(g->parent_obj.scanout[i].con);
     }
 }
 
@@ -614,7 +617,7 @@ int virtio_gpu_virgl_init(VirtIOGPU *g)
     g->fence_poll = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                  virtio_gpu_fence_poll, g);
 
-    if (virtio_gpu_stats_enabled(g->conf)) {
+    if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
         g->print_stats = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                       virtio_gpu_print_stats, g);
         timer_mod(g->print_stats, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c
new file mode 100644
index 0000000000..55e07995fe
--- /dev/null
+++ b/hw/display/virtio-gpu-base.c
@@ -0,0 +1,268 @@
+/*
+ * Virtio GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2014
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/virtio/virtio-gpu.h"
+#include "migration/blocker.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+void
+virtio_gpu_base_reset(VirtIOGPUBase *g)
+{
+    int i;
+
+    g->enable = 0;
+    g->use_virgl_renderer = false;
+
+    for (i = 0; i < g->conf.max_outputs; i++) {
+        g->scanout[i].resource_id = 0;
+        g->scanout[i].width = 0;
+        g->scanout[i].height = 0;
+        g->scanout[i].x = 0;
+        g->scanout[i].y = 0;
+        g->scanout[i].ds = NULL;
+    }
+}
+
+void
+virtio_gpu_base_fill_display_info(VirtIOGPUBase *g,
+                                  struct virtio_gpu_resp_display_info *dpy_info)
+{
+    int i;
+
+    for (i = 0; i < g->conf.max_outputs; i++) {
+        if (g->enabled_output_bitmask & (1 << i)) {
+            dpy_info->pmodes[i].enabled = 1;
+            dpy_info->pmodes[i].r.width = cpu_to_le32(g->req_state[i].width);
+            dpy_info->pmodes[i].r.height = cpu_to_le32(g->req_state[i].height);
+        }
+    }
+}
+
+static void virtio_gpu_invalidate_display(void *opaque)
+{
+}
+
+static void virtio_gpu_update_display(void *opaque)
+{
+}
+
+static void virtio_gpu_text_update(void *opaque, console_ch_t *chardata)
+{
+}
+
+static void virtio_gpu_notify_event(VirtIOGPUBase *g, uint32_t event_type)
+{
+    g->virtio_config.events_read |= event_type;
+    virtio_notify_config(&g->parent_obj);
+}
+
+static int virtio_gpu_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info)
+{
+    VirtIOGPUBase *g = opaque;
+
+    if (idx >= g->conf.max_outputs) {
+        return -1;
+    }
+
+    g->req_state[idx].x = info->xoff;
+    g->req_state[idx].y = info->yoff;
+    g->req_state[idx].width = info->width;
+    g->req_state[idx].height = info->height;
+
+    if (info->width && info->height) {
+        g->enabled_output_bitmask |= (1 << idx);
+    } else {
+        g->enabled_output_bitmask &= ~(1 << idx);
+    }
+
+    /* send event to guest */
+    virtio_gpu_notify_event(g, VIRTIO_GPU_EVENT_DISPLAY);
+    return 0;
+}
+
+static void
+virtio_gpu_gl_block(void *opaque, bool block)
+{
+    VirtIOGPUBase *g = opaque;
+    VirtIOGPUBaseClass *vgc = VIRTIO_GPU_BASE_GET_CLASS(g);
+
+    if (block) {
+        g->renderer_blocked++;
+    } else {
+        g->renderer_blocked--;
+    }
+    assert(g->renderer_blocked >= 0);
+
+    if (g->renderer_blocked == 0) {
+        vgc->gl_unblock(g);
+    }
+}
+
+const GraphicHwOps virtio_gpu_ops = {
+    .invalidate = virtio_gpu_invalidate_display,
+    .gfx_update = virtio_gpu_update_display,
+    .text_update = virtio_gpu_text_update,
+    .ui_info = virtio_gpu_ui_info,
+    .gl_block = virtio_gpu_gl_block,
+};
+
+bool
+virtio_gpu_base_device_realize(DeviceState *qdev,
+                               VirtIOHandleOutput ctrl_cb,
+                               VirtIOHandleOutput cursor_cb,
+                               Error **errp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
+    VirtIOGPUBase *g = VIRTIO_GPU_BASE(qdev);
+    Error *local_err = NULL;
+    int i;
+
+    if (g->conf.max_outputs > VIRTIO_GPU_MAX_SCANOUTS) {
+        error_setg(errp, "invalid max_outputs > %d", VIRTIO_GPU_MAX_SCANOUTS);
+        return false;
+    }
+
+    g->use_virgl_renderer = false;
+    if (virtio_gpu_virgl_enabled(g->conf)) {
+        error_setg(&g->migration_blocker, "virgl is not yet migratable");
+        migrate_add_blocker(g->migration_blocker, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            error_free(g->migration_blocker);
+            return false;
+        }
+    }
+
+    g->virtio_config.num_scanouts = cpu_to_le32(g->conf.max_outputs);
+    virtio_init(VIRTIO_DEVICE(g), "virtio-gpu", VIRTIO_ID_GPU,
+                sizeof(struct virtio_gpu_config));
+
+    if (virtio_gpu_virgl_enabled(g->conf)) {
+        /* use larger control queue in 3d mode */
+        virtio_add_queue(vdev, 256, ctrl_cb);
+        virtio_add_queue(vdev, 16, cursor_cb);
+    } else {
+        virtio_add_queue(vdev, 64, ctrl_cb);
+        virtio_add_queue(vdev, 16, cursor_cb);
+    }
+
+    g->enabled_output_bitmask = 1;
+
+    g->req_state[0].width = g->conf.xres;
+    g->req_state[0].height = g->conf.yres;
+
+    for (i = 0; i < g->conf.max_outputs; i++) {
+        g->scanout[i].con =
+            graphic_console_init(DEVICE(g), i, &virtio_gpu_ops, g);
+        if (i > 0) {
+            dpy_gfx_replace_surface(g->scanout[i].con, NULL);
+        }
+    }
+
+    return true;
+}
+
+static uint64_t
+virtio_gpu_base_get_features(VirtIODevice *vdev, uint64_t features,
+                             Error **errp)
+{
+    VirtIOGPUBase *g = VIRTIO_GPU_BASE(vdev);
+
+    if (virtio_gpu_virgl_enabled(g->conf)) {
+        features |= (1 << VIRTIO_GPU_F_VIRGL);
+    }
+    if (virtio_gpu_edid_enabled(g->conf)) {
+        features |= (1 << VIRTIO_GPU_F_EDID);
+    }
+
+    return features;
+}
+
+static void
+virtio_gpu_base_set_features(VirtIODevice *vdev, uint64_t features)
+{
+    static const uint32_t virgl = (1 << VIRTIO_GPU_F_VIRGL);
+    VirtIOGPUBase *g = VIRTIO_GPU_BASE(vdev);
+
+    g->use_virgl_renderer = ((features & virgl) == virgl);
+    trace_virtio_gpu_features(g->use_virgl_renderer);
+}
+
+static void
+virtio_gpu_base_device_unrealize(DeviceState *qdev, Error **errp)
+{
+    VirtIOGPUBase *g = VIRTIO_GPU_BASE(qdev);
+
+    if (g->migration_blocker) {
+        migrate_del_blocker(g->migration_blocker);
+        error_free(g->migration_blocker);
+    }
+}
+
+static void
+virtio_gpu_base_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+
+    vdc->unrealize = virtio_gpu_base_device_unrealize;
+    vdc->get_features = virtio_gpu_base_get_features;
+    vdc->set_features = virtio_gpu_base_set_features;
+
+    set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
+    dc->hotpluggable = false;
+}
+
+static const TypeInfo virtio_gpu_base_info = {
+    .name = TYPE_VIRTIO_GPU_BASE,
+    .parent = TYPE_VIRTIO_DEVICE,
+    .instance_size = sizeof(VirtIOGPUBase),
+    .class_size = sizeof(VirtIOGPUBaseClass),
+    .class_init = virtio_gpu_base_class_init,
+    .abstract = true
+};
+
+static void
+virtio_register_types(void)
+{
+    type_register_static(&virtio_gpu_base_info);
+}
+
+type_init(virtio_register_types)
+
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctrl_hdr)                != 24);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_update_cursor)           != 56);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_unref)          != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_create_2d)      != 40);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_set_scanout)             != 48);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_flush)          != 48);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_transfer_to_host_2d)     != 56);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_mem_entry)               != 16);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_attach_backing) != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_detach_backing) != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_display_info)       != 408);
+
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_transfer_host_3d)        != 72);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_create_3d)      != 72);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_create)              != 96);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_destroy)             != 24);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_resource)            != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_cmd_submit)              != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_get_capset_info)         != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_capset_info)        != 40);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_get_capset)              != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_capset)             != 24);
diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c
index 0bc4d9d424..206870cd4c 100644
--- a/hw/display/virtio-gpu-pci.c
+++ b/hw/display/virtio-gpu-pci.c
@@ -16,33 +16,18 @@
 #include "hw/pci/pci.h"
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-bus.h"
-#include "hw/virtio/virtio-pci.h"
-#include "hw/virtio/virtio-gpu.h"
+#include "hw/virtio/virtio-gpu-pci.h"
 
-typedef struct VirtIOGPUPCI VirtIOGPUPCI;
-
-/*
- * virtio-gpu-pci: This extends VirtioPCIProxy.
- */
-#define TYPE_VIRTIO_GPU_PCI "virtio-gpu-pci"
-#define VIRTIO_GPU_PCI(obj) \
-        OBJECT_CHECK(VirtIOGPUPCI, (obj), TYPE_VIRTIO_GPU_PCI)
-
-struct VirtIOGPUPCI {
-    VirtIOPCIProxy parent_obj;
-    VirtIOGPU vdev;
-};
-
-static Property virtio_gpu_pci_properties[] = {
+static Property virtio_gpu_pci_base_properties[] = {
     DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy),
     DEFINE_PROP_END_OF_LIST(),
 };
 
-static void virtio_gpu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+static void virtio_gpu_pci_base_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
 {
-    VirtIOGPUPCI *vgpu = VIRTIO_GPU_PCI(vpci_dev);
-    VirtIOGPU *g = &vgpu->vdev;
-    DeviceState *vdev = DEVICE(&vgpu->vdev);
+    VirtIOGPUPCIBase *vgpu = VIRTIO_GPU_PCI_BASE(vpci_dev);
+    VirtIOGPUBase *g = vgpu->vgpu;
+    DeviceState *vdev = DEVICE(g);
     int i;
     Error *local_error = NULL;
 
@@ -64,36 +49,56 @@ static void virtio_gpu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
     }
 }
 
-static void virtio_gpu_pci_class_init(ObjectClass *klass, void *data)
+static void virtio_gpu_pci_base_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
     PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
 
     set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
-    dc->props = virtio_gpu_pci_properties;
+    dc->props = virtio_gpu_pci_base_properties;
     dc->hotpluggable = false;
-    k->realize = virtio_gpu_pci_realize;
+    k->realize = virtio_gpu_pci_base_realize;
     pcidev_k->class_id = PCI_CLASS_DISPLAY_OTHER;
 }
 
+static const TypeInfo virtio_gpu_pci_base_info = {
+    .name = TYPE_VIRTIO_GPU_PCI_BASE,
+    .parent = TYPE_VIRTIO_PCI,
+    .instance_size = sizeof(VirtIOGPUPCIBase),
+    .class_init = virtio_gpu_pci_base_class_init,
+    .abstract = true
+};
+
+#define TYPE_VIRTIO_GPU_PCI "virtio-gpu-pci"
+#define VIRTIO_GPU_PCI(obj)                                 \
+    OBJECT_CHECK(VirtIOGPUPCI, (obj), TYPE_VIRTIO_GPU_PCI)
+
+typedef struct VirtIOGPUPCI {
+    VirtIOGPUPCIBase parent_obj;
+    VirtIOGPU vdev;
+} VirtIOGPUPCI;
+
 static void virtio_gpu_initfn(Object *obj)
 {
     VirtIOGPUPCI *dev = VIRTIO_GPU_PCI(obj);
 
     virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
                                 TYPE_VIRTIO_GPU);
+    VIRTIO_GPU_PCI_BASE(obj)->vgpu = VIRTIO_GPU_BASE(&dev->vdev);
 }
 
 static const VirtioPCIDeviceTypeInfo virtio_gpu_pci_info = {
     .generic_name = TYPE_VIRTIO_GPU_PCI,
+    .parent = TYPE_VIRTIO_GPU_PCI_BASE,
     .instance_size = sizeof(VirtIOGPUPCI),
     .instance_init = virtio_gpu_initfn,
-    .class_init = virtio_gpu_pci_class_init,
 };
 
 static void virtio_gpu_pci_register_types(void)
 {
+    type_register_static(&virtio_gpu_pci_base_info);
     virtio_pci_types_register(&virtio_gpu_pci_info);
 }
+
 type_init(virtio_gpu_pci_register_types)
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 372b31ef0a..4a49da5ecb 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -20,11 +20,13 @@
 #include "sysemu/dma.h"
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-gpu.h"
+#include "hw/virtio/virtio-gpu-bswap.h"
+#include "hw/virtio/virtio-gpu-pixman.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/display/edid.h"
-#include "migration/blocker.h"
 #include "qemu/log.h"
 #include "qapi/error.h"
+#include "qemu/error-report.h"
 
 #define VIRTIO_GPU_VM_VERSION 1
 
@@ -34,53 +36,11 @@ virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id);
 static void virtio_gpu_cleanup_mapping(VirtIOGPU *g,
                                        struct virtio_gpu_simple_resource *res);
 
-static void
-virtio_gpu_ctrl_hdr_bswap(struct virtio_gpu_ctrl_hdr *hdr)
-{
-    le32_to_cpus(&hdr->type);
-    le32_to_cpus(&hdr->flags);
-    le64_to_cpus(&hdr->fence_id);
-    le32_to_cpus(&hdr->ctx_id);
-    le32_to_cpus(&hdr->padding);
-}
-
-static void virtio_gpu_bswap_32(void *ptr,
-                                size_t size)
-{
-#ifdef HOST_WORDS_BIGENDIAN
-
-    size_t i;
-    struct virtio_gpu_ctrl_hdr *hdr = (struct virtio_gpu_ctrl_hdr *) ptr;
-
-    virtio_gpu_ctrl_hdr_bswap(hdr);
-
-    i = sizeof(struct virtio_gpu_ctrl_hdr);
-    while (i < size) {
-        le32_to_cpus((uint32_t *)(ptr + i));
-        i = i + sizeof(uint32_t);
-    }
-
-#endif
-}
-
-static void
-virtio_gpu_t2d_bswap(struct virtio_gpu_transfer_to_host_2d *t2d)
-{
-    virtio_gpu_ctrl_hdr_bswap(&t2d->hdr);
-    le32_to_cpus(&t2d->r.x);
-    le32_to_cpus(&t2d->r.y);
-    le32_to_cpus(&t2d->r.width);
-    le32_to_cpus(&t2d->r.height);
-    le64_to_cpus(&t2d->offset);
-    le32_to_cpus(&t2d->resource_id);
-    le32_to_cpus(&t2d->padding);
-}
-
 #ifdef CONFIG_VIRGL
 #include <virglrenderer.h>
 #define VIRGL(_g, _virgl, _simple, ...)                     \
     do {                                                    \
-        if (_g->use_virgl_renderer) {                       \
+        if (_g->parent_obj.use_virgl_renderer) {            \
             _virgl(__VA_ARGS__);                            \
         } else {                                            \
             _simple(__VA_ARGS__);                           \
@@ -148,10 +108,10 @@ static void update_cursor(VirtIOGPU *g, struct virtio_gpu_update_cursor *cursor)
     struct virtio_gpu_scanout *s;
     bool move = cursor->hdr.type == VIRTIO_GPU_CMD_MOVE_CURSOR;
 
-    if (cursor->pos.scanout_id >= g->conf.max_outputs) {
+    if (cursor->pos.scanout_id >= g->parent_obj.conf.max_outputs) {
         return;
     }
-    s = &g->scanout[cursor->pos.scanout_id];
+    s = &g->parent_obj.scanout[cursor->pos.scanout_id];
 
     trace_virtio_gpu_update_cursor(cursor->pos.scanout_id,
                                    cursor->pos.x,
@@ -182,53 +142,6 @@ static void update_cursor(VirtIOGPU *g, struct virtio_gpu_update_cursor *cursor)
                   cursor->resource_id ? 1 : 0);
 }
 
-static void virtio_gpu_get_config(VirtIODevice *vdev, uint8_t *config)
-{
-    VirtIOGPU *g = VIRTIO_GPU(vdev);
-    memcpy(config, &g->virtio_config, sizeof(g->virtio_config));
-}
-
-static void virtio_gpu_set_config(VirtIODevice *vdev, const uint8_t *config)
-{
-    VirtIOGPU *g = VIRTIO_GPU(vdev);
-    struct virtio_gpu_config vgconfig;
-
-    memcpy(&vgconfig, config, sizeof(g->virtio_config));
-
-    if (vgconfig.events_clear) {
-        g->virtio_config.events_read &= ~vgconfig.events_clear;
-    }
-}
-
-static uint64_t virtio_gpu_get_features(VirtIODevice *vdev, uint64_t features,
-                                        Error **errp)
-{
-    VirtIOGPU *g = VIRTIO_GPU(vdev);
-
-    if (virtio_gpu_virgl_enabled(g->conf)) {
-        features |= (1 << VIRTIO_GPU_F_VIRGL);
-    }
-    if (virtio_gpu_edid_enabled(g->conf)) {
-        features |= (1 << VIRTIO_GPU_F_EDID);
-    }
-    return features;
-}
-
-static void virtio_gpu_set_features(VirtIODevice *vdev, uint64_t features)
-{
-    static const uint32_t virgl = (1 << VIRTIO_GPU_F_VIRGL);
-    VirtIOGPU *g = VIRTIO_GPU(vdev);
-
-    g->use_virgl_renderer = ((features & virgl) == virgl);
-    trace_virtio_gpu_features(g->use_virgl_renderer);
-}
-
-static void virtio_gpu_notify_event(VirtIOGPU *g, uint32_t event_type)
-{
-    g->virtio_config.events_read |= event_type;
-    virtio_notify_config(&g->parent_obj);
-}
-
 static struct virtio_gpu_simple_resource *
 virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id)
 {
@@ -277,21 +190,6 @@ void virtio_gpu_ctrl_response_nodata(VirtIOGPU *g,
     virtio_gpu_ctrl_response(g, cmd, &resp, sizeof(resp));
 }
 
-static void
-virtio_gpu_fill_display_info(VirtIOGPU *g,
-                             struct virtio_gpu_resp_display_info *dpy_info)
-{
-    int i;
-
-    for (i = 0; i < g->conf.max_outputs; i++) {
-        if (g->enabled_output_bitmask & (1 << i)) {
-            dpy_info->pmodes[i].enabled = 1;
-            dpy_info->pmodes[i].r.width = cpu_to_le32(g->req_state[i].width);
-            dpy_info->pmodes[i].r.height = cpu_to_le32(g->req_state[i].height);
-        }
-    }
-}
-
 void virtio_gpu_get_display_info(VirtIOGPU *g,
                                  struct virtio_gpu_ctrl_command *cmd)
 {
@@ -300,7 +198,7 @@ void virtio_gpu_get_display_info(VirtIOGPU *g,
     trace_virtio_gpu_cmd_get_display_info();
     memset(&display_info, 0, sizeof(display_info));
     display_info.hdr.type = VIRTIO_GPU_RESP_OK_DISPLAY_INFO;
-    virtio_gpu_fill_display_info(g, &display_info);
+    virtio_gpu_base_fill_display_info(VIRTIO_GPU_BASE(g), &display_info);
     virtio_gpu_ctrl_response(g, cmd, &display_info.hdr,
                              sizeof(display_info));
 }
@@ -309,9 +207,10 @@ static void
 virtio_gpu_generate_edid(VirtIOGPU *g, int scanout,
                          struct virtio_gpu_resp_edid *edid)
 {
+    VirtIOGPUBase *b = VIRTIO_GPU_BASE(g);
     qemu_edid_info info = {
-        .prefx = g->req_state[scanout].width,
-        .prefy = g->req_state[scanout].height,
+        .prefx = b->req_state[scanout].width,
+        .prefy = b->req_state[scanout].height,
     };
 
     edid->size = cpu_to_le32(sizeof(edid->edid));
@@ -323,11 +222,12 @@ void virtio_gpu_get_edid(VirtIOGPU *g,
 {
     struct virtio_gpu_resp_edid edid;
     struct virtio_gpu_cmd_get_edid get_edid;
+    VirtIOGPUBase *b = VIRTIO_GPU_BASE(g);
 
     VIRTIO_GPU_FILL_CMD(get_edid);
     virtio_gpu_bswap_32(&get_edid, sizeof(get_edid));
 
-    if (get_edid.scanout >= g->conf.max_outputs) {
+    if (get_edid.scanout >= b->conf.max_outputs) {
         cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
         return;
     }
@@ -339,30 +239,6 @@ void virtio_gpu_get_edid(VirtIOGPU *g,
     virtio_gpu_ctrl_response(g, cmd, &edid.hdr, sizeof(edid));
 }
 
-static pixman_format_code_t get_pixman_format(uint32_t virtio_gpu_format)
-{
-    switch (virtio_gpu_format) {
-    case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM:
-        return PIXMAN_BE_b8g8r8x8;
-    case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM:
-        return PIXMAN_BE_b8g8r8a8;
-    case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM:
-        return PIXMAN_BE_x8r8g8b8;
-    case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM:
-        return PIXMAN_BE_a8r8g8b8;
-    case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM:
-        return PIXMAN_BE_r8g8b8x8;
-    case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM:
-        return PIXMAN_BE_r8g8b8a8;
-    case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM:
-        return PIXMAN_BE_x8b8g8r8;
-    case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM:
-        return PIXMAN_BE_a8b8g8r8;
-    default:
-        return 0;
-    }
-}
-
 static uint32_t calc_image_hostmem(pixman_format_code_t pformat,
                                    uint32_t width, uint32_t height)
 {
@@ -409,7 +285,7 @@ static void virtio_gpu_resource_create_2d(VirtIOGPU *g,
     res->format = c2d.format;
     res->resource_id = c2d.resource_id;
 
-    pformat = get_pixman_format(c2d.format);
+    pformat = virtio_gpu_get_pixman_format(c2d.format);
     if (!pformat) {
         qemu_log_mask(LOG_GUEST_ERROR,
                       "%s: host couldn't handle guest format %d\n",
@@ -420,7 +296,7 @@ static void virtio_gpu_resource_create_2d(VirtIOGPU *g,
     }
 
     res->hostmem = calc_image_hostmem(pformat, c2d.width, c2d.height);
-    if (res->hostmem + g->hostmem < g->conf.max_hostmem) {
+    if (res->hostmem + g->hostmem < g->conf_max_hostmem) {
         res->image = pixman_image_create_bits(pformat,
                                               c2d.width,
                                               c2d.height,
@@ -442,7 +318,7 @@ static void virtio_gpu_resource_create_2d(VirtIOGPU *g,
 
 static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id)
 {
-    struct virtio_gpu_scanout *scanout = &g->scanout[scanout_id];
+    struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id];
     struct virtio_gpu_simple_resource *res;
     DisplaySurface *ds = NULL;
 
@@ -474,7 +350,7 @@ static void virtio_gpu_resource_destroy(VirtIOGPU *g,
     int i;
 
     if (res->scanout_bitmask) {
-        for (i = 0; i < g->conf.max_outputs; i++) {
+        for (i = 0; i < g->parent_obj.conf.max_outputs; i++) {
             if (res->scanout_bitmask & (1 << i)) {
                 virtio_gpu_disable_scanout(g, i);
             }
@@ -604,7 +480,7 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g,
 
     pixman_region_init_rect(&flush_region,
                             rf.r.x, rf.r.y, rf.r.width, rf.r.height);
-    for (i = 0; i < g->conf.max_outputs; i++) {
+    for (i = 0; i < g->parent_obj.conf.max_outputs; i++) {
         struct virtio_gpu_scanout *scanout;
         pixman_region16_t region, finalregion;
         pixman_box16_t *extents;
@@ -612,7 +488,7 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g,
         if (!(res->scanout_bitmask & (1 << i))) {
             continue;
         }
-        scanout = &g->scanout[i];
+        scanout = &g->parent_obj.scanout[i];
 
         pixman_region_init(&finalregion);
         pixman_region_init_rect(&region, scanout->x, scanout->y,
@@ -622,7 +498,7 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g,
         pixman_region_translate(&finalregion, -scanout->x, -scanout->y);
         extents = pixman_region_extents(&finalregion);
         /* work out the area we need to update for each console */
-        dpy_gfx_update(g->scanout[i].con,
+        dpy_gfx_update(g->parent_obj.scanout[i].con,
                        extents->x1, extents->y1,
                        extents->x2 - extents->x1,
                        extents->y2 - extents->y1);
@@ -653,14 +529,14 @@ static void virtio_gpu_set_scanout(VirtIOGPU *g,
     trace_virtio_gpu_cmd_set_scanout(ss.scanout_id, ss.resource_id,
                                      ss.r.width, ss.r.height, ss.r.x, ss.r.y);
 
-    if (ss.scanout_id >= g->conf.max_outputs) {
+    if (ss.scanout_id >= g->parent_obj.conf.max_outputs) {
         qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d",
                       __func__, ss.scanout_id);
         cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
         return;
     }
 
-    g->enable = 1;
+    g->parent_obj.enable = 1;
     if (ss.resource_id == 0) {
         virtio_gpu_disable_scanout(g, ss.scanout_id);
         return;
@@ -691,7 +567,7 @@ static void virtio_gpu_set_scanout(VirtIOGPU *g,
         return;
     }
 
-    scanout = &g->scanout[ss.scanout_id];
+    scanout = &g->parent_obj.scanout[ss.scanout_id];
 
     format = pixman_image_get_format(res->image);
     bpp = DIV_ROUND_UP(PIXMAN_FORMAT_BPP(format), 8);
@@ -714,7 +590,8 @@ static void virtio_gpu_set_scanout(VirtIOGPU *g,
             return;
         }
         pixman_image_unref(rect);
-        dpy_gfx_replace_surface(g->scanout[ss.scanout_id].con, scanout->ds);
+        dpy_gfx_replace_surface(g->parent_obj.scanout[ss.scanout_id].con,
+                                scanout->ds);
     }
 
     ores = virtio_gpu_find_resource(g, scanout->resource_id);
@@ -932,7 +809,7 @@ void virtio_gpu_process_cmdq(VirtIOGPU *g)
     while (!QTAILQ_EMPTY(&g->cmdq)) {
         cmd = QTAILQ_FIRST(&g->cmdq);
 
-        if (g->renderer_blocked) {
+        if (g->parent_obj.renderer_blocked) {
             break;
         }
 
@@ -941,14 +818,14 @@ void virtio_gpu_process_cmdq(VirtIOGPU *g)
               g, cmd);
 
         QTAILQ_REMOVE(&g->cmdq, cmd, next);
-        if (virtio_gpu_stats_enabled(g->conf)) {
+        if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
             g->stats.requests++;
         }
 
         if (!cmd->finished) {
             QTAILQ_INSERT_TAIL(&g->fenceq, cmd, next);
             g->inflight++;
-            if (virtio_gpu_stats_enabled(g->conf)) {
+            if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
                 if (g->stats.max_inflight < g->inflight) {
                     g->stats.max_inflight = g->inflight;
                 }
@@ -960,6 +837,19 @@ void virtio_gpu_process_cmdq(VirtIOGPU *g)
     }
 }
 
+static void virtio_gpu_gl_unblock(VirtIOGPUBase *b)
+{
+    VirtIOGPU *g = VIRTIO_GPU(b);
+
+#ifdef CONFIG_VIRGL
+    if (g->renderer_reset) {
+        g->renderer_reset = false;
+        virtio_gpu_virgl_reset(g);
+    }
+#endif
+    virtio_gpu_process_cmdq(g);
+}
+
 static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIOGPU *g = VIRTIO_GPU(vdev);
@@ -970,7 +860,7 @@ static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
     }
 
 #ifdef CONFIG_VIRGL
-    if (!g->renderer_inited && g->use_virgl_renderer) {
+    if (!g->renderer_inited && g->parent_obj.use_virgl_renderer) {
         virtio_gpu_virgl_init(g);
         g->renderer_inited = true;
     }
@@ -988,7 +878,7 @@ static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
     virtio_gpu_process_cmdq(g);
 
 #ifdef CONFIG_VIRGL
-    if (g->use_virgl_renderer) {
+    if (g->parent_obj.use_virgl_renderer) {
         virtio_gpu_virgl_fence_poll(g);
     }
 #endif
@@ -997,7 +887,7 @@ static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 static void virtio_gpu_ctrl_bh(void *opaque)
 {
     VirtIOGPU *g = opaque;
-    virtio_gpu_handle_ctrl(&g->parent_obj, g->ctrl_vq);
+    virtio_gpu_handle_ctrl(&g->parent_obj.parent_obj, g->ctrl_vq);
 }
 
 static void virtio_gpu_handle_cursor(VirtIODevice *vdev, VirtQueue *vq)
@@ -1035,75 +925,9 @@ static void virtio_gpu_handle_cursor(VirtIODevice *vdev, VirtQueue *vq)
 static void virtio_gpu_cursor_bh(void *opaque)
 {
     VirtIOGPU *g = opaque;
-    virtio_gpu_handle_cursor(&g->parent_obj, g->cursor_vq);
+    virtio_gpu_handle_cursor(&g->parent_obj.parent_obj, g->cursor_vq);
 }
 
-static void virtio_gpu_invalidate_display(void *opaque)
-{
-}
-
-static void virtio_gpu_update_display(void *opaque)
-{
-}
-
-static void virtio_gpu_text_update(void *opaque, console_ch_t *chardata)
-{
-}
-
-static int virtio_gpu_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info)
-{
-    VirtIOGPU *g = opaque;
-
-    if (idx >= g->conf.max_outputs) {
-        return -1;
-    }
-
-    g->req_state[idx].x = info->xoff;
-    g->req_state[idx].y = info->yoff;
-    g->req_state[idx].width = info->width;
-    g->req_state[idx].height = info->height;
-
-    if (info->width && info->height) {
-        g->enabled_output_bitmask |= (1 << idx);
-    } else {
-        g->enabled_output_bitmask &= ~(1 << idx);
-    }
-
-    /* send event to guest */
-    virtio_gpu_notify_event(g, VIRTIO_GPU_EVENT_DISPLAY);
-    return 0;
-}
-
-static void virtio_gpu_gl_block(void *opaque, bool block)
-{
-    VirtIOGPU *g = opaque;
-
-    if (block) {
-        g->renderer_blocked++;
-    } else {
-        g->renderer_blocked--;
-    }
-    assert(g->renderer_blocked >= 0);
-
-    if (g->renderer_blocked == 0) {
-#ifdef CONFIG_VIRGL
-        if (g->renderer_reset) {
-            g->renderer_reset = false;
-            virtio_gpu_virgl_reset(g);
-        }
-#endif
-        virtio_gpu_process_cmdq(g);
-    }
-}
-
-const GraphicHwOps virtio_gpu_ops = {
-    .invalidate = virtio_gpu_invalidate_display,
-    .gfx_update = virtio_gpu_update_display,
-    .text_update = virtio_gpu_text_update,
-    .ui_info = virtio_gpu_ui_info,
-    .gl_block = virtio_gpu_gl_block,
-};
-
 static const VMStateDescription vmstate_virtio_gpu_scanout = {
     .name = "virtio-gpu-one-scanout",
     .version_id = 1,
@@ -1126,10 +950,11 @@ static const VMStateDescription vmstate_virtio_gpu_scanouts = {
     .name = "virtio-gpu-scanouts",
     .version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_INT32(enable, struct VirtIOGPU),
-        VMSTATE_UINT32_EQUAL(conf.max_outputs, struct VirtIOGPU, NULL),
-        VMSTATE_STRUCT_VARRAY_UINT32(scanout, struct VirtIOGPU,
-                                     conf.max_outputs, 1,
+        VMSTATE_INT32(parent_obj.enable, struct VirtIOGPU),
+        VMSTATE_UINT32_EQUAL(parent_obj.conf.max_outputs,
+                             struct VirtIOGPU, NULL),
+        VMSTATE_STRUCT_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
+                                     parent_obj.conf.max_outputs, 1,
                                      vmstate_virtio_gpu_scanout,
                                      struct virtio_gpu_scanout),
         VMSTATE_END_OF_LIST()
@@ -1185,7 +1010,7 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
         res->iov_cnt = qemu_get_be32(f);
 
         /* allocate */
-        pformat = get_pixman_format(res->format);
+        pformat = virtio_gpu_get_pixman_format(res->format);
         if (!pformat) {
             g_free(res);
             return -EINVAL;
@@ -1244,8 +1069,8 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
 
     /* load & apply scanout state */
     vmstate_load_state(f, &vmstate_virtio_gpu_scanouts, g, 1);
-    for (i = 0; i < g->conf.max_outputs; i++) {
-        scanout = &g->scanout[i];
+    for (i = 0; i < g->parent_obj.conf.max_outputs; i++) {
+        scanout = &g->parent_obj.scanout[i];
         if (!scanout->resource_id) {
             continue;
         }
@@ -1274,84 +1099,35 @@ static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
     VirtIOGPU *g = VIRTIO_GPU(qdev);
     bool have_virgl;
-    Error *local_err = NULL;
-    int i;
-
-    if (g->conf.max_outputs > VIRTIO_GPU_MAX_SCANOUTS) {
-        error_setg(errp, "invalid max_outputs > %d", VIRTIO_GPU_MAX_SCANOUTS);
-        return;
-    }
 
-    g->use_virgl_renderer = false;
 #if !defined(CONFIG_VIRGL) || defined(HOST_WORDS_BIGENDIAN)
     have_virgl = false;
 #else
     have_virgl = display_opengl;
 #endif
     if (!have_virgl) {
-        g->conf.flags &= ~(1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED);
-    }
-
-    if (virtio_gpu_virgl_enabled(g->conf)) {
-        error_setg(&g->migration_blocker, "virgl is not yet migratable");
-        migrate_add_blocker(g->migration_blocker, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            error_free(g->migration_blocker);
-            return;
-        }
-    }
-
-    g->virtio_config.num_scanouts = cpu_to_le32(g->conf.max_outputs);
-    virtio_init(VIRTIO_DEVICE(g), "virtio-gpu", VIRTIO_ID_GPU,
-                sizeof(struct virtio_gpu_config));
-
-    g->req_state[0].width = g->conf.xres;
-    g->req_state[0].height = g->conf.yres;
-
-    if (virtio_gpu_virgl_enabled(g->conf)) {
-        /* use larger control queue in 3d mode */
-        g->ctrl_vq   = virtio_add_queue(vdev, 256, virtio_gpu_handle_ctrl_cb);
-        g->cursor_vq = virtio_add_queue(vdev, 16, virtio_gpu_handle_cursor_cb);
-
+        g->parent_obj.conf.flags &= ~(1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED);
+    } else {
 #if defined(CONFIG_VIRGL)
-        g->virtio_config.num_capsets = virtio_gpu_virgl_get_num_capsets(g);
-#else
-        g->virtio_config.num_capsets = 0;
+        VIRTIO_GPU_BASE(g)->virtio_config.num_capsets =
+            virtio_gpu_virgl_get_num_capsets(g);
 #endif
-    } else {
-        g->ctrl_vq   = virtio_add_queue(vdev, 64, virtio_gpu_handle_ctrl_cb);
-        g->cursor_vq = virtio_add_queue(vdev, 16, virtio_gpu_handle_cursor_cb);
     }
 
+    if (!virtio_gpu_base_device_realize(qdev,
+                                        virtio_gpu_handle_ctrl_cb,
+                                        virtio_gpu_handle_cursor_cb,
+                                        errp)) {
+        return;
+    }
+
+    g->ctrl_vq = virtio_get_queue(vdev, 0);
+    g->cursor_vq = virtio_get_queue(vdev, 1);
     g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);
     g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
     QTAILQ_INIT(&g->reslist);
     QTAILQ_INIT(&g->cmdq);
     QTAILQ_INIT(&g->fenceq);
-
-    g->enabled_output_bitmask = 1;
-
-    for (i = 0; i < g->conf.max_outputs; i++) {
-        g->scanout[i].con =
-            graphic_console_init(DEVICE(g), i, &virtio_gpu_ops, g);
-        if (i > 0) {
-            dpy_gfx_replace_surface(g->scanout[i].con, NULL);
-        }
-    }
-}
-
-static void virtio_gpu_device_unrealize(DeviceState *qdev, Error **errp)
-{
-    VirtIOGPU *g = VIRTIO_GPU(qdev);
-    if (g->migration_blocker) {
-        migrate_del_blocker(g->migration_blocker);
-        error_free(g->migration_blocker);
-    }
-}
-
-static void virtio_gpu_instance_init(Object *obj)
-{
 }
 
 static void virtio_gpu_reset(VirtIODevice *vdev)
@@ -1359,21 +1135,16 @@ static void virtio_gpu_reset(VirtIODevice *vdev)
     VirtIOGPU *g = VIRTIO_GPU(vdev);
     struct virtio_gpu_simple_resource *res, *tmp;
     struct virtio_gpu_ctrl_command *cmd;
-    int i;
 
-    g->enable = 0;
+#ifdef CONFIG_VIRGL
+    if (g->parent_obj.use_virgl_renderer) {
+        virtio_gpu_virgl_reset(g);
+    }
+#endif
 
     QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
         virtio_gpu_resource_destroy(g, res);
     }
-    for (i = 0; i < g->conf.max_outputs; i++) {
-        g->scanout[i].resource_id = 0;
-        g->scanout[i].width = 0;
-        g->scanout[i].height = 0;
-        g->scanout[i].x = 0;
-        g->scanout[i].y = 0;
-        g->scanout[i].ds = NULL;
-    }
 
     while (!QTAILQ_EMPTY(&g->cmdq)) {
         cmd = QTAILQ_FIRST(&g->cmdq);
@@ -1389,15 +1160,37 @@ static void virtio_gpu_reset(VirtIODevice *vdev)
     }
 
 #ifdef CONFIG_VIRGL
-    if (g->use_virgl_renderer) {
-        if (g->renderer_blocked) {
+    if (g->parent_obj.use_virgl_renderer) {
+        if (g->parent_obj.renderer_blocked) {
             g->renderer_reset = true;
         } else {
             virtio_gpu_virgl_reset(g);
         }
-        g->use_virgl_renderer = 0;
+        g->parent_obj.use_virgl_renderer = false;
     }
 #endif
+
+    virtio_gpu_base_reset(VIRTIO_GPU_BASE(vdev));
+}
+
+static void
+virtio_gpu_get_config(VirtIODevice *vdev, uint8_t *config)
+{
+    VirtIOGPUBase *g = VIRTIO_GPU_BASE(vdev);
+
+    memcpy(config, &g->virtio_config, sizeof(g->virtio_config));
+}
+
+static void
+virtio_gpu_set_config(VirtIODevice *vdev, const uint8_t *config)
+{
+    VirtIOGPUBase *g = VIRTIO_GPU_BASE(vdev);
+    const struct virtio_gpu_config *vgconfig =
+        (const struct virtio_gpu_config *)config;
+
+    if (vgconfig->events_clear) {
+        g->virtio_config.events_read &= ~vgconfig->events_clear;
+    }
 }
 
 /*
@@ -1428,18 +1221,15 @@ static const VMStateDescription vmstate_virtio_gpu = {
 };
 
 static Property virtio_gpu_properties[] = {
-    DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1),
-    DEFINE_PROP_SIZE("max_hostmem", VirtIOGPU, conf.max_hostmem, 256 * MiB),
+    VIRTIO_GPU_BASE_PROPERTIES(VirtIOGPU, parent_obj.conf),
+    DEFINE_PROP_SIZE("max_hostmem", VirtIOGPU, conf_max_hostmem,
+                     256 * MiB),
 #ifdef CONFIG_VIRGL
-    DEFINE_PROP_BIT("virgl", VirtIOGPU, conf.flags,
+    DEFINE_PROP_BIT("virgl", VirtIOGPU, parent_obj.conf.flags,
                     VIRTIO_GPU_FLAG_VIRGL_ENABLED, true),
-    DEFINE_PROP_BIT("stats", VirtIOGPU, conf.flags,
+    DEFINE_PROP_BIT("stats", VirtIOGPU, parent_obj.conf.flags,
                     VIRTIO_GPU_FLAG_STATS_ENABLED, false),
 #endif
-    DEFINE_PROP_BIT("edid", VirtIOGPU, conf.flags,
-                    VIRTIO_GPU_FLAG_EDID_ENABLED, false),
-    DEFINE_PROP_UINT32("xres", VirtIOGPU, conf.xres, 1024),
-    DEFINE_PROP_UINT32("yres", VirtIOGPU, conf.yres, 768),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1447,27 +1237,22 @@ static void virtio_gpu_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    VirtIOGPUBaseClass *vgc = VIRTIO_GPU_BASE_CLASS(klass);
 
+    vgc->gl_unblock = virtio_gpu_gl_unblock;
     vdc->realize = virtio_gpu_device_realize;
-    vdc->unrealize = virtio_gpu_device_unrealize;
+    vdc->reset = virtio_gpu_reset;
     vdc->get_config = virtio_gpu_get_config;
     vdc->set_config = virtio_gpu_set_config;
-    vdc->get_features = virtio_gpu_get_features;
-    vdc->set_features = virtio_gpu_set_features;
 
-    vdc->reset = virtio_gpu_reset;
-
-    set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
-    dc->props = virtio_gpu_properties;
     dc->vmsd = &vmstate_virtio_gpu;
-    dc->hotpluggable = false;
+    dc->props = virtio_gpu_properties;
 }
 
 static const TypeInfo virtio_gpu_info = {
     .name = TYPE_VIRTIO_GPU,
-    .parent = TYPE_VIRTIO_DEVICE,
+    .parent = TYPE_VIRTIO_GPU_BASE,
     .instance_size = sizeof(VirtIOGPU),
-    .instance_init = virtio_gpu_instance_init,
     .class_init = virtio_gpu_class_init,
 };
 
@@ -1477,26 +1262,3 @@ static void virtio_register_types(void)
 }
 
 type_init(virtio_register_types)
-
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctrl_hdr)                != 24);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_update_cursor)           != 56);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_unref)          != 32);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_create_2d)      != 40);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_set_scanout)             != 48);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_flush)          != 48);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_transfer_to_host_2d)     != 56);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_mem_entry)               != 16);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_attach_backing) != 32);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_detach_backing) != 32);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_display_info)       != 408);
-
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_transfer_host_3d)        != 72);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_create_3d)      != 72);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_create)              != 96);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_destroy)             != 24);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_resource)            != 32);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_cmd_submit)              != 32);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_get_capset_info)         != 32);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_capset_info)        != 40);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_get_capset)              != 32);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_capset)             != 24);
diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c
index 5d57bf5b0c..67e34935c2 100644
--- a/hw/display/virtio-vga.c
+++ b/hw/display/virtio-vga.c
@@ -1,63 +1,42 @@
 #include "qemu/osdep.h"
 #include "hw/hw.h"
 #include "hw/pci/pci.h"
-#include "vga_int.h"
-#include "hw/virtio/virtio-pci.h"
 #include "hw/virtio/virtio-gpu.h"
 #include "qapi/error.h"
+#include "virtio-vga.h"
 
-/*
- * virtio-vga: This extends VirtioPCIProxy.
- */
-#define TYPE_VIRTIO_VGA "virtio-vga"
-#define VIRTIO_VGA(obj) \
-        OBJECT_CHECK(VirtIOVGA, (obj), TYPE_VIRTIO_VGA)
-#define VIRTIO_VGA_GET_CLASS(obj) \
-        OBJECT_GET_CLASS(VirtIOVGAClass, obj, TYPE_VIRTIO_VGA)
-#define VIRTIO_VGA_CLASS(klass) \
-        OBJECT_CLASS_CHECK(VirtIOVGAClass, klass, TYPE_VIRTIO_VGA)
-
-typedef struct VirtIOVGA {
-    VirtIOPCIProxy parent_obj;
-    VirtIOGPU      vdev;
-    VGACommonState vga;
-    MemoryRegion   vga_mrs[3];
-} VirtIOVGA;
-
-typedef struct VirtIOVGAClass {
-    VirtioPCIClass parent_class;
-    DeviceReset parent_reset;
-} VirtIOVGAClass;
-
-static void virtio_vga_invalidate_display(void *opaque)
+static void virtio_vga_base_invalidate_display(void *opaque)
 {
-    VirtIOVGA *vvga = opaque;
+    VirtIOVGABase *vvga = opaque;
+    VirtIOGPUBase *g = vvga->vgpu;
 
-    if (vvga->vdev.enable) {
-        virtio_gpu_ops.invalidate(&vvga->vdev);
+    if (g->enable) {
+        virtio_gpu_ops.invalidate(g);
     } else {
         vvga->vga.hw_ops->invalidate(&vvga->vga);
     }
 }
 
-static void virtio_vga_update_display(void *opaque)
+static void virtio_vga_base_update_display(void *opaque)
 {
-    VirtIOVGA *vvga = opaque;
+    VirtIOVGABase *vvga = opaque;
+    VirtIOGPUBase *g = vvga->vgpu;
 
-    if (vvga->vdev.enable) {
-        virtio_gpu_ops.gfx_update(&vvga->vdev);
+    if (g->enable) {
+        virtio_gpu_ops.gfx_update(g);
     } else {
         vvga->vga.hw_ops->gfx_update(&vvga->vga);
     }
 }
 
-static void virtio_vga_text_update(void *opaque, console_ch_t *chardata)
+static void virtio_vga_base_text_update(void *opaque, console_ch_t *chardata)
 {
-    VirtIOVGA *vvga = opaque;
+    VirtIOVGABase *vvga = opaque;
+    VirtIOGPUBase *g = vvga->vgpu;
 
-    if (vvga->vdev.enable) {
+    if (g->enable) {
         if (virtio_gpu_ops.text_update) {
-            virtio_gpu_ops.text_update(&vvga->vdev, chardata);
+            virtio_gpu_ops.text_update(g, chardata);
         }
     } else {
         if (vvga->vga.hw_ops->text_update) {
@@ -66,49 +45,52 @@ static void virtio_vga_text_update(void *opaque, console_ch_t *chardata)
     }
 }
 
-static int virtio_vga_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info)
+static int virtio_vga_base_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info)
 {
-    VirtIOVGA *vvga = opaque;
+    VirtIOVGABase *vvga = opaque;
+    VirtIOGPUBase *g = vvga->vgpu;
 
     if (virtio_gpu_ops.ui_info) {
-        return virtio_gpu_ops.ui_info(&vvga->vdev, idx, info);
+        return virtio_gpu_ops.ui_info(g, idx, info);
     }
     return -1;
 }
 
-static void virtio_vga_gl_block(void *opaque, bool block)
+static void virtio_vga_base_gl_block(void *opaque, bool block)
 {
-    VirtIOVGA *vvga = opaque;
+    VirtIOVGABase *vvga = opaque;
+    VirtIOGPUBase *g = vvga->vgpu;
 
     if (virtio_gpu_ops.gl_block) {
-        virtio_gpu_ops.gl_block(&vvga->vdev, block);
+        virtio_gpu_ops.gl_block(g, block);
     }
 }
 
-static const GraphicHwOps virtio_vga_ops = {
-    .invalidate = virtio_vga_invalidate_display,
-    .gfx_update = virtio_vga_update_display,
-    .text_update = virtio_vga_text_update,
-    .ui_info = virtio_vga_ui_info,
-    .gl_block = virtio_vga_gl_block,
+static const GraphicHwOps virtio_vga_base_ops = {
+    .invalidate = virtio_vga_base_invalidate_display,
+    .gfx_update = virtio_vga_base_update_display,
+    .text_update = virtio_vga_base_text_update,
+    .ui_info = virtio_vga_base_ui_info,
+    .gl_block = virtio_vga_base_gl_block,
 };
 
-static const VMStateDescription vmstate_virtio_vga = {
+static const VMStateDescription vmstate_virtio_vga_base = {
     .name = "virtio-vga",
     .version_id = 2,
     .minimum_version_id = 2,
     .fields = (VMStateField[]) {
         /* no pci stuff here, saving the virtio device will handle that */
-        VMSTATE_STRUCT(vga, VirtIOVGA, 0, vmstate_vga_common, VGACommonState),
+        VMSTATE_STRUCT(vga, VirtIOVGABase, 0,
+                       vmstate_vga_common, VGACommonState),
         VMSTATE_END_OF_LIST()
     }
 };
 
 /* VGA device wrapper around PCI device around virtio GPU */
-static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+static void virtio_vga_base_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
 {
-    VirtIOVGA *vvga = VIRTIO_VGA(vpci_dev);
-    VirtIOGPU *g = &vvga->vdev;
+    VirtIOVGABase *vvga = VIRTIO_VGA_BASE(vpci_dev);
+    VirtIOGPUBase *g = vvga->vgpu;
     VGACommonState *vga = &vvga->vga;
     Error *err = NULL;
     uint32_t offset;
@@ -168,7 +150,7 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
                                  vvga->vga_mrs, true, false);
 
     vga->con = g->scanout[0].con;
-    graphic_console_set_hwops(vga->con, &virtio_vga_ops, vvga);
+    graphic_console_set_hwops(vga->con, &virtio_vga_base_ops, vvga);
 
     for (i = 0; i < g->conf.max_outputs; i++) {
         object_property_set_link(OBJECT(g->scanout[i].con),
@@ -177,10 +159,10 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
     }
 }
 
-static void virtio_vga_reset(DeviceState *dev)
+static void virtio_vga_base_reset(DeviceState *dev)
 {
-    VirtIOVGAClass *klass = VIRTIO_VGA_GET_CLASS(dev);
-    VirtIOVGA *vvga = VIRTIO_VGA(dev);
+    VirtIOVGABaseClass *klass = VIRTIO_VGA_BASE_GET_CLASS(dev);
+    VirtIOVGABase *vvga = VIRTIO_VGA_BASE(dev);
 
     /* reset virtio-gpu */
     klass->parent_reset(dev);
@@ -190,48 +172,70 @@ static void virtio_vga_reset(DeviceState *dev)
     vga_dirty_log_start(&vvga->vga);
 }
 
-static Property virtio_vga_properties[] = {
+static Property virtio_vga_base_properties[] = {
     DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy),
     DEFINE_PROP_END_OF_LIST(),
 };
 
-static void virtio_vga_class_init(ObjectClass *klass, void *data)
+static void virtio_vga_base_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
-    VirtIOVGAClass *v = VIRTIO_VGA_CLASS(klass);
+    VirtIOVGABaseClass *v = VIRTIO_VGA_BASE_CLASS(klass);
     PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
 
     set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
-    dc->props = virtio_vga_properties;
-    dc->vmsd = &vmstate_virtio_vga;
+    dc->props = virtio_vga_base_properties;
+    dc->vmsd = &vmstate_virtio_vga_base;
     dc->hotpluggable = false;
-    device_class_set_parent_reset(dc, virtio_vga_reset,
+    device_class_set_parent_reset(dc, virtio_vga_base_reset,
                                   &v->parent_reset);
 
-    k->realize = virtio_vga_realize;
+    k->realize = virtio_vga_base_realize;
     pcidev_k->romfile = "vgabios-virtio.bin";
     pcidev_k->class_id = PCI_CLASS_DISPLAY_VGA;
 }
 
+static TypeInfo virtio_vga_base_info = {
+    .name          = TYPE_VIRTIO_VGA_BASE,
+    .parent        = TYPE_VIRTIO_PCI,
+    .instance_size = sizeof(struct VirtIOVGABase),
+    .class_size    = sizeof(struct VirtIOVGABaseClass),
+    .class_init    = virtio_vga_base_class_init,
+    .abstract      = true,
+};
+
+#define TYPE_VIRTIO_VGA "virtio-vga"
+
+#define VIRTIO_VGA(obj)                             \
+    OBJECT_CHECK(VirtIOVGA, (obj), TYPE_VIRTIO_VGA)
+
+typedef struct VirtIOVGA {
+    VirtIOVGABase parent_obj;
+
+    VirtIOGPU     vdev;
+} VirtIOVGA;
+
 static void virtio_vga_inst_initfn(Object *obj)
 {
     VirtIOVGA *dev = VIRTIO_VGA(obj);
 
     virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
                                 TYPE_VIRTIO_GPU);
+    VIRTIO_VGA_BASE(dev)->vgpu = VIRTIO_GPU_BASE(&dev->vdev);
 }
 
+
 static VirtioPCIDeviceTypeInfo virtio_vga_info = {
     .generic_name  = TYPE_VIRTIO_VGA,
+    .parent        = TYPE_VIRTIO_VGA_BASE,
     .instance_size = sizeof(struct VirtIOVGA),
     .instance_init = virtio_vga_inst_initfn,
-    .class_size    = sizeof(struct VirtIOVGAClass),
-    .class_init    = virtio_vga_class_init,
 };
 
 static void virtio_vga_register_types(void)
 {
+    type_register_static(&virtio_vga_base_info);
     virtio_pci_types_register(&virtio_vga_info);
 }
 
diff --git a/hw/display/virtio-vga.h b/hw/display/virtio-vga.h
new file mode 100644
index 0000000000..c10bf390aa
--- /dev/null
+++ b/hw/display/virtio-vga.h
@@ -0,0 +1,32 @@
+#ifndef VIRTIO_VGA_H_
+#define VIRTIO_VGA_H_
+
+#include "hw/virtio/virtio-gpu-pci.h"
+#include "vga_int.h"
+
+/*
+ * virtio-vga-base: This extends VirtioPCIProxy.
+ */
+#define TYPE_VIRTIO_VGA_BASE "virtio-vga-base"
+#define VIRTIO_VGA_BASE(obj)                                \
+    OBJECT_CHECK(VirtIOVGABase, (obj), TYPE_VIRTIO_VGA_BASE)
+#define VIRTIO_VGA_BASE_GET_CLASS(obj)                      \
+    OBJECT_GET_CLASS(VirtIOVGABaseClass, obj, TYPE_VIRTIO_VGA_BASE)
+#define VIRTIO_VGA_BASE_CLASS(klass)                        \
+    OBJECT_CLASS_CHECK(VirtIOVGABaseClass, klass, TYPE_VIRTIO_VGA_BASE)
+
+typedef struct VirtIOVGABase {
+    VirtIOPCIProxy parent_obj;
+
+    VirtIOGPUBase *vgpu;
+    VGACommonState vga;
+    MemoryRegion vga_mrs[3];
+} VirtIOVGABase;
+
+typedef struct VirtIOVGABaseClass {
+    VirtioPCIClass parent_class;
+
+    DeviceReset parent_reset;
+} VirtIOVGABaseClass;
+
+#endif /* VIRTIO_VGA_H_ */
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 0d78d73894..85dc1640bc 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2405,22 +2405,6 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
                  table_data->len - srat_start, 1, NULL, NULL);
 }
 
-static void
-build_mcfg_q35(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info)
-{
-    AcpiTableMcfg *mcfg;
-    int len = sizeof(*mcfg) + 1 * sizeof(mcfg->allocation[0]);
-
-    mcfg = acpi_data_push(table_data, len);
-    mcfg->allocation[0].address = cpu_to_le64(info->base);
-    /* Only a single allocation so no need to play with segments */
-    mcfg->allocation[0].pci_segment = cpu_to_le16(0);
-    mcfg->allocation[0].start_bus_number = 0;
-    mcfg->allocation[0].end_bus_number = PCIE_MMCFG_BUS(info->size - 1);
-
-    build_header(linker, table_data, (void *)mcfg, "MCFG", len, 1, NULL, NULL);
-}
-
 /*
  * VT-d spec 8.1 DMA Remapping Reporting Structure
  * (version Oct. 2014 or later)
@@ -2690,7 +2674,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
     }
     if (acpi_get_mcfg(&mcfg)) {
         acpi_add_table(table_offsets, tables_blob);
-        build_mcfg_q35(tables_blob, tables->linker, &mcfg);
+        build_mcfg(tables_blob, tables->linker, &mcfg);
     }
     if (x86_iommu_get_default()) {
         IommuType IOMMUType = x86_iommu_get_type();
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 2632b73f80..edc240bcbf 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -110,6 +110,9 @@ struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
 /* Physical Address of PVH entry point read from kernel ELF NOTE */
 static size_t pvh_start_addr;
 
+GlobalProperty pc_compat_4_0_1[] = {};
+const size_t pc_compat_4_0_1_len = G_N_ELEMENTS(pc_compat_4_0_1);
+
 GlobalProperty pc_compat_4_0[] = {};
 const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0);
 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 37dd350511..dcddc64662 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -357,7 +357,7 @@ static void pc_q35_machine_options(MachineClass *m)
     m->units_per_default_bus = 1;
     m->default_machine_opts = "firmware=bios-256k.bin";
     m->default_display = "std";
-    m->default_kernel_irqchip_split = true;
+    m->default_kernel_irqchip_split = false;
     m->no_floppy = 1;
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE);
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE);
@@ -374,10 +374,22 @@ static void pc_q35_4_1_machine_options(MachineClass *m)
 DEFINE_Q35_MACHINE(v4_1, "pc-q35-4.1", NULL,
                    pc_q35_4_1_machine_options);
 
-static void pc_q35_4_0_machine_options(MachineClass *m)
+static void pc_q35_4_0_1_machine_options(MachineClass *m)
 {
     pc_q35_4_1_machine_options(m);
     m->alias = NULL;
+    compat_props_add(m->compat_props, hw_compat_4_0_1, hw_compat_4_0_1_len);
+    compat_props_add(m->compat_props, pc_compat_4_0_1, pc_compat_4_0_1_len);
+}
+
+DEFINE_Q35_MACHINE(v4_0_1, "pc-q35-4.0.1", NULL,
+                   pc_q35_4_0_1_machine_options);
+
+static void pc_q35_4_0_machine_options(MachineClass *m)
+{
+    pc_q35_4_0_1_machine_options(m);
+    m->default_kernel_irqchip_split = true;
+    m->alias = NULL;
     compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len);
     compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len);
 }
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 573b022e1e..360cd20bd8 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -168,7 +168,7 @@ static void ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind, Error **errp)
             return;
         } else {
             /* Anonymous BlockBackend for an empty drive */
-            dev->conf.blk = blk_new(0, BLK_PERM_ALL);
+            dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
             ret = blk_attach_dev(dev->conf.blk, &dev->qdev);
             assert(ret == 0);
         }
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index df712c3e6c..03019b9a03 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -39,6 +39,7 @@ obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
 obj-$(CONFIG_XICS_KVM) += xics_kvm.o
 obj-$(CONFIG_XIVE) += xive.o
 obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
+obj-$(CONFIG_XIVE_KVM) += spapr_xive_kvm.o
 obj-$(CONFIG_POWERNV) += xics_pnv.o pnv_xive.o
 obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
 obj-$(CONFIG_S390_FLIC) += s390_flic.o
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 815e720cfa..dc2c206d9a 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -2595,9 +2595,9 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp)
          * as we didn't know then if the CPU had the security extensions;
          * so we have to do it here.
          */
-        object_initialize(&s->systick[M_REG_S], sizeof(s->systick[M_REG_S]),
-                          TYPE_SYSTICK);
-        qdev_set_parent_bus(DEVICE(&s->systick[M_REG_S]), sysbus_get_default());
+        sysbus_init_child_obj(OBJECT(dev), "systick-reg-s",
+                              &s->systick[M_REG_S],
+                              sizeof(s->systick[M_REG_S]), TYPE_SYSTICK);
 
         object_property_set_bool(OBJECT(&s->systick[M_REG_S]), true,
                                  "realized", &err);
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index 097f88d460..62e0ef8fa5 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -41,13 +41,6 @@
 #define SPAPR_XIVE_NVT_BASE 0x400
 
 /*
- * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
- * to the controller block id value. It can nevertheless be changed
- * for testing purpose.
- */
-#define SPAPR_XIVE_BLOCK_ID 0x0
-
-/*
  * sPAPR NVT and END indexing helpers
  */
 static uint32_t spapr_xive_nvt_to_target(uint8_t nvt_blk, uint32_t nvt_idx)
@@ -86,6 +79,22 @@ static int spapr_xive_target_to_nvt(uint32_t target,
  * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
  * priorities per CPU
  */
+int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
+                             uint32_t *out_server, uint8_t *out_prio)
+{
+
+    assert(end_blk == SPAPR_XIVE_BLOCK_ID);
+
+    if (out_server) {
+        *out_server = end_idx >> 3;
+    }
+
+    if (out_prio) {
+        *out_prio = end_idx & 0x7;
+    }
+    return 0;
+}
+
 static void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
                                   uint8_t *out_end_blk, uint32_t *out_end_idx)
 {
@@ -120,6 +129,7 @@ static int spapr_xive_target_to_end(uint32_t target, uint8_t prio,
 static void spapr_xive_end_pic_print_info(SpaprXive *xive, XiveEND *end,
                                           Monitor *mon)
 {
+    uint64_t qaddr_base = xive_end_qaddr(end);
     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
@@ -127,9 +137,9 @@ static void spapr_xive_end_pic_print_info(SpaprXive *xive, XiveEND *end,
     uint32_t nvt = xive_get_field32(END_W6_NVT_INDEX, end->w6);
     uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
 
-    monitor_printf(mon, "%3d/%d % 6d/%5d ^%d",
+    monitor_printf(mon, "%3d/%d % 6d/%5d @%"PRIx64" ^%d",
                    spapr_xive_nvt_to_target(0, nvt),
-                   priority, qindex, qentries, qgen);
+                   priority, qindex, qentries, qaddr_base, qgen);
 
     xive_end_queue_pic_print_info(end, 6, mon);
     monitor_printf(mon, "]");
@@ -140,7 +150,17 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
     XiveSource *xsrc = &xive->source;
     int i;
 
-    monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_synchronize_state(xive, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return;
+        }
+    }
+
+    monitor_printf(mon, "  LISN         PQ    EISN     CPU/PRIO EQ\n");
 
     for (i = 0; i < xive->nr_irqs; i++) {
         uint8_t pq = xive_source_esb_get(xsrc, i);
@@ -173,7 +193,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
     }
 }
 
-static void spapr_xive_map_mmio(SpaprXive *xive)
+void spapr_xive_map_mmio(SpaprXive *xive)
 {
     sysbus_mmio_map(SYS_BUS_DEVICE(xive), 0, xive->vc_base);
     sysbus_mmio_map(SYS_BUS_DEVICE(xive), 1, xive->end_base);
@@ -250,6 +270,9 @@ static void spapr_xive_instance_init(Object *obj)
     object_initialize_child(obj, "end_source", &xive->end_source,
                             sizeof(xive->end_source), TYPE_XIVE_END_SOURCE,
                             &error_abort, NULL);
+
+    /* Not connected to the KVM XIVE device */
+    xive->fd = -1;
 }
 
 static void spapr_xive_realize(DeviceState *dev, Error **errp)
@@ -304,22 +327,36 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
     xive->eat = g_new0(XiveEAS, xive->nr_irqs);
     xive->endt = g_new0(XiveEND, xive->nr_ends);
 
-    /* TIMA initialization */
-    memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive,
-                          "xive.tima", 4ull << TM_SHIFT);
+    xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
+                           xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
+
+    qemu_register_reset(spapr_xive_reset, dev);
 
     /* Define all XIVE MMIO regions on SysBus */
     sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio);
     sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio);
     sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio);
+}
 
-    /* Map all regions */
-    spapr_xive_map_mmio(xive);
+void spapr_xive_init(SpaprXive *xive, Error **errp)
+{
+    XiveSource *xsrc = &xive->source;
 
-    xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
-                           xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
+    /*
+     * The emulated XIVE device can only be initialized once. If the
+     * ESB memory region has been already mapped, it means we have been
+     * through there.
+     */
+    if (memory_region_is_mapped(&xsrc->esb_mmio)) {
+        return;
+    }
 
-    qemu_register_reset(spapr_xive_reset, dev);
+    /* TIMA initialization */
+    memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive,
+                          "xive.tima", 4ull << TM_SHIFT);
+
+    /* Map all regions */
+    spapr_xive_map_mmio(xive);
 }
 
 static int spapr_xive_get_eas(XiveRouter *xrtr, uint8_t eas_blk,
@@ -427,10 +464,34 @@ static const VMStateDescription vmstate_spapr_xive_eas = {
     },
 };
 
+static int vmstate_spapr_xive_pre_save(void *opaque)
+{
+    if (kvm_irqchip_in_kernel()) {
+        return kvmppc_xive_pre_save(SPAPR_XIVE(opaque));
+    }
+
+    return 0;
+}
+
+/*
+ * Called by the sPAPR IRQ backend 'post_load' method at the machine
+ * level.
+ */
+int spapr_xive_post_load(SpaprXive *xive, int version_id)
+{
+    if (kvm_irqchip_in_kernel()) {
+        return kvmppc_xive_post_load(xive, version_id);
+    }
+
+    return 0;
+}
+
 static const VMStateDescription vmstate_spapr_xive = {
     .name = TYPE_SPAPR_XIVE,
     .version_id = 1,
     .minimum_version_id = 1,
+    .pre_save = vmstate_spapr_xive_pre_save,
+    .post_load = NULL, /* handled at the machine level */
     .fields = (VMStateField[]) {
         VMSTATE_UINT32_EQUAL(nr_irqs, SpaprXive, NULL),
         VMSTATE_STRUCT_VARRAY_POINTER_UINT32(eat, SpaprXive, nr_irqs,
@@ -494,6 +555,17 @@ bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi)
     if (lsi) {
         xive_source_irq_set_lsi(xsrc, lisn);
     }
+
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_source_reset_one(xsrc, lisn, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return false;
+        }
+    }
+
     return true;
 }
 
@@ -755,6 +827,16 @@ static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
         new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn);
     }
 
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_set_source_config(xive, lisn, &new_eas, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
+
 out:
     xive->eat[lisn] = new_eas;
     return H_SUCCESS;
@@ -993,6 +1075,12 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
     case 16:
     case 21:
     case 24:
+        if (!QEMU_IS_ALIGNED(qpage, 1ul << qsize)) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: EQ @0x%" HWADDR_PRIx
+                          " is not naturally aligned with %" HWADDR_PRIx "\n",
+                          qpage, (hwaddr)1 << qsize);
+            return H_P4;
+        }
         end.w2 = cpu_to_be32((qpage >> 32) & 0x0fffffff);
         end.w3 = cpu_to_be32(qpage & 0xffffffff);
         end.w0 |= cpu_to_be32(END_W0_ENQUEUE);
@@ -1060,6 +1148,16 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
      */
 
 out:
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_set_queue_config(xive, end_blk, end_idx, &end, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
+
     /* Update END */
     memcpy(&xive->endt[end_idx], &end, sizeof(XiveEND));
     return H_SUCCESS;
@@ -1144,14 +1242,23 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
     }
 
     if (xive_end_is_enqueue(end)) {
-        args[1] = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
-            | be32_to_cpu(end->w3);
+        args[1] = xive_end_qaddr(end);
         args[2] = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
     } else {
         args[1] = 0;
         args[2] = 0;
     }
 
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_get_queue_config(xive, end_blk, end_idx, end, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
+
     /* TODO: do we need any locking on the END ? */
     if (flags & SPAPR_XIVE_END_DEBUG) {
         /* Load the event queue generation number into the return flags */
@@ -1304,15 +1411,20 @@ static target_ulong h_int_esb(PowerPCCPU *cpu,
         return H_P3;
     }
 
-    mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
+    if (kvm_irqchip_in_kernel()) {
+        args[0] = kvmppc_xive_esb_rw(xsrc, lisn, offset, data,
+                                     flags & SPAPR_XIVE_ESB_STORE);
+    } else {
+        mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
 
-    if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
-                      (flags & SPAPR_XIVE_ESB_STORE))) {
-        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
-                      HWADDR_PRIx "\n", mmio_addr);
-        return H_HARDWARE;
+        if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
+                          (flags & SPAPR_XIVE_ESB_STORE))) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
+                          HWADDR_PRIx "\n", mmio_addr);
+            return H_HARDWARE;
+        }
+        args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
     }
-    args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
     return H_SUCCESS;
 }
 
@@ -1369,7 +1481,20 @@ static target_ulong h_int_sync(PowerPCCPU *cpu,
      * This is not needed when running the emulation under QEMU
      */
 
-    /* This is not real hardware. Nothing to be done */
+    /*
+     * This is not real hardware. Nothing to be done unless when
+     * under KVM
+     */
+
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_sync_source(xive, lisn, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
     return H_SUCCESS;
 }
 
@@ -1404,6 +1529,16 @@ static target_ulong h_int_reset(PowerPCCPU *cpu,
     }
 
     device_reset(DEVICE(xive));
+
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_reset(xive, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
     return H_SUCCESS;
 }
 
diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
new file mode 100644
index 0000000000..b48f135838
--- /dev/null
+++ b/hw/intc/spapr_xive_kvm.c
@@ -0,0 +1,823 @@
+/*
+ * QEMU PowerPC sPAPR XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2019, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "target/ppc/cpu.h"
+#include "sysemu/cpus.h"
+#include "sysemu/kvm.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/ppc/spapr_xive.h"
+#include "hw/ppc/xive.h"
+#include "kvm_ppc.h"
+
+#include <sys/ioctl.h>
+
+/*
+ * Helpers for CPU hotplug
+ *
+ * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
+ */
+typedef struct KVMEnabledCPU {
+    unsigned long vcpu_id;
+    QLIST_ENTRY(KVMEnabledCPU) node;
+} KVMEnabledCPU;
+
+static QLIST_HEAD(, KVMEnabledCPU)
+    kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);
+
+static bool kvm_cpu_is_enabled(CPUState *cs)
+{
+    KVMEnabledCPU *enabled_cpu;
+    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
+
+    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
+        if (enabled_cpu->vcpu_id == vcpu_id) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static void kvm_cpu_enable(CPUState *cs)
+{
+    KVMEnabledCPU *enabled_cpu;
+    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
+
+    enabled_cpu = g_malloc(sizeof(*enabled_cpu));
+    enabled_cpu->vcpu_id = vcpu_id;
+    QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
+}
+
+static void kvm_cpu_disable_all(void)
+{
+    KVMEnabledCPU *enabled_cpu, *next;
+
+    QLIST_FOREACH_SAFE(enabled_cpu, &kvm_enabled_cpus, node, next) {
+        QLIST_REMOVE(enabled_cpu, node);
+        g_free(enabled_cpu);
+    }
+}
+
+/*
+ * XIVE Thread Interrupt Management context (KVM)
+ */
+
+static void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp)
+{
+    uint64_t state[2];
+    int ret;
+
+    /* word0 and word1 of the OS ring. */
+    state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);
+
+    ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
+    if (ret != 0) {
+        error_setg_errno(errp, errno,
+                         "XIVE: could not restore KVM state of CPU %ld",
+                         kvm_arch_vcpu_id(tctx->cs));
+    }
+}
+
+void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
+{
+    SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
+    uint64_t state[2] = { 0 };
+    int ret;
+
+    /* The KVM XIVE device is not in use */
+    if (xive->fd == -1) {
+        return;
+    }
+
+    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
+    if (ret != 0) {
+        error_setg_errno(errp, errno,
+                         "XIVE: could not capture KVM state of CPU %ld",
+                         kvm_arch_vcpu_id(tctx->cs));
+        return;
+    }
+
+    /* word0 and word1 of the OS ring. */
+    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
+}
+
+typedef struct {
+    XiveTCTX *tctx;
+    Error *err;
+} XiveCpuGetState;
+
+static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
+                                                 run_on_cpu_data arg)
+{
+    XiveCpuGetState *s = arg.host_ptr;
+
+    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
+}
+
+void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
+{
+    XiveCpuGetState s = {
+        .tctx = tctx,
+        .err = NULL,
+    };
+
+    /*
+     * Kick the vCPU to make sure they are available for the KVM ioctl.
+     */
+    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
+               RUN_ON_CPU_HOST_PTR(&s));
+
+    if (s.err) {
+        error_propagate(errp, s.err);
+        return;
+    }
+}
+
+void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
+{
+    SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
+    unsigned long vcpu_id;
+    int ret;
+
+    /* The KVM XIVE device is not in use */
+    if (xive->fd == -1) {
+        return;
+    }
+
+    /* Check if CPU was hot unplugged and replugged. */
+    if (kvm_cpu_is_enabled(tctx->cs)) {
+        return;
+    }
+
+    vcpu_id = kvm_arch_vcpu_id(tctx->cs);
+
+    ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
+                              vcpu_id, 0);
+    if (ret < 0) {
+        error_setg(errp, "XIVE: unable to connect CPU%ld to KVM device: %s",
+                   vcpu_id, strerror(errno));
+        return;
+    }
+
+    kvm_cpu_enable(tctx->cs);
+}
+
+/*
+ * XIVE Interrupt Source (KVM)
+ */
+
+void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
+                                   Error **errp)
+{
+    uint32_t end_idx;
+    uint32_t end_blk;
+    uint8_t priority;
+    uint32_t server;
+    bool masked;
+    uint32_t eisn;
+    uint64_t kvm_src;
+    Error *local_err = NULL;
+
+    assert(xive_eas_is_valid(eas));
+
+    end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+    end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
+    eisn = xive_get_field64(EAS_END_DATA, eas->w);
+    masked = xive_eas_is_masked(eas);
+
+    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+    kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
+        KVM_XIVE_SOURCE_PRIORITY_MASK;
+    kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
+        KVM_XIVE_SOURCE_SERVER_MASK;
+    kvm_src |= ((uint64_t) masked << KVM_XIVE_SOURCE_MASKED_SHIFT) &
+        KVM_XIVE_SOURCE_MASKED_MASK;
+    kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
+        KVM_XIVE_SOURCE_EISN_MASK;
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
+                      &kvm_src, true, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+}
+
+void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp)
+{
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn,
+                      NULL, true, errp);
+}
+
+/*
+ * At reset, the interrupt sources are simply created and MASKED. We
+ * only need to inform the KVM XIVE device about their type: LSI or
+ * MSI.
+ */
+void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
+{
+    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
+    uint64_t state = 0;
+
+    /* The KVM XIVE device is not in use */
+    if (xive->fd == -1) {
+        return;
+    }
+
+    if (xive_source_irq_is_lsi(xsrc, srcno)) {
+        state |= KVM_XIVE_LEVEL_SENSITIVE;
+        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
+            state |= KVM_XIVE_LEVEL_ASSERTED;
+        }
+    }
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state,
+                      true, errp);
+}
+
+static void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
+{
+    int i;
+
+    for (i = 0; i < xsrc->nr_irqs; i++) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_source_reset_one(xsrc, i, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+}
+
+/*
+ * This is used to perform the magic loads on the ESB pages, described
+ * in xive.h.
+ *
+ * Memory barriers should not be needed for loads (no store for now).
+ */
+static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+                            uint64_t data, bool write)
+{
+    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_mgmt(xsrc, srcno) +
+        offset;
+
+    if (write) {
+        *addr = cpu_to_be64(data);
+        return -1;
+    } else {
+        /* Prevent the compiler from optimizing away the load */
+        volatile uint64_t value = be64_to_cpu(*addr);
+        return value;
+    }
+}
+
+static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
+{
+    return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3;
+}
+
+static void xive_esb_trigger(XiveSource *xsrc, int srcno)
+{
+    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_page(xsrc, srcno);
+
+    *addr = 0x0;
+}
+
+uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+                            uint64_t data, bool write)
+{
+    if (write) {
+        return xive_esb_rw(xsrc, srcno, offset, data, 1);
+    }
+
+    /*
+     * Special Load EOI handling for LSI sources. Q bit is never set
+     * and the interrupt should be re-triggered if the level is still
+     * asserted.
+     */
+    if (xive_source_irq_is_lsi(xsrc, srcno) &&
+        offset == XIVE_ESB_LOAD_EOI) {
+        xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00);
+        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
+            xive_esb_trigger(xsrc, srcno);
+        }
+        return 0;
+    } else {
+        return xive_esb_rw(xsrc, srcno, offset, 0, 0);
+    }
+}
+
+static void kvmppc_xive_source_get_state(XiveSource *xsrc)
+{
+    int i;
+
+    for (i = 0; i < xsrc->nr_irqs; i++) {
+        /* Perform a load without side effect to retrieve the PQ bits */
+        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
+
+        /* and save PQ locally */
+        xive_source_esb_set(xsrc, i, pq);
+    }
+}
+
+void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
+{
+    XiveSource *xsrc = opaque;
+    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
+    struct kvm_irq_level args;
+    int rc;
+
+    /* The KVM XIVE device should be in use */
+    assert(xive->fd != -1);
+
+    args.irq = srcno;
+    if (!xive_source_irq_is_lsi(xsrc, srcno)) {
+        if (!val) {
+            return;
+        }
+        args.level = KVM_INTERRUPT_SET;
+    } else {
+        if (val) {
+            xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
+            args.level = KVM_INTERRUPT_SET_LEVEL;
+        } else {
+            xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
+            args.level = KVM_INTERRUPT_UNSET;
+        }
+    }
+    rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args);
+    if (rc < 0) {
+        error_report("XIVE: kvm_irq_line() failed : %s", strerror(errno));
+    }
+}
+
+/*
+ * sPAPR XIVE interrupt controller (KVM)
+ */
+void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
+                                  uint32_t end_idx, XiveEND *end,
+                                  Error **errp)
+{
+    struct kvm_ppc_xive_eq kvm_eq = { 0 };
+    uint64_t kvm_eq_idx;
+    uint8_t priority;
+    uint32_t server;
+    Error *local_err = NULL;
+
+    assert(xive_end_is_valid(end));
+
+    /* Encode the tuple (server, prio) as a KVM EQ index */
+    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
+            KVM_XIVE_EQ_PRIORITY_MASK;
+    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
+        KVM_XIVE_EQ_SERVER_MASK;
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
+                      &kvm_eq, false, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    /*
+     * The EQ index and toggle bit are updated by HW. These are the
+     * only fields from KVM we want to update QEMU with. The other END
+     * fields should already be in the QEMU END table.
+     */
+    end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
+        xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
+}
+
+void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
+                                  uint32_t end_idx, XiveEND *end,
+                                  Error **errp)
+{
+    struct kvm_ppc_xive_eq kvm_eq = { 0 };
+    uint64_t kvm_eq_idx;
+    uint8_t priority;
+    uint32_t server;
+    Error *local_err = NULL;
+
+    /*
+     * Build the KVM state from the local END structure.
+     */
+
+    kvm_eq.flags = 0;
+    if (xive_get_field32(END_W0_UCOND_NOTIFY, end->w0)) {
+        kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
+    }
+
+    /*
+     * If the hcall is disabling the EQ, set the size and page address
+     * to zero. When migrating, only valid ENDs are taken into
+     * account.
+     */
+    if (xive_end_is_valid(end)) {
+        kvm_eq.qshift = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
+        kvm_eq.qaddr  = xive_end_qaddr(end);
+        /*
+         * The EQ toggle bit and index should only be relevant when
+         * restoring the EQ state
+         */
+        kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
+        kvm_eq.qindex  = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+    } else {
+        kvm_eq.qshift = 0;
+        kvm_eq.qaddr  = 0;
+    }
+
+    /* Encode the tuple (server, prio) as a KVM EQ index */
+    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
+            KVM_XIVE_EQ_PRIORITY_MASK;
+    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
+        KVM_XIVE_EQ_SERVER_MASK;
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
+                      &kvm_eq, true, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+}
+
+void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
+{
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET,
+                      NULL, true, errp);
+}
+
+static void kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
+{
+    Error *local_err = NULL;
+    int i;
+
+    for (i = 0; i < xive->nr_ends; i++) {
+        if (!xive_end_is_valid(&xive->endt[i])) {
+            continue;
+        }
+
+        kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
+                                     &xive->endt[i], &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+}
+
+/*
+ * The primary goal of the XIVE VM change handler is to mark the EQ
+ * pages dirty when all XIVE event notifications have stopped.
+ *
+ * Whenever the VM is stopped, the VM change handler sets the source
+ * PQs to PENDING to stop the flow of events and to possibly catch a
+ * triggered interrupt occuring while the VM is stopped. The previous
+ * state is saved in anticipation of a migration. The XIVE controller
+ * is then synced through KVM to flush any in-flight event
+ * notification and stabilize the EQs.
+ *
+ * At this stage, we can mark the EQ page dirty and let a migration
+ * sequence transfer the EQ pages to the destination, which is done
+ * just after the stop state.
+ *
+ * The previous configuration of the sources is restored when the VM
+ * runs again. If an interrupt was queued while the VM was stopped,
+ * simply generate a trigger.
+ */
+static void kvmppc_xive_change_state_handler(void *opaque, int running,
+                                             RunState state)
+{
+    SpaprXive *xive = opaque;
+    XiveSource *xsrc = &xive->source;
+    Error *local_err = NULL;
+    int i;
+
+    /*
+     * Restore the sources to their initial state. This is called when
+     * the VM resumes after a stop or a migration.
+     */
+    if (running) {
+        for (i = 0; i < xsrc->nr_irqs; i++) {
+            uint8_t pq = xive_source_esb_get(xsrc, i);
+            uint8_t old_pq;
+
+            old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));
+
+            /*
+             * An interrupt was queued while the VM was stopped,
+             * generate a trigger.
+             */
+            if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) {
+                xive_esb_trigger(xsrc, i);
+            }
+        }
+
+        return;
+    }
+
+    /*
+     * Mask the sources, to stop the flow of event notifications, and
+     * save the PQs locally in the XiveSource object. The XiveSource
+     * state will be collected later on by its vmstate handler if a
+     * migration is in progress.
+     */
+    for (i = 0; i < xsrc->nr_irqs; i++) {
+        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
+
+        /*
+         * PQ is set to PENDING to possibly catch a triggered
+         * interrupt occuring while the VM is stopped (hotplug event
+         * for instance) .
+         */
+        if (pq != XIVE_ESB_OFF) {
+            pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_10);
+        }
+        xive_source_esb_set(xsrc, i, pq);
+    }
+
+    /*
+     * Sync the XIVE controller in KVM, to flush in-flight event
+     * notification that should be enqueued in the EQs and mark the
+     * XIVE EQ pages dirty to collect all updates.
+     */
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
+                      KVM_DEV_XIVE_EQ_SYNC, NULL, true, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return;
+    }
+}
+
+void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
+{
+    /* The KVM XIVE device is not in use */
+    if (xive->fd == -1) {
+        return;
+    }
+
+    /*
+     * When the VM is stopped, the sources are masked and the previous
+     * state is saved in anticipation of a migration. We should not
+     * synchronize the source state in that case else we will override
+     * the saved state.
+     */
+    if (runstate_is_running()) {
+        kvmppc_xive_source_get_state(&xive->source);
+    }
+
+    /* EAT: there is no extra state to query from KVM */
+
+    /* ENDT */
+    kvmppc_xive_get_queues(xive, errp);
+}
+
+/*
+ * The SpaprXive 'pre_save' method is called by the vmstate handler of
+ * the SpaprXive model, after the XIVE controller is synced in the VM
+ * change handler.
+ */
+int kvmppc_xive_pre_save(SpaprXive *xive)
+{
+    Error *local_err = NULL;
+
+    /* The KVM XIVE device is not in use */
+    if (xive->fd == -1) {
+        return 0;
+    }
+
+    /* EAT: there is no extra state to query from KVM */
+
+    /* ENDT */
+    kvmppc_xive_get_queues(xive, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * The SpaprXive 'post_load' method is not called by a vmstate
+ * handler. It is called at the sPAPR machine level at the end of the
+ * migration sequence by the sPAPR IRQ backend 'post_load' method,
+ * when all XIVE states have been transferred and loaded.
+ */
+int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
+{
+    Error *local_err = NULL;
+    CPUState *cs;
+    int i;
+
+    /* The KVM XIVE device should be in use */
+    assert(xive->fd != -1);
+
+    /* Restore the ENDT first. The targetting depends on it. */
+    for (i = 0; i < xive->nr_ends; i++) {
+        if (!xive_end_is_valid(&xive->endt[i])) {
+            continue;
+        }
+
+        kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
+                                     &xive->endt[i], &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return -1;
+        }
+    }
+
+    /* Restore the EAT */
+    for (i = 0; i < xive->nr_irqs; i++) {
+        if (!xive_eas_is_valid(&xive->eat[i])) {
+            continue;
+        }
+
+        kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return -1;
+        }
+    }
+
+    /* Restore the thread interrupt contexts */
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+        kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return -1;
+        }
+    }
+
+    /* The source states will be restored when the machine starts running */
+    return 0;
+}
+
+static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
+                              Error **errp)
+{
+    void *addr;
+    uint32_t page_shift = 16; /* TODO: fix page_shift */
+
+    addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd,
+                pgoff << page_shift);
+    if (addr == MAP_FAILED) {
+        error_setg_errno(errp, errno, "XIVE: unable to set memory mapping");
+        return NULL;
+    }
+
+    return addr;
+}
+
+/*
+ * All the XIVE memory regions are now backed by mappings from the KVM
+ * XIVE device.
+ */
+void kvmppc_xive_connect(SpaprXive *xive, Error **errp)
+{
+    XiveSource *xsrc = &xive->source;
+    Error *local_err = NULL;
+    size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+    size_t tima_len = 4ull << TM_SHIFT;
+    CPUState *cs;
+
+    /*
+     * The KVM XIVE device already in use. This is the case when
+     * rebooting under the XIVE-only interrupt mode.
+     */
+    if (xive->fd != -1) {
+        return;
+    }
+
+    if (!kvmppc_has_cap_xive()) {
+        error_setg(errp, "IRQ_XIVE capability must be present for KVM");
+        return;
+    }
+
+    /* First, create the KVM XIVE device */
+    xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
+    if (xive->fd < 0) {
+        error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device");
+        return;
+    }
+
+    /*
+     * 1. Source ESB pages - KVM mapping
+     */
+    xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len,
+                                      &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    memory_region_init_ram_device_ptr(&xsrc->esb_mmio, OBJECT(xsrc),
+                                      "xive.esb", esb_len, xsrc->esb_mmap);
+
+    /*
+     * 2. END ESB pages (No KVM support yet)
+     */
+
+    /*
+     * 3. TIMA pages - KVM mapping
+     */
+    xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len,
+                                     &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    memory_region_init_ram_device_ptr(&xive->tm_mmio, OBJECT(xive),
+                                      "xive.tima", tima_len, xive->tm_mmap);
+
+    xive->change = qemu_add_vm_change_state_handler(
+        kvmppc_xive_change_state_handler, xive);
+
+    /* Connect the presenters to the initial VCPUs of the machine */
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+        kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+    /* Update the KVM sources */
+    kvmppc_xive_source_reset(xsrc, &local_err);
+    if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+    }
+
+    kvm_kernel_irqchip = true;
+    kvm_msi_via_irqfd_allowed = true;
+    kvm_gsi_direct_mapping = true;
+
+    /* Map all regions */
+    spapr_xive_map_mmio(xive);
+}
+
+void kvmppc_xive_disconnect(SpaprXive *xive, Error **errp)
+{
+    XiveSource *xsrc;
+    size_t esb_len;
+
+    /* The KVM XIVE device is not in use */
+    if (!xive || xive->fd == -1) {
+        return;
+    }
+
+    if (!kvmppc_has_cap_xive()) {
+        error_setg(errp, "IRQ_XIVE capability must be present for KVM");
+        return;
+    }
+
+    /* Clear the KVM mapping */
+    xsrc = &xive->source;
+    esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+
+    sysbus_mmio_unmap(SYS_BUS_DEVICE(xive), 0);
+    munmap(xsrc->esb_mmap, esb_len);
+
+    sysbus_mmio_unmap(SYS_BUS_DEVICE(xive), 1);
+
+    sysbus_mmio_unmap(SYS_BUS_DEVICE(xive), 2);
+    munmap(xive->tm_mmap, 4ull << TM_SHIFT);
+
+    /*
+     * When the KVM device fd is closed, the KVM device is destroyed
+     * and removed from the list of devices of the VM. The VCPU
+     * presenters are also detached from the device.
+     */
+    close(xive->fd);
+    xive->fd = -1;
+
+    kvm_kernel_irqchip = false;
+    kvm_msi_via_irqfd_allowed = false;
+    kvm_gsi_direct_mapping = false;
+
+    /* Clear the local list of presenter (hotplug) */
+    kvm_cpu_disable_all();
+
+    /* VM Change state handler is not needed anymore */
+    qemu_del_vm_change_state_handler(xive->change);
+}
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index af7dc709ab..79f5a8a916 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -610,6 +610,12 @@ static const TypeInfo ics_simple_info = {
     .class_size = sizeof(ICSStateClass),
 };
 
+static void ics_reset_irq(ICSIRQState *irq)
+{
+    irq->priority = 0xff;
+    irq->saved_priority = 0xff;
+}
+
 static void ics_base_reset(DeviceState *dev)
 {
     ICSState *ics = ICS_BASE(dev);
@@ -623,8 +629,7 @@ static void ics_base_reset(DeviceState *dev)
     memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs);
 
     for (i = 0; i < ics->nr_irqs; i++) {
-        ics->irqs[i].priority = 0xff;
-        ics->irqs[i].saved_priority = 0xff;
+        ics_reset_irq(ics->irqs + i);
         ics->irqs[i].flags = flags[i];
     }
 }
@@ -760,6 +765,7 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
         lsi ? XICS_FLAGS_IRQ_LSI : XICS_FLAGS_IRQ_MSI;
 
     if (kvm_irqchip_in_kernel()) {
+        ics_reset_irq(ics->irqs + srcno);
         ics_set_kvm_state_one(ics, srcno);
     }
 }
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 78a252e6df..5ba5b77561 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -33,6 +33,7 @@
 #include "trace.h"
 #include "sysemu/kvm.h"
 #include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_cpu_core.h"
 #include "hw/ppc/xics.h"
 #include "hw/ppc/xics_spapr.h"
 #include "kvm_ppc.h"
@@ -51,6 +52,16 @@ typedef struct KVMEnabledICP {
 static QLIST_HEAD(, KVMEnabledICP)
     kvm_enabled_icps = QLIST_HEAD_INITIALIZER(&kvm_enabled_icps);
 
+static void kvm_disable_icps(void)
+{
+    KVMEnabledICP *enabled_icp, *next;
+
+    QLIST_FOREACH_SAFE(enabled_icp, &kvm_enabled_icps, node, next) {
+        QLIST_REMOVE(enabled_icp, node);
+        g_free(enabled_icp);
+    }
+}
+
 /*
  * ICP-KVM
  */
@@ -59,6 +70,11 @@ void icp_get_kvm_state(ICPState *icp)
     uint64_t state;
     int ret;
 
+    /* The KVM XICS device is not in use */
+    if (kernel_xics_fd == -1) {
+        return;
+    }
+
     /* ICP for this CPU thread is not in use, exiting */
     if (!icp->cs) {
         return;
@@ -95,6 +111,11 @@ int icp_set_kvm_state(ICPState *icp)
     uint64_t state;
     int ret;
 
+    /* The KVM XICS device is not in use */
+    if (kernel_xics_fd == -1) {
+        return 0;
+    }
+
     /* ICP for this CPU thread is not in use, exiting */
     if (!icp->cs) {
         return 0;
@@ -123,8 +144,9 @@ void icp_kvm_realize(DeviceState *dev, Error **errp)
     unsigned long vcpu_id;
     int ret;
 
+    /* The KVM XICS device is not in use */
     if (kernel_xics_fd == -1) {
-        abort();
+        return;
     }
 
     cs = icp->cs;
@@ -160,6 +182,11 @@ void ics_get_kvm_state(ICSState *ics)
     uint64_t state;
     int i;
 
+    /* The KVM XICS device is not in use */
+    if (kernel_xics_fd == -1) {
+        return;
+    }
+
     for (i = 0; i < ics->nr_irqs; i++) {
         ICSIRQState *irq = &ics->irqs[i];
 
@@ -220,6 +247,11 @@ int ics_set_kvm_state_one(ICSState *ics, int srcno)
     ICSIRQState *irq = &ics->irqs[srcno];
     int ret;
 
+    /* The KVM XICS device is not in use */
+    if (kernel_xics_fd == -1) {
+        return 0;
+    }
+
     state = irq->server;
     state |= (uint64_t)(irq->saved_priority & KVM_XICS_PRIORITY_MASK)
         << KVM_XICS_PRIORITY_SHIFT;
@@ -259,6 +291,11 @@ int ics_set_kvm_state(ICSState *ics)
 {
     int i;
 
+    /* The KVM XICS device is not in use */
+    if (kernel_xics_fd == -1) {
+        return 0;
+    }
+
     for (i = 0; i < ics->nr_irqs; i++) {
         int ret;
 
@@ -276,6 +313,9 @@ void ics_kvm_set_irq(ICSState *ics, int srcno, int val)
     struct kvm_irq_level args;
     int rc;
 
+    /* The KVM XICS device should be in use */
+    assert(kernel_xics_fd != -1);
+
     args.irq = srcno + ics->offset;
     if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MSI) {
         if (!val) {
@@ -303,6 +343,16 @@ static void rtas_dummy(PowerPCCPU *cpu, SpaprMachineState *spapr,
 int xics_kvm_init(SpaprMachineState *spapr, Error **errp)
 {
     int rc;
+    CPUState *cs;
+    Error *local_err = NULL;
+
+    /*
+     * The KVM XICS device already in use. This is the case when
+     * rebooting under the XICS-only interrupt mode.
+     */
+    if (kernel_xics_fd != -1) {
+        return 0;
+    }
 
     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) {
         error_setg(errp,
@@ -351,6 +401,26 @@ int xics_kvm_init(SpaprMachineState *spapr, Error **errp)
     kvm_msi_via_irqfd_allowed = true;
     kvm_gsi_direct_mapping = true;
 
+    /* Create the presenters */
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+        icp_kvm_realize(DEVICE(spapr_cpu_state(cpu)->icp), &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            goto fail;
+        }
+    }
+
+    /* Update the KVM sources */
+    ics_set_kvm_state(spapr->ics);
+
+    /* Connect the presenters to the initial VCPUs of the machine */
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        icp_set_kvm_state(spapr_cpu_state(cpu)->icp);
+    }
+
     return 0;
 
 fail:
@@ -360,3 +430,44 @@ fail:
     kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
     return -1;
 }
+
+void xics_kvm_disconnect(SpaprMachineState *spapr, Error **errp)
+{
+    /* The KVM XICS device is not in use */
+    if (kernel_xics_fd == -1) {
+        return;
+    }
+
+    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) {
+        error_setg(errp,
+                   "KVM and IRQ_XICS capability must be present for KVM XICS device");
+        return;
+    }
+
+    /*
+     * Only on P9 using the XICS-on XIVE KVM device:
+     *
+     * When the KVM device fd is closed, the device is destroyed and
+     * removed from the list of devices of the VM. The VCPU presenters
+     * are also detached from the device.
+     */
+    close(kernel_xics_fd);
+    kernel_xics_fd = -1;
+
+    spapr_rtas_unregister(RTAS_IBM_SET_XIVE);
+    spapr_rtas_unregister(RTAS_IBM_GET_XIVE);
+    spapr_rtas_unregister(RTAS_IBM_INT_OFF);
+    spapr_rtas_unregister(RTAS_IBM_INT_ON);
+
+    kvmppc_define_rtas_kernel_token(0, "ibm,set-xive");
+    kvmppc_define_rtas_kernel_token(0, "ibm,get-xive");
+    kvmppc_define_rtas_kernel_token(0, "ibm,int-on");
+    kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
+
+    kvm_kernel_irqchip = false;
+    kvm_msi_via_irqfd_allowed = false;
+    kvm_gsi_direct_mapping = false;
+
+    /* Clear the presenter from the VCPUs */
+    kvm_disable_icps();
+}
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index 9d2b8adef7..5a1835e8b1 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -239,6 +239,13 @@ static void rtas_int_on(PowerPCCPU *cpu, SpaprMachineState *spapr,
 
 void xics_spapr_init(SpaprMachineState *spapr)
 {
+    /* Emulated mode can only be initialized once. */
+    if (spapr->ics->init) {
+        return;
+    }
+
+    spapr->ics->init = true;
+
     /* Registration of global state belongs into realize */
     spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
     spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index a0b87001da..0c74e47aa4 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -493,6 +493,16 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
     int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
     int i;
 
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return;
+        }
+    }
+
     monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
                    "  W2\n", cpu_index);
 
@@ -555,6 +565,15 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    /* Connect the presenter to the VCPU (required for CPU hotplug) */
+    if (kvm_irqchip_in_kernel()) {
+        kvmppc_xive_cpu_connect(tctx, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
     qemu_register_reset(xive_tctx_reset, dev);
 }
 
@@ -563,10 +582,27 @@ static void xive_tctx_unrealize(DeviceState *dev, Error **errp)
     qemu_unregister_reset(xive_tctx_reset, dev);
 }
 
+static int vmstate_xive_tctx_pre_save(void *opaque)
+{
+    Error *local_err = NULL;
+
+    if (kvm_irqchip_in_kernel()) {
+        kvmppc_xive_cpu_get_state(XIVE_TCTX(opaque), &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
 static const VMStateDescription vmstate_xive_tctx = {
     .name = TYPE_XIVE_TCTX,
     .version_id = 1,
     .minimum_version_id = 1,
+    .pre_save = vmstate_xive_tctx_pre_save,
+    .post_load = NULL, /* handled by the sPAPRxive model */
     .fields = (VMStateField[]) {
         VMSTATE_BUFFER(regs, XiveTCTX),
         VMSTATE_END_OF_LIST()
@@ -990,9 +1026,11 @@ static void xive_source_realize(DeviceState *dev, Error **errp)
     xsrc->status = g_malloc0(xsrc->nr_irqs);
     xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
 
-    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
-                          &xive_source_esb_ops, xsrc, "xive.esb",
-                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
+    if (!kvm_irqchip_in_kernel()) {
+        memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
+                              &xive_source_esb_ops, xsrc, "xive.esb",
+                              (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
+    }
 
     qemu_register_reset(xive_source_reset, dev);
 }
@@ -1042,8 +1080,7 @@ static const TypeInfo xive_source_info = {
 
 void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon)
 {
-    uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
-        | be32_to_cpu(end->w3);
+    uint64_t qaddr_base = xive_end_qaddr(end);
     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
     uint32_t qentries = 1 << (qsize + 10);
@@ -1072,8 +1109,7 @@ void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon)
 
 void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
 {
-    uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
-        | be32_to_cpu(end->w3);
+    uint64_t qaddr_base = xive_end_qaddr(end);
     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
@@ -1101,8 +1137,7 @@ void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
 
 static void xive_end_enqueue(XiveEND *end, uint32_t data)
 {
-    uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
-        | be32_to_cpu(end->w3);
+    uint64_t qaddr_base = xive_end_qaddr(end);
     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
diff --git a/hw/isa/i82378.c b/hw/isa/i82378.c
index a5d67bc6d7..c08970b24a 100644
--- a/hw/isa/i82378.c
+++ b/hw/isa/i82378.c
@@ -21,7 +21,6 @@
 #include "hw/pci/pci.h"
 #include "hw/i386/pc.h"
 #include "hw/timer/i8254.h"
-#include "hw/timer/mc146818rtc.h"
 #include "hw/audio/pcspk.h"
 
 #define TYPE_I82378 "i82378"
@@ -105,9 +104,6 @@ static void i82378_realize(PCIDevice *pci, Error **errp)
 
     /* 2 82C37 (dma) */
     isa = isa_create_simple(isabus, "i82374");
-
-    /* timer */
-    isa_create_simple(isabus, TYPE_MC146818_RTC);
 }
 
 static void i82378_init(Object *obj)
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index 031ee9cd93..35d17246e9 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -412,7 +412,7 @@ void ich9_lpc_pm_init(PCIDevice *lpc_pci, bool smm_enabled)
                                  true);
     }
 
-    ich9_lpc_reset(&lpc->d.qdev);
+    ich9_lpc_reset(DEVICE(lpc));
 }
 
 /* APM */
diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c
index 85d0532dd5..d46754f61c 100644
--- a/hw/isa/vt82c686.c
+++ b/hw/isa/vt82c686.c
@@ -369,7 +369,7 @@ static void vt82c686b_pm_realize(PCIDevice *dev, Error **errp)
     pci_conf[0x90] = s->smb_io_base | 1;
     pci_conf[0x91] = s->smb_io_base >> 8;
     pci_conf[0xd2] = 0x90;
-    pm_smbus_init(&s->dev.qdev, &s->smb, false);
+    pm_smbus_init(DEVICE(s), &s->smb, false);
     memory_region_add_subregion(get_system_io(), s->smb_io_base, &s->smb.io);
 
     apm_init(dev, &s->apm, NULL, s);
diff --git a/hw/microblaze/xlnx-zynqmp-pmu.c b/hw/microblaze/xlnx-zynqmp-pmu.c
index 57dc1ccd42..df6c0048aa 100644
--- a/hw/microblaze/xlnx-zynqmp-pmu.c
+++ b/hw/microblaze/xlnx-zynqmp-pmu.c
@@ -55,6 +55,7 @@ typedef struct XlnxZynqMPPMUSoCState {
     /*< public >*/
     MicroBlazeCPU cpu;
     XlnxPMUIOIntc intc;
+    XlnxZynqMPIPI ipi[XLNX_ZYNQMP_PMU_NUM_IPIS];
 }  XlnxZynqMPPMUSoCState;
 
 
@@ -67,6 +68,14 @@ static void xlnx_zynqmp_pmu_soc_init(Object *obj)
 
     sysbus_init_child_obj(obj, "intc", &s->intc, sizeof(s->intc),
                           TYPE_XLNX_PMU_IO_INTC);
+
+    /* Create the IPI device */
+    for (int i = 0; i < XLNX_ZYNQMP_PMU_NUM_IPIS; i++) {
+        char *name = g_strdup_printf("ipi%d", i);
+        sysbus_init_child_obj(obj, name, &s->ipi[i],
+                              sizeof(XlnxZynqMPIPI), TYPE_XLNX_ZYNQMP_IPI);
+        g_free(name);
+    }
 }
 
 static void xlnx_zynqmp_pmu_soc_realize(DeviceState *dev, Error **errp)
@@ -112,6 +121,15 @@ static void xlnx_zynqmp_pmu_soc_realize(DeviceState *dev, Error **errp)
     sysbus_mmio_map(SYS_BUS_DEVICE(&s->intc), 0, XLNX_ZYNQMP_PMU_INTC_ADDR);
     sysbus_connect_irq(SYS_BUS_DEVICE(&s->intc), 0,
                        qdev_get_gpio_in(DEVICE(&s->cpu), MB_CPU_IRQ));
+
+    /* Connect the IPI device */
+    for (int i = 0; i < XLNX_ZYNQMP_PMU_NUM_IPIS; i++) {
+        object_property_set_bool(OBJECT(&s->ipi[i]), true, "realized",
+                                 &error_abort);
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->ipi[i]), 0, ipi_addr[i]);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->ipi[i]), 0,
+                           qdev_get_gpio_in(DEVICE(&s->intc), ipi_irq[i]));
+    }
 }
 
 static void xlnx_zynqmp_pmu_soc_class_init(ObjectClass *oc, void *data)
@@ -144,9 +162,6 @@ static void xlnx_zynqmp_pmu_init(MachineState *machine)
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *pmu_rom = g_new(MemoryRegion, 1);
     MemoryRegion *pmu_ram = g_new(MemoryRegion, 1);
-    XlnxZynqMPIPI *ipi[XLNX_ZYNQMP_PMU_NUM_IPIS];
-    qemu_irq irq[32];
-    int i;
 
     /* Create the ROM */
     memory_region_init_rom(pmu_rom, NULL, "xlnx-zynqmp-pmu.rom",
@@ -161,29 +176,11 @@ static void xlnx_zynqmp_pmu_init(MachineState *machine)
                                 pmu_ram);
 
     /* Create the PMU device */
-    object_initialize(pmu, sizeof(XlnxZynqMPPMUSoCState), TYPE_XLNX_ZYNQMP_PMU_SOC);
-    object_property_add_child(OBJECT(machine), "pmu", OBJECT(pmu),
-                              &error_abort);
+    object_initialize_child(OBJECT(machine), "pmu", pmu,
+                            sizeof(XlnxZynqMPPMUSoCState),
+                            TYPE_XLNX_ZYNQMP_PMU_SOC, &error_abort, NULL);
     object_property_set_bool(OBJECT(pmu), true, "realized", &error_fatal);
 
-    for (i = 0; i < 32; i++) {
-        irq[i] = qdev_get_gpio_in(DEVICE(&pmu->intc), i);
-    }
-
-    /* Create and connect the IPI device */
-    for (i = 0; i < XLNX_ZYNQMP_PMU_NUM_IPIS; i++) {
-        ipi[i] = g_new0(XlnxZynqMPIPI, 1);
-        object_initialize(ipi[i], sizeof(XlnxZynqMPIPI), TYPE_XLNX_ZYNQMP_IPI);
-        qdev_set_parent_bus(DEVICE(ipi[i]), sysbus_get_default());
-    }
-
-    for (i = 0; i < XLNX_ZYNQMP_PMU_NUM_IPIS; i++) {
-        object_property_set_bool(OBJECT(ipi[i]), true, "realized",
-                                 &error_abort);
-        sysbus_mmio_map(SYS_BUS_DEVICE(ipi[i]), 0, ipi_addr[i]);
-        sysbus_connect_irq(SYS_BUS_DEVICE(ipi[i]), 0, irq[ipi_irq[i]]);
-    }
-
     /* Load the kernel */
     microblaze_load_kernel(&pmu->cpu, XLNX_ZYNQMP_PMU_RAM_ADDR,
                            machine->ram_size,
diff --git a/hw/mips/boston.c b/hw/mips/boston.c
index a8b29f62f5..1ffccc8da9 100644
--- a/hw/mips/boston.c
+++ b/hw/mips/boston.c
@@ -49,7 +49,7 @@ typedef struct {
     SysBusDevice parent_obj;
 
     MachineState *mach;
-    MIPSCPSState *cps;
+    MIPSCPSState cps;
     SerialState *uart;
 
     CharBackend lcd_display;
@@ -188,7 +188,7 @@ static uint64_t boston_platreg_read(void *opaque, hwaddr addr,
     case PLAT_DDR3_STATUS:
         return PLAT_DDR3_STATUS_LOCKED | PLAT_DDR3_STATUS_CALIBRATED;
     case PLAT_MMCM_DIV:
-        gic_freq = mips_gictimer_get_freq(s->cps->gic.gic_timer) / 1000000;
+        gic_freq = mips_gictimer_get_freq(s->cps.gic.gic_timer) / 1000000;
         val = gic_freq << PLAT_MMCM_DIV_INPUT_SHIFT;
         val |= 1 << PLAT_MMCM_DIV_MUL_SHIFT;
         val |= 1 << PLAT_MMCM_DIV_CLK0DIV_SHIFT;
@@ -455,20 +455,19 @@ static void boston_mach_init(MachineState *machine)
 
     is_64b = cpu_supports_isa(machine->cpu_type, ISA_MIPS64);
 
-    s->cps = MIPS_CPS(object_new(TYPE_MIPS_CPS));
-    qdev_set_parent_bus(DEVICE(s->cps), sysbus_get_default());
-
-    object_property_set_str(OBJECT(s->cps), machine->cpu_type, "cpu-type",
+    sysbus_init_child_obj(OBJECT(machine), "cps", OBJECT(&s->cps),
+                          sizeof(s->cps), TYPE_MIPS_CPS);
+    object_property_set_str(OBJECT(&s->cps), machine->cpu_type, "cpu-type",
                             &err);
-    object_property_set_int(OBJECT(s->cps), smp_cpus, "num-vp", &err);
-    object_property_set_bool(OBJECT(s->cps), true, "realized", &err);
+    object_property_set_int(OBJECT(&s->cps), smp_cpus, "num-vp", &err);
+    object_property_set_bool(OBJECT(&s->cps), true, "realized", &err);
 
     if (err != NULL) {
         error_report("%s", error_get_pretty(err));
         exit(1);
     }
 
-    sysbus_mmio_map_overlap(SYS_BUS_DEVICE(s->cps), 0, 0, 1);
+    sysbus_mmio_map_overlap(SYS_BUS_DEVICE(&s->cps), 0, 0, 1);
 
     flash =  g_new(MemoryRegion, 1);
     memory_region_init_rom(flash, NULL, "boston.flash", 128 * MiB, &err);
@@ -487,17 +486,17 @@ static void boston_mach_init(MachineState *machine)
     xilinx_pcie_init(sys_mem, 0,
                      0x10000000, 32 * MiB,
                      0x40000000, 1 * GiB,
-                     get_cps_irq(s->cps, 2), false);
+                     get_cps_irq(&s->cps, 2), false);
 
     xilinx_pcie_init(sys_mem, 1,
                      0x12000000, 32 * MiB,
                      0x20000000, 512 * MiB,
-                     get_cps_irq(s->cps, 1), false);
+                     get_cps_irq(&s->cps, 1), false);
 
     pcie2 = xilinx_pcie_init(sys_mem, 2,
                              0x14000000, 32 * MiB,
                              0x16000000, 1 * MiB,
-                             get_cps_irq(s->cps, 0), true);
+                             get_cps_irq(&s->cps, 0), true);
 
     platreg = g_new(MemoryRegion, 1);
     memory_region_init_io(platreg, NULL, &boston_platreg_ops, s,
@@ -505,7 +504,7 @@ static void boston_mach_init(MachineState *machine)
     memory_region_add_subregion_overlap(sys_mem, 0x17ffd000, platreg, 0);
 
     s->uart = serial_mm_init(sys_mem, 0x17ffe000, 2,
-                             get_cps_irq(s->cps, 3), 10000000,
+                             get_cps_irq(&s->cps, 3), 10000000,
                              serial_hd(0), DEVICE_NATIVE_ENDIAN);
 
     lcd = g_new(MemoryRegion, 1);
diff --git a/hw/mips/cps.c b/hw/mips/cps.c
index fc97f59af4..649b35a76c 100644
--- a/hw/mips/cps.c
+++ b/hw/mips/cps.c
@@ -94,9 +94,8 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
 
     /* Inter-Thread Communication Unit */
     if (itu_present) {
-        object_initialize(&s->itu, sizeof(s->itu), TYPE_MIPS_ITU);
-        qdev_set_parent_bus(DEVICE(&s->itu), sysbus_get_default());
-
+        sysbus_init_child_obj(OBJECT(dev), "itu", &s->itu, sizeof(s->itu),
+                              TYPE_MIPS_ITU);
         object_property_set_int(OBJECT(&s->itu), 16, "num-fifo", &err);
         object_property_set_int(OBJECT(&s->itu), 16, "num-semaphores", &err);
         object_property_set_bool(OBJECT(&s->itu), saar_present, "saar-present",
@@ -115,9 +114,8 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
     }
 
     /* Cluster Power Controller */
-    object_initialize(&s->cpc, sizeof(s->cpc), TYPE_MIPS_CPC);
-    qdev_set_parent_bus(DEVICE(&s->cpc), sysbus_get_default());
-
+    sysbus_init_child_obj(OBJECT(dev), "cpc", &s->cpc, sizeof(s->cpc),
+                          TYPE_MIPS_CPC);
     object_property_set_int(OBJECT(&s->cpc), s->num_vp, "num-vp", &err);
     object_property_set_int(OBJECT(&s->cpc), 1, "vp-start-running", &err);
     object_property_set_bool(OBJECT(&s->cpc), true, "realized", &err);
@@ -130,9 +128,8 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
                             sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->cpc), 0));
 
     /* Global Interrupt Controller */
-    object_initialize(&s->gic, sizeof(s->gic), TYPE_MIPS_GIC);
-    qdev_set_parent_bus(DEVICE(&s->gic), sysbus_get_default());
-
+    sysbus_init_child_obj(OBJECT(dev), "gic", &s->gic, sizeof(s->gic),
+                          TYPE_MIPS_GIC);
     object_property_set_int(OBJECT(&s->gic), s->num_vp, "num-vp", &err);
     object_property_set_int(OBJECT(&s->gic), 128, "num-irq", &err);
     object_property_set_bool(OBJECT(&s->gic), true, "realized", &err);
@@ -147,9 +144,8 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
     /* Global Configuration Registers */
     gcr_base = env->CP0_CMGCRBase << 4;
 
-    object_initialize(&s->gcr, sizeof(s->gcr), TYPE_MIPS_GCR);
-    qdev_set_parent_bus(DEVICE(&s->gcr), sysbus_get_default());
-
+    sysbus_init_child_obj(OBJECT(dev), "gcr", &s->gcr, sizeof(s->gcr),
+                          TYPE_MIPS_GCR);
     object_property_set_int(OBJECT(&s->gcr), s->num_vp, "num-vp", &err);
     object_property_set_int(OBJECT(&s->gcr), 0x800, "gcr-rev", &err);
     object_property_set_int(OBJECT(&s->gcr), gcr_base, "gcr-base", &err);
diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index 439665ab45..37ec89b07e 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -55,7 +55,7 @@
 #include "qemu/error-report.h"
 #include "hw/empty_slot.h"
 #include "sysemu/kvm.h"
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
 #include "hw/mips/cps.h"
 
 #define ENVP_ADDR		0x80002000l
@@ -94,7 +94,7 @@ typedef struct {
 typedef struct {
     SysBusDevice parent_obj;
 
-    MIPSCPSState *cps;
+    MIPSCPSState cps;
     qemu_irq *i8259;
 } MaltaState;
 
@@ -1151,20 +1151,19 @@ static void create_cps(MaltaState *s, const char *cpu_type,
 {
     Error *err = NULL;
 
-    s->cps = MIPS_CPS(object_new(TYPE_MIPS_CPS));
-    qdev_set_parent_bus(DEVICE(s->cps), sysbus_get_default());
-
-    object_property_set_str(OBJECT(s->cps), cpu_type, "cpu-type", &err);
-    object_property_set_int(OBJECT(s->cps), smp_cpus, "num-vp", &err);
-    object_property_set_bool(OBJECT(s->cps), true, "realized", &err);
+    sysbus_init_child_obj(OBJECT(s), "cps", OBJECT(&s->cps), sizeof(s->cps),
+                          TYPE_MIPS_CPS);
+    object_property_set_str(OBJECT(&s->cps), cpu_type, "cpu-type", &err);
+    object_property_set_int(OBJECT(&s->cps), smp_cpus, "num-vp", &err);
+    object_property_set_bool(OBJECT(&s->cps), true, "realized", &err);
     if (err != NULL) {
         error_report("%s", error_get_pretty(err));
         exit(1);
     }
 
-    sysbus_mmio_map_overlap(SYS_BUS_DEVICE(s->cps), 0, 0, 1);
+    sysbus_mmio_map_overlap(SYS_BUS_DEVICE(&s->cps), 0, 0, 1);
 
-    *i8259_irq = get_cps_irq(s->cps, 3);
+    *i8259_irq = get_cps_irq(&s->cps, 3);
     *cbus_irq = NULL;
 }
 
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
index 91af452c9e..19e5545e2c 100644
--- a/hw/misc/edu.c
+++ b/hw/misc/edu.c
@@ -98,23 +98,24 @@ static void edu_lower_irq(EduState *edu, uint32_t val)
     }
 }
 
-static bool within(uint32_t addr, uint32_t start, uint32_t end)
+static bool within(uint64_t addr, uint64_t start, uint64_t end)
 {
     return start <= addr && addr < end;
 }
 
-static void edu_check_range(uint32_t addr, uint32_t size1, uint32_t start,
-                uint32_t size2)
+static void edu_check_range(uint64_t addr, uint64_t size1, uint64_t start,
+                uint64_t size2)
 {
-    uint32_t end1 = addr + size1;
-    uint32_t end2 = start + size2;
+    uint64_t end1 = addr + size1;
+    uint64_t end2 = start + size2;
 
     if (within(addr, start, end2) &&
             end1 > addr && within(end1, start, end2)) {
         return;
     }
 
-    hw_error("EDU: DMA range 0x%.8x-0x%.8x out of bounds (0x%.8x-0x%.8x)!",
+    hw_error("EDU: DMA range 0x%016"PRIx64"-0x%016"PRIx64
+             " out of bounds (0x%016"PRIx64"-0x%016"PRIx64")!",
             addr, end1 - 1, start, end2 - 1);
 }
 
@@ -139,13 +140,13 @@ static void edu_dma_timer(void *opaque)
     }
 
     if (EDU_DMA_DIR(edu->dma.cmd) == EDU_DMA_FROM_PCI) {
-        uint32_t dst = edu->dma.dst;
+        uint64_t dst = edu->dma.dst;
         edu_check_range(dst, edu->dma.cnt, DMA_START, DMA_SIZE);
         dst -= DMA_START;
         pci_dma_read(&edu->pdev, edu_clamp_addr(edu, edu->dma.src),
                 edu->dma_buf + dst, edu->dma.cnt);
     } else {
-        uint32_t src = edu->dma.src;
+        uint64_t src = edu->dma.src;
         edu_check_range(src, edu->dma.cnt, DMA_START, DMA_SIZE);
         src -= DMA_START;
         pci_dma_write(&edu->pdev, edu_clamp_addr(edu, edu->dma.dst),
@@ -185,7 +186,11 @@ static uint64_t edu_mmio_read(void *opaque, hwaddr addr, unsigned size)
     EduState *edu = opaque;
     uint64_t val = ~0ULL;
 
-    if (size != 4) {
+    if (addr < 0x80 && size != 4) {
+        return val;
+    }
+
+    if (addr >= 0x80 && size != 4 && size != 8) {
         return val;
     }
 
@@ -289,6 +294,15 @@ static const MemoryRegionOps edu_mmio_ops = {
     .read = edu_mmio_read,
     .write = edu_mmio_write,
     .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 8,
+    },
+
 };
 
 /*
diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c
index 94da85c8d7..b726c73022 100644
--- a/hw/misc/macio/macio.c
+++ b/hw/misc/macio/macio.c
@@ -346,12 +346,12 @@ static void macio_newworld_realize(PCIDevice *d, Error **errp)
         object_property_set_bool(OBJECT(&ns->gpio), true, "realized", &err);
 
         /* PMU */
-        object_initialize(&s->pmu, sizeof(s->pmu), TYPE_VIA_PMU);
+        object_initialize_child(OBJECT(s), "pmu", &s->pmu, sizeof(s->pmu),
+                                TYPE_VIA_PMU, &error_abort, NULL);
         object_property_set_link(OBJECT(&s->pmu), OBJECT(sysbus_dev), "gpio",
                                  &error_abort);
         qdev_prop_set_bit(DEVICE(&s->pmu), "has-adb", ns->has_adb);
         qdev_set_parent_bus(DEVICE(&s->pmu), BUS(&s->macio_bus));
-        object_property_add_child(OBJECT(s), "pmu", OBJECT(&s->pmu), NULL);
 
         object_property_set_bool(OBJECT(&s->pmu), true, "realized", &err);
         if (err) {
@@ -365,9 +365,9 @@ static void macio_newworld_realize(PCIDevice *d, Error **errp)
                                     sysbus_mmio_get_region(sysbus_dev, 0));
     } else {
         /* CUDA */
-        object_initialize(&s->cuda, sizeof(s->cuda), TYPE_CUDA);
+        object_initialize_child(OBJECT(s), "cuda", &s->cuda, sizeof(s->cuda),
+                                TYPE_CUDA, &error_abort, NULL);
         qdev_set_parent_bus(DEVICE(&s->cuda), BUS(&s->macio_bus));
-        object_property_add_child(OBJECT(s), "cuda", OBJECT(&s->cuda), NULL);
         qdev_prop_set_uint64(DEVICE(&s->cuda), "timebase-frequency",
                              s->frequency);
 
diff --git a/hw/pci-bridge/dec.c b/hw/pci-bridge/dec.c
index 8484bfd434..ca40253730 100644
--- a/hw/pci-bridge/dec.c
+++ b/hw/pci-bridge/dec.c
@@ -68,7 +68,7 @@ static void dec_21154_pci_bridge_class_init(ObjectClass *klass, void *data)
     k->vendor_id = PCI_VENDOR_ID_DEC;
     k->device_id = PCI_DEVICE_ID_DEC_21154;
     k->config_write = pci_bridge_write_config;
-    k->is_bridge = 1;
+    k->is_bridge = true;
     dc->desc = "DEC 21154 PCI-PCI bridge";
     dc->reset = pci_bridge_reset;
     dc->vmsd = &vmstate_pci_device;
@@ -129,7 +129,7 @@ static void dec_21154_pci_host_class_init(ObjectClass *klass, void *data)
     k->device_id = PCI_DEVICE_ID_DEC_21154;
     k->revision = 0x02;
     k->class_id = PCI_CLASS_BRIDGE_PCI;
-    k->is_bridge = 1;
+    k->is_bridge = true;
     /*
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
diff --git a/hw/pci-bridge/i82801b11.c b/hw/pci-bridge/i82801b11.c
index 10e590e5c6..6d8b0f54a7 100644
--- a/hw/pci-bridge/i82801b11.c
+++ b/hw/pci-bridge/i82801b11.c
@@ -90,7 +90,7 @@ static void i82801b11_bridge_class_init(ObjectClass *klass, void *data)
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
 
-    k->is_bridge = 1;
+    k->is_bridge = true;
     k->vendor_id = PCI_VENDOR_ID_INTEL;
     k->device_id = PCI_DEVICE_ID_INTEL_82801BA_11;
     k->revision = ICH9_D2P_A2_REVISION;
diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c
index ff6b8323da..c56ed1f52f 100644
--- a/hw/pci-bridge/pci_bridge_dev.c
+++ b/hw/pci-bridge/pci_bridge_dev.c
@@ -253,7 +253,7 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data)
     k->vendor_id = PCI_VENDOR_ID_REDHAT;
     k->device_id = PCI_DEVICE_ID_REDHAT_BRIDGE;
     k->class_id = PCI_CLASS_BRIDGE_PCI;
-    k->is_bridge = 1,
+    k->is_bridge = true;
     dc->desc = "Standard PCI Bridge";
     dc->reset = qdev_pci_bridge_dev_reset;
     dc->props = pci_bridge_dev_properties;
diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
index d491b40d04..9a4fba413a 100644
--- a/hw/pci-bridge/pcie_pci_bridge.c
+++ b/hw/pci-bridge/pcie_pci_bridge.c
@@ -143,7 +143,7 @@ static void pcie_pci_bridge_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
 
-    k->is_bridge = 1;
+    k->is_bridge = true;
     k->vendor_id = PCI_VENDOR_ID_REDHAT;
     k->device_id = PCI_DEVICE_ID_REDHAT_PCIE_BRIDGE;
     k->realize = pcie_pci_bridge_realize;
diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c
index e94d918b6d..be3f4d5e03 100644
--- a/hw/pci-bridge/pcie_root_port.c
+++ b/hw/pci-bridge/pcie_root_port.c
@@ -162,7 +162,7 @@ static void rp_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
-    k->is_bridge = 1;
+    k->is_bridge = true;
     k->config_write = rp_write_config;
     k->realize = rp_realize;
     k->exit = rp_exit;
diff --git a/hw/pci-bridge/simba.c b/hw/pci-bridge/simba.c
index dea4c8c5e7..7cf0d6e047 100644
--- a/hw/pci-bridge/simba.c
+++ b/hw/pci-bridge/simba.c
@@ -76,7 +76,7 @@ static void simba_pci_bridge_class_init(ObjectClass *klass, void *data)
     k->device_id = PCI_DEVICE_ID_SUN_SIMBA;
     k->revision = 0x11;
     k->config_write = pci_bridge_write_config;
-    k->is_bridge = 1;
+    k->is_bridge = true;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
     dc->reset = pci_bridge_reset;
     dc->vmsd = &vmstate_pci_device;
diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
index 467bbabe4c..ab2a51e15d 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -152,7 +152,7 @@ static void xio3130_downstream_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
-    k->is_bridge = 1;
+    k->is_bridge = true;
     k->config_write = xio3130_downstream_write_config;
     k->realize = xio3130_downstream_realize;
     k->exit = xio3130_downstream_exitfn;
diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
index b524908cf1..1d41a49ab0 100644
--- a/hw/pci-bridge/xio3130_upstream.c
+++ b/hw/pci-bridge/xio3130_upstream.c
@@ -126,7 +126,7 @@ static void xio3130_upstream_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
-    k->is_bridge = 1;
+    k->is_bridge = true;
     k->config_write = xio3130_upstream_write_config;
     k->realize = xio3130_upstream_realize;
     k->exit = xio3130_upstream_exitfn;
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index b386777045..d3893bdfe1 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -120,6 +120,27 @@ static void pci_bus_realize(BusState *qbus, Error **errp)
     vmstate_register(NULL, -1, &vmstate_pcibus, bus);
 }
 
+static void pcie_bus_realize(BusState *qbus, Error **errp)
+{
+    PCIBus *bus = PCI_BUS(qbus);
+
+    pci_bus_realize(qbus, errp);
+
+    /*
+     * A PCI-E bus can support extended config space if it's the root
+     * bus, or if the bus/bridge above it does as well
+     */
+    if (pci_bus_is_root(bus)) {
+        bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
+    } else {
+        PCIBus *parent_bus = pci_get_bus(bus->parent_dev);
+
+        if (pci_bus_allows_extended_config_space(parent_bus)) {
+            bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
+        }
+    }
+}
+
 static void pci_bus_unrealize(BusState *qbus, Error **errp)
 {
     PCIBus *bus = PCI_BUS(qbus);
@@ -142,11 +163,6 @@ static uint16_t pcibus_numa_node(PCIBus *bus)
     return NUMA_NODE_UNASSIGNED;
 }
 
-static bool pcibus_allows_extended_config_space(PCIBus *bus)
-{
-    return false;
-}
-
 static void pci_bus_class_init(ObjectClass *klass, void *data)
 {
     BusClass *k = BUS_CLASS(klass);
@@ -161,7 +177,6 @@ static void pci_bus_class_init(ObjectClass *klass, void *data)
 
     pbc->bus_num = pcibus_num;
     pbc->numa_node = pcibus_numa_node;
-    pbc->allows_extended_config_space = pcibus_allows_extended_config_space;
 }
 
 static const TypeInfo pci_bus_info = {
@@ -182,16 +197,11 @@ static const TypeInfo conventional_pci_interface_info = {
     .parent        = TYPE_INTERFACE,
 };
 
-static bool pciebus_allows_extended_config_space(PCIBus *bus)
-{
-    return true;
-}
-
 static void pcie_bus_class_init(ObjectClass *klass, void *data)
 {
-    PCIBusClass *pbc = PCI_BUS_CLASS(klass);
+    BusClass *k = BUS_CLASS(klass);
 
-    pbc->allows_extended_config_space = pciebus_allows_extended_config_space;
+    k->realize = pcie_bus_realize;
 }
 
 static const TypeInfo pcie_bus_info = {
@@ -410,11 +420,6 @@ bool pci_bus_is_express(PCIBus *bus)
     return object_dynamic_cast(OBJECT(bus), TYPE_PCIE_BUS);
 }
 
-bool pci_bus_allows_extended_config_space(PCIBus *bus)
-{
-    return PCI_BUS_GET_CLASS(bus)->allows_extended_config_space(bus);
-}
-
 void pci_root_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent,
                               const char *name,
                               MemoryRegion *address_space_mem,
@@ -718,37 +723,6 @@ static int pci_parse_devaddr(const char *addr, int *domp, int *busp,
     return 0;
 }
 
-static PCIBus *pci_get_bus_devfn(int *devfnp, PCIBus *root,
-                                 const char *devaddr)
-{
-    int dom, bus;
-    unsigned slot;
-
-    if (!root) {
-        fprintf(stderr, "No primary PCI bus\n");
-        return NULL;
-    }
-
-    assert(!root->parent_dev);
-
-    if (!devaddr) {
-        *devfnp = -1;
-        return pci_find_bus_nr(root, 0);
-    }
-
-    if (pci_parse_devaddr(devaddr, &dom, &bus, &slot, NULL) < 0) {
-        return NULL;
-    }
-
-    if (dom != 0) {
-        fprintf(stderr, "No support for non-zero PCI domains\n");
-        return NULL;
-    }
-
-    *devfnp = PCI_DEVFN(slot, 0);
-    return pci_find_bus_nr(root, bus);
-}
-
 static void pci_init_cmask(PCIDevice *dev)
 {
     pci_set_word(dev->cmask + PCI_VENDOR_ID, 0xffff);
@@ -1890,6 +1864,8 @@ PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
     DeviceState *dev;
     int devfn;
     int i;
+    int dom, busnr;
+    unsigned slot;
 
     if (nd->model && !strcmp(nd->model, "virtio")) {
         g_free(nd->model);
@@ -1923,7 +1899,32 @@ PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
         exit(1);
     }
 
-    bus = pci_get_bus_devfn(&devfn, rootbus, devaddr);
+    if (!rootbus) {
+        error_report("No primary PCI bus");
+        exit(1);
+    }
+
+    assert(!rootbus->parent_dev);
+
+    if (!devaddr) {
+        devfn = -1;
+        busnr = 0;
+    } else {
+        if (pci_parse_devaddr(devaddr, &dom, &busnr, &slot, NULL) < 0) {
+            error_report("Invalid PCI device address %s for device %s",
+                         devaddr, nd->model);
+            exit(1);
+        }
+
+        if (dom != 0) {
+            error_report("No support for non-zero PCI domains");
+            exit(1);
+        }
+
+        devfn = PCI_DEVFN(slot, 0);
+    }
+
+    bus = pci_find_bus_nr(rootbus, busnr);
     if (!bus) {
         error_report("Invalid PCI device address %s for device %s",
                      devaddr, nd->model);
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index c6d9ded320..8d954885c0 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -273,7 +273,7 @@ void pci_bridge_write_config(PCIDevice *d,
     newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
     if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
         /* Trigger hot reset on 0->1 transition. */
-        qbus_reset_all(&s->sec_bus.qbus);
+        qbus_reset_all(BUS(&s->sec_bus));
     }
 }
 
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index 9d64b2e12f..5f3497256c 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -53,16 +53,9 @@ static inline PCIDevice *pci_dev_find_by_addr(PCIBus *bus, uint32_t addr)
 
 static void pci_adjust_config_limit(PCIBus *bus, uint32_t *limit)
 {
-    if (*limit > PCI_CONFIG_SPACE_SIZE) {
-        if (!pci_bus_allows_extended_config_space(bus)) {
-            *limit = PCI_CONFIG_SPACE_SIZE;
-            return;
-        }
-
-        if (!pci_bus_is_root(bus)) {
-            PCIDevice *bridge = pci_bridge_get_device(bus);
-            pci_adjust_config_limit(pci_get_bus(bridge), limit);
-        }
+    if ((*limit > PCI_CONFIG_SPACE_SIZE) &&
+        !pci_bus_allows_extended_config_space(bus)) {
+        *limit = PCI_CONFIG_SPACE_SIZE;
     }
 }
 
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index a3465155f0..f927ec9c74 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -122,3 +122,8 @@ config XIVE_SPAPR
     default y
     depends on PSERIES
     select XIVE
+
+config XIVE_KVM
+    bool
+    default y
+    depends on XIVE_SPAPR && KVM
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index dfb4ea5742..046f0a83c8 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -450,7 +450,8 @@ static void pnv_dt_power_mgt(void *fdt)
 
 static void *pnv_dt_create(MachineState *machine)
 {
-    const char plat_compat[] = "qemu,powernv\0ibm,powernv";
+    const char plat_compat8[] = "qemu,powernv8\0qemu,powernv\0ibm,powernv";
+    const char plat_compat9[] = "qemu,powernv9\0ibm,powernv";
     PnvMachineState *pnv = PNV_MACHINE(machine);
     void *fdt;
     char *buf;
@@ -465,8 +466,14 @@ static void *pnv_dt_create(MachineState *machine)
     _FDT((fdt_setprop_cell(fdt, 0, "#size-cells", 0x2)));
     _FDT((fdt_setprop_string(fdt, 0, "model",
                              "IBM PowerNV (emulated by qemu)")));
-    _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat,
-                      sizeof(plat_compat))));
+    if (pnv_is_power9(pnv)) {
+        _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat9,
+                          sizeof(plat_compat9))));
+    } else {
+        _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat8,
+                          sizeof(plat_compat8))));
+    }
+
 
     buf =  qemu_uuid_unparse_strdup(&qemu_uuid);
     _FDT((fdt_setprop_string(fdt, 0, "vm,uuid", buf)));
@@ -994,14 +1001,12 @@ static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp)
         PnvCore *pnv_core = PNV_CORE(chip->cores + (i * 4) * typesize);
         int core_id = CPU_CORE(pnv_core)->core_id;
 
-        object_initialize(eq, sizeof(*eq), TYPE_PNV_QUAD);
         snprintf(eq_name, sizeof(eq_name), "eq[%d]", core_id);
+        object_initialize_child(OBJECT(chip), eq_name, eq, sizeof(*eq),
+                                TYPE_PNV_QUAD, &error_fatal, NULL);
 
-        object_property_add_child(OBJECT(chip), eq_name, OBJECT(eq),
-                                  &error_fatal);
         object_property_set_int(OBJECT(eq), core_id, "id", &error_fatal);
         object_property_set_bool(OBJECT(eq), true, "realized", &error_fatal);
-        object_unref(OBJECT(eq));
 
         pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(eq->id),
                                 &eq->xscom_regs);
@@ -1165,10 +1170,9 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp)
             continue;
         }
 
-        object_initialize(pnv_core, typesize, typename);
         snprintf(core_name, sizeof(core_name), "core[%d]", core_hwid);
-        object_property_add_child(OBJECT(chip), core_name, OBJECT(pnv_core),
-                                  &error_fatal);
+        object_initialize_child(OBJECT(chip), core_name, pnv_core, typesize,
+                                typename, &error_fatal, NULL);
         object_property_set_int(OBJECT(pnv_core), smp_threads, "nr-threads",
                                 &error_fatal);
         object_property_set_int(OBJECT(pnv_core), core_hwid,
@@ -1180,7 +1184,6 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp)
                                        OBJECT(chip), &error_fatal);
         object_property_set_bool(OBJECT(pnv_core), true, "realized",
                                  &error_fatal);
-        object_unref(OBJECT(pnv_core));
 
         /* Each core has an XSCOM MMIO region */
         if (!pnv_chip_is_power9(chip)) {
diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c
index c285ef514e..f53a6d7a94 100644
--- a/hw/ppc/pnv_xscom.c
+++ b/hw/ppc/pnv_xscom.c
@@ -29,6 +29,12 @@
 
 #include <libfdt.h>
 
+/* PRD registers */
+#define PRD_P8_IPOLL_REG_MASK           0x01020013
+#define PRD_P8_IPOLL_REG_STATUS         0x01020014
+#define PRD_P9_IPOLL_REG_MASK           0x000F0033
+#define PRD_P9_IPOLL_REG_STATUS         0x000F0034
+
 static void xscom_complete(CPUState *cs, uint64_t hmer_bits)
 {
     /*
@@ -70,6 +76,12 @@ static uint64_t xscom_read_default(PnvChip *chip, uint32_t pcba)
     case 0x1010c00:     /* PIBAM FIR */
     case 0x1010c03:     /* PIBAM FIR MASK */
 
+        /* PRD registers */
+    case PRD_P8_IPOLL_REG_MASK:
+    case PRD_P8_IPOLL_REG_STATUS:
+    case PRD_P9_IPOLL_REG_MASK:
+    case PRD_P9_IPOLL_REG_STATUS:
+
         /* P9 xscom reset */
     case 0x0090018:     /* Receive status reg */
     case 0x0090012:     /* log register */
@@ -124,6 +136,12 @@ static bool xscom_write_default(PnvChip *chip, uint32_t pcba, uint64_t val)
     case 0x201302a:     /* CAPP stuff */
     case 0x2013801:     /* CAPP stuff */
     case 0x2013802:     /* CAPP stuff */
+
+        /* P8 PRD registers */
+    case PRD_P8_IPOLL_REG_MASK:
+    case PRD_P8_IPOLL_REG_STATUS:
+    case PRD_P9_IPOLL_REG_MASK:
+    case PRD_P9_IPOLL_REG_STATUS:
         return true;
     default:
         return false;
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index b7f459d475..2a8009e20b 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -601,7 +601,7 @@ static int prep_set_cmos_checksum(DeviceState *dev, void *opaque)
     uint16_t checksum = *(uint16_t *)opaque;
     ISADevice *rtc;
 
-    if (object_dynamic_cast(OBJECT(dev), "mc146818rtc")) {
+    if (object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC)) {
         rtc = ISA_DEVICE(dev);
         rtc_set_memory(rtc, 0x2e, checksum & 0xff);
         rtc_set_memory(rtc, 0x3e, checksum & 0xff);
@@ -675,6 +675,11 @@ static void ibm_40p_init(MachineState *machine)
     qdev_prop_set_uint32(dev, "ram-size", machine->ram_size);
     qdev_init_nofail(dev);
 
+    /* RTC */
+    dev = DEVICE(isa_create(isa_bus, TYPE_MC146818_RTC));
+    qdev_prop_set_int32(dev, "base_year", 1900);
+    qdev_init_nofail(dev);
+
     /* initialize CMOS checksums */
     cmos_checksum = 0x6aa9;
     qbus_walk_children(BUS(isa_bus), prep_set_cmos_checksum, NULL, NULL, NULL,
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 2ef3ce4362..e2b33e5890 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -500,7 +500,10 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
     _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
 
     if (env->spr_cb[SPR_PURR].oea_read) {
-        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,purr", 1)));
+    }
+    if (env->spr_cb[SPR_SPURR].oea_read) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,spurr", 1)));
     }
 
     if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
@@ -2122,6 +2125,7 @@ static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_cap_cfpc,
         &vmstate_spapr_cap_sbbc,
         &vmstate_spapr_cap_ibs,
+        &vmstate_spapr_cap_hpt_maxpagesize,
         &vmstate_spapr_irq_map,
         &vmstate_spapr_cap_nested_kvm_hv,
         &vmstate_spapr_dtb,
@@ -4348,7 +4352,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
     spapr_caps_add_properties(smc, &error_abort);
-    smc->irq = &spapr_irq_xics;
+    smc->irq = &spapr_irq_dual;
     smc->dr_phb_enabled = true;
 }
 
@@ -4407,18 +4411,7 @@ DEFINE_SPAPR_MACHINE(4_1, "4.1", true);
 /*
  * pseries-4.0
  */
-static void spapr_machine_4_0_class_options(MachineClass *mc)
-{
-    spapr_machine_4_1_class_options(mc);
-    compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
-}
-
-DEFINE_SPAPR_MACHINE(4_0, "4.0", false);
-
-/*
- * pseries-3.1
- */
-static void phb_placement_3_1(SpaprMachineState *spapr, uint32_t index,
+static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index,
                               uint64_t *buid, hwaddr *pio,
                               hwaddr *mmio32, hwaddr *mmio64,
                               unsigned n_dma, uint32_t *liobns,
@@ -4430,6 +4423,22 @@ static void phb_placement_3_1(SpaprMachineState *spapr, uint32_t index,
     *nv2atsd = 0;
 }
 
+static void spapr_machine_4_0_class_options(MachineClass *mc)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+    spapr_machine_4_1_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
+    smc->phb_placement = phb_placement_4_0;
+    smc->irq = &spapr_irq_xics;
+    smc->pre_4_1_migration = true;
+}
+
+DEFINE_SPAPR_MACHINE(4_0, "4.0", false);
+
+/*
+ * pseries-3.1
+ */
 static void spapr_machine_3_1_class_options(MachineClass *mc)
 {
     SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
@@ -4445,7 +4454,6 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
     smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
     smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
     smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
-    smc->phb_placement = phb_placement_3_1;
 }
 
 DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 9b1c10baa6..31b4661399 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -64,6 +64,7 @@ typedef struct SpaprCapabilityInfo {
     void (*apply)(SpaprMachineState *spapr, uint8_t val, Error **errp);
     void (*cpu_apply)(SpaprMachineState *spapr, PowerPCCPU *cpu,
                       uint8_t val, Error **errp);
+    bool (*migrate_needed)(void *opaque);
 } SpaprCapabilityInfo;
 
 static void spapr_cap_get_bool(Object *obj, Visitor *v, const char *name,
@@ -350,6 +351,11 @@ static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr,
     spapr_check_pagesize(spapr, qemu_minrampagesize(), errp);
 }
 
+static bool cap_hpt_maxpagesize_migrate_needed(void *opaque)
+{
+    return !SPAPR_MACHINE_GET_CLASS(opaque)->pre_4_1_migration;
+}
+
 static bool spapr_pagesize_cb(void *opaque, uint32_t seg_pshift,
                               uint32_t pshift)
 {
@@ -542,6 +548,7 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
         .type = "int",
         .apply = cap_hpt_maxpagesize_apply,
         .cpu_apply = cap_hpt_maxpagesize_cpu_apply,
+        .migrate_needed = cap_hpt_maxpagesize_migrate_needed,
     },
     [SPAPR_CAP_NESTED_KVM_HV] = {
         .name = "nested-hv",
@@ -679,8 +686,11 @@ int spapr_caps_post_migration(SpaprMachineState *spapr)
 static bool spapr_cap_##sname##_needed(void *opaque)    \
 {                                                       \
     SpaprMachineState *spapr = opaque;                  \
+    bool (*needed)(void *opaque) =                      \
+        capability_table[cap].migrate_needed;           \
                                                         \
-    return spapr->cmd_line_caps[cap] &&                 \
+    return needed ? needed(opaque) : true &&            \
+           spapr->cmd_line_caps[cap] &&                 \
            (spapr->eff.caps[cap] !=                     \
             spapr->def.caps[cap]);                      \
 }                                                       \
@@ -703,6 +713,7 @@ SPAPR_CAP_MIG_STATE(dfp, SPAPR_CAP_DFP);
 SPAPR_CAP_MIG_STATE(cfpc, SPAPR_CAP_CFPC);
 SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
 SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
+SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index f04e06cdf6..5621fb9a3d 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -58,9 +58,11 @@ static void spapr_cpu_reset(void *opaque)
      *
      * Disable Power-saving mode Exit Cause exceptions for the CPU, so
      * we don't get spurious wakups before an RTAS start-cpu call.
+     * For the same reason, set PSSCR_EC.
      */
     lpcr &= ~(LPCR_VPM0 | LPCR_VPM1 | LPCR_ISL | LPCR_KBV | pcc->lpcr_pm);
     lpcr |= LPCR_LPES0 | LPCR_LPES1;
+    env->spr[SPR_PSSCR] |= PSSCR_EC;
 
     /* Set RMLS to the max (ie, 16G) */
     lpcr &= ~LPCR_RMLS;
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 6c16d2b120..0a050ad3d8 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1513,6 +1513,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
     bool guest_radix;
     Error *local_err = NULL;
     bool raw_mode_supported = false;
+    bool guest_xive;
 
     cas_pvr = cas_check_pvr(spapr, cpu, &addr, &raw_mode_supported, &local_err);
     if (local_err) {
@@ -1545,10 +1546,17 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
         error_report("guest requested hash and radix MMU, which is invalid.");
         exit(EXIT_FAILURE);
     }
+    if (spapr_ovec_test(ov5_guest, OV5_XIVE_BOTH)) {
+        error_report("guest requested an invalid interrupt mode");
+        exit(EXIT_FAILURE);
+    }
+
     /* The radix/hash bit in byte 24 requires special handling: */
     guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300);
     spapr_ovec_clear(ov5_guest, OV5_MMU_RADIX_300);
 
+    guest_xive = spapr_ovec_test(ov5_guest, OV5_XIVE_EXPLOIT);
+
     /*
      * HPT resizing is a bit of a special case, because when enabled
      * we assume an HPT guest will support it until it says it
@@ -1633,6 +1641,24 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
     }
 
     /*
+     * Ensure the guest asks for an interrupt mode we support; otherwise
+     * terminate the boot.
+     */
+    if (guest_xive) {
+        if (spapr->irq->ov5 == SPAPR_OV5_XIVE_LEGACY) {
+            error_report(
+"Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property");
+            exit(EXIT_FAILURE);
+        }
+    } else {
+        if (spapr->irq->ov5 == SPAPR_OV5_XIVE_EXPLOIT) {
+            error_report(
+"Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual");
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    /*
      * Generate a machine reset when we have an update of the
      * interrupt mode. Only required when the machine supports both
      * modes.
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index b1f79ea9de..3156daf093 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -62,38 +62,46 @@ void spapr_irq_msi_reset(SpaprMachineState *spapr)
     bitmap_clear(spapr->irq_map, 0, spapr->irq_map_nr);
 }
 
-
-/*
- * XICS IRQ backend.
- */
-
-static void spapr_irq_init_xics(SpaprMachineState *spapr, int nr_irqs,
-                                Error **errp)
+static void spapr_irq_init_device(SpaprMachineState *spapr,
+                                  SpaprIrq *irq, Error **errp)
 {
     MachineState *machine = MACHINE(spapr);
-    Object *obj;
     Error *local_err = NULL;
-    bool xics_kvm = false;
 
-    if (kvm_enabled()) {
-        if (machine_kernel_irqchip_allowed(machine) &&
-            !xics_kvm_init(spapr, &local_err)) {
-            xics_kvm = true;
-        }
-        if (machine_kernel_irqchip_required(machine) && !xics_kvm) {
+    if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) {
+        irq->init_kvm(spapr, &local_err);
+        if (local_err && machine_kernel_irqchip_required(machine)) {
             error_prepend(&local_err,
                           "kernel_irqchip requested but unavailable: ");
             error_propagate(errp, local_err);
             return;
         }
-        error_free(local_err);
-        local_err = NULL;
-    }
 
-    if (!xics_kvm) {
-        xics_spapr_init(spapr);
+        if (!local_err) {
+            return;
+        }
+
+        /*
+         * We failed to initialize the KVM device, fallback to
+         * emulated mode
+         */
+        error_prepend(&local_err, "kernel_irqchip allowed but unavailable: ");
+        warn_report_err(local_err);
     }
 
+    irq->init_emu(spapr, errp);
+}
+
+/*
+ * XICS IRQ backend.
+ */
+
+static void spapr_irq_init_xics(SpaprMachineState *spapr, int nr_irqs,
+                                Error **errp)
+{
+    Object *obj;
+    Error *local_err = NULL;
+
     obj = object_new(TYPE_ICS_SIMPLE);
     object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort);
     object_property_add_const_link(obj, ICS_PROP_XICS, OBJECT(spapr),
@@ -212,7 +220,13 @@ static void spapr_irq_set_irq_xics(void *opaque, int srcno, int val)
 
 static void spapr_irq_reset_xics(SpaprMachineState *spapr, Error **errp)
 {
-    /* TODO: create the KVM XICS device */
+    Error *local_err = NULL;
+
+    spapr_irq_init_device(spapr, &spapr_irq_xics, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
 }
 
 static const char *spapr_irq_get_nodename_xics(SpaprMachineState *spapr)
@@ -220,6 +234,18 @@ static const char *spapr_irq_get_nodename_xics(SpaprMachineState *spapr)
     return XICS_NODENAME;
 }
 
+static void spapr_irq_init_emu_xics(SpaprMachineState *spapr, Error **errp)
+{
+    xics_spapr_init(spapr);
+}
+
+static void spapr_irq_init_kvm_xics(SpaprMachineState *spapr, Error **errp)
+{
+    if (kvm_enabled()) {
+        xics_kvm_init(spapr, errp);
+    }
+}
+
 #define SPAPR_IRQ_XICS_NR_IRQS     0x1000
 #define SPAPR_IRQ_XICS_NR_MSIS     \
     (XICS_IRQ_BASE + SPAPR_IRQ_XICS_NR_IRQS - SPAPR_IRQ_MSI)
@@ -240,6 +266,8 @@ SpaprIrq spapr_irq_xics = {
     .reset       = spapr_irq_reset_xics,
     .set_irq     = spapr_irq_set_irq_xics,
     .get_nodename = spapr_irq_get_nodename_xics,
+    .init_emu    = spapr_irq_init_emu_xics,
+    .init_kvm    = spapr_irq_init_kvm_xics,
 };
 
 /*
@@ -248,19 +276,10 @@ SpaprIrq spapr_irq_xics = {
 static void spapr_irq_init_xive(SpaprMachineState *spapr, int nr_irqs,
                                 Error **errp)
 {
-    MachineState *machine = MACHINE(spapr);
     uint32_t nr_servers = spapr_max_server_number(spapr);
     DeviceState *dev;
     int i;
 
-    /* KVM XIVE device not yet available */
-    if (kvm_enabled()) {
-        if (machine_kernel_irqchip_required(machine)) {
-            error_setg(errp, "kernel_irqchip requested. no KVM XIVE support");
-            return;
-        }
-    }
-
     dev = qdev_create(NULL, TYPE_SPAPR_XIVE);
     qdev_prop_set_uint32(dev, "nr-irqs", nr_irqs);
     /*
@@ -350,12 +369,13 @@ static void spapr_irq_cpu_intc_create_xive(SpaprMachineState *spapr,
 
 static int spapr_irq_post_load_xive(SpaprMachineState *spapr, int version_id)
 {
-    return 0;
+    return spapr_xive_post_load(spapr->xive, version_id);
 }
 
 static void spapr_irq_reset_xive(SpaprMachineState *spapr, Error **errp)
 {
     CPUState *cs;
+    Error *local_err = NULL;
 
     CPU_FOREACH(cs) {
         PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -364,6 +384,12 @@ static void spapr_irq_reset_xive(SpaprMachineState *spapr, Error **errp)
         spapr_xive_set_tctx_os_cam(spapr_cpu_state(cpu)->tctx);
     }
 
+    spapr_irq_init_device(spapr, &spapr_irq_xive, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
     /* Activate the XIVE MMIOs */
     spapr_xive_mmio_set_enabled(spapr->xive, true);
 }
@@ -372,7 +398,11 @@ static void spapr_irq_set_irq_xive(void *opaque, int srcno, int val)
 {
     SpaprMachineState *spapr = opaque;
 
-    xive_source_set_irq(&spapr->xive->source, srcno, val);
+    if (kvm_irqchip_in_kernel()) {
+        kvmppc_xive_source_set_irq(&spapr->xive->source, srcno, val);
+    } else {
+        xive_source_set_irq(&spapr->xive->source, srcno, val);
+    }
 }
 
 static const char *spapr_irq_get_nodename_xive(SpaprMachineState *spapr)
@@ -380,6 +410,18 @@ static const char *spapr_irq_get_nodename_xive(SpaprMachineState *spapr)
     return spapr->xive->nodename;
 }
 
+static void spapr_irq_init_emu_xive(SpaprMachineState *spapr, Error **errp)
+{
+    spapr_xive_init(spapr->xive, errp);
+}
+
+static void spapr_irq_init_kvm_xive(SpaprMachineState *spapr, Error **errp)
+{
+    if (kvm_enabled()) {
+        kvmppc_xive_connect(spapr->xive, errp);
+    }
+}
+
 /*
  * XIVE uses the full IRQ number space. Set it to 8K to be compatible
  * with XICS.
@@ -404,6 +446,8 @@ SpaprIrq spapr_irq_xive = {
     .reset       = spapr_irq_reset_xive,
     .set_irq     = spapr_irq_set_irq_xive,
     .get_nodename = spapr_irq_get_nodename_xive,
+    .init_emu    = spapr_irq_init_emu_xive,
+    .init_kvm    = spapr_irq_init_kvm_xive,
 };
 
 /*
@@ -428,14 +472,8 @@ static SpaprIrq *spapr_irq_current(SpaprMachineState *spapr)
 static void spapr_irq_init_dual(SpaprMachineState *spapr, int nr_irqs,
                                 Error **errp)
 {
-    MachineState *machine = MACHINE(spapr);
     Error *local_err = NULL;
 
-    if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) {
-        error_setg(errp, "No KVM support for the 'dual' machine");
-        return;
-    }
-
     spapr_irq_xics.init(spapr, spapr_irq_xics.nr_irqs, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -514,6 +552,9 @@ static int spapr_irq_post_load_dual(SpaprMachineState *spapr, int version_id)
      * defaults to XICS at startup.
      */
     if (spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+        if (kvm_irqchip_in_kernel()) {
+            xics_kvm_disconnect(spapr, &error_fatal);
+        }
         spapr_irq_xive.reset(spapr, &error_fatal);
     }
 
@@ -522,12 +563,30 @@ static int spapr_irq_post_load_dual(SpaprMachineState *spapr, int version_id)
 
 static void spapr_irq_reset_dual(SpaprMachineState *spapr, Error **errp)
 {
+    Error *local_err = NULL;
+
     /*
      * Deactivate the XIVE MMIOs. The XIVE backend will reenable them
      * if selected.
      */
     spapr_xive_mmio_set_enabled(spapr->xive, false);
 
+    /* Destroy all KVM devices */
+    if (kvm_irqchip_in_kernel()) {
+        xics_kvm_disconnect(spapr, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            error_prepend(errp, "KVM XICS disconnect failed: ");
+            return;
+        }
+        kvmppc_xive_disconnect(spapr->xive, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            error_prepend(errp, "KVM XIVE disconnect failed: ");
+            return;
+        }
+    }
+
     spapr_irq_current(spapr)->reset(spapr, errp);
 }
 
@@ -565,6 +624,8 @@ SpaprIrq spapr_irq_dual = {
     .reset       = spapr_irq_reset_dual,
     .set_irq     = spapr_irq_set_irq_dual,
     .get_nodename = spapr_irq_get_nodename_dual,
+    .init_emu    = NULL, /* should not be used */
+    .init_kvm    = NULL, /* should not be used */
 };
 
 
@@ -763,6 +824,9 @@ SpaprIrq spapr_irq_xics_legacy = {
     .dt_populate = spapr_dt_xics,
     .cpu_intc_create = spapr_irq_cpu_intc_create_xics,
     .post_load   = spapr_irq_post_load_xics,
+    .reset       = spapr_irq_reset_xics,
     .set_irq     = spapr_irq_set_irq_xics,
     .get_nodename = spapr_irq_get_nodename_xics,
+    .init_emu    = spapr_irq_init_emu_xics,
+    .init_kvm    = spapr_irq_init_kvm_xics,
 };
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 97961b0128..9cf2c41b8c 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1626,28 +1626,6 @@ static void spapr_phb_unrealize(DeviceState *dev, Error **errp)
     memory_region_del_subregion(get_system_memory(), &sphb->mem32window);
 }
 
-static bool spapr_phb_allows_extended_config_space(PCIBus *bus)
-{
-    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(BUS(bus)->parent);
-
-    return sphb->pcie_ecs;
-}
-
-static void spapr_phb_root_bus_class_init(ObjectClass *klass, void *data)
-{
-    PCIBusClass *pbc = PCI_BUS_CLASS(klass);
-
-    pbc->allows_extended_config_space = spapr_phb_allows_extended_config_space;
-}
-
-#define TYPE_SPAPR_PHB_ROOT_BUS "pci"
-
-static const TypeInfo spapr_phb_root_bus_info = {
-    .name = TYPE_SPAPR_PHB_ROOT_BUS,
-    .parent = TYPE_PCI_BUS,
-    .class_init = spapr_phb_root_bus_class_init,
-};
-
 static void spapr_phb_realize(DeviceState *dev, Error **errp)
 {
     /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user
@@ -1753,7 +1731,16 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
                                 pci_spapr_set_irq, pci_swizzle_map_irq_fn, sphb,
                                 &sphb->memspace, &sphb->iospace,
                                 PCI_DEVFN(0, 0), PCI_NUM_PINS,
-                                TYPE_SPAPR_PHB_ROOT_BUS);
+                                TYPE_PCI_BUS);
+
+    /*
+     * Despite resembling a vanilla PCI bus in most ways, the PAPR
+     * para-virtualized PCI bus *does* permit PCI-E extended config
+     * space access
+     */
+    if (sphb->pcie_ecs) {
+        bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
+    }
     phb->bus = bus;
     qbus_set_hotplug_handler(BUS(phb->bus), OBJECT(sphb), NULL);
 
@@ -2348,7 +2335,6 @@ void spapr_pci_rtas_init(void)
 static void spapr_pci_register_types(void)
 {
     type_register_static(&spapr_phb_info);
-    type_register_static(&spapr_phb_root_bus_info);
 }
 
 type_init(spapr_pci_register_types)
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index ee24212765..5bc1a93271 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -177,6 +177,7 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr,
         } else {
             lpcr &= ~(LPCR_UPRT | LPCR_GTSE | LPCR_HR);
         }
+        env->spr[SPR_PSSCR] &= ~PSSCR_EC;
     }
     ppc_store_lpcr(newcpu, lpcr);
 
@@ -205,8 +206,11 @@ static void rtas_stop_self(PowerPCCPU *cpu, SpaprMachineState *spapr,
 
     /* Disable Power-saving mode Exit Cause exceptions for the CPU.
      * This could deliver an interrupt on a dying CPU and crash the
-     * guest */
+     * guest.
+     * For the same reason, set PSSCR_EC.
+     */
     ppc_store_lpcr(cpu, env->spr[SPR_LPCR] & ~pcc->lpcr_pm);
+    env->spr[SPR_PSSCR] |= PSSCR_EC;
     cs->halted = 1;
     kvmppc_set_reg_ppc_online(cpu, 0);
     qemu_cpu_kick(cs);
diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index cf34874e9d..c39051068d 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -14,16 +14,9 @@
  */
 
 #include "qemu/osdep.h"
-#include "sysemu/sysemu.h"
-#include "qapi/error.h"
-#include "qapi/qmp/qlist.h"
-#include "qapi/qmp/qnum.h"
 #include "qapi/qapi-events-rdma.h"
 
 #include <infiniband/verbs.h>
-#include <infiniband/umad_types.h>
-#include <infiniband/umad.h>
-#include <rdma/rdma_user_cm.h>
 
 #include "contrib/rdmacm-mux/rdmacm-mux.h"
 #include "trace.h"
diff --git a/hw/riscv/Makefile.objs b/hw/riscv/Makefile.objs
index 79bfb3abf9..a65027304a 100644
--- a/hw/riscv/Makefile.objs
+++ b/hw/riscv/Makefile.objs
@@ -2,6 +2,7 @@ obj-$(CONFIG_SPIKE) += riscv_htif.o
 obj-$(CONFIG_HART) += riscv_hart.o
 obj-$(CONFIG_SIFIVE_E) += sifive_e.o
 obj-$(CONFIG_SIFIVE) += sifive_clint.o
+obj-$(CONFIG_SIFIVE) += sifive_gpio.o
 obj-$(CONFIG_SIFIVE) += sifive_prci.o
 obj-$(CONFIG_SIFIVE) += sifive_plic.o
 obj-$(CONFIG_SIFIVE) += sifive_test.o
diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c
index b1cd11363c..80ac56fa7d 100644
--- a/hw/riscv/sifive_e.c
+++ b/hw/riscv/sifive_e.c
@@ -146,11 +146,15 @@ static void riscv_sifive_e_soc_init(Object *obj)
                             &error_abort);
     object_property_set_int(OBJECT(&s->cpus), smp_cpus, "num-harts",
                             &error_abort);
+    sysbus_init_child_obj(obj, "riscv.sifive.e.gpio0",
+                          &s->gpio, sizeof(s->gpio),
+                          TYPE_SIFIVE_GPIO);
 }
 
 static void riscv_sifive_e_soc_realize(DeviceState *dev, Error **errp)
 {
     const struct MemmapEntry *memmap = sifive_e_memmap;
+    Error *err = NULL;
 
     SiFiveESoCState *s = RISCV_E_SOC(dev);
     MemoryRegion *sys_mem = get_system_memory();
@@ -184,8 +188,28 @@ static void riscv_sifive_e_soc_realize(DeviceState *dev, Error **errp)
     sifive_mmio_emulate(sys_mem, "riscv.sifive.e.aon",
         memmap[SIFIVE_E_AON].base, memmap[SIFIVE_E_AON].size);
     sifive_prci_create(memmap[SIFIVE_E_PRCI].base);
-    sifive_mmio_emulate(sys_mem, "riscv.sifive.e.gpio0",
-        memmap[SIFIVE_E_GPIO0].base, memmap[SIFIVE_E_GPIO0].size);
+
+    /* GPIO */
+
+    object_property_set_bool(OBJECT(&s->gpio), true, "realized", &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    /* Map GPIO registers */
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->gpio), 0, memmap[SIFIVE_E_GPIO0].base);
+
+    /* Pass all GPIOs to the SOC layer so they are available to the board */
+    qdev_pass_gpios(DEVICE(&s->gpio), dev, NULL);
+
+    /* Connect GPIO interrupts to the PLIC */
+    for (int i = 0; i < 32; i++) {
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio), i,
+                           qdev_get_gpio_in(DEVICE(s->plic),
+                                            SIFIVE_E_GPIO0_IRQ0 + i));
+    }
+
     sifive_uart_create(sys_mem, memmap[SIFIVE_E_UART0].base,
         serial_hd(0), qdev_get_gpio_in(DEVICE(s->plic), SIFIVE_E_UART0_IRQ));
     sifive_mmio_emulate(sys_mem, "riscv.sifive.e.qspi0",
diff --git a/hw/riscv/sifive_gpio.c b/hw/riscv/sifive_gpio.c
new file mode 100644
index 0000000000..06bd8112d7
--- /dev/null
+++ b/hw/riscv/sifive_gpio.c
@@ -0,0 +1,388 @@
+/*
+ * sifive System-on-Chip general purpose input/output register definition
+ *
+ * Copyright 2019 AdaCore
+ *
+ * Base on nrf51_gpio.c:
+ *
+ * Copyright 2018 Steffen Görtz <contrib@steffen-goertz.de>
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "hw/riscv/sifive_gpio.h"
+#include "trace.h"
+
+static void update_output_irq(SIFIVEGPIOState *s)
+{
+
+    uint32_t pending;
+    uint32_t pin;
+
+    pending = s->high_ip & s->high_ie;
+    pending |= s->low_ip & s->low_ie;
+    pending |= s->rise_ip & s->rise_ie;
+    pending |= s->fall_ip & s->fall_ie;
+
+    for (int i = 0; i < SIFIVE_GPIO_PINS; i++) {
+        pin = 1 << i;
+        qemu_set_irq(s->irq[i], (pending & pin) != 0);
+        trace_sifive_gpio_update_output_irq(i, (pending & pin) != 0);
+    }
+}
+
+static void update_state(SIFIVEGPIOState *s)
+{
+    size_t i;
+    bool prev_ival, in, in_mask, port, out_xor, pull, output_en, input_en,
+        rise_ip, fall_ip, low_ip, high_ip, oval, actual_value, ival;
+
+    for (i = 0; i < SIFIVE_GPIO_PINS; i++) {
+
+        prev_ival = extract32(s->value, i, 1);
+        in        = extract32(s->in, i, 1);
+        in_mask   = extract32(s->in_mask, i, 1);
+        port      = extract32(s->port, i, 1);
+        out_xor   = extract32(s->out_xor, i, 1);
+        pull      = extract32(s->pue, i, 1);
+        output_en = extract32(s->output_en, i, 1);
+        input_en  = extract32(s->input_en, i, 1);
+        rise_ip   = extract32(s->rise_ip, i, 1);
+        fall_ip   = extract32(s->fall_ip, i, 1);
+        low_ip    = extract32(s->low_ip, i, 1);
+        high_ip   = extract32(s->high_ip, i, 1);
+
+        /* Output value (IOF not supported) */
+        oval = output_en && (port ^ out_xor);
+
+        /* Pin both driven externally and internally */
+        if (output_en && in_mask) {
+            qemu_log_mask(LOG_GUEST_ERROR, "GPIO pin %zu short circuited\n", i);
+        }
+
+        if (in_mask) {
+            /* The pin is driven by external device */
+            actual_value = in;
+        } else if (output_en) {
+            /* The pin is driven by internal circuit */
+            actual_value = oval;
+        } else {
+            /* Floating? Apply pull-up resistor */
+            actual_value = pull;
+        }
+
+        qemu_set_irq(s->output[i], actual_value);
+
+        /* Input value */
+        ival = input_en && actual_value;
+
+        /* Interrupts */
+        high_ip = high_ip || ival;
+        s->high_ip = deposit32(s->high_ip, i, 1, high_ip);
+
+        low_ip = low_ip || !ival;
+        s->low_ip = deposit32(s->low_ip,  i, 1, low_ip);
+
+        rise_ip = rise_ip || (ival && !prev_ival);
+        s->rise_ip = deposit32(s->rise_ip, i, 1, rise_ip);
+
+        fall_ip = fall_ip || (!ival && prev_ival);
+        s->fall_ip = deposit32(s->fall_ip, i, 1, fall_ip);
+
+        /* Update value */
+        s->value = deposit32(s->value, i, 1, ival);
+    }
+    update_output_irq(s);
+}
+
+static uint64_t sifive_gpio_read(void *opaque, hwaddr offset, unsigned int size)
+{
+    SIFIVEGPIOState *s = SIFIVE_GPIO(opaque);
+    uint64_t r = 0;
+
+    switch (offset) {
+    case SIFIVE_GPIO_REG_VALUE:
+        r = s->value;
+        break;
+
+    case SIFIVE_GPIO_REG_INPUT_EN:
+        r = s->input_en;
+        break;
+
+    case SIFIVE_GPIO_REG_OUTPUT_EN:
+        r = s->output_en;
+        break;
+
+    case SIFIVE_GPIO_REG_PORT:
+        r = s->port;
+        break;
+
+    case SIFIVE_GPIO_REG_PUE:
+        r = s->pue;
+        break;
+
+    case SIFIVE_GPIO_REG_DS:
+        r = s->ds;
+        break;
+
+    case SIFIVE_GPIO_REG_RISE_IE:
+        r = s->rise_ie;
+        break;
+
+    case SIFIVE_GPIO_REG_RISE_IP:
+        r = s->rise_ip;
+        break;
+
+    case SIFIVE_GPIO_REG_FALL_IE:
+        r = s->fall_ie;
+        break;
+
+    case SIFIVE_GPIO_REG_FALL_IP:
+        r = s->fall_ip;
+        break;
+
+    case SIFIVE_GPIO_REG_HIGH_IE:
+        r = s->high_ie;
+        break;
+
+    case SIFIVE_GPIO_REG_HIGH_IP:
+        r = s->high_ip;
+        break;
+
+    case SIFIVE_GPIO_REG_LOW_IE:
+        r = s->low_ie;
+        break;
+
+    case SIFIVE_GPIO_REG_LOW_IP:
+        r = s->low_ip;
+        break;
+
+    case SIFIVE_GPIO_REG_IOF_EN:
+        r = s->iof_en;
+        break;
+
+    case SIFIVE_GPIO_REG_IOF_SEL:
+        r = s->iof_sel;
+        break;
+
+    case SIFIVE_GPIO_REG_OUT_XOR:
+        r = s->out_xor;
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                "%s: bad read offset 0x%" HWADDR_PRIx "\n",
+                      __func__, offset);
+    }
+
+    trace_sifive_gpio_read(offset, r);
+
+    return r;
+}
+
+static void sifive_gpio_write(void *opaque, hwaddr offset,
+                       uint64_t value, unsigned int size)
+{
+    SIFIVEGPIOState *s = SIFIVE_GPIO(opaque);
+
+    trace_sifive_gpio_write(offset, value);
+
+    switch (offset) {
+
+    case SIFIVE_GPIO_REG_INPUT_EN:
+        s->input_en = value;
+        break;
+
+    case SIFIVE_GPIO_REG_OUTPUT_EN:
+        s->output_en = value;
+        break;
+
+    case SIFIVE_GPIO_REG_PORT:
+        s->port = value;
+        break;
+
+    case SIFIVE_GPIO_REG_PUE:
+        s->pue = value;
+        break;
+
+    case SIFIVE_GPIO_REG_DS:
+        s->ds = value;
+        break;
+
+    case SIFIVE_GPIO_REG_RISE_IE:
+        s->rise_ie = value;
+        break;
+
+    case SIFIVE_GPIO_REG_RISE_IP:
+         /* Write 1 to clear */
+        s->rise_ip &= ~value;
+        break;
+
+    case SIFIVE_GPIO_REG_FALL_IE:
+        s->fall_ie = value;
+        break;
+
+    case SIFIVE_GPIO_REG_FALL_IP:
+         /* Write 1 to clear */
+        s->fall_ip &= ~value;
+        break;
+
+    case SIFIVE_GPIO_REG_HIGH_IE:
+        s->high_ie = value;
+        break;
+
+    case SIFIVE_GPIO_REG_HIGH_IP:
+         /* Write 1 to clear */
+        s->high_ip &= ~value;
+        break;
+
+    case SIFIVE_GPIO_REG_LOW_IE:
+        s->low_ie = value;
+        break;
+
+    case SIFIVE_GPIO_REG_LOW_IP:
+         /* Write 1 to clear */
+        s->low_ip &= ~value;
+        break;
+
+    case SIFIVE_GPIO_REG_IOF_EN:
+        s->iof_en = value;
+        break;
+
+    case SIFIVE_GPIO_REG_IOF_SEL:
+        s->iof_sel = value;
+        break;
+
+    case SIFIVE_GPIO_REG_OUT_XOR:
+        s->out_xor = value;
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: bad write offset 0x%" HWADDR_PRIx "\n",
+                      __func__, offset);
+    }
+
+    update_state(s);
+}
+
+static const MemoryRegionOps gpio_ops = {
+    .read =  sifive_gpio_read,
+    .write = sifive_gpio_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
+};
+
+static void sifive_gpio_set(void *opaque, int line, int value)
+{
+    SIFIVEGPIOState *s = SIFIVE_GPIO(opaque);
+
+    trace_sifive_gpio_set(line, value);
+
+    assert(line >= 0 && line < SIFIVE_GPIO_PINS);
+
+    s->in_mask = deposit32(s->in_mask, line, 1, value >= 0);
+    if (value >= 0) {
+        s->in = deposit32(s->in, line, 1, value != 0);
+    }
+
+    update_state(s);
+}
+
+static void sifive_gpio_reset(DeviceState *dev)
+{
+    SIFIVEGPIOState *s = SIFIVE_GPIO(dev);
+
+    s->value = 0;
+    s->input_en = 0;
+    s->output_en = 0;
+    s->port = 0;
+    s->pue = 0;
+    s->ds = 0;
+    s->rise_ie = 0;
+    s->rise_ip = 0;
+    s->fall_ie = 0;
+    s->fall_ip = 0;
+    s->high_ie = 0;
+    s->high_ip = 0;
+    s->low_ie = 0;
+    s->low_ip = 0;
+    s->iof_en = 0;
+    s->iof_sel = 0;
+    s->out_xor = 0;
+    s->in = 0;
+    s->in_mask = 0;
+
+}
+
+static const VMStateDescription vmstate_sifive_gpio = {
+    .name = TYPE_SIFIVE_GPIO,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(value,     SIFIVEGPIOState),
+        VMSTATE_UINT32(input_en,  SIFIVEGPIOState),
+        VMSTATE_UINT32(output_en, SIFIVEGPIOState),
+        VMSTATE_UINT32(port,      SIFIVEGPIOState),
+        VMSTATE_UINT32(pue,       SIFIVEGPIOState),
+        VMSTATE_UINT32(rise_ie,   SIFIVEGPIOState),
+        VMSTATE_UINT32(rise_ip,   SIFIVEGPIOState),
+        VMSTATE_UINT32(fall_ie,   SIFIVEGPIOState),
+        VMSTATE_UINT32(fall_ip,   SIFIVEGPIOState),
+        VMSTATE_UINT32(high_ie,   SIFIVEGPIOState),
+        VMSTATE_UINT32(high_ip,   SIFIVEGPIOState),
+        VMSTATE_UINT32(low_ie,    SIFIVEGPIOState),
+        VMSTATE_UINT32(low_ip,    SIFIVEGPIOState),
+        VMSTATE_UINT32(iof_en,    SIFIVEGPIOState),
+        VMSTATE_UINT32(iof_sel,   SIFIVEGPIOState),
+        VMSTATE_UINT32(out_xor,   SIFIVEGPIOState),
+        VMSTATE_UINT32(in, SIFIVEGPIOState),
+        VMSTATE_UINT32(in_mask, SIFIVEGPIOState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void sifive_gpio_init(Object *obj)
+{
+    SIFIVEGPIOState *s = SIFIVE_GPIO(obj);
+
+    memory_region_init_io(&s->mmio, obj, &gpio_ops, s,
+            TYPE_SIFIVE_GPIO, SIFIVE_GPIO_SIZE);
+    sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio);
+
+
+    for (int i = 0; i < SIFIVE_GPIO_PINS; i++) {
+        sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq[i]);
+    }
+
+    qdev_init_gpio_in(DEVICE(s), sifive_gpio_set, SIFIVE_GPIO_PINS);
+    qdev_init_gpio_out(DEVICE(s), s->output, SIFIVE_GPIO_PINS);
+}
+
+static void sifive_gpio_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->vmsd = &vmstate_sifive_gpio;
+    dc->reset = sifive_gpio_reset;
+    dc->desc = "sifive GPIO";
+}
+
+static const TypeInfo sifive_gpio_info = {
+    .name = TYPE_SIFIVE_GPIO,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(SIFIVEGPIOState),
+    .instance_init = sifive_gpio_init,
+    .class_init = sifive_gpio_class_init
+};
+
+static void sifive_gpio_register_types(void)
+{
+    type_register_static(&sifive_gpio_info);
+}
+
+type_init(sifive_gpio_register_types)
diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
index 2a000a5800..5b33d4be3b 100644
--- a/hw/riscv/spike.c
+++ b/hw/riscv/spike.c
@@ -39,6 +39,7 @@
 #include "chardev/char.h"
 #include "sysemu/arch_init.h"
 #include "sysemu/device_tree.h"
+#include "sysemu/qtest.h"
 #include "exec/address-spaces.h"
 #include "elf.h"
 
@@ -160,7 +161,89 @@ static void create_fdt(SpikeState *s, const struct MemmapEntry *memmap,
         qemu_fdt_add_subnode(fdt, "/chosen");
         qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline);
     }
- }
+}
+
+static void spike_board_init(MachineState *machine)
+{
+    const struct MemmapEntry *memmap = spike_memmap;
+
+    SpikeState *s = g_new0(SpikeState, 1);
+    MemoryRegion *system_memory = get_system_memory();
+    MemoryRegion *main_mem = g_new(MemoryRegion, 1);
+    MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
+    int i;
+
+    /* Initialize SOC */
+    object_initialize_child(OBJECT(machine), "soc", &s->soc, sizeof(s->soc),
+                            TYPE_RISCV_HART_ARRAY, &error_abort, NULL);
+    object_property_set_str(OBJECT(&s->soc), machine->cpu_type, "cpu-type",
+                            &error_abort);
+    object_property_set_int(OBJECT(&s->soc), smp_cpus, "num-harts",
+                            &error_abort);
+    object_property_set_bool(OBJECT(&s->soc), true, "realized",
+                            &error_abort);
+
+    /* register system main memory (actual RAM) */
+    memory_region_init_ram(main_mem, NULL, "riscv.spike.ram",
+                           machine->ram_size, &error_fatal);
+    memory_region_add_subregion(system_memory, memmap[SPIKE_DRAM].base,
+        main_mem);
+
+    /* create device tree */
+    create_fdt(s, memmap, machine->ram_size, machine->kernel_cmdline);
+
+    /* boot rom */
+    memory_region_init_rom(mask_rom, NULL, "riscv.spike.mrom",
+                           memmap[SPIKE_MROM].size, &error_fatal);
+    memory_region_add_subregion(system_memory, memmap[SPIKE_MROM].base,
+                                mask_rom);
+
+    if (machine->kernel_filename) {
+        load_kernel(machine->kernel_filename);
+    }
+
+    /* reset vector */
+    uint32_t reset_vec[8] = {
+        0x00000297,                  /* 1:  auipc  t0, %pcrel_hi(dtb) */
+        0x02028593,                  /*     addi   a1, t0, %pcrel_lo(1b) */
+        0xf1402573,                  /*     csrr   a0, mhartid  */
+#if defined(TARGET_RISCV32)
+        0x0182a283,                  /*     lw     t0, 24(t0) */
+#elif defined(TARGET_RISCV64)
+        0x0182b283,                  /*     ld     t0, 24(t0) */
+#endif
+        0x00028067,                  /*     jr     t0 */
+        0x00000000,
+        memmap[SPIKE_DRAM].base,     /* start: .dword DRAM_BASE */
+        0x00000000,
+                                     /* dtb: */
+    };
+
+    /* copy in the reset vector in little_endian byte order */
+    for (i = 0; i < sizeof(reset_vec) >> 2; i++) {
+        reset_vec[i] = cpu_to_le32(reset_vec[i]);
+    }
+    rom_add_blob_fixed_as("mrom.reset", reset_vec, sizeof(reset_vec),
+                          memmap[SPIKE_MROM].base, &address_space_memory);
+
+    /* copy in the device tree */
+    if (fdt_pack(s->fdt) || fdt_totalsize(s->fdt) >
+            memmap[SPIKE_MROM].size - sizeof(reset_vec)) {
+        error_report("not enough space to store device-tree");
+        exit(1);
+    }
+    qemu_fdt_dumpdtb(s->fdt, fdt_totalsize(s->fdt));
+    rom_add_blob_fixed_as("mrom.fdt", s->fdt, fdt_totalsize(s->fdt),
+                          memmap[SPIKE_MROM].base + sizeof(reset_vec),
+                          &address_space_memory);
+
+    /* initialize HTIF using symbols found in load_kernel */
+    htif_mm_init(system_memory, mask_rom, &s->soc.harts[0].env, serial_hd(0));
+
+    /* Core Local Interruptor (timer and IPI) */
+    sifive_clint_create(memmap[SPIKE_CLINT].base, memmap[SPIKE_CLINT].size,
+        smp_cpus, SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE);
+}
 
 static void spike_v1_10_0_board_init(MachineState *machine)
 {
@@ -172,6 +255,12 @@ static void spike_v1_10_0_board_init(MachineState *machine)
     MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
     int i;
 
+    if (!qtest_enabled()) {
+        info_report("The Spike v1.10.0 machine has been deprecated. "
+                    "Please use the generic spike machine and specify the ISA "
+                    "versions using -cpu.");
+    }
+
     /* Initialize SOC */
     object_initialize_child(OBJECT(machine), "soc", &s->soc, sizeof(s->soc),
                             TYPE_RISCV_HART_ARRAY, &error_abort, NULL);
@@ -254,6 +343,12 @@ static void spike_v1_09_1_board_init(MachineState *machine)
     MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
     int i;
 
+    if (!qtest_enabled()) {
+        info_report("The Spike v1.09.1 machine has been deprecated. "
+                    "Please use the generic spike machine and specify the ISA "
+                    "versions using -cpu.");
+    }
+
     /* Initialize SOC */
     object_initialize_child(OBJECT(machine), "soc", &s->soc, sizeof(s->soc),
                             TYPE_RISCV_HART_ARRAY, &error_abort, NULL);
@@ -359,8 +454,17 @@ static void spike_v1_10_0_machine_init(MachineClass *mc)
     mc->desc = "RISC-V Spike Board (Privileged ISA v1.10)";
     mc->init = spike_v1_10_0_board_init;
     mc->max_cpus = 1;
+}
+
+static void spike_machine_init(MachineClass *mc)
+{
+    mc->desc = "RISC-V Spike Board";
+    mc->init = spike_board_init;
+    mc->max_cpus = 1;
     mc->is_default = 1;
+    mc->default_cpu_type = SPIKE_V1_10_0_CPU;
 }
 
 DEFINE_MACHINE("spike_v1.9.1", spike_v1_09_1_machine_init)
 DEFINE_MACHINE("spike_v1.10", spike_v1_10_0_machine_init)
+DEFINE_MACHINE("spike", spike_machine_init)
diff --git a/hw/riscv/trace-events b/hw/riscv/trace-events
new file mode 100644
index 0000000000..6d59233e23
--- /dev/null
+++ b/hw/riscv/trace-events
@@ -0,0 +1,7 @@
+# See docs/devel/tracing.txt for syntax documentation.
+
+# hw/gpio/sifive_gpio.c
+sifive_gpio_read(uint64_t offset, uint64_t r) "offset 0x%" PRIx64 " value 0x%" PRIx64
+sifive_gpio_write(uint64_t offset, uint64_t value) "offset 0x%" PRIx64 " value 0x%" PRIx64
+sifive_gpio_set(int64_t line, int64_t value) "line %" PRIi64 " value %" PRIi64
+sifive_gpio_update_output_irq(int64_t line, int64_t value) "line %" PRIi64 " value %" PRIi64
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index fc4c6b306e..84d94d0c42 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -29,7 +29,6 @@
 #include "hw/sysbus.h"
 #include "hw/char/serial.h"
 #include "target/riscv/cpu.h"
-#include "hw/riscv/riscv_htif.h"
 #include "hw/riscv/riscv_hart.h"
 #include "hw/riscv/sifive_plic.h"
 #include "hw/riscv/sifive_clint.h"
@@ -400,7 +399,7 @@ static void riscv_virt_board_init(MachineState *machine)
     /* Initialize SOC */
     object_initialize_child(OBJECT(machine), "soc", &s->soc, sizeof(s->soc),
                             TYPE_RISCV_HART_ARRAY, &error_abort, NULL);
-    object_property_set_str(OBJECT(&s->soc), VIRT_CPU, "cpu-type",
+    object_property_set_str(OBJECT(&s->soc), machine->cpu_type, "cpu-type",
                             &error_abort);
     object_property_set_int(OBJECT(&s->soc), smp_cpus, "num-harts",
                             &error_abort);
@@ -526,6 +525,7 @@ static void riscv_virt_board_machine_init(MachineClass *mc)
     mc->desc = "RISC-V VirtIO Board (Privileged ISA v1.10)";
     mc->init = riscv_virt_board_init;
     mc->max_cpus = 8; /* hardcoded limit in BBL */
+    mc->default_cpu_type = VIRT_CPU;
 }
 
 DEFINE_MACHINE("virt", riscv_virt_board_machine_init)
diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c
index ee5b83448b..e574223a22 100644
--- a/hw/s390x/event-facility.c
+++ b/hw/s390x/event-facility.c
@@ -466,12 +466,12 @@ static void init_event_facility(Object *obj)
     new = object_new(TYPE_SCLP_QUIESCE);
     object_property_add_child(obj, TYPE_SCLP_QUIESCE, new, NULL);
     object_unref(new);
-    qdev_set_parent_bus(DEVICE(new), &event_facility->sbus.qbus);
+    qdev_set_parent_bus(DEVICE(new), BUS(&event_facility->sbus));
 
     new = object_new(TYPE_SCLP_CPU_HOTPLUG);
     object_property_add_child(obj, TYPE_SCLP_CPU_HOTPLUG, new, NULL);
     object_unref(new);
-    qdev_set_parent_bus(DEVICE(new), &event_facility->sbus.qbus);
+    qdev_set_parent_bus(DEVICE(new), BUS(&event_facility->sbus));
     /* the facility will automatically realize the devices via the bus */
 }
 
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index bbc6e8fa0b..4d643686cb 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -669,7 +669,9 @@ DEFINE_CCW_MACHINE(4_1, "4.1", true);
 
 static void ccw_machine_4_0_instance_options(MachineState *machine)
 {
+    static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V4_0 };
     ccw_machine_4_1_instance_options(machine);
+    s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat);
 }
 
 static void ccw_machine_4_0_class_options(MachineClass *mc)
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index da7239d94f..a8b7a199f9 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -1860,7 +1860,7 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val)
         }
         if (val & LSI_SCNTL1_RST) {
             if (!(s->sstat0 & LSI_SSTAT0_RST)) {
-                qbus_reset_all(&s->bus.qbus);
+                qbus_reset_all(BUS(&s->bus));
                 s->sstat0 |= LSI_SSTAT0_RST;
                 lsi_script_scsi_interrupt(s, LSI_SIST0_RST, 0);
             }
diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
index 929404fb48..e800683e91 100644
--- a/hw/scsi/mptsas.c
+++ b/hw/scsi/mptsas.c
@@ -540,7 +540,7 @@ reply_maybe_async:
         break;
 
     case MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS:
-        qbus_reset_all(&s->bus.qbus);
+        qbus_reset_all(BUS(&s->bus));
         break;
 
     default:
@@ -803,7 +803,7 @@ static void mptsas_soft_reset(MPTSASState *s)
     s->intr_mask = MPI_HIM_DIM | MPI_HIM_RIM;
     mptsas_update_interrupt(s);
 
-    qbus_reset_all(&s->bus.qbus);
+    qbus_reset_all(BUS(&s->bus));
     s->intr_status = 0;
     s->intr_mask = save_mask;
 
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index e7e865ab3b..7b89ac798b 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2336,6 +2336,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
         return;
     }
 
+    if (blk_get_aio_context(s->qdev.conf.blk) != qemu_get_aio_context() &&
+        !s->qdev.hba_supports_iothread)
+    {
+        error_setg(errp, "HBA does not support iothreads");
+        return;
+    }
+
     if (dev->type == TYPE_DISK) {
         if (!blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, errp)) {
             return;
@@ -2417,7 +2424,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
     if (!dev->conf.blk) {
         /* Anonymous BlockBackend for an empty drive. As we put it into
          * dev->conf, qdev takes care of detaching on unplug. */
-        dev->conf.blk = blk_new(0, BLK_PERM_ALL);
+        dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
         ret = blk_attach_dev(dev->conf.blk, &dev->qdev);
         assert(ret == 0);
     }
@@ -2929,13 +2936,14 @@ static const TypeInfo scsi_disk_base_info = {
     .abstract      = true,
 };
 
-#define DEFINE_SCSI_DISK_PROPERTIES()                                \
-    DEFINE_BLOCK_PROPERTIES(SCSIDiskState, qdev.conf),               \
-    DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),         \
-    DEFINE_PROP_STRING("ver", SCSIDiskState, version),               \
-    DEFINE_PROP_STRING("serial", SCSIDiskState, serial),             \
-    DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor),             \
-    DEFINE_PROP_STRING("product", SCSIDiskState, product),           \
+#define DEFINE_SCSI_DISK_PROPERTIES()                                   \
+    DEFINE_PROP_DRIVE_IOTHREAD("drive", SCSIDiskState, qdev.conf.blk),  \
+    DEFINE_BLOCK_PROPERTIES_BASE(SCSIDiskState, qdev.conf),             \
+    DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),            \
+    DEFINE_PROP_STRING("ver", SCSIDiskState, version),                  \
+    DEFINE_PROP_STRING("serial", SCSIDiskState, serial),                \
+    DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor),                \
+    DEFINE_PROP_STRING("product", SCSIDiskState, product),              \
     DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id)
 
 
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 61e2e57da9..6b01accf61 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -114,6 +114,10 @@ static void vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val)
     VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
     bool start = (val & VIRTIO_CONFIG_S_DRIVER_OK);
 
+    if (!vdev->vm_running) {
+        start = false;
+    }
+
     if (vsc->dev.started == start) {
         return;
     }
@@ -135,6 +139,28 @@ static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 {
 }
 
+static int vhost_scsi_pre_save(void *opaque)
+{
+    VHostSCSICommon *vsc = opaque;
+
+    /* At this point, backend must be stopped, otherwise
+     * it might keep writing to memory. */
+    assert(!vsc->dev.started);
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_virtio_vhost_scsi = {
+    .name = "virtio-vhost_scsi",
+    .minimum_version_id = 1,
+    .version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_VIRTIO_DEVICE,
+        VMSTATE_END_OF_LIST()
+    },
+    .pre_save = vhost_scsi_pre_save,
+};
+
 static void vhost_scsi_realize(DeviceState *dev, Error **errp)
 {
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
@@ -173,13 +199,18 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp)
         goto close_fd;
     }
 
-    error_setg(&vsc->migration_blocker,
-               "vhost-scsi does not support migration");
-    migrate_add_blocker(vsc->migration_blocker, &err);
-    if (err) {
-        error_propagate(errp, err);
-        error_free(vsc->migration_blocker);
-        goto close_fd;
+    if (!vsc->migratable) {
+        error_setg(&vsc->migration_blocker,
+                "vhost-scsi does not support migration in all cases. "
+                "When external environment supports it (Orchestrator migrates "
+                "target SCSI device state or use shared storage over network), "
+                "set 'migratable' property to true to enable migration.");
+        migrate_add_blocker(vsc->migration_blocker, &err);
+        if (err) {
+            error_propagate(errp, err);
+            error_free(vsc->migration_blocker);
+            goto close_fd;
+        }
     }
 
     vsc->dev.nvqs = VHOST_SCSI_VQ_NUM_FIXED + vs->conf.num_queues;
@@ -204,7 +235,9 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp)
     return;
 
  free_vqs:
-    migrate_del_blocker(vsc->migration_blocker);
+    if (!vsc->migratable) {
+        migrate_del_blocker(vsc->migration_blocker);
+    }
     g_free(vsc->dev.vqs);
  close_fd:
     close(vhostfd);
@@ -217,8 +250,10 @@ static void vhost_scsi_unrealize(DeviceState *dev, Error **errp)
     VHostSCSICommon *vsc = VHOST_SCSI_COMMON(dev);
     struct vhost_virtqueue *vqs = vsc->dev.vqs;
 
-    migrate_del_blocker(vsc->migration_blocker);
-    error_free(vsc->migration_blocker);
+    if (!vsc->migratable) {
+        migrate_del_blocker(vsc->migration_blocker);
+        error_free(vsc->migration_blocker);
+    }
 
     /* This will stop vhost backend. */
     vhost_scsi_set_status(vdev, 0);
@@ -242,6 +277,7 @@ static Property vhost_scsi_properties[] = {
     DEFINE_PROP_BIT64("t10_pi", VHostSCSICommon, host_features,
                                                  VIRTIO_SCSI_F_T10_PI,
                                                  false),
+    DEFINE_PROP_BOOL("migratable", VHostSCSICommon, migratable, false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -252,6 +288,7 @@ static void vhost_scsi_class_init(ObjectClass *klass, void *data)
     FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(klass);
 
     dc->props = vhost_scsi_properties;
+    dc->vmsd = &vmstate_virtio_vhost_scsi;
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
     vdc->realize = vhost_scsi_realize;
     vdc->unrealize = vhost_scsi_unrealize;
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index 8b1e6876db..a9fd8ea305 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -69,6 +69,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
     VHostUserSCSI *s = VHOST_USER_SCSI(dev);
     VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+    struct vhost_virtqueue *vqs = NULL;
     Error *err = NULL;
     int ret;
 
@@ -93,6 +94,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
     vsc->dev.vqs = g_new(struct vhost_virtqueue, vsc->dev.nvqs);
     vsc->dev.vq_index = 0;
     vsc->dev.backend_features = 0;
+    vqs = vsc->dev.vqs;
 
     ret = vhost_dev_init(&vsc->dev, &s->vhost_user,
                          VHOST_BACKEND_TYPE_USER, 0);
@@ -100,6 +102,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
         error_setg(errp, "vhost-user-scsi: vhost initialization failed: %s",
                    strerror(-ret));
         vhost_user_cleanup(&s->vhost_user);
+        g_free(vqs);
         return;
     }
 
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 839f120256..12d21bbc61 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -696,7 +696,7 @@ static void virtio_scsi_reset(VirtIODevice *vdev)
 
     assert(!s->dataplane_started);
     s->resetting++;
-    qbus_reset_all(&s->bus.qbus);
+    qbus_reset_all(BUS(&s->bus));
     s->resetting--;
 
     vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
@@ -789,28 +789,31 @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense)
     }
 }
 
+static void virtio_scsi_pre_hotplug(HotplugHandler *hotplug_dev,
+                                    DeviceState *dev, Error **errp)
+{
+    SCSIDevice *sd = SCSI_DEVICE(dev);
+    sd->hba_supports_iothread = true;
+}
+
 static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
                                 Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev);
     VirtIOSCSI *s = VIRTIO_SCSI(vdev);
     SCSIDevice *sd = SCSI_DEVICE(dev);
+    int ret;
 
     if (s->ctx && !s->dataplane_fenced) {
-        AioContext *ctx;
         if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) {
             return;
         }
-        ctx = blk_get_aio_context(sd->conf.blk);
-        if (ctx != s->ctx && ctx != qemu_get_aio_context()) {
-            error_setg(errp, "Cannot attach a blockdev that is using "
-                       "a different iothread");
-            return;
-        }
         virtio_scsi_acquire(s);
-        blk_set_aio_context(sd->conf.blk, s->ctx);
+        ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp);
         virtio_scsi_release(s);
-
+        if (ret < 0) {
+            return;
+        }
     }
 
     if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
@@ -839,7 +842,8 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
 
     if (s->ctx) {
         virtio_scsi_acquire(s);
-        blk_set_aio_context(sd->conf.blk, qemu_get_aio_context());
+        /* If other users keep the BlockBackend in the iothread, that's ok */
+        blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL);
         virtio_scsi_release(s);
     }
 
@@ -986,6 +990,7 @@ static void virtio_scsi_class_init(ObjectClass *klass, void *data)
     vdc->reset = virtio_scsi_reset;
     vdc->start_ioeventfd = virtio_scsi_dataplane_start;
     vdc->stop_ioeventfd = virtio_scsi_dataplane_stop;
+    hc->pre_plug = virtio_scsi_pre_hotplug;
     hc->plug = virtio_scsi_hotplug;
     hc->unplug = virtio_scsi_hotunplug;
 }
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
index 584b4be07e..c39e33fa35 100644
--- a/hw/scsi/vmw_pvscsi.c
+++ b/hw/scsi/vmw_pvscsi.c
@@ -440,7 +440,7 @@ static void
 pvscsi_reset_adapter(PVSCSIState *s)
 {
     s->resetting++;
-    qbus_reset_all_fn(&s->bus);
+    qbus_reset_all(BUS(&s->bus));
     s->resetting--;
     pvscsi_process_completion_queue(s);
     assert(QTAILQ_EMPTY(&s->pending_queue));
@@ -848,7 +848,7 @@ pvscsi_on_cmd_reset_bus(PVSCSIState *s)
     trace_pvscsi_on_cmd_arrived("PVSCSI_CMD_RESET_BUS");
 
     s->resetting++;
-    qbus_reset_all_fn(&s->bus);
+    qbus_reset_all(BUS(&s->bus));
     s->resetting--;
     return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
 }
diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c
index df42aa1c54..dd1ba649d9 100644
--- a/hw/sd/milkymist-memcard.c
+++ b/hw/sd/milkymist-memcard.c
@@ -277,7 +277,7 @@ static void milkymist_memcard_realize(DeviceState *dev, Error **errp)
     /* FIXME use a qdev drive property instead of drive_get_next() */
     dinfo = drive_get_next(IF_SD);
     blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL;
-    carddev = qdev_create(&s->sdbus.qbus, TYPE_SD_CARD);
+    carddev = qdev_create(BUS(&s->sdbus), TYPE_SD_CARD);
     qdev_prop_set_drive(carddev, "drive", blk, &err);
     object_property_set_bool(OBJECT(carddev), true, "realized", &err);
     if (err) {
diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c
index 623d0333e8..25e1009277 100644
--- a/hw/sd/ssi-sd.c
+++ b/hw/sd/ssi-sd.c
@@ -249,7 +249,7 @@ static void ssi_sd_realize(SSISlave *d, Error **errp)
     /* Create and plug in the sd card */
     /* FIXME use a qdev drive property instead of drive_get_next() */
     dinfo = drive_get_next(IF_SD);
-    carddev = qdev_create(&s->sdbus.qbus, TYPE_SD_CARD);
+    carddev = qdev_create(BUS(&s->sdbus), TYPE_SD_CARD);
     if (dinfo) {
         qdev_prop_set_drive(carddev, "drive", blk_by_legacy_dinfo(dinfo), &err);
     }
diff --git a/hw/semihosting/Kconfig b/hw/semihosting/Kconfig
new file mode 100644
index 0000000000..efe0a30734
--- /dev/null
+++ b/hw/semihosting/Kconfig
@@ -0,0 +1,3 @@
+
+config SEMIHOSTING
+       bool
diff --git a/hw/semihosting/Makefile.objs b/hw/semihosting/Makefile.objs
new file mode 100644
index 0000000000..4ad47c05c0
--- /dev/null
+++ b/hw/semihosting/Makefile.objs
@@ -0,0 +1,2 @@
+obj-$(CONFIG_SEMIHOSTING) += config.o
+obj-$(CONFIG_SEMIHOSTING) += console.o
diff --git a/hw/semihosting/config.c b/hw/semihosting/config.c
new file mode 100644
index 0000000000..2a8e7e1045
--- /dev/null
+++ b/hw/semihosting/config.c
@@ -0,0 +1,186 @@
+/*
+ * Semihosting configuration
+ *
+ * Copyright (c) 2015 Imagination Technologies
+ * Copyright (c) 2019 Linaro Ltd
+ *
+ * This controls the configuration of semihosting for all guest
+ * targets that support it. Architecture specific handling is handled
+ * in target/HW/HW-semi.c
+ *
+ * Semihosting is sightly strange in that it is also supported by some
+ * linux-user targets. However in that use case no configuration of
+ * the outputs and command lines is supported.
+ *
+ * The config module is common to all softmmu targets however as vl.c
+ * needs to link against the helpers.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/option.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "hw/semihosting/semihost.h"
+#include "chardev/char.h"
+
+QemuOptsList qemu_semihosting_config_opts = {
+    .name = "semihosting-config",
+    .implied_opt_name = "enable",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_semihosting_config_opts.head),
+    .desc = {
+        {
+            .name = "enable",
+            .type = QEMU_OPT_BOOL,
+        }, {
+            .name = "target",
+            .type = QEMU_OPT_STRING,
+        }, {
+            .name = "chardev",
+            .type = QEMU_OPT_STRING,
+        }, {
+            .name = "arg",
+            .type = QEMU_OPT_STRING,
+        },
+        { /* end of list */ }
+    },
+};
+
+typedef struct SemihostingConfig {
+    bool enabled;
+    SemihostingTarget target;
+    Chardev *chardev;
+    const char **argv;
+    int argc;
+    const char *cmdline; /* concatenated argv */
+} SemihostingConfig;
+
+static SemihostingConfig semihosting;
+static const char *semihost_chardev;
+
+bool semihosting_enabled(void)
+{
+    return semihosting.enabled;
+}
+
+SemihostingTarget semihosting_get_target(void)
+{
+    return semihosting.target;
+}
+
+const char *semihosting_get_arg(int i)
+{
+    if (i >= semihosting.argc) {
+        return NULL;
+    }
+    return semihosting.argv[i];
+}
+
+int semihosting_get_argc(void)
+{
+    return semihosting.argc;
+}
+
+const char *semihosting_get_cmdline(void)
+{
+    if (semihosting.cmdline == NULL && semihosting.argc > 0) {
+        semihosting.cmdline = g_strjoinv(" ", (gchar **)semihosting.argv);
+    }
+    return semihosting.cmdline;
+}
+
+static int add_semihosting_arg(void *opaque,
+                               const char *name, const char *val,
+                               Error **errp)
+{
+    SemihostingConfig *s = opaque;
+    if (strcmp(name, "arg") == 0) {
+        s->argc++;
+        /* one extra element as g_strjoinv() expects NULL-terminated array */
+        s->argv = g_realloc(s->argv, (s->argc + 1) * sizeof(void *));
+        s->argv[s->argc - 1] = val;
+        s->argv[s->argc] = NULL;
+    }
+    return 0;
+}
+
+/* Use strings passed via -kernel/-append to initialize semihosting.argv[] */
+void semihosting_arg_fallback(const char *file, const char *cmd)
+{
+    char *cmd_token;
+
+    /* argv[0] */
+    add_semihosting_arg(&semihosting, "arg", file, NULL);
+
+    /* split -append and initialize argv[1..n] */
+    cmd_token = strtok(g_strdup(cmd), " ");
+    while (cmd_token) {
+        add_semihosting_arg(&semihosting, "arg", cmd_token, NULL);
+        cmd_token = strtok(NULL, " ");
+    }
+}
+
+Chardev *semihosting_get_chardev(void)
+{
+    return semihosting.chardev;
+}
+
+void qemu_semihosting_enable(void)
+{
+    semihosting.enabled = true;
+    semihosting.target = SEMIHOSTING_TARGET_AUTO;
+}
+
+int qemu_semihosting_config_options(const char *optarg)
+{
+    QemuOptsList *opt_list = qemu_find_opts("semihosting-config");
+    QemuOpts *opts = qemu_opts_parse_noisily(opt_list, optarg, false);
+
+    semihosting.enabled = true;
+
+    if (opts != NULL) {
+        semihosting.enabled = qemu_opt_get_bool(opts, "enable",
+                                                true);
+        const char *target = qemu_opt_get(opts, "target");
+        /* setup of chardev is deferred until they are initialised */
+        semihost_chardev = qemu_opt_get(opts, "chardev");
+        if (target != NULL) {
+            if (strcmp("native", target) == 0) {
+                semihosting.target = SEMIHOSTING_TARGET_NATIVE;
+            } else if (strcmp("gdb", target) == 0) {
+                semihosting.target = SEMIHOSTING_TARGET_GDB;
+            } else  if (strcmp("auto", target) == 0) {
+                semihosting.target = SEMIHOSTING_TARGET_AUTO;
+            } else {
+                error_report("unsupported semihosting-config %s",
+                             optarg);
+                return 1;
+            }
+        } else {
+            semihosting.target = SEMIHOSTING_TARGET_AUTO;
+        }
+        /* Set semihosting argument count and vector */
+        qemu_opt_foreach(opts, add_semihosting_arg,
+                         &semihosting, NULL);
+    } else {
+        error_report("unsupported semihosting-config %s", optarg);
+        return 1;
+    }
+
+    return 0;
+}
+
+void qemu_semihosting_connect_chardevs(void)
+{
+    /* We had to defer this until chardevs were created */
+    if (semihost_chardev) {
+        Chardev *chr = qemu_chr_find(semihost_chardev);
+        if (chr == NULL) {
+            error_report("semihosting chardev '%s' not found",
+                         semihost_chardev);
+            exit(1);
+        }
+        semihosting.chardev = chr;
+    }
+}
diff --git a/hw/semihosting/console.c b/hw/semihosting/console.c
new file mode 100644
index 0000000000..466ea6dade
--- /dev/null
+++ b/hw/semihosting/console.c
@@ -0,0 +1,84 @@
+/*
+ * Semihosting Console Support
+ *
+ * Copyright (c) 2015 Imagination Technologies
+ * Copyright (c) 2019 Linaro Ltd
+ *
+ * This provides support for outputting to a semihosting console.
+ *
+ * While most semihosting implementations support reading and writing
+ * to arbitrary file descriptors we treat the console as something
+ * specifically for debugging interaction. This means messages can be
+ * re-directed to gdb (if currently being used to debug) or even
+ * re-directed elsewhere.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "hw/semihosting/semihost.h"
+#include "hw/semihosting/console.h"
+#include "exec/gdbstub.h"
+#include "qemu/log.h"
+#include "chardev/char.h"
+
+int qemu_semihosting_log_out(const char *s, int len)
+{
+    Chardev *chardev = semihosting_get_chardev();
+    if (chardev) {
+        return qemu_chr_write_all(chardev, (uint8_t *) s, len);
+    } else {
+        return write(STDERR_FILENO, s, len);
+    }
+}
+
+/*
+ * A re-implementation of lock_user_string that we can use locally
+ * instead of relying on softmmu-semi. Hopefully we can deprecate that
+ * in time. We either copy len bytes if specified or until we find a NULL.
+ */
+static GString *copy_user_string(CPUArchState *env, target_ulong addr, int len)
+{
+    CPUState *cpu = ENV_GET_CPU(env);
+    GString *s = g_string_sized_new(len ? len : 128);
+    uint8_t c;
+    bool done;
+
+    do {
+        if (cpu_memory_rw_debug(cpu, addr++, &c, 1, 0) == 0) {
+            s = g_string_append_c(s, c);
+            done = len ? s->len == len : c == 0;
+        } else {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: passed inaccessible address " TARGET_FMT_lx,
+                          __func__, addr);
+            done = true;
+        }
+    } while (!done);
+
+    return s;
+}
+
+static void semihosting_cb(CPUState *cs, target_ulong ret, target_ulong err)
+{
+    if (ret == (target_ulong) -1) {
+        qemu_log("%s: gdb console output failed ("TARGET_FMT_ld")",
+                 __func__, err);
+    }
+}
+
+int qemu_semihosting_console_out(CPUArchState *env, target_ulong addr, int len)
+{
+    GString *s = copy_user_string(env, addr, len);
+    int out = s->len;
+
+    if (use_gdb_syscalls()) {
+        gdb_do_syscall(semihosting_cb, "write,2,%x,%x", addr, s->len);
+    } else {
+        out = qemu_semihosting_log_out(s->str, s->len);
+    }
+
+    g_string_free(s, true);
+    return out;
+}
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index cd5551d94f..0e4e93ef16 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -616,7 +616,7 @@ static void usb_msd_storage_realize(USBDevice *dev, Error **errp)
      * The hack is probably a bad idea.
      */
     blk_ref(blk);
-    blk_detach_dev(blk, &s->dev.qdev);
+    blk_detach_dev(blk, DEVICE(s));
     s->conf.blk = NULL;
 
     usb_desc_create_serial(dev);
diff --git a/hw/vfio/display.c b/hw/vfio/display.c
index 2c2d3e5b71..a5a608c5b2 100644
--- a/hw/vfio/display.c
+++ b/hw/vfio/display.c
@@ -248,6 +248,7 @@ static VFIODMABuf *vfio_display_get_dmabuf(VFIOPCIDevice *vdev,
     dmabuf->buf.height = plane.height;
     dmabuf->buf.stride = plane.stride;
     dmabuf->buf.fourcc = plane.drm_format;
+    dmabuf->buf.modifier = plane.drm_format_mod;
     dmabuf->buf.fd     = fd;
     if (plane_type == DRM_PLANE_TYPE_CURSOR) {
         vfio_display_update_cursor(dmabuf, &plane);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8e555db12e..2a4091d216 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2717,7 +2717,7 @@ static void vfio_req_notifier_handler(void *opaque)
         return;
     }
 
-    qdev_unplug(&vdev->pdev.qdev, &err);
+    qdev_unplug(DEVICE(vdev), &err);
     if (err) {
         warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
     }
@@ -2839,7 +2839,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
     vdev->vbasedev.name = g_path_get_basename(vdev->vbasedev.sysfsdev);
     vdev->vbasedev.ops = &vfio_pci_ops;
     vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
-    vdev->vbasedev.dev = &vdev->pdev.qdev;
+    vdev->vbasedev.dev = DEVICE(vdev);
 
     tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev);
     len = readlink(tmp, group_path, sizeof(group_path));
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 553319c7ac..4ca5b2551e 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -96,6 +96,7 @@ typedef enum VhostUserRequest {
     VHOST_USER_POSTCOPY_END     = 30,
     VHOST_USER_GET_INFLIGHT_FD = 31,
     VHOST_USER_SET_INFLIGHT_FD = 32,
+    VHOST_USER_GPU_SET_SOCKET = 33,
     VHOST_USER_MAX
 } VhostUserRequest;
 
@@ -353,6 +354,16 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
     return 0;
 }
 
+int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
+{
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_GPU_SET_SOCKET,
+        .hdr.flags = VHOST_USER_VERSION,
+    };
+
+    return vhost_user_write(dev, &msg, &fd, 1);
+}
+
 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                    struct vhost_log *log)
 {
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 7f61018f2a..60747a6f93 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1081,7 +1081,7 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
 
     r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
     if (r < 0) {
-        VHOST_OPS_DEBUG("vhost VQ %d ring restore failed: %d", idx, r);
+        VHOST_OPS_DEBUG("vhost VQ %u ring restore failed: %d", idx, r);
         /* Connection to the backend is broken, so let's sync internal
          * last avail idx to the device used idx.
          */
@@ -1650,7 +1650,6 @@ fail_vq:
                              hdev->vqs + i,
                              hdev->vq_index + i);
     }
-    i = hdev->nvqs;
 
 fail_mem:
 fail_features:
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 4805727b53..07f4a64b48 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2312,9 +2312,8 @@ void virtio_instance_init_common(Object *proxy_obj, void *data,
 {
     DeviceState *vdev = data;
 
-    object_initialize(vdev, vdev_size, vdev_name);
-    object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
-    object_unref(OBJECT(vdev));
+    object_initialize_child(proxy_obj, "virtio-backend", vdev, vdev_size,
+                            vdev_name, &error_abort, NULL);
     qdev_alias_all_properties(vdev, proxy_obj);
 }
 
diff --git a/hw/watchdog/wdt_i6300esb.c b/hw/watchdog/wdt_i6300esb.c
index 1c6eddf86a..f2d1e86526 100644
--- a/hw/watchdog/wdt_i6300esb.c
+++ b/hw/watchdog/wdt_i6300esb.c
@@ -200,7 +200,7 @@ static void i6300esb_timer_expired(void *vp)
         if (d->reboot_enabled) {
             d->previous_reboot_flag = 1;
             watchdog_perform_action(); /* This reboots, exits, etc */
-            i6300esb_reset(&d->dev.qdev);
+            i6300esb_reset(DEVICE(d));
         }
 
         /* In "free running mode" we start stage 1 again. */
diff --git a/include/block/block.h b/include/block/block.h
index 9b083e2bca..f9415ed740 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -156,10 +156,15 @@ typedef struct HDGeometry {
  * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
  *                 layer, set by block layer
  *
- * Internal flag:
+ * Internal flags:
  * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
  *                 that the block layer recompute the answer from the returned
  *                 BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
+ * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
+ *                     zeroes in file child of current block node inside
+ *                     returned region. Only valid together with both
+ *                     BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
+ *                     appear with BDRV_BLOCK_ZERO.
  *
  * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
  * host offset within the returned BDS that is allocated for the
@@ -184,6 +189,7 @@ typedef struct HDGeometry {
 #define BDRV_BLOCK_RAW          0x08
 #define BDRV_BLOCK_ALLOCATED    0x10
 #define BDRV_BLOCK_EOF          0x20
+#define BDRV_BLOCK_RECURSE      0x40
 #define BDRV_BLOCK_OFFSET_MASK  BDRV_SECTOR_MASK
 
 typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
@@ -428,8 +434,8 @@ void bdrv_drain_all(void);
     AIO_WAIT_WHILE(bdrv_get_aio_context(bs_),              \
                    cond); })
 
-int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes);
-int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes);
+int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
+int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
 int bdrv_has_zero_init_1(BlockDriverState *bs);
 int bdrv_has_zero_init(BlockDriverState *bs);
 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
@@ -577,15 +583,6 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs);
  */
 void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co);
 
-/**
- * bdrv_set_aio_context:
- *
- * Changes the #AioContext used for fd handlers, timers, and BHs by this
- * BlockDriverState and all its children.
- *
- * This function must be called with iothread lock held.
- */
-void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context);
 void bdrv_set_aio_context_ignore(BlockDriverState *bs,
                                  AioContext *new_context, GSList **ignore);
 int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1eebc7c8f3..06df2bda1b 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1163,6 +1163,7 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr);
 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                   const char *child_name,
                                   const BdrvChildRole *child_role,
+                                  AioContext *ctx,
                                   uint64_t perm, uint64_t shared_perm,
                                   void *opaque, Error **errp);
 void bdrv_root_unref_child(BdrvChild *child);
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 849a6f3fa3..3ec8efcc43 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -581,6 +581,7 @@ enum NvmeIdCtrlOncs {
     NVME_ONCS_WRITE_ZEROS   = 1 << 3,
     NVME_ONCS_FEATURES      = 1 << 4,
     NVME_ONCS_RESRVATIONS   = 1 << 5,
+    NVME_ONCS_TIMESTAMP     = 1 << 6,
 };
 
 #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf)
@@ -622,6 +623,7 @@ enum NvmeFeatureIds {
     NVME_INTERRUPT_VECTOR_CONF      = 0x9,
     NVME_WRITE_ATOMICITY            = 0xa,
     NVME_ASYNCHRONOUS_EVENT_CONF    = 0xb,
+    NVME_TIMESTAMP                  = 0xe,
     NVME_SOFTWARE_PROGRESS_MARKER   = 0x80
 };
 
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
index ba223dd1f1..0cb7cc74a2 100644
--- a/include/block/raw-aio.h
+++ b/include/block/raw-aio.h
@@ -50,9 +50,6 @@ LinuxAioState *laio_init(Error **errp);
 void laio_cleanup(LinuxAioState *s);
 int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
                                 uint64_t offset, QEMUIOVector *qiov, int type);
-BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type);
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
 void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
diff --git a/include/elf.h b/include/elf.h
index ea7708a4ea..3501e0c8d0 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -598,6 +598,7 @@ typedef struct {
 #define HWCAP_S390_ETF3EH       256
 #define HWCAP_S390_HIGH_GPRS    512
 #define HWCAP_S390_TE           1024
+#define HWCAP_S390_VXRS         2048
 
 /* M68K specific definitions. */
 /* We use the top 24 bits to encode information about the
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 9144a47f57..e6140e8a04 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1255,23 +1255,6 @@ void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize,
 void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client);
 
 /**
- * memory_region_get_dirty: Check whether a range of bytes is dirty
- *                          for a specified client.
- *
- * Checks whether a range of bytes has been written to since the last
- * call to memory_region_reset_dirty() with the same @client.  Dirty logging
- * must be enabled.
- *
- * @mr: the memory region being queried.
- * @addr: the address (relative to the start of the region) being queried.
- * @size: the size of the range being queried.
- * @client: the user of the logging information; %DIRTY_MEMORY_MIGRATION or
- *          %DIRTY_MEMORY_VGA.
- */
-bool memory_region_get_dirty(MemoryRegion *mr, hwaddr addr,
-                             hwaddr size, unsigned client);
-
-/**
  * memory_region_set_dirty: Mark a range of bytes as dirty in a memory region.
  *
  * Marks a range of bytes as dirty, after it has been dirtied outside
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index f9aa4bd398..57a3f58b0c 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -449,24 +449,6 @@ struct AcpiSratProcessorGiccAffinity {
 
 typedef struct AcpiSratProcessorGiccAffinity AcpiSratProcessorGiccAffinity;
 
-/* PCI fw r3.0 MCFG table. */
-/* Subtable */
-struct AcpiMcfgAllocation {
-    uint64_t address;                /* Base address, processor-relative */
-    uint16_t pci_segment;            /* PCI segment group number */
-    uint8_t start_bus_number;       /* Starting PCI Bus number */
-    uint8_t end_bus_number;         /* Final PCI Bus number */
-    uint32_t reserved;
-} QEMU_PACKED;
-typedef struct AcpiMcfgAllocation AcpiMcfgAllocation;
-
-struct AcpiTableMcfg {
-    ACPI_TABLE_HEADER_DEF;
-    uint8_t reserved[8];
-    AcpiMcfgAllocation allocation[0];
-} QEMU_PACKED;
-typedef struct AcpiTableMcfg AcpiTableMcfg;
-
 /*
  * TCPA Description Table
  *
diff --git a/include/hw/acpi/pci.h b/include/hw/acpi/pci.h
index 124af7d32a..8bbd32cf45 100644
--- a/include/hw/acpi/pci.h
+++ b/include/hw/acpi/pci.h
@@ -30,4 +30,5 @@ typedef struct AcpiMcfgInfo {
     uint32_t size;
 } AcpiMcfgInfo;
 
+void build_mcfg(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info);
 #endif
diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h
index f5b193f670..e79c21771f 100644
--- a/include/hw/arm/bcm2835_peripherals.h
+++ b/include/hw/arm/bcm2835_peripherals.h
@@ -13,6 +13,7 @@
 
 #include "qemu-common.h"
 #include "hw/sysbus.h"
+#include "hw/char/pl011.h"
 #include "hw/char/bcm2835_aux.h"
 #include "hw/display/bcm2835_fb.h"
 #include "hw/dma/bcm2835_dma.h"
@@ -37,7 +38,7 @@ typedef struct BCM2835PeripheralState {
     MemoryRegion ram_alias[4];
     qemu_irq irq, fiq;
 
-    SysBusDevice *uart0;
+    PL011State uart0;
     BCM2835AuxState aux;
     BCM2835FBState fb;
     BCM2835DMAState dma;
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index d06f25aa0f..607539057a 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -45,8 +45,7 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
     return exp;
 }
 
-#define DEFINE_BLOCK_PROPERTIES(_state, _conf)                          \
-    DEFINE_PROP_DRIVE("drive", _state, _conf.blk),                      \
+#define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf)                     \
     DEFINE_PROP_BLOCKSIZE("logical_block_size", _state,                 \
                           _conf.logical_block_size),                    \
     DEFINE_PROP_BLOCKSIZE("physical_block_size", _state,                \
@@ -59,6 +58,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
                             ON_OFF_AUTO_AUTO), \
     DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false)
 
+#define DEFINE_BLOCK_PROPERTIES(_state, _conf)                          \
+    DEFINE_PROP_DRIVE("drive", _state, _conf.blk),                      \
+    DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf)
+
 #define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf)      \
     DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0),  \
     DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0), \
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 6f7916f88f..6ff02bf3e4 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -292,6 +292,9 @@ struct MachineState {
     } \
     type_init(machine_initfn##_register_types)
 
+extern GlobalProperty hw_compat_4_0_1[];
+extern const size_t hw_compat_4_0_1_len;
+
 extern GlobalProperty hw_compat_4_0[];
 extern const size_t hw_compat_4_0_len;
 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 43df7230a2..5d5636241e 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -293,6 +293,9 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
 int e820_get_num_entries(void);
 bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
 
+extern GlobalProperty pc_compat_4_0_1[];
+extern const size_t pc_compat_4_0_1_len;
+
 extern GlobalProperty pc_compat_4_0[];
 extern const size_t pc_compat_4_0_len;
 
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index edf44de21d..d082707dfa 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -234,7 +234,7 @@ typedef struct PCIDeviceClass {
      * This doesn't mean pci host switch.
      * When card bus bridge is supported, this would be enhanced.
      */
-    int is_bridge;
+    bool is_bridge;
 
     /* rom bar */
     const char *romfile;
@@ -395,7 +395,6 @@ typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
 #define TYPE_PCIE_BUS "PCIE"
 
 bool pci_bus_is_express(PCIBus *bus);
-bool pci_bus_allows_extended_config_space(PCIBus *bus);
 
 void pci_root_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent,
                               const char *name,
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index aea98d5040..0714f578af 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -17,12 +17,13 @@ typedef struct PCIBusClass {
 
     int (*bus_num)(PCIBus *bus);
     uint16_t (*numa_node)(PCIBus *bus);
-    bool (*allows_extended_config_space)(PCIBus *bus);
 } PCIBusClass;
 
 enum PCIBusFlags {
     /* This bus is the root of a PCI domain */
     PCI_BUS_IS_ROOT                                         = 0x0001,
+    /* PCIe extended configuration space is accessible on this bus */
+    PCI_BUS_EXTENDED_CONFIG_SPACE                           = 0x0002,
 };
 
 struct PCIBus {
@@ -57,4 +58,9 @@ static inline bool pci_bus_is_root(PCIBus *bus)
     return !!(bus->flags & PCI_BUS_IS_ROOT);
 }
 
+static inline bool pci_bus_allows_extended_config_space(PCIBus *bus)
+{
+    return !!(bus->flags & PCI_BUS_EXTENDED_CONFIG_SPACE);
+}
+
 #endif /* QEMU_PCI_BUS_H */
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7e32f309c2..4f5becf1f3 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -119,6 +119,7 @@ struct SpaprMachineClass {
     bool pre_2_10_has_unused_icps;
     bool legacy_irq_allocation;
     bool broken_host_serial_model; /* present real host info to the guest */
+    bool pre_4_1_migration; /* don't migrate hpt-max-page-size */
 
     void (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
                           uint64_t *buid, hwaddr *pio, 
@@ -849,6 +850,7 @@ extern const VMStateDescription vmstate_spapr_cap_dfp;
 extern const VMStateDescription vmstate_spapr_cap_cfpc;
 extern const VMStateDescription vmstate_spapr_cap_sbbc;
 extern const VMStateDescription vmstate_spapr_cap_ibs;
+extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
 extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
 extern const VMStateDescription vmstate_spapr_cap_large_decr;
 extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
index b855f74e44..14cab73c9c 100644
--- a/include/hw/ppc/spapr_irq.h
+++ b/include/hw/ppc/spapr_irq.h
@@ -48,6 +48,8 @@ typedef struct SpaprIrq {
     void (*reset)(SpaprMachineState *spapr, Error **errp);
     void (*set_irq)(void *opaque, int srcno, int val);
     const char *(*get_nodename)(SpaprMachineState *spapr);
+    void (*init_emu)(SpaprMachineState *spapr, Error **errp);
+    void (*init_kvm)(SpaprMachineState *spapr, Error **errp);
 } SpaprIrq;
 
 extern SpaprIrq spapr_irq_xics;
diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
index fc3e9652f9..b26befcf6b 100644
--- a/include/hw/ppc/spapr_xive.h
+++ b/include/hw/ppc/spapr_xive.h
@@ -38,16 +38,55 @@ typedef struct SpaprXive {
     /* TIMA mapping address */
     hwaddr        tm_base;
     MemoryRegion  tm_mmio;
+
+    /* KVM support */
+    int           fd;
+    void          *tm_mmap;
+    VMChangeStateEntry *change;
 } SpaprXive;
 
+/*
+ * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
+ * to the controller block id value. It can nevertheless be changed
+ * for testing purpose.
+ */
+#define SPAPR_XIVE_BLOCK_ID 0x0
+
 bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi);
 bool spapr_xive_irq_free(SpaprXive *xive, uint32_t lisn);
 void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon);
+int spapr_xive_post_load(SpaprXive *xive, int version_id);
 
 void spapr_xive_hcall_init(SpaprMachineState *spapr);
 void spapr_dt_xive(SpaprMachineState *spapr, uint32_t nr_servers, void *fdt,
                    uint32_t phandle);
 void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx);
 void spapr_xive_mmio_set_enabled(SpaprXive *xive, bool enable);
+void spapr_xive_map_mmio(SpaprXive *xive);
+
+int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
+                             uint32_t *out_server, uint8_t *out_prio);
+void spapr_xive_init(SpaprXive *xive, Error **errp);
+
+/*
+ * KVM XIVE device helpers
+ */
+void kvmppc_xive_connect(SpaprXive *xive, Error **errp);
+void kvmppc_xive_disconnect(SpaprXive *xive, Error **errp);
+void kvmppc_xive_reset(SpaprXive *xive, Error **errp);
+void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
+                                   Error **errp);
+void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp);
+uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+                            uint64_t data, bool write);
+void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
+                                 uint32_t end_idx, XiveEND *end,
+                                 Error **errp);
+void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
+                                 uint32_t end_idx, XiveEND *end,
+                                 Error **errp);
+void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp);
+int kvmppc_xive_pre_save(SpaprXive *xive);
+int kvmppc_xive_post_load(SpaprXive *xive, int version_id);
 
 #endif /* PPC_SPAPR_XIVE_H */
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index eb65ad7e43..d6f8e4c4c2 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -119,6 +119,7 @@ struct ICSState {
     uint32_t offset;
     ICSIRQState *irqs;
     XICSFabric *xics;
+    bool init; /* sPAPR ICS device initialized */
 };
 
 #define ICS_PROP_XICS "xics"
diff --git a/include/hw/ppc/xics_spapr.h b/include/hw/ppc/xics_spapr.h
index 15a8dcff66..2476b540ed 100644
--- a/include/hw/ppc/xics_spapr.h
+++ b/include/hw/ppc/xics_spapr.h
@@ -34,6 +34,7 @@
 void spapr_dt_xics(SpaprMachineState *spapr, uint32_t nr_servers, void *fdt,
                    uint32_t phandle);
 int xics_kvm_init(SpaprMachineState *spapr, Error **errp);
+void xics_kvm_disconnect(SpaprMachineState *spapr, Error **errp);
 void xics_spapr_init(SpaprMachineState *spapr);
 
 #endif /* XICS_SPAPR_H */
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index c4f27742ca..d872f96d1a 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -140,6 +140,7 @@
 #ifndef PPC_XIVE_H
 #define PPC_XIVE_H
 
+#include "sysemu/kvm.h"
 #include "hw/qdev-core.h"
 #include "hw/sysbus.h"
 #include "hw/ppc/xive_regs.h"
@@ -194,6 +195,9 @@ typedef struct XiveSource {
     uint32_t        esb_shift;
     MemoryRegion    esb_mmio;
 
+    /* KVM support */
+    void            *esb_mmap;
+
     XiveNotifier    *xive;
 } XiveSource;
 
@@ -423,4 +427,14 @@ static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx)
     return (nvt_blk << 19) | nvt_idx;
 }
 
+/*
+ * KVM XIVE device helpers
+ */
+
+void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
+void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
+void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
+void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
+void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp);
+
 #endif /* PPC_XIVE_H */
diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
index bf36678a24..1a8c5b5e64 100644
--- a/include/hw/ppc/xive_regs.h
+++ b/include/hw/ppc/xive_regs.h
@@ -208,6 +208,12 @@ typedef struct XiveEND {
 #define xive_end_is_backlog(end)  (be32_to_cpu((end)->w0) & END_W0_BACKLOG)
 #define xive_end_is_escalate(end) (be32_to_cpu((end)->w0) & END_W0_ESCALATE_CTL)
 
+static inline uint64_t xive_end_qaddr(XiveEND *end)
+{
+    return ((uint64_t) be32_to_cpu(end->w2) & 0x0fffffff) << 32 |
+        be32_to_cpu(end->w3);
+}
+
 /* Notification Virtual Target (NVT) */
 typedef struct XiveNVT {
         uint32_t        w0;
diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index b6758c852e..1eae5ab056 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -28,6 +28,7 @@ extern const PropertyInfo qdev_prop_blockdev_on_error;
 extern const PropertyInfo qdev_prop_bios_chs_trans;
 extern const PropertyInfo qdev_prop_fdc_drive_type;
 extern const PropertyInfo qdev_prop_drive;
+extern const PropertyInfo qdev_prop_drive_iothread;
 extern const PropertyInfo qdev_prop_netdev;
 extern const PropertyInfo qdev_prop_pci_devfn;
 extern const PropertyInfo qdev_prop_blocksize;
@@ -198,6 +199,8 @@ extern const PropertyInfo qdev_prop_pcie_link_width;
     DEFINE_PROP(_n, _s, _f, qdev_prop_netdev, NICPeers)
 #define DEFINE_PROP_DRIVE(_n, _s, _f) \
     DEFINE_PROP(_n, _s, _f, qdev_prop_drive, BlockBackend *)
+#define DEFINE_PROP_DRIVE_IOTHREAD(_n, _s, _f) \
+    DEFINE_PROP(_n, _s, _f, qdev_prop_drive_iothread, BlockBackend *)
 #define DEFINE_PROP_MACADDR(_n, _s, _f)         \
     DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, MACAddr)
 #define DEFINE_PROP_ON_OFF_AUTO(_n, _s, _f, _d) \
diff --git a/include/hw/riscv/sifive_e.h b/include/hw/riscv/sifive_e.h
index f715f8606f..3b14eb7462 100644
--- a/include/hw/riscv/sifive_e.h
+++ b/include/hw/riscv/sifive_e.h
@@ -19,6 +19,8 @@
 #ifndef HW_SIFIVE_E_H
 #define HW_SIFIVE_E_H
 
+#include "hw/riscv/sifive_gpio.h"
+
 #define TYPE_RISCV_E_SOC "riscv.sifive.e.soc"
 #define RISCV_E_SOC(obj) \
     OBJECT_CHECK(SiFiveESoCState, (obj), TYPE_RISCV_E_SOC)
@@ -30,6 +32,7 @@ typedef struct SiFiveESoCState {
     /*< public >*/
     RISCVHartArrayState cpus;
     DeviceState *plic;
+    SIFIVEGPIOState gpio;
 } SiFiveESoCState;
 
 typedef struct SiFiveEState {
@@ -63,8 +66,9 @@ enum {
 };
 
 enum {
-    SIFIVE_E_UART0_IRQ = 3,
-    SIFIVE_E_UART1_IRQ = 4
+    SIFIVE_E_UART0_IRQ  = 3,
+    SIFIVE_E_UART1_IRQ  = 4,
+    SIFIVE_E_GPIO0_IRQ0 = 8
 };
 
 #define SIFIVE_E_PLIC_HART_CONFIG "M"
diff --git a/include/hw/riscv/sifive_gpio.h b/include/hw/riscv/sifive_gpio.h
new file mode 100644
index 0000000000..fce03d6c41
--- /dev/null
+++ b/include/hw/riscv/sifive_gpio.h
@@ -0,0 +1,72 @@
+/*
+ * sifive System-on-Chip general purpose input/output register definition
+ *
+ * Copyright 2019 AdaCore
+ *
+ * Base on nrf51_gpio.c:
+ *
+ * Copyright 2018 Steffen Görtz <contrib@steffen-goertz.de>
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+#ifndef SIFIVE_GPIO_H
+#define SIFIVE_GPIO_H
+
+#include "hw/sysbus.h"
+#define TYPE_SIFIVE_GPIO "sifive_soc.gpio"
+#define SIFIVE_GPIO(obj) OBJECT_CHECK(SIFIVEGPIOState, (obj), TYPE_SIFIVE_GPIO)
+
+#define SIFIVE_GPIO_PINS 32
+
+#define SIFIVE_GPIO_SIZE 0x100
+
+#define SIFIVE_GPIO_REG_VALUE      0x000
+#define SIFIVE_GPIO_REG_INPUT_EN   0x004
+#define SIFIVE_GPIO_REG_OUTPUT_EN  0x008
+#define SIFIVE_GPIO_REG_PORT       0x00C
+#define SIFIVE_GPIO_REG_PUE        0x010
+#define SIFIVE_GPIO_REG_DS         0x014
+#define SIFIVE_GPIO_REG_RISE_IE    0x018
+#define SIFIVE_GPIO_REG_RISE_IP    0x01C
+#define SIFIVE_GPIO_REG_FALL_IE    0x020
+#define SIFIVE_GPIO_REG_FALL_IP    0x024
+#define SIFIVE_GPIO_REG_HIGH_IE    0x028
+#define SIFIVE_GPIO_REG_HIGH_IP    0x02C
+#define SIFIVE_GPIO_REG_LOW_IE     0x030
+#define SIFIVE_GPIO_REG_LOW_IP     0x034
+#define SIFIVE_GPIO_REG_IOF_EN     0x038
+#define SIFIVE_GPIO_REG_IOF_SEL    0x03C
+#define SIFIVE_GPIO_REG_OUT_XOR    0x040
+
+typedef struct SIFIVEGPIOState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion mmio;
+
+    qemu_irq irq[SIFIVE_GPIO_PINS];
+    qemu_irq output[SIFIVE_GPIO_PINS];
+
+    uint32_t value;             /* Actual value of the pin */
+    uint32_t input_en;
+    uint32_t output_en;
+    uint32_t port;              /* Pin value requested by the user */
+    uint32_t pue;
+    uint32_t ds;
+    uint32_t rise_ie;
+    uint32_t rise_ip;
+    uint32_t fall_ie;
+    uint32_t fall_ip;
+    uint32_t high_ie;
+    uint32_t high_ip;
+    uint32_t low_ie;
+    uint32_t low_ip;
+    uint32_t iof_en;
+    uint32_t iof_sel;
+    uint32_t out_xor;
+    uint32_t in;
+    uint32_t in_mask;
+
+} SIFIVEGPIOState;
+
+#endif
diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
index 568764b570..d01a1a85c4 100644
--- a/include/hw/riscv/virt.h
+++ b/include/hw/riscv/virt.h
@@ -74,9 +74,9 @@ enum {
                                FDT_PLIC_ADDR_CELLS + FDT_PLIC_INT_CELLS)
 
 #if defined(TARGET_RISCV32)
-#define VIRT_CPU TYPE_RISCV_CPU_RV32GCSU_V1_10_0
+#define VIRT_CPU TYPE_RISCV_CPU_BASE32
 #elif defined(TARGET_RISCV64)
-#define VIRT_CPU TYPE_RISCV_CPU_RV64GCSU_V1_10_0
+#define VIRT_CPU TYPE_RISCV_CPU_BASE64
 #endif
 
 #endif
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index acef25faa4..426566a5c6 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -88,6 +88,7 @@ struct SCSIDevice
     int scsi_version;
     int default_scsi_version;
     bool needs_vpd_bl_emulation;
+    bool hba_supports_iothread;
 };
 
 extern const VMStateDescription vmstate_scsi_device;
diff --git a/include/hw/semihosting/console.h b/include/hw/semihosting/console.h
new file mode 100644
index 0000000000..30e66ae20a
--- /dev/null
+++ b/include/hw/semihosting/console.h
@@ -0,0 +1,38 @@
+/*
+ * Semihosting Console
+ *
+ * Copyright (c) 2019 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef _SEMIHOST_CONSOLE_H_
+#define _SEMIHOST_CONSOLE_H_
+
+/**
+ * qemu_semihosting_console_out:
+ * @env: CPUArchState
+ * @s: host address of guest string
+ * @len: length of string or 0 (string is null terminated)
+ *
+ * Send a guest string to the debug console. This may be the remote
+ * gdb session if a softmmu guest is currently being debugged.
+ *
+ * Returns: number of bytes written.
+ */
+int qemu_semihosting_console_out(CPUArchState *env, target_ulong s, int len);
+
+/**
+ * qemu_semihosting_log_out:
+ * @s: pointer to string
+ * @len: length of string
+ *
+ * Send a string to the debug output. Unlike console_out these strings
+ * can't be sent to a remote gdb instance as they don't exist in guest
+ * memory.
+ *
+ * Returns: number of bytes written
+ */
+int qemu_semihosting_log_out(const char *s, int len);
+
+#endif /* _SEMIHOST_CONSOLE_H_ */
diff --git a/include/exec/semihost.h b/include/hw/semihosting/semihost.h
index 5980939c7b..60fc42d851 100644
--- a/include/exec/semihost.h
+++ b/include/hw/semihosting/semihost.h
@@ -51,12 +51,23 @@ static inline const char *semihosting_get_cmdline(void)
 {
     return NULL;
 }
-#else
+
+static inline Chardev *semihosting_get_chardev(void)
+{
+    return NULL;
+}
+#else /* !CONFIG_USER_ONLY */
 bool semihosting_enabled(void);
 SemihostingTarget semihosting_get_target(void);
 const char *semihosting_get_arg(int i);
 int semihosting_get_argc(void);
 const char *semihosting_get_cmdline(void);
-#endif
+void semihosting_arg_fallback(const char *file, const char *cmd);
+Chardev *semihosting_get_chardev(void);
+/* for vl.c hooks */
+void qemu_semihosting_enable(void);
+int qemu_semihosting_config_options(const char *opt);
+void qemu_semihosting_connect_chardevs(void);
+#endif /* CONFIG_USER_ONLY */
 
-#endif
+#endif /* SEMIHOST_H */
diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h
index 1aedcf05c9..4c668fbbdc 100644
--- a/include/hw/sysbus.h
+++ b/include/hw/sysbus.h
@@ -89,6 +89,7 @@ qemu_irq sysbus_get_connected_irq(SysBusDevice *dev, int n);
 void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr);
 void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr,
                              int priority);
+void sysbus_mmio_unmap(SysBusDevice *dev, int n);
 void sysbus_add_io(SysBusDevice *dev, hwaddr addr,
                    MemoryRegion *mem);
 MemoryRegion *sysbus_address_space(SysBusDevice *dev);
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index d6632a18e6..6f6670783f 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -170,4 +170,6 @@ int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev,
 int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev,
                                           struct vhost_iotlb_msg *imsg);
 
+int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd);
+
 #endif /* VHOST_BACKEND_H */
diff --git a/include/hw/virtio/vhost-scsi-common.h b/include/hw/virtio/vhost-scsi-common.h
index 57fb1d87b5..4eab767ee8 100644
--- a/include/hw/virtio/vhost-scsi-common.h
+++ b/include/hw/virtio/vhost-scsi-common.h
@@ -36,6 +36,7 @@ typedef struct VHostSCSICommon {
     int target;
     int lun;
     uint64_t host_features;
+    bool migratable;
 } VHostSCSICommon;
 
 int vhost_scsi_common_start(VHostSCSICommon *vsc);
diff --git a/include/hw/virtio/virtio-gpu-bswap.h b/include/hw/virtio/virtio-gpu-bswap.h
new file mode 100644
index 0000000000..38d12160f6
--- /dev/null
+++ b/include/hw/virtio/virtio-gpu-bswap.h
@@ -0,0 +1,61 @@
+/*
+ * Virtio GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2014
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VIRTIO_GPU_BSWAP_H
+#define HW_VIRTIO_GPU_BSWAP_H
+
+#include "qemu/bswap.h"
+
+static inline void
+virtio_gpu_ctrl_hdr_bswap(struct virtio_gpu_ctrl_hdr *hdr)
+{
+    le32_to_cpus(&hdr->type);
+    le32_to_cpus(&hdr->flags);
+    le64_to_cpus(&hdr->fence_id);
+    le32_to_cpus(&hdr->ctx_id);
+    le32_to_cpus(&hdr->padding);
+}
+
+static inline void
+virtio_gpu_bswap_32(void *ptr, size_t size)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+
+    size_t i;
+    struct virtio_gpu_ctrl_hdr *hdr = (struct virtio_gpu_ctrl_hdr *) ptr;
+
+    virtio_gpu_ctrl_hdr_bswap(hdr);
+
+    i = sizeof(struct virtio_gpu_ctrl_hdr);
+    while (i < size) {
+        le32_to_cpus((uint32_t *)(ptr + i));
+        i = i + sizeof(uint32_t);
+    }
+
+#endif
+}
+
+static inline void
+virtio_gpu_t2d_bswap(struct virtio_gpu_transfer_to_host_2d *t2d)
+{
+    virtio_gpu_ctrl_hdr_bswap(&t2d->hdr);
+    le32_to_cpus(&t2d->r.x);
+    le32_to_cpus(&t2d->r.y);
+    le32_to_cpus(&t2d->r.width);
+    le32_to_cpus(&t2d->r.height);
+    le64_to_cpus(&t2d->offset);
+    le32_to_cpus(&t2d->resource_id);
+    le32_to_cpus(&t2d->padding);
+}
+
+#endif
diff --git a/include/hw/virtio/virtio-gpu-pci.h b/include/hw/virtio/virtio-gpu-pci.h
new file mode 100644
index 0000000000..2f69b5a9cc
--- /dev/null
+++ b/include/hw/virtio/virtio-gpu-pci.h
@@ -0,0 +1,40 @@
+/*
+ * Virtio GPU PCI Device
+ *
+ * Copyright Red Hat, Inc. 2013-2014
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VIRTIO_GPU_PCI_H
+#define HW_VIRTIO_GPU_PCI_H
+
+#include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-gpu.h"
+
+typedef struct VirtIOGPUPCIBase VirtIOGPUPCIBase;
+
+/*
+ * virtio-gpu-pci-base: This extends VirtioPCIProxy.
+ */
+#define TYPE_VIRTIO_GPU_PCI_BASE "virtio-gpu-pci-base"
+#define VIRTIO_GPU_PCI_BASE(obj)                                    \
+    OBJECT_CHECK(VirtIOGPUPCIBase, (obj), TYPE_VIRTIO_GPU_PCI_BASE)
+
+struct VirtIOGPUPCIBase {
+    VirtIOPCIProxy parent_obj;
+    VirtIOGPUBase *vgpu;
+};
+
+/* to share between PCI and VGA */
+#define DEFINE_VIRTIO_GPU_PCI_PROPERTIES(_state)                \
+    DEFINE_PROP_BIT("ioeventfd", _state, flags,                 \
+                    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false),  \
+        DEFINE_PROP_UINT32("vectors", _state, nvectors, 3)
+
+#endif /* HW_VIRTIO_GPU_PCI_H */
diff --git a/include/hw/virtio/virtio-gpu-pixman.h b/include/hw/virtio/virtio-gpu-pixman.h
new file mode 100644
index 0000000000..4dba782758
--- /dev/null
+++ b/include/hw/virtio/virtio-gpu-pixman.h
@@ -0,0 +1,45 @@
+/*
+ * Virtio GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2014
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VIRTIO_GPU_PIXMAN_H
+#define HW_VIRTIO_GPU_PIXMAN_H
+
+#include "ui/qemu-pixman.h"
+#include "standard-headers/linux/virtio_gpu.h"
+
+static inline pixman_format_code_t
+virtio_gpu_get_pixman_format(uint32_t virtio_gpu_format)
+{
+    switch (virtio_gpu_format) {
+    case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM:
+        return PIXMAN_BE_b8g8r8x8;
+    case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM:
+        return PIXMAN_BE_b8g8r8a8;
+    case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM:
+        return PIXMAN_BE_x8r8g8b8;
+    case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM:
+        return PIXMAN_BE_a8r8g8b8;
+    case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM:
+        return PIXMAN_BE_r8g8b8x8;
+    case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM:
+        return PIXMAN_BE_r8g8b8a8;
+    case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM:
+        return PIXMAN_BE_x8b8g8r8;
+    case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM:
+        return PIXMAN_BE_a8b8g8r8;
+    default:
+        return 0;
+    }
+}
+
+#endif
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index 60425c5d58..8ecac1987a 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -19,13 +19,24 @@
 #include "ui/console.h"
 #include "hw/virtio/virtio.h"
 #include "qemu/log.h"
+#include "sysemu/vhost-user-backend.h"
 
 #include "standard-headers/linux/virtio_gpu.h"
 
+#define TYPE_VIRTIO_GPU_BASE "virtio-gpu-base"
+#define VIRTIO_GPU_BASE(obj)                                                \
+    OBJECT_CHECK(VirtIOGPUBase, (obj), TYPE_VIRTIO_GPU_BASE)
+#define VIRTIO_GPU_BASE_GET_CLASS(obj)                                      \
+    OBJECT_GET_CLASS(VirtIOGPUBaseClass, obj, TYPE_VIRTIO_GPU_BASE)
+#define VIRTIO_GPU_BASE_CLASS(klass)                                        \
+    OBJECT_CLASS_CHECK(VirtIOGPUBaseClass, klass, TYPE_VIRTIO_GPU_BASE)
+
 #define TYPE_VIRTIO_GPU "virtio-gpu-device"
 #define VIRTIO_GPU(obj)                                        \
         OBJECT_CHECK(VirtIOGPU, (obj), TYPE_VIRTIO_GPU)
 
+#define TYPE_VHOST_USER_GPU "vhost-user-gpu"
+
 #define VIRTIO_ID_GPU 16
 
 struct virtio_gpu_simple_resource {
@@ -58,7 +69,7 @@ struct virtio_gpu_requested_state {
     int x, y;
 };
 
-enum virtio_gpu_conf_flags {
+enum virtio_gpu_base_conf_flags {
     VIRTIO_GPU_FLAG_VIRGL_ENABLED = 1,
     VIRTIO_GPU_FLAG_STATS_ENABLED,
     VIRTIO_GPU_FLAG_EDID_ENABLED,
@@ -71,8 +82,7 @@ enum virtio_gpu_conf_flags {
 #define virtio_gpu_edid_enabled(_cfg) \
     (_cfg.flags & (1 << VIRTIO_GPU_FLAG_EDID_ENABLED))
 
-struct virtio_gpu_conf {
-    uint64_t max_hostmem;
+struct virtio_gpu_base_conf {
     uint32_t max_outputs;
     uint32_t flags;
     uint32_t xres;
@@ -88,31 +98,55 @@ struct virtio_gpu_ctrl_command {
     QTAILQ_ENTRY(virtio_gpu_ctrl_command) next;
 };
 
-typedef struct VirtIOGPU {
+typedef struct VirtIOGPUBase {
     VirtIODevice parent_obj;
 
-    QEMUBH *ctrl_bh;
-    QEMUBH *cursor_bh;
+    Error *migration_blocker;
+
+    struct virtio_gpu_base_conf conf;
+    struct virtio_gpu_config virtio_config;
+
+    bool use_virgl_renderer;
+    int renderer_blocked;
+    int enable;
+
+    struct virtio_gpu_scanout scanout[VIRTIO_GPU_MAX_SCANOUTS];
+
+    int enabled_output_bitmask;
+    struct virtio_gpu_requested_state req_state[VIRTIO_GPU_MAX_SCANOUTS];
+} VirtIOGPUBase;
+
+typedef struct VirtIOGPUBaseClass {
+    VirtioDeviceClass parent;
+
+    void (*gl_unblock)(VirtIOGPUBase *g);
+} VirtIOGPUBaseClass;
+
+#define VIRTIO_GPU_BASE_PROPERTIES(_state, _conf)                       \
+    DEFINE_PROP_UINT32("max_outputs", _state, _conf.max_outputs, 1),    \
+    DEFINE_PROP_BIT("edid", _state, _conf.flags, \
+                    VIRTIO_GPU_FLAG_EDID_ENABLED, false), \
+    DEFINE_PROP_UINT32("xres", _state, _conf.xres, 1024), \
+    DEFINE_PROP_UINT32("yres", _state, _conf.yres, 768)
+
+typedef struct VirtIOGPU {
+    VirtIOGPUBase parent_obj;
+
+    uint64_t conf_max_hostmem;
+
     VirtQueue *ctrl_vq;
     VirtQueue *cursor_vq;
 
-    int enable;
+    QEMUBH *ctrl_bh;
+    QEMUBH *cursor_bh;
 
     QTAILQ_HEAD(, virtio_gpu_simple_resource) reslist;
     QTAILQ_HEAD(, virtio_gpu_ctrl_command) cmdq;
     QTAILQ_HEAD(, virtio_gpu_ctrl_command) fenceq;
 
-    struct virtio_gpu_scanout scanout[VIRTIO_GPU_MAX_SCANOUTS];
-    struct virtio_gpu_requested_state req_state[VIRTIO_GPU_MAX_SCANOUTS];
-
-    struct virtio_gpu_conf conf;
     uint64_t hostmem;
-    int enabled_output_bitmask;
-    struct virtio_gpu_config virtio_config;
 
-    bool use_virgl_renderer;
     bool renderer_inited;
-    int renderer_blocked;
     bool renderer_reset;
     QEMUTimer *fence_poll;
     QEMUTimer *print_stats;
@@ -124,17 +158,19 @@ typedef struct VirtIOGPU {
         uint32_t req_3d;
         uint32_t bytes_3d;
     } stats;
-
-    Error *migration_blocker;
 } VirtIOGPU;
 
-extern const GraphicHwOps virtio_gpu_ops;
+typedef struct VhostUserGPU {
+    VirtIOGPUBase parent_obj;
+
+    VhostUserBackend *vhost;
+    int vhost_gpu_fd; /* closed by the chardev */
+    CharBackend vhost_chr;
+    QemuDmaBuf dmabuf[VIRTIO_GPU_MAX_SCANOUTS];
+    bool backend_blocked;
+} VhostUserGPU;
 
-/* to share between PCI and VGA */
-#define DEFINE_VIRTIO_GPU_PCI_PROPERTIES(_state)               \
-    DEFINE_PROP_BIT("ioeventfd", _state, flags,                \
-                    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false), \
-    DEFINE_PROP_UINT32("vectors", _state, nvectors, 3)
+extern const GraphicHwOps virtio_gpu_ops;
 
 #define VIRTIO_GPU_FILL_CMD(out) do {                                   \
         size_t s;                                                       \
@@ -148,6 +184,15 @@ extern const GraphicHwOps virtio_gpu_ops;
         }                                                               \
     } while (0)
 
+/* virtio-gpu-base.c */
+bool virtio_gpu_base_device_realize(DeviceState *qdev,
+                                    VirtIOHandleOutput ctrl_cb,
+                                    VirtIOHandleOutput cursor_cb,
+                                    Error **errp);
+void virtio_gpu_base_reset(VirtIOGPUBase *g);
+void virtio_gpu_base_fill_display_info(VirtIOGPUBase *g,
+                        struct virtio_gpu_resp_display_info *dpy_info);
+
 /* virtio-gpu.c */
 void virtio_gpu_ctrl_response(VirtIOGPU *g,
                               struct virtio_gpu_ctrl_command *cmd,
@@ -175,4 +220,5 @@ void virtio_gpu_virgl_fence_poll(VirtIOGPU *g);
 void virtio_gpu_virgl_reset(VirtIOGPU *g);
 int virtio_gpu_virgl_init(VirtIOGPU *g);
 int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g);
+
 #endif
diff --git a/include/sysemu/accel.h b/include/sysemu/accel.h
index 70e9e2f2a1..81293cdb08 100644
--- a/include/sysemu/accel.h
+++ b/include/sysemu/accel.h
@@ -36,7 +36,6 @@ typedef struct AccelClass {
     ObjectClass parent_class;
     /*< public >*/
 
-    const char *opt_name;
     const char *name;
     int (*init_machine)(MachineState *ms);
     void (*setup_post)(MachineState *ms, AccelState *accel);
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 938de34fe9..733c4957eb 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -76,7 +76,7 @@ typedef struct BlockBackendPublic {
     ThrottleGroupMember throttle_group_member;
 } BlockBackendPublic;
 
-BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm);
+BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
 BlockBackend *blk_new_open(const char *filename, const char *reference,
                            QDict *options, int flags, Error **errp);
 int blk_get_refcnt(BlockBackend *blk);
@@ -208,7 +208,8 @@ void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason);
 void blk_op_block_all(BlockBackend *blk, Error *reason);
 void blk_op_unblock_all(BlockBackend *blk, Error *reason);
 AioContext *blk_get_aio_context(BlockBackend *blk);
-void blk_set_aio_context(BlockBackend *blk, AioContext *new_context);
+int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
+                        Error **errp);
 void blk_add_aio_context_notifier(BlockBackend *blk,
         void (*attached_aio_context)(AioContext *new_context, void *opaque),
         void (*detach_aio_context)(void *opaque), void *opaque);
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 5f133cae83..61579ae71e 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -193,5 +193,6 @@ extern QemuOptsList qemu_nic_opts;
 extern QemuOptsList qemu_net_opts;
 extern QemuOptsList qemu_global_opts;
 extern QemuOptsList qemu_mon_opts;
+extern QemuOptsList qemu_semihosting_config_opts;
 
 #endif
diff --git a/include/ui/console.h b/include/ui/console.h
index fef900db76..f981696848 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -175,6 +175,7 @@ typedef struct QemuDmaBuf {
     uint32_t  height;
     uint32_t  stride;
     uint32_t  fourcc;
+    uint64_t  modifier;
     uint32_t  texture;
     bool      y0_top;
 } QemuDmaBuf;
diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h
index b976cb8728..d714127799 100644
--- a/include/ui/egl-helpers.h
+++ b/include/ui/egl-helpers.h
@@ -36,7 +36,8 @@ extern struct gbm_device *qemu_egl_rn_gbm_dev;
 extern EGLContext qemu_egl_rn_ctx;
 
 int egl_rendernode_init(const char *rendernode, DisplayGLMode mode);
-int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc);
+int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc,
+                           EGLuint64KHR *modifier);
 
 void egl_dmabuf_import_texture(QemuDmaBuf *dmabuf);
 void egl_dmabuf_release_texture(QemuDmaBuf *dmabuf);
diff --git a/linux-user/Makefile.objs b/linux-user/Makefile.objs
index 769b8d8336..285c5dfa17 100644
--- a/linux-user/Makefile.objs
+++ b/linux-user/Makefile.objs
@@ -6,4 +6,6 @@ obj-y = main.o syscall.o strace.o mmap.o signal.o \
 obj-$(TARGET_HAS_BFLT) += flatload.o
 obj-$(TARGET_I386) += vm86.o
 obj-$(TARGET_ARM) += arm/nwfpe/
+obj-$(TARGET_ARM) += arm/semihost.o
+obj-$(TARGET_AARCH64) += arm/semihost.o
 obj-$(TARGET_M68K) += m68k-sim.o
diff --git a/linux-user/arm/semihost.c b/linux-user/arm/semihost.c
new file mode 100644
index 0000000000..9554102a85
--- /dev/null
+++ b/linux-user/arm/semihost.c
@@ -0,0 +1,24 @@
+/*
+ * ARM Semihosting Console Support
+ *
+ * Copyright (c) 2019 Linaro Ltd
+ *
+ * Currently ARM is unique in having support for semihosting support
+ * in linux-user. So for now we implement the common console API but
+ * just for arm linux-user.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "hw/semihosting/console.h"
+#include "qemu.h"
+
+int qemu_semihosting_console_out(CPUArchState *env, target_ulong addr, int len)
+{
+    void *s = lock_user_string(addr);
+    len = write(STDERR_FILENO, s, len ? len : strlen(s));
+    unlock_user(s, addr, 0);
+    return len;
+}
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index a57b7049dd..5451d262ec 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1308,6 +1308,34 @@ static inline void init_thread(struct target_pt_regs *regs,
 #define ELF_DATA	ELFDATA2MSB
 #define ELF_ARCH	EM_S390
 
+#include "elf.h"
+
+#define ELF_HWCAP get_elf_hwcap()
+
+#define GET_FEATURE(_feat, _hwcap) \
+    do { if (s390_has_feat(_feat)) { hwcap |= _hwcap; } } while (0)
+
+static uint32_t get_elf_hwcap(void)
+{
+    /*
+     * Let's assume we always have esan3 and zarch.
+     * 31-bit processes can use 64-bit registers (high gprs).
+     */
+    uint32_t hwcap = HWCAP_S390_ESAN3 | HWCAP_S390_ZARCH | HWCAP_S390_HIGH_GPRS;
+
+    GET_FEATURE(S390_FEAT_STFLE, HWCAP_S390_STFLE);
+    GET_FEATURE(S390_FEAT_MSA, HWCAP_S390_MSA);
+    GET_FEATURE(S390_FEAT_LONG_DISPLACEMENT, HWCAP_S390_LDISP);
+    GET_FEATURE(S390_FEAT_EXTENDED_IMMEDIATE, HWCAP_S390_EIMM);
+    if (s390_has_feat(S390_FEAT_EXTENDED_TRANSLATION_3) &&
+        s390_has_feat(S390_FEAT_ETF3_ENH)) {
+        hwcap |= HWCAP_S390_ETF3EH;
+    }
+    GET_FEATURE(S390_FEAT_VECTOR, HWCAP_S390_VXRS);
+
+    return hwcap;
+}
+
 static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop)
 {
     regs->psw.addr = infop->entry;
diff --git a/linux-user/riscv/target_elf.h b/linux-user/riscv/target_elf.h
index a6716a6aac..9dd65652ee 100644
--- a/linux-user/riscv/target_elf.h
+++ b/linux-user/riscv/target_elf.h
@@ -9,6 +9,7 @@
 #define RISCV_TARGET_ELF_H
 static inline const char *cpu_get_model(uint32_t eflags)
 {
+    /* TYPE_RISCV_CPU_ANY */
     return "any";
 }
 #endif
diff --git a/linux-user/s390x/signal.c b/linux-user/s390x/signal.c
index 3d3cb67bbe..ecfa2a14a9 100644
--- a/linux-user/s390x/signal.c
+++ b/linux-user/s390x/signal.c
@@ -123,7 +123,7 @@ static void save_sigregs(CPUS390XState *env, target_sigregs *sregs)
      */
     //save_fp_regs(&current->thread.fp_regs); FIXME
     for (i = 0; i < 16; i++) {
-        __put_user(get_freg(env, i)->ll, &sregs->fpregs.fprs[i]);
+        __put_user(*get_freg(env, i), &sregs->fpregs.fprs[i]);
     }
 }
 
@@ -254,7 +254,7 @@ restore_sigregs(CPUS390XState *env, target_sigregs *sc)
         __get_user(env->aregs[i], &sc->regs.acrs[i]);
     }
     for (i = 0; i < 16; i++) {
-        __get_user(get_freg(env, i)->ll, &sc->fpregs.fprs[i]);
+        __get_user(*get_freg(env, i), &sc->fpregs.fprs[i]);
     }
 
     return err;
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 55212585e0..5e29e675e9 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -763,14 +763,7 @@ safe_syscall2(int, nanosleep, const struct timespec *, req,
 safe_syscall4(int, clock_nanosleep, const clockid_t, clock, int, flags,
               const struct timespec *, req, struct timespec *, rem)
 #endif
-#ifdef __NR_msgsnd
-safe_syscall4(int, msgsnd, int, msgid, const void *, msgp, size_t, sz,
-              int, flags)
-safe_syscall5(int, msgrcv, int, msgid, void *, msgp, size_t, sz,
-              long, msgtype, int, flags)
-safe_syscall4(int, semtimedop, int, semid, struct sembuf *, tsops,
-              unsigned, nsops, const struct timespec *, timeout)
-#else
+#if !defined(__NR_msgsnd) || !defined(__NR_msgrcv) || !defined(__NR_semtimedop)
 /* This host kernel architecture uses a single ipc syscall; fake up
  * wrappers for the sub-operations to hide this implementation detail.
  * Annoyingly we can't include linux/ipc.h to get the constant definitions
@@ -785,14 +778,29 @@ safe_syscall4(int, semtimedop, int, semid, struct sembuf *, tsops,
 
 safe_syscall6(int, ipc, int, call, long, first, long, second, long, third,
               void *, ptr, long, fifth)
+#endif
+#ifdef __NR_msgsnd
+safe_syscall4(int, msgsnd, int, msgid, const void *, msgp, size_t, sz,
+              int, flags)
+#else
 static int safe_msgsnd(int msgid, const void *msgp, size_t sz, int flags)
 {
     return safe_ipc(Q_IPCCALL(0, Q_MSGSND), msgid, sz, flags, (void *)msgp, 0);
 }
+#endif
+#ifdef __NR_msgrcv
+safe_syscall5(int, msgrcv, int, msgid, void *, msgp, size_t, sz,
+              long, msgtype, int, flags)
+#else
 static int safe_msgrcv(int msgid, void *msgp, size_t sz, long type, int flags)
 {
     return safe_ipc(Q_IPCCALL(1, Q_MSGRCV), msgid, sz, flags, msgp, type);
 }
+#endif
+#ifdef __NR_semtimedop
+safe_syscall4(int, semtimedop, int, semid, struct sembuf *, tsops,
+              unsigned, nsops, const struct timespec *, timeout)
+#else
 static int safe_semtimedop(int semid, struct sembuf *tsops, unsigned nsops,
                            const struct timespec *timeout)
 {
diff --git a/memory.c b/memory.c
index 3071c4bdad..0920c105aa 100644
--- a/memory.c
+++ b/memory.c
@@ -2027,14 +2027,6 @@ void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
     memory_region_transaction_commit();
 }
 
-bool memory_region_get_dirty(MemoryRegion *mr, hwaddr addr,
-                             hwaddr size, unsigned client)
-{
-    assert(mr->ram_block);
-    return cpu_physical_memory_get_dirty(memory_region_get_ram_addr(mr) + addr,
-                                         size, client);
-}
-
 void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr,
                              hwaddr size)
 {
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index d1bb863cb6..4a896a09eb 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -273,7 +273,6 @@ static int init_dirty_bitmap_migration(void)
     BlockDriverState *bs;
     BdrvDirtyBitmap *bitmap;
     DirtyBitmapMigBitmapState *dbms;
-    BdrvNextIterator it;
     Error *local_err = NULL;
 
     dirty_bitmap_mig_state.bulk_completed = false;
@@ -281,13 +280,8 @@ static int init_dirty_bitmap_migration(void)
     dirty_bitmap_mig_state.prev_bitmap = NULL;
     dirty_bitmap_mig_state.no_bitmaps = false;
 
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
-        const char *drive_name = bdrv_get_device_or_node_name(bs);
-
-        /* skip automatically inserted nodes */
-        while (bs && bs->drv && bs->implicit) {
-            bs = backing_bs(bs);
-        }
+    for (bs = bdrv_next_all_states(NULL); bs; bs = bdrv_next_all_states(bs)) {
+        const char *name = bdrv_get_device_or_node_name(bs);
 
         for (bitmap = bdrv_dirty_bitmap_next(bs, NULL); bitmap;
              bitmap = bdrv_dirty_bitmap_next(bs, bitmap))
@@ -296,7 +290,7 @@ static int init_dirty_bitmap_migration(void)
                 continue;
             }
 
-            if (drive_name == NULL) {
+            if (!name || strcmp(name, "") == 0) {
                 error_report("Found bitmap '%s' in unnamed node %p. It can't "
                              "be migrated", bdrv_dirty_bitmap_name(bitmap), bs);
                 goto fail;
@@ -313,7 +307,7 @@ static int init_dirty_bitmap_migration(void)
 
             dbms = g_new0(DirtyBitmapMigBitmapState, 1);
             dbms->bs = bs;
-            dbms->node_name = drive_name;
+            dbms->node_name = name;
             dbms->bitmap = bitmap;
             dbms->total_sectors = bdrv_nb_sectors(bs);
             dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
diff --git a/migration/block.c b/migration/block.c
index 83c633fb3f..91f98ef44a 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -417,7 +417,8 @@ static int init_blk_migration(QEMUFile *f)
         }
 
         bmds = g_new0(BlkMigDevState, 1);
-        bmds->blk = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
+        bmds->blk = blk_new(qemu_get_aio_context(),
+                            BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
         bmds->blk_name = g_strdup(bdrv_get_device_name(bs));
         bmds->bulk_completed = 0;
         bmds->total_sectors = sectors;
diff --git a/migration/fd.c b/migration/fd.c
index a7c13df4ad..0a29ecdebf 100644
--- a/migration/fd.c
+++ b/migration/fd.c
@@ -52,12 +52,14 @@ static gboolean fd_accept_incoming_migration(QIOChannel *ioc,
     return G_SOURCE_REMOVE;
 }
 
-void fd_start_incoming_migration(const char *infd, Error **errp)
+void fd_start_incoming_migration(const char *fdname, Error **errp)
 {
     QIOChannel *ioc;
-    int fd;
+    int fd = monitor_fd_param(cur_mon, fdname, errp);
+    if (fd == -1) {
+        return;
+    }
 
-    fd = strtol(infd, NULL, 0);
     trace_migration_fd_incoming(fd);
 
     ioc = qio_channel_new_fd(fd, errp);
diff --git a/migration/fd.h b/migration/fd.h
index a14a63ce2e..b901bc014e 100644
--- a/migration/fd.h
+++ b/migration/fd.h
@@ -16,7 +16,7 @@
 
 #ifndef QEMU_MIGRATION_FD_H
 #define QEMU_MIGRATION_FD_H
-void fd_start_incoming_migration(const char *path, Error **errp);
+void fd_start_incoming_migration(const char *fdname, Error **errp);
 
 void fd_start_outgoing_migration(MigrationState *s, const char *fdname,
                                  Error **errp);
diff --git a/migration/ram.c b/migration/ram.c
index 4c60869226..908517fc2b 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -661,8 +661,6 @@ typedef struct {
     uint64_t num_packets;
     /* pages sent through this channel */
     uint64_t num_pages;
-    /* syncs main thread and channels */
-    QemuSemaphore sem_sync;
 }  MultiFDSendParams;
 
 typedef struct {
@@ -894,8 +892,6 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
 
 struct {
     MultiFDSendParams *params;
-    /* number of created threads */
-    int count;
     /* array of pages to sent */
     MultiFDPages_t *pages;
     /* syncs main thread and channels */
@@ -1027,7 +1023,6 @@ void multifd_save_cleanup(void)
         p->c = NULL;
         qemu_mutex_destroy(&p->mutex);
         qemu_sem_destroy(&p->sem);
-        qemu_sem_destroy(&p->sem_sync);
         g_free(p->name);
         p->name = NULL;
         multifd_pages_clear(p->pages);
@@ -1174,8 +1169,6 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
         p->running = true;
         qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
                            QEMU_THREAD_JOINABLE);
-
-        atomic_inc(&multifd_send_state->count);
     }
 }
 
@@ -1191,7 +1184,6 @@ int multifd_save_setup(void)
     thread_count = migrate_multifd_channels();
     multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
     multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
-    atomic_set(&multifd_send_state->count, 0);
     multifd_send_state->pages = multifd_pages_init(page_count);
     qemu_sem_init(&multifd_send_state->sem_sync, 0);
     qemu_sem_init(&multifd_send_state->channels_ready, 0);
@@ -1201,7 +1193,6 @@ int multifd_save_setup(void)
 
         qemu_mutex_init(&p->mutex);
         qemu_sem_init(&p->sem, 0);
-        qemu_sem_init(&p->sem_sync, 0);
         p->quit = false;
         p->pending_job = 0;
         p->id = i;
@@ -2236,7 +2227,7 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
 }
 
 /**
- * get_queued_page: unqueue a page from the postocpy requests
+ * get_queued_page: unqueue a page from the postcopy requests
  *
  * Skips pages that are already sent (!dirty)
  *
@@ -3182,11 +3173,11 @@ static int ram_state_init(RAMState **rsp)
     QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
 
     /*
-     * Count the total number of pages used by ram blocks not including any
-     * gaps due to alignment or unplugs.
+     * This must match with the initial values of dirty bitmap.
+     * Currently we initialize the dirty bitmap to all zeros so
+     * here the total dirty page count is zero.
      */
-    (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
-
+    (*rsp)->migration_dirty_pages = 0;
     ram_state_reset(*rsp);
 
     return 0;
@@ -3201,8 +3192,16 @@ static void ram_list_init_bitmaps(void)
     if (ram_bytes_total()) {
         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
             pages = block->max_length >> TARGET_PAGE_BITS;
+            /*
+             * The initial dirty bitmap for migration must be set with all
+             * ones to make sure we'll migrate every guest RAM page to
+             * destination.
+             * Here we didn't set RAMBlock.bmap simply because it is already
+             * set in ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION] in
+             * ram_block_add, and that's where we'll sync the dirty bitmaps.
+             * Here setting RAMBlock.bmap would be fine too but not necessary.
+             */
             block->bmap = bitmap_new(pages);
-            bitmap_set(block->bmap, 0, pages);
             if (migrate_postcopy_ram()) {
                 block->unsentmap = bitmap_new(pages);
                 bitmap_set(block->unsentmap, 0, pages);
diff --git a/nbd/server.c b/nbd/server.c
index e21bd501dc..aeca3893fe 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1484,13 +1484,15 @@ NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset,
     if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
         perm |= BLK_PERM_WRITE;
     }
-    blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
-                        BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
+    blk = blk_new(bdrv_get_aio_context(bs), perm,
+                  BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+                  BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto fail;
     }
     blk_set_enable_write_cache(blk, !writethrough);
+    blk_set_allow_aio_context_change(blk, true);
 
     exp->refcount = 1;
     QTAILQ_INIT(&exp->clients);
diff --git a/numa.c b/numa.c
index 3875e1efda..955ec0c830 100644
--- a/numa.c
+++ b/numa.c
@@ -470,8 +470,8 @@ void numa_cpu_pre_plug(const CPUArchId *slot, DeviceState *dev, Error **errp)
                                     "node-id", errp);
         }
     } else if (node_id != slot->props.node_id) {
-        error_setg(errp, "node-id=%d must match numa node specified "
-                   "with -numa option", node_id);
+        error_setg(errp, "invalid node-id, must be %"PRId64,
+                   slot->props.node_id);
     }
 }
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 7ccbfff9d0..1defcde048 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2004,18 +2004,34 @@
             '*persistent': 'bool', '*autoload': 'bool', '*disabled': 'bool' } }
 
 ##
+# @BlockDirtyBitmapMergeSource:
+#
+# @local: name of the bitmap, attached to the same node as target bitmap.
+#
+# @external: bitmap with specified node
+#
+# Since: 4.1
+##
+{ 'alternate': 'BlockDirtyBitmapMergeSource',
+  'data': { 'local': 'str',
+            'external': 'BlockDirtyBitmap' } }
+
+##
 # @BlockDirtyBitmapMerge:
 #
-# @node: name of device/node which the bitmap is tracking
+# @node: name of device/node which the @target bitmap is tracking
 #
 # @target: name of the destination dirty bitmap
 #
-# @bitmaps: name(s) of the source dirty bitmap(s)
+# @bitmaps: name(s) of the source dirty bitmap(s) at @node and/or fully
+#           specifed BlockDirtyBitmap elements. The latter are supported
+#           since 4.1.
 #
 # Since: 4.0
 ##
 { 'struct': 'BlockDirtyBitmapMerge',
-  'data': { 'node': 'str', 'target': 'str', 'bitmaps': ['str'] } }
+  'data': { 'node': 'str', 'target': 'str',
+            'bitmaps': ['BlockDirtyBitmapMergeSource'] } }
 
 ##
 # @block-dirty-bitmap-add:
@@ -3215,6 +3231,8 @@
 #
 # @cor_write: a write due to copy-on-read (since 2.11)
 #
+# @cluster_alloc_space: an allocation of file space for a cluster (since 4.1)
+#
 # Since: 2.9
 ##
 { 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG',
@@ -3233,7 +3251,7 @@
             'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev',
             'pwritev_zero', 'pwritev_done', 'empty_image_prepare',
             'l1_shrink_write_table', 'l1_shrink_free_l2_clusters',
-            'cor_write'] }
+            'cor_write', 'cluster_alloc_space'] }
 
 ##
 # @BlkdebugInjectErrorOptions:
diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi
index 90cb677849..50292d820b 100644
--- a/qemu-deprecated.texi
+++ b/qemu-deprecated.texi
@@ -138,6 +138,21 @@ The ``acl_show'', ``acl_reset'', ``acl_policy'', ``acl_add'', and
 ``acl_remove'' commands are deprecated with no replacement. Authorization
 for VNC should be performed using the pluggable QAuthZ objects.
 
+@section System emulator CPUS
+
+@subsection RISC-V ISA CPUs (since 4.1)
+
+The RISC-V cpus with the ISA version in the CPU name have been depcreated. The
+four CPUs are: ``rv32gcsu-v1.9.1``, ``rv32gcsu-v1.10.0``, ``rv64gcsu-v1.9.1`` and
+``rv64gcsu-v1.10.0``. Instead the version can be specified via the CPU ``priv_spec``
+option when using the ``rv32`` or ``rv64`` CPUs.
+
+@subsection RISC-V ISA CPUs (since 4.1)
+
+The RISC-V no MMU cpus have been depcreated. The two CPUs: ``rv32imacu-nommu`` and
+``rv64imacu-nommu`` should no longer be used. Instead the MMU status can be specified
+via the CPU ``mmu`` option when using the ``rv32`` or ``rv64`` CPUs.
+
 @section System emulator devices
 
 @subsection bluetooth (since 3.1)
@@ -160,6 +175,12 @@ This machine type uses an unmaintained firmware, broken in lots of ways,
 and unable to start post-2004 operating systems. 40p machine type should be
 used instead.
 
+@subsection spike_v1.9.1 and spike_v1.10 (since 4.1)
+
+The version specific Spike machines have been deprecated in favour of the
+generic ``spike`` machine. If you need to specify an older version of the RISC-V
+spec you can use the ``-cpu rv64gcsu,priv_spec=v1.9.1`` command line argument.
+
 @section Device options
 
 @subsection Block device options
diff --git a/qemu-img.c b/qemu-img.c
index 28fba1e7a7..07b6e2a808 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3164,7 +3164,7 @@ static int img_rebase(int argc, char **argv)
     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
     uint8_t *buf_old = NULL;
     uint8_t *buf_new = NULL;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
     char *filename;
     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
     int c, flags, src_flags, ret;
@@ -3309,29 +3309,19 @@ static int img_rebase(int argc, char **argv)
 
     /* For safe rebasing we need to compare old and new backing file */
     if (!unsafe) {
-        char backing_name[PATH_MAX];
         QDict *options = NULL;
-
-        if (bs->backing) {
-            if (bs->backing_format[0] != '\0') {
-                options = qdict_new();
-                qdict_put_str(options, "driver", bs->backing_format);
-            }
-
-            if (force_share) {
-                if (!options) {
-                    options = qdict_new();
-                }
-                qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
-            }
-            bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
-            blk_old_backing = blk_new_open(backing_name, NULL,
-                                           options, src_flags, &local_err);
-            if (!blk_old_backing) {
+        BlockDriverState *base_bs = backing_bs(bs);
+
+        if (base_bs) {
+            blk_old_backing = blk_new(qemu_get_aio_context(),
+                                      BLK_PERM_CONSISTENT_READ,
+                                      BLK_PERM_ALL);
+            ret = blk_insert_bs(blk_old_backing, base_bs,
+                                &local_err);
+            if (ret < 0) {
                 error_reportf_err(local_err,
-                                  "Could not open old backing file '%s': ",
-                                  backing_name);
-                ret = -1;
+                                  "Could not reuse old backing file '%s': ",
+                                  base_bs->filename);
                 goto out;
             }
         } else {
@@ -3364,15 +3354,35 @@ static int img_rebase(int argc, char **argv)
                 goto out;
             }
 
-            blk_new_backing = blk_new_open(out_real_path, NULL,
-                                           options, src_flags, &local_err);
-            g_free(out_real_path);
-            if (!blk_new_backing) {
-                error_reportf_err(local_err,
-                                  "Could not open new backing file '%s': ",
-                                  out_baseimg);
-                ret = -1;
-                goto out;
+            /*
+             * Find out whether we rebase an image on top of a previous image
+             * in its chain.
+             */
+            prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
+            if (prefix_chain_bs) {
+                g_free(out_real_path);
+                blk_new_backing = blk_new(qemu_get_aio_context(),
+                                          BLK_PERM_CONSISTENT_READ,
+                                          BLK_PERM_ALL);
+                ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
+                                    &local_err);
+                if (ret < 0) {
+                    error_reportf_err(local_err,
+                                      "Could not reuse backing file '%s': ",
+                                      out_baseimg);
+                    goto out;
+                }
+            } else {
+                blk_new_backing = blk_new_open(out_real_path, NULL,
+                                               options, src_flags, &local_err);
+                g_free(out_real_path);
+                if (!blk_new_backing) {
+                    error_reportf_err(local_err,
+                                      "Could not open new backing file '%s': ",
+                                      out_baseimg);
+                    ret = -1;
+                    goto out;
+                }
             }
         }
     }
@@ -3448,6 +3458,23 @@ static int img_rebase(int argc, char **argv)
                 continue;
             }
 
+            if (prefix_chain_bs) {
+                /*
+                 * If cluster wasn't changed since prefix_chain, we don't need
+                 * to take action
+                 */
+                ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs,
+                                              offset, n, &n);
+                if (ret < 0) {
+                    error_report("error while reading image metadata: %s",
+                                 strerror(-ret));
+                    goto out;
+                }
+                if (!ret) {
+                    continue;
+                }
+            }
+
             /*
              * Read old and new backing file and take into consideration that
              * backing files may be smaller than the COW image.
diff --git a/qemu-options.hx b/qemu-options.hx
index 7ae3373a00..0d8beb4afd 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2080,7 +2080,7 @@ Specify SMBIOS type 0 fields
 @item -smbios type=1[,manufacturer=@var{str}][,product=@var{str}][,version=@var{str}][,serial=@var{str}][,uuid=@var{uuid}][,sku=@var{str}][,family=@var{str}]
 Specify SMBIOS type 1 fields
 
-@item -smbios type=2[,manufacturer=@var{str}][,product=@var{str}][,version=@var{str}][,serial=@var{str}][,asset=@var{str}][,location=@var{str}][,family=@var{str}]
+@item -smbios type=2[,manufacturer=@var{str}][,product=@var{str}][,version=@var{str}][,serial=@var{str}][,asset=@var{str}][,location=@var{str}]
 Specify SMBIOS type 2 fields
 
 @item -smbios type=3[,manufacturer=@var{str}][,version=@var{str}][,serial=@var{str}][,asset=@var{str}][,sku=@var{str}]
@@ -4025,12 +4025,12 @@ STEXI
 Enable semihosting mode (ARM, M68K, Xtensa, MIPS, Nios II only).
 ETEXI
 DEF("semihosting-config", HAS_ARG, QEMU_OPTION_semihosting_config,
-    "-semihosting-config [enable=on|off][,target=native|gdb|auto][,arg=str[,...]]\n" \
+    "-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,arg=str[,...]]\n" \
     "                semihosting configuration\n",
 QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | QEMU_ARCH_LM32 |
 QEMU_ARCH_MIPS | QEMU_ARCH_NIOS2)
 STEXI
-@item -semihosting-config [enable=on|off][,target=native|gdb|auto][,arg=str[,...]]
+@item -semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,arg=str[,...]]
 @findex -semihosting-config
 Enable and configure semihosting (ARM, M68K, Xtensa, MIPS, Nios II only).
 @table @option
@@ -4038,6 +4038,8 @@ Enable and configure semihosting (ARM, M68K, Xtensa, MIPS, Nios II only).
 Defines where the semihosting calls will be addressed, to QEMU (@code{native})
 or to GDB (@code{gdb}). The default is @code{auto}, which means @code{gdb}
 during debug sessions and @code{native} otherwise.
+@item chardev=@var{str1}
+Send the output to a chardev backend output for native or auto output when not in gdb
 @item arg=@var{str1},arg=@var{str2},...
 Allows the user to pass input arguments, and can be used multiple times to build
 up a list. The old-style @code{-kernel}/@code{-append} method of passing a
diff --git a/qom/object.c b/qom/object.c
index 99c4fa707e..3966a3d461 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -28,6 +28,7 @@
 #include "qapi/qmp/qbool.h"
 #include "qapi/qmp/qnum.h"
 #include "qapi/qmp/qstring.h"
+#include "qemu/error-report.h"
 
 #define MAX_INTERFACES 32
 
@@ -448,7 +449,6 @@ static void object_initialize_with_type(void *data, size_t size, TypeImpl *type)
 {
     Object *obj = data;
 
-    g_assert(type != NULL);
     type_initialize(type);
 
     g_assert(type->instance_size >= sizeof(Object));
@@ -468,6 +468,11 @@ void object_initialize(void *data, size_t size, const char *typename)
 {
     TypeImpl *type = type_get_by_name(typename);
 
+    if (!type) {
+        error_report("missing object type '%s'", typename);
+        abort();
+    }
+
     object_initialize_with_type(data, size, type);
 }
 
diff --git a/rules.mak b/rules.mak
index df45bcffb4..967295dd2b 100644
--- a/rules.mak
+++ b/rules.mak
@@ -144,7 +144,7 @@ cc-option = $(if $(shell $(CC) $1 $2 -S -o /dev/null -xc /dev/null \
 cc-c-option = $(if $(shell $(CC) $1 $2 -c -o /dev/null -xc /dev/null \
                 >/dev/null 2>&1 && echo OK), $2, $3)
 
-VPATH_SUFFIXES = %.c %.h %.S %.cc %.cpp %.m %.mak %.texi %.sh %.rc Kconfig%
+VPATH_SUFFIXES = %.c %.h %.S %.cc %.cpp %.m %.mak %.texi %.sh %.rc Kconfig% %.json.in
 set-vpath = $(if $1,$(foreach PATTERN,$(VPATH_SUFFIXES),$(eval vpath $(PATTERN) $1)))
 
 # install-prog list, dir
@@ -392,3 +392,10 @@ TEXI2MAN = $(call quiet-command, \
 	$(call TEXI2MAN)
 %.8:
 	$(call TEXI2MAN)
+
+GEN_SUBST = $(call quiet-command, \
+	sed -e "s!@libexecdir@!$(libexecdir)!g" < $< > $@, \
+	"GEN","$@")
+
+%.json: %.json.in
+	$(call GEN_SUBST)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 88682cb0a9..c2aaf421da 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1949,7 +1949,8 @@ sub process {
 		}
 
 # no C99 // comments
-		if ($line =~ m{//}) {
+		if ($line =~ m{//} &&
+		    $rawline !~ m{// SPDX-License-Identifier: }) {
 			ERROR("do not use C99 // comments\n" . $herecurr);
 		}
 		# Remove C99 comments.
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index 73452ad265..9c7393b08c 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -40,3 +40,4 @@ stub-obj-y += pci-host-piix.o
 stub-obj-y += ram-block.o
 stub-obj-y += ramfb.o
 stub-obj-y += fw_cfg.o
+stub-obj-$(CONFIG_SOFTMMU) += semihost.o
diff --git a/stubs/semihost.c b/stubs/semihost.c
new file mode 100644
index 0000000000..4d5b3c0653
--- /dev/null
+++ b/stubs/semihost.c
@@ -0,0 +1,70 @@
+/*
+ * Semihosting Stubs for SoftMMU
+ *
+ * Copyright (c) 2019 Linaro Ltd
+ *
+ * Stubs for SoftMMU targets that don't actually do semihosting.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/option.h"
+#include "qemu/error-report.h"
+#include "hw/semihosting/semihost.h"
+
+/* Empty config */
+QemuOptsList qemu_semihosting_config_opts = {
+    .name = "",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_semihosting_config_opts.head),
+    .desc = {
+        { /* end of list */ }
+    },
+};
+
+/* Queries to config status default to off */
+bool semihosting_enabled(void)
+{
+    return false;
+}
+
+SemihostingTarget semihosting_get_target(void)
+{
+    return SEMIHOSTING_TARGET_AUTO;
+}
+
+/*
+ * All the rest are empty subs. We could g_assert_not_reached() but
+ * that adds extra weight to the final binary. Waste not want not.
+ */
+void qemu_semihosting_enable(void)
+{
+}
+
+int qemu_semihosting_config_options(const char *optarg)
+{
+    return 1;
+}
+
+const char *semihosting_get_arg(int i)
+{
+    return NULL;
+}
+
+int semihosting_get_argc(void)
+{
+    return 0;
+}
+
+const char *semihosting_get_cmdline(void)
+{
+    return NULL;
+}
+
+void semihosting_arg_fallback(const char *file, const char *cmd)
+{
+}
+
+void qemu_semihosting_connect_chardevs(void)
+{
+}
diff --git a/target/arm/arm-semi.c b/target/arm/arm-semi.c
index ddb94e0aba..53e807ab72 100644
--- a/target/arm/arm-semi.c
+++ b/target/arm/arm-semi.c
@@ -2,6 +2,7 @@
  *  Arm "Angel" semihosting syscalls
  *
  *  Copyright (c) 2005, 2007 CodeSourcery.
+ *  Copyright (c) 2019 Linaro
  *  Written by Paul Brook.
  *
  *  This program is free software; you can redistribute it and/or modify
@@ -16,12 +17,18 @@
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *  ARM Semihosting is documented in:
+ *     Semihosting for AArch32 and AArch64 Release 2.0
+ *     https://static.docs.arm.com/100863/0200/semihosting.pdf
  */
 
 #include "qemu/osdep.h"
 
 #include "cpu.h"
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
+#include "hw/semihosting/console.h"
+#include "qemu/log.h"
 #ifdef CONFIG_USER_ONLY
 #include "qemu.h"
 
@@ -239,6 +246,15 @@ static target_ulong arm_gdb_syscall(ARMCPU *cpu, gdb_syscall_complete_cb cb,
      put_user_u64(val, args + (n) * 8) :                \
      put_user_u32(val, args + (n) * 4))
 
+/*
+ * Do a semihosting call.
+ *
+ * The specification always says that the "return register" either
+ * returns a specific value or is corrupted, so we don't need to
+ * report to our caller whether we are returning a value or trying to
+ * leave the register unchanged. We use 0xdeadbeef as the return value
+ * when there isn't a defined return value for the call.
+ */
 target_ulong do_arm_semihosting(CPUARMState *env)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
@@ -299,32 +315,10 @@ target_ulong do_arm_semihosting(CPUARMState *env)
             return set_swi_errno(ts, close(arg0));
         }
     case TARGET_SYS_WRITEC:
-        {
-          char c;
-
-          if (get_user_u8(c, args))
-              /* FIXME - should this error code be -TARGET_EFAULT ? */
-              return (uint32_t)-1;
-          /* Write to debug console.  stderr is near enough.  */
-          if (use_gdb_syscalls()) {
-                return arm_gdb_syscall(cpu, arm_semi_cb, "write,2,%x,1", args);
-          } else {
-                return write(STDERR_FILENO, &c, 1);
-          }
-        }
+        qemu_semihosting_console_out(env, args, 1);
+        return 0xdeadbeef;
     case TARGET_SYS_WRITE0:
-        if (!(s = lock_user_string(args)))
-            /* FIXME - should this error code be -TARGET_EFAULT ? */
-            return (uint32_t)-1;
-        len = strlen(s);
-        if (use_gdb_syscalls()) {
-            return arm_gdb_syscall(cpu, arm_semi_cb, "write,2,%x,%x",
-                                   args, len);
-        } else {
-            ret = write(STDERR_FILENO, s, len);
-        }
-        unlock_user(s, args, 0);
-        return ret;
+        return qemu_semihosting_console_out(env, args, 0);
     case TARGET_SYS_WRITE:
         GET_ARG(0);
         GET_ARG(1);
@@ -337,13 +331,15 @@ target_ulong do_arm_semihosting(CPUARMState *env)
         } else {
             s = lock_user(VERIFY_READ, arg1, len, 1);
             if (!s) {
-                /* FIXME - should this error code be -TARGET_EFAULT ? */
-                return (uint32_t)-1;
+                /* Return bytes not written on error */
+                return len;
             }
             ret = set_swi_errno(ts, write(arg0, s, len));
             unlock_user(s, arg1, 0);
-            if (ret == (uint32_t)-1)
-                return -1;
+            if (ret == (uint32_t)-1) {
+                ret = 0;
+            }
+            /* Return bytes not written */
             return len - ret;
         }
     case TARGET_SYS_READ:
@@ -358,19 +354,21 @@ target_ulong do_arm_semihosting(CPUARMState *env)
         } else {
             s = lock_user(VERIFY_WRITE, arg1, len, 0);
             if (!s) {
-                /* FIXME - should this error code be -TARGET_EFAULT ? */
-                return (uint32_t)-1;
+                /* return bytes not read */
+                return len;
             }
             do {
                 ret = set_swi_errno(ts, read(arg0, s, len));
             } while (ret == -1 && errno == EINTR);
             unlock_user(s, arg1, len);
-            if (ret == (uint32_t)-1)
-                return -1;
+            if (ret == (uint32_t)-1) {
+                ret = 0;
+            }
+            /* Return bytes not read */
             return len - ret;
         }
     case TARGET_SYS_READC:
-       /* XXX: Read from debug console. Not implemented.  */
+        qemu_log_mask(LOG_UNIMP, "%s: SYS_READC not implemented", __func__);
         return 0;
     case TARGET_SYS_ISTTY:
         GET_ARG(0);
@@ -404,7 +402,7 @@ target_ulong do_arm_semihosting(CPUARMState *env)
             return buf.st_size;
         }
     case TARGET_SYS_TMPNAM:
-        /* XXX: Not implemented.  */
+        qemu_log_mask(LOG_UNIMP, "%s: SYS_TMPNAM not implemented", __func__);
         return -1;
     case TARGET_SYS_REMOVE:
         GET_ARG(0);
@@ -509,14 +507,16 @@ target_ulong do_arm_semihosting(CPUARMState *env)
 
             output_size = ts->info->arg_end - ts->info->arg_start;
             if (!output_size) {
-                /* We special-case the "empty command line" case (argc==0).
-                   Just provide the terminating 0. */
+                /*
+                 * We special-case the "empty command line" case (argc==0).
+                 * Just provide the terminating 0.
+                 */
                 output_size = 1;
             }
 #endif
 
             if (output_size > input_size) {
-                 /* Not enough space to store command-line arguments.  */
+                /* Not enough space to store command-line arguments.  */
                 return -1;
             }
 
@@ -570,8 +570,10 @@ target_ulong do_arm_semihosting(CPUARMState *env)
             GET_ARG(0);
 
 #ifdef CONFIG_USER_ONLY
-            /* Some C libraries assume the heap immediately follows .bss, so
-               allocate it using sbrk.  */
+            /*
+             * Some C libraries assume the heap immediately follows .bss, so
+             * allocate it using sbrk.
+             */
             if (!ts->heap_limit) {
                 abi_ulong ret;
 
@@ -619,7 +621,8 @@ target_ulong do_arm_semihosting(CPUARMState *env)
         }
     case TARGET_SYS_EXIT:
         if (is_a64(env)) {
-            /* The A64 version of this call takes a parameter block,
+            /*
+             * The A64 version of this call takes a parameter block,
              * so the application-exit type can return a subcode which
              * is the exit status code from the application.
              */
@@ -632,14 +635,17 @@ target_ulong do_arm_semihosting(CPUARMState *env)
                 ret = 1;
             }
         } else {
-            /* ARM specifies only Stopped_ApplicationExit as normal
-             * exit, everything else is considered an error */
+            /*
+             * ARM specifies only Stopped_ApplicationExit as normal
+             * exit, everything else is considered an error
+             */
             ret = (args == ADP_Stopped_ApplicationExit) ? 0 : 1;
         }
         gdb_exit(env, ret);
         exit(ret);
     case TARGET_SYS_SYNCCACHE:
-        /* Clean the D-cache and invalidate the I-cache for the specified
+        /*
+         * Clean the D-cache and invalidate the I-cache for the specified
          * virtual address range. This is a nop for us since we don't
          * implement caches. This is only present on A64.
          */
diff --git a/target/arm/helper.c b/target/arm/helper.c
index acd23c53ca..719fb92e60 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -16,7 +16,7 @@
 #include "exec/cpu_ldst.h"
 #include "arm_ldst.h"
 #include <zlib.h> /* For crc32 */
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
 #include "sysemu/cpus.h"
 #include "sysemu/kvm.h"
 #include "fpu/softfloat.h"
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 42999c5801..092f0df3c4 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -29,7 +29,7 @@
 #include "qemu/host-utils.h"
 #include "qemu/qemu-print.h"
 
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
 #include "exec/gen-icount.h"
 
 #include "exec/helper-proto.h"
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 298c262825..d240c1b714 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -30,7 +30,7 @@
 #include "qemu/bitops.h"
 #include "qemu/qemu-print.h"
 #include "arm_ldst.h"
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 536d7d1520..c1ab86d63e 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -4730,6 +4730,9 @@ static void x86_cpu_reset(CPUState *s)
 
     env->pat = 0x0007040600070406ULL;
     env->msr_ia32_misc_enable = MSR_IA32_MISC_ENABLE_DEFAULT;
+    if (env->features[FEAT_1_ECX] & CPUID_EXT_MONITOR) {
+        env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_MWAIT;
+    }
 
     memset(env->dr, 0, sizeof(env->dr));
     env->dr[6] = DR6_FIXED_1;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index fce6660bac..bd06523a53 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -387,6 +387,7 @@ typedef enum X86Seg {
 #define MSR_IA32_MISC_ENABLE            0x1a0
 /* Indicates good rep/movs microcode on some processors: */
 #define MSR_IA32_MISC_ENABLE_DEFAULT    1
+#define MSR_IA32_MISC_ENABLE_MWAIT      (1ULL << 18)
 
 #define MSR_MTRRphysBase(reg)           (0x200 + 2 * (reg))
 #define MSR_MTRRphysMask(reg)           (0x200 + 2 * (reg) + 1)
diff --git a/target/lm32/helper.c b/target/lm32/helper.c
index 20ea17ba23..8cd4840052 100644
--- a/target/lm32/helper.c
+++ b/target/lm32/helper.c
@@ -22,7 +22,7 @@
 #include "exec/exec-all.h"
 #include "qemu/host-utils.h"
 #include "sysemu/sysemu.h"
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
 #include "exec/log.h"
 
 bool lm32_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
diff --git a/target/m68k/op_helper.c b/target/m68k/op_helper.c
index 1ecc772b5c..bde2d551ff 100644
--- a/target/m68k/op_helper.c
+++ b/target/m68k/op_helper.c
@@ -21,7 +21,7 @@
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
 
 #if defined(CONFIG_USER_ONLY)
 
diff --git a/target/mips/Makefile.objs b/target/mips/Makefile.objs
index 651f36f517..3448ad5e19 100644
--- a/target/mips/Makefile.objs
+++ b/target/mips/Makefile.objs
@@ -1,4 +1,5 @@
 obj-y += translate.o dsp_helper.o op_helper.o lmi_helper.o helper.o cpu.o
-obj-y += gdbstub.o msa_helper.o mips-semi.o
+obj-y += gdbstub.o msa_helper.o
+obj-$(CONFIG_SOFTMMU) += mips-semi.o
 obj-$(CONFIG_SOFTMMU) += machine.o cp0_timer.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index 1f41cf66d5..06a8ed4748 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -22,10 +22,10 @@ typedef struct CPUMIPSTLBContext CPUMIPSTLBContext;
 
 typedef union wr_t wr_t;
 union wr_t {
-    int8_t  b[MSA_WRLEN/8];
-    int16_t h[MSA_WRLEN/16];
-    int32_t w[MSA_WRLEN/32];
-    int64_t d[MSA_WRLEN/64];
+    int8_t  b[MSA_WRLEN / 8];
+    int16_t h[MSA_WRLEN / 16];
+    int32_t w[MSA_WRLEN / 32];
+    int64_t d[MSA_WRLEN / 64];
 };
 
 typedef union fpr_t fpr_t;
@@ -37,7 +37,8 @@ union fpr_t {
 /* FPU/MSA register mapping is not tested on big-endian hosts. */
     wr_t     wr;   /* vector data */
 };
-/* define FP_ENDIAN_IDX to access the same location
+/*
+ *define FP_ENDIAN_IDX to access the same location
  * in the fpr_t union regardless of the host endianness
  */
 #if defined(HOST_WORDS_BIGENDIAN)
@@ -71,16 +72,29 @@ struct CPUMIPSFPUContext {
 #define FCR31_FS 24
 #define FCR31_ABS2008 19
 #define FCR31_NAN2008 18
-#define SET_FP_COND(num,env)     do { ((env).fcr31) |= ((num) ? (1 << ((num) + 24)) : (1 << 23)); } while(0)
-#define CLEAR_FP_COND(num,env)   do { ((env).fcr31) &= ~((num) ? (1 << ((num) + 24)) : (1 << 23)); } while(0)
-#define GET_FP_COND(env)         ((((env).fcr31 >> 24) & 0xfe) | (((env).fcr31 >> 23) & 0x1))
+#define SET_FP_COND(num, env)     do { ((env).fcr31) |=                 \
+                                       ((num) ? (1 << ((num) + 24)) :   \
+                                                (1 << 23));             \
+                                     } while (0)
+#define CLEAR_FP_COND(num, env)   do { ((env).fcr31) &=                 \
+                                       ~((num) ? (1 << ((num) + 24)) :  \
+                                                 (1 << 23));            \
+                                     } while (0)
+#define GET_FP_COND(env)         ((((env).fcr31 >> 24) & 0xfe) |        \
+                                 (((env).fcr31 >> 23) & 0x1))
 #define GET_FP_CAUSE(reg)        (((reg) >> 12) & 0x3f)
 #define GET_FP_ENABLE(reg)       (((reg) >>  7) & 0x1f)
 #define GET_FP_FLAGS(reg)        (((reg) >>  2) & 0x1f)
-#define SET_FP_CAUSE(reg,v)      do { (reg) = ((reg) & ~(0x3f << 12)) | ((v & 0x3f) << 12); } while(0)
-#define SET_FP_ENABLE(reg,v)     do { (reg) = ((reg) & ~(0x1f <<  7)) | ((v & 0x1f) << 7); } while(0)
-#define SET_FP_FLAGS(reg,v)      do { (reg) = ((reg) & ~(0x1f <<  2)) | ((v & 0x1f) << 2); } while(0)
-#define UPDATE_FP_FLAGS(reg,v)   do { (reg) |= ((v & 0x1f) << 2); } while(0)
+#define SET_FP_CAUSE(reg, v)      do { (reg) = ((reg) & ~(0x3f << 12)) | \
+                                               ((v & 0x3f) << 12);       \
+                                     } while (0)
+#define SET_FP_ENABLE(reg, v)     do { (reg) = ((reg) & ~(0x1f <<  7)) | \
+                                               ((v & 0x1f) << 7);        \
+                                     } while (0)
+#define SET_FP_FLAGS(reg, v)      do { (reg) = ((reg) & ~(0x1f <<  2)) | \
+                                               ((v & 0x1f) << 2);        \
+                                     } while (0)
+#define UPDATE_FP_FLAGS(reg, v)   do { (reg) |= ((v & 0x1f) << 2); } while (0)
 #define FP_INEXACT        1
 #define FP_UNDERFLOW      2
 #define FP_OVERFLOW       4
@@ -95,25 +109,25 @@ struct CPUMIPSFPUContext {
 typedef struct CPUMIPSMVPContext CPUMIPSMVPContext;
 struct CPUMIPSMVPContext {
     int32_t CP0_MVPControl;
-#define CP0MVPCo_CPA	3
-#define CP0MVPCo_STLB	2
-#define CP0MVPCo_VPC	1
-#define CP0MVPCo_EVP	0
+#define CP0MVPCo_CPA    3
+#define CP0MVPCo_STLB   2
+#define CP0MVPCo_VPC    1
+#define CP0MVPCo_EVP    0
     int32_t CP0_MVPConf0;
-#define CP0MVPC0_M	31
-#define CP0MVPC0_TLBS	29
-#define CP0MVPC0_GS	28
-#define CP0MVPC0_PCP	27
-#define CP0MVPC0_PTLBE	16
-#define CP0MVPC0_TCA	15
-#define CP0MVPC0_PVPE	10
-#define CP0MVPC0_PTC	0
+#define CP0MVPC0_M      31
+#define CP0MVPC0_TLBS   29
+#define CP0MVPC0_GS     28
+#define CP0MVPC0_PCP    27
+#define CP0MVPC0_PTLBE  16
+#define CP0MVPC0_TCA    15
+#define CP0MVPC0_PVPE   10
+#define CP0MVPC0_PTC    0
     int32_t CP0_MVPConf1;
-#define CP0MVPC1_CIM	31
-#define CP0MVPC1_CIF	30
-#define CP0MVPC1_PCX	20
-#define CP0MVPC1_PCP2	10
-#define CP0MVPC1_PCP1	0
+#define CP0MVPC1_CIM    31
+#define CP0MVPC1_CIF    30
+#define CP0MVPC1_PCX    20
+#define CP0MVPC1_PCP2   10
+#define CP0MVPC1_PCP1   0
 };
 
 typedef struct mips_def_t mips_def_t;
@@ -481,44 +495,44 @@ struct CPUMIPSState {
  */
     int32_t CP0_Random;
     int32_t CP0_VPEControl;
-#define CP0VPECo_YSI	21
-#define CP0VPECo_GSI	20
-#define CP0VPECo_EXCPT	16
-#define CP0VPECo_TE	15
-#define CP0VPECo_TargTC	0
+#define CP0VPECo_YSI    21
+#define CP0VPECo_GSI    20
+#define CP0VPECo_EXCPT  16
+#define CP0VPECo_TE     15
+#define CP0VPECo_TargTC 0
     int32_t CP0_VPEConf0;
-#define CP0VPEC0_M	31
-#define CP0VPEC0_XTC	21
-#define CP0VPEC0_TCS	19
-#define CP0VPEC0_SCS	18
-#define CP0VPEC0_DSC	17
-#define CP0VPEC0_ICS	16
-#define CP0VPEC0_MVP	1
-#define CP0VPEC0_VPA	0
+#define CP0VPEC0_M      31
+#define CP0VPEC0_XTC    21
+#define CP0VPEC0_TCS    19
+#define CP0VPEC0_SCS    18
+#define CP0VPEC0_DSC    17
+#define CP0VPEC0_ICS    16
+#define CP0VPEC0_MVP    1
+#define CP0VPEC0_VPA    0
     int32_t CP0_VPEConf1;
-#define CP0VPEC1_NCX	20
-#define CP0VPEC1_NCP2	10
-#define CP0VPEC1_NCP1	0
+#define CP0VPEC1_NCX    20
+#define CP0VPEC1_NCP2   10
+#define CP0VPEC1_NCP1   0
     target_ulong CP0_YQMask;
     target_ulong CP0_VPESchedule;
     target_ulong CP0_VPEScheFBack;
     int32_t CP0_VPEOpt;
-#define CP0VPEOpt_IWX7	15
-#define CP0VPEOpt_IWX6	14
-#define CP0VPEOpt_IWX5	13
-#define CP0VPEOpt_IWX4	12
-#define CP0VPEOpt_IWX3	11
-#define CP0VPEOpt_IWX2	10
-#define CP0VPEOpt_IWX1	9
-#define CP0VPEOpt_IWX0	8
-#define CP0VPEOpt_DWX7	7
-#define CP0VPEOpt_DWX6	6
-#define CP0VPEOpt_DWX5	5
-#define CP0VPEOpt_DWX4	4
-#define CP0VPEOpt_DWX3	3
-#define CP0VPEOpt_DWX2	2
-#define CP0VPEOpt_DWX1	1
-#define CP0VPEOpt_DWX0	0
+#define CP0VPEOpt_IWX7  15
+#define CP0VPEOpt_IWX6  14
+#define CP0VPEOpt_IWX5  13
+#define CP0VPEOpt_IWX4  12
+#define CP0VPEOpt_IWX3  11
+#define CP0VPEOpt_IWX2  10
+#define CP0VPEOpt_IWX1  9
+#define CP0VPEOpt_IWX0  8
+#define CP0VPEOpt_DWX7  7
+#define CP0VPEOpt_DWX6  6
+#define CP0VPEOpt_DWX5  5
+#define CP0VPEOpt_DWX4  4
+#define CP0VPEOpt_DWX3  3
+#define CP0VPEOpt_DWX2  2
+#define CP0VPEOpt_DWX1  1
+#define CP0VPEOpt_DWX0  0
 /*
  * CP0 Register 2
  */
@@ -625,33 +639,33 @@ struct CPUMIPSState {
 #define CP0PC_PSN       0     /*  5..0  */
     int32_t CP0_SRSConf0_rw_bitmask;
     int32_t CP0_SRSConf0;
-#define CP0SRSC0_M	31
-#define CP0SRSC0_SRS3	20
-#define CP0SRSC0_SRS2	10
-#define CP0SRSC0_SRS1	0
+#define CP0SRSC0_M      31
+#define CP0SRSC0_SRS3   20
+#define CP0SRSC0_SRS2   10
+#define CP0SRSC0_SRS1   0
     int32_t CP0_SRSConf1_rw_bitmask;
     int32_t CP0_SRSConf1;
-#define CP0SRSC1_M	31
-#define CP0SRSC1_SRS6	20
-#define CP0SRSC1_SRS5	10
-#define CP0SRSC1_SRS4	0
+#define CP0SRSC1_M      31
+#define CP0SRSC1_SRS6   20
+#define CP0SRSC1_SRS5   10
+#define CP0SRSC1_SRS4   0
     int32_t CP0_SRSConf2_rw_bitmask;
     int32_t CP0_SRSConf2;
-#define CP0SRSC2_M	31
-#define CP0SRSC2_SRS9	20
-#define CP0SRSC2_SRS8	10
-#define CP0SRSC2_SRS7	0
+#define CP0SRSC2_M      31
+#define CP0SRSC2_SRS9   20
+#define CP0SRSC2_SRS8   10
+#define CP0SRSC2_SRS7   0
     int32_t CP0_SRSConf3_rw_bitmask;
     int32_t CP0_SRSConf3;
-#define CP0SRSC3_M	31
-#define CP0SRSC3_SRS12	20
-#define CP0SRSC3_SRS11	10
-#define CP0SRSC3_SRS10	0
+#define CP0SRSC3_M      31
+#define CP0SRSC3_SRS12  20
+#define CP0SRSC3_SRS11  10
+#define CP0SRSC3_SRS10  0
     int32_t CP0_SRSConf4_rw_bitmask;
     int32_t CP0_SRSConf4;
-#define CP0SRSC4_SRS15	20
-#define CP0SRSC4_SRS14	10
-#define CP0SRSC4_SRS13	0
+#define CP0SRSC4_SRS15  20
+#define CP0SRSC4_SRS14  10
+#define CP0SRSC4_SRS13  0
 /*
  * CP0 Register 7
  */
@@ -963,9 +977,11 @@ struct CPUMIPSState {
     /* TMASK defines different execution modes */
 #define MIPS_HFLAG_TMASK  0x1F5807FF
 #define MIPS_HFLAG_MODE   0x00007 /* execution modes                    */
-    /* The KSU flags must be the lowest bits in hflags. The flag order
-       must be the same as defined for CP0 Status. This allows to use
-       the bits as the value of mmu_idx. */
+    /*
+     * The KSU flags must be the lowest bits in hflags. The flag order
+     * must be the same as defined for CP0 Status. This allows to use
+     * the bits as the value of mmu_idx.
+     */
 #define MIPS_HFLAG_KSU    0x00003 /* kernel/supervisor/user mode mask   */
 #define MIPS_HFLAG_UM     0x00002 /* user mode flag                     */
 #define MIPS_HFLAG_SM     0x00001 /* supervisor mode flag               */
@@ -975,18 +991,22 @@ struct CPUMIPSState {
 #define MIPS_HFLAG_CP0    0x00010 /* CP0 enabled                        */
 #define MIPS_HFLAG_FPU    0x00020 /* FPU enabled                        */
 #define MIPS_HFLAG_F64    0x00040 /* 64-bit FPU enabled                 */
-    /* True if the MIPS IV COP1X instructions can be used.  This also
-       controls the non-COP1X instructions RECIP.S, RECIP.D, RSQRT.S
-       and RSQRT.D.  */
+    /*
+     * True if the MIPS IV COP1X instructions can be used.  This also
+     * controls the non-COP1X instructions RECIP.S, RECIP.D, RSQRT.S
+     * and RSQRT.D.
+     */
 #define MIPS_HFLAG_COP1X  0x00080 /* COP1X instructions enabled         */
 #define MIPS_HFLAG_RE     0x00100 /* Reversed endianness                */
 #define MIPS_HFLAG_AWRAP  0x00200 /* 32-bit compatibility address wrapping */
 #define MIPS_HFLAG_M16    0x00400 /* MIPS16 mode flag                   */
 #define MIPS_HFLAG_M16_SHIFT 10
-    /* If translation is interrupted between the branch instruction and
+    /*
+     * If translation is interrupted between the branch instruction and
      * the delay slot, record what type of branch it is so that we can
      * resume translation properly.  It might be possible to reduce
-     * this from three bits to two.  */
+     * this from three bits to two.
+     */
 #define MIPS_HFLAG_BMASK_BASE  0x803800
 #define MIPS_HFLAG_B      0x00800 /* Unconditional branch               */
 #define MIPS_HFLAG_BC     0x01000 /* Conditional branch                 */
@@ -1073,8 +1093,10 @@ void mips_cpu_list(void);
 extern void cpu_wrdsp(uint32_t rs, uint32_t mask_num, CPUMIPSState *env);
 extern uint32_t cpu_rddsp(uint32_t mask_num, CPUMIPSState *env);
 
-/* MMU modes definitions. We carefully match the indices with our
-   hflags layout. */
+/*
+ * MMU modes definitions. We carefully match the indices with our
+ * hflags layout.
+ */
 #define MMU_MODE0_SUFFIX _kernel
 #define MMU_MODE1_SUFFIX _super
 #define MMU_MODE2_SUFFIX _user
@@ -1090,14 +1112,15 @@ static inline int hflags_mmu_index(uint32_t hflags)
     }
 }
 
-static inline int cpu_mmu_index (CPUMIPSState *env, bool ifetch)
+static inline int cpu_mmu_index(CPUMIPSState *env, bool ifetch)
 {
     return hflags_mmu_index(env->hflags);
 }
 
 #include "exec/cpu-all.h"
 
-/* Memory access type :
+/*
+ * Memory access type :
  * may be needed for precise access rights control and precise exceptions.
  */
 enum {
@@ -1182,7 +1205,7 @@ void cpu_mips_soft_irq(CPUMIPSState *env, int irq, int level);
 void itc_reconfigure(struct MIPSITUState *tag);
 
 /* helper.c */
-target_ulong exception_resume_pc (CPUMIPSState *env);
+target_ulong exception_resume_pc(CPUMIPSState *env);
 
 static inline void restore_snan_bit_mode(CPUMIPSState *env)
 {
diff --git a/target/mips/dsp_helper.c b/target/mips/dsp_helper.c
index 739b69dd45..8c58eeb0bf 100644
--- a/target/mips/dsp_helper.c
+++ b/target/mips/dsp_helper.c
@@ -22,8 +22,10 @@
 #include "exec/helper-proto.h"
 #include "qemu/bitops.h"
 
-/* As the byte ordering doesn't matter, i.e. all columns are treated
-   identically, these unions can be used directly.  */
+/*
+ * As the byte ordering doesn't matter, i.e. all columns are treated
+ * identically, these unions can be used directly.
+ */
 typedef union {
     uint8_t  ub[4];
     int8_t   sb[4];
@@ -1445,9 +1447,15 @@ target_ulong helper_precr_ob_qh(target_ulong rs, target_ulong rt)
     return temp;
 }
 
-#define PRECR_QH_PW(name, var) \
-target_ulong helper_precr_##name##_qh_pw(target_ulong rs, target_ulong rt, \
-                                    uint32_t sa)                      \
+
+/*
+ * In case sa == 0, use rt2, rt0, rs2, rs0.
+ * In case sa != 0, use rt3, rt1, rs3, rs1.
+ */
+#define PRECR_QH_PW(name, var)                                        \
+target_ulong helper_precr_##name##_qh_pw(target_ulong rs,             \
+                                         target_ulong rt,             \
+                                         uint32_t sa)                 \
 {                                                                     \
     uint16_t rs3, rs2, rs1, rs0;                                      \
     uint16_t rt3, rt2, rt1, rt0;                                      \
@@ -1456,8 +1464,6 @@ target_ulong helper_precr_##name##_qh_pw(target_ulong rs, target_ulong rt, \
     MIPSDSP_SPLIT64_16(rs, rs3, rs2, rs1, rs0);                       \
     MIPSDSP_SPLIT64_16(rt, rt3, rt2, rt1, rt0);                       \
                                                                       \
-    /* When sa = 0, we use rt2, rt0, rs2, rs0;                        \
-     * when sa != 0, we use rt3, rt1, rs3, rs1. */                    \
     if (sa == 0) {                                                    \
         tempD = rt2 << var;                                           \
         tempC = rt0 << var;                                           \
@@ -1965,7 +1971,8 @@ SHIFT_PH(shra_r, rnd16_rashift);
 #undef SHIFT_PH
 
 /** DSP Multiply Sub-class insns **/
-/* Return value made up by two 16bits value.
+/*
+ * Return value made up by two 16bits value.
  * FIXME give the macro a better name.
  */
 #define MUL_RETURN32_16_PH(name, func, \
@@ -3274,11 +3281,15 @@ target_ulong helper_dextr_l(target_ulong ac, target_ulong shift,
                             CPUMIPSState *env)
 {
     uint64_t temp[3];
+    target_ulong ret;
 
     shift = shift & 0x3F;
 
     mipsdsp_rndrashift_acc(temp, ac, shift, env);
-    return (temp[1] << 63) | (temp[0] >> 1);
+
+    ret = (temp[1] << 63) | (temp[0] >> 1);
+
+    return ret;
 }
 
 target_ulong helper_dextr_r_l(target_ulong ac, target_ulong shift,
@@ -3286,6 +3297,7 @@ target_ulong helper_dextr_r_l(target_ulong ac, target_ulong shift,
 {
     uint64_t temp[3];
     uint32_t temp128;
+    target_ulong ret;
 
     shift = shift & 0x3F;
     mipsdsp_rndrashift_acc(temp, ac, shift, env);
@@ -3305,7 +3317,9 @@ target_ulong helper_dextr_r_l(target_ulong ac, target_ulong shift,
         set_DSPControl_overflow_flag(1, 23, env);
     }
 
-    return (temp[1] << 63) | (temp[0] >> 1);
+    ret = (temp[1] << 63) | (temp[0] >> 1);
+
+    return ret;
 }
 
 target_ulong helper_dextr_rs_l(target_ulong ac, target_ulong shift,
@@ -3313,6 +3327,7 @@ target_ulong helper_dextr_rs_l(target_ulong ac, target_ulong shift,
 {
     uint64_t temp[3];
     uint32_t temp128;
+    target_ulong ret;
 
     shift = shift & 0x3F;
     mipsdsp_rndrashift_acc(temp, ac, shift, env);
@@ -3338,7 +3353,10 @@ target_ulong helper_dextr_rs_l(target_ulong ac, target_ulong shift,
         }
         set_DSPControl_overflow_flag(1, 23, env);
     }
-    return (temp[1] << 63) | (temp[0] >> 1);
+
+    ret = (temp[1] << 63) | (temp[0] >> 1);
+
+    return ret;
 }
 #endif
 
diff --git a/target/mips/helper.c b/target/mips/helper.c
index 9799f2ede1..68e44df4da 100644
--- a/target/mips/helper.c
+++ b/target/mips/helper.c
@@ -43,7 +43,7 @@ int no_mmu_map_address (CPUMIPSState *env, hwaddr *physical, int *prot,
                         target_ulong address, int rw, int access_type)
 {
     *physical = address;
-    *prot = PAGE_READ | PAGE_WRITE;
+    *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
     return TLBRET_MATCH;
 }
 
@@ -61,7 +61,7 @@ int fixed_mmu_map_address (CPUMIPSState *env, hwaddr *physical, int *prot,
     else
         *physical = address;
 
-    *prot = PAGE_READ | PAGE_WRITE;
+    *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
     return TLBRET_MATCH;
 }
 
@@ -101,6 +101,9 @@ int r4k_map_address (CPUMIPSState *env, hwaddr *physical, int *prot,
                 *prot = PAGE_READ;
                 if (n ? tlb->D1 : tlb->D0)
                     *prot |= PAGE_WRITE;
+                if (!(n ? tlb->XI1 : tlb->XI0)) {
+                    *prot |= PAGE_EXEC;
+                }
                 return TLBRET_MATCH;
             }
             return TLBRET_DIRTY;
@@ -182,7 +185,7 @@ static int get_seg_physical_address(CPUMIPSState *env, hwaddr *physical,
     } else {
         /* The segment is unmapped */
         *physical = physical_base | (real_address & segmask);
-        *prot = PAGE_READ | PAGE_WRITE;
+        *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
         return TLBRET_MATCH;
     }
 }
@@ -907,7 +910,7 @@ bool mips_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
     }
     if (ret == TLBRET_MATCH) {
         tlb_set_page(cs, address & TARGET_PAGE_MASK,
-                     physical & TARGET_PAGE_MASK, prot | PAGE_EXEC,
+                     physical & TARGET_PAGE_MASK, prot,
                      mmu_idx, TARGET_PAGE_SIZE);
         return true;
     }
@@ -927,7 +930,7 @@ bool mips_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
                                        access_type, mips_access_type, mmu_idx);
             if (ret == TLBRET_MATCH) {
                 tlb_set_page(cs, address & TARGET_PAGE_MASK,
-                             physical & TARGET_PAGE_MASK, prot | PAGE_EXEC,
+                             physical & TARGET_PAGE_MASK, prot,
                              mmu_idx, TARGET_PAGE_SIZE);
                 return true;
             }
diff --git a/target/mips/helper.h b/target/mips/helper.h
index a6d687e340..51f0e1c183 100644
--- a/target/mips/helper.h
+++ b/target/mips/helper.h
@@ -2,7 +2,9 @@ DEF_HELPER_3(raise_exception_err, noreturn, env, i32, int)
 DEF_HELPER_2(raise_exception, noreturn, env, i32)
 DEF_HELPER_1(raise_exception_debug, noreturn, env)
 
+#ifndef CONFIG_USER_ONLY
 DEF_HELPER_1(do_semihosting, void, env)
+#endif
 
 #ifdef TARGET_MIPS64
 DEF_HELPER_4(sdl, void, env, tl, tl, int)
@@ -876,9 +878,7 @@ DEF_HELPER_5(msa_hsub_u_df, void, env, i32, i32, i32, i32)
 
 DEF_HELPER_5(msa_sldi_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_splati_df, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(msa_copy_s_df, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(msa_copy_u_df, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(msa_insert_df, void, env, i32, i32, i32, i32)
+
 DEF_HELPER_5(msa_insve_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_3(msa_ctcmsa, void, env, tl, i32)
 DEF_HELPER_2(msa_cfcmsa, tl, env, i32)
@@ -938,6 +938,18 @@ DEF_HELPER_4(msa_pcnt_df, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_nloc_df, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_nlzc_df, void, env, i32, i32, i32)
 
+DEF_HELPER_4(msa_copy_s_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_copy_s_h, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_copy_s_w, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_copy_s_d, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_copy_u_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_copy_u_h, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_copy_u_w, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_insert_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_insert_h, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_insert_w, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_insert_d, void, env, i32, i32, i32)
+
 DEF_HELPER_4(msa_fclass_df, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_ftrunc_s_df, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_ftrunc_u_df, void, env, i32, i32, i32)
diff --git a/target/mips/lmi_helper.c b/target/mips/lmi_helper.c
index fb1245b39d..6c645cf679 100644
--- a/target/mips/lmi_helper.c
+++ b/target/mips/lmi_helper.c
@@ -21,9 +21,11 @@
 #include "cpu.h"
 #include "exec/helper-proto.h"
 
-/* If the byte ordering doesn't matter, i.e. all columns are treated
-   identically, then this union can be used directly.  If byte ordering
-   does matter, we generally ignore dumping to memory.  */
+/*
+ * If the byte ordering doesn't matter, i.e. all columns are treated
+ * identically, then this union can be used directly.  If byte ordering
+ * does matter, we generally ignore dumping to memory.
+ */
 typedef union {
     uint8_t  ub[8];
     int8_t   sb[8];
diff --git a/target/mips/mips-semi.c b/target/mips/mips-semi.c
index a7aefbaefc..35bdfd7c77 100644
--- a/target/mips/mips-semi.c
+++ b/target/mips/mips-semi.c
@@ -22,7 +22,8 @@
 #include "qemu/log.h"
 #include "exec/helper-proto.h"
 #include "exec/softmmu-semi.h"
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
+#include "hw/semihosting/console.h"
 
 typedef enum UHIOp {
     UHI_exit = 1,
@@ -329,13 +330,12 @@ void helper_do_semihosting(CPUMIPSState *env)
         p2 = strstr(p, "%d");
         if (p2) {
             int char_num = p2 - p;
-            char *buf = g_malloc(char_num + 1);
-            strncpy(buf, p, char_num);
-            buf[char_num] = '\0';
-            gpr[2] = printf("%s%d%s", buf, (int)gpr[5], p2 + 2);
-            g_free(buf);
+            GString *s = g_string_new_len(p, char_num);
+            g_string_append_printf(s, "%d%s", (int)gpr[5], p2 + 2);
+            gpr[2] = qemu_semihosting_log_out(s->str, s->len);
+            g_string_free(s, true);
         } else {
-            gpr[2] = printf("%s", p);
+            gpr[2] = qemu_semihosting_log_out(p, strlen(p));
         }
         FREE_TARGET_STRING(p, gpr[4]);
         break;
diff --git a/target/mips/msa_helper.c b/target/mips/msa_helper.c
index c74e3cdc65..be059a3d6f 100644
--- a/target/mips/msa_helper.c
+++ b/target/mips/msa_helper.c
@@ -42,6 +42,467 @@
 /* Element-by-element access macros */
 #define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
 
+
+
+/*
+ * Bit Count
+ * ---------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | NLOC.B        | Vector Leading Ones Count (byte)                         |
+ * | NLOC.H        | Vector Leading Ones Count (halfword)                     |
+ * | NLOC.W        | Vector Leading Ones Count (word)                         |
+ * | NLOC.D        | Vector Leading Ones Count (doubleword)                   |
+ * | NLZC.B        | Vector Leading Zeros Count (byte)                        |
+ * | NLZC.H        | Vector Leading Zeros Count (halfword)                    |
+ * | NLZC.W        | Vector Leading Zeros Count (word)                        |
+ * | NLZC.D        | Vector Leading Zeros Count (doubleword)                  |
+ * | PCNT.B        | Vector Population Count (byte)                           |
+ * | PCNT.H        | Vector Population Count (halfword)                       |
+ * | PCNT.W        | Vector Population Count (word)                           |
+ * | PCNT.D        | Vector Population Count (doubleword)                     |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Bit Count group helpers here */
+
+
+/*
+ * Bit Move
+ * --------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | BMNZ.V        | Vector Bit Move If Not Zero                              |
+ * | BMZ.V         | Vector Bit Move If Zero                                  |
+ * | BSEL.V        | Vector Bit Select                                        |
+ * | BINSL.B       | Vector Bit Insert Left (byte)                            |
+ * | BINSL.H       | Vector Bit Insert Left (halfword)                        |
+ * | BINSL.W       | Vector Bit Insert Left (word)                            |
+ * | BINSL.D       | Vector Bit Insert Left (doubleword)                      |
+ * | BINSR.B       | Vector Bit Insert Right (byte)                           |
+ * | BINSR.H       | Vector Bit Insert Right (halfword)                       |
+ * | BINSR.W       | Vector Bit Insert Right (word)                           |
+ * | BINSR.D       | Vector Bit Insert Right (doubleword)                     |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Bit Move group helpers here */
+
+
+/*
+ * Bit Set
+ * -------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | BCLR.B        | Vector Bit Clear (byte)                                  |
+ * | BCLR.H        | Vector Bit Clear (halfword)                              |
+ * | BCLR.W        | Vector Bit Clear (word)                                  |
+ * | BCLR.D        | Vector Bit Clear (doubleword)                            |
+ * | BNEG.B        | Vector Bit Negate (byte)                                 |
+ * | BNEG.H        | Vector Bit Negate (halfword)                             |
+ * | BNEG.W        | Vector Bit Negate (word)                                 |
+ * | BNEG.D        | Vector Bit Negate (doubleword)                           |
+ * | BSET.B        | Vector Bit Set (byte)                                    |
+ * | BSET.H        | Vector Bit Set (halfword)                                |
+ * | BSET.W        | Vector Bit Set (word)                                    |
+ * | BSET.D        | Vector Bit Set (doubleword)                              |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Bit Set group helpers here */
+
+
+/*
+ * Fixed Multiply
+ * --------------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | MADD_Q.H      | Vector Fixed-Point Multiply and Add (halfword)           |
+ * | MADD_Q.W      | Vector Fixed-Point Multiply and Add (word)               |
+ * | MADDR_Q.H     | Vector Fixed-Point Multiply and Add Rounded (halfword)   |
+ * | MADDR_Q.W     | Vector Fixed-Point Multiply and Add Rounded (word)       |
+ * | MSUB_Q.H      | Vector Fixed-Point Multiply and Subtr. (halfword)        |
+ * | MSUB_Q.W      | Vector Fixed-Point Multiply and Subtr. (word)            |
+ * | MSUBR_Q.H     | Vector Fixed-Point Multiply and Subtr. Rounded (halfword)|
+ * | MSUBR_Q.W     | Vector Fixed-Point Multiply and Subtr. Rounded (word)    |
+ * | MUL_Q.H       | Vector Fixed-Point Multiply (halfword)                   |
+ * | MUL_Q.W       | Vector Fixed-Point Multiply (word)                       |
+ * | MULR_Q.H      | Vector Fixed-Point Multiply Rounded (halfword)           |
+ * | MULR_Q.W      | Vector Fixed-Point Multiply Rounded (word)               |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Fixed Multiply group helpers here */
+
+
+/*
+ * Float Max Min
+ * -------------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | FMAX_A.W      | Vector Floating-Point Maximum (Absolute) (word)          |
+ * | FMAX_A.D      | Vector Floating-Point Maximum (Absolute) (doubleword)    |
+ * | FMAX.W        | Vector Floating-Point Maximum (word)                     |
+ * | FMAX.D        | Vector Floating-Point Maximum (doubleword)               |
+ * | FMIN_A.W      | Vector Floating-Point Minimum (Absolute) (word)          |
+ * | FMIN_A.D      | Vector Floating-Point Minimum (Absolute) (doubleword)    |
+ * | FMIN.W        | Vector Floating-Point Minimum (word)                     |
+ * | FMIN.D        | Vector Floating-Point Minimum (doubleword)               |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Float Max Min group helpers here */
+
+
+/*
+ * Int Add
+ * -------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | ADD_A.B       | Vector Add Absolute Values (byte)                        |
+ * | ADD_A.H       | Vector Add Absolute Values (halfword)                    |
+ * | ADD_A.W       | Vector Add Absolute Values (word)                        |
+ * | ADD_A.D       | Vector Add Absolute Values (doubleword)                  |
+ * | ADDS_A.B      | Vector Signed Saturated Add (of Absolute) (byte)         |
+ * | ADDS_A.H      | Vector Signed Saturated Add (of Absolute) (halfword)     |
+ * | ADDS_A.W      | Vector Signed Saturated Add (of Absolute) (word)         |
+ * | ADDS_A.D      | Vector Signed Saturated Add (of Absolute) (doubleword)   |
+ * | ADDS_S.B      | Vector Signed Saturated Add (of Signed) (byte)           |
+ * | ADDS_S.H      | Vector Signed Saturated Add (of Signed) (halfword)       |
+ * | ADDS_S.W      | Vector Signed Saturated Add (of Signed) (word)           |
+ * | ADDS_S.D      | Vector Signed Saturated Add (of Signed) (doubleword)     |
+ * | ADDS_U.B      | Vector Unsigned Saturated Add (of Unsigned) (byte)       |
+ * | ADDS_U.H      | Vector Unsigned Saturated Add (of Unsigned) (halfword)   |
+ * | ADDS_U.W      | Vector Unsigned Saturated Add (of Unsigned) (word)       |
+ * | ADDS_U.D      | Vector Unsigned Saturated Add (of Unsigned) (doubleword) |
+ * | ADDV.B        | Vector Add (byte)                                        |
+ * | ADDV.H        | Vector Add (halfword)                                    |
+ * | ADDV.W        | Vector Add (word)                                        |
+ * | ADDV.D        | Vector Add (doubleword)                                  |
+ * | HSUB_S.H      | Vector Signed Horizontal Add (halfword)                  |
+ * | HSUB_S.W      | Vector Signed Horizontal Add (word)                      |
+ * | HSUB_S.D      | Vector Signed Horizontal Add (doubleword)                |
+ * | HSUB_U.H      | Vector Unigned Horizontal Add (halfword)                 |
+ * | HSUB_U.W      | Vector Unigned Horizontal Add (word)                     |
+ * | HSUB_U.D      | Vector Unigned Horizontal Add (doubleword)               |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Int Add group helpers here */
+
+
+/*
+ * Int Average
+ * -----------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | AVE_S.B       | Vector Signed Average (byte)                             |
+ * | AVE_S.H       | Vector Signed Average (halfword)                         |
+ * | AVE_S.W       | Vector Signed Average (word)                             |
+ * | AVE_S.D       | Vector Signed Average (doubleword)                       |
+ * | AVE_U.B       | Vector Unsigned Average (byte)                           |
+ * | AVE_U.H       | Vector Unsigned Average (halfword)                       |
+ * | AVE_U.W       | Vector Unsigned Average (word)                           |
+ * | AVE_U.D       | Vector Unsigned Average (doubleword)                     |
+ * | AVER_S.B      | Vector Signed Average Rounded (byte)                     |
+ * | AVER_S.H      | Vector Signed Average Rounded (halfword)                 |
+ * | AVER_S.W      | Vector Signed Average Rounded (word)                     |
+ * | AVER_S.D      | Vector Signed Average Rounded (doubleword)               |
+ * | AVER_U.B      | Vector Unsigned Average Rounded (byte)                   |
+ * | AVER_U.H      | Vector Unsigned Average Rounded (halfword)               |
+ * | AVER_U.W      | Vector Unsigned Average Rounded (word)                   |
+ * | AVER_U.D      | Vector Unsigned Average Rounded (doubleword)             |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Int Average group helpers here */
+
+
+/*
+ * Int Compare
+ * -----------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | CEQ.B         | Vector Compare Equal (byte)                              |
+ * | CEQ.H         | Vector Compare Equal (halfword)                          |
+ * | CEQ.W         | Vector Compare Equal (word)                              |
+ * | CEQ.D         | Vector Compare Equal (doubleword)                        |
+ * | CLE_S.B       | Vector Compare Signed Less Than or Equal (byte)          |
+ * | CLE_S.H       | Vector Compare Signed Less Than or Equal (halfword)      |
+ * | CLE_S.W       | Vector Compare Signed Less Than or Equal (word)          |
+ * | CLE_S.D       | Vector Compare Signed Less Than or Equal (doubleword)    |
+ * | CLE_U.B       | Vector Compare Unsigned Less Than or Equal (byte)        |
+ * | CLE_U.H       | Vector Compare Unsigned Less Than or Equal (halfword)    |
+ * | CLE_U.W       | Vector Compare Unsigned Less Than or Equal (word)        |
+ * | CLE_U.D       | Vector Compare Unsigned Less Than or Equal (doubleword)  |
+ * | CLT_S.B       | Vector Compare Signed Less Than (byte)                   |
+ * | CLT_S.H       | Vector Compare Signed Less Than (halfword)               |
+ * | CLT_S.W       | Vector Compare Signed Less Than (word)                   |
+ * | CLT_S.D       | Vector Compare Signed Less Than (doubleword)             |
+ * | CLT_U.B       | Vector Compare Unsigned Less Than (byte)                 |
+ * | CLT_U.H       | Vector Compare Unsigned Less Than (halfword)             |
+ * | CLT_U.W       | Vector Compare Unsigned Less Than (word)                 |
+ * | CLT_U.D       | Vector Compare Unsigned Less Than (doubleword)           |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Int Compare group helpers here */
+
+
+/*
+ * Int Divide
+ * ----------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | DIV_S.B       | Vector Signed Divide (byte)                              |
+ * | DIV_S.H       | Vector Signed Divide (halfword)                          |
+ * | DIV_S.W       | Vector Signed Divide (word)                              |
+ * | DIV_S.D       | Vector Signed Divide (doubleword)                        |
+ * | DIV_U.B       | Vector Unsigned Divide (byte)                            |
+ * | DIV_U.H       | Vector Unsigned Divide (halfword)                        |
+ * | DIV_U.W       | Vector Unsigned Divide (word)                            |
+ * | DIV_U.D       | Vector Unsigned Divide (doubleword)                      |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Int Divide group helpers here */
+
+
+/*
+ * Int Dot Product
+ * ---------------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | DOTP_S.H      | Vector Signed Dot Product (halfword)                     |
+ * | DOTP_S.W      | Vector Signed Dot Product (word)                         |
+ * | DOTP_S.D      | Vector Signed Dot Product (doubleword)                   |
+ * | DOTP_U.H      | Vector Unsigned Dot Product (halfword)                   |
+ * | DOTP_U.W      | Vector Unsigned Dot Product (word)                       |
+ * | DOTP_U.D      | Vector Unsigned Dot Product (doubleword)                 |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Int Dot Product group helpers here */
+
+
+/*
+ * Int Max Min
+ * -----------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | MAX_A.B       | Vector Maximum Based on Absolute Value (byte)            |
+ * | MAX_A.H       | Vector Maximum Based on Absolute Value (halfword)        |
+ * | MAX_A.W       | Vector Maximum Based on Absolute Value (word)            |
+ * | MAX_A.D       | Vector Maximum Based on Absolute Value (doubleword)      |
+ * | MAX_S.B       | Vector Signed Maximum (byte)                             |
+ * | MAX_S.H       | Vector Signed Maximum (halfword)                         |
+ * | MAX_S.W       | Vector Signed Maximum (word)                             |
+ * | MAX_S.D       | Vector Signed Maximum (doubleword)                       |
+ * | MAX_U.B       | Vector Unsigned Maximum (byte)                           |
+ * | MAX_U.H       | Vector Unsigned Maximum (halfword)                       |
+ * | MAX_U.W       | Vector Unsigned Maximum (word)                           |
+ * | MAX_U.D       | Vector Unsigned Maximum (doubleword)                     |
+ * | MIN_A.B       | Vector Minimum Based on Absolute Value (byte)            |
+ * | MIN_A.H       | Vector Minimum Based on Absolute Value (halfword)        |
+ * | MIN_A.W       | Vector Minimum Based on Absolute Value (word)            |
+ * | MIN_A.D       | Vector Minimum Based on Absolute Value (doubleword)      |
+ * | MIN_S.B       | Vector Signed Minimum (byte)                             |
+ * | MIN_S.H       | Vector Signed Minimum (halfword)                         |
+ * | MIN_S.W       | Vector Signed Minimum (word)                             |
+ * | MIN_S.D       | Vector Signed Minimum (doubleword)                       |
+ * | MIN_U.B       | Vector Unsigned Minimum (byte)                           |
+ * | MIN_U.H       | Vector Unsigned Minimum (halfword)                       |
+ * | MIN_U.W       | Vector Unsigned Minimum (word)                           |
+ * | MIN_U.D       | Vector Unsigned Minimum (doubleword)                     |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Int Max Min group helpers here */
+
+
+/*
+ * Int Modulo
+ * ----------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | MOD_S.B       | Vector Signed Modulo (byte)                              |
+ * | MOD_S.H       | Vector Signed Modulo (halfword)                          |
+ * | MOD_S.W       | Vector Signed Modulo (word)                              |
+ * | MOD_S.D       | Vector Signed Modulo (doubleword)                        |
+ * | MOD_U.B       | Vector Unsigned Modulo (byte)                            |
+ * | MOD_U.H       | Vector Unsigned Modulo (halfword)                        |
+ * | MOD_U.W       | Vector Unsigned Modulo (word)                            |
+ * | MOD_U.D       | Vector Unsigned Modulo (doubleword)                      |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Int Modulo group helpers here */
+
+
+/*
+ * Int Multiply
+ * ------------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | MADDV.B       | Vector Multiply and Add (byte)                           |
+ * | MADDV.H       | Vector Multiply and Add (halfword)                       |
+ * | MADDV.W       | Vector Multiply and Add (word)                           |
+ * | MADDV.D       | Vector Multiply and Add (doubleword)                     |
+ * | MSUBV.B       | Vector Multiply and Subtract (byte)                      |
+ * | MSUBV.H       | Vector Multiply and Subtract (halfword)                  |
+ * | MSUBV.W       | Vector Multiply and Subtract (word)                      |
+ * | MSUBV.D       | Vector Multiply and Subtract (doubleword)                |
+ * | MULV.B        | Vector Multiply (byte)                                   |
+ * | MULV.H        | Vector Multiply (halfword)                               |
+ * | MULV.W        | Vector Multiply (word)                                   |
+ * | MULV.D        | Vector Multiply (doubleword)                             |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Int Multiply group helpers here */
+
+
+/*
+ * Int Subtract
+ * ------------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | ASUB_S.B      | Vector Absolute Values of Signed Subtract (byte)         |
+ * | ASUB_S.H      | Vector Absolute Values of Signed Subtract (halfword)     |
+ * | ASUB_S.W      | Vector Absolute Values of Signed Subtract (word)         |
+ * | ASUB_S.D      | Vector Absolute Values of Signed Subtract (doubleword)   |
+ * | ASUB_U.B      | Vector Absolute Values of Unsigned Subtract (byte)       |
+ * | ASUB_U.H      | Vector Absolute Values of Unsigned Subtract (halfword)   |
+ * | ASUB_U.W      | Vector Absolute Values of Unsigned Subtract (word)       |
+ * | ASUB_U.D      | Vector Absolute Values of Unsigned Subtract (doubleword) |
+ * | HSUB_S.H      | Vector Signed Horizontal Subtract (halfword)             |
+ * | HSUB_S.W      | Vector Signed Horizontal Subtract (word)                 |
+ * | HSUB_S.D      | Vector Signed Horizontal Subtract (doubleword)           |
+ * | HSUB_U.H      | Vector Unigned Horizontal Subtract (halfword)            |
+ * | HSUB_U.W      | Vector Unigned Horizontal Subtract (word)                |
+ * | HSUB_U.D      | Vector Unigned Horizontal Subtract (doubleword)          |
+ * | SUBS_S.B      | Vector Signed Saturated Subtract (of Signed) (byte)      |
+ * | SUBS_S.H      | Vector Signed Saturated Subtract (of Signed) (halfword)  |
+ * | SUBS_S.W      | Vector Signed Saturated Subtract (of Signed) (word)      |
+ * | SUBS_S.D      | Vector Signed Saturated Subtract (of Signed) (doubleword)|
+ * | SUBS_U.B      | Vector Unsigned Saturated Subtract (of Uns.) (byte)      |
+ * | SUBS_U.H      | Vector Unsigned Saturated Subtract (of Uns.) (halfword)  |
+ * | SUBS_U.W      | Vector Unsigned Saturated Subtract (of Uns.) (word)      |
+ * | SUBS_U.D      | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)|
+ * | SUBSUS_S.B    | Vector Uns. Sat. Subtract (of S. from Uns.) (byte)       |
+ * | SUBSUS_S.H    | Vector Uns. Sat. Subtract (of S. from Uns.) (halfword)   |
+ * | SUBSUS_S.W    | Vector Uns. Sat. Subtract (of S. from Uns.) (word)       |
+ * | SUBSUS_S.D    | Vector Uns. Sat. Subtract (of S. from Uns.) (doubleword) |
+ * | SUBSUU_U.B    | Vector Signed Saturated Subtract (of Uns.) (byte)        |
+ * | SUBSUU_U.H    | Vector Signed Saturated Subtract (of Uns.) (halfword)    |
+ * | SUBSUU_U.W    | Vector Signed Saturated Subtract (of Uns.) (word)        |
+ * | SUBSUU_U.D    | Vector Signed Saturated Subtract (of Uns.) (doubleword)  |
+ * | SUBV.B        | Vector Subtract (byte)                                   |
+ * | SUBV.H        | Vector Subtract (halfword)                               |
+ * | SUBV.W        | Vector Subtract (word)                                   |
+ * | SUBV.D        | Vector Subtract (doubleword)                             |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Int Subtract group helpers here */
+
+
+/*
+ * Interleave
+ * ----------
+ *
+ * +---------------+----------------------------------------------------------+
+ * | ILVEV.B       | Vector Interleave Even (byte)                            |
+ * | ILVEV.H       | Vector Interleave Even (halfword)                        |
+ * | ILVEV.W       | Vector Interleave Even (word)                            |
+ * | ILVEV.D       | Vector Interleave Even (doubleword)                      |
+ * | ILVOD.B       | Vector Interleave Odd (byte)                             |
+ * | ILVOD.H       | Vector Interleave Odd (halfword)                         |
+ * | ILVOD.W       | Vector Interleave Odd (word)                             |
+ * | ILVOD.D       | Vector Interleave Odd (doubleword)                       |
+ * | ILVL.B        | Vector Interleave Left (byte)                            |
+ * | ILVL.H        | Vector Interleave Left (halfword)                        |
+ * | ILVL.W        | Vector Interleave Left (word)                            |
+ * | ILVL.D        | Vector Interleave Left (doubleword)                      |
+ * | ILVR.B        | Vector Interleave Right (byte)                           |
+ * | ILVR.H        | Vector Interleave Right (halfword)                       |
+ * | ILVR.W        | Vector Interleave Right (word)                           |
+ * | ILVR.D        | Vector Interleave Right (doubleword)                     |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Interleave group helpers here */
+
+
+/*
+ * Logic
+ * -----
+ *
+ * +---------------+----------------------------------------------------------+
+ * | AND.V         | Vector Logical And                                       |
+ * | NOR.V         | Vector Logical Negated Or                                |
+ * | OR.V          | Vector Logical Or                                        |
+ * | XOR.V         | Vector Logical Exclusive Or                              |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Logic group helpers here */
+
+
+/*
+ * Pack
+ * ----
+ *
+ * +---------------+----------------------------------------------------------+
+ * | PCKEV.B       | Vector Pack Even (byte)                                  |
+ * | PCKEV.H       | Vector Pack Even (halfword)                              |
+ * | PCKEV.W       | Vector Pack Even (word)                                  |
+ * | PCKEV.D       | Vector Pack Even (doubleword)                            |
+ * | PCKOD.B       | Vector Pack Odd (byte)                                   |
+ * | PCKOD.H       | Vector Pack Odd (halfword)                               |
+ * | PCKOD.W       | Vector Pack Odd (word)                                   |
+ * | PCKOD.D       | Vector Pack Odd (doubleword)                             |
+ * | VSHF.B        | Vector Data Preserving Shuffle (byte)                    |
+ * | VSHF.H        | Vector Data Preserving Shuffle (halfword)                |
+ * | VSHF.W        | Vector Data Preserving Shuffle (word)                    |
+ * | VSHF.D        | Vector Data Preserving Shuffle (doubleword)              |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Pack group helpers here */
+
+
+/*
+ * Shift
+ * -----
+ *
+ * +---------------+----------------------------------------------------------+
+ * | SLL.B         | Vector Shift Left (byte)                                 |
+ * | SLL.H         | Vector Shift Left (halfword)                             |
+ * | SLL.W         | Vector Shift Left (word)                                 |
+ * | SLL.D         | Vector Shift Left (doubleword)                           |
+ * | SRA.B         | Vector Shift Right Arithmetic (byte)                     |
+ * | SRA.H         | Vector Shift Right Arithmetic (halfword)                 |
+ * | SRA.W         | Vector Shift Right Arithmetic (word)                     |
+ * | SRA.D         | Vector Shift Right Arithmetic (doubleword)               |
+ * | SRAR.B        | Vector Shift Right Arithmetic Rounded (byte)             |
+ * | SRAR.H        | Vector Shift Right Arithmetic Rounded (halfword)         |
+ * | SRAR.W        | Vector Shift Right Arithmetic Rounded (word)             |
+ * | SRAR.D        | Vector Shift Right Arithmetic Rounded (doubleword)       |
+ * | SRL.B         | Vector Shift Right Logical (byte)                        |
+ * | SRL.H         | Vector Shift Right Logical (halfword)                    |
+ * | SRL.W         | Vector Shift Right Logical (word)                        |
+ * | SRL.D         | Vector Shift Right Logical (doubleword)                  |
+ * | SRLR.B        | Vector Shift Right Logical Rounded (byte)                |
+ * | SRLR.H        | Vector Shift Right Logical Rounded (halfword)            |
+ * | SRLR.W        | Vector Shift Right Logical Rounded (word)                |
+ * | SRLR.D        | Vector Shift Right Logical Rounded (doubleword)          |
+ * +---------------+----------------------------------------------------------+
+ */
+
+/* TODO: insert Shift group helpers here */
+
+
 static inline void msa_move_v(wr_t *pwd, wr_t *pws)
 {
     uint32_t i;
@@ -130,10 +591,6 @@ void helper_msa_ ## FUNC(CPUMIPSState *env, uint32_t wd, uint32_t ws,   \
     }                                                                   \
 }
 
-MSA_FN_VECTOR(and_v, pwd->d[i], pws->d[i] & pwt->d[i])
-MSA_FN_VECTOR(or_v, pwd->d[i], pws->d[i] | pwt->d[i])
-MSA_FN_VECTOR(nor_v, pwd->d[i], ~(pws->d[i] | pwt->d[i]))
-MSA_FN_VECTOR(xor_v, pwd->d[i], pws->d[i] ^ pwt->d[i])
 MSA_FN_VECTOR(bmnz_v, pwd->d[i],
         BIT_MOVE_IF_NOT_ZERO(pwd->d[i], pws->d[i], pwt->d[i], DF_DOUBLE))
 MSA_FN_VECTOR(bmz_v, pwd->d[i],
@@ -145,6 +602,46 @@ MSA_FN_VECTOR(bsel_v, pwd->d[i],
 #undef BIT_SELECT
 #undef MSA_FN_VECTOR
 
+void helper_msa_and_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->d[0] = pws->d[0] & pwt->d[0];
+    pwd->d[1] = pws->d[1] & pwt->d[1];
+}
+
+void helper_msa_or_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->d[0] = pws->d[0] | pwt->d[0];
+    pwd->d[1] = pws->d[1] | pwt->d[1];
+}
+
+void helper_msa_nor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->d[0] = ~(pws->d[0] | pwt->d[0]);
+    pwd->d[1] = ~(pws->d[1] | pwt->d[1]);
+}
+
+void helper_msa_xor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->d[0] = pws->d[0] ^ pwt->d[0];
+    pwd->d[1] = pws->d[1] ^ pwt->d[1];
+}
+
 static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2)
 {
     return arg1 + arg2;
@@ -360,16 +857,16 @@ static inline int64_t msa_binsr_df(uint32_t df, int64_t dest, int64_t arg1,
 
 static inline int64_t msa_sat_s_df(uint32_t df, int64_t arg, uint32_t m)
 {
-    return arg < M_MIN_INT(m+1) ? M_MIN_INT(m+1) :
-                                  arg > M_MAX_INT(m+1) ? M_MAX_INT(m+1) :
-                                                         arg;
+    return arg < M_MIN_INT(m + 1) ? M_MIN_INT(m + 1) :
+                                    arg > M_MAX_INT(m + 1) ? M_MAX_INT(m + 1) :
+                                                             arg;
 }
 
 static inline int64_t msa_sat_u_df(uint32_t df, int64_t arg, uint32_t m)
 {
     uint64_t u_arg = UNSIGNED(arg, df);
-    return  u_arg < M_MAX_UINT(m+1) ? u_arg :
-                                      M_MAX_UINT(m+1);
+    return  u_arg < M_MAX_UINT(m + 1) ? u_arg :
+                                        M_MAX_UINT(m + 1);
 }
 
 static inline int64_t msa_srar_df(uint32_t df, int64_t arg1, int64_t arg2)
@@ -641,14 +1138,15 @@ static inline int64_t msa_div_s_df(uint32_t df, int64_t arg1, int64_t arg2)
     if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
         return DF_MIN_INT(df);
     }
-    return arg2 ? arg1 / arg2 : 0;
+    return arg2 ? arg1 / arg2
+                : arg1 >= 0 ? -1 : 1;
 }
 
 static inline int64_t msa_div_u_df(uint32_t df, int64_t arg1, int64_t arg2)
 {
     uint64_t u_arg1 = UNSIGNED(arg1, df);
     uint64_t u_arg2 = UNSIGNED(arg2, df);
-    return u_arg2 ? u_arg1 / u_arg2 : 0;
+    return arg2 ? u_arg1 / u_arg2 : -1;
 }
 
 static inline int64_t msa_mod_s_df(uint32_t df, int64_t arg1, int64_t arg2)
@@ -656,27 +1154,27 @@ static inline int64_t msa_mod_s_df(uint32_t df, int64_t arg1, int64_t arg2)
     if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
         return 0;
     }
-    return arg2 ? arg1 % arg2 : 0;
+    return arg2 ? arg1 % arg2 : arg1;
 }
 
 static inline int64_t msa_mod_u_df(uint32_t df, int64_t arg1, int64_t arg2)
 {
     uint64_t u_arg1 = UNSIGNED(arg1, df);
     uint64_t u_arg2 = UNSIGNED(arg2, df);
-    return u_arg2 ? u_arg1 % u_arg2 : 0;
+    return u_arg2 ? u_arg1 % u_arg2 : u_arg1;
 }
 
 #define SIGNED_EVEN(a, df) \
-        ((((int64_t)(a)) << (64 - DF_BITS(df)/2)) >> (64 - DF_BITS(df)/2))
+        ((((int64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))
 
 #define UNSIGNED_EVEN(a, df) \
-        ((((uint64_t)(a)) << (64 - DF_BITS(df)/2)) >> (64 - DF_BITS(df)/2))
+        ((((uint64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))
 
 #define SIGNED_ODD(a, df) \
-        ((((int64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)/2))
+        ((((int64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
 
 #define UNSIGNED_ODD(a, df) \
-        ((((uint64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)/2))
+        ((((uint64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
 
 #define SIGNED_EXTRACT(e, o, a, df)     \
     do {                                \
@@ -804,28 +1302,45 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df,         \
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
-    uint32_t i;                                                         \
                                                                         \
     switch (df) {                                                       \
     case DF_BYTE:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
-            pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], pwt->b[i]);  \
-        }                                                               \
+        pwd->b[0]  = msa_ ## func ## _df(df, pws->b[0],  pwt->b[0]);    \
+        pwd->b[1]  = msa_ ## func ## _df(df, pws->b[1],  pwt->b[1]);    \
+        pwd->b[2]  = msa_ ## func ## _df(df, pws->b[2],  pwt->b[2]);    \
+        pwd->b[3]  = msa_ ## func ## _df(df, pws->b[3],  pwt->b[3]);    \
+        pwd->b[4]  = msa_ ## func ## _df(df, pws->b[4],  pwt->b[4]);    \
+        pwd->b[5]  = msa_ ## func ## _df(df, pws->b[5],  pwt->b[5]);    \
+        pwd->b[6]  = msa_ ## func ## _df(df, pws->b[6],  pwt->b[6]);    \
+        pwd->b[7]  = msa_ ## func ## _df(df, pws->b[7],  pwt->b[7]);    \
+        pwd->b[8]  = msa_ ## func ## _df(df, pws->b[8],  pwt->b[8]);    \
+        pwd->b[9]  = msa_ ## func ## _df(df, pws->b[9],  pwt->b[9]);    \
+        pwd->b[10] = msa_ ## func ## _df(df, pws->b[10], pwt->b[10]);   \
+        pwd->b[11] = msa_ ## func ## _df(df, pws->b[11], pwt->b[11]);   \
+        pwd->b[12] = msa_ ## func ## _df(df, pws->b[12], pwt->b[12]);   \
+        pwd->b[13] = msa_ ## func ## _df(df, pws->b[13], pwt->b[13]);   \
+        pwd->b[14] = msa_ ## func ## _df(df, pws->b[14], pwt->b[14]);   \
+        pwd->b[15] = msa_ ## func ## _df(df, pws->b[15], pwt->b[15]);   \
         break;                                                          \
     case DF_HALF:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
-            pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], pwt->h[i]);  \
-        }                                                               \
+        pwd->h[0] = msa_ ## func ## _df(df, pws->h[0], pwt->h[0]);      \
+        pwd->h[1] = msa_ ## func ## _df(df, pws->h[1], pwt->h[1]);      \
+        pwd->h[2] = msa_ ## func ## _df(df, pws->h[2], pwt->h[2]);      \
+        pwd->h[3] = msa_ ## func ## _df(df, pws->h[3], pwt->h[3]);      \
+        pwd->h[4] = msa_ ## func ## _df(df, pws->h[4], pwt->h[4]);      \
+        pwd->h[5] = msa_ ## func ## _df(df, pws->h[5], pwt->h[5]);      \
+        pwd->h[6] = msa_ ## func ## _df(df, pws->h[6], pwt->h[6]);      \
+        pwd->h[7] = msa_ ## func ## _df(df, pws->h[7], pwt->h[7]);      \
         break;                                                          \
     case DF_WORD:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
-            pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], pwt->w[i]);  \
-        }                                                               \
+        pwd->w[0] = msa_ ## func ## _df(df, pws->w[0], pwt->w[0]);      \
+        pwd->w[1] = msa_ ## func ## _df(df, pws->w[1], pwt->w[1]);      \
+        pwd->w[2] = msa_ ## func ## _df(df, pws->w[2], pwt->w[2]);      \
+        pwd->w[3] = msa_ ## func ## _df(df, pws->w[3], pwt->w[3]);      \
         break;                                                          \
     case DF_DOUBLE:                                                     \
-        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
-            pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], pwt->d[i]);  \
-        }                                                               \
+        pwd->d[0] = msa_ ## func ## _df(df, pws->d[0], pwt->d[0]);      \
+        pwd->d[1] = msa_ ## func ## _df(df, pws->d[1], pwt->d[1]);      \
         break;                                                          \
     default:                                                            \
         assert(0);                                                      \
@@ -1011,42 +1526,71 @@ static inline int64_t msa_msubr_q_df(uint32_t df, int64_t dest, int64_t arg1,
 }
 
 #define MSA_TEROP_DF(func) \
-void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd,   \
-                          uint32_t ws, uint32_t wt)                     \
-{                                                                       \
-    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
-    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
-    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
-    uint32_t i;                                                         \
-                                                                        \
-    switch (df) {                                                       \
-    case DF_BYTE:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
-            pwd->b[i] = msa_ ## func ## _df(df, pwd->b[i], pws->b[i],   \
-                                            pwt->b[i]);                 \
-        }                                                               \
-        break;                                                          \
-    case DF_HALF:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
-            pwd->h[i] = msa_ ## func ## _df(df, pwd->h[i], pws->h[i],   \
-                                            pwt->h[i]);                 \
-        }                                                               \
-        break;                                                          \
-    case DF_WORD:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
-            pwd->w[i] = msa_ ## func ## _df(df, pwd->w[i], pws->w[i],   \
-                                            pwt->w[i]);                 \
-        }                                                               \
-        break;                                                          \
-    case DF_DOUBLE:                                                     \
-        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
-            pwd->d[i] = msa_ ## func ## _df(df, pwd->d[i], pws->d[i],   \
-                                            pwt->d[i]);                 \
-        }                                                               \
-        break;                                                          \
-    default:                                                            \
-        assert(0);                                                      \
-    }                                                                   \
+void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd,  \
+                                uint32_t ws, uint32_t wt)                     \
+{                                                                             \
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                                \
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                                \
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                                \
+                                                                              \
+    switch (df) {                                                             \
+    case DF_BYTE:                                                             \
+        pwd->b[0]  = msa_ ## func ## _df(df, pwd->b[0],  pws->b[0],           \
+                                             pwt->b[0]);                      \
+        pwd->b[1]  = msa_ ## func ## _df(df, pwd->b[1],  pws->b[1],           \
+                                             pwt->b[1]);                      \
+        pwd->b[2]  = msa_ ## func ## _df(df, pwd->b[2],  pws->b[2],           \
+                                             pwt->b[2]);                      \
+        pwd->b[3]  = msa_ ## func ## _df(df, pwd->b[3],  pws->b[3],           \
+                                             pwt->b[3]);                      \
+        pwd->b[4]  = msa_ ## func ## _df(df, pwd->b[4],  pws->b[4],           \
+                                             pwt->b[4]);                      \
+        pwd->b[5]  = msa_ ## func ## _df(df, pwd->b[5],  pws->b[5],           \
+                                             pwt->b[5]);                      \
+        pwd->b[6]  = msa_ ## func ## _df(df, pwd->b[6],  pws->b[6],           \
+                                             pwt->b[6]);                      \
+        pwd->b[7]  = msa_ ## func ## _df(df, pwd->b[7],  pws->b[7],           \
+                                             pwt->b[7]);                      \
+        pwd->b[8]  = msa_ ## func ## _df(df, pwd->b[8],  pws->b[8],           \
+                                             pwt->b[8]);                      \
+        pwd->b[9]  = msa_ ## func ## _df(df, pwd->b[9],  pws->b[9],           \
+                                             pwt->b[9]);                      \
+        pwd->b[10] = msa_ ## func ## _df(df, pwd->b[10], pws->b[10],          \
+                                             pwt->b[10]);                     \
+        pwd->b[11] = msa_ ## func ## _df(df, pwd->b[11], pws->b[11],          \
+                                             pwt->b[11]);                     \
+        pwd->b[12] = msa_ ## func ## _df(df, pwd->b[12], pws->b[12],          \
+                                             pwt->b[12]);                     \
+        pwd->b[13] = msa_ ## func ## _df(df, pwd->b[13], pws->b[13],          \
+                                             pwt->b[13]);                     \
+        pwd->b[14] = msa_ ## func ## _df(df, pwd->b[14], pws->b[14],          \
+                                             pwt->b[14]);                     \
+        pwd->b[15] = msa_ ## func ## _df(df, pwd->b[15], pws->b[15],          \
+                                             pwt->b[15]);                     \
+        break;                                                                \
+    case DF_HALF:                                                             \
+        pwd->h[0] = msa_ ## func ## _df(df, pwd->h[0], pws->h[0], pwt->h[0]); \
+        pwd->h[1] = msa_ ## func ## _df(df, pwd->h[1], pws->h[1], pwt->h[1]); \
+        pwd->h[2] = msa_ ## func ## _df(df, pwd->h[2], pws->h[2], pwt->h[2]); \
+        pwd->h[3] = msa_ ## func ## _df(df, pwd->h[3], pws->h[3], pwt->h[3]); \
+        pwd->h[4] = msa_ ## func ## _df(df, pwd->h[4], pws->h[4], pwt->h[4]); \
+        pwd->h[5] = msa_ ## func ## _df(df, pwd->h[5], pws->h[5], pwt->h[5]); \
+        pwd->h[6] = msa_ ## func ## _df(df, pwd->h[6], pws->h[6], pwt->h[6]); \
+        pwd->h[7] = msa_ ## func ## _df(df, pwd->h[7], pws->h[7], pwt->h[7]); \
+        break;                                                                \
+    case DF_WORD:                                                             \
+        pwd->w[0] = msa_ ## func ## _df(df, pwd->w[0], pws->w[0], pwt->w[0]); \
+        pwd->w[1] = msa_ ## func ## _df(df, pwd->w[1], pws->w[1], pwt->w[1]); \
+        pwd->w[2] = msa_ ## func ## _df(df, pwd->w[2], pws->w[2], pwt->w[2]); \
+        pwd->w[3] = msa_ ## func ## _df(df, pwd->w[3], pws->w[3], pwt->w[3]); \
+        break;                                                                \
+    case DF_DOUBLE:                                                           \
+        pwd->d[0] = msa_ ## func ## _df(df, pwd->d[0], pws->d[0], pwt->d[0]); \
+        pwd->d[1] = msa_ ## func ## _df(df, pwd->d[1], pws->d[1], pwt->d[1]); \
+        break;                                                                \
+    default:                                                                  \
+        assert(0);                                                            \
+    }                                                                         \
 }
 
 MSA_TEROP_DF(maddv)
@@ -1158,61 +1702,14 @@ void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \
             (DF_ELEMENTS(DF) / 2)
 
 #define Rb(pwr, i) (pwr->b[i])
-#define Lb(pwr, i) (pwr->b[i + DF_ELEMENTS(DF_BYTE)/2])
+#define Lb(pwr, i) (pwr->b[i + DF_ELEMENTS(DF_BYTE) / 2])
 #define Rh(pwr, i) (pwr->h[i])
-#define Lh(pwr, i) (pwr->h[i + DF_ELEMENTS(DF_HALF)/2])
+#define Lh(pwr, i) (pwr->h[i + DF_ELEMENTS(DF_HALF) / 2])
 #define Rw(pwr, i) (pwr->w[i])
-#define Lw(pwr, i) (pwr->w[i + DF_ELEMENTS(DF_WORD)/2])
+#define Lw(pwr, i) (pwr->w[i + DF_ELEMENTS(DF_WORD) / 2])
 #define Rd(pwr, i) (pwr->d[i])
-#define Ld(pwr, i) (pwr->d[i + DF_ELEMENTS(DF_DOUBLE)/2])
+#define Ld(pwr, i) (pwr->d[i + DF_ELEMENTS(DF_DOUBLE) / 2])
 
-#define MSA_DO(DF)                      \
-    do {                                \
-        R##DF(pwx, i) = pwt->DF[2*i];   \
-        L##DF(pwx, i) = pws->DF[2*i];   \
-    } while (0)
-MSA_FN_DF(pckev_df)
-#undef MSA_DO
-
-#define MSA_DO(DF)                      \
-    do {                                \
-        R##DF(pwx, i) = pwt->DF[2*i+1]; \
-        L##DF(pwx, i) = pws->DF[2*i+1]; \
-    } while (0)
-MSA_FN_DF(pckod_df)
-#undef MSA_DO
-
-#define MSA_DO(DF)                      \
-    do {                                \
-        pwx->DF[2*i]   = L##DF(pwt, i); \
-        pwx->DF[2*i+1] = L##DF(pws, i); \
-    } while (0)
-MSA_FN_DF(ilvl_df)
-#undef MSA_DO
-
-#define MSA_DO(DF)                      \
-    do {                                \
-        pwx->DF[2*i]   = R##DF(pwt, i); \
-        pwx->DF[2*i+1] = R##DF(pws, i); \
-    } while (0)
-MSA_FN_DF(ilvr_df)
-#undef MSA_DO
-
-#define MSA_DO(DF)                      \
-    do {                                \
-        pwx->DF[2*i]   = pwt->DF[2*i];  \
-        pwx->DF[2*i+1] = pws->DF[2*i];  \
-    } while (0)
-MSA_FN_DF(ilvev_df)
-#undef MSA_DO
-
-#define MSA_DO(DF)                          \
-    do {                                    \
-        pwx->DF[2*i]   = pwt->DF[2*i+1];    \
-        pwx->DF[2*i+1] = pws->DF[2*i+1];    \
-    } while (0)
-MSA_FN_DF(ilvod_df)
-#undef MSA_DO
 #undef MSA_LOOP_COND
 
 #define MSA_LOOP_COND(DF) \
@@ -1230,98 +1727,480 @@ MSA_FN_DF(vshf_df)
 #undef MSA_LOOP_COND
 #undef MSA_FN_DF
 
-void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-                        uint32_t ws, uint32_t n)
+
+void helper_msa_ilvev_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                         uint32_t ws, uint32_t wt)
 {
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-    msa_sld_df(df, pwd, pws, n);
+    switch (df) {
+    case DF_BYTE:
+        pwd->b[15] = pws->b[14];
+        pwd->b[14] = pwt->b[14];
+        pwd->b[13] = pws->b[12];
+        pwd->b[12] = pwt->b[12];
+        pwd->b[11] = pws->b[10];
+        pwd->b[10] = pwt->b[10];
+        pwd->b[9]  = pws->b[8];
+        pwd->b[8]  = pwt->b[8];
+        pwd->b[7]  = pws->b[6];
+        pwd->b[6]  = pwt->b[6];
+        pwd->b[5]  = pws->b[4];
+        pwd->b[4]  = pwt->b[4];
+        pwd->b[3]  = pws->b[2];
+        pwd->b[2]  = pwt->b[2];
+        pwd->b[1]  = pws->b[0];
+        pwd->b[0]  = pwt->b[0];
+        break;
+    case DF_HALF:
+        pwd->h[7] = pws->h[6];
+        pwd->h[6] = pwt->h[6];
+        pwd->h[5] = pws->h[4];
+        pwd->h[4] = pwt->h[4];
+        pwd->h[3] = pws->h[2];
+        pwd->h[2] = pwt->h[2];
+        pwd->h[1] = pws->h[0];
+        pwd->h[0] = pwt->h[0];
+        break;
+    case DF_WORD:
+        pwd->w[3] = pws->w[2];
+        pwd->w[2] = pwt->w[2];
+        pwd->w[1] = pws->w[0];
+        pwd->w[0] = pwt->w[0];
+        break;
+    case DF_DOUBLE:
+        pwd->d[1] = pws->d[0];
+        pwd->d[0] = pwt->d[0];
+        break;
+    default:
+        assert(0);
+    }
 }
 
-void helper_msa_splati_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-                          uint32_t ws, uint32_t n)
+void helper_msa_ilvod_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                         uint32_t ws, uint32_t wt)
 {
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-    msa_splat_df(df, pwd, pws, n);
+    switch (df) {
+    case DF_BYTE:
+        pwd->b[0]  = pwt->b[1];
+        pwd->b[1]  = pws->b[1];
+        pwd->b[2]  = pwt->b[3];
+        pwd->b[3]  = pws->b[3];
+        pwd->b[4]  = pwt->b[5];
+        pwd->b[5]  = pws->b[5];
+        pwd->b[6]  = pwt->b[7];
+        pwd->b[7]  = pws->b[7];
+        pwd->b[8]  = pwt->b[9];
+        pwd->b[9]  = pws->b[9];
+        pwd->b[10] = pwt->b[11];
+        pwd->b[11] = pws->b[11];
+        pwd->b[12] = pwt->b[13];
+        pwd->b[13] = pws->b[13];
+        pwd->b[14] = pwt->b[15];
+        pwd->b[15] = pws->b[15];
+        break;
+    case DF_HALF:
+        pwd->h[0] = pwt->h[1];
+        pwd->h[1] = pws->h[1];
+        pwd->h[2] = pwt->h[3];
+        pwd->h[3] = pws->h[3];
+        pwd->h[4] = pwt->h[5];
+        pwd->h[5] = pws->h[5];
+        pwd->h[6] = pwt->h[7];
+        pwd->h[7] = pws->h[7];
+        break;
+    case DF_WORD:
+        pwd->w[0] = pwt->w[1];
+        pwd->w[1] = pws->w[1];
+        pwd->w[2] = pwt->w[3];
+        pwd->w[3] = pws->w[3];
+        break;
+    case DF_DOUBLE:
+        pwd->d[0] = pwt->d[1];
+        pwd->d[1] = pws->d[1];
+        break;
+    default:
+        assert(0);
+    }
 }
 
-void helper_msa_copy_s_df(CPUMIPSState *env, uint32_t df, uint32_t rd,
-                          uint32_t ws, uint32_t n)
+void helper_msa_ilvl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                        uint32_t ws, uint32_t wt)
 {
-    n %= DF_ELEMENTS(df);
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
     switch (df) {
     case DF_BYTE:
-        env->active_tc.gpr[rd] = (int8_t)env->active_fpu.fpr[ws].wr.b[n];
+        pwd->b[0]  = pwt->b[8];
+        pwd->b[1]  = pws->b[8];
+        pwd->b[2]  = pwt->b[9];
+        pwd->b[3]  = pws->b[9];
+        pwd->b[4]  = pwt->b[10];
+        pwd->b[5]  = pws->b[10];
+        pwd->b[6]  = pwt->b[11];
+        pwd->b[7]  = pws->b[11];
+        pwd->b[8]  = pwt->b[12];
+        pwd->b[9]  = pws->b[12];
+        pwd->b[10] = pwt->b[13];
+        pwd->b[11] = pws->b[13];
+        pwd->b[12] = pwt->b[14];
+        pwd->b[13] = pws->b[14];
+        pwd->b[14] = pwt->b[15];
+        pwd->b[15] = pws->b[15];
         break;
     case DF_HALF:
-        env->active_tc.gpr[rd] = (int16_t)env->active_fpu.fpr[ws].wr.h[n];
+        pwd->h[0] = pwt->h[4];
+        pwd->h[1] = pws->h[4];
+        pwd->h[2] = pwt->h[5];
+        pwd->h[3] = pws->h[5];
+        pwd->h[4] = pwt->h[6];
+        pwd->h[5] = pws->h[6];
+        pwd->h[6] = pwt->h[7];
+        pwd->h[7] = pws->h[7];
         break;
     case DF_WORD:
-        env->active_tc.gpr[rd] = (int32_t)env->active_fpu.fpr[ws].wr.w[n];
+        pwd->w[0] = pwt->w[2];
+        pwd->w[1] = pws->w[2];
+        pwd->w[2] = pwt->w[3];
+        pwd->w[3] = pws->w[3];
         break;
-#ifdef TARGET_MIPS64
     case DF_DOUBLE:
-        env->active_tc.gpr[rd] = (int64_t)env->active_fpu.fpr[ws].wr.d[n];
+        pwd->d[0] = pwt->d[1];
+        pwd->d[1] = pws->d[1];
         break;
-#endif
     default:
         assert(0);
     }
 }
 
-void helper_msa_copy_u_df(CPUMIPSState *env, uint32_t df, uint32_t rd,
-                          uint32_t ws, uint32_t n)
+void helper_msa_ilvr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                        uint32_t ws, uint32_t wt)
 {
-    n %= DF_ELEMENTS(df);
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
     switch (df) {
     case DF_BYTE:
-        env->active_tc.gpr[rd] = (uint8_t)env->active_fpu.fpr[ws].wr.b[n];
+        pwd->b[15] = pws->b[7];
+        pwd->b[14] = pwt->b[7];
+        pwd->b[13] = pws->b[6];
+        pwd->b[12] = pwt->b[6];
+        pwd->b[11] = pws->b[5];
+        pwd->b[10] = pwt->b[5];
+        pwd->b[9]  = pws->b[4];
+        pwd->b[8]  = pwt->b[4];
+        pwd->b[7]  = pws->b[3];
+        pwd->b[6]  = pwt->b[3];
+        pwd->b[5]  = pws->b[2];
+        pwd->b[4]  = pwt->b[2];
+        pwd->b[3]  = pws->b[1];
+        pwd->b[2]  = pwt->b[1];
+        pwd->b[1]  = pws->b[0];
+        pwd->b[0]  = pwt->b[0];
         break;
     case DF_HALF:
-        env->active_tc.gpr[rd] = (uint16_t)env->active_fpu.fpr[ws].wr.h[n];
+        pwd->h[7] = pws->h[3];
+        pwd->h[6] = pwt->h[3];
+        pwd->h[5] = pws->h[2];
+        pwd->h[4] = pwt->h[2];
+        pwd->h[3] = pws->h[1];
+        pwd->h[2] = pwt->h[1];
+        pwd->h[1] = pws->h[0];
+        pwd->h[0] = pwt->h[0];
         break;
     case DF_WORD:
-        env->active_tc.gpr[rd] = (uint32_t)env->active_fpu.fpr[ws].wr.w[n];
+        pwd->w[3] = pws->w[1];
+        pwd->w[2] = pwt->w[1];
+        pwd->w[1] = pws->w[0];
+        pwd->w[0] = pwt->w[0];
         break;
-#ifdef TARGET_MIPS64
     case DF_DOUBLE:
-        env->active_tc.gpr[rd] = (uint64_t)env->active_fpu.fpr[ws].wr.d[n];
+        pwd->d[1] = pws->d[0];
+        pwd->d[0] = pwt->d[0];
         break;
-#endif
     default:
         assert(0);
     }
 }
 
-void helper_msa_insert_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-                          uint32_t rs_num, uint32_t n)
+void helper_msa_pckev_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                         uint32_t ws, uint32_t wt)
 {
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
-    target_ulong rs = env->active_tc.gpr[rs_num];
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
     switch (df) {
     case DF_BYTE:
-        pwd->b[n] = (int8_t)rs;
+        pwd->b[15] = pws->b[14];
+        pwd->b[13] = pws->b[10];
+        pwd->b[11] = pws->b[6];
+        pwd->b[9]  = pws->b[2];
+        pwd->b[7]  = pwt->b[14];
+        pwd->b[5]  = pwt->b[10];
+        pwd->b[3]  = pwt->b[6];
+        pwd->b[1]  = pwt->b[2];
+        pwd->b[14] = pws->b[12];
+        pwd->b[10] = pws->b[4];
+        pwd->b[6]  = pwt->b[12];
+        pwd->b[2]  = pwt->b[4];
+        pwd->b[12] = pws->b[8];
+        pwd->b[4]  = pwt->b[8];
+        pwd->b[8]  = pws->b[0];
+        pwd->b[0]  = pwt->b[0];
         break;
     case DF_HALF:
-        pwd->h[n] = (int16_t)rs;
+        pwd->h[7] = pws->h[6];
+        pwd->h[5] = pws->h[2];
+        pwd->h[3] = pwt->h[6];
+        pwd->h[1] = pwt->h[2];
+        pwd->h[6] = pws->h[4];
+        pwd->h[2] = pwt->h[4];
+        pwd->h[4] = pws->h[0];
+        pwd->h[0] = pwt->h[0];
         break;
     case DF_WORD:
-        pwd->w[n] = (int32_t)rs;
+        pwd->w[3] = pws->w[2];
+        pwd->w[1] = pwt->w[2];
+        pwd->w[2] = pws->w[0];
+        pwd->w[0] = pwt->w[0];
         break;
     case DF_DOUBLE:
-        pwd->d[n] = (int64_t)rs;
+        pwd->d[1] = pws->d[0];
+        pwd->d[0] = pwt->d[0];
         break;
     default:
         assert(0);
     }
 }
 
+void helper_msa_pckod_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                         uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    switch (df) {
+    case DF_BYTE:
+        pwd->b[0]  = pwt->b[1];
+        pwd->b[2]  = pwt->b[5];
+        pwd->b[4]  = pwt->b[9];
+        pwd->b[6]  = pwt->b[13];
+        pwd->b[8]  = pws->b[1];
+        pwd->b[10] = pws->b[5];
+        pwd->b[12] = pws->b[9];
+        pwd->b[14] = pws->b[13];
+        pwd->b[1]  = pwt->b[3];
+        pwd->b[5]  = pwt->b[11];
+        pwd->b[9]  = pws->b[3];
+        pwd->b[13] = pws->b[11];
+        pwd->b[3]  = pwt->b[7];
+        pwd->b[11] = pws->b[7];
+        pwd->b[7]  = pwt->b[15];
+        pwd->b[15] = pws->b[15];
+        break;
+    case DF_HALF:
+        pwd->h[0] = pwt->h[1];
+        pwd->h[2] = pwt->h[5];
+        pwd->h[4] = pws->h[1];
+        pwd->h[6] = pws->h[5];
+        pwd->h[1] = pwt->h[3];
+        pwd->h[5] = pws->h[3];
+        pwd->h[3] = pwt->h[7];
+        pwd->h[7] = pws->h[7];
+        break;
+    case DF_WORD:
+        pwd->w[0] = pwt->w[1];
+        pwd->w[2] = pws->w[1];
+        pwd->w[1] = pwt->w[3];
+        pwd->w[3] = pws->w[3];
+        break;
+    case DF_DOUBLE:
+        pwd->d[0] = pwt->d[1];
+        pwd->d[1] = pws->d[1];
+        break;
+    default:
+        assert(0);
+    }
+}
+
+
+void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                        uint32_t ws, uint32_t n)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+
+    msa_sld_df(df, pwd, pws, n);
+}
+
+void helper_msa_splati_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                          uint32_t ws, uint32_t n)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+
+    msa_splat_df(df, pwd, pws, n);
+}
+
+void helper_msa_copy_s_b(CPUMIPSState *env, uint32_t rd,
+                         uint32_t ws, uint32_t n)
+{
+    n %= 16;
+#if defined(HOST_WORDS_BIGENDIAN)
+    if (n < 8) {
+        n = 8 - n - 1;
+    } else {
+        n = 24 - n - 1;
+    }
+#endif
+    env->active_tc.gpr[rd] = (int8_t)env->active_fpu.fpr[ws].wr.b[n];
+}
+
+void helper_msa_copy_s_h(CPUMIPSState *env, uint32_t rd,
+                         uint32_t ws, uint32_t n)
+{
+    n %= 8;
+#if defined(HOST_WORDS_BIGENDIAN)
+    if (n < 4) {
+        n = 4 - n - 1;
+    } else {
+        n = 12 - n - 1;
+    }
+#endif
+    env->active_tc.gpr[rd] = (int16_t)env->active_fpu.fpr[ws].wr.h[n];
+}
+
+void helper_msa_copy_s_w(CPUMIPSState *env, uint32_t rd,
+                         uint32_t ws, uint32_t n)
+{
+    n %= 4;
+#if defined(HOST_WORDS_BIGENDIAN)
+    if (n < 2) {
+        n = 2 - n - 1;
+    } else {
+        n = 6 - n - 1;
+    }
+#endif
+    env->active_tc.gpr[rd] = (int32_t)env->active_fpu.fpr[ws].wr.w[n];
+}
+
+void helper_msa_copy_s_d(CPUMIPSState *env, uint32_t rd,
+                         uint32_t ws, uint32_t n)
+{
+    n %= 2;
+    env->active_tc.gpr[rd] = (int64_t)env->active_fpu.fpr[ws].wr.d[n];
+}
+
+void helper_msa_copy_u_b(CPUMIPSState *env, uint32_t rd,
+                         uint32_t ws, uint32_t n)
+{
+    n %= 16;
+#if defined(HOST_WORDS_BIGENDIAN)
+    if (n < 8) {
+        n = 8 - n - 1;
+    } else {
+        n = 24 - n - 1;
+    }
+#endif
+    env->active_tc.gpr[rd] = (uint8_t)env->active_fpu.fpr[ws].wr.b[n];
+}
+
+void helper_msa_copy_u_h(CPUMIPSState *env, uint32_t rd,
+                         uint32_t ws, uint32_t n)
+{
+    n %= 8;
+#if defined(HOST_WORDS_BIGENDIAN)
+    if (n < 4) {
+        n = 4 - n - 1;
+    } else {
+        n = 12 - n - 1;
+    }
+#endif
+    env->active_tc.gpr[rd] = (uint16_t)env->active_fpu.fpr[ws].wr.h[n];
+}
+
+void helper_msa_copy_u_w(CPUMIPSState *env, uint32_t rd,
+                         uint32_t ws, uint32_t n)
+{
+    n %= 4;
+#if defined(HOST_WORDS_BIGENDIAN)
+    if (n < 2) {
+        n = 2 - n - 1;
+    } else {
+        n = 6 - n - 1;
+    }
+#endif
+    env->active_tc.gpr[rd] = (uint32_t)env->active_fpu.fpr[ws].wr.w[n];
+}
+
+void helper_msa_insert_b(CPUMIPSState *env, uint32_t wd,
+                          uint32_t rs_num, uint32_t n)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    target_ulong rs = env->active_tc.gpr[rs_num];
+    n %= 16;
+#if defined(HOST_WORDS_BIGENDIAN)
+    if (n < 8) {
+        n = 8 - n - 1;
+    } else {
+        n = 24 - n - 1;
+    }
+#endif
+    pwd->b[n] = (int8_t)rs;
+}
+
+void helper_msa_insert_h(CPUMIPSState *env, uint32_t wd,
+                          uint32_t rs_num, uint32_t n)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    target_ulong rs = env->active_tc.gpr[rs_num];
+    n %= 8;
+#if defined(HOST_WORDS_BIGENDIAN)
+    if (n < 4) {
+        n = 4 - n - 1;
+    } else {
+        n = 12 - n - 1;
+    }
+#endif
+    pwd->h[n] = (int16_t)rs;
+}
+
+void helper_msa_insert_w(CPUMIPSState *env, uint32_t wd,
+                          uint32_t rs_num, uint32_t n)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    target_ulong rs = env->active_tc.gpr[rs_num];
+    n %= 4;
+#if defined(HOST_WORDS_BIGENDIAN)
+    if (n < 2) {
+        n = 2 - n - 1;
+    } else {
+        n = 6 - n - 1;
+    }
+#endif
+    pwd->w[n] = (int32_t)rs;
+}
+
+void helper_msa_insert_d(CPUMIPSState *env, uint32_t wd,
+                          uint32_t rs_num, uint32_t n)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    target_ulong rs = env->active_tc.gpr[rs_num];
+    n %= 2;
+    pwd->d[n] = (int64_t)rs;
+}
+
 void helper_msa_insve_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                          uint32_t ws, uint32_t n)
 {
@@ -1462,28 +2341,45 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df,         \
 {                                                                       \
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
-    uint32_t i;                                                         \
                                                                         \
     switch (df) {                                                       \
     case DF_BYTE:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
-            pwd->b[i] = msa_ ## func ## _df(df, pws->b[i]);             \
-        }                                                               \
+        pwd->b[0]  = msa_ ## func ## _df(df, pws->b[0]);                \
+        pwd->b[1]  = msa_ ## func ## _df(df, pws->b[1]);                \
+        pwd->b[2]  = msa_ ## func ## _df(df, pws->b[2]);                \
+        pwd->b[3]  = msa_ ## func ## _df(df, pws->b[3]);                \
+        pwd->b[4]  = msa_ ## func ## _df(df, pws->b[4]);                \
+        pwd->b[5]  = msa_ ## func ## _df(df, pws->b[5]);                \
+        pwd->b[6]  = msa_ ## func ## _df(df, pws->b[6]);                \
+        pwd->b[7]  = msa_ ## func ## _df(df, pws->b[7]);                \
+        pwd->b[8]  = msa_ ## func ## _df(df, pws->b[8]);                \
+        pwd->b[9]  = msa_ ## func ## _df(df, pws->b[9]);                \
+        pwd->b[10] = msa_ ## func ## _df(df, pws->b[10]);               \
+        pwd->b[11] = msa_ ## func ## _df(df, pws->b[11]);               \
+        pwd->b[12] = msa_ ## func ## _df(df, pws->b[12]);               \
+        pwd->b[13] = msa_ ## func ## _df(df, pws->b[13]);               \
+        pwd->b[14] = msa_ ## func ## _df(df, pws->b[14]);               \
+        pwd->b[15] = msa_ ## func ## _df(df, pws->b[15]);               \
         break;                                                          \
     case DF_HALF:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
-            pwd->h[i] = msa_ ## func ## _df(df, pws->h[i]);             \
-        }                                                               \
+        pwd->h[0] = msa_ ## func ## _df(df, pws->h[0]);                 \
+        pwd->h[1] = msa_ ## func ## _df(df, pws->h[1]);                 \
+        pwd->h[2] = msa_ ## func ## _df(df, pws->h[2]);                 \
+        pwd->h[3] = msa_ ## func ## _df(df, pws->h[3]);                 \
+        pwd->h[4] = msa_ ## func ## _df(df, pws->h[4]);                 \
+        pwd->h[5] = msa_ ## func ## _df(df, pws->h[5]);                 \
+        pwd->h[6] = msa_ ## func ## _df(df, pws->h[6]);                 \
+        pwd->h[7] = msa_ ## func ## _df(df, pws->h[7]);                 \
         break;                                                          \
     case DF_WORD:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
-            pwd->w[i] = msa_ ## func ## _df(df, pws->w[i]);             \
-        }                                                               \
+        pwd->w[0] = msa_ ## func ## _df(df, pws->w[0]);                 \
+        pwd->w[1] = msa_ ## func ## _df(df, pws->w[1]);                 \
+        pwd->w[2] = msa_ ## func ## _df(df, pws->w[2]);                 \
+        pwd->w[3] = msa_ ## func ## _df(df, pws->w[3]);                 \
         break;                                                          \
     case DF_DOUBLE:                                                     \
-        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
-            pwd->d[i] = msa_ ## func ## _df(df, pws->d[i]);             \
-        }                                                               \
+        pwd->d[0] = msa_ ## func ## _df(df, pws->d[0]);                 \
+        pwd->d[1] = msa_ ## func ## _df(df, pws->d[1]);                 \
         break;                                                          \
     default:                                                            \
         assert(0);                                                      \
@@ -1576,8 +2472,10 @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
         c &= ~FP_UNDERFLOW;
     }
 
-    /* Reciprocal operations set only Inexact when valid and not
-       divide by zero */
+    /*
+     * Reciprocal operations set only Inexact when valid and not
+     * divide by zero
+     */
     if ((action & RECIPROCAL_INEXACT) &&
             (c & (FP_INVALID | FP_DIV0)) == 0) {
         c = FP_INEXACT;
@@ -1586,15 +2484,19 @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
     cause = c & enable;    /* all current enabled exceptions */
 
     if (cause == 0) {
-        /* No enabled exception, update the MSACSR Cause
-         with all current exceptions */
+        /*
+         * No enabled exception, update the MSACSR Cause
+         * with all current exceptions
+         */
         SET_FP_CAUSE(env->active_tc.msacsr,
                 (GET_FP_CAUSE(env->active_tc.msacsr) | c));
     } else {
         /* Current exceptions are enabled */
         if ((env->active_tc.msacsr & MSACSR_NX_MASK) == 0) {
-            /* Exception(s) will trap, update MSACSR Cause
-           with all enabled exceptions */
+            /*
+             * Exception(s) will trap, update MSACSR Cause
+             * with all enabled exceptions
+             */
             SET_FP_CAUSE(env->active_tc.msacsr,
                     (GET_FP_CAUSE(env->active_tc.msacsr) | c));
         }
@@ -2660,9 +3562,11 @@ void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     switch (df) {
     case DF_WORD:
         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
-            /* Half precision floats come in two formats: standard
-               IEEE and "ARM" format.  The latter gains extra exponent
-               range by omitting the NaN/Inf encodings.  */
+            /*
+             * Half precision floats come in two formats: standard
+             * IEEE and "ARM" format.  The latter gains extra exponent
+             * range by omitting the NaN/Inf encodings.
+             */
             flag ieee = 1;
 
             MSA_FLOAT_BINOP(Lh(pwx, i), from_float32, pws->w[i], ieee, 16);
@@ -2921,7 +3825,7 @@ void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
 void helper_msa_fclass_df(CPUMIPSState *env, uint32_t df,
         uint32_t wd, uint32_t ws)
 {
-    float_status* status = &env->active_tc.msa_fp_status;
+    float_status *status = &env->active_tc.msa_fp_status;
 
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
@@ -3218,9 +4122,11 @@ void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     switch (df) {
     case DF_WORD:
         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
-            /* Half precision floats come in two formats: standard
-               IEEE and "ARM" format.  The latter gains extra exponent
-               range by omitting the NaN/Inf encodings.  */
+            /*
+             * Half precision floats come in two formats: standard
+             * IEEE and "ARM" format.  The latter gains extra exponent
+             * range by omitting the NaN/Inf encodings.
+             */
             flag ieee = 1;
 
             MSA_FLOAT_BINOP(pwx->w[i], from_float16, Lh(pws, i), ieee, 32);
@@ -3252,9 +4158,11 @@ void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     switch (df) {
     case DF_WORD:
         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
-            /* Half precision floats come in two formats: standard
-               IEEE and "ARM" format.  The latter gains extra exponent
-               range by omitting the NaN/Inf encodings.  */
+            /*
+             * Half precision floats come in two formats: standard
+             * IEEE and "ARM" format.  The latter gains extra exponent
+             * range by omitting the NaN/Inf encodings.
+             */
             flag ieee = 1;
 
             MSA_FLOAT_BINOP(pwx->w[i], from_float16, Rh(pws, i), ieee, 32);
diff --git a/target/mips/op_helper.c b/target/mips/op_helper.c
index 6d86912958..39180275b5 100644
--- a/target/mips/op_helper.c
+++ b/target/mips/op_helper.c
@@ -4356,31 +4356,179 @@ FOP_CONDN_S(sne,  (float32_lt(fst1, fst0, &env->active_fpu.fp_status)
 #define MEMOP_IDX(DF)
 #endif
 
-#define MSA_LD_DF(DF, TYPE, LD_INSN, ...)                               \
-void helper_msa_ld_ ## TYPE(CPUMIPSState *env, uint32_t wd,             \
-                            target_ulong addr)                          \
-{                                                                       \
-    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
-    wr_t wx;                                                            \
-    int i;                                                              \
-    MEMOP_IDX(DF)                                                       \
-    for (i = 0; i < DF_ELEMENTS(DF); i++) {                             \
-        wx.TYPE[i] = LD_INSN(env, addr + (i << DF), ##__VA_ARGS__);     \
-    }                                                                   \
-    memcpy(pwd, &wx, sizeof(wr_t));                                     \
+void helper_msa_ld_b(CPUMIPSState *env, uint32_t wd,
+                     target_ulong addr)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    MEMOP_IDX(DF_BYTE)
+#if !defined(CONFIG_USER_ONLY)
+#if !defined(HOST_WORDS_BIGENDIAN)
+    pwd->b[0]  = helper_ret_ldub_mmu(env, addr + (0  << DF_BYTE), oi, GETPC());
+    pwd->b[1]  = helper_ret_ldub_mmu(env, addr + (1  << DF_BYTE), oi, GETPC());
+    pwd->b[2]  = helper_ret_ldub_mmu(env, addr + (2  << DF_BYTE), oi, GETPC());
+    pwd->b[3]  = helper_ret_ldub_mmu(env, addr + (3  << DF_BYTE), oi, GETPC());
+    pwd->b[4]  = helper_ret_ldub_mmu(env, addr + (4  << DF_BYTE), oi, GETPC());
+    pwd->b[5]  = helper_ret_ldub_mmu(env, addr + (5  << DF_BYTE), oi, GETPC());
+    pwd->b[6]  = helper_ret_ldub_mmu(env, addr + (6  << DF_BYTE), oi, GETPC());
+    pwd->b[7]  = helper_ret_ldub_mmu(env, addr + (7  << DF_BYTE), oi, GETPC());
+    pwd->b[8]  = helper_ret_ldub_mmu(env, addr + (8  << DF_BYTE), oi, GETPC());
+    pwd->b[9]  = helper_ret_ldub_mmu(env, addr + (9  << DF_BYTE), oi, GETPC());
+    pwd->b[10] = helper_ret_ldub_mmu(env, addr + (10 << DF_BYTE), oi, GETPC());
+    pwd->b[11] = helper_ret_ldub_mmu(env, addr + (11 << DF_BYTE), oi, GETPC());
+    pwd->b[12] = helper_ret_ldub_mmu(env, addr + (12 << DF_BYTE), oi, GETPC());
+    pwd->b[13] = helper_ret_ldub_mmu(env, addr + (13 << DF_BYTE), oi, GETPC());
+    pwd->b[14] = helper_ret_ldub_mmu(env, addr + (14 << DF_BYTE), oi, GETPC());
+    pwd->b[15] = helper_ret_ldub_mmu(env, addr + (15 << DF_BYTE), oi, GETPC());
+#else
+    pwd->b[0]  = helper_ret_ldub_mmu(env, addr + (7  << DF_BYTE), oi, GETPC());
+    pwd->b[1]  = helper_ret_ldub_mmu(env, addr + (6  << DF_BYTE), oi, GETPC());
+    pwd->b[2]  = helper_ret_ldub_mmu(env, addr + (5  << DF_BYTE), oi, GETPC());
+    pwd->b[3]  = helper_ret_ldub_mmu(env, addr + (4  << DF_BYTE), oi, GETPC());
+    pwd->b[4]  = helper_ret_ldub_mmu(env, addr + (3  << DF_BYTE), oi, GETPC());
+    pwd->b[5]  = helper_ret_ldub_mmu(env, addr + (2  << DF_BYTE), oi, GETPC());
+    pwd->b[6]  = helper_ret_ldub_mmu(env, addr + (1  << DF_BYTE), oi, GETPC());
+    pwd->b[7]  = helper_ret_ldub_mmu(env, addr + (0  << DF_BYTE), oi, GETPC());
+    pwd->b[8]  = helper_ret_ldub_mmu(env, addr + (15 << DF_BYTE), oi, GETPC());
+    pwd->b[9]  = helper_ret_ldub_mmu(env, addr + (14 << DF_BYTE), oi, GETPC());
+    pwd->b[10] = helper_ret_ldub_mmu(env, addr + (13 << DF_BYTE), oi, GETPC());
+    pwd->b[11] = helper_ret_ldub_mmu(env, addr + (12 << DF_BYTE), oi, GETPC());
+    pwd->b[12] = helper_ret_ldub_mmu(env, addr + (11 << DF_BYTE), oi, GETPC());
+    pwd->b[13] = helper_ret_ldub_mmu(env, addr + (10 << DF_BYTE), oi, GETPC());
+    pwd->b[14] = helper_ret_ldub_mmu(env, addr + (9  << DF_BYTE), oi, GETPC());
+    pwd->b[15] = helper_ret_ldub_mmu(env, addr + (8  << DF_BYTE), oi, GETPC());
+#endif
+#else
+#if !defined(HOST_WORDS_BIGENDIAN)
+    pwd->b[0]  = cpu_ldub_data(env, addr + (0  << DF_BYTE));
+    pwd->b[1]  = cpu_ldub_data(env, addr + (1  << DF_BYTE));
+    pwd->b[2]  = cpu_ldub_data(env, addr + (2  << DF_BYTE));
+    pwd->b[3]  = cpu_ldub_data(env, addr + (3  << DF_BYTE));
+    pwd->b[4]  = cpu_ldub_data(env, addr + (4  << DF_BYTE));
+    pwd->b[5]  = cpu_ldub_data(env, addr + (5  << DF_BYTE));
+    pwd->b[6]  = cpu_ldub_data(env, addr + (6  << DF_BYTE));
+    pwd->b[7]  = cpu_ldub_data(env, addr + (7  << DF_BYTE));
+    pwd->b[8]  = cpu_ldub_data(env, addr + (8  << DF_BYTE));
+    pwd->b[9]  = cpu_ldub_data(env, addr + (9  << DF_BYTE));
+    pwd->b[10] = cpu_ldub_data(env, addr + (10 << DF_BYTE));
+    pwd->b[11] = cpu_ldub_data(env, addr + (11 << DF_BYTE));
+    pwd->b[12] = cpu_ldub_data(env, addr + (12 << DF_BYTE));
+    pwd->b[13] = cpu_ldub_data(env, addr + (13 << DF_BYTE));
+    pwd->b[14] = cpu_ldub_data(env, addr + (14 << DF_BYTE));
+    pwd->b[15] = cpu_ldub_data(env, addr + (15 << DF_BYTE));
+#else
+    pwd->b[0]  = cpu_ldub_data(env, addr + (7  << DF_BYTE));
+    pwd->b[1]  = cpu_ldub_data(env, addr + (6  << DF_BYTE));
+    pwd->b[2]  = cpu_ldub_data(env, addr + (5  << DF_BYTE));
+    pwd->b[3]  = cpu_ldub_data(env, addr + (4  << DF_BYTE));
+    pwd->b[4]  = cpu_ldub_data(env, addr + (3  << DF_BYTE));
+    pwd->b[5]  = cpu_ldub_data(env, addr + (2  << DF_BYTE));
+    pwd->b[6]  = cpu_ldub_data(env, addr + (1  << DF_BYTE));
+    pwd->b[7]  = cpu_ldub_data(env, addr + (0  << DF_BYTE));
+    pwd->b[8]  = cpu_ldub_data(env, addr + (15 << DF_BYTE));
+    pwd->b[9]  = cpu_ldub_data(env, addr + (14 << DF_BYTE));
+    pwd->b[10] = cpu_ldub_data(env, addr + (13 << DF_BYTE));
+    pwd->b[11] = cpu_ldub_data(env, addr + (12 << DF_BYTE));
+    pwd->b[12] = cpu_ldub_data(env, addr + (11 << DF_BYTE));
+    pwd->b[13] = cpu_ldub_data(env, addr + (10 << DF_BYTE));
+    pwd->b[14] = cpu_ldub_data(env, addr + (9 << DF_BYTE));
+    pwd->b[15] = cpu_ldub_data(env, addr + (8 << DF_BYTE));
+#endif
+#endif
+}
+
+void helper_msa_ld_h(CPUMIPSState *env, uint32_t wd,
+                     target_ulong addr)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    MEMOP_IDX(DF_HALF)
+#if !defined(CONFIG_USER_ONLY)
+#if !defined(HOST_WORDS_BIGENDIAN)
+    pwd->h[0] = helper_ret_lduw_mmu(env, addr + (0 << DF_HALF), oi, GETPC());
+    pwd->h[1] = helper_ret_lduw_mmu(env, addr + (1 << DF_HALF), oi, GETPC());
+    pwd->h[2] = helper_ret_lduw_mmu(env, addr + (2 << DF_HALF), oi, GETPC());
+    pwd->h[3] = helper_ret_lduw_mmu(env, addr + (3 << DF_HALF), oi, GETPC());
+    pwd->h[4] = helper_ret_lduw_mmu(env, addr + (4 << DF_HALF), oi, GETPC());
+    pwd->h[5] = helper_ret_lduw_mmu(env, addr + (5 << DF_HALF), oi, GETPC());
+    pwd->h[6] = helper_ret_lduw_mmu(env, addr + (6 << DF_HALF), oi, GETPC());
+    pwd->h[7] = helper_ret_lduw_mmu(env, addr + (7 << DF_HALF), oi, GETPC());
+#else
+    pwd->h[0] = helper_ret_lduw_mmu(env, addr + (3 << DF_HALF), oi, GETPC());
+    pwd->h[1] = helper_ret_lduw_mmu(env, addr + (2 << DF_HALF), oi, GETPC());
+    pwd->h[2] = helper_ret_lduw_mmu(env, addr + (1 << DF_HALF), oi, GETPC());
+    pwd->h[3] = helper_ret_lduw_mmu(env, addr + (0 << DF_HALF), oi, GETPC());
+    pwd->h[4] = helper_ret_lduw_mmu(env, addr + (7 << DF_HALF), oi, GETPC());
+    pwd->h[5] = helper_ret_lduw_mmu(env, addr + (6 << DF_HALF), oi, GETPC());
+    pwd->h[6] = helper_ret_lduw_mmu(env, addr + (5 << DF_HALF), oi, GETPC());
+    pwd->h[7] = helper_ret_lduw_mmu(env, addr + (4 << DF_HALF), oi, GETPC());
+#endif
+#else
+#if !defined(HOST_WORDS_BIGENDIAN)
+    pwd->h[0] = cpu_lduw_data(env, addr + (0 << DF_HALF));
+    pwd->h[1] = cpu_lduw_data(env, addr + (1 << DF_HALF));
+    pwd->h[2] = cpu_lduw_data(env, addr + (2 << DF_HALF));
+    pwd->h[3] = cpu_lduw_data(env, addr + (3 << DF_HALF));
+    pwd->h[4] = cpu_lduw_data(env, addr + (4 << DF_HALF));
+    pwd->h[5] = cpu_lduw_data(env, addr + (5 << DF_HALF));
+    pwd->h[6] = cpu_lduw_data(env, addr + (6 << DF_HALF));
+    pwd->h[7] = cpu_lduw_data(env, addr + (7 << DF_HALF));
+#else
+    pwd->h[0] = cpu_lduw_data(env, addr + (3 << DF_HALF));
+    pwd->h[1] = cpu_lduw_data(env, addr + (2 << DF_HALF));
+    pwd->h[2] = cpu_lduw_data(env, addr + (1 << DF_HALF));
+    pwd->h[3] = cpu_lduw_data(env, addr + (0 << DF_HALF));
+    pwd->h[4] = cpu_lduw_data(env, addr + (7 << DF_HALF));
+    pwd->h[5] = cpu_lduw_data(env, addr + (6 << DF_HALF));
+    pwd->h[6] = cpu_lduw_data(env, addr + (5 << DF_HALF));
+    pwd->h[7] = cpu_lduw_data(env, addr + (4 << DF_HALF));
+#endif
+#endif
 }
 
+void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd,
+                     target_ulong addr)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    MEMOP_IDX(DF_WORD)
 #if !defined(CONFIG_USER_ONLY)
-MSA_LD_DF(DF_BYTE,   b, helper_ret_ldub_mmu, oi, GETPC())
-MSA_LD_DF(DF_HALF,   h, helper_ret_lduw_mmu, oi, GETPC())
-MSA_LD_DF(DF_WORD,   w, helper_ret_ldul_mmu, oi, GETPC())
-MSA_LD_DF(DF_DOUBLE, d, helper_ret_ldq_mmu,  oi, GETPC())
+#if !defined(HOST_WORDS_BIGENDIAN)
+    pwd->w[0] = helper_ret_ldul_mmu(env, addr + (0 << DF_WORD), oi, GETPC());
+    pwd->w[1] = helper_ret_ldul_mmu(env, addr + (1 << DF_WORD), oi, GETPC());
+    pwd->w[2] = helper_ret_ldul_mmu(env, addr + (2 << DF_WORD), oi, GETPC());
+    pwd->w[3] = helper_ret_ldul_mmu(env, addr + (3 << DF_WORD), oi, GETPC());
 #else
-MSA_LD_DF(DF_BYTE,   b, cpu_ldub_data)
-MSA_LD_DF(DF_HALF,   h, cpu_lduw_data)
-MSA_LD_DF(DF_WORD,   w, cpu_ldl_data)
-MSA_LD_DF(DF_DOUBLE, d, cpu_ldq_data)
+    pwd->w[0] = helper_ret_ldul_mmu(env, addr + (1 << DF_WORD), oi, GETPC());
+    pwd->w[1] = helper_ret_ldul_mmu(env, addr + (0 << DF_WORD), oi, GETPC());
+    pwd->w[2] = helper_ret_ldul_mmu(env, addr + (3 << DF_WORD), oi, GETPC());
+    pwd->w[3] = helper_ret_ldul_mmu(env, addr + (2 << DF_WORD), oi, GETPC());
 #endif
+#else
+#if !defined(HOST_WORDS_BIGENDIAN)
+    pwd->w[0] = cpu_ldl_data(env, addr + (0 << DF_WORD));
+    pwd->w[1] = cpu_ldl_data(env, addr + (1 << DF_WORD));
+    pwd->w[2] = cpu_ldl_data(env, addr + (2 << DF_WORD));
+    pwd->w[3] = cpu_ldl_data(env, addr + (3 << DF_WORD));
+#else
+    pwd->w[0] = cpu_ldl_data(env, addr + (1 << DF_WORD));
+    pwd->w[1] = cpu_ldl_data(env, addr + (0 << DF_WORD));
+    pwd->w[2] = cpu_ldl_data(env, addr + (3 << DF_WORD));
+    pwd->w[3] = cpu_ldl_data(env, addr + (2 << DF_WORD));
+#endif
+#endif
+}
+
+void helper_msa_ld_d(CPUMIPSState *env, uint32_t wd,
+                     target_ulong addr)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    MEMOP_IDX(DF_DOUBLE)
+#if !defined(CONFIG_USER_ONLY)
+    pwd->d[0] = helper_ret_ldq_mmu(env, addr + (0 << DF_DOUBLE), oi, GETPC());
+    pwd->d[1] = helper_ret_ldq_mmu(env, addr + (1 << DF_DOUBLE), oi, GETPC());
+#else
+    pwd->d[0] = cpu_ldq_data(env, addr + (0 << DF_DOUBLE));
+    pwd->d[1] = cpu_ldq_data(env, addr + (1 << DF_DOUBLE));
+#endif
+}
 
 #define MSA_PAGESPAN(x) \
         ((((x) & ~TARGET_PAGE_MASK) + MSA_WRLEN/8 - 1) >= TARGET_PAGE_SIZE)
@@ -4402,31 +4550,191 @@ static inline void ensure_writable_pages(CPUMIPSState *env,
 #endif
 }
 
-#define MSA_ST_DF(DF, TYPE, ST_INSN, ...)                               \
-void helper_msa_st_ ## TYPE(CPUMIPSState *env, uint32_t wd,             \
-                            target_ulong addr)                          \
-{                                                                       \
-    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
-    int mmu_idx = cpu_mmu_index(env, false);				\
-    int i;                                                              \
-    MEMOP_IDX(DF)                                                       \
-    ensure_writable_pages(env, addr, mmu_idx, GETPC());                 \
-    for (i = 0; i < DF_ELEMENTS(DF); i++) {                             \
-        ST_INSN(env, addr + (i << DF), pwd->TYPE[i], ##__VA_ARGS__);    \
-    }                                                                   \
+void helper_msa_st_b(CPUMIPSState *env, uint32_t wd,
+                     target_ulong addr)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    int mmu_idx = cpu_mmu_index(env, false);
+
+    MEMOP_IDX(DF_BYTE)
+    ensure_writable_pages(env, addr, mmu_idx, GETPC());
+#if !defined(CONFIG_USER_ONLY)
+#if !defined(HOST_WORDS_BIGENDIAN)
+    helper_ret_stb_mmu(env, addr + (0  << DF_BYTE), pwd->b[0],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (1  << DF_BYTE), pwd->b[1],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (2  << DF_BYTE), pwd->b[2],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (3  << DF_BYTE), pwd->b[3],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (4  << DF_BYTE), pwd->b[4],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (5  << DF_BYTE), pwd->b[5],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (6  << DF_BYTE), pwd->b[6],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (7  << DF_BYTE), pwd->b[7],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (8  << DF_BYTE), pwd->b[8],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (9  << DF_BYTE), pwd->b[9],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (10 << DF_BYTE), pwd->b[10], oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (11 << DF_BYTE), pwd->b[11], oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (12 << DF_BYTE), pwd->b[12], oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (13 << DF_BYTE), pwd->b[13], oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (14 << DF_BYTE), pwd->b[14], oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (15 << DF_BYTE), pwd->b[15], oi, GETPC());
+#else
+    helper_ret_stb_mmu(env, addr + (7  << DF_BYTE), pwd->b[0],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (6  << DF_BYTE), pwd->b[1],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (5  << DF_BYTE), pwd->b[2],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (4  << DF_BYTE), pwd->b[3],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (3  << DF_BYTE), pwd->b[4],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (2  << DF_BYTE), pwd->b[5],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (1  << DF_BYTE), pwd->b[6],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (0  << DF_BYTE), pwd->b[7],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (15 << DF_BYTE), pwd->b[8],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (14 << DF_BYTE), pwd->b[9],  oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (13 << DF_BYTE), pwd->b[10], oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (12 << DF_BYTE), pwd->b[11], oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (11 << DF_BYTE), pwd->b[12], oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (10 << DF_BYTE), pwd->b[13], oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (9  << DF_BYTE), pwd->b[14], oi, GETPC());
+    helper_ret_stb_mmu(env, addr + (8  << DF_BYTE), pwd->b[15], oi, GETPC());
+#endif
+#else
+#if !defined(HOST_WORDS_BIGENDIAN)
+    cpu_stb_data(env, addr + (0  << DF_BYTE), pwd->b[0]);
+    cpu_stb_data(env, addr + (1  << DF_BYTE), pwd->b[1]);
+    cpu_stb_data(env, addr + (2  << DF_BYTE), pwd->b[2]);
+    cpu_stb_data(env, addr + (3  << DF_BYTE), pwd->b[3]);
+    cpu_stb_data(env, addr + (4  << DF_BYTE), pwd->b[4]);
+    cpu_stb_data(env, addr + (5  << DF_BYTE), pwd->b[5]);
+    cpu_stb_data(env, addr + (6  << DF_BYTE), pwd->b[6]);
+    cpu_stb_data(env, addr + (7  << DF_BYTE), pwd->b[7]);
+    cpu_stb_data(env, addr + (8  << DF_BYTE), pwd->b[8]);
+    cpu_stb_data(env, addr + (9  << DF_BYTE), pwd->b[9]);
+    cpu_stb_data(env, addr + (10 << DF_BYTE), pwd->b[10]);
+    cpu_stb_data(env, addr + (11 << DF_BYTE), pwd->b[11]);
+    cpu_stb_data(env, addr + (12 << DF_BYTE), pwd->b[12]);
+    cpu_stb_data(env, addr + (13 << DF_BYTE), pwd->b[13]);
+    cpu_stb_data(env, addr + (14 << DF_BYTE), pwd->b[14]);
+    cpu_stb_data(env, addr + (15 << DF_BYTE), pwd->b[15]);
+#else
+    cpu_stb_data(env, addr + (7  << DF_BYTE), pwd->b[0]);
+    cpu_stb_data(env, addr + (6  << DF_BYTE), pwd->b[1]);
+    cpu_stb_data(env, addr + (5  << DF_BYTE), pwd->b[2]);
+    cpu_stb_data(env, addr + (4  << DF_BYTE), pwd->b[3]);
+    cpu_stb_data(env, addr + (3  << DF_BYTE), pwd->b[4]);
+    cpu_stb_data(env, addr + (2  << DF_BYTE), pwd->b[5]);
+    cpu_stb_data(env, addr + (1  << DF_BYTE), pwd->b[6]);
+    cpu_stb_data(env, addr + (0  << DF_BYTE), pwd->b[7]);
+    cpu_stb_data(env, addr + (15 << DF_BYTE), pwd->b[8]);
+    cpu_stb_data(env, addr + (14 << DF_BYTE), pwd->b[9]);
+    cpu_stb_data(env, addr + (13 << DF_BYTE), pwd->b[10]);
+    cpu_stb_data(env, addr + (12 << DF_BYTE), pwd->b[11]);
+    cpu_stb_data(env, addr + (11 << DF_BYTE), pwd->b[12]);
+    cpu_stb_data(env, addr + (10 << DF_BYTE), pwd->b[13]);
+    cpu_stb_data(env, addr + (9  << DF_BYTE), pwd->b[14]);
+    cpu_stb_data(env, addr + (8  << DF_BYTE), pwd->b[15]);
+#endif
+#endif
+}
+
+void helper_msa_st_h(CPUMIPSState *env, uint32_t wd,
+                     target_ulong addr)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    int mmu_idx = cpu_mmu_index(env, false);
+
+    MEMOP_IDX(DF_HALF)
+    ensure_writable_pages(env, addr, mmu_idx, GETPC());
+#if !defined(CONFIG_USER_ONLY)
+#if !defined(HOST_WORDS_BIGENDIAN)
+    helper_ret_stw_mmu(env, addr + (0 << DF_HALF), pwd->h[0], oi, GETPC());
+    helper_ret_stw_mmu(env, addr + (1 << DF_HALF), pwd->h[1], oi, GETPC());
+    helper_ret_stw_mmu(env, addr + (2 << DF_HALF), pwd->h[2], oi, GETPC());
+    helper_ret_stw_mmu(env, addr + (3 << DF_HALF), pwd->h[3], oi, GETPC());
+    helper_ret_stw_mmu(env, addr + (4 << DF_HALF), pwd->h[4], oi, GETPC());
+    helper_ret_stw_mmu(env, addr + (5 << DF_HALF), pwd->h[5], oi, GETPC());
+    helper_ret_stw_mmu(env, addr + (6 << DF_HALF), pwd->h[6], oi, GETPC());
+    helper_ret_stw_mmu(env, addr + (7 << DF_HALF), pwd->h[7], oi, GETPC());
+#else
+    helper_ret_stw_mmu(env, addr + (3 << DF_HALF), pwd->h[0], oi, GETPC());
+    helper_ret_stw_mmu(env, addr + (2 << DF_HALF), pwd->h[1], oi, GETPC());
+    helper_ret_stw_mmu(env, addr + (1 << DF_HALF), pwd->h[2], oi, GETPC());
+    helper_ret_stw_mmu(env, addr + (0 << DF_HALF), pwd->h[3], oi, GETPC());
+    helper_ret_stw_mmu(env, addr + (7 << DF_HALF), pwd->h[4], oi, GETPC());
+    helper_ret_stw_mmu(env, addr + (6 << DF_HALF), pwd->h[5], oi, GETPC());
+    helper_ret_stw_mmu(env, addr + (5 << DF_HALF), pwd->h[6], oi, GETPC());
+    helper_ret_stw_mmu(env, addr + (4 << DF_HALF), pwd->h[7], oi, GETPC());
+#endif
+#else
+#if !defined(HOST_WORDS_BIGENDIAN)
+    cpu_stw_data(env, addr + (0 << DF_HALF), pwd->h[0]);
+    cpu_stw_data(env, addr + (1 << DF_HALF), pwd->h[1]);
+    cpu_stw_data(env, addr + (2 << DF_HALF), pwd->h[2]);
+    cpu_stw_data(env, addr + (3 << DF_HALF), pwd->h[3]);
+    cpu_stw_data(env, addr + (4 << DF_HALF), pwd->h[4]);
+    cpu_stw_data(env, addr + (5 << DF_HALF), pwd->h[5]);
+    cpu_stw_data(env, addr + (6 << DF_HALF), pwd->h[6]);
+    cpu_stw_data(env, addr + (7 << DF_HALF), pwd->h[7]);
+#else
+    cpu_stw_data(env, addr + (3 << DF_HALF), pwd->h[0]);
+    cpu_stw_data(env, addr + (2 << DF_HALF), pwd->h[1]);
+    cpu_stw_data(env, addr + (1 << DF_HALF), pwd->h[2]);
+    cpu_stw_data(env, addr + (0 << DF_HALF), pwd->h[3]);
+    cpu_stw_data(env, addr + (7 << DF_HALF), pwd->h[4]);
+    cpu_stw_data(env, addr + (6 << DF_HALF), pwd->h[5]);
+    cpu_stw_data(env, addr + (5 << DF_HALF), pwd->h[6]);
+    cpu_stw_data(env, addr + (4 << DF_HALF), pwd->h[7]);
+#endif
+#endif
 }
 
+void helper_msa_st_w(CPUMIPSState *env, uint32_t wd,
+                     target_ulong addr)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    int mmu_idx = cpu_mmu_index(env, false);
+
+    MEMOP_IDX(DF_WORD)
+    ensure_writable_pages(env, addr, mmu_idx, GETPC());
 #if !defined(CONFIG_USER_ONLY)
-MSA_ST_DF(DF_BYTE,   b, helper_ret_stb_mmu, oi, GETPC())
-MSA_ST_DF(DF_HALF,   h, helper_ret_stw_mmu, oi, GETPC())
-MSA_ST_DF(DF_WORD,   w, helper_ret_stl_mmu, oi, GETPC())
-MSA_ST_DF(DF_DOUBLE, d, helper_ret_stq_mmu, oi, GETPC())
+#if !defined(HOST_WORDS_BIGENDIAN)
+    helper_ret_stl_mmu(env, addr + (0 << DF_WORD), oi, GETPC(), pwd->w[0]);
+    helper_ret_stl_mmu(env, addr + (1 << DF_WORD), oi, GETPC(), pwd->w[1]);
+    helper_ret_stl_mmu(env, addr + (2 << DF_WORD), oi, GETPC(), pwd->w[2]);
+    helper_ret_stl_mmu(env, addr + (3 << DF_WORD), oi, GETPC(), pwd->w[3]);
 #else
-MSA_ST_DF(DF_BYTE,   b, cpu_stb_data)
-MSA_ST_DF(DF_HALF,   h, cpu_stw_data)
-MSA_ST_DF(DF_WORD,   w, cpu_stl_data)
-MSA_ST_DF(DF_DOUBLE, d, cpu_stq_data)
+    helper_ret_stl_mmu(env, addr + (1 << DF_WORD), oi, GETPC(), pwd->w[0]);
+    helper_ret_stl_mmu(env, addr + (0 << DF_WORD), oi, GETPC(), pwd->w[1]);
+    helper_ret_stl_mmu(env, addr + (3 << DF_WORD), oi, GETPC(), pwd->w[2]);
+    helper_ret_stl_mmu(env, addr + (2 << DF_WORD), oi, GETPC(), pwd->w[3]);
+#endif
+#else
+#if !defined(HOST_WORDS_BIGENDIAN)
+    cpu_stl_data(env, addr + (0 << DF_WORD), pwd->w[0]);
+    cpu_stl_data(env, addr + (1 << DF_WORD), pwd->w[1]);
+    cpu_stl_data(env, addr + (2 << DF_WORD), pwd->w[2]);
+    cpu_stl_data(env, addr + (3 << DF_WORD), pwd->w[3]);
+#else
+    cpu_stl_data(env, addr + (1 << DF_WORD), pwd->w[0]);
+    cpu_stl_data(env, addr + (0 << DF_WORD), pwd->w[1]);
+    cpu_stl_data(env, addr + (3 << DF_WORD), pwd->w[2]);
+    cpu_stl_data(env, addr + (2 << DF_WORD), pwd->w[3]);
+#endif
 #endif
+}
+
+void helper_msa_st_d(CPUMIPSState *env, uint32_t wd,
+                     target_ulong addr)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    int mmu_idx = cpu_mmu_index(env, false);
+
+    MEMOP_IDX(DF_DOUBLE)
+    ensure_writable_pages(env, addr, mmu_idx, GETPC());
+#if !defined(CONFIG_USER_ONLY)
+    helper_ret_stq_mmu(env, addr + (0 << DF_DOUBLE), pwd->d[0], oi, GETPC());
+    helper_ret_stq_mmu(env, addr + (1 << DF_DOUBLE), pwd->d[1], oi, GETPC());
+#else
+    cpu_stq_data(env, addr + (0 << DF_DOUBLE), pwd->d[0]);
+    cpu_stq_data(env, addr + (1 << DF_DOUBLE), pwd->d[1]);
+#endif
+}
 
 void helper_cache(CPUMIPSState *env, target_ulong addr, uint32_t op)
 {
diff --git a/target/mips/translate.c b/target/mips/translate.c
index f96c0d01ef..e37722dfff 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -32,7 +32,7 @@
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
 
 #include "target/mips/trace.h"
 #include "trace-tcg.h"
@@ -13726,6 +13726,14 @@ static inline bool is_uhi(int sdbbp_code)
 #endif
 }
 
+#ifdef CONFIG_USER_ONLY
+/* The above should dead-code away any calls to this..*/
+static inline void gen_helper_do_semihosting(void *env)
+{
+    g_assert_not_reached();
+}
+#endif
+
 static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
 {
     int rx, ry;
@@ -24349,6 +24357,146 @@ static void decode_opc_special(CPUMIPSState *env, DisasContext *ctx)
  *                     PEXTUW
  */
 
+/*
+ *  PCPYH rd, rt
+ *
+ *    Parallel Copy Halfword
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+---------+---------+---------+-----------+
+ *  |    MMI    |0 0 0 0 0|   rt    |   rd    |  PCPYH  |    MMI3   |
+ *  +-----------+---------+---------+---------+---------+-----------+
+ */
+static void gen_mmi_pcpyh(DisasContext *ctx)
+{
+    uint32_t pd, rt, rd;
+    uint32_t opcode;
+
+    opcode = ctx->opcode;
+
+    pd = extract32(opcode, 21, 5);
+    rt = extract32(opcode, 16, 5);
+    rd = extract32(opcode, 11, 5);
+
+    if (unlikely(pd != 0)) {
+        generate_exception_end(ctx, EXCP_RI);
+    } else if (rd == 0) {
+        /* nop */
+    } else if (rt == 0) {
+        tcg_gen_movi_i64(cpu_gpr[rd], 0);
+        tcg_gen_movi_i64(cpu_mmr[rd], 0);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new();
+        TCGv_i64 t1 = tcg_temp_new();
+        uint64_t mask = (1ULL << 16) - 1;
+
+        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask);
+        tcg_gen_movi_i64(t1, 0);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_shli_i64(t0, t0, 16);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_shli_i64(t0, t0, 16);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_shli_i64(t0, t0, 16);
+        tcg_gen_or_i64(t1, t0, t1);
+
+        tcg_gen_mov_i64(cpu_gpr[rd], t1);
+
+        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask);
+        tcg_gen_movi_i64(t1, 0);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_shli_i64(t0, t0, 16);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_shli_i64(t0, t0, 16);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_shli_i64(t0, t0, 16);
+        tcg_gen_or_i64(t1, t0, t1);
+
+        tcg_gen_mov_i64(cpu_mmr[rd], t1);
+
+        tcg_temp_free(t0);
+        tcg_temp_free(t1);
+    }
+}
+
+/*
+ *  PCPYLD rd, rs, rt
+ *
+ *    Parallel Copy Lower Doubleword
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+---------+---------+---------+-----------+
+ *  |    MMI    |   rs    |   rt    |   rd    | PCPYLD  |    MMI2   |
+ *  +-----------+---------+---------+---------+---------+-----------+
+ */
+static void gen_mmi_pcpyld(DisasContext *ctx)
+{
+    uint32_t rs, rt, rd;
+    uint32_t opcode;
+
+    opcode = ctx->opcode;
+
+    rs = extract32(opcode, 21, 5);
+    rt = extract32(opcode, 16, 5);
+    rd = extract32(opcode, 11, 5);
+
+    if (rd == 0) {
+        /* nop */
+    } else {
+        if (rs == 0) {
+            tcg_gen_movi_i64(cpu_mmr[rd], 0);
+        } else {
+            tcg_gen_mov_i64(cpu_mmr[rd], cpu_gpr[rs]);
+        }
+        if (rt == 0) {
+            tcg_gen_movi_i64(cpu_gpr[rd], 0);
+        } else {
+            if (rd != rt) {
+                tcg_gen_mov_i64(cpu_gpr[rd], cpu_gpr[rt]);
+            }
+        }
+    }
+}
+
+/*
+ *  PCPYUD rd, rs, rt
+ *
+ *    Parallel Copy Upper Doubleword
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+---------+---------+---------+-----------+
+ *  |    MMI    |   rs    |   rt    |   rd    | PCPYUD  |    MMI3   |
+ *  +-----------+---------+---------+---------+---------+-----------+
+ */
+static void gen_mmi_pcpyud(DisasContext *ctx)
+{
+    uint32_t rs, rt, rd;
+    uint32_t opcode;
+
+    opcode = ctx->opcode;
+
+    rs = extract32(opcode, 21, 5);
+    rt = extract32(opcode, 16, 5);
+    rd = extract32(opcode, 11, 5);
+
+    if (rd == 0) {
+        /* nop */
+    } else {
+        if (rs == 0) {
+            tcg_gen_movi_i64(cpu_gpr[rd], 0);
+        } else {
+            tcg_gen_mov_i64(cpu_gpr[rd], cpu_mmr[rs]);
+        }
+        if (rt == 0) {
+            tcg_gen_movi_i64(cpu_mmr[rd], 0);
+        } else {
+            if (rd != rt) {
+                tcg_gen_mov_i64(cpu_mmr[rd], cpu_mmr[rt]);
+            }
+        }
+    }
+}
+
 #endif
 
 
@@ -27363,7 +27511,6 @@ static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
     case MMI_OPC_2_PINTH:     /* TODO: MMI_OPC_2_PINTH */
     case MMI_OPC_2_PMULTW:    /* TODO: MMI_OPC_2_PMULTW */
     case MMI_OPC_2_PDIVW:     /* TODO: MMI_OPC_2_PDIVW */
-    case MMI_OPC_2_PCPYLD:    /* TODO: MMI_OPC_2_PCPYLD */
     case MMI_OPC_2_PMADDH:    /* TODO: MMI_OPC_2_PMADDH */
     case MMI_OPC_2_PHMADH:    /* TODO: MMI_OPC_2_PHMADH */
     case MMI_OPC_2_PAND:      /* TODO: MMI_OPC_2_PAND */
@@ -27378,6 +27525,9 @@ static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
     case MMI_OPC_2_PROT3W:    /* TODO: MMI_OPC_2_PROT3W */
         generate_exception_end(ctx, EXCP_RI); /* TODO: MMI_OPC_CLASS_MMI2 */
         break;
+    case MMI_OPC_2_PCPYLD:
+        gen_mmi_pcpyld(ctx);
+        break;
     default:
         MIPS_INVAL("TX79 MMI class MMI2");
         generate_exception_end(ctx, EXCP_RI);
@@ -27397,14 +27547,18 @@ static void decode_mmi3(CPUMIPSState *env, DisasContext *ctx)
     case MMI_OPC_3_PINTEH:     /* TODO: MMI_OPC_3_PINTEH */
     case MMI_OPC_3_PMULTUW:    /* TODO: MMI_OPC_3_PMULTUW */
     case MMI_OPC_3_PDIVUW:     /* TODO: MMI_OPC_3_PDIVUW */
-    case MMI_OPC_3_PCPYUD:     /* TODO: MMI_OPC_3_PCPYUD */
     case MMI_OPC_3_POR:        /* TODO: MMI_OPC_3_POR */
     case MMI_OPC_3_PNOR:       /* TODO: MMI_OPC_3_PNOR */
     case MMI_OPC_3_PEXCH:      /* TODO: MMI_OPC_3_PEXCH */
-    case MMI_OPC_3_PCPYH:      /* TODO: MMI_OPC_3_PCPYH */
     case MMI_OPC_3_PEXCW:      /* TODO: MMI_OPC_3_PEXCW */
         generate_exception_end(ctx, EXCP_RI); /* TODO: MMI_OPC_CLASS_MMI3 */
         break;
+    case MMI_OPC_3_PCPYH:
+        gen_mmi_pcpyh(ctx);
+        break;
+    case MMI_OPC_3_PCPYUD:
+        gen_mmi_pcpyud(ctx);
+        break;
     default:
         MIPS_INVAL("TX79 MMI class MMI3");
         generate_exception_end(ctx, EXCP_RI);
@@ -28297,20 +28451,73 @@ static void gen_msa_elm_df(CPUMIPSState *env, DisasContext *ctx, uint32_t df,
             generate_exception_end(ctx, EXCP_RI);
             break;
         }
+        if ((MASK_MSA_ELM(ctx->opcode) == OPC_COPY_U_df) &&
+              (df == DF_WORD)) {
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        }
 #endif
         switch (MASK_MSA_ELM(ctx->opcode)) {
         case OPC_COPY_S_df:
             if (likely(wd != 0)) {
-                gen_helper_msa_copy_s_df(cpu_env, tdf, twd, tws, tn);
+                switch (df) {
+                case DF_BYTE:
+                    gen_helper_msa_copy_s_b(cpu_env, twd, tws, tn);
+                    break;
+                case DF_HALF:
+                    gen_helper_msa_copy_s_h(cpu_env, twd, tws, tn);
+                    break;
+                case DF_WORD:
+                    gen_helper_msa_copy_s_w(cpu_env, twd, tws, tn);
+                    break;
+#if defined(TARGET_MIPS64)
+                case DF_DOUBLE:
+                    gen_helper_msa_copy_s_d(cpu_env, twd, tws, tn);
+                    break;
+#endif
+                default:
+                    assert(0);
+                }
             }
             break;
         case OPC_COPY_U_df:
             if (likely(wd != 0)) {
-                gen_helper_msa_copy_u_df(cpu_env, tdf, twd, tws, tn);
+                switch (df) {
+                case DF_BYTE:
+                    gen_helper_msa_copy_u_b(cpu_env, twd, tws, tn);
+                    break;
+                case DF_HALF:
+                    gen_helper_msa_copy_u_h(cpu_env, twd, tws, tn);
+                    break;
+#if defined(TARGET_MIPS64)
+                case DF_WORD:
+                    gen_helper_msa_copy_u_w(cpu_env, twd, tws, tn);
+                    break;
+#endif
+                default:
+                    assert(0);
+                }
             }
             break;
         case OPC_INSERT_df:
-            gen_helper_msa_insert_df(cpu_env, tdf, twd, tws, tn);
+            switch (df) {
+            case DF_BYTE:
+                gen_helper_msa_insert_b(cpu_env, twd, tws, tn);
+                break;
+            case DF_HALF:
+                gen_helper_msa_insert_h(cpu_env, twd, tws, tn);
+                break;
+            case DF_WORD:
+                gen_helper_msa_insert_w(cpu_env, twd, tws, tn);
+                break;
+#if defined(TARGET_MIPS64)
+            case DF_DOUBLE:
+                gen_helper_msa_insert_d(cpu_env, twd, tws, tn);
+                break;
+#endif
+            default:
+                assert(0);
+            }
             break;
         }
         break;
diff --git a/target/nios2/helper.c b/target/nios2/helper.c
index ffb83fc104..57c97bde3c 100644
--- a/target/nios2/helper.c
+++ b/target/nios2/helper.c
@@ -26,7 +26,7 @@
 #include "exec/cpu_ldst.h"
 #include "exec/log.h"
 #include "exec/helper-proto.h"
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
 
 #if defined(CONFIG_USER_ONLY)
 
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 638a6e99c4..02b67a333e 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -180,18 +180,6 @@ DEF_HELPER_3(vmuloub, void, avr, avr, avr)
 DEF_HELPER_3(vmulouh, void, avr, avr, avr)
 DEF_HELPER_3(vmulouw, void, avr, avr, avr)
 DEF_HELPER_3(vmuluwm, void, avr, avr, avr)
-DEF_HELPER_3(vsrab, void, avr, avr, avr)
-DEF_HELPER_3(vsrah, void, avr, avr, avr)
-DEF_HELPER_3(vsraw, void, avr, avr, avr)
-DEF_HELPER_3(vsrad, void, avr, avr, avr)
-DEF_HELPER_3(vsrb, void, avr, avr, avr)
-DEF_HELPER_3(vsrh, void, avr, avr, avr)
-DEF_HELPER_3(vsrw, void, avr, avr, avr)
-DEF_HELPER_3(vsrd, void, avr, avr, avr)
-DEF_HELPER_3(vslb, void, avr, avr, avr)
-DEF_HELPER_3(vslh, void, avr, avr, avr)
-DEF_HELPER_3(vslw, void, avr, avr, avr)
-DEF_HELPER_3(vsld, void, avr, avr, avr)
 DEF_HELPER_3(vslo, void, avr, avr, avr)
 DEF_HELPER_3(vsro, void, avr, avr, avr)
 DEF_HELPER_3(vsrv, void, avr, avr, avr)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 9af779ad38..8ce89f2ad9 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1791,23 +1791,6 @@ VSHIFT(l, 1)
 VSHIFT(r, 0)
 #undef VSHIFT
 
-#define VSL(suffix, element, mask)                                      \
-    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
-    {                                                                   \
-        int i;                                                          \
-                                                                        \
-        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
-            unsigned int shift = b->element[i] & mask;                  \
-                                                                        \
-            r->element[i] = a->element[i] << shift;                     \
-        }                                                               \
-    }
-VSL(b, u8, 0x7)
-VSL(h, u16, 0x0F)
-VSL(w, u32, 0x1F)
-VSL(d, u64, 0x3F)
-#undef VSL
-
 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 {
     int i;
@@ -1815,10 +1798,10 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 
     size = ARRAY_SIZE(r->u8);
     for (i = 0; i < size; i++) {
-        shift = b->u8[i] & 0x7;             /* extract shift value */
-        bytes = (a->u8[i] << 8) +             /* extract adjacent bytes */
-            (((i + 1) < size) ? a->u8[i + 1] : 0);
-        r->u8[i] = (bytes << shift) >> 8;   /* shift and store result */
+        shift = b->VsrB(i) & 0x7;             /* extract shift value */
+        bytes = (a->VsrB(i) << 8) +           /* extract adjacent bytes */
+            (((i + 1) < size) ? a->VsrB(i + 1) : 0);
+        r->VsrB(i) = (bytes << shift) >> 8;   /* shift and store result */
     }
 }
 
@@ -1833,10 +1816,10 @@ void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
      * order will guarantee that computed result is not fed back.
      */
     for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
-        shift = b->u8[i] & 0x7;                 /* extract shift value */
-        bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
+        shift = b->VsrB(i) & 0x7;               /* extract shift value */
+        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
                                                 /* extract adjacent bytes */
-        r->u8[i] = (bytes >> shift) & 0xFF;     /* shift and store result */
+        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
     }
 }
 
@@ -1980,26 +1963,6 @@ VNEG(vnegw, s32)
 VNEG(vnegd, s64)
 #undef VNEG
 
-#define VSR(suffix, element, mask)                                      \
-    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
-    {                                                                   \
-        int i;                                                          \
-                                                                        \
-        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
-            unsigned int shift = b->element[i] & mask;                  \
-            r->element[i] = a->element[i] >> shift;                     \
-        }                                                               \
-    }
-VSR(ab, s8, 0x7)
-VSR(ah, s16, 0xF)
-VSR(aw, s32, 0x1F)
-VSR(ad, s64, 0x3F)
-VSR(b, u8, 0x7)
-VSR(h, u16, 0xF)
-VSR(w, u32, 0x1F)
-VSR(d, u64, 0x3F)
-#undef VSR
-
 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 {
     int sh = (b->VsrB(0xf) >> 3) & 0xf;
@@ -2053,7 +2016,7 @@ void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
     for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
         int64_t t = (int64_t)b->VsrSW(upper + i * 2);
 
-        result.VsrW(i) = 0;
+        result.VsrD(i) = 0;
         for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
             t += a->VsrSW(2 * i + j);
         }
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 02e22e2017..3bf0a46c33 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -75,6 +75,7 @@ static int cap_fixup_hcalls;
 static int cap_htm;             /* Hardware transactional memory support */
 static int cap_mmu_radix;
 static int cap_mmu_hash_v3;
+static int cap_xive;
 static int cap_resize_hpt;
 static int cap_ppc_pvr_compat;
 static int cap_ppc_safe_cache;
@@ -146,6 +147,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
+    cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE);
     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
     kvmppc_get_cpu_characteristics(s);
     cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
@@ -1721,7 +1723,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
             trace_kvm_handle_dcr_write();
             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
         } else {
-            trace_kvm_handle_drc_read();
+            trace_kvm_handle_dcr_read();
             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
         }
         break;
@@ -2478,6 +2480,11 @@ static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
     return 0;
 }
 
+bool kvmppc_has_cap_xive(void)
+{
+    return cap_xive;
+}
+
 static void kvmppc_get_cpu_characteristics(KVMState *s)
 {
     struct kvm_ppc_cpu_char c;
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 22385134b4..45776cad79 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -60,6 +60,7 @@ bool kvmppc_has_cap_fixup_hcalls(void);
 bool kvmppc_has_cap_htm(void);
 bool kvmppc_has_cap_mmu_radix(void);
 bool kvmppc_has_cap_mmu_hash_v3(void);
+bool kvmppc_has_cap_xive(void);
 int kvmppc_get_cap_safe_cache(void);
 int kvmppc_get_cap_safe_bounds_check(void);
 int kvmppc_get_cap_safe_indirect_branch(void);
@@ -316,6 +317,11 @@ static inline bool kvmppc_has_cap_mmu_hash_v3(void)
     return false;
 }
 
+static inline bool kvmppc_has_cap_xive(void)
+{
+    return false;
+}
+
 static inline int kvmppc_get_cap_safe_cache(void)
 {
     return 0;
diff --git a/target/ppc/trace-events b/target/ppc/trace-events
index 7b3cfe11fd..3dc6740706 100644
--- a/target/ppc/trace-events
+++ b/target/ppc/trace-events
@@ -22,7 +22,7 @@ kvm_failed_put_vpa(void) "Warning: Unable to set VPA information to KVM"
 kvm_failed_get_vpa(void) "Warning: Unable to get VPA information from KVM"
 kvm_injected_interrupt(int irq) "injected interrupt %d"
 kvm_handle_dcr_write(void) "handle dcr write"
-kvm_handle_drc_read(void) "handle dcr read"
+kvm_handle_dcr_read(void) "handle dcr read"
 kvm_handle_halt(void) "handle halt"
 kvm_handle_papr_hcall(void) "handle PAPR hypercall"
 kvm_handle_epr(void) "handle epr"
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index 6861f4c5b9..663275b729 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -530,21 +530,21 @@ GEN_VXFORM(vmuleuw, 4, 10);
 GEN_VXFORM(vmulesb, 4, 12);
 GEN_VXFORM(vmulesh, 4, 13);
 GEN_VXFORM(vmulesw, 4, 14);
-GEN_VXFORM(vslb, 2, 4);
-GEN_VXFORM(vslh, 2, 5);
-GEN_VXFORM(vslw, 2, 6);
+GEN_VXFORM_V(vslb, MO_8, tcg_gen_gvec_shlv, 2, 4);
+GEN_VXFORM_V(vslh, MO_16, tcg_gen_gvec_shlv, 2, 5);
+GEN_VXFORM_V(vslw, MO_32, tcg_gen_gvec_shlv, 2, 6);
 GEN_VXFORM(vrlwnm, 2, 6);
 GEN_VXFORM_DUAL(vslw, PPC_ALTIVEC, PPC_NONE, \
                 vrlwnm, PPC_NONE, PPC2_ISA300)
-GEN_VXFORM(vsld, 2, 23);
-GEN_VXFORM(vsrb, 2, 8);
-GEN_VXFORM(vsrh, 2, 9);
-GEN_VXFORM(vsrw, 2, 10);
-GEN_VXFORM(vsrd, 2, 27);
-GEN_VXFORM(vsrab, 2, 12);
-GEN_VXFORM(vsrah, 2, 13);
-GEN_VXFORM(vsraw, 2, 14);
-GEN_VXFORM(vsrad, 2, 15);
+GEN_VXFORM_V(vsld, MO_64, tcg_gen_gvec_shlv, 2, 23);
+GEN_VXFORM_V(vsrb, MO_8, tcg_gen_gvec_shrv, 2, 8);
+GEN_VXFORM_V(vsrh, MO_16, tcg_gen_gvec_shrv, 2, 9);
+GEN_VXFORM_V(vsrw, MO_32, tcg_gen_gvec_shrv, 2, 10);
+GEN_VXFORM_V(vsrd, MO_64, tcg_gen_gvec_shrv, 2, 27);
+GEN_VXFORM_V(vsrab, MO_8, tcg_gen_gvec_sarv, 2, 12);
+GEN_VXFORM_V(vsrah, MO_16, tcg_gen_gvec_sarv, 2, 13);
+GEN_VXFORM_V(vsraw, MO_32, tcg_gen_gvec_sarv, 2, 14);
+GEN_VXFORM_V(vsrad, MO_64, tcg_gen_gvec_sarv, 2, 15);
 GEN_VXFORM(vsrv, 2, 28);
 GEN_VXFORM(vslv, 2, 29);
 GEN_VXFORM(vslo, 6, 16);
diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
index 11d9b75d01..199d22da97 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -227,7 +227,7 @@ static void gen_lxvb16x(DisasContext *ctx)
     tcg_temp_free_i64(xtl);
 }
 
-#define VSX_VECTOR_LOAD_STORE(name, op, indexed)            \
+#define VSX_VECTOR_LOAD(name, op, indexed)                  \
 static void gen_##name(DisasContext *ctx)                   \
 {                                                           \
     int xt;                                                 \
@@ -254,8 +254,6 @@ static void gen_##name(DisasContext *ctx)                   \
     }                                                       \
     xth = tcg_temp_new_i64();                               \
     xtl = tcg_temp_new_i64();                               \
-    get_cpu_vsrh(xth, xt);                                  \
-    get_cpu_vsrl(xtl, xt);                                  \
     gen_set_access_type(ctx, ACCESS_INT);                   \
     EA = tcg_temp_new();                                    \
     if (indexed) {                                          \
@@ -281,10 +279,61 @@ static void gen_##name(DisasContext *ctx)                   \
     tcg_temp_free_i64(xtl);                                 \
 }
 
-VSX_VECTOR_LOAD_STORE(lxv, ld_i64, 0)
-VSX_VECTOR_LOAD_STORE(stxv, st_i64, 0)
-VSX_VECTOR_LOAD_STORE(lxvx, ld_i64, 1)
-VSX_VECTOR_LOAD_STORE(stxvx, st_i64, 1)
+VSX_VECTOR_LOAD(lxv, ld_i64, 0)
+VSX_VECTOR_LOAD(lxvx, ld_i64, 1)
+
+#define VSX_VECTOR_STORE(name, op, indexed)                 \
+static void gen_##name(DisasContext *ctx)                   \
+{                                                           \
+    int xt;                                                 \
+    TCGv EA;                                                \
+    TCGv_i64 xth;                                           \
+    TCGv_i64 xtl;                                           \
+                                                            \
+    if (indexed) {                                          \
+        xt = xT(ctx->opcode);                               \
+    } else {                                                \
+        xt = DQxT(ctx->opcode);                             \
+    }                                                       \
+                                                            \
+    if (xt < 32) {                                          \
+        if (unlikely(!ctx->vsx_enabled)) {                  \
+            gen_exception(ctx, POWERPC_EXCP_VSXU);          \
+            return;                                         \
+        }                                                   \
+    } else {                                                \
+        if (unlikely(!ctx->altivec_enabled)) {              \
+            gen_exception(ctx, POWERPC_EXCP_VPU);           \
+            return;                                         \
+        }                                                   \
+    }                                                       \
+    xth = tcg_temp_new_i64();                               \
+    xtl = tcg_temp_new_i64();                               \
+    get_cpu_vsrh(xth, xt);                                  \
+    get_cpu_vsrl(xtl, xt);                                  \
+    gen_set_access_type(ctx, ACCESS_INT);                   \
+    EA = tcg_temp_new();                                    \
+    if (indexed) {                                          \
+        gen_addr_reg_index(ctx, EA);                        \
+    } else {                                                \
+        gen_addr_imm_index(ctx, EA, 0x0F);                  \
+    }                                                       \
+    if (ctx->le_mode) {                                     \
+        tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_LEQ);   \
+        tcg_gen_addi_tl(EA, EA, 8);                         \
+        tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_LEQ);   \
+    } else {                                                \
+        tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_BEQ);   \
+        tcg_gen_addi_tl(EA, EA, 8);                         \
+        tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_BEQ);   \
+    }                                                       \
+    tcg_temp_free(EA);                                      \
+    tcg_temp_free_i64(xth);                                 \
+    tcg_temp_free_i64(xtl);                                 \
+}
+
+VSX_VECTOR_STORE(stxv, st_i64, 0)
+VSX_VECTOR_STORE(stxvx, st_i64, 1)
 
 #ifdef TARGET_PPC64
 #define VSX_VECTOR_LOAD_STORE_LENGTH(name)                      \
@@ -329,7 +378,6 @@ static void gen_##name(DisasContext *ctx)                         \
         return;                                                   \
     }                                                             \
     xth = tcg_temp_new_i64();                                     \
-    get_cpu_vsrh(xth, rD(ctx->opcode) + 32);                      \
     gen_set_access_type(ctx, ACCESS_INT);                         \
     EA = tcg_temp_new();                                          \
     gen_addr_imm_index(ctx, EA, 0x03);                            \
@@ -513,8 +561,8 @@ static void gen_##name(DisasContext *ctx)                         \
     tcg_temp_free_i64(xth);                                       \
 }
 
-VSX_LOAD_SCALAR_DS(stxsd, st64_i64)
-VSX_LOAD_SCALAR_DS(stxssp, st32fs)
+VSX_STORE_SCALAR_DS(stxsd, st64_i64)
+VSX_STORE_SCALAR_DS(stxssp, st32fs)
 
 static void gen_mfvsrwz(DisasContext *ctx)
 {
@@ -858,8 +906,8 @@ static void glue(gen_, name)(DisasContext *ctx)                  \
         xbh = tcg_temp_new_i64();                                \
         xbl = tcg_temp_new_i64();                                \
         sgm = tcg_temp_new_i64();                                \
-        set_cpu_vsrh(xB(ctx->opcode), xbh);                      \
-        set_cpu_vsrl(xB(ctx->opcode), xbl);                      \
+        get_cpu_vsrh(xbh, xB(ctx->opcode));                      \
+        get_cpu_vsrl(xbl, xB(ctx->opcode));                      \
         tcg_gen_movi_i64(sgm, sgn_mask);                         \
         switch (op) {                                            \
             case OP_ABS: {                                       \
@@ -1192,7 +1240,7 @@ static void gen_xxbrq(DisasContext *ctx)
     tcg_gen_bswap64_i64(xtl, xbh);
     set_cpu_vsrl(xT(ctx->opcode), xtl);
     tcg_gen_mov_i64(xth, t0);
-    set_cpu_vsrl(xT(ctx->opcode), xth);
+    set_cpu_vsrh(xT(ctx->opcode), xth);
 
     tcg_temp_free_i64(t0);
     tcg_temp_free_i64(xth);
@@ -1220,7 +1268,7 @@ static void gen_xxbrw(DisasContext *ctx)
     get_cpu_vsrl(xbl, xB(ctx->opcode));
 
     gen_bswap32x4(xth, xtl, xbh, xbl);
-    set_cpu_vsrl(xT(ctx->opcode), xth);
+    set_cpu_vsrh(xT(ctx->opcode), xth);
     set_cpu_vsrl(xT(ctx->opcode), xtl);
 
     tcg_temp_free_i64(xth);
@@ -1355,13 +1403,13 @@ static void gen_xxspltib(DisasContext *ctx)
     int rt = xT(ctx->opcode);
 
     if (rt < 32) {
-        if (unlikely(!ctx->altivec_enabled)) {
-            gen_exception(ctx, POWERPC_EXCP_VPU);
+        if (unlikely(!ctx->vsx_enabled)) {
+            gen_exception(ctx, POWERPC_EXCP_VSXU);
             return;
         }
     } else {
-        if (unlikely(!ctx->vsx_enabled)) {
-            gen_exception(ctx, POWERPC_EXCP_VSXU);
+        if (unlikely(!ctx->altivec_enabled)) {
+            gen_exception(ctx, POWERPC_EXCP_VPU);
             return;
         }
     }
@@ -1820,7 +1868,7 @@ static void gen_xvxsigdp(DisasContext *ctx)
     tcg_gen_movi_i64(t0, 0x0010000000000000);
     tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, zr, zr, t0);
     tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, nan, zr, t0);
-    tcg_gen_deposit_i64(xth, t0, xbl, 0, 52);
+    tcg_gen_deposit_i64(xtl, t0, xbl, 0, 52);
     set_cpu_vsrl(xT(ctx->opcode), xtl);
 
     tcg_temp_free_i64(t0);
diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs
index 9c6c109327..b1c79bc1d1 100644
--- a/target/riscv/Makefile.objs
+++ b/target/riscv/Makefile.objs
@@ -5,16 +5,19 @@ DECODETREE = $(SRC_PATH)/scripts/decodetree.py
 decode32-y = $(SRC_PATH)/target/riscv/insn32.decode
 decode32-$(TARGET_RISCV64) += $(SRC_PATH)/target/riscv/insn32-64.decode
 
+decode16-y = $(SRC_PATH)/target/riscv/insn16.decode
+decode16-$(TARGET_RISCV32) += $(SRC_PATH)/target/riscv/insn16-32.decode
+decode16-$(TARGET_RISCV64) += $(SRC_PATH)/target/riscv/insn16-64.decode
+
 target/riscv/decode_insn32.inc.c: $(decode32-y) $(DECODETREE)
 	$(call quiet-command, \
-	  $(PYTHON) $(DECODETREE) -o $@ --decode decode_insn32 $(decode32-y), \
-	  "GEN", $(TARGET_DIR)$@)
+	  $(PYTHON) $(DECODETREE) -o $@ --static-decode decode_insn32 \
+          $(decode32-y), "GEN", $(TARGET_DIR)$@)
 
-target/riscv/decode_insn16.inc.c: \
-  $(SRC_PATH)/target/riscv/insn16.decode $(DECODETREE)
+target/riscv/decode_insn16.inc.c: $(decode16-y) $(DECODETREE)
 	$(call quiet-command, \
-	  $(PYTHON) $(DECODETREE) -o $@ --decode decode_insn16 --insnwidth 16 $<, \
-	  "GEN", $(TARGET_DIR)$@)
+	  $(PYTHON) $(DECODETREE) -o $@ --static-decode decode_insn16 \
+          --insnwidth 16 $(decode16-y), "GEN", $(TARGET_DIR)$@)
 
 target/riscv/translate.o: target/riscv/decode_insn32.inc.c \
 	target/riscv/decode_insn16.inc.c
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index b7675707e0..e29879915f 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -23,6 +23,7 @@
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "qapi/error.h"
+#include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
 
 /* RISC-V CPU definitions */
@@ -30,17 +31,17 @@
 static const char riscv_exts[26] = "IEMAFDQCLBJTPVNSUHKORWXYZG";
 
 const char * const riscv_int_regnames[] = {
-  "zero", "ra  ", "sp  ", "gp  ", "tp  ", "t0  ", "t1  ", "t2  ",
-  "s0  ", "s1  ", "a0  ", "a1  ", "a2  ", "a3  ", "a4  ", "a5  ",
-  "a6  ", "a7  ", "s2  ", "s3  ", "s4  ", "s5  ", "s6  ", "s7  ",
-  "s8  ", "s9  ", "s10 ", "s11 ", "t3  ", "t4  ", "t5  ", "t6  "
+  "zero", "ra", "sp",  "gp",  "tp", "t0", "t1", "t2",
+  "s0",   "s1", "a0",  "a1",  "a2", "a3", "a4", "a5",
+  "a6",   "a7", "s2",  "s3",  "s4", "s5", "s6", "s7",
+  "s8",   "s9", "s10", "s11", "t3", "t4", "t5", "t6"
 };
 
 const char * const riscv_fpr_regnames[] = {
-  "ft0 ", "ft1 ", "ft2 ", "ft3 ", "ft4 ", "ft5 ", "ft6 ",  "ft7 ",
-  "fs0 ", "fs1 ", "fa0 ", "fa1 ", "fa2 ", "fa3 ", "fa4 ",  "fa5 ",
-  "fa6 ", "fa7 ", "fs2 ", "fs3 ", "fs4 ", "fs5 ", "fs6 ",  "fs7 ",
-  "fs8 ", "fs9 ", "fs10", "fs11", "ft8 ", "ft9 ", "ft10",  "ft11"
+  "ft0", "ft1", "ft2",  "ft3",  "ft4", "ft5", "ft6",  "ft7",
+  "fs0", "fs1", "fa0",  "fa1",  "fa2", "fa3", "fa4",  "fa5",
+  "fa6", "fa7", "fs2",  "fs3",  "fs4", "fs5", "fs6",  "fs7",
+  "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11"
 };
 
 const char * const riscv_excp_names[] = {
@@ -114,6 +115,12 @@ static void riscv_any_cpu_init(Object *obj)
 
 #if defined(TARGET_RISCV32)
 
+static void riscv_base32_cpu_init(Object *obj)
+{
+    CPURISCVState *env = &RISCV_CPU(obj)->env;
+    set_misa(env, RV32 | RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU);
+}
+
 static void rv32gcsu_priv1_09_1_cpu_init(Object *obj)
 {
     CPURISCVState *env = &RISCV_CPU(obj)->env;
@@ -145,6 +152,12 @@ static void rv32imacu_nommu_cpu_init(Object *obj)
 
 #elif defined(TARGET_RISCV64)
 
+static void riscv_base64_cpu_init(Object *obj)
+{
+    CPURISCVState *env = &RISCV_CPU(obj)->env;
+    set_misa(env, RV64 | RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU);
+}
+
 static void rv64gcsu_priv1_09_1_cpu_init(Object *obj)
 {
     CPURISCVState *env = &RISCV_CPU(obj)->env;
@@ -296,7 +309,11 @@ static void riscv_cpu_disas_set_info(CPUState *s, disassemble_info *info)
 static void riscv_cpu_realize(DeviceState *dev, Error **errp)
 {
     CPUState *cs = CPU(dev);
+    RISCVCPU *cpu = RISCV_CPU(dev);
+    CPURISCVState *env = &cpu->env;
     RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev);
+    int priv_version = PRIV_VERSION_1_10_0;
+    int user_version = USER_VERSION_2_02_0;
     Error *local_err = NULL;
 
     cpu_exec_realizefn(cs, &local_err);
@@ -305,6 +322,41 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    if (cpu->cfg.priv_spec) {
+        if (!g_strcmp0(cpu->cfg.priv_spec, "v1.10.0")) {
+            priv_version = PRIV_VERSION_1_10_0;
+        } else if (!g_strcmp0(cpu->cfg.priv_spec, "v1.9.1")) {
+            priv_version = PRIV_VERSION_1_09_1;
+        } else {
+            error_setg(errp,
+                       "Unsupported privilege spec version '%s'",
+                       cpu->cfg.priv_spec);
+            return;
+        }
+    }
+
+    if (cpu->cfg.user_spec) {
+        if (!g_strcmp0(cpu->cfg.user_spec, "v2.02.0")) {
+            user_version = USER_VERSION_2_02_0;
+        } else {
+            error_setg(errp,
+                       "Unsupported user spec version '%s'",
+                       cpu->cfg.user_spec);
+            return;
+        }
+    }
+
+    set_versions(env, user_version, priv_version);
+    set_resetvec(env, DEFAULT_RSTVEC);
+
+    if (cpu->cfg.mmu) {
+        set_feature(env, RISCV_FEATURE_MMU);
+    }
+
+    if (cpu->cfg.pmp) {
+        set_feature(env, RISCV_FEATURE_PMP);
+    }
+
     riscv_cpu_register_gdb_regs_for_features(cs);
 
     qemu_init_vcpu(cs);
@@ -326,6 +378,14 @@ static const VMStateDescription vmstate_riscv_cpu = {
     .unmigratable = 1,
 };
 
+static Property riscv_cpu_properties[] = {
+    DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec),
+    DEFINE_PROP_STRING("user_spec", RISCVCPU, cfg.user_spec),
+    DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, true),
+    DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void riscv_cpu_class_init(ObjectClass *c, void *data)
 {
     RISCVCPUClass *mcc = RISCV_CPU_CLASS(c);
@@ -365,6 +425,7 @@ static void riscv_cpu_class_init(ObjectClass *c, void *data)
 #endif
     /* For now, mark unmigratable: */
     cc->vmsd = &vmstate_riscv_cpu;
+    dc->props = riscv_cpu_properties;
 }
 
 char *riscv_isa_string(RISCVCPU *cpu)
@@ -430,12 +491,14 @@ static const TypeInfo riscv_cpu_type_infos[] = {
     },
     DEFINE_CPU(TYPE_RISCV_CPU_ANY,              riscv_any_cpu_init),
 #if defined(TARGET_RISCV32)
+    DEFINE_CPU(TYPE_RISCV_CPU_BASE32,           riscv_base32_cpu_init),
     DEFINE_CPU(TYPE_RISCV_CPU_RV32GCSU_V1_09_1, rv32gcsu_priv1_09_1_cpu_init),
     DEFINE_CPU(TYPE_RISCV_CPU_RV32GCSU_V1_10_0, rv32gcsu_priv1_10_0_cpu_init),
     DEFINE_CPU(TYPE_RISCV_CPU_RV32IMACU_NOMMU,  rv32imacu_nommu_cpu_init),
     DEFINE_CPU(TYPE_RISCV_CPU_SIFIVE_E31,       rv32imacu_nommu_cpu_init),
     DEFINE_CPU(TYPE_RISCV_CPU_SIFIVE_U34,       rv32gcsu_priv1_10_0_cpu_init)
 #elif defined(TARGET_RISCV64)
+    DEFINE_CPU(TYPE_RISCV_CPU_BASE64,           riscv_base64_cpu_init),
     DEFINE_CPU(TYPE_RISCV_CPU_RV64GCSU_V1_09_1, rv64gcsu_priv1_09_1_cpu_init),
     DEFINE_CPU(TYPE_RISCV_CPU_RV64GCSU_V1_10_0, rv64gcsu_priv1_10_0_cpu_init),
     DEFINE_CPU(TYPE_RISCV_CPU_RV64IMACU_NOMMU,  rv64imacu_nommu_cpu_init),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index c17184f4e4..74e726c1c9 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -48,6 +48,8 @@
 #define CPU_RESOLVING_TYPE TYPE_RISCV_CPU
 
 #define TYPE_RISCV_CPU_ANY              RISCV_CPU_TYPE_NAME("any")
+#define TYPE_RISCV_CPU_BASE32           RISCV_CPU_TYPE_NAME("rv32")
+#define TYPE_RISCV_CPU_BASE64           RISCV_CPU_TYPE_NAME("rv64")
 #define TYPE_RISCV_CPU_RV32GCSU_V1_09_1 RISCV_CPU_TYPE_NAME("rv32gcsu-v1.9.1")
 #define TYPE_RISCV_CPU_RV32GCSU_V1_10_0 RISCV_CPU_TYPE_NAME("rv32gcsu-v1.10.0")
 #define TYPE_RISCV_CPU_RV32IMACU_NOMMU  RISCV_CPU_TYPE_NAME("rv32imacu-nommu")
@@ -224,6 +226,14 @@ typedef struct RISCVCPU {
     CPUState parent_obj;
     /*< public >*/
     CPURISCVState env;
+
+    /* Configuration Settings */
+    struct {
+        char *priv_spec;
+        char *user_spec;
+        bool mmu;
+        bool pmp;
+    } cfg;
 } RISCVCPU;
 
 static inline RISCVCPU *riscv_env_get_cpu(CPURISCVState *env)
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 7180fccf54..dc9d53d4be 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -202,6 +202,23 @@
 #define CSR_DPC             0x7b1
 #define CSR_DSCRATCH        0x7b2
 
+/* Hpervisor CSRs */
+#define CSR_HSTATUS         0xa00
+#define CSR_HEDELEG         0xa02
+#define CSR_HIDELEG         0xa03
+#define CSR_HGATP           0xa80
+
+#if defined(TARGET_RISCV32)
+#define HGATP_MODE           SATP32_MODE
+#define HGATP_ASID           SATP32_ASID
+#define HGATP_PPN            SATP32_PPN
+#endif
+#if defined(TARGET_RISCV64)
+#define HGATP_MODE           SATP64_MODE
+#define HGATP_ASID           SATP64_ASID
+#define HGATP_PPN            SATP64_PPN
+#endif
+
 /* Performance Counters */
 #define CSR_MHPMCOUNTER3    0xb03
 #define CSR_MHPMCOUNTER4    0xb04
@@ -292,9 +309,6 @@
 #define CSR_MHPMCOUNTER31H  0xb9f
 
 /* Legacy Hypervisor Trap Setup (priv v1.9.1) */
-#define CSR_HSTATUS         0x200
-#define CSR_HEDELEG         0x202
-#define CSR_HIDELEG         0x203
 #define CSR_HIE             0x204
 #define CSR_HTVEC           0x205
 
@@ -316,14 +330,11 @@
 /* mstatus CSR bits */
 #define MSTATUS_UIE         0x00000001
 #define MSTATUS_SIE         0x00000002
-#define MSTATUS_HIE         0x00000004
 #define MSTATUS_MIE         0x00000008
 #define MSTATUS_UPIE        0x00000010
 #define MSTATUS_SPIE        0x00000020
-#define MSTATUS_HPIE        0x00000040
 #define MSTATUS_MPIE        0x00000080
 #define MSTATUS_SPP         0x00000100
-#define MSTATUS_HPP         0x00000600
 #define MSTATUS_MPP         0x00001800
 #define MSTATUS_FS          0x00006000
 #define MSTATUS_XS          0x00018000
@@ -335,6 +346,8 @@
 #define MSTATUS_TVM         0x00100000 /* since: priv-1.10 */
 #define MSTATUS_TW          0x20000000 /* since: priv-1.10 */
 #define MSTATUS_TSR         0x40000000 /* since: priv-1.10 */
+#define MSTATUS_MTL         0x4000000000ULL
+#define MSTATUS_MPV         0x8000000000ULL
 
 #define MSTATUS64_UXL       0x0000000300000000ULL
 #define MSTATUS64_SXL       0x0000000C00000000ULL
@@ -380,10 +393,28 @@
 #define SSTATUS_SD SSTATUS64_SD
 #endif
 
+/* hstatus CSR bits */
+#define HSTATUS_SPRV         0x00000001
+#define HSTATUS_STL          0x00000040
+#define HSTATUS_SPV          0x00000080
+#define HSTATUS_SP2P         0x00000100
+#define HSTATUS_SP2V         0x00000200
+#define HSTATUS_VTVM         0x00100000
+#define HSTATUS_VTSR         0x00400000
+
+#define HSTATUS32_WPRI       0xFF8FF87E
+#define HSTATUS64_WPRI       0xFFFFFFFFFF8FF87EULL
+
+#if defined(TARGET_RISCV32)
+#define HSTATUS_WPRI HSTATUS32_WPRI
+#elif defined(TARGET_RISCV64)
+#define HSTATUS_WPRI HSTATUS64_WPRI
+#endif
+
 /* Privilege modes */
 #define PRV_U 0
 #define PRV_S 1
-#define PRV_H 2
+#define PRV_H 2 /* Reserved */
 #define PRV_M 3
 
 /* RV32 satp CSR field masks */
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 41d6db41c3..c577a262b8 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -82,10 +82,31 @@ int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint32_t interrupts)
     }
 }
 
-/* iothread_mutex must be held */
+struct CpuAsyncInfo {
+    uint32_t new_mip;
+};
+
+static void riscv_cpu_update_mip_irqs_async(CPUState *target_cpu_state,
+                                            run_on_cpu_data data)
+{
+    CPURISCVState *env = &RISCV_CPU(target_cpu_state)->env;
+    RISCVCPU *cpu = riscv_env_get_cpu(env);
+    struct CpuAsyncInfo *info = (struct CpuAsyncInfo *) data.host_ptr;
+
+    if (info->new_mip) {
+        cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HARD);
+    } else {
+        cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_HARD);
+    }
+
+    g_free(info);
+}
+
 uint32_t riscv_cpu_update_mip(RISCVCPU *cpu, uint32_t mask, uint32_t value)
 {
     CPURISCVState *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
+    struct CpuAsyncInfo *info;
     uint32_t old, new, cmp = atomic_read(&env->mip);
 
     do {
@@ -94,11 +115,11 @@ uint32_t riscv_cpu_update_mip(RISCVCPU *cpu, uint32_t mask, uint32_t value)
         cmp = atomic_cmpxchg(&env->mip, old, new);
     } while (old != cmp);
 
-    if (new) {
-        cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HARD);
-    } else {
-        cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_HARD);
-    }
+    info = g_new(struct CpuAsyncInfo, 1);
+    info->new_mip = new;
+
+    async_run_on_cpu(cs, riscv_cpu_update_mip_irqs_async,
+                     RUN_ON_CPU_HOST_PTR(info));
 
     return old;
 }
@@ -494,7 +515,7 @@ void riscv_cpu_do_interrupt(CPUState *cs)
         s = set_field(s, MSTATUS_SPP, env->priv);
         s = set_field(s, MSTATUS_SIE, 0);
         env->mstatus = s;
-        env->scause = cause | ~(((target_ulong)-1) >> async);
+        env->scause = cause | ((target_ulong)async << (TARGET_LONG_BITS - 1));
         env->sepc = env->pc;
         env->sbadaddr = tval;
         env->pc = (env->stvec >> 2 << 2) +
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index e1d91b6c60..f9e2910643 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -237,6 +237,7 @@ static const target_ulong sstatus_v1_9_mask = SSTATUS_SIE | SSTATUS_SPIE |
 static const target_ulong sstatus_v1_10_mask = SSTATUS_SIE | SSTATUS_SPIE |
     SSTATUS_UIE | SSTATUS_UPIE | SSTATUS_SPP | SSTATUS_FS | SSTATUS_XS |
     SSTATUS_SUM | SSTATUS_MXR | SSTATUS_SD;
+static const target_ulong sip_writable_mask = SIP_SSIP | MIP_USIP | MIP_UEIP;
 
 #if defined(TARGET_RISCV32)
 static const char valid_vm_1_09[16] = {
@@ -290,7 +291,6 @@ static int write_mstatus(CPURISCVState *env, int csrno, target_ulong val)
 {
     target_ulong mstatus = env->mstatus;
     target_ulong mask = 0;
-    target_ulong mpp = get_field(val, MSTATUS_MPP);
 
     /* flush tlb on mstatus fields that affect VM */
     if (env->priv_ver <= PRIV_VERSION_1_09_1) {
@@ -305,7 +305,7 @@ static int write_mstatus(CPURISCVState *env, int csrno, target_ulong val)
                 MSTATUS_VM : 0);
     }
     if (env->priv_ver >= PRIV_VERSION_1_10_0) {
-        if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP |
+        if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
                 MSTATUS_MPRV | MSTATUS_SUM)) {
             tlb_flush(CPU(riscv_env_get_cpu(env)));
         }
@@ -313,13 +313,13 @@ static int write_mstatus(CPURISCVState *env, int csrno, target_ulong val)
             MSTATUS_SPP | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM |
             MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |
             MSTATUS_TW;
-    }
-
-    /* silenty discard mstatus.mpp writes for unsupported modes */
-    if (mpp == PRV_H ||
-        (!riscv_has_ext(env, RVS) && mpp == PRV_S) ||
-        (!riscv_has_ext(env, RVU) && mpp == PRV_U)) {
-        mask &= ~MSTATUS_MPP;
+#if defined(TARGET_RISCV64)
+            /*
+             * RV32: MPV and MTL are not in mstatus. The current plan is to
+             * add them to mstatush. For now, we just don't support it.
+             */
+            mask |= MSTATUS_MPP | MSTATUS_MPV;
+#endif
     }
 
     mstatus = (mstatus & ~mask) | (val & mask);
@@ -555,9 +555,7 @@ static int rmw_mip(CPURISCVState *env, int csrno, target_ulong *ret_value,
     uint32_t old_mip;
 
     if (mask) {
-        qemu_mutex_lock_iothread();
         old_mip = riscv_cpu_update_mip(cpu, mask, (new_value & mask));
-        qemu_mutex_unlock_iothread();
     } else {
         old_mip = atomic_read(&env->mip);
     }
@@ -685,8 +683,10 @@ static int write_sbadaddr(CPURISCVState *env, int csrno, target_ulong val)
 static int rmw_sip(CPURISCVState *env, int csrno, target_ulong *ret_value,
                    target_ulong new_value, target_ulong write_mask)
 {
-    return rmw_mip(env, CSR_MSTATUS, ret_value, new_value,
-                   write_mask & env->mideleg);
+    int ret = rmw_mip(env, CSR_MSTATUS, ret_value, new_value,
+                      write_mask & env->mideleg & sip_writable_mask);
+    *ret_value &= env->mideleg;
+    return ret;
 }
 
 /* Supervisor Protection and Translation */
@@ -723,7 +723,9 @@ static int write_satp(CPURISCVState *env, int csrno, target_ulong val)
         if (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_TVM)) {
             return -1;
         } else {
-            tlb_flush(CPU(riscv_env_get_cpu(env)));
+            if((val ^ env->satp) & SATP_ASID) {
+                tlb_flush(CPU(riscv_env_get_cpu(env)));
+            }
             env->satp = val;
         }
     }
diff --git a/target/riscv/insn16-32.decode b/target/riscv/insn16-32.decode
new file mode 100644
index 0000000000..0819b17028
--- /dev/null
+++ b/target/riscv/insn16-32.decode
@@ -0,0 +1,28 @@
+#
+# RISC-V translation routines for the RVXI Base Integer Instruction Set.
+#
+# Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
+#                    Bastian Koppelmann, kbastian@mail.uni-paderborn.de
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2 or later, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# *** RV32C Standard Extension (Quadrant 0) ***
+flw               011  ... ... .. ... 00 @cl_w
+fsw               111  ... ... .. ... 00 @cs_w
+
+# *** RV32C Standard Extension (Quadrant 1) ***
+jal               001     ........... 01 @cj    rd=1  # C.JAL
+
+# *** RV32C Standard Extension (Quadrant 2) ***
+flw               011 .  .....  ..... 10 @c_lwsp
+fsw               111 .  .....  ..... 10 @c_swsp
diff --git a/target/riscv/insn16-64.decode b/target/riscv/insn16-64.decode
new file mode 100644
index 0000000000..672e1e916f
--- /dev/null
+++ b/target/riscv/insn16-64.decode
@@ -0,0 +1,36 @@
+#
+# RISC-V translation routines for the RVXI Base Integer Instruction Set.
+#
+# Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
+#                    Bastian Koppelmann, kbastian@mail.uni-paderborn.de
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2 or later, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# *** RV64C Standard Extension (Quadrant 0) ***
+ld                011  ... ... .. ... 00 @cl_d
+sd                111  ... ... .. ... 00 @cs_d
+
+# *** RV64C Standard Extension (Quadrant 1) ***
+{
+  illegal         001 -  00000  ----- 01 # c.addiw, RES rd=0
+  addiw           001 .  .....  ..... 01 @ci
+}
+subw              100 1 11 ... 00 ... 01 @cs_2
+addw              100 1 11 ... 01 ... 01 @cs_2
+
+# *** RV64C Standard Extension (Quadrant 2) ***
+{
+  illegal         011 -  00000  ----- 10 # c.ldsp, RES rd=0
+  ld              011 .  .....  ..... 10 @c_ldsp
+}
+sd                111 .  .....  ..... 10 @c_sdsp
diff --git a/target/riscv/insn16.decode b/target/riscv/insn16.decode
index 17cc52cf2a..1cb93876fe 100644
--- a/target/riscv/insn16.decode
+++ b/target/riscv/insn16.decode
@@ -30,7 +30,7 @@
 %imm_cb        12:s1 5:2 2:1 10:2 3:2 !function=ex_shift_1
 %imm_cj        12:s1 8:1 9:2 6:1 7:1 2:1 11:1 3:3 !function=ex_shift_1
 
-%nzuimm_6bit   12:1 2:5
+%shimm_6bit   12:1 2:5               !function=ex_rvc_shifti
 %uimm_6bit_ld 2:3 12:1 5:2           !function=ex_shift_3
 %uimm_6bit_lw 2:2 12:1 4:3           !function=ex_shift_2
 %uimm_6bit_sd 7:3 10:3               !function=ex_shift_3
@@ -40,90 +40,93 @@
 %imm_lui       12:s1 2:5             !function=ex_shift_12
 
 
+# Argument sets imported from insn32.decode:
+&empty                  !extern
+&r         rd rs1 rs2   !extern
+&i         imm rs1 rd   !extern
+&s         imm rs1 rs2  !extern
+&j         imm rd       !extern
+&b         imm rs2 rs1  !extern
+&u         imm rd       !extern
+&shift     shamt rs1 rd !extern
 
-# Argument sets:
-&cl               rs1 rd
-&cl_dw     uimm   rs1 rd
-&ci        imm        rd
-&ciw       nzuimm     rd
-&cs               rs1 rs2
-&cs_dw     uimm   rs1 rs2
-&cb        imm    rs1
-&cr               rd  rs2
-&cj       imm
-&c_shift   shamt      rd
-
-&c_ld      uimm  rd
-&c_sd      uimm  rs2
-
-&caddi16sp_lui  imm_lui imm_addi16sp rd
-&cflwsp_ldsp    uimm_flwsp uimm_ldsp rd
-&cfswsp_sdsp    uimm_fswsp uimm_sdsp rs2
 
 # Formats 16:
-@cr        ....  ..... .....  .. &cr                      rs2=%rs2_5  %rd
-@ci        ... . ..... .....  .. &ci     imm=%imm_ci                  %rd
-@ciw       ...   ........ ... .. &ciw    nzuimm=%nzuimm_ciw           rd=%rs2_3
-@cl_d      ... ... ... .. ... .. &cl_dw  uimm=%uimm_cl_d  rs1=%rs1_3  rd=%rs2_3
-@cl_w      ... ... ... .. ... .. &cl_dw  uimm=%uimm_cl_w  rs1=%rs1_3  rd=%rs2_3
-@cl        ... ... ... .. ... .. &cl                      rs1=%rs1_3  rd=%rs2_3
-@cs        ... ... ... .. ... .. &cs                      rs1=%rs1_3  rs2=%rs2_3
-@cs_2      ... ... ... .. ... .. &cr                      rd=%rs1_3   rs2=%rs2_3
-@cs_d      ... ... ... .. ... .. &cs_dw  uimm=%uimm_cl_d  rs1=%rs1_3  rs2=%rs2_3
-@cs_w      ... ... ... .. ... .. &cs_dw  uimm=%uimm_cl_w  rs1=%rs1_3  rs2=%rs2_3
-@cb        ... ... ... .. ... .. &cb     imm=%imm_cb      rs1=%rs1_3
-@cj        ...    ........... .. &cj     imm=%imm_cj
-
-@c_ld      ... . .....  ..... .. &c_ld     uimm=%uimm_6bit_ld  %rd
-@c_lw      ... . .....  ..... .. &c_ld     uimm=%uimm_6bit_lw  %rd
-@c_sd      ... . .....  ..... .. &c_sd     uimm=%uimm_6bit_sd  rs2=%rs2_5
-@c_sw      ... . .....  ..... .. &c_sd     uimm=%uimm_6bit_sw  rs2=%rs2_5
-
-@c_addi16sp_lui ... .  ..... ..... .. &caddi16sp_lui %imm_lui %imm_addi16sp %rd
-@c_flwsp_ldsp   ... .  ..... ..... .. &cflwsp_ldsp uimm_flwsp=%uimm_6bit_lw \
-    uimm_ldsp=%uimm_6bit_ld %rd
-@c_fswsp_sdsp   ... .  ..... ..... .. &cfswsp_sdsp uimm_fswsp=%uimm_6bit_sw \
-    uimm_sdsp=%uimm_6bit_sd rs2=%rs2_5
-
-@c_shift        ... . .. ... ..... .. &c_shift rd=%rs1_3 shamt=%nzuimm_6bit
-@c_shift2       ... . .. ... ..... .. &c_shift rd=%rd    shamt=%nzuimm_6bit
-
-@c_andi         ... . .. ... ..... .. &ci imm=%imm_ci rd=%rs1_3
-
-# *** RV64C Standard Extension (Quadrant 0) ***
-c_addi4spn        000    ........ ... 00 @ciw
-c_fld             001  ... ... .. ... 00 @cl_d
-c_lw              010  ... ... .. ... 00 @cl_w
-c_flw_ld          011  --- ... -- ... 00 @cl    #Note: Must parse uimm manually
-c_fsd             101  ... ... .. ... 00 @cs_d
-c_sw              110  ... ... .. ... 00 @cs_w
-c_fsw_sd          111  --- ... -- ... 00 @cs    #Note: Must parse uimm manually
-
-# *** RV64C Standard Extension (Quadrant 1) ***
-c_addi            000 .  .....  ..... 01 @ci
-c_jal_addiw       001 .  .....  ..... 01 @ci #Note: parse rd and/or imm manually
-c_li              010 .  .....  ..... 01 @ci
-c_addi16sp_lui    011 .  .....  ..... 01 @c_addi16sp_lui # shares opc with C.LUI
-c_srli            100 . 00 ...  ..... 01 @c_shift
-c_srai            100 . 01 ...  ..... 01 @c_shift
-c_andi            100 . 10 ...  ..... 01 @c_andi
-c_sub             100 0 11 ... 00 ... 01 @cs_2
-c_xor             100 0 11 ... 01 ... 01 @cs_2
-c_or              100 0 11 ... 10 ... 01 @cs_2
-c_and             100 0 11 ... 11 ... 01 @cs_2
-c_subw            100 1 11 ... 00 ... 01 @cs_2
-c_addw            100 1 11 ... 01 ... 01 @cs_2
-c_j               101     ........... 01 @cj
-c_beqz            110  ... ...  ..... 01 @cb
-c_bnez            111  ... ...  ..... 01 @cb
-
-# *** RV64C Standard Extension (Quadrant 2) ***
-c_slli            000 .  .....  ..... 10 @c_shift2
-c_fldsp           001 .  .....  ..... 10 @c_ld
-c_lwsp            010 .  .....  ..... 10 @c_lw
-c_flwsp_ldsp      011 .  .....  ..... 10 @c_flwsp_ldsp #C.LDSP:RV64;C.FLWSP:RV32
-c_jr_mv           100 0  .....  ..... 10 @cr
-c_ebreak_jalr_add 100 1  .....  ..... 10 @cr
-c_fsdsp           101   ......  ..... 10 @c_sd
-c_swsp            110 .  .....  ..... 10 @c_sw
-c_fswsp_sdsp      111 .  .....  ..... 10 @c_fswsp_sdsp #C.SDSP:RV64;C.FSWSP:RV32
+@cr        ....  ..... .....  .. &r      rs2=%rs2_5       rs1=%rd     %rd
+@ci        ... . ..... .....  .. &i      imm=%imm_ci      rs1=%rd     %rd
+@cl_d      ... ... ... .. ... .. &i      imm=%uimm_cl_d   rs1=%rs1_3  rd=%rs2_3
+@cl_w      ... ... ... .. ... .. &i      imm=%uimm_cl_w   rs1=%rs1_3  rd=%rs2_3
+@cs_2      ... ... ... .. ... .. &r      rs2=%rs2_3       rs1=%rs1_3  rd=%rs1_3
+@cs_d      ... ... ... .. ... .. &s      imm=%uimm_cl_d   rs1=%rs1_3  rs2=%rs2_3
+@cs_w      ... ... ... .. ... .. &s      imm=%uimm_cl_w   rs1=%rs1_3  rs2=%rs2_3
+@cj        ...    ........... .. &j      imm=%imm_cj
+@cb_z      ... ... ... .. ... .. &b      imm=%imm_cb      rs1=%rs1_3  rs2=0
+
+@c_ldsp    ... . .....  ..... .. &i      imm=%uimm_6bit_ld rs1=2 %rd
+@c_lwsp    ... . .....  ..... .. &i      imm=%uimm_6bit_lw rs1=2 %rd
+@c_sdsp    ... . .....  ..... .. &s      imm=%uimm_6bit_sd rs1=2 rs2=%rs2_5
+@c_swsp    ... . .....  ..... .. &s      imm=%uimm_6bit_sw rs1=2 rs2=%rs2_5
+@c_li      ... . .....  ..... .. &i      imm=%imm_ci rs1=0 %rd
+@c_lui     ... . .....  ..... .. &u      imm=%imm_lui %rd
+@c_jalr    ... . .....  ..... .. &i      imm=0 rs1=%rd
+@c_mv      ... . ..... .....  .. &i      imm=0 rs1=%rs2_5 %rd
+
+@c_addi4spn     ... .  ..... ..... .. &i imm=%nzuimm_ciw rs1=2 rd=%rs2_3
+@c_addi16sp     ... .  ..... ..... .. &i imm=%imm_addi16sp rs1=2 rd=2
+
+@c_shift        ... . .. ... ..... .. \
+                &shift rd=%rs1_3 rs1=%rs1_3 shamt=%shimm_6bit
+@c_shift2       ... . .. ... ..... .. \
+                &shift rd=%rd rs1=%rd shamt=%shimm_6bit
+
+@c_andi         ... . .. ... ..... .. &i imm=%imm_ci rs1=%rs1_3 rd=%rs1_3
+
+# *** RV32/64C Standard Extension (Quadrant 0) ***
+{
+  # Opcode of all zeros is illegal; rd != 0, nzuimm == 0 is reserved.
+  illegal         000  000 000 00 --- 00
+  addi            000  ... ... .. ... 00 @c_addi4spn
+}
+fld               001  ... ... .. ... 00 @cl_d
+lw                010  ... ... .. ... 00 @cl_w
+fsd               101  ... ... .. ... 00 @cs_d
+sw                110  ... ... .. ... 00 @cs_w
+
+# *** RV32/64C Standard Extension (Quadrant 1) ***
+addi              000 .  .....  ..... 01 @ci
+addi              010 .  .....  ..... 01 @c_li
+{
+  illegal         011 0  -----  00000 01 # c.addi16sp and c.lui, RES nzimm=0
+  addi            011 .  00010  ..... 01 @c_addi16sp
+  lui             011 .  .....  ..... 01 @c_lui
+}
+srli              100 . 00 ...  ..... 01 @c_shift
+srai              100 . 01 ...  ..... 01 @c_shift
+andi              100 . 10 ...  ..... 01 @c_andi
+sub               100 0 11 ... 00 ... 01 @cs_2
+xor               100 0 11 ... 01 ... 01 @cs_2
+or                100 0 11 ... 10 ... 01 @cs_2
+and               100 0 11 ... 11 ... 01 @cs_2
+jal               101     ........... 01 @cj    rd=0  # C.J
+beq               110  ... ...  ..... 01 @cb_z
+bne               111  ... ...  ..... 01 @cb_z
+
+# *** RV32/64C Standard Extension (Quadrant 2) ***
+slli              000 .  .....  ..... 10 @c_shift2
+fld               001 .  .....  ..... 10 @c_ldsp
+{
+  illegal         010 -  00000  ----- 10 # c.lwsp, RES rd=0
+  lw              010 .  .....  ..... 10 @c_lwsp
+}
+{
+  illegal         100 0  00000  00000 10 # c.jr, RES rs1=0
+  jalr            100 0  .....  00000 10 @c_jalr rd=0  # C.JR
+  addi            100 0  .....  ..... 10 @c_mv
+}
+{
+  ebreak          100 1  00000  00000 10
+  jalr            100 1  .....  00000 10 @c_jalr rd=1  # C.JALR
+  add             100 1  .....  ..... 10 @cr
+}
+fsd               101   ......  ..... 10 @c_sdsp
+sw                110 .  .....  ..... 10 @c_swsp
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 6f3ab7aa52..77f794ed70 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -34,9 +34,13 @@
 %imm_u    12:s20                 !function=ex_shift_12
 
 # Argument sets:
+&empty
 &b    imm rs2 rs1
 &i    imm rs1 rd
+&j    imm rd
 &r    rd rs1 rs2
+&s    imm rs1 rs2
+&u    imm rd
 &shift     shamt rs1 rd
 &atomic    aq rl rs2 rs1 rd
 
@@ -44,9 +48,9 @@
 @r       .......   ..... ..... ... ..... ....... &r                %rs2 %rs1 %rd
 @i       ............    ..... ... ..... ....... &i      imm=%imm_i     %rs1 %rd
 @b       .......   ..... ..... ... ..... ....... &b      imm=%imm_b %rs2 %rs1
-@s       .......   ..... ..... ... ..... .......         imm=%imm_s %rs2 %rs1
-@u       ....................      ..... .......         imm=%imm_u          %rd
-@j       ....................      ..... .......         imm=%imm_j          %rd
+@s       .......   ..... ..... ... ..... ....... &s      imm=%imm_s %rs2 %rs1
+@u       ....................      ..... ....... &u      imm=%imm_u          %rd
+@j       ....................      ..... ....... &j      imm=%imm_j          %rd
 
 @sh      ......  ...... .....  ... ..... ....... &shift  shamt=%sh10      %rs1 %rd
 @csr     ............   .....  ... ..... .......               %csr     %rs1 %rd
diff --git a/target/riscv/insn_trans/trans_privileged.inc.c b/target/riscv/insn_trans/trans_privileged.inc.c
index acb605923e..664d6ba3f2 100644
--- a/target/riscv/insn_trans/trans_privileged.inc.c
+++ b/target/riscv/insn_trans/trans_privileged.inc.c
@@ -22,7 +22,7 @@ static bool trans_ecall(DisasContext *ctx, arg_ecall *a)
 {
     /* always generates U-level ECALL, fixed in do_interrupt handler */
     generate_exception(ctx, RISCV_EXCP_U_ECALL);
-    tcg_gen_exit_tb(NULL, 0); /* no chaining */
+    exit_tb(ctx); /* no chaining */
     ctx->base.is_jmp = DISAS_NORETURN;
     return true;
 }
@@ -30,7 +30,7 @@ static bool trans_ecall(DisasContext *ctx, arg_ecall *a)
 static bool trans_ebreak(DisasContext *ctx, arg_ebreak *a)
 {
     generate_exception(ctx, RISCV_EXCP_BREAKPOINT);
-    tcg_gen_exit_tb(NULL, 0); /* no chaining */
+    exit_tb(ctx); /* no chaining */
     ctx->base.is_jmp = DISAS_NORETURN;
     return true;
 }
@@ -47,7 +47,7 @@ static bool trans_sret(DisasContext *ctx, arg_sret *a)
 
     if (has_ext(ctx, RVS)) {
         gen_helper_sret(cpu_pc, cpu_env, cpu_pc);
-        tcg_gen_exit_tb(NULL, 0); /* no chaining */
+        exit_tb(ctx); /* no chaining */
         ctx->base.is_jmp = DISAS_NORETURN;
     } else {
         return false;
@@ -68,7 +68,7 @@ static bool trans_mret(DisasContext *ctx, arg_mret *a)
 #ifndef CONFIG_USER_ONLY
     tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
     gen_helper_mret(cpu_pc, cpu_env, cpu_pc);
-    tcg_gen_exit_tb(NULL, 0); /* no chaining */
+    exit_tb(ctx); /* no chaining */
     ctx->base.is_jmp = DISAS_NORETURN;
     return true;
 #else
diff --git a/target/riscv/insn_trans/trans_rvc.inc.c b/target/riscv/insn_trans/trans_rvc.inc.c
deleted file mode 100644
index 3e5d6fd5ea..0000000000
--- a/target/riscv/insn_trans/trans_rvc.inc.c
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- * RISC-V translation routines for the RVC Compressed Instruction Set.
- *
- * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
- * Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
- *                    Bastian Koppelmann, kbastian@mail.uni-paderborn.de
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2 or later, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-static bool trans_c_addi4spn(DisasContext *ctx, arg_c_addi4spn *a)
-{
-    if (a->nzuimm == 0) {
-        /* Reserved in ISA */
-        return false;
-    }
-    arg_addi arg = { .rd = a->rd, .rs1 = 2, .imm = a->nzuimm };
-    return trans_addi(ctx, &arg);
-}
-
-static bool trans_c_fld(DisasContext *ctx, arg_c_fld *a)
-{
-    arg_fld arg = { .rd = a->rd, .rs1 = a->rs1, .imm = a->uimm };
-    return trans_fld(ctx, &arg);
-}
-
-static bool trans_c_lw(DisasContext *ctx, arg_c_lw *a)
-{
-    arg_lw arg = { .rd = a->rd, .rs1 = a->rs1, .imm = a->uimm };
-    return trans_lw(ctx, &arg);
-}
-
-static bool trans_c_flw_ld(DisasContext *ctx, arg_c_flw_ld *a)
-{
-#ifdef TARGET_RISCV32
-    /* C.FLW ( RV32FC-only ) */
-    REQUIRE_FPU;
-    REQUIRE_EXT(ctx, RVF);
-
-    arg_c_lw tmp;
-    decode_insn16_extract_cl_w(ctx, &tmp, ctx->opcode);
-    arg_flw arg = { .rd = tmp.rd, .rs1 = tmp.rs1, .imm = tmp.uimm };
-    return trans_flw(ctx, &arg);
-#else
-    /* C.LD ( RV64C/RV128C-only ) */
-    arg_c_fld tmp;
-    decode_insn16_extract_cl_d(ctx, &tmp, ctx->opcode);
-    arg_ld arg = { .rd = tmp.rd, .rs1 = tmp.rs1, .imm = tmp.uimm };
-    return trans_ld(ctx, &arg);
-#endif
-}
-
-static bool trans_c_fsd(DisasContext *ctx, arg_c_fsd *a)
-{
-    arg_fsd arg = { .rs1 = a->rs1, .rs2 = a->rs2, .imm = a->uimm };
-    return trans_fsd(ctx, &arg);
-}
-
-static bool trans_c_sw(DisasContext *ctx, arg_c_sw *a)
-{
-    arg_sw arg = { .rs1 = a->rs1, .rs2 = a->rs2, .imm = a->uimm };
-    return trans_sw(ctx, &arg);
-}
-
-static bool trans_c_fsw_sd(DisasContext *ctx, arg_c_fsw_sd *a)
-{
-#ifdef TARGET_RISCV32
-    /* C.FSW ( RV32FC-only ) */
-    REQUIRE_FPU;
-    REQUIRE_EXT(ctx, RVF);
-
-    arg_c_sw tmp;
-    decode_insn16_extract_cs_w(ctx, &tmp, ctx->opcode);
-    arg_fsw arg = { .rs1 = tmp.rs1, .rs2 = tmp.rs2, .imm = tmp.uimm };
-    return trans_fsw(ctx, &arg);
-#else
-    /* C.SD ( RV64C/RV128C-only ) */
-    arg_c_fsd tmp;
-    decode_insn16_extract_cs_d(ctx, &tmp, ctx->opcode);
-    arg_sd arg = { .rs1 = tmp.rs1, .rs2 = tmp.rs2, .imm = tmp.uimm };
-    return trans_sd(ctx, &arg);
-#endif
-}
-
-static bool trans_c_addi(DisasContext *ctx, arg_c_addi *a)
-{
-    if (a->imm == 0) {
-        /* Hint: insn is valid but does not affect state */
-        return true;
-    }
-    arg_addi arg = { .rd = a->rd, .rs1 = a->rd, .imm = a->imm };
-    return trans_addi(ctx, &arg);
-}
-
-static bool trans_c_jal_addiw(DisasContext *ctx, arg_c_jal_addiw *a)
-{
-#ifdef TARGET_RISCV32
-    /* C.JAL */
-    arg_c_j tmp;
-    decode_insn16_extract_cj(ctx, &tmp, ctx->opcode);
-    arg_jal arg = { .rd = 1, .imm = tmp.imm };
-    return trans_jal(ctx, &arg);
-#else
-    /* C.ADDIW */
-    arg_addiw arg = { .rd = a->rd, .rs1 = a->rd, .imm = a->imm };
-    return trans_addiw(ctx, &arg);
-#endif
-}
-
-static bool trans_c_li(DisasContext *ctx, arg_c_li *a)
-{
-    if (a->rd == 0) {
-        /* Hint: insn is valid but does not affect state */
-        return true;
-    }
-    arg_addi arg = { .rd = a->rd, .rs1 = 0, .imm = a->imm };
-    return trans_addi(ctx, &arg);
-}
-
-static bool trans_c_addi16sp_lui(DisasContext *ctx, arg_c_addi16sp_lui *a)
-{
-    if (a->rd == 2) {
-        /* C.ADDI16SP */
-        arg_addi arg = { .rd = 2, .rs1 = 2, .imm = a->imm_addi16sp };
-        return trans_addi(ctx, &arg);
-    } else if (a->imm_lui != 0) {
-        /* C.LUI */
-        if (a->rd == 0) {
-            /* Hint: insn is valid but does not affect state */
-            return true;
-        }
-        arg_lui arg = { .rd = a->rd, .imm = a->imm_lui };
-        return trans_lui(ctx, &arg);
-    }
-    return false;
-}
-
-static bool trans_c_srli(DisasContext *ctx, arg_c_srli *a)
-{
-    int shamt = a->shamt;
-    if (shamt == 0) {
-        /* For RV128 a shamt of 0 means a shift by 64 */
-        shamt = 64;
-    }
-    /* Ensure, that shamt[5] is zero for RV32 */
-    if (shamt >= TARGET_LONG_BITS) {
-        return false;
-    }
-
-    arg_srli arg = { .rd = a->rd, .rs1 = a->rd, .shamt = a->shamt };
-    return trans_srli(ctx, &arg);
-}
-
-static bool trans_c_srai(DisasContext *ctx, arg_c_srai *a)
-{
-    int shamt = a->shamt;
-    if (shamt == 0) {
-        /* For RV128 a shamt of 0 means a shift by 64 */
-        shamt = 64;
-    }
-    /* Ensure, that shamt[5] is zero for RV32 */
-    if (shamt >= TARGET_LONG_BITS) {
-        return false;
-    }
-
-    arg_srai arg = { .rd = a->rd, .rs1 = a->rd, .shamt = a->shamt };
-    return trans_srai(ctx, &arg);
-}
-
-static bool trans_c_andi(DisasContext *ctx, arg_c_andi *a)
-{
-    arg_andi arg = { .rd = a->rd, .rs1 = a->rd, .imm = a->imm };
-    return trans_andi(ctx, &arg);
-}
-
-static bool trans_c_sub(DisasContext *ctx, arg_c_sub *a)
-{
-    arg_sub arg = { .rd = a->rd, .rs1 = a->rd, .rs2 = a->rs2 };
-    return trans_sub(ctx, &arg);
-}
-
-static bool trans_c_xor(DisasContext *ctx, arg_c_xor *a)
-{
-    arg_xor arg = { .rd = a->rd, .rs1 = a->rd, .rs2 = a->rs2 };
-    return trans_xor(ctx, &arg);
-}
-
-static bool trans_c_or(DisasContext *ctx, arg_c_or *a)
-{
-    arg_or arg = { .rd = a->rd, .rs1 = a->rd, .rs2 = a->rs2 };
-    return trans_or(ctx, &arg);
-}
-
-static bool trans_c_and(DisasContext *ctx, arg_c_and *a)
-{
-    arg_and arg = { .rd = a->rd, .rs1 = a->rd, .rs2 = a->rs2 };
-    return trans_and(ctx, &arg);
-}
-
-static bool trans_c_subw(DisasContext *ctx, arg_c_subw *a)
-{
-#ifdef TARGET_RISCV64
-    arg_subw arg = { .rd = a->rd, .rs1 = a->rd, .rs2 = a->rs2 };
-    return trans_subw(ctx, &arg);
-#else
-    return false;
-#endif
-}
-
-static bool trans_c_addw(DisasContext *ctx, arg_c_addw *a)
-{
-#ifdef TARGET_RISCV64
-    arg_addw arg = { .rd = a->rd, .rs1 = a->rd, .rs2 = a->rs2 };
-    return trans_addw(ctx, &arg);
-#else
-    return false;
-#endif
-}
-
-static bool trans_c_j(DisasContext *ctx, arg_c_j *a)
-{
-    arg_jal arg = { .rd = 0, .imm = a->imm };
-    return trans_jal(ctx, &arg);
-}
-
-static bool trans_c_beqz(DisasContext *ctx, arg_c_beqz *a)
-{
-    arg_beq arg = { .rs1 = a->rs1, .rs2 = 0, .imm = a->imm };
-    return trans_beq(ctx, &arg);
-}
-
-static bool trans_c_bnez(DisasContext *ctx, arg_c_bnez *a)
-{
-    arg_bne arg = { .rs1 = a->rs1, .rs2 = 0, .imm = a->imm };
-    return trans_bne(ctx, &arg);
-}
-
-static bool trans_c_slli(DisasContext *ctx, arg_c_slli *a)
-{
-    int shamt = a->shamt;
-    if (shamt == 0) {
-        /* For RV128 a shamt of 0 means a shift by 64 */
-        shamt = 64;
-    }
-    /* Ensure, that shamt[5] is zero for RV32 */
-    if (shamt >= TARGET_LONG_BITS) {
-        return false;
-    }
-
-    arg_slli arg = { .rd = a->rd, .rs1 = a->rd, .shamt = a->shamt };
-    return trans_slli(ctx, &arg);
-}
-
-static bool trans_c_fldsp(DisasContext *ctx, arg_c_fldsp *a)
-{
-    arg_fld arg = { .rd = a->rd, .rs1 = 2, .imm = a->uimm };
-    return trans_fld(ctx, &arg);
-}
-
-static bool trans_c_lwsp(DisasContext *ctx, arg_c_lwsp *a)
-{
-    arg_lw arg = { .rd = a->rd, .rs1 = 2, .imm = a->uimm };
-    return trans_lw(ctx, &arg);
-}
-
-static bool trans_c_flwsp_ldsp(DisasContext *ctx, arg_c_flwsp_ldsp *a)
-{
-#ifdef TARGET_RISCV32
-    /* C.FLWSP */
-    arg_flw arg_flw = { .rd = a->rd, .rs1 = 2, .imm = a->uimm_flwsp };
-    return trans_flw(ctx, &arg_flw);
-#else
-    /* C.LDSP */
-    arg_ld arg_ld = { .rd = a->rd, .rs1 = 2, .imm = a->uimm_ldsp };
-    return trans_ld(ctx, &arg_ld);
-#endif
-    return false;
-}
-
-static bool trans_c_jr_mv(DisasContext *ctx, arg_c_jr_mv *a)
-{
-    if (a->rd != 0 && a->rs2 == 0) {
-        /* C.JR */
-        arg_jalr arg = { .rd = 0, .rs1 = a->rd, .imm = 0 };
-        return trans_jalr(ctx, &arg);
-    } else if (a->rd != 0 && a->rs2 != 0) {
-        /* C.MV */
-        arg_add arg = { .rd = a->rd, .rs1 = 0, .rs2 = a->rs2 };
-        return trans_add(ctx, &arg);
-    }
-    return false;
-}
-
-static bool trans_c_ebreak_jalr_add(DisasContext *ctx, arg_c_ebreak_jalr_add *a)
-{
-    if (a->rd == 0 && a->rs2 == 0) {
-        /* C.EBREAK */
-        arg_ebreak arg = { };
-        return trans_ebreak(ctx, &arg);
-    } else if (a->rd != 0) {
-        if (a->rs2 == 0) {
-            /* C.JALR */
-            arg_jalr arg = { .rd = 1, .rs1 = a->rd, .imm = 0 };
-            return trans_jalr(ctx, &arg);
-        } else {
-            /* C.ADD */
-            arg_add arg = { .rd = a->rd, .rs1 = a->rd, .rs2 = a->rs2 };
-            return trans_add(ctx, &arg);
-        }
-    }
-    return false;
-}
-
-static bool trans_c_fsdsp(DisasContext *ctx, arg_c_fsdsp *a)
-{
-    arg_fsd arg = { .rs1 = 2, .rs2 = a->rs2, .imm = a->uimm };
-    return trans_fsd(ctx, &arg);
-}
-
-static bool trans_c_swsp(DisasContext *ctx, arg_c_swsp *a)
-{
-    arg_sw arg = { .rs1 = 2, .rs2 = a->rs2, .imm = a->uimm };
-    return trans_sw(ctx, &arg);
-}
-
-static bool trans_c_fswsp_sdsp(DisasContext *ctx, arg_c_fswsp_sdsp *a)
-{
-#ifdef TARGET_RISCV32
-    /* C.FSWSP */
-    arg_fsw a_fsw = { .rs1 = 2, .rs2 = a->rs2, .imm = a->uimm_fswsp };
-    return trans_fsw(ctx, &a_fsw);
-#else
-    /* C.SDSP */
-    arg_sd a_sd = { .rs1 = 2, .rs2 = a->rs2, .imm = a->uimm_sdsp };
-    return trans_sd(ctx, &a_sd);
-#endif
-}
diff --git a/target/riscv/insn_trans/trans_rvi.inc.c b/target/riscv/insn_trans/trans_rvi.inc.c
index d420a4d8b2..6cda078ed6 100644
--- a/target/riscv/insn_trans/trans_rvi.inc.c
+++ b/target/riscv/insn_trans/trans_rvi.inc.c
@@ -18,6 +18,12 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+static bool trans_illegal(DisasContext *ctx, arg_empty *a)
+{
+    gen_exception_illegal(ctx);
+    return true;
+}
+
 static bool trans_lui(DisasContext *ctx, arg_lui *a)
 {
     if (a->rd != 0) {
@@ -60,7 +66,7 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
     if (a->rd != 0) {
         tcg_gen_movi_tl(cpu_gpr[a->rd], ctx->pc_succ_insn);
     }
-    tcg_gen_lookup_and_goto_ptr();
+    lookup_and_goto_ptr(ctx);
 
     if (misaligned) {
         gen_set_label(misaligned);
@@ -217,7 +223,7 @@ static bool trans_sd(DisasContext *ctx, arg_sd *a)
 
 static bool trans_addi(DisasContext *ctx, arg_addi *a)
 {
-    return gen_arith_imm(ctx, a, &tcg_gen_add_tl);
+    return gen_arith_imm_fn(ctx, a, &tcg_gen_addi_tl);
 }
 
 static void gen_slt(TCGv ret, TCGv s1, TCGv s2)
@@ -233,25 +239,25 @@ static void gen_sltu(TCGv ret, TCGv s1, TCGv s2)
 
 static bool trans_slti(DisasContext *ctx, arg_slti *a)
 {
-    return gen_arith_imm(ctx, a, &gen_slt);
+    return gen_arith_imm_tl(ctx, a, &gen_slt);
 }
 
 static bool trans_sltiu(DisasContext *ctx, arg_sltiu *a)
 {
-    return gen_arith_imm(ctx, a, &gen_sltu);
+    return gen_arith_imm_tl(ctx, a, &gen_sltu);
 }
 
 static bool trans_xori(DisasContext *ctx, arg_xori *a)
 {
-    return gen_arith_imm(ctx, a, &tcg_gen_xor_tl);
+    return gen_arith_imm_fn(ctx, a, &tcg_gen_xori_tl);
 }
 static bool trans_ori(DisasContext *ctx, arg_ori *a)
 {
-    return gen_arith_imm(ctx, a, &tcg_gen_or_tl);
+    return gen_arith_imm_fn(ctx, a, &tcg_gen_ori_tl);
 }
 static bool trans_andi(DisasContext *ctx, arg_andi *a)
 {
-    return gen_arith_imm(ctx, a, &tcg_gen_and_tl);
+    return gen_arith_imm_fn(ctx, a, &tcg_gen_andi_tl);
 }
 static bool trans_slli(DisasContext *ctx, arg_slli *a)
 {
@@ -358,7 +364,7 @@ static bool trans_and(DisasContext *ctx, arg_and *a)
 #ifdef TARGET_RISCV64
 static bool trans_addiw(DisasContext *ctx, arg_addiw *a)
 {
-    return gen_arith_imm(ctx, a, &gen_addw);
+    return gen_arith_imm_tl(ctx, a, &gen_addw);
 }
 
 static bool trans_slliw(DisasContext *ctx, arg_slliw *a)
@@ -483,7 +489,7 @@ static bool trans_fence_i(DisasContext *ctx, arg_fence_i *a)
      * however we need to end the translation block
      */
     tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn);
-    tcg_gen_exit_tb(NULL, 0);
+    exit_tb(ctx);
     ctx->base.is_jmp = DISAS_NORETURN;
     return true;
 }
@@ -504,7 +510,7 @@ static bool trans_fence_i(DisasContext *ctx, arg_fence_i *a)
     gen_io_end(); \
     gen_set_gpr(a->rd, dest); \
     tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn); \
-    tcg_gen_exit_tb(NULL, 0); \
+    exit_tb(ctx); \
     ctx->base.is_jmp = DISAS_NORETURN; \
     tcg_temp_free(source1); \
     tcg_temp_free(csr_store); \
diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
index b7dc18a41e..644d0fb35f 100644
--- a/target/riscv/op_helper.c
+++ b/target/riscv/op_helper.c
@@ -145,9 +145,10 @@ void helper_tlb_flush(CPURISCVState *env)
 {
     RISCVCPU *cpu = riscv_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    if (env->priv == PRV_S &&
-        env->priv_ver >= PRIV_VERSION_1_10_0 &&
-        get_field(env->mstatus, MSTATUS_TVM)) {
+    if (!(env->priv >= PRV_S) ||
+        (env->priv == PRV_S &&
+         env->priv_ver >= PRIV_VERSION_1_10_0 &&
+         get_field(env->mstatus, MSTATUS_TVM))) {
         riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
     } else {
         tlb_flush(cs);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 2ff6b49487..313c27b700 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -109,6 +109,26 @@ static void gen_exception_debug(void)
     tcg_temp_free_i32(helper_tmp);
 }
 
+/* Wrapper around tcg_gen_exit_tb that handles single stepping */
+static void exit_tb(DisasContext *ctx)
+{
+    if (ctx->base.singlestep_enabled) {
+        gen_exception_debug();
+    } else {
+        tcg_gen_exit_tb(NULL, 0);
+    }
+}
+
+/* Wrapper around tcg_gen_lookup_and_goto_ptr that handles single stepping */
+static void lookup_and_goto_ptr(DisasContext *ctx)
+{
+    if (ctx->base.singlestep_enabled) {
+        gen_exception_debug();
+    } else {
+        tcg_gen_lookup_and_goto_ptr();
+    }
+}
+
 static void gen_exception_illegal(DisasContext *ctx)
 {
     generate_exception(ctx, RISCV_EXCP_ILLEGAL_INST);
@@ -138,14 +158,14 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
         /* chaining is only allowed when the jump is to the same page */
         tcg_gen_goto_tb(n);
         tcg_gen_movi_tl(cpu_pc, dest);
+
+        /* No need to check for single stepping here as use_goto_tb() will
+         * return false in case of single stepping.
+         */
         tcg_gen_exit_tb(ctx->base.tb, n);
     } else {
         tcg_gen_movi_tl(cpu_pc, dest);
-        if (ctx->base.singlestep_enabled) {
-            gen_exception_debug();
-        } else {
-            tcg_gen_lookup_and_goto_ptr();
-        }
+        lookup_and_goto_ptr(ctx);
     }
 }
 
@@ -538,12 +558,32 @@ static int ex_rvc_register(DisasContext *ctx, int reg)
     return 8 + reg;
 }
 
-bool decode_insn32(DisasContext *ctx, uint32_t insn);
+static int ex_rvc_shifti(DisasContext *ctx, int imm)
+{
+    /* For RV128 a shamt of 0 means a shift by 64. */
+    return imm ? imm : 64;
+}
+
 /* Include the auto-generated decoder for 32 bit insn */
 #include "decode_insn32.inc.c"
 
-static bool gen_arith_imm(DisasContext *ctx, arg_i *a,
-                          void(*func)(TCGv, TCGv, TCGv))
+static bool gen_arith_imm_fn(DisasContext *ctx, arg_i *a,
+                             void (*func)(TCGv, TCGv, target_long))
+{
+    TCGv source1;
+    source1 = tcg_temp_new();
+
+    gen_get_gpr(source1, a->rs1);
+
+    (*func)(source1, source1, a->imm);
+
+    gen_set_gpr(a->rd, source1);
+    tcg_temp_free(source1);
+    return true;
+}
+
+static bool gen_arith_imm_tl(DisasContext *ctx, arg_i *a,
+                             void (*func)(TCGv, TCGv, TCGv))
 {
     TCGv source1, source2;
     source1 = tcg_temp_new();
@@ -667,10 +707,25 @@ static bool gen_shift(DisasContext *ctx, arg_r *a,
 #include "insn_trans/trans_rvd.inc.c"
 #include "insn_trans/trans_privileged.inc.c"
 
-bool decode_insn16(DisasContext *ctx, uint16_t insn);
-/* auto-generated decoder*/
+/*
+ * Auto-generated decoder.
+ * Note that the 16-bit decoder reuses some of the trans_* functions
+ * initially declared by the 32-bit decoder, which results in duplicate
+ * declaration warnings.  Suppress them.
+ */
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wredundant-decls"
+# ifdef __clang__
+#  pragma GCC diagnostic ignored "-Wtypedef-redefinition"
+# endif
+#endif
+
 #include "decode_insn16.inc.c"
-#include "insn_trans/trans_rvc.inc.c"
+
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+# pragma GCC diagnostic pop
+#endif
 
 static void decode_opc(DisasContext *ctx)
 {
diff --git a/target/s390x/Makefile.objs b/target/s390x/Makefile.objs
index 0316457880..3e2745594a 100644
--- a/target/s390x/Makefile.objs
+++ b/target/s390x/Makefile.objs
@@ -1,7 +1,8 @@
 obj-y += cpu.o cpu_models.o cpu_features.o gdbstub.o interrupt.o helper.o
 obj-$(CONFIG_TCG) += translate.o cc_helper.o excp_helper.o fpu_helper.o
 obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o crypto_helper.o
-obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o
+obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o vec_string_helper.o
+obj-$(CONFIG_TCG) += vec_fpu_helper.o
 obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o mmu_helper.o diag.o
 obj-$(CONFIG_SOFTMMU) += sigp.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c
index c9ef0a6e60..50fa0ae4b6 100644
--- a/target/s390x/arch_dump.c
+++ b/target/s390x/arch_dump.c
@@ -104,7 +104,7 @@ static void s390x_write_elf64_fpregset(Note *note, S390CPU *cpu, int id)
     note->hdr.n_type = cpu_to_be32(NT_FPREGSET);
     note->contents.fpregset.fpc = cpu_to_be32(cpu->env.fpc);
     for (i = 0; i <= 15; i++) {
-        note->contents.fpregset.fprs[i] = cpu_to_be64(get_freg(cs, i)->ll);
+        note->contents.fpregset.fprs[i] = cpu_to_be64(*get_freg(cs, i));
     }
 }
 
@@ -114,7 +114,7 @@ static void s390x_write_elf64_vregslo(Note *note, S390CPU *cpu,  int id)
 
     note->hdr.n_type = cpu_to_be32(NT_S390_VXRS_LOW);
     for (i = 0; i <= 15; i++) {
-        note->contents.vregslo.vregs[i] = cpu_to_be64(cpu->env.vregs[i][1].ll);
+        note->contents.vregslo.vregs[i] = cpu_to_be64(cpu->env.vregs[i][1]);
     }
 }
 
@@ -127,8 +127,8 @@ static void s390x_write_elf64_vregshi(Note *note, S390CPU *cpu, int id)
 
     note->hdr.n_type = cpu_to_be32(NT_S390_VXRS_HIGH);
     for (i = 0; i <= 15; i++) {
-        temp_vregshi->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i + 16][0].ll);
-        temp_vregshi->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i + 16][1].ll);
+        temp_vregshi->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i + 16][0]);
+        temp_vregshi->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i + 16][1]);
     }
 }
 
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index b1df63d82c..6af1a1530f 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -145,6 +145,9 @@ static void s390_cpu_full_reset(CPUState *s)
 #if defined(CONFIG_USER_ONLY)
     /* user mode should always be allowed to use the full FPU */
     env->cregs[0] |= CR0_AFP;
+    if (s390_has_feat(S390_FEAT_VECTOR)) {
+        env->cregs[0] |= CR0_VECTOR;
+    }
 #endif
 
     /* architectured initial value for Breaking-Event-Address register */
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 7305cacc7b..4fc08a2c88 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -66,7 +66,7 @@ struct CPUS390XState {
      * The floating point registers are part of the vector registers.
      * vregs[0][0] -> vregs[15][0] are 16 floating point registers
      */
-    CPU_DoubleU vregs[32][2];  /* vector registers */
+    uint64_t vregs[32][2] QEMU_ALIGNED(16);  /* vector registers */
     uint32_t aregs[16];    /* access registers */
     uint8_t riccb[64];     /* runtime instrumentation control */
     uint64_t gscb[4];      /* guarded storage control */
@@ -153,7 +153,7 @@ struct CPUS390XState {
 
 };
 
-static inline CPU_DoubleU *get_freg(CPUS390XState *cs, int nr)
+static inline uint64_t *get_freg(CPUS390XState *cs, int nr)
 {
     return &cs->vregs[nr][0];
 }
@@ -215,6 +215,7 @@ extern const struct VMStateDescription vmstate_s390_cpu;
 #define PGM_SPECIAL_OP                  0x0013
 #define PGM_OPERAND                     0x0015
 #define PGM_TRACE_TABLE                 0x0016
+#define PGM_VECTOR_PROCESSING           0x001b
 #define PGM_SPACE_SWITCH                0x001c
 #define PGM_HFP_SQRT                    0x001d
 #define PGM_PC_TRANS_SPEC               0x001f
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 21ea819483..b5d16e4c89 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -86,8 +86,8 @@ static S390CPUDef s390_cpu_defs[] = {
     CPUDEF_INIT(0x8562, 15, 1, 47, 0x08000000U, "gen15b", "IBM 8562 GA1"),
 };
 
-#define QEMU_MAX_CPU_TYPE 0x2827
-#define QEMU_MAX_CPU_GEN 12
+#define QEMU_MAX_CPU_TYPE 0x2964
+#define QEMU_MAX_CPU_GEN 13
 #define QEMU_MAX_CPU_EC_GA 2
 static const S390FeatInit qemu_max_cpu_feat_init = { S390_FEAT_LIST_QEMU_MAX };
 static S390FeatBitmap qemu_max_cpu_feat;
diff --git a/target/s390x/excp_helper.c b/target/s390x/excp_helper.c
index 3a467b72c5..f21bcf79ae 100644
--- a/target/s390x/excp_helper.c
+++ b/target/s390x/excp_helper.c
@@ -62,6 +62,21 @@ void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc,
     tcg_s390_program_interrupt(env, PGM_DATA, ILEN_AUTO, ra);
 }
 
+void QEMU_NORETURN tcg_s390_vector_exception(CPUS390XState *env, uint32_t vxc,
+                                             uintptr_t ra)
+{
+    g_assert(vxc <= 0xff);
+#if !defined(CONFIG_USER_ONLY)
+    /* Always store the VXC into the lowcore, without AFP it is undefined */
+    stl_phys(CPU(s390_env_get_cpu(env))->as,
+             env->psa + offsetof(LowCore, data_exc_code), vxc);
+#endif
+
+    /* Always store the VXC into the FPC, without AFP it is undefined */
+    env->fpc = deposit32(env->fpc, 8, 8, vxc);
+    tcg_s390_program_interrupt(env, PGM_VECTOR_PROCESSING, ILEN_AUTO, ra);
+}
+
 void HELPER(data_exception)(CPUS390XState *env, uint32_t dxc)
 {
     tcg_s390_data_exception(env, dxc, GETPC());
@@ -390,8 +405,8 @@ static int mchk_store_vregs(CPUS390XState *env, uint64_t mcesao)
     }
 
     for (i = 0; i < 32; i++) {
-        sa->vregs[i][0] = cpu_to_be64(env->vregs[i][0].ll);
-        sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1].ll);
+        sa->vregs[i][0] = cpu_to_be64(env->vregs[i][0]);
+        sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1]);
     }
 
     cpu_physical_memory_unmap(sa, len, 1, len);
@@ -429,7 +444,7 @@ static void do_mchk_interrupt(CPUS390XState *env)
     lowcore->ar_access_id = 1;
 
     for (i = 0; i < 16; i++) {
-        lowcore->floating_pt_save_area[i] = cpu_to_be64(get_freg(env, i)->ll);
+        lowcore->floating_pt_save_area[i] = cpu_to_be64(*get_freg(env, i));
         lowcore->gpregs_save_area[i] = cpu_to_be64(env->regs[i]);
         lowcore->access_regs_save_area[i] = cpu_to_be32(env->aregs[i]);
         lowcore->cregs_save_area[i] = cpu_to_be64(env->cregs[i]);
diff --git a/target/s390x/fpu_helper.c b/target/s390x/fpu_helper.c
index 1be68bafea..d2c17ed942 100644
--- a/target/s390x/fpu_helper.c
+++ b/target/s390x/fpu_helper.c
@@ -112,7 +112,7 @@ static void handle_exceptions(CPUS390XState *env, bool XxC, uintptr_t retaddr)
     }
 }
 
-static inline int float_comp_to_cc(CPUS390XState *env, int float_compare)
+int float_comp_to_cc(CPUS390XState *env, int float_compare)
 {
     S390CPU *cpu = s390_env_get_cpu(env);
 
@@ -746,7 +746,7 @@ static inline uint16_t dcmask(int bit, bool neg)
 }
 
 #define DEF_FLOAT_DCMASK(_TYPE) \
-static uint16_t _TYPE##_dcmask(CPUS390XState *env, _TYPE f1)       \
+uint16_t _TYPE##_dcmask(CPUS390XState *env, _TYPE f1)              \
 {                                                                  \
     const bool neg = _TYPE##_is_neg(f1);                           \
                                                                    \
diff --git a/target/s390x/gdbstub.c b/target/s390x/gdbstub.c
index df147596ce..9cfd8fe3e0 100644
--- a/target/s390x/gdbstub.c
+++ b/target/s390x/gdbstub.c
@@ -116,7 +116,7 @@ static int cpu_read_fp_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
     case S390_FPC_REGNUM:
         return gdb_get_reg32(mem_buf, env->fpc);
     case S390_F0_REGNUM ... S390_F15_REGNUM:
-        return gdb_get_reg64(mem_buf, get_freg(env, n - S390_F0_REGNUM)->ll);
+        return gdb_get_reg64(mem_buf, *get_freg(env, n - S390_F0_REGNUM));
     default:
         return 0;
     }
@@ -129,7 +129,7 @@ static int cpu_write_fp_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
         env->fpc = ldl_p(mem_buf);
         return 4;
     case S390_F0_REGNUM ... S390_F15_REGNUM:
-        get_freg(env, n - S390_F0_REGNUM)->ll = ldtul_p(mem_buf);
+        *get_freg(env, n - S390_F0_REGNUM) = ldtul_p(mem_buf);
         return 8;
     default:
         return 0;
@@ -150,11 +150,11 @@ static int cpu_read_vreg(CPUS390XState *env, uint8_t *mem_buf, int n)
 
     switch (n) {
     case S390_V0L_REGNUM ... S390_V15L_REGNUM:
-        ret = gdb_get_reg64(mem_buf, env->vregs[n][1].ll);
+        ret = gdb_get_reg64(mem_buf, env->vregs[n][1]);
         break;
     case S390_V16_REGNUM ... S390_V31_REGNUM:
-        ret = gdb_get_reg64(mem_buf, env->vregs[n][0].ll);
-        ret += gdb_get_reg64(mem_buf + 8, env->vregs[n][1].ll);
+        ret = gdb_get_reg64(mem_buf, env->vregs[n][0]);
+        ret += gdb_get_reg64(mem_buf + 8, env->vregs[n][1]);
         break;
     default:
         ret = 0;
@@ -167,11 +167,11 @@ static int cpu_write_vreg(CPUS390XState *env, uint8_t *mem_buf, int n)
 {
     switch (n) {
     case S390_V0L_REGNUM ... S390_V15L_REGNUM:
-        env->vregs[n][1].ll = ldtul_p(mem_buf + 8);
+        env->vregs[n][1] = ldtul_p(mem_buf + 8);
         return 8;
     case S390_V16_REGNUM ... S390_V31_REGNUM:
-        env->vregs[n][0].ll = ldtul_p(mem_buf);
-        env->vregs[n][1].ll = ldtul_p(mem_buf + 8);
+        env->vregs[n][0] = ldtul_p(mem_buf);
+        env->vregs[n][1] = ldtul_p(mem_buf + 8);
         return 16;
     default:
         return 0;
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index c346b76bdf..dc320a06c2 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -689,7 +689,7 @@ static uint16_t qemu_V3_1[] = {
     S390_FEAT_MSA_EXT_4,
 };
 
-static uint16_t qemu_LATEST[] = {
+static uint16_t qemu_V4_0[] = {
     /*
      * Only BFP bits are implemented (HFP, DFP, PFPO and DIVIDE TO INTEGER not
      * implemented yet).
@@ -698,10 +698,13 @@ static uint16_t qemu_LATEST[] = {
     S390_FEAT_ZPCI,
 };
 
+static uint16_t qemu_LATEST[] = {
+    S390_FEAT_STFLE_53,
+    S390_FEAT_VECTOR,
+};
+
 /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
-    /* z13+ features */
-    S390_FEAT_STFLE_53,
     /* generates a dependency warning, leave it out for now */
     S390_FEAT_MSA_EXT_5,
 };
@@ -820,6 +823,7 @@ static FeatGroupDefSpec FeatGroupDef[] = {
 static FeatGroupDefSpec QemuFeatDef[] = {
     QEMU_FEAT_INITIALIZER(V2_11),
     QEMU_FEAT_INITIALIZER(V3_1),
+    QEMU_FEAT_INITIALIZER(V4_0),
     QEMU_FEAT_INITIALIZER(LATEST),
     QEMU_FEAT_INITIALIZER(MAX),
 };
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index 3c8f0a7615..a69e5abf5f 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -249,7 +249,7 @@ int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch)
         cpu_physical_memory_write(offsetof(LowCore, ar_access_id), &ar_id, 1);
     }
     for (i = 0; i < 16; ++i) {
-        sa->fprs[i] = cpu_to_be64(get_freg(&cpu->env, i)->ll);
+        sa->fprs[i] = cpu_to_be64(*get_freg(&cpu->env, i));
     }
     for (i = 0; i < 16; ++i) {
         sa->grs[i] = cpu_to_be64(cpu->env.regs[i]);
@@ -299,8 +299,8 @@ int s390_store_adtl_status(S390CPU *cpu, hwaddr addr, hwaddr len)
 
     if (s390_has_feat(S390_FEAT_VECTOR)) {
         for (i = 0; i < 32; i++) {
-            sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0].ll);
-            sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1].ll);
+            sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0]);
+            sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1]);
         }
     }
     if (s390_has_feat(S390_FEAT_GUARDED_STORAGE) && len >= ADTL_GS_MIN_SIZE) {
@@ -341,13 +341,13 @@ void s390_cpu_dump_state(CPUState *cs, FILE *f, int flags)
         if (s390_has_feat(S390_FEAT_VECTOR)) {
             for (i = 0; i < 32; i++) {
                 qemu_fprintf(f, "V%02d=%016" PRIx64 "%016" PRIx64 "%c",
-                             i, env->vregs[i][0].ll, env->vregs[i][1].ll,
+                             i, env->vregs[i][0], env->vregs[i][1],
                              i % 2 ? '\n' : ' ');
             }
         } else {
             for (i = 0; i < 16; i++) {
                 qemu_fprintf(f, "F%02d=%016" PRIx64 "%c",
-                             i, get_freg(env, i)->ll,
+                             i, *get_freg(env, i),
                              (i % 4) == 3 ? '\n' : ' ');
             }
         }
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 7755a96c33..e9aff83b05 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -211,6 +211,90 @@ DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)
 
+/* === Vector String Instructions === */
+DEF_HELPER_FLAGS_4(gvec_vfae8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfae16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfae32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_5(gvec_vfae_cc8, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfae_cc16, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfae_cc32, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfee8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfee16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfee32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_5(gvec_vfee_cc8, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfee_cc16, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfee_cc32, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfene8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfene16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfene32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_5(gvec_vfene_cc8, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfene_cc16, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfene_cc32, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_3(gvec_vistr8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
+DEF_HELPER_FLAGS_3(gvec_vistr16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
+DEF_HELPER_FLAGS_3(gvec_vistr32, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
+DEF_HELPER_4(gvec_vistr_cc8, void, ptr, cptr, env, i32)
+DEF_HELPER_4(gvec_vistr_cc16, void, ptr, cptr, env, i32)
+DEF_HELPER_4(gvec_vistr_cc32, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_6(gvec_vstrc_cc8, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc16, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
+
+/* === Vector Floating-Point Instructions */
+DEF_HELPER_FLAGS_5(gvec_vfa64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfa64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_4(gvec_wfc64, void, cptr, cptr, env, i32)
+DEF_HELPER_4(gvec_wfk64, void, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfce64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfce64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfce64_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfce64s_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfch64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfch64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfch64_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfch64s_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfche64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfche64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfche64s_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdlg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vclgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vclgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfd64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfd64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfi64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfll32s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vflr64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfm64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfma64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfms64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfsq64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfs64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfs64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_4(gvec_vftci64, void, ptr, cptr, env, i32)
+DEF_HELPER_4(gvec_vftci64s, void, ptr, cptr, env, i32)
+
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
 DEF_HELPER_4(diag, void, env, i32, i32, i32)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index e61475bdc4..f421184fcd 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1191,6 +1191,64 @@
 /* VECTOR TEST UNDER MASK */
     F(0xe7d8, VTM,     VRR_a, V,   0, 0, 0, 0, vtm, 0, IF_VEC)
 
+/* === Vector String Instructions === */
+
+/* VECTOR FIND ANY ELEMENT EQUAL */
+    F(0xe782, VFAE,    VRR_b, V,   0, 0, 0, 0, vfae, 0, IF_VEC)
+/* VECTOR FIND ELEMENT EQUAL */
+    F(0xe780, VFEE,    VRR_b, V,   0, 0, 0, 0, vfee, 0, IF_VEC)
+/* VECTOR FIND ELEMENT NOT EQUAL */
+    F(0xe781, VFENE,   VRR_b, V,   0, 0, 0, 0, vfene, 0, IF_VEC)
+/* VECTOR ISOLATE STRING */
+    F(0xe75c, VISTR,   VRR_a, V,   0, 0, 0, 0, vistr, 0, IF_VEC)
+/* VECTOR STRING RANGE COMPARE */
+    F(0xe78a, VSTRC,   VRR_d, V,   0, 0, 0, 0, vstrc, 0, IF_VEC)
+
+/* === Vector Floating-Point Instructions */
+
+/* VECTOR FP ADD */
+    F(0xe7e3, VFA,     VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
+/* VECTOR FP COMPARE SCALAR */
+    F(0xe7cb, WFC,     VRR_a, V,   0, 0, 0, 0, wfc, 0, IF_VEC)
+/* VECTOR FP COMPARE AND SIGNAL SCALAR */
+    F(0xe7ca, WFK,     VRR_a, V,   0, 0, 0, 0, wfc, 0, IF_VEC)
+/* VECTOR FP COMPARE EQUAL */
+    F(0xe7e8, VFCE,    VRR_c, V,   0, 0, 0, 0, vfc, 0, IF_VEC)
+/* VECTOR FP COMPARE HIGH */
+    F(0xe7eb, VFCH,    VRR_c, V,   0, 0, 0, 0, vfc, 0, IF_VEC)
+/* VECTOR FP COMPARE HIGH OR EQUAL */
+    F(0xe7ea, VFCHE,   VRR_c, V,   0, 0, 0, 0, vfc, 0, IF_VEC)
+/* VECTOR FP CONVERT FROM FIXED 64-BIT */
+    F(0xe7c3, VCDG,    VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP CONVERT FROM LOGICAL 64-BIT */
+    F(0xe7c1, VCDLG,   VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP CONVERT TO FIXED 64-BIT */
+    F(0xe7c2, VCGD,    VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP CONVERT TO LOGICAL 64-BIT */
+    F(0xe7c0, VCLGD,   VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP DIVIDE */
+    F(0xe7e5, VFD,     VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
+/* VECTOR LOAD FP INTEGER */
+    F(0xe7c7, VFI,     VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR LOAD LENGTHENED */
+    F(0xe7c4, VFLL,    VRR_a, V,   0, 0, 0, 0, vfll, 0, IF_VEC)
+/* VECTOR LOAD ROUNDED */
+    F(0xe7c5, VFLR,    VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP MULTIPLY */
+    F(0xe7e7, VFM,     VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
+/* VECTOR FP MULTIPLY AND ADD */
+    F(0xe78f, VFMA,    VRR_e, V,   0, 0, 0, 0, vfma, 0, IF_VEC)
+/* VECTOR FP MULTIPLY AND SUBTRACT */
+    F(0xe78e, VFMS,    VRR_e, V,   0, 0, 0, 0, vfma, 0, IF_VEC)
+/* VECTOR FP PERFORM SIGN OPERATION */
+    F(0xe7cc, VFPSO,   VRR_a, V,   0, 0, 0, 0, vfpso, 0, IF_VEC)
+/* VECTOR FP SQUARE ROOT */
+    F(0xe7ce, VFSQ,    VRR_a, V,   0, 0, 0, 0, vfsq, 0, IF_VEC)
+/* VECTOR FP SUBTRACT */
+    F(0xe7e2, VFS,     VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
+/* VECTOR FP TEST DATA CLASS IMMEDIATE */
+    F(0xe74a, VFTCI,   VRI_e, V,   0, 0, 0, 0, vftci, 0, IF_VEC)
+
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
     E(0xb250, CSP,     RRE,   Z,   r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV)
diff --git a/target/s390x/internal.h b/target/s390x/internal.h
index 9893fc094b..c243fa725b 100644
--- a/target/s390x/internal.h
+++ b/target/s390x/internal.h
@@ -285,6 +285,10 @@ uint32_t set_cc_nz_f128(float128 v);
 uint8_t s390_softfloat_exc_to_ieee(unsigned int exc);
 int s390_swap_bfp_rounding_mode(CPUS390XState *env, int m3);
 void s390_restore_bfp_rounding_mode(CPUS390XState *env, int old_mode);
+int float_comp_to_cc(CPUS390XState *env, int float_compare);
+uint16_t float32_dcmask(CPUS390XState *env, float32 f1);
+uint16_t float64_dcmask(CPUS390XState *env, float64 f1);
+uint16_t float128_dcmask(CPUS390XState *env, float128 f1);
 
 
 /* gdbstub.c */
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index e5e2b691f2..bcec9795ec 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -418,21 +418,21 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 
     if (can_sync_regs(cs, KVM_SYNC_VRS)) {
         for (i = 0; i < 32; i++) {
-            cs->kvm_run->s.regs.vrs[i][0] = env->vregs[i][0].ll;
-            cs->kvm_run->s.regs.vrs[i][1] = env->vregs[i][1].ll;
+            cs->kvm_run->s.regs.vrs[i][0] = env->vregs[i][0];
+            cs->kvm_run->s.regs.vrs[i][1] = env->vregs[i][1];
         }
         cs->kvm_run->s.regs.fpc = env->fpc;
         cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_VRS;
     } else if (can_sync_regs(cs, KVM_SYNC_FPRS)) {
         for (i = 0; i < 16; i++) {
-            cs->kvm_run->s.regs.fprs[i] = get_freg(env, i)->ll;
+            cs->kvm_run->s.regs.fprs[i] = *get_freg(env, i);
         }
         cs->kvm_run->s.regs.fpc = env->fpc;
         cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_FPRS;
     } else {
         /* Floating point */
         for (i = 0; i < 16; i++) {
-            fpu.fprs[i] = get_freg(env, i)->ll;
+            fpu.fprs[i] = *get_freg(env, i);
         }
         fpu.fpc = env->fpc;
 
@@ -586,13 +586,13 @@ int kvm_arch_get_registers(CPUState *cs)
     /* Floating point and vector registers */
     if (can_sync_regs(cs, KVM_SYNC_VRS)) {
         for (i = 0; i < 32; i++) {
-            env->vregs[i][0].ll = cs->kvm_run->s.regs.vrs[i][0];
-            env->vregs[i][1].ll = cs->kvm_run->s.regs.vrs[i][1];
+            env->vregs[i][0] = cs->kvm_run->s.regs.vrs[i][0];
+            env->vregs[i][1] = cs->kvm_run->s.regs.vrs[i][1];
         }
         env->fpc = cs->kvm_run->s.regs.fpc;
     } else if (can_sync_regs(cs, KVM_SYNC_FPRS)) {
         for (i = 0; i < 16; i++) {
-            get_freg(env, i)->ll = cs->kvm_run->s.regs.fprs[i];
+            *get_freg(env, i) = cs->kvm_run->s.regs.fprs[i];
         }
         env->fpc = cs->kvm_run->s.regs.fpc;
     } else {
@@ -601,7 +601,7 @@ int kvm_arch_get_registers(CPUState *cs)
             return r;
         }
         for (i = 0; i < 16; i++) {
-            get_freg(env, i)->ll = fpu.fprs[i];
+            *get_freg(env, i) = fpu.fprs[i];
         }
         env->fpc = fpu.fpc;
     }
diff --git a/target/s390x/machine.c b/target/s390x/machine.c
index cb792aa103..e6851a57bc 100644
--- a/target/s390x/machine.c
+++ b/target/s390x/machine.c
@@ -66,22 +66,22 @@ static const VMStateDescription vmstate_fpu = {
     .minimum_version_id = 1,
     .needed = fpu_needed,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT64(env.vregs[0][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[1][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[2][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[3][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[4][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[5][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[6][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[7][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[8][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[9][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[10][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[11][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[12][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[13][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[14][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[15][0].ll, S390CPU),
+        VMSTATE_UINT64(env.vregs[0][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[1][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[2][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[3][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[4][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[5][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[6][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[7][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[8][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[9][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[10][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[11][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[12][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[13][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[14][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[15][0], S390CPU),
         VMSTATE_UINT32(env.fpc, S390CPU),
         VMSTATE_END_OF_LIST()
     }
@@ -99,54 +99,54 @@ static const VMStateDescription vmstate_vregs = {
     .needed = vregs_needed,
     .fields = (VMStateField[]) {
         /* vregs[0][0] -> vregs[15][0] and fregs are overlays */
-        VMSTATE_UINT64(env.vregs[16][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[17][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[18][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[19][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[20][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[21][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[22][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[23][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[24][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[25][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[26][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[27][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[28][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[29][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[30][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[31][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[0][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[1][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[2][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[3][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[4][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[5][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[6][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[7][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[8][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[9][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[10][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[11][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[12][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[13][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[14][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[15][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[16][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[17][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[18][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[19][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[20][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[21][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[22][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[23][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[24][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[25][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[26][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[27][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[28][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[29][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[30][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[31][1].ll, S390CPU),
+        VMSTATE_UINT64(env.vregs[16][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[17][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[18][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[19][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[20][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[21][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[22][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[23][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[24][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[25][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[26][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[27][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[28][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[29][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[30][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[31][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[0][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[1][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[2][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[3][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[4][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[5][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[6][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[7][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[8][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[9][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[10][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[11][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[12][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[13][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[14][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[15][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[16][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[17][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[18][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[19][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[20][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[21][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[22][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[23][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[24][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[25][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[26][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[27][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[28][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[29][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[30][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[31][1], S390CPU),
         VMSTATE_END_OF_LIST()
     }
 };
diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index ee67c1fa0c..10aa617cf9 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -669,7 +669,7 @@ uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr)
 {
     const uintptr_t ra = GETPC();
     const int count_bytes = ((env->regs[0] & 0xff) + 1) * 8;
-    const int max_bytes = ROUND_UP(used_stfl_bytes, 8);
+    int max_bytes;
     int i;
 
     if (addr & 0x7) {
@@ -677,7 +677,14 @@ uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr)
     }
 
     prepare_stfl();
-    for (i = 0; i < count_bytes; ++i) {
+    max_bytes = ROUND_UP(used_stfl_bytes, 8);
+
+    /*
+     * The PoP says that doublewords beyond the highest-numbered facility
+     * bit may or may not be stored.  However, existing hardware appears to
+     * not store the words, and existing software depend on that.
+     */
+    for (i = 0; i < MIN(count_bytes, max_bytes); ++i) {
         cpu_stb_data_ra(env, addr + i, stfl_bytes[i], ra);
     }
 
diff --git a/target/s390x/tcg_s390x.h b/target/s390x/tcg_s390x.h
index ab2c4ba703..2813f9d48e 100644
--- a/target/s390x/tcg_s390x.h
+++ b/target/s390x/tcg_s390x.h
@@ -18,5 +18,7 @@ void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, uint32_t code,
                                               int ilen, uintptr_t ra);
 void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc,
                                            uintptr_t ra);
+void QEMU_NORETURN tcg_s390_vector_exception(CPUS390XState *env, uint32_t vxc,
+                                             uintptr_t ra);
 
 #endif /* TCG_S390X_H */
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index fa57b7550e..ac0d8b6410 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -149,7 +149,7 @@ void s390x_translate_init(void)
 static inline int vec_full_reg_offset(uint8_t reg)
 {
     g_assert(reg < 32);
-    return offsetof(CPUS390XState, vregs[reg][0].d);
+    return offsetof(CPUS390XState, vregs[reg][0]);
 }
 
 static inline int vec_reg_offset(uint8_t reg, uint8_t enr, TCGMemOp es)
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 7e0bfcb190..7b1d31cba5 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -52,6 +52,11 @@
 #define ES_64   MO_64
 #define ES_128  4
 
+/* Floating-Point Format */
+#define FPF_SHORT       2
+#define FPF_LONG        3
+#define FPF_EXT         4
+
 static inline bool valid_vec_element(uint8_t enr, TCGMemOp es)
 {
     return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
@@ -188,6 +193,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
 #define gen_gvec_2s(v1, v2, c, gen) \
     tcg_gen_gvec_2s(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                     16, 16, c, gen)
+#define gen_gvec_2_ool(v1, v2, data, fn) \
+    tcg_gen_gvec_2_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+                       16, 16, data, fn)
 #define gen_gvec_2i_ool(v1, v2, c, data, fn) \
     tcg_gen_gvec_2i_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                         c, 16, 16, data, fn)
@@ -214,6 +222,10 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
     tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                        vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                        16, 16, data, fn)
+#define gen_gvec_4_ptr(v1, v2, v3, v4, ptr, data, fn) \
+    tcg_gen_gvec_4_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+                       vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
+                       ptr, 16, 16, data, fn)
 #define gen_gvec_dup_i64(es, v1, c) \
     tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
 #define gen_gvec_mov(v1, v2) \
@@ -233,6 +245,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
 #define gen_gvec_fn_3(fn, es, v1, v2, v3) \
     tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), 16, 16)
+#define gen_gvec_fn_4(fn, es, v1, v2, v3, v4) \
+    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+                      vec_full_reg_offset(v3), vec_full_reg_offset(v4), 16, 16)
 
 /*
  * Helper to carry out a 128 bit vector computation using 2 i64 values per
@@ -903,40 +918,11 @@ static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
     return DISAS_NEXT;
 }
 
-static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    /* bit in c not set -> copy bit from b */
-    tcg_gen_andc_i64(t, b, c);
-    /* bit in c set -> copy bit from a */
-    tcg_gen_and_i64(d, a, c);
-    /* merge the results */
-    tcg_gen_or_i64(d, d, t);
-    tcg_temp_free_i64(t);
-}
-
-static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,
-                        TCGv_vec c)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_andc_vec(vece, t, b, c);
-    tcg_gen_and_vec(vece, d, a, c);
-    tcg_gen_or_vec(vece, d, d, t);
-    tcg_temp_free_vec(t);
-}
-
 static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
 {
-    static const GVecGen4 gvec_op = {
-        .fni8 = gen_sel_i64,
-        .fniv = gen_sel_vec,
-        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-    };
-
-    gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
-               get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op);
+    gen_gvec_fn_4(bitsel, ES_8, get_field(s->fields, v1),
+                  get_field(s->fields, v4), get_field(s->fields, v2),
+                  get_field(s->fields, v3));
     return DISAS_NEXT;
 }
 
@@ -2353,3 +2339,460 @@ static DisasJumpType op_vtm(DisasContext *s, DisasOps *o)
     set_cc_static(s);
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vfae(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    static gen_helper_gvec_3 * const g[3] = {
+        gen_helper_gvec_vfae8,
+        gen_helper_gvec_vfae16,
+        gen_helper_gvec_vfae32,
+    };
+    static gen_helper_gvec_3_ptr * const g_cc[3] = {
+        gen_helper_gvec_vfae_cc8,
+        gen_helper_gvec_vfae_cc16,
+        gen_helper_gvec_vfae_cc32,
+    };
+    if (es > ES_32) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m5, 0, 1)) {
+        gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), cpu_env, m5, g_cc[es]);
+        set_cc_static(s);
+    } else {
+        gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), m5, g[es]);
+    }
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vfee(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    static gen_helper_gvec_3 * const g[3] = {
+        gen_helper_gvec_vfee8,
+        gen_helper_gvec_vfee16,
+        gen_helper_gvec_vfee32,
+    };
+    static gen_helper_gvec_3_ptr * const g_cc[3] = {
+        gen_helper_gvec_vfee_cc8,
+        gen_helper_gvec_vfee_cc16,
+        gen_helper_gvec_vfee_cc32,
+    };
+
+    if (es > ES_32 || m5 & ~0x3) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m5, 0, 1)) {
+        gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), cpu_env, m5, g_cc[es]);
+        set_cc_static(s);
+    } else {
+        gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), m5, g[es]);
+    }
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vfene(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    static gen_helper_gvec_3 * const g[3] = {
+        gen_helper_gvec_vfene8,
+        gen_helper_gvec_vfene16,
+        gen_helper_gvec_vfene32,
+    };
+    static gen_helper_gvec_3_ptr * const g_cc[3] = {
+        gen_helper_gvec_vfene_cc8,
+        gen_helper_gvec_vfene_cc16,
+        gen_helper_gvec_vfene_cc32,
+    };
+
+    if (es > ES_32 || m5 & ~0x3) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m5, 0, 1)) {
+        gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), cpu_env, m5, g_cc[es]);
+        set_cc_static(s);
+    } else {
+        gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), m5, g[es]);
+    }
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vistr(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    static gen_helper_gvec_2 * const g[3] = {
+        gen_helper_gvec_vistr8,
+        gen_helper_gvec_vistr16,
+        gen_helper_gvec_vistr32,
+    };
+    static gen_helper_gvec_2_ptr * const g_cc[3] = {
+        gen_helper_gvec_vistr_cc8,
+        gen_helper_gvec_vistr_cc16,
+        gen_helper_gvec_vistr_cc32,
+    };
+
+    if (es > ES_32 || m5 & ~0x1) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m5, 0, 1)) {
+        gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                       cpu_env, 0, g_cc[es]);
+        set_cc_static(s);
+    } else {
+        gen_gvec_2_ool(get_field(s->fields, v1), get_field(s->fields, v2), 0,
+                       g[es]);
+    }
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m5);
+    const uint8_t m6 = get_field(s->fields, m6);
+    static gen_helper_gvec_4 * const g[3] = {
+        gen_helper_gvec_vstrc8,
+        gen_helper_gvec_vstrc16,
+        gen_helper_gvec_vstrc32,
+    };
+    static gen_helper_gvec_4 * const g_rt[3] = {
+        gen_helper_gvec_vstrc_rt8,
+        gen_helper_gvec_vstrc_rt16,
+        gen_helper_gvec_vstrc_rt32,
+    };
+    static gen_helper_gvec_4_ptr * const g_cc[3] = {
+        gen_helper_gvec_vstrc_cc8,
+        gen_helper_gvec_vstrc_cc16,
+        gen_helper_gvec_vstrc_cc32,
+    };
+    static gen_helper_gvec_4_ptr * const g_cc_rt[3] = {
+        gen_helper_gvec_vstrc_cc_rt8,
+        gen_helper_gvec_vstrc_cc_rt16,
+        gen_helper_gvec_vstrc_cc_rt32,
+    };
+
+    if (es > ES_32) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m6, 0, 1)) {
+        if (extract32(m6, 2, 1)) {
+            gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           cpu_env, m6, g_cc_rt[es]);
+        } else {
+            gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           cpu_env, m6, g_cc[es]);
+        }
+        set_cc_static(s);
+    } else {
+        if (extract32(m6, 2, 1)) {
+            gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           m6, g_rt[es]);
+        } else {
+            gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           m6, g[es]);
+        }
+    }
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
+{
+    const uint8_t fpf = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    const bool se = extract32(m5, 3, 1);
+    gen_helper_gvec_3_ptr *fn;
+
+    if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    switch (s->fields->op2) {
+    case 0xe3:
+        fn = se ? gen_helper_gvec_vfa64s : gen_helper_gvec_vfa64;
+        break;
+    case 0xe5:
+        fn = se ? gen_helper_gvec_vfd64s : gen_helper_gvec_vfd64;
+        break;
+    case 0xe7:
+        fn = se ? gen_helper_gvec_vfm64s : gen_helper_gvec_vfm64;
+        break;
+    case 0xe2:
+        fn = se ? gen_helper_gvec_vfs64s : gen_helper_gvec_vfs64;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                   get_field(s->fields, v3), cpu_env, 0, fn);
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_wfc(DisasContext *s, DisasOps *o)
+{
+    const uint8_t fpf = get_field(s->fields, m3);
+    const uint8_t m4 = get_field(s->fields, m4);
+
+    if (fpf != FPF_LONG || m4) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (s->fields->op2 == 0xcb) {
+        gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                       cpu_env, 0, gen_helper_gvec_wfc64);
+    } else {
+        gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                       cpu_env, 0, gen_helper_gvec_wfk64);
+    }
+    set_cc_static(s);
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vfc(DisasContext *s, DisasOps *o)
+{
+    const uint8_t fpf = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    const uint8_t m6 = get_field(s->fields, m6);
+    const bool se = extract32(m5, 3, 1);
+    const bool cs = extract32(m6, 0, 1);
+    gen_helper_gvec_3_ptr *fn;
+
+    if (fpf != FPF_LONG || extract32(m5, 0, 3) || extract32(m6, 1, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (cs) {
+        switch (s->fields->op2) {
+        case 0xe8:
+            fn = se ? gen_helper_gvec_vfce64s_cc : gen_helper_gvec_vfce64_cc;
+            break;
+        case 0xeb:
+            fn = se ? gen_helper_gvec_vfch64s_cc : gen_helper_gvec_vfch64_cc;
+            break;
+        case 0xea:
+            fn = se ? gen_helper_gvec_vfche64s_cc : gen_helper_gvec_vfche64_cc;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    } else {
+        switch (s->fields->op2) {
+        case 0xe8:
+            fn = se ? gen_helper_gvec_vfce64s : gen_helper_gvec_vfce64;
+            break;
+        case 0xeb:
+            fn = se ? gen_helper_gvec_vfch64s : gen_helper_gvec_vfch64;
+            break;
+        case 0xea:
+            fn = se ? gen_helper_gvec_vfche64s : gen_helper_gvec_vfche64;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
+    gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                   get_field(s->fields, v3), cpu_env, 0, fn);
+    if (cs) {
+        set_cc_static(s);
+    }
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
+{
+    const uint8_t fpf = get_field(s->fields, m3);
+    const uint8_t m4 = get_field(s->fields, m4);
+    const uint8_t erm = get_field(s->fields, m5);
+    const bool se = extract32(m4, 3, 1);
+    gen_helper_gvec_2_ptr *fn;
+
+    if (fpf != FPF_LONG || extract32(m4, 0, 2) || erm > 7 || erm == 2) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    switch (s->fields->op2) {
+    case 0xc3:
+        fn = se ? gen_helper_gvec_vcdg64s : gen_helper_gvec_vcdg64;
+        break;
+    case 0xc1:
+        fn = se ? gen_helper_gvec_vcdlg64s : gen_helper_gvec_vcdlg64;
+        break;
+    case 0xc2:
+        fn = se ? gen_helper_gvec_vcgd64s : gen_helper_gvec_vcgd64;
+        break;
+    case 0xc0:
+        fn = se ? gen_helper_gvec_vclgd64s : gen_helper_gvec_vclgd64;
+        break;
+    case 0xc7:
+        fn = se ? gen_helper_gvec_vfi64s : gen_helper_gvec_vfi64;
+        break;
+    case 0xc5:
+        fn = se ? gen_helper_gvec_vflr64s : gen_helper_gvec_vflr64;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env,
+                   deposit32(m4, 4, 4, erm), fn);
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vfll(DisasContext *s, DisasOps *o)
+{
+    const uint8_t fpf = get_field(s->fields, m3);
+    const uint8_t m4 = get_field(s->fields, m4);
+    gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vfll32;
+
+    if (fpf != FPF_SHORT || extract32(m4, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m4, 3, 1)) {
+        fn = gen_helper_gvec_vfll32s;
+    }
+    gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env,
+                   0, fn);
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vfma(DisasContext *s, DisasOps *o)
+{
+    const uint8_t m5 = get_field(s->fields, m5);
+    const uint8_t fpf = get_field(s->fields, m6);
+    const bool se = extract32(m5, 3, 1);
+    gen_helper_gvec_4_ptr *fn;
+
+    if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (s->fields->op2 == 0x8f) {
+        fn = se ? gen_helper_gvec_vfma64s : gen_helper_gvec_vfma64;
+    } else {
+        fn = se ? gen_helper_gvec_vfms64s : gen_helper_gvec_vfms64;
+    }
+    gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                   get_field(s->fields, v3), get_field(s->fields, v4), cpu_env,
+                   0, fn);
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vfpso(DisasContext *s, DisasOps *o)
+{
+    const uint8_t v1 = get_field(s->fields, v1);
+    const uint8_t v2 = get_field(s->fields, v2);
+    const uint8_t fpf = get_field(s->fields, m3);
+    const uint8_t m4 = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    TCGv_i64 tmp;
+
+    if (fpf != FPF_LONG || extract32(m4, 0, 3) || m5 > 2) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m4, 3, 1)) {
+        tmp = tcg_temp_new_i64();
+        read_vec_element_i64(tmp, v2, 0, ES_64);
+        switch (m5) {
+        case 0:
+            /* sign bit is inverted (complement) */
+            tcg_gen_xori_i64(tmp, tmp, 1ull << 63);
+            break;
+        case 1:
+            /* sign bit is set to one (negative) */
+            tcg_gen_ori_i64(tmp, tmp, 1ull << 63);
+            break;
+        case 2:
+            /* sign bit is set to zero (positive) */
+            tcg_gen_andi_i64(tmp, tmp, (1ull << 63) - 1);
+            break;
+        }
+        write_vec_element_i64(tmp, v1, 0, ES_64);
+        tcg_temp_free_i64(tmp);
+    } else {
+        switch (m5) {
+        case 0:
+            /* sign bit is inverted (complement) */
+            gen_gvec_fn_2i(xori, ES_64, v1, v2, 1ull << 63);
+            break;
+        case 1:
+            /* sign bit is set to one (negative) */
+            gen_gvec_fn_2i(ori, ES_64, v1, v2, 1ull << 63);
+            break;
+        case 2:
+            /* sign bit is set to zero (positive) */
+            gen_gvec_fn_2i(andi, ES_64, v1, v2, (1ull << 63) - 1);
+            break;
+        }
+    }
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vfsq(DisasContext *s, DisasOps *o)
+{
+    const uint8_t fpf = get_field(s->fields, m3);
+    const uint8_t m4 = get_field(s->fields, m4);
+    gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vfsq64;
+
+    if (fpf != FPF_LONG || extract32(m4, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m4, 3, 1)) {
+        fn = gen_helper_gvec_vfsq64s;
+    }
+    gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env,
+                   0, fn);
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vftci(DisasContext *s, DisasOps *o)
+{
+    const uint16_t i3 = get_field(s->fields, i3);
+    const uint8_t fpf = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vftci64;
+
+    if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m5, 3, 1)) {
+        fn = gen_helper_gvec_vftci64s;
+    }
+    gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env,
+                   i3, fn);
+    set_cc_static(s);
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec.h b/target/s390x/vec.h
index 3313fb43ee..a6e361869b 100644
--- a/target/s390x/vec.h
+++ b/target/s390x/vec.h
@@ -12,6 +12,8 @@
 #ifndef S390X_VEC_H
 #define S390X_VEC_H
 
+#include "tcg/tcg.h"
+
 typedef union S390Vector {
     uint64_t doubleword[2];
     uint32_t word[4];
@@ -70,6 +72,23 @@ static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr)
     return v->doubleword[enr];
 }
 
+static inline uint64_t s390_vec_read_element(const S390Vector *v, uint8_t enr,
+                                             uint8_t es)
+{
+    switch (es) {
+    case MO_8:
+        return s390_vec_read_element8(v, enr);
+    case MO_16:
+        return s390_vec_read_element16(v, enr);
+    case MO_32:
+        return s390_vec_read_element32(v, enr);
+    case MO_64:
+        return s390_vec_read_element64(v, enr);
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static inline void s390_vec_write_element8(S390Vector *v, uint8_t enr,
                                            uint8_t data)
 {
@@ -98,4 +117,25 @@ static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr,
     v->doubleword[enr] = data;
 }
 
+static inline void s390_vec_write_element(S390Vector *v, uint8_t enr,
+                                          uint8_t es, uint64_t data)
+{
+    switch (es) {
+    case MO_8:
+        s390_vec_write_element8(v, enr, data);
+        break;
+    case MO_16:
+        s390_vec_write_element16(v, enr, data);
+        break;
+    case MO_32:
+        s390_vec_write_element32(v, enr, data);
+        break;
+    case MO_64:
+        s390_vec_write_element64(v, enr, data);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 #endif /* S390X_VEC_H */
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
new file mode 100644
index 0000000000..a48bd704bc
--- /dev/null
+++ b/target/s390x/vec_fpu_helper.c
@@ -0,0 +1,625 @@
+/*
+ * QEMU TCG support -- s390x vector floating point instruction support
+ *
+ * Copyright (C) 2019 Red Hat Inc
+ *
+ * Authors:
+ *   David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "cpu.h"
+#include "internal.h"
+#include "vec.h"
+#include "tcg_s390x.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "fpu/softfloat.h"
+
+#define VIC_INVALID         0x1
+#define VIC_DIVBYZERO       0x2
+#define VIC_OVERFLOW        0x3
+#define VIC_UNDERFLOW       0x4
+#define VIC_INEXACT         0x5
+
+/* returns the VEX. If the VEX is 0, there is no trap */
+static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
+                              uint8_t *vec_exc)
+{
+    uint8_t vece_exc = 0, trap_exc;
+    unsigned qemu_exc;
+
+    /* Retrieve and clear the softfloat exceptions */
+    qemu_exc = env->fpu_status.float_exception_flags;
+    if (qemu_exc == 0) {
+        return 0;
+    }
+    env->fpu_status.float_exception_flags = 0;
+
+    vece_exc = s390_softfloat_exc_to_ieee(qemu_exc);
+
+    /* Add them to the vector-wide s390x exception bits */
+    *vec_exc |= vece_exc;
+
+    /* Check for traps and construct the VXC */
+    trap_exc = vece_exc & env->fpc >> 24;
+    if (trap_exc) {
+        if (trap_exc & S390_IEEE_MASK_INVALID) {
+            return enr << 4 | VIC_INVALID;
+        } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) {
+            return enr << 4 | VIC_DIVBYZERO;
+        } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) {
+            return enr << 4 | VIC_OVERFLOW;
+        } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) {
+            return enr << 4 | VIC_UNDERFLOW;
+        } else if (!XxC) {
+            g_assert(trap_exc & S390_IEEE_MASK_INEXACT);
+            /* inexact has lowest priority on traps */
+            return enr << 4 | VIC_INEXACT;
+        }
+    }
+    return 0;
+}
+
+static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
+                            uintptr_t retaddr)
+{
+    if (vxc) {
+        /* on traps, the fpc flags are not updated, instruction is suppressed */
+        tcg_s390_vector_exception(env, vxc, retaddr);
+    }
+    if (vec_exc) {
+        /* indicate exceptions for all elements combined */
+        env->fpc |= vec_exc << 16;
+    }
+}
+
+typedef uint64_t (*vop64_2_fn)(uint64_t a, float_status *s);
+static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
+                    bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
+                    uintptr_t retaddr)
+{
+    uint8_t vxc, vec_exc = 0;
+    S390Vector tmp = {};
+    int i, old_mode;
+
+    old_mode = s390_swap_bfp_rounding_mode(env, erm);
+    for (i = 0; i < 2; i++) {
+        const uint64_t a = s390_vec_read_element64(v2, i);
+
+        s390_vec_write_element64(&tmp, i, fn(a, &env->fpu_status));
+        vxc = check_ieee_exc(env, i, XxC, &vec_exc);
+        if (s || vxc) {
+            break;
+        }
+    }
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+    *v1 = tmp;
+}
+
+typedef uint64_t (*vop64_3_fn)(uint64_t a, uint64_t b, float_status *s);
+static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
+                    CPUS390XState *env, bool s, vop64_3_fn fn,
+                    uintptr_t retaddr)
+{
+    uint8_t vxc, vec_exc = 0;
+    S390Vector tmp = {};
+    int i;
+
+    for (i = 0; i < 2; i++) {
+        const uint64_t a = s390_vec_read_element64(v2, i);
+        const uint64_t b = s390_vec_read_element64(v3, i);
+
+        s390_vec_write_element64(&tmp, i, fn(a, b, &env->fpu_status));
+        vxc = check_ieee_exc(env, i, false, &vec_exc);
+        if (s || vxc) {
+            break;
+        }
+    }
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+    *v1 = tmp;
+}
+
+static uint64_t vfa64(uint64_t a, uint64_t b, float_status *s)
+{
+    return float64_add(a, b, s);
+}
+
+void HELPER(gvec_vfa64)(void *v1, const void *v2, const void *v3,
+                        CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, false, vfa64, GETPC());
+}
+
+void HELPER(gvec_vfa64s)(void *v1, const void *v2, const void *v3,
+                         CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, true, vfa64, GETPC());
+}
+
+static int wfc64(const S390Vector *v1, const S390Vector *v2,
+                 CPUS390XState *env, bool signal, uintptr_t retaddr)
+{
+    /* only the zero-indexed elements are compared */
+    const float64 a = s390_vec_read_element64(v1, 0);
+    const float64 b = s390_vec_read_element64(v2, 0);
+    uint8_t vxc, vec_exc = 0;
+    int cmp;
+
+    if (signal) {
+        cmp = float64_compare(a, b, &env->fpu_status);
+    } else {
+        cmp = float64_compare_quiet(a, b, &env->fpu_status);
+    }
+    vxc = check_ieee_exc(env, 0, false, &vec_exc);
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+
+    return float_comp_to_cc(env, cmp);
+}
+
+void HELPER(gvec_wfc64)(const void *v1, const void *v2, CPUS390XState *env,
+                        uint32_t desc)
+{
+    env->cc_op = wfc64(v1, v2, env, false, GETPC());
+}
+
+void HELPER(gvec_wfk64)(const void *v1, const void *v2, CPUS390XState *env,
+                        uint32_t desc)
+{
+    env->cc_op = wfc64(v1, v2, env, true, GETPC());
+}
+
+typedef int (*vfc64_fn)(float64 a, float64 b, float_status *status);
+static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
+                 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
+{
+    uint8_t vxc, vec_exc = 0;
+    S390Vector tmp = {};
+    int match = 0;
+    int i;
+
+    for (i = 0; i < 2; i++) {
+        const float64 a = s390_vec_read_element64(v2, i);
+        const float64 b = s390_vec_read_element64(v3, i);
+
+        /* swap the order of the parameters, so we can use existing functions */
+        if (fn(b, a, &env->fpu_status)) {
+            match++;
+            s390_vec_write_element64(&tmp, i, -1ull);
+        }
+        vxc = check_ieee_exc(env, i, false, &vec_exc);
+        if (s || vxc) {
+            break;
+        }
+    }
+
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+    *v1 = tmp;
+    if (match) {
+        return s || match == 2 ? 0 : 1;
+    }
+    return 3;
+}
+
+void HELPER(gvec_vfce64)(void *v1, const void *v2, const void *v3,
+                         CPUS390XState *env, uint32_t desc)
+{
+    vfc64(v1, v2, v3, env, false, float64_eq_quiet, GETPC());
+}
+
+void HELPER(gvec_vfce64s)(void *v1, const void *v2, const void *v3,
+                          CPUS390XState *env, uint32_t desc)
+{
+    vfc64(v1, v2, v3, env, true, float64_eq_quiet, GETPC());
+}
+
+void HELPER(gvec_vfce64_cc)(void *v1, const void *v2, const void *v3,
+                            CPUS390XState *env, uint32_t desc)
+{
+    env->cc_op = vfc64(v1, v2, v3, env, false, float64_eq_quiet, GETPC());
+}
+
+void HELPER(gvec_vfce64s_cc)(void *v1, const void *v2, const void *v3,
+                            CPUS390XState *env, uint32_t desc)
+{
+    env->cc_op = vfc64(v1, v2, v3, env, true, float64_eq_quiet, GETPC());
+}
+
+void HELPER(gvec_vfch64)(void *v1, const void *v2, const void *v3,
+                         CPUS390XState *env, uint32_t desc)
+{
+    vfc64(v1, v2, v3, env, false, float64_lt_quiet, GETPC());
+}
+
+void HELPER(gvec_vfch64s)(void *v1, const void *v2, const void *v3,
+                          CPUS390XState *env, uint32_t desc)
+{
+    vfc64(v1, v2, v3, env, true, float64_lt_quiet, GETPC());
+}
+
+void HELPER(gvec_vfch64_cc)(void *v1, const void *v2, const void *v3,
+                            CPUS390XState *env, uint32_t desc)
+{
+    env->cc_op = vfc64(v1, v2, v3, env, false, float64_lt_quiet, GETPC());
+}
+
+void HELPER(gvec_vfch64s_cc)(void *v1, const void *v2, const void *v3,
+                             CPUS390XState *env, uint32_t desc)
+{
+    env->cc_op = vfc64(v1, v2, v3, env, true, float64_lt_quiet, GETPC());
+}
+
+void HELPER(gvec_vfche64)(void *v1, const void *v2, const void *v3,
+                          CPUS390XState *env, uint32_t desc)
+{
+    vfc64(v1, v2, v3, env, false, float64_le_quiet, GETPC());
+}
+
+void HELPER(gvec_vfche64s)(void *v1, const void *v2, const void *v3,
+                           CPUS390XState *env, uint32_t desc)
+{
+    vfc64(v1, v2, v3, env, true, float64_le_quiet, GETPC());
+}
+
+void HELPER(gvec_vfche64_cc)(void *v1, const void *v2, const void *v3,
+                             CPUS390XState *env, uint32_t desc)
+{
+    env->cc_op = vfc64(v1, v2, v3, env, false, float64_le_quiet, GETPC());
+}
+
+void HELPER(gvec_vfche64s_cc)(void *v1, const void *v2, const void *v3,
+                              CPUS390XState *env, uint32_t desc)
+{
+    env->cc_op = vfc64(v1, v2, v3, env, true, float64_le_quiet, GETPC());
+}
+
+static uint64_t vcdg64(uint64_t a, float_status *s)
+{
+    return int64_to_float64(a, s);
+}
+
+void HELPER(gvec_vcdg64)(void *v1, const void *v2, CPUS390XState *env,
+                         uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, false, XxC, erm, vcdg64, GETPC());
+}
+
+void HELPER(gvec_vcdg64s)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, true, XxC, erm, vcdg64, GETPC());
+}
+
+static uint64_t vcdlg64(uint64_t a, float_status *s)
+{
+    return uint64_to_float64(a, s);
+}
+
+void HELPER(gvec_vcdlg64)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, false, XxC, erm, vcdlg64, GETPC());
+}
+
+void HELPER(gvec_vcdlg64s)(void *v1, const void *v2, CPUS390XState *env,
+                           uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, true, XxC, erm, vcdlg64, GETPC());
+}
+
+static uint64_t vcgd64(uint64_t a, float_status *s)
+{
+    return float64_to_int64(a, s);
+}
+
+void HELPER(gvec_vcgd64)(void *v1, const void *v2, CPUS390XState *env,
+                         uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, false, XxC, erm, vcgd64, GETPC());
+}
+
+void HELPER(gvec_vcgd64s)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, true, XxC, erm, vcgd64, GETPC());
+}
+
+static uint64_t vclgd64(uint64_t a, float_status *s)
+{
+    return float64_to_uint64(a, s);
+}
+
+void HELPER(gvec_vclgd64)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, false, XxC, erm, vclgd64, GETPC());
+}
+
+void HELPER(gvec_vclgd64s)(void *v1, const void *v2, CPUS390XState *env,
+                           uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, true, XxC, erm, vclgd64, GETPC());
+}
+
+static uint64_t vfd64(uint64_t a, uint64_t b, float_status *s)
+{
+    return float64_div(a, b, s);
+}
+
+void HELPER(gvec_vfd64)(void *v1, const void *v2, const void *v3,
+                        CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, false, vfd64, GETPC());
+}
+
+void HELPER(gvec_vfd64s)(void *v1, const void *v2, const void *v3,
+                         CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, true, vfd64, GETPC());
+}
+
+static uint64_t vfi64(uint64_t a, float_status *s)
+{
+    return float64_round_to_int(a, s);
+}
+
+void HELPER(gvec_vfi64)(void *v1, const void *v2, CPUS390XState *env,
+                        uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, false, XxC, erm, vfi64, GETPC());
+}
+
+void HELPER(gvec_vfi64s)(void *v1, const void *v2, CPUS390XState *env,
+                         uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, true, XxC, erm, vfi64, GETPC());
+}
+
+static void vfll32(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
+                   bool s, uintptr_t retaddr)
+{
+    uint8_t vxc, vec_exc = 0;
+    S390Vector tmp = {};
+    int i;
+
+    for (i = 0; i < 2; i++) {
+        /* load from even element */
+        const float32 a = s390_vec_read_element32(v2, i * 2);
+        const uint64_t ret = float32_to_float64(a, &env->fpu_status);
+
+        s390_vec_write_element64(&tmp, i, ret);
+        /* indicate the source element */
+        vxc = check_ieee_exc(env, i * 2, false, &vec_exc);
+        if (s || vxc) {
+            break;
+        }
+    }
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+    *v1 = tmp;
+}
+
+void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
+                         uint32_t desc)
+{
+    vfll32(v1, v2, env, false, GETPC());
+}
+
+void HELPER(gvec_vfll32s)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    vfll32(v1, v2, env, true, GETPC());
+}
+
+static void vflr64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
+                   bool s, bool XxC, uint8_t erm, uintptr_t retaddr)
+{
+    uint8_t vxc, vec_exc = 0;
+    S390Vector tmp = {};
+    int i, old_mode;
+
+    old_mode = s390_swap_bfp_rounding_mode(env, erm);
+    for (i = 0; i < 2; i++) {
+        float64 a = s390_vec_read_element64(v2, i);
+        uint32_t ret = float64_to_float32(a, &env->fpu_status);
+
+        /* place at even element */
+        s390_vec_write_element32(&tmp, i * 2, ret);
+        /* indicate the source element */
+        vxc = check_ieee_exc(env, i, XxC, &vec_exc);
+        if (s || vxc) {
+            break;
+        }
+    }
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+    *v1 = tmp;
+}
+
+void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
+                         uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vflr64(v1, v2, env, false, XxC, erm, GETPC());
+}
+
+void HELPER(gvec_vflr64s)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vflr64(v1, v2, env, true, XxC, erm, GETPC());
+}
+
+static uint64_t vfm64(uint64_t a, uint64_t b, float_status *s)
+{
+    return float64_mul(a, b, s);
+}
+
+void HELPER(gvec_vfm64)(void *v1, const void *v2, const void *v3,
+                        CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, false, vfm64, GETPC());
+}
+
+void HELPER(gvec_vfm64s)(void *v1, const void *v2, const void *v3,
+                         CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, true, vfm64, GETPC());
+}
+
+static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
+                   const S390Vector *v4, CPUS390XState *env, bool s, int flags,
+                   uintptr_t retaddr)
+{
+    uint8_t vxc, vec_exc = 0;
+    S390Vector tmp = {};
+    int i;
+
+    for (i = 0; i < 2; i++) {
+        const uint64_t a = s390_vec_read_element64(v2, i);
+        const uint64_t b = s390_vec_read_element64(v3, i);
+        const uint64_t c = s390_vec_read_element64(v4, i);
+        uint64_t ret = float64_muladd(a, b, c, flags, &env->fpu_status);
+
+        s390_vec_write_element64(&tmp, i, ret);
+        vxc = check_ieee_exc(env, i, false, &vec_exc);
+        if (s || vxc) {
+            break;
+        }
+    }
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+    *v1 = tmp;
+}
+
+void HELPER(gvec_vfma64)(void *v1, const void *v2, const void *v3,
+                         const void *v4, CPUS390XState *env, uint32_t desc)
+{
+    vfma64(v1, v2, v3, v4, env, false, 0, GETPC());
+}
+
+void HELPER(gvec_vfma64s)(void *v1, const void *v2, const void *v3,
+                         const void *v4, CPUS390XState *env, uint32_t desc)
+{
+    vfma64(v1, v2, v3, v4, env, true, 0, GETPC());
+}
+
+void HELPER(gvec_vfms64)(void *v1, const void *v2, const void *v3,
+                         const void *v4, CPUS390XState *env, uint32_t desc)
+{
+    vfma64(v1, v2, v3, v4, env, false, float_muladd_negate_c, GETPC());
+}
+
+void HELPER(gvec_vfms64s)(void *v1, const void *v2, const void *v3,
+                         const void *v4, CPUS390XState *env, uint32_t desc)
+{
+    vfma64(v1, v2, v3, v4, env, true, float_muladd_negate_c, GETPC());
+}
+
+static uint64_t vfsq64(uint64_t a, float_status *s)
+{
+    return float64_sqrt(a, s);
+}
+
+void HELPER(gvec_vfsq64)(void *v1, const void *v2, CPUS390XState *env,
+                         uint32_t desc)
+{
+    vop64_2(v1, v2, env, false, false, 0, vfsq64, GETPC());
+}
+
+void HELPER(gvec_vfsq64s)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    vop64_2(v1, v2, env, true, false, 0, vfsq64, GETPC());
+}
+
+static uint64_t vfs64(uint64_t a, uint64_t b, float_status *s)
+{
+    return float64_sub(a, b, s);
+}
+
+void HELPER(gvec_vfs64)(void *v1, const void *v2, const void *v3,
+                        CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, false, vfs64, GETPC());
+}
+
+void HELPER(gvec_vfs64s)(void *v1, const void *v2, const void *v3,
+                         CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, true, vfs64, GETPC());
+}
+
+static int vftci64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
+                   bool s, uint16_t i3)
+{
+    int i, match = 0;
+
+    for (i = 0; i < 2; i++) {
+        float64 a = s390_vec_read_element64(v2, i);
+
+        if (float64_dcmask(env, a) & i3) {
+            match++;
+            s390_vec_write_element64(v1, i, -1ull);
+        } else {
+            s390_vec_write_element64(v1, i, 0);
+        }
+        if (s) {
+            break;
+        }
+    }
+
+    if (match) {
+        return s || match == 2 ? 0 : 1;
+    }
+    return 3;
+}
+
+void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    env->cc_op = vftci64(v1, v2, env, false, simd_data(desc));
+}
+
+void HELPER(gvec_vftci64s)(void *v1, const void *v2, CPUS390XState *env,
+                           uint32_t desc)
+{
+    env->cc_op = vftci64(v1, v2, env, true, simd_data(desc));
+}
diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c
new file mode 100644
index 0000000000..c516c0ceeb
--- /dev/null
+++ b/target/s390x/vec_string_helper.c
@@ -0,0 +1,473 @@
+/*
+ * QEMU TCG support -- s390x vector string instruction support
+ *
+ * Copyright (C) 2019 Red Hat Inc
+ *
+ * Authors:
+ *   David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "cpu.h"
+#include "internal.h"
+#include "vec.h"
+#include "tcg/tcg.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "exec/helper-proto.h"
+
+/*
+ * Returns a bit set in the MSB of each element that is zero,
+ * as defined by the mask.
+ */
+static inline uint64_t zero_search(uint64_t a, uint64_t mask)
+{
+    return ~(((a & mask) + mask) | a | mask);
+}
+
+/*
+ * Returns a bit set in the MSB of each element that is not zero,
+ * as defined by the mask.
+ */
+static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
+{
+    return (((a & mask) + mask) | a) & ~mask;
+}
+
+/*
+ * Returns the byte offset for the first match, or 16 for no match.
+ */
+static inline int match_index(uint64_t c0, uint64_t c1)
+{
+    return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3;
+}
+
+/*
+ * Returns the number of bits composing one element.
+ */
+static uint8_t get_element_bits(uint8_t es)
+{
+    return (1 << es) * BITS_PER_BYTE;
+}
+
+/*
+ * Returns the bitmask for a single element.
+ */
+static uint64_t get_single_element_mask(uint8_t es)
+{
+    return -1ull >> (64 - get_element_bits(es));
+}
+
+/*
+ * Returns the bitmask for a single element (excluding the MSB).
+ */
+static uint64_t get_single_element_lsbs_mask(uint8_t es)
+{
+    return -1ull >> (65 - get_element_bits(es));
+}
+
+/*
+ * Returns the bitmasks for multiple elements (excluding the MSBs).
+ */
+static uint64_t get_element_lsbs_mask(uint8_t es)
+{
+    return dup_const(es, get_single_element_lsbs_mask(es));
+}
+
+static int vfae(void *v1, const void *v2, const void *v3, bool in,
+                bool rt, bool zs, uint8_t es)
+{
+    const uint64_t mask = get_element_lsbs_mask(es);
+    const int bits = get_element_bits(es);
+    uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1;
+    uint64_t first_zero = 16;
+    uint64_t first_equal;
+    int i;
+
+    a0 = s390_vec_read_element64(v2, 0);
+    a1 = s390_vec_read_element64(v2, 1);
+    b0 = s390_vec_read_element64(v3, 0);
+    b1 = s390_vec_read_element64(v3, 1);
+    e0 = 0;
+    e1 = 0;
+    /* compare against equality with every other element */
+    for (i = 0; i < 64; i += bits) {
+        t0 = rol64(b0, i);
+        t1 = rol64(b1, i);
+        e0 |= zero_search(a0 ^ t0, mask);
+        e0 |= zero_search(a0 ^ t1, mask);
+        e1 |= zero_search(a1 ^ t0, mask);
+        e1 |= zero_search(a1 ^ t1, mask);
+    }
+    /* invert the result if requested - invert only the MSBs */
+    if (in) {
+        e0 = ~e0 & ~mask;
+        e1 = ~e1 & ~mask;
+    }
+    first_equal = match_index(e0, e1);
+
+    if (zs) {
+        z0 = zero_search(a0, mask);
+        z1 = zero_search(a1, mask);
+        first_zero = match_index(z0, z1);
+    }
+
+    if (rt) {
+        e0 = (e0 >> (bits - 1)) * get_single_element_mask(es);
+        e1 = (e1 >> (bits - 1)) * get_single_element_mask(es);
+        s390_vec_write_element64(v1, 0, e0);
+        s390_vec_write_element64(v1, 1, e1);
+    } else {
+        s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
+        s390_vec_write_element64(v1, 1, 0);
+    }
+
+    if (first_zero == 16 && first_equal == 16) {
+        return 3; /* no match */
+    } else if (first_zero == 16) {
+        return 1; /* matching elements, no match for zero */
+    } else if (first_equal < first_zero) {
+        return 2; /* matching elements before match for zero */
+    }
+    return 0; /* match for zero */
+}
+
+#define DEF_VFAE_HELPER(BITS)                                                  \
+void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3,         \
+                             uint32_t desc)                                    \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool rt = extract32(simd_data(desc), 2, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                                   \
+}
+DEF_VFAE_HELPER(8)
+DEF_VFAE_HELPER(16)
+DEF_VFAE_HELPER(32)
+
+#define DEF_VFAE_CC_HELPER(BITS)                                               \
+void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3,      \
+                                CPUS390XState *env, uint32_t desc)             \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool rt = extract32(simd_data(desc), 2, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                      \
+}
+DEF_VFAE_CC_HELPER(8)
+DEF_VFAE_CC_HELPER(16)
+DEF_VFAE_CC_HELPER(32)
+
+static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
+{
+    const uint64_t mask = get_element_lsbs_mask(es);
+    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
+    uint64_t first_zero = 16;
+    uint64_t first_equal;
+
+    a0 = s390_vec_read_element64(v2, 0);
+    a1 = s390_vec_read_element64(v2, 1);
+    b0 = s390_vec_read_element64(v3, 0);
+    b1 = s390_vec_read_element64(v3, 1);
+    e0 = zero_search(a0 ^ b0, mask);
+    e1 = zero_search(a1 ^ b1, mask);
+    first_equal = match_index(e0, e1);
+
+    if (zs) {
+        z0 = zero_search(a0, mask);
+        z1 = zero_search(a1, mask);
+        first_zero = match_index(z0, z1);
+    }
+
+    s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
+    s390_vec_write_element64(v1, 1, 0);
+    if (first_zero == 16 && first_equal == 16) {
+        return 3; /* no match */
+    } else if (first_zero == 16) {
+        return 1; /* matching elements, no match for zero */
+    } else if (first_equal < first_zero) {
+        return 2; /* matching elements before match for zero */
+    }
+    return 0; /* match for zero */
+}
+
+#define DEF_VFEE_HELPER(BITS)                                                  \
+void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3,         \
+                             uint32_t desc)                                    \
+{                                                                              \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    vfee(v1, v2, v3, zs, MO_##BITS);                                           \
+}
+DEF_VFEE_HELPER(8)
+DEF_VFEE_HELPER(16)
+DEF_VFEE_HELPER(32)
+
+#define DEF_VFEE_CC_HELPER(BITS)                                               \
+void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3,      \
+                                CPUS390XState *env, uint32_t desc)             \
+{                                                                              \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS);                              \
+}
+DEF_VFEE_CC_HELPER(8)
+DEF_VFEE_CC_HELPER(16)
+DEF_VFEE_CC_HELPER(32)
+
+static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
+{
+    const uint64_t mask = get_element_lsbs_mask(es);
+    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
+    uint64_t first_zero = 16;
+    uint64_t first_inequal;
+    bool smaller = false;
+
+    a0 = s390_vec_read_element64(v2, 0);
+    a1 = s390_vec_read_element64(v2, 1);
+    b0 = s390_vec_read_element64(v3, 0);
+    b1 = s390_vec_read_element64(v3, 1);
+    e0 = nonzero_search(a0 ^ b0, mask);
+    e1 = nonzero_search(a1 ^ b1, mask);
+    first_inequal = match_index(e0, e1);
+
+    /* identify the smaller element */
+    if (first_inequal < 16) {
+        uint8_t enr = first_inequal / (1 << es);
+        uint32_t a = s390_vec_read_element(v2, enr, es);
+        uint32_t b = s390_vec_read_element(v3, enr, es);
+
+        smaller = a < b;
+    }
+
+    if (zs) {
+        z0 = zero_search(a0, mask);
+        z1 = zero_search(a1, mask);
+        first_zero = match_index(z0, z1);
+    }
+
+    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
+    s390_vec_write_element64(v1, 1, 0);
+    if (first_zero == 16 && first_inequal == 16) {
+        return 3;
+    } else if (first_zero < first_inequal) {
+        return 0;
+    }
+    return smaller ? 1 : 2;
+}
+
+#define DEF_VFENE_HELPER(BITS)                                                 \
+void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3,        \
+                              uint32_t desc)                                   \
+{                                                                              \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    vfene(v1, v2, v3, zs, MO_##BITS);                                          \
+}
+DEF_VFENE_HELPER(8)
+DEF_VFENE_HELPER(16)
+DEF_VFENE_HELPER(32)
+
+#define DEF_VFENE_CC_HELPER(BITS)                                              \
+void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3,     \
+                                 CPUS390XState *env, uint32_t desc)            \
+{                                                                              \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS);                             \
+}
+DEF_VFENE_CC_HELPER(8)
+DEF_VFENE_CC_HELPER(16)
+DEF_VFENE_CC_HELPER(32)
+
+static int vistr(void *v1, const void *v2, uint8_t es)
+{
+    const uint64_t mask = get_element_lsbs_mask(es);
+    uint64_t a0 = s390_vec_read_element64(v2, 0);
+    uint64_t a1 = s390_vec_read_element64(v2, 1);
+    uint64_t z;
+    int cc = 3;
+
+    z = zero_search(a0, mask);
+    if (z) {
+        a0 &= ~(-1ull >> clz64(z));
+        a1 = 0;
+        cc = 0;
+    } else {
+        z = zero_search(a1, mask);
+        if (z) {
+            a1 &= ~(-1ull >> clz64(z));
+            cc = 0;
+        }
+    }
+
+    s390_vec_write_element64(v1, 0, a0);
+    s390_vec_write_element64(v1, 1, a1);
+    return cc;
+}
+
+#define DEF_VISTR_HELPER(BITS)                                                 \
+void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc)         \
+{                                                                              \
+    vistr(v1, v2, MO_##BITS);                                                  \
+}
+DEF_VISTR_HELPER(8)
+DEF_VISTR_HELPER(16)
+DEF_VISTR_HELPER(32)
+
+#define DEF_VISTR_CC_HELPER(BITS)                                              \
+void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
+                                uint32_t desc)                                 \
+{                                                                              \
+    env->cc_op = vistr(v1, v2, MO_##BITS);                                     \
+}
+DEF_VISTR_CC_HELPER(8)
+DEF_VISTR_CC_HELPER(16)
+DEF_VISTR_CC_HELPER(32)
+
+static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
+{
+    const bool equal = extract32(c, 7, 1);
+    const bool lower = extract32(c, 6, 1);
+    const bool higher = extract32(c, 5, 1);
+
+    if (data < l) {
+        return lower;
+    } else if (data > l) {
+        return higher;
+    }
+    return equal;
+}
+
+static int vstrc(void *v1, const void *v2, const void *v3, const void *v4,
+                 bool in, bool rt, bool zs, uint8_t es)
+{
+    const uint64_t mask = get_element_lsbs_mask(es);
+    uint64_t a0 = s390_vec_read_element64(v2, 0);
+    uint64_t a1 = s390_vec_read_element64(v2, 1);
+    int first_zero = 16, first_match = 16;
+    S390Vector rt_result = {};
+    uint64_t z0, z1;
+    int i, j;
+
+    if (zs) {
+        z0 = zero_search(a0, mask);
+        z1 = zero_search(a1, mask);
+        first_zero = match_index(z0, z1);
+    }
+
+    for (i = 0; i < 16 / (1 << es); i++) {
+        const uint32_t data = s390_vec_read_element(v2, i, es);
+        const int cur_byte = i * (1 << es);
+        bool any_match = false;
+
+        /* if we don't need a bit vector, we can stop early */
+        if (cur_byte == first_zero && !rt) {
+            break;
+        }
+
+        for (j = 0; j < 16 / (1 << es); j += 2) {
+            const uint32_t l1 = s390_vec_read_element(v3, j, es);
+            const uint32_t l2 = s390_vec_read_element(v3, j + 1, es);
+            /* we are only interested in the highest byte of each element */
+            const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es));
+            const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es));
+
+            if (element_compare(data, l1, c1) &&
+                element_compare(data, l2, c2)) {
+                any_match = true;
+                break;
+            }
+        }
+        /* invert the result if requested */
+        any_match = in ^ any_match;
+
+        if (any_match) {
+            /* indicate bit vector if requested */
+            if (rt) {
+                const uint64_t val = -1ull;
+
+                first_match = MIN(cur_byte, first_match);
+                s390_vec_write_element(&rt_result, i, es, val);
+            } else {
+                /* stop on the first match */
+                first_match = cur_byte;
+                break;
+            }
+        }
+    }
+
+    if (rt) {
+        *(S390Vector *)v1 = rt_result;
+    } else {
+        s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
+        s390_vec_write_element64(v1, 1, 0);
+    }
+
+    if (first_zero == 16 && first_match == 16) {
+        return 3; /* no match */
+    } else if (first_zero == 16) {
+        return 1; /* matching elements, no match for zero */
+    } else if (first_match < first_zero) {
+        return 2; /* matching elements before match for zero */
+    }
+    return 0; /* match for zero */
+}
+
+#define DEF_VSTRC_HELPER(BITS)                                                 \
+void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3,        \
+                              const void *v4, uint32_t desc)                   \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                               \
+}
+DEF_VSTRC_HELPER(8)
+DEF_VSTRC_HELPER(16)
+DEF_VSTRC_HELPER(32)
+
+#define DEF_VSTRC_RT_HELPER(BITS)                                              \
+void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3,     \
+                                 const void *v4, uint32_t desc)                \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                               \
+}
+DEF_VSTRC_RT_HELPER(8)
+DEF_VSTRC_RT_HELPER(16)
+DEF_VSTRC_RT_HELPER(32)
+
+#define DEF_VSTRC_CC_HELPER(BITS)                                              \
+void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3,     \
+                                 const void *v4, CPUS390XState *env,           \
+                                 uint32_t desc)                                \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                  \
+}
+DEF_VSTRC_CC_HELPER(8)
+DEF_VSTRC_CC_HELPER(16)
+DEF_VSTRC_CC_HELPER(32)
+
+#define DEF_VSTRC_CC_RT_HELPER(BITS)                                           \
+void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3,  \
+                                    const void *v4, CPUS390XState *env,        \
+                                    uint32_t desc)                             \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                  \
+}
+DEF_VSTRC_CC_RT_HELPER(8)
+DEF_VSTRC_CC_RT_HELPER(16)
+DEF_VSTRC_CC_RT_HELPER(32)
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index 53dce470c1..6f1da87875 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -38,7 +38,7 @@
 #include "qemu/qemu-print.h"
 #include "sysemu/sysemu.h"
 #include "exec/cpu_ldst.h"
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
 #include "exec/translator.h"
 
 #include "exec/helper-proto.h"
diff --git a/target/xtensa/xtensa-semi.c b/target/xtensa/xtensa-semi.c
index 5f5ce4f344..38efa3485a 100644
--- a/target/xtensa/xtensa-semi.c
+++ b/target/xtensa/xtensa-semi.c
@@ -29,7 +29,7 @@
 #include "cpu.h"
 #include "chardev/char-fe.h"
 #include "exec/helper-proto.h"
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
 #include "qapi/error.h"
 #include "qemu/log.h"
 #include "sysemu/sysemu.h"
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 1865f6b322..0cd5f465b7 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -255,9 +255,6 @@ check-qtest-sparc64-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF)
 check-qtest-sparc64-y += tests/prom-env-test$(EXESUF)
 check-qtest-sparc64-y += tests/boot-serial-test$(EXESUF)
 
-check-qtest-arm-y += tests/tmp105-test$(EXESUF)
-check-qtest-arm-y += tests/pca9552-test$(EXESUF)
-check-qtest-arm-y += tests/ds1338-test$(EXESUF)
 check-qtest-arm-y += tests/microbit-test$(EXESUF)
 check-qtest-arm-y += tests/m25p80-test$(EXESUF)
 check-qtest-arm-y += tests/test-arm-mptimer$(EXESUF)
@@ -267,6 +264,10 @@ check-qtest-arm-y += tests/hexloader-test$(EXESUF)
 check-qtest-aarch64-y = tests/numa-test$(EXESUF)
 check-qtest-aarch64-y += tests/boot-serial-test$(EXESUF)
 check-qtest-aarch64-y += tests/migration-test$(EXESUF)
+# TODO: once aarch64 TCG is fixed on ARM 32 bit host, make test unconditional
+ifneq ($(ARCH),arm)
+check-qtest-aarch64-y += tests/bios-tables-test$(EXESUF)
+endif
 
 check-qtest-microblazeel-y += $(check-qtest-microblaze-y)
 
@@ -678,7 +679,7 @@ libqgraph-obj-y = tests/libqos/qgraph.o
 
 libqos-obj-y = $(libqgraph-obj-y) tests/libqos/pci.o tests/libqos/fw_cfg.o
 libqos-obj-y += tests/libqos/malloc.o
-libqos-obj-y += tests/libqos/i2c.o tests/libqos/libqos.o
+libqos-obj-y += tests/libqos/libqos.o
 libqos-spapr-obj-y = $(libqos-obj-y) tests/libqos/malloc-spapr.o
 libqos-spapr-obj-y += tests/libqos/libqos-spapr.o
 libqos-spapr-obj-y += tests/libqos/rtas.o
@@ -686,14 +687,15 @@ libqos-spapr-obj-y += tests/libqos/pci-spapr.o
 libqos-pc-obj-y = $(libqos-obj-y) tests/libqos/pci-pc.o
 libqos-pc-obj-y += tests/libqos/malloc-pc.o tests/libqos/libqos-pc.o
 libqos-pc-obj-y += tests/libqos/ahci.o
-libqos-omap-obj-y = $(libqos-obj-y) tests/libqos/i2c-omap.o
-libqos-imx-obj-y = $(libqos-obj-y) tests/libqos/i2c-imx.o
 libqos-usb-obj-y = $(libqos-spapr-obj-y) $(libqos-pc-obj-y) tests/libqos/usb.o
 
 # Devices
 qos-test-obj-y = tests/qos-test.o $(libqgraph-obj-y)
 qos-test-obj-y += $(libqos-pc-obj-y) $(libqos-spapr-obj-y)
 qos-test-obj-y += tests/libqos/e1000e.o
+qos-test-obj-y += tests/libqos/i2c.o
+qos-test-obj-y += tests/libqos/i2c-imx.o
+qos-test-obj-y += tests/libqos/i2c-omap.o
 qos-test-obj-y += tests/libqos/sdhci.o
 qos-test-obj-y += tests/libqos/tpci200.o
 qos-test-obj-y += tests/libqos/virtio.o
@@ -709,6 +711,8 @@ qos-test-obj-y += tests/libqos/virtio-serial.o
 
 # Machines
 qos-test-obj-y += tests/libqos/aarch64-xlnx-zcu102-machine.o
+qos-test-obj-y += tests/libqos/arm-imx25-pdk-machine.o
+qos-test-obj-y += tests/libqos/arm-n800-machine.o
 qos-test-obj-y += tests/libqos/arm-raspi2-machine.o
 qos-test-obj-y += tests/libqos/arm-sabrelite-machine.o
 qos-test-obj-y += tests/libqos/arm-smdkc210-machine.o
@@ -719,6 +723,7 @@ qos-test-obj-y += tests/libqos/x86_64_pc-machine.o
 
 # Tests
 qos-test-obj-y += tests/ac97-test.o
+qos-test-obj-y += tests/ds1338-test.o
 qos-test-obj-y += tests/e1000-test.o
 qos-test-obj-y += tests/e1000e-test.o
 qos-test-obj-y += tests/eepro100-test.o
@@ -727,10 +732,12 @@ qos-test-obj-y += tests/ipoctal232-test.o
 qos-test-obj-y += tests/megasas-test.o
 qos-test-obj-y += tests/ne2000-test.o
 qos-test-obj-y += tests/nvme-test.o
+qos-test-obj-y += tests/pca9552-test.o
 qos-test-obj-y += tests/pci-test.o
 qos-test-obj-y += tests/pcnet-test.o
 qos-test-obj-y += tests/sdhci-test.o
 qos-test-obj-y += tests/spapr-phb-test.o
+qos-test-obj-y += tests/tmp105-test.o
 qos-test-obj-y += tests/usb-hcd-ohci-test.o $(libqos-usb-obj-y)
 qos-test-obj-$(CONFIG_VHOST_NET_USER) += tests/vhost-user-test.o $(chardev-obj-y) $(test-io-obj-y)
 qos-test-obj-y += tests/virtio-test.o
@@ -768,9 +775,6 @@ tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y)
 tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \
 	tests/boot-sector.o tests/acpi-utils.o $(libqos-obj-y)
 tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y)
-tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y)
-tests/pca9552-test$(EXESUF): tests/pca9552-test.o $(libqos-omap-obj-y)
-tests/ds1338-test$(EXESUF): tests/ds1338-test.o $(libqos-imx-obj-y)
 tests/microbit-test$(EXESUF): tests/microbit-test.o
 tests/m25p80-test$(EXESUF): tests/m25p80-test.o
 tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y)
@@ -904,7 +908,7 @@ check-report-unit.tap: $(check-unit-y)
 # Reports and overall runs
 
 check-report.tap: $(patsubst %,check-report-qtest-%.tap, $(QTEST_TARGETS)) check-report-unit.tap
-	$(call quiet-command,./scripts/tap-merge.py $^ > $@,"GEN","$@")
+	$(call quiet-command, cat $^ | scripts/tap-merge.pl >$@,"GEN","$@")
 
 # FPU Emulation tests (aka softfloat)
 #
diff --git a/tests/acceptance/linux_ssh_mips_malta.py b/tests/acceptance/linux_ssh_mips_malta.py
new file mode 100644
index 0000000000..aafb0c39f6
--- /dev/null
+++ b/tests/acceptance/linux_ssh_mips_malta.py
@@ -0,0 +1,230 @@
+# Functional test that boots a VM and run commands via a SSH session
+#
+# Copyright (c) Philippe Mathieu-Daudé <f4bug@amsat.org>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+import os
+import re
+import base64
+import logging
+import paramiko
+import time
+
+from avocado import skipIf
+from avocado_qemu import Test
+from avocado.utils import process
+from avocado.utils import archive
+
+
+class LinuxSSH(Test):
+
+    timeout = 150 # Not for 'configure --enable-debug --enable-debug-tcg'
+
+    KERNEL_COMMON_COMMAND_LINE = 'printk.time=0 '
+    VM_IP = '127.0.0.1'
+
+    IMAGE_INFO = {
+        'be': {
+            'image_url': 'https://people.debian.org/~aurel32/qemu/mips/'
+                         'debian_wheezy_mips_standard.qcow2',
+            'image_hash': '8987a63270df67345b2135a6b7a4885a35e392d5',
+            'rsa_hostkey': b'AAAAB3NzaC1yc2EAAAADAQABAAABAQCca1VitiyLAdQOld'
+                           b'zT43IOEVJZ0wHD78GJi8wDAjMiYWUzNSSn0rXGQsINHuH5'
+                           b'IlF+kBZsHinb/FtKCAyS9a8uCHhQI4SuB4QhAb0+39MlUw'
+                           b'Mm0CLkctgM2eUUZ6MQMQvDlqnue6CCkxN62EZYbaxmby7j'
+                           b'CQa1125o1HRKBvdGm2zrJWxXAfA+f1v6jHLyE8Jnu83eQ+'
+                           b'BFY25G+Vzx1PVc3zQBwJ8r0NGTRqy2//oWQP0h+bMsgeFe'
+                           b'KH/J3RJM22vg6+I4JAdBFcxnK+l781h1FuRxOn4O/Xslbg'
+                           b'go6WtB4V4TOsw2E/KfxI5IZ/icxF+swVcnvF46Hf3uQc/0'
+                           b'BBqb',
+        },
+        'le': {
+            'image_url': 'https://people.debian.org/~aurel32/qemu/mipsel/'
+                         'debian_wheezy_mipsel_standard.qcow2',
+            'image_hash': '7866764d9de3ef536ffca24c9fb9f04ffdb45802',
+            'rsa_hostkey': b'AAAAB3NzaC1yc2EAAAADAQABAAABAQClXJlBT71HL5yKvv'
+                           b'gfC7jmxSWx5zSBCzET6CLZczwAafSIs7YKfNOy/dQTxhuk'
+                           b'yIGFUugZFoF3E9PzdhunuyvyTd56MPoNIqFbb5rGokwU5I'
+                           b'TOx3dBHZR0mClypL6MVrwe0bsiIb8GhF1zioNwcsaAZnAi'
+                           b'KfXStVDtXvn/kLLq+xLABYt48CC5KYWoFaCoICskLAY+qo'
+                           b'L+LWyAnQisj4jAH8VSaSKIImFpfkHWEXPhHcC4ZBlDKtnH'
+                           b'po9vhfCHgnfW3Pzrqmk8BI4HysqPFVmJWkJGlGUL+sGeg3'
+                           b'ZZolAYuDXGuBrw8ooPJq2v2dOH+z6dyD2q/ypmAbyPqj5C'
+                           b'rc8H',
+        },
+    }
+
+    def wait_for_console_pattern(self, success_message,
+                                 failure_message='Oops'):
+        console = self.vm.console_socket.makefile()
+        console_logger = logging.getLogger('console')
+        while True:
+            msg = console.readline()
+            console_logger.debug(msg.strip())
+            if success_message in msg:
+                break
+            if failure_message in msg:
+                fail = 'Failure message found in console: %s' % failure_message
+                self.fail(fail)
+
+    def get_portfwd(self):
+        res = self.vm.command('human-monitor-command',
+                              command_line='info usernet')
+        line = res.split('\r\n')[2]
+        port = re.split(r'.*TCP.HOST_FORWARD.*127\.0\.0\.1 (\d+)\s+10\..*',
+                        line)[1]
+        self.log.debug("sshd listening on port:" + port)
+        return port
+
+    def ssh_connect(self, username, password, rsa_hostkey_b64=None):
+        self.ssh_logger = logging.getLogger('ssh')
+        self.ssh_username = username
+        self.ssh_ps1 = '# ' if username is 'root' else '$ '
+        self.ssh_client = paramiko.SSHClient()
+        port = self.get_portfwd()
+        if rsa_hostkey_b64:
+            rsa_hostkey_bin = base64.b64decode(rsa_hostkey_b64)
+            rsa_hostkey = paramiko.RSAKey(data = rsa_hostkey_bin)
+            ipport = '[%s]:%s' % (self.VM_IP, port)
+            self.ssh_logger.debug('ipport ' + ipport)
+            self.ssh_client.get_host_keys().add(ipport, 'ssh-rsa', rsa_hostkey)
+        for i in range(10):
+            try:
+                self.ssh_client.connect(self.VM_IP, int(port),
+                                        username, password, banner_timeout=90)
+                self.ssh_logger.info("Entering interactive session.")
+                return
+            except:
+                time.sleep(4)
+                pass
+        self.fail("sshd timeout")
+
+    def ssh_disconnect_vm(self):
+        self.ssh_client.close()
+
+    def ssh_command(self, command, is_root=True):
+        self.ssh_logger.info(self.ssh_ps1 + command)
+        stdin, stdout, stderr = self.ssh_client.exec_command(command)
+        stdout_lines = [line.strip('\n') for line in stdout]
+        for line in stdout_lines:
+            self.ssh_logger.info(line)
+        stderr_lines = [line.strip('\n') for line in stderr]
+        for line in stderr_lines:
+            self.ssh_logger.warning(line)
+        return stdout_lines, stderr_lines
+
+    def boot_debian_wheezy_image_and_ssh_login(self, endianess, kernel_path):
+        image_url = self.IMAGE_INFO[endianess]['image_url']
+        image_hash = self.IMAGE_INFO[endianess]['image_hash']
+        image_path = self.fetch_asset(image_url, asset_hash=image_hash)
+        rsa_hostkey_b64 = self.IMAGE_INFO[endianess]['rsa_hostkey']
+
+        self.vm.set_machine('malta')
+        self.vm.set_console()
+        kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE
+                               + 'console=ttyS0 root=/dev/sda1')
+        self.vm.add_args('-no-reboot',
+                         '-kernel', kernel_path,
+                         '-append', kernel_command_line,
+                         '-hda', image_path,
+                         '-netdev', 'user,id=vnet,hostfwd=:127.0.0.1:0-:22',
+                         '-device', 'pcnet,netdev=vnet')
+        self.vm.launch()
+
+        self.log.info('VM launched, waiting for sshd')
+        console_pattern = 'Starting OpenBSD Secure Shell server: sshd'
+        self.wait_for_console_pattern(console_pattern)
+        self.log.info('sshd ready')
+
+        self.ssh_connect('root', 'root', rsa_hostkey_b64=rsa_hostkey_b64)
+
+    def shutdown_via_ssh(self):
+        self.ssh_command('poweroff')
+        self.ssh_disconnect_vm()
+        self.wait_for_console_pattern('Power down')
+
+    def run_common_commands(self):
+        stdout, stderr = self.ssh_command('lspci -d 11ab:4620')
+        self.assertIn(True, ["GT-64120" in line for line in stdout])
+
+        stdout, stderr = self.ssh_command('cat /sys/bus/i2c/devices/i2c-0/name')
+        self.assertIn(True, ["SMBus PIIX4 adapter" in line
+                             for line in stdout])
+
+        stdout, stderr = self.ssh_command('cat /proc/mtd')
+        self.assertIn(True, ["YAMON" in line
+                             for line in stdout])
+
+        # Empty 'Board Config'
+        stdout, stderr = self.ssh_command('md5sum /dev/mtd2ro')
+        self.assertIn(True, ["0dfbe8aa4c20b52e1b8bf3cb6cbdf193" in line
+                             for line in stdout])
+
+    def do_test_mips_malta(self, endianess, kernel_path, uname_m):
+        self.boot_debian_wheezy_image_and_ssh_login(endianess, kernel_path)
+
+        stdout, stderr = self.ssh_command('uname -a')
+        self.assertIn(True, [uname_m + " GNU/Linux" in line for line in stdout])
+
+        self.run_common_commands()
+        self.shutdown_via_ssh()
+
+    @skipIf(os.getenv('CONTINUOUS_INTEGRATION'), 'Running on Travis-CI')
+    def test_mips_malta32eb_kernel3_2_0(self):
+        """
+        :avocado: tags=arch:mips
+        :avocado: tags=machine:malta
+        :avocado: tags=endian:big
+        :avocado: tags=device:pcnet32
+        """
+        kernel_url = ('https://people.debian.org/~aurel32/qemu/mips/'
+                      'vmlinux-3.2.0-4-4kc-malta')
+        kernel_hash = '592e384a4edc16dade52a6cd5c785c637bcbc9ad'
+        kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
+
+        self.do_test_mips_malta('be', kernel_path, 'mips')
+
+    @skipIf(os.getenv('CONTINUOUS_INTEGRATION'), 'Running on Travis-CI')
+    def test_mips_malta32el_kernel3_2_0(self):
+        """
+        :avocado: tags=arch:mipsel
+        :avocado: tags=machine:malta
+        :avocado: tags=endian:little
+        :avocado: tags=device:pcnet32
+        """
+        kernel_url = ('https://people.debian.org/~aurel32/qemu/mipsel/'
+                      'vmlinux-3.2.0-4-4kc-malta')
+        kernel_hash = 'a66bea5a8adaa2cb3d36a1d4e0ccdb01be8f6c2a'
+        kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
+
+        self.do_test_mips_malta('le', kernel_path, 'mips')
+
+    @skipIf(os.getenv('CONTINUOUS_INTEGRATION'), 'Running on Travis-CI')
+    def test_mips_malta64eb_kernel3_2_0(self):
+        """
+        :avocado: tags=arch:mips64
+        :avocado: tags=machine:malta
+        :avocado: tags=endian:big
+        :avocado: tags=device:pcnet32
+        """
+        kernel_url = ('https://people.debian.org/~aurel32/qemu/mips/'
+                      'vmlinux-3.2.0-4-5kc-malta')
+        kernel_hash = 'db6eea7de35d36c77d8c165b6bcb222e16eb91db'
+        kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
+        self.do_test_mips_malta('be', kernel_path, 'mips64')
+
+    @skipIf(os.getenv('CONTINUOUS_INTEGRATION'), 'Running on Travis-CI')
+    def test_mips_malta64el_kernel3_2_0(self):
+        """
+        :avocado: tags=arch:mips64el
+        :avocado: tags=machine:malta
+        :avocado: tags=endian:little
+        :avocado: tags=device:pcnet32
+        """
+        kernel_url = ('https://people.debian.org/~aurel32/qemu/mipsel/'
+                      'vmlinux-3.2.0-4-5kc-malta')
+        kernel_hash = '6a7f77245acf231415a0e8b725d91ed2f3487794'
+        kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
+        self.do_test_mips_malta('le', kernel_path, 'mips64')
diff --git a/tests/bios-tables-test-allowed-diff.h b/tests/bios-tables-test-allowed-diff.h
new file mode 100644
index 0000000000..dfb8523c8b
--- /dev/null
+++ b/tests/bios-tables-test-allowed-diff.h
@@ -0,0 +1 @@
+/* List of comma-separated changed AML files to ignore */
diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index 11e07be093..0ce55182f2 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -342,13 +342,29 @@ try_again:
     return exp_tables;
 }
 
+static bool test_acpi_find_diff_allowed(AcpiSdtTable *sdt)
+{
+    const gchar *allowed_diff_file[] = {
+#include "bios-tables-test-allowed-diff.h"
+        NULL
+    };
+    const gchar **f;
+
+    for (f = allowed_diff_file; *f; ++f) {
+        if (!g_strcmp0(sdt->aml_file, *f)) {
+            return true;
+        }
+    }
+    return false;
+}
+
 /* test the list of tables in @data->tables against reference tables */
 static void test_acpi_asl(test_data *data)
 {
     int i;
     AcpiSdtTable *sdt, *exp_sdt;
     test_data exp_data;
-    gboolean exp_err, err;
+    gboolean exp_err, err, all_tables_match = true;
 
     memset(&exp_data, 0, sizeof(exp_data));
     exp_data.tables = load_expected_aml(data);
@@ -359,6 +375,20 @@ static void test_acpi_asl(test_data *data)
         sdt = &g_array_index(data->tables, AcpiSdtTable, i);
         exp_sdt = &g_array_index(exp_data.tables, AcpiSdtTable, i);
 
+        if (sdt->aml_len == exp_sdt->aml_len &&
+            !memcmp(sdt->aml, exp_sdt->aml, sdt->aml_len)) {
+            /* Identical table binaries: no need to disassemble. */
+            continue;
+        }
+
+        fprintf(stderr,
+                "acpi-test: Warning! %.4s binary file mismatch. "
+                "Actual [aml:%s], Expected [aml:%s].\n",
+                exp_sdt->aml, sdt->aml_file, exp_sdt->aml_file);
+
+        all_tables_match = all_tables_match &&
+            test_acpi_find_diff_allowed(exp_sdt);
+
         err = load_asl(data->tables, sdt);
         asl = normalize_asl(sdt->asl);
 
@@ -396,11 +426,12 @@ static void test_acpi_asl(test_data *data)
                             "see ASL difference.");
                     }
                 }
-          }
+            }
         }
         g_string_free(asl, true);
         g_string_free(exp_asl, true);
     }
+    g_assert(all_tables_match);
 
     free_test_data(&exp_data);
 }
@@ -813,6 +844,22 @@ static void test_acpi_piix4_tcg_dimm_pxm(void)
     test_acpi_tcg_dimm_pxm(MACHINE_PC);
 }
 
+static void test_acpi_virt_tcg(void)
+{
+    test_data data = {
+        .machine = "virt",
+        .accel = "tcg",
+        .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd",
+        .uefi_fl2 = "pc-bios/edk2-arm-vars.fd",
+        .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2",
+        .ram_start = 0x40000000ULL,
+        .scan_len = 128ULL * 1024 * 1024,
+    };
+
+    test_acpi_one("-cpu cortex-a57", &data);
+    free_test_data(&data);
+}
+
 int main(int argc, char *argv[])
 {
     const char *arch = qtest_get_arch();
@@ -841,6 +888,8 @@ int main(int argc, char *argv[])
         qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem);
         qtest_add_func("acpi/piix4/dimmpxm", test_acpi_piix4_tcg_dimm_pxm);
         qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm);
+    } else if (strcmp(arch, "aarch64") == 0) {
+        qtest_add_func("acpi/virt", test_acpi_virt_tcg);
     }
     ret = g_test_run();
     boot_sector_cleanup(disk);
diff --git a/tests/data/acpi/rebuild-expected-aml.sh b/tests/data/acpi/rebuild-expected-aml.sh
index ff7e62249d..d2853218dd 100755
--- a/tests/data/acpi/rebuild-expected-aml.sh
+++ b/tests/data/acpi/rebuild-expected-aml.sh
@@ -12,7 +12,7 @@
 # This work is licensed under the terms of the GNU GPLv2.
 # See the COPYING.LIB file in the top-level directory.
 
-qemu_bins="x86_64-softmmu/qemu-system-x86_64"
+qemu_bins="x86_64-softmmu/qemu-system-x86_64 aarch64-softmmu/qemu-system-aarch64"
 
 if [ ! -e "tests/bios-tables-test" ]; then
     echo "Test: bios-tables-test is required! Run make check before this script."
diff --git a/tests/data/acpi/virt/APIC b/tests/data/acpi/virt/APIC
new file mode 100644
index 0000000000..797dfde284
--- /dev/null
+++ b/tests/data/acpi/virt/APIC
Binary files differdiff --git a/tests/data/acpi/virt/DSDT b/tests/data/acpi/virt/DSDT
new file mode 100644
index 0000000000..20e85c7f89
--- /dev/null
+++ b/tests/data/acpi/virt/DSDT
Binary files differdiff --git a/tests/data/acpi/virt/FACP b/tests/data/acpi/virt/FACP
new file mode 100644
index 0000000000..27de99f51b
--- /dev/null
+++ b/tests/data/acpi/virt/FACP
Binary files differdiff --git a/tests/data/acpi/virt/GTDT b/tests/data/acpi/virt/GTDT
new file mode 100644
index 0000000000..10107a65e9
--- /dev/null
+++ b/tests/data/acpi/virt/GTDT
Binary files differdiff --git a/tests/data/acpi/virt/MCFG b/tests/data/acpi/virt/MCFG
new file mode 100644
index 0000000000..e8987e1af0
--- /dev/null
+++ b/tests/data/acpi/virt/MCFG
Binary files differdiff --git a/tests/data/acpi/virt/SPCR b/tests/data/acpi/virt/SPCR
new file mode 100644
index 0000000000..377271a0e7
--- /dev/null
+++ b/tests/data/acpi/virt/SPCR
Binary files differdiff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include
index c0e1bf57a3..aaf5396b85 100644
--- a/tests/docker/Makefile.include
+++ b/tests/docker/Makefile.include
@@ -107,6 +107,7 @@ docker-image-debian-sparc64-cross: docker-image-debian-sid
 docker-image-debian-mips64-cross: docker-image-debian-sid
 docker-image-debian-riscv64-cross: docker-image-debian-sid
 docker-image-debian-powerpc-cross: docker-image-debian-sid
+docker-image-debian-ppc64-cross: docker-image-debian-sid
 docker-image-travis: NOUSER=1
 
 # Specialist build images, sometimes very limited tools
diff --git a/tests/docker/dockerfiles/debian-ppc64-cross.docker b/tests/docker/dockerfiles/debian-ppc64-cross.docker
new file mode 100644
index 0000000000..7f239c322d
--- /dev/null
+++ b/tests/docker/dockerfiles/debian-ppc64-cross.docker
@@ -0,0 +1,11 @@
+#
+# Docker ppc64 cross-compiler target
+#
+# This docker target builds on the debian sid base image which
+# contains cross compilers for Debian "ports" targets.
+FROM qemu:debian-sid
+
+RUN DEBIAN_FRONTEND=noninteractive eatmydata \
+    apt-get install -y --no-install-recommends \
+       gcc-powerpc64-linux-gnu \
+       libc6-dev-ppc64-cross || { echo "Failed to build - see debian-sid.docker notes"; exit 1; }
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
index 69d4a7f5d7..afbba29ada 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -8,6 +8,7 @@ ENV PACKAGES \
     bzip2-devel \
     ccache \
     clang \
+    cyrus-sasl-devel \
     device-mapper-multipath-devel \
     findutils \
     flex \
@@ -23,13 +24,17 @@ ENV PACKAGES \
     libaio-devel \
     libasan \
     libattr-devel \
+    libblockdev-mpath-devel \
     libcap-devel \
     libcap-ng-devel \
     libcurl-devel \
     libfdt-devel \
+    libiscsi-devel \
     libjpeg-devel \
+    libpmem-devel \
     libpng-devel \
     librbd-devel \
+    libseccomp-devel \
     libssh2-devel \
     libubsan \
     libusbx-devel \
@@ -74,10 +79,12 @@ ENV PACKAGES \
     pixman-devel \
     python3 \
     PyYAML \
+    rdma-core-devel \
     SDL2-devel \
     snappy-devel \
     sparse \
     spice-server-devel \
+    systemd-devel \
     systemtap-sdt-devel \
     tar \
     usbredir-devel \
diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker
new file mode 100644
index 0000000000..2e2900150b
--- /dev/null
+++ b/tests/docker/dockerfiles/ubuntu1804.docker
@@ -0,0 +1,57 @@
+FROM ubuntu:18.04
+ENV PACKAGES flex bison \
+    ccache \
+    clang \
+    gcc \
+    gettext \
+    git \
+    glusterfs-common \
+    libaio-dev \
+    libattr1-dev \
+    libbluetooth-dev \
+    libbrlapi-dev \
+    libbz2-dev \
+    libcacard-dev \
+    libcap-dev \
+    libcap-ng-dev \
+    libcurl4-gnutls-dev \
+    libdrm-dev \
+    libepoxy-dev \
+    libfdt-dev \
+    libgbm-dev \
+    libgtk-3-dev \
+    libibverbs-dev \
+    libiscsi-dev \
+    libjemalloc-dev \
+    libjpeg-turbo8-dev \
+    liblzo2-dev \
+    libncurses5-dev \
+    libncursesw5-dev \
+    libnfs-dev \
+    libnss3-dev \
+    libnuma-dev \
+    libpixman-1-dev \
+    librados-dev \
+    librbd-dev \
+    librdmacm-dev \
+    libsasl2-dev \
+    libsdl2-dev \
+    libseccomp-dev \
+    libsnappy-dev \
+    libspice-protocol-dev \
+    libspice-server-dev \
+    libssh2-1-dev \
+    libusb-1.0-0-dev \
+    libusbredirhost-dev \
+    libvdeplug-dev \
+    libvte-2.91-dev \
+    libxen-dev \
+    make \
+    python-yaml \
+    sparse \
+    texinfo \
+    xfslibs-dev
+RUN apt-get update && \
+    apt-get -y install $PACKAGES
+RUN dpkg -l $PACKAGES | sort > /packages.txt
+ENV FEATURES clang pyyaml sdl2
diff --git a/tests/ds1338-test.c b/tests/ds1338-test.c
index 742dad9113..f6ade9a050 100644
--- a/tests/ds1338-test.c
+++ b/tests/ds1338-test.c
@@ -21,31 +21,22 @@
 #include "libqtest.h"
 #include "libqos/i2c.h"
 
-#define IMX25_I2C_0_BASE 0x43F80000
-
 #define DS1338_ADDR 0x68
 
-static I2CAdapter *i2c;
-static uint8_t addr;
-
 static inline uint8_t bcd2bin(uint8_t x)
 {
     return ((x) & 0x0f) + ((x) >> 4) * 10;
 }
 
-static void send_and_receive(void)
+static void send_and_receive(void *obj, void *data, QGuestAllocator *alloc)
 {
-    uint8_t cmd[1];
+    QI2CDevice *i2cdev = (QI2CDevice *)obj;
+
     uint8_t resp[7];
     time_t now = time(NULL);
     struct tm *tm_ptr = gmtime(&now);
 
-    /* reset the index in the RTC memory */
-    cmd[0] = 0;
-    i2c_send(i2c, addr, cmd, 1);
-
-    /* retrieve the date */
-    i2c_recv(i2c, addr, resp, 7);
+    i2c_read_block(i2cdev, 0, resp, sizeof(resp));
 
     /* check retrieved time againt local time */
     g_assert_cmpuint(bcd2bin(resp[4]), == , tm_ptr->tm_mday);
@@ -53,23 +44,15 @@ static void send_and_receive(void)
     g_assert_cmpuint(2000 + bcd2bin(resp[6]), == , 1900 + tm_ptr->tm_year);
 }
 
-int main(int argc, char **argv)
+static void ds1338_register_nodes(void)
 {
-    QTestState *s = NULL;
-    int ret;
-
-    g_test_init(&argc, &argv, NULL);
-
-    s = qtest_start("-display none -machine imx25-pdk");
-    i2c = imx_i2c_create(s, IMX25_I2C_0_BASE);
-    addr = DS1338_ADDR;
-
-    qtest_add_func("/ds1338/tx-rx", send_and_receive);
-
-    ret = g_test_run();
-
-    qtest_quit(s);
-    g_free(i2c);
-
-    return ret;
+    QOSGraphEdgeOptions opts = {
+        .extra_device_opts = "address=0x68"
+    };
+    add_qi2c_address(&opts, &(QI2CAddress) { DS1338_ADDR });
+
+    qos_node_create_driver("ds1338", i2c_device_create);
+    qos_node_consumes("ds1338", "i2c-bus", &opts);
+    qos_add_test("tx-rx", "ds1338", send_and_receive, NULL);
 }
+libqos_init(ds1338_register_nodes);
diff --git a/tests/libqos/arm-imx25-pdk-machine.c b/tests/libqos/arm-imx25-pdk-machine.c
new file mode 100644
index 0000000000..25066fb8a9
--- /dev/null
+++ b/tests/libqos/arm-imx25-pdk-machine.c
@@ -0,0 +1,92 @@
+/*
+ * libqos driver framework
+ *
+ * Copyright (c) 2019 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "libqos/malloc.h"
+#include "libqos/qgraph.h"
+#include "libqos/i2c.h"
+
+#define ARM_PAGE_SIZE            4096
+#define IMX25_PDK_RAM_START      0x80000000
+#define IMX25_PDK_RAM_END        0x88000000
+
+typedef struct QIMX25PDKMachine QIMX25PDKMachine;
+
+struct QIMX25PDKMachine {
+    QOSGraphObject obj;
+    QGuestAllocator alloc;
+    IMXI2C i2c_1;
+};
+
+static void *imx25_pdk_get_driver(void *object, const char *interface)
+{
+    QIMX25PDKMachine *machine = object;
+    if (!g_strcmp0(interface, "memory")) {
+        return &machine->alloc;
+    }
+
+    fprintf(stderr, "%s not present in arm/imx25_pdk\n", interface);
+    g_assert_not_reached();
+}
+
+static QOSGraphObject *imx25_pdk_get_device(void *obj, const char *device)
+{
+    QIMX25PDKMachine *machine = obj;
+    if (!g_strcmp0(device, "imx.i2c")) {
+        return &machine->i2c_1.obj;
+    }
+
+    fprintf(stderr, "%s not present in arm/imx25_pdk\n", device);
+    g_assert_not_reached();
+}
+
+static void imx25_pdk_destructor(QOSGraphObject *obj)
+{
+    QIMX25PDKMachine *machine = (QIMX25PDKMachine *) obj;
+    alloc_destroy(&machine->alloc);
+}
+
+static void *qos_create_machine_arm_imx25_pdk(QTestState *qts)
+{
+    QIMX25PDKMachine *machine = g_new0(QIMX25PDKMachine, 1);
+
+    alloc_init(&machine->alloc, 0,
+               IMX25_PDK_RAM_START,
+               IMX25_PDK_RAM_END,
+               ARM_PAGE_SIZE);
+    machine->obj.get_device = imx25_pdk_get_device;
+    machine->obj.get_driver = imx25_pdk_get_driver;
+    machine->obj.destructor = imx25_pdk_destructor;
+
+    imx_i2c_init(&machine->i2c_1, qts, 0x43f80000);
+    return &machine->obj;
+}
+
+static void imx25_pdk_register_nodes(void)
+{
+    QOSGraphEdgeOptions edge = {
+        .extra_device_opts = "bus=i2c-bus.0"
+    };
+    qos_node_create_machine("arm/imx25-pdk", qos_create_machine_arm_imx25_pdk);
+    qos_node_contains("arm/imx25-pdk", "imx.i2c", &edge, NULL);
+}
+
+libqos_init(imx25_pdk_register_nodes);
diff --git a/tests/libqos/arm-n800-machine.c b/tests/libqos/arm-n800-machine.c
new file mode 100644
index 0000000000..87279bdb26
--- /dev/null
+++ b/tests/libqos/arm-n800-machine.c
@@ -0,0 +1,92 @@
+/*
+ * libqos driver framework
+ *
+ * Copyright (c) 2019 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "libqos/malloc.h"
+#include "libqos/qgraph.h"
+#include "libqos/i2c.h"
+
+#define ARM_PAGE_SIZE            4096
+#define N800_RAM_START      0x80000000
+#define N800_RAM_END        0x88000000
+
+typedef struct QN800Machine QN800Machine;
+
+struct QN800Machine {
+    QOSGraphObject obj;
+    QGuestAllocator alloc;
+    OMAPI2C i2c_1;
+};
+
+static void *n800_get_driver(void *object, const char *interface)
+{
+    QN800Machine *machine = object;
+    if (!g_strcmp0(interface, "memory")) {
+        return &machine->alloc;
+    }
+
+    fprintf(stderr, "%s not present in arm/n800\n", interface);
+    g_assert_not_reached();
+}
+
+static QOSGraphObject *n800_get_device(void *obj, const char *device)
+{
+    QN800Machine *machine = obj;
+    if (!g_strcmp0(device, "omap_i2c")) {
+        return &machine->i2c_1.obj;
+    }
+
+    fprintf(stderr, "%s not present in arm/n800\n", device);
+    g_assert_not_reached();
+}
+
+static void n800_destructor(QOSGraphObject *obj)
+{
+    QN800Machine *machine = (QN800Machine *) obj;
+    alloc_destroy(&machine->alloc);
+}
+
+static void *qos_create_machine_arm_n800(QTestState *qts)
+{
+    QN800Machine *machine = g_new0(QN800Machine, 1);
+
+    alloc_init(&machine->alloc, 0,
+               N800_RAM_START,
+               N800_RAM_END,
+               ARM_PAGE_SIZE);
+    machine->obj.get_device = n800_get_device;
+    machine->obj.get_driver = n800_get_driver;
+    machine->obj.destructor = n800_destructor;
+
+    omap_i2c_init(&machine->i2c_1, qts, 0x48070000);
+    return &machine->obj;
+}
+
+static void n800_register_nodes(void)
+{
+    QOSGraphEdgeOptions edge = {
+        .extra_device_opts = "bus=i2c-bus.0"
+    };
+    qos_node_create_machine("arm/n800", qos_create_machine_arm_n800);
+    qos_node_contains("arm/n800", "omap_i2c", &edge, NULL);
+}
+
+libqos_init(n800_register_nodes);
diff --git a/tests/libqos/i2c-imx.c b/tests/libqos/i2c-imx.c
index 0945f2ecdc..f33ece55a3 100644
--- a/tests/libqos/i2c-imx.c
+++ b/tests/libqos/i2c-imx.c
@@ -30,13 +30,6 @@ enum IMXI2CDirection {
     IMX_I2C_WRITE,
 };
 
-typedef struct IMXI2C {
-    I2CAdapter parent;
-
-    uint64_t addr;
-} IMXI2C;
-
-
 static void imx_i2c_set_slave_addr(IMXI2C *s, uint8_t addr,
                                    enum IMXI2CDirection direction)
 {
@@ -47,7 +40,7 @@ static void imx_i2c_set_slave_addr(IMXI2C *s, uint8_t addr,
 static void imx_i2c_send(I2CAdapter *i2c, uint8_t addr,
                          const uint8_t *buf, uint16_t len)
 {
-    IMXI2C *s = (IMXI2C *)i2c;
+    IMXI2C *s = container_of(i2c, IMXI2C, parent);
     uint8_t data;
     uint8_t status;
     uint16_t size = 0;
@@ -107,7 +100,7 @@ static void imx_i2c_send(I2CAdapter *i2c, uint8_t addr,
 static void imx_i2c_recv(I2CAdapter *i2c, uint8_t addr,
                          uint8_t *buf, uint16_t len)
 {
-    IMXI2C *s = (IMXI2C *)i2c;
+    IMXI2C *s = container_of(i2c, IMXI2C, parent);
     uint8_t data;
     uint8_t status;
     uint16_t size = 0;
@@ -193,16 +186,31 @@ static void imx_i2c_recv(I2CAdapter *i2c, uint8_t addr,
     g_assert((status & I2SR_IBB) == 0);
 }
 
-I2CAdapter *imx_i2c_create(QTestState *qts, uint64_t addr)
+static void *imx_i2c_get_driver(void *obj, const char *interface)
 {
-    IMXI2C *s = g_malloc0(sizeof(*s));
-    I2CAdapter *i2c = (I2CAdapter *)s;
+    IMXI2C *s = obj;
+    if (!g_strcmp0(interface, "i2c-bus")) {
+        return &s->parent;
+    }
+    fprintf(stderr, "%s not present in imx-i2c\n", interface);
+    g_assert_not_reached();
+}
 
+void imx_i2c_init(IMXI2C *s, QTestState *qts, uint64_t addr)
+{
     s->addr = addr;
 
-    i2c->send = imx_i2c_send;
-    i2c->recv = imx_i2c_recv;
-    i2c->qts = qts;
+    s->obj.get_driver = imx_i2c_get_driver;
 
-    return i2c;
+    s->parent.send = imx_i2c_send;
+    s->parent.recv = imx_i2c_recv;
+    s->parent.qts = qts;
 }
+
+static void imx_i2c_register_nodes(void)
+{
+    qos_node_create_driver("imx.i2c", NULL);
+    qos_node_produces("imx.i2c", "i2c-bus");
+}
+
+libqos_init(imx_i2c_register_nodes);
diff --git a/tests/libqos/i2c-omap.c b/tests/libqos/i2c-omap.c
index 1ef6e7b200..9ae8214fa8 100644
--- a/tests/libqos/i2c-omap.c
+++ b/tests/libqos/i2c-omap.c
@@ -40,12 +40,6 @@ enum OMAPI2CCONBits {
     OMAP_I2C_CON_I2C_EN = 1 << 15,
 };
 
-typedef struct OMAPI2C {
-    I2CAdapter parent;
-
-    uint64_t addr;
-} OMAPI2C;
-
 
 static void omap_i2c_set_slave_addr(OMAPI2C *s, uint8_t addr)
 {
@@ -59,7 +53,7 @@ static void omap_i2c_set_slave_addr(OMAPI2C *s, uint8_t addr)
 static void omap_i2c_send(I2CAdapter *i2c, uint8_t addr,
                           const uint8_t *buf, uint16_t len)
 {
-    OMAPI2C *s = (OMAPI2C *)i2c;
+    OMAPI2C *s = container_of(i2c, OMAPI2C, parent);
     uint16_t data;
 
     omap_i2c_set_slave_addr(s, addr);
@@ -103,8 +97,9 @@ static void omap_i2c_send(I2CAdapter *i2c, uint8_t addr,
 static void omap_i2c_recv(I2CAdapter *i2c, uint8_t addr,
                           uint8_t *buf, uint16_t len)
 {
-    OMAPI2C *s = (OMAPI2C *)i2c;
+    OMAPI2C *s = container_of(i2c, OMAPI2C, parent);
     uint16_t data, stat;
+    uint16_t orig_len = len;
 
     omap_i2c_set_slave_addr(s, addr);
 
@@ -116,16 +111,24 @@ static void omap_i2c_recv(I2CAdapter *i2c, uint8_t addr,
            OMAP_I2C_CON_STT |
            OMAP_I2C_CON_STP;
     qtest_writew(i2c->qts, s->addr + OMAP_I2C_CON, data);
-    data = qtest_readw(i2c->qts, s->addr + OMAP_I2C_CON);
-    g_assert((data & OMAP_I2C_CON_STP) == 0);
 
     data = qtest_readw(i2c->qts, s->addr + OMAP_I2C_STAT);
     g_assert((data & OMAP_I2C_STAT_NACK) == 0);
 
-    data = qtest_readw(i2c->qts, s->addr + OMAP_I2C_CNT);
-    g_assert_cmpuint(data, ==, len);
-
     while (len > 0) {
+        data = qtest_readw(i2c->qts, s->addr + OMAP_I2C_CON);
+        if (len <= 4) {
+            g_assert((data & OMAP_I2C_CON_STP) == 0);
+
+            data = qtest_readw(i2c->qts, s->addr + OMAP_I2C_CNT);
+            g_assert_cmpuint(data, ==, orig_len);
+        } else {
+            g_assert((data & OMAP_I2C_CON_STP) != 0);
+
+            data = qtest_readw(i2c->qts, s->addr + OMAP_I2C_CNT);
+            g_assert_cmpuint(data, ==, len - 4);
+        }
+
         data = qtest_readw(i2c->qts, s->addr + OMAP_I2C_STAT);
         g_assert((data & OMAP_I2C_STAT_RRDY) != 0);
         g_assert((data & OMAP_I2C_STAT_ROVR) == 0);
@@ -152,21 +155,42 @@ static void omap_i2c_recv(I2CAdapter *i2c, uint8_t addr,
     g_assert((data & OMAP_I2C_CON_STP) == 0);
 }
 
-I2CAdapter *omap_i2c_create(QTestState *qts, uint64_t addr)
+static void *omap_i2c_get_driver(void *obj, const char *interface)
+{
+    OMAPI2C *s = obj;
+    if (!g_strcmp0(interface, "i2c-bus")) {
+        return &s->parent;
+    }
+    fprintf(stderr, "%s not present in omap_i2c\n", interface);
+    g_assert_not_reached();
+}
+
+static void omap_i2c_start_hw(QOSGraphObject *object)
 {
-    OMAPI2C *s = g_malloc0(sizeof(*s));
-    I2CAdapter *i2c = (I2CAdapter *)s;
+    OMAPI2C *s = (OMAPI2C *) object;
     uint16_t data;
 
+    /* verify the mmio address by looking for a known signature */
+    data = qtest_readw(s->parent.qts, s->addr + OMAP_I2C_REV);
+    g_assert_cmphex(data, ==, 0x34);
+}
+
+void omap_i2c_init(OMAPI2C *s, QTestState *qts, uint64_t addr)
+{
     s->addr = addr;
 
-    i2c->send = omap_i2c_send;
-    i2c->recv = omap_i2c_recv;
-    i2c->qts = qts;
+    s->obj.get_driver = omap_i2c_get_driver;
+    s->obj.start_hw = omap_i2c_start_hw;
 
-    /* verify the mmio address by looking for a known signature */
-    data = qtest_readw(qts, addr + OMAP_I2C_REV);
-    g_assert_cmphex(data, ==, 0x34);
+    s->parent.send = omap_i2c_send;
+    s->parent.recv = omap_i2c_recv;
+    s->parent.qts = qts;
+}
 
-    return i2c;
+static void omap_i2c_register_nodes(void)
+{
+    qos_node_create_driver("omap_i2c", NULL);
+    qos_node_produces("omap_i2c", "i2c-bus");
 }
+
+libqos_init(omap_i2c_register_nodes);
diff --git a/tests/libqos/i2c.c b/tests/libqos/i2c.c
index 23bc2a3eb2..156114e745 100644
--- a/tests/libqos/i2c.c
+++ b/tests/libqos/i2c.c
@@ -10,14 +10,76 @@
 #include "libqos/i2c.h"
 #include "libqtest.h"
 
-void i2c_send(I2CAdapter *i2c, uint8_t addr,
-              const uint8_t *buf, uint16_t len)
+void i2c_send(QI2CDevice *i2cdev, const uint8_t *buf, uint16_t len)
 {
-    i2c->send(i2c, addr, buf, len);
+    i2cdev->bus->send(i2cdev->bus, i2cdev->addr, buf, len);
 }
 
-void i2c_recv(I2CAdapter *i2c, uint8_t addr,
-              uint8_t *buf, uint16_t len)
+void i2c_recv(QI2CDevice *i2cdev, uint8_t *buf, uint16_t len)
 {
-    i2c->recv(i2c, addr, buf, len);
+    i2cdev->bus->recv(i2cdev->bus, i2cdev->addr, buf, len);
+}
+
+void i2c_read_block(QI2CDevice *i2cdev, uint8_t reg,
+                    uint8_t *buf, uint16_t len)
+{
+    i2c_send(i2cdev, &reg, 1);
+    i2c_recv(i2cdev, buf, len);
+}
+
+void i2c_write_block(QI2CDevice *i2cdev, uint8_t reg,
+                     const uint8_t *buf, uint16_t len)
+{
+    uint8_t *cmd = g_malloc(len + 1);
+    cmd[0] = reg;
+    memcpy(&cmd[1], buf, len);
+    i2c_send(i2cdev, cmd, len + 1);
+    g_free(cmd);
+}
+
+uint8_t i2c_get8(QI2CDevice *i2cdev, uint8_t reg)
+{
+    uint8_t resp[1];
+    i2c_read_block(i2cdev, reg, resp, sizeof(resp));
+    return resp[0];
+}
+
+uint16_t i2c_get16(QI2CDevice *i2cdev, uint8_t reg)
+{
+    uint8_t resp[2];
+    i2c_read_block(i2cdev, reg, resp, sizeof(resp));
+    return (resp[0] << 8) | resp[1];
+}
+
+void i2c_set8(QI2CDevice *i2cdev, uint8_t reg, uint8_t value)
+{
+    i2c_write_block(i2cdev, reg, &value, 1);
+}
+
+void i2c_set16(QI2CDevice *i2cdev, uint8_t reg, uint16_t value)
+{
+    uint8_t data[2];
+
+    data[0] = value >> 8;
+    data[1] = value & 255;
+    i2c_write_block(i2cdev, reg, data, sizeof(data));
+}
+
+void *i2c_device_create(void *i2c_bus, QGuestAllocator *alloc, void *addr)
+{
+    QI2CDevice *i2cdev = g_new0(QI2CDevice, 1);
+
+    i2cdev->bus = i2c_bus;
+    if (addr) {
+        i2cdev->addr = ((QI2CAddress *)addr)->addr;
+    }
+    return &i2cdev->obj;
+}
+
+void add_qi2c_address(QOSGraphEdgeOptions *opts, QI2CAddress *addr)
+{
+    g_assert(addr);
+
+    opts->arg = addr;
+    opts->size_arg = sizeof(QI2CAddress);
 }
diff --git a/tests/libqos/i2c.h b/tests/libqos/i2c.h
index cc01358a9f..945b65b34c 100644
--- a/tests/libqos/i2c.h
+++ b/tests/libqos/i2c.h
@@ -10,6 +10,7 @@
 #define LIBQOS_I2C_H
 
 #include "libqtest.h"
+#include "libqos/qgraph.h"
 
 typedef struct I2CAdapter I2CAdapter;
 struct I2CAdapter {
@@ -21,17 +22,61 @@ struct I2CAdapter {
     QTestState *qts;
 };
 
-#define OMAP2_I2C_1_BASE 0x48070000
+typedef struct QI2CAddress QI2CAddress;
+struct QI2CAddress {
+    uint8_t addr;
+};
+
+typedef struct QI2CDevice QI2CDevice;
+struct QI2CDevice {
+    /*
+     * For now, all devices are simple enough that there is no need for
+     * them to define their own constructor and get_driver functions.
+     * Therefore, QOSGraphObject is included directly in QI2CDevice;
+     * the tests expect to get a QI2CDevice rather than doing something
+     * like obj->get_driver("i2c-device").
+     *
+     * In fact there is no i2c-device interface even, because there are
+     * no generic I2C tests).
+     */
+    QOSGraphObject obj;
+    I2CAdapter *bus;
+    uint8_t addr;
+};
+
+void *i2c_device_create(void *i2c_bus, QGuestAllocator *alloc, void *addr);
+void add_qi2c_address(QOSGraphEdgeOptions *opts, QI2CAddress *addr);
+
+void i2c_send(QI2CDevice *dev, const uint8_t *buf, uint16_t len);
+void i2c_recv(QI2CDevice *dev, uint8_t *buf, uint16_t len);
+
+void i2c_read_block(QI2CDevice *dev, uint8_t reg,
+                    uint8_t *buf, uint16_t len);
+void i2c_write_block(QI2CDevice *dev, uint8_t reg,
+                     const uint8_t *buf, uint16_t len);
+uint8_t i2c_get8(QI2CDevice *dev, uint8_t reg);
+uint16_t i2c_get16(QI2CDevice *dev, uint8_t reg);
+void i2c_set8(QI2CDevice *dev, uint8_t reg, uint8_t value);
+void i2c_set16(QI2CDevice *dev, uint8_t reg, uint16_t value);
+
+/* i2c-omap.c */
+typedef struct OMAPI2C {
+    QOSGraphObject obj;
+    I2CAdapter parent;
+
+    uint64_t addr;
+} OMAPI2C;
+
+void omap_i2c_init(OMAPI2C *s, QTestState *qts, uint64_t addr);
 
-void i2c_send(I2CAdapter *i2c, uint8_t addr,
-              const uint8_t *buf, uint16_t len);
-void i2c_recv(I2CAdapter *i2c, uint8_t addr,
-              uint8_t *buf, uint16_t len);
+/* i2c-imx.c */
+typedef struct IMXI2C {
+    QOSGraphObject obj;
+    I2CAdapter parent;
 
-/* libi2c-omap.c */
-I2CAdapter *omap_i2c_create(QTestState *qts, uint64_t addr);
+    uint64_t addr;
+} IMXI2C;
 
-/* libi2c-imx.c */
-I2CAdapter *imx_i2c_create(QTestState *qts, uint64_t addr);
+void imx_i2c_init(IMXI2C *s, QTestState *qts, uint64_t addr);
 
 #endif
diff --git a/tests/libqos/qgraph.c b/tests/libqos/qgraph.c
index b149caaaa9..7a7ae2a19e 100644
--- a/tests/libqos/qgraph.c
+++ b/tests/libqos/qgraph.c
@@ -632,15 +632,19 @@ void qos_node_create_driver(const char *name, QOSCreateDriverFunc function)
 }
 
 void qos_node_contains(const char *container, const char *contained,
-                       ...)
+                       QOSGraphEdgeOptions *opts, ...)
 {
     va_list va;
-    va_start(va, contained);
-    QOSGraphEdgeOptions *opts;
 
+    if (opts == NULL) {
+        add_edge(container, contained, QEDGE_CONTAINS, NULL);
+        return;
+    }
+
+    va_start(va, opts);
     do {
-        opts = va_arg(va, QOSGraphEdgeOptions *);
         add_edge(container, contained, QEDGE_CONTAINS, opts);
+        opts = va_arg(va, QOSGraphEdgeOptions *);
     } while (opts != NULL);
 
     va_end(va);
diff --git a/tests/libqos/qgraph.h b/tests/libqos/qgraph.h
index e799095b30..3a25dda4b2 100644
--- a/tests/libqos/qgraph.h
+++ b/tests/libqos/qgraph.h
@@ -453,14 +453,16 @@ void qos_node_create_machine_args(const char *name,
 void qos_node_create_driver(const char *name, QOSCreateDriverFunc function);
 
 /**
- * qos_node_contains(): creates an edge of type QEDGE_CONTAINS and
- * adds it to the edge list mapped to @container in the
+ * qos_node_contains(): creates one or more edges of type QEDGE_CONTAINS
+ * and adds them to the edge list mapped to @container in the
  * edge hash table.
  *
- * This edge will have @container as source and @contained as destination.
+ * The edges will have @container as source and @contained as destination.
  *
- * It also has the possibility to add optional NULL-terminated
- * @opts parameters (see %QOSGraphEdgeOptions)
+ * If @opts is NULL, a single edge will be added with no options.
+ * If @opts is non-NULL, the arguments after @contained represent a
+ * NULL-terminated list of %QOSGraphEdgeOptions structs, and an
+ * edge will be added for each of them.
  *
  * This function can be useful when there are multiple devices
  * with the same node name contained in a machine/other node
@@ -480,7 +482,8 @@ void qos_node_create_driver(const char *name, QOSCreateDriverFunc function);
  * For contains, op1.arg and op1.size_arg represent the arg to pass
  * to @contained constructor to properly initialize it.
  */
-void qos_node_contains(const char *container, const char *contained, ...);
+void qos_node_contains(const char *container, const char *contained,
+                       QOSGraphEdgeOptions *opts, ...);
 
 /**
  * qos_node_produces(): creates an edge of type QEDGE_PRODUCES and
diff --git a/tests/libqtest.c b/tests/libqtest.c
index 8ac0c02af4..9b9b5f37fc 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -32,6 +32,7 @@
 
 #define MAX_IRQ 256
 #define SOCKET_TIMEOUT 50
+#define SOCKET_MAX_FDS 16
 
 QTestState *global_qtest;
 
@@ -391,6 +392,40 @@ static void GCC_FMT_ATTR(2, 3) qtest_sendf(QTestState *s, const char *fmt, ...)
     va_end(ap);
 }
 
+/* Sends a message and file descriptors to the socket.
+ * It's needed for qmp-commands like getfd/add-fd */
+static void socket_send_fds(int socket_fd, int *fds, size_t fds_num,
+                            const char *buf, size_t buf_size)
+{
+    ssize_t ret;
+    struct msghdr msg = { 0 };
+    char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)] = { 0 };
+    size_t fdsize = sizeof(int) * fds_num;
+    struct cmsghdr *cmsg;
+    struct iovec iov = { .iov_base = (char *)buf, .iov_len = buf_size };
+
+    msg.msg_iov = &iov;
+    msg.msg_iovlen = 1;
+
+    if (fds && fds_num > 0) {
+        g_assert_cmpuint(fds_num, <, SOCKET_MAX_FDS);
+
+        msg.msg_control = control;
+        msg.msg_controllen = CMSG_SPACE(fdsize);
+
+        cmsg = CMSG_FIRSTHDR(&msg);
+        cmsg->cmsg_len = CMSG_LEN(fdsize);
+        cmsg->cmsg_level = SOL_SOCKET;
+        cmsg->cmsg_type = SCM_RIGHTS;
+        memcpy(CMSG_DATA(cmsg), fds, fdsize);
+    }
+
+    do {
+        ret = sendmsg(socket_fd, &msg, 0);
+    } while (ret < 0 && errno == EINTR);
+    g_assert_cmpint(ret, >, 0);
+}
+
 static GString *qtest_recv_line(QTestState *s)
 {
     GString *line;
@@ -545,7 +580,8 @@ QDict *qtest_qmp_receive(QTestState *s)
  * in the case that they choose to discard all replies up until
  * a particular EVENT is received.
  */
-void qmp_fd_vsend(int fd, const char *fmt, va_list ap)
+void qmp_fd_vsend_fds(int fd, int *fds, size_t fds_num,
+                      const char *fmt, va_list ap)
 {
     QObject *qobj;
 
@@ -569,25 +605,49 @@ void qmp_fd_vsend(int fd, const char *fmt, va_list ap)
             fprintf(stderr, "%s", str);
         }
         /* Send QMP request */
-        socket_send(fd, str, qstring_get_length(qstr));
+        if (fds && fds_num > 0) {
+            socket_send_fds(fd, fds, fds_num, str, qstring_get_length(qstr));
+        } else {
+            socket_send(fd, str, qstring_get_length(qstr));
+        }
 
         qobject_unref(qstr);
         qobject_unref(qobj);
     }
 }
 
+void qmp_fd_vsend(int fd, const char *fmt, va_list ap)
+{
+    qmp_fd_vsend_fds(fd, NULL, 0, fmt, ap);
+}
+
+void qtest_qmp_vsend_fds(QTestState *s, int *fds, size_t fds_num,
+                         const char *fmt, va_list ap)
+{
+    qmp_fd_vsend_fds(s->qmp_fd, fds, fds_num, fmt, ap);
+}
+
 void qtest_qmp_vsend(QTestState *s, const char *fmt, va_list ap)
 {
-    qmp_fd_vsend(s->qmp_fd, fmt, ap);
+    qmp_fd_vsend_fds(s->qmp_fd, NULL, 0, fmt, ap);
 }
 
 QDict *qmp_fdv(int fd, const char *fmt, va_list ap)
 {
-    qmp_fd_vsend(fd, fmt, ap);
+    qmp_fd_vsend_fds(fd, NULL, 0, fmt, ap);
 
     return qmp_fd_receive(fd);
 }
 
+QDict *qtest_vqmp_fds(QTestState *s, int *fds, size_t fds_num,
+                      const char *fmt, va_list ap)
+{
+    qtest_qmp_vsend_fds(s, fds, fds_num, fmt, ap);
+
+    /* Receive reply */
+    return qtest_qmp_receive(s);
+}
+
 QDict *qtest_vqmp(QTestState *s, const char *fmt, va_list ap)
 {
     qtest_qmp_vsend(s, fmt, ap);
@@ -616,6 +676,18 @@ void qmp_fd_send(int fd, const char *fmt, ...)
     va_end(ap);
 }
 
+QDict *qtest_qmp_fds(QTestState *s, int *fds, size_t fds_num,
+                     const char *fmt, ...)
+{
+    va_list ap;
+    QDict *response;
+
+    va_start(ap, fmt);
+    response = qtest_vqmp_fds(s, fds, fds_num, fmt, ap);
+    va_end(ap);
+    return response;
+}
+
 QDict *qtest_qmp(QTestState *s, const char *fmt, ...)
 {
     va_list ap;
@@ -1038,6 +1110,25 @@ QDict *qmp(const char *fmt, ...)
     return response;
 }
 
+void qmp_assert_success(const char *fmt, ...)
+{
+    va_list ap;
+    QDict *response;
+
+    va_start(ap, fmt);
+    response = qtest_vqmp(global_qtest, fmt, ap);
+    va_end(ap);
+
+    g_assert(response);
+    if (!qdict_haskey(response, "return")) {
+        QString *s = qobject_to_json_pretty(QOBJECT(response));
+        g_test_message("%s", qstring_get_str(s));
+        qobject_unref(s);
+    }
+    g_assert(qdict_haskey(response, "return"));
+    qobject_unref(response);
+}
+
 char *hmp(const char *fmt, ...)
 {
     va_list ap;
diff --git a/tests/libqtest.h b/tests/libqtest.h
index a98ea15b7d..cadf1d4a03 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h
@@ -85,6 +85,21 @@ QTestState *qtest_init_with_serial(const char *extra_args, int *sock_fd);
 void qtest_quit(QTestState *s);
 
 /**
+ * qtest_qmp_fds:
+ * @s: #QTestState instance to operate on.
+ * @fds: array of file descriptors
+ * @fds_num: number of elements in @fds
+ * @fmt...: QMP message to send to qemu, formatted like
+ * qobject_from_jsonf_nofail().  See parse_escape() for what's
+ * supported after '%'.
+ *
+ * Sends a QMP message to QEMU with fds and returns the response.
+ */
+QDict *qtest_qmp_fds(QTestState *s, int *fds, size_t fds_num,
+                     const char *fmt, ...)
+    GCC_FMT_ATTR(4, 5);
+
+/**
  * qtest_qmp:
  * @s: #QTestState instance to operate on.
  * @fmt...: QMP message to send to qemu, formatted like
@@ -120,7 +135,23 @@ void qtest_qmp_send_raw(QTestState *s, const char *fmt, ...)
     GCC_FMT_ATTR(2, 3);
 
 /**
- * qtest_qmpv:
+ * qtest_vqmp_fds:
+ * @s: #QTestState instance to operate on.
+ * @fds: array of file descriptors
+ * @fds_num: number of elements in @fds
+ * @fmt: QMP message to send to QEMU, formatted like
+ * qobject_from_jsonf_nofail().  See parse_escape() for what's
+ * supported after '%'.
+ * @ap: QMP message arguments
+ *
+ * Sends a QMP message to QEMU with fds and returns the response.
+ */
+QDict *qtest_vqmp_fds(QTestState *s, int *fds, size_t fds_num,
+                      const char *fmt, va_list ap)
+    GCC_FMT_ATTR(4, 0);
+
+/**
+ * qtest_vqmp:
  * @s: #QTestState instance to operate on.
  * @fmt: QMP message to send to QEMU, formatted like
  * qobject_from_jsonf_nofail().  See parse_escape() for what's
@@ -133,6 +164,22 @@ QDict *qtest_vqmp(QTestState *s, const char *fmt, va_list ap)
     GCC_FMT_ATTR(2, 0);
 
 /**
+ * qtest_qmp_vsend_fds:
+ * @s: #QTestState instance to operate on.
+ * @fds: array of file descriptors
+ * @fds_num: number of elements in @fds
+ * @fmt: QMP message to send to QEMU, formatted like
+ * qobject_from_jsonf_nofail().  See parse_escape() for what's
+ * supported after '%'.
+ * @ap: QMP message arguments
+ *
+ * Sends a QMP message to QEMU and leaves the response in the stream.
+ */
+void qtest_qmp_vsend_fds(QTestState *s, int *fds, size_t fds_num,
+                         const char *fmt, va_list ap)
+    GCC_FMT_ATTR(4, 0);
+
+/**
  * qtest_qmp_vsend:
  * @s: #QTestState instance to operate on.
  * @fmt: QMP message to send to QEMU, formatted like
@@ -619,6 +666,17 @@ static inline void qtest_end(void)
 QDict *qmp(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
 
 /**
+ * qmp_assert_success:
+ * @fmt...: QMP message to send to qemu, formatted like
+ * qobject_from_jsonf_nofail().  See parse_escape() for what's
+ * supported after '%'.
+ *
+ * Sends a QMP message to QEMU and asserts that a 'return' key is present in
+ * the response.
+ */
+void qmp_assert_success(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+
+/*
  * qmp_eventwait:
  * @s: #event event to wait for.
  *
@@ -877,6 +935,8 @@ static inline int64_t clock_step(int64_t step)
 }
 
 QDict *qmp_fd_receive(int fd);
+void qmp_fd_vsend_fds(int fd, int *fds, size_t fds_num,
+                      const char *fmt, va_list ap) GCC_FMT_ATTR(4, 0);
 void qmp_fd_vsend(int fd, const char *fmt, va_list ap) GCC_FMT_ATTR(2, 0);
 void qmp_fd_send(int fd, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
 void qmp_fd_send_raw(int fd, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
diff --git a/tests/migration-test.c b/tests/migration-test.c
index bd3f5c3125..e0407576cb 100644
--- a/tests/migration-test.c
+++ b/tests/migration-test.c
@@ -177,6 +177,21 @@ static void stop_cb(void *opaque, const char *name, QDict *data)
 /*
  * Events can get in the way of responses we are actually waiting for.
  */
+GCC_FMT_ATTR(3, 4)
+static QDict *wait_command_fd(QTestState *who, int fd, const char *command, ...)
+{
+    va_list ap;
+
+    va_start(ap, command);
+    qtest_qmp_vsend_fds(who, &fd, 1, command, ap);
+    va_end(ap);
+
+    return qtest_qmp_receive_success(who, stop_cb, NULL);
+}
+
+/*
+ * Events can get in the way of responses we are actually waiting for.
+ */
 GCC_FMT_ATTR(2, 3)
 static QDict *wait_command(QTestState *who, const char *command, ...)
 {
@@ -480,6 +495,7 @@ static void migrate(QTestState *who, const char *uri, const char *fmt, ...)
     qdict_put_str(args, "uri", uri);
 
     rsp = qmp("{ 'execute': 'migrate', 'arguments': %p}", args);
+
     g_assert(qdict_haskey(rsp, "return"));
     qobject_unref(rsp);
 }
@@ -1027,6 +1043,90 @@ static void test_precopy_tcp(void)
     g_free(uri);
 }
 
+static void test_migrate_fd_proto(void)
+{
+    QTestState *from, *to;
+    int ret;
+    int pair[2];
+    QDict *rsp;
+    const char *error_desc;
+
+    if (test_migrate_start(&from, &to, "defer", false, false)) {
+        return;
+    }
+
+    /*
+     * We want to pick a speed slow enough that the test completes
+     * quickly, but that it doesn't complete precopy even on a slow
+     * machine, so also set the downtime.
+     */
+    /* 1 ms should make it not converge */
+    migrate_set_parameter(from, "downtime-limit", 1);
+    /* 1GB/s */
+    migrate_set_parameter(from, "max-bandwidth", 1000000000);
+
+    /* Wait for the first serial output from the source */
+    wait_for_serial("src_serial");
+
+    /* Create two connected sockets for migration */
+    ret = socketpair(PF_LOCAL, SOCK_STREAM, 0, pair);
+    g_assert_cmpint(ret, ==, 0);
+
+    /* Send the 1st socket to the target */
+    rsp = wait_command_fd(to, pair[0],
+                          "{ 'execute': 'getfd',"
+                          "  'arguments': { 'fdname': 'fd-mig' }}");
+    qobject_unref(rsp);
+    close(pair[0]);
+
+    /* Start incoming migration from the 1st socket */
+    rsp = wait_command(to, "{ 'execute': 'migrate-incoming',"
+                           "  'arguments': { 'uri': 'fd:fd-mig' }}");
+    qobject_unref(rsp);
+
+    /* Send the 2nd socket to the target */
+    rsp = wait_command_fd(from, pair[1],
+                          "{ 'execute': 'getfd',"
+                          "  'arguments': { 'fdname': 'fd-mig' }}");
+    qobject_unref(rsp);
+    close(pair[1]);
+
+    /* Start migration to the 2nd socket*/
+    migrate(from, "fd:fd-mig", "{}");
+
+    wait_for_migration_pass(from);
+
+    /* 300ms should converge */
+    migrate_set_parameter(from, "downtime-limit", 300);
+
+    if (!got_stop) {
+        qtest_qmp_eventwait(from, "STOP");
+    }
+    qtest_qmp_eventwait(to, "RESUME");
+
+    /* Test closing fds */
+    /* We assume, that QEMU removes named fd from its list,
+     * so this should fail */
+    rsp = qtest_qmp(from, "{ 'execute': 'closefd',"
+                          "  'arguments': { 'fdname': 'fd-mig' }}");
+    g_assert_true(qdict_haskey(rsp, "error"));
+    error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc");
+    g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found");
+    qobject_unref(rsp);
+
+    rsp = qtest_qmp(to, "{ 'execute': 'closefd',"
+                        "  'arguments': { 'fdname': 'fd-mig' }}");
+    g_assert_true(qdict_haskey(rsp, "error"));
+    error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc");
+    g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found");
+    qobject_unref(rsp);
+
+    /* Complete migration */
+    wait_for_serial("dest_serial");
+    wait_for_migration_complete(from);
+    test_migrate_end(from, to, true);
+}
+
 int main(int argc, char **argv)
 {
     char template[] = "/tmp/migration-test-XXXXXX";
@@ -1081,6 +1181,7 @@ int main(int argc, char **argv)
     qtest_add_func("/migration/precopy/tcp", test_precopy_tcp);
     /* qtest_add_func("/migration/ignore_shared", test_ignore_shared); */
     qtest_add_func("/migration/xbzrle/unix", test_xbzrle_unix);
+    qtest_add_func("/migration/fd_proto", test_migrate_fd_proto);
 
     ret = g_test_run();
 
diff --git a/tests/pca9552-test.c b/tests/pca9552-test.c
index 5466a67ed7..4b800d3c3e 100644
--- a/tests/pca9552-test.c
+++ b/tests/pca9552-test.c
@@ -10,107 +10,84 @@
 #include "qemu/osdep.h"
 
 #include "libqtest.h"
+#include "libqos/qgraph.h"
 #include "libqos/i2c.h"
 #include "hw/misc/pca9552_regs.h"
 
 #define PCA9552_TEST_ID   "pca9552-test"
 #define PCA9552_TEST_ADDR 0x60
 
-static I2CAdapter *i2c;
-
-static uint8_t pca9552_get8(I2CAdapter *i2c, uint8_t addr, uint8_t reg)
+static void pca9552_init(QI2CDevice *i2cdev)
 {
-    uint8_t resp[1];
-    i2c_send(i2c, addr, &reg, 1);
-    i2c_recv(i2c, addr, resp, 1);
-    return resp[0];
+    /* Switch on LEDs 0 and 12 */
+    i2c_set8(i2cdev, PCA9552_LS0, 0x54);
+    i2c_set8(i2cdev, PCA9552_LS3, 0x54);
 }
 
-static void pca9552_set8(I2CAdapter *i2c, uint8_t addr, uint8_t reg,
-                         uint8_t value)
-{
-    uint8_t cmd[2];
-    uint8_t resp[1];
-
-    cmd[0] = reg;
-    cmd[1] = value;
-    i2c_send(i2c, addr, cmd, 2);
-    i2c_recv(i2c, addr, resp, 1);
-    g_assert_cmphex(resp[0], ==, cmd[1]);
-}
-
-static void receive_autoinc(void)
+static void receive_autoinc(void *obj, void *data, QGuestAllocator *alloc)
 {
+    QI2CDevice *i2cdev = (QI2CDevice *)obj;
     uint8_t resp;
     uint8_t reg = PCA9552_LS0 | PCA9552_AUTOINC;
 
-    i2c_send(i2c, PCA9552_TEST_ADDR, &reg, 1);
+    pca9552_init(i2cdev);
+
+    i2c_send(i2cdev, &reg, 1);
 
     /* PCA9552_LS0 */
-    i2c_recv(i2c, PCA9552_TEST_ADDR, &resp, 1);
+    i2c_recv(i2cdev, &resp, 1);
     g_assert_cmphex(resp, ==, 0x54);
 
     /* PCA9552_LS1 */
-    i2c_recv(i2c, PCA9552_TEST_ADDR, &resp, 1);
+    i2c_recv(i2cdev, &resp, 1);
     g_assert_cmphex(resp, ==, 0x55);
 
     /* PCA9552_LS2 */
-    i2c_recv(i2c, PCA9552_TEST_ADDR, &resp, 1);
+    i2c_recv(i2cdev, &resp, 1);
     g_assert_cmphex(resp, ==, 0x55);
 
     /* PCA9552_LS3 */
-    i2c_recv(i2c, PCA9552_TEST_ADDR, &resp, 1);
+    i2c_recv(i2cdev, &resp, 1);
     g_assert_cmphex(resp, ==, 0x54);
 }
 
-static void send_and_receive(void)
+static void send_and_receive(void *obj, void *data, QGuestAllocator *alloc)
 {
+    QI2CDevice *i2cdev = (QI2CDevice *)obj;
     uint8_t value;
 
-    value = pca9552_get8(i2c, PCA9552_TEST_ADDR, PCA9552_LS0);
+    value = i2c_get8(i2cdev, PCA9552_LS0);
     g_assert_cmphex(value, ==, 0x55);
 
-    value = pca9552_get8(i2c, PCA9552_TEST_ADDR, PCA9552_INPUT0);
+    value = i2c_get8(i2cdev, PCA9552_INPUT0);
     g_assert_cmphex(value, ==, 0x0);
 
-    /* Switch on LED 0 */
-    pca9552_set8(i2c, PCA9552_TEST_ADDR, PCA9552_LS0, 0x54);
-    value = pca9552_get8(i2c, PCA9552_TEST_ADDR, PCA9552_LS0);
+    pca9552_init(i2cdev);
+
+    value = i2c_get8(i2cdev, PCA9552_LS0);
     g_assert_cmphex(value, ==, 0x54);
 
-    value = pca9552_get8(i2c, PCA9552_TEST_ADDR, PCA9552_INPUT0);
+    value = i2c_get8(i2cdev, PCA9552_INPUT0);
     g_assert_cmphex(value, ==, 0x01);
 
-    /* Switch on LED 12 */
-    pca9552_set8(i2c, PCA9552_TEST_ADDR, PCA9552_LS3, 0x54);
-    value = pca9552_get8(i2c, PCA9552_TEST_ADDR, PCA9552_LS3);
+    value = i2c_get8(i2cdev, PCA9552_LS3);
     g_assert_cmphex(value, ==, 0x54);
 
-    value = pca9552_get8(i2c, PCA9552_TEST_ADDR, PCA9552_INPUT1);
+    value = i2c_get8(i2cdev, PCA9552_INPUT1);
     g_assert_cmphex(value, ==, 0x10);
 }
 
-int main(int argc, char **argv)
+static void pca9552_register_nodes(void)
 {
-    QTestState *s = NULL;
-    int ret;
-
-    g_test_init(&argc, &argv, NULL);
-
-    s = qtest_start("-machine n800 "
-                    "-device pca9552,bus=i2c-bus.0,id=" PCA9552_TEST_ID
-                    ",address=0x60");
-    i2c = omap_i2c_create(s, OMAP2_I2C_1_BASE);
-
-    qtest_add_func("/pca9552/tx-rx", send_and_receive);
-    qtest_add_func("/pca9552/rx-autoinc", receive_autoinc);
-
-    ret = g_test_run();
+    QOSGraphEdgeOptions opts = {
+        .extra_device_opts = "address=0x60"
+    };
+    add_qi2c_address(&opts, &(QI2CAddress) { 0x60 });
 
-    if (s) {
-        qtest_quit(s);
-    }
-    g_free(i2c);
+    qos_node_create_driver("pca9552", i2c_device_create);
+    qos_node_consumes("pca9552", "i2c-bus", &opts);
 
-    return ret;
+    qos_add_test("tx-rx", "pca9552", send_and_receive, NULL);
+    qos_add_test("rx-autoinc", "pca9552", receive_autoinc, NULL);
 }
+libqos_init(pca9552_register_nodes);
diff --git a/tests/perf/block/qcow2/convert-blockstatus b/tests/perf/block/qcow2/convert-blockstatus
new file mode 100755
index 0000000000..a1a3c1ef43
--- /dev/null
+++ b/tests/perf/block/qcow2/convert-blockstatus
@@ -0,0 +1,71 @@
+#!/bin/bash
+#
+# Test lseek influence on qcow2 block-status
+#
+# Block layer may recursively check block_status in file child of qcow2, if
+# qcow2 driver returned DATA. There are several test cases to check influence
+# of lseek on block_status performance. To see real difference run on tmpfs.
+#
+# Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved.
+#
+# Tests originally written by Kevin Wolf
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+if [ "$#" -lt 1 ]; then
+    echo "Usage: $0 SOURCE_FILE"
+    exit 1
+fi
+
+ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../../../.." >/dev/null 2>&1 && pwd )"
+QEMU_IMG="$ROOT_DIR/qemu-img"
+QEMU_IO="$ROOT_DIR/qemu-io"
+
+size=1G
+src="$1"
+
+# test-case plain
+
+(
+$QEMU_IMG create -f qcow2 "$src" $size
+for i in $(seq 16384 -1 0); do
+    echo "write $((i * 65536)) 64k"
+done | $QEMU_IO "$src"
+) > /dev/null
+
+echo -n "plain: "
+/usr/bin/time -f %e $QEMU_IMG convert -n "$src" null-co://
+
+# test-case forward
+
+(
+$QEMU_IMG create -f qcow2 "$src" $size
+for i in $(seq 0 2 16384); do
+    echo "write $((i * 65536)) 64k"
+done | $QEMU_IO "$src"
+for i in $(seq 1 2 16384); do
+    echo "write $((i * 65536)) 64k"
+done | $QEMU_IO "$src"
+) > /dev/null
+
+echo -n "forward: "
+/usr/bin/time -f %e $QEMU_IMG convert -n "$src" null-co://
+
+# test-case prealloc
+
+$QEMU_IMG create -f qcow2 -o preallocation=metadata "$src" $size > /dev/null
+
+echo -n "prealloc: "
+/usr/bin/time -f %e $QEMU_IMG convert -n "$src" null-co://
diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051
index a3deb1fcad..200660f977 100755
--- a/tests/qemu-iotests/051
+++ b/tests/qemu-iotests/051
@@ -192,6 +192,30 @@ case "$QEMU_DEFAULT_MACHINE" in
 esac
 
 echo
+echo === Attach to node in non-default iothread ===
+echo
+
+case "$QEMU_DEFAULT_MACHINE" in
+    pc)
+        iothread="-drive file=$TEST_IMG,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on"
+
+        # Can't add a device in the main thread while virtio-scsi0 uses the node
+        run_qemu $iothread -device ide-hd,drive=disk,share-rw=on
+        run_qemu $iothread -device virtio-blk-pci,drive=disk,share-rw=on
+        run_qemu $iothread -device lsi53c895a,id=lsi0 -device scsi-hd,bus=lsi0.0,drive=disk,share-rw=on
+        run_qemu $iothread -device virtio-scsi,id=virtio-scsi1 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on
+
+        # virtio-blk enables the iothread only when the driver initialises the
+        # device, so a second virtio-blk device can't be added even with the
+        # same iothread. virtio-scsi allows this.
+        run_qemu $iothread -device virtio-blk-pci,drive=disk,iohtread=iothread0,share-rw=on
+        run_qemu $iothread -device virtio-scsi,id=virtio-scsi1,iothread=thread0 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on
+        ;;
+     *)
+        ;;
+esac
+
+echo
 echo === Read-only ===
 echo
 
diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out
index 9f1cf22608..8993835b94 100644
--- a/tests/qemu-iotests/051.out
+++ b/tests/qemu-iotests/051.out
@@ -137,6 +137,9 @@ QEMU X.Y.Z monitor - type 'help' for more information
 (qemu) QEMU_PROG: -drive if=virtio: Device needs media, but drive is empty
 
 
+=== Attach to node in non-default iothread ===
+
+
 === Read-only ===
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=virtio,readonly=on
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
index c4743cc31c..2d811c166c 100644
--- a/tests/qemu-iotests/051.pc.out
+++ b/tests/qemu-iotests/051.pc.out
@@ -173,6 +173,33 @@ QEMU X.Y.Z monitor - type 'help' for more information
 (qemu) QEMU_PROG: -device scsi-hd,drive=disk: Device needs media, but drive is empty
 
 
+=== Attach to node in non-default iothread ===
+
+Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device ide-hd,drive=disk,share-rw=on
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) QEMU_PROG: -device ide-hd,drive=disk,share-rw=on: Cannot change iothread of active block backend
+
+Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-blk-pci,drive=disk,share-rw=on
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) QEMU_PROG: -device virtio-blk-pci,drive=disk,share-rw=on: Cannot change iothread of active block backend
+
+Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device lsi53c895a,id=lsi0 -device scsi-hd,bus=lsi0.0,drive=disk,share-rw=on
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) QEMU_PROG: -device scsi-hd,bus=lsi0.0,drive=disk,share-rw=on: HBA does not support iothreads
+
+Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-scsi,id=virtio-scsi1 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) QEMU_PROG: -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on: Cannot change iothread of active block backend
+
+Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-blk-pci,drive=disk,iohtread=iothread0,share-rw=on
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) QEMU_PROG: -device virtio-blk-pci,drive=disk,iohtread=iothread0,share-rw=on: Cannot change iothread of active block backend
+
+Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-scsi,id=virtio-scsi1,iothread=thread0 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) quit
+
+
 === Read-only ===
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=floppy,readonly=on
diff --git a/tests/qemu-iotests/056 b/tests/qemu-iotests/056
index 3df323984d..f40fc11a09 100755
--- a/tests/qemu-iotests/056
+++ b/tests/qemu-iotests/056
@@ -214,7 +214,7 @@ class BackupTest(iotests.QMPTestCase):
         res = self.vm.qmp('query-block-jobs')
         self.assert_qmp(res, 'return[0]/status', 'concluded')
         # Leave zombie job un-dismissed, observe a failure:
-        res = self.qmp_backup_and_wait(serror='Need a root block node',
+        res = self.qmp_backup_and_wait(serror="Node 'drive0' is busy: block device is in use by block job: backup",
                                        device='drive0', format=iotests.imgfmt,
                                        sync='full', target=self.dest_img,
                                        auto_dismiss=False)
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index 89e911400c..b91d8321bb 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -150,10 +150,15 @@ $QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io
 echo
 echo "=== Testing overlap while COW is in flight ==="
 echo
+BACKING_IMG=$TEST_IMG.base
+TEST_IMG=$BACKING_IMG _make_test_img 1G
+
+$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io
+
 # compat=0.10 is required in order to make the following discard actually
 # unallocate the sector rather than make it a zero sector - we want COW, after
 # all.
-IMGOPTS='compat=0.10' _make_test_img 1G
+IMGOPTS='compat=0.10' _make_test_img -b "$BACKING_IMG" 1G
 # Write two clusters, the second one enforces creation of an L2 table after
 # the first data cluster.
 $QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index e42bf8c5a9..0f6b0658a1 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -97,7 +97,10 @@ read 512/512 bytes at offset 0
 
 === Testing overlap while COW is in flight ===
 
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=1073741824
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 backing_file=TEST_DIR/t.IMGFMT.base
 wrote 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 65536/65536 bytes at offset 536870912
diff --git a/tests/qemu-iotests/102 b/tests/qemu-iotests/102
index 749ff66b8a..b898df436f 100755
--- a/tests/qemu-iotests/102
+++ b/tests/qemu-iotests/102
@@ -55,7 +55,7 @@ $QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
 $QEMU_IMG resize -f raw --shrink "$TEST_IMG" $((5 * 64 * 1024))
 
 $QEMU_IO -c map "$TEST_IMG"
-$QEMU_IMG map "$TEST_IMG"
+$QEMU_IMG map "$TEST_IMG" | _filter_qemu_img_map
 
 echo
 echo '=== Testing map on an image file truncated outside of qemu ==='
diff --git a/tests/qemu-iotests/102.out b/tests/qemu-iotests/102.out
index 4401b08fee..cd2fdc7f96 100644
--- a/tests/qemu-iotests/102.out
+++ b/tests/qemu-iotests/102.out
@@ -7,7 +7,8 @@ wrote 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Image resized.
 64 KiB (0x10000) bytes     allocated at offset 0 bytes (0x0)
-Offset          Length          Mapped to       File
+Offset          Length          File
+0               0x10000         TEST_DIR/t.IMGFMT
 
 === Testing map on an image file truncated outside of qemu ===
 
diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out
index 41c7291258..4d71d9dcae 100644
--- a/tests/qemu-iotests/141.out
+++ b/tests/qemu-iotests/141.out
@@ -42,9 +42,9 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
+{"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
-{"return": {}}
 {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}}
diff --git a/tests/qemu-iotests/144.out b/tests/qemu-iotests/144.out
index 55299201e4..a9a8216bea 100644
--- a/tests/qemu-iotests/144.out
+++ b/tests/qemu-iotests/144.out
@@ -14,10 +14,10 @@ Formatting 'TEST_DIR/tmp.qcow2', fmt=qcow2 size=536870912 backing_file=TEST_DIR/
 
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "virtio0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "virtio0"}}
+{"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "virtio0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "virtio0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
 {"return": {}}
-{"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "virtio0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "virtio0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "virtio0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
diff --git a/tests/qemu-iotests/240 b/tests/qemu-iotests/240
index b4cf95096d..5be6b9c0f7 100755
--- a/tests/qemu-iotests/240
+++ b/tests/qemu-iotests/240
@@ -27,6 +27,12 @@ echo "QA output created by $seq"
 
 status=1	# failure is the default!
 
+_cleanup()
+{
+    rm -f "$TEST_DIR/nbd"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
 # get standard environment, filters and checks
 . ./common.rc
 . ./common.filter
@@ -122,6 +128,21 @@ run_qemu <<EOF
 { "execute": "quit"}
 EOF
 
+echo
+echo === Attach a SCSI disks using the same block device as a NBD server ===
+echo
+
+run_qemu <<EOF
+{ "execute": "qmp_capabilities" }
+{ "execute": "blockdev-add", "arguments": {"driver": "null-co", "node-name": "hd0", "read-only": true}}
+{ "execute": "nbd-server-start", "arguments": {"addr":{"type":"unix","data":{"path":"$TEST_DIR/nbd"}}}}
+{ "execute": "nbd-server-add", "arguments": {"device":"hd0"}}
+{ "execute": "object-add", "arguments": {"qom-type": "iothread", "id": "iothread0"}}
+{ "execute": "device_add", "arguments": {"id": "scsi0", "driver": "${virtio_scsi}", "iothread": "iothread0"}}
+{ "execute": "device_add", "arguments": {"id": "scsi-hd0", "driver": "scsi-hd", "drive": "hd0", "bus": "scsi0.0"}}
+{ "execute": "quit"}
+EOF
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/240.out b/tests/qemu-iotests/240.out
index d76392966c..d00df50297 100644
--- a/tests/qemu-iotests/240.out
+++ b/tests/qemu-iotests/240.out
@@ -43,7 +43,20 @@ QMP_VERSION
 {"return": {}}
 {"return": {}}
 {"return": {}}
-{"error": {"class": "GenericError", "desc": "Cannot attach a blockdev that is using a different iothread"}}
+{"error": {"class": "GenericError", "desc": "Cannot change iothread of active block backend"}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+
+=== Attach a SCSI disks using the same block device as a NBD server ===
+
+Testing:
+QMP_VERSION
+{"return": {}}
 {"return": {}}
 {"return": {}}
 {"return": {}}
diff --git a/tests/qemu-iotests/250 b/tests/qemu-iotests/250
new file mode 100755
index 0000000000..c9c0a84a5a
--- /dev/null
+++ b/tests/qemu-iotests/250
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+#
+# Test big discard in qcow2 shrink
+#
+# Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=vsementsov@virtuozzo.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1	# failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+# This test checks that qcow2_process_discards does not truncate a discard
+# request > 2G.
+# To reproduce bug we need to overflow int by one sequential discard, so we
+# need size > 2G, bigger cluster size (as with default 64k we may have maximum
+# of 512M sequential data, corresponding to one L1 entry), and we need some
+# data of the beginning of the disk mapped to the end of file to prevent
+# bdrv_co_truncate(bs->file) call in qcow2_co_truncate(), which might succeed
+# anyway.
+
+disk_usage()
+{
+    du --block-size=1 $1 | awk '{print $1}'
+}
+
+size=2100M
+IMGOPTS="cluster_size=1M,preallocation=metadata"
+
+_make_test_img $size
+$QEMU_IO -c 'discard 0 10M' -c 'discard 2090M 10M' \
+         -c 'write 2090M 10M' -c 'write 0 10M' "$TEST_IMG" | _filter_qemu_io
+
+# Check that our trick with swapping first and last 10M chunks succeeded.
+# Otherwise test may pass even if bdrv_pdiscard() fails in
+# qcow2_process_discards()
+$QEMU_IMG map "$TEST_IMG" | _filter_testdir
+
+before=$(disk_usage "$TEST_IMG")
+$QEMU_IMG resize --shrink "$TEST_IMG" 5M
+after=$(disk_usage "$TEST_IMG")
+
+echo "Disk usage delta: $((before - after))"
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/250.out b/tests/qemu-iotests/250.out
new file mode 100644
index 0000000000..f480fd273b
--- /dev/null
+++ b/tests/qemu-iotests/250.out
@@ -0,0 +1,16 @@
+QA output created by 250
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 preallocation=metadata
+discard 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 10485760/10485760 bytes at offset 2191523840
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 10485760/10485760 bytes at offset 2191523840
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          Mapped to       File
+0               0xa00000        0x82f00000      TEST_DIR/t.qcow2
+0x82a00000      0xa00000        0x500000        TEST_DIR/t.qcow2
+Image resized.
+Disk usage delta: 15728640
+*** done
diff --git a/tests/qemu-iotests/254 b/tests/qemu-iotests/254
new file mode 100755
index 0000000000..33cb80a512
--- /dev/null
+++ b/tests/qemu-iotests/254
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+#
+# Test external snapshot with bitmap copying.
+#
+# Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import iotests
+from iotests import qemu_img_create, file_path, log
+
+disk, top = file_path('disk', 'top')
+size = 1024 * 1024
+
+qemu_img_create('-f', iotests.imgfmt, disk, str(size))
+
+vm = iotests.VM().add_drive(disk, opts='node-name=base')
+vm.launch()
+
+vm.qmp_log('block-dirty-bitmap-add', node='drive0', name='bitmap0')
+
+vm.hmp_qemu_io('drive0', 'write 0 512K')
+
+vm.qmp_log('transaction', indent=2, actions=[
+    {'type': 'blockdev-snapshot-sync',
+     'data': {'device': 'drive0', 'snapshot-file': top,
+              'snapshot-node-name': 'snap'}},
+    {'type': 'block-dirty-bitmap-add',
+     'data': {'node': 'snap', 'name': 'bitmap0'}},
+    {'type': 'block-dirty-bitmap-merge',
+     'data': {'node': 'snap', 'target': 'bitmap0',
+              'bitmaps': [{'node': 'base', 'name': 'bitmap0'}]}}
+], filters=[iotests.filter_qmp_testfiles])
+
+result = vm.qmp('query-block')['return'][0]
+log("query-block: device = {}, node-name = {}, dirty-bitmaps:".format(
+    result['device'], result['inserted']['node-name']))
+log(result['dirty-bitmaps'], indent=2)
+
+vm.shutdown()
diff --git a/tests/qemu-iotests/254.out b/tests/qemu-iotests/254.out
new file mode 100644
index 0000000000..d7394cf002
--- /dev/null
+++ b/tests/qemu-iotests/254.out
@@ -0,0 +1,52 @@
+{"execute": "block-dirty-bitmap-add", "arguments": {"name": "bitmap0", "node": "drive0"}}
+{"return": {}}
+{
+  "execute": "transaction",
+  "arguments": {
+    "actions": [
+      {
+        "data": {
+          "device": "drive0",
+          "snapshot-file": "TEST_DIR/PID-top",
+          "snapshot-node-name": "snap"
+        },
+        "type": "blockdev-snapshot-sync"
+      },
+      {
+        "data": {
+          "name": "bitmap0",
+          "node": "snap"
+        },
+        "type": "block-dirty-bitmap-add"
+      },
+      {
+        "data": {
+          "bitmaps": [
+            {
+              "name": "bitmap0",
+              "node": "base"
+            }
+          ],
+          "node": "snap",
+          "target": "bitmap0"
+        },
+        "type": "block-dirty-bitmap-merge"
+      }
+    ]
+  }
+}
+{
+  "return": {}
+}
+query-block: device = drive0, node-name = snap, dirty-bitmaps:
+[
+  {
+    "busy": false,
+    "count": 524288,
+    "granularity": 65536,
+    "name": "bitmap0",
+    "persistent": false,
+    "recording": true,
+    "status": "active"
+  }
+]
diff --git a/tests/qemu-iotests/255 b/tests/qemu-iotests/255
new file mode 100755
index 0000000000..49433ec122
--- /dev/null
+++ b/tests/qemu-iotests/255
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+#
+# Test commit job graph modifications while requests are active
+#
+# Copyright (C) 2019 Red Hat, Inc.
+#
+# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import iotests
+from iotests import imgfmt
+
+iotests.verify_image_format(supported_fmts=['qcow2'])
+
+def blockdev_create(vm, options):
+    result = vm.qmp_log('blockdev-create',
+                        filters=[iotests.filter_qmp_testfiles],
+                        job_id='job0', options=options)
+
+    if 'return' in result:
+        assert result['return'] == {}
+        vm.run_job('job0')
+    iotests.log("")
+
+iotests.log('Finishing a commit job with background reads')
+iotests.log('============================================')
+iotests.log('')
+
+with iotests.FilePath('t.qcow2') as disk_path, \
+     iotests.FilePath('t.qcow2.mid') as mid_path, \
+     iotests.FilePath('t.qcow2.base') as base_path, \
+     iotests.VM() as vm:
+
+    iotests.log("=== Create backing chain and start VM ===")
+    iotests.log("")
+
+    size = 128 * 1024 * 1024
+    size_str = str(size)
+
+    iotests.create_image(base_path, size)
+    iotests.qemu_img_log('create', '-f', iotests.imgfmt, mid_path, size_str)
+    iotests.qemu_img_log('create', '-f', iotests.imgfmt, disk_path, size_str)
+
+    # Create a backing chain like this:
+    # base <- [throttled: bps-read=4096] <- mid <- overlay
+
+    vm.add_object('throttle-group,x-bps-read=4096,id=throttle0')
+    vm.add_blockdev('file,filename=%s,node-name=base' % (base_path))
+    vm.add_blockdev('throttle,throttle-group=throttle0,file=base,node-name=throttled')
+    vm.add_blockdev('file,filename=%s,node-name=mid-file' % (mid_path))
+    vm.add_blockdev('qcow2,file=mid-file,node-name=mid,backing=throttled')
+    vm.add_drive_raw('if=none,id=overlay,driver=qcow2,file=%s,backing=mid' % (disk_path))
+
+    vm.launch()
+
+    iotests.log("=== Start background read requests ===")
+    iotests.log("")
+
+    def start_requests():
+        vm.hmp_qemu_io('overlay', 'aio_read 0 4k')
+        vm.hmp_qemu_io('overlay', 'aio_read 0 4k')
+
+    start_requests()
+
+    iotests.log("=== Run a commit job ===")
+    iotests.log("")
+
+    result = vm.qmp_log('block-commit', job_id='job0', auto_finalize=False,
+                        device='overlay', top_node='mid')
+
+    vm.run_job('job0', auto_finalize=False, pre_finalize=start_requests,
+                auto_dismiss=True)
+
+    vm.shutdown()
+
+iotests.log('')
+iotests.log('Closing the VM while a job is being cancelled')
+iotests.log('=============================================')
+iotests.log('')
+
+with iotests.FilePath('src.qcow2') as src_path, \
+     iotests.FilePath('dst.qcow2') as dst_path, \
+     iotests.VM() as vm:
+
+    iotests.log('=== Create images and start VM ===')
+    iotests.log('')
+
+    size = 128 * 1024 * 1024
+    size_str = str(size)
+
+    iotests.qemu_img_log('create', '-f', iotests.imgfmt, src_path, size_str)
+    iotests.qemu_img_log('create', '-f', iotests.imgfmt, dst_path, size_str)
+
+    iotests.log(iotests.qemu_io('-f', iotests.imgfmt, '-c', 'write 0 1M',
+                                src_path),
+                filters=[iotests.filter_test_dir, iotests.filter_qemu_io])
+
+    vm.add_object('throttle-group,x-bps-read=4096,id=throttle0')
+
+    vm.add_blockdev('file,node-name=src-file,filename=%s' % (src_path))
+    vm.add_blockdev('%s,node-name=src,file=src-file' % (iotests.imgfmt))
+
+    vm.add_blockdev('file,node-name=dst-file,filename=%s' % (dst_path))
+    vm.add_blockdev('%s,node-name=dst,file=dst-file' % (iotests.imgfmt))
+
+    vm.add_blockdev('throttle,node-name=src-throttled,' +
+                    'throttle-group=throttle0,file=src')
+
+    vm.add_device('virtio-blk,drive=src-throttled')
+
+    vm.launch()
+
+    iotests.log('=== Start a mirror job ===')
+    iotests.log('')
+
+    vm.qmp_log('blockdev-mirror', job_id='job0', device='src-throttled',
+                                  target='dst', sync='full')
+
+    vm.qmp_log('block-job-cancel', device='job0')
+    vm.qmp_log('quit')
+
+    vm.shutdown()
diff --git a/tests/qemu-iotests/255.out b/tests/qemu-iotests/255.out
new file mode 100644
index 0000000000..348909fdef
--- /dev/null
+++ b/tests/qemu-iotests/255.out
@@ -0,0 +1,40 @@
+Finishing a commit job with background reads
+============================================
+
+=== Create backing chain and start VM ===
+
+Formatting 'TEST_DIR/PID-t.qcow2.mid', fmt=qcow2 size=134217728 cluster_size=65536 lazy_refcounts=off refcount_bits=16
+
+Formatting 'TEST_DIR/PID-t.qcow2', fmt=qcow2 size=134217728 cluster_size=65536 lazy_refcounts=off refcount_bits=16
+
+=== Start background read requests ===
+
+=== Run a commit job ===
+
+{"execute": "block-commit", "arguments": {"auto-finalize": false, "device": "overlay", "job-id": "job0", "top-node": "mid"}}
+{"return": {}}
+{"execute": "job-finalize", "arguments": {"id": "job0"}}
+{"return": {}}
+{"data": {"id": "job0", "type": "commit"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"device": "job0", "len": 134217728, "offset": 134217728, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+
+Closing the VM while a job is being cancelled
+=============================================
+
+=== Create images and start VM ===
+
+Formatting 'TEST_DIR/PID-src.qcow2', fmt=qcow2 size=134217728 cluster_size=65536 lazy_refcounts=off refcount_bits=16
+
+Formatting 'TEST_DIR/PID-dst.qcow2', fmt=qcow2 size=134217728 cluster_size=65536 lazy_refcounts=off refcount_bits=16
+
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Start a mirror job ===
+
+{"execute": "blockdev-mirror", "arguments": {"device": "src-throttled", "job-id": "job0", "sync": "full", "target": "dst"}}
+{"return": {}}
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
+{"return": {}}
+{"execute": "quit", "arguments": {}}
+{"return": {}}
diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 922c5d1d3d..44ebf24080 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -27,9 +27,7 @@ bad=""
 notrun=""
 casenotrun=""
 interrupt=true
-
-# by default don't output timestamps
-timestamp=${TIMESTAMP:=false}
+makecheck=false
 
 _init_error()
 {
@@ -88,6 +86,22 @@ _full_platform_details()
     echo "$os/$platform $host $kernel"
 }
 
+_full_env_details()
+{
+    cat <<EOF
+QEMU          -- "$QEMU_PROG" $QEMU_OPTIONS
+QEMU_IMG      -- "$QEMU_IMG_PROG" $QEMU_IMG_OPTIONS
+QEMU_IO       -- "$QEMU_IO_PROG" $QEMU_IO_OPTIONS
+QEMU_NBD      -- "$QEMU_NBD_PROG" $QEMU_NBD_OPTIONS
+IMGFMT        -- $FULL_IMGFMT_DETAILS
+IMGPROTO      -- $IMGPROTO
+PLATFORM      -- $FULL_HOST_DETAILS
+TEST_DIR      -- $TEST_DIR
+SOCKET_SCM_HELPER -- $SOCKET_SCM_HELPER
+
+EOF
+}
+
 # $1 = prog to look for
 set_prog_path()
 {
@@ -254,8 +268,8 @@ other options
     -misalign           misalign memory allocations
     -n                  show me, do not run tests
     -o options          -o options to pass to qemu-img create/convert
-    -T                  output timestamps
     -c mode             cache mode
+    -makecheck          pretty print output for make check
 
 testlist options
     -g group[,group...]        include tests from these groups
@@ -403,7 +417,10 @@ testlist options
                 command -v xxdiff >/dev/null 2>&1 && diff=xxdiff
             fi
             ;;
-
+        -makecheck)   # makecheck friendly output
+            makecheck=true
+            xpand=false
+            ;;
         -n)        # show me, don't do it
             showme=true
             xpand=false
@@ -416,8 +433,7 @@ testlist options
             cachemode=true
             xpand=false
             ;;
-        -T)        # turn on timestamp output
-            timestamp=true
+        -T)        # deprecated timestamp option
             xpand=false
             ;;
 
@@ -633,12 +649,6 @@ _wallclock()
     date "+%H %M %S" | awk '{ print $1*3600 + $2*60 + $3 }'
 }
 
-_timestamp()
-{
-    now=$(date "+%T")
-    printf %s " [$now]"
-}
-
 _wrapup()
 {
     if $showme
@@ -704,23 +714,54 @@ END        { if (NR > 0) {
 
 trap "_wrapup; exit \$status" 0 1 2 3 15
 
+# Report the test start and results. For makecheck we want to pretty
+# print the whole report at the end of the execution.
+# args: $seq, $starttime, $lasttime
+_report_test_start()
+{
+    if ! $makecheck; then
+        if [ -n "$3" ]; then
+            local lasttime=" (last: $3s)"
+        fi
+        printf "%-8s %-10s [%s]            %4s%-14s\r" "$1" "..." "$2" "..." "$lasttime"
+    fi
+}
+# args:$seq $status $starttime $lasttime $thistime $details
+_report_test_result()
+{
+    local status lasttime thistime
+    if $makecheck; then
+        if [ -n "$2" ] && [ "$2" != "pass" ]; then
+            status=" [$2]"
+        fi
+        printf "  TEST    iotest-$IMGFMT: %s%s\n" "$1" "$status"
+        return
+    fi
+
+    if [ -n "$4" ]; then
+        lasttime=" (last: $4s)"
+    fi
+    if [ -n "$5" ]; then
+        thistime=" $5s"
+    fi
+    case "$2" in
+        "pass")     status=$(printf "\e[32m%-10s\e[0m" "$2") ;;
+        "fail")     status=$(printf "\e[1m\e[31m%-10s\e[0m" "$2") ;;
+        "not run")  status=$(printf "\e[33m%-10s\e[0m" "$2") ;;
+        *)          status=$(printf "%-10s" "$2") ;;
+    esac
+
+    printf "%-8s %s [%s] [%s] %4s%-14s %s\n" "$1" "$status" "$3" "$(date '+%T')" "$thistime" "$lasttime" "$6"
+}
+
 [ -f $TIMESTAMP_FILE ] || touch $TIMESTAMP_FILE
 
 FULL_IMGFMT_DETAILS=$(_full_imgfmt_details)
 FULL_HOST_DETAILS=$(_full_platform_details)
 
-cat <<EOF
-QEMU          -- "$QEMU_PROG" $QEMU_OPTIONS
-QEMU_IMG      -- "$QEMU_IMG_PROG" $QEMU_IMG_OPTIONS
-QEMU_IO       -- "$QEMU_IO_PROG" $QEMU_IO_OPTIONS
-QEMU_NBD      -- "$QEMU_NBD_PROG" $QEMU_NBD_OPTIONS
-IMGFMT        -- $FULL_IMGFMT_DETAILS
-IMGPROTO      -- $IMGPROTO
-PLATFORM      -- $FULL_HOST_DETAILS
-TEST_DIR      -- $TEST_DIR
-SOCKET_SCM_HELPER -- $SOCKET_SCM_HELPER
-
-EOF
+if ! $makecheck; then
+    _full_env_details
+fi
 
 seq="check"
 
@@ -728,42 +769,43 @@ seq="check"
 
 for seq in $list
 do
-    err=false
-    printf %s "$seq"
+    err=false       # error flag
+    printdiff=false # show diff to reference output?
+    status=""       # test result summary
+    results=""      # test result details
+
     if [ -n "$TESTS_REMAINING_LOG" ] ; then
         sed -e "s/$seq//" -e 's/  / /' -e 's/^ *//' $TESTS_REMAINING_LOG > $TESTS_REMAINING_LOG.tmp
         mv $TESTS_REMAINING_LOG.tmp $TESTS_REMAINING_LOG
         sync
     fi
 
+    lasttime=$(sed -n -e "/^$seq /s/.* //p" <$TIMESTAMP_FILE)
+    starttime=$(date "+%T")
+    _report_test_start $seq $starttime $lasttime
+
     if $showme
     then
-        echo
-        continue
+        status="not run"
     elif [ -f expunged ] && $expunge && egrep "^$seq([         ]|\$)" expunged >/dev/null
     then
-        echo " - expunged"
+        status="not run"
+        results="expunged"
         rm -f $seq.out.bad
         echo "/^$seq\$/d" >>$tmp.expunged
     elif [ ! -f "$source_iotests/$seq" ]
     then
-        echo " - no such test?"
+        status="not run"
+        results="no such test?"
         echo "/^$seq\$/d" >>$tmp.expunged
     else
         # really going to try and run this one
         #
         rm -f $seq.out.bad
-        lasttime=$(sed -n -e "/^$seq /s/.* //p" <$TIMESTAMP_FILE)
-        if [ "X$lasttime" != X ]; then
-                printf %s " ${lasttime}s ..."
-        else
-                printf "        "        # prettier output with timestamps.
-        fi
         rm -f core $seq.notrun
         rm -f $seq.casenotrun
 
         start=$(_wallclock)
-        $timestamp && printf %s "        [$(date "+%T")]"
 
         if [ "$(head -n 1 "$source_iotests/$seq")" == "#!/usr/bin/env python" ]; then
             run_command="$PYTHON $seq"
@@ -781,26 +823,26 @@ do
                     $run_command >$tmp.out 2>&1)
         fi
         sts=$?
-        $timestamp && _timestamp
         stop=$(_wallclock)
 
         if [ -f core ]
         then
-            printf " [dumped core]"
             mv core $seq.core
+            status="fail"
+            results="[dumped core] $seq.core"
             err=true
         fi
 
         if [ -f $seq.notrun ]
         then
-            $timestamp || printf " [not run] "
-            $timestamp && echo " [not run]" && printf %s "        $seq -- "
-            cat $seq.notrun
-            notrun="$notrun $seq"
+            # overwrites timestamp output
+            status="not run"
+            results="$(cat $seq.notrun)"
         else
             if [ $sts -ne 0 ]
             then
-                printf %s " [failed, exit status $sts]"
+                status="fail"
+                results=$(printf %s "[failed, exit status $sts]")
                 err=true
             fi
 
@@ -821,22 +863,22 @@ do
 
             if [ ! -f "$reference" ]
             then
-                echo " - no qualified output"
+                status="fail"
+                reason="no qualified output"
                 err=true
             else
                 if diff -w "$reference" $tmp.out >/dev/null 2>&1
                 then
-                    echo ""
-                    if $err
-                    then
-                        :
-                    else
-                        echo "$seq $(expr $stop - $start)" >>$tmp.time
+                    if ! $err; then
+                        status="pass"
+                        thistime=$(expr $stop - $start)
+                        echo "$seq $thistime" >>$tmp.time
                     fi
                 else
-                    echo " - output mismatch (see $seq.out.bad)"
                     mv $tmp.out $seq.out.bad
-                    $diff -w "$reference" "$PWD"/$seq.out.bad
+                    status="fail"
+                    results="output mismatch (see $seq.out.bad)"
+                    printdiff=true
                     err=true
                 fi
             fi
@@ -850,13 +892,27 @@ do
 
     # come here for each test, except when $showme is true
     #
-    if $err
-    then
-        bad="$bad $seq"
-        n_bad=$(expr $n_bad + 1)
-        quick=false
-    fi
-    [ -f $seq.notrun ] || try=$(expr $try + 1)
+    _report_test_result $seq "$status" "$starttime" "$lasttime" "$thistime" "$results"
+    case "$status" in
+        "pass")
+            try=$(expr $try + 1)
+            ;;
+        "fail")
+            try=$(expr $try + 1)
+            if $makecheck; then
+                _full_env_details
+            fi
+            if $printdiff; then
+                $diff -w "$reference" "$PWD"/$seq.out.bad
+            fi
+            bad="$bad $seq"
+            n_bad=$(expr $n_bad + 1)
+            quick=false
+            ;;
+        "not run")
+            notrun="$notrun $seq"
+            ;;
+    esac
 
     seq="after_$seq"
 done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 52b7c16e15..f3b6d601b2 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -1,8 +1,21 @@
 #
 # QA groups control file
 # Defines test groups
+#
+# Some notes about the groups:
+#
 # - do not start group names with a digit
 #
+# - quick : Tests in this group should finish within some few seconds.
+#
+# - img : Tests in this group can be used to excercise the qemu-img tool.
+#
+# - auto : Tests in this group are used during "make check" and should be
+#   runnable in any case. That means they should run with every QEMU binary
+#   (also non-x86), with every QEMU configuration (i.e. must not fail if
+#   an optional feature is not compiled in - but reporting a "skip" is ok),
+#   and work all kind of host filesystems and users (e.g. "nobody" or "root").
+#
 
 #
 # test-group association ... one line per test
@@ -32,11 +45,11 @@
 023 rw auto
 024 rw backing auto quick
 025 rw auto quick
-026 rw blkdbg auto
+026 rw blkdbg
 027 rw auto quick
-028 rw backing auto quick
+028 rw backing quick
 029 rw auto quick
-030 rw auto backing
+030 rw backing
 031 rw auto quick
 032 rw auto quick
 033 rw auto quick
@@ -46,35 +59,35 @@
 037 rw auto backing quick
 038 rw auto backing quick
 039 rw auto quick
-040 rw auto
-041 rw auto backing
+040 rw
+041 rw backing
 042 rw auto quick
 043 rw auto backing
-044 rw auto
-045 rw auto quick
+044 rw
+045 rw quick
 046 rw auto aio quick
 047 rw auto quick
 048 img auto quick
 049 rw auto
 050 rw auto backing quick
-051 rw auto
+051 rw
 052 rw auto backing quick
 053 rw auto quick
 054 rw auto quick
-055 rw auto
-056 rw auto backing
-057 rw auto
-058 rw auto quick
+055 rw
+056 rw backing
+057 rw
+058 rw quick
 059 rw auto quick
 060 rw auto quick
 061 rw auto
 062 rw auto quick
 063 rw auto quick
 064 rw auto quick
-065 rw auto quick
+065 rw quick
 066 rw auto quick
-067 rw auto quick
-068 rw auto quick
+067 rw quick
+068 rw quick
 069 rw auto quick
 070 rw auto quick
 071 rw auto quick
@@ -91,18 +104,18 @@
 082 rw auto quick
 083 rw auto
 084 img auto quick
-085 rw auto
+085 rw
 086 rw auto quick
-087 rw auto quick
+087 rw quick
 088 rw auto quick
 089 rw auto quick
 090 rw auto quick
 091 rw auto migration
 092 rw auto quick
-093 auto
+093 throttle
 094 rw auto quick
-095 rw auto quick
-096 rw auto quick
+095 rw quick
+096 rw quick
 097 rw auto backing
 098 rw auto backing quick
 099 rw auto quick
@@ -118,60 +131,60 @@
 109 rw auto
 110 rw auto backing quick
 111 rw auto quick
-112 rw auto
+112 rw
 113 rw auto quick
 114 rw auto quick
-115 rw auto
+115 rw
 116 rw auto quick
 117 rw auto
-118 rw auto
+118 rw
 119 rw auto quick
 120 rw auto quick
-121 rw auto
+121 rw
 122 rw auto
 123 rw auto quick
-124 rw auto backing
-125 rw auto
+124 rw backing
+125 rw
 126 rw auto backing
-127 rw auto backing quick
+127 rw backing quick
 128 rw auto quick
-129 rw auto quick
+129 rw quick
 130 rw auto quick
 131 rw auto quick
-132 rw auto quick
+132 rw quick
 133 auto quick
 134 rw auto quick
 135 rw auto
-136 rw auto
+136 rw
 137 rw auto
 138 rw auto quick
-139 rw auto quick
+139 rw quick
 140 rw auto quick
 141 rw auto quick
 142 auto
 143 auto quick
-144 rw auto quick
-145 auto quick
+144 rw quick
+145 quick
 146 auto quick
-147 auto
-148 rw auto quick
-149 rw auto sudo
+147 img
+148 rw quick
+149 rw sudo
 150 rw auto quick
-151 rw auto
-152 rw auto quick
-153 rw auto quick
+151 rw
+152 rw quick
+153 rw quick
 154 rw auto backing quick
-155 rw auto
+155 rw
 156 rw auto quick
-157 auto
+157 quick
 158 rw auto quick
 159 rw auto quick
 160 rw auto quick
 161 rw auto quick
-162 auto quick
-163 rw auto
-165 rw auto quick
-169 rw auto quick migration
+162 quick
+163 rw
+165 rw quick
+169 rw quick migration
 170 rw auto quick
 171 rw auto quick
 172 auto
@@ -180,74 +193,77 @@
 175 auto quick
 176 rw auto backing
 177 rw auto quick
-178 auto
+178 img
 179 rw auto quick
 181 rw auto migration
-182 rw auto quick
-183 rw auto migration
+182 rw quick
+183 rw migration
 184 rw auto quick
-185 rw auto
+185 rw
 186 rw auto
 187 rw auto
-188 rw auto quick
-189 rw auto
+188 rw quick
+189 rw
 190 rw auto quick
 191 rw auto
 192 rw auto quick
-194 rw auto migration quick
+194 rw migration quick
 195 rw auto quick
-196 rw auto quick migration
+196 rw quick migration
 197 rw auto quick
-198 rw auto
-199 rw auto migration
-200 rw auto
+198 rw
+199 rw migration
+200 rw
 201 rw auto migration
-202 rw auto quick
-203 rw auto migration
-204 rw auto quick
-205 rw auto quick
-206 rw auto
+202 rw quick
+203 rw migration
+204 rw quick
+205 rw quick
+206 rw
 207 rw auto
-208 rw auto quick
-209 rw auto quick
+208 rw quick
+209 rw quick
 210 rw auto
 211 rw auto quick
 212 rw auto quick
 213 rw auto quick
 214 rw auto
 215 rw auto quick
-216 rw auto quick
+216 rw quick
 217 rw auto quick
-218 rw auto quick
-219 rw auto
+218 rw quick
+219 rw
 220 rw auto
 221 rw auto quick
-222 rw auto quick
-223 rw auto quick
-224 rw auto quick
+222 rw quick
+223 rw quick
+224 rw quick
 225 rw auto quick
 226 auto quick
-227 auto quick
-228 rw auto quick
+227 quick
+228 rw quick
 229 auto quick
 231 auto quick
-232 auto quick
+232 quick
 233 auto quick
-234 auto quick migration
-235 auto quick
-236 auto quick
+234 quick migration
+235 quick
+236 quick
 237 rw auto quick
-238 auto quick
+238 quick
 239 rw auto quick
-240 auto quick
+240 quick
 241 rw auto quick
-242 rw auto quick
+242 rw quick
 243 rw auto quick
 244 rw auto quick
-245 rw auto
-246 rw auto quick
-247 rw auto quick
-248 rw auto quick
+245 rw
+246 rw quick
+247 rw quick
+248 rw quick
 249 rw auto quick
+250 rw auto quick
 252 rw auto backing quick
 253 rw auto quick
+254 rw auto backing quick
+255 rw auto quick
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 7bde380d96..6bcddd8870 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -126,6 +126,11 @@ def qemu_img_pipe(*args):
         sys.stderr.write('qemu-img received signal %i: %s\n' % (-exitcode, ' '.join(qemu_img_args + list(args))))
     return subp.communicate()[0]
 
+def qemu_img_log(*args):
+    result = qemu_img_pipe(*args)
+    log(result, filters=[filter_testfiles])
+    return result
+
 def img_info_log(filename, filter_path=None, imgopts=False, extra_args=[]):
     args = [ 'info' ]
     if imgopts:
@@ -533,7 +538,8 @@ class VM(qtest.QEMUQtestMachine):
         return result
 
     # Returns None on success, and an error string on failure
-    def run_job(self, job, auto_finalize=True, auto_dismiss=False):
+    def run_job(self, job, auto_finalize=True, auto_dismiss=False,
+                pre_finalize=None):
         error = None
         while True:
             for ev in self.get_qmp_events_filtered(wait=True):
@@ -546,6 +552,8 @@ class VM(qtest.QEMUQtestMachine):
                                 error = j['error']
                                 log('Job failed: %s' % (j['error']))
                     elif status == 'pending' and not auto_finalize:
+                        if pre_finalize:
+                            pre_finalize()
                         self.qmp_log('job-finalize', id=job)
                     elif status == 'concluded' and not auto_dismiss:
                         self.qmp_log('job-dismiss', id=job)
diff --git a/tests/qos-test.c b/tests/qos-test.c
index ae2fb5de1c..01b2a22c08 100644
--- a/tests/qos-test.c
+++ b/tests/qos-test.c
@@ -340,7 +340,8 @@ static void walk_path(QOSGraphNode *orig_path, int len)
     char **path_vec = g_new0(char *, (QOS_PATH_MAX_ELEMENT_SIZE * 2));
     int path_vec_size = 0;
 
-    char *after_cmd = NULL, *before_cmd = NULL, *after_device = NULL;
+    char *after_cmd, *before_cmd, *after_device;
+    GString *after_device_str = g_string_new("");
     char *node_name = orig_path->name, *path_str;
 
     GString *cmd_line = g_string_new("");
@@ -363,9 +364,8 @@ static void walk_path(QOSGraphNode *orig_path, int len)
         /* append node command line + previous edge command line */
         if (path->command_line && etype == QEDGE_CONSUMED_BY) {
             g_string_append(cmd_line, path->command_line);
-            if (after_device) {
-                g_string_append(cmd_line, after_device);
-            }
+            g_string_append(cmd_line, after_device_str->str);
+            g_string_truncate(after_device_str, 0);
         }
 
         path_vec[path_vec_size++] = qos_graph_edge_get_name(path->path_edge);
@@ -382,12 +382,15 @@ static void walk_path(QOSGraphNode *orig_path, int len)
         if (after_cmd) {
             g_string_append(cmd_line2, after_cmd);
         }
+        if (after_device) {
+            g_string_append(after_device_str, after_device);
+        }
     }
 
     path_vec[path_vec_size++] = NULL;
-    if (after_device) {
-        g_string_append(cmd_line, after_device);
-    }
+    g_string_append(cmd_line, after_device_str->str);
+    g_string_free(after_device_str, true);
+
     g_string_append(cmd_line, cmd_line2->str);
     g_string_free(cmd_line2, true);
 
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 002ded6a22..3ae0e29ad7 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -2,3 +2,4 @@
 # in the tests/venv Python virtual environment. For more info,
 # refer to: https://pip.pypa.io/en/stable/user_guide/#id1
 avocado-framework==68.0
+paramiko
diff --git a/tests/tcg/Makefile b/tests/tcg/Makefile
index 1cdd628e96..6fa63cc8d5 100644
--- a/tests/tcg/Makefile
+++ b/tests/tcg/Makefile
@@ -96,6 +96,7 @@ else
 # build options for bare programs are usually pretty different. They
 # are expected to provide their own build recipes.
 -include $(SRC_PATH)/tests/tcg/minilib/Makefile.target
+-include $(SRC_PATH)/tests/tcg/multiarch/system/Makefile.softmmu-target
 -include $(SRC_PATH)/tests/tcg/$(TARGET_BASE_ARCH)/Makefile.softmmu-target
 ifneq ($(TARGET_BASE_ARCH),$(TARGET_NAME))
 -include $(SRC_PATH)/tests/tcg/$(TARGET_NAME)/Makefile.softmmu-target
diff --git a/tests/tcg/aarch64/Makefile.softmmu-target b/tests/tcg/aarch64/Makefile.softmmu-target
new file mode 100644
index 0000000000..2e560e4d08
--- /dev/null
+++ b/tests/tcg/aarch64/Makefile.softmmu-target
@@ -0,0 +1,34 @@
+#
+# Aarch64 system tests
+#
+
+AARCH64_SYSTEM_SRC=$(SRC_PATH)/tests/tcg/aarch64/system
+VPATH+=$(AARCH64_SYSTEM_SRC)
+
+# These objects provide the basic boot code and helper functions for all tests
+CRT_OBJS=boot.o
+
+AARCH64_TEST_SRCS=$(wildcard $(AARCH64_SYSTEM_SRC)/*.c)
+AARCH64_TESTS = $(patsubst $(AARCH64_SYSTEM_SRC)/%.c, %, $(AARCH64_TEST_SRCS))
+
+CRT_PATH=$(AARCH64_SYSTEM_SRC)
+LINK_SCRIPT=$(AARCH64_SYSTEM_SRC)/kernel.ld
+LDFLAGS=-Wl,-T$(LINK_SCRIPT)
+TESTS+=$(AARCH64_TESTS) $(MULTIARCH_TESTS)
+CFLAGS+=-nostdlib -ggdb -O0 $(MINILIB_INC)
+LDFLAGS+=-static -nostdlib $(CRT_OBJS) $(MINILIB_OBJS) -lgcc
+
+# building head blobs
+.PRECIOUS: $(CRT_OBJS)
+
+%.o: $(CRT_PATH)/%.S
+	$(CC) $(CFLAGS) -x assembler-with-cpp -c $< -o $@
+
+# Build and link the tests
+%: %.c $(LINK_SCRIPT) $(CRT_OBJS) $(MINILIB_OBJS)
+	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
+
+memory: CFLAGS+=-DCHECK_UNALIGNED=1
+
+# Running
+QEMU_OPTS+=-M virt -cpu max -display none -semihosting-config enable=on,target=native,chardev=output -kernel
diff --git a/tests/tcg/aarch64/system/boot.S b/tests/tcg/aarch64/system/boot.S
new file mode 100644
index 0000000000..b14e94f332
--- /dev/null
+++ b/tests/tcg/aarch64/system/boot.S
@@ -0,0 +1,239 @@
+/*
+ * Minimal AArch64 system boot code.
+ *
+ * Copyright Linaro Ltd 2019
+ *
+ * Loosely based on the newlib/libgloss setup stubs. Using semihosting
+ * for serial output and exit functions.
+ */
+
+/*
+ * Semihosting interface on ARM AArch64
+ * See "Semihosting for AArch32 and AArch64 Relase 2.0" by ARM
+ * w0 - semihosting call number
+ * x1 - semihosting parameter
+ */
+#define semihosting_call hlt 0xf000
+#define SYS_WRITEC	0x03	/* character to debug channel */
+#define SYS_WRITE0	0x04	/* string to debug channel */
+#define SYS_EXIT	0x18
+
+	.align	12
+
+	.macro	ventry	label
+	.align	7
+	b	\label
+	.endm
+
+vector_table:
+	/* Current EL with SP0.	 */
+	ventry	curr_sp0_sync		/* Synchronous	*/
+	ventry	curr_sp0_irq		/* Irq/vIRQ  */
+	ventry	curr_sp0_fiq		/* Fiq/vFIQ  */
+	ventry	curr_sp0_serror		/* SError/VSError  */
+
+	/* Current EL with SPx.	 */
+	ventry	curr_spx_sync		/* Synchronous	*/
+	ventry	curr_spx_irq		/* IRQ/vIRQ  */
+	ventry	curr_spx_fiq		/* FIQ/vFIQ  */
+	ventry	curr_spx_serror		/* SError/VSError  */
+
+	/* Lower EL using AArch64.  */
+	ventry	lower_a64_sync		/* Synchronous	*/
+	ventry	lower_a64_irq		/* IRQ/vIRQ  */
+	ventry	lower_a64_fiq		/* FIQ/vFIQ  */
+	ventry	lower_a64_serror	/* SError/VSError  */
+
+	/* Lower EL using AArch32.  */
+	ventry	lower_a32_sync		/* Synchronous	*/
+	ventry	lower_a32_irq		/* IRQ/vIRQ  */
+	ventry	lower_a32_fiq		/* FIQ/vFIQ  */
+	ventry	lower_a32_serror	/* SError/VSError  */
+
+	.text
+	.align 4
+
+	/* Common vector handling for now */
+curr_sp0_sync:
+curr_sp0_irq:
+curr_sp0_fiq:
+curr_sp0_serror:
+curr_spx_sync:
+curr_spx_irq:
+curr_spx_fiq:
+curr_spx_serror:
+lower_a64_sync:
+lower_a64_irq:
+lower_a64_fiq:
+lower_a64_serror:
+lower_a32_sync:
+lower_a32_irq:
+lower_a32_fiq:
+lower_a32_serror:
+	mov	x0, SYS_WRITE0
+	adr	x1, .error
+	semihosting_call
+	mov	x0, SYS_EXIT
+	mov	x1, 1
+	semihosting_call
+	/* never returns */
+
+	.section .rodata
+.error:
+	.string "Terminated by exception.\n"
+
+	.text
+	.align 4
+	.global __start
+__start:
+	/* Installs a table of exception vectors to catch and handle all
+	   exceptions by terminating the process with a diagnostic.  */
+	adr	x0, vector_table
+	msr	vbar_el1, x0
+
+	/* Page table setup (identity mapping). */
+	adrp	x0, ttb
+	add	x0, x0, :lo12:ttb
+	msr	ttbr0_el1, x0
+
+	/*
+	 * Setup a flat address mapping page-tables. Stage one simply
+	 * maps RAM to the first Gb. The stage2 tables have two 2mb
+	 * translation block entries covering a series of adjacent
+	 * 4k pages.
+	*/
+
+	/* Stage 1 entry: indexed by IA[38:30] */
+	adr	x1, .				/* phys address */
+	bic	x1, x1, #(1 << 30) - 1		/* 1GB alignment*/
+	add	x2, x0, x1, lsr #(30 - 3)	/* offset in l1 page table */
+
+	/* point to stage 2 table [47:12] */
+	adrp	x0, ttb_stage2
+	orr 	x1, x0, #3 			/* ptr to stage 2 */
+	str	x1, [x2]
+
+	/* Stage 2 entries: indexed by IA[29:21] */
+	ldr	x5, =(((1 << 9) - 1) << 21)
+
+	/* First block: .text/RO/execute enabled */
+	adr	x1, .				/* phys address */
+	bic	x1, x1, #(1 << 21) - 1		/* 2mb block alignment	*/
+	and	x4, x1, x5			/* IA[29:21] */
+	add	x2, x0, x4, lsr #(21 - 3)	/* offset in l2 page table */
+	ldr	x3, =0x401			/* attr(AF, block) */
+	orr	x1, x1, x3
+	str	x1, [x2]			/* 1st 2mb (.text & rodata) */
+
+	/* Second block: .data/RW/no execute */
+	adrp	x1, .data
+	add	x1, x1, :lo12:.data
+	bic	x1, x1, #(1 << 21) - 1		/* 2mb block alignment */
+	and	x4, x1, x5			/* IA[29:21] */
+	add	x2, x0, x4, lsr #(21 - 3)	/* offset in l2 page table */
+	ldr	x3, =(3 << 53) | 0x401		/* attr(AF, NX, block) */
+	orr	x1, x1, x3
+	str	x1, [x2]			/* 2nd 2mb (.data & .bss)*/
+
+	/* Setup/enable the MMU.  */
+
+	/*
+	 * TCR_EL1 - Translation Control Registers
+	 *
+	 * IPS[34:32] = 40-bit PA, 1TB
+	 * TG0[14:15] = b00 => 4kb granuale
+	 * ORGN0[11:10] = Outer: Normal, WB Read-Alloc No Write-Alloc Cacheable
+	 * IRGN0[9:8] = Inner: Normal, WB Read-Alloc No Write-Alloc Cacheable
+	 * T0SZ[5:0]  = 2^(64 - 25)
+	 *
+	 * The size of T0SZ controls what the initial lookup level. It
+	 * would be nice to start at level 2 but unfortunatly for a
+	 * flat-mapping on the virt machine we need to handle IA's
+	 * with at least 1gb range to see RAM. So we start with a
+	 * level 1 lookup.
+	 */
+	ldr	x0, = (2 << 32) | 25 | (3 << 10) | (3 << 8)
+	msr	tcr_el1, x0
+
+	mov	x0, #0xee			/* Inner/outer cacheable WB */
+	msr	mair_el1, x0
+	isb
+
+	/*
+	 * SCTLR_EL1 - System Control Register
+	 *
+	 * WXN[19] = 0 = no effect, Write does not imply XN (execute never)
+	 * I[12] = Instruction cachability control
+	 * SA[3] = SP alignment check
+	 * C[2] = Data cachability control
+	 * M[0] = 1, enable stage 1 address translation for EL0/1
+	 */
+	mrs	x0, sctlr_el1
+	ldr	x1, =0x100d			/* bits I(12) SA(3) C(2) M(0) */
+	bic	x0, x0, #(1 << 1)		/* clear bit A(1) */
+	bic	x0, x0, #(1 << 19)		/* clear WXN */
+	orr	x0, x0, x1			/* set bits */
+
+	dsb	sy
+	msr	sctlr_el1, x0
+	isb
+
+	/*
+	 * Enable FP registers. The standard C pre-amble will be
+	 * saving these and A-profile compilers will use AdvSIMD
+	 * registers unless we tell it not to.
+	*/
+	mrs	x0, cpacr_el1
+	orr	x0, x0, #(3 << 20)
+	msr	cpacr_el1, x0
+
+	/* Setup some stack space and enter the test code.
+	 * Assume everthing except the return value is garbage when we
+	 * return, we won't need it.
+	 */
+	adrp	x0, stack_end
+	add	x0, x0, :lo12:stack_end
+	mov	sp, x0
+	bl	main
+
+	/* pass return value to sys exit */
+	mov    x1, x0
+	ldr    x0, =0x20026 /* ADP_Stopped_ApplicationExit */
+	stp    x0, x1, [sp, #-16]!
+	mov    x1, sp
+	mov    x0, SYS_EXIT
+	semihosting_call
+	/* never returns */
+
+	/*
+	 * Helper Functions
+	*/
+
+	/* Output a single character to serial port */
+	.global __sys_outc
+__sys_outc:
+	stp x0, x1, [sp, #-16]!
+	/* pass address of c on stack */
+	mov x1, sp
+	mov x0, SYS_WRITEC
+	semihosting_call
+	ldp x0, x1, [sp], #16
+	ret
+
+	.data
+	.align	12
+
+	/* Translation table
+	 * @4k granuale: 9 bit lookup, 512 entries
+	*/
+ttb:
+	.space	4096, 0
+
+	.align	12
+ttb_stage2:
+	.space	4096, 0
+
+	.align	12
+stack:
+	.space 65536, 0
+stack_end:
diff --git a/tests/tcg/aarch64/system/kernel.ld b/tests/tcg/aarch64/system/kernel.ld
new file mode 100644
index 0000000000..7b3a76dcbf
--- /dev/null
+++ b/tests/tcg/aarch64/system/kernel.ld
@@ -0,0 +1,24 @@
+ENTRY(__start)
+
+SECTIONS
+{
+    /* virt machine, RAM starts at 1gb */
+    . = (1 << 30);
+    .text : {
+        *(.text)
+    }
+    .rodata : {
+        *(.rodata)
+    }
+    /* align r/w section to next 2mb */
+    . = ALIGN(1 << 21);
+    .data : {
+        *(.data)
+    }
+    .bss : {
+        *(.bss)
+    }
+    /DISCARD/ : {
+        *(.ARM.attributes)
+    }
+}
diff --git a/tests/tcg/alpha/Makefile.softmmu-target b/tests/tcg/alpha/Makefile.softmmu-target
new file mode 100644
index 0000000000..3c0f34cc69
--- /dev/null
+++ b/tests/tcg/alpha/Makefile.softmmu-target
@@ -0,0 +1,34 @@
+#
+# Alpha system tests
+#
+
+ALPHA_SYSTEM_SRC=$(SRC_PATH)/tests/tcg/alpha/system
+VPATH+=$(ALPHA_SYSTEM_SRC)
+
+# These objects provide the basic boot code and helper functions for all tests
+CRT_OBJS=boot.o
+
+ALPHA_TEST_SRCS=$(wildcard $(ALPHA_SYSTEM_SRC)/*.c)
+ALPHA_TESTS = $(patsubst $(ALPHA_SYSTEM_SRC)/%.c, %, $(ALPHA_TEST_SRCS))
+
+CRT_PATH=$(ALPHA_SYSTEM_SRC)
+LINK_SCRIPT=$(ALPHA_SYSTEM_SRC)/kernel.ld
+LDFLAGS=-Wl,-T$(LINK_SCRIPT)
+TESTS+=$(ALPHA_TESTS) $(MULTIARCH_TESTS)
+CFLAGS+=-nostdlib -g -O1 -mcpu=ev6 $(MINILIB_INC)
+LDFLAGS+=-static -nostdlib $(CRT_OBJS) $(MINILIB_OBJS) -lgcc
+
+# building head blobs
+.PRECIOUS: $(CRT_OBJS)
+
+%.o: $(CRT_PATH)/%.S
+	$(CC) $(CFLAGS) -x assembler-with-cpp -c $< -o $@
+
+# Build and link the tests
+%: %.c $(LINK_SCRIPT) $(CRT_OBJS) $(MINILIB_OBJS)
+	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
+
+memory: CFLAGS+=-DCHECK_UNALIGNED=0
+
+# Running
+QEMU_OPTS+=-serial chardev:output -kernel
diff --git a/tests/tcg/alpha/system/boot.S b/tests/tcg/alpha/system/boot.S
new file mode 100644
index 0000000000..9791b1ef7c
--- /dev/null
+++ b/tests/tcg/alpha/system/boot.S
@@ -0,0 +1,511 @@
+/*
+ * Minimal Alpha system boot code.
+ *
+ * Copyright Linaro Ltd 2019
+ */
+
+	.set	noat
+	.set	nomacro
+	.arch	ev6
+	.text
+
+.macro load_pci_io reg
+	/* For typhoon, this is
+	 *   0xfffffc0000000000  -- kseg identity map
+	 * +      0x10000000000  -- typhoon pio base
+	 * +        0x1fc000000  -- typhoon pchip0 pci base
+	 * = 0xfffffd01fc000000
+	 */
+	ldah	\reg, -3		/* ff..fd0000 */
+	lda	\reg, 0x1fc(\reg)	/* ff..fd01fc */
+	sll	\reg, 24, \reg
+.endm
+
+#define com1Rbr 0x3f8
+#define com1Thr 0x3f8
+#define com1Ier 0x3f9
+#define com1Iir 0x3fa
+#define com1Lcr 0x3fb
+#define com1Mcr 0x3fc
+#define com1Lsr 0x3fd
+#define com1Msr 0x3fe
+#define com1Scr 0x3ff
+#define com1Dll 0x3f8
+#define com1Dlm 0x3f9
+
+#define PAL_halt    0
+#define PAL_wrent  52
+#define PAL_wrkgp  55
+
+	.text
+	.p2align 4
+	.globl	_start
+	.ent	_start
+_start:
+	br	$gp, .+4
+	ldah	$gp, 0($gp)		!gpdisp!1
+	lda	$gp, 0($gp)		!gpdisp!1
+
+	ldah	$sp, $stack_end($gp)	!gprelhigh
+	lda	$sp, $stack_end($sp)	!gprellow
+
+	/* Install kernel gp for exception handlers.  */
+	mov	$gp, $16
+	call_pal PAL_wrkgp
+
+	/* Install exception handlers.  */
+	ldah	$16, entInt($gp)	!gprelhigh
+	lda	$16, entInt($16)	!gprellow
+	lda	$17, 0
+	call_pal PAL_wrent
+
+	ldah	$16, entArith($gp)	!gprelhigh
+	lda	$16, entArith($16)	!gprellow
+	lda	$17, 1
+	call_pal PAL_wrent
+
+	ldah	$16, entMM($gp)		!gprelhigh
+	lda	$16, entMM($16)		!gprellow
+	lda	$17, 2
+	call_pal PAL_wrent
+
+	ldah	$16, entIF($gp)		!gprelhigh
+	lda	$16, entIF($16)		!gprellow
+	lda	$17, 3
+	call_pal PAL_wrent
+
+	ldah	$16, entUna($gp)	!gprelhigh
+	lda	$16, entUna($16)	!gprellow
+	lda	$17, 4
+	call_pal PAL_wrent
+
+	ldah	$16, entSys($gp)	!gprelhigh
+	lda	$16, entSys($16)	!gprellow
+	lda	$17, 5
+	call_pal PAL_wrent
+
+	/*
+	 * Initialize COM1.
+	 */
+	load_pci_io $1
+	lda	$2, 0x87		/* outb(0x87, com1Lcr); */
+	stb	$2, com1Lcr($1)
+	stb	$31, com1Dlm($1)	/* outb(0, com1Dlm); */
+	lda	$2, 3			/* baudconst 3 => 56000 */
+	stb	$2, com1Dll($1)		/* outb(baudconst, com1Dll); */
+	lda	$2, 0x07
+	stb	$2, com1Lcr($1)		/* outb(0x07, com1Lcr) */
+	lda	$2, 0x0f
+	stb	$2, com1Mcr($1)		/* outb(0x0f, com1Mcr) */
+
+	bsr	$26, main		!samegp
+
+	/* fall through to _exit */
+	.end	_start
+
+	.globl	_exit
+	.ent	_exit
+_exit:
+	.frame	$sp, 0, $26, 0
+	.prologue 0
+
+	/* We cannot return an error code.  */
+	call_pal PAL_halt
+	.end	_exit
+
+/*
+ * We have received an exception that we don't handle.  Log and exit.
+ */
+	.ent	log_exit
+log_exit:
+entInt:
+entArith:
+entMM:
+entIF:
+entUna:
+entSys:
+	ldah	$16, $errormsg($gp)	!gprelhigh
+	lda	$16, $errormsg($16)	!gprellow
+	bsr	$26, __sys_outs		!samegp
+	bsr	$26, _exit		!samegp
+	.end	log_exit
+
+	.section .rodata
+$errormsg:
+	.string "Terminated by exception.\n"
+	.previous
+
+	/*
+	 * Helper Functions
+	 */
+
+	/* Output a single character to serial port */
+	.global __sys_outc
+	.ent	__sys_outc
+__sys_outc:
+	.frame	$sp, 0, $26, 0
+	.prologue 0
+
+	load_pci_io $1
+
+	/* 
+	 * while ((inb(com1Lsr) & 0x20) == 0)
+	 *       continue;
+	 */
+1:	ldbu	$0, com1Lsr($1)
+	and	$0, 0x20, $0
+	beq	$0, 1b
+
+	/* outb(c, com1Thr); */
+	stb	$16, com1Thr($1)
+	ret
+	.end	__sys_outc
+
+	/* Output a nul-terminated string to serial port */
+	.global	__sys_outs
+	.ent	__sys_outs
+__sys_outs:
+	.frame	$sp, 0, $26, 0
+	.prologue 0
+
+	load_pci_io $1
+
+	ldbu	$2, 0($16)
+	beq	$2, 9f
+
+	/* 
+	 * while ((inb(com1Lsr) & 0x20) == 0)
+	 *       continue;
+	 */
+1:	ldbu	$0, com1Lsr($1)
+	and	$0, 0x20, $0
+	beq	$0, 1b
+
+	/* outb(c, com1Thr); */
+	stb	$2, com1Thr($1)
+
+	addq	$16, 1, $16
+	ldbu	$2, 0($16)
+	bne	$2, 1b
+
+9:	ret
+	.end	__sys_outs
+
+/*
+ * Division routines that are normally in libc.
+ *
+ * These do not follow the C calling convention.  Arguments are in $24+$25,
+ * the result is in $27.  Register $28 may be clobbered; everything else
+ * must be saved.
+ *
+ * We store the remainder in $28, so that we can share code.
+ *
+ * We do not signal divide by zero.
+ */
+
+/*
+ * Unsigned 64-bit division.
+ */
+
+	.globl	__divqu
+	.ent	__divqu
+__divqu:
+	.frame	$sp, 48, $23
+	subq	$sp, 48, $sp
+	stq	$0, 0($sp)
+	stq	$1, 8($sp)
+	stq	$2, 16($sp)
+	stq	$3, 24($sp)
+	stq	$4, 32($sp)
+	.prologue 0
+
+#define mask     $0
+#define divisor  $1
+#define compare  $2
+#define tmp1     $3
+#define tmp2     $4
+#define quotient $27
+#define modulus  $28
+
+	mov	$24, modulus
+	mov	$25, divisor
+	mov	$31, quotient
+	mov	1, mask
+	beq	$25, 9f
+
+	/* Shift left until divisor >= modulus.  */
+1:	cmpult	divisor, modulus, compare
+	blt	divisor, 2f
+	addq	divisor, divisor, divisor
+	addq	mask, mask, mask
+	bne	compare, 1b
+
+2:	addq	quotient, mask, tmp2
+	srl	mask, 1, mask
+	cmpule	divisor, modulus, compare
+	subq	modulus, divisor, tmp1
+	cmovne	compare, tmp2, quotient
+	srl	divisor, 1, divisor
+	cmovne	compare, tmp1, modulus
+	bne	mask, 2b
+
+9:	ldq	$0, 0($sp)
+	ldq	$1, 8($sp)
+	ldq	$2, 16($sp)
+	ldq	$3, 24($sp)
+	ldq	$4, 32($sp)
+	addq	$sp, 48, $sp
+	ret	$31, ($23), 1
+
+#undef mask
+#undef divisor
+#undef compare
+#undef tmp1
+#undef tmp2
+#undef quotient
+#undef modulus
+
+	.end	__divqu
+
+/*
+ * Unsigned 64-bit remainder.
+ * Note that __divqu above leaves the result in $28.
+ */
+
+	.globl	__remqu
+	.ent	__remqu
+__remqu:
+	.frame	$sp, 16, $23
+	subq	$sp, 16, $sp
+	stq	$23, 0($sp)
+	.prologue 0
+
+	bsr	$23, __divqu
+
+	ldq	$23, 0($sp)
+	mov	$28, $27
+	addq	$sp, 16, $sp
+	ret	$31, ($23), 1
+	.end	__remqu
+
+/*
+ * Signed 64-bit division.
+ */
+
+	.globl	__divqs
+	.ent	__divqs
+__divqs:
+	.prologue 0
+
+	/* Common case: both arguments are positive.  */
+	bis	$24, $25, $28
+	bge	$28, __divqu
+
+	/* At least one argument is negative.  */
+	subq	$sp, 32, $sp
+	stq	$23, 0($sp)
+	stq	$24, 8($sp)
+	stq	$25, 16($sp)
+
+	/* Compute absolute values.  */
+	subq	$31, $24, $28
+	cmovlt	$24, $28, $24
+	subq	$31, $25, $28
+	cmovlt	$25, $28, $25
+
+	bsr	$23, __divqu
+
+	ldq	$24, 8($sp)
+	ldq	$25, 16($sp)
+
+	/* -a / b = a / -b = -(a / b) */
+	subq	$31, $27, $23
+	xor	$24, $25, $28
+	cmovlt	$28, $23, $27
+
+	ldq	$23, 0($sp)
+	addq	$sp, 32, $sp
+	ret	$31, ($23), 1
+	.end	__divqs
+
+/*
+ * Signed 64-bit remainder.
+ */
+
+	.globl	__remqs
+	.ent	__remqs
+__remqs:
+	.prologue 0
+
+	/* Common case: both arguments are positive.  */
+	bis	$24, $25, $28
+	bge	$28, __remqu
+
+	/* At least one argument is negative.  */
+	subq	$sp, 32, $sp
+	stq	$23, 0($sp)
+	stq	$24, 8($sp)
+	stq	$25, 16($sp)
+
+	/* Compute absolute values.  */
+	subq	$31, $24, $28
+	cmovlt	$24, $28, $24
+	subq	$31, $25, $28
+	cmovlt	$25, $28, $25
+
+	bsr	$23, __divqu
+
+	ldq	$23, 0($sp)
+	ldq	$24, 8($sp)
+	ldq	$25, 16($sp)
+
+	/* -a % b = -(a % b); a % -b = a % b.  */
+	subq	$31, $28, $27
+	cmovge	$24, $28, $27
+
+	addq	$sp, 32, $sp
+	ret	$31, ($23), 1
+	.end	__remqs
+
+/*
+ * Unsigned 32-bit division.
+ */
+
+	.globl	__divlu
+	.ent	__divlu
+__divlu:
+	.frame	$sp, 32, $23
+	subq	$sp, 32, $sp
+	stq	$23, 0($sp)
+	stq	$24, 8($sp)
+	stq	$25, 16($sp)
+	.prologue 0
+
+	/* Zero extend and use the 64-bit routine.  */
+	zap	$24, 0xf0, $24
+	zap	$25, 0xf0, $25
+	bsr	$23, __divqu
+
+	addl	$27, 0, $27
+	ldq	$23, 0($sp)
+	ldq	$24, 8($sp)
+	ldq	$25, 16($sp)
+	addq	$sp, 32, $sp
+	ret	$31, ($23), 1
+	.end	__divlu
+
+/*
+ * Unsigned 32-bit remainder.
+ */
+
+	.globl	__remlu
+	.ent	__remlu
+__remlu:
+	.frame	$sp, 32, $23
+	subq	$sp, 32, $sp
+	stq	$23, 0($sp)
+	stq	$24, 8($sp)
+	stq	$25, 16($sp)
+	.prologue 0
+
+	/* Zero extend and use the 64-bit routine.  */
+	zap	$24, 0xf0, $24
+	zap	$25, 0xf0, $25
+	bsr	$23, __divqu
+
+	/* Recall that the remainder is returned in $28.  */
+	addl	$28, 0, $27
+	ldq	$23, 0($sp)
+	ldq	$24, 8($sp)
+	ldq	$25, 16($sp)
+	addq	$sp, 32, $sp
+	ret	$31, ($23), 1
+	.end	__remlu
+
+/*
+ * Signed 32-bit division.
+ */
+
+	.globl	__divls
+	.ent	__divls
+__divls:
+	.frame	$sp, 32, $23
+	subq	$sp, 32, $sp
+	stq	$23, 0($sp)
+	stq	$24, 8($sp)
+	stq	$25, 16($sp)
+	.prologue 0
+
+	/* Sign extend.  */
+	addl	$24, 0, $24
+	addl	$25, 0, $25
+
+	/* Compute absolute values.  */
+	subq	$31, $24, $28
+	cmovlt	$24, $28, $24
+	subq	$31, $25, $28
+	cmovlt	$25, $28, $25
+
+	bsr	$23, __divqu
+
+	ldq	$24, 8($sp)
+	ldq	$25, 16($sp)
+
+	/* Negate the unsigned result, if necessary.  */
+	xor	$24, $25, $28
+	subl	$31, $27, $23
+	addl	$27, 0, $27
+	addl	$28, 0, $28
+	cmovlt	$28, $23, $27
+
+	ldq	$23, 0($sp)
+	addq	$sp, 32, $sp
+	ret	$31, ($23), 1
+	.end	__divls
+
+/*
+ * Signed 32-bit remainder.
+ */
+
+	.globl	__remls
+	.ent	__remls
+__remls:
+	.frame	$sp, 32, $23
+	subq	$sp, 32, $sp
+	stq	$23, 0($sp)
+	stq	$24, 8($sp)
+	stq	$25, 16($sp)
+	.prologue 0
+
+	/* Sign extend.  */
+	addl	$24, 0, $24
+	addl	$25, 0, $25
+
+	/* Compute absolute values.  */
+	subq	$31, $24, $28
+	cmovlt	$24, $28, $24
+	subq	$31, $25, $28
+	cmovlt	$25, $28, $25
+
+	bsr	$23, __divqu
+
+	ldq	$23, 0($sp)
+	ldq	$24, 8($sp)
+	ldq	$25, 16($sp)
+
+	/* Negate the unsigned result, if necessary.  */
+	subl	$31, $28, $27
+	addl	$28, 0, $28
+	cmovge	$24, $28, $27
+
+	addq	$sp, 32, $sp
+	ret	$31, ($23), 1
+	.end	__remls
+
+	.data
+	.p2align 4
+stack:
+	.skip	65536
+$stack_end:
+	.type	stack,@object
+	.size	stack, . - stack
diff --git a/tests/tcg/alpha/system/kernel.ld b/tests/tcg/alpha/system/kernel.ld
new file mode 100644
index 0000000000..d2ac6ecfeb
--- /dev/null
+++ b/tests/tcg/alpha/system/kernel.ld
@@ -0,0 +1,30 @@
+ENTRY(_start)
+
+SECTIONS
+{
+    /* Linux kernel legacy start address.  */
+    . = 0xfffffc0000310000;
+    _text = .;
+    .text : {
+        *(.text)
+    }
+    .rodata : {
+        *(.rodata)
+    }
+    _etext = .;
+
+    . = ALIGN(8192);
+    _data = .;
+    .got : {
+        *(.got)
+    }
+    .data : {
+	*(.sdata)
+        *(.data)
+    }
+    _edata = .;
+    .bss : {
+        *(.bss)
+    }
+    _end = .;
+}
diff --git a/tests/tcg/i386/Makefile.softmmu-target b/tests/tcg/i386/Makefile.softmmu-target
index 53c9c5ece0..e1f98177aa 100644
--- a/tests/tcg/i386/Makefile.softmmu-target
+++ b/tests/tcg/i386/Makefile.softmmu-target
@@ -27,7 +27,7 @@ CFLAGS+=-m32
 LINK_SCRIPT=$(I386_SYSTEM_SRC)/kernel.ld
 LDFLAGS=-Wl,-T$(LINK_SCRIPT) -Wl,-melf_i386
 # FIXME: move to common once x86_64 is bootstrapped
-TESTS+=$(X86_TESTS)
+TESTS+=$(X86_TESTS) $(MULTIARCH_TESTS)
 endif
 CFLAGS+=-nostdlib -ggdb -O0 $(MINILIB_INC)
 LDFLAGS+=-static -nostdlib $(CRT_OBJS) $(MINILIB_OBJS) -lgcc
@@ -42,5 +42,7 @@ LDFLAGS+=-static -nostdlib $(CRT_OBJS) $(MINILIB_OBJS) -lgcc
 %: %.c $(LINK_SCRIPT) $(CRT_OBJS) $(MINILIB_OBJS)
 	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
 
+memory: CFLAGS+=-DCHECK_UNALIGNED=1
+
 # Running
 QEMU_OPTS+=-device isa-debugcon,chardev=output -device isa-debug-exit,iobase=0xf4,iosize=0x4 -kernel
diff --git a/tests/tcg/i386/system/memory.c b/tests/tcg/i386/system/memory.c
deleted file mode 100644
index a7a0a8e978..0000000000
--- a/tests/tcg/i386/system/memory.c
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Memory Test
- *
- * This is intended to test the softmmu code and ensure we properly
- * behave across normal and unaligned accesses across several pages.
- * We are not replicating memory tests for stuck bits and other
- * hardware level failures but looking for issues with different size
- * accesses when:
-
- *
- */
-
-#include <inttypes.h>
-#include <minilib.h>
-
-#define TEST_SIZE (4096 * 4)  /* 4 pages */
-
-static uint8_t test_data[TEST_SIZE];
-
-static void pdot(int count)
-{
-    if (count % 128 == 0) {
-        ml_printf(".");
-    }
-}
-
-
-/*
- * Fill the data with ascending value bytes. As x86 is a LE machine we
- * write in ascending order and then read and high byte should either
- * be zero or higher than the lower bytes.
- */
-
-static void init_test_data_u8(void)
-{
-    uint8_t count = 0, *ptr = &test_data[0];
-    int i;
-
-    ml_printf("Filling test area with u8:");
-    for (i = 0; i < TEST_SIZE; i++) {
-        *ptr++ = count++;
-        pdot(i);
-    }
-    ml_printf("done\n");
-}
-
-static void init_test_data_u16(int offset)
-{
-    uint8_t count = 0;
-    uint16_t word, *ptr = (uint16_t *) &test_data[0];
-    const int max = (TEST_SIZE - offset) / sizeof(word);
-    int i;
-
-    ml_printf("Filling test area with u16 (offset %d):", offset);
-
-    /* Leading zeros */
-    for (i = 0; i < offset; i++) {
-        *ptr = 0;
-    }
-
-    ptr = (uint16_t *) &test_data[offset];
-    for (i = 0; i < max; i++) {
-        uint8_t high, low;
-        low = count++;
-        high = count++;
-        word = (high << 8) | low;
-        *ptr++ = word;
-        pdot(i);
-    }
-    ml_printf("done\n");
-}
-
-static void init_test_data_u32(int offset)
-{
-    uint8_t count = 0;
-    uint32_t word, *ptr = (uint32_t *) &test_data[0];
-    const int max = (TEST_SIZE - offset) / sizeof(word);
-    int i;
-
-    ml_printf("Filling test area with u32 (offset %d):", offset);
-
-    /* Leading zeros */
-    for (i = 0; i < offset; i++) {
-        *ptr = 0;
-    }
-
-    ptr = (uint32_t *) &test_data[offset];
-    for (i = 0; i < max; i++) {
-        uint8_t b1, b2, b3, b4;
-        b4 = count++;
-        b3 = count++;
-        b2 = count++;
-        b1 = count++;
-        word = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
-        *ptr++ = word;
-        pdot(i);
-    }
-    ml_printf("done\n");
-}
-
-
-static int read_test_data_u16(int offset)
-{
-    uint16_t word, *ptr = (uint16_t *)&test_data[offset];
-    int i;
-    const int max = (TEST_SIZE - offset) / sizeof(word);
-
-    ml_printf("Reading u16 from %#lx (offset %d):", ptr, offset);
-
-    for (i = 0; i < max; i++) {
-        uint8_t high, low;
-        word = *ptr++;
-        high = (word >> 8) & 0xff;
-        low = word & 0xff;
-        if (high < low && high != 0) {
-            ml_printf("Error %d < %d\n", high, low);
-            return 1;
-        } else {
-            pdot(i);
-        }
-
-    }
-    ml_printf("done\n");
-    return 0;
-}
-
-static int read_test_data_u32(int offset)
-{
-    uint32_t word, *ptr = (uint32_t *)&test_data[offset];
-    int i;
-    const int max = (TEST_SIZE - offset) / sizeof(word);
-
-    ml_printf("Reading u32 from %#lx (offset %d):", ptr, offset);
-
-    for (i = 0; i < max; i++) {
-        uint8_t b1, b2, b3, b4;
-        word = *ptr++;
-
-        b1 = word >> 24 & 0xff;
-        b2 = word >> 16 & 0xff;
-        b3 = word >> 8 & 0xff;
-        b4 = word & 0xff;
-
-        if ((b1 < b2 && b1 != 0) ||
-            (b2 < b3 && b2 != 0) ||
-            (b3 < b4 && b3 != 0)) {
-            ml_printf("Error %d, %d, %d, %d", b1, b2, b3, b4);
-            return 2;
-        } else {
-            pdot(i);
-        }
-    }
-    ml_printf("done\n");
-    return 0;
-}
-
-static int read_test_data_u64(int offset)
-{
-    uint64_t word, *ptr = (uint64_t *)&test_data[offset];
-    int i;
-    const int max = (TEST_SIZE - offset) / sizeof(word);
-
-    ml_printf("Reading u64 from %#lx (offset %d):", ptr, offset);
-
-    for (i = 0; i < max; i++) {
-        uint8_t b1, b2, b3, b4, b5, b6, b7, b8;
-        word = *ptr++;
-
-        b1 = ((uint64_t) (word >> 56)) & 0xff;
-        b2 = ((uint64_t) (word >> 48)) & 0xff;
-        b3 = ((uint64_t) (word >> 40)) & 0xff;
-        b4 = (word >> 32) & 0xff;
-        b5 = (word >> 24) & 0xff;
-        b6 = (word >> 16) & 0xff;
-        b7 = (word >> 8)  & 0xff;
-        b8 = (word >> 0)  & 0xff;
-
-        if ((b1 < b2 && b1 != 0) ||
-            (b2 < b3 && b2 != 0) ||
-            (b3 < b4 && b3 != 0) ||
-            (b4 < b5 && b4 != 0) ||
-            (b5 < b6 && b5 != 0) ||
-            (b6 < b7 && b6 != 0) ||
-            (b7 < b8 && b7 != 0)) {
-            ml_printf("Error %d, %d, %d, %d, %d, %d, %d, %d",
-                      b1, b2, b3, b4, b5, b6, b7, b8);
-            return 2;
-        } else {
-            pdot(i);
-        }
-    }
-    ml_printf("done\n");
-    return 0;
-}
-
-/* Read the test data and verify at various offsets */
-int do_reads(void)
-{
-    int r = 0;
-    int off = 0;
-
-    while (r == 0 && off < 8) {
-        r = read_test_data_u16(off);
-        r |= read_test_data_u32(off);
-        r |= read_test_data_u64(off);
-        off++;
-    }
-
-    return r;
-}
-
-int main(void)
-{
-    int i, r = 0;
-
-
-    init_test_data_u8();
-    r = do_reads();
-    if (r) {
-        return r;
-    }
-
-    for (i = 0; i < 8; i++) {
-        init_test_data_u16(i);
-
-        r = do_reads();
-        if (r) {
-            return r;
-        }
-    }
-
-    for (i = 0; i < 8; i++) {
-        init_test_data_u32(i);
-
-        r = do_reads();
-        if (r) {
-            return r;
-        }
-    }
-
-    ml_printf("Test complete: %s\n", r == 0 ? "PASSED" : "FAILED");
-    return r;
-}
diff --git a/tests/tcg/minilib/printf.c b/tests/tcg/minilib/printf.c
index 121620cb16..10472b4f58 100644
--- a/tests/tcg/minilib/printf.c
+++ b/tests/tcg/minilib/printf.c
@@ -119,6 +119,9 @@ void ml_printf(const char *fmt, ...)
             str = va_arg(ap, char*);
             print_str(str);
             break;
+        case 'c':
+            __sys_outc(va_arg(ap, int));
+            break;
         case '%':
             __sys_outc(*fmt);
             break;
diff --git a/tests/tcg/mips/include/test_utils_128.h b/tests/tcg/mips/include/test_utils_128.h
index cfd7ad3188..a828416d4f 100644
--- a/tests/tcg/mips/include/test_utils_128.h
+++ b/tests/tcg/mips/include/test_utils_128.h
@@ -66,7 +66,7 @@ static inline int32_t check_results(const char *instruction_name,
         }
     }
 
-    printf("PASS: %3d   FAIL: %3d   elapsed time: %5.2f ms\n",
+    printf("\tPASS: %3d \tFAIL: %3d \telapsed time: %5.2f ms\n",
            pass_count, fail_count, elapsed_time);
 
     if (fail_count > 0) {
diff --git a/tests/tcg/mips/include/wrappers_msa.h b/tests/tcg/mips/include/wrappers_msa.h
index 254e215b8a..5d28cb531a 100644
--- a/tests/tcg/mips/include/wrappers_msa.h
+++ b/tests/tcg/mips/include/wrappers_msa.h
@@ -23,8 +23,57 @@
 #define WRAPPERS_MSA_H
 
 
+#define RESET_MSA_REGISTER(wi)                                         \
+   __asm__ volatile (                                                  \
+      "xor.v $" #wi ", $" #wi ", $" #wi "\n\t"                         \
+      :                                                                \
+      :                                                                \
+      :                                                                \
+   )
+
+
+static inline void reset_msa_registers()
+{
+
+   RESET_MSA_REGISTER(w0);
+   RESET_MSA_REGISTER(w1);
+   RESET_MSA_REGISTER(w2);
+   RESET_MSA_REGISTER(w3);
+   RESET_MSA_REGISTER(w4);
+   RESET_MSA_REGISTER(w5);
+   RESET_MSA_REGISTER(w6);
+   RESET_MSA_REGISTER(w7);
+   RESET_MSA_REGISTER(w8);
+   RESET_MSA_REGISTER(w9);
+   RESET_MSA_REGISTER(w10);
+   RESET_MSA_REGISTER(w11);
+   RESET_MSA_REGISTER(w12);
+   RESET_MSA_REGISTER(w13);
+   RESET_MSA_REGISTER(w14);
+   RESET_MSA_REGISTER(w15);
+   RESET_MSA_REGISTER(w16);
+   RESET_MSA_REGISTER(w17);
+   RESET_MSA_REGISTER(w18);
+   RESET_MSA_REGISTER(w19);
+   RESET_MSA_REGISTER(w20);
+   RESET_MSA_REGISTER(w21);
+   RESET_MSA_REGISTER(w22);
+   RESET_MSA_REGISTER(w23);
+   RESET_MSA_REGISTER(w24);
+   RESET_MSA_REGISTER(w25);
+   RESET_MSA_REGISTER(w26);
+   RESET_MSA_REGISTER(w27);
+   RESET_MSA_REGISTER(w28);
+   RESET_MSA_REGISTER(w29);
+   RESET_MSA_REGISTER(w30);
+   RESET_MSA_REGISTER(w31);
+
+}
+
+
 #define DO_MSA__WD__WS(suffix, mnemonic)                               \
-static inline void do_msa_##suffix(void *input, void *output)          \
+static inline void do_msa_##suffix(const void *input,                  \
+                                   const void *output)                 \
 {                                                                      \
    __asm__ volatile (                                                  \
       "move $t0, %0\n\t"                                               \
@@ -39,7 +88,8 @@ static inline void do_msa_##suffix(void *input, void *output)          \
 }
 
 #define DO_MSA__WD__WD(suffix, mnemonic)                               \
-static inline void do_msa_##suffix(void *input, void *output)          \
+static inline void do_msa_##suffix(const void *input,                  \
+                                   const void *output)                 \
 {                                                                      \
    __asm__ volatile (                                                  \
       "move $t0, %0\n\t"                                               \
@@ -53,25 +103,11 @@ static inline void do_msa_##suffix(void *input, void *output)          \
    );                                                                  \
 }
 
-DO_MSA__WD__WS(NLOC_B, nloc.b)
-DO_MSA__WD__WS(NLOC_H, nloc.h)
-DO_MSA__WD__WS(NLOC_W, nloc.w)
-DO_MSA__WD__WS(NLOC_D, nloc.d)
-
-DO_MSA__WD__WS(NLZC_B, nlzc.b)
-DO_MSA__WD__WS(NLZC_H, nlzc.h)
-DO_MSA__WD__WS(NLZC_W, nlzc.w)
-DO_MSA__WD__WS(NLZC_D, nlzc.d)
-
-DO_MSA__WD__WS(PCNT_B, pcnt.b)
-DO_MSA__WD__WS(PCNT_H, pcnt.h)
-DO_MSA__WD__WS(PCNT_W, pcnt.w)
-DO_MSA__WD__WS(PCNT_D, pcnt.d)
-
 
 #define DO_MSA__WD__WS_WT(suffix, mnemonic)                            \
-static inline void do_msa_##suffix(void *input1, void *input2,         \
-                                   void *output)                       \
+static inline void do_msa_##suffix(const void *input1,                 \
+                                   const void *input2,                 \
+                                   const void *output)                 \
 {                                                                      \
    __asm__ volatile (                                                  \
       "move $t0, %0\n\t"                                               \
@@ -123,30 +159,180 @@ static inline void do_msa_##suffix(void *input1, void *input2,         \
    );                                                                  \
 }
 
-DO_MSA__WD__WS_WT(ILVEV_B, ilvev.b)
-DO_MSA__WD__WS_WT(ILVEV_H, ilvev.h)
-DO_MSA__WD__WS_WT(ILVEV_W, ilvev.w)
-DO_MSA__WD__WS_WT(ILVEV_D, ilvev.d)
 
-DO_MSA__WD__WS_WT(ILVOD_B, ilvod.b)
-DO_MSA__WD__WS_WT(ILVOD_H, ilvod.h)
-DO_MSA__WD__WS_WT(ILVOD_W, ilvod.w)
-DO_MSA__WD__WS_WT(ILVOD_D, ilvod.d)
+/*
+ * Bit Count
+ * ---------
+ */
 
-DO_MSA__WD__WS_WT(ILVL_B, ilvl.b)
-DO_MSA__WD__WS_WT(ILVL_H, ilvl.h)
-DO_MSA__WD__WS_WT(ILVL_W, ilvl.w)
-DO_MSA__WD__WS_WT(ILVL_D, ilvl.d)
+DO_MSA__WD__WS(NLOC_B, nloc.b)
+DO_MSA__WD__WS(NLOC_H, nloc.h)
+DO_MSA__WD__WS(NLOC_W, nloc.w)
+DO_MSA__WD__WS(NLOC_D, nloc.d)
 
-DO_MSA__WD__WS_WT(ILVR_B, ilvr.b)
-DO_MSA__WD__WS_WT(ILVR_H, ilvr.h)
-DO_MSA__WD__WS_WT(ILVR_W, ilvr.w)
-DO_MSA__WD__WS_WT(ILVR_D, ilvr.d)
+DO_MSA__WD__WS(NLZC_B, nlzc.b)
+DO_MSA__WD__WS(NLZC_H, nlzc.h)
+DO_MSA__WD__WS(NLZC_W, nlzc.w)
+DO_MSA__WD__WS(NLZC_D, nlzc.d)
 
-DO_MSA__WD__WS_WT(AND_V, and.v)
-DO_MSA__WD__WS_WT(NOR_V, nor.v)
-DO_MSA__WD__WS_WT(OR_V, or.v)
-DO_MSA__WD__WS_WT(XOR_V, xor.v)
+DO_MSA__WD__WS(PCNT_B, pcnt.b)
+DO_MSA__WD__WS(PCNT_H, pcnt.h)
+DO_MSA__WD__WS(PCNT_W, pcnt.w)
+DO_MSA__WD__WS(PCNT_D, pcnt.d)
+
+
+/*
+ * Bit move
+ * --------
+ */
+
+DO_MSA__WD__WS_WT(BINSL_B, binsl.b)
+DO_MSA__WD__WS_WT(BINSL_H, binsl.h)
+DO_MSA__WD__WS_WT(BINSL_W, binsl.w)
+DO_MSA__WD__WS_WT(BINSL_D, binsl.d)
+
+DO_MSA__WD__WS_WT(BINSR_B, binsr.b)
+DO_MSA__WD__WS_WT(BINSR_H, binsr.h)
+DO_MSA__WD__WS_WT(BINSR_W, binsr.w)
+DO_MSA__WD__WS_WT(BINSR_D, binsr.d)
+
+DO_MSA__WD__WS_WT(BMNZ_V, bmnz.v)
+DO_MSA__WD__WS_WT(BMZ_V, bmz.v)
+DO_MSA__WD__WS_WT(BSEL_V, bsel.v)
+
+
+/*
+ * Bit Set
+ * -------
+ */
+
+DO_MSA__WD__WS_WT(BCLR_B, bclr.b)
+DO_MSA__WD__WS_WT(BCLR_H, bclr.h)
+DO_MSA__WD__WS_WT(BCLR_W, bclr.w)
+DO_MSA__WD__WS_WT(BCLR_D, bclr.d)
+
+DO_MSA__WD__WS_WT(BSET_B, bset.b)
+DO_MSA__WD__WS_WT(BSET_H, bset.h)
+DO_MSA__WD__WS_WT(BSET_W, bset.w)
+DO_MSA__WD__WS_WT(BSET_D, bset.d)
+
+DO_MSA__WD__WS_WT(BNEG_B, bneg.b)
+DO_MSA__WD__WS_WT(BNEG_H, bneg.h)
+DO_MSA__WD__WS_WT(BNEG_W, bneg.w)
+DO_MSA__WD__WS_WT(BNEG_D, bneg.d)
+
+
+/*
+ * Fixed Multiply
+ * --------------
+ */
+
+DO_MSA__WD__WS_WT(MADD_Q_H, madd_q.h)
+DO_MSA__WD__WS_WT(MADD_Q_W, madd_q.w)
+
+DO_MSA__WD__WS_WT(MADDR_Q_H, maddr_q.h)
+DO_MSA__WD__WS_WT(MADDR_Q_W, maddr_q.w)
+
+DO_MSA__WD__WS_WT(MSUB_Q_H, msub_q.h)
+DO_MSA__WD__WS_WT(MSUB_Q_W, msub_q.w)
+
+DO_MSA__WD__WS_WT(MSUBR_Q_H, msubr_q.h)
+DO_MSA__WD__WS_WT(MSUBR_Q_W, msubr_q.w)
+
+DO_MSA__WD__WS_WT(MUL_Q_H, mul_q.h)
+DO_MSA__WD__WS_WT(MUL_Q_W, mul_q.w)
+
+DO_MSA__WD__WS_WT(MULR_Q_H, mulr_q.h)
+DO_MSA__WD__WS_WT(MULR_Q_W, mulr_q.w)
+
+
+/*
+ * Float Max Min
+ * -------------
+ */
+
+DO_MSA__WD__WS_WT(FMAX_W, fmax.w)
+DO_MSA__WD__WS_WT(FMAX_D, fmax.d)
+
+DO_MSA__WD__WS_WT(FMAX_A_W, fmax_a.w)
+DO_MSA__WD__WS_WT(FMAX_A_D, fmax_a.d)
+
+DO_MSA__WD__WS_WT(FMIN_W, fmin.w)
+DO_MSA__WD__WS_WT(FMIN_D, fmin.d)
+
+DO_MSA__WD__WS_WT(FMIN_A_W, fmin_a.w)
+DO_MSA__WD__WS_WT(FMIN_A_D, fmin_a.d)
+
+
+/*
+ * Int Add
+ * -------
+ */
+
+DO_MSA__WD__WS_WT(ADD_A_B, add_a.b)
+DO_MSA__WD__WS_WT(ADD_A_H, add_a.h)
+DO_MSA__WD__WS_WT(ADD_A_W, add_a.w)
+DO_MSA__WD__WS_WT(ADD_A_D, add_a.d)
+
+DO_MSA__WD__WS_WT(ADDS_A_B, adds_a.b)
+DO_MSA__WD__WS_WT(ADDS_A_H, adds_a.h)
+DO_MSA__WD__WS_WT(ADDS_A_W, adds_a.w)
+DO_MSA__WD__WS_WT(ADDS_A_D, adds_a.d)
+
+DO_MSA__WD__WS_WT(ADDS_S_B, adds_s.b)
+DO_MSA__WD__WS_WT(ADDS_S_H, adds_s.h)
+DO_MSA__WD__WS_WT(ADDS_S_W, adds_s.w)
+DO_MSA__WD__WS_WT(ADDS_S_D, adds_s.d)
+
+DO_MSA__WD__WS_WT(ADDS_U_B, adds_u.b)
+DO_MSA__WD__WS_WT(ADDS_U_H, adds_u.h)
+DO_MSA__WD__WS_WT(ADDS_U_W, adds_u.w)
+DO_MSA__WD__WS_WT(ADDS_U_D, adds_u.d)
+
+DO_MSA__WD__WS_WT(ADDV_B, addv.b)
+DO_MSA__WD__WS_WT(ADDV_H, addv.h)
+DO_MSA__WD__WS_WT(ADDV_W, addv.w)
+DO_MSA__WD__WS_WT(ADDV_D, addv.d)
+
+DO_MSA__WD__WS_WT(HADD_S_H, hadd_s.h)
+DO_MSA__WD__WS_WT(HADD_S_W, hadd_s.w)
+DO_MSA__WD__WS_WT(HADD_S_D, hadd_s.d)
+
+DO_MSA__WD__WS_WT(HADD_U_H, hadd_u.h)
+DO_MSA__WD__WS_WT(HADD_U_W, hadd_u.w)
+DO_MSA__WD__WS_WT(HADD_U_D, hadd_u.d)
+
+
+/*
+ * Int Average
+ * -----------
+ */
+
+DO_MSA__WD__WS_WT(AVE_S_B, ave_s.b)
+DO_MSA__WD__WS_WT(AVE_S_H, ave_s.h)
+DO_MSA__WD__WS_WT(AVE_S_W, ave_s.w)
+DO_MSA__WD__WS_WT(AVE_S_D, ave_s.d)
+
+DO_MSA__WD__WS_WT(AVE_U_B, ave_u.b)
+DO_MSA__WD__WS_WT(AVE_U_H, ave_u.h)
+DO_MSA__WD__WS_WT(AVE_U_W, ave_u.w)
+DO_MSA__WD__WS_WT(AVE_U_D, ave_u.d)
+
+DO_MSA__WD__WS_WT(AVER_S_B, aver_s.b)
+DO_MSA__WD__WS_WT(AVER_S_H, aver_s.h)
+DO_MSA__WD__WS_WT(AVER_S_W, aver_s.w)
+DO_MSA__WD__WS_WT(AVER_S_D, aver_s.d)
+
+DO_MSA__WD__WS_WT(AVER_U_B, aver_u.b)
+DO_MSA__WD__WS_WT(AVER_U_H, aver_u.h)
+DO_MSA__WD__WS_WT(AVER_U_W, aver_u.w)
+DO_MSA__WD__WS_WT(AVER_U_D, aver_u.d)
+
+
+/*
+ * Int Compare
+ * -----------
+ */
 
 DO_MSA__WD__WS_WT(CEQ_B, ceq.b)
 DO_MSA__WD__WS_WT(CEQ_H, ceq.h)
@@ -173,50 +359,200 @@ DO_MSA__WD__WS_WT(CLT_U_H, clt_u.h)
 DO_MSA__WD__WS_WT(CLT_U_W, clt_u.w)
 DO_MSA__WD__WS_WT(CLT_U_D, clt_u.d)
 
+
+/*
+ * Int Divide
+ * ----------
+ */
+
+DO_MSA__WD__WS_WT(DIV_S_B, div_s.b)
+DO_MSA__WD__WS_WT(DIV_S_H, div_s.h)
+DO_MSA__WD__WS_WT(DIV_S_W, div_s.w)
+DO_MSA__WD__WS_WT(DIV_S_D, div_s.d)
+
+DO_MSA__WD__WS_WT(DIV_U_B, div_u.b)
+DO_MSA__WD__WS_WT(DIV_U_H, div_u.h)
+DO_MSA__WD__WS_WT(DIV_U_W, div_u.w)
+DO_MSA__WD__WS_WT(DIV_U_D, div_u.d)
+
+
+/*
+ * Int Dot Product
+ * ---------------
+ */
+
+DO_MSA__WD__WS_WT(DOTP_S_H, dotp_s.h)
+DO_MSA__WD__WS_WT(DOTP_S_W, dotp_s.w)
+DO_MSA__WD__WS_WT(DOTP_S_D, dotp_s.d)
+
+DO_MSA__WD__WS_WT(DOTP_U_H, dotp_u.h)
+DO_MSA__WD__WS_WT(DOTP_U_W, dotp_u.w)
+DO_MSA__WD__WS_WT(DOTP_U_D, dotp_u.d)
+
+
+/*
+ * Int Max Min
+ * -----------
+ */
+
 DO_MSA__WD__WS_WT(MAX_A_B, max_a.b)
 DO_MSA__WD__WS_WT(MAX_A_H, max_a.h)
 DO_MSA__WD__WS_WT(MAX_A_W, max_a.w)
 DO_MSA__WD__WS_WT(MAX_A_D, max_a.d)
 
-DO_MSA__WD__WS_WT(MIN_A_B, min_a.b)
-DO_MSA__WD__WS_WT(MIN_A_H, min_a.h)
-DO_MSA__WD__WS_WT(MIN_A_W, min_a.w)
-DO_MSA__WD__WS_WT(MIN_A_D, min_a.d)
-
 DO_MSA__WD__WS_WT(MAX_S_B, max_s.b)
 DO_MSA__WD__WS_WT(MAX_S_H, max_s.h)
 DO_MSA__WD__WS_WT(MAX_S_W, max_s.w)
 DO_MSA__WD__WS_WT(MAX_S_D, max_s.d)
 
-DO_MSA__WD__WS_WT(MIN_S_B, min_s.b)
-DO_MSA__WD__WS_WT(MIN_S_H, min_s.h)
-DO_MSA__WD__WS_WT(MIN_S_W, min_s.w)
-DO_MSA__WD__WS_WT(MIN_S_D, min_s.d)
-
 DO_MSA__WD__WS_WT(MAX_U_B, max_u.b)
 DO_MSA__WD__WS_WT(MAX_U_H, max_u.h)
 DO_MSA__WD__WS_WT(MAX_U_W, max_u.w)
 DO_MSA__WD__WS_WT(MAX_U_D, max_u.d)
 
+DO_MSA__WD__WS_WT(MIN_A_B, min_a.b)
+DO_MSA__WD__WS_WT(MIN_A_H, min_a.h)
+DO_MSA__WD__WS_WT(MIN_A_W, min_a.w)
+DO_MSA__WD__WS_WT(MIN_A_D, min_a.d)
+
+DO_MSA__WD__WS_WT(MIN_S_B, min_s.b)
+DO_MSA__WD__WS_WT(MIN_S_H, min_s.h)
+DO_MSA__WD__WS_WT(MIN_S_W, min_s.w)
+DO_MSA__WD__WS_WT(MIN_S_D, min_s.d)
+
 DO_MSA__WD__WS_WT(MIN_U_B, min_u.b)
 DO_MSA__WD__WS_WT(MIN_U_H, min_u.h)
 DO_MSA__WD__WS_WT(MIN_U_W, min_u.w)
 DO_MSA__WD__WS_WT(MIN_U_D, min_u.d)
 
-DO_MSA__WD__WS_WT(BCLR_B, bclr.b)
-DO_MSA__WD__WS_WT(BCLR_H, bclr.h)
-DO_MSA__WD__WS_WT(BCLR_W, bclr.w)
-DO_MSA__WD__WS_WT(BCLR_D, bclr.d)
 
-DO_MSA__WD__WS_WT(BSET_B, bset.b)
-DO_MSA__WD__WS_WT(BSET_H, bset.h)
-DO_MSA__WD__WS_WT(BSET_W, bset.w)
-DO_MSA__WD__WS_WT(BSET_D, bset.d)
+/*
+ * Int Modulo
+ * ----------
+ */
 
-DO_MSA__WD__WS_WT(BNEG_B, bneg.b)
-DO_MSA__WD__WS_WT(BNEG_H, bneg.h)
-DO_MSA__WD__WS_WT(BNEG_W, bneg.w)
-DO_MSA__WD__WS_WT(BNEG_D, bneg.d)
+DO_MSA__WD__WS_WT(MOD_S_B, mod_s.b)
+DO_MSA__WD__WS_WT(MOD_S_H, mod_s.h)
+DO_MSA__WD__WS_WT(MOD_S_W, mod_s.w)
+DO_MSA__WD__WS_WT(MOD_S_D, mod_s.d)
+
+DO_MSA__WD__WS_WT(MOD_U_B, mod_u.b)
+DO_MSA__WD__WS_WT(MOD_U_H, mod_u.h)
+DO_MSA__WD__WS_WT(MOD_U_W, mod_u.w)
+DO_MSA__WD__WS_WT(MOD_U_D, mod_u.d)
+
+
+/*
+ * Int Multiply
+ * ------------
+ */
+
+DO_MSA__WD__WS_WT(MADDV_B, maddv.b)
+DO_MSA__WD__WS_WT(MADDV_H, maddv.h)
+DO_MSA__WD__WS_WT(MADDV_W, maddv.w)
+DO_MSA__WD__WS_WT(MADDV_D, maddv.d)
+
+DO_MSA__WD__WS_WT(MSUBV_B, msubv.b)
+DO_MSA__WD__WS_WT(MSUBV_H, msubv.h)
+DO_MSA__WD__WS_WT(MSUBV_W, msubv.w)
+DO_MSA__WD__WS_WT(MSUBV_D, msubv.d)
+
+DO_MSA__WD__WS_WT(MULV_B, mulv.b)
+DO_MSA__WD__WS_WT(MULV_H, mulv.h)
+DO_MSA__WD__WS_WT(MULV_W, mulv.w)
+DO_MSA__WD__WS_WT(MULV_D, mulv.d)
+
+
+/*
+ * Int Subtract
+ * ------------
+ */
+
+DO_MSA__WD__WS_WT(ASUB_S_B, asub_s.b)
+DO_MSA__WD__WS_WT(ASUB_S_H, asub_s.h)
+DO_MSA__WD__WS_WT(ASUB_S_W, asub_s.w)
+DO_MSA__WD__WS_WT(ASUB_S_D, asub_s.d)
+
+DO_MSA__WD__WS_WT(ASUB_U_B, asub_u.b)
+DO_MSA__WD__WS_WT(ASUB_U_H, asub_u.h)
+DO_MSA__WD__WS_WT(ASUB_U_W, asub_u.w)
+DO_MSA__WD__WS_WT(ASUB_U_D, asub_u.d)
+
+DO_MSA__WD__WS_WT(HSUB_S_H, hsub_s.h)
+DO_MSA__WD__WS_WT(HSUB_S_W, hsub_s.w)
+DO_MSA__WD__WS_WT(HSUB_S_D, hsub_s.d)
+
+DO_MSA__WD__WS_WT(HSUB_U_H, hsub_u.h)
+DO_MSA__WD__WS_WT(HSUB_U_W, hsub_u.w)
+DO_MSA__WD__WS_WT(HSUB_U_D, hsub_u.d)
+
+DO_MSA__WD__WS_WT(SUBS_S_B, subs_s.b)
+DO_MSA__WD__WS_WT(SUBS_S_H, subs_s.h)
+DO_MSA__WD__WS_WT(SUBS_S_W, subs_s.w)
+DO_MSA__WD__WS_WT(SUBS_S_D, subs_s.d)
+
+DO_MSA__WD__WS_WT(SUBS_U_B, subs_u.b)
+DO_MSA__WD__WS_WT(SUBS_U_H, subs_u.h)
+DO_MSA__WD__WS_WT(SUBS_U_W, subs_u.w)
+DO_MSA__WD__WS_WT(SUBS_U_D, subs_u.d)
+
+DO_MSA__WD__WS_WT(SUBSUS_U_B, subsus_u.b)
+DO_MSA__WD__WS_WT(SUBSUS_U_H, subsus_u.h)
+DO_MSA__WD__WS_WT(SUBSUS_U_W, subsus_u.w)
+DO_MSA__WD__WS_WT(SUBSUS_U_D, subsus_u.d)
+
+DO_MSA__WD__WS_WT(SUBSUU_S_B, subsuu_s.b)
+DO_MSA__WD__WS_WT(SUBSUU_S_H, subsuu_s.h)
+DO_MSA__WD__WS_WT(SUBSUU_S_W, subsuu_s.w)
+DO_MSA__WD__WS_WT(SUBSUU_S_D, subsuu_s.d)
+
+DO_MSA__WD__WS_WT(SUBV_B, subv.b)
+DO_MSA__WD__WS_WT(SUBV_H, subv.h)
+DO_MSA__WD__WS_WT(SUBV_W, subv.w)
+DO_MSA__WD__WS_WT(SUBV_D, subv.d)
+
+
+/*
+ * Interleave
+ * ----------
+ */
+
+DO_MSA__WD__WS_WT(ILVEV_B, ilvev.b)
+DO_MSA__WD__WS_WT(ILVEV_H, ilvev.h)
+DO_MSA__WD__WS_WT(ILVEV_W, ilvev.w)
+DO_MSA__WD__WS_WT(ILVEV_D, ilvev.d)
+
+DO_MSA__WD__WS_WT(ILVOD_B, ilvod.b)
+DO_MSA__WD__WS_WT(ILVOD_H, ilvod.h)
+DO_MSA__WD__WS_WT(ILVOD_W, ilvod.w)
+DO_MSA__WD__WS_WT(ILVOD_D, ilvod.d)
+
+DO_MSA__WD__WS_WT(ILVL_B, ilvl.b)
+DO_MSA__WD__WS_WT(ILVL_H, ilvl.h)
+DO_MSA__WD__WS_WT(ILVL_W, ilvl.w)
+DO_MSA__WD__WS_WT(ILVL_D, ilvl.d)
+
+DO_MSA__WD__WS_WT(ILVR_B, ilvr.b)
+DO_MSA__WD__WS_WT(ILVR_H, ilvr.h)
+DO_MSA__WD__WS_WT(ILVR_W, ilvr.w)
+DO_MSA__WD__WS_WT(ILVR_D, ilvr.d)
+
+
+/*
+ * Logic
+ * -----
+ */
+
+DO_MSA__WD__WS_WT(AND_V, and.v)
+DO_MSA__WD__WS_WT(NOR_V, nor.v)
+DO_MSA__WD__WS_WT(OR_V, or.v)
+DO_MSA__WD__WS_WT(XOR_V, xor.v)
+
+
+/*
+ * Pack
+ * ----
+ */
 
 DO_MSA__WD__WS_WT(PCKEV_B, pckev.b)
 DO_MSA__WD__WS_WT(PCKEV_H, pckev.h)
@@ -233,6 +569,12 @@ DO_MSA__WD__WS_WT(VSHF_H, vshf.h)
 DO_MSA__WD__WS_WT(VSHF_W, vshf.w)
 DO_MSA__WD__WS_WT(VSHF_D, vshf.d)
 
+
+/*
+ * Shift
+ * -----
+ */
+
 DO_MSA__WD__WS_WT(SLL_B, sll.b)
 DO_MSA__WD__WS_WT(SLL_H, sll.h)
 DO_MSA__WD__WS_WT(SLL_W, sll.w)
@@ -258,20 +600,5 @@ DO_MSA__WD__WS_WT(SRLR_H, srlr.h)
 DO_MSA__WD__WS_WT(SRLR_W, srlr.w)
 DO_MSA__WD__WS_WT(SRLR_D, srlr.d)
 
-DO_MSA__WD__WS_WT(BMNZ_V, bmnz.v)
-DO_MSA__WD__WS_WT(BMZ_V, bmz.v)
-
-DO_MSA__WD__WS_WT(FMAX_W, fmax.w)
-DO_MSA__WD__WS_WT(FMAX_D, fmax.d)
-
-DO_MSA__WD__WS_WT(FMAX_A_W, fmax_a.w)
-DO_MSA__WD__WS_WT(FMAX_A_D, fmax_a.d)
-
-DO_MSA__WD__WS_WT(FMIN_W, fmin.w)
-DO_MSA__WD__WS_WT(FMIN_D, fmin.d)
-
-DO_MSA__WD__WS_WT(FMIN_A_W, fmin_a.w)
-DO_MSA__WD__WS_WT(FMIN_A_D, fmin_a.d)
-
 
 #endif
diff --git a/tests/tcg/mips/user/ase/msa/README b/tests/tcg/mips/user/ase/msa/README
new file mode 100644
index 0000000000..ca4f070ec1
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/README
@@ -0,0 +1,20 @@
+The tests in subdirectories of this directory are supposed to be compiled for
+mips64el MSA-enabled CPU (I6400, I6500), using an appropriate MIPS toolchain.
+For example:
+
+/opt/img/bin/mips-img-linux-gnu-gcc <source file>                  \
+-EL -static -mabi=64 -march=mips64r6 -mmsa  -o <executable file>
+
+They are to be executed using QEMU user mode, using command line:
+
+mips64el-linux-user/qemu-mips64el -cpu I6400 <executable file>
+
+Helper scripts test_msa_compile.sh and test_msa_run.sh are also
+provided. This is an example of compilation and execution of all
+MSA tests:
+
+cd <QEMU root directory>
+cd tests/tcg/mips/user/ase/msa
+
+./test_msa_compile.sh
+./test_msa_run.sh
diff --git a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_b.c b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_b.c
index c73ed2464e..287054e716 100644
--- a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_b.c
+++ b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_b.c
@@ -121,6 +121,8 @@ int32_t main(void)
         { 0x0100010104000201ULL, 0x0200020200000003ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < TEST_COUNT_TOTAL; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_d.c b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_d.c
index b10fb23e88..171abdf77c 100644
--- a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_d.c
+++ b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_d.c
@@ -121,6 +121,8 @@ int32_t main(void)
         { 0x0000000000000001ULL, 0x0000000000000002ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < TEST_COUNT_TOTAL; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_h.c b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_h.c
index c1dc0754e6..f260eba7f0 100644
--- a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_h.c
+++ b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_h.c
@@ -121,6 +121,8 @@ int32_t main(void)
         { 0x0001000100040002ULL, 0x0002000200000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < TEST_COUNT_TOTAL; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_w.c b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_w.c
index 4f7a556dec..cb1981cafc 100644
--- a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_w.c
+++ b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nloc_w.c
@@ -121,6 +121,8 @@ int32_t main(void)
         { 0x0000000100000004ULL, 0x0000000200000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < TEST_COUNT_TOTAL; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_b.c b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_b.c
index c202ba4856..96d05d59a1 100644
--- a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_b.c
+++ b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_b.c
@@ -121,6 +121,8 @@ int32_t main(void)
         { 0x0001000000010000ULL, 0x0001000001010300ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < TEST_COUNT_TOTAL; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_d.c b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_d.c
index 1edead2860..c709d5e852 100644
--- a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_d.c
+++ b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_d.c
@@ -121,6 +121,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < TEST_COUNT_TOTAL; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_h.c b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_h.c
index b2724c532e..c47e8bfe32 100644
--- a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_h.c
+++ b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_h.c
@@ -121,6 +121,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000010003ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < TEST_COUNT_TOTAL; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_w.c b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_w.c
index b547c73621..a8a471ba10 100644
--- a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_w.c
+++ b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_nlzc_w.c
@@ -121,6 +121,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000001ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < TEST_COUNT_TOTAL; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_b.c b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_b.c
index 5918e7fcf3..067a11b48d 100644
--- a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_b.c
+++ b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_b.c
@@ -121,6 +121,8 @@ int32_t main(void)
         { 0x0404050305040605ULL, 0x0404050504030405ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < TEST_COUNT_TOTAL; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_d.c b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_d.c
index 667ca3112a..c60635df70 100644
--- a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_d.c
+++ b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_d.c
@@ -121,6 +121,8 @@ int32_t main(void)
         { 0x0000000000000024ULL, 0x0000000000000022ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < TEST_COUNT_TOTAL; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_h.c b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_h.c
index 2951f86983..8fba20a621 100644
--- a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_h.c
+++ b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_h.c
@@ -121,6 +121,8 @@ int32_t main(void)
         { 0x000800080009000bULL, 0x0008000a00070009ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < TEST_COUNT_TOTAL; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_w.c b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_w.c
index ab43ea92cd..24c44aefa1 100644
--- a/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_w.c
+++ b/tests/tcg/mips/user/ase/msa/bit-count/test_msa_pcnt_w.c
@@ -121,6 +121,8 @@ int32_t main(void)
         { 0x0000001000000014ULL, 0x0000001200000010ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < TEST_COUNT_TOTAL; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_b.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_b.c
new file mode 100644
index 0000000000..e27d3b5f13
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_b.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction BCLR.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BCLR.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7f7f7f7f7f7f7f7fULL, 0x7f7f7f7f7f7f7f7fULL, },    /*   0  */
+        { 0xfefefefefefefefeULL, 0xfefefefefefefefeULL, },
+        { 0xfbfbfbfbfbfbfbfbULL, 0xfbfbfbfbfbfbfbfbULL, },
+        { 0xdfdfdfdfdfdfdfdfULL, 0xdfdfdfdfdfdfdfdfULL, },
+        { 0xefefefefefefefefULL, 0xefefefefefefefefULL, },
+        { 0xf7f7f7f7f7f7f7f7ULL, 0xf7f7f7f7f7f7f7f7ULL, },
+        { 0xf7bffef7bffef7bfULL, 0xfef7bffef7bffef7ULL, },
+        { 0xeffd7feffd7feffdULL, 0x7feffd7feffd7fefULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2a2a2a2a2a2a2a2aULL, 0x2a2a2a2a2a2a2a2aULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x8a8a8a8a8a8a8a8aULL, 0x8a8a8a8a8a8a8a8aULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xa2a2a2a2a2a2a2a2ULL, 0xa2a2a2a2a2a2a2a2ULL, },
+        { 0xa2aaaaa2aaaaa2aaULL, 0xaaa2aaaaa2aaaaa2ULL, },
+        { 0xaaa82aaaa82aaaa8ULL, 0x2aaaa82aaaa82aaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5454545454545454ULL, 0x5454545454545454ULL, },
+        { 0x5151515151515151ULL, 0x5151515151515151ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x4545454545454545ULL, 0x4545454545454545ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5515545515545515ULL, 0x5455155455155455ULL, },
+        { 0x4555554555554555ULL, 0x5545555545555545ULL, },
+        { 0x4c4c4c4c4c4c4c4cULL, 0x4c4c4c4c4c4c4c4cULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xc8c8c8c8c8c8c8c8ULL, 0xc8c8c8c8c8c8c8c8ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xc4c4c4c4c4c4c4c4ULL, 0xc4c4c4c4c4c4c4c4ULL, },
+        { 0xc48cccc48cccc48cULL, 0xccc48cccc48cccc4ULL, },
+        { 0xcccc4ccccc4cccccULL, 0x4ccccc4ccccc4cccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3232323232323232ULL, 0x3232323232323232ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x1313131313131313ULL, 0x1313131313131313ULL, },
+        { 0x2323232323232323ULL, 0x2323232323232323ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333323333323333ULL, 0x3233333233333233ULL, },
+        { 0x2331332331332331ULL, 0x3323313323313323ULL, },
+        { 0x630e38630e38630eULL, 0x38630e38630e3863ULL, },    /*  48  */
+        { 0xe28e38e28e38e28eULL, 0x38e28e38e28e38e2ULL, },
+        { 0xe38a38e38a38e38aULL, 0x38e38a38e38a38e3ULL, },
+        { 0xc38e18c38e18c38eULL, 0x18c38e18c38e18c3ULL, },
+        { 0xe38e28e38e28e38eULL, 0x28e38e28e38e28e3ULL, },
+        { 0xe38630e38630e386ULL, 0x30e38630e38630e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38c38e38c38e38cULL, 0x38e38c38e38c38e3ULL, },
+        { 0x1c71471c71471c71ULL, 0x471c71471c71471cULL, },    /*  56  */
+        { 0x1c70c61c70c61c70ULL, 0xc61c70c61c70c61cULL, },
+        { 0x1871c31871c31871ULL, 0xc31871c31871c318ULL, },
+        { 0x1c51c71c51c71c51ULL, 0xc71c51c71c51c71cULL, },
+        { 0x0c61c70c61c70c61ULL, 0xc70c61c70c61c70cULL, },
+        { 0x1471c71471c71471ULL, 0xc71471c71471c714ULL, },
+        { 0x1431c61431c61431ULL, 0xc61431c61431c614ULL, },
+        { 0x0c71470c71470c71ULL, 0x470c71470c71470cULL, },
+        { 0x886aa6cc28625540ULL, 0x4367031ebe73b00cULL, },    /*  64  */
+        { 0x802ae6c408625540ULL, 0x4b67035ade7bb00cULL, },
+        { 0x886aa6c828625540ULL, 0x4b660b5ef673900cULL, },
+        { 0x886aa6cc28605100ULL, 0x4b650a5efc7bb00cULL, },
+        { 0xfaba00634c93c708ULL, 0x1277b31a153752ecULL, },
+        { 0xf3be00634d934708ULL, 0x1277b31a153f52ecULL, },
+        { 0xebba00634d13c708ULL, 0x12f6bb1a153752ecULL, },
+        { 0xfa3e00430d91c308ULL, 0x12f5ba1a153b52fcULL, },
+        { 0xac5aaeaab8cb8b80ULL, 0x2758c6bfab232404ULL, },    /*  72  */
+        { 0xa41aaea299c70b80ULL, 0x2358c6fb8b2b2104ULL, },
+        { 0xac5aaeaab94f8380ULL, 0x27d8867fa3230504ULL, },
+        { 0xac5aae8ab9cd8b80ULL, 0x07d8c6fea92b2114ULL, },
+        { 0x704b164d5e31c24eULL, 0x85718098a942e2a0ULL, },
+        { 0x700f16455e31624eULL, 0x897180d88942e2a0ULL, },
+        { 0x604b16495c31e24eULL, 0x0df08858a142c2a0ULL, },
+        { 0x704f164d1e31e20eULL, 0x8df188d8a942e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_B(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_B(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_d.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_d.c
new file mode 100644
index 0000000000..3b8c38494b
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction BCLR.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BCLR.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL, },    /*   0  */
+        { 0xfffffffffffffffeULL, 0xfffffffffffffffeULL, },
+        { 0xfffffbffffffffffULL, 0xfffffbffffffffffULL, },
+        { 0xffffffffffdfffffULL, 0xffffffffffdfffffULL, },
+        { 0xffffffffffffefffULL, 0xffffffffffffefffULL, },
+        { 0xfff7ffffffffffffULL, 0xfff7ffffffffffffULL, },
+        { 0xffffffffffffbfffULL, 0xfffffff7ffffffffULL, },
+        { 0xfffdffffffffffffULL, 0xffffffffefffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2aaaaaaaaaaaaaaaULL, 0x2aaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaa8aaaaaULL, 0xaaaaaaaaaa8aaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaa2aaaaaaaaaaaaULL, 0xaaa2aaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaa2aaaaaaaaULL, },
+        { 0xaaa8aaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555555555554ULL, 0x5555555555555554ULL, },
+        { 0x5555515555555555ULL, 0x5555515555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555554555ULL, 0x5555555555554555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555551555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555545555555ULL, },
+        { 0x4cccccccccccccccULL, 0x4cccccccccccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccc8ccccccccccULL, 0xccccc8ccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccc4ccccccccccccULL, 0xccc4ccccccccccccULL, },
+        { 0xcccccccccccc8cccULL, 0xccccccc4ccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333333333332ULL, 0x3333333333333332ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333133333ULL, 0x3333333333133333ULL, },
+        { 0x3333333333332333ULL, 0x3333333333332333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3331333333333333ULL, 0x3333333323333333ULL, },
+        { 0x638e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e2ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38a38e38e38e3ULL, },
+        { 0xe38e38e38e18e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e28e3ULL, },
+        { 0xe38638e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38a38eULL, 0x38e38e30e38e38e3ULL, },
+        { 0xe38c38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x471c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c70ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c31c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c51c71cULL, },
+        { 0x1c71c71c71c70c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71471c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c70c71c71cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7ba00cULL, },    /*  64  */
+        { 0x886ae6cc28625440ULL, 0x4b670b5efe7bb00cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe6bb00cULL, },
+        { 0x886ae6cc28621540ULL, 0x4b670b5efe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f42fcULL, },
+        { 0xfbbe00634d93c608ULL, 0x02f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a152f52fcULL, },
+        { 0xfbbe00634d938708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },    /*  72  */
+        { 0xac5aaeaab9cf8a80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6feab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+        { 0x704f164d5e31a24eULL, 0x8df188d8a942e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_D(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_D(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_h.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_h.c
new file mode 100644
index 0000000000..d1db60fb02
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_h.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction BCLR.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BCLR.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7fff7fff7fff7fffULL, 0x7fff7fff7fff7fffULL, },    /*   0  */
+        { 0xfffefffefffefffeULL, 0xfffefffefffefffeULL, },
+        { 0xfbfffbfffbfffbffULL, 0xfbfffbfffbfffbffULL, },
+        { 0xffdfffdfffdfffdfULL, 0xffdfffdfffdfffdfULL, },
+        { 0xefffefffefffefffULL, 0xefffefffefffefffULL, },
+        { 0xfff7fff7fff7fff7ULL, 0xfff7fff7fff7fff7ULL, },
+        { 0xbffffff7feffbfffULL, 0xfff7feffbffffff7ULL, },
+        { 0xfffdefffff7ffffdULL, 0xefffff7ffffdefffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2aaa2aaa2aaa2aaaULL, 0x2aaa2aaa2aaa2aaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaa8aaa8aaa8aaa8aULL, 0xaa8aaa8aaa8aaa8aULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaa2aaa2aaa2aaa2ULL, 0xaaa2aaa2aaa2aaa2ULL, },
+        { 0xaaaaaaa2aaaaaaaaULL, 0xaaa2aaaaaaaaaaa2ULL, },
+        { 0xaaa8aaaaaa2aaaa8ULL, 0xaaaaaa2aaaa8aaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5554555455545554ULL, 0x5554555455545554ULL, },
+        { 0x5155515551555155ULL, 0x5155515551555155ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x4555455545554555ULL, 0x4555455545554555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x1555555554551555ULL, 0x5555545515555555ULL, },
+        { 0x5555455555555555ULL, 0x4555555555554555ULL, },
+        { 0x4ccc4ccc4ccc4cccULL, 0x4ccc4ccc4ccc4cccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xc8ccc8ccc8ccc8ccULL, 0xc8ccc8ccc8ccc8ccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccc4ccc4ccc4ccc4ULL, 0xccc4ccc4ccc4ccc4ULL, },
+        { 0x8cccccc4cccc8cccULL, 0xccc4cccc8cccccc4ULL, },
+        { 0xcccccccccc4cccccULL, 0xcccccc4cccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3332333233323332ULL, 0x3332333233323332ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3313331333133313ULL, 0x3313331333133313ULL, },
+        { 0x2333233323332333ULL, 0x2333233323332333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333332333333ULL, 0x3333323333333333ULL, },
+        { 0x3331233333333331ULL, 0x2333333333312333ULL, },
+        { 0x638e38e30e38638eULL, 0x38e30e38638e38e3ULL, },    /*  48  */
+        { 0xe38e38e28e38e38eULL, 0x38e28e38e38e38e2ULL, },
+        { 0xe38e38e38a38e38eULL, 0x38e38a38e38e38e3ULL, },
+        { 0xe38e38c38e18e38eULL, 0x38c38e18e38e38c3ULL, },
+        { 0xe38e28e38e38e38eULL, 0x28e38e38e38e28e3ULL, },
+        { 0xe38638e38e30e386ULL, 0x38e38e30e38638e3ULL, },
+        { 0xa38e38e38e38a38eULL, 0x38e38e38a38e38e3ULL, },
+        { 0xe38c28e38e38e38cULL, 0x28e38e38e38c28e3ULL, },
+        { 0x1c71471c71c71c71ULL, 0x471c71c71c71471cULL, },    /*  56  */
+        { 0x1c70c71c71c61c70ULL, 0xc71c71c61c70c71cULL, },
+        { 0x1871c31c71c71871ULL, 0xc31c71c71871c31cULL, },
+        { 0x1c51c71c71c71c51ULL, 0xc71c71c71c51c71cULL, },
+        { 0x0c71c71c61c70c71ULL, 0xc71c61c70c71c71cULL, },
+        { 0x1c71c71471c71c71ULL, 0xc71471c71c71c714ULL, },
+        { 0x1c71c71470c71c71ULL, 0xc71470c71c71c714ULL, },
+        { 0x1c71c71c71471c71ULL, 0xc71c71471c71c71cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5ef67ba00cULL, },    /*  64  */
+        { 0x886ae6c428625440ULL, 0x4b670b5e7e7ba00cULL, },
+        { 0x886ae2cc28625540ULL, 0x4a670b5ef67bb00cULL, },
+        { 0x086ac6cc28601540ULL, 0x4b650a5efe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x1277bb1a153f42fcULL, },
+        { 0xbbbe00634d93c608ULL, 0x1277bb1a153f42fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f73b1a153f52ecULL, },
+        { 0x7bbe00634d918708ULL, 0x12f5ba1a153b52fcULL, },
+        { 0xa85aaeaab9cb8b80ULL, 0x275886ffa32b2514ULL, },    /*  72  */
+        { 0xac5aaea2b9c78a80ULL, 0x2758c2ff2b2b2514ULL, },
+        { 0xa85aaaaa39cf8b80ULL, 0x26d846ffa32b2504ULL, },
+        { 0x2c5a8eaab9cd8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f064d5e31e24eULL, 0x8d7188d8a142e2a0ULL, },
+        { 0x304f16455e31e24eULL, 0x8d7188d82942e2a0ULL, },
+        { 0x704f124d5e31e24eULL, 0x8cf108d8a142e2a0ULL, },
+        { 0x704f164d5e31a24eULL, 0x8df188d8a942e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_H(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_H(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_w.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_w.c
new file mode 100644
index 0000000000..54087b7c6e
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction BCLR.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BCLR.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7fffffff7fffffffULL, 0x7fffffff7fffffffULL, },    /*   0  */
+        { 0xfffffffefffffffeULL, 0xfffffffefffffffeULL, },
+        { 0xfffffbfffffffbffULL, 0xfffffbfffffffbffULL, },
+        { 0xffdfffffffdfffffULL, 0xffdfffffffdfffffULL, },
+        { 0xffffefffffffefffULL, 0xffffefffffffefffULL, },
+        { 0xfff7fffffff7ffffULL, 0xfff7fffffff7ffffULL, },
+        { 0xfffffff7ffffbfffULL, 0xfefffffffffffff7ULL, },
+        { 0xeffffffffffdffffULL, 0xffffff7fefffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2aaaaaaa2aaaaaaaULL, 0x2aaaaaaa2aaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaa8aaaaaaa8aaaaaULL, 0xaa8aaaaaaa8aaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaa2aaaaaaa2aaaaULL, 0xaaa2aaaaaaa2aaaaULL, },
+        { 0xaaaaaaa2aaaaaaaaULL, 0xaaaaaaaaaaaaaaa2ULL, },
+        { 0xaaaaaaaaaaa8aaaaULL, 0xaaaaaa2aaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555455555554ULL, 0x5555555455555554ULL, },
+        { 0x5555515555555155ULL, 0x5555515555555155ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555455555554555ULL, 0x5555455555554555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555551555ULL, 0x5455555555555555ULL, },
+        { 0x4555555555555555ULL, 0x5555555545555555ULL, },
+        { 0x4ccccccc4cccccccULL, 0x4ccccccc4cccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccc8ccccccc8ccULL, 0xccccc8ccccccc8ccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccc4ccccccc4ccccULL, 0xccc4ccccccc4ccccULL, },
+        { 0xccccccc4cccc8cccULL, 0xccccccccccccccc4ULL, },
+        { 0xccccccccccccccccULL, 0xcccccc4cccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333233333332ULL, 0x3333333233333332ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3313333333133333ULL, 0x3313333333133333ULL, },
+        { 0x3333233333332333ULL, 0x3333233333332333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3233333333333333ULL, },
+        { 0x2333333333313333ULL, 0x3333333323333333ULL, },
+        { 0x638e38e30e38e38eULL, 0x38e38e38638e38e3ULL, },    /*  48  */
+        { 0xe38e38e28e38e38eULL, 0x38e38e38e38e38e2ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38a38e38e38e3ULL, },
+        { 0xe38e38e38e18e38eULL, 0x38c38e38e38e38e3ULL, },
+        { 0xe38e28e38e38e38eULL, 0x38e38e38e38e28e3ULL, },
+        { 0xe38638e38e30e38eULL, 0x38e38e38e38638e3ULL, },
+        { 0xe38e38e38e38a38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x471c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c70ULL, 0xc71c71c61c71c71cULL, },
+        { 0x1c71c31c71c71871ULL, 0xc71c71c71c71c31cULL, },
+        { 0x1c51c71c71c71c71ULL, 0xc71c71c71c51c71cULL, },
+        { 0x1c71c71c71c70c71ULL, 0xc71c61c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71471c71c71c71cULL, },
+        { 0x1c71c71471c71c71ULL, 0xc61c71c71c71c714ULL, },
+        { 0x0c71c71c71c51c71ULL, 0xc71c71470c71c71cULL, },
+        { 0x886ae6cc28625540ULL, 0x0b670b5efe7ba00cULL, },    /*  64  */
+        { 0x886ae6c428625440ULL, 0x4b670b5eee7bb00cULL, },
+        { 0x886ae2cc28625540ULL, 0x4b670b5efe6bb00cULL, },
+        { 0x886ac6cc28621540ULL, 0x4a670b5efe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f42fcULL, },
+        { 0xfbbe00634d93c608ULL, 0x12f7bb1a053f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a152f52fcULL, },
+        { 0xfbbe00634d938708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },    /*  72  */
+        { 0xac5aaea2b9cf8a80ULL, 0x23d8c6ffab2b2514ULL, },
+        { 0xac5aaaaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5a8eaab9cf8b80ULL, 0x26d8c6ffab2b2514ULL, },
+        { 0x704f064d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+        { 0x704f16455e31e24eULL, 0x89f188d8a942e2a0ULL, },
+        { 0x704f124d5e31e24eULL, 0x0df188d8a942e2a0ULL, },
+        { 0x704f164d5e31a24eULL, 0x8cf188d8a942e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_W(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_W(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_b.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_b.c
new file mode 100644
index 0000000000..733bc24867
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_b.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction BNEG.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BNEG.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7f7f7f7f7f7f7f7fULL, 0x7f7f7f7f7f7f7f7fULL, },    /*   0  */
+        { 0xfefefefefefefefeULL, 0xfefefefefefefefeULL, },
+        { 0xfbfbfbfbfbfbfbfbULL, 0xfbfbfbfbfbfbfbfbULL, },
+        { 0xdfdfdfdfdfdfdfdfULL, 0xdfdfdfdfdfdfdfdfULL, },
+        { 0xefefefefefefefefULL, 0xefefefefefefefefULL, },
+        { 0xf7f7f7f7f7f7f7f7ULL, 0xf7f7f7f7f7f7f7f7ULL, },
+        { 0xf7bffef7bffef7bfULL, 0xfef7bffef7bffef7ULL, },
+        { 0xeffd7feffd7feffdULL, 0x7feffd7feffd7fefULL, },
+        { 0x8080808080808080ULL, 0x8080808080808080ULL, },    /*   8  */
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },
+        { 0x0404040404040404ULL, 0x0404040404040404ULL, },
+        { 0x2020202020202020ULL, 0x2020202020202020ULL, },
+        { 0x1010101010101010ULL, 0x1010101010101010ULL, },
+        { 0x0808080808080808ULL, 0x0808080808080808ULL, },
+        { 0x0840010840010840ULL, 0x0108400108400108ULL, },
+        { 0x1002801002801002ULL, 0x8010028010028010ULL, },
+        { 0x2a2a2a2a2a2a2a2aULL, 0x2a2a2a2a2a2a2a2aULL, },    /*  16  */
+        { 0xababababababababULL, 0xababababababababULL, },
+        { 0xaeaeaeaeaeaeaeaeULL, 0xaeaeaeaeaeaeaeaeULL, },
+        { 0x8a8a8a8a8a8a8a8aULL, 0x8a8a8a8a8a8a8a8aULL, },
+        { 0xbabababababababaULL, 0xbabababababababaULL, },
+        { 0xa2a2a2a2a2a2a2a2ULL, 0xa2a2a2a2a2a2a2a2ULL, },
+        { 0xa2eaaba2eaaba2eaULL, 0xaba2eaaba2eaaba2ULL, },
+        { 0xbaa82abaa82abaa8ULL, 0x2abaa82abaa82abaULL, },
+        { 0xd5d5d5d5d5d5d5d5ULL, 0xd5d5d5d5d5d5d5d5ULL, },    /*  24  */
+        { 0x5454545454545454ULL, 0x5454545454545454ULL, },
+        { 0x5151515151515151ULL, 0x5151515151515151ULL, },
+        { 0x7575757575757575ULL, 0x7575757575757575ULL, },
+        { 0x4545454545454545ULL, 0x4545454545454545ULL, },
+        { 0x5d5d5d5d5d5d5d5dULL, 0x5d5d5d5d5d5d5d5dULL, },
+        { 0x5d15545d15545d15ULL, 0x545d15545d15545dULL, },
+        { 0x4557d54557d54557ULL, 0xd54557d54557d545ULL, },
+        { 0x4c4c4c4c4c4c4c4cULL, 0x4c4c4c4c4c4c4c4cULL, },    /*  32  */
+        { 0xcdcdcdcdcdcdcdcdULL, 0xcdcdcdcdcdcdcdcdULL, },
+        { 0xc8c8c8c8c8c8c8c8ULL, 0xc8c8c8c8c8c8c8c8ULL, },
+        { 0xececececececececULL, 0xececececececececULL, },
+        { 0xdcdcdcdcdcdcdcdcULL, 0xdcdcdcdcdcdcdcdcULL, },
+        { 0xc4c4c4c4c4c4c4c4ULL, 0xc4c4c4c4c4c4c4c4ULL, },
+        { 0xc48ccdc48ccdc48cULL, 0xcdc48ccdc48ccdc4ULL, },
+        { 0xdcce4cdcce4cdcceULL, 0x4cdcce4cdcce4cdcULL, },
+        { 0xb3b3b3b3b3b3b3b3ULL, 0xb3b3b3b3b3b3b3b3ULL, },    /*  40  */
+        { 0x3232323232323232ULL, 0x3232323232323232ULL, },
+        { 0x3737373737373737ULL, 0x3737373737373737ULL, },
+        { 0x1313131313131313ULL, 0x1313131313131313ULL, },
+        { 0x2323232323232323ULL, 0x2323232323232323ULL, },
+        { 0x3b3b3b3b3b3b3b3bULL, 0x3b3b3b3b3b3b3b3bULL, },
+        { 0x3b73323b73323b73ULL, 0x323b73323b73323bULL, },
+        { 0x2331b32331b32331ULL, 0xb32331b32331b323ULL, },
+        { 0x630eb8630eb8630eULL, 0xb8630eb8630eb863ULL, },    /*  48  */
+        { 0xe28f39e28f39e28fULL, 0x39e28f39e28f39e2ULL, },
+        { 0xe78a3ce78a3ce78aULL, 0x3ce78a3ce78a3ce7ULL, },
+        { 0xc3ae18c3ae18c3aeULL, 0x18c3ae18c3ae18c3ULL, },
+        { 0xf39e28f39e28f39eULL, 0x28f39e28f39e28f3ULL, },
+        { 0xeb8630eb8630eb86ULL, 0x30eb8630eb8630ebULL, },
+        { 0xebce39ebce39ebceULL, 0x39ebce39ebce39ebULL, },
+        { 0xf38cb8f38cb8f38cULL, 0xb8f38cb8f38cb8f3ULL, },
+        { 0x9cf1479cf1479cf1ULL, 0x479cf1479cf1479cULL, },    /*  56  */
+        { 0x1d70c61d70c61d70ULL, 0xc61d70c61d70c61dULL, },
+        { 0x1875c31875c31875ULL, 0xc31875c31875c318ULL, },
+        { 0x3c51e73c51e73c51ULL, 0xe73c51e73c51e73cULL, },
+        { 0x0c61d70c61d70c61ULL, 0xd70c61d70c61d70cULL, },
+        { 0x1479cf1479cf1479ULL, 0xcf1479cf1479cf14ULL, },
+        { 0x1431c61431c61431ULL, 0xc61431c61431c614ULL, },
+        { 0x0c73470c73470c73ULL, 0x470c73470c73470cULL, },
+        { 0x896ea6dc29667541ULL, 0x43e7031ebe73b11cULL, },    /*  64  */
+        { 0x802ae7c4086ad541ULL, 0x4fe7035adefbb41cULL, },
+        { 0x986ea6c82ae25d41ULL, 0xcb664bdef673901cULL, },
+        { 0x89eaa6ec68605100ULL, 0x6b650a5ffc7fb40dULL, },
+        { 0xfaba40734c97e709ULL, 0x1a77b35a553753ecULL, },
+        { 0xf3fe016b6d9b4709ULL, 0x1677b31e35bf56ecULL, },
+        { 0xebba40674f13cf09ULL, 0x92f6fb9a1d3772ecULL, },
+        { 0xfa3e40430d91c348ULL, 0x32f5ba1b173b56fdULL, },
+        { 0xad5eeebab8cbab81ULL, 0x2f58cebfeb232404ULL, },    /*  72  */
+        { 0xa41aafa299c70b81ULL, 0x2358cefb8bab2104ULL, },
+        { 0xbc5eeeaebb4f8381ULL, 0xa7d9867fa3230504ULL, },
+        { 0xaddaee8af9cd8fc0ULL, 0x07dac7fea92f2115ULL, },
+        { 0x714b565d5f35c24fULL, 0x85718098e94ae3b0ULL, },
+        { 0x780f17457e39624fULL, 0x897180dc89c2e6b0ULL, },
+        { 0x604b56495cb1ea4fULL, 0x0df0c858a14ac2b0ULL, },
+        { 0x71cf566d1e33e60eULL, 0xadf389d9ab46e6a1ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_B(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_B(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_d.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_d.c
new file mode 100644
index 0000000000..fc092215d5
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction BNEG.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BNEG.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL, },    /*   0  */
+        { 0xfffffffffffffffeULL, 0xfffffffffffffffeULL, },
+        { 0xfffffbffffffffffULL, 0xfffffbffffffffffULL, },
+        { 0xffffffffffdfffffULL, 0xffffffffffdfffffULL, },
+        { 0xffffffffffffefffULL, 0xffffffffffffefffULL, },
+        { 0xfff7ffffffffffffULL, 0xfff7ffffffffffffULL, },
+        { 0xffffffffffffbfffULL, 0xfffffff7ffffffffULL, },
+        { 0xfffdffffffffffffULL, 0xffffffffefffffffULL, },
+        { 0x8000000000000000ULL, 0x8000000000000000ULL, },    /*   8  */
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },
+        { 0x0000040000000000ULL, 0x0000040000000000ULL, },
+        { 0x0000000000200000ULL, 0x0000000000200000ULL, },
+        { 0x0000000000001000ULL, 0x0000000000001000ULL, },
+        { 0x0008000000000000ULL, 0x0008000000000000ULL, },
+        { 0x0000000000004000ULL, 0x0000000800000000ULL, },
+        { 0x0002000000000000ULL, 0x0000000010000000ULL, },
+        { 0x2aaaaaaaaaaaaaaaULL, 0x2aaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaabULL, 0xaaaaaaaaaaaaaaabULL, },
+        { 0xaaaaaeaaaaaaaaaaULL, 0xaaaaaeaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaa8aaaaaULL, 0xaaaaaaaaaa8aaaaaULL, },
+        { 0xaaaaaaaaaaaabaaaULL, 0xaaaaaaaaaaaabaaaULL, },
+        { 0xaaa2aaaaaaaaaaaaULL, 0xaaa2aaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaeaaaULL, 0xaaaaaaa2aaaaaaaaULL, },
+        { 0xaaa8aaaaaaaaaaaaULL, 0xaaaaaaaabaaaaaaaULL, },
+        { 0xd555555555555555ULL, 0xd555555555555555ULL, },    /*  24  */
+        { 0x5555555555555554ULL, 0x5555555555555554ULL, },
+        { 0x5555515555555555ULL, 0x5555515555555555ULL, },
+        { 0x5555555555755555ULL, 0x5555555555755555ULL, },
+        { 0x5555555555554555ULL, 0x5555555555554555ULL, },
+        { 0x555d555555555555ULL, 0x555d555555555555ULL, },
+        { 0x5555555555551555ULL, 0x5555555d55555555ULL, },
+        { 0x5557555555555555ULL, 0x5555555545555555ULL, },
+        { 0x4cccccccccccccccULL, 0x4cccccccccccccccULL, },    /*  32  */
+        { 0xcccccccccccccccdULL, 0xcccccccccccccccdULL, },
+        { 0xccccc8ccccccccccULL, 0xccccc8ccccccccccULL, },
+        { 0xccccccccccecccccULL, 0xccccccccccecccccULL, },
+        { 0xccccccccccccdcccULL, 0xccccccccccccdcccULL, },
+        { 0xccc4ccccccccccccULL, 0xccc4ccccccccccccULL, },
+        { 0xcccccccccccc8cccULL, 0xccccccc4ccccccccULL, },
+        { 0xccceccccccccccccULL, 0xccccccccdcccccccULL, },
+        { 0xb333333333333333ULL, 0xb333333333333333ULL, },    /*  40  */
+        { 0x3333333333333332ULL, 0x3333333333333332ULL, },
+        { 0x3333373333333333ULL, 0x3333373333333333ULL, },
+        { 0x3333333333133333ULL, 0x3333333333133333ULL, },
+        { 0x3333333333332333ULL, 0x3333333333332333ULL, },
+        { 0x333b333333333333ULL, 0x333b333333333333ULL, },
+        { 0x3333333333337333ULL, 0x3333333b33333333ULL, },
+        { 0x3331333333333333ULL, 0x3333333323333333ULL, },
+        { 0x638e38e38e38e38eULL, 0xb8e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38fULL, 0x38e38e38e38e38e2ULL, },
+        { 0xe38e3ce38e38e38eULL, 0x38e38a38e38e38e3ULL, },
+        { 0xe38e38e38e18e38eULL, 0x38e38e38e3ae38e3ULL, },
+        { 0xe38e38e38e38f38eULL, 0x38e38e38e38e28e3ULL, },
+        { 0xe38638e38e38e38eULL, 0x38eb8e38e38e38e3ULL, },
+        { 0xe38e38e38e38a38eULL, 0x38e38e30e38e38e3ULL, },
+        { 0xe38c38e38e38e38eULL, 0x38e38e38f38e38e3ULL, },
+        { 0x9c71c71c71c71c71ULL, 0x471c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c70ULL, 0xc71c71c71c71c71dULL, },
+        { 0x1c71c31c71c71c71ULL, 0xc71c75c71c71c71cULL, },
+        { 0x1c71c71c71e71c71ULL, 0xc71c71c71c51c71cULL, },
+        { 0x1c71c71c71c70c71ULL, 0xc71c71c71c71d71cULL, },
+        { 0x1c79c71c71c71c71ULL, 0xc71471c71c71c71cULL, },
+        { 0x1c71c71c71c75c71ULL, 0xc71c71cf1c71c71cULL, },
+        { 0x1c73c71c71c71c71ULL, 0xc71c71c70c71c71cULL, },
+        { 0x886ae6cc28625541ULL, 0x4b670b5efe7ba00cULL, },    /*  64  */
+        { 0x886ae6cc28625440ULL, 0x5b670b5efe7bb00cULL, },
+        { 0x886ae6cc28625541ULL, 0x4b670b5efe6bb00cULL, },
+        { 0x886ae6cc28621540ULL, 0x4b670b5ffe7bb00cULL, },
+        { 0xfbbe00634d93c709ULL, 0x12f7bb1a153f42fcULL, },
+        { 0xfbbe00634d93c608ULL, 0x02f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c709ULL, 0x12f7bb1a152f52fcULL, },
+        { 0xfbbe00634d938708ULL, 0x12f7bb1b153f52fcULL, },
+        { 0xac5aaeaab9cf8b81ULL, 0x27d8c6ffab2b3514ULL, },    /*  72  */
+        { 0xac5aaeaab9cf8a80ULL, 0x37d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b81ULL, 0x27d8c6ffab3b2514ULL, },
+        { 0xac5aaeaab9cfcb80ULL, 0x27d8c6feab2b2514ULL, },
+        { 0x704f164d5e31e24fULL, 0x8df188d8a942f2a0ULL, },
+        { 0x704f164d5e31e34eULL, 0x9df188d8a942e2a0ULL, },
+        { 0x704f164d5e31e24fULL, 0x8df188d8a952e2a0ULL, },
+        { 0x704f164d5e31a24eULL, 0x8df188d9a942e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_D(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_D(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_h.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_h.c
new file mode 100644
index 0000000000..d8347a1884
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_h.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction BNEG.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BNEG.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7fff7fff7fff7fffULL, 0x7fff7fff7fff7fffULL, },    /*   0  */
+        { 0xfffefffefffefffeULL, 0xfffefffefffefffeULL, },
+        { 0xfbfffbfffbfffbffULL, 0xfbfffbfffbfffbffULL, },
+        { 0xffdfffdfffdfffdfULL, 0xffdfffdfffdfffdfULL, },
+        { 0xefffefffefffefffULL, 0xefffefffefffefffULL, },
+        { 0xfff7fff7fff7fff7ULL, 0xfff7fff7fff7fff7ULL, },
+        { 0xbffffff7feffbfffULL, 0xfff7feffbffffff7ULL, },
+        { 0xfffdefffff7ffffdULL, 0xefffff7ffffdefffULL, },
+        { 0x8000800080008000ULL, 0x8000800080008000ULL, },    /*   8  */
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },
+        { 0x0400040004000400ULL, 0x0400040004000400ULL, },
+        { 0x0020002000200020ULL, 0x0020002000200020ULL, },
+        { 0x1000100010001000ULL, 0x1000100010001000ULL, },
+        { 0x0008000800080008ULL, 0x0008000800080008ULL, },
+        { 0x4000000801004000ULL, 0x0008010040000008ULL, },
+        { 0x0002100000800002ULL, 0x1000008000021000ULL, },
+        { 0x2aaa2aaa2aaa2aaaULL, 0x2aaa2aaa2aaa2aaaULL, },    /*  16  */
+        { 0xaaabaaabaaabaaabULL, 0xaaabaaabaaabaaabULL, },
+        { 0xaeaaaeaaaeaaaeaaULL, 0xaeaaaeaaaeaaaeaaULL, },
+        { 0xaa8aaa8aaa8aaa8aULL, 0xaa8aaa8aaa8aaa8aULL, },
+        { 0xbaaabaaabaaabaaaULL, 0xbaaabaaabaaabaaaULL, },
+        { 0xaaa2aaa2aaa2aaa2ULL, 0xaaa2aaa2aaa2aaa2ULL, },
+        { 0xeaaaaaa2abaaeaaaULL, 0xaaa2abaaeaaaaaa2ULL, },
+        { 0xaaa8baaaaa2aaaa8ULL, 0xbaaaaa2aaaa8baaaULL, },
+        { 0xd555d555d555d555ULL, 0xd555d555d555d555ULL, },    /*  24  */
+        { 0x5554555455545554ULL, 0x5554555455545554ULL, },
+        { 0x5155515551555155ULL, 0x5155515551555155ULL, },
+        { 0x5575557555755575ULL, 0x5575557555755575ULL, },
+        { 0x4555455545554555ULL, 0x4555455545554555ULL, },
+        { 0x555d555d555d555dULL, 0x555d555d555d555dULL, },
+        { 0x1555555d54551555ULL, 0x555d54551555555dULL, },
+        { 0x5557455555d55557ULL, 0x455555d555574555ULL, },
+        { 0x4ccc4ccc4ccc4cccULL, 0x4ccc4ccc4ccc4cccULL, },    /*  32  */
+        { 0xcccdcccdcccdcccdULL, 0xcccdcccdcccdcccdULL, },
+        { 0xc8ccc8ccc8ccc8ccULL, 0xc8ccc8ccc8ccc8ccULL, },
+        { 0xccecccecccecccecULL, 0xccecccecccecccecULL, },
+        { 0xdcccdcccdcccdcccULL, 0xdcccdcccdcccdcccULL, },
+        { 0xccc4ccc4ccc4ccc4ULL, 0xccc4ccc4ccc4ccc4ULL, },
+        { 0x8cccccc4cdcc8cccULL, 0xccc4cdcc8cccccc4ULL, },
+        { 0xcccedccccc4cccceULL, 0xdccccc4ccccedcccULL, },
+        { 0xb333b333b333b333ULL, 0xb333b333b333b333ULL, },    /*  40  */
+        { 0x3332333233323332ULL, 0x3332333233323332ULL, },
+        { 0x3733373337333733ULL, 0x3733373337333733ULL, },
+        { 0x3313331333133313ULL, 0x3313331333133313ULL, },
+        { 0x2333233323332333ULL, 0x2333233323332333ULL, },
+        { 0x333b333b333b333bULL, 0x333b333b333b333bULL, },
+        { 0x7333333b32337333ULL, 0x333b32337333333bULL, },
+        { 0x3331233333b33331ULL, 0x233333b333312333ULL, },
+        { 0x638eb8e30e38638eULL, 0xb8e30e38638eb8e3ULL, },    /*  48  */
+        { 0xe38f38e28e39e38fULL, 0x38e28e39e38f38e2ULL, },
+        { 0xe78e3ce38a38e78eULL, 0x3ce38a38e78e3ce3ULL, },
+        { 0xe3ae38c38e18e3aeULL, 0x38c38e18e3ae38c3ULL, },
+        { 0xf38e28e39e38f38eULL, 0x28e39e38f38e28e3ULL, },
+        { 0xe38638eb8e30e386ULL, 0x38eb8e30e38638ebULL, },
+        { 0xa38e38eb8f38a38eULL, 0x38eb8f38a38e38ebULL, },
+        { 0xe38c28e38eb8e38cULL, 0x28e38eb8e38c28e3ULL, },
+        { 0x9c71471cf1c79c71ULL, 0x471cf1c79c71471cULL, },    /*  56  */
+        { 0x1c70c71d71c61c70ULL, 0xc71d71c61c70c71dULL, },
+        { 0x1871c31c75c71871ULL, 0xc31c75c71871c31cULL, },
+        { 0x1c51c73c71e71c51ULL, 0xc73c71e71c51c73cULL, },
+        { 0x0c71d71c61c70c71ULL, 0xd71c61c70c71d71cULL, },
+        { 0x1c79c71471cf1c79ULL, 0xc71471cf1c79c714ULL, },
+        { 0x5c71c71470c75c71ULL, 0xc71470c75c71c714ULL, },
+        { 0x1c73d71c71471c73ULL, 0xd71c71471c73d71cULL, },
+        { 0x8c6af6cc28665541ULL, 0x4be74b5ef67ba00cULL, },    /*  64  */
+        { 0xc86ae6c4286a5440ULL, 0x4be70f5e7e7ba00cULL, },
+        { 0x8c6ae2cca8625541ULL, 0x4a678b5ef67bb01cULL, },
+        { 0x086ac6cc28601540ULL, 0x4b650a5efe7fb00dULL, },
+        { 0xffbe10634d97c709ULL, 0x1277fb1a1d3f42fcULL, },
+        { 0xbbbe006b4d9bc608ULL, 0x1277bf1a953f42fcULL, },
+        { 0xffbe0463cd93c709ULL, 0x13f73b1a1d3f52ecULL, },
+        { 0x7bbe20634d918708ULL, 0x12f5ba1a153b52fdULL, },
+        { 0xa85abeaab9cb8b81ULL, 0x275886ffa32b3514ULL, },    /*  72  */
+        { 0xec5aaea2b9c78a80ULL, 0x2758c2ff2b2b3514ULL, },
+        { 0xa85aaaaa39cf8b81ULL, 0x26d846ffa32b2504ULL, },
+        { 0x2c5a8eaab9cdcb80ULL, 0x27dac7ffab2f2515ULL, },
+        { 0x744f064d5e35e24fULL, 0x8d71c8d8a142f2a0ULL, },
+        { 0x304f16455e39e34eULL, 0x8d718cd82942f2a0ULL, },
+        { 0x744f124dde31e24fULL, 0x8cf108d8a142e2b0ULL, },
+        { 0xf04f364d5e33a24eULL, 0x8df389d8a946e2a1ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_H(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_H(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_w.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_w.c
new file mode 100644
index 0000000000..36ef43672f
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction BNEG.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BNEG.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7fffffff7fffffffULL, 0x7fffffff7fffffffULL, },    /*   0  */
+        { 0xfffffffefffffffeULL, 0xfffffffefffffffeULL, },
+        { 0xfffffbfffffffbffULL, 0xfffffbfffffffbffULL, },
+        { 0xffdfffffffdfffffULL, 0xffdfffffffdfffffULL, },
+        { 0xffffefffffffefffULL, 0xffffefffffffefffULL, },
+        { 0xfff7fffffff7ffffULL, 0xfff7fffffff7ffffULL, },
+        { 0xfffffff7ffffbfffULL, 0xfefffffffffffff7ULL, },
+        { 0xeffffffffffdffffULL, 0xffffff7fefffffffULL, },
+        { 0x8000000080000000ULL, 0x8000000080000000ULL, },    /*   8  */
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },
+        { 0x0000040000000400ULL, 0x0000040000000400ULL, },
+        { 0x0020000000200000ULL, 0x0020000000200000ULL, },
+        { 0x0000100000001000ULL, 0x0000100000001000ULL, },
+        { 0x0008000000080000ULL, 0x0008000000080000ULL, },
+        { 0x0000000800004000ULL, 0x0100000000000008ULL, },
+        { 0x1000000000020000ULL, 0x0000008010000000ULL, },
+        { 0x2aaaaaaa2aaaaaaaULL, 0x2aaaaaaa2aaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaabaaaaaaabULL, 0xaaaaaaabaaaaaaabULL, },
+        { 0xaaaaaeaaaaaaaeaaULL, 0xaaaaaeaaaaaaaeaaULL, },
+        { 0xaa8aaaaaaa8aaaaaULL, 0xaa8aaaaaaa8aaaaaULL, },
+        { 0xaaaabaaaaaaabaaaULL, 0xaaaabaaaaaaabaaaULL, },
+        { 0xaaa2aaaaaaa2aaaaULL, 0xaaa2aaaaaaa2aaaaULL, },
+        { 0xaaaaaaa2aaaaeaaaULL, 0xabaaaaaaaaaaaaa2ULL, },
+        { 0xbaaaaaaaaaa8aaaaULL, 0xaaaaaa2abaaaaaaaULL, },
+        { 0xd5555555d5555555ULL, 0xd5555555d5555555ULL, },    /*  24  */
+        { 0x5555555455555554ULL, 0x5555555455555554ULL, },
+        { 0x5555515555555155ULL, 0x5555515555555155ULL, },
+        { 0x5575555555755555ULL, 0x5575555555755555ULL, },
+        { 0x5555455555554555ULL, 0x5555455555554555ULL, },
+        { 0x555d5555555d5555ULL, 0x555d5555555d5555ULL, },
+        { 0x5555555d55551555ULL, 0x545555555555555dULL, },
+        { 0x4555555555575555ULL, 0x555555d545555555ULL, },
+        { 0x4ccccccc4cccccccULL, 0x4ccccccc4cccccccULL, },    /*  32  */
+        { 0xcccccccdcccccccdULL, 0xcccccccdcccccccdULL, },
+        { 0xccccc8ccccccc8ccULL, 0xccccc8ccccccc8ccULL, },
+        { 0xccecccccccecccccULL, 0xccecccccccecccccULL, },
+        { 0xccccdcccccccdcccULL, 0xccccdcccccccdcccULL, },
+        { 0xccc4ccccccc4ccccULL, 0xccc4ccccccc4ccccULL, },
+        { 0xccccccc4cccc8cccULL, 0xcdccccccccccccc4ULL, },
+        { 0xdcccccccccceccccULL, 0xcccccc4cdcccccccULL, },
+        { 0xb3333333b3333333ULL, 0xb3333333b3333333ULL, },    /*  40  */
+        { 0x3333333233333332ULL, 0x3333333233333332ULL, },
+        { 0x3333373333333733ULL, 0x3333373333333733ULL, },
+        { 0x3313333333133333ULL, 0x3313333333133333ULL, },
+        { 0x3333233333332333ULL, 0x3333233333332333ULL, },
+        { 0x333b3333333b3333ULL, 0x333b3333333b3333ULL, },
+        { 0x3333333b33337333ULL, 0x323333333333333bULL, },
+        { 0x2333333333313333ULL, 0x333333b323333333ULL, },
+        { 0x638e38e30e38e38eULL, 0xb8e38e38638e38e3ULL, },    /*  48  */
+        { 0xe38e38e28e38e38fULL, 0x38e38e39e38e38e2ULL, },
+        { 0xe38e3ce38e38e78eULL, 0x38e38a38e38e3ce3ULL, },
+        { 0xe3ae38e38e18e38eULL, 0x38c38e38e3ae38e3ULL, },
+        { 0xe38e28e38e38f38eULL, 0x38e39e38e38e28e3ULL, },
+        { 0xe38638e38e30e38eULL, 0x38eb8e38e38638e3ULL, },
+        { 0xe38e38eb8e38a38eULL, 0x39e38e38e38e38ebULL, },
+        { 0xf38e38e38e3ae38eULL, 0x38e38eb8f38e38e3ULL, },
+        { 0x9c71c71cf1c71c71ULL, 0x471c71c79c71c71cULL, },    /*  56  */
+        { 0x1c71c71d71c71c70ULL, 0xc71c71c61c71c71dULL, },
+        { 0x1c71c31c71c71871ULL, 0xc71c75c71c71c31cULL, },
+        { 0x1c51c71c71e71c71ULL, 0xc73c71c71c51c71cULL, },
+        { 0x1c71d71c71c70c71ULL, 0xc71c61c71c71d71cULL, },
+        { 0x1c79c71c71cf1c71ULL, 0xc71471c71c79c71cULL, },
+        { 0x1c71c71471c75c71ULL, 0xc61c71c71c71c714ULL, },
+        { 0x0c71c71c71c51c71ULL, 0xc71c71470c71c71cULL, },
+        { 0x886af6cc28625541ULL, 0x0b670b5efe7ba00cULL, },    /*  64  */
+        { 0x886ae6c428625440ULL, 0x4f670b5eee7bb00cULL, },
+        { 0x886ae2cc28625541ULL, 0xcb670b5efe6bb00cULL, },
+        { 0x886ac6cc28621540ULL, 0x4a670b5efe7bb00dULL, },
+        { 0xfbbe10634d93c709ULL, 0x52f7bb1a153f42fcULL, },
+        { 0xfbbe006b4d93c608ULL, 0x16f7bb1a053f52fcULL, },
+        { 0xfbbe04634d93c709ULL, 0x92f7bb1a152f52fcULL, },
+        { 0xfbbe20634d938708ULL, 0x13f7bb1a153f52fdULL, },
+        { 0xac5abeaab9cf8b81ULL, 0x67d8c6ffab2b3514ULL, },    /*  72  */
+        { 0xac5aaea2b9cf8a80ULL, 0x23d8c6ffbb2b2514ULL, },
+        { 0xac5aaaaab9cf8b81ULL, 0xa7d8c6ffab3b2514ULL, },
+        { 0xac5a8eaab9cfcb80ULL, 0x26d8c6ffab2b2515ULL, },
+        { 0x704f064d5e31e24fULL, 0xcdf188d8a942f2a0ULL, },
+        { 0x704f16455e31e34eULL, 0x89f188d8b942e2a0ULL, },
+        { 0x704f124d5e31e24fULL, 0x0df188d8a952e2a0ULL, },
+        { 0x704f364d5e31a24eULL, 0x8cf188d8a942e2a1ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_W(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_W(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_b.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_b.c
new file mode 100644
index 0000000000..f6907cb3d3
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_b.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction BSET.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BSET.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x8080808080808080ULL, 0x8080808080808080ULL, },    /*   8  */
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },
+        { 0x0404040404040404ULL, 0x0404040404040404ULL, },
+        { 0x2020202020202020ULL, 0x2020202020202020ULL, },
+        { 0x1010101010101010ULL, 0x1010101010101010ULL, },
+        { 0x0808080808080808ULL, 0x0808080808080808ULL, },
+        { 0x0840010840010840ULL, 0x0108400108400108ULL, },
+        { 0x1002801002801002ULL, 0x8010028010028010ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xababababababababULL, 0xababababababababULL, },
+        { 0xaeaeaeaeaeaeaeaeULL, 0xaeaeaeaeaeaeaeaeULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xbabababababababaULL, 0xbabababababababaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaeaabaaeaabaaeaULL, 0xabaaeaabaaeaabaaULL, },
+        { 0xbaaaaabaaaaabaaaULL, 0xaabaaaaabaaaaabaULL, },
+        { 0xd5d5d5d5d5d5d5d5ULL, 0xd5d5d5d5d5d5d5d5ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x7575757575757575ULL, 0x7575757575757575ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5d5d5d5d5d5d5d5dULL, 0x5d5d5d5d5d5d5d5dULL, },
+        { 0x5d55555d55555d55ULL, 0x555d55555d55555dULL, },
+        { 0x5557d55557d55557ULL, 0xd55557d55557d555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xcdcdcdcdcdcdcdcdULL, 0xcdcdcdcdcdcdcdcdULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xececececececececULL, 0xececececececececULL, },
+        { 0xdcdcdcdcdcdcdcdcULL, 0xdcdcdcdcdcdcdcdcULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xcccccdcccccdccccULL, 0xcdcccccdcccccdccULL, },
+        { 0xdcceccdcceccdcceULL, 0xccdcceccdcceccdcULL, },
+        { 0xb3b3b3b3b3b3b3b3ULL, 0xb3b3b3b3b3b3b3b3ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3737373737373737ULL, 0x3737373737373737ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3b3b3b3b3b3b3b3bULL, 0x3b3b3b3b3b3b3b3bULL, },
+        { 0x3b73333b73333b73ULL, 0x333b73333b73333bULL, },
+        { 0x3333b33333b33333ULL, 0xb33333b33333b333ULL, },
+        { 0xe38eb8e38eb8e38eULL, 0xb8e38eb8e38eb8e3ULL, },    /*  48  */
+        { 0xe38f39e38f39e38fULL, 0x39e38f39e38f39e3ULL, },
+        { 0xe78e3ce78e3ce78eULL, 0x3ce78e3ce78e3ce7ULL, },
+        { 0xe3ae38e3ae38e3aeULL, 0x38e3ae38e3ae38e3ULL, },
+        { 0xf39e38f39e38f39eULL, 0x38f39e38f39e38f3ULL, },
+        { 0xeb8e38eb8e38eb8eULL, 0x38eb8e38eb8e38ebULL, },
+        { 0xebce39ebce39ebceULL, 0x39ebce39ebce39ebULL, },
+        { 0xf38eb8f38eb8f38eULL, 0xb8f38eb8f38eb8f3ULL, },
+        { 0x9cf1c79cf1c79cf1ULL, 0xc79cf1c79cf1c79cULL, },    /*  56  */
+        { 0x1d71c71d71c71d71ULL, 0xc71d71c71d71c71dULL, },
+        { 0x1c75c71c75c71c75ULL, 0xc71c75c71c75c71cULL, },
+        { 0x3c71e73c71e73c71ULL, 0xe73c71e73c71e73cULL, },
+        { 0x1c71d71c71d71c71ULL, 0xd71c71d71c71d71cULL, },
+        { 0x1c79cf1c79cf1c79ULL, 0xcf1c79cf1c79cf1cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c73c71c73c71c73ULL, 0xc71c73c71c73c71cULL, },
+        { 0x896ee6dc29667541ULL, 0x4be70b5efe7bb11cULL, },    /*  64  */
+        { 0x886ae7cc286ad541ULL, 0x4fe70b5efefbb41cULL, },
+        { 0x986ee6cc2ae25d41ULL, 0xcb674bdefe7bb01cULL, },
+        { 0x89eae6ec68625540ULL, 0x6b670b5ffe7fb40dULL, },
+        { 0xfbbe40734d97e709ULL, 0x1af7bb5a553f53fcULL, },
+        { 0xfbfe016b6d9bc709ULL, 0x16f7bb1e35bf56fcULL, },
+        { 0xfbbe40674f93cf09ULL, 0x92f7fb9a1d3f72fcULL, },
+        { 0xfbbe40634d93c748ULL, 0x32f7bb1b173f56fdULL, },
+        { 0xad5eeebab9cfab81ULL, 0x2fd8ceffeb2b2514ULL, },    /*  72  */
+        { 0xac5aafaab9cf8b81ULL, 0x27d8ceffabab2514ULL, },
+        { 0xbc5eeeaebbcf8b81ULL, 0xa7d9c6ffab2b2514ULL, },
+        { 0xaddaeeaaf9cf8fc0ULL, 0x27dac7ffab2f2515ULL, },
+        { 0x714f565d5f35e24fULL, 0x8df188d8e94ae3b0ULL, },
+        { 0x784f174d7e39e24fULL, 0x8df188dca9c2e6b0ULL, },
+        { 0x704f564d5eb1ea4fULL, 0x8df1c8d8a94ae2b0ULL, },
+        { 0x71cf566d5e33e64eULL, 0xadf389d9ab46e6a1ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_B(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_B(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_d.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_d.c
new file mode 100644
index 0000000000..ecd833bee0
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction BSET.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BSET.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x8000000000000000ULL, 0x8000000000000000ULL, },    /*   8  */
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },
+        { 0x0000040000000000ULL, 0x0000040000000000ULL, },
+        { 0x0000000000200000ULL, 0x0000000000200000ULL, },
+        { 0x0000000000001000ULL, 0x0000000000001000ULL, },
+        { 0x0008000000000000ULL, 0x0008000000000000ULL, },
+        { 0x0000000000004000ULL, 0x0000000800000000ULL, },
+        { 0x0002000000000000ULL, 0x0000000010000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaabULL, 0xaaaaaaaaaaaaaaabULL, },
+        { 0xaaaaaeaaaaaaaaaaULL, 0xaaaaaeaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaabaaaULL, 0xaaaaaaaaaaaabaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaeaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaabaaaaaaaULL, },
+        { 0xd555555555555555ULL, 0xd555555555555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555755555ULL, 0x5555555555755555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x555d555555555555ULL, 0x555d555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555d55555555ULL, },
+        { 0x5557555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xcccccccccccccccdULL, 0xcccccccccccccccdULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccecccccULL, 0xccccccccccecccccULL, },
+        { 0xccccccccccccdcccULL, 0xccccccccccccdcccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccceccccccccccccULL, 0xccccccccdcccccccULL, },
+        { 0xb333333333333333ULL, 0xb333333333333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333373333333333ULL, 0x3333373333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x333b333333333333ULL, 0x333b333333333333ULL, },
+        { 0x3333333333337333ULL, 0x3333333b33333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0xb8e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38fULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e3ce38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e3ae38e3ULL, },
+        { 0xe38e38e38e38f38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38eb8e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38f38e38e3ULL, },
+        { 0x9c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71dULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c75c71c71c71cULL, },
+        { 0x1c71c71c71e71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71d71cULL, },
+        { 0x1c79c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c75c71ULL, 0xc71c71cf1c71c71cULL, },
+        { 0x1c73c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x886ae6cc28625541ULL, 0x4b670b5efe7bb00cULL, },    /*  64  */
+        { 0x886ae6cc28625540ULL, 0x5b670b5efe7bb00cULL, },
+        { 0x886ae6cc28625541ULL, 0x4b670b5efe7bb00cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5ffe7bb00cULL, },
+        { 0xfbbe00634d93c709ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c709ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1b153f52fcULL, },
+        { 0xac5aaeaab9cf8b81ULL, 0x27d8c6ffab2b3514ULL, },    /*  72  */
+        { 0xac5aaeaab9cf8b80ULL, 0x37d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b81ULL, 0x27d8c6ffab3b2514ULL, },
+        { 0xac5aaeaab9cfcb80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24fULL, 0x8df188d8a942f2a0ULL, },
+        { 0x704f164d5e31e34eULL, 0x9df188d8a942e2a0ULL, },
+        { 0x704f164d5e31e24fULL, 0x8df188d8a952e2a0ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d9a942e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_D(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_D(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_h.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_h.c
new file mode 100644
index 0000000000..de63f26881
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_h.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction BSET.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BSET.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x8000800080008000ULL, 0x8000800080008000ULL, },    /*   8  */
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },
+        { 0x0400040004000400ULL, 0x0400040004000400ULL, },
+        { 0x0020002000200020ULL, 0x0020002000200020ULL, },
+        { 0x1000100010001000ULL, 0x1000100010001000ULL, },
+        { 0x0008000800080008ULL, 0x0008000800080008ULL, },
+        { 0x4000000801004000ULL, 0x0008010040000008ULL, },
+        { 0x0002100000800002ULL, 0x1000008000021000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaabaaabaaabaaabULL, 0xaaabaaabaaabaaabULL, },
+        { 0xaeaaaeaaaeaaaeaaULL, 0xaeaaaeaaaeaaaeaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xbaaabaaabaaabaaaULL, 0xbaaabaaabaaabaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xeaaaaaaaabaaeaaaULL, 0xaaaaabaaeaaaaaaaULL, },
+        { 0xaaaabaaaaaaaaaaaULL, 0xbaaaaaaaaaaabaaaULL, },
+        { 0xd555d555d555d555ULL, 0xd555d555d555d555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5575557555755575ULL, 0x5575557555755575ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x555d555d555d555dULL, 0x555d555d555d555dULL, },
+        { 0x5555555d55555555ULL, 0x555d55555555555dULL, },
+        { 0x5557555555d55557ULL, 0x555555d555575555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xcccdcccdcccdcccdULL, 0xcccdcccdcccdcccdULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccecccecccecccecULL, 0xccecccecccecccecULL, },
+        { 0xdcccdcccdcccdcccULL, 0xdcccdcccdcccdcccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xcccccccccdccccccULL, 0xcccccdccccccccccULL, },
+        { 0xcccedcccccccccceULL, 0xdccccccccccedcccULL, },
+        { 0xb333b333b333b333ULL, 0xb333b333b333b333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3733373337333733ULL, 0x3733373337333733ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x333b333b333b333bULL, 0x333b333b333b333bULL, },
+        { 0x7333333b33337333ULL, 0x333b33337333333bULL, },
+        { 0x3333333333b33333ULL, 0x333333b333333333ULL, },
+        { 0xe38eb8e38e38e38eULL, 0xb8e38e38e38eb8e3ULL, },    /*  48  */
+        { 0xe38f38e38e39e38fULL, 0x38e38e39e38f38e3ULL, },
+        { 0xe78e3ce38e38e78eULL, 0x3ce38e38e78e3ce3ULL, },
+        { 0xe3ae38e38e38e3aeULL, 0x38e38e38e3ae38e3ULL, },
+        { 0xf38e38e39e38f38eULL, 0x38e39e38f38e38e3ULL, },
+        { 0xe38e38eb8e38e38eULL, 0x38eb8e38e38e38ebULL, },
+        { 0xe38e38eb8f38e38eULL, 0x38eb8f38e38e38ebULL, },
+        { 0xe38e38e38eb8e38eULL, 0x38e38eb8e38e38e3ULL, },
+        { 0x9c71c71cf1c79c71ULL, 0xc71cf1c79c71c71cULL, },    /*  56  */
+        { 0x1c71c71d71c71c71ULL, 0xc71d71c71c71c71dULL, },
+        { 0x1c71c71c75c71c71ULL, 0xc71c75c71c71c71cULL, },
+        { 0x1c71c73c71e71c71ULL, 0xc73c71e71c71c73cULL, },
+        { 0x1c71d71c71c71c71ULL, 0xd71c71c71c71d71cULL, },
+        { 0x1c79c71c71cf1c79ULL, 0xc71c71cf1c79c71cULL, },
+        { 0x5c71c71c71c75c71ULL, 0xc71c71c75c71c71cULL, },
+        { 0x1c73d71c71c71c73ULL, 0xd71c71c71c73d71cULL, },
+        { 0x8c6af6cc28665541ULL, 0x4be74b5efe7bb00cULL, },    /*  64  */
+        { 0xc86ae6cc286a5540ULL, 0x4be70f5efe7bb00cULL, },
+        { 0x8c6ae6cca8625541ULL, 0x4b678b5efe7bb01cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7fb00dULL, },
+        { 0xffbe10634d97c709ULL, 0x12f7fb1a1d3f52fcULL, },
+        { 0xfbbe006b4d9bc708ULL, 0x12f7bf1a953f52fcULL, },
+        { 0xffbe0463cd93c709ULL, 0x13f7bb1a1d3f52fcULL, },
+        { 0xfbbe20634d93c708ULL, 0x12f7bb1a153f52fdULL, },
+        { 0xac5abeaab9cf8b81ULL, 0x27d8c6ffab2b3514ULL, },    /*  72  */
+        { 0xec5aaeaab9cf8b80ULL, 0x27d8c6ffab2b3514ULL, },
+        { 0xac5aaeaab9cf8b81ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cfcb80ULL, 0x27dac7ffab2f2515ULL, },
+        { 0x744f164d5e35e24fULL, 0x8df1c8d8a942f2a0ULL, },
+        { 0x704f164d5e39e34eULL, 0x8df18cd8a942f2a0ULL, },
+        { 0x744f164dde31e24fULL, 0x8df188d8a942e2b0ULL, },
+        { 0xf04f364d5e33e24eULL, 0x8df389d8a946e2a1ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_H(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_H(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_w.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_w.c
new file mode 100644
index 0000000000..b973e67065
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction BSET.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BSET.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x8000000080000000ULL, 0x8000000080000000ULL, },    /*   8  */
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },
+        { 0x0000040000000400ULL, 0x0000040000000400ULL, },
+        { 0x0020000000200000ULL, 0x0020000000200000ULL, },
+        { 0x0000100000001000ULL, 0x0000100000001000ULL, },
+        { 0x0008000000080000ULL, 0x0008000000080000ULL, },
+        { 0x0000000800004000ULL, 0x0100000000000008ULL, },
+        { 0x1000000000020000ULL, 0x0000008010000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaabaaaaaaabULL, 0xaaaaaaabaaaaaaabULL, },
+        { 0xaaaaaeaaaaaaaeaaULL, 0xaaaaaeaaaaaaaeaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaabaaaaaaabaaaULL, 0xaaaabaaaaaaabaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaeaaaULL, 0xabaaaaaaaaaaaaaaULL, },
+        { 0xbaaaaaaaaaaaaaaaULL, 0xaaaaaaaabaaaaaaaULL, },
+        { 0xd5555555d5555555ULL, 0xd5555555d5555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5575555555755555ULL, 0x5575555555755555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x555d5555555d5555ULL, 0x555d5555555d5555ULL, },
+        { 0x5555555d55555555ULL, 0x555555555555555dULL, },
+        { 0x5555555555575555ULL, 0x555555d555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xcccccccdcccccccdULL, 0xcccccccdcccccccdULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccecccccccecccccULL, 0xccecccccccecccccULL, },
+        { 0xccccdcccccccdcccULL, 0xccccdcccccccdcccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xcdccccccccccccccULL, },
+        { 0xdcccccccccceccccULL, 0xccccccccdcccccccULL, },
+        { 0xb3333333b3333333ULL, 0xb3333333b3333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333373333333733ULL, 0x3333373333333733ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x333b3333333b3333ULL, 0x333b3333333b3333ULL, },
+        { 0x3333333b33337333ULL, 0x333333333333333bULL, },
+        { 0x3333333333333333ULL, 0x333333b333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0xb8e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38fULL, 0x38e38e39e38e38e3ULL, },
+        { 0xe38e3ce38e38e78eULL, 0x38e38e38e38e3ce3ULL, },
+        { 0xe3ae38e38e38e38eULL, 0x38e38e38e3ae38e3ULL, },
+        { 0xe38e38e38e38f38eULL, 0x38e39e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38eb8e38e38e38e3ULL, },
+        { 0xe38e38eb8e38e38eULL, 0x39e38e38e38e38ebULL, },
+        { 0xf38e38e38e3ae38eULL, 0x38e38eb8f38e38e3ULL, },
+        { 0x9c71c71cf1c71c71ULL, 0xc71c71c79c71c71cULL, },    /*  56  */
+        { 0x1c71c71d71c71c71ULL, 0xc71c71c71c71c71dULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c75c71c71c71cULL, },
+        { 0x1c71c71c71e71c71ULL, 0xc73c71c71c71c71cULL, },
+        { 0x1c71d71c71c71c71ULL, 0xc71c71c71c71d71cULL, },
+        { 0x1c79c71c71cf1c71ULL, 0xc71c71c71c79c71cULL, },
+        { 0x1c71c71c71c75c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x886af6cc28625541ULL, 0x4b670b5efe7bb00cULL, },    /*  64  */
+        { 0x886ae6cc28625540ULL, 0x4f670b5efe7bb00cULL, },
+        { 0x886ae6cc28625541ULL, 0xcb670b5efe7bb00cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7bb00dULL, },
+        { 0xfbbe10634d93c709ULL, 0x52f7bb1a153f52fcULL, },
+        { 0xfbbe006b4d93c708ULL, 0x16f7bb1a153f52fcULL, },
+        { 0xfbbe04634d93c709ULL, 0x92f7bb1a153f52fcULL, },
+        { 0xfbbe20634d93c708ULL, 0x13f7bb1a153f52fdULL, },
+        { 0xac5abeaab9cf8b81ULL, 0x67d8c6ffab2b3514ULL, },    /*  72  */
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffbb2b2514ULL, },
+        { 0xac5aaeaab9cf8b81ULL, 0xa7d8c6ffab3b2514ULL, },
+        { 0xac5aaeaab9cfcb80ULL, 0x27d8c6ffab2b2515ULL, },
+        { 0x704f164d5e31e24fULL, 0xcdf188d8a942f2a0ULL, },
+        { 0x704f164d5e31e34eULL, 0x8df188d8b942e2a0ULL, },
+        { 0x704f164d5e31e24fULL, 0x8df188d8a952e2a0ULL, },
+        { 0x704f364d5e31e24eULL, 0x8df188d8a942e2a1ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_W(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_W(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_h.c b/tests/tcg/mips/user/ase/msa/fixed-multiply/test_msa_mul_q_h.c
index f1526087fa..39a89ac099 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_h.c
+++ b/tests/tcg/mips/user/ase/msa/fixed-multiply/test_msa_mul_q_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MUL_Q.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xb69bf1d4cc591b07ULL, 0xdc7e3510397df77dULL, },
         { 0x9712fb9b1db7ec38ULL, 0xbccff56b01071259ULL, },
         { 0xfc43001139150d37ULL, 0xef194023f19aecf4ULL, },
+        { 0xb69bf1d4cc591b07ULL, 0xdc7e3510397df77dULL, },
+        { 0x628a03e2455006e3ULL, 0x65a26eec3ac806bdULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_w.c b/tests/tcg/mips/user/ase/msa/fixed-multiply/test_msa_mul_q_w.c
index df815ee9da..07f2daecac 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_w.c
+++ b/tests/tcg/mips/user/ase/msa/fixed-multiply/test_msa_mul_q_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MUL_Q.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xb69baa39cc590fcdULL, 0xdc7e6df7397c58d9ULL, },
         { 0x9713a7171db7f3a5ULL, 0xbccfb4690107236fULL, },
         { 0xfc439edc3916c1e4ULL, 0xef19389cf19a0fddULL, },
+        { 0xb69baa39cc590fcdULL, 0xdc7e6df7397c58d9ULL, },
+        { 0x628a97e4455157d3ULL, 0x65a1c5e13ac736e1ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_h.c b/tests/tcg/mips/user/ase/msa/fixed-multiply/test_msa_mulr_q_h.c
index fd0a5fa7a8..e35af2fb7e 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_h.c
+++ b/tests/tcg/mips/user/ase/msa/fixed-multiply/test_msa_mulr_q_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MULR_Q.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xb69bf1d4cc591b07ULL, 0xdc7f3511397df77eULL, },
         { 0x9713fb9c1db7ec39ULL, 0xbccff56b01081259ULL, },
         { 0xfc44001139160d37ULL, 0xef1a4023f19aecf5ULL, },
+        { 0xb69bf1d4cc591b07ULL, 0xdc7f3511397df77eULL, },
+        { 0x628a03e3455006e4ULL, 0x65a36eec3ac806beULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_w.c b/tests/tcg/mips/user/ase/msa/fixed-multiply/test_msa_mulr_q_w.c
index f28b0d0a20..3cedf672af 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_w.c
+++ b/tests/tcg/mips/user/ase/msa/fixed-multiply/test_msa_mulr_q_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MULR_Q.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xb69baa3acc590fcdULL, 0xdc7e6df7397c58daULL, },
         { 0x9713a7171db7f3a6ULL, 0xbccfb46a0107236fULL, },
         { 0xfc439edd3916c1e4ULL, 0xef19389cf19a0fdeULL, },
+        { 0xb69baa3acc590fcdULL, 0xdc7e6df7397c58daULL, },
+        { 0x628a97e4455157d3ULL, 0x65a1c5e23ac736e2ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_a_d.c b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_a_d.c
new file mode 100644
index 0000000000..1807cabdd5
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_a_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction FMAX_A.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "FMAX_A.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xc71c71c71c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xe38e38e38e38e38eULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xe38e38e38e38e38eULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x3333333333333333ULL, 0xc71c71c71c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x5555555555555555ULL, },
+        { 0xe38e38e38e38e38eULL, 0xccccccccccccccccULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xc71c71c71c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0xc71c71c71c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7bb00cULL, },    /*  64  */
+        { 0xfbbe00634d93c708ULL, 0x4b670b5efe7bb00cULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x4b670b5efe7bb00cULL, },
+        { 0x704f164d5e31e24eULL, 0x4b670b5efe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x4b670b5efe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x4b670b5efe7bb00cULL, },    /*  72  */
+        { 0xfbbe00634d93c708ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x4b670b5efe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x704f164d5e31e24eULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMAX_A_D(b128_pattern[i], b128_pattern[j],
+                            b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMAX_A_D(b128_random[i], b128_random[j],
+                            b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                         (PATTERN_INPUTS_SHORT_COUNT)) +
+                                        RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_a_w.c b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_a_w.c
new file mode 100644
index 0000000000..b999b1fdb5
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_a_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction FMAX_A.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "FMAX_A.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e3aaaaaaaaULL, 0x38e38e38e38e38e3ULL, },
+        { 0xaaaaaaaa71c71c71ULL, 0xc71c71c7aaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xe38e38e355555555ULL, 0x55555555e38e38e3ULL, },
+        { 0x5555555571c71c71ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xe38e38e3ccccccccULL, 0xcccccccce38e38e3ULL, },
+        { 0xcccccccc71c71c71ULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e333333333ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x3333333371c71c71ULL, 0xc71c71c733333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e3aaaaaaaaULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e355555555ULL, 0x55555555e38e38e3ULL, },
+        { 0xe38e38e3ccccccccULL, 0xcccccccce38e38e3ULL, },
+        { 0xe38e38e333333333ULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e371c71c71ULL, 0xc71c71c7e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xaaaaaaaa71c71c71ULL, 0xc71c71c7aaaaaaaaULL, },
+        { 0x5555555571c71c71ULL, 0x5555555555555555ULL, },
+        { 0xcccccccc71c71c71ULL, 0xccccccccccccccccULL, },
+        { 0x3333333371c71c71ULL, 0xc71c71c733333333ULL, },
+        { 0xe38e38e371c71c71ULL, 0xc71c71c7e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7bb00cULL, },    /*  64  */
+        { 0xfbbe00634d93c708ULL, 0x4b670b5efe7bb00cULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x4b670b5efe7bb00cULL, },
+        { 0x704f164d5e31e24eULL, 0x4b670b5efe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x4b670b5efe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xfbbe00635e31e24eULL, 0x12f7bb1aa942e2a0ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x4b670b5efe7bb00cULL, },    /*  72  */
+        { 0xfbbe00634d93c708ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x4b670b5efe7bb00cULL, },
+        { 0xfbbe00635e31e24eULL, 0x12f7bb1aa942e2a0ULL, },
+        { 0x704f164d5e31e24eULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMAX_A_W(b128_pattern[i], b128_pattern[j],
+                            b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMAX_A_W(b128_random[i], b128_random[j],
+                            b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                         (PATTERN_INPUTS_SHORT_COUNT)) +
+                                        RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_d.c b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_d.c
new file mode 100644
index 0000000000..b5ce6c7543
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction FMAX.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "FMAX.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xccccccccccccccccULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0x0000000000000000ULL, 0x38e38e38e38e38e3ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0x38e38e38e38e38e3ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0x38e38e38e38e38e3ULL, },
+        { 0x3333333333333333ULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0x0000000000000000ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7bb00cULL, },    /*  64  */
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7bb00cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7bb00cULL, },
+        { 0x704f164d5e31e24eULL, 0x4b670b5efe7bb00cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x12f7bb1a153f52fcULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7bb00cULL, },    /*  72  */
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x4b670b5efe7bb00cULL, },
+        { 0x704f164d5e31e24eULL, 0x12f7bb1a153f52fcULL, },
+        { 0x704f164d5e31e24eULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMAX_D(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMAX_D(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_w.c b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_w.c
new file mode 100644
index 0000000000..5fafd3080f
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmax_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction FMAX.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "FMAX.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x38e38e3800000000ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x000000001c71c71cULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xaaaaaaaa8e38e38eULL, 0x38e38e38aaaaaaaaULL, },
+        { 0x1c71c71c71c71c71ULL, 0xaaaaaaaa1c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555571c71c71ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xcccccccc8e38e38eULL, 0x38e38e38ccccccccULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x38e38e3833333333ULL, },
+        { 0x3333333371c71c71ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0x0000000000000000ULL, 0x38e38e3800000000ULL, },
+        { 0xaaaaaaaa8e38e38eULL, 0x38e38e38aaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xcccccccc8e38e38eULL, 0x38e38e38ccccccccULL, },
+        { 0x3333333333333333ULL, 0x38e38e3833333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e381c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0x000000001c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xaaaaaaaa1c71c71cULL, },
+        { 0x5555555571c71c71ULL, 0x5555555555555555ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x3333333371c71c71ULL, 0x3333333333333333ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e381c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7bb00cULL, },    /*  64  */
+        { 0x886ae6cc4d93c708ULL, 0x4b670b5e153f52fcULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5eab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x4b670b5ea942e2a0ULL, },
+        { 0x886ae6cc4d93c708ULL, 0x4b670b5e153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaa4d93c708ULL, 0x27d8c6ff153f52fcULL, },
+        { 0x704f164d5e31e24eULL, 0x12f7bb1a153f52fcULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5eab2b2514ULL, },    /*  72  */
+        { 0xac5aaeaa4d93c708ULL, 0x27d8c6ff153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x27d8c6ffa942e2a0ULL, },
+        { 0x704f164d5e31e24eULL, 0x4b670b5ea942e2a0ULL, },
+        { 0x704f164d5e31e24eULL, 0x12f7bb1a153f52fcULL, },
+        { 0x704f164d5e31e24eULL, 0x27d8c6ffa942e2a0ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMAX_W(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMAX_W(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_a_d.c b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_a_d.c
new file mode 100644
index 0000000000..c0b4001eb1
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_a_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction FMIN_A.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "FMIN_A.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x1c71c71c71c71c71ULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x5555555555555555ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xccccccccccccccccULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x38e38e38e38e38e3ULL, },
+        { 0xccccccccccccccccULL, 0x38e38e38e38e38e3ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0x3333333333333333ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7bb00cULL, },    /*  64  */
+        { 0x886ae6cc28625540ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x886ae6cc28625540ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x886ae6cc28625540ULL, 0x8df188d8a942e2a0ULL, },
+        { 0x886ae6cc28625540ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+        { 0x886ae6cc28625540ULL, 0x27d8c6ffab2b2514ULL, },    /*  72  */
+        { 0xac5aaeaab9cf8b80ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x8df188d8a942e2a0ULL, },
+        { 0x886ae6cc28625540ULL, 0x8df188d8a942e2a0ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x8df188d8a942e2a0ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMIN_A_D(b128_pattern[i], b128_pattern[j],
+                            b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMIN_A_D(b128_random[i], b128_random[j],
+                            b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                         (PATTERN_INPUTS_SHORT_COUNT)) +
+                                        RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_a_w.c b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_a_w.c
new file mode 100644
index 0000000000..d174fcd669
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_a_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction FMIN_A.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "FMIN_A.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaa8e38e38eULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x1c71c71caaaaaaaaULL, 0xaaaaaaaa1c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x555555558e38e38eULL, 0x38e38e3855555555ULL, },
+        { 0x1c71c71c55555555ULL, 0xc71c71c71c71c71cULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xcccccccc8e38e38eULL, 0x38e38e38ccccccccULL, },
+        { 0x1c71c71cccccccccULL, 0xc71c71c71c71c71cULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x333333338e38e38eULL, 0x3333333333333333ULL, },
+        { 0x1c71c71c33333333ULL, 0x333333331c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaa8e38e38eULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x555555558e38e38eULL, 0x38e38e3855555555ULL, },
+        { 0xcccccccc8e38e38eULL, 0x38e38e38ccccccccULL, },
+        { 0x333333338e38e38eULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c8e38e38eULL, 0x38e38e381c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x1c71c71caaaaaaaaULL, 0xaaaaaaaa1c71c71cULL, },
+        { 0x1c71c71c55555555ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71cccccccccULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c33333333ULL, 0x333333331c71c71cULL, },
+        { 0x1c71c71c8e38e38eULL, 0x38e38e381c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7bb00cULL, },    /*  64  */
+        { 0x886ae6cc28625540ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x886ae6cc28625540ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x886ae6cc28625540ULL, 0x8df188d8a942e2a0ULL, },
+        { 0x886ae6cc28625540ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x704f164d4d93c708ULL, 0x8df188d8153f52fcULL, },
+        { 0x886ae6cc28625540ULL, 0x27d8c6ffab2b2514ULL, },    /*  72  */
+        { 0xac5aaeaab9cf8b80ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x8df188d8a942e2a0ULL, },
+        { 0x886ae6cc28625540ULL, 0x8df188d8a942e2a0ULL, },
+        { 0x704f164d4d93c708ULL, 0x8df188d8153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x8df188d8a942e2a0ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMIN_A_W(b128_pattern[i], b128_pattern[j],
+                            b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMIN_A_W(b128_random[i], b128_random[j],
+                            b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                         (PATTERN_INPUTS_SHORT_COUNT)) +
+                                        RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_d.c b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_d.c
new file mode 100644
index 0000000000..62c149f83a
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction FMIN.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "FMIN.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xe38e38e38e38e38eULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0xc71c71c71c71c71cULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xe38e38e38e38e38eULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xc71c71c71c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xe38e38e38e38e38eULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x3333333333333333ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x0000000000000000ULL, },
+        { 0xe38e38e38e38e38eULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0xccccccccccccccccULL, },
+        { 0xe38e38e38e38e38eULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x0000000000000000ULL, 0xc71c71c71c71c71cULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7bb00cULL, },    /*  64  */
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x886ae6cc28625540ULL, 0x8df188d8a942e2a0ULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x8df188d8a942e2a0ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },    /*  72  */
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x8df188d8a942e2a0ULL, },
+        { 0x886ae6cc28625540ULL, 0x8df188d8a942e2a0ULL, },
+        { 0xfbbe00634d93c708ULL, 0x8df188d8a942e2a0ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x8df188d8a942e2a0ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMIN_D(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMIN_D(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_w.c b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_w.c
new file mode 100644
index 0000000000..c5815a66ab
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/float-max-min/test_msa_fmin_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction FMIN.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "FMIN.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xe38e38e38e38e38eULL, 0x00000000e38e38e3ULL, },
+        { 0x0000000000000000ULL, 0xc71c71c700000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xe38e38e3aaaaaaaaULL, 0xaaaaaaaae38e38e3ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xc71c71c7aaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c55555555ULL, 0xc71c71c71c71c71cULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xe38e38e3ccccccccULL, 0xcccccccce38e38e3ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x33333333e38e38e3ULL, },
+        { 0x1c71c71c33333333ULL, 0xc71c71c71c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x00000000e38e38e3ULL, },
+        { 0xe38e38e3aaaaaaaaULL, 0xaaaaaaaae38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e3ccccccccULL, 0xcccccccce38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x33333333e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0xc71c71c7e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x0000000000000000ULL, 0xc71c71c700000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xc71c71c7aaaaaaaaULL, },
+        { 0x1c71c71c55555555ULL, 0xc71c71c71c71c71cULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x1c71c71c33333333ULL, 0xc71c71c71c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0xc71c71c7e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7bb00cULL, },    /*  64  */
+        { 0xfbbe006328625540ULL, 0x12f7bb1afe7bb00cULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6fffe7bb00cULL, },
+        { 0x886ae6cc28625540ULL, 0x8df188d8fe7bb00cULL, },
+        { 0xfbbe006328625540ULL, 0x12f7bb1afe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe0063b9cf8b80ULL, 0x12f7bb1aab2b2514ULL, },
+        { 0xfbbe00634d93c708ULL, 0x8df188d8a942e2a0ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6fffe7bb00cULL, },    /*  72  */
+        { 0xfbbe0063b9cf8b80ULL, 0x12f7bb1aab2b2514ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x8df188d8ab2b2514ULL, },
+        { 0x886ae6cc28625540ULL, 0x8df188d8fe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x8df188d8a942e2a0ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x8df188d8ab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMIN_W(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_FMIN_W(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_b.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_b.c
index d2ea54f43d..e96e4fdf72 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_b.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADD_A.B
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xc4a968a3a56293ceULL, 0x9a37b229ac6d4374ULL, },
         { 0xe8b930818693738eULL, 0xbe76838659bd6e6cULL, },
         { 0x759116b0ab9e5756ULL, 0x8518bd426c817064ULL, },
+        { 0xc4a968a3a56293ceULL, 0x9a37b229ac6d4374ULL, },
+        { 0xe09e2c9abc623c9cULL, 0xe61ef050ae843cc0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_d.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_d.c
index 56b81f9090..ec025b6210 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_d.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADD_A.D
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xc3f467a2a46256ceULL, 0x99e73e2701e84274ULL, },
         { 0xe7e42f8135cf8d0eULL, 0xbd7582865538cd6cULL, },
         { 0x749115ea109e1b46ULL, 0x850632416bfc705cULL, },
+        { 0xc3f467a2a46256ceULL, 0x99e73e2701e84274ULL, },
+        { 0xe09e2c9abc63c49cULL, 0xe41cee4ead7a3ac0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_h.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_h.c
index fe3c664997..d04b3a5abe 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_h.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADD_A.H
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xc3f567a3a4629232ULL, 0x99e7b029ab934274ULL, },
         { 0xe7e52f81869372f2ULL, 0xbd76828658436d54ULL, },
         { 0x749116b0abc456aaULL, 0x8506bc0e6bfd705cULL, },
+        { 0xc3f567a3a4629232ULL, 0x99e7b029ab934274ULL, },
+        { 0xe09e2c9abc623b64ULL, 0xe41eee50ad7c3ac0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_w.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_w.c
index 205117ea95..7ae90a3daf 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_w.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADD_A.W
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xc3f467a3a46256ceULL, 0x99e73e27ab91f84cULL, },
         { 0xe7e42f818694378eULL, 0xbd75828658416d54ULL, },
         { 0x749115eaabc5a956ULL, 0x850632426bfc705cULL, },
+        { 0xc3f467a3a46256ceULL, 0x99e73e27ab91f84cULL, },
+        { 0xe09e2c9abc63c49cULL, 0xe41cee50ad7a3ac0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_b.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_b.c
index 6939e91fe4..05712a1f05 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_b.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_A.B
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x7f7f687f7f627f7fULL, 0x7f377f297f6d4374ULL, },
         { 0x7f7f307f7f7f737fULL, 0x7f767f7f597f6e6cULL, },
         { 0x757f167f7f7f5756ULL, 0x7f187f426c7f7064ULL, },
+        { 0x7f7f687f7f627f7fULL, 0x7f377f297f6d4374ULL, },
+        { 0x7f7f2c7f7f623c7fULL, 0x7f1e7f507f7f3c7fULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_d.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_d.c
index af0f3d3700..c222a9529f 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_d.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_A.D
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL, },
         { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL, },
         { 0x749115ea109e1b46ULL, 0x7fffffffffffffffULL, },
+        { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL, },
+        { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_h.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_h.c
index 4d3774fef2..0348f8ca47 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_h.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_A.H
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x7fff67a37fff7fffULL, 0x7fff7fff7fff4274ULL, },
         { 0x7fff2f817fff72f2ULL, 0x7fff7fff58436d54ULL, },
         { 0x749116b07fff56aaULL, 0x7fff7fff6bfd705cULL, },
+        { 0x7fff67a37fff7fffULL, 0x7fff7fff7fff4274ULL, },
+        { 0x7fff2c9a7fff3b64ULL, 0x7fff7fff7fff3ac0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_w.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_w.c
index 6f06fdc7cd..9ad9c569f8 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_w.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_A.W
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x7fffffff7fffffffULL, 0x7fffffff7fffffffULL, },
         { 0x7fffffff7fffffffULL, 0x7fffffff58416d54ULL, },
         { 0x749115ea7fffffffULL, 0x7fffffff6bfc705cULL, },
+        { 0x7fffffff7fffffffULL, 0x7fffffff7fffffffULL, },
+        { 0x7fffffff7fffffffULL, 0x7fffffff7fffffffULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_b.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_b.c
index e6cb9871f5..a7f4de9260 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_b.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_S.B
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x1c7fc4f7170080ceULL, 0xb4c980d7806d07b4ULL, },
         { 0xf87ffc197f7f377fULL, 0xd8589336a77f92acULL, },
         { 0x6b0d167f7fc4a956ULL, 0x9fe880f2be7f349cULL, },
+        { 0x1c7fc4f7170080ceULL, 0xb4c980d7806d07b4ULL, },
+        { 0x7f7f2c7f7f62c47fULL, 0x80e280b0807fc480ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_d.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_d.c
index 2cda5d9661..d1048fcdd5 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_d.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_S.D
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x1ca9c4f818016dceULL, 0xb5ca4fd8546e07b4ULL, },
         { 0xf8b9fd198694378eULL, 0xd9589437a7be92acULL, },
         { 0x6c0d16b0abc5a956ULL, 0xa0e943f2be82359cULL, },
+        { 0x1ca9c4f818016dceULL, 0xb5ca4fd8546e07b4ULL, },
+        { 0x7fffffffffffffffULL, 0x8000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_h.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_h.c
index 5539322423..6263616a63 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_h.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_S.H
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x1ca9c4f718008000ULL, 0xb5c98000800007b4ULL, },
         { 0xf8b9fd197fff378eULL, 0xd9589436a7bd92acULL, },
         { 0x6c0d16b07fffa956ULL, 0xa0e88000be81359cULL, },
+        { 0x1ca9c4f718008000ULL, 0xb5c98000800007b4ULL, },
+        { 0x7fff2c9a7fffc49cULL, 0x800080008000c540ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_w.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_w.c
index 4f2cc3862e..80b69f70cc 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_w.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_S.W
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x1ca9c4f718016dceULL, 0xb5ca4fd780000000ULL, },
         { 0xf8b9fd197fffffffULL, 0xd9589436a7be92acULL, },
         { 0x6c0d16b07fffffffULL, 0xa0e943f2be82359cULL, },
+        { 0x1ca9c4f718016dceULL, 0xb5ca4fd780000000ULL, },
+        { 0x7fffffff7fffffffULL, 0x8000000080000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_b.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_b.c
index e2d9be38e7..d61623fec5 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_b.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_U.B
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xffa9c4f7ffffffceULL, 0xb4ffffffff6dffb4ULL, },
         { 0xf8b9fcff8693ff8eULL, 0xd8ff93ffffbdffacULL, },
         { 0xffff16b0abc4ff56ULL, 0x9ffffff2be81ffffULL, },
+        { 0xffa9c4f7ffffffceULL, 0xb4ffffffff6dffb4ULL, },
+        { 0xe09e2c9abc62ff9cULL, 0xffffffffff84ffffULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_d.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_d.c
index 8418c636b6..7b60457f1d 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_d.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_U.D
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xb5ca4fd8546e07b4ULL, },
         { 0xf8b9fd198694378eULL, 0xd9589437a7be92acULL, },
         { 0xffffffffffffffffULL, 0xa0e943f2be82359cULL, },
+        { 0xffffffffffffffffULL, 0xb5ca4fd8546e07b4ULL, },
+        { 0xe09e2c9abc63c49cULL, 0xffffffffffffffffULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_h.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_h.c
index 8a3b5c5cf5..c8b226228c 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_h.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_U.H
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xffffc4f7ffffffffULL, 0xb5c9ffffffffffffULL, },
         { 0xf8b9fd198693ffffULL, 0xd9589436ffffffffULL, },
         { 0xffff16b0abc4ffffULL, 0xa0e8ffffbe81ffffULL, },
+        { 0xffffc4f7ffffffffULL, 0xb5c9ffffffffffffULL, },
+        { 0xe09e2c9abc62ffffULL, 0xffffffffffffffffULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_w.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_w.c
index b18bdc3ea0..7880b03383 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_w.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_U.W
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xb5ca4fd7ffffffffULL, },
         { 0xf8b9fd198694378eULL, 0xd9589436ffffffffULL, },
         { 0xffffffffabc5a956ULL, 0xa0e943f2be82359cULL, },
+        { 0xffffffffffffffffULL, 0xb5ca4fd7ffffffffULL, },
+        { 0xe09e2c9abc63c49cULL, 0xffffffffffffffffULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_b.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_b.c
index c86c99291e..dbfcd3800a 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_b.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDV.B
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x1ca9c4f717006dceULL, 0xb4c94ed7546d07b4ULL, },
         { 0xf8b9fc198693378eULL, 0xd8589336a7bd92acULL, },
         { 0x6b0d16b0abc4a956ULL, 0x9fe843f2be81349cULL, },
+        { 0x1ca9c4f717006dceULL, 0xb4c94ed7546d07b4ULL, },
+        { 0xe09e2c9abc62c49cULL, 0x1ae210b05284c440ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_d.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_d.c
index 0f301515b3..27b6d65686 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_d.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDV.D
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x1ca9c4f818016dceULL, 0xb5ca4fd8546e07b4ULL, },
         { 0xf8b9fd198694378eULL, 0xd9589437a7be92acULL, },
         { 0x6c0d16b0abc5a956ULL, 0xa0e943f2be82359cULL, },
+        { 0x1ca9c4f818016dceULL, 0xb5ca4fd8546e07b4ULL, },
+        { 0xe09e2c9abc63c49cULL, 0x1be311b15285c540ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_h.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_h.c
index c6b4cf697b..f7045f6151 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_h.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDV.H
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x1ca9c4f718006dceULL, 0xb5c94fd7546d07b4ULL, },
         { 0xf8b9fd198693378eULL, 0xd9589436a7bd92acULL, },
         { 0x6c0d16b0abc4a956ULL, 0xa0e843f2be81359cULL, },
+        { 0x1ca9c4f718006dceULL, 0xb5c94fd7546d07b4ULL, },
+        { 0xe09e2c9abc62c49cULL, 0x1be211b05284c540ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_w.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_w.c
index 2a565e8ed4..111e7c89e8 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_w.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDV.W
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x1ca9c4f718016dceULL, 0xb5ca4fd7546e07b4ULL, },
         { 0xf8b9fd198694378eULL, 0xd9589436a7be92acULL, },
         { 0x6c0d16b0abc5a956ULL, 0xa0e943f2be82359cULL, },
+        { 0x1ca9c4f718016dceULL, 0xb5ca4fd7546e07b4ULL, },
+        { 0xe09e2c9abc63c49cULL, 0x1be311b05285c540ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_d.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_d.c
index 7845dc0218..e6dffbf21f 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_d.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction HADD_S.D
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x000000000a8c90f8ULL, 0xffffffffd11ba99fULL, },
         { 0x0000000098b16b8dULL, 0xffffffff8c6d38e4ULL, },
         { 0x00000000bde2dd55ULL, 0xffffffffa330dbd4ULL, },
+        { 0x000000002a1ea1cdULL, 0xffffffff391cadecULL, },
+        { 0x00000000ce80f89bULL, 0xffffffff37346b78ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_h.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_h.c
index ddc2de3ff2..211123aa9d 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_h.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction HADD_S.H
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xfffbfffbffeaffd9ULL, 0x0018ff9effedffc5ULL, },
         { 0x00daffe200c00022ULL, 0xfff4ffe60024ffeeULL, },
         { 0x002e0079fff1ffeaULL, 0xff84ffa2ffe8ffdeULL, },
+        { 0x00caffc0002dff62ULL, 0xff65ff87ffd4fff6ULL, },
+        { 0x00bf0063008f0030ULL, 0xff7eff60ffebff82ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_w.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_w.c
index 887cd1cd5c..cdab49d7dc 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_w.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction HADD_S.W
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xffffc2a7ffff9c1dULL, 0xffffb0b0ffff8dcbULL, },
         { 0x0000571b0000b371ULL, 0xffff994fffff594eULL, },
         { 0x000070b200002539ULL, 0xffff490bfffffc3eULL, },
+        { 0x00001ef9ffffe9b1ULL, 0xffff54f0ffffce56ULL, },
+        { 0x0000869c0000407fULL, 0xffff16c9ffff8be2ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_d.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_d.c
index f0710f15de..38c43cedf7 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_d.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction HADD_U.D
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x000000010a8c90f8ULL, 0x00000000d11ba99fULL, },
         { 0x0000000098b16b8dULL, 0x000000018c6d38e4ULL, },
         { 0x00000000bde2dd55ULL, 0x00000000a330dbd4ULL, },
+        { 0x000000012a1ea1cdULL, 0x00000001391cadecULL, },
+        { 0x00000000ce80f89bULL, 0x0000000137346b78ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_h.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_h.c
index fe55d3eaee..41863f6cf8 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_h.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction HADD_U.H
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x00fb00fb00ea00d9ULL, 0x0118019e00ed00c5ULL, },
         { 0x00da00e200c00122ULL, 0x00f400e6012400eeULL, },
         { 0x012e007900f100eaULL, 0x018400a200e801deULL, },
+        { 0x00ca00c0012d0162ULL, 0x0165018700d400f6ULL, },
+        { 0x00bf0063008f0130ULL, 0x017e016000eb0182ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_w.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_w.c
index babe04d586..c30c2663fc 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_w.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction HADD_U.W
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0000c2a700019c1dULL, 0x0000b0b000018dcbULL, },
         { 0x0001571b0000b371ULL, 0x0000994f0001594eULL, },
         { 0x000070b200012539ULL, 0x0001490b0000fc3eULL, },
+        { 0x00011ef90000e9b1ULL, 0x000154f00000ce56ULL, },
+        { 0x0000869c0001407fULL, 0x000116c900018be2ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_b.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_b.c
index 675fb90c72..bb73fca1a3 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_S.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0e54e2fb0b00b6e7ULL, 0xdae4a7ebaa3603daULL, },
         { 0xfc5cfe0c43491b47ULL, 0xec2cc91bd35ec9d6ULL, },
         { 0x35060b5855e2d42bULL, 0xcff4a1f9df401aceULL, },
+        { 0x0e54e2fb0b00b6e7ULL, 0xdae4a7ebaa3603daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_d.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_d.c
index e87d414b5f..b8c5ab4dc1 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_S.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0e54e27c0c00b6e7ULL, 0xdae527ec2a3703daULL, },
         { 0xfc5cfe8cc34a1bc7ULL, 0xecac4a1bd3df4956ULL, },
         { 0x36068b5855e2d4abULL, 0xd074a1f95f411aceULL, },
+        { 0x0e54e27c0c00b6e7ULL, 0xdae527ec2a3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_h.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_h.c
index c850543587..24aed155f6 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_S.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0e54e27b0c00b6e7ULL, 0xdae4a7ebaa3603daULL, },
         { 0xfc5cfe8c43491bc7ULL, 0xecacca1bd3dec956ULL, },
         { 0x36060b5855e2d4abULL, 0xd074a1f9df401aceULL, },
+        { 0x0e54e27b0c00b6e7ULL, 0xdae4a7ebaa3603daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_w.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_w.c
index 3220574ca0..c0082d1cc4 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_S.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0e54e27b0c00b6e7ULL, 0xdae527ebaa3703daULL, },
         { 0xfc5cfe8c434a1bc7ULL, 0xecac4a1bd3df4956ULL, },
         { 0x36068b5855e2d4abULL, 0xd074a1f9df411aceULL, },
+        { 0x0e54e27b0c00b6e7ULL, 0xdae527ebaa3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_b.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_b.c
index c3f96a6a5f..8c857c594e 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_U.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x8e54627b8b80b667ULL, 0x5ae4a7ebaa36835aULL, },
         { 0x7c5c7e8c43499b47ULL, 0x6cac499bd35ec956ULL, },
         { 0xb5860b585562d42bULL, 0x4ff4a1795f409aceULL, },
+        { 0x8e54627b8b80b667ULL, 0x5ae4a7ebaa36835aULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_d.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_d.c
index 3a78629cd8..fd2a4b9d49 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_U.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x8e54e27c0c00b6e7ULL, 0x5ae527ec2a3703daULL, },
         { 0x7c5cfe8cc34a1bc7ULL, 0x6cac4a1bd3df4956ULL, },
         { 0xb6068b5855e2d4abULL, 0x5074a1f95f411aceULL, },
+        { 0x8e54e27c0c00b6e7ULL, 0x5ae527ec2a3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_h.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_h.c
index b7db518afb..6ab8f47f40 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_U.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x8e54627b8c00b6e7ULL, 0x5ae4a7ebaa3683daULL, },
         { 0x7c5c7e8c43499bc7ULL, 0x6cac4a1bd3dec956ULL, },
         { 0xb6060b5855e2d4abULL, 0x5074a1f95f409aceULL, },
+        { 0x8e54627b8c00b6e7ULL, 0x5ae4a7ebaa3683daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_w.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_w.c
index 75e2409f1f..be614bb238 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_U.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x8e54e27b8c00b6e7ULL, 0x5ae527ebaa3703daULL, },
         { 0x7c5cfe8c434a1bc7ULL, 0x6cac4a1bd3df4956ULL, },
         { 0xb6068b5855e2d4abULL, 0x5074a1f95f411aceULL, },
+        { 0x8e54e27b8c00b6e7ULL, 0x5ae527ebaa3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_b.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_b.c
index 59bba28d2e..bccb6ac892 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_S.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0e55e2fc0c00b7e7ULL, 0xdae5a7ecaa3704daULL, },
         { 0xfc5dfe0d434a1c47ULL, 0xec2cca1bd45fc9d6ULL, },
         { 0x36070b5856e2d52bULL, 0xd0f4a2f9df411aceULL, },
+        { 0x0e55e2fc0c00b7e7ULL, 0xdae5a7ecaa3704daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_d.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_d.c
index 435c09f9bf..941d889790 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_S.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0e54e27c0c00b6e7ULL, 0xdae527ec2a3703daULL, },
         { 0xfc5cfe8cc34a1bc7ULL, 0xecac4a1bd3df4956ULL, },
         { 0x36068b5855e2d4abULL, 0xd074a1f95f411aceULL, },
+        { 0x0e54e27c0c00b6e7ULL, 0xdae527ec2a3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_h.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_h.c
index 0902e508ec..b977aaceb4 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_S.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0e55e27c0c00b6e7ULL, 0xdae5a7ecaa3703daULL, },
         { 0xfc5dfe8d434a1bc7ULL, 0xecacca1bd3dfc956ULL, },
         { 0x36070b5855e2d4abULL, 0xd074a1f9df411aceULL, },
+        { 0x0e55e27c0c00b6e7ULL, 0xdae5a7ecaa3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_w.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_w.c
index 31f4553916..c41a9b0479 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_S.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0e54e27c0c00b6e7ULL, 0xdae527ecaa3703daULL, },
         { 0xfc5cfe8d434a1bc7ULL, 0xecac4a1bd3df4956ULL, },
         { 0x36068b5855e2d4abULL, 0xd074a1f9df411aceULL, },
+        { 0x0e54e27c0c00b6e7ULL, 0xdae527ecaa3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_b.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_b.c
index 8aa7ec6a41..ed9aa28fa0 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_U.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x8e55627c8c80b767ULL, 0x5ae5a7ecaa37845aULL, },
         { 0x7c5d7e8d434a9c47ULL, 0x6cac4a9bd45fc956ULL, },
         { 0xb6870b585662d52bULL, 0x50f4a2795f419aceULL, },
+        { 0x8e55627c8c80b767ULL, 0x5ae5a7ecaa37845aULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_d.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_d.c
index 9b16e1250f..04fcea4d2a 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_U.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x8e54e27c0c00b6e7ULL, 0x5ae527ec2a3703daULL, },
         { 0x7c5cfe8cc34a1bc7ULL, 0x6cac4a1bd3df4956ULL, },
         { 0xb6068b5855e2d4abULL, 0x5074a1f95f411aceULL, },
+        { 0x8e54e27c0c00b6e7ULL, 0x5ae527ec2a3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_h.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_h.c
index 191e4acbdc..4305010389 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_U.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x8e55627c8c00b6e7ULL, 0x5ae5a7ecaa3783daULL, },
         { 0x7c5d7e8d434a9bc7ULL, 0x6cac4a1bd3dfc956ULL, },
         { 0xb6070b5855e2d4abULL, 0x5074a1f95f419aceULL, },
+        { 0x8e55627c8c00b6e7ULL, 0x5ae5a7ecaa3783daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_w.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_w.c
index e0d6b177c4..47cdc5c08f 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_U.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x8e54e27c8c00b6e7ULL, 0x5ae527ecaa3703daULL, },
         { 0x7c5cfe8d434a1bc7ULL, 0x6cac4a1bd3df4956ULL, },
         { 0xb6068b5855e2d4abULL, 0x5074a1f95f411aceULL, },
+        { 0x8e54e27c8c00b6e7ULL, 0x5ae527ecaa3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_b.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_b.c
index bb884ee752..3ffb27c237 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_d.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_d.c
index ef13f7d05d..dc60868c27 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_h.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_h.c
index 1c43d40ee1..a3073d784b 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_w.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_w.c
index 1297d41f29..9e81665af7 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_b.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_b.c
index afd5f635f0..4235d59da7 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_d.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_d.c
index 04d58d103c..796f89abb7 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_h.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_h.c
index ed1a1e21bd..0b63274022 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_w.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_w.c
index ea4dc1a30b..8fcee89221 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_b.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_b.c
index 6e4fdd83ec..dc50ed3cd8 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_d.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_d.c
index b2b2f557b8..f6635f958c 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_h.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_h.c
index b2267752eb..0d09304fcb 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_w.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_w.c
index 00e930c0c7..4671eeee7f 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_b.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_b.c
index 4a52ebe491..18d5706282 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_d.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_d.c
index cc945cdf8d..15a31660d2 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_h.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_h.c
index b228dfe7f5..89fcc2cfe6 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_w.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_w.c
index 6cb192a851..c8481dfc7a 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_b.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_b.c
index b6189d6b72..ee2b39c687 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_d.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_d.c
index 4f547d8f0b..8fef047268 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_h.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_h.c
index 9fcd81c653..5b236a7bea 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_w.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_w.c
index 8f648afa62..918420c592 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_b.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_b.c
index 38e3670422..1b11d93025 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_S.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0001fdff00ff03ffULL, 0x000200000000ff00ULL, },
         { 0x000000ff02000001ULL, 0xff00f6002b0000f8ULL, },
         { 0xeaffff0001000009ULL, 0xfa0101fffc010018ULL, },
+        { 0xff000000ffff0000ULL, 0xfe000228010100fcULL, },
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_d.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_d.c
index d92b6953e8..289cf6ae3f 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_S.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
         { 0x0000000000000000ULL, 0xffffffffffffffffULL, },
         { 0xffffffffffffffe6ULL, 0xfffffffffffffffaULL, },
+        { 0xffffffffffffffffULL, 0xfffffffffffffffeULL, },
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_h.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_h.c
index f191b985b1..af10a04352 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_S.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0000fffd00000003ULL, 0x000000000000ffffULL, },
         { 0x0000000000020000ULL, 0xfffffff600390000ULL, },
         { 0xffe6003900010000ULL, 0xfffa0001fffc0000ULL, },
+        { 0xffff0000ffff0000ULL, 0xfffe000200010000ULL, },
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_w.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_w.c
index 0baaff10f9..9d3920bc53 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_S.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
         { 0x0000000000000002ULL, 0xffffffff00000039ULL, },
         { 0xffffffe600000001ULL, 0xfffffffafffffffcULL, },
+        { 0xffffffffffffffffULL, 0xfffffffe00000001ULL, },
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_b.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_b.c
index 770544a2de..8c0623538f 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_U.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0101070201040001ULL, 0x0000010101000000ULL, },
         { 0x0000000002000201ULL, 0x01020c020000010dULL, },
         { 0x0000ff0001000109ULL, 0x0700000808010200ULL, },
+        { 0x0000000000000100ULL, 0x0301000000010608ULL, },
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_d.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_d.c
index 9653e7db5c..2a9bfff459 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_U.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0000000000000001ULL, 0x0000000000000000ULL, },
         { 0x0000000000000000ULL, 0x0000000000000001ULL, },
         { 0x0000000000000000ULL, 0x0000000000000007ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000003ULL, },
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_h.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_h.c
index 3dcd30bee9..95da85b4b9 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_U.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0001000700010000ULL, 0x0000000100010000ULL, },
         { 0x0000000000020002ULL, 0x0001000c00000001ULL, },
         { 0x0000003900010001ULL, 0x0007000000070002ULL, },
+        { 0x0000000000000001ULL, 0x0003000000000006ULL, },
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_w.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_w.c
index fd395ef5e9..a3984b0790 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_U.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x0000000100000001ULL, 0x0000000000000001ULL, },
         { 0x0000000000000002ULL, 0x0000000100000000ULL, },
         { 0x0000000000000001ULL, 0x0000000700000007ULL, },
+        { 0x0000000000000000ULL, 0x0000000300000000ULL, },
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_d.c b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_d.c
index af8d609bea..e6d72097a6 100644
--- a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DOTP_S.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xc17a5d0372a2a622ULL, 0x0afd6368668933a8ULL, },
         { 0xda65cd5e9f696cdcULL, 0xdeeb6bec644a26d0ULL, },
         { 0x1aad30609bff5437ULL, 0xf059a43d01b40370ULL, },
+        { 0xc17a5d0372a2a622ULL, 0x0afd6368668933a8ULL, },
+        { 0x53edf7dbd76122edULL, 0x50347e61c2f51a40ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_h.c b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_h.c
index 40de72ae97..20a7029bd2 100644
--- a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DOTP_S.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xf706df16dc8de6b6ULL, 0xf0d31b5827f9f42aULL, },
         { 0xec36ee202172098aULL, 0xd846ec28206404e0ULL, },
         { 0xe9721dc70769091eULL, 0xf8711c48091bf7e4ULL, },
+        { 0xf706df16dc8de6b6ULL, 0xf0d31b5827f9f42aULL, },
+        { 0x4961190d2be51b48ULL, 0x348a3e802e952784ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_w.c b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_w.c
index 2f1d23be6f..493fc10cd7 100644
--- a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DOTP_S.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xd437b4e8f3b0139fULL, 0x08c7d980187d5896ULL, },
         { 0xc9576c1204f83042ULL, 0xd91d3e4709b06e36ULL, },
         { 0xfe2a6f6923268793ULL, 0x179e9377ef4766beULL, },
+        { 0xd437b4e8f3b0139fULL, 0x08c7d980187d5896ULL, },
+        { 0x33368b8a2619d525ULL, 0x6a47932120c31904ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_d.c b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_d.c
index e998e00410..7c9bd57fa7 100644
--- a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DOTP_U.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x8ffb559e72a2a622ULL, 0x8744321b668933a8ULL, },
         { 0x4ab4e3ab9f696cdcULL, 0xd21109f6644a26d0ULL, },
         { 0x8afc46ad9bff5437ULL, 0x1890b25301b40370ULL, },
+        { 0x8ffb559e72a2a622ULL, 0x8744321b668933a8ULL, },
+        { 0x53edf7dbd76122edULL, 0xbe9d5551c2f51a40ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_h.c b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_h.c
index e8db601a74..5ca7164718 100644
--- a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DOTP_U.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x670642166b8da1b6ULL, 0xe0d340587bf92d2aULL, },
         { 0x5c36512021725e8aULL, 0x8a465528c764a2e0ULL, },
         { 0xa8721dc73869b21eULL, 0xf27179481e1be5e4ULL, },
+        { 0x670642166b8da1b6ULL, 0xe0d340587bf92d2aULL, },
+        { 0x4961190d2be5df48ULL, 0x308afe8080952b84ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_w.c b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_w.c
index cf5bd13f48..271666da05 100644
--- a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DOTP_U.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x5ad3b4e8bfaf139fULL, 0x8076d98091fe5896ULL, },
         { 0x4ff36c125a383042ULL, 0x2fe23e4744196e36ULL, },
         { 0x6e796f69cc7c8793ULL, 0x6e879377578266beULL, },
+        { 0x5ad3b4e8bfaf139fULL, 0x8076d98091fe5896ULL, },
+        { 0x33368b8aeab5d525ULL, 0x97d9932138871904ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_b.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_b.c
index 5fa2644c30..eabcd2a5cf 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_d.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_d.c
index 9d97982ab5..866c6839f2 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_h.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_h.c
index 3365f726a2..7f4c2406a7 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_w.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_w.c
index b33f4b7d79..7fb278ea02 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_b.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_b.c
index 71e571d0c4..7e35435e42 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_d.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_d.c
index e088ab99e3..f0a341e08a 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_h.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_h.c
index 6d1b81a119..52de98ba8f 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_w.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_w.c
index bd64294322..b2d0a22ac4 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_b.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_b.c
index 206d907a26..6fb023a4b7 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_d.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_d.c
index 4dd247f54a..35ebf1fdab 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_h.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_h.c
index 0e6a7651eb..0e7cf1e298 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_w.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_w.c
index db61440551..9dee9973f3 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_b.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_b.c
index d2a93a2e44..747f54d0ac 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_d.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_d.c
index 69fd3c7662..49ded49fe0 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_h.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_h.c
index 9f45b55539..f78104ecb6 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_w.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_w.c
index b08231d65f..e0afa01746 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_b.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_b.c
index 80b5201be1..248b9681c7 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_d.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_d.c
index 0ed319024c..219029a520 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_h.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_h.c
index b049054d9f..fb059af226 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_w.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_w.c
index 2bcd0a00ef..dd479921f5 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_b.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_b.c
index 2a06b43379..7985acf5e4 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_d.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_d.c
index 37924f3038..45d91af127 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_h.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_h.c
index 1846995ce4..eb527bbc2b 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_w.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_w.c
index 8b20c05440..057f22accb 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_b.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_b.c
new file mode 100644
index 0000000000..a8964eadf6
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_b.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction MOD_S.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_S.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xdedededededededeULL, 0xdedededededededeULL, },
+        { 0xddddddddddddddddULL, 0xddddddddddddddddULL, },
+        { 0xe4aae2e4aae2e4aaULL, 0xe2e4aae2e4aae2e4ULL, },
+        { 0xfeaae3feaae3feaaULL, 0xe3feaae3feaae3feULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2121212121212121ULL, 0x2121212121212121ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x1b551d1b551d1b55ULL, 0x1d1b551d1b551d1bULL, },
+        { 0x01551c01551c0155ULL, 0x1c01551c01551c01ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xe9cccce9cccce9ccULL, 0xcce9cccce9cccce9ULL, },
+        { 0xe8cccce8cccce8ccULL, 0xcce8cccce8cccce8ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x1633331633331633ULL, 0x3316333316333316ULL, },
+        { 0x1733331733331733ULL, 0x3317333317333317ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe3e438e3e438e3e4ULL, 0x38e3e438e3e438e3ULL, },
+        { 0xe3e338e3e338e3e3ULL, 0x38e3e338e3e338e3ULL, },
+        { 0xe3f604e3f604e3f6ULL, 0x04e3f604e3f604e3ULL, },
+        { 0xe3f405e3f405e3f4ULL, 0x05e3f405e3f405e3ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffff38ffff38ffffULL, 0x38ffff38ffff38ffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c1bc71c1bc71c1bULL, 0xc71c1bc71c1bc71cULL, },
+        { 0x1c1cc71c1cc71c1cULL, 0xc71c1cc71c1cc71cULL, },
+        { 0x1c09fb1c09fb1c09ULL, 0xfb1c09fb1c09fb1cULL, },
+        { 0x1c0bfa1c0bfa1c0bULL, 0xfa1c0bfa1c0bfa1cULL, },
+        { 0x1c71ff1c71ff1c71ULL, 0xff1c71ff1c71ff1cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x0028e6cc28621c00ULL, 0x03040b10fe3cb000ULL, },
+        { 0xdc10e6cc28005540ULL, 0x24170b00fe25fa0cULL, },
+        { 0xf81bfccc28001940ULL, 0x4b0d0b0efe39ec0cULL, },
+        { 0xfbbe002f25f5c708ULL, 0x12f7fd1a013f02fcULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xfbbe000d06f5c708ULL, 0x12f7f500151408fcULL, },
+        { 0xfbbe00164df5e508ULL, 0x12f7bb1a153f16fcULL, },
+        { 0xac5afcdee1cfe000ULL, 0x27d8fdffff2b2508ULL, },    /*  72  */
+        { 0xfc18aeaab9cffd00ULL, 0x03fcc6ffff2b2500ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xac0bf0f7b900e5ceULL, 0x27f6c6ffab2b0714ULL, },
+        { 0x704f16190e31e20eULL, 0xd8f1f6d8ff42e200ULL, },
+        { 0x020d164d1131e206ULL, 0xf9facdf2fd03e200ULL, },
+        { 0x1c4f164d1700e24eULL, 0xdbf1fc00fe17e2f0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_B(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_B(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_d.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_d.c
new file mode 100644
index 0000000000..3346d37d19
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_d.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction MOD_S.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_S.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xdddddddddddddddeULL, 0xdddddddddddddddeULL, },
+        { 0xddddddddddddddddULL, 0xddddddddddddddddULL, },
+        { 0x0000000000000000ULL, 0xe38e38e38e38e38dULL, },
+        { 0xfffffffffffffffdULL, 0xe38e38e38e38e38eULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2222222222222221ULL, 0x2222222222222221ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x0000000000000002ULL, 0x1c71c71c71c71c71ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xe93e93e93e93e93eULL, 0xccccccccccccccccULL, },
+        { 0xe93e93e93e93e93dULL, 0xccccccccccccccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x16c16c16c16c16c1ULL, 0x3333333333333333ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x05b05b05b05b05afULL, },
+        { 0xe38e38e38e38e38eULL, 0x05b05b05b05b05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0x38e38e38e38e38e3ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xfa4fa4fa4fa4fa50ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xfa4fa4fa4fa4fa4fULL, },
+        { 0x1c71c71c71c71c71ULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0xffa2dbefac389060ULL, 0x127fda10bebdb718ULL, },
+        { 0xdc1038216e92c9c0ULL, 0x238e445f53508af8ULL, },
+        { 0xf8b9fd198694378eULL, 0x4b670b5efe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },    /*  72  */
+        { 0xfd40a74bf7d7c5e8ULL, 0x01e950cb80ac7f1cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0xd9589437a7be92acULL, },
+        { 0x019b20633f34191eULL, 0xffbfeb7528bed488ULL, },
+        { 0x1ca9c4f818016dceULL, 0xdda316d7ff992cc8ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_D(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_D(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_h.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_h.c
new file mode 100644
index 0000000000..c28c2534f5
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_h.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction MOD_S.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_S.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xdddedddedddedddeULL, 0xdddedddedddedddeULL, },
+        { 0xddddddddddddddddULL, 0xddddddddddddddddULL, },
+        { 0x0000e38daaaa0000ULL, 0xe38daaaa0000e38dULL, },
+        { 0xfffde38eaaaafffdULL, 0xe38eaaaafffde38eULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2221222122212221ULL, 0x2221222122212221ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x1c711c7255551c71ULL, 0x1c7255551c711c72ULL, },
+        { 0x00021c7155550002ULL, 0x1c71555500021c71ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xe93ecccccccce93eULL, 0xcccccccce93eccccULL, },
+        { 0xe93dcccccccce93dULL, 0xcccccccce93dccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x16c13333333316c1ULL, 0x3333333316c13333ULL, },
+        { 0x16c23333333316c2ULL, 0x3333333316c23333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e3e38ee38eULL, 0x38e3e38ee38e38e3ULL, },
+        { 0xe38e38e3e38de38eULL, 0x38e3e38de38e38e3ULL, },
+        { 0xe38e05aff4a0e38eULL, 0x05aff4a0e38e05afULL, },
+        { 0xe38e05b0f49ee38eULL, 0x05b0f49ee38e05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffff38e3ffffffffULL, 0x38e3ffffffff38e3ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c1c711c71ULL, 0xc71c1c711c71c71cULL, },
+        { 0x1c71c71c1c721c71ULL, 0xc71c1c721c71c71cULL, },
+        { 0x1c71fa500b5f1c71ULL, 0xfa500b5f1c71fa50ULL, },
+        { 0x1c71fa4f0b611c71ULL, 0xfa4f0b611c71fa4fULL, },
+        { 0x1c71ffff71c71c71ULL, 0xffff71c71c71ffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0xffa2ffef28621c48ULL, 0x12820b5efe7bb00cULL, },
+        { 0xdc10e6cc28625540ULL, 0x238f0b5efe7bfa34ULL, },
+        { 0xf8b9fd19286219dcULL, 0x4b670b5efe7beaccULL, },
+        { 0xfbbe00632531c708ULL, 0x12f7ff4e017e0308ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xfbbe00630762c708ULL, 0x12f7f41b153f08d4ULL, },
+        { 0xfbbe00634d93e4baULL, 0x12f7bb1a153f183cULL, },
+        { 0xac5afa46e231e0c0ULL, 0x27d8ffd5febe2514ULL, },    /*  72  */
+        { 0xfd40ffe0b9cffd70ULL, 0x01eac6ffeae82514ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xac5af191b9cfe496ULL, 0x27d8c6ffab2b07b4ULL, },
+        { 0x704f164d0d6de24eULL, 0xd958fa84ffdfe2a0ULL, },
+        { 0x019b0042109ee24eULL, 0xffbbcdbefe3ee2a0ULL, },
+        { 0x1ca9164d1800e24eULL, 0xdda1fadafe17e2a0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_H(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_H(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_w.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_w.c
new file mode 100644
index 0000000000..593e516534
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_w.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction MOD_S.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_S.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xdddddddedddddddeULL, 0xdddddddedddddddeULL, },
+        { 0xddddddddddddddddULL, 0xddddddddddddddddULL, },
+        { 0xe38e38e4aaaaaaaaULL, 0xe38e38e2e38e38e4ULL, },
+        { 0xfffffffeaaaaaaaaULL, 0xe38e38e3fffffffeULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2222222122222221ULL, 0x2222222122222221ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x1c71c71b55555555ULL, 0x1c71c71d1c71c71bULL, },
+        { 0x0000000155555555ULL, 0x1c71c71c00000001ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xe93e93e9ccccccccULL, 0xcccccccce93e93e9ULL, },
+        { 0xe93e93e8ccccccccULL, 0xcccccccce93e93e8ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x16c16c1633333333ULL, 0x3333333316c16c16ULL, },
+        { 0x16c16c1733333333ULL, 0x3333333316c16c17ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e3e38e38e4ULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e3e38e38e3ULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e3f49f49f6ULL, 0x05b05b04e38e38e3ULL, },
+        { 0xe38e38e3f49f49f4ULL, 0x05b05b05e38e38e3ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0x38e38e38ffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c1c71c71bULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c1c71c71cULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c0b60b609ULL, 0xfa4fa4fb1c71c71cULL, },
+        { 0x1c71c71c0b60b60bULL, 0xfa4fa4fa1c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xffffffff1c71c71cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0xffa2dbf828625540ULL, 0x127fda10fe7bb00cULL, },
+        { 0xdc10382228625540ULL, 0x238e445ffe7bb00cULL, },
+        { 0xf8b9fd1928625540ULL, 0x4b670b5efe7bb00cULL, },
+        { 0xfbbe0063253171c8ULL, 0x12f7bb1a0002f3a4ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xfbbe006307635288ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaae231e0c0ULL, 0x27d8c6fffe985280ULL, },    /*  72  */
+        { 0xfd40a751b9cf8b80ULL, 0x01e950cbeae91e08ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d0d6d37ceULL, 0xd9589436ffb8aff4ULL, },
+        { 0x019b205b109e1b46ULL, 0xffbfeb74fe402e90ULL, },
+        { 0x1ca9c4f718016dceULL, 0xdda316d6fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_W(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_W(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_b.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_b.c
new file mode 100644
index 0000000000..1b5e57b96f
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_b.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction MOD_U.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_U.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x1c711f1c711f1c71ULL, 0x1f1c711f1c711f1cULL, },
+        { 0x031d38031d38031dULL, 0x38031d38031d3803ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x1111111111111111ULL, 0x1111111111111111ULL, },
+        { 0xaa1c02aa1c02aa1cULL, 0x02aa1c02aa1c02aaULL, },
+        { 0x0239aa0239aa0239ULL, 0xaa0239aa0239aa02ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x55551d55551d5555ULL, 0x1d55551d55551d55ULL, },
+        { 0x0155550155550155ULL, 0x5501555501555501ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xcc3e24cc3e24cc3eULL, 0x24cc3e24cc3e24ccULL, },
+        { 0x085b05085b05085bULL, 0x05085b05085b0508ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x1733331733331733ULL, 0x3317333317333317ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x398e38398e38398eULL, 0x38398e38398e3839ULL, },
+        { 0x3939383939383939ULL, 0x3839393839393839ULL, },
+        { 0x178e38178e38178eULL, 0x38178e38178e3817ULL, },
+        { 0x1728051728051728ULL, 0x0517280517280517ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x031d38031d38031dULL, 0x38031d38031d3803ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c711d1c711d1c71ULL, 0x1d1c711d1c711d1cULL, },
+        { 0x1c1c1d1c1c1d1c1cULL, 0x1d1c1c1d1c1c1d1cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c0b2e1c0b2e1c0bULL, 0x2e1c0b2e1c0b2e1cULL, },
+        { 0x1c711f1c711f1c71ULL, 0x1f1c711f1c711f1cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x886ae60628625500ULL, 0x03670b10023c0c0cULL, },
+        { 0x8810382228625540ULL, 0x24670b5e53251c0cULL, },
+        { 0x181b0a3228005540ULL, 0x4b670b5e5539b00cULL, },
+        { 0x7354006325311d08ULL, 0x1229001a153f5200ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f0a00634d933c08ULL, 0x121fbb1a1514080cULL, },
+        { 0x1b2000164d00c708ULL, 0x1206331a153f525cULL, },
+        { 0x245aaeaa190b3600ULL, 0x270a0043ab2b2508ULL, },    /*  72  */
+        { 0xac5aae471f3c8b00ULL, 0x03d80b15032b2514ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b14105b0b8b32ULL, 0x27d83e27022b2514ULL, },
+        { 0x704f164d0e31380eULL, 0x4223041ca9423204ULL, },
+        { 0x704f164d11311b06ULL, 0x0ff1880801033ea0ULL, },
+        { 0x704f164d5e31574eULL, 0x181988d8a9170400ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_B(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_B(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_d.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_d.c
new file mode 100644
index 0000000000..8aef84325f
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_d.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction MOD_U.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_U.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71c71c71c73ULL, },
+        { 0x0000000000000006ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x1111111111111111ULL, 0x1111111111111111ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0x0000000000000001ULL, },
+        { 0x0000000000000004ULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x5555555555555555ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x0000000000000002ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xccccccccccccccccULL, 0x2222222222222223ULL, },
+        { 0x05b05b05b05b05b5ULL, 0x05b05b05b05b05b0ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x05b05b05b05b05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000006ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0x2d82d82d82d82d83ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71c71c71c73ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x886ae6cc28625540ULL, 0x127fda10bebdb718ULL, },
+        { 0x886ae6cc28625540ULL, 0x238e445f53508af8ULL, },
+        { 0x181bd07eca3072f2ULL, 0x4b670b5efe7bb00cULL, },
+        { 0x73531997253171c8ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6351b893c43b88ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x1b1fd3c89130026cULL, 0x12f7bb1a153f52fcULL, },
+        { 0x23efc7de916d3640ULL, 0x27d8c6ffab2b2514ULL, },    /*  72  */
+        { 0xac5aaeaab9cf8b80ULL, 0x01e950cb80ac7f1cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b985d5b9da932ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x428a7d79aac73294ULL, },
+        { 0x704f164d5e31e24eULL, 0x092b6b2214879dbcULL, },
+        { 0x704f164d5e31e24eULL, 0x166733d9a7c17364ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_D(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_D(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_h.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_h.c
new file mode 100644
index 0000000000..ae5abb8e44
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_h.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction MOD_U.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_U.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x1c711c7371c71c71ULL, 0x1c7371c71c711c73ULL, },
+        { 0x000638e31c710006ULL, 0x38e31c71000638e3ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x1111111111111111ULL, 0x1111111111111111ULL, },
+        { 0xaaaa00011c72aaaaULL, 0x00011c72aaaa0001ULL, },
+        { 0x0004aaaa38e30004ULL, 0xaaaa38e30004aaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x55551c7255555555ULL, 0x1c72555555551c72ULL, },
+        { 0x0002555555550002ULL, 0x5555555500025555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xcccc22233e94ccccULL, 0x22233e94cccc2223ULL, },
+        { 0x05b505b05b0505b5ULL, 0x05b05b0505b505b0ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x16c23333333316c2ULL, 0x3333333316c23333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e438e38e3838e4ULL, 0x38e38e3838e438e3ULL, },
+        { 0x38e438e338e338e4ULL, 0x38e338e338e438e3ULL, },
+        { 0x16c238e38e3816c2ULL, 0x38e38e3816c238e3ULL, },
+        { 0x16c205b027d216c2ULL, 0x05b027d216c205b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x000638e31c710006ULL, 0x38e31c71000638e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c711c7271c71c71ULL, 0x1c7271c71c711c72ULL, },
+        { 0x1c711c721c721c71ULL, 0x1c721c721c711c72ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c712d830b611c71ULL, 0x2d830b611c712d83ULL, },
+        { 0x1c711c7371c71c71ULL, 0x1c7371c71c711c73ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x886a005028625540ULL, 0x12820b5e14c60a14ULL, },
+        { 0x886a382228625540ULL, 0x238f0b5e53501bbcULL, },
+        { 0x181b07ca28625540ULL, 0x4b670b5e5539b00cULL, },
+        { 0x7354006325311c88ULL, 0x12f7053a153f52fcULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6400634d933b88ULL, 0x12f7bb1a153f08d4ULL, },
+        { 0x1b2000634d93c708ULL, 0x12f73242153f52fcULL, },
+        { 0x23f0aeaa18473640ULL, 0x27d805c1ab2b2514ULL, },    /*  72  */
+        { 0xac5a00411ea98b80ULL, 0x01ea0be501332514ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b128f5b9e8b80ULL, 0x27d83e2701e92514ULL, },
+        { 0x704f164d0d6d37ceULL, 0x428a0070a9423294ULL, },
+        { 0x704f0042109e1b46ULL, 0x093088d814893ca8ULL, },
+        { 0x704f164d5e3156ceULL, 0x166988d8a9420428ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_H(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_H(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_w.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_w.c
new file mode 100644
index 0000000000..da48929f4f
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_w.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction MOD_U.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_U.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71f1c71c71cULL, },
+        { 0x000000031c71c71dULL, 0x38e38e3800000003ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x1111111111111111ULL, 0x1111111111111111ULL, },
+        { 0xaaaaaaaa1c71c71cULL, 0x00000002aaaaaaaaULL, },
+        { 0x0000000238e38e39ULL, 0xaaaaaaaa00000002ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x5555555555555555ULL, 0x1c71c71d55555555ULL, },
+        { 0x0000000155555555ULL, 0x5555555500000001ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xcccccccc3e93e93eULL, 0x22222224ccccccccULL, },
+        { 0x05b05b085b05b05bULL, 0x05b05b0505b05b08ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x16c16c1733333333ULL, 0x3333333316c16c17ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e38e398e38e38eULL, 0x38e38e3838e38e39ULL, },
+        { 0x38e38e3938e38e39ULL, 0x38e38e3838e38e39ULL, },
+        { 0x16c16c178e38e38eULL, 0x38e38e3816c16c17ULL, },
+        { 0x16c16c1727d27d28ULL, 0x05b05b0516c16c17ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x000000031c71c71dULL, 0x38e38e3800000003ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71d1c71c71cULL, },
+        { 0x1c71c71c1c71c71cULL, 0x1c71c71d1c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c0b60b60bULL, 0x2d82d82e1c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71f1c71c71cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x886ae6cc28625540ULL, 0x127fda1014c31f38ULL, },
+        { 0x886ae6cc28625540ULL, 0x238e445f53508af8ULL, },
+        { 0x181bd07f28625540ULL, 0x4b670b5e5538cd6cULL, },
+        { 0x73531997253171c8ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6351b94d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x1b1fd3c94d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x23efc7de18463680ULL, 0x27d8c6ffab2b2514ULL, },    /*  72  */
+        { 0xac5aaeaa1ea7fd70ULL, 0x01e950cb01308d34ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b985d5b9da932ULL, 0x27d8c6ff01e84274ULL, },
+        { 0x704f164d0d6d37ceULL, 0x428a7d7aa942e2a0ULL, },
+        { 0x704f164d109e1b46ULL, 0x092b6b2214879dbcULL, },
+        { 0x704f164d5e31e24eULL, 0x166733dba942e2a0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_W(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_W(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_b.c b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_b.c
index 6beeda906d..fa9753cc75 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MULV.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x40c6f422ee9fb600ULL, 0x7b583028e316aa80ULL, },
         { 0x80b6c45cb0c20a80ULL, 0x4ff7d850aeb66080ULL, },
         { 0xd0a200c74623ae70ULL, 0xea8758f0dd3e6480ULL, },
+        { 0x40c6f422ee9fb600ULL, 0x7b583028e316aa80ULL, },
+        { 0x0061e429846184c4ULL, 0xa9e1404091048400ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_d.c b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_d.c
index 3205d4b378..2503467bf2 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MULV.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x76a5ab8089e38100ULL, 0xa1019a60d4dad480ULL, },
         { 0xfbe1883aee787980ULL, 0x821d25438dd09f80ULL, },
         { 0xedbf72842143b470ULL, 0x7f8223caefce5580ULL, },
+        { 0x76a5ab8089e38100ULL, 0xa1019a60d4dad480ULL, },
+        { 0x4bb436d5b1e9cfc4ULL, 0x12d1ceb0e31ee400ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_h.c b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_h.c
index e7bd985ae1..5162678671 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MULV.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x8fc62522929f8100ULL, 0x7a585f288416d480ULL, },
         { 0x78b6f35cb6c27980ULL, 0xb6f78750ceb69f80ULL, },
         { 0xcfa29fc7d323b470ULL, 0xe587adf0113e5580ULL, },
+        { 0x8fc62522929f8100ULL, 0x7a585f288416d480ULL, },
+        { 0x386153290561cfc4ULL, 0x5ce136403504e400ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_w.c b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_w.c
index 9c318b3fbb..7e2ff6609f 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MULV.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0xe8bf252289e38100ULL, 0x91ae5f28d4dad480ULL, },
         { 0xb0f0f35cee787980ULL, 0xd67987508dd09f80ULL, },
         { 0x7abb9fc72143b470ULL, 0x11e5adf0efce5580ULL, },
+        { 0xe8bf252289e38100ULL, 0x91ae5f28d4dad480ULL, },
+        { 0x25775329b1e9cfc4ULL, 0xdfd63640e31ee400ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_b.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_b.c
new file mode 100644
index 0000000000..f583702a6b
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_b.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction ASUB_S.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_S.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5656565656565656ULL, 0x5656565656565656ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3434343434343434ULL, 0x3434343434343434ULL, },
+        { 0x1c71391c71391c71ULL, 0x391c71391c71391cULL, },
+        { 0x1d72381d72381d72ULL, 0x381d72381d72381dULL, },
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5656565656565656ULL, 0x5656565656565656ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x3434343434343434ULL, 0x3434343434343434ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x1d72381d72381d72ULL, 0x381d72381d72381dULL, },
+        { 0x1c71391c71391c71ULL, 0x391c71391c71391cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0x5656565656565656ULL, 0x5656565656565656ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xababababababababULL, 0xababababababababULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8989898989898989ULL, 0x8989898989898989ULL, },
+        { 0x391c8e391c8e391cULL, 0x8e391c8e391c8e39ULL, },
+        { 0x72c71d72c71d72c7ULL, 0x1d72c71d72c71d72ULL, },
+        { 0x5656565656565656ULL, 0x5656565656565656ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xababababababababULL, 0xababababababababULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x8989898989898989ULL, 0x8989898989898989ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x72c71d72c71d72c7ULL, 0x1d72c71d72c71d72ULL, },
+        { 0x391c8e391c8e391cULL, 0x8e391c8e391c8e39ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0x3434343434343434ULL, 0x3434343434343434ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8989898989898989ULL, 0x8989898989898989ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x6767676767676767ULL, 0x6767676767676767ULL, },
+        { 0x173e6c173e6c173eULL, 0x6c173e6c173e6c17ULL, },
+        { 0x50a50550a50550a5ULL, 0x0550a50550a50550ULL, },
+        { 0x3434343434343434ULL, 0x3434343434343434ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x8989898989898989ULL, 0x8989898989898989ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x6767676767676767ULL, 0x6767676767676767ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x50a50550a50550a5ULL, 0x0550a50550a50550ULL, },
+        { 0x173e6c173e6c173eULL, 0x6c173e6c173e6c17ULL, },
+        { 0x1c71391c71391c71ULL, 0x391c71391c71391cULL, },    /*  48  */
+        { 0x1d72381d72381d72ULL, 0x381d72381d72381dULL, },
+        { 0x391c8e391c8e391cULL, 0x8e391c8e391c8e39ULL, },
+        { 0x72c71d72c71d72c7ULL, 0x1d72c71d72c71d72ULL, },
+        { 0x173e6c173e6c173eULL, 0x6c173e6c173e6c17ULL, },
+        { 0x50a50550a50550a5ULL, 0x0550a50550a50550ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x39e37139e37139e3ULL, 0x7139e37139e37139ULL, },
+        { 0x1d72381d72381d72ULL, 0x381d72381d72381dULL, },    /*  56  */
+        { 0x1c71391c71391c71ULL, 0x391c71391c71391cULL, },
+        { 0x72c71d72c71d72c7ULL, 0x1d72c71d72c71d72ULL, },
+        { 0x391c8e391c8e391cULL, 0x8e391c8e391c8e39ULL, },
+        { 0x50a50550a50550a5ULL, 0x0550a50550a50550ULL, },
+        { 0x173e6c173e6c173eULL, 0x6c173e6c173e6c17ULL, },
+        { 0x39e37139e37139e3ULL, 0x7139e37139e37139ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x73ac1a9725cf8e38ULL, 0x39705044173ca210ULL, },
+        { 0x241038226f93cac0ULL, 0x248f455f53507508ULL, },
+        { 0xe81b30813631730eULL, 0xbe7683865539326cULL, },
+        { 0x73ac1a9725cf8e38ULL, 0x39705044173ca210ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f9c52b9943c3c88ULL, 0x151f0b1b6a142d18ULL, },
+        { 0x75911616119e1b46ULL, 0x850633426c03705cULL, },
+        { 0x241038226f93cac0ULL, 0x248f455f53507508ULL, },    /*  72  */
+        { 0x4f9c52b9943c3c88ULL, 0x151f0b1b6a142d18ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc40b68a3a56257ceULL, 0x9a193e2702174374ULL, },
+        { 0xe81b30813631730eULL, 0xbe7683865539326cULL, },
+        { 0x75911616119e1b46ULL, 0x850633426c03705cULL, },
+        { 0xc40b68a3a56257ceULL, 0x9a193e2702174374ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_B(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_B(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_d.c
new file mode 100644
index 0000000000..a9ae576610
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_d.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction ASUB_S.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_S.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555556ULL, 0x5555555555555556ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333334ULL, 0x3333333333333334ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e38e38e38e4ULL, },
+        { 0x1c71c71c71c71c72ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555556ULL, 0x5555555555555556ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x3333333333333334ULL, 0x3333333333333334ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x1c71c71c71c71c72ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e38e38e38e4ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0x5555555555555556ULL, 0x5555555555555556ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaabULL, 0xaaaaaaaaaaaaaaabULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8888888888888889ULL, 0x8888888888888889ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x8e38e38e38e38e39ULL, },
+        { 0x71c71c71c71c71c7ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x5555555555555556ULL, 0x5555555555555556ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaaaaaaaaaaaaaabULL, 0xaaaaaaaaaaaaaaabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x8888888888888889ULL, 0x8888888888888889ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x71c71c71c71c71c7ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x8e38e38e38e38e39ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0x3333333333333334ULL, 0x3333333333333334ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8888888888888889ULL, 0x8888888888888889ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x6666666666666667ULL, 0x6666666666666667ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x6c16c16c16c16c17ULL, },
+        { 0x4fa4fa4fa4fa4fa5ULL, 0x05b05b05b05b05b0ULL, },
+        { 0x3333333333333334ULL, 0x3333333333333334ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x8888888888888889ULL, 0x8888888888888889ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x6666666666666667ULL, 0x6666666666666667ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4fa4fa4fa4fa4fa5ULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x6c16c16c16c16c17ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e38e38e38e4ULL, },    /*  48  */
+        { 0x1c71c71c71c71c72ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x8e38e38e38e38e39ULL, },
+        { 0x71c71c71c71c71c7ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x6c16c16c16c16c17ULL, },
+        { 0x4fa4fa4fa4fa4fa5ULL, 0x05b05b05b05b05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x38e38e38e38e38e3ULL, 0x71c71c71c71c71c7ULL, },
+        { 0x1c71c71c71c71c72ULL, 0x38e38e38e38e38e3ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0x38e38e38e38e38e4ULL, },
+        { 0x71c71c71c71c71c7ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x8e38e38e38e38e39ULL, },
+        { 0x4fa4fa4fa4fa4fa5ULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x6c16c16c16c16c17ULL, },
+        { 0x38e38e38e38e38e3ULL, 0x71c71c71c71c71c7ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x73531997253171c8ULL, 0x386f5044e93c5d10ULL, },
+        { 0x23efc7de916d3640ULL, 0x238e445f53508af8ULL, },
+        { 0xe7e42f8135cf8d0eULL, 0xbd7582865538cd6cULL, },
+        { 0x73531997253171c8ULL, 0x386f5044e93c5d10ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6351b893c43b88ULL, 0x14e10be595ebd218ULL, },
+        { 0x749115ea109e1b46ULL, 0x850632416bfc705cULL, },
+        { 0x23efc7de916d3640ULL, 0x238e445f53508af8ULL, },    /*  72  */
+        { 0x4f6351b893c43b88ULL, 0x14e10be595ebd218ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc3f467a2a46256ceULL, 0x99e73e2701e84274ULL, },
+        { 0xe7e42f8135cf8d0eULL, 0xbd7582865538cd6cULL, },
+        { 0x749115ea109e1b46ULL, 0x850632416bfc705cULL, },
+        { 0xc3f467a2a46256ceULL, 0x99e73e2701e84274ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_D(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_D(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_h.c
new file mode 100644
index 0000000000..8c193b6705
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_h.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction ASUB_S.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_S.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5556555655565556ULL, 0x5556555655565556ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3334333433343334ULL, 0x3334333433343334ULL, },
+        { 0x1c7138e471c71c71ULL, 0x38e471c71c7138e4ULL, },
+        { 0x1c7238e371c81c72ULL, 0x38e371c81c7238e3ULL, },
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5556555655565556ULL, 0x5556555655565556ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x3334333433343334ULL, 0x3334333433343334ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x1c7238e371c81c72ULL, 0x38e371c81c7238e3ULL, },
+        { 0x1c7138e471c71c71ULL, 0x38e471c71c7138e4ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0x5556555655565556ULL, 0x5556555655565556ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaabaaabaaabaaabULL, 0xaaabaaabaaabaaabULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8889888988898889ULL, 0x8889888988898889ULL, },
+        { 0x38e48e391c7238e4ULL, 0x8e391c7238e48e39ULL, },
+        { 0x71c71c72c71d71c7ULL, 0x1c72c71d71c71c72ULL, },
+        { 0x5556555655565556ULL, 0x5556555655565556ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaabaaabaaabaaabULL, 0xaaabaaabaaabaaabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x8889888988898889ULL, 0x8889888988898889ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x71c71c72c71d71c7ULL, 0x1c72c71d71c71c72ULL, },
+        { 0x38e48e391c7238e4ULL, 0x8e391c7238e48e39ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0x3334333433343334ULL, 0x3334333433343334ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8889888988898889ULL, 0x8889888988898889ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x6667666766676667ULL, 0x6667666766676667ULL, },
+        { 0x16c26c173e9416c2ULL, 0x6c173e9416c26c17ULL, },
+        { 0x4fa505b0a4fb4fa5ULL, 0x05b0a4fb4fa505b0ULL, },
+        { 0x3334333433343334ULL, 0x3334333433343334ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x8889888988898889ULL, 0x8889888988898889ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x6667666766676667ULL, 0x6667666766676667ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4fa505b0a4fb4fa5ULL, 0x05b0a4fb4fa505b0ULL, },
+        { 0x16c26c173e9416c2ULL, 0x6c173e9416c26c17ULL, },
+        { 0x1c7138e471c71c71ULL, 0x38e471c71c7138e4ULL, },    /*  48  */
+        { 0x1c7238e371c81c72ULL, 0x38e371c81c7238e3ULL, },
+        { 0x38e48e391c7238e4ULL, 0x8e391c7238e48e39ULL, },
+        { 0x71c71c72c71d71c7ULL, 0x1c72c71d71c71c72ULL, },
+        { 0x16c26c173e9416c2ULL, 0x6c173e9416c26c17ULL, },
+        { 0x4fa505b0a4fb4fa5ULL, 0x05b0a4fb4fa505b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x38e371c7e38f38e3ULL, 0x71c7e38f38e371c7ULL, },
+        { 0x1c7238e371c81c72ULL, 0x38e371c81c7238e3ULL, },    /*  56  */
+        { 0x1c7138e471c71c71ULL, 0x38e471c71c7138e4ULL, },
+        { 0x71c71c72c71d71c7ULL, 0x1c72c71d71c71c72ULL, },
+        { 0x38e48e391c7238e4ULL, 0x8e391c7238e48e39ULL, },
+        { 0x4fa505b0a4fb4fa5ULL, 0x05b0a4fb4fa505b0ULL, },
+        { 0x16c26c173e9416c2ULL, 0x6c173e9416c26c17ULL, },
+        { 0x38e371c7e38f38e3ULL, 0x71c7e38f38e371c7ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x7354199725318e38ULL, 0x3870504416c4a2f0ULL, },
+        { 0x23f038226e93c9c0ULL, 0x238f445f53507508ULL, },
+        { 0xe7e52f8135cf72f2ULL, 0xbd76828655393294ULL, },
+        { 0x7354199725318e38ULL, 0x3870504416c4a2f0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6451b993c43b88ULL, 0x14e10be56a142de8ULL, },
+        { 0x749115ea109e1b46ULL, 0x850632426bfd705cULL, },
+        { 0x23f038226e93c9c0ULL, 0x238f445f53507508ULL, },    /*  72  */
+        { 0x4f6451b993c43b88ULL, 0x14e10be56a142de8ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc3f567a3a46256ceULL, 0x99e73e2701e94274ULL, },
+        { 0xe7e52f8135cf72f2ULL, 0xbd76828655393294ULL, },
+        { 0x749115ea109e1b46ULL, 0x850632426bfd705cULL, },
+        { 0xc3f567a3a46256ceULL, 0x99e73e2701e94274ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_H(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_H(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_w.c
new file mode 100644
index 0000000000..fb8f3c1a10
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_w.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction ASUB_S.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_S.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555655555556ULL, 0x5555555655555556ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333433333334ULL, 0x3333333433333334ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e391c71c71cULL, },
+        { 0x1c71c71d71c71c72ULL, 0x38e38e381c71c71dULL, },
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555655555556ULL, 0x5555555655555556ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x3333333433333334ULL, 0x3333333433333334ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x1c71c71d71c71c72ULL, 0x38e38e381c71c71dULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e391c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0x5555555655555556ULL, 0x5555555655555556ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaabaaaaaaabULL, 0xaaaaaaabaaaaaaabULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8888888988888889ULL, 0x8888888988888889ULL, },
+        { 0x38e38e391c71c71cULL, 0x8e38e38e38e38e39ULL, },
+        { 0x71c71c72c71c71c7ULL, 0x1c71c71d71c71c72ULL, },
+        { 0x5555555655555556ULL, 0x5555555655555556ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaaaaaabaaaaaaabULL, 0xaaaaaaabaaaaaaabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x8888888988888889ULL, 0x8888888988888889ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x71c71c72c71c71c7ULL, 0x1c71c71d71c71c72ULL, },
+        { 0x38e38e391c71c71cULL, 0x8e38e38e38e38e39ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0x3333333433333334ULL, 0x3333333433333334ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8888888988888889ULL, 0x8888888988888889ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x6666666766666667ULL, 0x6666666766666667ULL, },
+        { 0x16c16c173e93e93eULL, 0x6c16c16c16c16c17ULL, },
+        { 0x4fa4fa50a4fa4fa5ULL, 0x05b05b054fa4fa50ULL, },
+        { 0x3333333433333334ULL, 0x3333333433333334ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x8888888988888889ULL, 0x8888888988888889ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x6666666766666667ULL, 0x6666666766666667ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4fa4fa50a4fa4fa5ULL, 0x05b05b054fa4fa50ULL, },
+        { 0x16c16c173e93e93eULL, 0x6c16c16c16c16c17ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e391c71c71cULL, },    /*  48  */
+        { 0x1c71c71d71c71c72ULL, 0x38e38e381c71c71dULL, },
+        { 0x38e38e391c71c71cULL, 0x8e38e38e38e38e39ULL, },
+        { 0x71c71c72c71c71c7ULL, 0x1c71c71d71c71c72ULL, },
+        { 0x16c16c173e93e93eULL, 0x6c16c16c16c16c17ULL, },
+        { 0x4fa4fa50a4fa4fa5ULL, 0x05b05b054fa4fa50ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x38e38e39e38e38e3ULL, 0x71c71c7138e38e39ULL, },
+        { 0x1c71c71d71c71c72ULL, 0x38e38e381c71c71dULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0x38e38e391c71c71cULL, },
+        { 0x71c71c72c71c71c7ULL, 0x1c71c71d71c71c72ULL, },
+        { 0x38e38e391c71c71cULL, 0x8e38e38e38e38e39ULL, },
+        { 0x4fa4fa50a4fa4fa5ULL, 0x05b05b054fa4fa50ULL, },
+        { 0x16c16c173e93e93eULL, 0x6c16c16c16c16c17ULL, },
+        { 0x38e38e39e38e38e3ULL, 0x71c71c7138e38e39ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x73531997253171c8ULL, 0x386f504416c3a2f0ULL, },
+        { 0x23efc7de6e92c9c0ULL, 0x238e445f53508af8ULL, },
+        { 0xe7e42f8135cf8d0eULL, 0xbd7582865538cd6cULL, },
+        { 0x73531997253171c8ULL, 0x386f504416c3a2f0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6351b993c43b88ULL, 0x14e10be56a142de8ULL, },
+        { 0x749115ea109e1b46ULL, 0x850632426bfc705cULL, },
+        { 0x23efc7de6e92c9c0ULL, 0x238e445f53508af8ULL, },    /*  72  */
+        { 0x4f6351b993c43b88ULL, 0x14e10be56a142de8ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc3f467a3a46256ceULL, 0x99e73e2701e84274ULL, },
+        { 0xe7e42f8135cf8d0eULL, 0xbd7582865538cd6cULL, },
+        { 0x749115ea109e1b46ULL, 0x850632426bfc705cULL, },
+        { 0xc3f467a3a46256ceULL, 0x99e73e2701e84274ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_W(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_W(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_b.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_b.c
new file mode 100644
index 0000000000..506500b993
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_b.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction ASUB_U.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_U.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x391c72391c72391cULL, 0x72391c72391c7239ULL, },
+        { 0x8e391d8e391d8e39ULL, 0x1d8e391d8e391d8eULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8e391d8e391d8e39ULL, 0x1d8e391d8e391d8eULL, },
+        { 0x391c72391c72391cULL, 0x72391c72391c7239ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x173e94173e94173eULL, 0x94173e94173e9417ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x173e94173e94173eULL, 0x94173e94173e9417ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x391c72391c72391cULL, 0x72391c72391c7239ULL, },
+        { 0x8e391d8e391d8e39ULL, 0x1d8e391d8e391d8eULL, },
+        { 0x173e94173e94173eULL, 0x94173e94173e9417ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc71d8fc71d8fc71dULL, 0x8fc71d8fc71d8fc7ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x8e391d8e391d8e39ULL, 0x1d8e391d8e391d8eULL, },
+        { 0x391c72391c72391cULL, 0x72391c72391c7239ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x173e94173e94173eULL, 0x94173e94173e9417ULL, },
+        { 0xc71d8fc71d8fc71dULL, 0x8fc71d8fc71d8fc7ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x7354e66925317238ULL, 0x3990b044e93c5ef0ULL, },
+        { 0x24103822916d3640ULL, 0x2471bba153508b08ULL, },
+        { 0x181bd07f36318d0eULL, 0x428a7d7a55393294ULL, },
+        { 0x7354e66925317238ULL, 0x3990b044e93c5ef0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f64ae476c3c3c78ULL, 0x151f0be596142de8ULL, },
+        { 0x8b6f161611621b46ULL, 0x7b0633be9403905cULL, },
+        { 0x24103822916d3640ULL, 0x2471bba153508b08ULL, },    /*  72  */
+        { 0x4f64ae476c3c3c78ULL, 0x151f0be596142de8ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b985d5b9e5732ULL, 0x66193e270217bd8cULL, },
+        { 0x181bd07f36318d0eULL, 0x428a7d7a55393294ULL, },
+        { 0x8b6f161611621b46ULL, 0x7b0633be9403905cULL, },
+        { 0x3c0b985d5b9e5732ULL, 0x66193e270217bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_B(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_B(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_d.c
new file mode 100644
index 0000000000..ef7ee3fd15
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_d.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction ASUB_U.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_U.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x71c71c71c71c71c7ULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71c71c71c72ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x71c71c71c71c71c7ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x93e93e93e93e93e9ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x93e93e93e93e93e9ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x71c71c71c71c71c7ULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x93e93e93e93e93e9ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc71c71c71c71c71dULL, 0x8e38e38e38e38e39ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x71c71c71c71c71c7ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x93e93e93e93e93e9ULL, },
+        { 0xc71c71c71c71c71dULL, 0x8e38e38e38e38e39ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x73531997253171c8ULL, 0x386f5044e93c5d10ULL, },
+        { 0x23efc7de916d3640ULL, 0x238e445f53508af8ULL, },
+        { 0x181bd07eca3072f2ULL, 0x428a7d79aac73294ULL, },
+        { 0x73531997253171c8ULL, 0x386f5044e93c5d10ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6351b893c43b88ULL, 0x14e10be595ebd218ULL, },
+        { 0x8b6eea15ef61e4baULL, 0x7af9cdbe94038fa4ULL, },
+        { 0x23efc7de916d3640ULL, 0x238e445f53508af8ULL, },    /*  72  */
+        { 0x4f6351b893c43b88ULL, 0x14e10be595ebd218ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b985d5b9da932ULL, 0x6618c1d8fe17bd8cULL, },
+        { 0x181bd07eca3072f2ULL, 0x428a7d79aac73294ULL, },
+        { 0x8b6eea15ef61e4baULL, 0x7af9cdbe94038fa4ULL, },
+        { 0x3c0b985d5b9da932ULL, 0x6618c1d8fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_D(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_D(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_h.c
new file mode 100644
index 0000000000..17dab15761
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_h.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction ASUB_U.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_U.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x38e471c71c7238e4ULL, 0x71c71c7238e471c7ULL, },
+        { 0x8e391c7238e38e39ULL, 0x1c7238e38e391c72ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8e391c7238e38e39ULL, 0x1c7238e38e391c72ULL, },
+        { 0x38e471c71c7238e4ULL, 0x71c71c7238e471c7ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x16c293e93e9416c2ULL, 0x93e93e9416c293e9ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c293e93e9416c2ULL, 0x93e93e9416c293e9ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e471c71c7238e4ULL, 0x71c71c7238e471c7ULL, },
+        { 0x8e391c7238e38e39ULL, 0x1c7238e38e391c72ULL, },
+        { 0x16c293e93e9416c2ULL, 0x93e93e9416c293e9ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc71d8e391c71c71dULL, 0x8e391c71c71d8e39ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x8e391c7238e38e39ULL, 0x1c7238e38e391c72ULL, },
+        { 0x38e471c71c7238e4ULL, 0x71c71c7238e471c7ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c293e93e9416c2ULL, 0x93e93e9416c293e9ULL, },
+        { 0xc71d8e391c71c71dULL, 0x8e391c71c71d8e39ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x7354e669253171c8ULL, 0x3870afbce93c5d10ULL, },
+        { 0x23f03822916d3640ULL, 0x238fbba153508af8ULL, },
+        { 0x181bd07f35cf8d0eULL, 0x428a7d7a55393294ULL, },
+        { 0x7354e669253171c8ULL, 0x3870afbce93c5d10ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f64ae476c3c3b88ULL, 0x14e10be595ec2de8ULL, },
+        { 0x8b6f15ea109e1b46ULL, 0x7afa324294038fa4ULL, },
+        { 0x23f03822916d3640ULL, 0x238fbba153508af8ULL, },    /*  72  */
+        { 0x4f64ae476c3c3b88ULL, 0x14e10be595ec2de8ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b985d5b9e56ceULL, 0x66193e2701e9bd8cULL, },
+        { 0x181bd07f35cf8d0eULL, 0x428a7d7a55393294ULL, },
+        { 0x8b6f15ea109e1b46ULL, 0x7afa324294038fa4ULL, },
+        { 0x3c0b985d5b9e56ceULL, 0x66193e2701e9bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_H(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_H(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_w.c
new file mode 100644
index 0000000000..38a6395e2d
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_w.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction ASUB_U.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_U.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x38e38e391c71c71cULL, 0x71c71c7238e38e39ULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71d8e38e38eULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71d8e38e38eULL, },
+        { 0x38e38e391c71c71cULL, 0x71c71c7238e38e39ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x16c16c173e93e93eULL, 0x93e93e9416c16c17ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c16c173e93e93eULL, 0x93e93e9416c16c17ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e38e391c71c71cULL, 0x71c71c7238e38e39ULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71d8e38e38eULL, },
+        { 0x16c16c173e93e93eULL, 0x93e93e9416c16c17ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc71c71c71c71c71dULL, 0x8e38e38fc71c71c7ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71d8e38e38eULL, },
+        { 0x38e38e391c71c71cULL, 0x71c71c7238e38e39ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c16c173e93e93eULL, 0x93e93e9416c16c17ULL, },
+        { 0xc71c71c71c71c71dULL, 0x8e38e38fc71c71c7ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x73531997253171c8ULL, 0x386f5044e93c5d10ULL, },
+        { 0x23efc7de916d3640ULL, 0x238e445f53508af8ULL, },
+        { 0x181bd07f35cf8d0eULL, 0x428a7d7a5538cd6cULL, },
+        { 0x73531997253171c8ULL, 0x386f5044e93c5d10ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6351b96c3bc478ULL, 0x14e10be595ebd218ULL, },
+        { 0x8b6eea16109e1b46ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0x23efc7de916d3640ULL, 0x238e445f53508af8ULL, },    /*  72  */
+        { 0x4f6351b96c3bc478ULL, 0x14e10be595ebd218ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b985d5b9da932ULL, 0x6618c1d901e84274ULL, },
+        { 0x181bd07f35cf8d0eULL, 0x428a7d7a5538cd6cULL, },
+        { 0x8b6eea16109e1b46ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0x3c0b985d5b9da932ULL, 0x6618c1d901e84274ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_W(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_W(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_d.c
new file mode 100644
index 0000000000..dd1dd37af2
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_d.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction HSUB_S.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "HSUB_S.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000055555555ULL, 0x0000000055555555ULL, },
+        { 0xffffffffaaaaaaaaULL, 0xffffffffaaaaaaaaULL, },
+        { 0x0000000033333333ULL, 0x0000000033333333ULL, },
+        { 0xffffffffccccccccULL, 0xffffffffccccccccULL, },
+        { 0x0000000071c71c71ULL, 0x000000001c71c71cULL, },
+        { 0xffffffff8e38e38eULL, 0xffffffffe38e38e3ULL, },
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000055555556ULL, 0x0000000055555556ULL, },
+        { 0xffffffffaaaaaaabULL, 0xffffffffaaaaaaabULL, },
+        { 0x0000000033333334ULL, 0x0000000033333334ULL, },
+        { 0xffffffffcccccccdULL, 0xffffffffcccccccdULL, },
+        { 0x0000000071c71c72ULL, 0x000000001c71c71dULL, },
+        { 0xffffffff8e38e38fULL, 0xffffffffe38e38e4ULL, },
+        { 0xffffffffaaaaaaabULL, 0xffffffffaaaaaaabULL, },    /*  16  */
+        { 0xffffffffaaaaaaaaULL, 0xffffffffaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffff55555555ULL, 0xffffffff55555555ULL, },
+        { 0xffffffffdddddddeULL, 0xffffffffdddddddeULL, },
+        { 0xffffffff77777777ULL, 0xffffffff77777777ULL, },
+        { 0x000000001c71c71cULL, 0xffffffffc71c71c7ULL, },
+        { 0xffffffff38e38e39ULL, 0xffffffff8e38e38eULL, },
+        { 0x0000000055555556ULL, 0x0000000055555556ULL, },    /*  24  */
+        { 0x0000000055555555ULL, 0x0000000055555555ULL, },
+        { 0x00000000aaaaaaabULL, 0x00000000aaaaaaabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000088888889ULL, 0x0000000088888889ULL, },
+        { 0x0000000022222222ULL, 0x0000000022222222ULL, },
+        { 0x00000000c71c71c7ULL, 0x0000000071c71c72ULL, },
+        { 0xffffffffe38e38e4ULL, 0x0000000038e38e39ULL, },
+        { 0xffffffffcccccccdULL, 0xffffffffcccccccdULL, },    /*  32  */
+        { 0xffffffffccccccccULL, 0xffffffffccccccccULL, },
+        { 0x0000000022222222ULL, 0x0000000022222222ULL, },
+        { 0xffffffff77777777ULL, 0xffffffff77777777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffff99999999ULL, 0xffffffff99999999ULL, },
+        { 0x000000003e93e93eULL, 0xffffffffe93e93e9ULL, },
+        { 0xffffffff5b05b05bULL, 0xffffffffb05b05b0ULL, },
+        { 0x0000000033333334ULL, 0x0000000033333334ULL, },    /*  40  */
+        { 0x0000000033333333ULL, 0x0000000033333333ULL, },
+        { 0x0000000088888889ULL, 0x0000000088888889ULL, },
+        { 0xffffffffdddddddeULL, 0xffffffffdddddddeULL, },
+        { 0x0000000066666667ULL, 0x0000000066666667ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x00000000a4fa4fa5ULL, 0x000000004fa4fa50ULL, },
+        { 0xffffffffc16c16c2ULL, 0x0000000016c16c17ULL, },
+        { 0xffffffffe38e38e4ULL, 0x0000000038e38e39ULL, },    /*  48  */
+        { 0xffffffffe38e38e3ULL, 0x0000000038e38e38ULL, },
+        { 0x0000000038e38e39ULL, 0x000000008e38e38eULL, },
+        { 0xffffffff8e38e38eULL, 0xffffffffe38e38e3ULL, },
+        { 0x0000000016c16c17ULL, 0x000000006c16c16cULL, },
+        { 0xffffffffb05b05b0ULL, 0x0000000005b05b05ULL, },
+        { 0x0000000055555555ULL, 0x0000000055555555ULL, },
+        { 0xffffffff71c71c72ULL, 0x000000001c71c71cULL, },
+        { 0x000000001c71c71dULL, 0xffffffffc71c71c8ULL, },    /*  56  */
+        { 0x000000001c71c71cULL, 0xffffffffc71c71c7ULL, },
+        { 0x0000000071c71c72ULL, 0x000000001c71c71dULL, },
+        { 0xffffffffc71c71c7ULL, 0xffffffff71c71c72ULL, },
+        { 0x000000004fa4fa50ULL, 0xfffffffffa4fa4fbULL, },
+        { 0xffffffffe93e93e9ULL, 0xffffffff93e93e94ULL, },
+        { 0x000000008e38e38eULL, 0xffffffffe38e38e4ULL, },
+        { 0xffffffffaaaaaaabULL, 0xffffffffaaaaaaabULL, },
+        { 0xffffffff6008918cULL, 0x000000004ceb5b52ULL, },    /*  64  */
+        { 0xffffffff3ad71fc4ULL, 0x000000003627b862ULL, },
+        { 0xffffffffce9b5b4cULL, 0x00000000a03be64aULL, },
+        { 0xffffffff2a39047eULL, 0x00000000a22428beULL, },
+        { 0xffffffffd35bab23ULL, 0x00000000147c0b0eULL, },
+        { 0xffffffffae2a395bULL, 0xfffffffffdb8681eULL, },
+        { 0x0000000041ee74e3ULL, 0x0000000067cc9606ULL, },
+        { 0xffffffff9d8c1e15ULL, 0x0000000069b4d87aULL, },
+        { 0xffffffff83f8596aULL, 0x00000000295d16f3ULL, },    /*  72  */
+        { 0xffffffff5ec6e7a2ULL, 0x0000000012997403ULL, },
+        { 0xfffffffff28b232aULL, 0x000000007cada1ebULL, },
+        { 0xffffffff4e28cc5cULL, 0x000000007e95e45fULL, },
+        { 0x0000000047ecc10dULL, 0xffffffff8f75d8ccULL, },
+        { 0x0000000022bb4f45ULL, 0xffffffff78b235dcULL, },
+        { 0x00000000b67f8acdULL, 0xffffffffe2c663c4ULL, },
+        { 0x00000000121d33ffULL, 0xffffffffe4aea638ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_S_D(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_S_D(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_h.c
new file mode 100644
index 0000000000..d725d1957a
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_h.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction HSUB_S.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "HSUB_S.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0055005500550055ULL, 0x0055005500550055ULL, },
+        { 0xffaaffaaffaaffaaULL, 0xffaaffaaffaaffaaULL, },
+        { 0x0033003300330033ULL, 0x0033003300330033ULL, },
+        { 0xffccffccffccffccULL, 0xffccffccffccffccULL, },
+        { 0x0071001cffc70071ULL, 0x001cffc70071001cULL, },
+        { 0xff8effe30038ff8eULL, 0xffe30038ff8effe3ULL, },
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0056005600560056ULL, 0x0056005600560056ULL, },
+        { 0xffabffabffabffabULL, 0xffabffabffabffabULL, },
+        { 0x0034003400340034ULL, 0x0034003400340034ULL, },
+        { 0xffcdffcdffcdffcdULL, 0xffcdffcdffcdffcdULL, },
+        { 0x0072001dffc80072ULL, 0x001dffc80072001dULL, },
+        { 0xff8fffe40039ff8fULL, 0xffe40039ff8fffe4ULL, },
+        { 0xffabffabffabffabULL, 0xffabffabffabffabULL, },    /*  16  */
+        { 0xffaaffaaffaaffaaULL, 0xffaaffaaffaaffaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xff55ff55ff55ff55ULL, 0xff55ff55ff55ff55ULL, },
+        { 0xffdeffdeffdeffdeULL, 0xffdeffdeffdeffdeULL, },
+        { 0xff77ff77ff77ff77ULL, 0xff77ff77ff77ff77ULL, },
+        { 0x001cffc7ff72001cULL, 0xffc7ff72001cffc7ULL, },
+        { 0xff39ff8effe3ff39ULL, 0xff8effe3ff39ff8eULL, },
+        { 0x0056005600560056ULL, 0x0056005600560056ULL, },    /*  24  */
+        { 0x0055005500550055ULL, 0x0055005500550055ULL, },
+        { 0x00ab00ab00ab00abULL, 0x00ab00ab00ab00abULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0089008900890089ULL, 0x0089008900890089ULL, },
+        { 0x0022002200220022ULL, 0x0022002200220022ULL, },
+        { 0x00c70072001d00c7ULL, 0x0072001d00c70072ULL, },
+        { 0xffe40039008effe4ULL, 0x0039008effe40039ULL, },
+        { 0xffcdffcdffcdffcdULL, 0xffcdffcdffcdffcdULL, },    /*  32  */
+        { 0xffccffccffccffccULL, 0xffccffccffccffccULL, },
+        { 0x0022002200220022ULL, 0x0022002200220022ULL, },
+        { 0xff77ff77ff77ff77ULL, 0xff77ff77ff77ff77ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xff99ff99ff99ff99ULL, 0xff99ff99ff99ff99ULL, },
+        { 0x003effe9ff94003eULL, 0xffe9ff94003effe9ULL, },
+        { 0xff5bffb00005ff5bULL, 0xffb00005ff5bffb0ULL, },
+        { 0x0034003400340034ULL, 0x0034003400340034ULL, },    /*  40  */
+        { 0x0033003300330033ULL, 0x0033003300330033ULL, },
+        { 0x0089008900890089ULL, 0x0089008900890089ULL, },
+        { 0xffdeffdeffdeffdeULL, 0xffdeffdeffdeffdeULL, },
+        { 0x0067006700670067ULL, 0x0067006700670067ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x00a50050fffb00a5ULL, 0x0050fffb00a50050ULL, },
+        { 0xffc20017006cffc2ULL, 0x0017006cffc20017ULL, },
+        { 0xffe40039ff8fffe4ULL, 0x0039ff8fffe40039ULL, },    /*  48  */
+        { 0xffe30038ff8effe3ULL, 0x0038ff8effe30038ULL, },
+        { 0x0039008effe40039ULL, 0x008effe40039008eULL, },
+        { 0xff8effe3ff39ff8eULL, 0xffe3ff39ff8effe3ULL, },
+        { 0x0017006cffc20017ULL, 0x006cffc20017006cULL, },
+        { 0xffb00005ff5bffb0ULL, 0x0005ff5bffb00005ULL, },
+        { 0x00550055ff560055ULL, 0x0055ff5600550055ULL, },
+        { 0xff72001cffc7ff72ULL, 0x001cffc7ff72001cULL, },
+        { 0x001dffc80072001dULL, 0xffc80072001dffc8ULL, },    /*  56  */
+        { 0x001cffc70071001cULL, 0xffc70071001cffc7ULL, },
+        { 0x0072001d00c70072ULL, 0x001d00c70072001dULL, },
+        { 0xffc7ff72001cffc7ULL, 0xff72001cffc7ff72ULL, },
+        { 0x0050fffb00a50050ULL, 0xfffb00a50050fffbULL, },
+        { 0xffe9ff94003effe9ULL, 0xff94003effe9ff94ULL, },
+        { 0x008effe40039008eULL, 0xffe40039008effe4ULL, },
+        { 0xffabffab00aaffabULL, 0xffab00aaffabffabULL, },
+        { 0xff1e001affc60015ULL, 0xffe4ffadff83ffa4ULL, },    /*  64  */
+        { 0xffcaff830095004dULL, 0x0054fff1ffbfffb4ULL, },
+        { 0xff2e003c005900d5ULL, 0x0073000cffd3ff9cULL, },
+        { 0xff39ff99fff70007ULL, 0x005a0033ffbc0010ULL, },
+        { 0xff910034ffebff87ULL, 0xffabff5dff9a0046ULL, },
+        { 0x003dff9d00baffbfULL, 0x001bffa1ffd60056ULL, },
+        { 0xffa10056007e0047ULL, 0x003affbcffea003eULL, },
+        { 0xffacffb3001cff79ULL, 0x0021ffe3ffd300b2ULL, },
+        { 0xff42ffe2ff57ff4bULL, 0xffc0ff68ff300019ULL, },    /*  72  */
+        { 0xffeeff4b0026ff83ULL, 0x0030ffacff6c0029ULL, },
+        { 0xff520004ffea000bULL, 0x004fffc7ff800011ULL, },
+        { 0xff5dff61ff88ff3dULL, 0x0036ffeeff690085ULL, },
+        { 0x0006004afffcffa2ULL, 0xff26ff2aff2effd6ULL, },
+        { 0x00b2ffb300cbffdaULL, 0xff96ff6eff6affe6ULL, },
+        { 0x0016006c008f0062ULL, 0xffb5ff89ff7effceULL, },
+        { 0x0021ffc9002dff94ULL, 0xff9cffb0ff670042ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_S_H(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_S_H(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_w.c
new file mode 100644
index 0000000000..af13d99c0f
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_w.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction HSUB_S.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "HSUB_S.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000555500005555ULL, 0x0000555500005555ULL, },
+        { 0xffffaaaaffffaaaaULL, 0xffffaaaaffffaaaaULL, },
+        { 0x0000333300003333ULL, 0x0000333300003333ULL, },
+        { 0xffffccccffffccccULL, 0xffffccccffffccccULL, },
+        { 0xffffc71c00001c71ULL, 0x000071c7ffffc71cULL, },
+        { 0x000038e3ffffe38eULL, 0xffff8e38000038e3ULL, },
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000555600005556ULL, 0x0000555600005556ULL, },
+        { 0xffffaaabffffaaabULL, 0xffffaaabffffaaabULL, },
+        { 0x0000333400003334ULL, 0x0000333400003334ULL, },
+        { 0xffffcccdffffcccdULL, 0xffffcccdffffcccdULL, },
+        { 0xffffc71d00001c72ULL, 0x000071c8ffffc71dULL, },
+        { 0x000038e4ffffe38fULL, 0xffff8e39000038e4ULL, },
+        { 0xffffaaabffffaaabULL, 0xffffaaabffffaaabULL, },    /*  16  */
+        { 0xffffaaaaffffaaaaULL, 0xffffaaaaffffaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffff5555ffff5555ULL, 0xffff5555ffff5555ULL, },
+        { 0xffffdddeffffdddeULL, 0xffffdddeffffdddeULL, },
+        { 0xffff7777ffff7777ULL, 0xffff7777ffff7777ULL, },
+        { 0xffff71c7ffffc71cULL, 0x00001c72ffff71c7ULL, },
+        { 0xffffe38effff8e39ULL, 0xffff38e3ffffe38eULL, },
+        { 0x0000555600005556ULL, 0x0000555600005556ULL, },    /*  24  */
+        { 0x0000555500005555ULL, 0x0000555500005555ULL, },
+        { 0x0000aaab0000aaabULL, 0x0000aaab0000aaabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000888900008889ULL, 0x0000888900008889ULL, },
+        { 0x0000222200002222ULL, 0x0000222200002222ULL, },
+        { 0x00001c72000071c7ULL, 0x0000c71d00001c72ULL, },
+        { 0x00008e39000038e4ULL, 0xffffe38e00008e39ULL, },
+        { 0xffffcccdffffcccdULL, 0xffffcccdffffcccdULL, },    /*  32  */
+        { 0xffffccccffffccccULL, 0xffffccccffffccccULL, },
+        { 0x0000222200002222ULL, 0x0000222200002222ULL, },
+        { 0xffff7777ffff7777ULL, 0xffff7777ffff7777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffff9999ffff9999ULL, 0xffff9999ffff9999ULL, },
+        { 0xffff93e9ffffe93eULL, 0x00003e94ffff93e9ULL, },
+        { 0x000005b0ffffb05bULL, 0xffff5b05000005b0ULL, },
+        { 0x0000333400003334ULL, 0x0000333400003334ULL, },    /*  40  */
+        { 0x0000333300003333ULL, 0x0000333300003333ULL, },
+        { 0x0000888900008889ULL, 0x0000888900008889ULL, },
+        { 0xffffdddeffffdddeULL, 0xffffdddeffffdddeULL, },
+        { 0x0000666700006667ULL, 0x0000666700006667ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xfffffa5000004fa5ULL, 0x0000a4fbfffffa50ULL, },
+        { 0x00006c17000016c2ULL, 0xffffc16c00006c17ULL, },
+        { 0xffffe38fffff8e39ULL, 0x000038e4ffffe38fULL, },    /*  48  */
+        { 0xffffe38effff8e38ULL, 0x000038e3ffffe38eULL, },
+        { 0x000038e4ffffe38eULL, 0x00008e39000038e4ULL, },
+        { 0xffff8e39ffff38e3ULL, 0xffffe38effff8e39ULL, },
+        { 0x000016c2ffffc16cULL, 0x00006c17000016c2ULL, },
+        { 0xffffb05bffff5b05ULL, 0x000005b0ffffb05bULL, },
+        { 0xffffaaabffffaaaaULL, 0x0000aaabffffaaabULL, },
+        { 0x00001c72ffff71c7ULL, 0xffffc71c00001c72ULL, },
+        { 0x00001c72000071c8ULL, 0xffffc71d00001c72ULL, },    /*  56  */
+        { 0x00001c71000071c7ULL, 0xffffc71c00001c71ULL, },
+        { 0x000071c70000c71dULL, 0x00001c72000071c7ULL, },
+        { 0xffffc71c00001c72ULL, 0xffff71c7ffffc71cULL, },
+        { 0x00004fa50000a4fbULL, 0xfffffa5000004fa5ULL, },
+        { 0xffffe93e00003e94ULL, 0xffff93e9ffffe93eULL, },
+        { 0xffffe38e00008e39ULL, 0x000038e4ffffe38eULL, },
+        { 0x0000555500005556ULL, 0xffff555500005555ULL, },
+        { 0xffffa19effffd322ULL, 0x0000400900004e6fULL, },    /*  64  */
+        { 0xffff88070000615aULL, 0x0000904dffffab7fULL, },
+        { 0xffffd9c000009ce2ULL, 0x00008468ffffd967ULL, },
+        { 0xffff721d00004614ULL, 0x0000c28f00001bdbULL, },
+        { 0x000014f2fffff853ULL, 0x0000079900006533ULL, },
+        { 0xfffffb5b0000868bULL, 0x000057ddffffc243ULL, },
+        { 0x00004d140000c213ULL, 0x00004bf8fffff02bULL, },
+        { 0xffffe57100006b45ULL, 0x00008a1f0000329fULL, },
+        { 0xffffc58effff648fULL, 0x00001c7afffffb1fULL, },    /*  72  */
+        { 0xffffabf7fffff2c7ULL, 0x00006cbeffff582fULL, },
+        { 0xfffffdb000002e4fULL, 0x000060d9ffff8617ULL, },
+        { 0xffff960dffffd781ULL, 0x00009f00ffffc88bULL, },
+        { 0x00008983000008f1ULL, 0xffff8293fffff936ULL, },
+        { 0x00006fec00009729ULL, 0xffffd2d7ffff5646ULL, },
+        { 0x0000c1a50000d2b1ULL, 0xffffc6f2ffff842eULL, },
+        { 0x00005a0200007be3ULL, 0x00000519ffffc6a2ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_S_W(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_S_W(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_d.c
new file mode 100644
index 0000000000..bd71abfc4f
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_d.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction HSUB_U.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "HSUB_U.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0x00000000ffffffffULL, 0x00000000ffffffffULL, },
+        { 0x0000000055555555ULL, 0x0000000055555555ULL, },
+        { 0x00000000aaaaaaaaULL, 0x00000000aaaaaaaaULL, },
+        { 0x0000000033333333ULL, 0x0000000033333333ULL, },
+        { 0x00000000ccccccccULL, 0x00000000ccccccccULL, },
+        { 0x0000000071c71c71ULL, 0x000000001c71c71cULL, },
+        { 0x000000008e38e38eULL, 0x00000000e38e38e3ULL, },
+        { 0xffffffff00000001ULL, 0xffffffff00000001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffff55555556ULL, 0xffffffff55555556ULL, },
+        { 0xffffffffaaaaaaabULL, 0xffffffffaaaaaaabULL, },
+        { 0xffffffff33333334ULL, 0xffffffff33333334ULL, },
+        { 0xffffffffcccccccdULL, 0xffffffffcccccccdULL, },
+        { 0xffffffff71c71c72ULL, 0xffffffff1c71c71dULL, },
+        { 0xffffffff8e38e38fULL, 0xffffffffe38e38e4ULL, },
+        { 0xffffffffaaaaaaabULL, 0xffffffffaaaaaaabULL, },    /*  16  */
+        { 0x00000000aaaaaaaaULL, 0x00000000aaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000055555555ULL, 0x0000000055555555ULL, },
+        { 0xffffffffdddddddeULL, 0xffffffffdddddddeULL, },
+        { 0x0000000077777777ULL, 0x0000000077777777ULL, },
+        { 0x000000001c71c71cULL, 0xffffffffc71c71c7ULL, },
+        { 0x0000000038e38e39ULL, 0x000000008e38e38eULL, },
+        { 0xffffffff55555556ULL, 0xffffffff55555556ULL, },    /*  24  */
+        { 0x0000000055555555ULL, 0x0000000055555555ULL, },
+        { 0xffffffffaaaaaaabULL, 0xffffffffaaaaaaabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffff88888889ULL, 0xffffffff88888889ULL, },
+        { 0x0000000022222222ULL, 0x0000000022222222ULL, },
+        { 0xffffffffc71c71c7ULL, 0xffffffff71c71c72ULL, },
+        { 0xffffffffe38e38e4ULL, 0x0000000038e38e39ULL, },
+        { 0xffffffffcccccccdULL, 0xffffffffcccccccdULL, },    /*  32  */
+        { 0x00000000ccccccccULL, 0x00000000ccccccccULL, },
+        { 0x0000000022222222ULL, 0x0000000022222222ULL, },
+        { 0x0000000077777777ULL, 0x0000000077777777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000099999999ULL, 0x0000000099999999ULL, },
+        { 0x000000003e93e93eULL, 0xffffffffe93e93e9ULL, },
+        { 0x000000005b05b05bULL, 0x00000000b05b05b0ULL, },
+        { 0xffffffff33333334ULL, 0xffffffff33333334ULL, },    /*  40  */
+        { 0x0000000033333333ULL, 0x0000000033333333ULL, },
+        { 0xffffffff88888889ULL, 0xffffffff88888889ULL, },
+        { 0xffffffffdddddddeULL, 0xffffffffdddddddeULL, },
+        { 0xffffffff66666667ULL, 0xffffffff66666667ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffa4fa4fa5ULL, 0xffffffff4fa4fa50ULL, },
+        { 0xffffffffc16c16c2ULL, 0x0000000016c16c17ULL, },
+        { 0xffffffffe38e38e4ULL, 0xffffffff38e38e39ULL, },    /*  48  */
+        { 0x00000000e38e38e3ULL, 0x0000000038e38e38ULL, },
+        { 0x0000000038e38e39ULL, 0xffffffff8e38e38eULL, },
+        { 0x000000008e38e38eULL, 0xffffffffe38e38e3ULL, },
+        { 0x0000000016c16c17ULL, 0xffffffff6c16c16cULL, },
+        { 0x00000000b05b05b0ULL, 0x0000000005b05b05ULL, },
+        { 0x0000000055555555ULL, 0xffffffff55555555ULL, },
+        { 0x0000000071c71c72ULL, 0x000000001c71c71cULL, },
+        { 0xffffffff1c71c71dULL, 0xffffffffc71c71c8ULL, },    /*  56  */
+        { 0x000000001c71c71cULL, 0x00000000c71c71c7ULL, },
+        { 0xffffffff71c71c72ULL, 0x000000001c71c71dULL, },
+        { 0xffffffffc71c71c7ULL, 0x0000000071c71c72ULL, },
+        { 0xffffffff4fa4fa50ULL, 0xfffffffffa4fa4fbULL, },
+        { 0xffffffffe93e93e9ULL, 0x0000000093e93e94ULL, },
+        { 0xffffffff8e38e38eULL, 0xffffffffe38e38e4ULL, },
+        { 0xffffffffaaaaaaabULL, 0x00000000aaaaaaabULL, },
+        { 0x000000006008918cULL, 0xffffffff4ceb5b52ULL, },    /*  64  */
+        { 0x000000003ad71fc4ULL, 0x000000003627b862ULL, },
+        { 0xffffffffce9b5b4cULL, 0xffffffffa03be64aULL, },
+        { 0x000000002a39047eULL, 0xffffffffa22428beULL, },
+        { 0x00000000d35bab23ULL, 0xffffffff147c0b0eULL, },
+        { 0x00000000ae2a395bULL, 0xfffffffffdb8681eULL, },
+        { 0x0000000041ee74e3ULL, 0xffffffff67cc9606ULL, },
+        { 0x000000009d8c1e15ULL, 0xffffffff69b4d87aULL, },
+        { 0x0000000083f8596aULL, 0xffffffff295d16f3ULL, },    /*  72  */
+        { 0x000000005ec6e7a2ULL, 0x0000000012997403ULL, },
+        { 0xfffffffff28b232aULL, 0xffffffff7cada1ebULL, },
+        { 0x000000004e28cc5cULL, 0xffffffff7e95e45fULL, },
+        { 0x0000000047ecc10dULL, 0xffffffff8f75d8ccULL, },
+        { 0x0000000022bb4f45ULL, 0x0000000078b235dcULL, },
+        { 0xffffffffb67f8acdULL, 0xffffffffe2c663c4ULL, },
+        { 0x00000000121d33ffULL, 0xffffffffe4aea638ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_U_D(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_U_D(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_h.c
new file mode 100644
index 0000000000..fbf0b6c45c
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_h.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction HSUB_U.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "HSUB_U.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, },
+        { 0x0055005500550055ULL, 0x0055005500550055ULL, },
+        { 0x00aa00aa00aa00aaULL, 0x00aa00aa00aa00aaULL, },
+        { 0x0033003300330033ULL, 0x0033003300330033ULL, },
+        { 0x00cc00cc00cc00ccULL, 0x00cc00cc00cc00ccULL, },
+        { 0x0071001c00c70071ULL, 0x001c00c70071001cULL, },
+        { 0x008e00e30038008eULL, 0x00e30038008e00e3ULL, },
+        { 0xff01ff01ff01ff01ULL, 0xff01ff01ff01ff01ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xff56ff56ff56ff56ULL, 0xff56ff56ff56ff56ULL, },
+        { 0xffabffabffabffabULL, 0xffabffabffabffabULL, },
+        { 0xff34ff34ff34ff34ULL, 0xff34ff34ff34ff34ULL, },
+        { 0xffcdffcdffcdffcdULL, 0xffcdffcdffcdffcdULL, },
+        { 0xff72ff1dffc8ff72ULL, 0xff1dffc8ff72ff1dULL, },
+        { 0xff8fffe4ff39ff8fULL, 0xffe4ff39ff8fffe4ULL, },
+        { 0xffabffabffabffabULL, 0xffabffabffabffabULL, },    /*  16  */
+        { 0x00aa00aa00aa00aaULL, 0x00aa00aa00aa00aaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0055005500550055ULL, 0x0055005500550055ULL, },
+        { 0xffdeffdeffdeffdeULL, 0xffdeffdeffdeffdeULL, },
+        { 0x0077007700770077ULL, 0x0077007700770077ULL, },
+        { 0x001cffc70072001cULL, 0xffc70072001cffc7ULL, },
+        { 0x0039008effe30039ULL, 0x008effe30039008eULL, },
+        { 0xff56ff56ff56ff56ULL, 0xff56ff56ff56ff56ULL, },    /*  24  */
+        { 0x0055005500550055ULL, 0x0055005500550055ULL, },
+        { 0xffabffabffabffabULL, 0xffabffabffabffabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xff89ff89ff89ff89ULL, 0xff89ff89ff89ff89ULL, },
+        { 0x0022002200220022ULL, 0x0022002200220022ULL, },
+        { 0xffc7ff72001dffc7ULL, 0xff72001dffc7ff72ULL, },
+        { 0xffe40039ff8effe4ULL, 0x0039ff8effe40039ULL, },
+        { 0xffcdffcdffcdffcdULL, 0xffcdffcdffcdffcdULL, },    /*  32  */
+        { 0x00cc00cc00cc00ccULL, 0x00cc00cc00cc00ccULL, },
+        { 0x0022002200220022ULL, 0x0022002200220022ULL, },
+        { 0x0077007700770077ULL, 0x0077007700770077ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0099009900990099ULL, 0x0099009900990099ULL, },
+        { 0x003effe90094003eULL, 0xffe90094003effe9ULL, },
+        { 0x005b00b00005005bULL, 0x00b00005005b00b0ULL, },
+        { 0xff34ff34ff34ff34ULL, 0xff34ff34ff34ff34ULL, },    /*  40  */
+        { 0x0033003300330033ULL, 0x0033003300330033ULL, },
+        { 0xff89ff89ff89ff89ULL, 0xff89ff89ff89ff89ULL, },
+        { 0xffdeffdeffdeffdeULL, 0xffdeffdeffdeffdeULL, },
+        { 0xff67ff67ff67ff67ULL, 0xff67ff67ff67ff67ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffa5ff50fffbffa5ULL, 0xff50fffbffa5ff50ULL, },
+        { 0xffc20017ff6cffc2ULL, 0x0017ff6cffc20017ULL, },
+        { 0xffe4ff39ff8fffe4ULL, 0xff39ff8fffe4ff39ULL, },    /*  48  */
+        { 0x00e30038008e00e3ULL, 0x0038008e00e30038ULL, },
+        { 0x0039ff8effe40039ULL, 0xff8effe40039ff8eULL, },
+        { 0x008effe30039008eULL, 0xffe30039008effe3ULL, },
+        { 0x0017ff6cffc20017ULL, 0xff6cffc20017ff6cULL, },
+        { 0x00b00005005b00b0ULL, 0x0005005b00b00005ULL, },
+        { 0x0055ff5500560055ULL, 0xff5500560055ff55ULL, },
+        { 0x0072001cffc70072ULL, 0x001cffc70072001cULL, },
+        { 0xff1dffc8ff72ff1dULL, 0xffc8ff72ff1dffc8ULL, },    /*  56  */
+        { 0x001c00c70071001cULL, 0x00c70071001c00c7ULL, },
+        { 0xff72001dffc7ff72ULL, 0x001dffc7ff72001dULL, },
+        { 0xffc70072001cffc7ULL, 0x0072001cffc70072ULL, },
+        { 0xff50fffbffa5ff50ULL, 0xfffbffa5ff50fffbULL, },
+        { 0xffe90094003effe9ULL, 0x0094003effe90094ULL, },
+        { 0xff8effe40039ff8eULL, 0xffe40039ff8effe4ULL, },
+        { 0xffab00abffaaffabULL, 0x00abffaaffab00abULL, },
+        { 0x001e001affc60015ULL, 0xffe4ffad008300a4ULL, },    /*  64  */
+        { 0xffca0083ff95004dULL, 0xff54fff100bfffb4ULL, },
+        { 0x002e003cff59ffd5ULL, 0xff73ff0c00d3009cULL, },
+        { 0x00390099fff70007ULL, 0xff5aff3300bc0010ULL, },
+        { 0x0091ff34ffeb0087ULL, 0xffab005dff9a0046ULL, },
+        { 0x003dff9dffba00bfULL, 0xff1b00a1ffd6ff56ULL, },
+        { 0x00a1ff56ff7e0047ULL, 0xff3affbcffea003eULL, },
+        { 0x00acffb3001c0079ULL, 0xff21ffe3ffd3ffb2ULL, },
+        { 0x0042ffe20057004bULL, 0xffc0006800300019ULL, },    /*  72  */
+        { 0xffee004b00260083ULL, 0xff3000ac006cff29ULL, },
+        { 0x00520004ffea000bULL, 0xff4fffc700800011ULL, },
+        { 0x005d00610088003dULL, 0xff36ffee0069ff85ULL, },
+        { 0x0006ff4afffc00a2ULL, 0x0026002a002e00d6ULL, },
+        { 0xffb2ffb3ffcb00daULL, 0xff96006e006affe6ULL, },
+        { 0x0016ff6cff8f0062ULL, 0xffb5ff89007e00ceULL, },
+        { 0x0021ffc9002d0094ULL, 0xff9cffb000670042ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_U_H(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_U_H(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_w.c
new file mode 100644
index 0000000000..fb6b91a3a2
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_w.c
@@ -0,0 +1,157 @@
+/*
+ *  Test program for MSA instruction HSUB_U.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "HSUB_U.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, },
+        { 0x0000555500005555ULL, 0x0000555500005555ULL, },
+        { 0x0000aaaa0000aaaaULL, 0x0000aaaa0000aaaaULL, },
+        { 0x0000333300003333ULL, 0x0000333300003333ULL, },
+        { 0x0000cccc0000ccccULL, 0x0000cccc0000ccccULL, },
+        { 0x0000c71c00001c71ULL, 0x000071c70000c71cULL, },
+        { 0x000038e30000e38eULL, 0x00008e38000038e3ULL, },
+        { 0xffff0001ffff0001ULL, 0xffff0001ffff0001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffff5556ffff5556ULL, 0xffff5556ffff5556ULL, },
+        { 0xffffaaabffffaaabULL, 0xffffaaabffffaaabULL, },
+        { 0xffff3334ffff3334ULL, 0xffff3334ffff3334ULL, },
+        { 0xffffcccdffffcccdULL, 0xffffcccdffffcccdULL, },
+        { 0xffffc71dffff1c72ULL, 0xffff71c8ffffc71dULL, },
+        { 0xffff38e4ffffe38fULL, 0xffff8e39ffff38e4ULL, },
+        { 0xffffaaabffffaaabULL, 0xffffaaabffffaaabULL, },    /*  16  */
+        { 0x0000aaaa0000aaaaULL, 0x0000aaaa0000aaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000555500005555ULL, 0x0000555500005555ULL, },
+        { 0xffffdddeffffdddeULL, 0xffffdddeffffdddeULL, },
+        { 0x0000777700007777ULL, 0x0000777700007777ULL, },
+        { 0x000071c7ffffc71cULL, 0x00001c72000071c7ULL, },
+        { 0xffffe38e00008e39ULL, 0x000038e3ffffe38eULL, },
+        { 0xffff5556ffff5556ULL, 0xffff5556ffff5556ULL, },    /*  24  */
+        { 0x0000555500005555ULL, 0x0000555500005555ULL, },
+        { 0xffffaaabffffaaabULL, 0xffffaaabffffaaabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffff8889ffff8889ULL, 0xffff8889ffff8889ULL, },
+        { 0x0000222200002222ULL, 0x0000222200002222ULL, },
+        { 0x00001c72ffff71c7ULL, 0xffffc71d00001c72ULL, },
+        { 0xffff8e39000038e4ULL, 0xffffe38effff8e39ULL, },
+        { 0xffffcccdffffcccdULL, 0xffffcccdffffcccdULL, },    /*  32  */
+        { 0x0000cccc0000ccccULL, 0x0000cccc0000ccccULL, },
+        { 0x0000222200002222ULL, 0x0000222200002222ULL, },
+        { 0x0000777700007777ULL, 0x0000777700007777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000999900009999ULL, 0x0000999900009999ULL, },
+        { 0x000093e9ffffe93eULL, 0x00003e94000093e9ULL, },
+        { 0x000005b00000b05bULL, 0x00005b05000005b0ULL, },
+        { 0xffff3334ffff3334ULL, 0xffff3334ffff3334ULL, },    /*  40  */
+        { 0x0000333300003333ULL, 0x0000333300003333ULL, },
+        { 0xffff8889ffff8889ULL, 0xffff8889ffff8889ULL, },
+        { 0xffffdddeffffdddeULL, 0xffffdddeffffdddeULL, },
+        { 0xffff6667ffff6667ULL, 0xffff6667ffff6667ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xfffffa50ffff4fa5ULL, 0xffffa4fbfffffa50ULL, },
+        { 0xffff6c17000016c2ULL, 0xffffc16cffff6c17ULL, },
+        { 0xffffe38fffff8e39ULL, 0xffff38e4ffffe38fULL, },    /*  48  */
+        { 0x0000e38e00008e38ULL, 0x000038e30000e38eULL, },
+        { 0x000038e4ffffe38eULL, 0xffff8e39000038e4ULL, },
+        { 0x00008e39000038e3ULL, 0xffffe38e00008e39ULL, },
+        { 0x000016c2ffffc16cULL, 0xffff6c17000016c2ULL, },
+        { 0x0000b05b00005b05ULL, 0x000005b00000b05bULL, },
+        { 0x0000aaabffffaaaaULL, 0xffffaaab0000aaabULL, },
+        { 0x00001c72000071c7ULL, 0xffffc71c00001c72ULL, },
+        { 0xffff1c72ffff71c8ULL, 0xffffc71dffff1c72ULL, },    /*  56  */
+        { 0x00001c71000071c7ULL, 0x0000c71c00001c71ULL, },
+        { 0xffff71c7ffffc71dULL, 0x00001c72ffff71c7ULL, },
+        { 0xffffc71c00001c72ULL, 0x000071c7ffffc71cULL, },
+        { 0xffff4fa5ffffa4fbULL, 0xfffffa50ffff4fa5ULL, },
+        { 0xffffe93e00003e94ULL, 0x000093e9ffffe93eULL, },
+        { 0xffffe38effff8e39ULL, 0x000038e4ffffe38eULL, },
+        { 0xffff555500005556ULL, 0x00005555ffff5555ULL, },
+        { 0xffffa19effffd322ULL, 0x0000400900004e6fULL, },    /*  64  */
+        { 0x00008807ffff615aULL, 0xffff904d0000ab7fULL, },
+        { 0xffffd9c0ffff9ce2ULL, 0xffff84680000d967ULL, },
+        { 0x0000721dffff4614ULL, 0xffffc28f00001bdbULL, },
+        { 0x000014f2fffff853ULL, 0x00000799ffff6533ULL, },
+        { 0x0000fb5bffff868bULL, 0xffff57ddffffc243ULL, },
+        { 0x00004d14ffffc213ULL, 0xffff4bf8fffff02bULL, },
+        { 0x0000e571ffff6b45ULL, 0xffff8a1fffff329fULL, },
+        { 0xffffc58e0000648fULL, 0x00001c7afffffb1fULL, },    /*  72  */
+        { 0x0000abf7fffff2c7ULL, 0xffff6cbe0000582fULL, },
+        { 0xfffffdb000002e4fULL, 0xffff60d900008617ULL, },
+        { 0x0000960dffffd781ULL, 0xffff9f00ffffc88bULL, },
+        { 0xffff8983000008f1ULL, 0x00008293fffff936ULL, },
+        { 0x00006fecffff9729ULL, 0xffffd2d700005646ULL, },
+        { 0xffffc1a5ffffd2b1ULL, 0xffffc6f20000842eULL, },
+        { 0x00005a02ffff7be3ULL, 0x00000519ffffc6a2ULL, },
+};
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_U_W(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_U_W(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_b.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_b.c
index 04e6159fc7..38a6f402fd 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_S.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x800b9880809ea980ULL, 0x7fe73e2702e94374ULL, },
         { 0x7fe5307f36cf8d0eULL, 0x808a8080abc73294ULL, },
         { 0x757f16ea117f1b46ULL, 0x80facdbe940390a4ULL, },
+        { 0x7ff5687f7f62577fULL, 0x8019c2d9fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_d.c
index 195137f41f..989d27cb4e 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_S.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x8000000000000000ULL, 0x7fffffffffffffffULL, },
         { 0x7fffffffffffffffULL, 0x8000000000000000ULL, },
         { 0x749115ea109e1b46ULL, 0x8000000000000000ULL, },
+        { 0x7fffffffffffffffULL, 0x8000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_h.c
index c57238d31a..c847a6e6d2 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_S.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x8000985d8000a932ULL, 0x7fff3e2701e94274ULL, },
         { 0x7fff2f8135cf8d0eULL, 0x80008000aac73294ULL, },
         { 0x749115ea109e1b46ULL, 0x8000cdbe94038fa4ULL, },
+        { 0x7fff67a37fff56ceULL, 0x8000c1d9fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_w.c
index 1cded65a7e..0459a4e1af 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_S.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x8000000080000000ULL, 0x7fffffff01e84274ULL, },
         { 0x7fffffff35cf8d0eULL, 0x80000000aac73294ULL, },
         { 0x749115ea109e1b46ULL, 0x8000000094038fa4ULL, },
+        { 0x7fffffff7fffffffULL, 0x80000000fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_b.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_b.c
index cb38f033a6..3533c0c3ae 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_U.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b985d5b9e0032ULL, 0x00003e2702000000ULL, },
         { 0x0000000036008d0eULL, 0x428a7d7a00003294ULL, },
         { 0x0000160011001b46ULL, 0x7b0000be94039000ULL, },
+        { 0x0000000000005700ULL, 0x661900000017bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_d.c
index 2685b2fe7e..894f984f5f 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_U.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b985d5b9da932ULL, 0x0000000000000000ULL, },
         { 0x0000000000000000ULL, 0x428a7d79aac73294ULL, },
         { 0x0000000000000000ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0x0000000000000000ULL, 0x6618c1d8fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_h.c
index ca6dd38b69..29eea521fd 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_U.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b985d5b9e0000ULL, 0x00003e2701e90000ULL, },
         { 0x0000000035cf8d0eULL, 0x428a7d7a00003294ULL, },
         { 0x000015ea109e1b46ULL, 0x7afa000094038fa4ULL, },
+        { 0x00000000000056ceULL, 0x661900000000bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_w.c
index 42ebddb408..2bae876f11 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_U.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b985d5b9da932ULL, 0x0000000001e84274ULL, },
         { 0x0000000035cf8d0eULL, 0x428a7d7a00000000ULL, },
         { 0x00000000109e1b46ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0x0000000000000000ULL, 0x6618c1d900000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_b.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_b.c
index dac20cc769..9cb798c405 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUS_U.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b985d5b9ea932ULL, 0x9ae7ffffff004374ULL, },
         { 0xe800308136008d0eULL, 0x428a7d7aab00ff94ULL, },
         { 0x75911600119eff46ULL, 0x7bfacdbe940390a4ULL, },
+        { 0xc40068a3a562ffceULL, 0x66ffc2d9fe17bd8cULL, },
+        { 0x000000000000ff00ULL, 0xffffffffff00ffffULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_d.c
index 4485502c1c..4915d598fd 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUS_U.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b985d5b9da932ULL, 0x99e73e2701e84274ULL, },
         { 0xe7e42f8135cf8d0eULL, 0x428a7d79aac73294ULL, },
         { 0x749115ea109e1b46ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0xc3f467a2a46256ceULL, 0x6618c1d8fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0xffffffffffffffffULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_h.c
index 9e99aeefc5..19d8538942 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUS_U.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b985d5b9ea932ULL, 0x99e7ffffffff4274ULL, },
         { 0xe7e52f8135cf8d0eULL, 0x428a7d7aaac7ffffULL, },
         { 0x749115ea109effffULL, 0x7afacdbe94038fa4ULL, },
+        { 0xc3f567a3a462ffffULL, 0x6619c1d9fe17bd8cULL, },
+        { 0x000000000000ffffULL, 0xffffffffffffffffULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_w.c
index 53a9acac1b..278488bcea 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUS_U.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b985d5b9da932ULL, 0x99e73e27ffffffffULL, },
         { 0xe7e42f8135cf8d0eULL, 0x428a7d7aaac73294ULL, },
         { 0x749115ea109e1b46ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0xc3f467a3a46256ceULL, 0x6618c1d9fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0xffffffffffffffffULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_b.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_b.c
index 86fb4f3e26..a0d7616246 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUU_S.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b7f5d5b7fa932ULL, 0x9ae73e2702e98080ULL, },
         { 0xe8e5808136cf7f0eULL, 0x427f7d7aabc7327fULL, },
         { 0x809116ea119e1b46ULL, 0x7bfacd7f7f037fa4ULL, },
+        { 0xc4f580a3a58057ceULL, 0x6619c2d9fe177f7fULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_d.c
index 45a1eb3094..26cf9c5578 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUU_S.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b985d5b9da932ULL, 0x99e73e2701e84274ULL, },
         { 0xe7e42f8135cf8d0eULL, 0x428a7d79aac73294ULL, },
         { 0x8000000000000000ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0xc3f467a2a46256ceULL, 0x6618c1d8fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_h.c
index 14ac7def29..cbba316c68 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUU_S.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b7fff5b9ea932ULL, 0x99e73e2701e98000ULL, },
         { 0xe7e5800035cf7fffULL, 0x428a7d7aaac73294ULL, },
         { 0x800015ea109e1b46ULL, 0x7afacdbe7fff7fffULL, },
+        { 0xc3f58000a46256ceULL, 0x6619c1d9fe177fffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_w.c
index 688f469cd0..156d62cd65 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUU_S.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b985d5b9da932ULL, 0x99e73e2701e84274ULL, },
         { 0xe7e42f8135cf8d0eULL, 0x428a7d7aaac73294ULL, },
         { 0x80000000109e1b46ULL, 0x7af9cdbe7fffffffULL, },
+        { 0xc3f467a3a46256ceULL, 0x6618c1d9fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_b.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_b.c
index d0964dcd59..dbec0466ef 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBV.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b985d5b9ea932ULL, 0x9ae73e2702e94374ULL, },
         { 0xe8e5308136cf8d0eULL, 0x428a7d7aabc73294ULL, },
         { 0x759116ea119e1b46ULL, 0x7bfacdbe940390a4ULL, },
+        { 0xc4f568a3a56257ceULL, 0x6619c2d9fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_d.c
index ec26a8e0c6..8b097899ce 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBV.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b985d5b9da932ULL, 0x99e73e2701e84274ULL, },
         { 0xe7e42f8135cf8d0eULL, 0x428a7d79aac73294ULL, },
         { 0x749115ea109e1b46ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0xc3f467a2a46256ceULL, 0x6618c1d8fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_h.c
index 420422ecf1..69bf9ec7b8 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBV.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,8 +121,12 @@ int32_t main(void)
         { 0x3c0b985d5b9ea932ULL, 0x99e73e2701e94274ULL, },
         { 0xe7e52f8135cf8d0eULL, 0x428a7d7aaac73294ULL, },
         { 0x749115ea109e1b46ULL, 0x7afacdbe94038fa4ULL, },
+        { 0xc3f567a3a46256ceULL, 0x6619c1d9fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_w.c
index 3e97005815..365dc0ed83 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBV.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -123,6 +125,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_b.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_b.c
index d720dc30a5..00b78f7ed4 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_b.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_b.c
@@ -33,7 +33,7 @@
 
 int32_t main(void)
 {
-    char *instruction_name = "*ILVEV.B";
+    char *instruction_name = "ILVEV.B";
     int32_t ret;
     uint32_t i, j;
     struct timeval start, end;
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x4f4f4d4d31314e4eULL, 0xf1f1d8d84242a0a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_d.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_d.c
index 83239949af..a33acfa382 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_d.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_d.c
@@ -33,7 +33,7 @@
 
 int32_t main(void)
 {
-    char *instruction_name = "*ILVEV.D";
+    char *instruction_name = "ILVEV.D";
     int32_t ret;
     uint32_t i, j;
     struct timeval start, end;
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x704f164d5e31e24eULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_h.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_h.c
index 3f6fc265d2..51866330dc 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_h.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_h.c
@@ -33,7 +33,7 @@
 
 int32_t main(void)
 {
-    char *instruction_name = "*ILVEV.H";
+    char *instruction_name = "ILVEV.H";
     int32_t ret;
     uint32_t i, j;
     struct timeval start, end;
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x164d164de24ee24eULL, 0x88d888d8e2a0e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_w.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_w.c
index 30d2e3802d..0e82f2199b 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_w.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_w.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x5e31e24e5e31e24eULL, 0xa942e2a0a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_b.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_b.c
index c771287a71..6dc06c597e 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_b.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_b.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0xa9a94242e2e2a0a0ULL, 0x8d8df1f18888d8d8ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_d.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_d.c
index b7d5fcdc18..a8679723aa 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_d.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_d.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x8df188d8a942e2a0ULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_h.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_h.c
index af72876236..22f6ba600a 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_h.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_h.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0xa942a942e2a0e2a0ULL, 0x8df18df188d888d8ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_w.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_w.c
index e06c9d94ca..f07689e0c8 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_w.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvl_w.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0xa942e2a0a942e2a0ULL, 0x8df188d88df188d8ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_b.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_b.c
index 8e7f1c4706..2316f388ec 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_b.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_b.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x707016165e5ee2e2ULL, 0x8d8d8888a9a9e2e2ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_d.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_d.c
index acbd94a68d..2feb2eb2ed 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_d.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_d.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x8df188d8a942e2a0ULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_h.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_h.c
index 8a82def407..93abfadfdd 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_h.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_h.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x704f704f5e315e31ULL, 0x8df18df1a942a942ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_w.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_w.c
index e19170c364..9b2b0d82ac 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_w.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvod_w.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x704f164d704f164dULL, 0x8df188d88df188d8ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_b.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_b.c
index 1e519e6e9e..105a710d34 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_b.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_b.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x5e5e3131e2e24e4eULL, 0x70704f4f16164d4dULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_d.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_d.c
index be760430c7..22579c48c5 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_d.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_d.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x704f164d5e31e24eULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_h.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_h.c
index cbd4685eca..ca416c02e6 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_h.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_h.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x5e315e31e24ee24eULL, 0x704f704f164d164dULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_w.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_w.c
index 5f4cfd0377..6652c7fbf5 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_w.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvr_w.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x5e31e24e5e31e24eULL, 0x704f164d704f164dULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/logic/test_msa_and_v.c b/tests/tcg/mips/user/ase/msa/logic/test_msa_and_v.c
index 534c4201a8..1f864be5f2 100644
--- a/tests/tcg/mips/user/ase/msa/logic/test_msa_and_v.c
+++ b/tests/tcg/mips/user/ase/msa/logic/test_msa_and_v.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/logic/test_msa_nor_v.c b/tests/tcg/mips/user/ase/msa/logic/test_msa_nor_v.c
index f781a8bb9d..0b0f5ddb5e 100644
--- a/tests/tcg/mips/user/ase/msa/logic/test_msa_nor_v.c
+++ b/tests/tcg/mips/user/ase/msa/logic/test_msa_nor_v.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x8fb0e9b2a1ce1db1ULL, 0x720e772756bd1d5fULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/logic/test_msa_or_v.c b/tests/tcg/mips/user/ase/msa/logic/test_msa_or_v.c
index 924f216e41..90f535aa29 100644
--- a/tests/tcg/mips/user/ase/msa/logic/test_msa_or_v.c
+++ b/tests/tcg/mips/user/ase/msa/logic/test_msa_or_v.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/logic/test_msa_xor_v.c b/tests/tcg/mips/user/ase/msa/logic/test_msa_xor_v.c
index f0442e6577..398443d83a 100644
--- a/tests/tcg/mips/user/ase/msa/logic/test_msa_xor_v.c
+++ b/tests/tcg/mips/user/ase/msa/logic/test_msa_xor_v.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_b.c b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_b.c
index 409773d7f2..fdab88d5c9 100644
--- a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_b.c
+++ b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_b.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0xf1d842a04f4d314eULL, 0xf1d842a04f4d314eULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_d.c b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_d.c
index 8e89716416..e7083858cd 100644
--- a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_d.c
+++ b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_d.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x704f164d5e31e24eULL, 0x704f164d5e31e24eULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_h.c b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_h.c
index b389587dfe..bfd401f379 100644
--- a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_h.c
+++ b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_h.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x88d8e2a0164de24eULL, 0x88d8e2a0164de24eULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_w.c b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_w.c
index d393ad5066..e58372d750 100644
--- a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_w.c
+++ b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckev_w.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0xa942e2a05e31e24eULL, 0xa942e2a05e31e24eULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_b.c b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_b.c
index ab363a0cdc..4cefecd0ec 100644
--- a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_b.c
+++ b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_b.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x8d88a9e270165ee2ULL, 0x8d88a9e270165ee2ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_d.c b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_d.c
index 09a61408bc..a7ea137148 100644
--- a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_d.c
+++ b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_d.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x8df188d8a942e2a0ULL, 0x8df188d8a942e2a0ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_h.c b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_h.c
index d7a8c5b5af..bc7edcc595 100644
--- a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_h.c
+++ b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_h.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x8df1a942704f5e31ULL, 0x8df1a942704f5e31ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_w.c b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_w.c
index 4b732d0359..ca8c4fb0b6 100644
--- a/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_w.c
+++ b/tests/tcg/mips/user/ase/msa/pack/test_msa_pckod_w.c
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x8df188d8704f164dULL, 0x8df188d8704f164dULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_b.c b/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_b.c
index d9ccf575fa..c088643b8c 100644
--- a/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_b.c
+++ b/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_b.c
@@ -41,7 +41,7 @@ int32_t main(void)
 
     uint64_t b128_result[TEST_COUNT_TOTAL][2];
     uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
-        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
         { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_d.c b/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_d.c
index 6c555fbb23..06f3604bef 100644
--- a/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_d.c
+++ b/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_d.c
@@ -41,7 +41,7 @@ int32_t main(void)
 
     uint64_t b128_result[TEST_COUNT_TOTAL][2];
     uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
-        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
         { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_h.c b/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_h.c
index 9dfcb51fe5..f28d949d02 100644
--- a/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_h.c
+++ b/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_h.c
@@ -41,7 +41,7 @@ int32_t main(void)
 
     uint64_t b128_result[TEST_COUNT_TOTAL][2];
     uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
-        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
         { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_w.c b/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_w.c
index 97074c0924..78e26d4ca0 100644
--- a/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_w.c
+++ b/tests/tcg/mips/user/ase/msa/pack/test_msa_vshf_w.c
@@ -41,7 +41,7 @@ int32_t main(void)
 
     uint64_t b128_result[TEST_COUNT_TOTAL][2];
     uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
-        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
         { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
@@ -123,6 +123,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
     };
 
+    reset_msa_registers();
+
     gettimeofday(&start, NULL);
 
     for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_sll_b.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_sll_b.c
new file mode 100644
index 0000000000..6b1aa962d4
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_sll_b.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SLL.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SLL.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x8080808080808080ULL, 0x8080808080808080ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xfcfcfcfcfcfcfcfcULL, 0xfcfcfcfcfcfcfcfcULL, },
+        { 0xe0e0e0e0e0e0e0e0ULL, 0xe0e0e0e0e0e0e0e0ULL, },
+        { 0xf0f0f0f0f0f0f0f0ULL, 0xf0f0f0f0f0f0f0f0ULL, },
+        { 0xf8f8f8f8f8f8f8f8ULL, 0xf8f8f8f8f8f8f8f8ULL, },
+        { 0xf8c0fff8c0fff8c0ULL, 0xfff8c0fff8c0fff8ULL, },
+        { 0xf0fe80f0fe80f0feULL, 0x80f0fe80f0fe80f0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xa8a8a8a8a8a8a8a8ULL, 0xa8a8a8a8a8a8a8a8ULL, },
+        { 0x4040404040404040ULL, 0x4040404040404040ULL, },
+        { 0xa0a0a0a0a0a0a0a0ULL, 0xa0a0a0a0a0a0a0a0ULL, },
+        { 0x5050505050505050ULL, 0x5050505050505050ULL, },
+        { 0x5080aa5080aa5080ULL, 0xaa5080aa5080aa50ULL, },
+        { 0xa05400a05400a054ULL, 0x00a05400a05400a0ULL, },
+        { 0x8080808080808080ULL, 0x8080808080808080ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5454545454545454ULL, 0x5454545454545454ULL, },
+        { 0xa0a0a0a0a0a0a0a0ULL, 0xa0a0a0a0a0a0a0a0ULL, },
+        { 0x5050505050505050ULL, 0x5050505050505050ULL, },
+        { 0xa8a8a8a8a8a8a8a8ULL, 0xa8a8a8a8a8a8a8a8ULL, },
+        { 0xa84055a84055a840ULL, 0x55a84055a84055a8ULL, },
+        { 0x50aa8050aa8050aaULL, 0x8050aa8050aa8050ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3030303030303030ULL, 0x3030303030303030ULL, },
+        { 0x8080808080808080ULL, 0x8080808080808080ULL, },
+        { 0xc0c0c0c0c0c0c0c0ULL, 0xc0c0c0c0c0c0c0c0ULL, },
+        { 0x6060606060606060ULL, 0x6060606060606060ULL, },
+        { 0x6000cc6000cc6000ULL, 0xcc6000cc6000cc60ULL, },
+        { 0xc09800c09800c098ULL, 0x00c09800c09800c0ULL, },
+        { 0x8080808080808080ULL, 0x8080808080808080ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x6060606060606060ULL, 0x6060606060606060ULL, },
+        { 0x3030303030303030ULL, 0x3030303030303030ULL, },
+        { 0x9898989898989898ULL, 0x9898989898989898ULL, },
+        { 0x98c03398c03398c0ULL, 0x3398c03398c03398ULL, },
+        { 0x3066803066803066ULL, 0x8030668030668030ULL, },
+        { 0x8000008000008000ULL, 0x0080000080000080ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x8c38e08c38e08c38ULL, 0xe08c38e08c38e08cULL, },
+        { 0x60c00060c00060c0ULL, 0x0060c00060c00060ULL, },
+        { 0x30e08030e08030e0ULL, 0x8030e08030e08030ULL, },
+        { 0x1870c01870c01870ULL, 0xc01870c01870c018ULL, },
+        { 0x1880381880381880ULL, 0x3818803818803818ULL, },
+        { 0x301c00301c00301cULL, 0x00301c00301c0030ULL, },
+        { 0x0080800080800080ULL, 0x8000808000808000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x70c41c70c41c70c4ULL, 0x1c70c41c70c41c70ULL, },
+        { 0x8020e08020e08020ULL, 0xe08020e08020e080ULL, },
+        { 0xc01070c01070c010ULL, 0x70c01070c01070c0ULL, },
+        { 0xe08838e08838e088ULL, 0x38e08838e08838e0ULL, },
+        { 0xe040c7e040c7e040ULL, 0xc7e040c7e040c7e0ULL, },
+        { 0xc0e280c0e280c0e2ULL, 0x80c0e280c0e280c0ULL, },
+        { 0x88a880c02888a040ULL, 0x5880588080d8b0c0ULL, },    /*  64  */
+        { 0x4080e66000108040ULL, 0x2c805878c080c0c0ULL, },
+        { 0x80a880305000a840ULL, 0x8067c000f0d800c0ULL, },
+        { 0x8800808000c45400ULL, 0x60ce0b5efcecc00cULL, },
+        { 0xfbf800304d4ce008ULL, 0x9080d88040f852c0ULL, },
+        { 0xd8800018a0988008ULL, 0x4880d868a08048c0ULL, },
+        { 0xb0f8008c9a803808ULL, 0x00f7c000a8f840c0ULL, },
+        { 0xfb00006040261c00ULL, 0x40eebb1a2afc48fcULL, },
+        { 0xac6880a0b93c6080ULL, 0x380030c0c0582540ULL, },    /*  72  */
+        { 0x6080ae5020788080ULL, 0x9c0030fc60809440ULL, },
+        { 0xc06880a872805880ULL, 0x80d880805858a040ULL, },
+        { 0xac008040409e2c00ULL, 0xe0b0c6ff56ac9414ULL, },
+        { 0x703c80d05ec4404eULL, 0x688040004010e200ULL, },
+        { 0x80c01668c088004eULL, 0x3480406020008800ULL, },
+        { 0x003c8034bc80104eULL, 0x80f1000048104000ULL, },
+        { 0x708080a080628880ULL, 0xa0e288d8520888a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SLL_B(b128_pattern[i], b128_pattern[j],
+                         b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SLL_B(b128_random[i], b128_random[j],
+                         b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                      (PATTERN_INPUTS_SHORT_COUNT)) +
+                                     RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_sll_d.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_sll_d.c
new file mode 100644
index 0000000000..deb3b11740
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_sll_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SLL.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SLL.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x8000000000000000ULL, 0x8000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xfffffc0000000000ULL, 0xfffffc0000000000ULL, },
+        { 0xffffffffffe00000ULL, 0xffffffffffe00000ULL, },
+        { 0xfffffffffffff000ULL, 0xfffffffffffff000ULL, },
+        { 0xfff8000000000000ULL, 0xfff8000000000000ULL, },
+        { 0xffffffffffffc000ULL, 0xfffffff800000000ULL, },
+        { 0xfffe000000000000ULL, 0xfffffffff0000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaa80000000000ULL, 0xaaaaa80000000000ULL, },
+        { 0x5555555555400000ULL, 0x5555555555400000ULL, },
+        { 0xaaaaaaaaaaaaa000ULL, 0xaaaaaaaaaaaaa000ULL, },
+        { 0x5550000000000000ULL, 0x5550000000000000ULL, },
+        { 0xaaaaaaaaaaaa8000ULL, 0x5555555000000000ULL, },
+        { 0x5554000000000000ULL, 0xaaaaaaaaa0000000ULL, },
+        { 0x8000000000000000ULL, 0x8000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555540000000000ULL, 0x5555540000000000ULL, },
+        { 0xaaaaaaaaaaa00000ULL, 0xaaaaaaaaaaa00000ULL, },
+        { 0x5555555555555000ULL, 0x5555555555555000ULL, },
+        { 0xaaa8000000000000ULL, 0xaaa8000000000000ULL, },
+        { 0x5555555555554000ULL, 0xaaaaaaa800000000ULL, },
+        { 0xaaaa000000000000ULL, 0x5555555550000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333300000000000ULL, 0x3333300000000000ULL, },
+        { 0x9999999999800000ULL, 0x9999999999800000ULL, },
+        { 0xccccccccccccc000ULL, 0xccccccccccccc000ULL, },
+        { 0x6660000000000000ULL, 0x6660000000000000ULL, },
+        { 0x3333333333330000ULL, 0x6666666000000000ULL, },
+        { 0x9998000000000000ULL, 0xccccccccc0000000ULL, },
+        { 0x8000000000000000ULL, 0x8000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xcccccc0000000000ULL, 0xcccccc0000000000ULL, },
+        { 0x6666666666600000ULL, 0x6666666666600000ULL, },
+        { 0x3333333333333000ULL, 0x3333333333333000ULL, },
+        { 0x9998000000000000ULL, 0x9998000000000000ULL, },
+        { 0xccccccccccccc000ULL, 0x9999999800000000ULL, },
+        { 0x6666000000000000ULL, 0x3333333330000000ULL, },
+        { 0x0000000000000000ULL, 0x8000000000000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e380000000000ULL, 0x38e38c0000000000ULL, },
+        { 0x1c71c71c71c00000ULL, 0xc71c71c71c600000ULL, },
+        { 0xe38e38e38e38e000ULL, 0x38e38e38e38e3000ULL, },
+        { 0x1c70000000000000ULL, 0xc718000000000000ULL, },
+        { 0x8e38e38e38e38000ULL, 0x1c71c71800000000ULL, },
+        { 0xc71c000000000000ULL, 0x8e38e38e30000000ULL, },
+        { 0x8000000000000000ULL, 0x0000000000000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c40000000000ULL, 0xc71c700000000000ULL, },
+        { 0xe38e38e38e200000ULL, 0x38e38e38e3800000ULL, },
+        { 0x1c71c71c71c71000ULL, 0xc71c71c71c71c000ULL, },
+        { 0xe388000000000000ULL, 0x38e0000000000000ULL, },
+        { 0x71c71c71c71c4000ULL, 0xe38e38e000000000ULL, },
+        { 0x38e2000000000000ULL, 0x71c71c71c0000000ULL, },
+        { 0x886ae6cc28625540ULL, 0x70b5efe7bb00c000ULL, },    /*  64  */
+        { 0x6ae6cc2862554000ULL, 0xc000000000000000ULL, },
+        { 0x886ae6cc28625540ULL, 0xb5efe7bb00c00000ULL, },
+        { 0xb9b30a1895500000ULL, 0xfe7bb00c00000000ULL, },
+        { 0xfbbe00634d93c708ULL, 0x7bb1a153f52fc000ULL, },
+        { 0xbe00634d93c70800ULL, 0xc000000000000000ULL, },
+        { 0xfbbe00634d93c708ULL, 0xb1a153f52fc00000ULL, },
+        { 0x8018d364f1c20000ULL, 0x153f52fc00000000ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x8c6ffab2b2514000ULL, },    /*  72  */
+        { 0x5aaeaab9cf8b8000ULL, 0x4000000000000000ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x6ffab2b251400000ULL, },
+        { 0xabaaae73e2e00000ULL, 0xab2b251400000000ULL, },
+        { 0x704f164d5e31e24eULL, 0x188d8a942e2a0000ULL, },
+        { 0x4f164d5e31e24e00ULL, 0x0000000000000000ULL, },
+        { 0x704f164d5e31e24eULL, 0x8d8a942e2a000000ULL, },
+        { 0xc593578c78938000ULL, 0xa942e2a000000000ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SLL_D(b128_pattern[i], b128_pattern[j],
+                         b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SLL_D(b128_random[i], b128_random[j],
+                         b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                      (PATTERN_INPUTS_SHORT_COUNT)) +
+                                     RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_sll_h.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_sll_h.c
new file mode 100644
index 0000000000..edb3b11be5
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_sll_h.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SLL.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SLL.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x8000800080008000ULL, 0x8000800080008000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xfc00fc00fc00fc00ULL, 0xfc00fc00fc00fc00ULL, },
+        { 0xffe0ffe0ffe0ffe0ULL, 0xffe0ffe0ffe0ffe0ULL, },
+        { 0xf000f000f000f000ULL, 0xf000f000f000f000ULL, },
+        { 0xfff8fff8fff8fff8ULL, 0xfff8fff8fff8fff8ULL, },
+        { 0xc000fff8ff00c000ULL, 0xfff8ff00c000fff8ULL, },
+        { 0xfffef000ff80fffeULL, 0xf000ff80fffef000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xa800a800a800a800ULL, 0xa800a800a800a800ULL, },
+        { 0x5540554055405540ULL, 0x5540554055405540ULL, },
+        { 0xa000a000a000a000ULL, 0xa000a000a000a000ULL, },
+        { 0x5550555055505550ULL, 0x5550555055505550ULL, },
+        { 0x80005550aa008000ULL, 0x5550aa0080005550ULL, },
+        { 0x5554a00055005554ULL, 0xa00055005554a000ULL, },
+        { 0x8000800080008000ULL, 0x8000800080008000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5400540054005400ULL, 0x5400540054005400ULL, },
+        { 0xaaa0aaa0aaa0aaa0ULL, 0xaaa0aaa0aaa0aaa0ULL, },
+        { 0x5000500050005000ULL, 0x5000500050005000ULL, },
+        { 0xaaa8aaa8aaa8aaa8ULL, 0xaaa8aaa8aaa8aaa8ULL, },
+        { 0x4000aaa855004000ULL, 0xaaa855004000aaa8ULL, },
+        { 0xaaaa5000aa80aaaaULL, 0x5000aa80aaaa5000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3000300030003000ULL, 0x3000300030003000ULL, },
+        { 0x9980998099809980ULL, 0x9980998099809980ULL, },
+        { 0xc000c000c000c000ULL, 0xc000c000c000c000ULL, },
+        { 0x6660666066606660ULL, 0x6660666066606660ULL, },
+        { 0x00006660cc000000ULL, 0x6660cc0000006660ULL, },
+        { 0x9998c00066009998ULL, 0xc00066009998c000ULL, },
+        { 0x8000800080008000ULL, 0x8000800080008000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xcc00cc00cc00cc00ULL, 0xcc00cc00cc00cc00ULL, },
+        { 0x6660666066606660ULL, 0x6660666066606660ULL, },
+        { 0x3000300030003000ULL, 0x3000300030003000ULL, },
+        { 0x9998999899989998ULL, 0x9998999899989998ULL, },
+        { 0xc00099983300c000ULL, 0x99983300c0009998ULL, },
+        { 0x6666300099806666ULL, 0x3000998066663000ULL, },
+        { 0x0000800000000000ULL, 0x8000000000008000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38008c00e0003800ULL, 0x8c00e00038008c00ULL, },
+        { 0x71c01c60c70071c0ULL, 0x1c60c70071c01c60ULL, },
+        { 0xe00030008000e000ULL, 0x30008000e0003000ULL, },
+        { 0x1c70c71871c01c70ULL, 0xc71871c01c70c718ULL, },
+        { 0x8000c71838008000ULL, 0xc71838008000c718ULL, },
+        { 0xc71c30001c00c71cULL, 0x30001c00c71c3000ULL, },
+        { 0x8000000080008000ULL, 0x0000800080000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xc40070001c00c400ULL, 0x70001c00c4007000ULL, },
+        { 0x8e20e38038e08e20ULL, 0xe38038e08e20e380ULL, },
+        { 0x1000c00070001000ULL, 0xc00070001000c000ULL, },
+        { 0xe38838e08e38e388ULL, 0x38e08e38e38838e0ULL, },
+        { 0x400038e0c7004000ULL, 0x38e0c700400038e0ULL, },
+        { 0x38e2c000e38038e2ULL, 0xc000e38038e2c000ULL, },
+        { 0xa800c000a1885540ULL, 0xb3808000d800c000ULL, },    /*  64  */
+        { 0x8000366043104000ULL, 0xb38078008000c000ULL, },
+        { 0xa800300000005540ULL, 0x67000000d80000c0ULL, },
+        { 0x0000800050c40000ULL, 0x96ce5e00f9ecb00cULL, },
+        { 0xf8003000364cc708ULL, 0x7b808000f800c000ULL, },
+        { 0x800003186c980800ULL, 0x7b8068008000c000ULL, },
+        { 0xf8008c008000c708ULL, 0xf7000000f8002fc0ULL, },
+        { 0x000060009b260000ULL, 0x25ee1a0054fc52fcULL, },
+        { 0x6800a000e73c8b80ULL, 0xec00c00058004000ULL, },    /*  72  */
+        { 0x80007550ce788000ULL, 0xec00fc0080004000ULL, },
+        { 0x6800a80080008b80ULL, 0xd800800058005140ULL, },
+        { 0x00004000739e0000ULL, 0x4fb0ff00acac2514ULL, },
+        { 0x3c00d00078c4e24eULL, 0xf880000010000000ULL, },
+        { 0xc000b268f1884e00ULL, 0xf880600000000000ULL, },
+        { 0x3c0034008000e24eULL, 0xf100000010002a00ULL, },
+        { 0x8000a000bc628000ULL, 0x1be2d800a508e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SLL_H(b128_pattern[i], b128_pattern[j],
+                         b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SLL_H(b128_random[i], b128_random[j],
+                         b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                      (PATTERN_INPUTS_SHORT_COUNT)) +
+                                     RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_sll_w.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_sll_w.c
new file mode 100644
index 0000000000..4cafaab334
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_sll_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SLL.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SLL.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x8000000080000000ULL, 0x8000000080000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xfffffc00fffffc00ULL, 0xfffffc00fffffc00ULL, },
+        { 0xffe00000ffe00000ULL, 0xffe00000ffe00000ULL, },
+        { 0xfffff000fffff000ULL, 0xfffff000fffff000ULL, },
+        { 0xfff80000fff80000ULL, 0xfff80000fff80000ULL, },
+        { 0xfffffff8ffffc000ULL, 0xff000000fffffff8ULL, },
+        { 0xf0000000fffe0000ULL, 0xffffff80f0000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaa800aaaaa800ULL, 0xaaaaa800aaaaa800ULL, },
+        { 0x5540000055400000ULL, 0x5540000055400000ULL, },
+        { 0xaaaaa000aaaaa000ULL, 0xaaaaa000aaaaa000ULL, },
+        { 0x5550000055500000ULL, 0x5550000055500000ULL, },
+        { 0x55555550aaaa8000ULL, 0xaa00000055555550ULL, },
+        { 0xa000000055540000ULL, 0x55555500a0000000ULL, },
+        { 0x8000000080000000ULL, 0x8000000080000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555540055555400ULL, 0x5555540055555400ULL, },
+        { 0xaaa00000aaa00000ULL, 0xaaa00000aaa00000ULL, },
+        { 0x5555500055555000ULL, 0x5555500055555000ULL, },
+        { 0xaaa80000aaa80000ULL, 0xaaa80000aaa80000ULL, },
+        { 0xaaaaaaa855554000ULL, 0x55000000aaaaaaa8ULL, },
+        { 0x50000000aaaa0000ULL, 0xaaaaaa8050000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333300033333000ULL, 0x3333300033333000ULL, },
+        { 0x9980000099800000ULL, 0x9980000099800000ULL, },
+        { 0xccccc000ccccc000ULL, 0xccccc000ccccc000ULL, },
+        { 0x6660000066600000ULL, 0x6660000066600000ULL, },
+        { 0x6666666033330000ULL, 0xcc00000066666660ULL, },
+        { 0xc000000099980000ULL, 0x66666600c0000000ULL, },
+        { 0x8000000080000000ULL, 0x8000000080000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xcccccc00cccccc00ULL, 0xcccccc00cccccc00ULL, },
+        { 0x6660000066600000ULL, 0x6660000066600000ULL, },
+        { 0x3333300033333000ULL, 0x3333300033333000ULL, },
+        { 0x9998000099980000ULL, 0x9998000099980000ULL, },
+        { 0x99999998ccccc000ULL, 0x3300000099999998ULL, },
+        { 0x3000000066660000ULL, 0x9999998030000000ULL, },
+        { 0x8000000000000000ULL, 0x0000000080000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e38c00e38e3800ULL, 0x8e38e00038e38c00ULL, },
+        { 0x1c60000071c00000ULL, 0xc70000001c600000ULL, },
+        { 0xe38e30008e38e000ULL, 0x38e38000e38e3000ULL, },
+        { 0xc71800001c700000ULL, 0x71c00000c7180000ULL, },
+        { 0x1c71c71838e38000ULL, 0x380000001c71c718ULL, },
+        { 0x30000000c71c0000ULL, 0x71c71c0030000000ULL, },
+        { 0x0000000080000000ULL, 0x8000000000000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xc71c70001c71c400ULL, 0x71c71c00c71c7000ULL, },
+        { 0xe38000008e200000ULL, 0x38e00000e3800000ULL, },
+        { 0x1c71c00071c71000ULL, 0xc71c70001c71c000ULL, },
+        { 0x38e00000e3880000ULL, 0x8e38000038e00000ULL, },
+        { 0xe38e38e0c71c4000ULL, 0xc7000000e38e38e0ULL, },
+        { 0xc000000038e20000ULL, 0x8e38e380c0000000ULL, },
+        { 0xae6cc00028625540ULL, 0x80000000bb00c000ULL, },    /*  64  */
+        { 0x4357366062554000ULL, 0x78000000c0000000ULL, },
+        { 0xab9b300028625540ULL, 0x0000000000c00000ULL, },
+        { 0x5cd9800095500000ULL, 0x5e000000fe7bb00cULL, },
+        { 0xe00630004d93c708ULL, 0x80000000f52fc000ULL, },
+        { 0xddf0031893c70800ULL, 0x68000000c0000000ULL, },
+        { 0xf8018c004d93c708ULL, 0x000000002fc00000ULL, },
+        { 0xc00c6000f1c20000ULL, 0x1a000000153f52fcULL, },
+        { 0xaaeaa000b9cf8b80ULL, 0xc0000000b2514000ULL, },    /*  72  */
+        { 0x62d57550cf8b8000ULL, 0xfc00000040000000ULL, },
+        { 0x6abaa800b9cf8b80ULL, 0x8000000051400000ULL, },
+        { 0x55d54000e2e00000ULL, 0xff000000ab2b2514ULL, },
+        { 0xf164d0005e31e24eULL, 0x000000002e2a0000ULL, },
+        { 0x8278b26831e24e00ULL, 0x6000000000000000ULL, },
+        { 0x3c5934005e31e24eULL, 0x000000002a000000ULL, },
+        { 0xe2c9a00078938000ULL, 0xd8000000a942e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SLL_W(b128_pattern[i], b128_pattern[j],
+                         b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SLL_W(b128_random[i], b128_random[j],
+                         b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                      (PATTERN_INPUTS_SHORT_COUNT)) +
+                                     RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_sra_b.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_sra_b.c
new file mode 100644
index 0000000000..b1432c2806
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_sra_b.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRA.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRA.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xeaeaeaeaeaeaeaeaULL, 0xeaeaeaeaeaeaeaeaULL, },
+        { 0xfdfdfdfdfdfdfdfdULL, 0xfdfdfdfdfdfdfdfdULL, },
+        { 0xfafafafafafafafaULL, 0xfafafafafafafafaULL, },
+        { 0xf5f5f5f5f5f5f5f5ULL, 0xf5f5f5f5f5f5f5f5ULL, },
+        { 0xf5feaaf5feaaf5feULL, 0xaaf5feaaf5feaaf5ULL, },
+        { 0xfad5fffad5fffad5ULL, 0xfffad5fffad5fffaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x1515151515151515ULL, 0x1515151515151515ULL, },
+        { 0x0202020202020202ULL, 0x0202020202020202ULL, },
+        { 0x0505050505050505ULL, 0x0505050505050505ULL, },
+        { 0x0a0a0a0a0a0a0a0aULL, 0x0a0a0a0a0a0a0a0aULL, },
+        { 0x0a01550a01550a01ULL, 0x550a01550a01550aULL, },
+        { 0x052a00052a00052aULL, 0x00052a00052a0005ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xf3f3f3f3f3f3f3f3ULL, 0xf3f3f3f3f3f3f3f3ULL, },
+        { 0xfefefefefefefefeULL, 0xfefefefefefefefeULL, },
+        { 0xfcfcfcfcfcfcfcfcULL, 0xfcfcfcfcfcfcfcfcULL, },
+        { 0xf9f9f9f9f9f9f9f9ULL, 0xf9f9f9f9f9f9f9f9ULL, },
+        { 0xf9ffccf9ffccf9ffULL, 0xccf9ffccf9ffccf9ULL, },
+        { 0xfce6fffce6fffce6ULL, 0xfffce6fffce6fffcULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0c0c0c0c0c0c0c0cULL, 0x0c0c0c0c0c0c0c0cULL, },
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },
+        { 0x0303030303030303ULL, 0x0303030303030303ULL, },
+        { 0x0606060606060606ULL, 0x0606060606060606ULL, },
+        { 0x0600330600330600ULL, 0x3306003306003306ULL, },
+        { 0x0319000319000319ULL, 0x0003190003190003ULL, },
+        { 0xffff00ffff00ffffULL, 0x00ffff00ffff00ffULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xf8e30ef8e30ef8e3ULL, 0x0ef8e30ef8e30ef8ULL, },
+        { 0xfffc01fffc01fffcULL, 0x01fffc01fffc01ffULL, },
+        { 0xfef803fef803fef8ULL, 0x03fef803fef803feULL, },
+        { 0xfcf107fcf107fcf1ULL, 0x07fcf107fcf107fcULL, },
+        { 0xfcfe38fcfe38fcfeULL, 0x38fcfe38fcfe38fcULL, },
+        { 0xfec700fec700fec7ULL, 0x00fec700fec700feULL, },
+        { 0x0000ff0000ff0000ULL, 0xff0000ff0000ff00ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x071cf1071cf1071cULL, 0xf1071cf1071cf107ULL, },
+        { 0x0003fe0003fe0003ULL, 0xfe0003fe0003fe00ULL, },
+        { 0x0107fc0107fc0107ULL, 0xfc0107fc0107fc01ULL, },
+        { 0x030ef8030ef8030eULL, 0xf8030ef8030ef803ULL, },
+        { 0x0301c70301c70301ULL, 0xc70301c70301c703ULL, },
+        { 0x0138ff0138ff0138ULL, 0xff0138ff0138ff01ULL, },
+        { 0x881afffc28180240ULL, 0x09000101ff0fb000ULL, },    /*  64  */
+        { 0xf101e6f9010c0040ULL, 0x12000117ff00ec00ULL, },
+        { 0xf81afff314000a40ULL, 0x00670000ff0ffd00ULL, },
+        { 0x8800fffe00311501ULL, 0x02330b5eff1eec0cULL, },
+        { 0xfbef00064de4fe08ULL, 0x02fff700000752ffULL, },
+        { 0xfffe000c02f2ff08ULL, 0x04fff706000014ffULL, },
+        { 0xffef001826fff808ULL, 0x00f7fe00020702ffULL, },
+        { 0xfbff000301c9f100ULL, 0x00fbbb1a0a0f14fcULL, },
+        { 0xac16fefab9f3fc80ULL, 0x04fff8fffe052501ULL, },    /*  72  */
+        { 0xf501aef5fdf9ff80ULL, 0x09fff8fffd000901ULL, },
+        { 0xfa16feeadcfff180ULL, 0x00d8fffff5050101ULL, },
+        { 0xac00fefdfee7e2feULL, 0x01ecc6ffd50a0914ULL, },
+        { 0x701300045e0cff4eULL, 0xf1fff1fffe08e2faULL, },
+        { 0x0e0116090206ff4eULL, 0xe3fff1f6fd00f8faULL, },
+        { 0x071300132f00fc4eULL, 0xfff1fefff508fffaULL, },
+        { 0x700000020118f801ULL, 0xfcf888d8d410f8a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRA_B(b128_pattern[i], b128_pattern[j],
+                         b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRA_B(b128_random[i], b128_random[j],
+                         b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                      (PATTERN_INPUTS_SHORT_COUNT)) +
+                                     RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_sra_d.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_sra_d.c
new file mode 100644
index 0000000000..cb6c12dde1
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_sra_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRA.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRA.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xffffffffffeaaaaaULL, 0xffffffffffeaaaaaULL, },
+        { 0xfffffd5555555555ULL, 0xfffffd5555555555ULL, },
+        { 0xfffaaaaaaaaaaaaaULL, 0xfffaaaaaaaaaaaaaULL, },
+        { 0xfffffffffffff555ULL, 0xfffffffffffff555ULL, },
+        { 0xfffeaaaaaaaaaaaaULL, 0xfffffffff5555555ULL, },
+        { 0xffffffffffffd555ULL, 0xfffffffaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000155555ULL, 0x0000000000155555ULL, },
+        { 0x000002aaaaaaaaaaULL, 0x000002aaaaaaaaaaULL, },
+        { 0x0005555555555555ULL, 0x0005555555555555ULL, },
+        { 0x0000000000000aaaULL, 0x0000000000000aaaULL, },
+        { 0x0001555555555555ULL, 0x000000000aaaaaaaULL, },
+        { 0x0000000000002aaaULL, 0x0000000555555555ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xfffffffffff33333ULL, 0xfffffffffff33333ULL, },
+        { 0xfffffe6666666666ULL, 0xfffffe6666666666ULL, },
+        { 0xfffcccccccccccccULL, 0xfffcccccccccccccULL, },
+        { 0xfffffffffffff999ULL, 0xfffffffffffff999ULL, },
+        { 0xffff333333333333ULL, 0xfffffffff9999999ULL, },
+        { 0xffffffffffffe666ULL, 0xfffffffcccccccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x00000000000cccccULL, 0x00000000000cccccULL, },
+        { 0x0000019999999999ULL, 0x0000019999999999ULL, },
+        { 0x0003333333333333ULL, 0x0003333333333333ULL, },
+        { 0x0000000000000666ULL, 0x0000000000000666ULL, },
+        { 0x0000ccccccccccccULL, 0x0000000006666666ULL, },
+        { 0x0000000000001999ULL, 0x0000000333333333ULL, },
+        { 0xffffffffffffffffULL, 0x0000000000000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xfffffffffff8e38eULL, 0x00000000000e38e3ULL, },
+        { 0xffffff1c71c71c71ULL, 0x000001c71c71c71cULL, },
+        { 0xfffe38e38e38e38eULL, 0x00038e38e38e38e3ULL, },
+        { 0xfffffffffffffc71ULL, 0x000000000000071cULL, },
+        { 0xffff8e38e38e38e3ULL, 0x00000000071c71c7ULL, },
+        { 0xfffffffffffff1c7ULL, 0x000000038e38e38eULL, },
+        { 0x0000000000000000ULL, 0xffffffffffffffffULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x0000000000071c71ULL, 0xfffffffffff1c71cULL, },
+        { 0x000000e38e38e38eULL, 0xfffffe38e38e38e3ULL, },
+        { 0x0001c71c71c71c71ULL, 0xfffc71c71c71c71cULL, },
+        { 0x000000000000038eULL, 0xfffffffffffff8e3ULL, },
+        { 0x000071c71c71c71cULL, 0xfffffffff8e38e38ULL, },
+        { 0x0000000000000e38ULL, 0xfffffffc71c71c71ULL, },
+        { 0x886ae6cc28625540ULL, 0x0004b670b5efe7bbULL, },    /*  64  */
+        { 0xff886ae6cc286255ULL, 0x0000000000000004ULL, },
+        { 0x886ae6cc28625540ULL, 0x000004b670b5efe7ULL, },
+        { 0xfffe21ab9b30a189ULL, 0x000000004b670b5eULL, },
+        { 0xfbbe00634d93c708ULL, 0x00012f7bb1a153f5ULL, },
+        { 0xfffbbe00634d93c7ULL, 0x0000000000000001ULL, },
+        { 0xfbbe00634d93c708ULL, 0x0000012f7bb1a153ULL, },
+        { 0xffffeef8018d364fULL, 0x0000000012f7bb1aULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x00027d8c6ffab2b2ULL, },    /*  72  */
+        { 0xffac5aaeaab9cf8bULL, 0x0000000000000002ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x0000027d8c6ffab2ULL, },
+        { 0xfffeb16abaaae73eULL, 0x0000000027d8c6ffULL, },
+        { 0x704f164d5e31e24eULL, 0xfff8df188d8a942eULL, },
+        { 0x00704f164d5e31e2ULL, 0xfffffffffffffff8ULL, },
+        { 0x704f164d5e31e24eULL, 0xfffff8df188d8a94ULL, },
+        { 0x0001c13c593578c7ULL, 0xffffffff8df188d8ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRA_D(b128_pattern[i], b128_pattern[j],
+                         b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRA_D(b128_random[i], b128_random[j],
+                         b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                      (PATTERN_INPUTS_SHORT_COUNT)) +
+                                     RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_sra_h.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_sra_h.c
new file mode 100644
index 0000000000..2e505089de
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_sra_h.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRA.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRA.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xffeaffeaffeaffeaULL, 0xffeaffeaffeaffeaULL, },
+        { 0xfd55fd55fd55fd55ULL, 0xfd55fd55fd55fd55ULL, },
+        { 0xfffafffafffafffaULL, 0xfffafffafffafffaULL, },
+        { 0xf555f555f555f555ULL, 0xf555f555f555f555ULL, },
+        { 0xfffef555ffaafffeULL, 0xf555ffaafffef555ULL, },
+        { 0xd555fffaff55d555ULL, 0xfffaff55d555fffaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0015001500150015ULL, 0x0015001500150015ULL, },
+        { 0x02aa02aa02aa02aaULL, 0x02aa02aa02aa02aaULL, },
+        { 0x0005000500050005ULL, 0x0005000500050005ULL, },
+        { 0x0aaa0aaa0aaa0aaaULL, 0x0aaa0aaa0aaa0aaaULL, },
+        { 0x00010aaa00550001ULL, 0x0aaa005500010aaaULL, },
+        { 0x2aaa000500aa2aaaULL, 0x000500aa2aaa0005ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xfff3fff3fff3fff3ULL, 0xfff3fff3fff3fff3ULL, },
+        { 0xfe66fe66fe66fe66ULL, 0xfe66fe66fe66fe66ULL, },
+        { 0xfffcfffcfffcfffcULL, 0xfffcfffcfffcfffcULL, },
+        { 0xf999f999f999f999ULL, 0xf999f999f999f999ULL, },
+        { 0xfffff999ffccffffULL, 0xf999ffccfffff999ULL, },
+        { 0xe666fffcff99e666ULL, 0xfffcff99e666fffcULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x000c000c000c000cULL, 0x000c000c000c000cULL, },
+        { 0x0199019901990199ULL, 0x0199019901990199ULL, },
+        { 0x0003000300030003ULL, 0x0003000300030003ULL, },
+        { 0x0666066606660666ULL, 0x0666066606660666ULL, },
+        { 0x0000066600330000ULL, 0x0666003300000666ULL, },
+        { 0x1999000300661999ULL, 0x0003006619990003ULL, },
+        { 0xffff0000ffffffffULL, 0x0000ffffffff0000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xfff8000effe3fff8ULL, 0x000effe3fff8000eULL, },
+        { 0xff1c01c7fc71ff1cULL, 0x01c7fc71ff1c01c7ULL, },
+        { 0xfffe0003fff8fffeULL, 0x0003fff8fffe0003ULL, },
+        { 0xfc71071cf1c7fc71ULL, 0x071cf1c7fc71071cULL, },
+        { 0xffff071cff8effffULL, 0x071cff8effff071cULL, },
+        { 0xf1c70003ff1cf1c7ULL, 0x0003ff1cf1c70003ULL, },
+        { 0x0000ffff00000000ULL, 0xffff00000000ffffULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x0007fff1001c0007ULL, 0xfff1001c0007fff1ULL, },
+        { 0x00e3fe38038e00e3ULL, 0xfe38038e00e3fe38ULL, },
+        { 0x0001fffc00070001ULL, 0xfffc00070001fffcULL, },
+        { 0x038ef8e30e38038eULL, 0xf8e30e38038ef8e3ULL, },
+        { 0x0000f8e300710000ULL, 0xf8e300710000f8e3ULL, },
+        { 0x0e38fffc00e30e38ULL, 0xfffc00e30e38fffcULL, },
+        { 0xffe2fffe0a185540ULL, 0x00960000fffffffbULL, },    /*  64  */
+        { 0xfffefcd9050c0055ULL, 0x00960002fffffffbULL, },
+        { 0xffe2fff900005540ULL, 0x004b0000fffffb00ULL, },
+        { 0xffffffff14310001ULL, 0x25b3000bff9eb00cULL, },
+        { 0xfffe00001364c708ULL, 0x0025fffe00020005ULL, },
+        { 0xffff000c09b2ffc7ULL, 0x0025ffee00000005ULL, },
+        { 0xfffe00000000c708ULL, 0x0012ffff0002052fULL, },
+        { 0xffff000026c9ffffULL, 0x097bffbb054f52fcULL, },
+        { 0xffebfffaee738b80ULL, 0x004ffffffff50002ULL, },    /*  72  */
+        { 0xfffef5d5f739ff8bULL, 0x004ffff1ffff0002ULL, },
+        { 0xffebffebffff8b80ULL, 0x0027fffffff50251ULL, },
+        { 0xfffffffddce7fffeULL, 0x13ecffc6eaca2514ULL, },
+        { 0x001c0001178ce24eULL, 0xff1bfffefff5fffeULL, },
+        { 0x000102c90bc6ffe2ULL, 0xff1bffe2fffffffeULL, },
+        { 0x001c00050000e24eULL, 0xff8dfffffff5fe2aULL, },
+        { 0x000000002f18ffffULL, 0xc6f8ff88ea50e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRA_H(b128_pattern[i], b128_pattern[j],
+                         b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRA_H(b128_random[i], b128_random[j],
+                         b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                      (PATTERN_INPUTS_SHORT_COUNT)) +
+                                     RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_sra_w.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_sra_w.c
new file mode 100644
index 0000000000..f4ba1b5621
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_sra_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRA.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRA.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xffeaaaaaffeaaaaaULL, 0xffeaaaaaffeaaaaaULL, },
+        { 0xfffffd55fffffd55ULL, 0xfffffd55fffffd55ULL, },
+        { 0xfffaaaaafffaaaaaULL, 0xfffaaaaafffaaaaaULL, },
+        { 0xfffff555fffff555ULL, 0xfffff555fffff555ULL, },
+        { 0xf5555555fffeaaaaULL, 0xffffffaaf5555555ULL, },
+        { 0xfffffffaffffd555ULL, 0xff555555fffffffaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0015555500155555ULL, 0x0015555500155555ULL, },
+        { 0x000002aa000002aaULL, 0x000002aa000002aaULL, },
+        { 0x0005555500055555ULL, 0x0005555500055555ULL, },
+        { 0x00000aaa00000aaaULL, 0x00000aaa00000aaaULL, },
+        { 0x0aaaaaaa00015555ULL, 0x000000550aaaaaaaULL, },
+        { 0x0000000500002aaaULL, 0x00aaaaaa00000005ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xfff33333fff33333ULL, 0xfff33333fff33333ULL, },
+        { 0xfffffe66fffffe66ULL, 0xfffffe66fffffe66ULL, },
+        { 0xfffcccccfffcccccULL, 0xfffcccccfffcccccULL, },
+        { 0xfffff999fffff999ULL, 0xfffff999fffff999ULL, },
+        { 0xf9999999ffff3333ULL, 0xffffffccf9999999ULL, },
+        { 0xfffffffcffffe666ULL, 0xff999999fffffffcULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x000ccccc000cccccULL, 0x000ccccc000cccccULL, },
+        { 0x0000019900000199ULL, 0x0000019900000199ULL, },
+        { 0x0003333300033333ULL, 0x0003333300033333ULL, },
+        { 0x0000066600000666ULL, 0x0000066600000666ULL, },
+        { 0x066666660000ccccULL, 0x0000003306666666ULL, },
+        { 0x0000000300001999ULL, 0x0066666600000003ULL, },
+        { 0xffffffffffffffffULL, 0x00000000ffffffffULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xfff8e38effe38e38ULL, 0x000e38e3fff8e38eULL, },
+        { 0xffffff1cfffffc71ULL, 0x000001c7ffffff1cULL, },
+        { 0xfffe38e3fff8e38eULL, 0x00038e38fffe38e3ULL, },
+        { 0xfffffc71fffff1c7ULL, 0x0000071cfffffc71ULL, },
+        { 0xfc71c71cfffe38e3ULL, 0x00000038fc71c71cULL, },
+        { 0xfffffffeffffc71cULL, 0x0071c71cfffffffeULL, },
+        { 0x0000000000000000ULL, 0xffffffff00000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x00071c71001c71c7ULL, 0xfff1c71c00071c71ULL, },
+        { 0x000000e30000038eULL, 0xfffffe38000000e3ULL, },
+        { 0x0001c71c00071c71ULL, 0xfffc71c70001c71cULL, },
+        { 0x0000038e00000e38ULL, 0xfffff8e30000038eULL, },
+        { 0x038e38e30001c71cULL, 0xffffffc7038e38e3ULL, },
+        { 0x00000001000038e3ULL, 0xff8e38e300000001ULL, },
+        { 0xfff886ae28625540ULL, 0x00000001ffffe7bbULL, },    /*  64  */
+        { 0xf10d5cd900286255ULL, 0x00000012ffffffffULL, },
+        { 0xffe21ab928625540ULL, 0x00000000ffffffe7ULL, },
+        { 0xfffc43570000a189ULL, 0x0000004bfe7bb00cULL, },
+        { 0xffffbbe04d93c708ULL, 0x00000000000153f5ULL, },
+        { 0xff77c00c004d93c7ULL, 0x0000000400000001ULL, },
+        { 0xfffeef804d93c708ULL, 0x0000000000000153ULL, },
+        { 0xffffddf00001364fULL, 0x00000012153f52fcULL, },
+        { 0xfffac5aab9cf8b80ULL, 0x00000000fffab2b2ULL, },    /*  72  */
+        { 0xf58b55d5ffb9cf8bULL, 0x00000009fffffffaULL, },
+        { 0xffeb16abb9cf8b80ULL, 0x00000000fffffab2ULL, },
+        { 0xfffd62d5fffee73eULL, 0x00000027ab2b2514ULL, },
+        { 0x000704f15e31e24eULL, 0xfffffffefffa942eULL, },
+        { 0x0e09e2c9005e31e2ULL, 0xffffffe3fffffffaULL, },
+        { 0x001c13c55e31e24eULL, 0xfffffffffffffa94ULL, },
+        { 0x00038278000178c7ULL, 0xffffff8da942e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRA_W(b128_pattern[i], b128_pattern[j],
+                         b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRA_W(b128_random[i], b128_random[j],
+                         b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                      (PATTERN_INPUTS_SHORT_COUNT)) +
+                                     RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_srar_b.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_srar_b.c
new file mode 100644
index 0000000000..73e60314e5
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_srar_b.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRAR.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRAR.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000ff0000ff0000ULL, 0xff0000ff0000ff00ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xebebebebebebebebULL, 0xebebebebebebebebULL, },
+        { 0xfdfdfdfdfdfdfdfdULL, 0xfdfdfdfdfdfdfdfdULL, },
+        { 0xfbfbfbfbfbfbfbfbULL, 0xfbfbfbfbfbfbfbfbULL, },
+        { 0xf5f5f5f5f5f5f5f5ULL, 0xf5f5f5f5f5f5f5f5ULL, },
+        { 0xf5ffaaf5ffaaf5ffULL, 0xaaf5ffaaf5ffaaf5ULL, },
+        { 0xfbd5fffbd5fffbd5ULL, 0xfffbd5fffbd5fffbULL, },
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x1515151515151515ULL, 0x1515151515151515ULL, },
+        { 0x0303030303030303ULL, 0x0303030303030303ULL, },
+        { 0x0505050505050505ULL, 0x0505050505050505ULL, },
+        { 0x0b0b0b0b0b0b0b0bULL, 0x0b0b0b0b0b0b0b0bULL, },
+        { 0x0b01550b01550b01ULL, 0x550b01550b01550bULL, },
+        { 0x052b01052b01052bULL, 0x01052b01052b0105ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xf3f3f3f3f3f3f3f3ULL, 0xf3f3f3f3f3f3f3f3ULL, },
+        { 0xfefefefefefefefeULL, 0xfefefefefefefefeULL, },
+        { 0xfdfdfdfdfdfdfdfdULL, 0xfdfdfdfdfdfdfdfdULL, },
+        { 0xfafafafafafafafaULL, 0xfafafafafafafafaULL, },
+        { 0xfaffccfaffccfaffULL, 0xccfaffccfaffccfaULL, },
+        { 0xfde600fde600fde6ULL, 0x00fde600fde600fdULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0d0d0d0d0d0d0d0dULL, 0x0d0d0d0d0d0d0d0dULL, },
+        { 0x0202020202020202ULL, 0x0202020202020202ULL, },
+        { 0x0303030303030303ULL, 0x0303030303030303ULL, },
+        { 0x0606060606060606ULL, 0x0606060606060606ULL, },
+        { 0x0601330601330601ULL, 0x3306013306013306ULL, },
+        { 0x031a00031a00031aULL, 0x00031a00031a0003ULL, },
+        { 0x00ff0000ff0000ffULL, 0x0000ff0000ff0000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xf9e40ef9e40ef9e4ULL, 0x0ef9e40ef9e40ef9ULL, },
+        { 0xfffc02fffc02fffcULL, 0x02fffc02fffc02ffULL, },
+        { 0xfef904fef904fef9ULL, 0x04fef904fef904feULL, },
+        { 0xfcf207fcf207fcf2ULL, 0x07fcf207fcf207fcULL, },
+        { 0xfcfe38fcfe38fcfeULL, 0x38fcfe38fcfe38fcULL, },
+        { 0xfec700fec700fec7ULL, 0x00fec700fec700feULL, },
+        { 0x0001000001000001ULL, 0x0000010000010000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x071cf2071cf2071cULL, 0xf2071cf2071cf207ULL, },
+        { 0x0104fe0104fe0104ULL, 0xfe0104fe0104fe01ULL, },
+        { 0x0207fc0207fc0207ULL, 0xfc0207fc0207fc02ULL, },
+        { 0x040ef9040ef9040eULL, 0xf9040ef9040ef904ULL, },
+        { 0x0402c70402c70402ULL, 0xc70402c70402c704ULL, },
+        { 0x0239000239000239ULL, 0x0002390002390002ULL, },
+        { 0x881b00fd28190340ULL, 0x09010101000fb001ULL, },    /*  64  */
+        { 0xf102e6fa010c0140ULL, 0x130101180001ec01ULL, },
+        { 0xf91b00f314010b40ULL, 0x01670001000ffe01ULL, },
+        { 0x880100fe01311501ULL, 0x02340b5eff1fec0cULL, },
+        { 0xfbf000064de5fe08ULL, 0x0200f70000085200ULL, },
+        { 0xffff000c02f20008ULL, 0x0500f70701001500ULL, },
+        { 0x00f0001927fff908ULL, 0x00f7ff0003080300ULL, },
+        { 0xfbff000301caf200ULL, 0x01fcbb1a0b1015fcULL, },
+        { 0xac17fffbb9f4fc80ULL, 0x0500f900ff052501ULL, },    /*  72  */
+        { 0xf601aef5fefaff80ULL, 0x0a00f900fd000901ULL, },
+        { 0xfb17ffebdd00f180ULL, 0x00d8ff00f5050101ULL, },
+        { 0xac01fffdffe8e3feULL, 0x01ecc6ffd60b0914ULL, },
+        { 0x701400055e0cff4eULL, 0xf200f1ffff08e2faULL, },
+        { 0x0e01160a0306004eULL, 0xe300f1f6fd01f9faULL, },
+        { 0x071400132f00fc4eULL, 0xfff1fe00f508fffaULL, },
+        { 0x700100020119f901ULL, 0xfcf988d8d511f9a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRAR_B(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRAR_B(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_srar_d.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_srar_d.c
new file mode 100644
index 0000000000..b784897090
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_srar_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRAR.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRAR.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xffffffffffeaaaabULL, 0xffffffffffeaaaabULL, },
+        { 0xfffffd5555555555ULL, 0xfffffd5555555555ULL, },
+        { 0xfffaaaaaaaaaaaabULL, 0xfffaaaaaaaaaaaabULL, },
+        { 0xfffffffffffff555ULL, 0xfffffffffffff555ULL, },
+        { 0xfffeaaaaaaaaaaabULL, 0xfffffffff5555555ULL, },
+        { 0xffffffffffffd555ULL, 0xfffffffaaaaaaaabULL, },
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000155555ULL, 0x0000000000155555ULL, },
+        { 0x000002aaaaaaaaabULL, 0x000002aaaaaaaaabULL, },
+        { 0x0005555555555555ULL, 0x0005555555555555ULL, },
+        { 0x0000000000000aabULL, 0x0000000000000aabULL, },
+        { 0x0001555555555555ULL, 0x000000000aaaaaabULL, },
+        { 0x0000000000002aabULL, 0x0000000555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xfffffffffff33333ULL, 0xfffffffffff33333ULL, },
+        { 0xfffffe6666666666ULL, 0xfffffe6666666666ULL, },
+        { 0xfffccccccccccccdULL, 0xfffccccccccccccdULL, },
+        { 0xfffffffffffff99aULL, 0xfffffffffffff99aULL, },
+        { 0xffff333333333333ULL, 0xfffffffff999999aULL, },
+        { 0xffffffffffffe666ULL, 0xfffffffccccccccdULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x00000000000ccccdULL, 0x00000000000ccccdULL, },
+        { 0x000001999999999aULL, 0x000001999999999aULL, },
+        { 0x0003333333333333ULL, 0x0003333333333333ULL, },
+        { 0x0000000000000666ULL, 0x0000000000000666ULL, },
+        { 0x0000cccccccccccdULL, 0x0000000006666666ULL, },
+        { 0x000000000000199aULL, 0x0000000333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xfffffffffff8e38eULL, 0x00000000000e38e4ULL, },
+        { 0xffffff1c71c71c72ULL, 0x000001c71c71c71cULL, },
+        { 0xfffe38e38e38e38eULL, 0x00038e38e38e38e4ULL, },
+        { 0xfffffffffffffc72ULL, 0x000000000000071cULL, },
+        { 0xffff8e38e38e38e4ULL, 0x00000000071c71c7ULL, },
+        { 0xfffffffffffff1c7ULL, 0x000000038e38e38eULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x0000000000071c72ULL, 0xfffffffffff1c71cULL, },
+        { 0x000000e38e38e38eULL, 0xfffffe38e38e38e4ULL, },
+        { 0x0001c71c71c71c72ULL, 0xfffc71c71c71c71cULL, },
+        { 0x000000000000038eULL, 0xfffffffffffff8e4ULL, },
+        { 0x000071c71c71c71cULL, 0xfffffffff8e38e39ULL, },
+        { 0x0000000000000e39ULL, 0xfffffffc71c71c72ULL, },
+        { 0x886ae6cc28625540ULL, 0x0004b670b5efe7bbULL, },    /*  64  */
+        { 0xff886ae6cc286255ULL, 0x0000000000000005ULL, },
+        { 0x886ae6cc28625540ULL, 0x000004b670b5efe8ULL, },
+        { 0xfffe21ab9b30a189ULL, 0x000000004b670b5fULL, },
+        { 0xfbbe00634d93c708ULL, 0x00012f7bb1a153f5ULL, },
+        { 0xfffbbe00634d93c7ULL, 0x0000000000000001ULL, },
+        { 0xfbbe00634d93c708ULL, 0x0000012f7bb1a154ULL, },
+        { 0xffffeef8018d364fULL, 0x0000000012f7bb1aULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x00027d8c6ffab2b2ULL, },    /*  72  */
+        { 0xffac5aaeaab9cf8cULL, 0x0000000000000002ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x0000027d8c6ffab3ULL, },
+        { 0xfffeb16abaaae73eULL, 0x0000000027d8c700ULL, },
+        { 0x704f164d5e31e24eULL, 0xfff8df188d8a942eULL, },
+        { 0x00704f164d5e31e2ULL, 0xfffffffffffffff9ULL, },
+        { 0x704f164d5e31e24eULL, 0xfffff8df188d8a94ULL, },
+        { 0x0001c13c593578c8ULL, 0xffffffff8df188d9ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRAR_D(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRAR_D(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_srar_h.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_srar_h.c
new file mode 100644
index 0000000000..476d2e1004
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_srar_h.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRAR.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRAR.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xffebffebffebffebULL, 0xffebffebffebffebULL, },
+        { 0xfd55fd55fd55fd55ULL, 0xfd55fd55fd55fd55ULL, },
+        { 0xfffbfffbfffbfffbULL, 0xfffbfffbfffbfffbULL, },
+        { 0xf555f555f555f555ULL, 0xf555f555f555f555ULL, },
+        { 0xfffff555ffabffffULL, 0xf555ffabfffff555ULL, },
+        { 0xd555fffbff55d555ULL, 0xfffbff55d555fffbULL, },
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0015001500150015ULL, 0x0015001500150015ULL, },
+        { 0x02ab02ab02ab02abULL, 0x02ab02ab02ab02abULL, },
+        { 0x0005000500050005ULL, 0x0005000500050005ULL, },
+        { 0x0aab0aab0aab0aabULL, 0x0aab0aab0aab0aabULL, },
+        { 0x00010aab00550001ULL, 0x0aab005500010aabULL, },
+        { 0x2aab000500ab2aabULL, 0x000500ab2aab0005ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xfff3fff3fff3fff3ULL, 0xfff3fff3fff3fff3ULL, },
+        { 0xfe66fe66fe66fe66ULL, 0xfe66fe66fe66fe66ULL, },
+        { 0xfffdfffdfffdfffdULL, 0xfffdfffdfffdfffdULL, },
+        { 0xf99af99af99af99aULL, 0xf99af99af99af99aULL, },
+        { 0xfffff99affcdffffULL, 0xf99affcdfffff99aULL, },
+        { 0xe666fffdff9ae666ULL, 0xfffdff9ae666fffdULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x000d000d000d000dULL, 0x000d000d000d000dULL, },
+        { 0x019a019a019a019aULL, 0x019a019a019a019aULL, },
+        { 0x0003000300030003ULL, 0x0003000300030003ULL, },
+        { 0x0666066606660666ULL, 0x0666066606660666ULL, },
+        { 0x0001066600330001ULL, 0x0666003300010666ULL, },
+        { 0x199a00030066199aULL, 0x00030066199a0003ULL, },
+        { 0x00000000ffff0000ULL, 0x0000ffff00000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xfff9000effe4fff9ULL, 0x000effe4fff9000eULL, },
+        { 0xff1c01c7fc72ff1cULL, 0x01c7fc72ff1c01c7ULL, },
+        { 0xfffe0004fff9fffeULL, 0x0004fff9fffe0004ULL, },
+        { 0xfc72071cf1c7fc72ULL, 0x071cf1c7fc72071cULL, },
+        { 0x0000071cff8e0000ULL, 0x071cff8e0000071cULL, },
+        { 0xf1c70004ff1cf1c7ULL, 0x0004ff1cf1c70004ULL, },
+        { 0x0000000000010000ULL, 0x0000000100000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x0007fff2001c0007ULL, 0xfff2001c0007fff2ULL, },
+        { 0x00e4fe39038e00e4ULL, 0xfe39038e00e4fe39ULL, },
+        { 0x0002fffc00070002ULL, 0xfffc00070002fffcULL, },
+        { 0x038ef8e40e39038eULL, 0xf8e40e39038ef8e4ULL, },
+        { 0x0000f8e400720000ULL, 0xf8e400720000f8e4ULL, },
+        { 0x0e39fffc00e40e39ULL, 0xfffc00e40e39fffcULL, },
+        { 0xffe2fffe0a195540ULL, 0x009700000000fffbULL, },    /*  64  */
+        { 0xfffefcda050c0055ULL, 0x009700030000fffbULL, },
+        { 0xffe2fffa00005540ULL, 0x004b00000000fb01ULL, },
+        { 0xffffffff14310001ULL, 0x25b4000bff9fb00cULL, },
+        { 0xffff00001365c708ULL, 0x0026ffff00030005ULL, },
+        { 0x0000000c09b2ffc7ULL, 0x0026ffef00000005ULL, },
+        { 0xffff00000001c708ULL, 0x0013ffff00030530ULL, },
+        { 0x0000000026caffffULL, 0x097cffbb055052fcULL, },
+        { 0xffebfffbee748b80ULL, 0x0050fffffff50002ULL, },    /*  72  */
+        { 0xfffff5d5f73aff8cULL, 0x0050fff2ffff0002ULL, },
+        { 0xffebffecffff8b80ULL, 0x00280000fff50251ULL, },
+        { 0xfffffffddce8fffeULL, 0x13ecffc7eacb2514ULL, },
+        { 0x001c0001178ce24eULL, 0xff1cfffefff5fffeULL, },
+        { 0x000202ca0bc6ffe2ULL, 0xff1cffe2fffffffeULL, },
+        { 0x001c00060001e24eULL, 0xff8efffffff5fe2aULL, },
+        { 0x000100012f190000ULL, 0xc6f9ff89ea51e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRAR_H(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRAR_H(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_srar_w.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_srar_w.c
new file mode 100644
index 0000000000..87d717325b
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_srar_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRAR.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRAR.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xffeaaaabffeaaaabULL, 0xffeaaaabffeaaaabULL, },
+        { 0xfffffd55fffffd55ULL, 0xfffffd55fffffd55ULL, },
+        { 0xfffaaaabfffaaaabULL, 0xfffaaaabfffaaaabULL, },
+        { 0xfffff555fffff555ULL, 0xfffff555fffff555ULL, },
+        { 0xf5555555fffeaaabULL, 0xffffffabf5555555ULL, },
+        { 0xfffffffbffffd555ULL, 0xff555555fffffffbULL, },
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0015555500155555ULL, 0x0015555500155555ULL, },
+        { 0x000002ab000002abULL, 0x000002ab000002abULL, },
+        { 0x0005555500055555ULL, 0x0005555500055555ULL, },
+        { 0x00000aab00000aabULL, 0x00000aab00000aabULL, },
+        { 0x0aaaaaab00015555ULL, 0x000000550aaaaaabULL, },
+        { 0x0000000500002aabULL, 0x00aaaaab00000005ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xfff33333fff33333ULL, 0xfff33333fff33333ULL, },
+        { 0xfffffe66fffffe66ULL, 0xfffffe66fffffe66ULL, },
+        { 0xfffccccdfffccccdULL, 0xfffccccdfffccccdULL, },
+        { 0xfffff99afffff99aULL, 0xfffff99afffff99aULL, },
+        { 0xf999999affff3333ULL, 0xffffffcdf999999aULL, },
+        { 0xfffffffdffffe666ULL, 0xff99999afffffffdULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x000ccccd000ccccdULL, 0x000ccccd000ccccdULL, },
+        { 0x0000019a0000019aULL, 0x0000019a0000019aULL, },
+        { 0x0003333300033333ULL, 0x0003333300033333ULL, },
+        { 0x0000066600000666ULL, 0x0000066600000666ULL, },
+        { 0x066666660000cccdULL, 0x0000003306666666ULL, },
+        { 0x000000030000199aULL, 0x0066666600000003ULL, },
+        { 0x00000000ffffffffULL, 0x0000000000000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xfff8e38effe38e39ULL, 0x000e38e4fff8e38eULL, },
+        { 0xffffff1cfffffc72ULL, 0x000001c7ffffff1cULL, },
+        { 0xfffe38e4fff8e38eULL, 0x00038e39fffe38e4ULL, },
+        { 0xfffffc72fffff1c7ULL, 0x0000071cfffffc72ULL, },
+        { 0xfc71c71cfffe38e4ULL, 0x00000039fc71c71cULL, },
+        { 0xfffffffeffffc71cULL, 0x0071c71cfffffffeULL, },
+        { 0x0000000000000001ULL, 0x0000000000000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x00071c72001c71c7ULL, 0xfff1c71c00071c72ULL, },
+        { 0x000000e40000038eULL, 0xfffffe39000000e4ULL, },
+        { 0x0001c71c00071c72ULL, 0xfffc71c70001c71cULL, },
+        { 0x0000038e00000e39ULL, 0xfffff8e40000038eULL, },
+        { 0x038e38e40001c71cULL, 0xffffffc7038e38e4ULL, },
+        { 0x00000002000038e4ULL, 0xff8e38e400000002ULL, },
+        { 0xfff886ae28625540ULL, 0x00000001ffffe7bbULL, },    /*  64  */
+        { 0xf10d5cda00286255ULL, 0x0000001300000000ULL, },
+        { 0xffe21aba28625540ULL, 0x00000001ffffffe8ULL, },
+        { 0xfffc43570000a189ULL, 0x0000004bfe7bb00cULL, },
+        { 0xffffbbe04d93c708ULL, 0x00000000000153f5ULL, },
+        { 0xff77c00c004d93c7ULL, 0x0000000500000001ULL, },
+        { 0xfffeef804d93c708ULL, 0x0000000000000154ULL, },
+        { 0xffffddf00001364fULL, 0x00000013153f52fcULL, },
+        { 0xfffac5abb9cf8b80ULL, 0x00000001fffab2b2ULL, },    /*  72  */
+        { 0xf58b55d5ffb9cf8cULL, 0x0000000afffffffbULL, },
+        { 0xffeb16acb9cf8b80ULL, 0x00000000fffffab3ULL, },
+        { 0xfffd62d5fffee73eULL, 0x00000028ab2b2514ULL, },
+        { 0x000704f15e31e24eULL, 0xfffffffefffa942eULL, },
+        { 0x0e09e2ca005e31e2ULL, 0xffffffe3fffffffbULL, },
+        { 0x001c13c65e31e24eULL, 0xfffffffffffffa94ULL, },
+        { 0x00038279000178c8ULL, 0xffffff8ea942e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRAR_W(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRAR_W(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_srl_b.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_srl_b.c
new file mode 100644
index 0000000000..e0d460bc00
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_srl_b.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRL.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRL.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x3f3f3f3f3f3f3f3fULL, 0x3f3f3f3f3f3f3f3fULL, },
+        { 0x0707070707070707ULL, 0x0707070707070707ULL, },
+        { 0x0f0f0f0f0f0f0f0fULL, 0x0f0f0f0f0f0f0f0fULL, },
+        { 0x1f1f1f1f1f1f1f1fULL, 0x1f1f1f1f1f1f1f1fULL, },
+        { 0x1f03ff1f03ff1f03ULL, 0xff1f03ff1f03ff1fULL, },
+        { 0x0f7f010f7f010f7fULL, 0x010f7f010f7f010fULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x2a2a2a2a2a2a2a2aULL, 0x2a2a2a2a2a2a2a2aULL, },
+        { 0x0505050505050505ULL, 0x0505050505050505ULL, },
+        { 0x0a0a0a0a0a0a0a0aULL, 0x0a0a0a0a0a0a0a0aULL, },
+        { 0x1515151515151515ULL, 0x1515151515151515ULL, },
+        { 0x1502aa1502aa1502ULL, 0xaa1502aa1502aa15ULL, },
+        { 0x0a55010a55010a55ULL, 0x010a55010a55010aULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x1515151515151515ULL, 0x1515151515151515ULL, },
+        { 0x0202020202020202ULL, 0x0202020202020202ULL, },
+        { 0x0505050505050505ULL, 0x0505050505050505ULL, },
+        { 0x0a0a0a0a0a0a0a0aULL, 0x0a0a0a0a0a0a0a0aULL, },
+        { 0x0a01550a01550a01ULL, 0x550a01550a01550aULL, },
+        { 0x052a00052a00052aULL, 0x00052a00052a0005ULL, },
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0606060606060606ULL, 0x0606060606060606ULL, },
+        { 0x0c0c0c0c0c0c0c0cULL, 0x0c0c0c0c0c0c0c0cULL, },
+        { 0x1919191919191919ULL, 0x1919191919191919ULL, },
+        { 0x1903cc1903cc1903ULL, 0xcc1903cc1903cc19ULL, },
+        { 0x0c66010c66010c66ULL, 0x010c66010c66010cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0c0c0c0c0c0c0c0cULL, 0x0c0c0c0c0c0c0c0cULL, },
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },
+        { 0x0303030303030303ULL, 0x0303030303030303ULL, },
+        { 0x0606060606060606ULL, 0x0606060606060606ULL, },
+        { 0x0600330600330600ULL, 0x3306003306003306ULL, },
+        { 0x0319000319000319ULL, 0x0003190003190003ULL, },
+        { 0x0101000101000101ULL, 0x0001010001010001ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38230e38230e3823ULL, 0x0e38230e38230e38ULL, },
+        { 0x0704010704010704ULL, 0x0107040107040107ULL, },
+        { 0x0e08030e08030e08ULL, 0x030e08030e08030eULL, },
+        { 0x1c11071c11071c11ULL, 0x071c11071c11071cULL, },
+        { 0x1c02381c02381c02ULL, 0x381c02381c02381cULL, },
+        { 0x0e47000e47000e47ULL, 0x000e47000e47000eULL, },
+        { 0x0000010000010000ULL, 0x0100000100000100ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x071c31071c31071cULL, 0x31071c31071c3107ULL, },
+        { 0x0003060003060003ULL, 0x0600030600030600ULL, },
+        { 0x01070c01070c0107ULL, 0x0c01070c01070c01ULL, },
+        { 0x030e18030e18030eULL, 0x18030e18030e1803ULL, },
+        { 0x0301c70301c70301ULL, 0xc70301c70301c703ULL, },
+        { 0x0138010138010138ULL, 0x0101380101380101ULL, },
+        { 0x881a030c28180240ULL, 0x09000101030fb000ULL, },    /*  64  */
+        { 0x1101e619010c0040ULL, 0x1200011707002c00ULL, },
+        { 0x081a033314000a40ULL, 0x006700001f0f0500ULL, },
+        { 0x8800030600311501ULL, 0x02330b5e7f1e2c0cULL, },
+        { 0xfb2f00064d240608ULL, 0x020117000007520fULL, },
+        { 0x1f02000c02120108ULL, 0x040117060000140fULL, },
+        { 0x0f2f001826011808ULL, 0x00f702000207020fULL, },
+        { 0xfb01000301493100ULL, 0x007bbb1a0a0f14fcULL, },
+        { 0xac16020ab9330480ULL, 0x0401180302052501ULL, },    /*  72  */
+        { 0x1501ae1505190180ULL, 0x0901183f05000901ULL, },
+        { 0x0a16022a5c011180ULL, 0x00d8030115050101ULL, },
+        { 0xac00020502672202ULL, 0x016cc6ff550a0914ULL, },
+        { 0x701300045e0c074eULL, 0x110111030208e20aULL, },
+        { 0x0e0116090206014eULL, 0x230111360500380aULL, },
+        { 0x071300132f001c4eULL, 0x01f102011508070aULL, },
+        { 0x7000000201183801ULL, 0x047888d8541038a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRL_B(b128_pattern[i], b128_pattern[j],
+                         b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRL_B(b128_random[i], b128_random[j],
+                         b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                      (PATTERN_INPUTS_SHORT_COUNT)) +
+                                     RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_srl_d.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_srl_d.c
new file mode 100644
index 0000000000..e4b88a3cc4
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_srl_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRL.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRL.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x00000000003fffffULL, 0x00000000003fffffULL, },
+        { 0x000007ffffffffffULL, 0x000007ffffffffffULL, },
+        { 0x000fffffffffffffULL, 0x000fffffffffffffULL, },
+        { 0x0000000000001fffULL, 0x0000000000001fffULL, },
+        { 0x0003ffffffffffffULL, 0x000000001fffffffULL, },
+        { 0x0000000000007fffULL, 0x0000000fffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x00000000002aaaaaULL, 0x00000000002aaaaaULL, },
+        { 0x0000055555555555ULL, 0x0000055555555555ULL, },
+        { 0x000aaaaaaaaaaaaaULL, 0x000aaaaaaaaaaaaaULL, },
+        { 0x0000000000001555ULL, 0x0000000000001555ULL, },
+        { 0x0002aaaaaaaaaaaaULL, 0x0000000015555555ULL, },
+        { 0x0000000000005555ULL, 0x0000000aaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000155555ULL, 0x0000000000155555ULL, },
+        { 0x000002aaaaaaaaaaULL, 0x000002aaaaaaaaaaULL, },
+        { 0x0005555555555555ULL, 0x0005555555555555ULL, },
+        { 0x0000000000000aaaULL, 0x0000000000000aaaULL, },
+        { 0x0001555555555555ULL, 0x000000000aaaaaaaULL, },
+        { 0x0000000000002aaaULL, 0x0000000555555555ULL, },
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0000000000333333ULL, 0x0000000000333333ULL, },
+        { 0x0000066666666666ULL, 0x0000066666666666ULL, },
+        { 0x000cccccccccccccULL, 0x000cccccccccccccULL, },
+        { 0x0000000000001999ULL, 0x0000000000001999ULL, },
+        { 0x0003333333333333ULL, 0x0000000019999999ULL, },
+        { 0x0000000000006666ULL, 0x0000000cccccccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x00000000000cccccULL, 0x00000000000cccccULL, },
+        { 0x0000019999999999ULL, 0x0000019999999999ULL, },
+        { 0x0003333333333333ULL, 0x0003333333333333ULL, },
+        { 0x0000000000000666ULL, 0x0000000000000666ULL, },
+        { 0x0000ccccccccccccULL, 0x0000000006666666ULL, },
+        { 0x0000000000001999ULL, 0x0000000333333333ULL, },
+        { 0x0000000000000001ULL, 0x0000000000000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x000000000038e38eULL, 0x00000000000e38e3ULL, },
+        { 0x0000071c71c71c71ULL, 0x000001c71c71c71cULL, },
+        { 0x000e38e38e38e38eULL, 0x00038e38e38e38e3ULL, },
+        { 0x0000000000001c71ULL, 0x000000000000071cULL, },
+        { 0x00038e38e38e38e3ULL, 0x00000000071c71c7ULL, },
+        { 0x00000000000071c7ULL, 0x000000038e38e38eULL, },
+        { 0x0000000000000000ULL, 0x0000000000000001ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x0000000000071c71ULL, 0x000000000031c71cULL, },
+        { 0x000000e38e38e38eULL, 0x00000638e38e38e3ULL, },
+        { 0x0001c71c71c71c71ULL, 0x000c71c71c71c71cULL, },
+        { 0x000000000000038eULL, 0x00000000000018e3ULL, },
+        { 0x000071c71c71c71cULL, 0x0000000018e38e38ULL, },
+        { 0x0000000000000e38ULL, 0x0000000c71c71c71ULL, },
+        { 0x886ae6cc28625540ULL, 0x0004b670b5efe7bbULL, },    /*  64  */
+        { 0x00886ae6cc286255ULL, 0x0000000000000004ULL, },
+        { 0x886ae6cc28625540ULL, 0x000004b670b5efe7ULL, },
+        { 0x000221ab9b30a189ULL, 0x000000004b670b5eULL, },
+        { 0xfbbe00634d93c708ULL, 0x00012f7bb1a153f5ULL, },
+        { 0x00fbbe00634d93c7ULL, 0x0000000000000001ULL, },
+        { 0xfbbe00634d93c708ULL, 0x0000012f7bb1a153ULL, },
+        { 0x0003eef8018d364fULL, 0x0000000012f7bb1aULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x00027d8c6ffab2b2ULL, },    /*  72  */
+        { 0x00ac5aaeaab9cf8bULL, 0x0000000000000002ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x0000027d8c6ffab2ULL, },
+        { 0x0002b16abaaae73eULL, 0x0000000027d8c6ffULL, },
+        { 0x704f164d5e31e24eULL, 0x0008df188d8a942eULL, },
+        { 0x00704f164d5e31e2ULL, 0x0000000000000008ULL, },
+        { 0x704f164d5e31e24eULL, 0x000008df188d8a94ULL, },
+        { 0x0001c13c593578c7ULL, 0x000000008df188d8ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRL_D(b128_pattern[i], b128_pattern[j],
+                         b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRL_D(b128_random[i], b128_random[j],
+                         b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                      (PATTERN_INPUTS_SHORT_COUNT)) +
+                                     RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_srl_h.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_srl_h.c
new file mode 100644
index 0000000000..1eb64403c2
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_srl_h.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRL.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRL.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x003f003f003f003fULL, 0x003f003f003f003fULL, },
+        { 0x07ff07ff07ff07ffULL, 0x07ff07ff07ff07ffULL, },
+        { 0x000f000f000f000fULL, 0x000f000f000f000fULL, },
+        { 0x1fff1fff1fff1fffULL, 0x1fff1fff1fff1fffULL, },
+        { 0x00031fff00ff0003ULL, 0x1fff00ff00031fffULL, },
+        { 0x7fff000f01ff7fffULL, 0x000f01ff7fff000fULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x002a002a002a002aULL, 0x002a002a002a002aULL, },
+        { 0x0555055505550555ULL, 0x0555055505550555ULL, },
+        { 0x000a000a000a000aULL, 0x000a000a000a000aULL, },
+        { 0x1555155515551555ULL, 0x1555155515551555ULL, },
+        { 0x0002155500aa0002ULL, 0x155500aa00021555ULL, },
+        { 0x5555000a01555555ULL, 0x000a01555555000aULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0015001500150015ULL, 0x0015001500150015ULL, },
+        { 0x02aa02aa02aa02aaULL, 0x02aa02aa02aa02aaULL, },
+        { 0x0005000500050005ULL, 0x0005000500050005ULL, },
+        { 0x0aaa0aaa0aaa0aaaULL, 0x0aaa0aaa0aaa0aaaULL, },
+        { 0x00010aaa00550001ULL, 0x0aaa005500010aaaULL, },
+        { 0x2aaa000500aa2aaaULL, 0x000500aa2aaa0005ULL, },
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0033003300330033ULL, 0x0033003300330033ULL, },
+        { 0x0666066606660666ULL, 0x0666066606660666ULL, },
+        { 0x000c000c000c000cULL, 0x000c000c000c000cULL, },
+        { 0x1999199919991999ULL, 0x1999199919991999ULL, },
+        { 0x0003199900cc0003ULL, 0x199900cc00031999ULL, },
+        { 0x6666000c01996666ULL, 0x000c01996666000cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x000c000c000c000cULL, 0x000c000c000c000cULL, },
+        { 0x0199019901990199ULL, 0x0199019901990199ULL, },
+        { 0x0003000300030003ULL, 0x0003000300030003ULL, },
+        { 0x0666066606660666ULL, 0x0666066606660666ULL, },
+        { 0x0000066600330000ULL, 0x0666003300000666ULL, },
+        { 0x1999000300661999ULL, 0x0003006619990003ULL, },
+        { 0x0001000000010001ULL, 0x0000000100010000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x0038000e00230038ULL, 0x000e00230038000eULL, },
+        { 0x071c01c70471071cULL, 0x01c70471071c01c7ULL, },
+        { 0x000e00030008000eULL, 0x00030008000e0003ULL, },
+        { 0x1c71071c11c71c71ULL, 0x071c11c71c71071cULL, },
+        { 0x0003071c008e0003ULL, 0x071c008e0003071cULL, },
+        { 0x71c70003011c71c7ULL, 0x0003011c71c70003ULL, },
+        { 0x0000000100000000ULL, 0x0001000000000001ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x00070031001c0007ULL, 0x0031001c00070031ULL, },
+        { 0x00e30638038e00e3ULL, 0x0638038e00e30638ULL, },
+        { 0x0001000c00070001ULL, 0x000c00070001000cULL, },
+        { 0x038e18e30e38038eULL, 0x18e30e38038e18e3ULL, },
+        { 0x000018e300710000ULL, 0x18e30071000018e3ULL, },
+        { 0x0e38000c00e30e38ULL, 0x000c00e30e38000cULL, },
+        { 0x0022000e0a185540ULL, 0x00960000001f000bULL, },    /*  64  */
+        { 0x00021cd9050c0055ULL, 0x009600020001000bULL, },
+        { 0x0022003900005540ULL, 0x004b0000001f0b00ULL, },
+        { 0x0001000714310001ULL, 0x25b3000b3f9eb00cULL, },
+        { 0x003e00001364c708ULL, 0x0025000200020005ULL, },
+        { 0x0003000c09b200c7ULL, 0x0025002e00000005ULL, },
+        { 0x003e00000000c708ULL, 0x001200010002052fULL, },
+        { 0x0001000026c90003ULL, 0x097b00bb054f52fcULL, },
+        { 0x002b000a2e738b80ULL, 0x004f000300150002ULL, },    /*  72  */
+        { 0x000215d51739008bULL, 0x004f003100010002ULL, },
+        { 0x002b002b00018b80ULL, 0x0027000100150251ULL, },
+        { 0x000100055ce70002ULL, 0x13ec00c62aca2514ULL, },
+        { 0x001c0001178ce24eULL, 0x011b00020015000eULL, },
+        { 0x000102c90bc600e2ULL, 0x011b00220001000eULL, },
+        { 0x001c00050000e24eULL, 0x008d000100150e2aULL, },
+        { 0x000000002f180003ULL, 0x46f800882a50e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRL_H(b128_pattern[i], b128_pattern[j],
+                         b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRL_H(b128_random[i], b128_random[j],
+                         b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                      (PATTERN_INPUTS_SHORT_COUNT)) +
+                                     RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_srl_w.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_srl_w.c
new file mode 100644
index 0000000000..794833cc15
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_srl_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRL.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRL.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x003fffff003fffffULL, 0x003fffff003fffffULL, },
+        { 0x000007ff000007ffULL, 0x000007ff000007ffULL, },
+        { 0x000fffff000fffffULL, 0x000fffff000fffffULL, },
+        { 0x00001fff00001fffULL, 0x00001fff00001fffULL, },
+        { 0x1fffffff0003ffffULL, 0x000000ff1fffffffULL, },
+        { 0x0000000f00007fffULL, 0x01ffffff0000000fULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x002aaaaa002aaaaaULL, 0x002aaaaa002aaaaaULL, },
+        { 0x0000055500000555ULL, 0x0000055500000555ULL, },
+        { 0x000aaaaa000aaaaaULL, 0x000aaaaa000aaaaaULL, },
+        { 0x0000155500001555ULL, 0x0000155500001555ULL, },
+        { 0x155555550002aaaaULL, 0x000000aa15555555ULL, },
+        { 0x0000000a00005555ULL, 0x015555550000000aULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0015555500155555ULL, 0x0015555500155555ULL, },
+        { 0x000002aa000002aaULL, 0x000002aa000002aaULL, },
+        { 0x0005555500055555ULL, 0x0005555500055555ULL, },
+        { 0x00000aaa00000aaaULL, 0x00000aaa00000aaaULL, },
+        { 0x0aaaaaaa00015555ULL, 0x000000550aaaaaaaULL, },
+        { 0x0000000500002aaaULL, 0x00aaaaaa00000005ULL, },
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0033333300333333ULL, 0x0033333300333333ULL, },
+        { 0x0000066600000666ULL, 0x0000066600000666ULL, },
+        { 0x000ccccc000cccccULL, 0x000ccccc000cccccULL, },
+        { 0x0000199900001999ULL, 0x0000199900001999ULL, },
+        { 0x1999999900033333ULL, 0x000000cc19999999ULL, },
+        { 0x0000000c00006666ULL, 0x019999990000000cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x000ccccc000cccccULL, 0x000ccccc000cccccULL, },
+        { 0x0000019900000199ULL, 0x0000019900000199ULL, },
+        { 0x0003333300033333ULL, 0x0003333300033333ULL, },
+        { 0x0000066600000666ULL, 0x0000066600000666ULL, },
+        { 0x066666660000ccccULL, 0x0000003306666666ULL, },
+        { 0x0000000300001999ULL, 0x0066666600000003ULL, },
+        { 0x0000000100000001ULL, 0x0000000000000001ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x0038e38e00238e38ULL, 0x000e38e30038e38eULL, },
+        { 0x0000071c00000471ULL, 0x000001c70000071cULL, },
+        { 0x000e38e30008e38eULL, 0x00038e38000e38e3ULL, },
+        { 0x00001c71000011c7ULL, 0x0000071c00001c71ULL, },
+        { 0x1c71c71c000238e3ULL, 0x000000381c71c71cULL, },
+        { 0x0000000e0000471cULL, 0x0071c71c0000000eULL, },
+        { 0x0000000000000000ULL, 0x0000000100000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x00071c71001c71c7ULL, 0x0031c71c00071c71ULL, },
+        { 0x000000e30000038eULL, 0x00000638000000e3ULL, },
+        { 0x0001c71c00071c71ULL, 0x000c71c70001c71cULL, },
+        { 0x0000038e00000e38ULL, 0x000018e30000038eULL, },
+        { 0x038e38e30001c71cULL, 0x000000c7038e38e3ULL, },
+        { 0x00000001000038e3ULL, 0x018e38e300000001ULL, },
+        { 0x000886ae28625540ULL, 0x00000001000fe7bbULL, },    /*  64  */
+        { 0x110d5cd900286255ULL, 0x000000120000000fULL, },
+        { 0x00221ab928625540ULL, 0x0000000000000fe7ULL, },
+        { 0x000443570000a189ULL, 0x0000004bfe7bb00cULL, },
+        { 0x000fbbe04d93c708ULL, 0x00000000000153f5ULL, },
+        { 0x1f77c00c004d93c7ULL, 0x0000000400000001ULL, },
+        { 0x003eef804d93c708ULL, 0x0000000000000153ULL, },
+        { 0x0007ddf00001364fULL, 0x00000012153f52fcULL, },
+        { 0x000ac5aab9cf8b80ULL, 0x00000000000ab2b2ULL, },    /*  72  */
+        { 0x158b55d500b9cf8bULL, 0x000000090000000aULL, },
+        { 0x002b16abb9cf8b80ULL, 0x0000000000000ab2ULL, },
+        { 0x000562d50002e73eULL, 0x00000027ab2b2514ULL, },
+        { 0x000704f15e31e24eULL, 0x00000002000a942eULL, },
+        { 0x0e09e2c9005e31e2ULL, 0x000000230000000aULL, },
+        { 0x001c13c55e31e24eULL, 0x0000000100000a94ULL, },
+        { 0x00038278000178c7ULL, 0x0000008da942e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRL_W(b128_pattern[i], b128_pattern[j],
+                         b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRL_W(b128_random[i], b128_random[j],
+                         b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                      (PATTERN_INPUTS_SHORT_COUNT)) +
+                                     RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_b.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_b.c
new file mode 100644
index 0000000000..4a900174cc
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_b.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRLR.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRLR.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0202020202020202ULL, 0x0202020202020202ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x4040404040404040ULL, 0x4040404040404040ULL, },
+        { 0x0808080808080808ULL, 0x0808080808080808ULL, },
+        { 0x1010101010101010ULL, 0x1010101010101010ULL, },
+        { 0x2020202020202020ULL, 0x2020202020202020ULL, },
+        { 0x2004ff2004ff2004ULL, 0xff2004ff2004ff20ULL, },
+        { 0x1080021080021080ULL, 0x0210800210800210ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x2b2b2b2b2b2b2b2bULL, 0x2b2b2b2b2b2b2b2bULL, },
+        { 0x0505050505050505ULL, 0x0505050505050505ULL, },
+        { 0x0b0b0b0b0b0b0b0bULL, 0x0b0b0b0b0b0b0b0bULL, },
+        { 0x1515151515151515ULL, 0x1515151515151515ULL, },
+        { 0x1503aa1503aa1503ULL, 0xaa1503aa1503aa15ULL, },
+        { 0x0b55010b55010b55ULL, 0x010b55010b55010bULL, },
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x1515151515151515ULL, 0x1515151515151515ULL, },
+        { 0x0303030303030303ULL, 0x0303030303030303ULL, },
+        { 0x0505050505050505ULL, 0x0505050505050505ULL, },
+        { 0x0b0b0b0b0b0b0b0bULL, 0x0b0b0b0b0b0b0b0bULL, },
+        { 0x0b01550b01550b01ULL, 0x550b01550b01550bULL, },
+        { 0x052b01052b01052bULL, 0x01052b01052b0105ULL, },
+        { 0x0202020202020202ULL, 0x0202020202020202ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0606060606060606ULL, 0x0606060606060606ULL, },
+        { 0x0d0d0d0d0d0d0d0dULL, 0x0d0d0d0d0d0d0d0dULL, },
+        { 0x1a1a1a1a1a1a1a1aULL, 0x1a1a1a1a1a1a1a1aULL, },
+        { 0x1a03cc1a03cc1a03ULL, 0xcc1a03cc1a03cc1aULL, },
+        { 0x0d66020d66020d66ULL, 0x020d66020d66020dULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0d0d0d0d0d0d0d0dULL, 0x0d0d0d0d0d0d0d0dULL, },
+        { 0x0202020202020202ULL, 0x0202020202020202ULL, },
+        { 0x0303030303030303ULL, 0x0303030303030303ULL, },
+        { 0x0606060606060606ULL, 0x0606060606060606ULL, },
+        { 0x0601330601330601ULL, 0x3306013306013306ULL, },
+        { 0x031a00031a00031aULL, 0x00031a00031a0003ULL, },
+        { 0x0201000201000201ULL, 0x0002010002010002ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x39240e39240e3924ULL, 0x0e39240e39240e39ULL, },
+        { 0x0704020704020704ULL, 0x0207040207040207ULL, },
+        { 0x0e09040e09040e09ULL, 0x040e09040e09040eULL, },
+        { 0x1c12071c12071c12ULL, 0x071c12071c12071cULL, },
+        { 0x1c02381c02381c02ULL, 0x381c02381c02381cULL, },
+        { 0x0e47000e47000e47ULL, 0x000e47000e47000eULL, },
+        { 0x0001020001020001ULL, 0x0200010200010200ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x071c32071c32071cULL, 0x32071c32071c3207ULL, },
+        { 0x0104060104060104ULL, 0x0601040601040601ULL, },
+        { 0x02070c02070c0207ULL, 0x0c02070c02070c02ULL, },
+        { 0x040e19040e19040eULL, 0x19040e19040e1904ULL, },
+        { 0x0402c70402c70402ULL, 0xc70402c70402c704ULL, },
+        { 0x0239020239020239ULL, 0x0202390202390202ULL, },
+        { 0x881b040d28190340ULL, 0x09010101040fb001ULL, },    /*  64  */
+        { 0x1102e61a010c0140ULL, 0x1301011808012c01ULL, },
+        { 0x091b043314010b40ULL, 0x01670001200f0601ULL, },
+        { 0x8801040601311501ULL, 0x02340b5e7f1f2c0cULL, },
+        { 0xfb3000064d250608ULL, 0x0202170000085210ULL, },
+        { 0x1f03000c02120208ULL, 0x0502170701001510ULL, },
+        { 0x1030001927011908ULL, 0x00f7030003080310ULL, },
+        { 0xfb010003014a3200ULL, 0x017cbb1a0b1015fcULL, },
+        { 0xac17030bb9340480ULL, 0x0502190403052501ULL, },    /*  72  */
+        { 0x1601ae15061a0180ULL, 0x0a02194005000901ULL, },
+        { 0x0b17032b5d021180ULL, 0x00d8030215050101ULL, },
+        { 0xac01030503682302ULL, 0x016cc6ff560b0914ULL, },
+        { 0x701400055e0c074eULL, 0x120211030308e20aULL, },
+        { 0x0e01160a0306024eULL, 0x230211360501390aULL, },
+        { 0x071400132f001c4eULL, 0x01f102021508070aULL, },
+        { 0x7001000201193901ULL, 0x047988d8551139a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRLR_B(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRLR_B(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_d.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_d.c
new file mode 100644
index 0000000000..69ba01f0d9
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRLR.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRLR.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000002ULL, 0x0000000000000002ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000400000ULL, 0x0000000000400000ULL, },
+        { 0x0000080000000000ULL, 0x0000080000000000ULL, },
+        { 0x0010000000000000ULL, 0x0010000000000000ULL, },
+        { 0x0000000000002000ULL, 0x0000000000002000ULL, },
+        { 0x0004000000000000ULL, 0x0000000020000000ULL, },
+        { 0x0000000000008000ULL, 0x0000001000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x00000000002aaaabULL, 0x00000000002aaaabULL, },
+        { 0x0000055555555555ULL, 0x0000055555555555ULL, },
+        { 0x000aaaaaaaaaaaabULL, 0x000aaaaaaaaaaaabULL, },
+        { 0x0000000000001555ULL, 0x0000000000001555ULL, },
+        { 0x0002aaaaaaaaaaabULL, 0x0000000015555555ULL, },
+        { 0x0000000000005555ULL, 0x0000000aaaaaaaabULL, },
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000155555ULL, 0x0000000000155555ULL, },
+        { 0x000002aaaaaaaaabULL, 0x000002aaaaaaaaabULL, },
+        { 0x0005555555555555ULL, 0x0005555555555555ULL, },
+        { 0x0000000000000aabULL, 0x0000000000000aabULL, },
+        { 0x0001555555555555ULL, 0x000000000aaaaaabULL, },
+        { 0x0000000000002aabULL, 0x0000000555555555ULL, },
+        { 0x0000000000000002ULL, 0x0000000000000002ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0000000000333333ULL, 0x0000000000333333ULL, },
+        { 0x0000066666666666ULL, 0x0000066666666666ULL, },
+        { 0x000ccccccccccccdULL, 0x000ccccccccccccdULL, },
+        { 0x000000000000199aULL, 0x000000000000199aULL, },
+        { 0x0003333333333333ULL, 0x000000001999999aULL, },
+        { 0x0000000000006666ULL, 0x0000000ccccccccdULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x00000000000ccccdULL, 0x00000000000ccccdULL, },
+        { 0x000001999999999aULL, 0x000001999999999aULL, },
+        { 0x0003333333333333ULL, 0x0003333333333333ULL, },
+        { 0x0000000000000666ULL, 0x0000000000000666ULL, },
+        { 0x0000cccccccccccdULL, 0x0000000006666666ULL, },
+        { 0x000000000000199aULL, 0x0000000333333333ULL, },
+        { 0x0000000000000002ULL, 0x0000000000000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x000000000038e38eULL, 0x00000000000e38e4ULL, },
+        { 0x0000071c71c71c72ULL, 0x000001c71c71c71cULL, },
+        { 0x000e38e38e38e38eULL, 0x00038e38e38e38e4ULL, },
+        { 0x0000000000001c72ULL, 0x000000000000071cULL, },
+        { 0x00038e38e38e38e4ULL, 0x00000000071c71c7ULL, },
+        { 0x00000000000071c7ULL, 0x000000038e38e38eULL, },
+        { 0x0000000000000000ULL, 0x0000000000000002ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x0000000000071c72ULL, 0x000000000031c71cULL, },
+        { 0x000000e38e38e38eULL, 0x00000638e38e38e4ULL, },
+        { 0x0001c71c71c71c72ULL, 0x000c71c71c71c71cULL, },
+        { 0x000000000000038eULL, 0x00000000000018e4ULL, },
+        { 0x000071c71c71c71cULL, 0x0000000018e38e39ULL, },
+        { 0x0000000000000e39ULL, 0x0000000c71c71c72ULL, },
+        { 0x886ae6cc28625540ULL, 0x0004b670b5efe7bbULL, },    /*  64  */
+        { 0x00886ae6cc286255ULL, 0x0000000000000005ULL, },
+        { 0x886ae6cc28625540ULL, 0x000004b670b5efe8ULL, },
+        { 0x000221ab9b30a189ULL, 0x000000004b670b5fULL, },
+        { 0xfbbe00634d93c708ULL, 0x00012f7bb1a153f5ULL, },
+        { 0x00fbbe00634d93c7ULL, 0x0000000000000001ULL, },
+        { 0xfbbe00634d93c708ULL, 0x0000012f7bb1a154ULL, },
+        { 0x0003eef8018d364fULL, 0x0000000012f7bb1aULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x00027d8c6ffab2b2ULL, },    /*  72  */
+        { 0x00ac5aaeaab9cf8cULL, 0x0000000000000002ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x0000027d8c6ffab3ULL, },
+        { 0x0002b16abaaae73eULL, 0x0000000027d8c700ULL, },
+        { 0x704f164d5e31e24eULL, 0x0008df188d8a942eULL, },
+        { 0x00704f164d5e31e2ULL, 0x0000000000000009ULL, },
+        { 0x704f164d5e31e24eULL, 0x000008df188d8a94ULL, },
+        { 0x0001c13c593578c8ULL, 0x000000008df188d9ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRLR_D(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRLR_D(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_h.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_h.c
new file mode 100644
index 0000000000..514a143a6e
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_h.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRLR.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRLR.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0002000200020002ULL, 0x0002000200020002ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0040004000400040ULL, 0x0040004000400040ULL, },
+        { 0x0800080008000800ULL, 0x0800080008000800ULL, },
+        { 0x0010001000100010ULL, 0x0010001000100010ULL, },
+        { 0x2000200020002000ULL, 0x2000200020002000ULL, },
+        { 0x0004200001000004ULL, 0x2000010000042000ULL, },
+        { 0x8000001002008000ULL, 0x0010020080000010ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x002b002b002b002bULL, 0x002b002b002b002bULL, },
+        { 0x0555055505550555ULL, 0x0555055505550555ULL, },
+        { 0x000b000b000b000bULL, 0x000b000b000b000bULL, },
+        { 0x1555155515551555ULL, 0x1555155515551555ULL, },
+        { 0x0003155500ab0003ULL, 0x155500ab00031555ULL, },
+        { 0x5555000b01555555ULL, 0x000b01555555000bULL, },
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0015001500150015ULL, 0x0015001500150015ULL, },
+        { 0x02ab02ab02ab02abULL, 0x02ab02ab02ab02abULL, },
+        { 0x0005000500050005ULL, 0x0005000500050005ULL, },
+        { 0x0aab0aab0aab0aabULL, 0x0aab0aab0aab0aabULL, },
+        { 0x00010aab00550001ULL, 0x0aab005500010aabULL, },
+        { 0x2aab000500ab2aabULL, 0x000500ab2aab0005ULL, },
+        { 0x0002000200020002ULL, 0x0002000200020002ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0033003300330033ULL, 0x0033003300330033ULL, },
+        { 0x0666066606660666ULL, 0x0666066606660666ULL, },
+        { 0x000d000d000d000dULL, 0x000d000d000d000dULL, },
+        { 0x199a199a199a199aULL, 0x199a199a199a199aULL, },
+        { 0x0003199a00cd0003ULL, 0x199a00cd0003199aULL, },
+        { 0x6666000d019a6666ULL, 0x000d019a6666000dULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x000d000d000d000dULL, 0x000d000d000d000dULL, },
+        { 0x019a019a019a019aULL, 0x019a019a019a019aULL, },
+        { 0x0003000300030003ULL, 0x0003000300030003ULL, },
+        { 0x0666066606660666ULL, 0x0666066606660666ULL, },
+        { 0x0001066600330001ULL, 0x0666003300010666ULL, },
+        { 0x199a00030066199aULL, 0x00030066199a0003ULL, },
+        { 0x0002000000010002ULL, 0x0000000100020000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x0039000e00240039ULL, 0x000e00240039000eULL, },
+        { 0x071c01c70472071cULL, 0x01c70472071c01c7ULL, },
+        { 0x000e00040009000eULL, 0x00040009000e0004ULL, },
+        { 0x1c72071c11c71c72ULL, 0x071c11c71c72071cULL, },
+        { 0x0004071c008e0004ULL, 0x071c008e0004071cULL, },
+        { 0x71c70004011c71c7ULL, 0x0004011c71c70004ULL, },
+        { 0x0000000200010000ULL, 0x0002000100000002ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x00070032001c0007ULL, 0x0032001c00070032ULL, },
+        { 0x00e40639038e00e4ULL, 0x0639038e00e40639ULL, },
+        { 0x0002000c00070002ULL, 0x000c00070002000cULL, },
+        { 0x038e18e40e39038eULL, 0x18e40e39038e18e4ULL, },
+        { 0x000018e400720000ULL, 0x18e40072000018e4ULL, },
+        { 0x0e39000c00e40e39ULL, 0x000c00e40e39000cULL, },
+        { 0x0022000e0a195540ULL, 0x009700000020000bULL, },    /*  64  */
+        { 0x00021cda050c0055ULL, 0x009700030002000bULL, },
+        { 0x0022003a00005540ULL, 0x004b000000200b01ULL, },
+        { 0x0001000714310001ULL, 0x25b4000b3f9fb00cULL, },
+        { 0x003f00001365c708ULL, 0x0026000300030005ULL, },
+        { 0x0004000c09b200c7ULL, 0x0026002f00000005ULL, },
+        { 0x003f00000001c708ULL, 0x0013000100030530ULL, },
+        { 0x0002000026ca0003ULL, 0x097c00bb055052fcULL, },
+        { 0x002b000b2e748b80ULL, 0x0050000300150002ULL, },    /*  72  */
+        { 0x000315d5173a008cULL, 0x0050003200010002ULL, },
+        { 0x002b002c00018b80ULL, 0x0028000200150251ULL, },
+        { 0x000100055ce80002ULL, 0x13ec00c72acb2514ULL, },
+        { 0x001c0001178ce24eULL, 0x011c00020015000eULL, },
+        { 0x000202ca0bc600e2ULL, 0x011c00220001000eULL, },
+        { 0x001c00060001e24eULL, 0x008e000100150e2aULL, },
+        { 0x000100012f190004ULL, 0x46f900892a51e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRLR_H(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRLR_H(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_w.c b/tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_w.c
new file mode 100644
index 0000000000..940fd6f318
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/shift/test_msa_srlr_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction SRLR.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "SRLR.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000200000002ULL, 0x0000000200000002ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0040000000400000ULL, 0x0040000000400000ULL, },
+        { 0x0000080000000800ULL, 0x0000080000000800ULL, },
+        { 0x0010000000100000ULL, 0x0010000000100000ULL, },
+        { 0x0000200000002000ULL, 0x0000200000002000ULL, },
+        { 0x2000000000040000ULL, 0x0000010020000000ULL, },
+        { 0x0000001000008000ULL, 0x0200000000000010ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x002aaaab002aaaabULL, 0x002aaaab002aaaabULL, },
+        { 0x0000055500000555ULL, 0x0000055500000555ULL, },
+        { 0x000aaaab000aaaabULL, 0x000aaaab000aaaabULL, },
+        { 0x0000155500001555ULL, 0x0000155500001555ULL, },
+        { 0x155555550002aaabULL, 0x000000ab15555555ULL, },
+        { 0x0000000b00005555ULL, 0x015555550000000bULL, },
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0015555500155555ULL, 0x0015555500155555ULL, },
+        { 0x000002ab000002abULL, 0x000002ab000002abULL, },
+        { 0x0005555500055555ULL, 0x0005555500055555ULL, },
+        { 0x00000aab00000aabULL, 0x00000aab00000aabULL, },
+        { 0x0aaaaaab00015555ULL, 0x000000550aaaaaabULL, },
+        { 0x0000000500002aabULL, 0x00aaaaab00000005ULL, },
+        { 0x0000000200000002ULL, 0x0000000200000002ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0033333300333333ULL, 0x0033333300333333ULL, },
+        { 0x0000066600000666ULL, 0x0000066600000666ULL, },
+        { 0x000ccccd000ccccdULL, 0x000ccccd000ccccdULL, },
+        { 0x0000199a0000199aULL, 0x0000199a0000199aULL, },
+        { 0x1999999a00033333ULL, 0x000000cd1999999aULL, },
+        { 0x0000000d00006666ULL, 0x0199999a0000000dULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x000ccccd000ccccdULL, 0x000ccccd000ccccdULL, },
+        { 0x0000019a0000019aULL, 0x0000019a0000019aULL, },
+        { 0x0003333300033333ULL, 0x0003333300033333ULL, },
+        { 0x0000066600000666ULL, 0x0000066600000666ULL, },
+        { 0x066666660000cccdULL, 0x0000003306666666ULL, },
+        { 0x000000030000199aULL, 0x0066666600000003ULL, },
+        { 0x0000000200000001ULL, 0x0000000000000002ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x0038e38e00238e39ULL, 0x000e38e40038e38eULL, },
+        { 0x0000071c00000472ULL, 0x000001c70000071cULL, },
+        { 0x000e38e40008e38eULL, 0x00038e39000e38e4ULL, },
+        { 0x00001c72000011c7ULL, 0x0000071c00001c72ULL, },
+        { 0x1c71c71c000238e4ULL, 0x000000391c71c71cULL, },
+        { 0x0000000e0000471cULL, 0x0071c71c0000000eULL, },
+        { 0x0000000000000001ULL, 0x0000000200000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x00071c72001c71c7ULL, 0x0031c71c00071c72ULL, },
+        { 0x000000e40000038eULL, 0x00000639000000e4ULL, },
+        { 0x0001c71c00071c72ULL, 0x000c71c70001c71cULL, },
+        { 0x0000038e00000e39ULL, 0x000018e40000038eULL, },
+        { 0x038e38e40001c71cULL, 0x000000c7038e38e4ULL, },
+        { 0x00000002000038e4ULL, 0x018e38e400000002ULL, },
+        { 0x000886ae28625540ULL, 0x00000001000fe7bbULL, },    /*  64  */
+        { 0x110d5cda00286255ULL, 0x0000001300000010ULL, },
+        { 0x00221aba28625540ULL, 0x0000000100000fe8ULL, },
+        { 0x000443570000a189ULL, 0x0000004bfe7bb00cULL, },
+        { 0x000fbbe04d93c708ULL, 0x00000000000153f5ULL, },
+        { 0x1f77c00c004d93c7ULL, 0x0000000500000001ULL, },
+        { 0x003eef804d93c708ULL, 0x0000000000000154ULL, },
+        { 0x0007ddf00001364fULL, 0x00000013153f52fcULL, },
+        { 0x000ac5abb9cf8b80ULL, 0x00000001000ab2b2ULL, },    /*  72  */
+        { 0x158b55d500b9cf8cULL, 0x0000000a0000000bULL, },
+        { 0x002b16acb9cf8b80ULL, 0x0000000000000ab3ULL, },
+        { 0x000562d50002e73eULL, 0x00000028ab2b2514ULL, },
+        { 0x000704f15e31e24eULL, 0x00000002000a942eULL, },
+        { 0x0e09e2ca005e31e2ULL, 0x000000230000000bULL, },
+        { 0x001c13c65e31e24eULL, 0x0000000100000a94ULL, },
+        { 0x00038279000178c8ULL, 0x0000008ea942e2a0ULL, },
+    };
+
+    reset_msa_registers();
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRLR_W(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_SRLR_W(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/test_msa_compile.sh b/tests/tcg/mips/user/ase/msa/test_msa_compile.sh
new file mode 100755
index 0000000000..2a39d892f0
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/test_msa_compile.sh
@@ -0,0 +1,558 @@
+
+#
+# Bit Count
+# ---------
+#
+/opt/img/bin/mips-img-linux-gnu-gcc       bit-count/test_msa_nloc_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_nloc_b
+/opt/img/bin/mips-img-linux-gnu-gcc       bit-count/test_msa_nloc_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_nloc_h
+/opt/img/bin/mips-img-linux-gnu-gcc       bit-count/test_msa_nloc_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_nloc_w
+/opt/img/bin/mips-img-linux-gnu-gcc       bit-count/test_msa_nloc_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_nloc_d
+/opt/img/bin/mips-img-linux-gnu-gcc       bit-count/test_msa_nlzc_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_nlzc_b
+/opt/img/bin/mips-img-linux-gnu-gcc       bit-count/test_msa_nlzc_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_nlzc_h
+/opt/img/bin/mips-img-linux-gnu-gcc       bit-count/test_msa_nlzc_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_nlzc_w
+/opt/img/bin/mips-img-linux-gnu-gcc       bit-count/test_msa_nlzc_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_nlzc_d
+/opt/img/bin/mips-img-linux-gnu-gcc       bit-count/test_msa_pcnt_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_pcnt_b
+/opt/img/bin/mips-img-linux-gnu-gcc       bit-count/test_msa_pcnt_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_pcnt_h
+/opt/img/bin/mips-img-linux-gnu-gcc       bit-count/test_msa_pcnt_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_pcnt_w
+/opt/img/bin/mips-img-linux-gnu-gcc       bit-count/test_msa_pcnt_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_pcnt_d
+
+#
+# Bit move
+# --------
+#
+
+#
+# Bit Set
+# -------
+#
+/opt/img/bin/mips-img-linux-gnu-gcc         bit-set/test_msa_bclr_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_bclr_b
+/opt/img/bin/mips-img-linux-gnu-gcc         bit-set/test_msa_bclr_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_bclr_h
+/opt/img/bin/mips-img-linux-gnu-gcc         bit-set/test_msa_bclr_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_bclr_w
+/opt/img/bin/mips-img-linux-gnu-gcc         bit-set/test_msa_bclr_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_bclr_d
+/opt/img/bin/mips-img-linux-gnu-gcc         bit-set/test_msa_bneg_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_bneg_b
+/opt/img/bin/mips-img-linux-gnu-gcc         bit-set/test_msa_bneg_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_bneg_h
+/opt/img/bin/mips-img-linux-gnu-gcc         bit-set/test_msa_bneg_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_bneg_w
+/opt/img/bin/mips-img-linux-gnu-gcc         bit-set/test_msa_bneg_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_bneg_d
+/opt/img/bin/mips-img-linux-gnu-gcc         bit-set/test_msa_bset_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_bset_b
+/opt/img/bin/mips-img-linux-gnu-gcc         bit-set/test_msa_bset_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_bset_h
+/opt/img/bin/mips-img-linux-gnu-gcc         bit-set/test_msa_bset_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_bset_w
+/opt/img/bin/mips-img-linux-gnu-gcc         bit-set/test_msa_bset_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_bset_d
+
+#
+# Fixed Multiply
+# --------------
+#
+/opt/img/bin/mips-img-linux-gnu-gcc    fixed-multiply/test_msa_mul_q_h.c       \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mul_q_h
+/opt/img/bin/mips-img-linux-gnu-gcc    fixed-multiply/test_msa_mul_q_w.c       \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mul_q_w
+/opt/img/bin/mips-img-linux-gnu-gcc    fixed-multiply/test_msa_mulr_q_h.c      \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mulr_q_h
+/opt/img/bin/mips-img-linux-gnu-gcc    fixed-multiply/test_msa_mulr_q_w.c      \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mulr_q_w
+
+#
+# Float Max Min
+# -------------
+#
+/opt/img/bin/mips-img-linux-gnu-gcc         float-max-min/test_msa_fmax_a_w.c  \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o        /tmp/test_msa_fmax_a_w
+/opt/img/bin/mips-img-linux-gnu-gcc         float-max-min/test_msa_fmax_a_d.c  \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o        /tmp/test_msa_fmax_a_d
+/opt/img/bin/mips-img-linux-gnu-gcc         float-max-min/test_msa_fmax_w.c    \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o        /tmp/test_msa_fmax_w
+/opt/img/bin/mips-img-linux-gnu-gcc         float-max-min/test_msa_fmax_d.c    \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o        /tmp/test_msa_fmax_d
+/opt/img/bin/mips-img-linux-gnu-gcc         float-max-min/test_msa_fmin_a_w.c  \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o        /tmp/test_msa_fmin_a_w
+/opt/img/bin/mips-img-linux-gnu-gcc         float-max-min/test_msa_fmin_a_d.c  \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o        /tmp/test_msa_fmin_a_d
+/opt/img/bin/mips-img-linux-gnu-gcc         float-max-min/test_msa_fmin_w.c    \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o        /tmp/test_msa_fmin_w
+/opt/img/bin/mips-img-linux-gnu-gcc         float-max-min/test_msa_fmin_d.c    \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o        /tmp/test_msa_fmin_d
+
+#
+# Int Add
+# -------
+#
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_add_a_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_add_a_b
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_add_a_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_add_a_h
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_add_a_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_add_a_w
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_add_a_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_add_a_d
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_adds_a_b.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_adds_a_b
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_adds_a_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_adds_a_h
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_adds_a_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_adds_a_w
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_adds_a_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_adds_a_d
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_adds_s_b.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_adds_s_b
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_adds_s_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_adds_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_adds_s_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_adds_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_adds_s_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_adds_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_adds_u_b.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_adds_u_b
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_adds_u_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_adds_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_adds_u_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_adds_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_adds_u_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_adds_u_d
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_addv_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_addv_b
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_addv_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_addv_h
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_addv_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_addv_w
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_addv_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_addv_d
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_hadd_s_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_hadd_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_hadd_s_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_hadd_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_hadd_s_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_hadd_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_hadd_u_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_hadd_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_hadd_u_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_hadd_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc         int-add/test_msa_hadd_u_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_hadd_u_d
+
+#
+# Int Average
+# -----------
+#
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_ave_s_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ave_s_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_ave_s_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ave_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_ave_s_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ave_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_ave_s_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ave_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_ave_u_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ave_u_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_ave_u_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ave_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_ave_u_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ave_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_ave_u_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ave_u_d
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_aver_s_b.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_aver_s_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_aver_s_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_aver_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_aver_s_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_aver_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_aver_s_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_aver_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_aver_u_b.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_aver_u_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_aver_u_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_aver_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_aver_u_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_aver_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-average/test_msa_aver_u_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_aver_u_d
+
+#
+# Int Compare
+# -----------
+#
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_ceq_b.c           \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ceq_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_ceq_h.c           \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ceq_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_ceq_w.c           \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ceq_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_ceq_d.c           \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ceq_d
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_cle_s_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_cle_s_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_cle_s_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_cle_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_cle_s_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_cle_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_cle_s_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_cle_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_cle_u_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_cle_u_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_cle_u_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_cle_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_cle_u_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_cle_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_cle_u_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_cle_u_d
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_clt_s_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_clt_s_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_clt_s_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_clt_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_clt_s_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_clt_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_clt_s_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_clt_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_clt_u_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_clt_u_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_clt_u_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_clt_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_clt_u_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_clt_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-compare/test_msa_clt_u_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_clt_u_d
+
+#
+# Int Divide
+# ----------
+#
+/opt/img/bin/mips-img-linux-gnu-gcc      int-divide/test_msa_div_s_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_div_s_b
+/opt/img/bin/mips-img-linux-gnu-gcc      int-divide/test_msa_div_s_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_div_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc      int-divide/test_msa_div_s_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_div_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc      int-divide/test_msa_div_s_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_div_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc      int-divide/test_msa_div_u_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_div_u_b
+/opt/img/bin/mips-img-linux-gnu-gcc      int-divide/test_msa_div_u_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_div_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc      int-divide/test_msa_div_u_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_div_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc      int-divide/test_msa_div_u_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_div_u_d
+
+#
+# Int Dot Product
+# ---------------
+#
+/opt/img/bin/mips-img-linux-gnu-gcc int-dot-product/test_msa_dotp_s_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_dotp_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc int-dot-product/test_msa_dotp_s_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_dotp_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc int-dot-product/test_msa_dotp_s_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_dotp_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc int-dot-product/test_msa_dotp_u_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_dotp_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc int-dot-product/test_msa_dotp_u_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_dotp_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc int-dot-product/test_msa_dotp_u_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_dotp_u_d
+
+#
+# Int Max Min
+# -----------
+#
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_max_a_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_max_a_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_max_a_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_max_a_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_max_a_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_max_a_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_max_a_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_max_a_d
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_max_s_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_max_s_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_max_s_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_max_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_max_s_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_max_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_max_s_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_max_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_max_u_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_max_u_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_max_u_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_max_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_max_u_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_max_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_max_u_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_max_u_d
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_min_a_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_min_a_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_min_a_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_min_a_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_min_a_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_min_a_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_min_a_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_min_a_d
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_min_s_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_min_s_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_min_s_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_min_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_min_s_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_min_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_min_s_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_min_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_min_u_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_min_u_b
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_min_u_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_min_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_min_u_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_min_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc     int-max-min/test_msa_min_u_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_min_u_d
+
+#
+# Int Modulo
+# ----------
+#
+/opt/img/bin/mips-img-linux-gnu-gcc      int-modulo/test_msa_mod_s_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mod_s_b
+/opt/img/bin/mips-img-linux-gnu-gcc      int-modulo/test_msa_mod_s_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mod_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc      int-modulo/test_msa_mod_s_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mod_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc      int-modulo/test_msa_mod_s_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mod_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc      int-modulo/test_msa_mod_u_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mod_u_b
+/opt/img/bin/mips-img-linux-gnu-gcc      int-modulo/test_msa_mod_u_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mod_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc      int-modulo/test_msa_mod_u_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mod_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc      int-modulo/test_msa_mod_u_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mod_u_d
+
+#
+# Int Multiply
+# ------------
+#
+/opt/img/bin/mips-img-linux-gnu-gcc    int-multiply/test_msa_mulv_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mulv_b
+/opt/img/bin/mips-img-linux-gnu-gcc    int-multiply/test_msa_mulv_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mulv_h
+/opt/img/bin/mips-img-linux-gnu-gcc    int-multiply/test_msa_mulv_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mulv_w
+/opt/img/bin/mips-img-linux-gnu-gcc    int-multiply/test_msa_mulv_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_mulv_d
+
+#
+# Int Subtract
+# ------------
+#
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_asub_s_b.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_asub_s_b
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_asub_s_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_asub_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_asub_s_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_asub_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_asub_s_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_asub_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_asub_u_b.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_asub_u_b
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_asub_u_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_asub_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_asub_u_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_asub_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_asub_u_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_asub_u_d
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_hsub_s_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_hsub_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_hsub_s_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_hsub_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_hsub_s_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_hsub_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_hsub_u_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_hsub_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_hsub_u_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_hsub_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_hsub_u_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_hsub_u_d
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subs_s_b.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subs_s_b
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subs_s_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subs_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subs_s_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subs_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subs_s_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subs_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subs_u_b.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subs_u_b
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subs_u_h.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subs_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subs_u_w.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subs_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subs_u_d.c        \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subs_u_d
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subsuu_s_b.c      \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subsuu_s_b
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subsuu_s_h.c      \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subsuu_s_h
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subsuu_s_w.c      \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subsuu_s_w
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subsuu_s_d.c      \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subsuu_s_d
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subsus_u_b.c      \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subsus_u_b
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subsus_u_h.c      \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subsus_u_h
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subsus_u_w.c      \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subsus_u_w
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subsus_u_d.c      \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subsus_u_d
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subv_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subv_b
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subv_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subv_h
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subv_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subv_w
+/opt/img/bin/mips-img-linux-gnu-gcc    int-subtract/test_msa_subv_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_subv_d
+
+#
+# Interleave
+# ----------
+#
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvev_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvev_b
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvev_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvev_h
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvev_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvev_w
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvev_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvev_d
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvod_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvod_b
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvod_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvod_h
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvod_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvod_w
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvod_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvod_d
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvl_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvl_b
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvl_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvl_h
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvl_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvl_w
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvl_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvl_d
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvr_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvr_b
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvr_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvr_h
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvr_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvr_w
+/opt/img/bin/mips-img-linux-gnu-gcc      interleave/test_msa_ilvr_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_ilvr_d
+
+#
+# Logic
+# -----
+#
+/opt/img/bin/mips-img-linux-gnu-gcc           logic/test_msa_and_v.c           \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_and_v
+/opt/img/bin/mips-img-linux-gnu-gcc           logic/test_msa_nor_v.c           \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_nor_v
+/opt/img/bin/mips-img-linux-gnu-gcc           logic/test_msa_or_v.c            \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_or_v
+/opt/img/bin/mips-img-linux-gnu-gcc           logic/test_msa_xor_v.c           \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_xor_v
+
+#
+# Pack
+# ----
+#
+/opt/img/bin/mips-img-linux-gnu-gcc            pack/test_msa_pckev_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_pckev_b
+/opt/img/bin/mips-img-linux-gnu-gcc            pack/test_msa_pckev_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_pckev_h
+/opt/img/bin/mips-img-linux-gnu-gcc            pack/test_msa_pckev_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_pckev_w
+/opt/img/bin/mips-img-linux-gnu-gcc            pack/test_msa_pckev_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_pckev_d
+/opt/img/bin/mips-img-linux-gnu-gcc            pack/test_msa_pckod_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_pckod_b
+/opt/img/bin/mips-img-linux-gnu-gcc            pack/test_msa_pckod_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_pckod_h
+/opt/img/bin/mips-img-linux-gnu-gcc            pack/test_msa_pckod_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_pckod_w
+/opt/img/bin/mips-img-linux-gnu-gcc            pack/test_msa_pckod_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_pckod_d
+/opt/img/bin/mips-img-linux-gnu-gcc            pack/test_msa_vshf_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_vshf_b
+/opt/img/bin/mips-img-linux-gnu-gcc            pack/test_msa_vshf_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_vshf_h
+/opt/img/bin/mips-img-linux-gnu-gcc            pack/test_msa_vshf_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_vshf_w
+/opt/img/bin/mips-img-linux-gnu-gcc            pack/test_msa_vshf_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o  /tmp/test_msa_vshf_d
+
+#
+# Shift
+# -----
+#
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_sll_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_sll_b
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_sll_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_sll_h
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_sll_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_sll_w
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_sll_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_sll_d
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_sra_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_sra_b
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_sra_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_sra_h
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_sra_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_sra_w
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_sra_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_sra_d
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_srar_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_srar_b
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_srar_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_srar_h
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_srar_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_srar_w
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_srar_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_srar_d
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_srl_b.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_srl_b
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_srl_h.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_srl_h
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_srl_w.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_srl_w
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_srl_d.c          \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_srl_d
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_srlr_b.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_srlr_b
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_srlr_h.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_srlr_h
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_srlr_w.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_srlr_w
+/opt/img/bin/mips-img-linux-gnu-gcc            shift/test_msa_srlr_d.c         \
+-EL -static -mabi=64 -march=mips64r6 -mmsa -o   /tmp/test_msa_srlr_d
diff --git a/tests/tcg/mips/user/ase/msa/test_msa_run.sh b/tests/tcg/mips/user/ase/msa/test_msa_run.sh
new file mode 100755
index 0000000000..278d93b614
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/test_msa_run.sh
@@ -0,0 +1,326 @@
+PATH_TO_QEMU="../../../../../../mips64el-linux-user/qemu-mips64el"
+
+
+#
+# Bit Count
+# ---------
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_nloc_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_nloc_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_nloc_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_nloc_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_nlzc_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_nlzc_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_nlzc_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_nlzc_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_pcnt_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_pcnt_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_pcnt_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_pcnt_d
+
+#
+# Bit move
+# --------
+#
+
+#
+# Bit Set
+# -------
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_bclr_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_bclr_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_bclr_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_bclr_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_bneg_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_bneg_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_bneg_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_bneg_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_bset_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_bset_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_bset_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_bset_d
+
+#
+# Fixed Multiply
+# --------------
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mul_q_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mul_q_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mulr_q_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mulr_q_w
+
+#
+# Float Max Min
+# -------------
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_fmax_a_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_fmax_a_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_fmax_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_fmax_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_fmin_a_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_fmin_a_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_fmin_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_fmin_d
+
+#
+# Int Add
+# -------
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_add_a_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_add_a_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_add_a_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_add_a_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_adds_a_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_adds_a_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_adds_a_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_adds_a_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_adds_s_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_adds_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_adds_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_adds_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_adds_u_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_adds_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_adds_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_adds_u_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_addv_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_addv_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_addv_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_addv_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_hadd_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_hadd_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_hadd_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_hadd_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_hadd_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_hadd_u_d
+
+#
+# Int Average
+# -----------
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ave_s_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ave_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ave_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ave_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ave_u_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ave_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ave_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ave_u_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_aver_s_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_aver_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_aver_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_aver_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_aver_u_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_aver_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_aver_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_aver_u_d
+
+#
+# Int Compare
+# -----------
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ceq_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ceq_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ceq_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ceq_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_cle_s_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_cle_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_cle_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_cle_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_cle_u_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_cle_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_cle_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_cle_u_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_clt_s_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_clt_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_clt_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_clt_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_clt_u_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_clt_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_clt_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_clt_u_d
+
+#
+# Int Divide
+# ----------
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_div_s_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_div_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_div_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_div_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_div_u_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_div_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_div_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_div_u_d
+
+#
+# Int Dot Product
+# ---------------
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_dotp_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_dotp_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_dotp_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_dotp_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_dotp_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_dotp_u_d
+
+#
+# Int Max Min
+# -----------
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_max_a_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_max_a_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_max_a_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_max_a_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_max_s_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_max_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_max_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_max_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_max_u_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_max_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_max_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_max_u_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_min_a_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_min_a_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_min_a_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_min_a_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_min_s_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_min_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_min_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_min_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_min_u_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_min_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_min_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_min_u_d
+
+#
+# Int Modulo
+# ----------
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mod_s_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mod_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mod_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mod_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mod_u_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mod_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mod_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mod_u_d
+
+#
+# Int Multiply
+# ------------
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mulv_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mulv_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mulv_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_mulv_d
+
+#
+# Int Subtract
+# ------------
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_asub_s_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_asub_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_asub_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_asub_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_asub_u_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_asub_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_asub_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_asub_u_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_hsub_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_hsub_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_hsub_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_hsub_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_hsub_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_hsub_u_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subs_s_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subs_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subs_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subs_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subs_u_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subs_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subs_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subs_u_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subsuu_s_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subsuu_s_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subsuu_s_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subsuu_s_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subsus_u_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subsus_u_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subsus_u_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subsus_u_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subv_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subv_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subv_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_subv_d
+
+#
+# Interleave
+# ----------
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvev_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvev_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvev_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvev_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvod_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvod_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvod_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvod_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvl_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvl_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvl_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvl_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvr_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvr_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvr_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_ilvr_d
+
+#
+# Logic
+# -----
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_and_v
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_nor_v
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_or_v
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_xor_v
+
+#
+# Pack
+# ----
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_pckev_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_pckev_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_pckev_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_pckev_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_pckod_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_pckod_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_pckod_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_pckod_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_vshf_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_vshf_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_vshf_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_vshf_d
+
+#
+# Shift
+# -----
+#
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_sll_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_sll_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_sll_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_sll_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_sra_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_sra_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_sra_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_sra_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_srar_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_srar_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_srar_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_srar_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_srl_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_srl_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_srl_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_srl_d
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_srlr_b
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_srlr_h
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_srlr_w
+$PATH_TO_QEMU -cpu I6400  /tmp/test_msa_srlr_d
diff --git a/tests/tcg/multiarch/system/Makefile.softmmu-target b/tests/tcg/multiarch/system/Makefile.softmmu-target
new file mode 100644
index 0000000000..db4bbeda44
--- /dev/null
+++ b/tests/tcg/multiarch/system/Makefile.softmmu-target
@@ -0,0 +1,14 @@
+# -*- Mode: makefile -*-
+#
+# Multiarch system tests
+#
+# We just collect the tests together here and rely on the actual guest
+# architecture to add to the test dependancies and deal with the
+# complications of building.
+#
+
+MULTIARCH_SYSTEM_SRC=$(SRC_PATH)/tests/tcg/multiarch/system
+VPATH+=$(MULTIARCH_SYSTEM_SRC)
+
+MULTIARCH_TEST_SRCS=$(wildcard $(MULTIARCH_SYSTEM_SRC)/*.c)
+MULTIARCH_TESTS = $(patsubst $(MULTIARCH_SYSTEM_SRC)/%.c, %, $(MULTIARCH_TEST_SRCS))
diff --git a/tests/tcg/i386/system/hello.c b/tests/tcg/multiarch/system/hello.c
index 821dc0ef09..821dc0ef09 100644
--- a/tests/tcg/i386/system/hello.c
+++ b/tests/tcg/multiarch/system/hello.c
diff --git a/tests/tcg/multiarch/system/memory.c b/tests/tcg/multiarch/system/memory.c
new file mode 100644
index 0000000000..dc1d8a98ff
--- /dev/null
+++ b/tests/tcg/multiarch/system/memory.c
@@ -0,0 +1,449 @@
+/*
+ * Memory Test
+ *
+ * This is intended to test the softmmu code and ensure we properly
+ * behave across normal and unaligned accesses across several pages.
+ * We are not replicating memory tests for stuck bits and other
+ * hardware level failures but looking for issues with different size
+ * accesses when access is:
+ *
+ *   - unaligned at various sizes (if -DCHECK_UNALIGNED set)
+ *   - spanning a (softmmu) page
+ *   - sign extension when loading
+ */
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <minilib.h>
+
+#ifndef CHECK_UNALIGNED
+# error "Target does not specify CHECK_UNALIGNED"
+#endif
+
+#define PAGE_SIZE 4096             /* nominal 4k "pages" */
+#define TEST_SIZE (PAGE_SIZE * 4)  /* 4 pages */
+
+#define ARRAY_SIZE(x) ((sizeof(x) / sizeof((x)[0])))
+
+__attribute__((aligned(PAGE_SIZE)))
+static uint8_t test_data[TEST_SIZE];
+
+typedef void (*init_ufn) (int offset);
+typedef bool (*read_ufn) (int offset);
+typedef bool (*read_sfn) (int offset, bool nf);
+
+static void pdot(int count)
+{
+    if (count % 128 == 0) {
+        ml_printf(".");
+    }
+}
+
+/*
+ * Helper macros for shift/extract so we can keep our endian handling
+ * in one place.
+ */
+#define BYTE_SHIFT(b, pos) ((uint64_t)b << (pos * 8))
+#define BYTE_EXTRACT(b, pos) ((b >> (pos * 8)) & 0xff)
+
+/*
+ * Fill the data with ascending value bytes.
+ *
+ * Currently we only support Little Endian machines so write in
+ * ascending address order. When we read higher address bytes should
+ * either be zero or higher than the lower bytes.
+ */
+
+static void init_test_data_u8(int unused_offset)
+{
+    uint8_t count = 0, *ptr = &test_data[0];
+    int i;
+    (void)(unused_offset);
+
+    ml_printf("Filling test area with u8:");
+    for (i = 0; i < TEST_SIZE; i++) {
+        *ptr++ = count++;
+        pdot(i);
+    }
+    ml_printf("done\n");
+}
+
+/*
+ * Full the data with alternating positive and negative bytes. This
+ * should mean for reads larger than a byte all subsequent reads will
+ * stay either negative or positive. We never write 0.
+ */
+
+static inline uint8_t get_byte(int index, bool neg)
+{
+    return neg ? (0xff << (index % 7)) : (0xff >> ((index % 6) + 1));
+}
+
+static void init_test_data_s8(bool neg_first)
+{
+    uint8_t top, bottom, *ptr = &test_data[0];
+    int i;
+
+    ml_printf("Filling test area with s8 pairs (%s):",
+              neg_first ? "neg first" : "pos first");
+    for (i = 0; i < TEST_SIZE / 2; i++) {
+        *ptr++ = get_byte(i, neg_first);
+        *ptr++ = get_byte(i, !neg_first);
+        pdot(i);
+    }
+    ml_printf("done\n");
+}
+
+/*
+ * Zero the first few bytes of the test data in preparation for
+ * new offset values.
+ */
+static void reset_start_data(int offset)
+{
+    uint32_t *ptr = (uint32_t *) &test_data[0];
+    int i;
+    for (i = 0; i < offset; i++) {
+        *ptr++ = 0;
+    }
+}
+
+static void init_test_data_u16(int offset)
+{
+    uint8_t count = 0;
+    uint16_t word, *ptr = (uint16_t *) &test_data[offset];
+    const int max = (TEST_SIZE - offset) / sizeof(word);
+    int i;
+
+    ml_printf("Filling test area with u16 (offset %d, %p):", offset, ptr);
+
+    reset_start_data(offset);
+
+    for (i = 0; i < max; i++) {
+        uint8_t low = count++, high = count++;
+        word = BYTE_SHIFT(high, 1) | BYTE_SHIFT(low, 0);
+        *ptr++ = word;
+        pdot(i);
+    }
+    ml_printf("done @ %p\n", ptr);
+}
+
+static void init_test_data_u32(int offset)
+{
+    uint8_t count = 0;
+    uint32_t word, *ptr = (uint32_t *) &test_data[offset];
+    const int max = (TEST_SIZE - offset) / sizeof(word);
+    int i;
+
+    ml_printf("Filling test area with u32 (offset %d, %p):", offset, ptr);
+
+    reset_start_data(offset);
+
+    for (i = 0; i < max; i++) {
+        uint8_t b4 = count++, b3 = count++;
+        uint8_t b2 = count++, b1 = count++;
+        word = BYTE_SHIFT(b1, 3) | BYTE_SHIFT(b2, 2) | BYTE_SHIFT(b3, 1) | b4;
+        *ptr++ = word;
+        pdot(i);
+    }
+    ml_printf("done @ %p\n", ptr);
+}
+
+static void init_test_data_u64(int offset)
+{
+    uint8_t count = 0;
+    uint64_t word, *ptr = (uint64_t *) &test_data[offset];
+    const int max = (TEST_SIZE - offset) / sizeof(word);
+    int i;
+
+    ml_printf("Filling test area with u64 (offset %d, %p):", offset, ptr);
+
+    reset_start_data(offset);
+
+    for (i = 0; i < max; i++) {
+        uint8_t b8 = count++, b7 = count++;
+        uint8_t b6 = count++, b5 = count++;
+        uint8_t b4 = count++, b3 = count++;
+        uint8_t b2 = count++, b1 = count++;
+        word = BYTE_SHIFT(b1, 7) | BYTE_SHIFT(b2, 6) | BYTE_SHIFT(b3, 5) |
+               BYTE_SHIFT(b4, 4) | BYTE_SHIFT(b5, 3) | BYTE_SHIFT(b6, 2) |
+               BYTE_SHIFT(b7, 1) | b8;
+        *ptr++ = word;
+        pdot(i);
+    }
+    ml_printf("done @ %p\n", ptr);
+}
+
+static bool read_test_data_u16(int offset)
+{
+    uint16_t word, *ptr = (uint16_t *)&test_data[offset];
+    int i;
+    const int max = (TEST_SIZE - offset) / sizeof(word);
+
+    ml_printf("Reading u16 from %#lx (offset %d):", ptr, offset);
+
+    for (i = 0; i < max; i++) {
+        uint8_t high, low;
+        word = *ptr++;
+        high = (word >> 8) & 0xff;
+        low = word & 0xff;
+        if (high < low && high != 0) {
+            ml_printf("Error %d < %d\n", high, low);
+            return false;
+        } else {
+            pdot(i);
+        }
+
+    }
+    ml_printf("done @ %p\n", ptr);
+    return true;
+}
+
+static bool read_test_data_u32(int offset)
+{
+    uint32_t word, *ptr = (uint32_t *)&test_data[offset];
+    int i;
+    const int max = (TEST_SIZE - offset) / sizeof(word);
+
+    ml_printf("Reading u32 from %#lx (offset %d):", ptr, offset);
+
+    for (i = 0; i < max; i++) {
+        uint8_t b1, b2, b3, b4;
+        word = *ptr++;
+
+        b1 = word >> 24 & 0xff;
+        b2 = word >> 16 & 0xff;
+        b3 = word >> 8 & 0xff;
+        b4 = word & 0xff;
+
+        if ((b1 < b2 && b1 != 0) ||
+            (b2 < b3 && b2 != 0) ||
+            (b3 < b4 && b3 != 0)) {
+            ml_printf("Error %d, %d, %d, %d", b1, b2, b3, b4);
+            return false;
+        } else {
+            pdot(i);
+        }
+    }
+    ml_printf("done @ %p\n", ptr);
+    return true;
+}
+
+static bool read_test_data_u64(int offset)
+{
+    uint64_t word, *ptr = (uint64_t *)&test_data[offset];
+    int i;
+    const int max = (TEST_SIZE - offset) / sizeof(word);
+
+    ml_printf("Reading u64 from %#lx (offset %d):", ptr, offset);
+
+    for (i = 0; i < max; i++) {
+        uint8_t b1, b2, b3, b4, b5, b6, b7, b8;
+        word = *ptr++;
+
+        b1 = ((uint64_t) (word >> 56)) & 0xff;
+        b2 = ((uint64_t) (word >> 48)) & 0xff;
+        b3 = ((uint64_t) (word >> 40)) & 0xff;
+        b4 = (word >> 32) & 0xff;
+        b5 = (word >> 24) & 0xff;
+        b6 = (word >> 16) & 0xff;
+        b7 = (word >> 8)  & 0xff;
+        b8 = (word >> 0)  & 0xff;
+
+        if ((b1 < b2 && b1 != 0) ||
+            (b2 < b3 && b2 != 0) ||
+            (b3 < b4 && b3 != 0) ||
+            (b4 < b5 && b4 != 0) ||
+            (b5 < b6 && b5 != 0) ||
+            (b6 < b7 && b6 != 0) ||
+            (b7 < b8 && b7 != 0)) {
+            ml_printf("Error %d, %d, %d, %d, %d, %d, %d, %d",
+                      b1, b2, b3, b4, b5, b6, b7, b8);
+            return false;
+        } else {
+            pdot(i);
+        }
+    }
+    ml_printf("done @ %p\n", ptr);
+    return true;
+}
+
+/* Read the test data and verify at various offsets */
+read_ufn read_ufns[] = { read_test_data_u16,
+                         read_test_data_u32,
+                         read_test_data_u64 };
+
+bool do_unsigned_reads(void)
+{
+    int i;
+    bool ok = true;
+
+    for (i = 0; i < ARRAY_SIZE(read_ufns) && ok; i++) {
+#if CHECK_UNALIGNED
+        int off;
+        for (off = 0; off < 8 && ok; off++) {
+            ok = read_ufns[i](off);
+        }
+#else
+        ok = read_ufns[i](0);
+#endif
+    }
+
+    return ok;
+}
+
+static bool do_unsigned_test(init_ufn fn)
+{
+#if CHECK_UNALIGNED
+    bool ok = true;
+    int i;
+    for (i = 0; i < 8 && ok; i++) {
+        fn(i);
+        ok = do_unsigned_reads();
+    }
+#else
+    fn(0);
+    return do_unsigned_reads();
+#endif
+}
+
+/*
+ * We need to ensure signed data is read into a larger data type to
+ * ensure that sign extension is working properly.
+ */
+
+static bool read_test_data_s8(int offset, bool neg_first)
+{
+    int8_t *ptr = (int8_t *)&test_data[offset];
+    int i;
+    const int max = (TEST_SIZE - offset) / 2;
+
+    ml_printf("Reading s8 pairs from %#lx (offset %d):", ptr, offset);
+
+    for (i = 0; i < max; i++) {
+        int16_t first, second;
+        bool ok;
+        first = *ptr++;
+        second = *ptr++;
+
+        if (neg_first && first < 0 && second > 0) {
+            pdot(i);
+        } else if (!neg_first && first > 0 && second < 0) {
+            pdot(i);
+        } else {
+            ml_printf("Error %d %c %d\n", first, neg_first ? '<' : '>', second);
+            return false;
+        }
+    }
+    ml_printf("done @ %p\n", ptr);
+    return true;
+}
+
+static bool read_test_data_s16(int offset, bool neg_first)
+{
+    int16_t *ptr = (int16_t *)&test_data[offset];
+    int i;
+    const int max = (TEST_SIZE - offset) / (sizeof(*ptr));
+
+    ml_printf("Reading s16 from %#lx (offset %d, %s):", ptr,
+              offset, neg_first ? "neg" : "pos");
+
+    for (i = 0; i < max; i++) {
+        int32_t data = *ptr++;
+
+        if (neg_first && data < 0) {
+            pdot(i);
+        } else if (data > 0) {
+            pdot(i);
+        } else {
+            ml_printf("Error %d %c 0\n", data, neg_first ? '<' : '>');
+            return false;
+        }
+    }
+    ml_printf("done @ %p\n", ptr);
+    return true;
+}
+
+static bool read_test_data_s32(int offset, bool neg_first)
+{
+    int32_t *ptr = (int32_t *)&test_data[offset];
+    int i;
+    const int max = (TEST_SIZE - offset) / (sizeof(int32_t));
+
+    ml_printf("Reading s32 from %#lx (offset %d, %s):",
+              ptr, offset, neg_first ? "neg" : "pos");
+
+    for (i = 0; i < max; i++) {
+        int64_t data = *ptr++;
+
+        if (neg_first && data < 0) {
+            pdot(i);
+        } else if (data > 0) {
+            pdot(i);
+        } else {
+            ml_printf("Error %d %c 0\n", data, neg_first ? '<' : '>');
+            return false;
+        }
+    }
+    ml_printf("done @ %p\n", ptr);
+    return true;
+}
+
+/*
+ * Read the test data and verify at various offsets
+ *
+ * For everything except bytes all our reads should be either positive
+ * or negative depending on what offset we are reading from. Currently
+ * we only handle LE systems.
+ */
+read_sfn read_sfns[] = { read_test_data_s8,
+                         read_test_data_s16,
+                         read_test_data_s32 };
+
+bool do_signed_reads(bool neg_first)
+{
+    int i;
+    bool ok = true;
+
+    for (i = 0; i < ARRAY_SIZE(read_sfns) && ok; i++) {
+#if CHECK_UNALIGNED
+        int off;
+        for (off = 0; off < 8 && ok; off++) {
+            bool nf = i == 0 ? neg_first ^ (off & 1) : !(neg_first ^ (off & 1));
+            ok = read_sfns[i](off, nf);
+        }
+#else
+        ok = read_sfns[i](0, i == 0 ? neg_first : !neg_first);
+#endif
+    }
+
+    return ok;
+}
+
+init_ufn init_ufns[] = { init_test_data_u8,
+                         init_test_data_u16,
+                         init_test_data_u32,
+                         init_test_data_u64 };
+
+int main(void)
+{
+    int i;
+    bool ok = true;
+
+    /* Run through the unsigned tests first */
+    for (i = 0; i < ARRAY_SIZE(init_ufns) && ok; i++) {
+        ok = do_unsigned_test(init_ufns[i]);
+    }
+
+    if (ok) {
+        init_test_data_s8(false);
+        ok = do_signed_reads(false);
+    }
+
+    if (ok) {
+        init_test_data_s8(true);
+        ok = do_signed_reads(true);
+    }
+
+    ml_printf("Test complete: %s\n", ok ? "PASSED" : "FAILED");
+    return ok ? 0 : -1;
+}
diff --git a/tests/tcg/ppc/Makefile.include b/tests/tcg/ppc/Makefile.include
index b062c30dd3..ae01fb8fad 100644
--- a/tests/tcg/ppc/Makefile.include
+++ b/tests/tcg/ppc/Makefile.include
@@ -1,6 +1,9 @@
 ifeq ($(TARGET_NAME),ppc)
 DOCKER_IMAGE=debian-powerpc-cross
 DOCKER_CROSS_COMPILER=powerpc-linux-gnu-gcc
+else ifeq ($(TARGET_NAME),ppc64)
+DOCKER_IMAGE=debian-ppc64-cross
+DOCKER_CROSS_COMPILER=powerpc64-linux-gnu-gcc
 else ifeq ($(TARGET_NAME),ppc64le)
 DOCKER_IMAGE=debian-ppc64el-cross
 DOCKER_CROSS_COMPILER=powerpc64le-linux-gnu-gcc
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index eda90750eb..12e2ecf517 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -206,7 +206,7 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
 
     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
                               &error_abort);
     s = bs->opaque;
@@ -290,7 +290,7 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive)
     BlockBackend *blk;
     BlockDriverState *bs, *backing;
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
                               &error_abort);
     blk_insert_bs(blk, bs, &error_abort);
@@ -353,7 +353,7 @@ static void test_nested(void)
     BDRVTestState *s, *backing_s;
     enum drain_type outer, inner;
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
                               &error_abort);
     s = bs->opaque;
@@ -402,13 +402,13 @@ static void test_multiparent(void)
     BlockDriverState *bs_a, *bs_b, *backing;
     BDRVTestState *a_s, *b_s, *backing_s;
 
-    blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
                                 &error_abort);
     a_s = bs_a->opaque;
     blk_insert_bs(blk_a, bs_a, &error_abort);
 
-    blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
                                 &error_abort);
     b_s = bs_b->opaque;
@@ -475,13 +475,13 @@ static void test_graph_change_drain_subtree(void)
     BlockDriverState *bs_a, *bs_b, *backing;
     BDRVTestState *a_s, *b_s, *backing_s;
 
-    blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
                                 &error_abort);
     a_s = bs_a->opaque;
     blk_insert_bs(blk_a, bs_a, &error_abort);
 
-    blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
                                 &error_abort);
     b_s = bs_b->opaque;
@@ -555,7 +555,7 @@ static void test_graph_change_drain_all(void)
     BDRVTestState *a_s, *b_s;
 
     /* Create node A with a BlockBackend */
-    blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
                                 &error_abort);
     a_s = bs_a->opaque;
@@ -571,7 +571,7 @@ static void test_graph_change_drain_all(void)
     g_assert_cmpint(a_s->drain_count, ==, 1);
 
     /* Create node B with a BlockBackend */
-    blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
                                 &error_abort);
     b_s = bs_b->opaque;
@@ -672,13 +672,13 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
         goto out;
     }
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
                               &error_abort);
     s = bs->opaque;
     blk_insert_bs(blk, bs, &error_abort);
 
-    blk_set_aio_context(blk, ctx_a);
+    blk_set_aio_context(blk, ctx_a, &error_abort);
     aio_context_acquire(ctx_a);
 
     s->bh_indirection_ctx = ctx_b;
@@ -742,7 +742,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
     }
 
     aio_context_acquire(ctx_a);
-    blk_set_aio_context(blk, qemu_get_aio_context());
+    blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort);
     aio_context_release(ctx_a);
 
     bdrv_unref(bs);
@@ -883,7 +883,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
     bdrv_set_backing_hd(src, src_backing, &error_abort);
     bdrv_unref(src_backing);
 
-    blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_src = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     blk_insert_bs(blk_src, src_overlay, &error_abort);
 
     switch (drain_node) {
@@ -903,15 +903,16 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
     if (use_iothread) {
         iothread = iothread_new();
         ctx = iothread_get_aio_context(iothread);
-        blk_set_aio_context(blk_src, ctx);
+        blk_set_aio_context(blk_src, ctx, &error_abort);
     } else {
         ctx = qemu_get_aio_context();
     }
 
     target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR,
                                   &error_abort);
-    blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_target = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     blk_insert_bs(blk_target, target, &error_abort);
+    blk_set_allow_aio_context_change(blk_target, true);
 
     aio_context_acquire(ctx);
     tjob = block_job_create("job0", &test_job_driver, NULL, src,
@@ -972,7 +973,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
     g_assert_false(job->job.paused);
     g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
 
-    do_drain_begin(drain_type, target);
+    do_drain_begin_unlocked(drain_type, target);
 
     if (drain_type == BDRV_DRAIN_ALL) {
         /* bdrv_drain_all() drains both src and target */
@@ -983,7 +984,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
     g_assert_true(job->job.paused);
     g_assert_false(job->job.busy); /* The job is paused */
 
-    do_drain_end(drain_type, target);
+    do_drain_end_unlocked(drain_type, target);
 
     if (use_iothread) {
         /* paused is reset in the I/O thread, wait for it */
@@ -1001,7 +1002,8 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
     g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO));
 
     if (use_iothread) {
-        blk_set_aio_context(blk_src, qemu_get_aio_context());
+        blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort);
+        assert(blk_get_aio_context(blk_target) == qemu_get_aio_context());
     }
     aio_context_release(ctx);
 
@@ -1205,7 +1207,7 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete,
                         &error_abort);
     bdrv_attach_child(bs, null_bs, "null-child", &child_file, &error_abort);
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     blk_insert_bs(blk, bs, &error_abort);
 
     /* Referenced by blk now */
@@ -1368,7 +1370,7 @@ static void test_detach_indirect(bool by_parent_cb)
     c = bdrv_new_open_driver(&bdrv_test, "c", BDRV_O_RDWR, &error_abort);
 
     /* blk is a BB for parent-a */
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     blk_insert_bs(blk, parent_a, &error_abort);
     bdrv_unref(parent_a);
 
@@ -1436,12 +1438,6 @@ static void test_detach_indirect(bool by_parent_cb)
     bdrv_unref(parent_b);
     blk_unref(blk);
 
-    /* XXX Once bdrv_close() unref's children instead of just detaching them,
-     * this won't be necessary any more. */
-    bdrv_unref(a);
-    bdrv_unref(a);
-    bdrv_unref(c);
-
     g_assert_cmpint(a->refcnt, ==, 1);
     g_assert_cmpint(b->refcnt, ==, 1);
     g_assert_cmpint(c->refcnt, ==, 1);
@@ -1466,7 +1462,7 @@ static void test_append_to_drained(void)
     BlockDriverState *base, *overlay;
     BDRVTestState *base_s, *overlay_s;
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     base = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
     base_s = base->opaque;
     blk_insert_bs(blk, base, &error_abort);
@@ -1513,16 +1509,16 @@ static void test_set_aio_context(void)
                               &error_abort);
 
     bdrv_drained_begin(bs);
-    bdrv_set_aio_context(bs, ctx_a);
+    bdrv_try_set_aio_context(bs, ctx_a, &error_abort);
 
     aio_context_acquire(ctx_a);
     bdrv_drained_end(bs);
 
     bdrv_drained_begin(bs);
-    bdrv_set_aio_context(bs, ctx_b);
+    bdrv_try_set_aio_context(bs, ctx_b, &error_abort);
     aio_context_release(ctx_a);
     aio_context_acquire(ctx_b);
-    bdrv_set_aio_context(bs, qemu_get_aio_context());
+    bdrv_try_set_aio_context(bs, qemu_get_aio_context(), &error_abort);
     aio_context_release(ctx_b);
     bdrv_drained_end(bs);
 
diff --git a/tests/test-bdrv-graph-mod.c b/tests/test-bdrv-graph-mod.c
index 283dc84869..cfeec36566 100644
--- a/tests/test-bdrv-graph-mod.c
+++ b/tests/test-bdrv-graph-mod.c
@@ -102,7 +102,8 @@ static void test_update_perm_tree(void)
 {
     Error *local_err = NULL;
 
-    BlockBackend *root = blk_new(BLK_PERM_WRITE | BLK_PERM_CONSISTENT_READ,
+    BlockBackend *root = blk_new(qemu_get_aio_context(),
+                                 BLK_PERM_WRITE | BLK_PERM_CONSISTENT_READ,
                                  BLK_PERM_ALL & ~BLK_PERM_WRITE);
     BlockDriverState *bs = no_perm_node("node");
     BlockDriverState *filter = pass_through_node("filter");
@@ -116,7 +117,6 @@ static void test_update_perm_tree(void)
     g_assert_nonnull(local_err);
     error_free(local_err);
 
-    bdrv_unref(bs);
     blk_unref(root);
 }
 
@@ -166,7 +166,7 @@ static void test_update_perm_tree(void)
  */
 static void test_should_update_child(void)
 {
-    BlockBackend *root = blk_new(0, BLK_PERM_ALL);
+    BlockBackend *root = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
     BlockDriverState *bs = no_perm_node("node");
     BlockDriverState *filter = no_perm_node("filter");
     BlockDriverState *target = no_perm_node("target");
diff --git a/tests/test-block-backend.c b/tests/test-block-backend.c
index fd59f02bd0..5b5d6845c0 100644
--- a/tests/test-block-backend.c
+++ b/tests/test-block-backend.c
@@ -37,7 +37,8 @@ static void test_drain_aio_error_flush_cb(void *opaque, int ret)
 
 static void test_drain_aio_error(void)
 {
-    BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    BlockBackend *blk = blk_new(qemu_get_aio_context(),
+                                BLK_PERM_ALL, BLK_PERM_ALL);
     BlockAIOCB *acb;
     bool completed = false;
 
@@ -53,7 +54,8 @@ static void test_drain_aio_error(void)
 
 static void test_drain_all_aio_error(void)
 {
-    BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    BlockBackend *blk = blk_new(qemu_get_aio_context(),
+                                BLK_PERM_ALL, BLK_PERM_ALL);
     BlockAIOCB *acb;
     bool completed = false;
 
diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c
index 59f692892e..79d9cf8a57 100644
--- a/tests/test-block-iothread.c
+++ b/tests/test-block-iothread.c
@@ -336,20 +336,20 @@ static void test_sync_op(const void *opaque)
     BlockDriverState *bs;
     BdrvChild *c;
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
     bs->total_sectors = 65536 / BDRV_SECTOR_SIZE;
     blk_insert_bs(blk, bs, &error_abort);
     c = QLIST_FIRST(&bs->parents);
 
-    blk_set_aio_context(blk, ctx);
+    blk_set_aio_context(blk, ctx, &error_abort);
     aio_context_acquire(ctx);
     t->fn(c);
     if (t->blkfn) {
         t->blkfn(blk);
     }
     aio_context_release(ctx);
-    blk_set_aio_context(blk, qemu_get_aio_context());
+    blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort);
 
     bdrv_unref(bs);
     blk_unref(blk);
@@ -415,7 +415,7 @@ static void test_attach_blockjob(void)
     BlockDriverState *bs;
     TestBlockJob *tjob;
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
     blk_insert_bs(blk, bs, &error_abort);
 
@@ -428,7 +428,7 @@ static void test_attach_blockjob(void)
         aio_poll(qemu_get_aio_context(), false);
     }
 
-    blk_set_aio_context(blk, ctx);
+    blk_set_aio_context(blk, ctx, &error_abort);
 
     tjob->n = 0;
     while (tjob->n == 0) {
@@ -436,7 +436,7 @@ static void test_attach_blockjob(void)
     }
 
     aio_context_acquire(ctx);
-    blk_set_aio_context(blk, qemu_get_aio_context());
+    blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort);
     aio_context_release(ctx);
 
     tjob->n = 0;
@@ -444,7 +444,7 @@ static void test_attach_blockjob(void)
         aio_poll(qemu_get_aio_context(), false);
     }
 
-    blk_set_aio_context(blk, ctx);
+    blk_set_aio_context(blk, ctx, &error_abort);
 
     tjob->n = 0;
     while (tjob->n == 0) {
@@ -453,7 +453,7 @@ static void test_attach_blockjob(void)
 
     aio_context_acquire(ctx);
     job_complete_sync(&tjob->common.job, &error_abort);
-    blk_set_aio_context(blk, qemu_get_aio_context());
+    blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort);
     aio_context_release(ctx);
 
     bdrv_unref(bs);
@@ -481,7 +481,7 @@ static void test_propagate_basic(void)
     QDict *options;
 
     /* Create bs_a and its BlockBackend */
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs_a = bdrv_new_open_driver(&bdrv_test, "bs_a", BDRV_O_RDWR, &error_abort);
     blk_insert_bs(blk, bs_a, &error_abort);
 
@@ -497,7 +497,7 @@ static void test_propagate_basic(void)
     bs_verify = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort);
 
     /* Switch the AioContext */
-    blk_set_aio_context(blk, ctx);
+    blk_set_aio_context(blk, ctx, &error_abort);
     g_assert(blk_get_aio_context(blk) == ctx);
     g_assert(bdrv_get_aio_context(bs_a) == ctx);
     g_assert(bdrv_get_aio_context(bs_verify) == ctx);
@@ -505,7 +505,7 @@ static void test_propagate_basic(void)
 
     /* Switch the AioContext back */
     ctx = qemu_get_aio_context();
-    blk_set_aio_context(blk, ctx);
+    blk_set_aio_context(blk, ctx, &error_abort);
     g_assert(blk_get_aio_context(blk) == ctx);
     g_assert(bdrv_get_aio_context(bs_a) == ctx);
     g_assert(bdrv_get_aio_context(bs_verify) == ctx);
@@ -561,11 +561,11 @@ static void test_propagate_diamond(void)
     qdict_put_str(options, "raw", "bs_c");
 
     bs_verify = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort);
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     blk_insert_bs(blk, bs_verify, &error_abort);
 
     /* Switch the AioContext */
-    blk_set_aio_context(blk, ctx);
+    blk_set_aio_context(blk, ctx, &error_abort);
     g_assert(blk_get_aio_context(blk) == ctx);
     g_assert(bdrv_get_aio_context(bs_verify) == ctx);
     g_assert(bdrv_get_aio_context(bs_a) == ctx);
@@ -574,7 +574,7 @@ static void test_propagate_diamond(void)
 
     /* Switch the AioContext back */
     ctx = qemu_get_aio_context();
-    blk_set_aio_context(blk, ctx);
+    blk_set_aio_context(blk, ctx, &error_abort);
     g_assert(blk_get_aio_context(blk) == ctx);
     g_assert(bdrv_get_aio_context(bs_verify) == ctx);
     g_assert(bdrv_get_aio_context(bs_a) == ctx);
@@ -593,7 +593,7 @@ static void test_propagate_mirror(void)
     IOThread *iothread = iothread_new();
     AioContext *ctx = iothread_get_aio_context(iothread);
     AioContext *main_ctx = qemu_get_aio_context();
-    BlockDriverState *src, *target;
+    BlockDriverState *src, *target, *filter;
     BlockBackend *blk;
     Job *job;
     Error *local_err = NULL;
@@ -610,11 +610,13 @@ static void test_propagate_mirror(void)
                  false, "filter_node", MIRROR_COPY_MODE_BACKGROUND,
                  &error_abort);
     job = job_get("job0");
+    filter = bdrv_find_node("filter_node");
 
     /* Change the AioContext of src */
     bdrv_try_set_aio_context(src, ctx, &error_abort);
     g_assert(bdrv_get_aio_context(src) == ctx);
     g_assert(bdrv_get_aio_context(target) == ctx);
+    g_assert(bdrv_get_aio_context(filter) == ctx);
     g_assert(job->aio_context == ctx);
 
     /* Change the AioContext of target */
@@ -623,9 +625,10 @@ static void test_propagate_mirror(void)
     aio_context_release(ctx);
     g_assert(bdrv_get_aio_context(src) == main_ctx);
     g_assert(bdrv_get_aio_context(target) == main_ctx);
+    g_assert(bdrv_get_aio_context(filter) == main_ctx);
 
     /* With a BlockBackend on src, changing target must fail */
-    blk = blk_new(0, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
     blk_insert_bs(blk, src, &error_abort);
 
     bdrv_try_set_aio_context(target, ctx, &local_err);
@@ -635,6 +638,7 @@ static void test_propagate_mirror(void)
     g_assert(blk_get_aio_context(blk) == main_ctx);
     g_assert(bdrv_get_aio_context(src) == main_ctx);
     g_assert(bdrv_get_aio_context(target) == main_ctx);
+    g_assert(bdrv_get_aio_context(filter) == main_ctx);
 
     /* ...unless we explicitly allow it */
     aio_context_acquire(ctx);
@@ -645,11 +649,12 @@ static void test_propagate_mirror(void)
     g_assert(blk_get_aio_context(blk) == ctx);
     g_assert(bdrv_get_aio_context(src) == ctx);
     g_assert(bdrv_get_aio_context(target) == ctx);
+    g_assert(bdrv_get_aio_context(filter) == ctx);
 
     job_cancel_sync_all();
 
     aio_context_acquire(ctx);
-    blk_set_aio_context(blk, main_ctx);
+    blk_set_aio_context(blk, main_ctx, &error_abort);
     bdrv_try_set_aio_context(target, main_ctx, &error_abort);
     aio_context_release(ctx);
 
@@ -658,6 +663,69 @@ static void test_propagate_mirror(void)
     bdrv_unref(target);
 }
 
+static void test_attach_second_node(void)
+{
+    IOThread *iothread = iothread_new();
+    AioContext *ctx = iothread_get_aio_context(iothread);
+    AioContext *main_ctx = qemu_get_aio_context();
+    BlockBackend *blk;
+    BlockDriverState *bs, *filter;
+    QDict *options;
+
+    blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL);
+    bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
+    blk_insert_bs(blk, bs, &error_abort);
+
+    options = qdict_new();
+    qdict_put_str(options, "driver", "raw");
+    qdict_put_str(options, "file", "base");
+
+    filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort);
+    g_assert(blk_get_aio_context(blk) == ctx);
+    g_assert(bdrv_get_aio_context(bs) == ctx);
+    g_assert(bdrv_get_aio_context(filter) == ctx);
+
+    blk_set_aio_context(blk, main_ctx, &error_abort);
+    g_assert(blk_get_aio_context(blk) == main_ctx);
+    g_assert(bdrv_get_aio_context(bs) == main_ctx);
+    g_assert(bdrv_get_aio_context(filter) == main_ctx);
+
+    bdrv_unref(filter);
+    bdrv_unref(bs);
+    blk_unref(blk);
+}
+
+static void test_attach_preserve_blk_ctx(void)
+{
+    IOThread *iothread = iothread_new();
+    AioContext *ctx = iothread_get_aio_context(iothread);
+    BlockBackend *blk;
+    BlockDriverState *bs;
+
+    blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL);
+    bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
+    bs->total_sectors = 65536 / BDRV_SECTOR_SIZE;
+
+    /* Add node to BlockBackend that has an iothread context assigned */
+    blk_insert_bs(blk, bs, &error_abort);
+    g_assert(blk_get_aio_context(blk) == ctx);
+    g_assert(bdrv_get_aio_context(bs) == ctx);
+
+    /* Remove the node again */
+    blk_remove_bs(blk);
+    g_assert(blk_get_aio_context(blk) == ctx);
+    g_assert(bdrv_get_aio_context(bs) == qemu_get_aio_context());
+
+    /* Re-attach the node */
+    blk_insert_bs(blk, bs, &error_abort);
+    g_assert(blk_get_aio_context(blk) == ctx);
+    g_assert(bdrv_get_aio_context(bs) == ctx);
+
+    blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort);
+    bdrv_unref(bs);
+    blk_unref(blk);
+}
+
 int main(int argc, char **argv)
 {
     int i;
@@ -673,6 +741,8 @@ int main(int argc, char **argv)
     }
 
     g_test_add_func("/attach/blockjob", test_attach_blockjob);
+    g_test_add_func("/attach/second_node", test_attach_second_node);
+    g_test_add_func("/attach/preserve_blk_ctx", test_attach_preserve_blk_ctx);
     g_test_add_func("/propagate/basic", test_propagate_basic);
     g_test_add_func("/propagate/diamond", test_propagate_diamond);
     g_test_add_func("/propagate/mirror", test_propagate_mirror);
diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
index 652d1e8359..8c91980c70 100644
--- a/tests/test-blockjob.c
+++ b/tests/test-blockjob.c
@@ -68,7 +68,7 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id,
 static BlockBackend *create_blk(const char *name)
 {
     /* No I/O is performed on this device */
-    BlockBackend *blk = blk_new(0, BLK_PERM_ALL);
+    BlockBackend *blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
     BlockDriverState *bs;
 
     bs = bdrv_open("null-co://", NULL, NULL, 0, &error_abort);
diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c
index 9cdccb3a47..0b675923f6 100644
--- a/tests/test-thread-pool.c
+++ b/tests/test-thread-pool.c
@@ -27,9 +27,10 @@ static int worker_cb(void *opaque)
 static int long_cb(void *opaque)
 {
     WorkerTestData *data = opaque;
-    atomic_inc(&data->n);
-    g_usleep(2000000);
-    atomic_inc(&data->n);
+    if (atomic_cmpxchg(&data->n, 0, 1) == 0) {
+        g_usleep(2000000);
+        atomic_or(&data->n, 2);
+    }
     return 0;
 }
 
@@ -171,7 +172,7 @@ static void do_test_cancel(bool sync)
     /* Cancel the jobs that haven't been started yet.  */
     num_canceled = 0;
     for (i = 0; i < 100; i++) {
-        if (atomic_cmpxchg(&data[i].n, 0, 3) == 0) {
+        if (atomic_cmpxchg(&data[i].n, 0, 4) == 0) {
             data[i].ret = -ECANCELED;
             if (sync) {
                 bdrv_aio_cancel(data[i].aiocb);
@@ -185,7 +186,7 @@ static void do_test_cancel(bool sync)
     g_assert_cmpint(num_canceled, <, 100);
 
     for (i = 0; i < 100; i++) {
-        if (data[i].aiocb && data[i].n != 3) {
+        if (data[i].aiocb && atomic_read(&data[i].n) < 4) {
             if (sync) {
                 /* Canceling the others will be a blocking operation.  */
                 bdrv_aio_cancel(data[i].aiocb);
@@ -201,13 +202,22 @@ static void do_test_cancel(bool sync)
     }
     g_assert_cmpint(active, ==, 0);
     for (i = 0; i < 100; i++) {
-        if (data[i].n == 3) {
+        g_assert(data[i].aiocb == NULL);
+        switch (data[i].n) {
+        case 0:
+            fprintf(stderr, "Callback not canceled but never started?\n");
+            abort();
+        case 3:
+            /* Couldn't be canceled asynchronously, must have completed.  */
+            g_assert_cmpint(data[i].ret, ==, 0);
+            break;
+        case 4:
+            /* Could be canceled asynchronously, never started.  */
             g_assert_cmpint(data[i].ret, ==, -ECANCELED);
-            g_assert(data[i].aiocb == NULL);
-        } else {
-            g_assert_cmpint(data[i].n, ==, 2);
-            g_assert(data[i].ret == 0 || data[i].ret == -ECANCELED);
-            g_assert(data[i].aiocb == NULL);
+            break;
+        default:
+            fprintf(stderr, "Callback aborted while running?\n");
+            abort();
         }
     }
 }
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 948a42c991..5644cf95ca 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -675,9 +675,9 @@ static void test_groups(void)
     ThrottleGroupMember *tgm1, *tgm2, *tgm3;
 
     /* No actual I/O is performed on these devices */
-    blk1 = blk_new(0, BLK_PERM_ALL);
-    blk2 = blk_new(0, BLK_PERM_ALL);
-    blk3 = blk_new(0, BLK_PERM_ALL);
+    blk1 = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
+    blk2 = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
+    blk3 = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
 
     blkp1 = blk_get_public(blk1);
     blkp2 = blk_get_public(blk2);
diff --git a/tests/tmp105-test.c b/tests/tmp105-test.c
index 34cae7a582..f599309a4a 100644
--- a/tests/tmp105-test.c
+++ b/tests/tmp105-test.c
@@ -10,6 +10,7 @@
 #include "qemu/osdep.h"
 
 #include "libqtest.h"
+#include "libqos/qgraph.h"
 #include "libqos/i2c.h"
 #include "qapi/qmp/qdict.h"
 #include "hw/misc/tmp105_regs.h"
@@ -17,52 +18,6 @@
 #define TMP105_TEST_ID   "tmp105-test"
 #define TMP105_TEST_ADDR 0x49
 
-static I2CAdapter *i2c;
-
-static uint16_t tmp105_get8(I2CAdapter *i2c, uint8_t addr, uint8_t reg)
-{
-    uint8_t resp[1];
-    i2c_send(i2c, addr, &reg, 1);
-    i2c_recv(i2c, addr, resp, 1);
-    return resp[0];
-}
-
-static uint16_t tmp105_get16(I2CAdapter *i2c, uint8_t addr, uint8_t reg)
-{
-    uint8_t resp[2];
-    i2c_send(i2c, addr, &reg, 1);
-    i2c_recv(i2c, addr, resp, 2);
-    return (resp[0] << 8) | resp[1];
-}
-
-static void tmp105_set8(I2CAdapter *i2c, uint8_t addr, uint8_t reg,
-                        uint8_t value)
-{
-    uint8_t cmd[2];
-    uint8_t resp[1];
-
-    cmd[0] = reg;
-    cmd[1] = value;
-    i2c_send(i2c, addr, cmd, 2);
-    i2c_recv(i2c, addr, resp, 1);
-    g_assert_cmphex(resp[0], ==, cmd[1]);
-}
-
-static void tmp105_set16(I2CAdapter *i2c, uint8_t addr, uint8_t reg,
-                         uint16_t value)
-{
-    uint8_t cmd[3];
-    uint8_t resp[2];
-
-    cmd[0] = reg;
-    cmd[1] = value >> 8;
-    cmd[2] = value & 255;
-    i2c_send(i2c, addr, cmd, 3);
-    i2c_recv(i2c, addr, resp, 2);
-    g_assert_cmphex(resp[0], ==, cmd[1]);
-    g_assert_cmphex(resp[1], ==, cmd[2]);
-}
-
 static int qmp_tmp105_get_temperature(const char *id)
 {
     QDict *response;
@@ -87,21 +42,22 @@ static void qmp_tmp105_set_temperature(const char *id, int value)
 }
 
 #define TMP105_PRECISION (1000/16)
-static void send_and_receive(void)
+static void send_and_receive(void *obj, void *data, QGuestAllocator *alloc)
 {
     uint16_t value;
+    QI2CDevice *i2cdev = (QI2CDevice *)obj;
 
     value = qmp_tmp105_get_temperature(TMP105_TEST_ID);
     g_assert_cmpuint(value, ==, 0);
 
-    value = tmp105_get16(i2c, TMP105_TEST_ADDR, TMP105_REG_TEMPERATURE);
+    value = i2c_get16(i2cdev, TMP105_REG_TEMPERATURE);
     g_assert_cmphex(value, ==, 0);
 
     qmp_tmp105_set_temperature(TMP105_TEST_ID, 20000);
     value = qmp_tmp105_get_temperature(TMP105_TEST_ID);
     g_assert_cmpuint(value, ==, 20000);
 
-    value = tmp105_get16(i2c, TMP105_TEST_ADDR, TMP105_REG_TEMPERATURE);
+    value = i2c_get16(i2cdev, TMP105_REG_TEMPERATURE);
     g_assert_cmphex(value, ==, 0x1400);
 
     qmp_tmp105_set_temperature(TMP105_TEST_ID, 20938); /* 20 + 15/16 */
@@ -110,24 +66,27 @@ static void send_and_receive(void)
     g_assert_cmpuint(value, <, 20938 + TMP105_PRECISION/2);
 
     /* Set config */
-    tmp105_set8(i2c, TMP105_TEST_ADDR, TMP105_REG_CONFIG, 0x60);
-    value = tmp105_get8(i2c, TMP105_TEST_ADDR, TMP105_REG_CONFIG);
+    i2c_set8(i2cdev, TMP105_REG_CONFIG, 0x60);
+    value = i2c_get8(i2cdev, TMP105_REG_CONFIG);
     g_assert_cmphex(value, ==, 0x60);
 
-    value = tmp105_get16(i2c, TMP105_TEST_ADDR, TMP105_REG_TEMPERATURE);
+    value = i2c_get16(i2cdev, TMP105_REG_TEMPERATURE);
     g_assert_cmphex(value, ==, 0x14f0);
 
     /* Set precision to 9, 10, 11 bits.  */
-    tmp105_set8(i2c, TMP105_TEST_ADDR, TMP105_REG_CONFIG, 0x00);
-    value = tmp105_get16(i2c, TMP105_TEST_ADDR, TMP105_REG_TEMPERATURE);
+    i2c_set8(i2cdev, TMP105_REG_CONFIG, 0x00);
+    g_assert_cmphex(i2c_get8(i2cdev, TMP105_REG_CONFIG), ==, 0x00);
+    value = i2c_get16(i2cdev, TMP105_REG_TEMPERATURE);
     g_assert_cmphex(value, ==, 0x1480);
 
-    tmp105_set8(i2c, TMP105_TEST_ADDR, TMP105_REG_CONFIG, 0x20);
-    value = tmp105_get16(i2c, TMP105_TEST_ADDR, TMP105_REG_TEMPERATURE);
+    i2c_set8(i2cdev, TMP105_REG_CONFIG, 0x20);
+    g_assert_cmphex(i2c_get8(i2cdev, TMP105_REG_CONFIG), ==, 0x20);
+    value = i2c_get16(i2cdev, TMP105_REG_TEMPERATURE);
     g_assert_cmphex(value, ==, 0x14c0);
 
-    tmp105_set8(i2c, TMP105_TEST_ADDR, TMP105_REG_CONFIG, 0x40);
-    value = tmp105_get16(i2c, TMP105_TEST_ADDR, TMP105_REG_TEMPERATURE);
+    i2c_set8(i2cdev, TMP105_REG_CONFIG, 0x40);
+    g_assert_cmphex(i2c_get8(i2cdev, TMP105_REG_CONFIG), ==, 0x40);
+    value = i2c_get16(i2cdev, TMP105_REG_TEMPERATURE);
     g_assert_cmphex(value, ==, 0x14e0);
 
     /* stored precision remains the same */
@@ -135,32 +94,27 @@ static void send_and_receive(void)
     g_assert_cmpuint(value, >=, 20938 - TMP105_PRECISION/2);
     g_assert_cmpuint(value, <, 20938 + TMP105_PRECISION/2);
 
-    tmp105_set8(i2c, TMP105_TEST_ADDR, TMP105_REG_CONFIG, 0x60);
-    value = tmp105_get16(i2c, TMP105_TEST_ADDR, TMP105_REG_TEMPERATURE);
+    i2c_set8(i2cdev, TMP105_REG_CONFIG, 0x60);
+    g_assert_cmphex(i2c_get8(i2cdev, TMP105_REG_CONFIG), ==, 0x60);
+    value = i2c_get16(i2cdev, TMP105_REG_TEMPERATURE);
     g_assert_cmphex(value, ==, 0x14f0);
 
-    tmp105_set16(i2c, TMP105_TEST_ADDR, TMP105_REG_T_LOW, 0x1234);
-    tmp105_set16(i2c, TMP105_TEST_ADDR, TMP105_REG_T_HIGH, 0x4231);
+    i2c_set16(i2cdev, TMP105_REG_T_LOW, 0x1234);
+    g_assert_cmphex(i2c_get16(i2cdev, TMP105_REG_T_LOW), ==, 0x1234);
+    i2c_set16(i2cdev, TMP105_REG_T_HIGH, 0x4231);
+    g_assert_cmphex(i2c_get16(i2cdev, TMP105_REG_T_HIGH), ==, 0x4231);
 }
 
-int main(int argc, char **argv)
+static void tmp105_register_nodes(void)
 {
-    QTestState *s = NULL;
-    int ret;
-
-    g_test_init(&argc, &argv, NULL);
+    QOSGraphEdgeOptions opts = {
+        .extra_device_opts = "id=" TMP105_TEST_ID ",address=0x49"
+    };
+    add_qi2c_address(&opts, &(QI2CAddress) { 0x49 });
 
-    s = qtest_start("-machine n800 "
-                    "-device tmp105,bus=i2c-bus.0,id=" TMP105_TEST_ID
-                    ",address=0x49");
-    i2c = omap_i2c_create(s, OMAP2_I2C_1_BASE);
+    qos_node_create_driver("tmp105", i2c_device_create);
+    qos_node_consumes("tmp105", "i2c-bus", &opts);
 
-    qtest_add_func("/tmp105/tx-rx", send_and_receive);
-
-    ret = g_test_run();
-
-    qtest_quit(s);
-    g_free(i2c);
-
-    return ret;
+    qos_add_test("tx-rx", "tmp105", send_and_receive, NULL);
 }
+libqos_init(tmp105_register_nodes);
diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c
index 162b31c88d..1e535cb178 100644
--- a/tests/virtio-scsi-test.c
+++ b/tests/virtio-scsi-test.c
@@ -188,6 +188,53 @@ static void test_unaligned_write_same(void *obj, void *data,
     qvirtio_scsi_pci_free(vs);
 }
 
+static void test_iothread_attach_node(void *obj, void *data,
+                                      QGuestAllocator *t_alloc)
+{
+    QVirtioSCSIPCI *scsi_pci = obj;
+    QVirtioSCSI *scsi = &scsi_pci->scsi;
+    QVirtioSCSIQueues *vs;
+    char tmp_path[] = "/tmp/qtest.XXXXXX";
+    int fd;
+    int ret;
+
+    uint8_t buf[512] = { 0 };
+    const uint8_t write_cdb[VIRTIO_SCSI_CDB_SIZE] = {
+        /* WRITE(10) to LBA 0, transfer length 1 */
+        0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00
+    };
+
+    alloc = t_alloc;
+    vs = qvirtio_scsi_init(scsi->vdev);
+
+    /* Create a temporary qcow2 overlay*/
+    fd = mkstemp(tmp_path);
+    g_assert(fd >= 0);
+    close(fd);
+
+    if (!have_qemu_img()) {
+        g_test_message("QTEST_QEMU_IMG not set or qemu-img missing; "
+                       "skipping snapshot test");
+        goto fail;
+    }
+
+    mkqcow2(tmp_path, 64);
+
+    /* Attach the overlay to the null0 node */
+    qmp_assert_success("{'execute': 'blockdev-add', 'arguments': {"
+                       "   'driver': 'qcow2', 'node-name': 'overlay',"
+                       "   'backing': 'null0', 'file': {"
+                       "     'driver': 'file', 'filename': %s}}}", tmp_path);
+
+    /* Send a request to see if the AioContext is still right */
+    ret = virtio_scsi_do_command(vs, write_cdb, NULL, 0, buf, 512, NULL);
+    g_assert_cmphex(ret, ==, 0);
+
+fail:
+    qvirtio_scsi_pci_free(vs);
+    unlink(tmp_path);
+}
+
 static void *virtio_scsi_hotplug_setup(GString *cmd_line, void *arg)
 {
     g_string_append(cmd_line,
@@ -204,6 +251,15 @@ static void *virtio_scsi_setup(GString *cmd_line, void *arg)
     return arg;
 }
 
+static void *virtio_scsi_setup_iothread(GString *cmd_line, void *arg)
+{
+    g_string_append(cmd_line,
+                    " -object iothread,id=thread0"
+                    " -blockdev driver=null-co,node-name=null0"
+                    " -device scsi-hd,drive=null0");
+    return arg;
+}
+
 static void register_virtio_scsi_test(void)
 {
     QOSGraphTestOptions opts = { };
@@ -214,6 +270,13 @@ static void register_virtio_scsi_test(void)
     opts.before = virtio_scsi_setup;
     qos_add_test("unaligned-write-same", "virtio-scsi",
                  test_unaligned_write_same, &opts);
+
+    opts.before = virtio_scsi_setup_iothread;
+    opts.edge = (QOSGraphEdgeOptions) {
+        .extra_device_opts = "iothread=thread0",
+    };
+    qos_add_test("iothread-attach-node", "virtio-scsi-pci",
+                 test_iothread_attach_node, &opts);
 }
 
 libqos_init(register_virtio_scsi_test);
diff --git a/ui/curses.c b/ui/curses.c
index 1f3fcabb00..e9319eb8ae 100644
--- a/ui/curses.c
+++ b/ui/curses.c
@@ -489,9 +489,9 @@ static uint16_t get_ucs(wchar_t wch, iconv_t conv)
     memset(&ps, 0, sizeof(ps));
     ret = wcrtomb(mbch, wch, &ps);
     if (ret == -1) {
-        fprintf(stderr, "Could not convert 0x%04x "
+        fprintf(stderr, "Could not convert 0x%04lx "
                         "from wchar_t to a multibyte character: %s\n",
-                        wch, strerror(errno));
+                        (unsigned long)wch, strerror(errno));
         return 0xFFFD;
     }
 
@@ -501,9 +501,9 @@ static uint16_t get_ucs(wchar_t wch, iconv_t conv)
     such = sizeof(uch);
 
     if (iconv(conv, &pmbch, &smbch, &puch, &such) == (size_t) -1) {
-        fprintf(stderr, "Could not convert 0x%04x "
+        fprintf(stderr, "Could not convert 0x%04lx "
                         "from a multibyte character to UCS-2 : %s\n",
-                        wch, strerror(errno));
+                        (unsigned long)wch, strerror(errno));
         return 0xFFFD;
     }
 
diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c
index e90eef8c9c..edc53f6d34 100644
--- a/ui/egl-helpers.c
+++ b/ui/egl-helpers.c
@@ -200,7 +200,8 @@ err:
     return -1;
 }
 
-int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc)
+int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc,
+                           EGLuint64KHR *modifier)
 {
     EGLImageKHR image;
     EGLint num_planes, fd;
@@ -214,7 +215,7 @@ int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc)
     }
 
     eglExportDMABUFImageQueryMESA(qemu_egl_display, image, fourcc,
-                                  &num_planes, NULL);
+                                  &num_planes, modifier);
     if (num_planes != 1) {
         eglDestroyImageKHR(qemu_egl_display, image);
         return -1;
@@ -228,20 +229,36 @@ int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc)
 void egl_dmabuf_import_texture(QemuDmaBuf *dmabuf)
 {
     EGLImageKHR image = EGL_NO_IMAGE_KHR;
-    EGLint attrs[] = {
-        EGL_DMA_BUF_PLANE0_FD_EXT,      dmabuf->fd,
-        EGL_DMA_BUF_PLANE0_PITCH_EXT,   dmabuf->stride,
-        EGL_DMA_BUF_PLANE0_OFFSET_EXT,  0,
-        EGL_WIDTH,                      dmabuf->width,
-        EGL_HEIGHT,                     dmabuf->height,
-        EGL_LINUX_DRM_FOURCC_EXT,       dmabuf->fourcc,
-        EGL_NONE, /* end of list */
-    };
+    EGLint attrs[64];
+    int i = 0;
 
     if (dmabuf->texture != 0) {
         return;
     }
 
+    attrs[i++] = EGL_WIDTH;
+    attrs[i++] = dmabuf->width;
+    attrs[i++] = EGL_HEIGHT;
+    attrs[i++] = dmabuf->height;
+    attrs[i++] = EGL_LINUX_DRM_FOURCC_EXT;
+    attrs[i++] = dmabuf->fourcc;
+
+    attrs[i++] = EGL_DMA_BUF_PLANE0_FD_EXT;
+    attrs[i++] = dmabuf->fd;
+    attrs[i++] = EGL_DMA_BUF_PLANE0_PITCH_EXT;
+    attrs[i++] = dmabuf->stride;
+    attrs[i++] = EGL_DMA_BUF_PLANE0_OFFSET_EXT;
+    attrs[i++] = 0;
+#ifdef EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT
+    if (dmabuf->modifier) {
+        attrs[i++] = EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT;
+        attrs[i++] = (dmabuf->modifier >>  0) & 0xffffffff;
+        attrs[i++] = EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT;
+        attrs[i++] = (dmabuf->modifier >> 32) & 0xffffffff;
+    }
+#endif
+    attrs[i++] = EGL_NONE;
+
     image = eglCreateImageKHR(qemu_egl_display,
                               EGL_NO_CONTEXT,
                               EGL_LINUX_DMA_BUF_EXT,
diff --git a/ui/spice-app.c b/ui/spice-app.c
index 925b27b708..30541b1022 100644
--- a/ui/spice-app.c
+++ b/ui/spice-app.c
@@ -157,9 +157,10 @@ static void spice_app_display_early_init(DisplayOptions *opts)
     qemu_opt_set(qopts, "addr", sock_path, &error_abort);
     qemu_opt_set(qopts, "image-compression", "off", &error_abort);
     qemu_opt_set(qopts, "streaming-video", "off", &error_abort);
+#ifdef CONFIG_OPENGL
     qemu_opt_set(qopts, "gl", opts->has_gl ? "on" : "off", &error_abort);
     display_opengl = opts->has_gl;
-
+#endif
     be->u.spiceport.data->fqdn = g_strdup("org.qemu.monitor.qmp.0");
     qemu_chardev_new("org.qemu.monitor.qmp", TYPE_CHARDEV_SPICEPORT,
                      be, NULL, &error_abort);
diff --git a/ui/spice-display.c b/ui/spice-display.c
index a5e26479a8..104df23025 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -888,7 +888,8 @@ static void spice_gl_switch(DisplayChangeListener *dcl,
     if (ssd->ds) {
         surface_gl_create_texture(ssd->gls, ssd->ds);
         fd = egl_get_fd_for_texture(ssd->ds->texture,
-                                    &stride, &fourcc);
+                                    &stride, &fourcc,
+                                    NULL);
         if (fd < 0) {
             surface_gl_destroy_texture(ssd->gls, ssd->ds);
             return;
@@ -945,7 +946,7 @@ static void qemu_spice_gl_scanout_texture(DisplayChangeListener *dcl,
     int fd = -1;
 
     assert(tex_id);
-    fd = egl_get_fd_for_texture(tex_id, &stride, &fourcc);
+    fd = egl_get_fd_for_texture(tex_id, &stride, &fourcc, NULL);
     if (fd < 0) {
         fprintf(stderr, "%s: failed to get fd for texture\n", __func__);
         return;
@@ -1063,7 +1064,7 @@ static void qemu_spice_gl_update(DisplayChangeListener *dcl,
                 egl_fb_setup_new_tex(&ssd->blit_fb,
                                      dmabuf->width, dmabuf->height);
                 fd = egl_get_fd_for_texture(ssd->blit_fb.texture,
-                                            &stride, &fourcc);
+                                            &stride, &fourcc, NULL);
                 spice_qxl_gl_scanout(&ssd->qxl, fd,
                                      dmabuf->width, dmabuf->height,
                                      stride, fourcc, false);
diff --git a/util/Makefile.objs b/util/Makefile.objs
index c27a923dbe..38178201ff 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -53,7 +53,7 @@ util-obj-y += systemd.o
 util-obj-y += iova-tree.o
 util-obj-$(CONFIG_INOTIFY1) += filemonitor-inotify.o
 util-obj-$(CONFIG_LINUX) += vfio-helpers.o
-util-obj-$(CONFIG_OPENGL) += drm.o
+util-obj-$(CONFIG_POSIX) += drm.o
 util-obj-y += guest-random.o
 
 stub-obj-y += filemonitor-stub.o
diff --git a/util/guest-random.c b/util/guest-random.c
index e8124a3cad..00a08fd981 100644
--- a/util/guest-random.c
+++ b/util/guest-random.c
@@ -56,7 +56,7 @@ int qemu_guest_getrandom(void *buf, size_t len, Error **errp)
 
 void qemu_guest_getrandom_nofail(void *buf, size_t len)
 {
-    qemu_guest_getrandom(buf, len, &error_fatal);
+    (void)qemu_guest_getrandom(buf, len, &error_fatal);
 }
 
 uint64_t qemu_guest_random_seed_thread_part1(void)
diff --git a/vl.c b/vl.c
index 5550bd7693..cd1fbc4cdc 100644
--- a/vl.c
+++ b/vl.c
@@ -116,7 +116,7 @@ int main(int argc, char **argv)
 #include "qapi/opts-visitor.h"
 #include "qapi/clone-visitor.h"
 #include "qom/object_interfaces.h"
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
 #include "crypto/init.h"
 #include "sysemu/replay.h"
 #include "qapi/qapi-events-run-state.h"
@@ -239,6 +239,7 @@ static struct {
     { .driver = "qxl-vga",              .flag = &default_vga       },
     { .driver = "virtio-vga",           .flag = &default_vga       },
     { .driver = "ati-vga",              .flag = &default_vga       },
+    { .driver = "vhost-user-vga",       .flag = &default_vga       },
 };
 
 static QemuOptsList qemu_rtc_opts = {
@@ -501,25 +502,6 @@ static QemuOptsList qemu_icount_opts = {
     },
 };
 
-static QemuOptsList qemu_semihosting_config_opts = {
-    .name = "semihosting-config",
-    .implied_opt_name = "enable",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_semihosting_config_opts.head),
-    .desc = {
-        {
-            .name = "enable",
-            .type = QEMU_OPT_BOOL,
-        }, {
-            .name = "target",
-            .type = QEMU_OPT_STRING,
-        }, {
-            .name = "arg",
-            .type = QEMU_OPT_STRING,
-        },
-        { /* end of list */ }
-    },
-};
-
 static QemuOptsList qemu_fw_cfg_opts = {
     .name = "fw_cfg",
     .implied_opt_name = "name",
@@ -1351,80 +1333,6 @@ static void configure_msg(QemuOpts *opts)
     enable_timestamp_msg = qemu_opt_get_bool(opts, "timestamp", true);
 }
 
-/***********************************************************/
-/* Semihosting */
-
-typedef struct SemihostingConfig {
-    bool enabled;
-    SemihostingTarget target;
-    const char **argv;
-    int argc;
-    const char *cmdline; /* concatenated argv */
-} SemihostingConfig;
-
-static SemihostingConfig semihosting;
-
-bool semihosting_enabled(void)
-{
-    return semihosting.enabled;
-}
-
-SemihostingTarget semihosting_get_target(void)
-{
-    return semihosting.target;
-}
-
-const char *semihosting_get_arg(int i)
-{
-    if (i >= semihosting.argc) {
-        return NULL;
-    }
-    return semihosting.argv[i];
-}
-
-int semihosting_get_argc(void)
-{
-    return semihosting.argc;
-}
-
-const char *semihosting_get_cmdline(void)
-{
-    if (semihosting.cmdline == NULL && semihosting.argc > 0) {
-        semihosting.cmdline = g_strjoinv(" ", (gchar **)semihosting.argv);
-    }
-    return semihosting.cmdline;
-}
-
-static int add_semihosting_arg(void *opaque,
-                               const char *name, const char *val,
-                               Error **errp)
-{
-    SemihostingConfig *s = opaque;
-    if (strcmp(name, "arg") == 0) {
-        s->argc++;
-        /* one extra element as g_strjoinv() expects NULL-terminated array */
-        s->argv = g_realloc(s->argv, (s->argc + 1) * sizeof(void *));
-        s->argv[s->argc - 1] = val;
-        s->argv[s->argc] = NULL;
-    }
-    return 0;
-}
-
-/* Use strings passed via -kernel/-append to initialize semihosting.argv[] */
-static inline void semihosting_arg_fallback(const char *file, const char *cmd)
-{
-    char *cmd_token;
-
-    /* argv[0] */
-    add_semihosting_arg(&semihosting, "arg", file, NULL);
-
-    /* split -append and initialize argv[1..n] */
-    cmd_token = strtok(g_strdup(cmd), " ");
-    while (cmd_token) {
-        add_semihosting_arg(&semihosting, "arg", cmd_token, NULL);
-        cmd_token = strtok(NULL, " ");
-    }
-}
 
 /* Now we still need this for compatibility with XEN. */
 bool has_igd_gfx_passthru;
@@ -2843,20 +2751,25 @@ static bool object_create_initial(const char *type, QemuOpts *opts)
         exit(0);
     }
 
-    if (g_str_equal(type, "rng-egd") ||
-        g_str_has_prefix(type, "pr-manager-")) {
+    /*
+     * Objects should not be made "delayed" without a reason.  If you
+     * add one, state the reason in a comment!
+     */
+
+    /* Reason: rng-egd property "chardev" */
+    if (g_str_equal(type, "rng-egd")) {
         return false;
     }
 
 #if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
+    /* Reason: cryptodev-vhost-user property "chardev" */
     if (g_str_equal(type, "cryptodev-vhost-user")) {
         return false;
     }
 #endif
 
     /*
-     * return false for concrete netfilters since
-     * they depend on netdevs already existing
+     * Reason: filter-* property "netdev" etc.
      */
     if (g_str_equal(type, "filter-buffer") ||
         g_str_equal(type, "filter-dump") ||
@@ -3668,7 +3581,23 @@ int main(int argc, char **argv, char **envp)
                                                      optarg, true);
                 optarg = qemu_opt_get(accel_opts, "accel");
                 if (!optarg || is_help_option(optarg)) {
-                    printf("Possible accelerators: kvm, xen, hax, tcg\n");
+                    printf("Accelerators supported in QEMU binary:\n");
+                    GSList *el, *accel_list = object_class_get_list(TYPE_ACCEL,
+                                                                    false);
+                    for (el = accel_list; el; el = el->next) {
+                        gchar *typename = g_strdup(object_class_get_name(
+                                                   OBJECT_CLASS(el->data)));
+                        /* omit qtest which is used for tests only */
+                        if (g_strcmp0(typename, ACCEL_CLASS_NAME("qtest")) &&
+                            g_str_has_suffix(typename, ACCEL_CLASS_SUFFIX)) {
+                            gchar **optname = g_strsplit(typename,
+                                                         ACCEL_CLASS_SUFFIX, 0);
+                            printf("%s\n", optname[0]);
+                            g_free(optname);
+                        }
+                        g_free(typename);
+                    }
+                    g_slist_free(accel_list);
                     exit(0);
                 }
                 opts = qemu_opts_create(qemu_find_opts("machine"), NULL,
@@ -3743,37 +3672,10 @@ int main(int argc, char **argv, char **envp)
                 nb_option_roms++;
                 break;
             case QEMU_OPTION_semihosting:
-                semihosting.enabled = true;
-                semihosting.target = SEMIHOSTING_TARGET_AUTO;
+                qemu_semihosting_enable();
                 break;
             case QEMU_OPTION_semihosting_config:
-                semihosting.enabled = true;
-                opts = qemu_opts_parse_noisily(qemu_find_opts("semihosting-config"),
-                                               optarg, false);
-                if (opts != NULL) {
-                    semihosting.enabled = qemu_opt_get_bool(opts, "enable",
-                                                            true);
-                    const char *target = qemu_opt_get(opts, "target");
-                    if (target != NULL) {
-                        if (strcmp("native", target) == 0) {
-                            semihosting.target = SEMIHOSTING_TARGET_NATIVE;
-                        } else if (strcmp("gdb", target) == 0) {
-                            semihosting.target = SEMIHOSTING_TARGET_GDB;
-                        } else  if (strcmp("auto", target) == 0) {
-                            semihosting.target = SEMIHOSTING_TARGET_AUTO;
-                        } else {
-                            error_report("unsupported semihosting-config %s",
-                                         optarg);
-                            exit(1);
-                        }
-                    } else {
-                        semihosting.target = SEMIHOSTING_TARGET_AUTO;
-                    }
-                    /* Set semihosting argument count and vector */
-                    qemu_opt_foreach(opts, add_semihosting_arg,
-                                     &semihosting, NULL);
-                } else {
-                    error_report("unsupported semihosting-config %s", optarg);
+                if (qemu_semihosting_config_options(optarg) != 0) {
                     exit(1);
                 }
                 break;
@@ -4290,6 +4192,8 @@ int main(int argc, char **argv, char **envp)
 
     qemu_opts_foreach(qemu_find_opts("chardev"),
                       chardev_init_func, NULL, &error_fatal);
+    /* now chardevs have been created we may have semihosting to connect */
+    qemu_semihosting_connect_chardevs();
 
 #ifdef CONFIG_VIRTFS
     qemu_opts_foreach(qemu_find_opts("fsdev"),