summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--.patchew.yml302
-rw-r--r--MAINTAINERS11
-rw-r--r--Makefile24
-rw-r--r--Makefile.objs1
-rw-r--r--block.c125
-rw-r--r--block/Makefile.objs2
-rw-r--r--block/backup.c246
-rw-r--r--block/block-backend.c50
-rw-r--r--block/commit.c13
-rw-r--r--block/crypto.c3
-rw-r--r--block/dirty-bitmap.c9
-rw-r--r--block/io.c28
-rw-r--r--block/linux-aio.c72
-rw-r--r--block/mirror.c4
-rw-r--r--block/parallels.c3
-rw-r--r--block/qcow.c3
-rw-r--r--block/qcow2-bitmap.c3
-rw-r--r--block/qcow2-cache.c1
-rw-r--r--block/qcow2-cluster.c10
-rw-r--r--block/qcow2-refcount.c40
-rw-r--r--block/qcow2-snapshot.c1
-rw-r--r--block/qcow2-threads.c268
-rw-r--r--block/qcow2.c337
-rw-r--r--block/qcow2.h30
-rw-r--r--block/qed.c3
-rw-r--r--block/quorum.c1
-rw-r--r--block/sheepdog.c3
-rw-r--r--block/trace-events4
-rw-r--r--block/vdi.c3
-rw-r--r--block/vhdx.c3
-rw-r--r--block/vmdk.c3
-rw-r--r--block/vpc.c3
-rw-r--r--blockdev.c118
-rw-r--r--blockjob.c14
-rwxr-xr-xconfigure76
-rw-r--r--contrib/libvhost-user/libvhost-user.c1
-rw-r--r--contrib/libvhost-user/libvhost-user.h1
-rw-r--r--contrib/vhost-user-gpu/50-qemu-gpu.json.in5
-rw-r--r--contrib/vhost-user-gpu/Makefile.objs10
-rw-r--r--contrib/vhost-user-gpu/main.c1185
-rw-r--r--contrib/vhost-user-gpu/virgl.c579
-rw-r--r--contrib/vhost-user-gpu/virgl.h25
-rw-r--r--contrib/vhost-user-gpu/vugbm.c328
-rw-r--r--contrib/vhost-user-gpu/vugbm.h67
-rw-r--r--contrib/vhost-user-gpu/vugpu.h177
-rw-r--r--docs/devel/multiple-iothreads.txt4
-rw-r--r--docs/index.rst1
-rw-r--r--docs/interop/index.rst1
-rw-r--r--docs/interop/vhost-user-gpu.rst242
-rw-r--r--docs/interop/vhost-user.rst9
-rw-r--r--docs/specs/index.rst13
-rw-r--r--docs/specs/ppc-spapr-xive.rst174
-rw-r--r--docs/specs/ppc-xive.rst199
-rw-r--r--hmp.c3
-rw-r--r--hw/arm/imx25_pdk.c9
-rw-r--r--hw/block/dataplane/virtio-blk.c12
-rw-r--r--hw/block/dataplane/xen-block.c6
-rw-r--r--hw/block/fdc.c2
-rw-r--r--hw/block/nvme.c106
-rw-r--r--hw/block/nvme.h2
-rw-r--r--hw/block/trace-events2
-rw-r--r--hw/block/xen-block.c2
-rw-r--r--hw/core/machine.c5
-rw-r--r--hw/core/qdev-properties-system.c41
-rw-r--r--hw/core/sysbus.c10
-rw-r--r--hw/display/Kconfig10
-rw-r--r--hw/display/Makefile.objs5
-rw-r--r--hw/display/vhost-user-gpu-pci.c51
-rw-r--r--hw/display/vhost-user-gpu.c607
-rw-r--r--hw/display/vhost-user-vga.c52
-rw-r--r--hw/display/virtio-gpu-3d.c49
-rw-r--r--hw/display/virtio-gpu-base.c268
-rw-r--r--hw/display/virtio-gpu-pci.c55
-rw-r--r--hw/display/virtio-gpu.c450
-rw-r--r--hw/display/virtio-vga.c138
-rw-r--r--hw/display/virtio-vga.h32
-rw-r--r--hw/i386/pc.c3
-rw-r--r--hw/i386/pc_q35.c16
-rw-r--r--hw/ide/qdev.c2
-rw-r--r--hw/intc/Makefile.objs1
-rw-r--r--hw/intc/spapr_xive.c193
-rw-r--r--hw/intc/spapr_xive_kvm.c823
-rw-r--r--hw/intc/xics.c10
-rw-r--r--hw/intc/xics_kvm.c113
-rw-r--r--hw/intc/xics_spapr.c7
-rw-r--r--hw/intc/xive.c53
-rw-r--r--hw/isa/i82378.c4
-rw-r--r--hw/misc/edu.c32
-rw-r--r--hw/ppc/Kconfig5
-rw-r--r--hw/ppc/pnv.c13
-rw-r--r--hw/ppc/pnv_xscom.c18
-rw-r--r--hw/ppc/prep.c7
-rw-r--r--hw/ppc/spapr.c38
-rw-r--r--hw/ppc/spapr_caps.c13
-rw-r--r--hw/ppc/spapr_cpu_core.c2
-rw-r--r--hw/ppc/spapr_hcall.c26
-rw-r--r--hw/ppc/spapr_irq.c140
-rw-r--r--hw/ppc/spapr_rtas.c6
-rw-r--r--hw/scsi/scsi-disk.c24
-rw-r--r--hw/scsi/virtio-scsi.c25
-rw-r--r--hw/usb/core.c2
-rw-r--r--hw/usb/dev-hub.c221
-rw-r--r--hw/usb/host-libusb.c21
-rw-r--r--hw/virtio/vhost-user.c11
-rw-r--r--include/block/block.h21
-rw-r--r--include/block/block_int.h1
-rw-r--r--include/block/nvme.h2
-rw-r--r--include/block/raw-aio.h3
-rw-r--r--include/exec/memory.h17
-rw-r--r--include/hw/block/block.h7
-rw-r--r--include/hw/boards.h3
-rw-r--r--include/hw/i386/pc.h3
-rw-r--r--include/hw/ppc/spapr.h2
-rw-r--r--include/hw/ppc/spapr_irq.h2
-rw-r--r--include/hw/ppc/spapr_xive.h39
-rw-r--r--include/hw/ppc/xics.h1
-rw-r--r--include/hw/ppc/xics_spapr.h1
-rw-r--r--include/hw/ppc/xive.h14
-rw-r--r--include/hw/ppc/xive_regs.h6
-rw-r--r--include/hw/qdev-properties.h3
-rw-r--r--include/hw/scsi/scsi.h1
-rw-r--r--include/hw/sysbus.h1
-rw-r--r--include/hw/virtio/vhost-backend.h2
-rw-r--r--include/hw/virtio/virtio-gpu-bswap.h61
-rw-r--r--include/hw/virtio/virtio-gpu-pci.h40
-rw-r--r--include/hw/virtio/virtio-gpu-pixman.h45
-rw-r--r--include/hw/virtio/virtio-gpu.h92
-rw-r--r--include/sysemu/block-backend.h5
-rw-r--r--memory.c8
-rw-r--r--migration/block-dirty-bitmap.c14
-rw-r--r--migration/block.c3
-rw-r--r--migration/fd.c8
-rw-r--r--migration/fd.h2
-rw-r--r--migration/ram.c27
-rw-r--r--nbd/server.c6
-rw-r--r--qapi/block-core.json26
-rw-r--r--qemu-img.c89
-rw-r--r--rules.mak9
-rwxr-xr-xscripts/checkpatch.pl3
-rw-r--r--target/i386/cpu.c3
-rw-r--r--target/i386/cpu.h1
-rw-r--r--target/mips/dsp_helper.c40
-rw-r--r--target/mips/lmi_helper.c8
-rw-r--r--target/mips/msa_helper.c542
-rw-r--r--target/mips/translate.c152
-rw-r--r--target/ppc/helper.h12
-rw-r--r--target/ppc/int_helper.c53
-rw-r--r--target/ppc/kvm.c9
-rw-r--r--target/ppc/kvm_ppc.h6
-rw-r--r--target/ppc/trace-events2
-rw-r--r--target/ppc/translate/vmx-impl.inc.c24
-rw-r--r--target/ppc/translate/vsx-impl.inc.c86
-rw-r--r--tests/Makefile.include18
-rw-r--r--tests/docker/Makefile.include1
-rw-r--r--tests/docker/dockerfiles/debian-ppc64-cross.docker11
-rw-r--r--tests/ds1338-test.c45
-rw-r--r--tests/libqos/arm-imx25-pdk-machine.c92
-rw-r--r--tests/libqos/arm-n800-machine.c92
-rw-r--r--tests/libqos/i2c-imx.c40
-rw-r--r--tests/libqos/i2c-omap.c70
-rw-r--r--tests/libqos/i2c.c74
-rw-r--r--tests/libqos/i2c.h63
-rw-r--r--tests/libqos/qgraph.c12
-rw-r--r--tests/libqos/qgraph.h15
-rw-r--r--tests/libqtest.c99
-rw-r--r--tests/libqtest.h62
-rw-r--r--tests/migration-test.c101
-rw-r--r--tests/pca9552-test.c91
-rwxr-xr-xtests/perf/block/qcow2/convert-blockstatus71
-rwxr-xr-xtests/qemu-iotests/05124
-rw-r--r--tests/qemu-iotests/051.out3
-rw-r--r--tests/qemu-iotests/051.pc.out27
-rwxr-xr-xtests/qemu-iotests/0562
-rwxr-xr-xtests/qemu-iotests/0607
-rw-r--r--tests/qemu-iotests/060.out5
-rwxr-xr-xtests/qemu-iotests/1022
-rw-r--r--tests/qemu-iotests/102.out3
-rw-r--r--tests/qemu-iotests/141.out2
-rw-r--r--tests/qemu-iotests/144.out2
-rwxr-xr-xtests/qemu-iotests/24021
-rw-r--r--tests/qemu-iotests/240.out15
-rwxr-xr-xtests/qemu-iotests/25078
-rw-r--r--tests/qemu-iotests/250.out16
-rwxr-xr-xtests/qemu-iotests/25452
-rw-r--r--tests/qemu-iotests/254.out52
-rwxr-xr-xtests/qemu-iotests/255135
-rw-r--r--tests/qemu-iotests/255.out40
-rwxr-xr-xtests/qemu-iotests/check1
-rw-r--r--tests/qemu-iotests/group3
-rw-r--r--tests/qemu-iotests/iotests.py10
-rw-r--r--tests/qos-test.c17
-rw-r--r--tests/tcg/mips/include/test_utils_128.h2
-rw-r--r--tests/tcg/mips/include/wrappers_msa.h147
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_b.c153
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_d.c153
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_h.c153
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_w.c153
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_b.c153
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_d.c153
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_h.c153
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_w.c153
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_b.c153
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_d.c153
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_h.c153
-rw-r--r--tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_w.c153
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_b.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_d.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_h.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_w.c4
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_b.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_d.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_b.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_d.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_b.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_d.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_b.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_d.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_b.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_d.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_b.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_d.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_d.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_d.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_h.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_w.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_b.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_h.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_b.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_h.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_b.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_d.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_b.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_h.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_b.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_h.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_h.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_d.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_h.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_w.c155
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_b.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_d.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_b.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_d.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_b.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_d.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_b.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_d.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_w.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_b.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_d.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_h.c12
-rw-r--r--tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_w.c10
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_b.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_d.c2
-rw-r--r--tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_h.c2
-rw-r--r--tests/tcg/ppc/Makefile.include3
-rw-r--r--tests/test-bdrv-drain.c56
-rw-r--r--tests/test-bdrv-graph-mod.c6
-rw-r--r--tests/test-block-backend.c6
-rw-r--r--tests/test-block-iothread.c104
-rw-r--r--tests/test-blockjob.c2
-rw-r--r--tests/test-thread-pool.c32
-rw-r--r--tests/test-throttle.c6
-rw-r--r--tests/tmp105-test.c112
-rw-r--r--tests/virtio-scsi-test.c63
-rw-r--r--ui/spice-app.c3
-rw-r--r--util/Makefile.objs2
-rw-r--r--vl.c32
371 files changed, 16059 insertions, 2218 deletions
diff --git a/.patchew.yml b/.patchew.yml
new file mode 100644
index 0000000000..988c29261f
--- /dev/null
+++ b/.patchew.yml
@@ -0,0 +1,302 @@
+---
+# Note: this file is still unused.  It serves as a documentation for the
+# Patchew configuration in case patchew.org disappears or has to be
+# reinstalled.
+#
+# Patchew configuration is available to project administrators at
+# https://patchew.org/api/v1/projects/1/config/ and can be configured
+# to YAML using the following Python script:
+#
+#     import json
+#     import sys
+#     import ruamel.yaml
+#
+#     json_str = sys.stdin.read()
+#     yaml = ruamel.yaml.YAML()
+#     yaml.explicit_start = True
+#     data = json.loads(json_str, object_pairs_hook=ruamel.yaml.comments.CommentedMap)
+#     ruamel.yaml.scalarstring.walk_tree(data)
+#     yaml.dump(data, sys.stdout)
+
+email:
+  notifications:
+    timeouts:
+      event: TestingReport
+      enabled: true
+      to_user: false
+      reply_subject: true
+      set_reply_to: true
+      in_reply_to: true
+      reply_to_all: false
+      subject_template: none
+      to: fam@euphon.net
+      cc: ''
+      body_template: |
+        {% if not is_timeout %} {{ cancel }} {% endif %}
+
+        Test '{{ test }}' timeout, log:
+
+        {{ log }}
+    ENOSPC:
+      event: TestingReport
+      enabled: true
+      to_user: false
+      reply_subject: false
+      set_reply_to: false
+      in_reply_to: true
+      reply_to_all: false
+      subject_template: Out of space error
+      to: fam@euphon.net
+      cc: ''
+      body_template: |
+        {% if passed %}
+          {{ cancel }}
+        {% endif %}
+
+        {% if 'No space left on device' in log %}
+        Tester {{ tester }} out of space when running {{ test }}
+
+          {{ log }}
+        {% else %}
+          {{ cancel }}
+        {% endif %}
+    FailureShort:
+      event: TestingReport
+      enabled: true
+      to_user: false
+      reply_subject: true
+      set_reply_to: true
+      in_reply_to: true
+      reply_to_all: true
+      subject_template: Testing failed
+      to: ''
+      cc: ''
+      body_template: |
+        {% if passed or not obj.message_id or is_timeout %}
+          {{ cancel }}
+        {% endif %}
+        {% if 'No space left on device' in log %}
+          {{ cancel }}
+        {% endif %}
+        Patchew URL: https://patchew.org/QEMU/{{ obj.message_id }}/
+
+        {% ansi2text log as logtext %}
+        {% if test == "checkpatch" %}
+        Hi,
+
+        This series seems to have some coding style problems. See output below for
+        more information:
+
+        {{ logtext }}
+        {% elif test == "docker-mingw@fedora" or test == "docker-quick@centos7" or test == "asan" %}
+        Hi,
+
+        This series failed the {{ test }} build test. Please find the testing commands and
+        their output below. If you have Docker installed, you can probably reproduce it
+        locally.
+
+        {% lines_between logtext start="^=== TEST SCRIPT BEGIN ===$" stop="^=== TEST SCRIPT END ===$" %}
+        {% lines_between logtext start="^=== OUTPUT BEGIN ===$" stop="=== OUTPUT END ===$" as output %}
+        {% grep_C output regex="\b(FAIL|XPASS|ERROR|WARN|error:|warning:)" n=3 %}
+        {% elif test == "s390x" or test == "FreeBSD" or test == "ppcle" or test == "ppcbe" %}
+        Hi,
+
+        This series failed build test on {{test}} host. Please find the details below.
+
+        {% lines_between logtext start="^=== TEST SCRIPT BEGIN ===$" stop="^=== TEST SCRIPT END ===$" %}
+        {% lines_between logtext start="^=== OUTPUT BEGIN ===$" stop="=== OUTPUT END ===$" as output %}
+        {% grep_C output regex="\b(FAIL|XPASS|ERROR|WARN|error:|warning:)" n=3 %}
+        {% else %}
+        {{ cancel }}
+        {% endif %}
+
+        The full log is available at
+        {{ log_url }}.
+        ---
+        Email generated automatically by Patchew [https://patchew.org/].
+        Please send your feedback to patchew-devel@redhat.com
+testing:
+  tests:
+    asan:
+      enabled: true
+      requirements: docker
+      timeout: 3600
+      script: |
+        #!/bin/bash
+        time make docker-test-debug@fedora TARGET_LIST=x86_64-softmmu J=14 NETWORK=1
+    docker-quick@centos7:
+      enabled: false
+      requirements: docker,x86_64
+      timeout: 3600
+      script: |
+        #!/bin/bash
+        time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1
+    checkpatch:
+      enabled: true
+      requirements: ''
+      timeout: 600
+      script: |
+        #!/bin/bash
+        git rev-parse base > /dev/null || exit 0
+        git config --local diff.renamelimit 0
+        git config --local diff.renames True
+        git config --local diff.algorithm histogram
+        ./scripts/checkpatch.pl --mailback base..
+    docker-mingw@fedora:
+      enabled: true
+      requirements: docker,x86_64
+      timeout: 3600
+      script: |
+        #! /bin/bash
+        test "$(uname -m)" = "x86_64"
+    ppcle:
+      enabled: false
+      requirements: ppcle
+      timeout: 3600
+      script: |
+        #!/bin/bash
+        # Testing script will be invoked under the git checkout with
+        # HEAD pointing to a commit that has the patches applied on top of "base"
+        # branch
+        set -e
+        CC=$HOME/bin/cc
+        INSTALL=$PWD/install
+        BUILD=$PWD/build
+        mkdir -p $BUILD $INSTALL
+        SRC=$PWD
+        cd $BUILD
+        $SRC/configure --cc=$CC --prefix=$INSTALL
+        make -j4
+        # XXX: we need reliable clean up
+        # make check -j4 V=1
+        make install
+
+        echo
+        echo "=== ENV ==="
+        env
+
+        echo
+        echo "=== PACKAGES ==="
+        rpm -qa
+    ppcbe:
+      enabled: false
+      requirements: ppcbe
+      timeout: 3600
+      script: |
+        #!/bin/bash
+        # Testing script will be invoked under the git checkout with
+        # HEAD pointing to a commit that has the patches applied on top of "base"
+        # branch
+        set -e
+        CC=$HOME/bin/cc
+        INSTALL=$PWD/install
+        BUILD=$PWD/build
+        mkdir -p $BUILD $INSTALL
+        SRC=$PWD
+        cd $BUILD
+        $SRC/configure --cc=$CC --prefix=$INSTALL
+        make -j4
+        # XXX: we need reliable clean up
+        # make check -j4 V=1
+        make install
+
+        echo
+        echo "=== ENV ==="
+        env
+
+        echo
+        echo "=== PACKAGES ==="
+        rpm -qa
+    FreeBSD:
+      enabled: true
+      requirements: qemu-x86,x86_64,git
+      timeout: 3600
+      script: |
+        #!/bin/bash
+        # Testing script will be invoked under the git checkout with
+        # HEAD pointing to a commit that has the patches applied on top of "base"
+        # branch
+        if qemu-system-x86_64 --help >/dev/null 2>&1; then
+          QEMU=qemu-system-x86_64
+        elif /usr/libexec/qemu-kvm --help >/dev/null 2>&1; then
+          QEMU=/usr/libexec/qemu-kvm
+        else
+          exit 1
+        fi
+        make vm-build-freebsd J=21 QEMU=$QEMU
+        exit 0
+    docker-clang@ubuntu:
+      enabled: true
+      requirements: docker,x86_64
+      timeout: 3600
+      script: |
+        #!/bin/bash
+        time make docker-test-clang@ubuntu SHOW_ENV=1 J=14 NETWORK=1
+    s390x:
+      enabled: true
+      requirements: s390x
+      timeout: 3600
+      script: |
+        #!/bin/bash
+        # Testing script will be invoked under the git checkout with
+        # HEAD pointing to a commit that has the patches applied on top of "base"
+        # branch
+        set -e
+        CC=$HOME/bin/cc
+        INSTALL=$PWD/install
+        BUILD=$PWD/build
+        mkdir -p $BUILD $INSTALL
+        SRC=$PWD
+        cd $BUILD
+        $SRC/configure --cc=$CC --prefix=$INSTALL
+        make -j4
+        # XXX: we need reliable clean up
+        # make check -j4 V=1
+        make install
+
+        echo
+        echo "=== ENV ==="
+        env
+
+        echo
+        echo "=== PACKAGES ==="
+        rpm -qa
+  requirements:
+    x86_64:
+      script: |
+        #! /bin/bash
+        test "$(uname -m)" = "x86_64"
+    qemu-x86:
+      script: |
+        #!/bin/bash
+        if qemu-system-x86_64 --help >/dev/null 2>&1; then
+          :
+        elif /usr/libexec/qemu-kvm --help >/dev/null 2>&1; then
+          :
+        else
+          exit 1
+        fi
+    ppcle:
+      script: |
+        #!/bin/bash
+        test "$(uname -m)" = "ppc64le"
+    ppcbe:
+      script: |
+        #!/bin/bash
+        test "$(uname -m)" = "ppc64"
+    git:
+      script: |
+        #! /bin/bash
+        git config user.name > /dev/null 2>&1
+    docker:
+      script: |
+        #!/bin/bash
+        docker ps || sudo -n docker ps
+    s390x:
+      script: |
+        #!/bin/bash
+        test "$(uname -m)" = "s390x"
+git:
+  push_to: git@github.com:patchew-project/qemu
+  public_repo: https://github.com/patchew-project/qemu
+  url_template: https://github.com/patchew-project/qemu/tree/%t
diff --git a/MAINTAINERS b/MAINTAINERS
index 1f5f8b7a2c..a96829ea83 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1675,9 +1675,17 @@ virtio-gpu
 M: Gerd Hoffmann <kraxel@redhat.com>
 S: Maintained
 F: hw/display/virtio-gpu*
-F: hw/display/virtio-vga.c
+F: hw/display/virtio-vga.*
 F: include/hw/virtio/virtio-gpu.h
 
+vhost-user-gpu
+M: Marc-André Lureau <marcandre.lureau@redhat.com>
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Maintained
+F: docs/interop/vhost-user-gpu.rst
+F: contrib/vhost-user-gpu
+F: hw/display/vhost-user-*
+
 Cirrus VGA
 M: Gerd Hoffmann <kraxel@redhat.com>
 S: Odd Fixes
@@ -1712,6 +1720,7 @@ L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/*/*xive*
 F: include/hw/*/*xive*
+F: docs/*/*xive*
 
 Subsystems
 ----------
diff --git a/Makefile b/Makefile
index f0be624f47..8e2fc6624c 100644
--- a/Makefile
+++ b/Makefile
@@ -314,8 +314,20 @@ $(call set-vpath, $(SRC_PATH))
 
 LIBS+=-lz $(LIBS_TOOLS)
 
+vhost-user-json-y =
+HELPERS-y =
+
 HELPERS-$(call land,$(CONFIG_SOFTMMU),$(CONFIG_LINUX)) = qemu-bridge-helper$(EXESUF)
 
+ifdef CONFIG_LINUX
+ifdef CONFIG_VIRGL
+ifdef CONFIG_GBM
+HELPERS-y += vhost-user-gpu$(EXESUF)
+vhost-user-json-y += contrib/vhost-user-gpu/50-qemu-gpu.json
+endif
+endif
+endif
+
 ifdef BUILD_DOCS
 DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
 DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7
@@ -409,6 +421,7 @@ dummy := $(call unnest-vars,, \
                 vhost-user-scsi-obj-y \
                 vhost-user-blk-obj-y \
                 vhost-user-input-obj-y \
+                vhost-user-gpu-obj-y \
                 qga-vss-dll-obj-y \
                 block-obj-y \
                 block-obj-m \
@@ -426,7 +439,7 @@ dummy := $(call unnest-vars,, \
 
 include $(SRC_PATH)/tests/Makefile.include
 
-all: $(DOCS) $(if $(BUILD_DOCS),sphinxdocs) $(TOOLS) $(HELPERS-y) recurse-all modules
+all: $(DOCS) $(if $(BUILD_DOCS),sphinxdocs) $(TOOLS) $(HELPERS-y) recurse-all modules $(vhost-user-json-y)
 
 qemu-version.h: FORCE
 	$(call quiet-command, \
@@ -619,6 +632,9 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad"
 rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS)
 	$(call LINK, $^)
 
+vhost-user-gpu$(EXESUF): $(vhost-user-gpu-obj-y) $(libvhost-user-obj-y) libqemuutil.a libqemustub.a
+	$(call LINK, $^)
+
 ifdef CONFIG_VHOST_USER_INPUT
 ifdef CONFIG_LINUX
 vhost-user-input$(EXESUF): $(vhost-user-input-obj-y) libvhost-user.a libqemuutil.a
@@ -827,6 +843,12 @@ endif
 ifneq ($(HELPERS-y),)
 	$(call install-prog,$(HELPERS-y),$(DESTDIR)$(libexecdir))
 endif
+ifneq ($(vhost-user-json-y),)
+	$(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/vhost-user/"
+	for x in $(vhost-user-json-y); do \
+		$(INSTALL_DATA) $$x "$(DESTDIR)$(qemu_datadir)/vhost-user/"; \
+	done
+endif
 ifdef CONFIG_TRACE_SYSTEMTAP
 	$(INSTALL_PROG) "scripts/qemu-trace-stap" $(DESTDIR)$(bindir)
 endif
diff --git a/Makefile.objs b/Makefile.objs
index b61568ad06..c8337fa34b 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -123,6 +123,7 @@ vhost-user-scsi-obj-y = contrib/vhost-user-scsi/
 vhost-user-blk-obj-y = contrib/vhost-user-blk/
 rdmacm-mux-obj-y = contrib/rdmacm-mux/
 vhost-user-input-obj-y = contrib/vhost-user-input/
+vhost-user-gpu-obj-y = contrib/vhost-user-gpu/
 
 ######################################################################
 trace-events-subdirs =
diff --git a/block.c b/block.c
index cb11537029..e3e77feee0 100644
--- a/block.c
+++ b/block.c
@@ -2235,6 +2235,10 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
         bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm);
         bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL, &error_abort);
         bdrv_set_perm(old_bs, perm, shared_perm);
+
+        /* When the parent requiring a non-default AioContext is removed, the
+         * node moves back to the main AioContext */
+        bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL);
     }
 
     if (new_bs) {
@@ -2243,18 +2247,31 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
     }
 }
 
+/*
+ * This function steals the reference to child_bs from the caller.
+ * That reference is later dropped by bdrv_root_unref_child().
+ *
+ * On failure NULL is returned, errp is set and the reference to
+ * child_bs is also dropped.
+ *
+ * The caller must hold the AioContext lock @child_bs, but not that of @ctx
+ * (unless @child_bs is already in @ctx).
+ */
 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                   const char *child_name,
                                   const BdrvChildRole *child_role,
+                                  AioContext *ctx,
                                   uint64_t perm, uint64_t shared_perm,
                                   void *opaque, Error **errp)
 {
     BdrvChild *child;
+    Error *local_err = NULL;
     int ret;
 
     ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp);
     if (ret < 0) {
         bdrv_abort_perm_update(child_bs);
+        bdrv_unref(child_bs);
         return NULL;
     }
 
@@ -2268,12 +2285,48 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
         .opaque         = opaque,
     };
 
+    /* If the AioContexts don't match, first try to move the subtree of
+     * child_bs into the AioContext of the new parent. If this doesn't work,
+     * try moving the parent into the AioContext of child_bs instead. */
+    if (bdrv_get_aio_context(child_bs) != ctx) {
+        ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err);
+        if (ret < 0 && child_role->can_set_aio_ctx) {
+            GSList *ignore = g_slist_prepend(NULL, child);;
+            ctx = bdrv_get_aio_context(child_bs);
+            if (child_role->can_set_aio_ctx(child, ctx, &ignore, NULL)) {
+                error_free(local_err);
+                ret = 0;
+                g_slist_free(ignore);
+                ignore = g_slist_prepend(NULL, child);;
+                child_role->set_aio_ctx(child, ctx, &ignore);
+            }
+            g_slist_free(ignore);
+        }
+        if (ret < 0) {
+            error_propagate(errp, local_err);
+            g_free(child);
+            bdrv_abort_perm_update(child_bs);
+            return NULL;
+        }
+    }
+
     /* This performs the matching bdrv_set_perm() for the above check. */
     bdrv_replace_child(child, child_bs);
 
     return child;
 }
 
+/*
+ * This function transfers the reference to child_bs from the caller
+ * to parent_bs. That reference is later dropped by parent_bs on
+ * bdrv_close() or if someone calls bdrv_unref_child().
+ *
+ * On failure NULL is returned, errp is set and the reference to
+ * child_bs is also dropped.
+ *
+ * If @parent_bs and @child_bs are in different AioContexts, the caller must
+ * hold the AioContext lock for @child_bs, but not for @parent_bs.
+ */
 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
                              BlockDriverState *child_bs,
                              const char *child_name,
@@ -2286,11 +2339,11 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
     bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
 
     assert(parent_bs->drv);
-    assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs));
     bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
                     perm, shared_perm, &perm, &shared_perm);
 
     child = bdrv_root_attach_child(child_bs, child_name, child_role,
+                                   bdrv_get_aio_context(parent_bs),
                                    perm, shared_perm, parent_bs, errp);
     if (child == NULL) {
         return NULL;
@@ -2401,12 +2454,9 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
     /* If backing_hd was already part of bs's backing chain, and
      * inherits_from pointed recursively to bs then let's update it to
      * point directly to bs (else it will become NULL). */
-    if (update_inherits_from) {
+    if (bs->backing && update_inherits_from) {
         backing_hd->inherits_from = bs;
     }
-    if (!bs->backing) {
-        bdrv_unref(backing_hd);
-    }
 
 out:
     bdrv_refresh_limits(bs, NULL);
@@ -2504,7 +2554,6 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
         ret = -EINVAL;
         goto free_exit;
     }
-    bdrv_set_aio_context(backing_hd, bdrv_get_aio_context(bs));
 
     if (implicit_backing) {
         bdrv_refresh_filename(backing_hd);
@@ -2594,7 +2643,6 @@ BdrvChild *bdrv_open_child(const char *filename,
                            const BdrvChildRole *child_role,
                            bool allow_none, Error **errp)
 {
-    BdrvChild *c;
     BlockDriverState *bs;
 
     bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role,
@@ -2603,13 +2651,7 @@ BdrvChild *bdrv_open_child(const char *filename,
         return NULL;
     }
 
-    c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
-    if (!c) {
-        bdrv_unref(bs);
-        return NULL;
-    }
-
-    return c;
+    return bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
 }
 
 /* TODO Future callers may need to specify parent/child_role in order for
@@ -2878,7 +2920,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
             /* Not requesting BLK_PERM_CONSISTENT_READ because we're only
              * looking at the header to guess the image format. This works even
              * in cases where a guest would not see a consistent state. */
-            file = blk_new(0, BLK_PERM_ALL);
+            file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL);
             blk_insert_bs(file, file_bs, &local_err);
             bdrv_unref(file_bs);
             if (local_err) {
@@ -3877,22 +3919,12 @@ static void bdrv_close(BlockDriverState *bs)
         bs->drv = NULL;
     }
 
-    bdrv_set_backing_hd(bs, NULL, &error_abort);
-
-    if (bs->file != NULL) {
-        bdrv_unref_child(bs, bs->file);
-        bs->file = NULL;
-    }
-
     QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
-        /* TODO Remove bdrv_unref() from drivers' close function and use
-         * bdrv_unref_child() here */
-        if (child->bs->inherits_from == bs) {
-            child->bs->inherits_from = NULL;
-        }
-        bdrv_detach_child(child);
+        bdrv_unref_child(bs, child);
     }
 
+    bs->backing = NULL;
+    bs->file = NULL;
     g_free(bs->opaque);
     bs->opaque = NULL;
     atomic_set(&bs->copy_on_read, 0);
@@ -4021,13 +4053,13 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
     uint64_t perm = 0, shared = BLK_PERM_ALL;
     int ret;
 
-    assert(!atomic_read(&from->in_flight));
-    assert(!atomic_read(&to->in_flight));
-
     /* Make sure that @from doesn't go away until we have successfully attached
      * all of its parents to @to. */
     bdrv_ref(from);
 
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+    bdrv_drained_begin(from);
+
     /* Put all parents into @list and calculate their cumulative permissions */
     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
         assert(c->bs == from);
@@ -4068,6 +4100,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
 
 out:
     g_slist_free(list);
+    bdrv_drained_end(from);
     bdrv_unref(from);
 }
 
@@ -5756,8 +5789,17 @@ static void bdrv_attach_aio_context(BlockDriverState *bs,
     bs->walking_aio_notifiers = false;
 }
 
-/* @ignore will accumulate all visited BdrvChild object. The caller is
- * responsible for freeing the list afterwards. */
+/*
+ * Changes the AioContext used for fd handlers, timers, and BHs by this
+ * BlockDriverState and all its children and parents.
+ *
+ * The caller must own the AioContext lock for the old AioContext of bs, but it
+ * must not own the AioContext lock for new_context (unless new_context is the
+ * same as the current context of bs).
+ *
+ * @ignore will accumulate all visited BdrvChild object. The caller is
+ * responsible for freeing the list afterwards.
+ */
 void bdrv_set_aio_context_ignore(BlockDriverState *bs,
                                  AioContext *new_context, GSList **ignore)
 {
@@ -5780,10 +5822,9 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
         if (g_slist_find(*ignore, child)) {
             continue;
         }
-        if (child->role->set_aio_ctx) {
-            *ignore = g_slist_prepend(*ignore, child);
-            child->role->set_aio_ctx(child, new_context, ignore);
-        }
+        assert(child->role->set_aio_ctx);
+        *ignore = g_slist_prepend(*ignore, child);
+        child->role->set_aio_ctx(child, new_context, ignore);
     }
 
     bdrv_detach_aio_context(bs);
@@ -5797,16 +5838,6 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
     aio_context_release(new_context);
 }
 
-/* The caller must own the AioContext lock for the old AioContext of bs, but it
- * must not own the AioContext lock for new_context (unless new_context is
- * the same as the current context of bs). */
-void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
-{
-    GSList *ignore_list = NULL;
-    bdrv_set_aio_context_ignore(bs, new_context, &ignore_list);
-    g_slist_free(ignore_list);
-}
-
 static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx,
                                             GSList **ignore, Error **errp)
 {
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 7a81892a52..ae11605c9f 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -6,7 +6,7 @@ block-obj-$(CONFIG_BOCHS) += bochs.o
 block-obj-$(CONFIG_VVFAT) += vvfat.o
 block-obj-$(CONFIG_DMG) += dmg.o
 
-block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o
+block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o qcow2-threads.o
 block-obj-$(CONFIG_QED) += qed.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-$(CONFIG_QED) += qed-check.o
 block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
diff --git a/block/backup.c b/block/backup.c
index 916817d8b1..715e1d3be8 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -112,7 +112,8 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
     int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
     int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;
 
-    hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
+    assert(QEMU_IS_ALIGNED(start, job->cluster_size));
+    hbitmap_reset(job->copy_bitmap, start, job->cluster_size);
     nbytes = MIN(job->cluster_size, job->len - start);
     if (!*bounce_buffer) {
         *bounce_buffer = blk_blockalign(blk, job->cluster_size);
@@ -145,7 +146,7 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
 
     return nbytes;
 fail:
-    hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
+    hbitmap_set(job->copy_bitmap, start, job->cluster_size);
     return ret;
 
 }
@@ -165,16 +166,15 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
     int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;
 
     assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
+    assert(QEMU_IS_ALIGNED(start, job->cluster_size));
     nbytes = MIN(job->copy_range_size, end - start);
     nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
-    hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
-                  nr_clusters);
+    hbitmap_reset(job->copy_bitmap, start, job->cluster_size * nr_clusters);
     ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
                             read_flags, write_flags);
     if (ret < 0) {
         trace_backup_do_cow_copy_range_fail(job, start, ret);
-        hbitmap_set(job->copy_bitmap, start / job->cluster_size,
-                    nr_clusters);
+        hbitmap_set(job->copy_bitmap, start, job->cluster_size * nr_clusters);
         return ret;
     }
 
@@ -202,7 +202,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
     cow_request_begin(&cow_request, job, start, end);
 
     while (start < end) {
-        if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
+        if (!hbitmap_get(job->copy_bitmap, start)) {
             trace_backup_do_cow_skip(job, start);
             start += job->cluster_size;
             continue; /* already copied */
@@ -298,12 +298,16 @@ static void backup_clean(Job *job)
     assert(s->target);
     blk_unref(s->target);
     s->target = NULL;
+
+    if (s->copy_bitmap) {
+        hbitmap_free(s->copy_bitmap);
+        s->copy_bitmap = NULL;
+    }
 }
 
 void backup_do_checkpoint(BlockJob *job, Error **errp)
 {
     BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
-    int64_t len;
 
     assert(block_job_driver(job) == &backup_job_driver);
 
@@ -313,8 +317,7 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
         return;
     }
 
-    len = DIV_ROUND_UP(backup_job->len, backup_job->cluster_size);
-    hbitmap_set(backup_job->copy_bitmap, 0, len);
+    hbitmap_set(backup_job->copy_bitmap, 0, backup_job->len);
 }
 
 static void backup_drain(BlockJob *job)
@@ -365,20 +368,44 @@ static bool coroutine_fn yield_and_check(BackupBlockJob *job)
     return false;
 }
 
-static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
+static bool bdrv_is_unallocated_range(BlockDriverState *bs,
+                                      int64_t offset, int64_t bytes)
+{
+    int64_t end = offset + bytes;
+
+    while (offset < end && !bdrv_is_allocated(bs, offset, bytes, &bytes)) {
+        if (bytes == 0) {
+            return true;
+        }
+        offset += bytes;
+        bytes = end - offset;
+    }
+
+    return offset >= end;
+}
+
+static int coroutine_fn backup_loop(BackupBlockJob *job)
 {
     int ret;
     bool error_is_read;
-    int64_t cluster;
+    int64_t offset;
     HBitmapIter hbi;
+    BlockDriverState *bs = blk_bs(job->common.blk);
 
     hbitmap_iter_init(&hbi, job->copy_bitmap, 0);
-    while ((cluster = hbitmap_iter_next(&hbi)) != -1) {
+    while ((offset = hbitmap_iter_next(&hbi)) != -1) {
+        if (job->sync_mode == MIRROR_SYNC_MODE_TOP &&
+            bdrv_is_unallocated_range(bs, offset, job->cluster_size))
+        {
+            hbitmap_reset(job->copy_bitmap, offset, job->cluster_size);
+            continue;
+        }
+
         do {
             if (yield_and_check(job)) {
                 return 0;
             }
-            ret = backup_do_cow(job, cluster * job->cluster_size,
+            ret = backup_do_cow(job, offset,
                                 job->cluster_size, &error_is_read, false);
             if (ret < 0 && backup_error_action(job, error_is_read, -ret) ==
                            BLOCK_ERROR_ACTION_REPORT)
@@ -394,66 +421,43 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
 /* init copy_bitmap from sync_bitmap */
 static void backup_incremental_init_copy_bitmap(BackupBlockJob *job)
 {
-    BdrvDirtyBitmapIter *dbi;
-    int64_t offset;
-    int64_t end = DIV_ROUND_UP(bdrv_dirty_bitmap_size(job->sync_bitmap),
-                               job->cluster_size);
+    uint64_t offset = 0;
+    uint64_t bytes = job->len;
 
-    dbi = bdrv_dirty_iter_new(job->sync_bitmap);
-    while ((offset = bdrv_dirty_iter_next(dbi)) != -1) {
-        int64_t cluster = offset / job->cluster_size;
-        int64_t next_cluster;
-
-        offset += bdrv_dirty_bitmap_granularity(job->sync_bitmap);
-        if (offset >= bdrv_dirty_bitmap_size(job->sync_bitmap)) {
-            hbitmap_set(job->copy_bitmap, cluster, end - cluster);
-            break;
-        }
+    while (bdrv_dirty_bitmap_next_dirty_area(job->sync_bitmap,
+                                             &offset, &bytes))
+    {
+        hbitmap_set(job->copy_bitmap, offset, bytes);
 
-        offset = bdrv_dirty_bitmap_next_zero(job->sync_bitmap, offset,
-                                             UINT64_MAX);
-        if (offset == -1) {
-            hbitmap_set(job->copy_bitmap, cluster, end - cluster);
+        offset += bytes;
+        if (offset >= job->len) {
             break;
         }
-
-        next_cluster = DIV_ROUND_UP(offset, job->cluster_size);
-        hbitmap_set(job->copy_bitmap, cluster, next_cluster - cluster);
-        if (next_cluster >= end) {
-            break;
-        }
-
-        bdrv_set_dirty_iter(dbi, next_cluster * job->cluster_size);
+        bytes = job->len - offset;
     }
 
     /* TODO job_progress_set_remaining() would make more sense */
     job_progress_update(&job->common.job,
-        job->len - hbitmap_count(job->copy_bitmap) * job->cluster_size);
-
-    bdrv_dirty_iter_free(dbi);
+        job->len - hbitmap_count(job->copy_bitmap));
 }
 
 static int coroutine_fn backup_run(Job *job, Error **errp)
 {
     BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
     BlockDriverState *bs = blk_bs(s->common.blk);
-    int64_t offset, nb_clusters;
     int ret = 0;
 
     QLIST_INIT(&s->inflight_reqs);
     qemu_co_rwlock_init(&s->flush_rwlock);
 
-    nb_clusters = DIV_ROUND_UP(s->len, s->cluster_size);
     job_progress_set_remaining(job, s->len);
 
-    s->copy_bitmap = hbitmap_alloc(nb_clusters, 0);
     if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
         backup_incremental_init_copy_bitmap(s);
     } else {
-        hbitmap_set(s->copy_bitmap, 0, nb_clusters);
+        hbitmap_set(s->copy_bitmap, 0, s->len);
     }
 
-
     s->before_write.notify = backup_before_write_notify;
     bdrv_add_before_write_notifier(bs, &s->before_write);
 
@@ -465,68 +469,8 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
              * notify callback service CoW requests. */
             job_yield(job);
         }
-    } else if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
-        ret = backup_run_incremental(s);
     } else {
-        /* Both FULL and TOP SYNC_MODE's require copying.. */
-        for (offset = 0; offset < s->len;
-             offset += s->cluster_size) {
-            bool error_is_read;
-            int alloced = 0;
-
-            if (yield_and_check(s)) {
-                break;
-            }
-
-            if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
-                int i;
-                int64_t n;
-
-                /* Check to see if these blocks are already in the
-                 * backing file. */
-
-                for (i = 0; i < s->cluster_size;) {
-                    /* bdrv_is_allocated() only returns true/false based
-                     * on the first set of sectors it comes across that
-                     * are are all in the same state.
-                     * For that reason we must verify each sector in the
-                     * backup cluster length.  We end up copying more than
-                     * needed but at some point that is always the case. */
-                    alloced =
-                        bdrv_is_allocated(bs, offset + i,
-                                          s->cluster_size - i, &n);
-                    i += n;
-
-                    if (alloced || n == 0) {
-                        break;
-                    }
-                }
-
-                /* If the above loop never found any sectors that are in
-                 * the topmost image, skip this backup. */
-                if (alloced == 0) {
-                    continue;
-                }
-            }
-            /* FULL sync mode we copy the whole drive. */
-            if (alloced < 0) {
-                ret = alloced;
-            } else {
-                ret = backup_do_cow(s, offset, s->cluster_size,
-                                    &error_is_read, false);
-            }
-            if (ret < 0) {
-                /* Depending on error action, fail now or retry cluster */
-                BlockErrorAction action =
-                    backup_error_action(s, error_is_read, -ret);
-                if (action == BLOCK_ERROR_ACTION_REPORT) {
-                    break;
-                } else {
-                    offset -= s->cluster_size;
-                    continue;
-                }
-            }
-        }
+        ret = backup_loop(s);
     }
 
     notifier_with_return_remove(&s->before_write);
@@ -534,7 +478,6 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
     /* wait until pending backup_do_cow() calls have completed */
     qemu_co_rwlock_wrlock(&s->flush_rwlock);
     qemu_co_rwlock_unlock(&s->flush_rwlock);
-    hbitmap_free(s->copy_bitmap);
 
     return ret;
 }
@@ -554,6 +497,42 @@ static const BlockJobDriver backup_job_driver = {
     .drain                  = backup_drain,
 };
 
+static int64_t backup_calculate_cluster_size(BlockDriverState *target,
+                                             Error **errp)
+{
+    int ret;
+    BlockDriverInfo bdi;
+
+    /*
+     * If there is no backing file on the target, we cannot rely on COW if our
+     * backup cluster size is smaller than the target cluster size. Even for
+     * targets with a backing file, try to avoid COW if possible.
+     */
+    ret = bdrv_get_info(target, &bdi);
+    if (ret == -ENOTSUP && !target->backing) {
+        /* Cluster size is not defined */
+        warn_report("The target block device doesn't provide "
+                    "information about the block size and it doesn't have a "
+                    "backing file. The default block size of %u bytes is "
+                    "used. If the actual block size of the target exceeds "
+                    "this default, the backup may be unusable",
+                    BACKUP_CLUSTER_SIZE_DEFAULT);
+        return BACKUP_CLUSTER_SIZE_DEFAULT;
+    } else if (ret < 0 && !target->backing) {
+        error_setg_errno(errp, -ret,
+            "Couldn't determine the cluster size of the target image, "
+            "which has no backing file");
+        error_append_hint(errp,
+            "Aborting, since this may create an unusable destination image\n");
+        return ret;
+    } else if (ret < 0 && target->backing) {
+        /* Not fatal; just trudge on ahead. */
+        return BACKUP_CLUSTER_SIZE_DEFAULT;
+    }
+
+    return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+}
+
 BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *target, int64_t speed,
                   MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
@@ -565,9 +544,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                   JobTxn *txn, Error **errp)
 {
     int64_t len;
-    BlockDriverInfo bdi;
     BackupBlockJob *job = NULL;
     int ret;
+    int64_t cluster_size;
+    HBitmap *copy_bitmap = NULL;
 
     assert(bs);
     assert(target);
@@ -629,6 +609,13 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
         goto error;
     }
 
+    cluster_size = backup_calculate_cluster_size(target, errp);
+    if (cluster_size < 0) {
+        goto error;
+    }
+
+    copy_bitmap = hbitmap_alloc(len, ctz32(cluster_size));
+
     /* job->len is fixed, so we can't allow resize */
     job = block_job_create(job_id, &backup_job_driver, txn, bs,
                            BLK_PERM_CONSISTENT_READ,
@@ -640,7 +627,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     }
 
     /* The target must match the source in size, so no resize here either */
-    job->target = blk_new(BLK_PERM_WRITE,
+    job->target = blk_new(job->common.job.aio_context,
+                          BLK_PERM_WRITE,
                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
                           BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
     ret = blk_insert_bs(job->target, target, errp);
@@ -657,33 +645,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
 
     /* Detect image-fleecing (and similar) schemes */
     job->serialize_target_writes = bdrv_chain_contains(target, bs);
-
-    /* If there is no backing file on the target, we cannot rely on COW if our
-     * backup cluster size is smaller than the target cluster size. Even for
-     * targets with a backing file, try to avoid COW if possible. */
-    ret = bdrv_get_info(target, &bdi);
-    if (ret == -ENOTSUP && !target->backing) {
-        /* Cluster size is not defined */
-        warn_report("The target block device doesn't provide "
-                    "information about the block size and it doesn't have a "
-                    "backing file. The default block size of %u bytes is "
-                    "used. If the actual block size of the target exceeds "
-                    "this default, the backup may be unusable",
-                    BACKUP_CLUSTER_SIZE_DEFAULT);
-        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
-    } else if (ret < 0 && !target->backing) {
-        error_setg_errno(errp, -ret,
-            "Couldn't determine the cluster size of the target image, "
-            "which has no backing file");
-        error_append_hint(errp,
-            "Aborting, since this may create an unusable destination image\n");
-        goto error;
-    } else if (ret < 0 && target->backing) {
-        /* Not fatal; just trudge on ahead. */
-        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
-    } else {
-        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
-    }
+    job->cluster_size = cluster_size;
+    job->copy_bitmap = copy_bitmap;
+    copy_bitmap = NULL;
     job->use_copy_range = true;
     job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
                                         blk_get_max_transfer(job->target));
@@ -699,6 +663,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     return &job->common;
 
  error:
+    if (copy_bitmap) {
+        assert(!job || !job->copy_bitmap);
+        hbitmap_free(copy_bitmap);
+    }
     if (sync_bitmap) {
         bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
     }
diff --git a/block/block-backend.c b/block/block-backend.c
index 4c0a8ef88d..f5d9407d20 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -42,6 +42,7 @@ struct BlockBackend {
     char *name;
     int refcnt;
     BdrvChild *root;
+    AioContext *ctx;
     DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
     QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
     QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
@@ -322,12 +323,13 @@ static const BdrvChildRole child_root = {
  *
  * Return the new BlockBackend on success, null on failure.
  */
-BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
+BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
 {
     BlockBackend *blk;
 
     blk = g_new0(BlockBackend, 1);
     blk->refcnt = 1;
+    blk->ctx = ctx;
     blk->perm = perm;
     blk->shared_perm = shared_perm;
     blk_set_enable_write_cache(blk, true);
@@ -347,6 +349,7 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
 
 /*
  * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
+ * The new BlockBackend is in the main AioContext.
  *
  * Just as with bdrv_open(), after having called this function the reference to
  * @options belongs to the block layer (even on failure).
@@ -382,17 +385,16 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
         perm |= BLK_PERM_RESIZE;
     }
 
-    blk = blk_new(perm, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), perm, BLK_PERM_ALL);
     bs = bdrv_open(filename, reference, options, flags, errp);
     if (!bs) {
         blk_unref(blk);
         return NULL;
     }
 
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk->ctx,
                                        perm, BLK_PERM_ALL, blk, errp);
     if (!blk->root) {
-        bdrv_unref(bs);
         blk_unref(blk);
         return NULL;
     }
@@ -800,12 +802,12 @@ void blk_remove_bs(BlockBackend *blk)
 int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
 {
     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+    bdrv_ref(bs);
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk->ctx,
                                        blk->perm, blk->shared_perm, blk, errp);
     if (blk->root == NULL) {
         return -EPERM;
     }
-    bdrv_ref(bs);
 
     notifier_list_notify(&blk->insert_bs_notifiers, blk);
     if (tgm->throttle_state) {
@@ -1857,7 +1859,14 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
 
 AioContext *blk_get_aio_context(BlockBackend *blk)
 {
-    return bdrv_get_aio_context(blk_bs(blk));
+    BlockDriverState *bs = blk_bs(blk);
+
+    if (bs) {
+        AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
+        assert(ctx == blk->ctx);
+    }
+
+    return blk->ctx;
 }
 
 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
@@ -1866,30 +1875,37 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
     return blk_get_aio_context(blk_acb->blk);
 }
 
-static void blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
-                                   bool update_root_node)
+static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
+                                  bool update_root_node, Error **errp)
 {
     BlockDriverState *bs = blk_bs(blk);
     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
+    int ret;
 
     if (bs) {
+        if (update_root_node) {
+            ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root,
+                                                 errp);
+            if (ret < 0) {
+                return ret;
+            }
+        }
         if (tgm->throttle_state) {
             bdrv_drained_begin(bs);
             throttle_group_detach_aio_context(tgm);
             throttle_group_attach_aio_context(tgm, new_context);
             bdrv_drained_end(bs);
         }
-        if (update_root_node) {
-            GSList *ignore = g_slist_prepend(NULL, blk->root);
-            bdrv_set_aio_context_ignore(bs, new_context, &ignore);
-            g_slist_free(ignore);
-        }
     }
+
+    blk->ctx = new_context;
+    return 0;
 }
 
-void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
+int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
+                        Error **errp)
 {
-    blk_do_set_aio_context(blk, new_context, true);
+    return blk_do_set_aio_context(blk, new_context, true, errp);
 }
 
 static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
@@ -1916,7 +1932,7 @@ static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
                                  GSList **ignore)
 {
     BlockBackend *blk = child->opaque;
-    blk_do_set_aio_context(blk, ctx, false);
+    blk_do_set_aio_context(blk, ctx, false, &error_abort);
 }
 
 void blk_add_aio_context_notifier(BlockBackend *blk,
diff --git a/block/commit.c b/block/commit.c
index 14e5bb394c..c815def89a 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -301,7 +301,6 @@ void commit_start(const char *job_id, BlockDriverState *bs,
         commit_top_bs->implicit = true;
     }
     commit_top_bs->total_sectors = top->total_sectors;
-    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(top));
 
     bdrv_append(commit_top_bs, top, &local_err);
     if (local_err) {
@@ -338,7 +337,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
         goto fail;
     }
 
-    s->base = blk_new(BLK_PERM_CONSISTENT_READ
+    s->base = blk_new(s->common.job.aio_context,
+                      BLK_PERM_CONSISTENT_READ
                       | BLK_PERM_WRITE
                       | BLK_PERM_RESIZE,
                       BLK_PERM_CONSISTENT_READ
@@ -351,7 +351,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
     s->base_bs = base;
 
     /* Required permissions are already taken with block_job_add_bdrv() */
-    s->top = blk_new(0, BLK_PERM_ALL);
+    s->top = blk_new(s->common.job.aio_context, 0, BLK_PERM_ALL);
     ret = blk_insert_bs(s->top, top, errp);
     if (ret < 0) {
         goto fail;
@@ -395,6 +395,7 @@ int bdrv_commit(BlockDriverState *bs)
     BlockDriverState *backing_file_bs = NULL;
     BlockDriverState *commit_top_bs = NULL;
     BlockDriver *drv = bs->drv;
+    AioContext *ctx;
     int64_t offset, length, backing_length;
     int ro;
     int64_t n;
@@ -422,8 +423,9 @@ int bdrv_commit(BlockDriverState *bs)
         }
     }
 
-    src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
-    backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    ctx = bdrv_get_aio_context(bs);
+    src = blk_new(ctx, BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
+    backing = blk_new(ctx, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
 
     ret = blk_insert_bs(src, bs, &local_err);
     if (ret < 0) {
@@ -440,7 +442,6 @@ int bdrv_commit(BlockDriverState *bs)
         error_report_err(local_err);
         goto ro_cleanup;
     }
-    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(backing_file_bs));
 
     bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
     bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
diff --git a/block/crypto.c b/block/crypto.c
index 3af46b805f..7351fd479d 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -257,7 +257,8 @@ static int block_crypto_co_create_generic(BlockDriverState *bs,
     QCryptoBlock *crypto = NULL;
     struct BlockCryptoCreateData data;
 
-    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
 
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 59e6ebb861..49646a30e6 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -816,10 +816,10 @@ void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
 {
     bool ret;
 
-    /* only bitmaps from one bds are supported */
-    assert(dest->mutex == src->mutex);
-
     qemu_mutex_lock(dest->mutex);
+    if (src->mutex != dest->mutex) {
+        qemu_mutex_lock(src->mutex);
+    }
 
     if (bdrv_dirty_bitmap_check(dest, BDRV_BITMAP_DEFAULT, errp)) {
         goto out;
@@ -845,4 +845,7 @@ void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
 
 out:
     qemu_mutex_unlock(dest->mutex);
+    if (src->mutex != dest->mutex) {
+        qemu_mutex_unlock(src->mutex);
+    }
 }
diff --git a/block/io.c b/block/io.c
index 3134a60a48..9ba1bada36 100644
--- a/block/io.c
+++ b/block/io.c
@@ -422,11 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
         return;
     }
     assert(bs->quiesce_counter > 0);
-    old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
 
     /* Re-enable things in child-to-parent order */
     bdrv_drain_invoke(bs, false);
     bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
+
+    old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
     if (old_quiesce_counter == 1) {
         aio_enable_external(bdrv_get_aio_context(bs));
     }
@@ -2092,6 +2093,12 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
      */
     assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
            align > offset - aligned_offset);
+    if (ret & BDRV_BLOCK_RECURSE) {
+        assert(ret & BDRV_BLOCK_DATA);
+        assert(ret & BDRV_BLOCK_OFFSET_VALID);
+        assert(!(ret & BDRV_BLOCK_ZERO));
+    }
+
     *pnum -= offset - aligned_offset;
     if (*pnum > bytes) {
         *pnum = bytes;
@@ -2122,7 +2129,8 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
         }
     }
 
-    if (want_zero && local_file && local_file != bs &&
+    if (want_zero && ret & BDRV_BLOCK_RECURSE &&
+        local_file && local_file != bs &&
         (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
         (ret & BDRV_BLOCK_OFFSET_VALID)) {
         int64_t file_pnum;
@@ -2624,7 +2632,7 @@ int bdrv_flush(BlockDriverState *bs)
 typedef struct DiscardCo {
     BdrvChild *child;
     int64_t offset;
-    int bytes;
+    int64_t bytes;
     int ret;
 } DiscardCo;
 static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
@@ -2635,14 +2643,15 @@ static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
     aio_wait_kick();
 }
 
-int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes)
+int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
+                                  int64_t bytes)
 {
     BdrvTrackedRequest req;
     int max_pdiscard, ret;
     int head, tail, align;
     BlockDriverState *bs = child->bs;
 
-    if (!bs || !bs->drv) {
+    if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
         return -ENOMEDIUM;
     }
 
@@ -2650,9 +2659,8 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes)
         return -EPERM;
     }
 
-    ret = bdrv_check_byte_request(bs, offset, bytes);
-    if (ret < 0) {
-        return ret;
+    if (offset < 0 || bytes < 0 || bytes > INT64_MAX - offset) {
+        return -EIO;
     }
 
     /* Do nothing if disabled.  */
@@ -2687,7 +2695,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes)
     assert(max_pdiscard >= bs->bl.request_alignment);
 
     while (bytes > 0) {
-        int num = bytes;
+        int64_t num = bytes;
 
         if (head) {
             /* Make small requests to get to alignment boundaries. */
@@ -2749,7 +2757,7 @@ out:
     return ret;
 }
 
-int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes)
+int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes)
 {
     Coroutine *co;
     DiscardCo rwco = {
diff --git a/block/linux-aio.c b/block/linux-aio.c
index d4b61fb251..27100c2fd1 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -30,7 +30,6 @@
 #define MAX_EVENTS 128
 
 struct qemu_laiocb {
-    BlockAIOCB common;
     Coroutine *co;
     LinuxAioState *ctx;
     struct iocb iocb;
@@ -72,7 +71,7 @@ static inline ssize_t io_event_ret(struct io_event *ev)
 }
 
 /*
- * Completes an AIO request (calls the callback and frees the ACB).
+ * Completes an AIO request.
  */
 static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
 {
@@ -94,18 +93,15 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
     }
 
     laiocb->ret = ret;
-    if (laiocb->co) {
-        /* If the coroutine is already entered it must be in ioq_submit() and
-         * will notice laio->ret has been filled in when it eventually runs
-         * later.  Coroutines cannot be entered recursively so avoid doing
-         * that!
-         */
-        if (!qemu_coroutine_entered(laiocb->co)) {
-            aio_co_wake(laiocb->co);
-        }
-    } else {
-        laiocb->common.cb(laiocb->common.opaque, ret);
-        qemu_aio_unref(laiocb);
+
+    /*
+     * If the coroutine is already entered it must be in ioq_submit() and
+     * will notice laio->ret has been filled in when it eventually runs
+     * later.  Coroutines cannot be entered recursively so avoid doing
+     * that!
+     */
+    if (!qemu_coroutine_entered(laiocb->co)) {
+        aio_co_wake(laiocb->co);
     }
 }
 
@@ -273,30 +269,6 @@ static bool qemu_laio_poll_cb(void *opaque)
     return true;
 }
 
-static void laio_cancel(BlockAIOCB *blockacb)
-{
-    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
-    struct io_event event;
-    int ret;
-
-    if (laiocb->ret != -EINPROGRESS) {
-        return;
-    }
-    ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
-    laiocb->ret = -ECANCELED;
-    if (ret != 0) {
-        /* iocb is not cancelled, cb will be called by the event loop later */
-        return;
-    }
-
-    laiocb->common.cb(laiocb->common.opaque, laiocb->ret);
-}
-
-static const AIOCBInfo laio_aiocb_info = {
-    .aiocb_size         = sizeof(struct qemu_laiocb),
-    .cancel_async       = laio_cancel,
-};
-
 static void ioq_init(LaioQueue *io_q)
 {
     QSIMPLEQ_INIT(&io_q->pending);
@@ -431,30 +403,6 @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
     return laiocb.ret;
 }
 
-BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
-{
-    struct qemu_laiocb *laiocb;
-    off_t offset = sector_num * BDRV_SECTOR_SIZE;
-    int ret;
-
-    laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
-    laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE;
-    laiocb->ctx = s;
-    laiocb->ret = -EINPROGRESS;
-    laiocb->is_read = (type == QEMU_AIO_READ);
-    laiocb->qiov = qiov;
-
-    ret = laio_do_submit(fd, laiocb, offset, type);
-    if (ret < 0) {
-        qemu_aio_unref(laiocb);
-        return NULL;
-    }
-
-    return &laiocb->common;
-}
-
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
 {
     aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
diff --git a/block/mirror.c b/block/mirror.c
index ec4bd9f404..f8bdb5b21b 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1543,7 +1543,6 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                                           BDRV_REQ_NO_FALLBACK;
     bs_opaque = g_new0(MirrorBDSOpaque, 1);
     mirror_top_bs->opaque = bs_opaque;
-    bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));
 
     /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
      * it alive until block_job_create() succeeds even if bs has no parent. */
@@ -1584,7 +1583,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
      * We can allow anything except resize there.*/
     target_is_backing = bdrv_chain_contains(bs, target);
     target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
-    s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE |
+    s->target = blk_new(s->common.job.aio_context,
+                        BLK_PERM_WRITE | BLK_PERM_RESIZE |
                         (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
                         BLK_PERM_WRITE_UNCHANGED |
                         (target_is_backing ? BLK_PERM_CONSISTENT_READ |
diff --git a/block/parallels.c b/block/parallels.c
index 2747400577..00fae125d1 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -554,7 +554,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
         return -EIO;
     }
 
-    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto out;
diff --git a/block/qcow.c b/block/qcow.c
index 1bb8fd05e2..6dee5bb792 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -844,7 +844,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
         return -EIO;
     }
 
-    qcow_blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    qcow_blk = blk_new(bdrv_get_aio_context(bs),
+                       BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(qcow_blk, bs, errp);
     if (ret < 0) {
         goto exit;
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 8a75366c92..b2487101ed 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -29,7 +29,6 @@
 #include "qapi/error.h"
 #include "qemu/cutils.h"
 
-#include "block/block_int.h"
 #include "qcow2.h"
 
 /* NOTICE: BME here means Bitmaps Extension and used as a namespace for
@@ -754,7 +753,7 @@ static int bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list,
         dir_offset = *offset;
     }
 
-    dir = g_try_malloc(dir_size);
+    dir = g_try_malloc0(dir_size);
     if (dir == NULL) {
         return -ENOMEM;
     }
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index df02e7b20a..b33bcbc984 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -23,7 +23,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "block/block_int.h"
 #include "qemu-common.h"
 #include "qcow2.h"
 #include "trace.h"
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index b36f4aa84a..cf892f37a8 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -27,7 +27,6 @@
 
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "block/block_int.h"
 #include "qcow2.h"
 #include "qemu/bswap.h"
 #include "trace.h"
@@ -472,13 +471,12 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs,
 {
     if (bytes && bs->encrypted) {
         BDRVQcow2State *s = bs->opaque;
-        int64_t offset = (s->crypt_physical_offset ?
-                          (cluster_offset + offset_in_cluster) :
-                          (src_cluster_offset + offset_in_cluster));
         assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
         assert((bytes & ~BDRV_SECTOR_MASK) == 0);
         assert(s->crypto);
-        if (qcrypto_block_encrypt(s->crypto, offset, buffer, bytes, NULL) < 0) {
+        if (qcow2_co_encrypt(bs, cluster_offset,
+                             src_cluster_offset + offset_in_cluster,
+                             buffer, bytes) < 0) {
             return false;
         }
     }
@@ -833,7 +831,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
     assert(start->offset + start->nb_bytes <= end->offset);
     assert(!m->data_qiov || m->data_qiov->size == data_bytes);
 
-    if (start->nb_bytes == 0 && end->nb_bytes == 0) {
+    if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) {
         return 0;
     }
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 0b09d6838b..22cadd79d5 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -25,11 +25,11 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "block/block_int.h"
 #include "qcow2.h"
 #include "qemu/range.h"
 #include "qemu/bswap.h"
 #include "qemu/cutils.h"
+#include "trace.h"
 
 static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
                                     uint64_t max);
@@ -738,7 +738,11 @@ void qcow2_process_discards(BlockDriverState *bs, int ret)
 
         /* Discard is optional, ignore the return value */
         if (ret >= 0) {
-            bdrv_pdiscard(bs->file, d->offset, d->bytes);
+            int r2 = bdrv_pdiscard(bs->file, d->offset, d->bytes);
+            if (r2 < 0) {
+                trace_qcow2_process_discards_failed_region(d->offset, d->bytes,
+                                                           r2);
+            }
         }
 
         g_free(d);
@@ -3445,3 +3449,35 @@ int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
                             "There are no references in the refcount table.");
     return -EIO;
 }
+
+int qcow2_detect_metadata_preallocation(BlockDriverState *bs)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int64_t i, end_cluster, cluster_count = 0, threshold;
+    int64_t file_length, real_allocation, real_clusters;
+
+    file_length = bdrv_getlength(bs->file->bs);
+    if (file_length < 0) {
+        return file_length;
+    }
+
+    real_allocation = bdrv_get_allocated_file_size(bs->file->bs);
+    if (real_allocation < 0) {
+        return real_allocation;
+    }
+
+    real_clusters = real_allocation / s->cluster_size;
+    threshold = MAX(real_clusters * 10 / 9, real_clusters + 2);
+
+    end_cluster = size_to_clusters(s, file_length);
+    for (i = 0; i < end_cluster && cluster_count < threshold; i++) {
+        uint64_t refcount;
+        int ret = qcow2_get_refcount(bs, i, &refcount);
+        if (ret < 0) {
+            return ret;
+        }
+        cluster_count += !!refcount;
+    }
+
+    return cluster_count >= threshold;
+}
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index a6ffae89a6..d0e7fa9311 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -24,7 +24,6 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
-#include "block/block_int.h"
 #include "qcow2.h"
 #include "qemu/bswap.h"
 #include "qemu/error-report.h"
diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
new file mode 100644
index 0000000000..3b1e63fe41
--- /dev/null
+++ b/block/qcow2-threads.c
@@ -0,0 +1,268 @@
+/*
+ * Threaded data processing for Qcow2: compression, encryption
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ * Copyright (c) 2018 Virtuozzo International GmbH. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+
+#define ZLIB_CONST
+#include <zlib.h>
+
+#include "qcow2.h"
+#include "block/thread-pool.h"
+#include "crypto.h"
+
+static int coroutine_fn
+qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, void *arg)
+{
+    int ret;
+    BDRVQcow2State *s = bs->opaque;
+    ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+
+    qemu_co_mutex_lock(&s->lock);
+    while (s->nb_threads >= QCOW2_MAX_THREADS) {
+        qemu_co_queue_wait(&s->thread_task_queue, &s->lock);
+    }
+    s->nb_threads++;
+    qemu_co_mutex_unlock(&s->lock);
+
+    ret = thread_pool_submit_co(pool, func, arg);
+
+    qemu_co_mutex_lock(&s->lock);
+    s->nb_threads--;
+    qemu_co_queue_next(&s->thread_task_queue);
+    qemu_co_mutex_unlock(&s->lock);
+
+    return ret;
+}
+
+
+/*
+ * Compression
+ */
+
+typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size,
+                                     const void *src, size_t src_size);
+typedef struct Qcow2CompressData {
+    void *dest;
+    size_t dest_size;
+    const void *src;
+    size_t src_size;
+    ssize_t ret;
+
+    Qcow2CompressFunc func;
+} Qcow2CompressData;
+
+/*
+ * qcow2_compress()
+ *
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: compressed size on success
+ *          -ENOMEM destination buffer is not enough to store compressed data
+ *          -EIO    on any other error
+ */
+static ssize_t qcow2_compress(void *dest, size_t dest_size,
+                              const void *src, size_t src_size)
+{
+    ssize_t ret;
+    z_stream strm;
+
+    /* best compression, small window, no zlib header */
+    memset(&strm, 0, sizeof(strm));
+    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+                       -12, 9, Z_DEFAULT_STRATEGY);
+    if (ret != Z_OK) {
+        return -EIO;
+    }
+
+    /*
+     * strm.next_in is not const in old zlib versions, such as those used on
+     * OpenBSD/NetBSD, so cast the const away
+     */
+    strm.avail_in = src_size;
+    strm.next_in = (void *) src;
+    strm.avail_out = dest_size;
+    strm.next_out = dest;
+
+    ret = deflate(&strm, Z_FINISH);
+    if (ret == Z_STREAM_END) {
+        ret = dest_size - strm.avail_out;
+    } else {
+        ret = (ret == Z_OK ? -ENOMEM : -EIO);
+    }
+
+    deflateEnd(&strm);
+
+    return ret;
+}
+
+/*
+ * qcow2_decompress()
+ *
+ * Decompress some data (not more than @src_size bytes) to produce exactly
+ * @dest_size bytes.
+ *
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: 0 on success
+ *          -1 on fail
+ */
+static ssize_t qcow2_decompress(void *dest, size_t dest_size,
+                                const void *src, size_t src_size)
+{
+    int ret = 0;
+    z_stream strm;
+
+    memset(&strm, 0, sizeof(strm));
+    strm.avail_in = src_size;
+    strm.next_in = (void *) src;
+    strm.avail_out = dest_size;
+    strm.next_out = dest;
+
+    ret = inflateInit2(&strm, -12);
+    if (ret != Z_OK) {
+        return -1;
+    }
+
+    ret = inflate(&strm, Z_FINISH);
+    if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) {
+        /*
+         * We approve Z_BUF_ERROR because we need @dest buffer to be filled, but
+         * @src buffer may be processed partly (because in qcow2 we know size of
+         * compressed data with precision of one sector)
+         */
+        ret = -1;
+    }
+
+    inflateEnd(&strm);
+
+    return ret;
+}
+
+static int qcow2_compress_pool_func(void *opaque)
+{
+    Qcow2CompressData *data = opaque;
+
+    data->ret = data->func(data->dest, data->dest_size,
+                           data->src, data->src_size);
+
+    return 0;
+}
+
+static ssize_t coroutine_fn
+qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size,
+                     const void *src, size_t src_size, Qcow2CompressFunc func)
+{
+    Qcow2CompressData arg = {
+        .dest = dest,
+        .dest_size = dest_size,
+        .src = src,
+        .src_size = src_size,
+        .func = func,
+    };
+
+    qcow2_co_process(bs, qcow2_compress_pool_func, &arg);
+
+    return arg.ret;
+}
+
+ssize_t coroutine_fn
+qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
+                  const void *src, size_t src_size)
+{
+    return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
+                                qcow2_compress);
+}
+
+ssize_t coroutine_fn
+qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
+                    const void *src, size_t src_size)
+{
+    return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
+                                qcow2_decompress);
+}
+
+
+/*
+ * Cryptography
+ */
+
+/*
+ * Qcow2EncDecFunc: common prototype of qcrypto_block_encrypt() and
+ * qcrypto_block_decrypt() functions.
+ */
+typedef int (*Qcow2EncDecFunc)(QCryptoBlock *block, uint64_t offset,
+                               uint8_t *buf, size_t len, Error **errp);
+
+typedef struct Qcow2EncDecData {
+    QCryptoBlock *block;
+    uint64_t offset;
+    uint8_t *buf;
+    size_t len;
+
+    Qcow2EncDecFunc func;
+} Qcow2EncDecData;
+
+static int qcow2_encdec_pool_func(void *opaque)
+{
+    Qcow2EncDecData *data = opaque;
+
+    return data->func(data->block, data->offset, data->buf, data->len, NULL);
+}
+
+static int coroutine_fn
+qcow2_co_encdec(BlockDriverState *bs, uint64_t file_cluster_offset,
+                  uint64_t offset, void *buf, size_t len, Qcow2EncDecFunc func)
+{
+    BDRVQcow2State *s = bs->opaque;
+    Qcow2EncDecData arg = {
+        .block = s->crypto,
+        .offset = s->crypt_physical_offset ?
+                      file_cluster_offset + offset_into_cluster(s, offset) :
+                      offset,
+        .buf = buf,
+        .len = len,
+        .func = func,
+    };
+
+    return qcow2_co_process(bs, qcow2_encdec_pool_func, &arg);
+}
+
+int coroutine_fn
+qcow2_co_encrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
+                 uint64_t offset, void *buf, size_t len)
+{
+    return qcow2_co_encdec(bs, file_cluster_offset, offset, buf, len,
+                             qcrypto_block_encrypt);
+}
+
+int coroutine_fn
+qcow2_co_decrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
+                 uint64_t offset, void *buf, size_t len)
+{
+    return qcow2_co_encdec(bs, file_cluster_offset, offset, buf, len,
+                             qcrypto_block_decrypt);
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index d39882785d..9396d490d5 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -24,10 +24,6 @@
 
 #include "qemu/osdep.h"
 
-#define ZLIB_CONST
-#include <zlib.h>
-
-#include "block/block_int.h"
 #include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
@@ -44,7 +40,6 @@
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qapi-visit-block-core.h"
 #include "crypto.h"
-#include "block/thread-pool.h"
 
 /*
   Differences with QCOW:
@@ -302,7 +297,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
             }
             s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
                                            qcow2_crypto_hdr_read_func,
-                                           bs, cflags, 1, errp);
+                                           bs, cflags, QCOW2_MAX_THREADS, errp);
             if (!s->crypto) {
                 return -EINVAL;
             }
@@ -1543,7 +1538,8 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
                 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
             }
             s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
-                                           NULL, NULL, cflags, 1, errp);
+                                           NULL, NULL, cflags,
+                                           QCOW2_MAX_THREADS, errp);
             if (!s->crypto) {
                 ret = -EINVAL;
                 goto fail;
@@ -1698,7 +1694,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
     }
 #endif
 
-    qemu_co_queue_init(&s->compress_wait_queue);
+    qemu_co_queue_init(&s->thread_task_queue);
 
     return ret;
 
@@ -1899,6 +1895,12 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
     unsigned int bytes;
     int status = 0;
 
+    if (!s->metadata_preallocation_checked) {
+        ret = qcow2_detect_metadata_preallocation(bs);
+        s->metadata_preallocation = (ret == 1);
+        s->metadata_preallocation_checked = true;
+    }
+
     bytes = MIN(INT_MAX, count);
     qemu_co_mutex_lock(&s->lock);
     ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
@@ -1921,6 +1923,11 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
     } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
         status |= BDRV_BLOCK_DATA;
     }
+    if (s->metadata_preallocation && (status & BDRV_BLOCK_DATA) &&
+        (status & BDRV_BLOCK_OFFSET_VALID))
+    {
+        status |= BDRV_BLOCK_RECURSE;
+    }
     return status;
 }
 
@@ -1974,8 +1981,6 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
 
     qemu_iovec_init(&hd_qiov, qiov->niov);
 
-    qemu_co_mutex_lock(&s->lock);
-
     while (bytes != 0) {
 
         /* prepare next request */
@@ -1985,7 +1990,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
                             QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
         }
 
+        qemu_co_mutex_lock(&s->lock);
         ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
+        qemu_co_mutex_unlock(&s->lock);
         if (ret < 0) {
             goto fail;
         }
@@ -2000,10 +2007,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
 
             if (bs->backing) {
                 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
-                qemu_co_mutex_unlock(&s->lock);
                 ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
                                      &hd_qiov, 0);
-                qemu_co_mutex_lock(&s->lock);
                 if (ret < 0) {
                     goto fail;
                 }
@@ -2019,11 +2024,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
             break;
 
         case QCOW2_CLUSTER_COMPRESSED:
-            qemu_co_mutex_unlock(&s->lock);
             ret = qcow2_co_preadv_compressed(bs, cluster_offset,
                                              offset, cur_bytes,
                                              &hd_qiov);
-            qemu_co_mutex_lock(&s->lock);
             if (ret < 0) {
                 goto fail;
             }
@@ -2060,11 +2063,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
             }
 
             BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-            qemu_co_mutex_unlock(&s->lock);
             ret = bdrv_co_preadv(s->data_file,
                                  cluster_offset + offset_in_cluster,
                                  cur_bytes, &hd_qiov, 0);
-            qemu_co_mutex_lock(&s->lock);
             if (ret < 0) {
                 goto fail;
             }
@@ -2072,13 +2073,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
                 assert(s->crypto);
                 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
                 assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-                if (qcrypto_block_decrypt(s->crypto,
-                                          (s->crypt_physical_offset ?
-                                           cluster_offset + offset_in_cluster :
-                                           offset),
-                                          cluster_data,
-                                          cur_bytes,
-                                          NULL) < 0) {
+                if (qcow2_co_decrypt(bs, cluster_offset, offset,
+                                     cluster_data, cur_bytes) < 0) {
                     ret = -EIO;
                     goto fail;
                 }
@@ -2099,8 +2095,6 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
     ret = 0;
 
 fail:
-    qemu_co_mutex_unlock(&s->lock);
-
     qemu_iovec_destroy(&hd_qiov);
     qemu_vfree(cluster_data);
 
@@ -2120,6 +2114,11 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
             continue;
         }
 
+        /* If COW regions are handled already, skip this too */
+        if (m->skip_cow) {
+            continue;
+        }
+
         /* The data (middle) region must be immediately after the
          * start region */
         if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
@@ -2145,6 +2144,80 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
     return false;
 }
 
+static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+    int64_t nr;
+    return !bytes ||
+        (!bdrv_is_allocated_above(bs, NULL, offset, bytes, &nr) && nr == bytes);
+}
+
+static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
+{
+    /*
+     * This check is designed for optimization shortcut so it must be
+     * efficient.
+     * Instead of is_zero(), use is_unallocated() as it is faster (but not
+     * as accurate and can result in false negatives).
+     */
+    return is_unallocated(bs, m->offset + m->cow_start.offset,
+                          m->cow_start.nb_bytes) &&
+           is_unallocated(bs, m->offset + m->cow_end.offset,
+                          m->cow_end.nb_bytes);
+}
+
+static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
+{
+    BDRVQcow2State *s = bs->opaque;
+    QCowL2Meta *m;
+
+    if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) {
+        return 0;
+    }
+
+    if (bs->encrypted) {
+        return 0;
+    }
+
+    for (m = l2meta; m != NULL; m = m->next) {
+        int ret;
+
+        if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) {
+            continue;
+        }
+
+        if (!is_zero_cow(bs, m)) {
+            continue;
+        }
+
+        /*
+         * instead of writing zero COW buffers,
+         * efficiently zero out the whole clusters
+         */
+
+        ret = qcow2_pre_write_overlap_check(bs, 0, m->alloc_offset,
+                                            m->nb_clusters * s->cluster_size,
+                                            true);
+        if (ret < 0) {
+            return ret;
+        }
+
+        BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
+        ret = bdrv_co_pwrite_zeroes(s->data_file, m->alloc_offset,
+                                    m->nb_clusters * s->cluster_size,
+                                    BDRV_REQ_NO_FALLBACK);
+        if (ret < 0) {
+            if (ret != -ENOTSUP && ret != -EAGAIN) {
+                return ret;
+            }
+            continue;
+        }
+
+        trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, m->nb_clusters);
+        m->skip_cow = true;
+    }
+    return 0;
+}
+
 static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                          uint64_t bytes, QEMUIOVector *qiov,
                                          int flags)
@@ -2181,11 +2254,20 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
         ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
                                          &cluster_offset, &l2meta);
         if (ret < 0) {
-            goto fail;
+            goto out_locked;
         }
 
         assert((cluster_offset & 511) == 0);
 
+        ret = qcow2_pre_write_overlap_check(bs, 0,
+                                            cluster_offset + offset_in_cluster,
+                                            cur_bytes, true);
+        if (ret < 0) {
+            goto out_locked;
+        }
+
+        qemu_co_mutex_unlock(&s->lock);
+
         qemu_iovec_reset(&hd_qiov);
         qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
 
@@ -2197,7 +2279,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                                    * s->cluster_size);
                 if (cluster_data == NULL) {
                     ret = -ENOMEM;
-                    goto fail;
+                    goto out_unlocked;
                 }
             }
 
@@ -2205,24 +2287,20 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
             qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
 
-            if (qcrypto_block_encrypt(s->crypto,
-                                      (s->crypt_physical_offset ?
-                                       cluster_offset + offset_in_cluster :
-                                       offset),
-                                      cluster_data,
-                                      cur_bytes, NULL) < 0) {
+            if (qcow2_co_encrypt(bs, cluster_offset, offset,
+                                 cluster_data, cur_bytes) < 0) {
                 ret = -EIO;
-                goto fail;
+                goto out_unlocked;
             }
 
             qemu_iovec_reset(&hd_qiov);
             qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
         }
 
-        ret = qcow2_pre_write_overlap_check(bs, 0,
-                cluster_offset + offset_in_cluster, cur_bytes, true);
+        /* Try to efficiently initialize the physical space with zeroes */
+        ret = handle_alloc_space(bs, l2meta);
         if (ret < 0) {
-            goto fail;
+            goto out_unlocked;
         }
 
         /* If we need to do COW, check if it's possible to merge the
@@ -2230,22 +2308,22 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
          * If it's not possible (or not necessary) then write the
          * guest data now. */
         if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) {
-            qemu_co_mutex_unlock(&s->lock);
             BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
             trace_qcow2_writev_data(qemu_coroutine_self(),
                                     cluster_offset + offset_in_cluster);
             ret = bdrv_co_pwritev(s->data_file,
                                   cluster_offset + offset_in_cluster,
                                   cur_bytes, &hd_qiov, 0);
-            qemu_co_mutex_lock(&s->lock);
             if (ret < 0) {
-                goto fail;
+                goto out_unlocked;
             }
         }
 
+        qemu_co_mutex_lock(&s->lock);
+
         ret = qcow2_handle_l2meta(bs, &l2meta, true);
         if (ret) {
-            goto fail;
+            goto out_locked;
         }
 
         bytes -= cur_bytes;
@@ -2254,8 +2332,12 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
         trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
     }
     ret = 0;
+    goto out_locked;
 
-fail:
+out_unlocked:
+    qemu_co_mutex_lock(&s->lock);
+
+out_locked:
     qcow2_handle_l2meta(bs, &l2meta, false);
 
     qemu_co_mutex_unlock(&s->lock);
@@ -3092,7 +3174,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
     }
 
     /* Create BlockBackend to write to the image */
-    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto out;
@@ -3921,171 +4004,6 @@ fail:
     return ret;
 }
 
-/*
- * qcow2_compress()
- *
- * @dest - destination buffer, @dest_size bytes
- * @src - source buffer, @src_size bytes
- *
- * Returns: compressed size on success
- *          -ENOMEM destination buffer is not enough to store compressed data
- *          -EIO    on any other error
- */
-static ssize_t qcow2_compress(void *dest, size_t dest_size,
-                              const void *src, size_t src_size)
-{
-    ssize_t ret;
-    z_stream strm;
-
-    /* best compression, small window, no zlib header */
-    memset(&strm, 0, sizeof(strm));
-    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
-                       -12, 9, Z_DEFAULT_STRATEGY);
-    if (ret != Z_OK) {
-        return -EIO;
-    }
-
-    /* strm.next_in is not const in old zlib versions, such as those used on
-     * OpenBSD/NetBSD, so cast the const away */
-    strm.avail_in = src_size;
-    strm.next_in = (void *) src;
-    strm.avail_out = dest_size;
-    strm.next_out = dest;
-
-    ret = deflate(&strm, Z_FINISH);
-    if (ret == Z_STREAM_END) {
-        ret = dest_size - strm.avail_out;
-    } else {
-        ret = (ret == Z_OK ? -ENOMEM : -EIO);
-    }
-
-    deflateEnd(&strm);
-
-    return ret;
-}
-
-/*
- * qcow2_decompress()
- *
- * Decompress some data (not more than @src_size bytes) to produce exactly
- * @dest_size bytes.
- *
- * @dest - destination buffer, @dest_size bytes
- * @src - source buffer, @src_size bytes
- *
- * Returns: 0 on success
- *          -1 on fail
- */
-static ssize_t qcow2_decompress(void *dest, size_t dest_size,
-                                const void *src, size_t src_size)
-{
-    int ret = 0;
-    z_stream strm;
-
-    memset(&strm, 0, sizeof(strm));
-    strm.avail_in = src_size;
-    strm.next_in = (void *) src;
-    strm.avail_out = dest_size;
-    strm.next_out = dest;
-
-    ret = inflateInit2(&strm, -12);
-    if (ret != Z_OK) {
-        return -1;
-    }
-
-    ret = inflate(&strm, Z_FINISH);
-    if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) {
-        /* We approve Z_BUF_ERROR because we need @dest buffer to be filled, but
-         * @src buffer may be processed partly (because in qcow2 we know size of
-         * compressed data with precision of one sector) */
-        ret = -1;
-    }
-
-    inflateEnd(&strm);
-
-    return ret;
-}
-
-#define MAX_COMPRESS_THREADS 4
-
-typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size,
-                                     const void *src, size_t src_size);
-typedef struct Qcow2CompressData {
-    void *dest;
-    size_t dest_size;
-    const void *src;
-    size_t src_size;
-    ssize_t ret;
-
-    Qcow2CompressFunc func;
-} Qcow2CompressData;
-
-static int qcow2_compress_pool_func(void *opaque)
-{
-    Qcow2CompressData *data = opaque;
-
-    data->ret = data->func(data->dest, data->dest_size,
-                           data->src, data->src_size);
-
-    return 0;
-}
-
-static void qcow2_compress_complete(void *opaque, int ret)
-{
-    qemu_coroutine_enter(opaque);
-}
-
-static ssize_t coroutine_fn
-qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size,
-                     const void *src, size_t src_size, Qcow2CompressFunc func)
-{
-    BDRVQcow2State *s = bs->opaque;
-    BlockAIOCB *acb;
-    ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
-    Qcow2CompressData arg = {
-        .dest = dest,
-        .dest_size = dest_size,
-        .src = src,
-        .src_size = src_size,
-        .func = func,
-    };
-
-    while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) {
-        qemu_co_queue_wait(&s->compress_wait_queue, NULL);
-    }
-
-    s->nb_compress_threads++;
-    acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg,
-                                 qcow2_compress_complete,
-                                 qemu_coroutine_self());
-
-    if (!acb) {
-        s->nb_compress_threads--;
-        return -EINVAL;
-    }
-    qemu_coroutine_yield();
-    s->nb_compress_threads--;
-    qemu_co_queue_next(&s->compress_wait_queue);
-
-    return arg.ret;
-}
-
-static ssize_t coroutine_fn
-qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
-                  const void *src, size_t src_size)
-{
-    return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
-                                qcow2_compress);
-}
-
-static ssize_t coroutine_fn
-qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
-                    const void *src, size_t src_size)
-{
-    return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
-                                qcow2_decompress);
-}
-
 /* XXX: put compressed sectors first, then all the cluster aligned
    tables to avoid losing bytes in alignment */
 static coroutine_fn int
@@ -5089,7 +5007,8 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
     }
 
     if (new_size) {
-        BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
+        BlockBackend *blk = blk_new(bdrv_get_aio_context(bs),
+                                    BLK_PERM_RESIZE, BLK_PERM_ALL);
         ret = blk_insert_bs(blk, bs, errp);
         if (ret < 0) {
             blk_unref(blk);
diff --git a/block/qcow2.h b/block/qcow2.h
index 8d92ef1fee..fc1b0d3c1e 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -28,6 +28,7 @@
 #include "crypto/block.h"
 #include "qemu/coroutine.h"
 #include "qemu/units.h"
+#include "block/block_int.h"
 
 //#define DEBUG_ALLOC
 //#define DEBUG_ALLOC2
@@ -267,6 +268,8 @@ typedef struct Qcow2BitmapHeaderExt {
     uint64_t bitmap_directory_offset;
 } QEMU_PACKED Qcow2BitmapHeaderExt;
 
+#define QCOW2_MAX_THREADS 4
+
 typedef struct BDRVQcow2State {
     int cluster_bits;
     int cluster_size;
@@ -349,10 +352,13 @@ typedef struct BDRVQcow2State {
     char *image_backing_format;
     char *image_data_file;
 
-    CoQueue compress_wait_queue;
-    int nb_compress_threads;
+    CoQueue thread_task_queue;
+    int nb_threads;
 
     BdrvChild *data_file;
+
+    bool metadata_preallocation_checked;
+    bool metadata_preallocation;
 } BDRVQcow2State;
 
 typedef struct Qcow2COWRegion {
@@ -402,6 +408,12 @@ typedef struct QCowL2Meta
      */
     Qcow2COWRegion cow_end;
 
+    /*
+     * Indicates that COW regions are already handled and do not require
+     * any more processing.
+     */
+    bool skip_cow;
+
     /**
      * The I/O vector with the data from the actual guest write request.
      * If non-NULL, this is meant to be merged together with the data
@@ -646,6 +658,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
                                 void *cb_opaque, Error **errp);
 int qcow2_shrink_reftable(BlockDriverState *bs);
 int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
+int qcow2_detect_metadata_preallocation(BlockDriverState *bs);
 
 /* qcow2-cluster.c functions */
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
@@ -737,4 +750,17 @@ void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs,
                                           const char *name,
                                           Error **errp);
 
+ssize_t coroutine_fn
+qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
+                  const void *src, size_t src_size);
+ssize_t coroutine_fn
+qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
+                    const void *src, size_t src_size);
+int coroutine_fn
+qcow2_co_encrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
+                 uint64_t offset, void *buf, size_t len);
+int coroutine_fn
+qcow2_co_decrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
+                 uint64_t offset, void *buf, size_t len);
+
 #endif
diff --git a/block/qed.c b/block/qed.c
index dcdcd62b4a..bb4f5c9863 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -649,7 +649,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
         return -EIO;
     }
 
-    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto out;
diff --git a/block/quorum.c b/block/quorum.c
index 352f729136..133ee18204 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1019,7 +1019,6 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
     child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp);
     if (child == NULL) {
         s->next_child_index--;
-        bdrv_unref(child_bs);
         goto out;
     }
     s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
diff --git a/block/sheepdog.c b/block/sheepdog.c
index cbdfe9ab6e..f76d6ddbbc 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1800,7 +1800,8 @@ static int sd_prealloc(BlockDriverState *bs, int64_t old_size, int64_t new_size,
     void *buf = NULL;
     int ret;
 
-    blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
                   BLK_PERM_ALL);
 
     ret = blk_insert_bs(blk, bs, errp);
diff --git a/block/trace-events b/block/trace-events
index 79ccd8d824..eab51497fc 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -68,6 +68,7 @@ qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d"
 qcow2_writev_data(void *co, uint64_t offset) "co %p offset 0x%" PRIx64
 qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d"
 qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d"
+qcow2_skip_cow(void *co, uint64_t offset, int nb_clusters) "co %p offset 0x%" PRIx64 " nb_clusters %d"
 
 # qcow2-cluster.c
 qcow2_alloc_clusters_offset(void *co, uint64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d"
@@ -91,6 +92,9 @@ qcow2_cache_get_done(void *co, int c, int i) "co %p is_l2_cache %d index %d"
 qcow2_cache_flush(void *co, int c) "co %p is_l2_cache %d"
 qcow2_cache_entry_flush(void *co, int c, int i) "co %p is_l2_cache %d index %d"
 
+# qcow2-refcount.c
+qcow2_process_discards_failed_region(uint64_t offset, uint64_t bytes, int ret) "offset 0x%" PRIx64 " bytes 0x%" PRIx64 " ret %d"
+
 # qed-l2-cache.c
 qed_alloc_l2_cache_entry(void *l2_cache, void *entry) "l2_cache %p entry %p"
 qed_unref_l2_cache_entry(void *entry, int ref) "entry %p ref %d"
diff --git a/block/vdi.c b/block/vdi.c
index d7ef6628e7..b9845a4cbd 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -803,7 +803,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
         goto exit;
     }
 
-    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs_file),
+                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(blk, bs_file, errp);
     if (ret < 0) {
         goto exit;
diff --git a/block/vhdx.c b/block/vhdx.c
index a143a57657..d6070b6fa8 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1900,7 +1900,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,
         return -EIO;
     }
 
-    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto delete_and_exit;
diff --git a/block/vmdk.c b/block/vmdk.c
index de8cb859f8..51067c774f 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2356,7 +2356,8 @@ static BlockBackend *vmdk_co_create_cb(int64_t size, int idx,
     if (!bs) {
         return NULL;
     }
-    blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
                   BLK_PERM_ALL);
     if (blk_insert_bs(blk, bs, errp)) {
         bdrv_unref(bs);
diff --git a/block/vpc.c b/block/vpc.c
index 0c279b87c8..d4776ee8a5 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -1011,7 +1011,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
         return -EIO;
     }
 
-    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto out;
diff --git a/blockdev.c b/blockdev.c
index 79fbac8450..3f44b891eb 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -574,7 +574,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
     if ((!file || !*file) && !qdict_size(bs_opts)) {
         BlockBackendRootState *blk_rs;
 
-        blk = blk_new(0, BLK_PERM_ALL);
+        blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
         blk_rs = blk_get_root_state(blk);
         blk_rs->open_flags    = bdrv_flags;
         blk_rs->read_only     = read_only;
@@ -1535,6 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common,
                              DO_UPCAST(ExternalSnapshotState, common, common);
     TransactionAction *action = common->action;
     AioContext *aio_context;
+    int ret;
 
     /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar
      * purpose but a different set of parameters */
@@ -1674,7 +1675,10 @@ static void external_snapshot_prepare(BlkActionState *common,
         goto out;
     }
 
-    bdrv_set_aio_context(state->new_bs, aio_context);
+    ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp);
+    if (ret < 0) {
+        goto out;
+    }
 
     /* This removes our old bs and adds the new bs. This is an operation that
      * can fail, so we need to do it in .prepare; undoing it for abort is
@@ -2112,11 +2116,10 @@ static void block_dirty_bitmap_disable_abort(BlkActionState *common)
     }
 }
 
-static BdrvDirtyBitmap *do_block_dirty_bitmap_merge(const char *node,
-                                                    const char *target,
-                                                    strList *bitmaps,
-                                                    HBitmap **backup,
-                                                    Error **errp);
+static BdrvDirtyBitmap *do_block_dirty_bitmap_merge(
+        const char *node, const char *target,
+        BlockDirtyBitmapMergeSourceList *bitmaps,
+        HBitmap **backup, Error **errp);
 
 static void block_dirty_bitmap_merge_prepare(BlkActionState *common,
                                              Error **errp)
@@ -2965,15 +2968,14 @@ void qmp_block_dirty_bitmap_disable(const char *node, const char *name,
     bdrv_disable_dirty_bitmap(bitmap);
 }
 
-static BdrvDirtyBitmap *do_block_dirty_bitmap_merge(const char *node,
-                                                    const char *target,
-                                                    strList *bitmaps,
-                                                    HBitmap **backup,
-                                                    Error **errp)
+static BdrvDirtyBitmap *do_block_dirty_bitmap_merge(
+        const char *node, const char *target,
+        BlockDirtyBitmapMergeSourceList *bitmaps,
+        HBitmap **backup, Error **errp)
 {
     BlockDriverState *bs;
     BdrvDirtyBitmap *dst, *src, *anon;
-    strList *lst;
+    BlockDirtyBitmapMergeSourceList *lst;
     Error *local_err = NULL;
 
     dst = block_dirty_bitmap_lookup(node, target, &bs, errp);
@@ -2988,11 +2990,28 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_merge(const char *node,
     }
 
     for (lst = bitmaps; lst; lst = lst->next) {
-        src = bdrv_find_dirty_bitmap(bs, lst->value);
-        if (!src) {
-            error_setg(errp, "Dirty bitmap '%s' not found", lst->value);
-            dst = NULL;
-            goto out;
+        switch (lst->value->type) {
+            const char *name, *node;
+        case QTYPE_QSTRING:
+            name = lst->value->u.local;
+            src = bdrv_find_dirty_bitmap(bs, name);
+            if (!src) {
+                error_setg(errp, "Dirty bitmap '%s' not found", name);
+                dst = NULL;
+                goto out;
+            }
+            break;
+        case QTYPE_QDICT:
+            node = lst->value->u.external.node;
+            name = lst->value->u.external.name;
+            src = block_dirty_bitmap_lookup(node, name, NULL, errp);
+            if (!src) {
+                dst = NULL;
+                goto out;
+            }
+            break;
+        default:
+            abort();
         }
 
         bdrv_merge_dirty_bitmap(anon, src, NULL, &local_err);
@@ -3012,7 +3031,8 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_merge(const char *node,
 }
 
 void qmp_block_dirty_bitmap_merge(const char *node, const char *target,
-                                  strList *bitmaps, Error **errp)
+                                  BlockDirtyBitmapMergeSourceList *bitmaps,
+                                  Error **errp)
 {
     do_block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp);
 }
@@ -3138,7 +3158,7 @@ void qmp_block_resize(bool has_device, const char *device,
         goto out;
     }
 
-    blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
+    blk = blk_new(bdrv_get_aio_context(bs), BLK_PERM_RESIZE, BLK_PERM_ALL);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto out;
@@ -3424,6 +3444,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
     int flags, job_flags = JOB_DEFAULT;
     int64_t size;
     bool set_backing_hd = false;
+    int ret;
 
     if (!backup->has_speed) {
         backup->speed = 0;
@@ -3450,11 +3471,16 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
         backup->compress = false;
     }
 
-    bs = qmp_get_root_bs(backup->device, errp);
+    bs = bdrv_lookup_bs(backup->device, backup->device, errp);
     if (!bs) {
         return NULL;
     }
 
+    if (!bs->drv) {
+        error_setg(errp, "Device has no medium");
+        return NULL;
+    }
+
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
@@ -3520,13 +3546,16 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
         goto out;
     }
 
-    bdrv_set_aio_context(target_bs, aio_context);
+    ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
+    if (ret < 0) {
+        bdrv_unref(target_bs);
+        goto out;
+    }
 
     if (set_backing_hd) {
         bdrv_set_backing_hd(target_bs, source, &local_err);
         if (local_err) {
-            bdrv_unref(target_bs);
-            goto out;
+            goto unref;
         }
     }
 
@@ -3534,11 +3563,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
         bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
         if (!bmap) {
             error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap);
-            bdrv_unref(target_bs);
-            goto out;
+            goto unref;
         }
         if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) {
-            goto out;
+            goto unref;
         }
     }
     if (!backup->auto_finalize) {
@@ -3552,12 +3580,13 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
                             backup->sync, bmap, backup->compress,
                             backup->on_source_error, backup->on_target_error,
                             job_flags, NULL, NULL, txn, &local_err);
-    bdrv_unref(target_bs);
     if (local_err != NULL) {
         error_propagate(errp, local_err);
-        goto out;
+        goto unref;
     }
 
+unref:
+    bdrv_unref(target_bs);
 out:
     aio_context_release(aio_context);
     return job;
@@ -3593,6 +3622,7 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn,
     AioContext *aio_context;
     BlockJob *job = NULL;
     int job_flags = JOB_DEFAULT;
+    int ret;
 
     if (!backup->has_speed) {
         backup->speed = 0;
@@ -3629,16 +3659,9 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn,
         goto out;
     }
 
-    if (bdrv_get_aio_context(target_bs) != aio_context) {
-        if (!bdrv_has_blk(target_bs)) {
-            /* The target BDS is not attached, we can safely move it to another
-             * AioContext. */
-            bdrv_set_aio_context(target_bs, aio_context);
-        } else {
-            error_setg(errp, "Target is attached to a different thread from "
-                             "source.");
-            goto out;
-        }
+    ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
+    if (ret < 0) {
+        goto out;
     }
 
     if (backup->has_bitmap) {
@@ -3811,6 +3834,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
     int flags;
     int64_t size;
     const char *format = arg->format;
+    int ret;
 
     bs = qmp_get_root_bs(arg->device, errp);
     if (!bs) {
@@ -3911,7 +3935,11 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
         goto out;
     }
 
-    bdrv_set_aio_context(target_bs, aio_context);
+    ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
+    if (ret < 0) {
+        bdrv_unref(target_bs);
+        goto out;
+    }
 
     blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
                            arg->has_replaces, arg->replaces, arg->sync,
@@ -3955,6 +3983,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
     AioContext *aio_context;
     BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
     Error *local_err = NULL;
+    int ret;
 
     bs = qmp_get_root_bs(device, errp);
     if (!bs) {
@@ -3969,7 +3998,10 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
-    bdrv_set_aio_context(target_bs, aio_context);
+    ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
+    if (ret < 0) {
+        goto out;
+    }
 
     blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs,
                            has_replaces, replaces, sync, backing_mode,
@@ -3985,7 +4017,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
                            has_auto_dismiss, auto_dismiss,
                            &local_err);
     error_propagate(errp, local_err);
-
+out:
     aio_context_release(aio_context);
 }
 
@@ -4475,7 +4507,7 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
     old_context = bdrv_get_aio_context(bs);
     aio_context_acquire(old_context);
 
-    bdrv_set_aio_context(bs, new_context);
+    bdrv_try_set_aio_context(bs, new_context, errp);
 
     aio_context_release(old_context);
 }
diff --git a/blockjob.c b/blockjob.c
index 9ca942ba01..931d675c0c 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -204,14 +204,20 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
 {
     BdrvChild *c;
 
-    c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
-                               job, errp);
+    bdrv_ref(bs);
+    if (job->job.aio_context != qemu_get_aio_context()) {
+        aio_context_release(job->job.aio_context);
+    }
+    c = bdrv_root_attach_child(bs, name, &child_job, job->job.aio_context,
+                               perm, shared_perm, job, errp);
+    if (job->job.aio_context != qemu_get_aio_context()) {
+        aio_context_acquire(job->job.aio_context);
+    }
     if (c == NULL) {
         return -EPERM;
     }
 
     job->nodes = g_slist_prepend(job->nodes, c);
-    bdrv_ref(bs);
     bdrv_op_block_all(bs, job->blocker);
 
     return 0;
@@ -392,7 +398,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
         job_id = bdrv_get_device_name(bs);
     }
 
-    blk = blk_new(perm, shared_perm);
+    blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         blk_unref(blk);
diff --git a/configure b/configure
index 528b9ff705..b091b82cb3 100755
--- a/configure
+++ b/configure
@@ -198,7 +198,7 @@ supported_kvm_target() {
         i386:i386 | i386:x86_64 | i386:x32 | \
         x86_64:i386 | x86_64:x86_64 | x86_64:x32 | \
         mips:mips | mipsel:mips | \
-        ppc:ppc | ppc64:ppc | ppc:ppc64 | ppc64:ppc64 | \
+        ppc:ppc | ppc64:ppc | ppc:ppc64 | ppc64:ppc64 | ppc64:ppc64le | \
         s390x:s390x)
             return 0
         ;;
@@ -502,8 +502,11 @@ cross_cc_arm="arm-linux-gnueabihf-gcc"
 cross_cc_cflags_armeb="-mbig-endian"
 cross_cc_i386="i386-pc-linux-gnu-gcc"
 cross_cc_cflags_i386=""
-cross_cc_powerpc="powerpc-linux-gnu-gcc"
-cross_cc_powerpc="powerpc-linux-gnu-gcc"
+cross_cc_ppc="powerpc-linux-gnu-gcc"
+cross_cc_cflags_ppc="-m32"
+cross_cc_ppc64="powerpc-linux-gnu-gcc"
+cross_cc_cflags_ppc64="-m64"
+cross_cc_ppc64le="powerpc64le-linux-gnu-gcc"
 
 enabled_cross_compilers=""
 
@@ -700,7 +703,11 @@ elif check_define __sparc__ ; then
   fi
 elif check_define _ARCH_PPC ; then
   if check_define _ARCH_PPC64 ; then
-    cpu="ppc64"
+    if check_define _LITTLE_ENDIAN ; then
+      cpu="ppc64le"
+    else
+      cpu="ppc64"
+    fi
   else
     cpu="ppc"
   fi
@@ -731,10 +738,14 @@ ARCH=
 # Note that this case should only have supported host CPUs, not guests.
 case "$cpu" in
   ppc|ppc64|s390|s390x|sparc64|x32|riscv32|riscv64)
-    cpu="$cpu"
     supported_cpu="yes"
     eval "cross_cc_${cpu}=\$host_cc"
   ;;
+  ppc64le)
+    ARCH="ppc64"
+    supported_cpu="yes"
+    cross_cc_ppc64le=$host_cc
+  ;;
   i386|i486|i586|i686|i86pc|BePC)
     cpu="i386"
     supported_cpu="yes"
@@ -1538,44 +1549,44 @@ case "$cpu" in
     ppc)
            CPU_CFLAGS="-m32"
            LDFLAGS="-m32 $LDFLAGS"
-           cross_cc_powerpc=$cc
-           cross_cc_cflags_powerpc=$CPU_CFLAGS
+           cross_cc_ppc=$cc
+           cross_cc_cflags_ppc="$CPU_CFLAGS"
            ;;
     ppc64)
            CPU_CFLAGS="-m64"
            LDFLAGS="-m64 $LDFLAGS"
            cross_cc_ppc64=$cc
-           cross_cc_cflags_ppc64=$CPU_CFLAGS
+           cross_cc_cflags_ppc64="$CPU_CFLAGS"
            ;;
     sparc)
            CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc"
            LDFLAGS="-m32 -mv8plus $LDFLAGS"
            cross_cc_sparc=$cc
-           cross_cc_cflags_sparc=$CPU_CFLAGS
+           cross_cc_cflags_sparc="$CPU_CFLAGS"
            ;;
     sparc64)
            CPU_CFLAGS="-m64 -mcpu=ultrasparc"
            LDFLAGS="-m64 $LDFLAGS"
            cross_cc_sparc64=$cc
-           cross_cc_cflags_sparc64=$CPU_CFLAGS
+           cross_cc_cflags_sparc64="$CPU_CFLAGS"
            ;;
     s390)
            CPU_CFLAGS="-m31"
            LDFLAGS="-m31 $LDFLAGS"
            cross_cc_s390=$cc
-           cross_cc_cflags_s390=$CPU_CFLAGS
+           cross_cc_cflags_s390="$CPU_CFLAGS"
            ;;
     s390x)
            CPU_CFLAGS="-m64"
            LDFLAGS="-m64 $LDFLAGS"
            cross_cc_s390x=$cc
-           cross_cc_cflags_s390x=$CPU_CFLAGS
+           cross_cc_cflags_s390x="$CPU_CFLAGS"
            ;;
     i386)
            CPU_CFLAGS="-m32"
            LDFLAGS="-m32 $LDFLAGS"
            cross_cc_i386=$cc
-           cross_cc_cflags_i386=$CPU_CFLAGS
+           cross_cc_cflags_i386="$CPU_CFLAGS"
            ;;
     x86_64)
            # ??? Only extremely old AMD cpus do not have cmpxchg16b.
@@ -1584,13 +1595,13 @@ case "$cpu" in
            CPU_CFLAGS="-m64 -mcx16"
            LDFLAGS="-m64 $LDFLAGS"
            cross_cc_x86_64=$cc
-           cross_cc_cflags_x86_64=$CPU_CFLAGS
+           cross_cc_cflags_x86_64="$CPU_CFLAGS"
            ;;
     x32)
            CPU_CFLAGS="-mx32"
            LDFLAGS="-mx32 $LDFLAGS"
            cross_cc_i386=$cc
-           cross_cc_cflags_i386=$CPU_CFLAGS
+           cross_cc_cflags_i386="$CPU_CFLAGS"
            ;;
     # No special flags required for other host CPUs
 esac
@@ -4099,6 +4110,13 @@ libs_softmmu="$libs_softmmu $fdt_libs"
 ##########################################
 # opengl probe (for sdl2, gtk, milkymist-tmu2)
 
+gbm="no"
+if $pkg_config gbm; then
+    gbm_cflags="$($pkg_config --cflags gbm)"
+    gbm_libs="$($pkg_config --libs gbm)"
+    gbm="yes"
+fi
+
 if test "$opengl" != "no" ; then
   opengl_pkgs="epoxy gbm"
   if $pkg_config $opengl_pkgs; then
@@ -6191,7 +6209,7 @@ if { test "$cpu" = "i386" || test "$cpu" = "x86_64"; } && \
         fi
     done
 fi
-if test "$cpu" = "ppc64" && test "$targetos" != "Darwin" ; then
+if test "$ARCH" = "ppc64" && test "$targetos" != "Darwin" ; then
   roms="$roms spapr-rtas"
 fi
 
@@ -6434,8 +6452,6 @@ echo "gcov              $gcov_tool"
 echo "gcov enabled      $gcov"
 echo "TPM support       $tpm"
 echo "libssh2 support   $libssh2"
-echo "TPM passthrough   $tpm_passthrough"
-echo "TPM emulator      $tpm_emulator"
 echo "QOM debugging     $qom_cast_debug"
 echo "Live block migration $live_block_migration"
 echo "lzo support       $lzo"
@@ -6964,6 +6980,13 @@ if test "$opengl" = "yes" ; then
   fi
 fi
 
+if test "$gbm" = "yes" ; then
+    echo "CONFIG_GBM=y" >> $config_host_mak
+    echo "GBM_LIBS=$gbm_libs" >> $config_host_mak
+    echo "GBM_CFLAGS=$gbm_cflags" >> $config_host_mak
+fi
+
+
 if test "$malloc_trim" = "yes" ; then
   echo "CONFIG_MALLOC_TRIM=y" >> $config_host_mak
 fi
@@ -7133,14 +7156,6 @@ fi
 
 if test "$tpm" = "yes"; then
   echo 'CONFIG_TPM=$(CONFIG_SOFTMMU)' >> $config_host_mak
-  # TPM passthrough support?
-  if test "$tpm_passthrough" = "yes"; then
-    echo "CONFIG_TPM_PASSTHROUGH=y" >> $config_host_mak
-  fi
-  # TPM emulator support?
-  if test "$tpm_emulator" = "yes"; then
-    echo "CONFIG_TPM_EMULATOR=y" >> $config_host_mak
-  fi
 fi
 
 echo "TRACE_BACKENDS=$trace_backends" >> $config_host_mak
@@ -7378,7 +7393,7 @@ if test "$linux" = "yes" ; then
   i386|x86_64|x32)
     linux_arch=x86
     ;;
-  ppc|ppc64)
+  ppc|ppc64|ppc64le)
     linux_arch=powerpc
     ;;
   s390x)
@@ -7539,7 +7554,8 @@ case "$target_name" in
   ;;
   ppc)
     gdb_xml_files="power-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
-    target_compiler=$cross_cc_powerpc
+    target_compiler=$cross_cc_ppc
+    target_compiler_cflags="$cross_cc_cflags_ppc"
   ;;
   ppc64)
     TARGET_BASE_ARCH=ppc
@@ -7547,6 +7563,7 @@ case "$target_name" in
     mttcg=yes
     gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml"
     target_compiler=$cross_cc_ppc64
+    target_compiler_cflags="$cross_cc_cflags_ppc64"
   ;;
   ppc64le)
     TARGET_ARCH=ppc64
@@ -7859,6 +7876,9 @@ echo "QEMU_CFLAGS+=$cflags" >> $config_target_mak
 
 done # for target in $targets
 
+echo "PIXMAN_CFLAGS=$pixman_cflags" >> $config_host_mak
+echo "PIXMAN_LIBS=$pixman_libs" >> $config_host_mak
+
 if test -n "$enabled_cross_compilers"; then
     echo
     echo "NOTE: cross-compilers enabled: $enabled_cross_compilers"
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
index c56f2dfe44..443b7e08c3 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -130,6 +130,7 @@ vu_request_to_string(unsigned int req)
         REQ(VHOST_USER_POSTCOPY_END),
         REQ(VHOST_USER_GET_INFLIGHT_FD),
         REQ(VHOST_USER_SET_INFLIGHT_FD),
+        REQ(VHOST_USER_GPU_SET_SOCKET),
         REQ(VHOST_USER_MAX),
     };
 #undef REQ
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
index 78b33306e8..3b888ff0a5 100644
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -94,6 +94,7 @@ typedef enum VhostUserRequest {
     VHOST_USER_POSTCOPY_END     = 30,
     VHOST_USER_GET_INFLIGHT_FD = 31,
     VHOST_USER_SET_INFLIGHT_FD = 32,
+    VHOST_USER_GPU_SET_SOCKET = 33,
     VHOST_USER_MAX
 } VhostUserRequest;
 
diff --git a/contrib/vhost-user-gpu/50-qemu-gpu.json.in b/contrib/vhost-user-gpu/50-qemu-gpu.json.in
new file mode 100644
index 0000000000..658b545864
--- /dev/null
+++ b/contrib/vhost-user-gpu/50-qemu-gpu.json.in
@@ -0,0 +1,5 @@
+{
+  "description": "QEMU vhost-user-gpu",
+  "type": "gpu",
+  "binary": "@libexecdir@/vhost-user-gpu",
+}
diff --git a/contrib/vhost-user-gpu/Makefile.objs b/contrib/vhost-user-gpu/Makefile.objs
new file mode 100644
index 0000000000..6170c919e4
--- /dev/null
+++ b/contrib/vhost-user-gpu/Makefile.objs
@@ -0,0 +1,10 @@
+vhost-user-gpu-obj-y = main.o virgl.o vugbm.o
+
+main.o-cflags := $(PIXMAN_CFLAGS) $(GBM_CFLAGS)
+main.o-libs := $(PIXMAN_LIBS)
+
+virgl.o-cflags := $(VIRGL_CFLAGS) $(GBM_CFLAGS)
+virgl.o-libs := $(VIRGL_LIBS)
+
+vugbm.o-cflags := $(GBM_CFLAGS)
+vugbm.o-libs := $(GBM_LIBS)
diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c
new file mode 100644
index 0000000000..f9e2146b69
--- /dev/null
+++ b/contrib/vhost-user-gpu/main.c
@@ -0,0 +1,1185 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2018
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *     Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/drm.h"
+#include "qapi/error.h"
+#include "qemu/sockets.h"
+
+#include <pixman.h>
+#include <glib-unix.h>
+
+#include "vugpu.h"
+#include "hw/virtio/virtio-gpu-bswap.h"
+#include "hw/virtio/virtio-gpu-pixman.h"
+#include "virgl.h"
+#include "vugbm.h"
+
+struct virtio_gpu_simple_resource {
+    uint32_t resource_id;
+    uint32_t width;
+    uint32_t height;
+    uint32_t format;
+    struct iovec *iov;
+    unsigned int iov_cnt;
+    uint32_t scanout_bitmask;
+    pixman_image_t *image;
+    struct vugbm_buffer buffer;
+    QTAILQ_ENTRY(virtio_gpu_simple_resource) next;
+};
+
+static gboolean opt_print_caps;
+static int opt_fdnum = -1;
+static char *opt_socket_path;
+static char *opt_render_node;
+static gboolean opt_virgl;
+
+static void vg_handle_ctrl(VuDev *dev, int qidx);
+
+static const char *
+vg_cmd_to_string(int cmd)
+{
+#define CMD(cmd) [cmd] = #cmd
+    static const char *vg_cmd_str[] = {
+        CMD(VIRTIO_GPU_UNDEFINED),
+
+        /* 2d commands */
+        CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO),
+        CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D),
+        CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF),
+        CMD(VIRTIO_GPU_CMD_SET_SCANOUT),
+        CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH),
+        CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D),
+        CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING),
+        CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING),
+        CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO),
+        CMD(VIRTIO_GPU_CMD_GET_CAPSET),
+
+        /* 3d commands */
+        CMD(VIRTIO_GPU_CMD_CTX_CREATE),
+        CMD(VIRTIO_GPU_CMD_CTX_DESTROY),
+        CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE),
+        CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE),
+        CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D),
+        CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D),
+        CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D),
+        CMD(VIRTIO_GPU_CMD_SUBMIT_3D),
+
+        /* cursor commands */
+        CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR),
+        CMD(VIRTIO_GPU_CMD_MOVE_CURSOR),
+    };
+#undef REQ
+
+    if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) {
+        return vg_cmd_str[cmd];
+    } else {
+        return "unknown";
+    }
+}
+
+static int
+vg_sock_fd_read(int sock, void *buf, ssize_t buflen)
+{
+    int ret;
+
+    do {
+        ret = read(sock, buf, buflen);
+    } while (ret < 0 && (errno == EINTR || errno == EAGAIN));
+
+    g_warn_if_fail(ret == buflen);
+    return ret;
+}
+
+static void
+vg_sock_fd_close(VuGpu *g)
+{
+    if (g->sock_fd >= 0) {
+        close(g->sock_fd);
+        g->sock_fd = -1;
+    }
+}
+
+static gboolean
+source_wait_cb(gint fd, GIOCondition condition, gpointer user_data)
+{
+    VuGpu *g = user_data;
+
+    if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) {
+        return G_SOURCE_CONTINUE;
+    }
+
+    /* resume */
+    g->wait_ok = 0;
+    vg_handle_ctrl(&g->dev.parent, 0);
+
+    return G_SOURCE_REMOVE;
+}
+
+void
+vg_wait_ok(VuGpu *g)
+{
+    assert(g->wait_ok == 0);
+    g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP,
+                               source_wait_cb, g);
+}
+
+static int
+vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd)
+{
+    ssize_t ret;
+    struct msghdr msg;
+    struct iovec iov;
+    union {
+        struct cmsghdr cmsghdr;
+        char control[CMSG_SPACE(sizeof(int))];
+    } cmsgu;
+    struct cmsghdr *cmsg;
+
+    iov.iov_base = (void *)buf;
+    iov.iov_len = buflen;
+
+    msg.msg_name = NULL;
+    msg.msg_namelen = 0;
+    msg.msg_iov = &iov;
+    msg.msg_iovlen = 1;
+
+    if (fd != -1) {
+        msg.msg_control = cmsgu.control;
+        msg.msg_controllen = sizeof(cmsgu.control);
+
+        cmsg = CMSG_FIRSTHDR(&msg);
+        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+        cmsg->cmsg_level = SOL_SOCKET;
+        cmsg->cmsg_type = SCM_RIGHTS;
+
+        *((int *)CMSG_DATA(cmsg)) = fd;
+    } else {
+        msg.msg_control = NULL;
+        msg.msg_controllen = 0;
+    }
+
+    do {
+        ret = sendmsg(sock, &msg, 0);
+    } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
+
+    g_warn_if_fail(ret == buflen);
+    return ret;
+}
+
+void
+vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd)
+{
+    if (vg_sock_fd_write(vg->sock_fd, msg,
+                         VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) {
+        vg_sock_fd_close(vg);
+    }
+}
+
+bool
+vg_recv_msg(VuGpu *g, uint32_t expect_req, uint32_t expect_size,
+            gpointer payload)
+{
+    uint32_t req, flags, size;
+
+    if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 ||
+        vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 ||
+        vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) {
+        goto err;
+    }
+
+    g_return_val_if_fail(req == expect_req, false);
+    g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false);
+    g_return_val_if_fail(size == expect_size, false);
+
+    if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) {
+        goto err;
+    }
+
+    return true;
+
+err:
+    vg_sock_fd_close(g);
+    return false;
+}
+
+static struct virtio_gpu_simple_resource *
+virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id)
+{
+    struct virtio_gpu_simple_resource *res;
+
+    QTAILQ_FOREACH(res, &g->reslist, next) {
+        if (res->resource_id == resource_id) {
+            return res;
+        }
+    }
+    return NULL;
+}
+
+void
+vg_ctrl_response(VuGpu *g,
+                 struct virtio_gpu_ctrl_command *cmd,
+                 struct virtio_gpu_ctrl_hdr *resp,
+                 size_t resp_len)
+{
+    size_t s;
+
+    if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) {
+        resp->flags |= VIRTIO_GPU_FLAG_FENCE;
+        resp->fence_id = cmd->cmd_hdr.fence_id;
+        resp->ctx_id = cmd->cmd_hdr.ctx_id;
+    }
+    virtio_gpu_ctrl_hdr_bswap(resp);
+    s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len);
+    if (s != resp_len) {
+        g_critical("%s: response size incorrect %zu vs %zu",
+                   __func__, s, resp_len);
+    }
+    vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s);
+    vu_queue_notify(&g->dev.parent, cmd->vq);
+    cmd->finished = true;
+}
+
+void
+vg_ctrl_response_nodata(VuGpu *g,
+                        struct virtio_gpu_ctrl_command *cmd,
+                        enum virtio_gpu_ctrl_type type)
+{
+    struct virtio_gpu_ctrl_hdr resp = {
+        .type = type,
+    };
+
+    vg_ctrl_response(g, cmd, &resp, sizeof(resp));
+}
+
+void
+vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resp_display_info dpy_info = { {} };
+    VhostUserGpuMsg msg = {
+        .request = VHOST_USER_GPU_GET_DISPLAY_INFO,
+        .size = 0,
+    };
+
+    assert(vg->wait_ok == 0);
+
+    vg_send_msg(vg, &msg, -1);
+    if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) {
+        return;
+    }
+
+    vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info));
+}
+
+static void
+vg_resource_create_2d(VuGpu *g,
+                      struct virtio_gpu_ctrl_command *cmd)
+{
+    pixman_format_code_t pformat;
+    struct virtio_gpu_simple_resource *res;
+    struct virtio_gpu_resource_create_2d c2d;
+
+    VUGPU_FILL_CMD(c2d);
+    virtio_gpu_bswap_32(&c2d, sizeof(c2d));
+
+    if (c2d.resource_id == 0) {
+        g_critical("%s: resource id 0 is not allowed", __func__);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+
+    res = virtio_gpu_find_resource(g, c2d.resource_id);
+    if (res) {
+        g_critical("%s: resource already exists %d", __func__, c2d.resource_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+
+    res = g_new0(struct virtio_gpu_simple_resource, 1);
+    res->width = c2d.width;
+    res->height = c2d.height;
+    res->format = c2d.format;
+    res->resource_id = c2d.resource_id;
+
+    pformat = virtio_gpu_get_pixman_format(c2d.format);
+    if (!pformat) {
+        g_critical("%s: host couldn't handle guest format %d",
+                   __func__, c2d.format);
+        g_free(res);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+        return;
+    }
+    vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height);
+    res->image = pixman_image_create_bits(pformat,
+                                          c2d.width,
+                                          c2d.height,
+                                          (uint32_t *)res->buffer.mmap,
+                                          res->buffer.stride);
+    if (!res->image) {
+        g_critical("%s: resource creation failed %d %d %d",
+                   __func__, c2d.resource_id, c2d.width, c2d.height);
+        g_free(res);
+        cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY;
+        return;
+    }
+
+    QTAILQ_INSERT_HEAD(&g->reslist, res, next);
+}
+
+static void
+vg_disable_scanout(VuGpu *g, int scanout_id)
+{
+    struct virtio_gpu_scanout *scanout = &g->scanout[scanout_id];
+    struct virtio_gpu_simple_resource *res;
+
+    if (scanout->resource_id == 0) {
+        return;
+    }
+
+    res = virtio_gpu_find_resource(g, scanout->resource_id);
+    if (res) {
+        res->scanout_bitmask &= ~(1 << scanout_id);
+    }
+
+    scanout->width = 0;
+    scanout->height = 0;
+
+    {
+        VhostUserGpuMsg msg = {
+            .request = VHOST_USER_GPU_SCANOUT,
+            .size = sizeof(VhostUserGpuScanout),
+            .payload.scanout.scanout_id = scanout_id,
+        };
+        vg_send_msg(g, &msg, -1);
+    }
+}
+
+static void
+vg_resource_destroy(VuGpu *g,
+                    struct virtio_gpu_simple_resource *res)
+{
+    int i;
+
+    if (res->scanout_bitmask) {
+        for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) {
+            if (res->scanout_bitmask & (1 << i)) {
+                vg_disable_scanout(g, i);
+            }
+        }
+    }
+
+    vugbm_buffer_destroy(&res->buffer);
+    pixman_image_unref(res->image);
+    QTAILQ_REMOVE(&g->reslist, res, next);
+    g_free(res);
+}
+
+static void
+vg_resource_unref(VuGpu *g,
+                  struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_simple_resource *res;
+    struct virtio_gpu_resource_unref unref;
+
+    VUGPU_FILL_CMD(unref);
+    virtio_gpu_bswap_32(&unref, sizeof(unref));
+
+    res = virtio_gpu_find_resource(g, unref.resource_id);
+    if (!res) {
+        g_critical("%s: illegal resource specified %d",
+                   __func__, unref.resource_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+    vg_resource_destroy(g, res);
+}
+
+int
+vg_create_mapping_iov(VuGpu *g,
+                      struct virtio_gpu_resource_attach_backing *ab,
+                      struct virtio_gpu_ctrl_command *cmd,
+                      struct iovec **iov)
+{
+    struct virtio_gpu_mem_entry *ents;
+    size_t esize, s;
+    int i;
+
+    if (ab->nr_entries > 16384) {
+        g_critical("%s: nr_entries is too big (%d > 16384)",
+                   __func__, ab->nr_entries);
+        return -1;
+    }
+
+    esize = sizeof(*ents) * ab->nr_entries;
+    ents = g_malloc(esize);
+    s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num,
+                   sizeof(*ab), ents, esize);
+    if (s != esize) {
+        g_critical("%s: command data size incorrect %zu vs %zu",
+                   __func__, s, esize);
+        g_free(ents);
+        return -1;
+    }
+
+    *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries);
+    for (i = 0; i < ab->nr_entries; i++) {
+        uint64_t len = ents[i].length;
+        (*iov)[i].iov_len = ents[i].length;
+        (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr);
+        if (!(*iov)[i].iov_base || len != ents[i].length) {
+            g_critical("%s: resource %d element %d",
+                       __func__, ab->resource_id, i);
+            g_free(*iov);
+            g_free(ents);
+            *iov = NULL;
+            return -1;
+        }
+    }
+    g_free(ents);
+    return 0;
+}
+
+static void
+vg_resource_attach_backing(VuGpu *g,
+                           struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_simple_resource *res;
+    struct virtio_gpu_resource_attach_backing ab;
+    int ret;
+
+    VUGPU_FILL_CMD(ab);
+    virtio_gpu_bswap_32(&ab, sizeof(ab));
+
+    res = virtio_gpu_find_resource(g, ab.resource_id);
+    if (!res) {
+        g_critical("%s: illegal resource specified %d",
+                   __func__, ab.resource_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+
+    ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov);
+    if (ret != 0) {
+        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+        return;
+    }
+
+    res->iov_cnt = ab.nr_entries;
+}
+
+static void
+vg_resource_detach_backing(VuGpu *g,
+                           struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_simple_resource *res;
+    struct virtio_gpu_resource_detach_backing detach;
+
+    VUGPU_FILL_CMD(detach);
+    virtio_gpu_bswap_32(&detach, sizeof(detach));
+
+    res = virtio_gpu_find_resource(g, detach.resource_id);
+    if (!res || !res->iov) {
+        g_critical("%s: illegal resource specified %d",
+                   __func__, detach.resource_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+
+    g_free(res->iov);
+    res->iov = NULL;
+    res->iov_cnt = 0;
+}
+
+static void
+vg_transfer_to_host_2d(VuGpu *g,
+                       struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_simple_resource *res;
+    int h;
+    uint32_t src_offset, dst_offset, stride;
+    int bpp;
+    pixman_format_code_t format;
+    struct virtio_gpu_transfer_to_host_2d t2d;
+
+    VUGPU_FILL_CMD(t2d);
+    virtio_gpu_t2d_bswap(&t2d);
+
+    res = virtio_gpu_find_resource(g, t2d.resource_id);
+    if (!res || !res->iov) {
+        g_critical("%s: illegal resource specified %d",
+                   __func__, t2d.resource_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+
+    if (t2d.r.x > res->width ||
+        t2d.r.y > res->height ||
+        t2d.r.width > res->width ||
+        t2d.r.height > res->height ||
+        t2d.r.x + t2d.r.width > res->width ||
+        t2d.r.y + t2d.r.height > res->height) {
+        g_critical("%s: transfer bounds outside resource"
+                   " bounds for resource %d: %d %d %d %d vs %d %d",
+                   __func__, t2d.resource_id, t2d.r.x, t2d.r.y,
+                   t2d.r.width, t2d.r.height, res->width, res->height);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+        return;
+    }
+
+    format = pixman_image_get_format(res->image);
+    bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8;
+    stride = pixman_image_get_stride(res->image);
+
+    if (t2d.offset || t2d.r.x || t2d.r.y ||
+        t2d.r.width != pixman_image_get_width(res->image)) {
+        void *img_data = pixman_image_get_data(res->image);
+        for (h = 0; h < t2d.r.height; h++) {
+            src_offset = t2d.offset + stride * h;
+            dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp);
+
+            iov_to_buf(res->iov, res->iov_cnt, src_offset,
+                       img_data
+                       + dst_offset, t2d.r.width * bpp);
+        }
+    } else {
+        iov_to_buf(res->iov, res->iov_cnt, 0,
+                   pixman_image_get_data(res->image),
+                   pixman_image_get_stride(res->image)
+                   * pixman_image_get_height(res->image));
+    }
+}
+
+static void
+vg_set_scanout(VuGpu *g,
+               struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_simple_resource *res, *ores;
+    struct virtio_gpu_scanout *scanout;
+    struct virtio_gpu_set_scanout ss;
+    int fd;
+
+    VUGPU_FILL_CMD(ss);
+    virtio_gpu_bswap_32(&ss, sizeof(ss));
+
+    if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) {
+        g_critical("%s: illegal scanout id specified %d",
+                   __func__, ss.scanout_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
+        return;
+    }
+
+    if (ss.resource_id == 0) {
+        vg_disable_scanout(g, ss.scanout_id);
+        return;
+    }
+
+    /* create a surface for this scanout */
+    res = virtio_gpu_find_resource(g, ss.resource_id);
+    if (!res) {
+        g_critical("%s: illegal resource specified %d",
+                      __func__, ss.resource_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+
+    if (ss.r.x > res->width ||
+        ss.r.y > res->height ||
+        ss.r.width > res->width ||
+        ss.r.height > res->height ||
+        ss.r.x + ss.r.width > res->width ||
+        ss.r.y + ss.r.height > res->height) {
+        g_critical("%s: illegal scanout %d bounds for"
+                   " resource %d, (%d,%d)+%d,%d vs %d %d",
+                   __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y,
+                   ss.r.width, ss.r.height, res->width, res->height);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+        return;
+    }
+
+    scanout = &g->scanout[ss.scanout_id];
+
+    ores = virtio_gpu_find_resource(g, scanout->resource_id);
+    if (ores) {
+        ores->scanout_bitmask &= ~(1 << ss.scanout_id);
+    }
+
+    res->scanout_bitmask |= (1 << ss.scanout_id);
+    scanout->resource_id = ss.resource_id;
+    scanout->x = ss.r.x;
+    scanout->y = ss.r.y;
+    scanout->width = ss.r.width;
+    scanout->height = ss.r.height;
+
+    struct vugbm_buffer *buffer = &res->buffer;
+
+    if (vugbm_buffer_can_get_dmabuf_fd(buffer)) {
+        VhostUserGpuMsg msg = {
+            .request = VHOST_USER_GPU_DMABUF_SCANOUT,
+            .size = sizeof(VhostUserGpuDMABUFScanout),
+            .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) {
+                .scanout_id = ss.scanout_id,
+                .x = ss.r.x,
+                .y = ss.r.y,
+                .width = ss.r.width,
+                .height = ss.r.height,
+                .fd_width = buffer->width,
+                .fd_height = buffer->height,
+                .fd_stride = buffer->stride,
+                .fd_drm_fourcc = buffer->format
+            }
+        };
+
+        if (vugbm_buffer_get_dmabuf_fd(buffer, &fd)) {
+            vg_send_msg(g, &msg, fd);
+            close(fd);
+        }
+    } else {
+        VhostUserGpuMsg msg = {
+            .request = VHOST_USER_GPU_SCANOUT,
+            .size = sizeof(VhostUserGpuScanout),
+            .payload.scanout = (VhostUserGpuScanout) {
+                .scanout_id = ss.scanout_id,
+                .width = scanout->width,
+                .height = scanout->height
+            }
+        };
+        vg_send_msg(g, &msg, -1);
+    }
+}
+
+static void
+vg_resource_flush(VuGpu *g,
+                  struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_simple_resource *res;
+    struct virtio_gpu_resource_flush rf;
+    pixman_region16_t flush_region;
+    int i;
+
+    VUGPU_FILL_CMD(rf);
+    virtio_gpu_bswap_32(&rf, sizeof(rf));
+
+    res = virtio_gpu_find_resource(g, rf.resource_id);
+    if (!res) {
+        g_critical("%s: illegal resource specified %d\n",
+                   __func__, rf.resource_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+        return;
+    }
+
+    if (rf.r.x > res->width ||
+        rf.r.y > res->height ||
+        rf.r.width > res->width ||
+        rf.r.height > res->height ||
+        rf.r.x + rf.r.width > res->width ||
+        rf.r.y + rf.r.height > res->height) {
+        g_critical("%s: flush bounds outside resource"
+                   " bounds for resource %d: %d %d %d %d vs %d %d\n",
+                   __func__, rf.resource_id, rf.r.x, rf.r.y,
+                   rf.r.width, rf.r.height, res->width, res->height);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+        return;
+    }
+
+    pixman_region_init_rect(&flush_region,
+                            rf.r.x, rf.r.y, rf.r.width, rf.r.height);
+    for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) {
+        struct virtio_gpu_scanout *scanout;
+        pixman_region16_t region, finalregion;
+        pixman_box16_t *extents;
+
+        if (!(res->scanout_bitmask & (1 << i))) {
+            continue;
+        }
+        scanout = &g->scanout[i];
+
+        pixman_region_init(&finalregion);
+        pixman_region_init_rect(&region, scanout->x, scanout->y,
+                                scanout->width, scanout->height);
+
+        pixman_region_intersect(&finalregion, &flush_region, &region);
+
+        extents = pixman_region_extents(&finalregion);
+        size_t width = extents->x2 - extents->x1;
+        size_t height = extents->y2 - extents->y1;
+
+        if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) {
+            VhostUserGpuMsg vmsg = {
+                .request = VHOST_USER_GPU_DMABUF_UPDATE,
+                .size = sizeof(VhostUserGpuUpdate),
+                .payload.update = (VhostUserGpuUpdate) {
+                    .scanout_id = i,
+                    .x = extents->x1,
+                    .y = extents->y1,
+                    .width = width,
+                    .height = height,
+                }
+            };
+            vg_send_msg(g, &vmsg, -1);
+            vg_wait_ok(g);
+        } else {
+            size_t bpp =
+                PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8;
+            size_t size = width * height * bpp;
+
+            void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE +
+                               sizeof(VhostUserGpuUpdate) + size);
+            VhostUserGpuMsg *msg = p;
+            msg->request = VHOST_USER_GPU_UPDATE;
+            msg->size = sizeof(VhostUserGpuUpdate) + size;
+            msg->payload.update = (VhostUserGpuUpdate) {
+                .scanout_id = i,
+                .x = extents->x1,
+                .y = extents->y1,
+                .width = width,
+                .height = height,
+            };
+            pixman_image_t *i =
+                pixman_image_create_bits(pixman_image_get_format(res->image),
+                                         msg->payload.update.width,
+                                         msg->payload.update.height,
+                                         p + offsetof(VhostUserGpuMsg,
+                                                      payload.update.data),
+                                         width * bpp);
+            pixman_image_composite(PIXMAN_OP_SRC,
+                                   res->image, NULL, i,
+                                   extents->x1, extents->y1,
+                                   0, 0, 0, 0,
+                                   width, height);
+            pixman_image_unref(i);
+            vg_send_msg(g, msg, -1);
+            g_free(msg);
+        }
+        pixman_region_fini(&region);
+        pixman_region_fini(&finalregion);
+    }
+    pixman_region_fini(&flush_region);
+}
+
+static void
+vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd)
+{
+    switch (cmd->cmd_hdr.type) {
+    case VIRTIO_GPU_CMD_GET_DISPLAY_INFO:
+        vg_get_display_info(vg, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D:
+        vg_resource_create_2d(vg, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_UNREF:
+        vg_resource_unref(vg, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_FLUSH:
+        vg_resource_flush(vg, cmd);
+        break;
+    case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D:
+        vg_transfer_to_host_2d(vg, cmd);
+        break;
+    case VIRTIO_GPU_CMD_SET_SCANOUT:
+        vg_set_scanout(vg, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING:
+        vg_resource_attach_backing(vg, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
+        vg_resource_detach_backing(vg, cmd);
+        break;
+    /* case VIRTIO_GPU_CMD_GET_EDID: */
+    /*     break */
+    default:
+        g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type);
+        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+        break;
+    }
+    if (!cmd->finished) {
+        vg_ctrl_response_nodata(vg, cmd, cmd->error ? cmd->error :
+                                VIRTIO_GPU_RESP_OK_NODATA);
+    }
+}
+
+static void
+vg_handle_ctrl(VuDev *dev, int qidx)
+{
+    VuGpu *vg = container_of(dev, VuGpu, dev.parent);
+    VuVirtq *vq = vu_get_queue(dev, qidx);
+    struct virtio_gpu_ctrl_command *cmd = NULL;
+    size_t len;
+
+    for (;;) {
+        if (vg->wait_ok != 0) {
+            return;
+        }
+
+        cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command));
+        if (!cmd) {
+            break;
+        }
+        cmd->vq = vq;
+        cmd->error = 0;
+        cmd->finished = false;
+
+        len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num,
+                         0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr));
+        if (len != sizeof(cmd->cmd_hdr)) {
+            g_warning("%s: command size incorrect %zu vs %zu\n",
+                      __func__, len, sizeof(cmd->cmd_hdr));
+        }
+
+        virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr);
+        g_debug("%d %s\n", cmd->cmd_hdr.type,
+                vg_cmd_to_string(cmd->cmd_hdr.type));
+
+        if (vg->virgl) {
+            vg_virgl_process_cmd(vg, cmd);
+        } else {
+            vg_process_cmd(vg, cmd);
+        }
+
+        if (!cmd->finished) {
+            QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next);
+            vg->inflight++;
+        } else {
+            g_free(cmd);
+        }
+    }
+}
+
+static void
+update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data)
+{
+    struct virtio_gpu_simple_resource *res;
+
+    res = virtio_gpu_find_resource(g, resource_id);
+    g_return_if_fail(res != NULL);
+    g_return_if_fail(pixman_image_get_width(res->image) == 64);
+    g_return_if_fail(pixman_image_get_height(res->image) == 64);
+    g_return_if_fail(
+        PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32);
+
+    memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t));
+}
+
+static void
+vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor)
+{
+    bool move = cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR;
+
+    g_debug("%s move:%d\n", G_STRFUNC, move);
+
+    if (move) {
+        VhostUserGpuMsg msg = {
+            .request = cursor->resource_id ?
+                VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE,
+            .size = sizeof(VhostUserGpuCursorPos),
+            .payload.cursor_pos = {
+                .scanout_id = cursor->pos.scanout_id,
+                .x = cursor->pos.x,
+                .y = cursor->pos.y,
+            }
+        };
+        vg_send_msg(g, &msg, -1);
+    } else {
+        VhostUserGpuMsg msg = {
+            .request = VHOST_USER_GPU_CURSOR_UPDATE,
+            .size = sizeof(VhostUserGpuCursorUpdate),
+            .payload.cursor_update = {
+                .pos = {
+                    .scanout_id = cursor->pos.scanout_id,
+                    .x = cursor->pos.x,
+                    .y = cursor->pos.y,
+                },
+                .hot_x = cursor->hot_x,
+                .hot_y = cursor->hot_y,
+            }
+        };
+        if (g->virgl) {
+            vg_virgl_update_cursor_data(g, cursor->resource_id,
+                                        msg.payload.cursor_update.data);
+        } else {
+            update_cursor_data_simple(g, cursor->resource_id,
+                                      msg.payload.cursor_update.data);
+        }
+        vg_send_msg(g, &msg, -1);
+    }
+}
+
+static void
+vg_handle_cursor(VuDev *dev, int qidx)
+{
+    VuGpu *g = container_of(dev, VuGpu, dev.parent);
+    VuVirtq *vq = vu_get_queue(dev, qidx);
+    VuVirtqElement *elem;
+    size_t len;
+    struct virtio_gpu_update_cursor cursor;
+
+    for (;;) {
+        elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
+        if (!elem) {
+            break;
+        }
+        g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num);
+
+        len = iov_to_buf(elem->out_sg, elem->out_num,
+                         0, &cursor, sizeof(cursor));
+        if (len != sizeof(cursor)) {
+            g_warning("%s: cursor size incorrect %zu vs %zu\n",
+                      __func__, len, sizeof(cursor));
+        } else {
+            virtio_gpu_bswap_32(&cursor, sizeof(cursor));
+            vg_process_cursor_cmd(g, &cursor);
+        }
+        vu_queue_push(dev, vq, elem, 0);
+        vu_queue_notify(dev, vq);
+        g_free(elem);
+    }
+}
+
+static void
+vg_panic(VuDev *dev, const char *msg)
+{
+    g_critical("%s\n", msg);
+    exit(1);
+}
+
+static void
+vg_queue_set_started(VuDev *dev, int qidx, bool started)
+{
+    VuVirtq *vq = vu_get_queue(dev, qidx);
+
+    g_debug("queue started %d:%d\n", qidx, started);
+
+    switch (qidx) {
+    case 0:
+        vu_set_queue_handler(dev, vq, started ? vg_handle_ctrl : NULL);
+        break;
+    case 1:
+        vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL);
+        break;
+    default:
+        break;
+    }
+}
+
+static void
+set_gpu_protocol_features(VuGpu *g)
+{
+    uint64_t u64;
+    VhostUserGpuMsg msg = {
+        .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES
+    };
+
+    assert(g->wait_ok == 0);
+    vg_send_msg(g, &msg, -1);
+    if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) {
+        return;
+    }
+
+    msg = (VhostUserGpuMsg) {
+        .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES,
+        .size = sizeof(uint64_t),
+        .payload.u64 = 0
+    };
+    vg_send_msg(g, &msg, -1);
+}
+
+static int
+vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply)
+{
+    VuGpu *g = container_of(dev, VuGpu, dev.parent);
+
+    switch (msg->request) {
+    case VHOST_USER_GPU_SET_SOCKET: {
+        g_return_val_if_fail(msg->fd_num == 1, 1);
+        g_return_val_if_fail(g->sock_fd == -1, 1);
+        g->sock_fd = msg->fds[0];
+        set_gpu_protocol_features(g);
+        return 1;
+    }
+    default:
+        return 0;
+    }
+
+    return 0;
+}
+
+static uint64_t
+vg_get_features(VuDev *dev)
+{
+    uint64_t features = 0;
+
+    if (opt_virgl) {
+        features |= 1 << VIRTIO_GPU_F_VIRGL;
+    }
+
+    return features;
+}
+
+static void
+vg_set_features(VuDev *dev, uint64_t features)
+{
+    VuGpu *g = container_of(dev, VuGpu, dev.parent);
+    bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL);
+
+    if (virgl && !g->virgl_inited) {
+        if (!vg_virgl_init(g)) {
+            vg_panic(dev, "Failed to initialize virgl");
+        }
+        g->virgl_inited = true;
+    }
+
+    g->virgl = virgl;
+}
+
+static int
+vg_get_config(VuDev *dev, uint8_t *config, uint32_t len)
+{
+    VuGpu *g = container_of(dev, VuGpu, dev.parent);
+
+    g_return_val_if_fail(len <= sizeof(struct virtio_gpu_config), -1);
+
+    if (opt_virgl) {
+        g->virtio_config.num_capsets = vg_virgl_get_num_capsets();
+    }
+
+    memcpy(config, &g->virtio_config, len);
+
+    return 0;
+}
+
+static int
+vg_set_config(VuDev *dev, const uint8_t *data,
+              uint32_t offset, uint32_t size,
+              uint32_t flags)
+{
+    VuGpu *g = container_of(dev, VuGpu, dev.parent);
+    struct virtio_gpu_config *config = (struct virtio_gpu_config *)data;
+
+    if (config->events_clear) {
+        g->virtio_config.events_read &= ~config->events_clear;
+    }
+
+    return 0;
+}
+
+static const VuDevIface vuiface = {
+    .set_features = vg_set_features,
+    .get_features = vg_get_features,
+    .queue_set_started = vg_queue_set_started,
+    .process_msg = vg_process_msg,
+    .get_config = vg_get_config,
+    .set_config = vg_set_config,
+};
+
+static void
+vg_destroy(VuGpu *g)
+{
+    struct virtio_gpu_simple_resource *res, *tmp;
+
+    vug_deinit(&g->dev);
+
+    vg_sock_fd_close(g);
+
+    QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
+        vg_resource_destroy(g, res);
+    }
+
+    vugbm_device_destroy(&g->gdev);
+}
+
+static GOptionEntry entries[] = {
+    { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
+      "Print capabilities", NULL },
+    { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
+      "Use inherited fd socket", "FDNUM" },
+    { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
+      "Use UNIX socket path", "PATH" },
+    { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node,
+      "Specify DRM render node", "PATH" },
+    { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl,
+      "Turn virgl rendering on", NULL },
+    { NULL, }
+};
+
+int
+main(int argc, char *argv[])
+{
+    GOptionContext *context;
+    GError *error = NULL;
+    GMainLoop *loop = NULL;
+    int fd;
+    VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 };
+
+    QTAILQ_INIT(&g.reslist);
+    QTAILQ_INIT(&g.fenceq);
+
+    context = g_option_context_new("QEMU vhost-user-gpu");
+    g_option_context_add_main_entries(context, entries, NULL);
+    if (!g_option_context_parse(context, &argc, &argv, &error)) {
+        g_printerr("Option parsing failed: %s\n", error->message);
+        exit(EXIT_FAILURE);
+    }
+    g_option_context_free(context);
+
+    if (opt_print_caps) {
+        g_print("{\n");
+        g_print("  \"type\": \"gpu\",\n");
+        g_print("  \"features\": [\n");
+        g_print("    \"render-node\",\n");
+        g_print("    \"virgl\"\n");
+        g_print("  ]\n");
+        g_print("}\n");
+        exit(EXIT_SUCCESS);
+    }
+
+    g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node);
+    if (opt_render_node && g.drm_rnode_fd == -1) {
+        g_printerr("Failed to open DRM rendernode.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    if (g.drm_rnode_fd >= 0) {
+        if (!vugbm_device_init(&g.gdev, g.drm_rnode_fd)) {
+            g_warning("Failed to init DRM device, using fallback path");
+        }
+    }
+
+    if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) {
+        g_printerr("Please specify either --fd or --socket-path\n");
+        exit(EXIT_FAILURE);
+    }
+
+    if (opt_socket_path) {
+        int lsock = unix_listen(opt_socket_path, &error_fatal);
+        fd = accept(lsock, NULL, NULL);
+        close(lsock);
+    } else {
+        fd = opt_fdnum;
+    }
+    if (fd == -1) {
+        g_printerr("Invalid socket");
+        exit(EXIT_FAILURE);
+    }
+
+    vug_init(&g.dev, fd, vg_panic, &vuiface);
+
+    loop = g_main_loop_new(NULL, FALSE);
+    g_main_loop_run(loop);
+    g_main_loop_unref(loop);
+
+    vg_destroy(&g);
+    if (g.drm_rnode_fd >= 0) {
+        close(g.drm_rnode_fd);
+    }
+
+    return 0;
+}
diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c
new file mode 100644
index 0000000000..43413e29df
--- /dev/null
+++ b/contrib/vhost-user-gpu/virgl.c
@@ -0,0 +1,579 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2018
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *     Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <virglrenderer.h>
+#include "virgl.h"
+
+void
+vg_virgl_update_cursor_data(VuGpu *g, uint32_t resource_id,
+                            gpointer data)
+{
+    uint32_t width, height;
+    uint32_t *cursor;
+
+    cursor = virgl_renderer_get_cursor_data(resource_id, &width, &height);
+    g_return_if_fail(cursor != NULL);
+    g_return_if_fail(width == 64);
+    g_return_if_fail(height == 64);
+
+    memcpy(data, cursor, 64 * 64 * sizeof(uint32_t));
+    free(cursor);
+}
+
+static void
+virgl_cmd_context_create(VuGpu *g,
+                         struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_ctx_create cc;
+
+    VUGPU_FILL_CMD(cc);
+
+    virgl_renderer_context_create(cc.hdr.ctx_id, cc.nlen,
+                                  cc.debug_name);
+}
+
+static void
+virgl_cmd_context_destroy(VuGpu *g,
+                          struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_ctx_destroy cd;
+
+    VUGPU_FILL_CMD(cd);
+
+    virgl_renderer_context_destroy(cd.hdr.ctx_id);
+}
+
+static void
+virgl_cmd_create_resource_2d(VuGpu *g,
+                             struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_create_2d c2d;
+    struct virgl_renderer_resource_create_args args;
+
+    VUGPU_FILL_CMD(c2d);
+
+    args.handle = c2d.resource_id;
+    args.target = 2;
+    args.format = c2d.format;
+    args.bind = (1 << 1);
+    args.width = c2d.width;
+    args.height = c2d.height;
+    args.depth = 1;
+    args.array_size = 1;
+    args.last_level = 0;
+    args.nr_samples = 0;
+    args.flags = VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP;
+    virgl_renderer_resource_create(&args, NULL, 0);
+}
+
+static void
+virgl_cmd_create_resource_3d(VuGpu *g,
+                             struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_create_3d c3d;
+    struct virgl_renderer_resource_create_args args;
+
+    VUGPU_FILL_CMD(c3d);
+
+    args.handle = c3d.resource_id;
+    args.target = c3d.target;
+    args.format = c3d.format;
+    args.bind = c3d.bind;
+    args.width = c3d.width;
+    args.height = c3d.height;
+    args.depth = c3d.depth;
+    args.array_size = c3d.array_size;
+    args.last_level = c3d.last_level;
+    args.nr_samples = c3d.nr_samples;
+    args.flags = c3d.flags;
+    virgl_renderer_resource_create(&args, NULL, 0);
+}
+
+static void
+virgl_cmd_resource_unref(VuGpu *g,
+                         struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_unref unref;
+
+    VUGPU_FILL_CMD(unref);
+
+    virgl_renderer_resource_unref(unref.resource_id);
+}
+
+/* Not yet(?) defined in standard-headers, remove when possible */
+#ifndef VIRTIO_GPU_CAPSET_VIRGL2
+#define VIRTIO_GPU_CAPSET_VIRGL2 2
+#endif
+
+static void
+virgl_cmd_get_capset_info(VuGpu *g,
+                          struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_get_capset_info info;
+    struct virtio_gpu_resp_capset_info resp;
+
+    VUGPU_FILL_CMD(info);
+
+    if (info.capset_index == 0) {
+        resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL;
+        virgl_renderer_get_cap_set(resp.capset_id,
+                                   &resp.capset_max_version,
+                                   &resp.capset_max_size);
+    } else if (info.capset_index == 1) {
+        resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL2;
+        virgl_renderer_get_cap_set(resp.capset_id,
+                                   &resp.capset_max_version,
+                                   &resp.capset_max_size);
+    } else {
+        resp.capset_max_version = 0;
+        resp.capset_max_size = 0;
+    }
+    resp.hdr.type = VIRTIO_GPU_RESP_OK_CAPSET_INFO;
+    vg_ctrl_response(g, cmd, &resp.hdr, sizeof(resp));
+}
+
+uint32_t
+vg_virgl_get_num_capsets(void)
+{
+    uint32_t capset2_max_ver, capset2_max_size;
+    virgl_renderer_get_cap_set(VIRTIO_GPU_CAPSET_VIRGL2,
+                               &capset2_max_ver,
+                               &capset2_max_size);
+
+    return capset2_max_ver ? 2 : 1;
+}
+
+static void
+virgl_cmd_get_capset(VuGpu *g,
+                     struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_get_capset gc;
+    struct virtio_gpu_resp_capset *resp;
+    uint32_t max_ver, max_size;
+
+    VUGPU_FILL_CMD(gc);
+
+    virgl_renderer_get_cap_set(gc.capset_id, &max_ver,
+                               &max_size);
+    resp = g_malloc0(sizeof(*resp) + max_size);
+
+    resp->hdr.type = VIRTIO_GPU_RESP_OK_CAPSET;
+    virgl_renderer_fill_caps(gc.capset_id,
+                             gc.capset_version,
+                             (void *)resp->capset_data);
+    vg_ctrl_response(g, cmd, &resp->hdr, sizeof(*resp) + max_size);
+    g_free(resp);
+}
+
+static void
+virgl_cmd_submit_3d(VuGpu *g,
+                    struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_cmd_submit cs;
+    void *buf;
+    size_t s;
+
+    VUGPU_FILL_CMD(cs);
+
+    buf = g_malloc(cs.size);
+    s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num,
+                   sizeof(cs), buf, cs.size);
+    if (s != cs.size) {
+        g_critical("%s: size mismatch (%zd/%d)", __func__, s, cs.size);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+        goto out;
+    }
+
+    virgl_renderer_submit_cmd(buf, cs.hdr.ctx_id, cs.size / 4);
+
+out:
+    g_free(buf);
+}
+
+static void
+virgl_cmd_transfer_to_host_2d(VuGpu *g,
+                              struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_transfer_to_host_2d t2d;
+    struct virtio_gpu_box box;
+
+    VUGPU_FILL_CMD(t2d);
+
+    box.x = t2d.r.x;
+    box.y = t2d.r.y;
+    box.z = 0;
+    box.w = t2d.r.width;
+    box.h = t2d.r.height;
+    box.d = 1;
+
+    virgl_renderer_transfer_write_iov(t2d.resource_id,
+                                      0,
+                                      0,
+                                      0,
+                                      0,
+                                      (struct virgl_box *)&box,
+                                      t2d.offset, NULL, 0);
+}
+
+static void
+virgl_cmd_transfer_to_host_3d(VuGpu *g,
+                              struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_transfer_host_3d t3d;
+
+    VUGPU_FILL_CMD(t3d);
+
+    virgl_renderer_transfer_write_iov(t3d.resource_id,
+                                      t3d.hdr.ctx_id,
+                                      t3d.level,
+                                      t3d.stride,
+                                      t3d.layer_stride,
+                                      (struct virgl_box *)&t3d.box,
+                                      t3d.offset, NULL, 0);
+}
+
+static void
+virgl_cmd_transfer_from_host_3d(VuGpu *g,
+                                struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_transfer_host_3d tf3d;
+
+    VUGPU_FILL_CMD(tf3d);
+
+    virgl_renderer_transfer_read_iov(tf3d.resource_id,
+                                     tf3d.hdr.ctx_id,
+                                     tf3d.level,
+                                     tf3d.stride,
+                                     tf3d.layer_stride,
+                                     (struct virgl_box *)&tf3d.box,
+                                     tf3d.offset, NULL, 0);
+}
+
+static void
+virgl_resource_attach_backing(VuGpu *g,
+                              struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_attach_backing att_rb;
+    struct iovec *res_iovs;
+    int ret;
+
+    VUGPU_FILL_CMD(att_rb);
+
+    ret = vg_create_mapping_iov(g, &att_rb, cmd, &res_iovs);
+    if (ret != 0) {
+        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+        return;
+    }
+
+    virgl_renderer_resource_attach_iov(att_rb.resource_id,
+                                       res_iovs, att_rb.nr_entries);
+}
+
+static void
+virgl_resource_detach_backing(VuGpu *g,
+                              struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_detach_backing detach_rb;
+    struct iovec *res_iovs = NULL;
+    int num_iovs = 0;
+
+    VUGPU_FILL_CMD(detach_rb);
+
+    virgl_renderer_resource_detach_iov(detach_rb.resource_id,
+                                       &res_iovs,
+                                       &num_iovs);
+    if (res_iovs == NULL || num_iovs == 0) {
+        return;
+    }
+    g_free(res_iovs);
+}
+
+static void
+virgl_cmd_set_scanout(VuGpu *g,
+                      struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_set_scanout ss;
+    struct virgl_renderer_resource_info info;
+    int ret;
+
+    VUGPU_FILL_CMD(ss);
+
+    if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) {
+        g_critical("%s: illegal scanout id specified %d",
+                   __func__, ss.scanout_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
+        return;
+    }
+
+    memset(&info, 0, sizeof(info));
+
+    if (ss.resource_id && ss.r.width && ss.r.height) {
+        ret = virgl_renderer_resource_get_info(ss.resource_id, &info);
+        if (ret == -1) {
+            g_critical("%s: illegal resource specified %d\n",
+                       __func__, ss.resource_id);
+            cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+            return;
+        }
+
+        int fd = -1;
+        if (virgl_renderer_get_fd_for_texture(info.tex_id, &fd) < 0) {
+            g_critical("%s: failed to get fd for texture\n", __func__);
+            cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+            return;
+        }
+        assert(fd >= 0);
+        VhostUserGpuMsg msg = {
+            .request = VHOST_USER_GPU_DMABUF_SCANOUT,
+            .size = sizeof(VhostUserGpuDMABUFScanout),
+            .payload.dmabuf_scanout.scanout_id = ss.scanout_id,
+            .payload.dmabuf_scanout.x =  ss.r.x,
+            .payload.dmabuf_scanout.y =  ss.r.y,
+            .payload.dmabuf_scanout.width = ss.r.width,
+            .payload.dmabuf_scanout.height = ss.r.height,
+            .payload.dmabuf_scanout.fd_width = info.width,
+            .payload.dmabuf_scanout.fd_height = info.height,
+            .payload.dmabuf_scanout.fd_stride = info.stride,
+            .payload.dmabuf_scanout.fd_flags = info.flags,
+            .payload.dmabuf_scanout.fd_drm_fourcc = info.drm_fourcc
+        };
+        vg_send_msg(g, &msg, fd);
+        close(fd);
+    } else {
+        VhostUserGpuMsg msg = {
+            .request = VHOST_USER_GPU_DMABUF_SCANOUT,
+            .size = sizeof(VhostUserGpuDMABUFScanout),
+            .payload.dmabuf_scanout.scanout_id = ss.scanout_id,
+        };
+        g_debug("disable scanout");
+        vg_send_msg(g, &msg, -1);
+    }
+    g->scanout[ss.scanout_id].resource_id = ss.resource_id;
+}
+
+static void
+virgl_cmd_resource_flush(VuGpu *g,
+                         struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_flush rf;
+    int i;
+
+    VUGPU_FILL_CMD(rf);
+
+    if (!rf.resource_id) {
+        g_debug("bad resource id for flush..?");
+        return;
+    }
+    for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) {
+        if (g->scanout[i].resource_id != rf.resource_id) {
+            continue;
+        }
+        VhostUserGpuMsg msg = {
+            .request = VHOST_USER_GPU_DMABUF_UPDATE,
+            .size = sizeof(VhostUserGpuUpdate),
+            .payload.update.scanout_id = i,
+            .payload.update.x = rf.r.x,
+            .payload.update.y = rf.r.y,
+            .payload.update.width = rf.r.width,
+            .payload.update.height = rf.r.height
+        };
+        vg_send_msg(g, &msg, -1);
+        vg_wait_ok(g);
+    }
+}
+
+static void
+virgl_cmd_ctx_attach_resource(VuGpu *g,
+                              struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_ctx_resource att_res;
+
+    VUGPU_FILL_CMD(att_res);
+
+    virgl_renderer_ctx_attach_resource(att_res.hdr.ctx_id, att_res.resource_id);
+}
+
+static void
+virgl_cmd_ctx_detach_resource(VuGpu *g,
+                              struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_ctx_resource det_res;
+
+    VUGPU_FILL_CMD(det_res);
+
+    virgl_renderer_ctx_detach_resource(det_res.hdr.ctx_id, det_res.resource_id);
+}
+
+void vg_virgl_process_cmd(VuGpu *g, struct virtio_gpu_ctrl_command *cmd)
+{
+    virgl_renderer_force_ctx_0();
+    switch (cmd->cmd_hdr.type) {
+    case VIRTIO_GPU_CMD_CTX_CREATE:
+        virgl_cmd_context_create(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_CTX_DESTROY:
+        virgl_cmd_context_destroy(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D:
+        virgl_cmd_create_resource_2d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_CREATE_3D:
+        virgl_cmd_create_resource_3d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_SUBMIT_3D:
+        virgl_cmd_submit_3d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D:
+        virgl_cmd_transfer_to_host_2d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D:
+        virgl_cmd_transfer_to_host_3d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D:
+        virgl_cmd_transfer_from_host_3d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING:
+        virgl_resource_attach_backing(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
+        virgl_resource_detach_backing(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_SET_SCANOUT:
+        virgl_cmd_set_scanout(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_FLUSH:
+        virgl_cmd_resource_flush(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_UNREF:
+        virgl_cmd_resource_unref(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE:
+        /* TODO add security */
+        virgl_cmd_ctx_attach_resource(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE:
+        /* TODO add security */
+        virgl_cmd_ctx_detach_resource(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_GET_CAPSET_INFO:
+        virgl_cmd_get_capset_info(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_GET_CAPSET:
+        virgl_cmd_get_capset(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_GET_DISPLAY_INFO:
+        vg_get_display_info(g, cmd);
+        break;
+    default:
+        g_debug("TODO handle ctrl %x\n", cmd->cmd_hdr.type);
+        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+        break;
+    }
+
+    if (cmd->finished) {
+        return;
+    }
+
+    if (cmd->error) {
+        g_warning("%s: ctrl 0x%x, error 0x%x\n", __func__,
+                  cmd->cmd_hdr.type, cmd->error);
+        vg_ctrl_response_nodata(g, cmd, cmd->error);
+        return;
+    }
+
+    if (!(cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE)) {
+        vg_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
+        return;
+    }
+
+    g_debug("Creating fence id:%" PRId64 " type:%d",
+            cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
+    virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
+}
+
+static void
+virgl_write_fence(void *opaque, uint32_t fence)
+{
+    VuGpu *g = opaque;
+    struct virtio_gpu_ctrl_command *cmd, *tmp;
+
+    QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) {
+        /*
+         * the guest can end up emitting fences out of order
+         * so we should check all fenced cmds not just the first one.
+         */
+        if (cmd->cmd_hdr.fence_id > fence) {
+            continue;
+        }
+        g_debug("FENCE %" PRIu64, cmd->cmd_hdr.fence_id);
+        vg_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
+        QTAILQ_REMOVE(&g->fenceq, cmd, next);
+        g_free(cmd);
+        g->inflight--;
+    }
+}
+
+#if defined(VIRGL_RENDERER_CALLBACKS_VERSION) && \
+    VIRGL_RENDERER_CALLBACKS_VERSION >= 2
+static int
+virgl_get_drm_fd(void *opaque)
+{
+    VuGpu *g = opaque;
+
+    return g->drm_rnode_fd;
+}
+#endif
+
+static struct virgl_renderer_callbacks virgl_cbs = {
+#if defined(VIRGL_RENDERER_CALLBACKS_VERSION) &&    \
+    VIRGL_RENDERER_CALLBACKS_VERSION >= 2
+    .get_drm_fd  = virgl_get_drm_fd,
+    .version     = 2,
+#else
+    .version     = 1,
+#endif
+    .write_fence = virgl_write_fence,
+};
+
+static void
+vg_virgl_poll(VuDev *dev, int condition, void *data)
+{
+    virgl_renderer_poll();
+}
+
+bool
+vg_virgl_init(VuGpu *g)
+{
+    int ret;
+
+    if (g->drm_rnode_fd && virgl_cbs.version == 1) {
+        g_warning("virgl will use the default rendernode");
+    }
+
+    ret = virgl_renderer_init(g,
+                              VIRGL_RENDERER_USE_EGL |
+                              VIRGL_RENDERER_THREAD_SYNC,
+                              &virgl_cbs);
+    if (ret != 0) {
+        return false;
+    }
+
+    ret = virgl_renderer_get_poll_fd();
+    if (ret != -1) {
+        g->renderer_source =
+            vug_source_new(&g->dev, ret, G_IO_IN, vg_virgl_poll, g);
+    }
+
+    return true;
+}
diff --git a/contrib/vhost-user-gpu/virgl.h b/contrib/vhost-user-gpu/virgl.h
new file mode 100644
index 0000000000..f952bc9d4f
--- /dev/null
+++ b/contrib/vhost-user-gpu/virgl.h
@@ -0,0 +1,25 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2018
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *     Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef VUGPU_VIRGL_H_
+#define VUGPU_VIRGL_H_
+
+#include "vugpu.h"
+
+bool vg_virgl_init(VuGpu *g);
+uint32_t vg_virgl_get_num_capsets(void);
+void vg_virgl_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd);
+void vg_virgl_update_cursor_data(VuGpu *g, uint32_t resource_id,
+                                 gpointer data);
+
+#endif
diff --git a/contrib/vhost-user-gpu/vugbm.c b/contrib/vhost-user-gpu/vugbm.c
new file mode 100644
index 0000000000..d3bb82ff0e
--- /dev/null
+++ b/contrib/vhost-user-gpu/vugbm.c
@@ -0,0 +1,328 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * DRM helpers
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "vugbm.h"
+
+static bool
+mem_alloc_bo(struct vugbm_buffer *buf)
+{
+    buf->mmap = g_malloc(buf->width * buf->height * 4);
+    buf->stride = buf->width * 4;
+    return true;
+}
+
+static void
+mem_free_bo(struct vugbm_buffer *buf)
+{
+    g_free(buf->mmap);
+}
+
+static bool
+mem_map_bo(struct vugbm_buffer *buf)
+{
+    return buf->mmap != NULL;
+}
+
+static void
+mem_unmap_bo(struct vugbm_buffer *buf)
+{
+}
+
+static void
+mem_device_destroy(struct vugbm_device *dev)
+{
+}
+
+#ifdef CONFIG_MEMFD
+struct udmabuf_create {
+        uint32_t memfd;
+        uint32_t flags;
+        uint64_t offset;
+        uint64_t size;
+};
+
+#define UDMABUF_CREATE _IOW('u', 0x42, struct udmabuf_create)
+
+static size_t
+udmabuf_get_size(struct vugbm_buffer *buf)
+{
+    return ROUND_UP(buf->width * buf->height * 4, getpagesize());
+}
+
+static bool
+udmabuf_alloc_bo(struct vugbm_buffer *buf)
+{
+    int ret;
+
+    buf->memfd = memfd_create("udmabuf-bo", MFD_ALLOW_SEALING);
+    if (buf->memfd < 0) {
+        return false;
+    }
+
+    ret = ftruncate(buf->memfd, udmabuf_get_size(buf));
+    if (ret < 0) {
+        close(buf->memfd);
+        return false;
+    }
+
+    ret = fcntl(buf->memfd, F_ADD_SEALS, F_SEAL_SHRINK);
+    if (ret < 0) {
+        close(buf->memfd);
+        return false;
+    }
+
+    buf->stride = buf->width * 4;
+
+    return true;
+}
+
+static void
+udmabuf_free_bo(struct vugbm_buffer *buf)
+{
+    close(buf->memfd);
+}
+
+static bool
+udmabuf_map_bo(struct vugbm_buffer *buf)
+{
+    buf->mmap = mmap(NULL, udmabuf_get_size(buf),
+                     PROT_READ | PROT_WRITE, MAP_SHARED, buf->memfd, 0);
+    if (buf->mmap == MAP_FAILED) {
+        return false;
+    }
+
+    return true;
+}
+
+static bool
+udmabuf_get_fd(struct vugbm_buffer *buf, int *fd)
+{
+    struct udmabuf_create create = {
+        .memfd = buf->memfd,
+        .offset = 0,
+        .size = udmabuf_get_size(buf),
+    };
+
+    *fd = ioctl(buf->dev->fd, UDMABUF_CREATE, &create);
+
+    return *fd >= 0;
+}
+
+static void
+udmabuf_unmap_bo(struct vugbm_buffer *buf)
+{
+    munmap(buf->mmap, udmabuf_get_size(buf));
+}
+
+static void
+udmabuf_device_destroy(struct vugbm_device *dev)
+{
+    close(dev->fd);
+}
+#endif
+
+#ifdef CONFIG_GBM
+static bool
+alloc_bo(struct vugbm_buffer *buf)
+{
+    struct gbm_device *dev = buf->dev->dev;
+
+    assert(!buf->bo);
+
+    buf->bo = gbm_bo_create(dev, buf->width, buf->height,
+                            buf->format,
+                            GBM_BO_USE_RENDERING | GBM_BO_USE_LINEAR);
+
+    if (buf->bo) {
+        buf->stride = gbm_bo_get_stride(buf->bo);
+        return true;
+    }
+
+    return false;
+}
+
+static void
+free_bo(struct vugbm_buffer *buf)
+{
+    gbm_bo_destroy(buf->bo);
+}
+
+static bool
+map_bo(struct vugbm_buffer *buf)
+{
+    uint32_t stride;
+
+    buf->mmap = gbm_bo_map(buf->bo, 0, 0, buf->width, buf->height,
+                           GBM_BO_TRANSFER_READ_WRITE, &stride,
+                           &buf->mmap_data);
+
+    assert(stride == buf->stride);
+
+    return buf->mmap != NULL;
+}
+
+static void
+unmap_bo(struct vugbm_buffer *buf)
+{
+    gbm_bo_unmap(buf->bo, buf->mmap_data);
+}
+
+static bool
+get_fd(struct vugbm_buffer *buf, int *fd)
+{
+    *fd = gbm_bo_get_fd(buf->bo);
+
+    return *fd >= 0;
+}
+
+static void
+device_destroy(struct vugbm_device *dev)
+{
+    gbm_device_destroy(dev->dev);
+}
+#endif
+
+void
+vugbm_device_destroy(struct vugbm_device *dev)
+{
+    if (!dev->inited) {
+        return;
+    }
+
+    dev->device_destroy(dev);
+}
+
+bool
+vugbm_device_init(struct vugbm_device *dev, int fd)
+{
+    dev->fd = fd;
+
+#ifdef CONFIG_GBM
+    dev->dev = gbm_create_device(fd);
+#endif
+
+    if (0) {
+        /* nothing */
+    }
+#ifdef CONFIG_GBM
+    else if (dev->dev != NULL) {
+        dev->alloc_bo = alloc_bo;
+        dev->free_bo = free_bo;
+        dev->get_fd = get_fd;
+        dev->map_bo = map_bo;
+        dev->unmap_bo = unmap_bo;
+        dev->device_destroy = device_destroy;
+    }
+#endif
+#ifdef CONFIG_MEMFD
+    else if (g_file_test("/dev/udmabuf", G_FILE_TEST_EXISTS)) {
+        dev->fd = open("/dev/udmabuf", O_RDWR);
+        if (dev->fd < 0) {
+            return false;
+        }
+        g_debug("Using experimental udmabuf backend");
+        dev->alloc_bo = udmabuf_alloc_bo;
+        dev->free_bo = udmabuf_free_bo;
+        dev->get_fd = udmabuf_get_fd;
+        dev->map_bo = udmabuf_map_bo;
+        dev->unmap_bo = udmabuf_unmap_bo;
+        dev->device_destroy = udmabuf_device_destroy;
+    }
+#endif
+    else {
+        g_debug("Using mem fallback");
+        dev->alloc_bo = mem_alloc_bo;
+        dev->free_bo = mem_free_bo;
+        dev->map_bo = mem_map_bo;
+        dev->unmap_bo = mem_unmap_bo;
+        dev->device_destroy = mem_device_destroy;
+        return false;
+    }
+
+    dev->inited = true;
+    return true;
+}
+
+static bool
+vugbm_buffer_map(struct vugbm_buffer *buf)
+{
+    struct vugbm_device *dev = buf->dev;
+
+    return dev->map_bo(buf);
+}
+
+static void
+vugbm_buffer_unmap(struct vugbm_buffer *buf)
+{
+    struct vugbm_device *dev = buf->dev;
+
+    dev->unmap_bo(buf);
+}
+
+bool
+vugbm_buffer_can_get_dmabuf_fd(struct vugbm_buffer *buffer)
+{
+    if (!buffer->dev->get_fd) {
+        return false;
+    }
+
+    return true;
+}
+
+bool
+vugbm_buffer_get_dmabuf_fd(struct vugbm_buffer *buffer, int *fd)
+{
+    if (!vugbm_buffer_can_get_dmabuf_fd(buffer) ||
+        !buffer->dev->get_fd(buffer, fd)) {
+        g_warning("Failed to get dmabuf");
+        return false;
+    }
+
+    if (*fd < 0) {
+        g_warning("error: dmabuf_fd < 0");
+        return false;
+    }
+
+    return true;
+}
+
+bool
+vugbm_buffer_create(struct vugbm_buffer *buffer, struct vugbm_device *dev,
+                    uint32_t width, uint32_t height)
+{
+    buffer->dev = dev;
+    buffer->width = width;
+    buffer->height = height;
+    buffer->format = GBM_FORMAT_XRGB8888;
+    buffer->stride = 0; /* modified during alloc */
+    if (!dev->alloc_bo(buffer)) {
+        g_warning("alloc_bo failed");
+        return false;
+    }
+
+    if (!vugbm_buffer_map(buffer)) {
+        g_warning("map_bo failed");
+        goto err;
+    }
+
+    return true;
+
+err:
+    dev->free_bo(buffer);
+    return false;
+}
+
+void
+vugbm_buffer_destroy(struct vugbm_buffer *buffer)
+{
+    struct vugbm_device *dev = buffer->dev;
+
+    vugbm_buffer_unmap(buffer);
+    dev->free_bo(buffer);
+}
diff --git a/contrib/vhost-user-gpu/vugbm.h b/contrib/vhost-user-gpu/vugbm.h
new file mode 100644
index 0000000000..c0bf27af9b
--- /dev/null
+++ b/contrib/vhost-user-gpu/vugbm.h
@@ -0,0 +1,67 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * GBM helpers
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef VHOST_USER_GPU_GBM_H
+#define VHOST_USER_GPU_GBM_H
+
+#include "qemu/osdep.h"
+
+#ifdef CONFIG_MEMFD
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#endif
+
+#ifdef CONFIG_GBM
+#include <gbm.h>
+#endif
+
+struct vugbm_buffer;
+
+struct vugbm_device {
+    bool inited;
+    int fd;
+#ifdef CONFIG_GBM
+    struct gbm_device *dev;
+#endif
+
+    bool (*alloc_bo)(struct vugbm_buffer *buf);
+    void (*free_bo)(struct vugbm_buffer *buf);
+    bool (*get_fd)(struct vugbm_buffer *buf, int *fd);
+    bool (*map_bo)(struct vugbm_buffer *buf);
+    void (*unmap_bo)(struct vugbm_buffer *buf);
+    void (*device_destroy)(struct vugbm_device *dev);
+};
+
+struct vugbm_buffer {
+    struct vugbm_device *dev;
+
+#ifdef CONFIG_MEMFD
+    int memfd;
+#endif
+#ifdef CONFIG_GBM
+    struct gbm_bo *bo;
+    void *mmap_data;
+#endif
+
+    uint8_t *mmap;
+    uint32_t width;
+    uint32_t height;
+    uint32_t stride;
+    uint32_t format;
+};
+
+bool vugbm_device_init(struct vugbm_device *dev, int fd);
+void vugbm_device_destroy(struct vugbm_device *dev);
+
+bool vugbm_buffer_create(struct vugbm_buffer *buffer, struct vugbm_device *dev,
+                         uint32_t width, uint32_t height);
+bool vugbm_buffer_can_get_dmabuf_fd(struct vugbm_buffer *buffer);
+bool vugbm_buffer_get_dmabuf_fd(struct vugbm_buffer *buffer, int *fd);
+void vugbm_buffer_destroy(struct vugbm_buffer *buffer);
+
+#endif
diff --git a/contrib/vhost-user-gpu/vugpu.h b/contrib/vhost-user-gpu/vugpu.h
new file mode 100644
index 0000000000..458e92a1b3
--- /dev/null
+++ b/contrib/vhost-user-gpu/vugpu.h
@@ -0,0 +1,177 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2018
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *     Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef VUGPU_H_
+#define VUGPU_H_
+
+#include "qemu/osdep.h"
+
+#include "contrib/libvhost-user/libvhost-user-glib.h"
+#include "standard-headers/linux/virtio_gpu.h"
+
+#include "qemu/queue.h"
+#include "qemu/iov.h"
+#include "qemu/bswap.h"
+#include "vugbm.h"
+
+typedef enum VhostUserGpuRequest {
+    VHOST_USER_GPU_NONE = 0,
+    VHOST_USER_GPU_GET_PROTOCOL_FEATURES,
+    VHOST_USER_GPU_SET_PROTOCOL_FEATURES,
+    VHOST_USER_GPU_GET_DISPLAY_INFO,
+    VHOST_USER_GPU_CURSOR_POS,
+    VHOST_USER_GPU_CURSOR_POS_HIDE,
+    VHOST_USER_GPU_CURSOR_UPDATE,
+    VHOST_USER_GPU_SCANOUT,
+    VHOST_USER_GPU_UPDATE,
+    VHOST_USER_GPU_DMABUF_SCANOUT,
+    VHOST_USER_GPU_DMABUF_UPDATE,
+} VhostUserGpuRequest;
+
+typedef struct VhostUserGpuDisplayInfoReply {
+    struct virtio_gpu_resp_display_info info;
+} VhostUserGpuDisplayInfoReply;
+
+typedef struct VhostUserGpuCursorPos {
+    uint32_t scanout_id;
+    uint32_t x;
+    uint32_t y;
+} QEMU_PACKED VhostUserGpuCursorPos;
+
+typedef struct VhostUserGpuCursorUpdate {
+    VhostUserGpuCursorPos pos;
+    uint32_t hot_x;
+    uint32_t hot_y;
+    uint32_t data[64 * 64];
+} QEMU_PACKED VhostUserGpuCursorUpdate;
+
+typedef struct VhostUserGpuScanout {
+    uint32_t scanout_id;
+    uint32_t width;
+    uint32_t height;
+} QEMU_PACKED VhostUserGpuScanout;
+
+typedef struct VhostUserGpuUpdate {
+    uint32_t scanout_id;
+    uint32_t x;
+    uint32_t y;
+    uint32_t width;
+    uint32_t height;
+    uint8_t data[];
+} QEMU_PACKED VhostUserGpuUpdate;
+
+typedef struct VhostUserGpuDMABUFScanout {
+    uint32_t scanout_id;
+    uint32_t x;
+    uint32_t y;
+    uint32_t width;
+    uint32_t height;
+    uint32_t fd_width;
+    uint32_t fd_height;
+    uint32_t fd_stride;
+    uint32_t fd_flags;
+    int fd_drm_fourcc;
+} QEMU_PACKED VhostUserGpuDMABUFScanout;
+
+typedef struct VhostUserGpuMsg {
+    uint32_t request; /* VhostUserGpuRequest */
+    uint32_t flags;
+    uint32_t size; /* the following payload size */
+    union {
+        VhostUserGpuCursorPos cursor_pos;
+        VhostUserGpuCursorUpdate cursor_update;
+        VhostUserGpuScanout scanout;
+        VhostUserGpuUpdate update;
+        VhostUserGpuDMABUFScanout dmabuf_scanout;
+        struct virtio_gpu_resp_display_info display_info;
+        uint64_t u64;
+    } payload;
+} QEMU_PACKED VhostUserGpuMsg;
+
+static VhostUserGpuMsg m __attribute__ ((unused));
+#define VHOST_USER_GPU_HDR_SIZE \
+    (sizeof(m.request) + sizeof(m.flags) + sizeof(m.size))
+
+#define VHOST_USER_GPU_MSG_FLAG_REPLY 0x4
+
+struct virtio_gpu_scanout {
+    uint32_t width, height;
+    int x, y;
+    int invalidate;
+    uint32_t resource_id;
+};
+
+typedef struct VuGpu {
+    VugDev dev;
+    struct virtio_gpu_config virtio_config;
+    struct vugbm_device gdev;
+    int sock_fd;
+    int drm_rnode_fd;
+    GSource *renderer_source;
+    guint wait_ok;
+
+    bool virgl;
+    bool virgl_inited;
+    uint32_t inflight;
+
+    struct virtio_gpu_scanout scanout[VIRTIO_GPU_MAX_SCANOUTS];
+    QTAILQ_HEAD(, virtio_gpu_simple_resource) reslist;
+    QTAILQ_HEAD(, virtio_gpu_ctrl_command) fenceq;
+} VuGpu;
+
+struct virtio_gpu_ctrl_command {
+    VuVirtqElement elem;
+    VuVirtq *vq;
+    struct virtio_gpu_ctrl_hdr cmd_hdr;
+    uint32_t error;
+    bool finished;
+    QTAILQ_ENTRY(virtio_gpu_ctrl_command) next;
+};
+
+#define VUGPU_FILL_CMD(out) do {                                \
+        size_t s;                                               \
+        s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0,  \
+                       &out, sizeof(out));                      \
+        if (s != sizeof(out)) {                                 \
+            g_critical("%s: command size incorrect %zu vs %zu", \
+                       __func__, s, sizeof(out));               \
+            return;                                             \
+        }                                                       \
+    } while (0)
+
+
+void    vg_ctrl_response(VuGpu *g,
+                         struct virtio_gpu_ctrl_command *cmd,
+                         struct virtio_gpu_ctrl_hdr *resp,
+                         size_t resp_len);
+
+void    vg_ctrl_response_nodata(VuGpu *g,
+                                struct virtio_gpu_ctrl_command *cmd,
+                                enum virtio_gpu_ctrl_type type);
+
+int     vg_create_mapping_iov(VuGpu *g,
+                              struct virtio_gpu_resource_attach_backing *ab,
+                              struct virtio_gpu_ctrl_command *cmd,
+                              struct iovec **iov);
+
+void    vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd);
+
+void    vg_wait_ok(VuGpu *g);
+
+void    vg_send_msg(VuGpu *g, const VhostUserGpuMsg *msg, int fd);
+
+bool    vg_recv_msg(VuGpu *g, uint32_t expect_req, uint32_t expect_size,
+                    gpointer payload);
+
+
+#endif
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
index 4f9012d154..aeb997bed5 100644
--- a/docs/devel/multiple-iothreads.txt
+++ b/docs/devel/multiple-iothreads.txt
@@ -109,7 +109,7 @@ The AioContext originates from the QEMU block layer, even though nowadays
 AioContext is a generic event loop that can be used by any QEMU subsystem.
 
 The block layer has support for AioContext integrated.  Each BlockDriverState
-is associated with an AioContext using bdrv_set_aio_context() and
+is associated with an AioContext using bdrv_try_set_aio_context() and
 bdrv_get_aio_context().  This allows block layer code to process I/O inside the
 right AioContext.  Other subsystems may wish to follow a similar approach.
 
@@ -134,5 +134,5 @@ Long-running jobs (usually in the form of coroutines) are best scheduled in
 the BlockDriverState's AioContext to avoid the need to acquire/release around
 each bdrv_*() call.  The functions bdrv_add/remove_aio_context_notifier,
 or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends,
-can be used to get a notification whenever bdrv_set_aio_context() moves a
+can be used to get a notification whenever bdrv_try_set_aio_context() moves a
 BlockDriverState to a different AioContext.
diff --git a/docs/index.rst b/docs/index.rst
index 3690955dd1..baa5791c17 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -12,4 +12,5 @@ Welcome to QEMU's documentation!
 
    interop/index
    devel/index
+   specs/index
 
diff --git a/docs/interop/index.rst b/docs/interop/index.rst
index a037bd67ec..b4bfcab417 100644
--- a/docs/interop/index.rst
+++ b/docs/interop/index.rst
@@ -16,3 +16,4 @@ Contents:
    live-block-operations
    pr-helper
    vhost-user
+   vhost-user-gpu
diff --git a/docs/interop/vhost-user-gpu.rst b/docs/interop/vhost-user-gpu.rst
new file mode 100644
index 0000000000..688f8b4259
--- /dev/null
+++ b/docs/interop/vhost-user-gpu.rst
@@ -0,0 +1,242 @@
+=======================
+Vhost-user-gpu Protocol
+=======================
+
+:Licence: This work is licensed under the terms of the GNU GPL,
+          version 2 or later. See the COPYING file in the top-level
+          directory.
+
+.. contents:: Table of Contents
+
+Introduction
+============
+
+The vhost-user-gpu protocol is aiming at sharing the rendering result
+of a virtio-gpu, done from a vhost-user slave process to a vhost-user
+master process (such as QEMU). It bears a resemblance to a display
+server protocol, if you consider QEMU as the display server and the
+slave as the client, but in a very limited way. Typically, it will
+work by setting a scanout/display configuration, before sending flush
+events for the display updates. It will also update the cursor shape
+and position.
+
+The protocol is sent over a UNIX domain stream socket, since it uses
+socket ancillary data to share opened file descriptors (DMABUF fds or
+shared memory). The socket is usually obtained via
+``VHOST_USER_GPU_SET_SOCKET``.
+
+Requests are sent by the *slave*, and the optional replies by the
+*master*.
+
+Wire format
+===========
+
+Unless specified differently, numbers are in the machine native byte
+order.
+
+A vhost-user-gpu message (request and reply) consists of 3 header
+fields and a payload.
+
++---------+-------+------+---------+
+| request | flags | size | payload |
++---------+-------+------+---------+
+
+Header
+------
+
+:request: ``u32``, type of the request
+
+:flags: ``u32``, 32-bit bit field:
+
+ - Bit 2 is the reply flag - needs to be set on each reply
+
+:size: ``u32``, size of the payload
+
+Payload types
+-------------
+
+Depending on the request type, **payload** can be:
+
+VhostUserGpuCursorPos
+^^^^^^^^^^^^^^^^^^^^^
+
++------------+---+---+
+| scanout-id | x | y |
++------------+---+---+
+
+:scanout-id: ``u32``, the scanout where the cursor is located
+
+:x/y: ``u32``, the cursor postion
+
+VhostUserGpuCursorUpdate
+^^^^^^^^^^^^^^^^^^^^^^^^
+
++-----+-------+-------+--------+
+| pos | hot_x | hot_y | cursor |
++-----+-------+-------+--------+
+
+:pos: a ``VhostUserGpuCursorPos``, the cursor location
+
+:hot_x/hot_y: ``u32``, the cursor hot location
+
+:cursor: ``[u32; 64 * 64]``, 64x64 RGBA cursor data (PIXMAN_a8r8g8b8 format)
+
+VhostUserGpuScanout
+^^^^^^^^^^^^^^^^^^^
+
++------------+---+---+
+| scanout-id | w | h |
++------------+---+---+
+
+:scanout-id: ``u32``, the scanout configuration to set
+
+:w/h: ``u32``, the scanout width/height size
+
+VhostUserGpuUpdate
+^^^^^^^^^^^^^^^^^^
+
++------------+---+---+---+---+------+
+| scanout-id | x | y | w | h | data |
++------------+---+---+---+---+------+
+
+:scanout-id: ``u32``, the scanout content to update
+
+:x/y/w/h: ``u32``, region of the update
+
+:data: RGB data (PIXMAN_x8r8g8b8 format)
+
+VhostUserGpuDMABUFScanout
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
++------------+---+---+---+---+-----+-----+--------+-------+--------+
+| scanout-id | x | y | w | h | fdw | fwh | stride | flags | fourcc |
++------------+---+---+---+---+-----+-----+--------+-------+--------+
+
+:scanout-id: ``u32``, the scanout configuration to set
+
+:x/y: ``u32``, the location of the scanout within the DMABUF
+
+:w/h: ``u32``, the scanout width/height size
+
+:fdw/fdh/stride/flags: ``u32``, the DMABUF width/height/stride/flags
+
+:fourcc: ``i32``, the DMABUF fourcc
+
+
+C structure
+-----------
+
+In QEMU the vhost-user-gpu message is implemented with the following struct:
+
+.. code:: c
+
+  typedef struct VhostUserGpuMsg {
+      uint32_t request; /* VhostUserGpuRequest */
+      uint32_t flags;
+      uint32_t size; /* the following payload size */
+      union {
+          VhostUserGpuCursorPos cursor_pos;
+          VhostUserGpuCursorUpdate cursor_update;
+          VhostUserGpuScanout scanout;
+          VhostUserGpuUpdate update;
+          VhostUserGpuDMABUFScanout dmabuf_scanout;
+          struct virtio_gpu_resp_display_info display_info;
+          uint64_t u64;
+      } payload;
+  } QEMU_PACKED VhostUserGpuMsg;
+
+Protocol features
+-----------------
+
+None yet.
+
+As the protocol may need to evolve, new messages and communication
+changes are negotiated thanks to preliminary
+``VHOST_USER_GPU_GET_PROTOCOL_FEATURES`` and
+``VHOST_USER_GPU_SET_PROTOCOL_FEATURES`` requests.
+
+Communication
+=============
+
+Message types
+-------------
+
+``VHOST_USER_GPU_GET_PROTOCOL_FEATURES``
+  :id: 1
+  :request payload: N/A
+  :reply payload: ``u64``
+
+  Get the supported protocol features bitmask.
+
+``VHOST_USER_GPU_SET_PROTOCOL_FEATURES``
+  :id: 2
+  :request payload: ``u64``
+  :reply payload: N/A
+
+  Enable protocol features using a bitmask.
+
+``VHOST_USER_GPU_GET_DISPLAY_INFO``
+  :id: 3
+  :request payload: N/A
+  :reply payload: ``struct virtio_gpu_resp_display_info`` (from virtio specification)
+
+  Get the preferred display configuration.
+
+``VHOST_USER_GPU_CURSOR_POS``
+  :id: 4
+  :request payload: ``VhostUserGpuCursorPos``
+  :reply payload: N/A
+
+  Set/show the cursor position.
+
+``VHOST_USER_GPU_CURSOR_POS_HIDE``
+  :id: 5
+  :request payload: ``VhostUserGpuCursorPos``
+  :reply payload: N/A
+
+  Set/hide the cursor.
+
+``VHOST_USER_GPU_CURSOR_UPDATE``
+  :id: 6
+  :request payload: ``VhostUserGpuCursorUpdate``
+  :reply payload: N/A
+
+  Update the cursor shape and location.
+
+``VHOST_USER_GPU_SCANOUT``
+  :id: 7
+  :request payload: ``VhostUserGpuScanout``
+  :reply payload: N/A
+
+  Set the scanout resolution. To disable a scanout, the dimensions
+  width/height are set to 0.
+
+``VHOST_USER_GPU_UPDATE``
+  :id: 8
+  :request payload: ``VhostUserGpuUpdate``
+  :reply payload: N/A
+
+  Update the scanout content. The data payload contains the graphical bits.
+  The display should be flushed and presented.
+
+``VHOST_USER_GPU_DMABUF_SCANOUT``
+  :id: 9
+  :request payload: ``VhostUserGpuDMABUFScanout``
+  :reply payload: N/A
+
+  Set the scanout resolution/configuration, and share a DMABUF file
+  descriptor for the scanout content, which is passed as ancillary
+  data. To disable a scanout, the dimensions width/height are set
+  to 0, there is no file descriptor passed.
+
+``VHOST_USER_GPU_DMABUF_UPDATE``
+  :id: 10
+  :request payload: ``VhostUserGpuUpdate``
+  :reply payload: empty payload
+
+  The display should be flushed and presented according to updated
+  region from ``VhostUserGpuUpdate``.
+
+  Note: there is no data payload, since the scanout is shared thanks
+  to DMABUF, that must have been set previously with
+  ``VHOST_USER_GPU_DMABUF_SCANOUT``.
diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index 7f3232c798..dc0ff9211f 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -1163,6 +1163,15 @@ Master message types
   send the shared inflight buffer back to slave so that slave could
   get inflight I/O after a crash or restart.
 
+``VHOST_USER_GPU_SET_SOCKET``
+  :id: 33
+  :equivalent ioctl: N/A
+  :master payload: N/A
+
+  Sets the GPU protocol socket file descriptor, which is passed as
+  ancillary data. The GPU protocol is used to inform the master of
+  rendering state and updates. See vhost-user-gpu.rst for details.
+
 Slave message types
 -------------------
 
diff --git a/docs/specs/index.rst b/docs/specs/index.rst
new file mode 100644
index 0000000000..2e927519c2
--- /dev/null
+++ b/docs/specs/index.rst
@@ -0,0 +1,13 @@
+. This is the top level page for the 'specs' manual
+
+
+QEMU full-system emulation guest hardware specifications
+========================================================
+
+
+Contents:
+
+.. toctree::
+   :maxdepth: 2
+
+   xive
diff --git a/docs/specs/ppc-spapr-xive.rst b/docs/specs/ppc-spapr-xive.rst
new file mode 100644
index 0000000000..539ce7ca4e
--- /dev/null
+++ b/docs/specs/ppc-spapr-xive.rst
@@ -0,0 +1,174 @@
+XIVE for sPAPR (pseries machines)
+=================================
+
+The POWER9 processor comes with a new interrupt controller
+architecture, called XIVE as "eXternal Interrupt Virtualization
+Engine". It supports a larger number of interrupt sources and offers
+virtualization features which enables the HW to deliver interrupts
+directly to virtual processors without hypervisor assistance.
+
+A QEMU ``pseries`` machine (which is PAPR compliant) using POWER9
+processors can run under two interrupt modes:
+
+- *Legacy Compatibility Mode*
+
+  the hypervisor provides identical interfaces and similar
+  functionality to PAPR+ Version 2.7.  This is the default mode
+
+  It is also referred as *XICS* in QEMU.
+
+- *XIVE native exploitation mode*
+
+  the hypervisor provides new interfaces to manage the XIVE control
+  structures, and provides direct control for interrupt management
+  through MMIO pages.
+
+Which interrupt modes can be used by the machine is negotiated with
+the guest O/S during the Client Architecture Support negotiation
+sequence. The two modes are mutually exclusive.
+
+Both interrupt mode share the same IRQ number space. See below for the
+layout.
+
+CAS Negotiation
+---------------
+
+QEMU advertises the supported interrupt modes in the device tree
+property "ibm,arch-vec-5-platform-support" in byte 23 and the OS
+Selection for XIVE is indicated in the "ibm,architecture-vec-5"
+property byte 23.
+
+The interrupt modes supported by the machine depend on the CPU type
+(POWER9 is required for XIVE) but also on the machine property
+``ic-mode`` which can be set on the command line. It can take the
+following values: ``xics``, ``xive``, ``dual`` and currently ``xics``
+is the default but it may change in the future.
+
+The choosen interrupt mode is activated after a reconfiguration done
+in a machine reset.
+
+XIVE Device tree properties
+---------------------------
+
+The properties for the PAPR interrupt controller node when the *XIVE
+native exploitation mode* is selected shoud contain:
+
+- ``device_type``
+
+  value should be "power-ivpe".
+
+- ``compatible``
+
+  value should be "ibm,power-ivpe".
+
+- ``reg``
+
+  contains the base address and size of the thread interrupt
+  managnement areas (TIMA), for the User level and for the Guest OS
+  level. Only the Guest OS level is taken into account today.
+
+- ``ibm,xive-eq-sizes``
+
+  the size of the event queues. One cell per size supported, contains
+  log2 of size, in ascending order.
+
+- ``ibm,xive-lisn-ranges``
+
+  the IRQ interrupt number ranges assigned to the guest for the IPIs.
+
+The root node also exports :
+
+- ``ibm,plat-res-int-priorities``
+
+  contains a list of priorities that the hypervisor has reserved for
+  its own use.
+
+IRQ number space
+----------------
+
+IRQ Number space of the ``pseries`` machine is 8K wide and is the same
+for both interrupt mode. The different ranges are defined as follow :
+
+- ``0x0000 .. 0x0FFF`` 4K CPU IPIs (only used under XIVE)
+- ``0x1000 .. 0x1000`` 1 EPOW
+- ``0x1001 .. 0x1001`` 1 HOTPLUG
+- ``0x1100 .. 0x11FF`` 256 VIO devices
+- ``0x1200 .. 0x127F`` 32 PHBs devices
+- ``0x1280 .. 0x12FF`` unused
+- ``0x1300 .. 0x1FFF`` PHB MSIs
+
+Monitoring XIVE
+---------------
+
+The state of the XIVE interrupt controller can be queried through the
+monitor commands ``info pic``. The output comes in two parts.
+
+First, the state of the thread interrupt context registers is dumped
+for each CPU :
+
+::
+
+   (qemu) info pic
+   CPU[0000]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR  W2
+   CPU[0000]: USER    00   00  00    00   00  00  00   00  00000000
+   CPU[0000]:   OS    00   ff  00    00   ff  00  ff   ff  80000400
+   CPU[0000]: POOL    00   00  00    00   00  00  00   00  00000000
+   CPU[0000]: PHYS    00   00  00    00   00  00  00   ff  00000000
+   ...
+
+In the case of a ``pseries`` machine, QEMU acts as the hypervisor and only
+the O/S and USER register rings make sense. ``W2`` contains the vCPU CAM
+line which is set to the VP identifier.
+
+Then comes the routing information which aggregates the EAS and the
+END configuration:
+
+::
+
+   ...
+   LISN         PQ    EISN     CPU/PRIO EQ
+   00000000 MSI --    00000010   0/6    380/16384 @1fe3e0000 ^1 [ 80000010 ... ]
+   00000001 MSI --    00000010   1/6    305/16384 @1fc230000 ^1 [ 80000010 ... ]
+   00000002 MSI --    00000010   2/6    220/16384 @1fc2f0000 ^1 [ 80000010 ... ]
+   00000003 MSI --    00000010   3/6    201/16384 @1fc390000 ^1 [ 80000010 ... ]
+   00000004 MSI -Q  M 00000000
+   00000005 MSI -Q  M 00000000
+   00000006 MSI -Q  M 00000000
+   00000007 MSI -Q  M 00000000
+   00001000 MSI --    00000012   0/6    380/16384 @1fe3e0000 ^1 [ 80000010 ... ]
+   00001001 MSI --    00000013   0/6    380/16384 @1fe3e0000 ^1 [ 80000010 ... ]
+   00001100 MSI --    00000100   1/6    305/16384 @1fc230000 ^1 [ 80000010 ... ]
+   00001101 MSI -Q  M 00000000
+   00001200 LSI -Q  M 00000000
+   00001201 LSI -Q  M 00000000
+   00001202 LSI -Q  M 00000000
+   00001203 LSI -Q  M 00000000
+   00001300 MSI --    00000102   1/6    305/16384 @1fc230000 ^1 [ 80000010 ... ]
+   00001301 MSI --    00000103   2/6    220/16384 @1fc2f0000 ^1 [ 80000010 ... ]
+   00001302 MSI --    00000104   3/6    201/16384 @1fc390000 ^1 [ 80000010 ... ]
+
+The source information and configuration:
+
+- The ``LISN`` column outputs the interrupt number of the source in
+  range ``[ 0x0 ... 0x1FFF ]`` and its type : ``MSI`` or ``LSI``
+- The ``PQ`` column reflects the state of the PQ bits of the source :
+
+  - ``--`` source is ready to take events
+  - ``P-`` an event was sent and an EOI is PENDING
+  - ``PQ`` an event was QUEUED
+  - ``-Q`` source is OFF
+
+  a ``M`` indicates that source is *MASKED* at the EAS level,
+
+The targeting configuration :
+
+- The ``EISN`` column is the event data that will be queued in the event
+  queue of the O/S.
+- The ``CPU/PRIO`` column is the tuple defining the CPU number and
+  priority queue serving the source.
+- The ``EQ`` column outputs :
+
+  - the current index of the event queue/ the max number of entries
+  - the O/S event queue address
+  - the toggle bit
+  - the last entries that were pushed in the event queue.
diff --git a/docs/specs/ppc-xive.rst b/docs/specs/ppc-xive.rst
new file mode 100644
index 0000000000..b997dc0629
--- /dev/null
+++ b/docs/specs/ppc-xive.rst
@@ -0,0 +1,199 @@
+================================
+POWER9 XIVE interrupt controller
+================================
+
+The POWER9 processor comes with a new interrupt controller
+architecture, called XIVE as "eXternal Interrupt Virtualization
+Engine".
+
+Compared to the previous architecture, the main characteristics of
+XIVE are to support a larger number of interrupt sources and to
+deliver interrupts directly to virtual processors without hypervisor
+assistance. This removes the context switches required for the
+delivery process.
+
+
+XIVE architecture
+=================
+
+The XIVE IC is composed of three sub-engines, each taking care of a
+processing layer of external interrupts:
+
+- Interrupt Virtualization Source Engine (IVSE), or Source Controller
+  (SC). These are found in PCI PHBs, in the PSI host bridge
+  controller, but also inside the main controller for the core IPIs
+  and other sub-chips (NX, CAP, NPU) of the chip/processor. They are
+  configured to feed the IVRE with events.
+- Interrupt Virtualization Routing Engine (IVRE) or Virtualization
+  Controller (VC). It handles event coalescing and perform interrupt
+  routing by matching an event source number with an Event
+  Notification Descriptor (END).
+- Interrupt Virtualization Presentation Engine (IVPE) or Presentation
+  Controller (PC). It maintains the interrupt context state of each
+  thread and handles the delivery of the external interrupt to the
+  thread.
+
+::
+
+                XIVE Interrupt Controller
+                +------------------------------------+      IPIs
+                | +---------+ +---------+ +--------+ |    +-------+
+                | |IVRE     | |Common Q | |IVPE    |----> | CORES |
+                | |     esb | |         | |        |----> |       |
+                | |     eas | |  Bridge | |   tctx |----> |       |
+                | |SC   end | |         | |    nvt | |    |       |
+    +------+    | +---------+ +----+----+ +--------+ |    +-+-+-+-+
+    | RAM  |    +------------------|-----------------+      | | |
+    |      |                       |                        | | |
+    |      |                       |                        | | |
+    |      |  +--------------------v------------------------v-v-v--+    other
+    |      <--+                     Power Bus                      +--> chips
+    |  esb |  +---------+-----------------------+------------------+
+    |  eas |            |                       |
+    |  end |         +--|------+                |
+    |  nvt |       +----+----+ |           +----+----+
+    +------+       |IVSE     | |           |IVSE     |
+                   |         | |           |         |
+                   | PQ-bits | |           | PQ-bits |
+                   | local   |-+           |  in VC  |
+                   +---------+             +---------+
+                      PCIe                 NX,NPU,CAPI
+
+
+    PQ-bits: 2 bits source state machine (P:pending Q:queued)
+    esb: Event State Buffer (Array of PQ bits in an IVSE)
+    eas: Event Assignment Structure
+    end: Event Notification Descriptor
+    nvt: Notification Virtual Target
+    tctx: Thread interrupt Context registers
+
+
+
+XIVE internal tables
+--------------------
+
+Each of the sub-engines uses a set of tables to redirect interrupts
+from event sources to CPU threads.
+
+::
+
+                                            +-------+
+    User or O/S                             |  EQ   |
+        or                          +------>|entries|
+    Hypervisor                      |       |  ..   |
+      Memory                        |       +-------+
+                                    |           ^
+                                    |           |
+               +-------------------------------------------------+
+                                    |           |
+    Hypervisor      +------+    +---+--+    +---+--+   +------+
+      Memory        | ESB  |    | EAT  |    | ENDT |   | NVTT |
+     (skiboot)      +----+-+    +----+-+    +----+-+   +------+
+                      ^  |        ^  |        ^  |       ^
+                      |  |        |  |        |  |       |
+               +-------------------------------------------------+
+                      |  |        |  |        |  |       |
+                      |  |        |  |        |  |       |
+                 +----|--|--------|--|--------|--|-+   +-|-----+    +------+
+                 |    |  |        |  |        |  | |   | | tctx|    |Thread|
+     IPI or   ---+    +  v        +  v        +  v |---| +  .. |----->     |
+    HW events    |                                 |   |       |    |      |
+                 |             IVRE                |   | IVPE  |    +------+
+                 +---------------------------------+   +-------+
+
+
+The IVSE have a 2-bits state machine, P for pending and Q for queued,
+for each source that allows events to be triggered. They are stored in
+an Event State Buffer (ESB) array and can be controlled by MMIOs.
+
+If the event is let through, the IVRE looks up in the Event Assignment
+Structure (EAS) table for an Event Notification Descriptor (END)
+configured for the source. Each Event Notification Descriptor defines
+a notification path to a CPU and an in-memory Event Queue, in which
+will be enqueued an EQ data for the O/S to pull.
+
+The IVPE determines if a Notification Virtual Target (NVT) can handle
+the event by scanning the thread contexts of the VCPUs dispatched on
+the processor HW threads. It maintains the interrupt context state of
+each thread in a NVT table.
+
+XIVE thread interrupt context
+-----------------------------
+
+The XIVE presenter can generate four different exceptions to its
+HW threads:
+
+- hypervisor exception
+- O/S exception
+- Event-Based Branch (user level)
+- msgsnd (doorbell)
+
+Each exception has a state independent from the others called a Thread
+Interrupt Management context. This context is a set of registers which
+lets the thread handle priority management and interrupt
+acknowledgment among other things. The most important ones being :
+
+- Interrupt Priority Register  (PIPR)
+- Interrupt Pending Buffer     (IPB)
+- Current Processor Priority   (CPPR)
+- Notification Source Register (NSR)
+
+TIMA
+~~~~
+
+The Thread Interrupt Management registers are accessible through a
+specific MMIO region, called the Thread Interrupt Management Area
+(TIMA), four aligned pages, each exposing a different view of the
+registers. First page (page address ending in ``0b00``) gives access
+to the entire context and is reserved for the ring 0 view for the
+physical thread context. The second (page address ending in ``0b01``)
+is for the hypervisor, ring 1 view. The third (page address ending in
+``0b10``) is for the operating system, ring 2 view. The fourth (page
+address ending in ``0b11``) is for user level, ring 3 view.
+
+Interrupt flow from an O/S perspective
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+After an event data has been enqueued in the O/S Event Queue, the IVPE
+raises the bit corresponding to the priority of the pending interrupt
+in the register IBP (Interrupt Pending Buffer) to indicate that an
+event is pending in one of the 8 priority queues. The Pending
+Interrupt Priority Register (PIPR) is also updated using the IPB. This
+register represent the priority of the most favored pending
+notification.
+
+The PIPR is then compared to the the Current Processor Priority
+Register (CPPR). If it is more favored (numerically less than), the
+CPU interrupt line is raised and the EO bit of the Notification Source
+Register (NSR) is updated to notify the presence of an exception for
+the O/S. The O/S acknowledges the interrupt with a special load in the
+Thread Interrupt Management Area.
+
+The O/S handles the interrupt and when done, performs an EOI using a
+MMIO operation on the ESB management page of the associate source.
+
+Overview of the QEMU models for XIVE
+====================================
+
+The XiveSource models the IVSE in general, internal and external. It
+handles the source ESBs and the MMIO interface to control them.
+
+The XiveNotifier is a small helper interface interconnecting the
+XiveSource to the XiveRouter.
+
+The XiveRouter is an abstract model acting as a combined IVRE and
+IVPE. It routes event notifications using the EAS and END tables to
+the IVPE sub-engine which does a CAM scan to find a CPU to deliver the
+exception. Storage should be provided by the inheriting classes.
+
+XiveEnDSource is a special source object. It exposes the END ESB MMIOs
+of the Event Queues which are used for coalescing event notifications
+and for escalation. Not used on the field, only to sync the EQ cache
+in OPAL.
+
+Finally, the XiveTCTX contains the interrupt state context of a thread,
+four sets of registers, one for each exception that can be delivered
+to a CPU. These contexts are scanned by the IVPE to find a matching VP
+when a notification is triggered. It also models the Thread Interrupt
+Management Area (TIMA), which exposes the thread context registers to
+the CPU for interrupt management.
diff --git a/hmp.c b/hmp.c
index 56a3ed7375..be5e345c6f 100644
--- a/hmp.c
+++ b/hmp.c
@@ -2560,7 +2560,8 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
     if (!blk) {
         BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err);
         if (bs) {
-            blk = local_blk = blk_new(0, BLK_PERM_ALL);
+            blk = local_blk = blk_new(bdrv_get_aio_context(bs),
+                                      0, BLK_PERM_ALL);
             ret = blk_insert_bs(blk, bs, &err);
             if (ret < 0) {
                 goto fail;
diff --git a/hw/arm/imx25_pdk.c b/hw/arm/imx25_pdk.c
index eef1b184b0..a0423ffb67 100644
--- a/hw/arm/imx25_pdk.c
+++ b/hw/arm/imx25_pdk.c
@@ -131,15 +131,6 @@ static void imx25_pdk_init(MachineState *machine)
      */
     if (!qtest_enabled()) {
         arm_load_kernel(&s->soc.cpu, &imx25_pdk_binfo);
-    } else {
-        /*
-         * This I2C device doesn't exist on the real board.
-         * We add it here (only on qtest usage) to be able to do a bit
-         * of simple qtest. See "make check" for details.
-         */
-        i2c_create_slave((I2CBus *)qdev_get_child_bus(DEVICE(&s->soc.i2c[0]),
-                                                      "i2c-bus.0"),
-                         "ds1338", 0x68);
     }
 }
 
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 8c37bd314a..158c78f852 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -173,6 +173,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
     unsigned i;
     unsigned nvqs = s->conf->num_queues;
+    Error *local_err = NULL;
     int r;
 
     if (vblk->dataplane_started || s->starting) {
@@ -212,7 +213,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
     vblk->dataplane_started = true;
     trace_virtio_blk_data_plane_start(s);
 
-    blk_set_aio_context(s->conf->conf.blk, s->ctx);
+    r = blk_set_aio_context(s->conf->conf.blk, s->ctx, &local_err);
+    if (r < 0) {
+        error_report_err(local_err);
+        goto fail_guest_notifiers;
+    }
 
     /* Kick right away to begin processing requests already in vring */
     for (i = 0; i < nvqs; i++) {
@@ -281,8 +286,9 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
     aio_context_acquire(s->ctx);
     aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
 
-    /* Drain and switch bs back to the QEMU main loop */
-    blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context());
+    /* Drain and try to switch bs back to the QEMU main loop. If other users
+     * keep the BlockBackend in the iothread, that's ok */
+    blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL);
 
     aio_context_release(s->ctx);
 
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index bb8f1186e4..f7ad452bbd 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -682,7 +682,8 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
     }
 
     aio_context_acquire(dataplane->ctx);
-    blk_set_aio_context(dataplane->blk, qemu_get_aio_context());
+    /* Xen doesn't have multiple users for nodes, so this can't fail */
+    blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort);
     aio_context_release(dataplane->ctx);
 
     xendev = dataplane->xendev;
@@ -811,7 +812,8 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
     }
 
     aio_context_acquire(dataplane->ctx);
-    blk_set_aio_context(dataplane->blk, dataplane->ctx);
+    /* If other users keep the BlockBackend in the iothread, that's ok */
+    blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL);
     aio_context_release(dataplane->ctx);
     return;
 
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 6f19f127a5..37ccedc9f7 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -538,7 +538,7 @@ static void floppy_drive_realize(DeviceState *qdev, Error **errp)
 
     if (!dev->conf.blk) {
         /* Anonymous BlockBackend for an empty drive */
-        dev->conf.blk = blk_new(0, BLK_PERM_ALL);
+        dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
         ret = blk_attach_dev(dev->conf.blk, qdev);
         assert(ret == 0);
     }
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 63a5b58849..30e50f7a38 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -219,6 +219,30 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
     return NVME_INVALID_FIELD | NVME_DNR;
 }
 
+static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+                                   uint64_t prp1, uint64_t prp2)
+{
+    QEMUSGList qsg;
+    QEMUIOVector iov;
+    uint16_t status = NVME_SUCCESS;
+
+    if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+    if (qsg.nsg > 0) {
+        if (dma_buf_write(ptr, len, &qsg)) {
+            status = NVME_INVALID_FIELD | NVME_DNR;
+        }
+        qemu_sglist_destroy(&qsg);
+    } else {
+        if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
+            status = NVME_INVALID_FIELD | NVME_DNR;
+        }
+        qemu_iovec_destroy(&iov);
+    }
+    return status;
+}
+
 static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
     uint64_t prp1, uint64_t prp2)
 {
@@ -678,7 +702,6 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
     return ret;
 }
 
-
 static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
 {
     NvmeIdentify *c = (NvmeIdentify *)cmd;
@@ -696,6 +719,57 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
     }
 }
 
+static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts)
+{
+    trace_nvme_setfeat_timestamp(ts);
+
+    n->host_timestamp = le64_to_cpu(ts);
+    n->timestamp_set_qemu_clock_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+}
+
+static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n)
+{
+    uint64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+    uint64_t elapsed_time = current_time - n->timestamp_set_qemu_clock_ms;
+
+    union nvme_timestamp {
+        struct {
+            uint64_t timestamp:48;
+            uint64_t sync:1;
+            uint64_t origin:3;
+            uint64_t rsvd1:12;
+        };
+        uint64_t all;
+    };
+
+    union nvme_timestamp ts;
+    ts.all = 0;
+
+    /*
+     * If the sum of the Timestamp value set by the host and the elapsed
+     * time exceeds 2^48, the value returned should be reduced modulo 2^48.
+     */
+    ts.timestamp = (n->host_timestamp + elapsed_time) & 0xffffffffffff;
+
+    /* If the host timestamp is non-zero, set the timestamp origin */
+    ts.origin = n->host_timestamp ? 0x01 : 0x00;
+
+    trace_nvme_getfeat_timestamp(ts.all);
+
+    return cpu_to_le64(ts.all);
+}
+
+static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
+{
+    uint64_t prp1 = le64_to_cpu(cmd->prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->prp2);
+
+    uint64_t timestamp = nvme_get_timestamp(n);
+
+    return nvme_dma_read_prp(n, (uint8_t *)&timestamp,
+                                 sizeof(timestamp), prp1, prp2);
+}
+
 static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
@@ -710,6 +784,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
         trace_nvme_getfeat_numq(result);
         break;
+    case NVME_TIMESTAMP:
+        return nvme_get_feature_timestamp(n, cmd);
+        break;
     default:
         trace_nvme_err_invalid_getfeat(dw10);
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -719,6 +796,24 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     return NVME_SUCCESS;
 }
 
+static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
+{
+    uint16_t ret;
+    uint64_t timestamp;
+    uint64_t prp1 = le64_to_cpu(cmd->prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->prp2);
+
+    ret = nvme_dma_write_prp(n, (uint8_t *)&timestamp,
+                                sizeof(timestamp), prp1, prp2);
+    if (ret != NVME_SUCCESS) {
+        return ret;
+    }
+
+    nvme_set_timestamp(n, timestamp);
+
+    return NVME_SUCCESS;
+}
+
 static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
@@ -735,6 +830,11 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         req->cqe.result =
             cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
         break;
+
+    case NVME_TIMESTAMP:
+        return nvme_set_feature_timestamp(n, cmd);
+        break;
+
     default:
         trace_nvme_err_invalid_setfeat(dw10);
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -907,6 +1007,8 @@ static int nvme_start_ctrl(NvmeCtrl *n)
     nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
         NVME_AQA_ASQS(n->bar.aqa) + 1);
 
+    nvme_set_timestamp(n, 0ULL);
+
     return 0;
 }
 
@@ -1270,7 +1372,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     id->sqes = (0x6 << 4) | 0x6;
     id->cqes = (0x4 << 4) | 0x4;
     id->nn = cpu_to_le32(n->num_namespaces);
-    id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS);
+    id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP);
     id->psd[0].mp = cpu_to_le16(0x9c4);
     id->psd[0].enlat = cpu_to_le32(0x10);
     id->psd[0].exlat = cpu_to_le32(0x4);
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 56c9d4b4b1..557194ee19 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -79,6 +79,8 @@ typedef struct NvmeCtrl {
     uint32_t    cmbloc;
     uint8_t     *cmbuf;
     uint64_t    irq_status;
+    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
+    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
 
     char            *serial;
     NvmeNamespace   *namespaces;
diff --git a/hw/block/trace-events b/hw/block/trace-events
index b92039a573..97a17838ed 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -46,6 +46,8 @@ nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
 nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s"
 nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
 nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
+nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64""
+nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64""
 nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
 nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
 nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index ef635be4c2..31b0f5ccc8 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -609,7 +609,7 @@ static void xen_cdrom_realize(XenBlockDevice *blockdev, Error **errp)
         int rc;
 
         /* Set up an empty drive */
-        conf->blk = blk_new(0, BLK_PERM_ALL);
+        conf->blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
 
         rc = blk_attach_dev(conf->blk, DEVICE(blockdev));
         if (!rc) {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 934c1bcceb..f1a0f45f9c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -24,6 +24,9 @@
 #include "hw/pci/pci.h"
 #include "hw/mem/nvdimm.h"
 
+GlobalProperty hw_compat_4_0_1[] = {};
+const size_t hw_compat_4_0_1_len = G_N_ELEMENTS(hw_compat_4_0_1);
+
 GlobalProperty hw_compat_4_0[] = {};
 const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0);
 
@@ -36,7 +39,7 @@ GlobalProperty hw_compat_3_1[] = {
     { "tpm-tis", "ppi", "false" },
     { "usb-kbd", "serial", "42" },
     { "usb-mouse", "serial", "42" },
-    { "usb-kbd", "serial", "42" },
+    { "usb-tablet", "serial", "42" },
     { "virtio-blk-device", "discard", "false" },
     { "virtio-blk-device", "write-zeroes", "false" },
 };
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index b45a7ef54b..ba412dd2ca 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -69,8 +69,8 @@ static void set_pointer(Object *obj, Visitor *v, Property *prop,
 
 /* --- drive --- */
 
-static void parse_drive(DeviceState *dev, const char *str, void **ptr,
-                        const char *propname, Error **errp)
+static void do_parse_drive(DeviceState *dev, const char *str, void **ptr,
+                           const char *propname, bool iothread, Error **errp)
 {
     BlockBackend *blk;
     bool blk_created = false;
@@ -80,7 +80,16 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr,
     if (!blk) {
         BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL);
         if (bs) {
-            blk = blk_new(0, BLK_PERM_ALL);
+            /*
+             * If the device supports iothreads, it will make sure to move the
+             * block node to the right AioContext if necessary (or fail if this
+             * isn't possible because of other users). Devices that are not
+             * aware of iothreads require their BlockBackends to be in the main
+             * AioContext.
+             */
+            AioContext *ctx = iothread ? bdrv_get_aio_context(bs) :
+                                         qemu_get_aio_context();
+            blk = blk_new(ctx, 0, BLK_PERM_ALL);
             blk_created = true;
 
             ret = blk_insert_bs(blk, bs, errp);
@@ -118,6 +127,18 @@ fail:
     }
 }
 
+static void parse_drive(DeviceState *dev, const char *str, void **ptr,
+                        const char *propname, Error **errp)
+{
+    do_parse_drive(dev, str, ptr, propname, false, errp);
+}
+
+static void parse_drive_iothread(DeviceState *dev, const char *str, void **ptr,
+                                 const char *propname, Error **errp)
+{
+    do_parse_drive(dev, str, ptr, propname, true, errp);
+}
+
 static void release_drive(Object *obj, const char *name, void *opaque)
 {
     DeviceState *dev = DEVICE(obj);
@@ -160,6 +181,12 @@ static void set_drive(Object *obj, Visitor *v, const char *name, void *opaque,
     set_pointer(obj, v, opaque, parse_drive, name, errp);
 }
 
+static void set_drive_iothread(Object *obj, Visitor *v, const char *name,
+                               void *opaque, Error **errp)
+{
+    set_pointer(obj, v, opaque, parse_drive_iothread, name, errp);
+}
+
 const PropertyInfo qdev_prop_drive = {
     .name  = "str",
     .description = "Node name or ID of a block device to use as a backend",
@@ -168,6 +195,14 @@ const PropertyInfo qdev_prop_drive = {
     .release = release_drive,
 };
 
+const PropertyInfo qdev_prop_drive_iothread = {
+    .name  = "str",
+    .description = "Node name or ID of a block device to use as a backend",
+    .get   = get_drive,
+    .set   = set_drive_iothread,
+    .release = release_drive,
+};
+
 /* --- character device --- */
 
 static void get_chr(Object *obj, Visitor *v, const char *name, void *opaque,
diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index 307cf90a51..689a867a22 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -153,6 +153,16 @@ static void sysbus_mmio_map_common(SysBusDevice *dev, int n, hwaddr addr,
     }
 }
 
+void sysbus_mmio_unmap(SysBusDevice *dev, int n)
+{
+    assert(n >= 0 && n < dev->num_mmio);
+
+    if (dev->mmio[n].addr != (hwaddr)-1) {
+        memory_region_del_subregion(get_system_memory(), dev->mmio[n].memory);
+        dev->mmio[n].addr = (hwaddr)-1;
+    }
+}
+
 void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr)
 {
     sysbus_mmio_map_common(dev, n, addr, false, 0);
diff --git a/hw/display/Kconfig b/hw/display/Kconfig
index dc1f113df2..910dccb2f7 100644
--- a/hw/display/Kconfig
+++ b/hw/display/Kconfig
@@ -111,6 +111,16 @@ config VIRTIO_VGA
     depends on VIRTIO_PCI
     select VGA
 
+config VHOST_USER_GPU
+    bool
+    default y
+    depends on VIRTIO_GPU && VHOST_USER
+
+config VHOST_USER_VGA
+    bool
+    default y
+    depends on VIRTIO_VGA && VHOST_USER_GPU
+
 config DPCD
     bool
     select AUX
diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs
index 650031f725..a64998fc7b 100644
--- a/hw/display/Makefile.objs
+++ b/hw/display/Makefile.objs
@@ -43,9 +43,12 @@ obj-$(CONFIG_VGA) += vga.o
 
 common-obj-$(CONFIG_QXL) += qxl.o qxl-logger.o qxl-render.o
 
-obj-$(CONFIG_VIRTIO_GPU) += virtio-gpu.o virtio-gpu-3d.o
+obj-$(CONFIG_VIRTIO_GPU) += virtio-gpu-base.o virtio-gpu.o virtio-gpu-3d.o
+obj-$(CONFIG_VHOST_USER_GPU) += vhost-user-gpu.o
 obj-$(call land,$(CONFIG_VIRTIO_GPU),$(CONFIG_VIRTIO_PCI)) += virtio-gpu-pci.o
+obj-$(call land,$(CONFIG_VHOST_USER_GPU),$(CONFIG_VIRTIO_PCI)) += vhost-user-gpu-pci.o
 obj-$(CONFIG_VIRTIO_VGA) += virtio-vga.o
+obj-$(CONFIG_VHOST_USER_VGA) += vhost-user-vga.o
 virtio-gpu.o-cflags := $(VIRGL_CFLAGS)
 virtio-gpu.o-libs += $(VIRGL_LIBS)
 virtio-gpu-3d.o-cflags := $(VIRGL_CFLAGS)
diff --git a/hw/display/vhost-user-gpu-pci.c b/hw/display/vhost-user-gpu-pci.c
new file mode 100644
index 0000000000..7d9b1f5a8c
--- /dev/null
+++ b/hw/display/vhost-user-gpu-pci.c
@@ -0,0 +1,51 @@
+/*
+ * vhost-user GPU PCI device
+ *
+ * Copyright Red Hat, Inc. 2018
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/virtio/virtio-gpu-pci.h"
+
+#define TYPE_VHOST_USER_GPU_PCI "vhost-user-gpu-pci"
+#define VHOST_USER_GPU_PCI(obj)                                     \
+    OBJECT_CHECK(VhostUserGPUPCI, (obj), TYPE_VHOST_USER_GPU_PCI)
+
+typedef struct VhostUserGPUPCI {
+    VirtIOGPUPCIBase parent_obj;
+
+    VhostUserGPU vdev;
+} VhostUserGPUPCI;
+
+static void vhost_user_gpu_pci_initfn(Object *obj)
+{
+    VhostUserGPUPCI *dev = VHOST_USER_GPU_PCI(obj);
+
+    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+                                TYPE_VHOST_USER_GPU);
+
+    VIRTIO_GPU_PCI_BASE(obj)->vgpu = VIRTIO_GPU_BASE(&dev->vdev);
+
+    object_property_add_alias(obj, "chardev",
+                              OBJECT(&dev->vdev), "chardev",
+                              &error_abort);
+}
+
+static const VirtioPCIDeviceTypeInfo vhost_user_gpu_pci_info = {
+    .generic_name = TYPE_VHOST_USER_GPU_PCI,
+    .parent = TYPE_VIRTIO_GPU_PCI_BASE,
+    .instance_size = sizeof(VhostUserGPUPCI),
+    .instance_init = vhost_user_gpu_pci_initfn,
+};
+
+static void vhost_user_gpu_pci_register_types(void)
+{
+    virtio_pci_types_register(&vhost_user_gpu_pci_info);
+}
+
+type_init(vhost_user_gpu_pci_register_types)
diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c
new file mode 100644
index 0000000000..7181d9cdba
--- /dev/null
+++ b/hw/display/vhost-user-gpu.c
@@ -0,0 +1,607 @@
+/*
+ * vhost-user GPU Device
+ *
+ * Copyright Red Hat, Inc. 2018
+ *
+ * Authors:
+ *     Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/virtio/virtio-gpu.h"
+#include "chardev/char-fe.h"
+#include "qapi/error.h"
+#include "migration/blocker.h"
+
+#define VHOST_USER_GPU(obj)                                    \
+    OBJECT_CHECK(VhostUserGPU, (obj), TYPE_VHOST_USER_GPU)
+
+typedef enum VhostUserGpuRequest {
+    VHOST_USER_GPU_NONE = 0,
+    VHOST_USER_GPU_GET_PROTOCOL_FEATURES,
+    VHOST_USER_GPU_SET_PROTOCOL_FEATURES,
+    VHOST_USER_GPU_GET_DISPLAY_INFO,
+    VHOST_USER_GPU_CURSOR_POS,
+    VHOST_USER_GPU_CURSOR_POS_HIDE,
+    VHOST_USER_GPU_CURSOR_UPDATE,
+    VHOST_USER_GPU_SCANOUT,
+    VHOST_USER_GPU_UPDATE,
+    VHOST_USER_GPU_DMABUF_SCANOUT,
+    VHOST_USER_GPU_DMABUF_UPDATE,
+} VhostUserGpuRequest;
+
+typedef struct VhostUserGpuDisplayInfoReply {
+    struct virtio_gpu_resp_display_info info;
+} VhostUserGpuDisplayInfoReply;
+
+typedef struct VhostUserGpuCursorPos {
+    uint32_t scanout_id;
+    uint32_t x;
+    uint32_t y;
+} QEMU_PACKED VhostUserGpuCursorPos;
+
+typedef struct VhostUserGpuCursorUpdate {
+    VhostUserGpuCursorPos pos;
+    uint32_t hot_x;
+    uint32_t hot_y;
+    uint32_t data[64 * 64];
+} QEMU_PACKED VhostUserGpuCursorUpdate;
+
+typedef struct VhostUserGpuScanout {
+    uint32_t scanout_id;
+    uint32_t width;
+    uint32_t height;
+} QEMU_PACKED VhostUserGpuScanout;
+
+typedef struct VhostUserGpuUpdate {
+    uint32_t scanout_id;
+    uint32_t x;
+    uint32_t y;
+    uint32_t width;
+    uint32_t height;
+    uint8_t data[];
+} QEMU_PACKED VhostUserGpuUpdate;
+
+typedef struct VhostUserGpuDMABUFScanout {
+    uint32_t scanout_id;
+    uint32_t x;
+    uint32_t y;
+    uint32_t width;
+    uint32_t height;
+    uint32_t fd_width;
+    uint32_t fd_height;
+    uint32_t fd_stride;
+    uint32_t fd_flags;
+    int fd_drm_fourcc;
+} QEMU_PACKED VhostUserGpuDMABUFScanout;
+
+typedef struct VhostUserGpuMsg {
+    uint32_t request; /* VhostUserGpuRequest */
+    uint32_t flags;
+    uint32_t size; /* the following payload size */
+    union {
+        VhostUserGpuCursorPos cursor_pos;
+        VhostUserGpuCursorUpdate cursor_update;
+        VhostUserGpuScanout scanout;
+        VhostUserGpuUpdate update;
+        VhostUserGpuDMABUFScanout dmabuf_scanout;
+        struct virtio_gpu_resp_display_info display_info;
+        uint64_t u64;
+    } payload;
+} QEMU_PACKED VhostUserGpuMsg;
+
+static VhostUserGpuMsg m __attribute__ ((unused));
+#define VHOST_USER_GPU_HDR_SIZE \
+    (sizeof(m.request) + sizeof(m.size) + sizeof(m.flags))
+
+#define VHOST_USER_GPU_MSG_FLAG_REPLY 0x4
+
+static void vhost_user_gpu_update_blocked(VhostUserGPU *g, bool blocked);
+
+static void
+vhost_user_gpu_handle_cursor(VhostUserGPU *g, VhostUserGpuMsg *msg)
+{
+    VhostUserGpuCursorPos *pos = &msg->payload.cursor_pos;
+    struct virtio_gpu_scanout *s;
+
+    if (pos->scanout_id >= g->parent_obj.conf.max_outputs) {
+        return;
+    }
+    s = &g->parent_obj.scanout[pos->scanout_id];
+
+    if (msg->request == VHOST_USER_GPU_CURSOR_UPDATE) {
+        VhostUserGpuCursorUpdate *up = &msg->payload.cursor_update;
+        if (!s->current_cursor) {
+            s->current_cursor = cursor_alloc(64, 64);
+        }
+
+        s->current_cursor->hot_x = up->hot_x;
+        s->current_cursor->hot_y = up->hot_y;
+
+        memcpy(s->current_cursor->data, up->data,
+               64 * 64 * sizeof(uint32_t));
+
+        dpy_cursor_define(s->con, s->current_cursor);
+    }
+
+    dpy_mouse_set(s->con, pos->x, pos->y,
+                  msg->request != VHOST_USER_GPU_CURSOR_POS_HIDE);
+}
+
+static void
+vhost_user_gpu_send_msg(VhostUserGPU *g, const VhostUserGpuMsg *msg)
+{
+    qemu_chr_fe_write(&g->vhost_chr, (uint8_t *)msg,
+                      VHOST_USER_GPU_HDR_SIZE + msg->size);
+}
+
+static void
+vhost_user_gpu_unblock(VhostUserGPU *g)
+{
+    VhostUserGpuMsg msg = {
+        .request = VHOST_USER_GPU_DMABUF_UPDATE,
+        .flags = VHOST_USER_GPU_MSG_FLAG_REPLY,
+    };
+
+    vhost_user_gpu_send_msg(g, &msg);
+}
+
+static void
+vhost_user_gpu_handle_display(VhostUserGPU *g, VhostUserGpuMsg *msg)
+{
+    QemuConsole *con = NULL;
+    struct virtio_gpu_scanout *s;
+
+    switch (msg->request) {
+    case VHOST_USER_GPU_GET_PROTOCOL_FEATURES: {
+        VhostUserGpuMsg reply = {
+            .request = msg->request,
+            .flags = VHOST_USER_GPU_MSG_FLAG_REPLY,
+            .size = sizeof(uint64_t),
+        };
+
+        vhost_user_gpu_send_msg(g, &reply);
+        break;
+    }
+    case VHOST_USER_GPU_SET_PROTOCOL_FEATURES: {
+        break;
+    }
+    case VHOST_USER_GPU_GET_DISPLAY_INFO: {
+        struct virtio_gpu_resp_display_info display_info = { {} };
+        VhostUserGpuMsg reply = {
+            .request = msg->request,
+            .flags = VHOST_USER_GPU_MSG_FLAG_REPLY,
+            .size = sizeof(struct virtio_gpu_resp_display_info),
+        };
+
+        display_info.hdr.type = VIRTIO_GPU_RESP_OK_DISPLAY_INFO;
+        virtio_gpu_base_fill_display_info(VIRTIO_GPU_BASE(g), &display_info);
+        memcpy(&reply.payload.display_info, &display_info,
+               sizeof(display_info));
+        vhost_user_gpu_send_msg(g, &reply);
+        break;
+    }
+    case VHOST_USER_GPU_SCANOUT: {
+        VhostUserGpuScanout *m = &msg->payload.scanout;
+
+        if (m->scanout_id >= g->parent_obj.conf.max_outputs) {
+            return;
+        }
+
+        g->parent_obj.enable = 1;
+        s = &g->parent_obj.scanout[m->scanout_id];
+        con = s->con;
+
+        if (m->scanout_id == 0 && m->width == 0) {
+            s->ds = qemu_create_message_surface(640, 480,
+                                                "Guest disabled display.");
+            dpy_gfx_replace_surface(con, s->ds);
+        } else {
+            s->ds = qemu_create_displaysurface(m->width, m->height);
+            /* replace surface on next update */
+        }
+
+        break;
+    }
+    case VHOST_USER_GPU_DMABUF_SCANOUT: {
+        VhostUserGpuDMABUFScanout *m = &msg->payload.dmabuf_scanout;
+        int fd = qemu_chr_fe_get_msgfd(&g->vhost_chr);
+        QemuDmaBuf *dmabuf;
+
+        if (m->scanout_id >= g->parent_obj.conf.max_outputs) {
+            error_report("invalid scanout: %d", m->scanout_id);
+            if (fd >= 0) {
+                close(fd);
+            }
+            break;
+        }
+
+        g->parent_obj.enable = 1;
+        con = g->parent_obj.scanout[m->scanout_id].con;
+        dmabuf = &g->dmabuf[m->scanout_id];
+        if (dmabuf->fd >= 0) {
+            close(dmabuf->fd);
+            dmabuf->fd = -1;
+        }
+        if (!console_has_gl_dmabuf(con)) {
+            /* it would be nice to report that error earlier */
+            error_report("console doesn't support dmabuf!");
+            break;
+        }
+        dpy_gl_release_dmabuf(con, dmabuf);
+        if (fd == -1) {
+            dpy_gl_scanout_disable(con);
+            break;
+        }
+        *dmabuf = (QemuDmaBuf) {
+            .fd = fd,
+            .width = m->fd_width,
+            .height = m->fd_height,
+            .stride = m->fd_stride,
+            .fourcc = m->fd_drm_fourcc,
+            .y0_top = m->fd_flags & VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP,
+        };
+        dpy_gl_scanout_dmabuf(con, dmabuf);
+        break;
+    }
+    case VHOST_USER_GPU_DMABUF_UPDATE: {
+        VhostUserGpuUpdate *m = &msg->payload.update;
+
+        if (m->scanout_id >= g->parent_obj.conf.max_outputs ||
+            !g->parent_obj.scanout[m->scanout_id].con) {
+            error_report("invalid scanout update: %d", m->scanout_id);
+            vhost_user_gpu_unblock(g);
+            break;
+        }
+
+        con = g->parent_obj.scanout[m->scanout_id].con;
+        if (!console_has_gl(con)) {
+            error_report("console doesn't support GL!");
+            vhost_user_gpu_unblock(g);
+            break;
+        }
+        dpy_gl_update(con, m->x, m->y, m->width, m->height);
+        g->backend_blocked = true;
+        break;
+    }
+    case VHOST_USER_GPU_UPDATE: {
+        VhostUserGpuUpdate *m = &msg->payload.update;
+
+        if (m->scanout_id >= g->parent_obj.conf.max_outputs) {
+            break;
+        }
+        s = &g->parent_obj.scanout[m->scanout_id];
+        con = s->con;
+        pixman_image_t *image =
+            pixman_image_create_bits(PIXMAN_x8r8g8b8,
+                                     m->width,
+                                     m->height,
+                                     (uint32_t *)m->data,
+                                     m->width * 4);
+
+        pixman_image_composite(PIXMAN_OP_SRC,
+                               image, NULL, s->ds->image,
+                               0, 0, 0, 0, m->x, m->y, m->width, m->height);
+
+        pixman_image_unref(image);
+        if (qemu_console_surface(con) != s->ds) {
+            dpy_gfx_replace_surface(con, s->ds);
+        } else {
+            dpy_gfx_update(con, m->x, m->y, m->width, m->height);
+        }
+        break;
+    }
+    default:
+        g_warning("unhandled message %d %d", msg->request, msg->size);
+    }
+
+    if (con && qemu_console_is_gl_blocked(con)) {
+        vhost_user_gpu_update_blocked(g, true);
+    }
+}
+
+static void
+vhost_user_gpu_chr_read(void *opaque)
+{
+    VhostUserGPU *g = opaque;
+    VhostUserGpuMsg *msg = NULL;
+    VhostUserGpuRequest request;
+    uint32_t size, flags;
+    int r;
+
+    r = qemu_chr_fe_read_all(&g->vhost_chr,
+                             (uint8_t *)&request, sizeof(uint32_t));
+    if (r != sizeof(uint32_t)) {
+        error_report("failed to read msg header: %d, %d", r, errno);
+        goto end;
+    }
+
+    r = qemu_chr_fe_read_all(&g->vhost_chr,
+                             (uint8_t *)&flags, sizeof(uint32_t));
+    if (r != sizeof(uint32_t)) {
+        error_report("failed to read msg flags");
+        goto end;
+    }
+
+    r = qemu_chr_fe_read_all(&g->vhost_chr,
+                             (uint8_t *)&size, sizeof(uint32_t));
+    if (r != sizeof(uint32_t)) {
+        error_report("failed to read msg size");
+        goto end;
+    }
+
+    msg = g_malloc(VHOST_USER_GPU_HDR_SIZE + size);
+    g_return_if_fail(msg != NULL);
+
+    r = qemu_chr_fe_read_all(&g->vhost_chr,
+                             (uint8_t *)&msg->payload, size);
+    if (r != size) {
+        error_report("failed to read msg payload %d != %d", r, size);
+        goto end;
+    }
+
+    msg->request = request;
+    msg->flags = size;
+    msg->size = size;
+
+    if (request == VHOST_USER_GPU_CURSOR_UPDATE ||
+        request == VHOST_USER_GPU_CURSOR_POS ||
+        request == VHOST_USER_GPU_CURSOR_POS_HIDE) {
+        vhost_user_gpu_handle_cursor(g, msg);
+    } else {
+        vhost_user_gpu_handle_display(g, msg);
+    }
+
+end:
+    g_free(msg);
+}
+
+static void
+vhost_user_gpu_update_blocked(VhostUserGPU *g, bool blocked)
+{
+    qemu_set_fd_handler(g->vhost_gpu_fd,
+                        blocked ? NULL : vhost_user_gpu_chr_read, NULL, g);
+}
+
+static void
+vhost_user_gpu_gl_unblock(VirtIOGPUBase *b)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(b);
+
+    if (g->backend_blocked) {
+        vhost_user_gpu_unblock(VHOST_USER_GPU(g));
+        g->backend_blocked = false;
+    }
+
+    vhost_user_gpu_update_blocked(VHOST_USER_GPU(g), false);
+}
+
+static bool
+vhost_user_gpu_do_set_socket(VhostUserGPU *g, Error **errp)
+{
+    Chardev *chr;
+    int sv[2];
+
+    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
+        error_setg_errno(errp, errno, "socketpair() failed");
+        return false;
+    }
+
+    chr = CHARDEV(object_new(TYPE_CHARDEV_SOCKET));
+    if (!chr || qemu_chr_add_client(chr, sv[0]) == -1) {
+        error_setg(errp, "Failed to make socket chardev");
+        goto err;
+    }
+    if (!qemu_chr_fe_init(&g->vhost_chr, chr, errp)) {
+        goto err;
+    }
+    if (vhost_user_gpu_set_socket(&g->vhost->dev, sv[1]) < 0) {
+        error_setg(errp, "Failed to set vhost-user-gpu socket");
+        qemu_chr_fe_deinit(&g->vhost_chr, false);
+        goto err;
+    }
+
+    g->vhost_gpu_fd = sv[0];
+    vhost_user_gpu_update_blocked(g, false);
+    close(sv[1]);
+    return true;
+
+err:
+    close(sv[0]);
+    close(sv[1]);
+    if (chr) {
+        object_unref(OBJECT(chr));
+    }
+    return false;
+}
+
+static void
+vhost_user_gpu_get_config(VirtIODevice *vdev, uint8_t *config_data)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(vdev);
+    VirtIOGPUBase *b = VIRTIO_GPU_BASE(vdev);
+    struct virtio_gpu_config *vgconfig =
+        (struct virtio_gpu_config *)config_data;
+    int ret;
+
+    memset(config_data, 0, sizeof(struct virtio_gpu_config));
+
+    ret = vhost_dev_get_config(&g->vhost->dev,
+                               config_data, sizeof(struct virtio_gpu_config));
+    if (ret) {
+        error_report("vhost-user-gpu: get device config space failed");
+        return;
+    }
+
+    /* those fields are managed by qemu */
+    vgconfig->num_scanouts = b->virtio_config.num_scanouts;
+    vgconfig->events_read = b->virtio_config.events_read;
+    vgconfig->events_clear = b->virtio_config.events_clear;
+}
+
+static void
+vhost_user_gpu_set_config(VirtIODevice *vdev,
+                          const uint8_t *config_data)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(vdev);
+    VirtIOGPUBase *b = VIRTIO_GPU_BASE(vdev);
+    const struct virtio_gpu_config *vgconfig =
+        (const struct virtio_gpu_config *)config_data;
+    int ret;
+
+    if (vgconfig->events_clear) {
+        b->virtio_config.events_read &= ~vgconfig->events_clear;
+    }
+
+    ret = vhost_dev_set_config(&g->vhost->dev, config_data,
+                               0, sizeof(struct virtio_gpu_config),
+                               VHOST_SET_CONFIG_TYPE_MASTER);
+    if (ret) {
+        error_report("vhost-user-gpu: set device config space failed");
+        return;
+    }
+}
+
+static void
+vhost_user_gpu_set_status(VirtIODevice *vdev, uint8_t val)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(vdev);
+    Error *err = NULL;
+
+    if (val & VIRTIO_CONFIG_S_DRIVER_OK && vdev->vm_running) {
+        if (!vhost_user_gpu_do_set_socket(g, &err)) {
+            error_report_err(err);
+            return;
+        }
+        vhost_user_backend_start(g->vhost);
+    } else {
+        /* unblock any wait and stop processing */
+        if (g->vhost_gpu_fd != -1) {
+            vhost_user_gpu_update_blocked(g, true);
+            qemu_chr_fe_deinit(&g->vhost_chr, true);
+            g->vhost_gpu_fd = -1;
+        }
+        vhost_user_backend_stop(g->vhost);
+    }
+}
+
+static bool
+vhost_user_gpu_guest_notifier_pending(VirtIODevice *vdev, int idx)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(vdev);
+
+    return vhost_virtqueue_pending(&g->vhost->dev, idx);
+}
+
+static void
+vhost_user_gpu_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(vdev);
+
+    vhost_virtqueue_mask(&g->vhost->dev, vdev, idx, mask);
+}
+
+static void
+vhost_user_gpu_instance_init(Object *obj)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(obj);
+
+    g->vhost = VHOST_USER_BACKEND(object_new(TYPE_VHOST_USER_BACKEND));
+    object_property_add_alias(obj, "chardev",
+                              OBJECT(g->vhost), "chardev", &error_abort);
+}
+
+static void
+vhost_user_gpu_instance_finalize(Object *obj)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(obj);
+
+    object_unref(OBJECT(g->vhost));
+}
+
+static void
+vhost_user_gpu_reset(VirtIODevice *vdev)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(vdev);
+
+    virtio_gpu_base_reset(VIRTIO_GPU_BASE(vdev));
+
+    vhost_user_backend_stop(g->vhost);
+}
+
+static int
+vhost_user_gpu_config_change(struct vhost_dev *dev)
+{
+    error_report("vhost-user-gpu: unhandled backend config change");
+    return -1;
+}
+
+static const VhostDevConfigOps config_ops = {
+    .vhost_dev_config_notifier = vhost_user_gpu_config_change,
+};
+
+static void
+vhost_user_gpu_device_realize(DeviceState *qdev, Error **errp)
+{
+    VhostUserGPU *g = VHOST_USER_GPU(qdev);
+    VirtIODevice *vdev = VIRTIO_DEVICE(g);
+
+    vhost_dev_set_config_notifier(&g->vhost->dev, &config_ops);
+    if (vhost_user_backend_dev_init(g->vhost, vdev, 2, errp) < 0) {
+        return;
+    }
+
+    if (virtio_has_feature(g->vhost->dev.features, VIRTIO_GPU_F_VIRGL)) {
+        g->parent_obj.conf.flags |= 1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED;
+    }
+
+    if (!virtio_gpu_base_device_realize(qdev, NULL, NULL, errp)) {
+        return;
+    }
+
+    g->vhost_gpu_fd = -1;
+}
+
+static Property vhost_user_gpu_properties[] = {
+    VIRTIO_GPU_BASE_PROPERTIES(VhostUserGPU, parent_obj.conf),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void
+vhost_user_gpu_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    VirtIOGPUBaseClass *vgc = VIRTIO_GPU_BASE_CLASS(klass);
+
+    vgc->gl_unblock = vhost_user_gpu_gl_unblock;
+
+    vdc->realize = vhost_user_gpu_device_realize;
+    vdc->reset = vhost_user_gpu_reset;
+    vdc->set_status   = vhost_user_gpu_set_status;
+    vdc->guest_notifier_mask = vhost_user_gpu_guest_notifier_mask;
+    vdc->guest_notifier_pending = vhost_user_gpu_guest_notifier_pending;
+    vdc->get_config = vhost_user_gpu_get_config;
+    vdc->set_config = vhost_user_gpu_set_config;
+
+    dc->props = vhost_user_gpu_properties;
+}
+
+static const TypeInfo vhost_user_gpu_info = {
+    .name = TYPE_VHOST_USER_GPU,
+    .parent = TYPE_VIRTIO_GPU_BASE,
+    .instance_size = sizeof(VhostUserGPU),
+    .instance_init = vhost_user_gpu_instance_init,
+    .instance_finalize = vhost_user_gpu_instance_finalize,
+    .class_init = vhost_user_gpu_class_init,
+};
+
+static void vhost_user_gpu_register_types(void)
+{
+    type_register_static(&vhost_user_gpu_info);
+}
+
+type_init(vhost_user_gpu_register_types)
diff --git a/hw/display/vhost-user-vga.c b/hw/display/vhost-user-vga.c
new file mode 100644
index 0000000000..a7195276d9
--- /dev/null
+++ b/hw/display/vhost-user-vga.c
@@ -0,0 +1,52 @@
+/*
+ * vhost-user VGA device
+ *
+ * Copyright Red Hat, Inc. 2018
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "virtio-vga.h"
+
+#define TYPE_VHOST_USER_VGA "vhost-user-vga"
+
+#define VHOST_USER_VGA(obj)                                \
+    OBJECT_CHECK(VhostUserVGA, (obj), TYPE_VHOST_USER_VGA)
+
+typedef struct VhostUserVGA {
+    VirtIOVGABase parent_obj;
+
+    VhostUserGPU vdev;
+} VhostUserVGA;
+
+static void vhost_user_vga_inst_initfn(Object *obj)
+{
+    VhostUserVGA *dev = VHOST_USER_VGA(obj);
+
+    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+                                TYPE_VHOST_USER_GPU);
+
+    VIRTIO_VGA_BASE(dev)->vgpu = VIRTIO_GPU_BASE(&dev->vdev);
+
+    object_property_add_alias(obj, "chardev",
+                              OBJECT(&dev->vdev), "chardev",
+                              &error_abort);
+}
+
+static const VirtioPCIDeviceTypeInfo vhost_user_vga_info = {
+    .generic_name  = TYPE_VHOST_USER_VGA,
+    .parent        = TYPE_VIRTIO_VGA_BASE,
+    .instance_size = sizeof(struct VhostUserVGA),
+    .instance_init = vhost_user_vga_inst_initfn,
+};
+
+static void vhost_user_vga_register_types(void)
+{
+    virtio_pci_types_register(&vhost_user_vga_info);
+}
+
+type_init(vhost_user_vga_register_types)
diff --git a/hw/display/virtio-gpu-3d.c b/hw/display/virtio-gpu-3d.c
index 2d302526ab..b918167aec 100644
--- a/hw/display/virtio-gpu-3d.c
+++ b/hw/display/virtio-gpu-3d.c
@@ -118,11 +118,11 @@ static void virgl_cmd_context_destroy(VirtIOGPU *g,
 static void virtio_gpu_rect_update(VirtIOGPU *g, int idx, int x, int y,
                                 int width, int height)
 {
-    if (!g->scanout[idx].con) {
+    if (!g->parent_obj.scanout[idx].con) {
         return;
     }
 
-    dpy_gl_update(g->scanout[idx].con, x, y, width, height);
+    dpy_gl_update(g->parent_obj.scanout[idx].con, x, y, width, height);
 }
 
 static void virgl_cmd_resource_flush(VirtIOGPU *g,
@@ -135,8 +135,8 @@ static void virgl_cmd_resource_flush(VirtIOGPU *g,
     trace_virtio_gpu_cmd_res_flush(rf.resource_id,
                                    rf.r.width, rf.r.height, rf.r.x, rf.r.y);
 
-    for (i = 0; i < g->conf.max_outputs; i++) {
-        if (g->scanout[i].resource_id != rf.resource_id) {
+    for (i = 0; i < g->parent_obj.conf.max_outputs; i++) {
+        if (g->parent_obj.scanout[i].resource_id != rf.resource_id) {
             continue;
         }
         virtio_gpu_rect_update(g, i, rf.r.x, rf.r.y, rf.r.width, rf.r.height);
@@ -154,13 +154,13 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g,
     trace_virtio_gpu_cmd_set_scanout(ss.scanout_id, ss.resource_id,
                                      ss.r.width, ss.r.height, ss.r.x, ss.r.y);
 
-    if (ss.scanout_id >= g->conf.max_outputs) {
+    if (ss.scanout_id >= g->parent_obj.conf.max_outputs) {
         qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d",
                       __func__, ss.scanout_id);
         cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
         return;
     }
-    g->enable = 1;
+    g->parent_obj.enable = 1;
 
     memset(&info, 0, sizeof(info));
 
@@ -173,20 +173,22 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g,
             cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
             return;
         }
-        qemu_console_resize(g->scanout[ss.scanout_id].con,
+        qemu_console_resize(g->parent_obj.scanout[ss.scanout_id].con,
                             ss.r.width, ss.r.height);
         virgl_renderer_force_ctx_0();
-        dpy_gl_scanout_texture(g->scanout[ss.scanout_id].con, info.tex_id,
-                               info.flags & 1 /* FIXME: Y_0_TOP */,
-                               info.width, info.height,
-                               ss.r.x, ss.r.y, ss.r.width, ss.r.height);
+        dpy_gl_scanout_texture(
+            g->parent_obj.scanout[ss.scanout_id].con, info.tex_id,
+            info.flags & 1 /* FIXME: Y_0_TOP */,
+            info.width, info.height,
+            ss.r.x, ss.r.y, ss.r.width, ss.r.height);
     } else {
         if (ss.scanout_id != 0) {
-            dpy_gfx_replace_surface(g->scanout[ss.scanout_id].con, NULL);
+            dpy_gfx_replace_surface(
+                g->parent_obj.scanout[ss.scanout_id].con, NULL);
         }
-        dpy_gl_scanout_disable(g->scanout[ss.scanout_id].con);
+        dpy_gl_scanout_disable(g->parent_obj.scanout[ss.scanout_id].con);
     }
-    g->scanout[ss.scanout_id].resource_id = ss.resource_id;
+    g->parent_obj.scanout[ss.scanout_id].resource_id = ss.resource_id;
 }
 
 static void virgl_cmd_submit_3d(VirtIOGPU *g,
@@ -209,7 +211,7 @@ static void virgl_cmd_submit_3d(VirtIOGPU *g,
         goto out;
     }
 
-    if (virtio_gpu_stats_enabled(g->conf)) {
+    if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
         g->stats.req_3d++;
         g->stats.bytes_3d += cs.size;
     }
@@ -507,7 +509,7 @@ static void virgl_write_fence(void *opaque, uint32_t fence)
         QTAILQ_REMOVE(&g->fenceq, cmd, next);
         g_free(cmd);
         g->inflight--;
-        if (virtio_gpu_stats_enabled(g->conf)) {
+        if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
             fprintf(stderr, "inflight: %3d (-)\r", g->inflight);
         }
     }
@@ -524,7 +526,7 @@ virgl_create_context(void *opaque, int scanout_idx,
     qparams.major_ver = params->major_ver;
     qparams.minor_ver = params->minor_ver;
 
-    ctx = dpy_gl_ctx_create(g->scanout[scanout_idx].con, &qparams);
+    ctx = dpy_gl_ctx_create(g->parent_obj.scanout[scanout_idx].con, &qparams);
     return (virgl_renderer_gl_context)ctx;
 }
 
@@ -533,7 +535,7 @@ static void virgl_destroy_context(void *opaque, virgl_renderer_gl_context ctx)
     VirtIOGPU *g = opaque;
     QEMUGLContext qctx = (QEMUGLContext)ctx;
 
-    dpy_gl_ctx_destroy(g->scanout[0].con, qctx);
+    dpy_gl_ctx_destroy(g->parent_obj.scanout[0].con, qctx);
 }
 
 static int virgl_make_context_current(void *opaque, int scanout_idx,
@@ -542,7 +544,8 @@ static int virgl_make_context_current(void *opaque, int scanout_idx,
     VirtIOGPU *g = opaque;
     QEMUGLContext qctx = (QEMUGLContext)ctx;
 
-    return dpy_gl_ctx_make_current(g->scanout[scanout_idx].con, qctx);
+    return dpy_gl_ctx_make_current(g->parent_obj.scanout[scanout_idx].con,
+                                   qctx);
 }
 
 static struct virgl_renderer_callbacks virtio_gpu_3d_cbs = {
@@ -594,11 +597,11 @@ void virtio_gpu_virgl_reset(VirtIOGPU *g)
     int i;
 
     /* virgl_renderer_reset() ??? */
-    for (i = 0; i < g->conf.max_outputs; i++) {
+    for (i = 0; i < g->parent_obj.conf.max_outputs; i++) {
         if (i != 0) {
-            dpy_gfx_replace_surface(g->scanout[i].con, NULL);
+            dpy_gfx_replace_surface(g->parent_obj.scanout[i].con, NULL);
         }
-        dpy_gl_scanout_disable(g->scanout[i].con);
+        dpy_gl_scanout_disable(g->parent_obj.scanout[i].con);
     }
 }
 
@@ -614,7 +617,7 @@ int virtio_gpu_virgl_init(VirtIOGPU *g)
     g->fence_poll = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                  virtio_gpu_fence_poll, g);
 
-    if (virtio_gpu_stats_enabled(g->conf)) {
+    if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
         g->print_stats = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                       virtio_gpu_print_stats, g);
         timer_mod(g->print_stats, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c
new file mode 100644
index 0000000000..55e07995fe
--- /dev/null
+++ b/hw/display/virtio-gpu-base.c
@@ -0,0 +1,268 @@
+/*
+ * Virtio GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2014
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/virtio/virtio-gpu.h"
+#include "migration/blocker.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+void
+virtio_gpu_base_reset(VirtIOGPUBase *g)
+{
+    int i;
+
+    g->enable = 0;
+    g->use_virgl_renderer = false;
+
+    for (i = 0; i < g->conf.max_outputs; i++) {
+        g->scanout[i].resource_id = 0;
+        g->scanout[i].width = 0;
+        g->scanout[i].height = 0;
+        g->scanout[i].x = 0;
+        g->scanout[i].y = 0;
+        g->scanout[i].ds = NULL;
+    }
+}
+
+void
+virtio_gpu_base_fill_display_info(VirtIOGPUBase *g,
+                                  struct virtio_gpu_resp_display_info *dpy_info)
+{
+    int i;
+
+    for (i = 0; i < g->conf.max_outputs; i++) {
+        if (g->enabled_output_bitmask & (1 << i)) {
+            dpy_info->pmodes[i].enabled = 1;
+            dpy_info->pmodes[i].r.width = cpu_to_le32(g->req_state[i].width);
+            dpy_info->pmodes[i].r.height = cpu_to_le32(g->req_state[i].height);
+        }
+    }
+}
+
+static void virtio_gpu_invalidate_display(void *opaque)
+{
+}
+
+static void virtio_gpu_update_display(void *opaque)
+{
+}
+
+static void virtio_gpu_text_update(void *opaque, console_ch_t *chardata)
+{
+}
+
+static void virtio_gpu_notify_event(VirtIOGPUBase *g, uint32_t event_type)
+{
+    g->virtio_config.events_read |= event_type;
+    virtio_notify_config(&g->parent_obj);
+}
+
+static int virtio_gpu_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info)
+{
+    VirtIOGPUBase *g = opaque;
+
+    if (idx >= g->conf.max_outputs) {
+        return -1;
+    }
+
+    g->req_state[idx].x = info->xoff;
+    g->req_state[idx].y = info->yoff;
+    g->req_state[idx].width = info->width;
+    g->req_state[idx].height = info->height;
+
+    if (info->width && info->height) {
+        g->enabled_output_bitmask |= (1 << idx);
+    } else {
+        g->enabled_output_bitmask &= ~(1 << idx);
+    }
+
+    /* send event to guest */
+    virtio_gpu_notify_event(g, VIRTIO_GPU_EVENT_DISPLAY);
+    return 0;
+}
+
+static void
+virtio_gpu_gl_block(void *opaque, bool block)
+{
+    VirtIOGPUBase *g = opaque;
+    VirtIOGPUBaseClass *vgc = VIRTIO_GPU_BASE_GET_CLASS(g);
+
+    if (block) {
+        g->renderer_blocked++;
+    } else {
+        g->renderer_blocked--;
+    }
+    assert(g->renderer_blocked >= 0);
+
+    if (g->renderer_blocked == 0) {
+        vgc->gl_unblock(g);
+    }
+}
+
+const GraphicHwOps virtio_gpu_ops = {
+    .invalidate = virtio_gpu_invalidate_display,
+    .gfx_update = virtio_gpu_update_display,
+    .text_update = virtio_gpu_text_update,
+    .ui_info = virtio_gpu_ui_info,
+    .gl_block = virtio_gpu_gl_block,
+};
+
+bool
+virtio_gpu_base_device_realize(DeviceState *qdev,
+                               VirtIOHandleOutput ctrl_cb,
+                               VirtIOHandleOutput cursor_cb,
+                               Error **errp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
+    VirtIOGPUBase *g = VIRTIO_GPU_BASE(qdev);
+    Error *local_err = NULL;
+    int i;
+
+    if (g->conf.max_outputs > VIRTIO_GPU_MAX_SCANOUTS) {
+        error_setg(errp, "invalid max_outputs > %d", VIRTIO_GPU_MAX_SCANOUTS);
+        return false;
+    }
+
+    g->use_virgl_renderer = false;
+    if (virtio_gpu_virgl_enabled(g->conf)) {
+        error_setg(&g->migration_blocker, "virgl is not yet migratable");
+        migrate_add_blocker(g->migration_blocker, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            error_free(g->migration_blocker);
+            return false;
+        }
+    }
+
+    g->virtio_config.num_scanouts = cpu_to_le32(g->conf.max_outputs);
+    virtio_init(VIRTIO_DEVICE(g), "virtio-gpu", VIRTIO_ID_GPU,
+                sizeof(struct virtio_gpu_config));
+
+    if (virtio_gpu_virgl_enabled(g->conf)) {
+        /* use larger control queue in 3d mode */
+        virtio_add_queue(vdev, 256, ctrl_cb);
+        virtio_add_queue(vdev, 16, cursor_cb);
+    } else {
+        virtio_add_queue(vdev, 64, ctrl_cb);
+        virtio_add_queue(vdev, 16, cursor_cb);
+    }
+
+    g->enabled_output_bitmask = 1;
+
+    g->req_state[0].width = g->conf.xres;
+    g->req_state[0].height = g->conf.yres;
+
+    for (i = 0; i < g->conf.max_outputs; i++) {
+        g->scanout[i].con =
+            graphic_console_init(DEVICE(g), i, &virtio_gpu_ops, g);
+        if (i > 0) {
+            dpy_gfx_replace_surface(g->scanout[i].con, NULL);
+        }
+    }
+
+    return true;
+}
+
+static uint64_t
+virtio_gpu_base_get_features(VirtIODevice *vdev, uint64_t features,
+                             Error **errp)
+{
+    VirtIOGPUBase *g = VIRTIO_GPU_BASE(vdev);
+
+    if (virtio_gpu_virgl_enabled(g->conf)) {
+        features |= (1 << VIRTIO_GPU_F_VIRGL);
+    }
+    if (virtio_gpu_edid_enabled(g->conf)) {
+        features |= (1 << VIRTIO_GPU_F_EDID);
+    }
+
+    return features;
+}
+
+static void
+virtio_gpu_base_set_features(VirtIODevice *vdev, uint64_t features)
+{
+    static const uint32_t virgl = (1 << VIRTIO_GPU_F_VIRGL);
+    VirtIOGPUBase *g = VIRTIO_GPU_BASE(vdev);
+
+    g->use_virgl_renderer = ((features & virgl) == virgl);
+    trace_virtio_gpu_features(g->use_virgl_renderer);
+}
+
+static void
+virtio_gpu_base_device_unrealize(DeviceState *qdev, Error **errp)
+{
+    VirtIOGPUBase *g = VIRTIO_GPU_BASE(qdev);
+
+    if (g->migration_blocker) {
+        migrate_del_blocker(g->migration_blocker);
+        error_free(g->migration_blocker);
+    }
+}
+
+static void
+virtio_gpu_base_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+
+    vdc->unrealize = virtio_gpu_base_device_unrealize;
+    vdc->get_features = virtio_gpu_base_get_features;
+    vdc->set_features = virtio_gpu_base_set_features;
+
+    set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
+    dc->hotpluggable = false;
+}
+
+static const TypeInfo virtio_gpu_base_info = {
+    .name = TYPE_VIRTIO_GPU_BASE,
+    .parent = TYPE_VIRTIO_DEVICE,
+    .instance_size = sizeof(VirtIOGPUBase),
+    .class_size = sizeof(VirtIOGPUBaseClass),
+    .class_init = virtio_gpu_base_class_init,
+    .abstract = true
+};
+
+static void
+virtio_register_types(void)
+{
+    type_register_static(&virtio_gpu_base_info);
+}
+
+type_init(virtio_register_types)
+
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctrl_hdr)                != 24);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_update_cursor)           != 56);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_unref)          != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_create_2d)      != 40);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_set_scanout)             != 48);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_flush)          != 48);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_transfer_to_host_2d)     != 56);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_mem_entry)               != 16);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_attach_backing) != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_detach_backing) != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_display_info)       != 408);
+
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_transfer_host_3d)        != 72);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_create_3d)      != 72);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_create)              != 96);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_destroy)             != 24);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_resource)            != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_cmd_submit)              != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_get_capset_info)         != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_capset_info)        != 40);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_get_capset)              != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_capset)             != 24);
diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c
index 0bc4d9d424..206870cd4c 100644
--- a/hw/display/virtio-gpu-pci.c
+++ b/hw/display/virtio-gpu-pci.c
@@ -16,33 +16,18 @@
 #include "hw/pci/pci.h"
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-bus.h"
-#include "hw/virtio/virtio-pci.h"
-#include "hw/virtio/virtio-gpu.h"
+#include "hw/virtio/virtio-gpu-pci.h"
 
-typedef struct VirtIOGPUPCI VirtIOGPUPCI;
-
-/*
- * virtio-gpu-pci: This extends VirtioPCIProxy.
- */
-#define TYPE_VIRTIO_GPU_PCI "virtio-gpu-pci"
-#define VIRTIO_GPU_PCI(obj) \
-        OBJECT_CHECK(VirtIOGPUPCI, (obj), TYPE_VIRTIO_GPU_PCI)
-
-struct VirtIOGPUPCI {
-    VirtIOPCIProxy parent_obj;
-    VirtIOGPU vdev;
-};
-
-static Property virtio_gpu_pci_properties[] = {
+static Property virtio_gpu_pci_base_properties[] = {
     DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy),
     DEFINE_PROP_END_OF_LIST(),
 };
 
-static void virtio_gpu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+static void virtio_gpu_pci_base_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
 {
-    VirtIOGPUPCI *vgpu = VIRTIO_GPU_PCI(vpci_dev);
-    VirtIOGPU *g = &vgpu->vdev;
-    DeviceState *vdev = DEVICE(&vgpu->vdev);
+    VirtIOGPUPCIBase *vgpu = VIRTIO_GPU_PCI_BASE(vpci_dev);
+    VirtIOGPUBase *g = vgpu->vgpu;
+    DeviceState *vdev = DEVICE(g);
     int i;
     Error *local_error = NULL;
 
@@ -64,36 +49,56 @@ static void virtio_gpu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
     }
 }
 
-static void virtio_gpu_pci_class_init(ObjectClass *klass, void *data)
+static void virtio_gpu_pci_base_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
     PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
 
     set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
-    dc->props = virtio_gpu_pci_properties;
+    dc->props = virtio_gpu_pci_base_properties;
     dc->hotpluggable = false;
-    k->realize = virtio_gpu_pci_realize;
+    k->realize = virtio_gpu_pci_base_realize;
     pcidev_k->class_id = PCI_CLASS_DISPLAY_OTHER;
 }
 
+static const TypeInfo virtio_gpu_pci_base_info = {
+    .name = TYPE_VIRTIO_GPU_PCI_BASE,
+    .parent = TYPE_VIRTIO_PCI,
+    .instance_size = sizeof(VirtIOGPUPCIBase),
+    .class_init = virtio_gpu_pci_base_class_init,
+    .abstract = true
+};
+
+#define TYPE_VIRTIO_GPU_PCI "virtio-gpu-pci"
+#define VIRTIO_GPU_PCI(obj)                                 \
+    OBJECT_CHECK(VirtIOGPUPCI, (obj), TYPE_VIRTIO_GPU_PCI)
+
+typedef struct VirtIOGPUPCI {
+    VirtIOGPUPCIBase parent_obj;
+    VirtIOGPU vdev;
+} VirtIOGPUPCI;
+
 static void virtio_gpu_initfn(Object *obj)
 {
     VirtIOGPUPCI *dev = VIRTIO_GPU_PCI(obj);
 
     virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
                                 TYPE_VIRTIO_GPU);
+    VIRTIO_GPU_PCI_BASE(obj)->vgpu = VIRTIO_GPU_BASE(&dev->vdev);
 }
 
 static const VirtioPCIDeviceTypeInfo virtio_gpu_pci_info = {
     .generic_name = TYPE_VIRTIO_GPU_PCI,
+    .parent = TYPE_VIRTIO_GPU_PCI_BASE,
     .instance_size = sizeof(VirtIOGPUPCI),
     .instance_init = virtio_gpu_initfn,
-    .class_init = virtio_gpu_pci_class_init,
 };
 
 static void virtio_gpu_pci_register_types(void)
 {
+    type_register_static(&virtio_gpu_pci_base_info);
     virtio_pci_types_register(&virtio_gpu_pci_info);
 }
+
 type_init(virtio_gpu_pci_register_types)
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 9e37e0ac96..4a49da5ecb 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -20,11 +20,13 @@
 #include "sysemu/dma.h"
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-gpu.h"
+#include "hw/virtio/virtio-gpu-bswap.h"
+#include "hw/virtio/virtio-gpu-pixman.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/display/edid.h"
-#include "migration/blocker.h"
 #include "qemu/log.h"
 #include "qapi/error.h"
+#include "qemu/error-report.h"
 
 #define VIRTIO_GPU_VM_VERSION 1
 
@@ -34,53 +36,11 @@ virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id);
 static void virtio_gpu_cleanup_mapping(VirtIOGPU *g,
                                        struct virtio_gpu_simple_resource *res);
 
-static void
-virtio_gpu_ctrl_hdr_bswap(struct virtio_gpu_ctrl_hdr *hdr)
-{
-    le32_to_cpus(&hdr->type);
-    le32_to_cpus(&hdr->flags);
-    le64_to_cpus(&hdr->fence_id);
-    le32_to_cpus(&hdr->ctx_id);
-    le32_to_cpus(&hdr->padding);
-}
-
-static void virtio_gpu_bswap_32(void *ptr,
-                                size_t size)
-{
-#ifdef HOST_WORDS_BIGENDIAN
-
-    size_t i;
-    struct virtio_gpu_ctrl_hdr *hdr = (struct virtio_gpu_ctrl_hdr *) ptr;
-
-    virtio_gpu_ctrl_hdr_bswap(hdr);
-
-    i = sizeof(struct virtio_gpu_ctrl_hdr);
-    while (i < size) {
-        le32_to_cpus((uint32_t *)(ptr + i));
-        i = i + sizeof(uint32_t);
-    }
-
-#endif
-}
-
-static void
-virtio_gpu_t2d_bswap(struct virtio_gpu_transfer_to_host_2d *t2d)
-{
-    virtio_gpu_ctrl_hdr_bswap(&t2d->hdr);
-    le32_to_cpus(&t2d->r.x);
-    le32_to_cpus(&t2d->r.y);
-    le32_to_cpus(&t2d->r.width);
-    le32_to_cpus(&t2d->r.height);
-    le64_to_cpus(&t2d->offset);
-    le32_to_cpus(&t2d->resource_id);
-    le32_to_cpus(&t2d->padding);
-}
-
 #ifdef CONFIG_VIRGL
 #include <virglrenderer.h>
 #define VIRGL(_g, _virgl, _simple, ...)                     \
     do {                                                    \
-        if (_g->use_virgl_renderer) {                       \
+        if (_g->parent_obj.use_virgl_renderer) {            \
             _virgl(__VA_ARGS__);                            \
         } else {                                            \
             _simple(__VA_ARGS__);                           \
@@ -148,10 +108,10 @@ static void update_cursor(VirtIOGPU *g, struct virtio_gpu_update_cursor *cursor)
     struct virtio_gpu_scanout *s;
     bool move = cursor->hdr.type == VIRTIO_GPU_CMD_MOVE_CURSOR;
 
-    if (cursor->pos.scanout_id >= g->conf.max_outputs) {
+    if (cursor->pos.scanout_id >= g->parent_obj.conf.max_outputs) {
         return;
     }
-    s = &g->scanout[cursor->pos.scanout_id];
+    s = &g->parent_obj.scanout[cursor->pos.scanout_id];
 
     trace_virtio_gpu_update_cursor(cursor->pos.scanout_id,
                                    cursor->pos.x,
@@ -182,53 +142,6 @@ static void update_cursor(VirtIOGPU *g, struct virtio_gpu_update_cursor *cursor)
                   cursor->resource_id ? 1 : 0);
 }
 
-static void virtio_gpu_get_config(VirtIODevice *vdev, uint8_t *config)
-{
-    VirtIOGPU *g = VIRTIO_GPU(vdev);
-    memcpy(config, &g->virtio_config, sizeof(g->virtio_config));
-}
-
-static void virtio_gpu_set_config(VirtIODevice *vdev, const uint8_t *config)
-{
-    VirtIOGPU *g = VIRTIO_GPU(vdev);
-    struct virtio_gpu_config vgconfig;
-
-    memcpy(&vgconfig, config, sizeof(g->virtio_config));
-
-    if (vgconfig.events_clear) {
-        g->virtio_config.events_read &= ~vgconfig.events_clear;
-    }
-}
-
-static uint64_t virtio_gpu_get_features(VirtIODevice *vdev, uint64_t features,
-                                        Error **errp)
-{
-    VirtIOGPU *g = VIRTIO_GPU(vdev);
-
-    if (virtio_gpu_virgl_enabled(g->conf)) {
-        features |= (1 << VIRTIO_GPU_F_VIRGL);
-    }
-    if (virtio_gpu_edid_enabled(g->conf)) {
-        features |= (1 << VIRTIO_GPU_F_EDID);
-    }
-    return features;
-}
-
-static void virtio_gpu_set_features(VirtIODevice *vdev, uint64_t features)
-{
-    static const uint32_t virgl = (1 << VIRTIO_GPU_F_VIRGL);
-    VirtIOGPU *g = VIRTIO_GPU(vdev);
-
-    g->use_virgl_renderer = ((features & virgl) == virgl);
-    trace_virtio_gpu_features(g->use_virgl_renderer);
-}
-
-static void virtio_gpu_notify_event(VirtIOGPU *g, uint32_t event_type)
-{
-    g->virtio_config.events_read |= event_type;
-    virtio_notify_config(&g->parent_obj);
-}
-
 static struct virtio_gpu_simple_resource *
 virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id)
 {
@@ -277,21 +190,6 @@ void virtio_gpu_ctrl_response_nodata(VirtIOGPU *g,
     virtio_gpu_ctrl_response(g, cmd, &resp, sizeof(resp));
 }
 
-static void
-virtio_gpu_fill_display_info(VirtIOGPU *g,
-                             struct virtio_gpu_resp_display_info *dpy_info)
-{
-    int i;
-
-    for (i = 0; i < g->conf.max_outputs; i++) {
-        if (g->enabled_output_bitmask & (1 << i)) {
-            dpy_info->pmodes[i].enabled = 1;
-            dpy_info->pmodes[i].r.width = cpu_to_le32(g->req_state[i].width);
-            dpy_info->pmodes[i].r.height = cpu_to_le32(g->req_state[i].height);
-        }
-    }
-}
-
 void virtio_gpu_get_display_info(VirtIOGPU *g,
                                  struct virtio_gpu_ctrl_command *cmd)
 {
@@ -300,7 +198,7 @@ void virtio_gpu_get_display_info(VirtIOGPU *g,
     trace_virtio_gpu_cmd_get_display_info();
     memset(&display_info, 0, sizeof(display_info));
     display_info.hdr.type = VIRTIO_GPU_RESP_OK_DISPLAY_INFO;
-    virtio_gpu_fill_display_info(g, &display_info);
+    virtio_gpu_base_fill_display_info(VIRTIO_GPU_BASE(g), &display_info);
     virtio_gpu_ctrl_response(g, cmd, &display_info.hdr,
                              sizeof(display_info));
 }
@@ -309,9 +207,10 @@ static void
 virtio_gpu_generate_edid(VirtIOGPU *g, int scanout,
                          struct virtio_gpu_resp_edid *edid)
 {
+    VirtIOGPUBase *b = VIRTIO_GPU_BASE(g);
     qemu_edid_info info = {
-        .prefx = g->req_state[scanout].width,
-        .prefy = g->req_state[scanout].height,
+        .prefx = b->req_state[scanout].width,
+        .prefy = b->req_state[scanout].height,
     };
 
     edid->size = cpu_to_le32(sizeof(edid->edid));
@@ -323,11 +222,12 @@ void virtio_gpu_get_edid(VirtIOGPU *g,
 {
     struct virtio_gpu_resp_edid edid;
     struct virtio_gpu_cmd_get_edid get_edid;
+    VirtIOGPUBase *b = VIRTIO_GPU_BASE(g);
 
     VIRTIO_GPU_FILL_CMD(get_edid);
     virtio_gpu_bswap_32(&get_edid, sizeof(get_edid));
 
-    if (get_edid.scanout >= g->conf.max_outputs) {
+    if (get_edid.scanout >= b->conf.max_outputs) {
         cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
         return;
     }
@@ -339,30 +239,6 @@ void virtio_gpu_get_edid(VirtIOGPU *g,
     virtio_gpu_ctrl_response(g, cmd, &edid.hdr, sizeof(edid));
 }
 
-static pixman_format_code_t get_pixman_format(uint32_t virtio_gpu_format)
-{
-    switch (virtio_gpu_format) {
-    case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM:
-        return PIXMAN_BE_b8g8r8x8;
-    case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM:
-        return PIXMAN_BE_b8g8r8a8;
-    case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM:
-        return PIXMAN_BE_x8r8g8b8;
-    case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM:
-        return PIXMAN_BE_a8r8g8b8;
-    case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM:
-        return PIXMAN_BE_r8g8b8x8;
-    case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM:
-        return PIXMAN_BE_r8g8b8a8;
-    case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM:
-        return PIXMAN_BE_x8b8g8r8;
-    case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM:
-        return PIXMAN_BE_a8b8g8r8;
-    default:
-        return 0;
-    }
-}
-
 static uint32_t calc_image_hostmem(pixman_format_code_t pformat,
                                    uint32_t width, uint32_t height)
 {
@@ -409,7 +285,7 @@ static void virtio_gpu_resource_create_2d(VirtIOGPU *g,
     res->format = c2d.format;
     res->resource_id = c2d.resource_id;
 
-    pformat = get_pixman_format(c2d.format);
+    pformat = virtio_gpu_get_pixman_format(c2d.format);
     if (!pformat) {
         qemu_log_mask(LOG_GUEST_ERROR,
                       "%s: host couldn't handle guest format %d\n",
@@ -420,7 +296,7 @@ static void virtio_gpu_resource_create_2d(VirtIOGPU *g,
     }
 
     res->hostmem = calc_image_hostmem(pformat, c2d.width, c2d.height);
-    if (res->hostmem + g->hostmem < g->conf.max_hostmem) {
+    if (res->hostmem + g->hostmem < g->conf_max_hostmem) {
         res->image = pixman_image_create_bits(pformat,
                                               c2d.width,
                                               c2d.height,
@@ -442,7 +318,7 @@ static void virtio_gpu_resource_create_2d(VirtIOGPU *g,
 
 static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id)
 {
-    struct virtio_gpu_scanout *scanout = &g->scanout[scanout_id];
+    struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id];
     struct virtio_gpu_simple_resource *res;
     DisplaySurface *ds = NULL;
 
@@ -474,7 +350,7 @@ static void virtio_gpu_resource_destroy(VirtIOGPU *g,
     int i;
 
     if (res->scanout_bitmask) {
-        for (i = 0; i < g->conf.max_outputs; i++) {
+        for (i = 0; i < g->parent_obj.conf.max_outputs; i++) {
             if (res->scanout_bitmask & (1 << i)) {
                 virtio_gpu_disable_scanout(g, i);
             }
@@ -604,7 +480,7 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g,
 
     pixman_region_init_rect(&flush_region,
                             rf.r.x, rf.r.y, rf.r.width, rf.r.height);
-    for (i = 0; i < g->conf.max_outputs; i++) {
+    for (i = 0; i < g->parent_obj.conf.max_outputs; i++) {
         struct virtio_gpu_scanout *scanout;
         pixman_region16_t region, finalregion;
         pixman_box16_t *extents;
@@ -612,7 +488,7 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g,
         if (!(res->scanout_bitmask & (1 << i))) {
             continue;
         }
-        scanout = &g->scanout[i];
+        scanout = &g->parent_obj.scanout[i];
 
         pixman_region_init(&finalregion);
         pixman_region_init_rect(&region, scanout->x, scanout->y,
@@ -622,7 +498,7 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g,
         pixman_region_translate(&finalregion, -scanout->x, -scanout->y);
         extents = pixman_region_extents(&finalregion);
         /* work out the area we need to update for each console */
-        dpy_gfx_update(g->scanout[i].con,
+        dpy_gfx_update(g->parent_obj.scanout[i].con,
                        extents->x1, extents->y1,
                        extents->x2 - extents->x1,
                        extents->y2 - extents->y1);
@@ -653,14 +529,14 @@ static void virtio_gpu_set_scanout(VirtIOGPU *g,
     trace_virtio_gpu_cmd_set_scanout(ss.scanout_id, ss.resource_id,
                                      ss.r.width, ss.r.height, ss.r.x, ss.r.y);
 
-    if (ss.scanout_id >= g->conf.max_outputs) {
+    if (ss.scanout_id >= g->parent_obj.conf.max_outputs) {
         qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d",
                       __func__, ss.scanout_id);
         cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
         return;
     }
 
-    g->enable = 1;
+    g->parent_obj.enable = 1;
     if (ss.resource_id == 0) {
         virtio_gpu_disable_scanout(g, ss.scanout_id);
         return;
@@ -677,6 +553,8 @@ static void virtio_gpu_set_scanout(VirtIOGPU *g,
 
     if (ss.r.x > res->width ||
         ss.r.y > res->height ||
+        ss.r.width < 16 ||
+        ss.r.height < 16 ||
         ss.r.width > res->width ||
         ss.r.height > res->height ||
         ss.r.x + ss.r.width > res->width ||
@@ -689,7 +567,7 @@ static void virtio_gpu_set_scanout(VirtIOGPU *g,
         return;
     }
 
-    scanout = &g->scanout[ss.scanout_id];
+    scanout = &g->parent_obj.scanout[ss.scanout_id];
 
     format = pixman_image_get_format(res->image);
     bpp = DIV_ROUND_UP(PIXMAN_FORMAT_BPP(format), 8);
@@ -712,7 +590,8 @@ static void virtio_gpu_set_scanout(VirtIOGPU *g,
             return;
         }
         pixman_image_unref(rect);
-        dpy_gfx_replace_surface(g->scanout[ss.scanout_id].con, scanout->ds);
+        dpy_gfx_replace_surface(g->parent_obj.scanout[ss.scanout_id].con,
+                                scanout->ds);
     }
 
     ores = virtio_gpu_find_resource(g, scanout->resource_id);
@@ -930,7 +809,7 @@ void virtio_gpu_process_cmdq(VirtIOGPU *g)
     while (!QTAILQ_EMPTY(&g->cmdq)) {
         cmd = QTAILQ_FIRST(&g->cmdq);
 
-        if (g->renderer_blocked) {
+        if (g->parent_obj.renderer_blocked) {
             break;
         }
 
@@ -939,14 +818,14 @@ void virtio_gpu_process_cmdq(VirtIOGPU *g)
               g, cmd);
 
         QTAILQ_REMOVE(&g->cmdq, cmd, next);
-        if (virtio_gpu_stats_enabled(g->conf)) {
+        if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
             g->stats.requests++;
         }
 
         if (!cmd->finished) {
             QTAILQ_INSERT_TAIL(&g->fenceq, cmd, next);
             g->inflight++;
-            if (virtio_gpu_stats_enabled(g->conf)) {
+            if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
                 if (g->stats.max_inflight < g->inflight) {
                     g->stats.max_inflight = g->inflight;
                 }
@@ -958,6 +837,19 @@ void virtio_gpu_process_cmdq(VirtIOGPU *g)
     }
 }
 
+static void virtio_gpu_gl_unblock(VirtIOGPUBase *b)
+{
+    VirtIOGPU *g = VIRTIO_GPU(b);
+
+#ifdef CONFIG_VIRGL
+    if (g->renderer_reset) {
+        g->renderer_reset = false;
+        virtio_gpu_virgl_reset(g);
+    }
+#endif
+    virtio_gpu_process_cmdq(g);
+}
+
 static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIOGPU *g = VIRTIO_GPU(vdev);
@@ -968,7 +860,7 @@ static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
     }
 
 #ifdef CONFIG_VIRGL
-    if (!g->renderer_inited && g->use_virgl_renderer) {
+    if (!g->renderer_inited && g->parent_obj.use_virgl_renderer) {
         virtio_gpu_virgl_init(g);
         g->renderer_inited = true;
     }
@@ -986,7 +878,7 @@ static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
     virtio_gpu_process_cmdq(g);
 
 #ifdef CONFIG_VIRGL
-    if (g->use_virgl_renderer) {
+    if (g->parent_obj.use_virgl_renderer) {
         virtio_gpu_virgl_fence_poll(g);
     }
 #endif
@@ -995,7 +887,7 @@ static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 static void virtio_gpu_ctrl_bh(void *opaque)
 {
     VirtIOGPU *g = opaque;
-    virtio_gpu_handle_ctrl(&g->parent_obj, g->ctrl_vq);
+    virtio_gpu_handle_ctrl(&g->parent_obj.parent_obj, g->ctrl_vq);
 }
 
 static void virtio_gpu_handle_cursor(VirtIODevice *vdev, VirtQueue *vq)
@@ -1033,75 +925,9 @@ static void virtio_gpu_handle_cursor(VirtIODevice *vdev, VirtQueue *vq)
 static void virtio_gpu_cursor_bh(void *opaque)
 {
     VirtIOGPU *g = opaque;
-    virtio_gpu_handle_cursor(&g->parent_obj, g->cursor_vq);
+    virtio_gpu_handle_cursor(&g->parent_obj.parent_obj, g->cursor_vq);
 }
 
-static void virtio_gpu_invalidate_display(void *opaque)
-{
-}
-
-static void virtio_gpu_update_display(void *opaque)
-{
-}
-
-static void virtio_gpu_text_update(void *opaque, console_ch_t *chardata)
-{
-}
-
-static int virtio_gpu_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info)
-{
-    VirtIOGPU *g = opaque;
-
-    if (idx >= g->conf.max_outputs) {
-        return -1;
-    }
-
-    g->req_state[idx].x = info->xoff;
-    g->req_state[idx].y = info->yoff;
-    g->req_state[idx].width = info->width;
-    g->req_state[idx].height = info->height;
-
-    if (info->width && info->height) {
-        g->enabled_output_bitmask |= (1 << idx);
-    } else {
-        g->enabled_output_bitmask &= ~(1 << idx);
-    }
-
-    /* send event to guest */
-    virtio_gpu_notify_event(g, VIRTIO_GPU_EVENT_DISPLAY);
-    return 0;
-}
-
-static void virtio_gpu_gl_block(void *opaque, bool block)
-{
-    VirtIOGPU *g = opaque;
-
-    if (block) {
-        g->renderer_blocked++;
-    } else {
-        g->renderer_blocked--;
-    }
-    assert(g->renderer_blocked >= 0);
-
-    if (g->renderer_blocked == 0) {
-#ifdef CONFIG_VIRGL
-        if (g->renderer_reset) {
-            g->renderer_reset = false;
-            virtio_gpu_virgl_reset(g);
-        }
-#endif
-        virtio_gpu_process_cmdq(g);
-    }
-}
-
-const GraphicHwOps virtio_gpu_ops = {
-    .invalidate = virtio_gpu_invalidate_display,
-    .gfx_update = virtio_gpu_update_display,
-    .text_update = virtio_gpu_text_update,
-    .ui_info = virtio_gpu_ui_info,
-    .gl_block = virtio_gpu_gl_block,
-};
-
 static const VMStateDescription vmstate_virtio_gpu_scanout = {
     .name = "virtio-gpu-one-scanout",
     .version_id = 1,
@@ -1124,10 +950,11 @@ static const VMStateDescription vmstate_virtio_gpu_scanouts = {
     .name = "virtio-gpu-scanouts",
     .version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_INT32(enable, struct VirtIOGPU),
-        VMSTATE_UINT32_EQUAL(conf.max_outputs, struct VirtIOGPU, NULL),
-        VMSTATE_STRUCT_VARRAY_UINT32(scanout, struct VirtIOGPU,
-                                     conf.max_outputs, 1,
+        VMSTATE_INT32(parent_obj.enable, struct VirtIOGPU),
+        VMSTATE_UINT32_EQUAL(parent_obj.conf.max_outputs,
+                             struct VirtIOGPU, NULL),
+        VMSTATE_STRUCT_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
+                                     parent_obj.conf.max_outputs, 1,
                                      vmstate_virtio_gpu_scanout,
                                      struct virtio_gpu_scanout),
         VMSTATE_END_OF_LIST()
@@ -1183,7 +1010,7 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
         res->iov_cnt = qemu_get_be32(f);
 
         /* allocate */
-        pformat = get_pixman_format(res->format);
+        pformat = virtio_gpu_get_pixman_format(res->format);
         if (!pformat) {
             g_free(res);
             return -EINVAL;
@@ -1242,8 +1069,8 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
 
     /* load & apply scanout state */
     vmstate_load_state(f, &vmstate_virtio_gpu_scanouts, g, 1);
-    for (i = 0; i < g->conf.max_outputs; i++) {
-        scanout = &g->scanout[i];
+    for (i = 0; i < g->parent_obj.conf.max_outputs; i++) {
+        scanout = &g->parent_obj.scanout[i];
         if (!scanout->resource_id) {
             continue;
         }
@@ -1272,84 +1099,35 @@ static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
     VirtIOGPU *g = VIRTIO_GPU(qdev);
     bool have_virgl;
-    Error *local_err = NULL;
-    int i;
-
-    if (g->conf.max_outputs > VIRTIO_GPU_MAX_SCANOUTS) {
-        error_setg(errp, "invalid max_outputs > %d", VIRTIO_GPU_MAX_SCANOUTS);
-        return;
-    }
 
-    g->use_virgl_renderer = false;
 #if !defined(CONFIG_VIRGL) || defined(HOST_WORDS_BIGENDIAN)
     have_virgl = false;
 #else
     have_virgl = display_opengl;
 #endif
     if (!have_virgl) {
-        g->conf.flags &= ~(1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED);
-    }
-
-    if (virtio_gpu_virgl_enabled(g->conf)) {
-        error_setg(&g->migration_blocker, "virgl is not yet migratable");
-        migrate_add_blocker(g->migration_blocker, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            error_free(g->migration_blocker);
-            return;
-        }
-    }
-
-    g->virtio_config.num_scanouts = cpu_to_le32(g->conf.max_outputs);
-    virtio_init(VIRTIO_DEVICE(g), "virtio-gpu", VIRTIO_ID_GPU,
-                sizeof(struct virtio_gpu_config));
-
-    g->req_state[0].width = g->conf.xres;
-    g->req_state[0].height = g->conf.yres;
-
-    if (virtio_gpu_virgl_enabled(g->conf)) {
-        /* use larger control queue in 3d mode */
-        g->ctrl_vq   = virtio_add_queue(vdev, 256, virtio_gpu_handle_ctrl_cb);
-        g->cursor_vq = virtio_add_queue(vdev, 16, virtio_gpu_handle_cursor_cb);
-
+        g->parent_obj.conf.flags &= ~(1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED);
+    } else {
 #if defined(CONFIG_VIRGL)
-        g->virtio_config.num_capsets = virtio_gpu_virgl_get_num_capsets(g);
-#else
-        g->virtio_config.num_capsets = 0;
+        VIRTIO_GPU_BASE(g)->virtio_config.num_capsets =
+            virtio_gpu_virgl_get_num_capsets(g);
 #endif
-    } else {
-        g->ctrl_vq   = virtio_add_queue(vdev, 64, virtio_gpu_handle_ctrl_cb);
-        g->cursor_vq = virtio_add_queue(vdev, 16, virtio_gpu_handle_cursor_cb);
     }
 
+    if (!virtio_gpu_base_device_realize(qdev,
+                                        virtio_gpu_handle_ctrl_cb,
+                                        virtio_gpu_handle_cursor_cb,
+                                        errp)) {
+        return;
+    }
+
+    g->ctrl_vq = virtio_get_queue(vdev, 0);
+    g->cursor_vq = virtio_get_queue(vdev, 1);
     g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);
     g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
     QTAILQ_INIT(&g->reslist);
     QTAILQ_INIT(&g->cmdq);
     QTAILQ_INIT(&g->fenceq);
-
-    g->enabled_output_bitmask = 1;
-
-    for (i = 0; i < g->conf.max_outputs; i++) {
-        g->scanout[i].con =
-            graphic_console_init(DEVICE(g), i, &virtio_gpu_ops, g);
-        if (i > 0) {
-            dpy_gfx_replace_surface(g->scanout[i].con, NULL);
-        }
-    }
-}
-
-static void virtio_gpu_device_unrealize(DeviceState *qdev, Error **errp)
-{
-    VirtIOGPU *g = VIRTIO_GPU(qdev);
-    if (g->migration_blocker) {
-        migrate_del_blocker(g->migration_blocker);
-        error_free(g->migration_blocker);
-    }
-}
-
-static void virtio_gpu_instance_init(Object *obj)
-{
 }
 
 static void virtio_gpu_reset(VirtIODevice *vdev)
@@ -1357,21 +1135,16 @@ static void virtio_gpu_reset(VirtIODevice *vdev)
     VirtIOGPU *g = VIRTIO_GPU(vdev);
     struct virtio_gpu_simple_resource *res, *tmp;
     struct virtio_gpu_ctrl_command *cmd;
-    int i;
 
-    g->enable = 0;
+#ifdef CONFIG_VIRGL
+    if (g->parent_obj.use_virgl_renderer) {
+        virtio_gpu_virgl_reset(g);
+    }
+#endif
 
     QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
         virtio_gpu_resource_destroy(g, res);
     }
-    for (i = 0; i < g->conf.max_outputs; i++) {
-        g->scanout[i].resource_id = 0;
-        g->scanout[i].width = 0;
-        g->scanout[i].height = 0;
-        g->scanout[i].x = 0;
-        g->scanout[i].y = 0;
-        g->scanout[i].ds = NULL;
-    }
 
     while (!QTAILQ_EMPTY(&g->cmdq)) {
         cmd = QTAILQ_FIRST(&g->cmdq);
@@ -1387,15 +1160,37 @@ static void virtio_gpu_reset(VirtIODevice *vdev)
     }
 
 #ifdef CONFIG_VIRGL
-    if (g->use_virgl_renderer) {
-        if (g->renderer_blocked) {
+    if (g->parent_obj.use_virgl_renderer) {
+        if (g->parent_obj.renderer_blocked) {
             g->renderer_reset = true;
         } else {
             virtio_gpu_virgl_reset(g);
         }
-        g->use_virgl_renderer = 0;
+        g->parent_obj.use_virgl_renderer = false;
     }
 #endif
+
+    virtio_gpu_base_reset(VIRTIO_GPU_BASE(vdev));
+}
+
+static void
+virtio_gpu_get_config(VirtIODevice *vdev, uint8_t *config)
+{
+    VirtIOGPUBase *g = VIRTIO_GPU_BASE(vdev);
+
+    memcpy(config, &g->virtio_config, sizeof(g->virtio_config));
+}
+
+static void
+virtio_gpu_set_config(VirtIODevice *vdev, const uint8_t *config)
+{
+    VirtIOGPUBase *g = VIRTIO_GPU_BASE(vdev);
+    const struct virtio_gpu_config *vgconfig =
+        (const struct virtio_gpu_config *)config;
+
+    if (vgconfig->events_clear) {
+        g->virtio_config.events_read &= ~vgconfig->events_clear;
+    }
 }
 
 /*
@@ -1426,18 +1221,15 @@ static const VMStateDescription vmstate_virtio_gpu = {
 };
 
 static Property virtio_gpu_properties[] = {
-    DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1),
-    DEFINE_PROP_SIZE("max_hostmem", VirtIOGPU, conf.max_hostmem, 256 * MiB),
+    VIRTIO_GPU_BASE_PROPERTIES(VirtIOGPU, parent_obj.conf),
+    DEFINE_PROP_SIZE("max_hostmem", VirtIOGPU, conf_max_hostmem,
+                     256 * MiB),
 #ifdef CONFIG_VIRGL
-    DEFINE_PROP_BIT("virgl", VirtIOGPU, conf.flags,
+    DEFINE_PROP_BIT("virgl", VirtIOGPU, parent_obj.conf.flags,
                     VIRTIO_GPU_FLAG_VIRGL_ENABLED, true),
-    DEFINE_PROP_BIT("stats", VirtIOGPU, conf.flags,
+    DEFINE_PROP_BIT("stats", VirtIOGPU, parent_obj.conf.flags,
                     VIRTIO_GPU_FLAG_STATS_ENABLED, false),
 #endif
-    DEFINE_PROP_BIT("edid", VirtIOGPU, conf.flags,
-                    VIRTIO_GPU_FLAG_EDID_ENABLED, false),
-    DEFINE_PROP_UINT32("xres", VirtIOGPU, conf.xres, 1024),
-    DEFINE_PROP_UINT32("yres", VirtIOGPU, conf.yres, 768),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1445,27 +1237,22 @@ static void virtio_gpu_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    VirtIOGPUBaseClass *vgc = VIRTIO_GPU_BASE_CLASS(klass);
 
+    vgc->gl_unblock = virtio_gpu_gl_unblock;
     vdc->realize = virtio_gpu_device_realize;
-    vdc->unrealize = virtio_gpu_device_unrealize;
+    vdc->reset = virtio_gpu_reset;
     vdc->get_config = virtio_gpu_get_config;
     vdc->set_config = virtio_gpu_set_config;
-    vdc->get_features = virtio_gpu_get_features;
-    vdc->set_features = virtio_gpu_set_features;
 
-    vdc->reset = virtio_gpu_reset;
-
-    set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
-    dc->props = virtio_gpu_properties;
     dc->vmsd = &vmstate_virtio_gpu;
-    dc->hotpluggable = false;
+    dc->props = virtio_gpu_properties;
 }
 
 static const TypeInfo virtio_gpu_info = {
     .name = TYPE_VIRTIO_GPU,
-    .parent = TYPE_VIRTIO_DEVICE,
+    .parent = TYPE_VIRTIO_GPU_BASE,
     .instance_size = sizeof(VirtIOGPU),
-    .instance_init = virtio_gpu_instance_init,
     .class_init = virtio_gpu_class_init,
 };
 
@@ -1475,26 +1262,3 @@ static void virtio_register_types(void)
 }
 
 type_init(virtio_register_types)
-
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctrl_hdr)                != 24);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_update_cursor)           != 56);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_unref)          != 32);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_create_2d)      != 40);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_set_scanout)             != 48);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_flush)          != 48);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_transfer_to_host_2d)     != 56);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_mem_entry)               != 16);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_attach_backing) != 32);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_detach_backing) != 32);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_display_info)       != 408);
-
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_transfer_host_3d)        != 72);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_create_3d)      != 72);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_create)              != 96);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_destroy)             != 24);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_resource)            != 32);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_cmd_submit)              != 32);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_get_capset_info)         != 32);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_capset_info)        != 40);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_get_capset)              != 32);
-QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_capset)             != 24);
diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c
index 5d57bf5b0c..67e34935c2 100644
--- a/hw/display/virtio-vga.c
+++ b/hw/display/virtio-vga.c
@@ -1,63 +1,42 @@
 #include "qemu/osdep.h"
 #include "hw/hw.h"
 #include "hw/pci/pci.h"
-#include "vga_int.h"
-#include "hw/virtio/virtio-pci.h"
 #include "hw/virtio/virtio-gpu.h"
 #include "qapi/error.h"
+#include "virtio-vga.h"
 
-/*
- * virtio-vga: This extends VirtioPCIProxy.
- */
-#define TYPE_VIRTIO_VGA "virtio-vga"
-#define VIRTIO_VGA(obj) \
-        OBJECT_CHECK(VirtIOVGA, (obj), TYPE_VIRTIO_VGA)
-#define VIRTIO_VGA_GET_CLASS(obj) \
-        OBJECT_GET_CLASS(VirtIOVGAClass, obj, TYPE_VIRTIO_VGA)
-#define VIRTIO_VGA_CLASS(klass) \
-        OBJECT_CLASS_CHECK(VirtIOVGAClass, klass, TYPE_VIRTIO_VGA)
-
-typedef struct VirtIOVGA {
-    VirtIOPCIProxy parent_obj;
-    VirtIOGPU      vdev;
-    VGACommonState vga;
-    MemoryRegion   vga_mrs[3];
-} VirtIOVGA;
-
-typedef struct VirtIOVGAClass {
-    VirtioPCIClass parent_class;
-    DeviceReset parent_reset;
-} VirtIOVGAClass;
-
-static void virtio_vga_invalidate_display(void *opaque)
+static void virtio_vga_base_invalidate_display(void *opaque)
 {
-    VirtIOVGA *vvga = opaque;
+    VirtIOVGABase *vvga = opaque;
+    VirtIOGPUBase *g = vvga->vgpu;
 
-    if (vvga->vdev.enable) {
-        virtio_gpu_ops.invalidate(&vvga->vdev);
+    if (g->enable) {
+        virtio_gpu_ops.invalidate(g);
     } else {
         vvga->vga.hw_ops->invalidate(&vvga->vga);
     }
 }
 
-static void virtio_vga_update_display(void *opaque)
+static void virtio_vga_base_update_display(void *opaque)
 {
-    VirtIOVGA *vvga = opaque;
+    VirtIOVGABase *vvga = opaque;
+    VirtIOGPUBase *g = vvga->vgpu;
 
-    if (vvga->vdev.enable) {
-        virtio_gpu_ops.gfx_update(&vvga->vdev);
+    if (g->enable) {
+        virtio_gpu_ops.gfx_update(g);
     } else {
         vvga->vga.hw_ops->gfx_update(&vvga->vga);
     }
 }
 
-static void virtio_vga_text_update(void *opaque, console_ch_t *chardata)
+static void virtio_vga_base_text_update(void *opaque, console_ch_t *chardata)
 {
-    VirtIOVGA *vvga = opaque;
+    VirtIOVGABase *vvga = opaque;
+    VirtIOGPUBase *g = vvga->vgpu;
 
-    if (vvga->vdev.enable) {
+    if (g->enable) {
         if (virtio_gpu_ops.text_update) {
-            virtio_gpu_ops.text_update(&vvga->vdev, chardata);
+            virtio_gpu_ops.text_update(g, chardata);
         }
     } else {
         if (vvga->vga.hw_ops->text_update) {
@@ -66,49 +45,52 @@ static void virtio_vga_text_update(void *opaque, console_ch_t *chardata)
     }
 }
 
-static int virtio_vga_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info)
+static int virtio_vga_base_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info)
 {
-    VirtIOVGA *vvga = opaque;
+    VirtIOVGABase *vvga = opaque;
+    VirtIOGPUBase *g = vvga->vgpu;
 
     if (virtio_gpu_ops.ui_info) {
-        return virtio_gpu_ops.ui_info(&vvga->vdev, idx, info);
+        return virtio_gpu_ops.ui_info(g, idx, info);
     }
     return -1;
 }
 
-static void virtio_vga_gl_block(void *opaque, bool block)
+static void virtio_vga_base_gl_block(void *opaque, bool block)
 {
-    VirtIOVGA *vvga = opaque;
+    VirtIOVGABase *vvga = opaque;
+    VirtIOGPUBase *g = vvga->vgpu;
 
     if (virtio_gpu_ops.gl_block) {
-        virtio_gpu_ops.gl_block(&vvga->vdev, block);
+        virtio_gpu_ops.gl_block(g, block);
     }
 }
 
-static const GraphicHwOps virtio_vga_ops = {
-    .invalidate = virtio_vga_invalidate_display,
-    .gfx_update = virtio_vga_update_display,
-    .text_update = virtio_vga_text_update,
-    .ui_info = virtio_vga_ui_info,
-    .gl_block = virtio_vga_gl_block,
+static const GraphicHwOps virtio_vga_base_ops = {
+    .invalidate = virtio_vga_base_invalidate_display,
+    .gfx_update = virtio_vga_base_update_display,
+    .text_update = virtio_vga_base_text_update,
+    .ui_info = virtio_vga_base_ui_info,
+    .gl_block = virtio_vga_base_gl_block,
 };
 
-static const VMStateDescription vmstate_virtio_vga = {
+static const VMStateDescription vmstate_virtio_vga_base = {
     .name = "virtio-vga",
     .version_id = 2,
     .minimum_version_id = 2,
     .fields = (VMStateField[]) {
         /* no pci stuff here, saving the virtio device will handle that */
-        VMSTATE_STRUCT(vga, VirtIOVGA, 0, vmstate_vga_common, VGACommonState),
+        VMSTATE_STRUCT(vga, VirtIOVGABase, 0,
+                       vmstate_vga_common, VGACommonState),
         VMSTATE_END_OF_LIST()
     }
 };
 
 /* VGA device wrapper around PCI device around virtio GPU */
-static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+static void virtio_vga_base_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
 {
-    VirtIOVGA *vvga = VIRTIO_VGA(vpci_dev);
-    VirtIOGPU *g = &vvga->vdev;
+    VirtIOVGABase *vvga = VIRTIO_VGA_BASE(vpci_dev);
+    VirtIOGPUBase *g = vvga->vgpu;
     VGACommonState *vga = &vvga->vga;
     Error *err = NULL;
     uint32_t offset;
@@ -168,7 +150,7 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
                                  vvga->vga_mrs, true, false);
 
     vga->con = g->scanout[0].con;
-    graphic_console_set_hwops(vga->con, &virtio_vga_ops, vvga);
+    graphic_console_set_hwops(vga->con, &virtio_vga_base_ops, vvga);
 
     for (i = 0; i < g->conf.max_outputs; i++) {
         object_property_set_link(OBJECT(g->scanout[i].con),
@@ -177,10 +159,10 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
     }
 }
 
-static void virtio_vga_reset(DeviceState *dev)
+static void virtio_vga_base_reset(DeviceState *dev)
 {
-    VirtIOVGAClass *klass = VIRTIO_VGA_GET_CLASS(dev);
-    VirtIOVGA *vvga = VIRTIO_VGA(dev);
+    VirtIOVGABaseClass *klass = VIRTIO_VGA_BASE_GET_CLASS(dev);
+    VirtIOVGABase *vvga = VIRTIO_VGA_BASE(dev);
 
     /* reset virtio-gpu */
     klass->parent_reset(dev);
@@ -190,48 +172,70 @@ static void virtio_vga_reset(DeviceState *dev)
     vga_dirty_log_start(&vvga->vga);
 }
 
-static Property virtio_vga_properties[] = {
+static Property virtio_vga_base_properties[] = {
     DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy),
     DEFINE_PROP_END_OF_LIST(),
 };
 
-static void virtio_vga_class_init(ObjectClass *klass, void *data)
+static void virtio_vga_base_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
-    VirtIOVGAClass *v = VIRTIO_VGA_CLASS(klass);
+    VirtIOVGABaseClass *v = VIRTIO_VGA_BASE_CLASS(klass);
     PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
 
     set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
-    dc->props = virtio_vga_properties;
-    dc->vmsd = &vmstate_virtio_vga;
+    dc->props = virtio_vga_base_properties;
+    dc->vmsd = &vmstate_virtio_vga_base;
     dc->hotpluggable = false;
-    device_class_set_parent_reset(dc, virtio_vga_reset,
+    device_class_set_parent_reset(dc, virtio_vga_base_reset,
                                   &v->parent_reset);
 
-    k->realize = virtio_vga_realize;
+    k->realize = virtio_vga_base_realize;
     pcidev_k->romfile = "vgabios-virtio.bin";
     pcidev_k->class_id = PCI_CLASS_DISPLAY_VGA;
 }
 
+static TypeInfo virtio_vga_base_info = {
+    .name          = TYPE_VIRTIO_VGA_BASE,
+    .parent        = TYPE_VIRTIO_PCI,
+    .instance_size = sizeof(struct VirtIOVGABase),
+    .class_size    = sizeof(struct VirtIOVGABaseClass),
+    .class_init    = virtio_vga_base_class_init,
+    .abstract      = true,
+};
+
+#define TYPE_VIRTIO_VGA "virtio-vga"
+
+#define VIRTIO_VGA(obj)                             \
+    OBJECT_CHECK(VirtIOVGA, (obj), TYPE_VIRTIO_VGA)
+
+typedef struct VirtIOVGA {
+    VirtIOVGABase parent_obj;
+
+    VirtIOGPU     vdev;
+} VirtIOVGA;
+
 static void virtio_vga_inst_initfn(Object *obj)
 {
     VirtIOVGA *dev = VIRTIO_VGA(obj);
 
     virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
                                 TYPE_VIRTIO_GPU);
+    VIRTIO_VGA_BASE(dev)->vgpu = VIRTIO_GPU_BASE(&dev->vdev);
 }
 
+
 static VirtioPCIDeviceTypeInfo virtio_vga_info = {
     .generic_name  = TYPE_VIRTIO_VGA,
+    .parent        = TYPE_VIRTIO_VGA_BASE,
     .instance_size = sizeof(struct VirtIOVGA),
     .instance_init = virtio_vga_inst_initfn,
-    .class_size    = sizeof(struct VirtIOVGAClass),
-    .class_init    = virtio_vga_class_init,
 };
 
 static void virtio_vga_register_types(void)
 {
+    type_register_static(&virtio_vga_base_info);
     virtio_pci_types_register(&virtio_vga_info);
 }
 
diff --git a/hw/display/virtio-vga.h b/hw/display/virtio-vga.h
new file mode 100644
index 0000000000..c10bf390aa
--- /dev/null
+++ b/hw/display/virtio-vga.h
@@ -0,0 +1,32 @@
+#ifndef VIRTIO_VGA_H_
+#define VIRTIO_VGA_H_
+
+#include "hw/virtio/virtio-gpu-pci.h"
+#include "vga_int.h"
+
+/*
+ * virtio-vga-base: This extends VirtioPCIProxy.
+ */
+#define TYPE_VIRTIO_VGA_BASE "virtio-vga-base"
+#define VIRTIO_VGA_BASE(obj)                                \
+    OBJECT_CHECK(VirtIOVGABase, (obj), TYPE_VIRTIO_VGA_BASE)
+#define VIRTIO_VGA_BASE_GET_CLASS(obj)                      \
+    OBJECT_GET_CLASS(VirtIOVGABaseClass, obj, TYPE_VIRTIO_VGA_BASE)
+#define VIRTIO_VGA_BASE_CLASS(klass)                        \
+    OBJECT_CLASS_CHECK(VirtIOVGABaseClass, klass, TYPE_VIRTIO_VGA_BASE)
+
+typedef struct VirtIOVGABase {
+    VirtIOPCIProxy parent_obj;
+
+    VirtIOGPUBase *vgpu;
+    VGACommonState vga;
+    MemoryRegion vga_mrs[3];
+} VirtIOVGABase;
+
+typedef struct VirtIOVGABaseClass {
+    VirtioPCIClass parent_class;
+
+    DeviceReset parent_reset;
+} VirtIOVGABaseClass;
+
+#endif /* VIRTIO_VGA_H_ */
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 2632b73f80..edc240bcbf 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -110,6 +110,9 @@ struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
 /* Physical Address of PVH entry point read from kernel ELF NOTE */
 static size_t pvh_start_addr;
 
+GlobalProperty pc_compat_4_0_1[] = {};
+const size_t pc_compat_4_0_1_len = G_N_ELEMENTS(pc_compat_4_0_1);
+
 GlobalProperty pc_compat_4_0[] = {};
 const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0);
 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 37dd350511..dcddc64662 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -357,7 +357,7 @@ static void pc_q35_machine_options(MachineClass *m)
     m->units_per_default_bus = 1;
     m->default_machine_opts = "firmware=bios-256k.bin";
     m->default_display = "std";
-    m->default_kernel_irqchip_split = true;
+    m->default_kernel_irqchip_split = false;
     m->no_floppy = 1;
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE);
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE);
@@ -374,10 +374,22 @@ static void pc_q35_4_1_machine_options(MachineClass *m)
 DEFINE_Q35_MACHINE(v4_1, "pc-q35-4.1", NULL,
                    pc_q35_4_1_machine_options);
 
-static void pc_q35_4_0_machine_options(MachineClass *m)
+static void pc_q35_4_0_1_machine_options(MachineClass *m)
 {
     pc_q35_4_1_machine_options(m);
     m->alias = NULL;
+    compat_props_add(m->compat_props, hw_compat_4_0_1, hw_compat_4_0_1_len);
+    compat_props_add(m->compat_props, pc_compat_4_0_1, pc_compat_4_0_1_len);
+}
+
+DEFINE_Q35_MACHINE(v4_0_1, "pc-q35-4.0.1", NULL,
+                   pc_q35_4_0_1_machine_options);
+
+static void pc_q35_4_0_machine_options(MachineClass *m)
+{
+    pc_q35_4_0_1_machine_options(m);
+    m->default_kernel_irqchip_split = true;
+    m->alias = NULL;
     compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len);
     compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len);
 }
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 573b022e1e..360cd20bd8 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -168,7 +168,7 @@ static void ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind, Error **errp)
             return;
         } else {
             /* Anonymous BlockBackend for an empty drive */
-            dev->conf.blk = blk_new(0, BLK_PERM_ALL);
+            dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
             ret = blk_attach_dev(dev->conf.blk, &dev->qdev);
             assert(ret == 0);
         }
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index df712c3e6c..03019b9a03 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -39,6 +39,7 @@ obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
 obj-$(CONFIG_XICS_KVM) += xics_kvm.o
 obj-$(CONFIG_XIVE) += xive.o
 obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
+obj-$(CONFIG_XIVE_KVM) += spapr_xive_kvm.o
 obj-$(CONFIG_POWERNV) += xics_pnv.o pnv_xive.o
 obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
 obj-$(CONFIG_S390_FLIC) += s390_flic.o
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index 097f88d460..62e0ef8fa5 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -41,13 +41,6 @@
 #define SPAPR_XIVE_NVT_BASE 0x400
 
 /*
- * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
- * to the controller block id value. It can nevertheless be changed
- * for testing purpose.
- */
-#define SPAPR_XIVE_BLOCK_ID 0x0
-
-/*
  * sPAPR NVT and END indexing helpers
  */
 static uint32_t spapr_xive_nvt_to_target(uint8_t nvt_blk, uint32_t nvt_idx)
@@ -86,6 +79,22 @@ static int spapr_xive_target_to_nvt(uint32_t target,
  * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
  * priorities per CPU
  */
+int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
+                             uint32_t *out_server, uint8_t *out_prio)
+{
+
+    assert(end_blk == SPAPR_XIVE_BLOCK_ID);
+
+    if (out_server) {
+        *out_server = end_idx >> 3;
+    }
+
+    if (out_prio) {
+        *out_prio = end_idx & 0x7;
+    }
+    return 0;
+}
+
 static void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
                                   uint8_t *out_end_blk, uint32_t *out_end_idx)
 {
@@ -120,6 +129,7 @@ static int spapr_xive_target_to_end(uint32_t target, uint8_t prio,
 static void spapr_xive_end_pic_print_info(SpaprXive *xive, XiveEND *end,
                                           Monitor *mon)
 {
+    uint64_t qaddr_base = xive_end_qaddr(end);
     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
@@ -127,9 +137,9 @@ static void spapr_xive_end_pic_print_info(SpaprXive *xive, XiveEND *end,
     uint32_t nvt = xive_get_field32(END_W6_NVT_INDEX, end->w6);
     uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
 
-    monitor_printf(mon, "%3d/%d % 6d/%5d ^%d",
+    monitor_printf(mon, "%3d/%d % 6d/%5d @%"PRIx64" ^%d",
                    spapr_xive_nvt_to_target(0, nvt),
-                   priority, qindex, qentries, qgen);
+                   priority, qindex, qentries, qaddr_base, qgen);
 
     xive_end_queue_pic_print_info(end, 6, mon);
     monitor_printf(mon, "]");
@@ -140,7 +150,17 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
     XiveSource *xsrc = &xive->source;
     int i;
 
-    monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_synchronize_state(xive, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return;
+        }
+    }
+
+    monitor_printf(mon, "  LISN         PQ    EISN     CPU/PRIO EQ\n");
 
     for (i = 0; i < xive->nr_irqs; i++) {
         uint8_t pq = xive_source_esb_get(xsrc, i);
@@ -173,7 +193,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
     }
 }
 
-static void spapr_xive_map_mmio(SpaprXive *xive)
+void spapr_xive_map_mmio(SpaprXive *xive)
 {
     sysbus_mmio_map(SYS_BUS_DEVICE(xive), 0, xive->vc_base);
     sysbus_mmio_map(SYS_BUS_DEVICE(xive), 1, xive->end_base);
@@ -250,6 +270,9 @@ static void spapr_xive_instance_init(Object *obj)
     object_initialize_child(obj, "end_source", &xive->end_source,
                             sizeof(xive->end_source), TYPE_XIVE_END_SOURCE,
                             &error_abort, NULL);
+
+    /* Not connected to the KVM XIVE device */
+    xive->fd = -1;
 }
 
 static void spapr_xive_realize(DeviceState *dev, Error **errp)
@@ -304,22 +327,36 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
     xive->eat = g_new0(XiveEAS, xive->nr_irqs);
     xive->endt = g_new0(XiveEND, xive->nr_ends);
 
-    /* TIMA initialization */
-    memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive,
-                          "xive.tima", 4ull << TM_SHIFT);
+    xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
+                           xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
+
+    qemu_register_reset(spapr_xive_reset, dev);
 
     /* Define all XIVE MMIO regions on SysBus */
     sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio);
     sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio);
     sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio);
+}
 
-    /* Map all regions */
-    spapr_xive_map_mmio(xive);
+void spapr_xive_init(SpaprXive *xive, Error **errp)
+{
+    XiveSource *xsrc = &xive->source;
 
-    xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
-                           xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
+    /*
+     * The emulated XIVE device can only be initialized once. If the
+     * ESB memory region has been already mapped, it means we have been
+     * through there.
+     */
+    if (memory_region_is_mapped(&xsrc->esb_mmio)) {
+        return;
+    }
 
-    qemu_register_reset(spapr_xive_reset, dev);
+    /* TIMA initialization */
+    memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive,
+                          "xive.tima", 4ull << TM_SHIFT);
+
+    /* Map all regions */
+    spapr_xive_map_mmio(xive);
 }
 
 static int spapr_xive_get_eas(XiveRouter *xrtr, uint8_t eas_blk,
@@ -427,10 +464,34 @@ static const VMStateDescription vmstate_spapr_xive_eas = {
     },
 };
 
+static int vmstate_spapr_xive_pre_save(void *opaque)
+{
+    if (kvm_irqchip_in_kernel()) {
+        return kvmppc_xive_pre_save(SPAPR_XIVE(opaque));
+    }
+
+    return 0;
+}
+
+/*
+ * Called by the sPAPR IRQ backend 'post_load' method at the machine
+ * level.
+ */
+int spapr_xive_post_load(SpaprXive *xive, int version_id)
+{
+    if (kvm_irqchip_in_kernel()) {
+        return kvmppc_xive_post_load(xive, version_id);
+    }
+
+    return 0;
+}
+
 static const VMStateDescription vmstate_spapr_xive = {
     .name = TYPE_SPAPR_XIVE,
     .version_id = 1,
     .minimum_version_id = 1,
+    .pre_save = vmstate_spapr_xive_pre_save,
+    .post_load = NULL, /* handled at the machine level */
     .fields = (VMStateField[]) {
         VMSTATE_UINT32_EQUAL(nr_irqs, SpaprXive, NULL),
         VMSTATE_STRUCT_VARRAY_POINTER_UINT32(eat, SpaprXive, nr_irqs,
@@ -494,6 +555,17 @@ bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi)
     if (lsi) {
         xive_source_irq_set_lsi(xsrc, lisn);
     }
+
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_source_reset_one(xsrc, lisn, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return false;
+        }
+    }
+
     return true;
 }
 
@@ -755,6 +827,16 @@ static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
         new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn);
     }
 
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_set_source_config(xive, lisn, &new_eas, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
+
 out:
     xive->eat[lisn] = new_eas;
     return H_SUCCESS;
@@ -993,6 +1075,12 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
     case 16:
     case 21:
     case 24:
+        if (!QEMU_IS_ALIGNED(qpage, 1ul << qsize)) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: EQ @0x%" HWADDR_PRIx
+                          " is not naturally aligned with %" HWADDR_PRIx "\n",
+                          qpage, (hwaddr)1 << qsize);
+            return H_P4;
+        }
         end.w2 = cpu_to_be32((qpage >> 32) & 0x0fffffff);
         end.w3 = cpu_to_be32(qpage & 0xffffffff);
         end.w0 |= cpu_to_be32(END_W0_ENQUEUE);
@@ -1060,6 +1148,16 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
      */
 
 out:
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_set_queue_config(xive, end_blk, end_idx, &end, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
+
     /* Update END */
     memcpy(&xive->endt[end_idx], &end, sizeof(XiveEND));
     return H_SUCCESS;
@@ -1144,14 +1242,23 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
     }
 
     if (xive_end_is_enqueue(end)) {
-        args[1] = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
-            | be32_to_cpu(end->w3);
+        args[1] = xive_end_qaddr(end);
         args[2] = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
     } else {
         args[1] = 0;
         args[2] = 0;
     }
 
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_get_queue_config(xive, end_blk, end_idx, end, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
+
     /* TODO: do we need any locking on the END ? */
     if (flags & SPAPR_XIVE_END_DEBUG) {
         /* Load the event queue generation number into the return flags */
@@ -1304,15 +1411,20 @@ static target_ulong h_int_esb(PowerPCCPU *cpu,
         return H_P3;
     }
 
-    mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
+    if (kvm_irqchip_in_kernel()) {
+        args[0] = kvmppc_xive_esb_rw(xsrc, lisn, offset, data,
+                                     flags & SPAPR_XIVE_ESB_STORE);
+    } else {
+        mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
 
-    if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
-                      (flags & SPAPR_XIVE_ESB_STORE))) {
-        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
-                      HWADDR_PRIx "\n", mmio_addr);
-        return H_HARDWARE;
+        if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
+                          (flags & SPAPR_XIVE_ESB_STORE))) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
+                          HWADDR_PRIx "\n", mmio_addr);
+            return H_HARDWARE;
+        }
+        args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
     }
-    args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
     return H_SUCCESS;
 }
 
@@ -1369,7 +1481,20 @@ static target_ulong h_int_sync(PowerPCCPU *cpu,
      * This is not needed when running the emulation under QEMU
      */
 
-    /* This is not real hardware. Nothing to be done */
+    /*
+     * This is not real hardware. Nothing to be done unless when
+     * under KVM
+     */
+
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_sync_source(xive, lisn, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
     return H_SUCCESS;
 }
 
@@ -1404,6 +1529,16 @@ static target_ulong h_int_reset(PowerPCCPU *cpu,
     }
 
     device_reset(DEVICE(xive));
+
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_reset(xive, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
     return H_SUCCESS;
 }
 
diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
new file mode 100644
index 0000000000..b48f135838
--- /dev/null
+++ b/hw/intc/spapr_xive_kvm.c
@@ -0,0 +1,823 @@
+/*
+ * QEMU PowerPC sPAPR XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2019, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "target/ppc/cpu.h"
+#include "sysemu/cpus.h"
+#include "sysemu/kvm.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/ppc/spapr_xive.h"
+#include "hw/ppc/xive.h"
+#include "kvm_ppc.h"
+
+#include <sys/ioctl.h>
+
+/*
+ * Helpers for CPU hotplug
+ *
+ * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
+ */
+typedef struct KVMEnabledCPU {
+    unsigned long vcpu_id;
+    QLIST_ENTRY(KVMEnabledCPU) node;
+} KVMEnabledCPU;
+
+static QLIST_HEAD(, KVMEnabledCPU)
+    kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);
+
+static bool kvm_cpu_is_enabled(CPUState *cs)
+{
+    KVMEnabledCPU *enabled_cpu;
+    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
+
+    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
+        if (enabled_cpu->vcpu_id == vcpu_id) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static void kvm_cpu_enable(CPUState *cs)
+{
+    KVMEnabledCPU *enabled_cpu;
+    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
+
+    enabled_cpu = g_malloc(sizeof(*enabled_cpu));
+    enabled_cpu->vcpu_id = vcpu_id;
+    QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
+}
+
+static void kvm_cpu_disable_all(void)
+{
+    KVMEnabledCPU *enabled_cpu, *next;
+
+    QLIST_FOREACH_SAFE(enabled_cpu, &kvm_enabled_cpus, node, next) {
+        QLIST_REMOVE(enabled_cpu, node);
+        g_free(enabled_cpu);
+    }
+}
+
+/*
+ * XIVE Thread Interrupt Management context (KVM)
+ */
+
+static void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp)
+{
+    uint64_t state[2];
+    int ret;
+
+    /* word0 and word1 of the OS ring. */
+    state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);
+
+    ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
+    if (ret != 0) {
+        error_setg_errno(errp, errno,
+                         "XIVE: could not restore KVM state of CPU %ld",
+                         kvm_arch_vcpu_id(tctx->cs));
+    }
+}
+
+void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
+{
+    SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
+    uint64_t state[2] = { 0 };
+    int ret;
+
+    /* The KVM XIVE device is not in use */
+    if (xive->fd == -1) {
+        return;
+    }
+
+    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
+    if (ret != 0) {
+        error_setg_errno(errp, errno,
+                         "XIVE: could not capture KVM state of CPU %ld",
+                         kvm_arch_vcpu_id(tctx->cs));
+        return;
+    }
+
+    /* word0 and word1 of the OS ring. */
+    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
+}
+
+typedef struct {
+    XiveTCTX *tctx;
+    Error *err;
+} XiveCpuGetState;
+
+static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
+                                                 run_on_cpu_data arg)
+{
+    XiveCpuGetState *s = arg.host_ptr;
+
+    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
+}
+
+void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
+{
+    XiveCpuGetState s = {
+        .tctx = tctx,
+        .err = NULL,
+    };
+
+    /*
+     * Kick the vCPU to make sure they are available for the KVM ioctl.
+     */
+    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
+               RUN_ON_CPU_HOST_PTR(&s));
+
+    if (s.err) {
+        error_propagate(errp, s.err);
+        return;
+    }
+}
+
+void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
+{
+    SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
+    unsigned long vcpu_id;
+    int ret;
+
+    /* The KVM XIVE device is not in use */
+    if (xive->fd == -1) {
+        return;
+    }
+
+    /* Check if CPU was hot unplugged and replugged. */
+    if (kvm_cpu_is_enabled(tctx->cs)) {
+        return;
+    }
+
+    vcpu_id = kvm_arch_vcpu_id(tctx->cs);
+
+    ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
+                              vcpu_id, 0);
+    if (ret < 0) {
+        error_setg(errp, "XIVE: unable to connect CPU%ld to KVM device: %s",
+                   vcpu_id, strerror(errno));
+        return;
+    }
+
+    kvm_cpu_enable(tctx->cs);
+}
+
+/*
+ * XIVE Interrupt Source (KVM)
+ */
+
+void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
+                                   Error **errp)
+{
+    uint32_t end_idx;
+    uint32_t end_blk;
+    uint8_t priority;
+    uint32_t server;
+    bool masked;
+    uint32_t eisn;
+    uint64_t kvm_src;
+    Error *local_err = NULL;
+
+    assert(xive_eas_is_valid(eas));
+
+    end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+    end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
+    eisn = xive_get_field64(EAS_END_DATA, eas->w);
+    masked = xive_eas_is_masked(eas);
+
+    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+    kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
+        KVM_XIVE_SOURCE_PRIORITY_MASK;
+    kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
+        KVM_XIVE_SOURCE_SERVER_MASK;
+    kvm_src |= ((uint64_t) masked << KVM_XIVE_SOURCE_MASKED_SHIFT) &
+        KVM_XIVE_SOURCE_MASKED_MASK;
+    kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
+        KVM_XIVE_SOURCE_EISN_MASK;
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
+                      &kvm_src, true, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+}
+
+void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp)
+{
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn,
+                      NULL, true, errp);
+}
+
+/*
+ * At reset, the interrupt sources are simply created and MASKED. We
+ * only need to inform the KVM XIVE device about their type: LSI or
+ * MSI.
+ */
+void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
+{
+    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
+    uint64_t state = 0;
+
+    /* The KVM XIVE device is not in use */
+    if (xive->fd == -1) {
+        return;
+    }
+
+    if (xive_source_irq_is_lsi(xsrc, srcno)) {
+        state |= KVM_XIVE_LEVEL_SENSITIVE;
+        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
+            state |= KVM_XIVE_LEVEL_ASSERTED;
+        }
+    }
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state,
+                      true, errp);
+}
+
+static void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
+{
+    int i;
+
+    for (i = 0; i < xsrc->nr_irqs; i++) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_source_reset_one(xsrc, i, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+}
+
+/*
+ * This is used to perform the magic loads on the ESB pages, described
+ * in xive.h.
+ *
+ * Memory barriers should not be needed for loads (no store for now).
+ */
+static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+                            uint64_t data, bool write)
+{
+    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_mgmt(xsrc, srcno) +
+        offset;
+
+    if (write) {
+        *addr = cpu_to_be64(data);
+        return -1;
+    } else {
+        /* Prevent the compiler from optimizing away the load */
+        volatile uint64_t value = be64_to_cpu(*addr);
+        return value;
+    }
+}
+
+static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
+{
+    return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3;
+}
+
+static void xive_esb_trigger(XiveSource *xsrc, int srcno)
+{
+    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_page(xsrc, srcno);
+
+    *addr = 0x0;
+}
+
+uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+                            uint64_t data, bool write)
+{
+    if (write) {
+        return xive_esb_rw(xsrc, srcno, offset, data, 1);
+    }
+
+    /*
+     * Special Load EOI handling for LSI sources. Q bit is never set
+     * and the interrupt should be re-triggered if the level is still
+     * asserted.
+     */
+    if (xive_source_irq_is_lsi(xsrc, srcno) &&
+        offset == XIVE_ESB_LOAD_EOI) {
+        xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00);
+        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
+            xive_esb_trigger(xsrc, srcno);
+        }
+        return 0;
+    } else {
+        return xive_esb_rw(xsrc, srcno, offset, 0, 0);
+    }
+}
+
+static void kvmppc_xive_source_get_state(XiveSource *xsrc)
+{
+    int i;
+
+    for (i = 0; i < xsrc->nr_irqs; i++) {
+        /* Perform a load without side effect to retrieve the PQ bits */
+        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
+
+        /* and save PQ locally */
+        xive_source_esb_set(xsrc, i, pq);
+    }
+}
+
+void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
+{
+    XiveSource *xsrc = opaque;
+    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
+    struct kvm_irq_level args;
+    int rc;
+
+    /* The KVM XIVE device should be in use */
+    assert(xive->fd != -1);
+
+    args.irq = srcno;
+    if (!xive_source_irq_is_lsi(xsrc, srcno)) {
+        if (!val) {
+            return;
+        }
+        args.level = KVM_INTERRUPT_SET;
+    } else {
+        if (val) {
+            xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
+            args.level = KVM_INTERRUPT_SET_LEVEL;
+        } else {
+            xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
+            args.level = KVM_INTERRUPT_UNSET;
+        }
+    }
+    rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args);
+    if (rc < 0) {
+        error_report("XIVE: kvm_irq_line() failed : %s", strerror(errno));
+    }
+}
+
+/*
+ * sPAPR XIVE interrupt controller (KVM)
+ */
+void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
+                                  uint32_t end_idx, XiveEND *end,
+                                  Error **errp)
+{
+    struct kvm_ppc_xive_eq kvm_eq = { 0 };
+    uint64_t kvm_eq_idx;
+    uint8_t priority;
+    uint32_t server;
+    Error *local_err = NULL;
+
+    assert(xive_end_is_valid(end));
+
+    /* Encode the tuple (server, prio) as a KVM EQ index */
+    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
+            KVM_XIVE_EQ_PRIORITY_MASK;
+    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
+        KVM_XIVE_EQ_SERVER_MASK;
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
+                      &kvm_eq, false, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    /*
+     * The EQ index and toggle bit are updated by HW. These are the
+     * only fields from KVM we want to update QEMU with. The other END
+     * fields should already be in the QEMU END table.
+     */
+    end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
+        xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
+}
+
+void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
+                                  uint32_t end_idx, XiveEND *end,
+                                  Error **errp)
+{
+    struct kvm_ppc_xive_eq kvm_eq = { 0 };
+    uint64_t kvm_eq_idx;
+    uint8_t priority;
+    uint32_t server;
+    Error *local_err = NULL;
+
+    /*
+     * Build the KVM state from the local END structure.
+     */
+
+    kvm_eq.flags = 0;
+    if (xive_get_field32(END_W0_UCOND_NOTIFY, end->w0)) {
+        kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
+    }
+
+    /*
+     * If the hcall is disabling the EQ, set the size and page address
+     * to zero. When migrating, only valid ENDs are taken into
+     * account.
+     */
+    if (xive_end_is_valid(end)) {
+        kvm_eq.qshift = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
+        kvm_eq.qaddr  = xive_end_qaddr(end);
+        /*
+         * The EQ toggle bit and index should only be relevant when
+         * restoring the EQ state
+         */
+        kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
+        kvm_eq.qindex  = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+    } else {
+        kvm_eq.qshift = 0;
+        kvm_eq.qaddr  = 0;
+    }
+
+    /* Encode the tuple (server, prio) as a KVM EQ index */
+    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
+            KVM_XIVE_EQ_PRIORITY_MASK;
+    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
+        KVM_XIVE_EQ_SERVER_MASK;
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
+                      &kvm_eq, true, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+}
+
+void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
+{
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET,
+                      NULL, true, errp);
+}
+
+static void kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
+{
+    Error *local_err = NULL;
+    int i;
+
+    for (i = 0; i < xive->nr_ends; i++) {
+        if (!xive_end_is_valid(&xive->endt[i])) {
+            continue;
+        }
+
+        kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
+                                     &xive->endt[i], &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+}
+
+/*
+ * The primary goal of the XIVE VM change handler is to mark the EQ
+ * pages dirty when all XIVE event notifications have stopped.
+ *
+ * Whenever the VM is stopped, the VM change handler sets the source
+ * PQs to PENDING to stop the flow of events and to possibly catch a
+ * triggered interrupt occuring while the VM is stopped. The previous
+ * state is saved in anticipation of a migration. The XIVE controller
+ * is then synced through KVM to flush any in-flight event
+ * notification and stabilize the EQs.
+ *
+ * At this stage, we can mark the EQ page dirty and let a migration
+ * sequence transfer the EQ pages to the destination, which is done
+ * just after the stop state.
+ *
+ * The previous configuration of the sources is restored when the VM
+ * runs again. If an interrupt was queued while the VM was stopped,
+ * simply generate a trigger.
+ */
+static void kvmppc_xive_change_state_handler(void *opaque, int running,
+                                             RunState state)
+{
+    SpaprXive *xive = opaque;
+    XiveSource *xsrc = &xive->source;
+    Error *local_err = NULL;
+    int i;
+
+    /*
+     * Restore the sources to their initial state. This is called when
+     * the VM resumes after a stop or a migration.
+     */
+    if (running) {
+        for (i = 0; i < xsrc->nr_irqs; i++) {
+            uint8_t pq = xive_source_esb_get(xsrc, i);
+            uint8_t old_pq;
+
+            old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));
+
+            /*
+             * An interrupt was queued while the VM was stopped,
+             * generate a trigger.
+             */
+            if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) {
+                xive_esb_trigger(xsrc, i);
+            }
+        }
+
+        return;
+    }
+
+    /*
+     * Mask the sources, to stop the flow of event notifications, and
+     * save the PQs locally in the XiveSource object. The XiveSource
+     * state will be collected later on by its vmstate handler if a
+     * migration is in progress.
+     */
+    for (i = 0; i < xsrc->nr_irqs; i++) {
+        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
+
+        /*
+         * PQ is set to PENDING to possibly catch a triggered
+         * interrupt occuring while the VM is stopped (hotplug event
+         * for instance) .
+         */
+        if (pq != XIVE_ESB_OFF) {
+            pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_10);
+        }
+        xive_source_esb_set(xsrc, i, pq);
+    }
+
+    /*
+     * Sync the XIVE controller in KVM, to flush in-flight event
+     * notification that should be enqueued in the EQs and mark the
+     * XIVE EQ pages dirty to collect all updates.
+     */
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
+                      KVM_DEV_XIVE_EQ_SYNC, NULL, true, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return;
+    }
+}
+
+void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
+{
+    /* The KVM XIVE device is not in use */
+    if (xive->fd == -1) {
+        return;
+    }
+
+    /*
+     * When the VM is stopped, the sources are masked and the previous
+     * state is saved in anticipation of a migration. We should not
+     * synchronize the source state in that case else we will override
+     * the saved state.
+     */
+    if (runstate_is_running()) {
+        kvmppc_xive_source_get_state(&xive->source);
+    }
+
+    /* EAT: there is no extra state to query from KVM */
+
+    /* ENDT */
+    kvmppc_xive_get_queues(xive, errp);
+}
+
+/*
+ * The SpaprXive 'pre_save' method is called by the vmstate handler of
+ * the SpaprXive model, after the XIVE controller is synced in the VM
+ * change handler.
+ */
+int kvmppc_xive_pre_save(SpaprXive *xive)
+{
+    Error *local_err = NULL;
+
+    /* The KVM XIVE device is not in use */
+    if (xive->fd == -1) {
+        return 0;
+    }
+
+    /* EAT: there is no extra state to query from KVM */
+
+    /* ENDT */
+    kvmppc_xive_get_queues(xive, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * The SpaprXive 'post_load' method is not called by a vmstate
+ * handler. It is called at the sPAPR machine level at the end of the
+ * migration sequence by the sPAPR IRQ backend 'post_load' method,
+ * when all XIVE states have been transferred and loaded.
+ */
+int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
+{
+    Error *local_err = NULL;
+    CPUState *cs;
+    int i;
+
+    /* The KVM XIVE device should be in use */
+    assert(xive->fd != -1);
+
+    /* Restore the ENDT first. The targetting depends on it. */
+    for (i = 0; i < xive->nr_ends; i++) {
+        if (!xive_end_is_valid(&xive->endt[i])) {
+            continue;
+        }
+
+        kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
+                                     &xive->endt[i], &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return -1;
+        }
+    }
+
+    /* Restore the EAT */
+    for (i = 0; i < xive->nr_irqs; i++) {
+        if (!xive_eas_is_valid(&xive->eat[i])) {
+            continue;
+        }
+
+        kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return -1;
+        }
+    }
+
+    /* Restore the thread interrupt contexts */
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+        kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return -1;
+        }
+    }
+
+    /* The source states will be restored when the machine starts running */
+    return 0;
+}
+
+static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
+                              Error **errp)
+{
+    void *addr;
+    uint32_t page_shift = 16; /* TODO: fix page_shift */
+
+    addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd,
+                pgoff << page_shift);
+    if (addr == MAP_FAILED) {
+        error_setg_errno(errp, errno, "XIVE: unable to set memory mapping");
+        return NULL;
+    }
+
+    return addr;
+}
+
+/*
+ * All the XIVE memory regions are now backed by mappings from the KVM
+ * XIVE device.
+ */
+void kvmppc_xive_connect(SpaprXive *xive, Error **errp)
+{
+    XiveSource *xsrc = &xive->source;
+    Error *local_err = NULL;
+    size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+    size_t tima_len = 4ull << TM_SHIFT;
+    CPUState *cs;
+
+    /*
+     * The KVM XIVE device already in use. This is the case when
+     * rebooting under the XIVE-only interrupt mode.
+     */
+    if (xive->fd != -1) {
+        return;
+    }
+
+    if (!kvmppc_has_cap_xive()) {
+        error_setg(errp, "IRQ_XIVE capability must be present for KVM");
+        return;
+    }
+
+    /* First, create the KVM XIVE device */
+    xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
+    if (xive->fd < 0) {
+        error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device");
+        return;
+    }
+
+    /*
+     * 1. Source ESB pages - KVM mapping
+     */
+    xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len,
+                                      &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    memory_region_init_ram_device_ptr(&xsrc->esb_mmio, OBJECT(xsrc),
+                                      "xive.esb", esb_len, xsrc->esb_mmap);
+
+    /*
+     * 2. END ESB pages (No KVM support yet)
+     */
+
+    /*
+     * 3. TIMA pages - KVM mapping
+     */
+    xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len,
+                                     &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    memory_region_init_ram_device_ptr(&xive->tm_mmio, OBJECT(xive),
+                                      "xive.tima", tima_len, xive->tm_mmap);
+
+    xive->change = qemu_add_vm_change_state_handler(
+        kvmppc_xive_change_state_handler, xive);
+
+    /* Connect the presenters to the initial VCPUs of the machine */
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+        kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+    /* Update the KVM sources */
+    kvmppc_xive_source_reset(xsrc, &local_err);
+    if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+    }
+
+    kvm_kernel_irqchip = true;
+    kvm_msi_via_irqfd_allowed = true;
+    kvm_gsi_direct_mapping = true;
+
+    /* Map all regions */
+    spapr_xive_map_mmio(xive);
+}
+
+void kvmppc_xive_disconnect(SpaprXive *xive, Error **errp)
+{
+    XiveSource *xsrc;
+    size_t esb_len;
+
+    /* The KVM XIVE device is not in use */
+    if (!xive || xive->fd == -1) {
+        return;
+    }
+
+    if (!kvmppc_has_cap_xive()) {
+        error_setg(errp, "IRQ_XIVE capability must be present for KVM");
+        return;
+    }
+
+    /* Clear the KVM mapping */
+    xsrc = &xive->source;
+    esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+
+    sysbus_mmio_unmap(SYS_BUS_DEVICE(xive), 0);
+    munmap(xsrc->esb_mmap, esb_len);
+
+    sysbus_mmio_unmap(SYS_BUS_DEVICE(xive), 1);
+
+    sysbus_mmio_unmap(SYS_BUS_DEVICE(xive), 2);
+    munmap(xive->tm_mmap, 4ull << TM_SHIFT);
+
+    /*
+     * When the KVM device fd is closed, the KVM device is destroyed
+     * and removed from the list of devices of the VM. The VCPU
+     * presenters are also detached from the device.
+     */
+    close(xive->fd);
+    xive->fd = -1;
+
+    kvm_kernel_irqchip = false;
+    kvm_msi_via_irqfd_allowed = false;
+    kvm_gsi_direct_mapping = false;
+
+    /* Clear the local list of presenter (hotplug) */
+    kvm_cpu_disable_all();
+
+    /* VM Change state handler is not needed anymore */
+    qemu_del_vm_change_state_handler(xive->change);
+}
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index af7dc709ab..79f5a8a916 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -610,6 +610,12 @@ static const TypeInfo ics_simple_info = {
     .class_size = sizeof(ICSStateClass),
 };
 
+static void ics_reset_irq(ICSIRQState *irq)
+{
+    irq->priority = 0xff;
+    irq->saved_priority = 0xff;
+}
+
 static void ics_base_reset(DeviceState *dev)
 {
     ICSState *ics = ICS_BASE(dev);
@@ -623,8 +629,7 @@ static void ics_base_reset(DeviceState *dev)
     memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs);
 
     for (i = 0; i < ics->nr_irqs; i++) {
-        ics->irqs[i].priority = 0xff;
-        ics->irqs[i].saved_priority = 0xff;
+        ics_reset_irq(ics->irqs + i);
         ics->irqs[i].flags = flags[i];
     }
 }
@@ -760,6 +765,7 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
         lsi ? XICS_FLAGS_IRQ_LSI : XICS_FLAGS_IRQ_MSI;
 
     if (kvm_irqchip_in_kernel()) {
+        ics_reset_irq(ics->irqs + srcno);
         ics_set_kvm_state_one(ics, srcno);
     }
 }
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 78a252e6df..5ba5b77561 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -33,6 +33,7 @@
 #include "trace.h"
 #include "sysemu/kvm.h"
 #include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_cpu_core.h"
 #include "hw/ppc/xics.h"
 #include "hw/ppc/xics_spapr.h"
 #include "kvm_ppc.h"
@@ -51,6 +52,16 @@ typedef struct KVMEnabledICP {
 static QLIST_HEAD(, KVMEnabledICP)
     kvm_enabled_icps = QLIST_HEAD_INITIALIZER(&kvm_enabled_icps);
 
+static void kvm_disable_icps(void)
+{
+    KVMEnabledICP *enabled_icp, *next;
+
+    QLIST_FOREACH_SAFE(enabled_icp, &kvm_enabled_icps, node, next) {
+        QLIST_REMOVE(enabled_icp, node);
+        g_free(enabled_icp);
+    }
+}
+
 /*
  * ICP-KVM
  */
@@ -59,6 +70,11 @@ void icp_get_kvm_state(ICPState *icp)
     uint64_t state;
     int ret;
 
+    /* The KVM XICS device is not in use */
+    if (kernel_xics_fd == -1) {
+        return;
+    }
+
     /* ICP for this CPU thread is not in use, exiting */
     if (!icp->cs) {
         return;
@@ -95,6 +111,11 @@ int icp_set_kvm_state(ICPState *icp)
     uint64_t state;
     int ret;
 
+    /* The KVM XICS device is not in use */
+    if (kernel_xics_fd == -1) {
+        return 0;
+    }
+
     /* ICP for this CPU thread is not in use, exiting */
     if (!icp->cs) {
         return 0;
@@ -123,8 +144,9 @@ void icp_kvm_realize(DeviceState *dev, Error **errp)
     unsigned long vcpu_id;
     int ret;
 
+    /* The KVM XICS device is not in use */
     if (kernel_xics_fd == -1) {
-        abort();
+        return;
     }
 
     cs = icp->cs;
@@ -160,6 +182,11 @@ void ics_get_kvm_state(ICSState *ics)
     uint64_t state;
     int i;
 
+    /* The KVM XICS device is not in use */
+    if (kernel_xics_fd == -1) {
+        return;
+    }
+
     for (i = 0; i < ics->nr_irqs; i++) {
         ICSIRQState *irq = &ics->irqs[i];
 
@@ -220,6 +247,11 @@ int ics_set_kvm_state_one(ICSState *ics, int srcno)
     ICSIRQState *irq = &ics->irqs[srcno];
     int ret;
 
+    /* The KVM XICS device is not in use */
+    if (kernel_xics_fd == -1) {
+        return 0;
+    }
+
     state = irq->server;
     state |= (uint64_t)(irq->saved_priority & KVM_XICS_PRIORITY_MASK)
         << KVM_XICS_PRIORITY_SHIFT;
@@ -259,6 +291,11 @@ int ics_set_kvm_state(ICSState *ics)
 {
     int i;
 
+    /* The KVM XICS device is not in use */
+    if (kernel_xics_fd == -1) {
+        return 0;
+    }
+
     for (i = 0; i < ics->nr_irqs; i++) {
         int ret;
 
@@ -276,6 +313,9 @@ void ics_kvm_set_irq(ICSState *ics, int srcno, int val)
     struct kvm_irq_level args;
     int rc;
 
+    /* The KVM XICS device should be in use */
+    assert(kernel_xics_fd != -1);
+
     args.irq = srcno + ics->offset;
     if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MSI) {
         if (!val) {
@@ -303,6 +343,16 @@ static void rtas_dummy(PowerPCCPU *cpu, SpaprMachineState *spapr,
 int xics_kvm_init(SpaprMachineState *spapr, Error **errp)
 {
     int rc;
+    CPUState *cs;
+    Error *local_err = NULL;
+
+    /*
+     * The KVM XICS device already in use. This is the case when
+     * rebooting under the XICS-only interrupt mode.
+     */
+    if (kernel_xics_fd != -1) {
+        return 0;
+    }
 
     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) {
         error_setg(errp,
@@ -351,6 +401,26 @@ int xics_kvm_init(SpaprMachineState *spapr, Error **errp)
     kvm_msi_via_irqfd_allowed = true;
     kvm_gsi_direct_mapping = true;
 
+    /* Create the presenters */
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+        icp_kvm_realize(DEVICE(spapr_cpu_state(cpu)->icp), &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            goto fail;
+        }
+    }
+
+    /* Update the KVM sources */
+    ics_set_kvm_state(spapr->ics);
+
+    /* Connect the presenters to the initial VCPUs of the machine */
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        icp_set_kvm_state(spapr_cpu_state(cpu)->icp);
+    }
+
     return 0;
 
 fail:
@@ -360,3 +430,44 @@ fail:
     kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
     return -1;
 }
+
+void xics_kvm_disconnect(SpaprMachineState *spapr, Error **errp)
+{
+    /* The KVM XICS device is not in use */
+    if (kernel_xics_fd == -1) {
+        return;
+    }
+
+    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) {
+        error_setg(errp,
+                   "KVM and IRQ_XICS capability must be present for KVM XICS device");
+        return;
+    }
+
+    /*
+     * Only on P9 using the XICS-on XIVE KVM device:
+     *
+     * When the KVM device fd is closed, the device is destroyed and
+     * removed from the list of devices of the VM. The VCPU presenters
+     * are also detached from the device.
+     */
+    close(kernel_xics_fd);
+    kernel_xics_fd = -1;
+
+    spapr_rtas_unregister(RTAS_IBM_SET_XIVE);
+    spapr_rtas_unregister(RTAS_IBM_GET_XIVE);
+    spapr_rtas_unregister(RTAS_IBM_INT_OFF);
+    spapr_rtas_unregister(RTAS_IBM_INT_ON);
+
+    kvmppc_define_rtas_kernel_token(0, "ibm,set-xive");
+    kvmppc_define_rtas_kernel_token(0, "ibm,get-xive");
+    kvmppc_define_rtas_kernel_token(0, "ibm,int-on");
+    kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
+
+    kvm_kernel_irqchip = false;
+    kvm_msi_via_irqfd_allowed = false;
+    kvm_gsi_direct_mapping = false;
+
+    /* Clear the presenter from the VCPUs */
+    kvm_disable_icps();
+}
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index 9d2b8adef7..5a1835e8b1 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -239,6 +239,13 @@ static void rtas_int_on(PowerPCCPU *cpu, SpaprMachineState *spapr,
 
 void xics_spapr_init(SpaprMachineState *spapr)
 {
+    /* Emulated mode can only be initialized once. */
+    if (spapr->ics->init) {
+        return;
+    }
+
+    spapr->ics->init = true;
+
     /* Registration of global state belongs into realize */
     spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
     spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index a0b87001da..0c74e47aa4 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -493,6 +493,16 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
     int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
     int i;
 
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return;
+        }
+    }
+
     monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
                    "  W2\n", cpu_index);
 
@@ -555,6 +565,15 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    /* Connect the presenter to the VCPU (required for CPU hotplug) */
+    if (kvm_irqchip_in_kernel()) {
+        kvmppc_xive_cpu_connect(tctx, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
     qemu_register_reset(xive_tctx_reset, dev);
 }
 
@@ -563,10 +582,27 @@ static void xive_tctx_unrealize(DeviceState *dev, Error **errp)
     qemu_unregister_reset(xive_tctx_reset, dev);
 }
 
+static int vmstate_xive_tctx_pre_save(void *opaque)
+{
+    Error *local_err = NULL;
+
+    if (kvm_irqchip_in_kernel()) {
+        kvmppc_xive_cpu_get_state(XIVE_TCTX(opaque), &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
 static const VMStateDescription vmstate_xive_tctx = {
     .name = TYPE_XIVE_TCTX,
     .version_id = 1,
     .minimum_version_id = 1,
+    .pre_save = vmstate_xive_tctx_pre_save,
+    .post_load = NULL, /* handled by the sPAPRxive model */
     .fields = (VMStateField[]) {
         VMSTATE_BUFFER(regs, XiveTCTX),
         VMSTATE_END_OF_LIST()
@@ -990,9 +1026,11 @@ static void xive_source_realize(DeviceState *dev, Error **errp)
     xsrc->status = g_malloc0(xsrc->nr_irqs);
     xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
 
-    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
-                          &xive_source_esb_ops, xsrc, "xive.esb",
-                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
+    if (!kvm_irqchip_in_kernel()) {
+        memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
+                              &xive_source_esb_ops, xsrc, "xive.esb",
+                              (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
+    }
 
     qemu_register_reset(xive_source_reset, dev);
 }
@@ -1042,8 +1080,7 @@ static const TypeInfo xive_source_info = {
 
 void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon)
 {
-    uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
-        | be32_to_cpu(end->w3);
+    uint64_t qaddr_base = xive_end_qaddr(end);
     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
     uint32_t qentries = 1 << (qsize + 10);
@@ -1072,8 +1109,7 @@ void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon)
 
 void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
 {
-    uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
-        | be32_to_cpu(end->w3);
+    uint64_t qaddr_base = xive_end_qaddr(end);
     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
@@ -1101,8 +1137,7 @@ void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
 
 static void xive_end_enqueue(XiveEND *end, uint32_t data)
 {
-    uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
-        | be32_to_cpu(end->w3);
+    uint64_t qaddr_base = xive_end_qaddr(end);
     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
diff --git a/hw/isa/i82378.c b/hw/isa/i82378.c
index a5d67bc6d7..c08970b24a 100644
--- a/hw/isa/i82378.c
+++ b/hw/isa/i82378.c
@@ -21,7 +21,6 @@
 #include "hw/pci/pci.h"
 #include "hw/i386/pc.h"
 #include "hw/timer/i8254.h"
-#include "hw/timer/mc146818rtc.h"
 #include "hw/audio/pcspk.h"
 
 #define TYPE_I82378 "i82378"
@@ -105,9 +104,6 @@ static void i82378_realize(PCIDevice *pci, Error **errp)
 
     /* 2 82C37 (dma) */
     isa = isa_create_simple(isabus, "i82374");
-
-    /* timer */
-    isa_create_simple(isabus, TYPE_MC146818_RTC);
 }
 
 static void i82378_init(Object *obj)
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
index 91af452c9e..19e5545e2c 100644
--- a/hw/misc/edu.c
+++ b/hw/misc/edu.c
@@ -98,23 +98,24 @@ static void edu_lower_irq(EduState *edu, uint32_t val)
     }
 }
 
-static bool within(uint32_t addr, uint32_t start, uint32_t end)
+static bool within(uint64_t addr, uint64_t start, uint64_t end)
 {
     return start <= addr && addr < end;
 }
 
-static void edu_check_range(uint32_t addr, uint32_t size1, uint32_t start,
-                uint32_t size2)
+static void edu_check_range(uint64_t addr, uint64_t size1, uint64_t start,
+                uint64_t size2)
 {
-    uint32_t end1 = addr + size1;
-    uint32_t end2 = start + size2;
+    uint64_t end1 = addr + size1;
+    uint64_t end2 = start + size2;
 
     if (within(addr, start, end2) &&
             end1 > addr && within(end1, start, end2)) {
         return;
     }
 
-    hw_error("EDU: DMA range 0x%.8x-0x%.8x out of bounds (0x%.8x-0x%.8x)!",
+    hw_error("EDU: DMA range 0x%016"PRIx64"-0x%016"PRIx64
+             " out of bounds (0x%016"PRIx64"-0x%016"PRIx64")!",
             addr, end1 - 1, start, end2 - 1);
 }
 
@@ -139,13 +140,13 @@ static void edu_dma_timer(void *opaque)
     }
 
     if (EDU_DMA_DIR(edu->dma.cmd) == EDU_DMA_FROM_PCI) {
-        uint32_t dst = edu->dma.dst;
+        uint64_t dst = edu->dma.dst;
         edu_check_range(dst, edu->dma.cnt, DMA_START, DMA_SIZE);
         dst -= DMA_START;
         pci_dma_read(&edu->pdev, edu_clamp_addr(edu, edu->dma.src),
                 edu->dma_buf + dst, edu->dma.cnt);
     } else {
-        uint32_t src = edu->dma.src;
+        uint64_t src = edu->dma.src;
         edu_check_range(src, edu->dma.cnt, DMA_START, DMA_SIZE);
         src -= DMA_START;
         pci_dma_write(&edu->pdev, edu_clamp_addr(edu, edu->dma.dst),
@@ -185,7 +186,11 @@ static uint64_t edu_mmio_read(void *opaque, hwaddr addr, unsigned size)
     EduState *edu = opaque;
     uint64_t val = ~0ULL;
 
-    if (size != 4) {
+    if (addr < 0x80 && size != 4) {
+        return val;
+    }
+
+    if (addr >= 0x80 && size != 4 && size != 8) {
         return val;
     }
 
@@ -289,6 +294,15 @@ static const MemoryRegionOps edu_mmio_ops = {
     .read = edu_mmio_read,
     .write = edu_mmio_write,
     .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 8,
+    },
+
 };
 
 /*
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index a3465155f0..f927ec9c74 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -122,3 +122,8 @@ config XIVE_SPAPR
     default y
     depends on PSERIES
     select XIVE
+
+config XIVE_KVM
+    bool
+    default y
+    depends on XIVE_SPAPR && KVM
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 31aa20ee25..046f0a83c8 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -450,7 +450,8 @@ static void pnv_dt_power_mgt(void *fdt)
 
 static void *pnv_dt_create(MachineState *machine)
 {
-    const char plat_compat[] = "qemu,powernv\0ibm,powernv";
+    const char plat_compat8[] = "qemu,powernv8\0qemu,powernv\0ibm,powernv";
+    const char plat_compat9[] = "qemu,powernv9\0ibm,powernv";
     PnvMachineState *pnv = PNV_MACHINE(machine);
     void *fdt;
     char *buf;
@@ -465,8 +466,14 @@ static void *pnv_dt_create(MachineState *machine)
     _FDT((fdt_setprop_cell(fdt, 0, "#size-cells", 0x2)));
     _FDT((fdt_setprop_string(fdt, 0, "model",
                              "IBM PowerNV (emulated by qemu)")));
-    _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat,
-                      sizeof(plat_compat))));
+    if (pnv_is_power9(pnv)) {
+        _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat9,
+                          sizeof(plat_compat9))));
+    } else {
+        _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat8,
+                          sizeof(plat_compat8))));
+    }
+
 
     buf =  qemu_uuid_unparse_strdup(&qemu_uuid);
     _FDT((fdt_setprop_string(fdt, 0, "vm,uuid", buf)));
diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c
index c285ef514e..f53a6d7a94 100644
--- a/hw/ppc/pnv_xscom.c
+++ b/hw/ppc/pnv_xscom.c
@@ -29,6 +29,12 @@
 
 #include <libfdt.h>
 
+/* PRD registers */
+#define PRD_P8_IPOLL_REG_MASK           0x01020013
+#define PRD_P8_IPOLL_REG_STATUS         0x01020014
+#define PRD_P9_IPOLL_REG_MASK           0x000F0033
+#define PRD_P9_IPOLL_REG_STATUS         0x000F0034
+
 static void xscom_complete(CPUState *cs, uint64_t hmer_bits)
 {
     /*
@@ -70,6 +76,12 @@ static uint64_t xscom_read_default(PnvChip *chip, uint32_t pcba)
     case 0x1010c00:     /* PIBAM FIR */
     case 0x1010c03:     /* PIBAM FIR MASK */
 
+        /* PRD registers */
+    case PRD_P8_IPOLL_REG_MASK:
+    case PRD_P8_IPOLL_REG_STATUS:
+    case PRD_P9_IPOLL_REG_MASK:
+    case PRD_P9_IPOLL_REG_STATUS:
+
         /* P9 xscom reset */
     case 0x0090018:     /* Receive status reg */
     case 0x0090012:     /* log register */
@@ -124,6 +136,12 @@ static bool xscom_write_default(PnvChip *chip, uint32_t pcba, uint64_t val)
     case 0x201302a:     /* CAPP stuff */
     case 0x2013801:     /* CAPP stuff */
     case 0x2013802:     /* CAPP stuff */
+
+        /* P8 PRD registers */
+    case PRD_P8_IPOLL_REG_MASK:
+    case PRD_P8_IPOLL_REG_STATUS:
+    case PRD_P9_IPOLL_REG_MASK:
+    case PRD_P9_IPOLL_REG_STATUS:
         return true;
     default:
         return false;
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index b7f459d475..2a8009e20b 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -601,7 +601,7 @@ static int prep_set_cmos_checksum(DeviceState *dev, void *opaque)
     uint16_t checksum = *(uint16_t *)opaque;
     ISADevice *rtc;
 
-    if (object_dynamic_cast(OBJECT(dev), "mc146818rtc")) {
+    if (object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC)) {
         rtc = ISA_DEVICE(dev);
         rtc_set_memory(rtc, 0x2e, checksum & 0xff);
         rtc_set_memory(rtc, 0x3e, checksum & 0xff);
@@ -675,6 +675,11 @@ static void ibm_40p_init(MachineState *machine)
     qdev_prop_set_uint32(dev, "ram-size", machine->ram_size);
     qdev_init_nofail(dev);
 
+    /* RTC */
+    dev = DEVICE(isa_create(isa_bus, TYPE_MC146818_RTC));
+    qdev_prop_set_int32(dev, "base_year", 1900);
+    qdev_init_nofail(dev);
+
     /* initialize CMOS checksums */
     cmos_checksum = 0x6aa9;
     qbus_walk_children(BUS(isa_bus), prep_set_cmos_checksum, NULL, NULL, NULL,
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 2ef3ce4362..e2b33e5890 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -500,7 +500,10 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
     _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
 
     if (env->spr_cb[SPR_PURR].oea_read) {
-        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,purr", 1)));
+    }
+    if (env->spr_cb[SPR_SPURR].oea_read) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,spurr", 1)));
     }
 
     if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
@@ -2122,6 +2125,7 @@ static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_cap_cfpc,
         &vmstate_spapr_cap_sbbc,
         &vmstate_spapr_cap_ibs,
+        &vmstate_spapr_cap_hpt_maxpagesize,
         &vmstate_spapr_irq_map,
         &vmstate_spapr_cap_nested_kvm_hv,
         &vmstate_spapr_dtb,
@@ -4348,7 +4352,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
     spapr_caps_add_properties(smc, &error_abort);
-    smc->irq = &spapr_irq_xics;
+    smc->irq = &spapr_irq_dual;
     smc->dr_phb_enabled = true;
 }
 
@@ -4407,18 +4411,7 @@ DEFINE_SPAPR_MACHINE(4_1, "4.1", true);
 /*
  * pseries-4.0
  */
-static void spapr_machine_4_0_class_options(MachineClass *mc)
-{
-    spapr_machine_4_1_class_options(mc);
-    compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
-}
-
-DEFINE_SPAPR_MACHINE(4_0, "4.0", false);
-
-/*
- * pseries-3.1
- */
-static void phb_placement_3_1(SpaprMachineState *spapr, uint32_t index,
+static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index,
                               uint64_t *buid, hwaddr *pio,
                               hwaddr *mmio32, hwaddr *mmio64,
                               unsigned n_dma, uint32_t *liobns,
@@ -4430,6 +4423,22 @@ static void phb_placement_3_1(SpaprMachineState *spapr, uint32_t index,
     *nv2atsd = 0;
 }
 
+static void spapr_machine_4_0_class_options(MachineClass *mc)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+    spapr_machine_4_1_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
+    smc->phb_placement = phb_placement_4_0;
+    smc->irq = &spapr_irq_xics;
+    smc->pre_4_1_migration = true;
+}
+
+DEFINE_SPAPR_MACHINE(4_0, "4.0", false);
+
+/*
+ * pseries-3.1
+ */
 static void spapr_machine_3_1_class_options(MachineClass *mc)
 {
     SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
@@ -4445,7 +4454,6 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
     smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
     smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
     smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
-    smc->phb_placement = phb_placement_3_1;
 }
 
 DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 9b1c10baa6..31b4661399 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -64,6 +64,7 @@ typedef struct SpaprCapabilityInfo {
     void (*apply)(SpaprMachineState *spapr, uint8_t val, Error **errp);
     void (*cpu_apply)(SpaprMachineState *spapr, PowerPCCPU *cpu,
                       uint8_t val, Error **errp);
+    bool (*migrate_needed)(void *opaque);
 } SpaprCapabilityInfo;
 
 static void spapr_cap_get_bool(Object *obj, Visitor *v, const char *name,
@@ -350,6 +351,11 @@ static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr,
     spapr_check_pagesize(spapr, qemu_minrampagesize(), errp);
 }
 
+static bool cap_hpt_maxpagesize_migrate_needed(void *opaque)
+{
+    return !SPAPR_MACHINE_GET_CLASS(opaque)->pre_4_1_migration;
+}
+
 static bool spapr_pagesize_cb(void *opaque, uint32_t seg_pshift,
                               uint32_t pshift)
 {
@@ -542,6 +548,7 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
         .type = "int",
         .apply = cap_hpt_maxpagesize_apply,
         .cpu_apply = cap_hpt_maxpagesize_cpu_apply,
+        .migrate_needed = cap_hpt_maxpagesize_migrate_needed,
     },
     [SPAPR_CAP_NESTED_KVM_HV] = {
         .name = "nested-hv",
@@ -679,8 +686,11 @@ int spapr_caps_post_migration(SpaprMachineState *spapr)
 static bool spapr_cap_##sname##_needed(void *opaque)    \
 {                                                       \
     SpaprMachineState *spapr = opaque;                  \
+    bool (*needed)(void *opaque) =                      \
+        capability_table[cap].migrate_needed;           \
                                                         \
-    return spapr->cmd_line_caps[cap] &&                 \
+    return needed ? needed(opaque) : true &&            \
+           spapr->cmd_line_caps[cap] &&                 \
            (spapr->eff.caps[cap] !=                     \
             spapr->def.caps[cap]);                      \
 }                                                       \
@@ -703,6 +713,7 @@ SPAPR_CAP_MIG_STATE(dfp, SPAPR_CAP_DFP);
 SPAPR_CAP_MIG_STATE(cfpc, SPAPR_CAP_CFPC);
 SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
 SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
+SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index f04e06cdf6..5621fb9a3d 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -58,9 +58,11 @@ static void spapr_cpu_reset(void *opaque)
      *
      * Disable Power-saving mode Exit Cause exceptions for the CPU, so
      * we don't get spurious wakups before an RTAS start-cpu call.
+     * For the same reason, set PSSCR_EC.
      */
     lpcr &= ~(LPCR_VPM0 | LPCR_VPM1 | LPCR_ISL | LPCR_KBV | pcc->lpcr_pm);
     lpcr |= LPCR_LPES0 | LPCR_LPES1;
+    env->spr[SPR_PSSCR] |= PSSCR_EC;
 
     /* Set RMLS to the max (ie, 16G) */
     lpcr &= ~LPCR_RMLS;
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 6c16d2b120..0a050ad3d8 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1513,6 +1513,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
     bool guest_radix;
     Error *local_err = NULL;
     bool raw_mode_supported = false;
+    bool guest_xive;
 
     cas_pvr = cas_check_pvr(spapr, cpu, &addr, &raw_mode_supported, &local_err);
     if (local_err) {
@@ -1545,10 +1546,17 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
         error_report("guest requested hash and radix MMU, which is invalid.");
         exit(EXIT_FAILURE);
     }
+    if (spapr_ovec_test(ov5_guest, OV5_XIVE_BOTH)) {
+        error_report("guest requested an invalid interrupt mode");
+        exit(EXIT_FAILURE);
+    }
+
     /* The radix/hash bit in byte 24 requires special handling: */
     guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300);
     spapr_ovec_clear(ov5_guest, OV5_MMU_RADIX_300);
 
+    guest_xive = spapr_ovec_test(ov5_guest, OV5_XIVE_EXPLOIT);
+
     /*
      * HPT resizing is a bit of a special case, because when enabled
      * we assume an HPT guest will support it until it says it
@@ -1633,6 +1641,24 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
     }
 
     /*
+     * Ensure the guest asks for an interrupt mode we support; otherwise
+     * terminate the boot.
+     */
+    if (guest_xive) {
+        if (spapr->irq->ov5 == SPAPR_OV5_XIVE_LEGACY) {
+            error_report(
+"Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property");
+            exit(EXIT_FAILURE);
+        }
+    } else {
+        if (spapr->irq->ov5 == SPAPR_OV5_XIVE_EXPLOIT) {
+            error_report(
+"Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual");
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    /*
      * Generate a machine reset when we have an update of the
      * interrupt mode. Only required when the machine supports both
      * modes.
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index b1f79ea9de..3156daf093 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -62,38 +62,46 @@ void spapr_irq_msi_reset(SpaprMachineState *spapr)
     bitmap_clear(spapr->irq_map, 0, spapr->irq_map_nr);
 }
 
-
-/*
- * XICS IRQ backend.
- */
-
-static void spapr_irq_init_xics(SpaprMachineState *spapr, int nr_irqs,
-                                Error **errp)
+static void spapr_irq_init_device(SpaprMachineState *spapr,
+                                  SpaprIrq *irq, Error **errp)
 {
     MachineState *machine = MACHINE(spapr);
-    Object *obj;
     Error *local_err = NULL;
-    bool xics_kvm = false;
 
-    if (kvm_enabled()) {
-        if (machine_kernel_irqchip_allowed(machine) &&
-            !xics_kvm_init(spapr, &local_err)) {
-            xics_kvm = true;
-        }
-        if (machine_kernel_irqchip_required(machine) && !xics_kvm) {
+    if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) {
+        irq->init_kvm(spapr, &local_err);
+        if (local_err && machine_kernel_irqchip_required(machine)) {
             error_prepend(&local_err,
                           "kernel_irqchip requested but unavailable: ");
             error_propagate(errp, local_err);
             return;
         }
-        error_free(local_err);
-        local_err = NULL;
-    }
 
-    if (!xics_kvm) {
-        xics_spapr_init(spapr);
+        if (!local_err) {
+            return;
+        }
+
+        /*
+         * We failed to initialize the KVM device, fallback to
+         * emulated mode
+         */
+        error_prepend(&local_err, "kernel_irqchip allowed but unavailable: ");
+        warn_report_err(local_err);
     }
 
+    irq->init_emu(spapr, errp);
+}
+
+/*
+ * XICS IRQ backend.
+ */
+
+static void spapr_irq_init_xics(SpaprMachineState *spapr, int nr_irqs,
+                                Error **errp)
+{
+    Object *obj;
+    Error *local_err = NULL;
+
     obj = object_new(TYPE_ICS_SIMPLE);
     object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort);
     object_property_add_const_link(obj, ICS_PROP_XICS, OBJECT(spapr),
@@ -212,7 +220,13 @@ static void spapr_irq_set_irq_xics(void *opaque, int srcno, int val)
 
 static void spapr_irq_reset_xics(SpaprMachineState *spapr, Error **errp)
 {
-    /* TODO: create the KVM XICS device */
+    Error *local_err = NULL;
+
+    spapr_irq_init_device(spapr, &spapr_irq_xics, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
 }
 
 static const char *spapr_irq_get_nodename_xics(SpaprMachineState *spapr)
@@ -220,6 +234,18 @@ static const char *spapr_irq_get_nodename_xics(SpaprMachineState *spapr)
     return XICS_NODENAME;
 }
 
+static void spapr_irq_init_emu_xics(SpaprMachineState *spapr, Error **errp)
+{
+    xics_spapr_init(spapr);
+}
+
+static void spapr_irq_init_kvm_xics(SpaprMachineState *spapr, Error **errp)
+{
+    if (kvm_enabled()) {
+        xics_kvm_init(spapr, errp);
+    }
+}
+
 #define SPAPR_IRQ_XICS_NR_IRQS     0x1000
 #define SPAPR_IRQ_XICS_NR_MSIS     \
     (XICS_IRQ_BASE + SPAPR_IRQ_XICS_NR_IRQS - SPAPR_IRQ_MSI)
@@ -240,6 +266,8 @@ SpaprIrq spapr_irq_xics = {
     .reset       = spapr_irq_reset_xics,
     .set_irq     = spapr_irq_set_irq_xics,
     .get_nodename = spapr_irq_get_nodename_xics,
+    .init_emu    = spapr_irq_init_emu_xics,
+    .init_kvm    = spapr_irq_init_kvm_xics,
 };
 
 /*
@@ -248,19 +276,10 @@ SpaprIrq spapr_irq_xics = {
 static void spapr_irq_init_xive(SpaprMachineState *spapr, int nr_irqs,
                                 Error **errp)
 {
-    MachineState *machine = MACHINE(spapr);
     uint32_t nr_servers = spapr_max_server_number(spapr);
     DeviceState *dev;
     int i;
 
-    /* KVM XIVE device not yet available */
-    if (kvm_enabled()) {
-        if (machine_kernel_irqchip_required(machine)) {
-            error_setg(errp, "kernel_irqchip requested. no KVM XIVE support");
-            return;
-        }
-    }
-
     dev = qdev_create(NULL, TYPE_SPAPR_XIVE);
     qdev_prop_set_uint32(dev, "nr-irqs", nr_irqs);
     /*
@@ -350,12 +369,13 @@ static void spapr_irq_cpu_intc_create_xive(SpaprMachineState *spapr,
 
 static int spapr_irq_post_load_xive(SpaprMachineState *spapr, int version_id)
 {
-    return 0;
+    return spapr_xive_post_load(spapr->xive, version_id);
 }
 
 static void spapr_irq_reset_xive(SpaprMachineState *spapr, Error **errp)
 {
     CPUState *cs;
+    Error *local_err = NULL;
 
     CPU_FOREACH(cs) {
         PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -364,6 +384,12 @@ static void spapr_irq_reset_xive(SpaprMachineState *spapr, Error **errp)
         spapr_xive_set_tctx_os_cam(spapr_cpu_state(cpu)->tctx);
     }
 
+    spapr_irq_init_device(spapr, &spapr_irq_xive, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
     /* Activate the XIVE MMIOs */
     spapr_xive_mmio_set_enabled(spapr->xive, true);
 }
@@ -372,7 +398,11 @@ static void spapr_irq_set_irq_xive(void *opaque, int srcno, int val)
 {
     SpaprMachineState *spapr = opaque;
 
-    xive_source_set_irq(&spapr->xive->source, srcno, val);
+    if (kvm_irqchip_in_kernel()) {
+        kvmppc_xive_source_set_irq(&spapr->xive->source, srcno, val);
+    } else {
+        xive_source_set_irq(&spapr->xive->source, srcno, val);
+    }
 }
 
 static const char *spapr_irq_get_nodename_xive(SpaprMachineState *spapr)
@@ -380,6 +410,18 @@ static const char *spapr_irq_get_nodename_xive(SpaprMachineState *spapr)
     return spapr->xive->nodename;
 }
 
+static void spapr_irq_init_emu_xive(SpaprMachineState *spapr, Error **errp)
+{
+    spapr_xive_init(spapr->xive, errp);
+}
+
+static void spapr_irq_init_kvm_xive(SpaprMachineState *spapr, Error **errp)
+{
+    if (kvm_enabled()) {
+        kvmppc_xive_connect(spapr->xive, errp);
+    }
+}
+
 /*
  * XIVE uses the full IRQ number space. Set it to 8K to be compatible
  * with XICS.
@@ -404,6 +446,8 @@ SpaprIrq spapr_irq_xive = {
     .reset       = spapr_irq_reset_xive,
     .set_irq     = spapr_irq_set_irq_xive,
     .get_nodename = spapr_irq_get_nodename_xive,
+    .init_emu    = spapr_irq_init_emu_xive,
+    .init_kvm    = spapr_irq_init_kvm_xive,
 };
 
 /*
@@ -428,14 +472,8 @@ static SpaprIrq *spapr_irq_current(SpaprMachineState *spapr)
 static void spapr_irq_init_dual(SpaprMachineState *spapr, int nr_irqs,
                                 Error **errp)
 {
-    MachineState *machine = MACHINE(spapr);
     Error *local_err = NULL;
 
-    if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) {
-        error_setg(errp, "No KVM support for the 'dual' machine");
-        return;
-    }
-
     spapr_irq_xics.init(spapr, spapr_irq_xics.nr_irqs, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -514,6 +552,9 @@ static int spapr_irq_post_load_dual(SpaprMachineState *spapr, int version_id)
      * defaults to XICS at startup.
      */
     if (spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+        if (kvm_irqchip_in_kernel()) {
+            xics_kvm_disconnect(spapr, &error_fatal);
+        }
         spapr_irq_xive.reset(spapr, &error_fatal);
     }
 
@@ -522,12 +563,30 @@ static int spapr_irq_post_load_dual(SpaprMachineState *spapr, int version_id)
 
 static void spapr_irq_reset_dual(SpaprMachineState *spapr, Error **errp)
 {
+    Error *local_err = NULL;
+
     /*
      * Deactivate the XIVE MMIOs. The XIVE backend will reenable them
      * if selected.
      */
     spapr_xive_mmio_set_enabled(spapr->xive, false);
 
+    /* Destroy all KVM devices */
+    if (kvm_irqchip_in_kernel()) {
+        xics_kvm_disconnect(spapr, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            error_prepend(errp, "KVM XICS disconnect failed: ");
+            return;
+        }
+        kvmppc_xive_disconnect(spapr->xive, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            error_prepend(errp, "KVM XIVE disconnect failed: ");
+            return;
+        }
+    }
+
     spapr_irq_current(spapr)->reset(spapr, errp);
 }
 
@@ -565,6 +624,8 @@ SpaprIrq spapr_irq_dual = {
     .reset       = spapr_irq_reset_dual,
     .set_irq     = spapr_irq_set_irq_dual,
     .get_nodename = spapr_irq_get_nodename_dual,
+    .init_emu    = NULL, /* should not be used */
+    .init_kvm    = NULL, /* should not be used */
 };
 
 
@@ -763,6 +824,9 @@ SpaprIrq spapr_irq_xics_legacy = {
     .dt_populate = spapr_dt_xics,
     .cpu_intc_create = spapr_irq_cpu_intc_create_xics,
     .post_load   = spapr_irq_post_load_xics,
+    .reset       = spapr_irq_reset_xics,
     .set_irq     = spapr_irq_set_irq_xics,
     .get_nodename = spapr_irq_get_nodename_xics,
+    .init_emu    = spapr_irq_init_emu_xics,
+    .init_kvm    = spapr_irq_init_kvm_xics,
 };
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index ee24212765..5bc1a93271 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -177,6 +177,7 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr,
         } else {
             lpcr &= ~(LPCR_UPRT | LPCR_GTSE | LPCR_HR);
         }
+        env->spr[SPR_PSSCR] &= ~PSSCR_EC;
     }
     ppc_store_lpcr(newcpu, lpcr);
 
@@ -205,8 +206,11 @@ static void rtas_stop_self(PowerPCCPU *cpu, SpaprMachineState *spapr,
 
     /* Disable Power-saving mode Exit Cause exceptions for the CPU.
      * This could deliver an interrupt on a dying CPU and crash the
-     * guest */
+     * guest.
+     * For the same reason, set PSSCR_EC.
+     */
     ppc_store_lpcr(cpu, env->spr[SPR_LPCR] & ~pcc->lpcr_pm);
+    env->spr[SPR_PSSCR] |= PSSCR_EC;
     cs->halted = 1;
     kvmppc_set_reg_ppc_online(cpu, 0);
     qemu_cpu_kick(cs);
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index e7e865ab3b..7b89ac798b 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2336,6 +2336,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
         return;
     }
 
+    if (blk_get_aio_context(s->qdev.conf.blk) != qemu_get_aio_context() &&
+        !s->qdev.hba_supports_iothread)
+    {
+        error_setg(errp, "HBA does not support iothreads");
+        return;
+    }
+
     if (dev->type == TYPE_DISK) {
         if (!blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, errp)) {
             return;
@@ -2417,7 +2424,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
     if (!dev->conf.blk) {
         /* Anonymous BlockBackend for an empty drive. As we put it into
          * dev->conf, qdev takes care of detaching on unplug. */
-        dev->conf.blk = blk_new(0, BLK_PERM_ALL);
+        dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
         ret = blk_attach_dev(dev->conf.blk, &dev->qdev);
         assert(ret == 0);
     }
@@ -2929,13 +2936,14 @@ static const TypeInfo scsi_disk_base_info = {
     .abstract      = true,
 };
 
-#define DEFINE_SCSI_DISK_PROPERTIES()                                \
-    DEFINE_BLOCK_PROPERTIES(SCSIDiskState, qdev.conf),               \
-    DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),         \
-    DEFINE_PROP_STRING("ver", SCSIDiskState, version),               \
-    DEFINE_PROP_STRING("serial", SCSIDiskState, serial),             \
-    DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor),             \
-    DEFINE_PROP_STRING("product", SCSIDiskState, product),           \
+#define DEFINE_SCSI_DISK_PROPERTIES()                                   \
+    DEFINE_PROP_DRIVE_IOTHREAD("drive", SCSIDiskState, qdev.conf.blk),  \
+    DEFINE_BLOCK_PROPERTIES_BASE(SCSIDiskState, qdev.conf),             \
+    DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),            \
+    DEFINE_PROP_STRING("ver", SCSIDiskState, version),                  \
+    DEFINE_PROP_STRING("serial", SCSIDiskState, serial),                \
+    DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor),                \
+    DEFINE_PROP_STRING("product", SCSIDiskState, product),              \
     DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id)
 
 
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 839f120256..2994f0738f 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -789,28 +789,31 @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense)
     }
 }
 
+static void virtio_scsi_pre_hotplug(HotplugHandler *hotplug_dev,
+                                    DeviceState *dev, Error **errp)
+{
+    SCSIDevice *sd = SCSI_DEVICE(dev);
+    sd->hba_supports_iothread = true;
+}
+
 static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
                                 Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev);
     VirtIOSCSI *s = VIRTIO_SCSI(vdev);
     SCSIDevice *sd = SCSI_DEVICE(dev);
+    int ret;
 
     if (s->ctx && !s->dataplane_fenced) {
-        AioContext *ctx;
         if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) {
             return;
         }
-        ctx = blk_get_aio_context(sd->conf.blk);
-        if (ctx != s->ctx && ctx != qemu_get_aio_context()) {
-            error_setg(errp, "Cannot attach a blockdev that is using "
-                       "a different iothread");
-            return;
-        }
         virtio_scsi_acquire(s);
-        blk_set_aio_context(sd->conf.blk, s->ctx);
+        ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp);
         virtio_scsi_release(s);
-
+        if (ret < 0) {
+            return;
+        }
     }
 
     if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
@@ -839,7 +842,8 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
 
     if (s->ctx) {
         virtio_scsi_acquire(s);
-        blk_set_aio_context(sd->conf.blk, qemu_get_aio_context());
+        /* If other users keep the BlockBackend in the iothread, that's ok */
+        blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL);
         virtio_scsi_release(s);
     }
 
@@ -986,6 +990,7 @@ static void virtio_scsi_class_init(ObjectClass *klass, void *data)
     vdc->reset = virtio_scsi_reset;
     vdc->start_ioeventfd = virtio_scsi_dataplane_start;
     vdc->stop_ioeventfd = virtio_scsi_dataplane_stop;
+    hc->pre_plug = virtio_scsi_pre_hotplug;
     hc->plug = virtio_scsi_hotplug;
     hc->unplug = virtio_scsi_hotunplug;
 }
diff --git a/hw/usb/core.c b/hw/usb/core.c
index 8fbd9c7d57..3ab48a1607 100644
--- a/hw/usb/core.c
+++ b/hw/usb/core.c
@@ -87,10 +87,10 @@ void usb_device_reset(USBDevice *dev)
     if (dev == NULL || !dev->attached) {
         return;
     }
+    usb_device_handle_reset(dev);
     dev->remote_wakeup = 0;
     dev->addr = 0;
     dev->state = USB_STATE_DEFAULT;
-    usb_device_handle_reset(dev);
 }
 
 void usb_wakeup(USBEndpoint *ep, unsigned int stream)
diff --git a/hw/usb/dev-hub.c b/hw/usb/dev-hub.c
index 7e9339b8a8..2b64d6ef03 100644
--- a/hw/usb/dev-hub.c
+++ b/hw/usb/dev-hub.c
@@ -24,12 +24,13 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
+#include "qemu/timer.h"
 #include "trace.h"
 #include "hw/usb.h"
 #include "desc.h"
 #include "qemu/error-report.h"
 
-#define NUM_PORTS 8
+#define MAX_PORTS 8
 
 typedef struct USBHubPort {
     USBPort port;
@@ -40,7 +41,10 @@ typedef struct USBHubPort {
 typedef struct USBHubState {
     USBDevice dev;
     USBEndpoint *intr;
-    USBHubPort ports[NUM_PORTS];
+    uint32_t num_ports;
+    bool port_power;
+    QEMUTimer *port_timer;
+    USBHubPort ports[MAX_PORTS];
 } USBHubState;
 
 #define TYPE_USB_HUB "usb-hub"
@@ -109,7 +113,7 @@ static const USBDescIface desc_iface_hub = {
         {
             .bEndpointAddress      = USB_DIR_IN | 0x01,
             .bmAttributes          = USB_ENDPOINT_XFER_INT,
-            .wMaxPacketSize        = 1 + DIV_ROUND_UP(NUM_PORTS, 8),
+            .wMaxPacketSize        = 1 + DIV_ROUND_UP(MAX_PORTS, 8),
             .bInterval             = 0xff,
         },
     }
@@ -158,19 +162,71 @@ static const uint8_t qemu_hub_hub_descriptor[] =
         /* DeviceRemovable and PortPwrCtrlMask patched in later */
 };
 
+static bool usb_hub_port_change(USBHubPort *port, uint16_t status)
+{
+    bool notify = false;
+
+    if (status & 0x1f) {
+        port->wPortChange |= status;
+        notify = true;
+    }
+    return notify;
+}
+
+static bool usb_hub_port_set(USBHubPort *port, uint16_t status)
+{
+    if (port->wPortStatus & status) {
+        return false;
+    }
+    port->wPortStatus |= status;
+    return usb_hub_port_change(port, status);
+}
+
+static bool usb_hub_port_clear(USBHubPort *port, uint16_t status)
+{
+    if (!(port->wPortStatus & status)) {
+        return false;
+    }
+    port->wPortStatus &= ~status;
+    return usb_hub_port_change(port, status);
+}
+
+static bool usb_hub_port_update(USBHubPort *port)
+{
+    bool notify = false;
+
+    if (port->port.dev && port->port.dev->attached) {
+        notify = usb_hub_port_set(port, PORT_STAT_CONNECTION);
+        if (port->port.dev->speed == USB_SPEED_LOW) {
+            usb_hub_port_set(port, PORT_STAT_LOW_SPEED);
+        } else {
+            usb_hub_port_clear(port, PORT_STAT_LOW_SPEED);
+        }
+    }
+    return notify;
+}
+
+static void usb_hub_port_update_timer(void *opaque)
+{
+    USBHubState *s = opaque;
+    bool notify = false;
+    int i;
+
+    for (i = 0; i < s->num_ports; i++) {
+        notify |= usb_hub_port_update(&s->ports[i]);
+    }
+    if (notify) {
+        usb_wakeup(s->intr, 0);
+    }
+}
+
 static void usb_hub_attach(USBPort *port1)
 {
     USBHubState *s = port1->opaque;
     USBHubPort *port = &s->ports[port1->index];
 
     trace_usb_hub_attach(s->dev.addr, port1->index + 1);
-    port->wPortStatus |= PORT_STAT_CONNECTION;
-    port->wPortChange |= PORT_STAT_C_CONNECTION;
-    if (port->port.dev->speed == USB_SPEED_LOW) {
-        port->wPortStatus |= PORT_STAT_LOW_SPEED;
-    } else {
-        port->wPortStatus &= ~PORT_STAT_LOW_SPEED;
-    }
+    usb_hub_port_update(port);
     usb_wakeup(s->intr, 0);
 }
 
@@ -185,16 +241,9 @@ static void usb_hub_detach(USBPort *port1)
     /* Let upstream know the device on this port is gone */
     s->dev.port->ops->child_detach(s->dev.port, port1->dev);
 
-    port->wPortStatus &= ~PORT_STAT_CONNECTION;
-    port->wPortChange |= PORT_STAT_C_CONNECTION;
-    if (port->wPortStatus & PORT_STAT_ENABLE) {
-        port->wPortStatus &= ~PORT_STAT_ENABLE;
-        port->wPortChange |= PORT_STAT_C_ENABLE;
-    }
-    if (port->wPortStatus & PORT_STAT_SUSPEND) {
-        port->wPortStatus &= ~PORT_STAT_SUSPEND;
-        port->wPortChange |= PORT_STAT_C_SUSPEND;
-    }
+    usb_hub_port_clear(port, PORT_STAT_CONNECTION);
+    usb_hub_port_clear(port, PORT_STAT_ENABLE);
+    usb_hub_port_clear(port, PORT_STAT_SUSPEND);
     usb_wakeup(s->intr, 0);
 }
 
@@ -211,9 +260,7 @@ static void usb_hub_wakeup(USBPort *port1)
     USBHubState *s = port1->opaque;
     USBHubPort *port = &s->ports[port1->index];
 
-    if (port->wPortStatus & PORT_STAT_SUSPEND) {
-        port->wPortStatus &= ~PORT_STAT_SUSPEND;
-        port->wPortChange |= PORT_STAT_C_SUSPEND;
+    if (usb_hub_port_clear(port, PORT_STAT_SUSPEND)) {
         usb_wakeup(s->intr, 0);
     }
 }
@@ -242,7 +289,7 @@ static USBDevice *usb_hub_find_device(USBDevice *dev, uint8_t addr)
     USBDevice *downstream;
     int i;
 
-    for (i = 0; i < NUM_PORTS; i++) {
+    for (i = 0; i < s->num_ports; i++) {
         port = &s->ports[i];
         if (!(port->wPortStatus & PORT_STAT_ENABLE)) {
             continue;
@@ -262,17 +309,12 @@ static void usb_hub_handle_reset(USBDevice *dev)
     int i;
 
     trace_usb_hub_reset(s->dev.addr);
-    for (i = 0; i < NUM_PORTS; i++) {
+    for (i = 0; i < s->num_ports; i++) {
         port = s->ports + i;
-        port->wPortStatus = PORT_STAT_POWER;
+        port->wPortStatus = 0;
         port->wPortChange = 0;
-        if (port->port.dev && port->port.dev->attached) {
-            port->wPortStatus |= PORT_STAT_CONNECTION;
-            port->wPortChange |= PORT_STAT_C_CONNECTION;
-            if (port->port.dev->speed == USB_SPEED_LOW) {
-                port->wPortStatus |= PORT_STAT_LOW_SPEED;
-            }
-        }
+        usb_hub_port_set(port, PORT_STAT_POWER);
+        usb_hub_port_update(port);
     }
 }
 
@@ -287,11 +329,11 @@ static const char *feature_name(int feature)
         [PORT_POWER]         = "power",
         [PORT_LOWSPEED]      = "lowspeed",
         [PORT_HIGHSPEED]     = "highspeed",
-        [PORT_C_CONNECTION]  = "change connection",
-        [PORT_C_ENABLE]      = "change enable",
-        [PORT_C_SUSPEND]     = "change suspend",
-        [PORT_C_OVERCURRENT] = "change overcurrent",
-        [PORT_C_RESET]       = "change reset",
+        [PORT_C_CONNECTION]  = "change-connection",
+        [PORT_C_ENABLE]      = "change-enable",
+        [PORT_C_SUSPEND]     = "change-suspend",
+        [PORT_C_OVERCURRENT] = "change-overcurrent",
+        [PORT_C_RESET]       = "change-reset",
         [PORT_TEST]          = "test",
         [PORT_INDICATOR]     = "indicator",
     };
@@ -332,7 +374,7 @@ static void usb_hub_handle_control(USBDevice *dev, USBPacket *p,
         {
             unsigned int n = index - 1;
             USBHubPort *port;
-            if (n >= NUM_PORTS) {
+            if (n >= s->num_ports) {
                 goto fail;
             }
             port = &s->ports[n];
@@ -361,7 +403,7 @@ static void usb_hub_handle_control(USBDevice *dev, USBPacket *p,
             trace_usb_hub_set_port_feature(s->dev.addr, index,
                                            feature_name(value));
 
-            if (n >= NUM_PORTS) {
+            if (n >= s->num_ports) {
                 goto fail;
             }
             port = &s->ports[n];
@@ -371,15 +413,20 @@ static void usb_hub_handle_control(USBDevice *dev, USBPacket *p,
                 port->wPortStatus |= PORT_STAT_SUSPEND;
                 break;
             case PORT_RESET:
+                usb_hub_port_set(port, PORT_STAT_RESET);
+                usb_hub_port_clear(port, PORT_STAT_RESET);
                 if (dev && dev->attached) {
                     usb_device_reset(dev);
-                    port->wPortChange |= PORT_STAT_C_RESET;
-                    /* set enable bit */
-                    port->wPortStatus |= PORT_STAT_ENABLE;
-                    usb_wakeup(s->intr, 0);
+                    usb_hub_port_set(port, PORT_STAT_ENABLE);
                 }
+                usb_wakeup(s->intr, 0);
                 break;
             case PORT_POWER:
+                if (s->port_power) {
+                    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+                    usb_hub_port_set(port, PORT_STAT_POWER);
+                    timer_mod(s->port_timer, now + 5000000); /* 5 ms */
+                }
                 break;
             default:
                 goto fail;
@@ -394,7 +441,7 @@ static void usb_hub_handle_control(USBDevice *dev, USBPacket *p,
             trace_usb_hub_clear_port_feature(s->dev.addr, index,
                                              feature_name(value));
 
-            if (n >= NUM_PORTS) {
+            if (n >= s->num_ports) {
                 goto fail;
             }
             port = &s->ports[n];
@@ -406,20 +453,7 @@ static void usb_hub_handle_control(USBDevice *dev, USBPacket *p,
                 port->wPortChange &= ~PORT_STAT_C_ENABLE;
                 break;
             case PORT_SUSPEND:
-                if (port->wPortStatus & PORT_STAT_SUSPEND) {
-                    port->wPortStatus &= ~PORT_STAT_SUSPEND;
-
-                    /*
-                     * USB Spec rev2.0 11.24.2.7.2.3 C_PORT_SUSPEND
-                     * "This bit is set on the following transitions:
-                     *  - On transition from the Resuming state to the
-                     *    SendEOP [sic] state"
-                     *
-                     * Note that this includes both remote wake-up and
-                     * explicit ClearPortFeature(PORT_SUSPEND).
-                     */
-                    port->wPortChange |= PORT_STAT_C_SUSPEND;
-                }
+                usb_hub_port_clear(port, PORT_STAT_SUSPEND);
                 break;
             case PORT_C_SUSPEND:
                 port->wPortChange &= ~PORT_STAT_C_SUSPEND;
@@ -433,6 +467,14 @@ static void usb_hub_handle_control(USBDevice *dev, USBPacket *p,
             case PORT_C_RESET:
                 port->wPortChange &= ~PORT_STAT_C_RESET;
                 break;
+            case PORT_POWER:
+                if (s->port_power) {
+                    usb_hub_port_clear(port, PORT_STAT_POWER);
+                    usb_hub_port_clear(port, PORT_STAT_CONNECTION);
+                    usb_hub_port_clear(port, PORT_STAT_ENABLE);
+                    usb_hub_port_clear(port, PORT_STAT_SUSPEND);
+                    port->wPortChange = 0;
+                }
             default:
                 goto fail;
             }
@@ -443,17 +485,22 @@ static void usb_hub_handle_control(USBDevice *dev, USBPacket *p,
             unsigned int n, limit, var_hub_size = 0;
             memcpy(data, qemu_hub_hub_descriptor,
                    sizeof(qemu_hub_hub_descriptor));
-            data[2] = NUM_PORTS;
+            data[2] = s->num_ports;
+
+            if (s->port_power) {
+                data[3] &= ~0x03;
+                data[3] |= 0x01;
+            }
 
             /* fill DeviceRemovable bits */
-            limit = DIV_ROUND_UP(NUM_PORTS + 1, 8) + 7;
+            limit = DIV_ROUND_UP(s->num_ports + 1, 8) + 7;
             for (n = 7; n < limit; n++) {
                 data[n] = 0x00;
                 var_hub_size++;
             }
 
             /* fill PortPwrCtrlMask bits */
-            limit = limit + DIV_ROUND_UP(NUM_PORTS, 8);
+            limit = limit + DIV_ROUND_UP(s->num_ports, 8);
             for (;n < limit; n++) {
                 data[n] = 0xff;
                 var_hub_size++;
@@ -481,7 +528,7 @@ static void usb_hub_handle_data(USBDevice *dev, USBPacket *p)
             unsigned int status;
             uint8_t buf[4];
             int i, n;
-            n = DIV_ROUND_UP(NUM_PORTS + 1, 8);
+            n = DIV_ROUND_UP(s->num_ports + 1, 8);
             if (p->iov.size == 1) { /* FreeBSD workaround */
                 n = 1;
             } else if (n > p->iov.size) {
@@ -489,7 +536,7 @@ static void usb_hub_handle_data(USBDevice *dev, USBPacket *p)
                 return;
             }
             status = 0;
-            for(i = 0; i < NUM_PORTS; i++) {
+            for (i = 0; i < s->num_ports; i++) {
                 port = &s->ports[i];
                 if (port->wPortChange)
                     status |= (1 << (i + 1));
@@ -520,10 +567,13 @@ static void usb_hub_unrealize(USBDevice *dev, Error **errp)
     USBHubState *s = (USBHubState *)dev;
     int i;
 
-    for (i = 0; i < NUM_PORTS; i++) {
+    for (i = 0; i < s->num_ports; i++) {
         usb_unregister_port(usb_bus_from_device(dev),
                             &s->ports[i].port);
     }
+
+    timer_del(s->port_timer);
+    timer_free(s->port_timer);
 }
 
 static USBPortOps usb_hub_port_ops = {
@@ -540,6 +590,12 @@ static void usb_hub_realize(USBDevice *dev, Error **errp)
     USBHubPort *port;
     int i;
 
+    if (s->num_ports < 1 || s->num_ports > MAX_PORTS) {
+        error_setg(errp, "num_ports (%d) out of range (1..%d)",
+                   s->num_ports, MAX_PORTS);
+        return;
+    }
+
     if (dev->port->hubcount == 5) {
         error_setg(errp, "usb hub chain too deep");
         return;
@@ -547,8 +603,10 @@ static void usb_hub_realize(USBDevice *dev, Error **errp)
 
     usb_desc_create_serial(dev);
     usb_desc_init(dev);
+    s->port_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                 usb_hub_port_update_timer, s);
     s->intr = usb_ep_get(dev, USB_TOKEN_IN, 1);
-    for (i = 0; i < NUM_PORTS; i++) {
+    for (i = 0; i < s->num_ports; i++) {
         port = &s->ports[i];
         usb_register_port(usb_bus_from_device(dev),
                           &port->port, s, i, &usb_hub_port_ops,
@@ -569,18 +627,46 @@ static const VMStateDescription vmstate_usb_hub_port = {
     }
 };
 
+static bool usb_hub_port_timer_needed(void *opaque)
+{
+    USBHubState *s = opaque;
+
+    return s->port_power;
+}
+
+static const VMStateDescription vmstate_usb_hub_port_timer = {
+    .name = "usb-hub/port-timer",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = usb_hub_port_timer_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_TIMER_PTR(port_timer, USBHubState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static const VMStateDescription vmstate_usb_hub = {
     .name = "usb-hub",
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
         VMSTATE_USB_DEVICE(dev, USBHubState),
-        VMSTATE_STRUCT_ARRAY(ports, USBHubState, NUM_PORTS, 0,
+        VMSTATE_STRUCT_ARRAY(ports, USBHubState, MAX_PORTS, 0,
                              vmstate_usb_hub_port, USBHubPort),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_usb_hub_port_timer,
+        NULL
     }
 };
 
+static Property usb_hub_properties[] = {
+    DEFINE_PROP_UINT32("ports", USBHubState, num_ports, 8),
+    DEFINE_PROP_BOOL("port-power", USBHubState, port_power, false),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void usb_hub_class_initfn(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -597,6 +683,7 @@ static void usb_hub_class_initfn(ObjectClass *klass, void *data)
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
     dc->fw_name = "hub";
     dc->vmsd = &vmstate_usb_hub;
+    dc->props = usb_hub_properties;
 }
 
 static const TypeInfo hub_info = {
diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
index 67b7465915..4f765d7f9a 100644
--- a/hw/usb/host-libusb.c
+++ b/hw/usb/host-libusb.c
@@ -1225,19 +1225,21 @@ static void usb_host_set_address(USBHostDevice *s, int addr)
 
 static void usb_host_set_config(USBHostDevice *s, int config, USBPacket *p)
 {
-    int rc;
+    int rc = 0;
 
     trace_usb_host_set_config(s->bus_num, s->addr, config);
 
     usb_host_release_interfaces(s);
-    rc = libusb_set_configuration(s->dh, config);
-    if (rc != 0) {
-        usb_host_libusb_error("libusb_set_configuration", rc);
-        p->status = USB_RET_STALL;
-        if (rc == LIBUSB_ERROR_NO_DEVICE) {
-            usb_host_nodev(s);
+    if (s->ddesc.bNumConfigurations != 1) {
+        rc = libusb_set_configuration(s->dh, config);
+        if (rc != 0) {
+            usb_host_libusb_error("libusb_set_configuration", rc);
+            p->status = USB_RET_STALL;
+            if (rc == LIBUSB_ERROR_NO_DEVICE) {
+                usb_host_nodev(s);
+            }
+            return;
         }
-        return;
     }
     p->status = usb_host_claim_interfaces(s, config);
     if (p->status != USB_RET_SUCCESS) {
@@ -1459,6 +1461,9 @@ static void usb_host_handle_reset(USBDevice *udev)
     if (!s->allow_guest_reset) {
         return;
     }
+    if (udev->addr == 0) {
+        return;
+    }
 
     trace_usb_host_reset(s->bus_num, s->addr);
 
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 553319c7ac..4ca5b2551e 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -96,6 +96,7 @@ typedef enum VhostUserRequest {
     VHOST_USER_POSTCOPY_END     = 30,
     VHOST_USER_GET_INFLIGHT_FD = 31,
     VHOST_USER_SET_INFLIGHT_FD = 32,
+    VHOST_USER_GPU_SET_SOCKET = 33,
     VHOST_USER_MAX
 } VhostUserRequest;
 
@@ -353,6 +354,16 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
     return 0;
 }
 
+int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
+{
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_GPU_SET_SOCKET,
+        .hdr.flags = VHOST_USER_VERSION,
+    };
+
+    return vhost_user_write(dev, &msg, &fd, 1);
+}
+
 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                    struct vhost_log *log)
 {
diff --git a/include/block/block.h b/include/block/block.h
index 9b083e2bca..f9415ed740 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -156,10 +156,15 @@ typedef struct HDGeometry {
  * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
  *                 layer, set by block layer
  *
- * Internal flag:
+ * Internal flags:
  * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
  *                 that the block layer recompute the answer from the returned
  *                 BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
+ * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
+ *                     zeroes in file child of current block node inside
+ *                     returned region. Only valid together with both
+ *                     BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
+ *                     appear with BDRV_BLOCK_ZERO.
  *
  * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
  * host offset within the returned BDS that is allocated for the
@@ -184,6 +189,7 @@ typedef struct HDGeometry {
 #define BDRV_BLOCK_RAW          0x08
 #define BDRV_BLOCK_ALLOCATED    0x10
 #define BDRV_BLOCK_EOF          0x20
+#define BDRV_BLOCK_RECURSE      0x40
 #define BDRV_BLOCK_OFFSET_MASK  BDRV_SECTOR_MASK
 
 typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
@@ -428,8 +434,8 @@ void bdrv_drain_all(void);
     AIO_WAIT_WHILE(bdrv_get_aio_context(bs_),              \
                    cond); })
 
-int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes);
-int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes);
+int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
+int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
 int bdrv_has_zero_init_1(BlockDriverState *bs);
 int bdrv_has_zero_init(BlockDriverState *bs);
 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
@@ -577,15 +583,6 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs);
  */
 void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co);
 
-/**
- * bdrv_set_aio_context:
- *
- * Changes the #AioContext used for fd handlers, timers, and BHs by this
- * BlockDriverState and all its children.
- *
- * This function must be called with iothread lock held.
- */
-void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context);
 void bdrv_set_aio_context_ignore(BlockDriverState *bs,
                                  AioContext *new_context, GSList **ignore);
 int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1eebc7c8f3..06df2bda1b 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1163,6 +1163,7 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr);
 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                   const char *child_name,
                                   const BdrvChildRole *child_role,
+                                  AioContext *ctx,
                                   uint64_t perm, uint64_t shared_perm,
                                   void *opaque, Error **errp);
 void bdrv_root_unref_child(BdrvChild *child);
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 849a6f3fa3..3ec8efcc43 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -581,6 +581,7 @@ enum NvmeIdCtrlOncs {
     NVME_ONCS_WRITE_ZEROS   = 1 << 3,
     NVME_ONCS_FEATURES      = 1 << 4,
     NVME_ONCS_RESRVATIONS   = 1 << 5,
+    NVME_ONCS_TIMESTAMP     = 1 << 6,
 };
 
 #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf)
@@ -622,6 +623,7 @@ enum NvmeFeatureIds {
     NVME_INTERRUPT_VECTOR_CONF      = 0x9,
     NVME_WRITE_ATOMICITY            = 0xa,
     NVME_ASYNCHRONOUS_EVENT_CONF    = 0xb,
+    NVME_TIMESTAMP                  = 0xe,
     NVME_SOFTWARE_PROGRESS_MARKER   = 0x80
 };
 
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
index ba223dd1f1..0cb7cc74a2 100644
--- a/include/block/raw-aio.h
+++ b/include/block/raw-aio.h
@@ -50,9 +50,6 @@ LinuxAioState *laio_init(Error **errp);
 void laio_cleanup(LinuxAioState *s);
 int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
                                 uint64_t offset, QEMUIOVector *qiov, int type);
-BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type);
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
 void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 9144a47f57..e6140e8a04 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1255,23 +1255,6 @@ void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize,
 void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client);
 
 /**
- * memory_region_get_dirty: Check whether a range of bytes is dirty
- *                          for a specified client.
- *
- * Checks whether a range of bytes has been written to since the last
- * call to memory_region_reset_dirty() with the same @client.  Dirty logging
- * must be enabled.
- *
- * @mr: the memory region being queried.
- * @addr: the address (relative to the start of the region) being queried.
- * @size: the size of the range being queried.
- * @client: the user of the logging information; %DIRTY_MEMORY_MIGRATION or
- *          %DIRTY_MEMORY_VGA.
- */
-bool memory_region_get_dirty(MemoryRegion *mr, hwaddr addr,
-                             hwaddr size, unsigned client);
-
-/**
  * memory_region_set_dirty: Mark a range of bytes as dirty in a memory region.
  *
  * Marks a range of bytes as dirty, after it has been dirtied outside
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index d06f25aa0f..607539057a 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -45,8 +45,7 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
     return exp;
 }
 
-#define DEFINE_BLOCK_PROPERTIES(_state, _conf)                          \
-    DEFINE_PROP_DRIVE("drive", _state, _conf.blk),                      \
+#define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf)                     \
     DEFINE_PROP_BLOCKSIZE("logical_block_size", _state,                 \
                           _conf.logical_block_size),                    \
     DEFINE_PROP_BLOCKSIZE("physical_block_size", _state,                \
@@ -59,6 +58,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
                             ON_OFF_AUTO_AUTO), \
     DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false)
 
+#define DEFINE_BLOCK_PROPERTIES(_state, _conf)                          \
+    DEFINE_PROP_DRIVE("drive", _state, _conf.blk),                      \
+    DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf)
+
 #define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf)      \
     DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0),  \
     DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0), \
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 6f7916f88f..6ff02bf3e4 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -292,6 +292,9 @@ struct MachineState {
     } \
     type_init(machine_initfn##_register_types)
 
+extern GlobalProperty hw_compat_4_0_1[];
+extern const size_t hw_compat_4_0_1_len;
+
 extern GlobalProperty hw_compat_4_0[];
 extern const size_t hw_compat_4_0_len;
 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 43df7230a2..5d5636241e 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -293,6 +293,9 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
 int e820_get_num_entries(void);
 bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
 
+extern GlobalProperty pc_compat_4_0_1[];
+extern const size_t pc_compat_4_0_1_len;
+
 extern GlobalProperty pc_compat_4_0[];
 extern const size_t pc_compat_4_0_len;
 
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7e32f309c2..4f5becf1f3 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -119,6 +119,7 @@ struct SpaprMachineClass {
     bool pre_2_10_has_unused_icps;
     bool legacy_irq_allocation;
     bool broken_host_serial_model; /* present real host info to the guest */
+    bool pre_4_1_migration; /* don't migrate hpt-max-page-size */
 
     void (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
                           uint64_t *buid, hwaddr *pio, 
@@ -849,6 +850,7 @@ extern const VMStateDescription vmstate_spapr_cap_dfp;
 extern const VMStateDescription vmstate_spapr_cap_cfpc;
 extern const VMStateDescription vmstate_spapr_cap_sbbc;
 extern const VMStateDescription vmstate_spapr_cap_ibs;
+extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
 extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
 extern const VMStateDescription vmstate_spapr_cap_large_decr;
 extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
index b855f74e44..14cab73c9c 100644
--- a/include/hw/ppc/spapr_irq.h
+++ b/include/hw/ppc/spapr_irq.h
@@ -48,6 +48,8 @@ typedef struct SpaprIrq {
     void (*reset)(SpaprMachineState *spapr, Error **errp);
     void (*set_irq)(void *opaque, int srcno, int val);
     const char *(*get_nodename)(SpaprMachineState *spapr);
+    void (*init_emu)(SpaprMachineState *spapr, Error **errp);
+    void (*init_kvm)(SpaprMachineState *spapr, Error **errp);
 } SpaprIrq;
 
 extern SpaprIrq spapr_irq_xics;
diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
index fc3e9652f9..b26befcf6b 100644
--- a/include/hw/ppc/spapr_xive.h
+++ b/include/hw/ppc/spapr_xive.h
@@ -38,16 +38,55 @@ typedef struct SpaprXive {
     /* TIMA mapping address */
     hwaddr        tm_base;
     MemoryRegion  tm_mmio;
+
+    /* KVM support */
+    int           fd;
+    void          *tm_mmap;
+    VMChangeStateEntry *change;
 } SpaprXive;
 
+/*
+ * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
+ * to the controller block id value. It can nevertheless be changed
+ * for testing purpose.
+ */
+#define SPAPR_XIVE_BLOCK_ID 0x0
+
 bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi);
 bool spapr_xive_irq_free(SpaprXive *xive, uint32_t lisn);
 void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon);
+int spapr_xive_post_load(SpaprXive *xive, int version_id);
 
 void spapr_xive_hcall_init(SpaprMachineState *spapr);
 void spapr_dt_xive(SpaprMachineState *spapr, uint32_t nr_servers, void *fdt,
                    uint32_t phandle);
 void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx);
 void spapr_xive_mmio_set_enabled(SpaprXive *xive, bool enable);
+void spapr_xive_map_mmio(SpaprXive *xive);
+
+int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
+                             uint32_t *out_server, uint8_t *out_prio);
+void spapr_xive_init(SpaprXive *xive, Error **errp);
+
+/*
+ * KVM XIVE device helpers
+ */
+void kvmppc_xive_connect(SpaprXive *xive, Error **errp);
+void kvmppc_xive_disconnect(SpaprXive *xive, Error **errp);
+void kvmppc_xive_reset(SpaprXive *xive, Error **errp);
+void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
+                                   Error **errp);
+void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp);
+uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+                            uint64_t data, bool write);
+void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
+                                 uint32_t end_idx, XiveEND *end,
+                                 Error **errp);
+void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
+                                 uint32_t end_idx, XiveEND *end,
+                                 Error **errp);
+void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp);
+int kvmppc_xive_pre_save(SpaprXive *xive);
+int kvmppc_xive_post_load(SpaprXive *xive, int version_id);
 
 #endif /* PPC_SPAPR_XIVE_H */
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index eb65ad7e43..d6f8e4c4c2 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -119,6 +119,7 @@ struct ICSState {
     uint32_t offset;
     ICSIRQState *irqs;
     XICSFabric *xics;
+    bool init; /* sPAPR ICS device initialized */
 };
 
 #define ICS_PROP_XICS "xics"
diff --git a/include/hw/ppc/xics_spapr.h b/include/hw/ppc/xics_spapr.h
index 15a8dcff66..2476b540ed 100644
--- a/include/hw/ppc/xics_spapr.h
+++ b/include/hw/ppc/xics_spapr.h
@@ -34,6 +34,7 @@
 void spapr_dt_xics(SpaprMachineState *spapr, uint32_t nr_servers, void *fdt,
                    uint32_t phandle);
 int xics_kvm_init(SpaprMachineState *spapr, Error **errp);
+void xics_kvm_disconnect(SpaprMachineState *spapr, Error **errp);
 void xics_spapr_init(SpaprMachineState *spapr);
 
 #endif /* XICS_SPAPR_H */
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index c4f27742ca..d872f96d1a 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -140,6 +140,7 @@
 #ifndef PPC_XIVE_H
 #define PPC_XIVE_H
 
+#include "sysemu/kvm.h"
 #include "hw/qdev-core.h"
 #include "hw/sysbus.h"
 #include "hw/ppc/xive_regs.h"
@@ -194,6 +195,9 @@ typedef struct XiveSource {
     uint32_t        esb_shift;
     MemoryRegion    esb_mmio;
 
+    /* KVM support */
+    void            *esb_mmap;
+
     XiveNotifier    *xive;
 } XiveSource;
 
@@ -423,4 +427,14 @@ static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx)
     return (nvt_blk << 19) | nvt_idx;
 }
 
+/*
+ * KVM XIVE device helpers
+ */
+
+void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
+void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
+void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
+void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
+void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp);
+
 #endif /* PPC_XIVE_H */
diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
index bf36678a24..1a8c5b5e64 100644
--- a/include/hw/ppc/xive_regs.h
+++ b/include/hw/ppc/xive_regs.h
@@ -208,6 +208,12 @@ typedef struct XiveEND {
 #define xive_end_is_backlog(end)  (be32_to_cpu((end)->w0) & END_W0_BACKLOG)
 #define xive_end_is_escalate(end) (be32_to_cpu((end)->w0) & END_W0_ESCALATE_CTL)
 
+static inline uint64_t xive_end_qaddr(XiveEND *end)
+{
+    return ((uint64_t) be32_to_cpu(end->w2) & 0x0fffffff) << 32 |
+        be32_to_cpu(end->w3);
+}
+
 /* Notification Virtual Target (NVT) */
 typedef struct XiveNVT {
         uint32_t        w0;
diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index b6758c852e..1eae5ab056 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -28,6 +28,7 @@ extern const PropertyInfo qdev_prop_blockdev_on_error;
 extern const PropertyInfo qdev_prop_bios_chs_trans;
 extern const PropertyInfo qdev_prop_fdc_drive_type;
 extern const PropertyInfo qdev_prop_drive;
+extern const PropertyInfo qdev_prop_drive_iothread;
 extern const PropertyInfo qdev_prop_netdev;
 extern const PropertyInfo qdev_prop_pci_devfn;
 extern const PropertyInfo qdev_prop_blocksize;
@@ -198,6 +199,8 @@ extern const PropertyInfo qdev_prop_pcie_link_width;
     DEFINE_PROP(_n, _s, _f, qdev_prop_netdev, NICPeers)
 #define DEFINE_PROP_DRIVE(_n, _s, _f) \
     DEFINE_PROP(_n, _s, _f, qdev_prop_drive, BlockBackend *)
+#define DEFINE_PROP_DRIVE_IOTHREAD(_n, _s, _f) \
+    DEFINE_PROP(_n, _s, _f, qdev_prop_drive_iothread, BlockBackend *)
 #define DEFINE_PROP_MACADDR(_n, _s, _f)         \
     DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, MACAddr)
 #define DEFINE_PROP_ON_OFF_AUTO(_n, _s, _f, _d) \
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index acef25faa4..426566a5c6 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -88,6 +88,7 @@ struct SCSIDevice
     int scsi_version;
     int default_scsi_version;
     bool needs_vpd_bl_emulation;
+    bool hba_supports_iothread;
 };
 
 extern const VMStateDescription vmstate_scsi_device;
diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h
index 1aedcf05c9..4c668fbbdc 100644
--- a/include/hw/sysbus.h
+++ b/include/hw/sysbus.h
@@ -89,6 +89,7 @@ qemu_irq sysbus_get_connected_irq(SysBusDevice *dev, int n);
 void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr);
 void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr,
                              int priority);
+void sysbus_mmio_unmap(SysBusDevice *dev, int n);
 void sysbus_add_io(SysBusDevice *dev, hwaddr addr,
                    MemoryRegion *mem);
 MemoryRegion *sysbus_address_space(SysBusDevice *dev);
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index d6632a18e6..6f6670783f 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -170,4 +170,6 @@ int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev,
 int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev,
                                           struct vhost_iotlb_msg *imsg);
 
+int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd);
+
 #endif /* VHOST_BACKEND_H */
diff --git a/include/hw/virtio/virtio-gpu-bswap.h b/include/hw/virtio/virtio-gpu-bswap.h
new file mode 100644
index 0000000000..38d12160f6
--- /dev/null
+++ b/include/hw/virtio/virtio-gpu-bswap.h
@@ -0,0 +1,61 @@
+/*
+ * Virtio GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2014
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VIRTIO_GPU_BSWAP_H
+#define HW_VIRTIO_GPU_BSWAP_H
+
+#include "qemu/bswap.h"
+
+static inline void
+virtio_gpu_ctrl_hdr_bswap(struct virtio_gpu_ctrl_hdr *hdr)
+{
+    le32_to_cpus(&hdr->type);
+    le32_to_cpus(&hdr->flags);
+    le64_to_cpus(&hdr->fence_id);
+    le32_to_cpus(&hdr->ctx_id);
+    le32_to_cpus(&hdr->padding);
+}
+
+static inline void
+virtio_gpu_bswap_32(void *ptr, size_t size)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+
+    size_t i;
+    struct virtio_gpu_ctrl_hdr *hdr = (struct virtio_gpu_ctrl_hdr *) ptr;
+
+    virtio_gpu_ctrl_hdr_bswap(hdr);
+
+    i = sizeof(struct virtio_gpu_ctrl_hdr);
+    while (i < size) {
+        le32_to_cpus((uint32_t *)(ptr + i));
+        i = i + sizeof(uint32_t);
+    }
+
+#endif
+}
+
+static inline void
+virtio_gpu_t2d_bswap(struct virtio_gpu_transfer_to_host_2d *t2d)
+{
+    virtio_gpu_ctrl_hdr_bswap(&t2d->hdr);
+    le32_to_cpus(&t2d->r.x);
+    le32_to_cpus(&t2d->r.y);
+    le32_to_cpus(&t2d->r.width);
+    le32_to_cpus(&t2d->r.height);
+    le64_to_cpus(&t2d->offset);
+    le32_to_cpus(&t2d->resource_id);
+    le32_to_cpus(&t2d->padding);
+}
+
+#endif
diff --git a/include/hw/virtio/virtio-gpu-pci.h b/include/hw/virtio/virtio-gpu-pci.h
new file mode 100644
index 0000000000..2f69b5a9cc
--- /dev/null
+++ b/include/hw/virtio/virtio-gpu-pci.h
@@ -0,0 +1,40 @@
+/*
+ * Virtio GPU PCI Device
+ *
+ * Copyright Red Hat, Inc. 2013-2014
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VIRTIO_GPU_PCI_H
+#define HW_VIRTIO_GPU_PCI_H
+
+#include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-gpu.h"
+
+typedef struct VirtIOGPUPCIBase VirtIOGPUPCIBase;
+
+/*
+ * virtio-gpu-pci-base: This extends VirtioPCIProxy.
+ */
+#define TYPE_VIRTIO_GPU_PCI_BASE "virtio-gpu-pci-base"
+#define VIRTIO_GPU_PCI_BASE(obj)                                    \
+    OBJECT_CHECK(VirtIOGPUPCIBase, (obj), TYPE_VIRTIO_GPU_PCI_BASE)
+
+struct VirtIOGPUPCIBase {
+    VirtIOPCIProxy parent_obj;
+    VirtIOGPUBase *vgpu;
+};
+
+/* to share between PCI and VGA */
+#define DEFINE_VIRTIO_GPU_PCI_PROPERTIES(_state)                \
+    DEFINE_PROP_BIT("ioeventfd", _state, flags,                 \
+                    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false),  \
+        DEFINE_PROP_UINT32("vectors", _state, nvectors, 3)
+
+#endif /* HW_VIRTIO_GPU_PCI_H */
diff --git a/include/hw/virtio/virtio-gpu-pixman.h b/include/hw/virtio/virtio-gpu-pixman.h
new file mode 100644
index 0000000000..4dba782758
--- /dev/null
+++ b/include/hw/virtio/virtio-gpu-pixman.h
@@ -0,0 +1,45 @@
+/*
+ * Virtio GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2014
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VIRTIO_GPU_PIXMAN_H
+#define HW_VIRTIO_GPU_PIXMAN_H
+
+#include "ui/qemu-pixman.h"
+#include "standard-headers/linux/virtio_gpu.h"
+
+static inline pixman_format_code_t
+virtio_gpu_get_pixman_format(uint32_t virtio_gpu_format)
+{
+    switch (virtio_gpu_format) {
+    case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM:
+        return PIXMAN_BE_b8g8r8x8;
+    case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM:
+        return PIXMAN_BE_b8g8r8a8;
+    case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM:
+        return PIXMAN_BE_x8r8g8b8;
+    case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM:
+        return PIXMAN_BE_a8r8g8b8;
+    case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM:
+        return PIXMAN_BE_r8g8b8x8;
+    case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM:
+        return PIXMAN_BE_r8g8b8a8;
+    case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM:
+        return PIXMAN_BE_x8b8g8r8;
+    case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM:
+        return PIXMAN_BE_a8b8g8r8;
+    default:
+        return 0;
+    }
+}
+
+#endif
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index 60425c5d58..8ecac1987a 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -19,13 +19,24 @@
 #include "ui/console.h"
 #include "hw/virtio/virtio.h"
 #include "qemu/log.h"
+#include "sysemu/vhost-user-backend.h"
 
 #include "standard-headers/linux/virtio_gpu.h"
 
+#define TYPE_VIRTIO_GPU_BASE "virtio-gpu-base"
+#define VIRTIO_GPU_BASE(obj)                                                \
+    OBJECT_CHECK(VirtIOGPUBase, (obj), TYPE_VIRTIO_GPU_BASE)
+#define VIRTIO_GPU_BASE_GET_CLASS(obj)                                      \
+    OBJECT_GET_CLASS(VirtIOGPUBaseClass, obj, TYPE_VIRTIO_GPU_BASE)
+#define VIRTIO_GPU_BASE_CLASS(klass)                                        \
+    OBJECT_CLASS_CHECK(VirtIOGPUBaseClass, klass, TYPE_VIRTIO_GPU_BASE)
+
 #define TYPE_VIRTIO_GPU "virtio-gpu-device"
 #define VIRTIO_GPU(obj)                                        \
         OBJECT_CHECK(VirtIOGPU, (obj), TYPE_VIRTIO_GPU)
 
+#define TYPE_VHOST_USER_GPU "vhost-user-gpu"
+
 #define VIRTIO_ID_GPU 16
 
 struct virtio_gpu_simple_resource {
@@ -58,7 +69,7 @@ struct virtio_gpu_requested_state {
     int x, y;
 };
 
-enum virtio_gpu_conf_flags {
+enum virtio_gpu_base_conf_flags {
     VIRTIO_GPU_FLAG_VIRGL_ENABLED = 1,
     VIRTIO_GPU_FLAG_STATS_ENABLED,
     VIRTIO_GPU_FLAG_EDID_ENABLED,
@@ -71,8 +82,7 @@ enum virtio_gpu_conf_flags {
 #define virtio_gpu_edid_enabled(_cfg) \
     (_cfg.flags & (1 << VIRTIO_GPU_FLAG_EDID_ENABLED))
 
-struct virtio_gpu_conf {
-    uint64_t max_hostmem;
+struct virtio_gpu_base_conf {
     uint32_t max_outputs;
     uint32_t flags;
     uint32_t xres;
@@ -88,31 +98,55 @@ struct virtio_gpu_ctrl_command {
     QTAILQ_ENTRY(virtio_gpu_ctrl_command) next;
 };
 
-typedef struct VirtIOGPU {
+typedef struct VirtIOGPUBase {
     VirtIODevice parent_obj;
 
-    QEMUBH *ctrl_bh;
-    QEMUBH *cursor_bh;
+    Error *migration_blocker;
+
+    struct virtio_gpu_base_conf conf;
+    struct virtio_gpu_config virtio_config;
+
+    bool use_virgl_renderer;
+    int renderer_blocked;
+    int enable;
+
+    struct virtio_gpu_scanout scanout[VIRTIO_GPU_MAX_SCANOUTS];
+
+    int enabled_output_bitmask;
+    struct virtio_gpu_requested_state req_state[VIRTIO_GPU_MAX_SCANOUTS];
+} VirtIOGPUBase;
+
+typedef struct VirtIOGPUBaseClass {
+    VirtioDeviceClass parent;
+
+    void (*gl_unblock)(VirtIOGPUBase *g);
+} VirtIOGPUBaseClass;
+
+#define VIRTIO_GPU_BASE_PROPERTIES(_state, _conf)                       \
+    DEFINE_PROP_UINT32("max_outputs", _state, _conf.max_outputs, 1),    \
+    DEFINE_PROP_BIT("edid", _state, _conf.flags, \
+                    VIRTIO_GPU_FLAG_EDID_ENABLED, false), \
+    DEFINE_PROP_UINT32("xres", _state, _conf.xres, 1024), \
+    DEFINE_PROP_UINT32("yres", _state, _conf.yres, 768)
+
+typedef struct VirtIOGPU {
+    VirtIOGPUBase parent_obj;
+
+    uint64_t conf_max_hostmem;
+
     VirtQueue *ctrl_vq;
     VirtQueue *cursor_vq;
 
-    int enable;
+    QEMUBH *ctrl_bh;
+    QEMUBH *cursor_bh;
 
     QTAILQ_HEAD(, virtio_gpu_simple_resource) reslist;
     QTAILQ_HEAD(, virtio_gpu_ctrl_command) cmdq;
     QTAILQ_HEAD(, virtio_gpu_ctrl_command) fenceq;
 
-    struct virtio_gpu_scanout scanout[VIRTIO_GPU_MAX_SCANOUTS];
-    struct virtio_gpu_requested_state req_state[VIRTIO_GPU_MAX_SCANOUTS];
-
-    struct virtio_gpu_conf conf;
     uint64_t hostmem;
-    int enabled_output_bitmask;
-    struct virtio_gpu_config virtio_config;
 
-    bool use_virgl_renderer;
     bool renderer_inited;
-    int renderer_blocked;
     bool renderer_reset;
     QEMUTimer *fence_poll;
     QEMUTimer *print_stats;
@@ -124,17 +158,19 @@ typedef struct VirtIOGPU {
         uint32_t req_3d;
         uint32_t bytes_3d;
     } stats;
-
-    Error *migration_blocker;
 } VirtIOGPU;
 
-extern const GraphicHwOps virtio_gpu_ops;
+typedef struct VhostUserGPU {
+    VirtIOGPUBase parent_obj;
+
+    VhostUserBackend *vhost;
+    int vhost_gpu_fd; /* closed by the chardev */
+    CharBackend vhost_chr;
+    QemuDmaBuf dmabuf[VIRTIO_GPU_MAX_SCANOUTS];
+    bool backend_blocked;
+} VhostUserGPU;
 
-/* to share between PCI and VGA */
-#define DEFINE_VIRTIO_GPU_PCI_PROPERTIES(_state)               \
-    DEFINE_PROP_BIT("ioeventfd", _state, flags,                \
-                    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false), \
-    DEFINE_PROP_UINT32("vectors", _state, nvectors, 3)
+extern const GraphicHwOps virtio_gpu_ops;
 
 #define VIRTIO_GPU_FILL_CMD(out) do {                                   \
         size_t s;                                                       \
@@ -148,6 +184,15 @@ extern const GraphicHwOps virtio_gpu_ops;
         }                                                               \
     } while (0)
 
+/* virtio-gpu-base.c */
+bool virtio_gpu_base_device_realize(DeviceState *qdev,
+                                    VirtIOHandleOutput ctrl_cb,
+                                    VirtIOHandleOutput cursor_cb,
+                                    Error **errp);
+void virtio_gpu_base_reset(VirtIOGPUBase *g);
+void virtio_gpu_base_fill_display_info(VirtIOGPUBase *g,
+                        struct virtio_gpu_resp_display_info *dpy_info);
+
 /* virtio-gpu.c */
 void virtio_gpu_ctrl_response(VirtIOGPU *g,
                               struct virtio_gpu_ctrl_command *cmd,
@@ -175,4 +220,5 @@ void virtio_gpu_virgl_fence_poll(VirtIOGPU *g);
 void virtio_gpu_virgl_reset(VirtIOGPU *g);
 int virtio_gpu_virgl_init(VirtIOGPU *g);
 int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g);
+
 #endif
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 938de34fe9..733c4957eb 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -76,7 +76,7 @@ typedef struct BlockBackendPublic {
     ThrottleGroupMember throttle_group_member;
 } BlockBackendPublic;
 
-BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm);
+BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
 BlockBackend *blk_new_open(const char *filename, const char *reference,
                            QDict *options, int flags, Error **errp);
 int blk_get_refcnt(BlockBackend *blk);
@@ -208,7 +208,8 @@ void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason);
 void blk_op_block_all(BlockBackend *blk, Error *reason);
 void blk_op_unblock_all(BlockBackend *blk, Error *reason);
 AioContext *blk_get_aio_context(BlockBackend *blk);
-void blk_set_aio_context(BlockBackend *blk, AioContext *new_context);
+int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
+                        Error **errp);
 void blk_add_aio_context_notifier(BlockBackend *blk,
         void (*attached_aio_context)(AioContext *new_context, void *opaque),
         void (*detach_aio_context)(void *opaque), void *opaque);
diff --git a/memory.c b/memory.c
index 3071c4bdad..0920c105aa 100644
--- a/memory.c
+++ b/memory.c
@@ -2027,14 +2027,6 @@ void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
     memory_region_transaction_commit();
 }
 
-bool memory_region_get_dirty(MemoryRegion *mr, hwaddr addr,
-                             hwaddr size, unsigned client)
-{
-    assert(mr->ram_block);
-    return cpu_physical_memory_get_dirty(memory_region_get_ram_addr(mr) + addr,
-                                         size, client);
-}
-
 void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr,
                              hwaddr size)
 {
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index d1bb863cb6..4a896a09eb 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -273,7 +273,6 @@ static int init_dirty_bitmap_migration(void)
     BlockDriverState *bs;
     BdrvDirtyBitmap *bitmap;
     DirtyBitmapMigBitmapState *dbms;
-    BdrvNextIterator it;
     Error *local_err = NULL;
 
     dirty_bitmap_mig_state.bulk_completed = false;
@@ -281,13 +280,8 @@ static int init_dirty_bitmap_migration(void)
     dirty_bitmap_mig_state.prev_bitmap = NULL;
     dirty_bitmap_mig_state.no_bitmaps = false;
 
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
-        const char *drive_name = bdrv_get_device_or_node_name(bs);
-
-        /* skip automatically inserted nodes */
-        while (bs && bs->drv && bs->implicit) {
-            bs = backing_bs(bs);
-        }
+    for (bs = bdrv_next_all_states(NULL); bs; bs = bdrv_next_all_states(bs)) {
+        const char *name = bdrv_get_device_or_node_name(bs);
 
         for (bitmap = bdrv_dirty_bitmap_next(bs, NULL); bitmap;
              bitmap = bdrv_dirty_bitmap_next(bs, bitmap))
@@ -296,7 +290,7 @@ static int init_dirty_bitmap_migration(void)
                 continue;
             }
 
-            if (drive_name == NULL) {
+            if (!name || strcmp(name, "") == 0) {
                 error_report("Found bitmap '%s' in unnamed node %p. It can't "
                              "be migrated", bdrv_dirty_bitmap_name(bitmap), bs);
                 goto fail;
@@ -313,7 +307,7 @@ static int init_dirty_bitmap_migration(void)
 
             dbms = g_new0(DirtyBitmapMigBitmapState, 1);
             dbms->bs = bs;
-            dbms->node_name = drive_name;
+            dbms->node_name = name;
             dbms->bitmap = bitmap;
             dbms->total_sectors = bdrv_nb_sectors(bs);
             dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
diff --git a/migration/block.c b/migration/block.c
index 83c633fb3f..91f98ef44a 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -417,7 +417,8 @@ static int init_blk_migration(QEMUFile *f)
         }
 
         bmds = g_new0(BlkMigDevState, 1);
-        bmds->blk = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
+        bmds->blk = blk_new(qemu_get_aio_context(),
+                            BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
         bmds->blk_name = g_strdup(bdrv_get_device_name(bs));
         bmds->bulk_completed = 0;
         bmds->total_sectors = sectors;
diff --git a/migration/fd.c b/migration/fd.c
index a7c13df4ad..0a29ecdebf 100644
--- a/migration/fd.c
+++ b/migration/fd.c
@@ -52,12 +52,14 @@ static gboolean fd_accept_incoming_migration(QIOChannel *ioc,
     return G_SOURCE_REMOVE;
 }
 
-void fd_start_incoming_migration(const char *infd, Error **errp)
+void fd_start_incoming_migration(const char *fdname, Error **errp)
 {
     QIOChannel *ioc;
-    int fd;
+    int fd = monitor_fd_param(cur_mon, fdname, errp);
+    if (fd == -1) {
+        return;
+    }
 
-    fd = strtol(infd, NULL, 0);
     trace_migration_fd_incoming(fd);
 
     ioc = qio_channel_new_fd(fd, errp);
diff --git a/migration/fd.h b/migration/fd.h
index a14a63ce2e..b901bc014e 100644
--- a/migration/fd.h
+++ b/migration/fd.h
@@ -16,7 +16,7 @@
 
 #ifndef QEMU_MIGRATION_FD_H
 #define QEMU_MIGRATION_FD_H
-void fd_start_incoming_migration(const char *path, Error **errp);
+void fd_start_incoming_migration(const char *fdname, Error **errp);
 
 void fd_start_outgoing_migration(MigrationState *s, const char *fdname,
                                  Error **errp);
diff --git a/migration/ram.c b/migration/ram.c
index 4c60869226..082aea9d23 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -661,8 +661,6 @@ typedef struct {
     uint64_t num_packets;
     /* pages sent through this channel */
     uint64_t num_pages;
-    /* syncs main thread and channels */
-    QemuSemaphore sem_sync;
 }  MultiFDSendParams;
 
 typedef struct {
@@ -894,8 +892,6 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
 
 struct {
     MultiFDSendParams *params;
-    /* number of created threads */
-    int count;
     /* array of pages to sent */
     MultiFDPages_t *pages;
     /* syncs main thread and channels */
@@ -1027,7 +1023,6 @@ void multifd_save_cleanup(void)
         p->c = NULL;
         qemu_mutex_destroy(&p->mutex);
         qemu_sem_destroy(&p->sem);
-        qemu_sem_destroy(&p->sem_sync);
         g_free(p->name);
         p->name = NULL;
         multifd_pages_clear(p->pages);
@@ -1174,8 +1169,6 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
         p->running = true;
         qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
                            QEMU_THREAD_JOINABLE);
-
-        atomic_inc(&multifd_send_state->count);
     }
 }
 
@@ -1191,7 +1184,6 @@ int multifd_save_setup(void)
     thread_count = migrate_multifd_channels();
     multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
     multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
-    atomic_set(&multifd_send_state->count, 0);
     multifd_send_state->pages = multifd_pages_init(page_count);
     qemu_sem_init(&multifd_send_state->sem_sync, 0);
     qemu_sem_init(&multifd_send_state->channels_ready, 0);
@@ -1201,7 +1193,6 @@ int multifd_save_setup(void)
 
         qemu_mutex_init(&p->mutex);
         qemu_sem_init(&p->sem, 0);
-        qemu_sem_init(&p->sem_sync, 0);
         p->quit = false;
         p->pending_job = 0;
         p->id = i;
@@ -3182,11 +3173,11 @@ static int ram_state_init(RAMState **rsp)
     QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
 
     /*
-     * Count the total number of pages used by ram blocks not including any
-     * gaps due to alignment or unplugs.
+     * This must match with the initial values of dirty bitmap.
+     * Currently we initialize the dirty bitmap to all zeros so
+     * here the total dirty page count is zero.
      */
-    (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
-
+    (*rsp)->migration_dirty_pages = 0;
     ram_state_reset(*rsp);
 
     return 0;
@@ -3201,8 +3192,16 @@ static void ram_list_init_bitmaps(void)
     if (ram_bytes_total()) {
         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
             pages = block->max_length >> TARGET_PAGE_BITS;
+            /*
+             * The initial dirty bitmap for migration must be set with all
+             * ones to make sure we'll migrate every guest RAM page to
+             * destination.
+             * Here we didn't set RAMBlock.bmap simply because it is already
+             * set in ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION] in
+             * ram_block_add, and that's where we'll sync the dirty bitmaps.
+             * Here setting RAMBlock.bmap would be fine too but not necessary.
+             */
             block->bmap = bitmap_new(pages);
-            bitmap_set(block->bmap, 0, pages);
             if (migrate_postcopy_ram()) {
                 block->unsentmap = bitmap_new(pages);
                 bitmap_set(block->unsentmap, 0, pages);
diff --git a/nbd/server.c b/nbd/server.c
index e21bd501dc..aeca3893fe 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1484,13 +1484,15 @@ NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset,
     if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
         perm |= BLK_PERM_WRITE;
     }
-    blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
-                        BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
+    blk = blk_new(bdrv_get_aio_context(bs), perm,
+                  BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+                  BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto fail;
     }
     blk_set_enable_write_cache(blk, !writethrough);
+    blk_set_allow_aio_context_change(blk, true);
 
     exp->refcount = 1;
     QTAILQ_INIT(&exp->clients);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 7ccbfff9d0..1defcde048 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2004,18 +2004,34 @@
             '*persistent': 'bool', '*autoload': 'bool', '*disabled': 'bool' } }
 
 ##
+# @BlockDirtyBitmapMergeSource:
+#
+# @local: name of the bitmap, attached to the same node as target bitmap.
+#
+# @external: bitmap with specified node
+#
+# Since: 4.1
+##
+{ 'alternate': 'BlockDirtyBitmapMergeSource',
+  'data': { 'local': 'str',
+            'external': 'BlockDirtyBitmap' } }
+
+##
 # @BlockDirtyBitmapMerge:
 #
-# @node: name of device/node which the bitmap is tracking
+# @node: name of device/node which the @target bitmap is tracking
 #
 # @target: name of the destination dirty bitmap
 #
-# @bitmaps: name(s) of the source dirty bitmap(s)
+# @bitmaps: name(s) of the source dirty bitmap(s) at @node and/or fully
+#           specifed BlockDirtyBitmap elements. The latter are supported
+#           since 4.1.
 #
 # Since: 4.0
 ##
 { 'struct': 'BlockDirtyBitmapMerge',
-  'data': { 'node': 'str', 'target': 'str', 'bitmaps': ['str'] } }
+  'data': { 'node': 'str', 'target': 'str',
+            'bitmaps': ['BlockDirtyBitmapMergeSource'] } }
 
 ##
 # @block-dirty-bitmap-add:
@@ -3215,6 +3231,8 @@
 #
 # @cor_write: a write due to copy-on-read (since 2.11)
 #
+# @cluster_alloc_space: an allocation of file space for a cluster (since 4.1)
+#
 # Since: 2.9
 ##
 { 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG',
@@ -3233,7 +3251,7 @@
             'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev',
             'pwritev_zero', 'pwritev_done', 'empty_image_prepare',
             'l1_shrink_write_table', 'l1_shrink_free_l2_clusters',
-            'cor_write'] }
+            'cor_write', 'cluster_alloc_space'] }
 
 ##
 # @BlkdebugInjectErrorOptions:
diff --git a/qemu-img.c b/qemu-img.c
index 28fba1e7a7..07b6e2a808 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3164,7 +3164,7 @@ static int img_rebase(int argc, char **argv)
     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
     uint8_t *buf_old = NULL;
     uint8_t *buf_new = NULL;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
     char *filename;
     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
     int c, flags, src_flags, ret;
@@ -3309,29 +3309,19 @@ static int img_rebase(int argc, char **argv)
 
     /* For safe rebasing we need to compare old and new backing file */
     if (!unsafe) {
-        char backing_name[PATH_MAX];
         QDict *options = NULL;
-
-        if (bs->backing) {
-            if (bs->backing_format[0] != '\0') {
-                options = qdict_new();
-                qdict_put_str(options, "driver", bs->backing_format);
-            }
-
-            if (force_share) {
-                if (!options) {
-                    options = qdict_new();
-                }
-                qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
-            }
-            bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
-            blk_old_backing = blk_new_open(backing_name, NULL,
-                                           options, src_flags, &local_err);
-            if (!blk_old_backing) {
+        BlockDriverState *base_bs = backing_bs(bs);
+
+        if (base_bs) {
+            blk_old_backing = blk_new(qemu_get_aio_context(),
+                                      BLK_PERM_CONSISTENT_READ,
+                                      BLK_PERM_ALL);
+            ret = blk_insert_bs(blk_old_backing, base_bs,
+                                &local_err);
+            if (ret < 0) {
                 error_reportf_err(local_err,
-                                  "Could not open old backing file '%s': ",
-                                  backing_name);
-                ret = -1;
+                                  "Could not reuse old backing file '%s': ",
+                                  base_bs->filename);
                 goto out;
             }
         } else {
@@ -3364,15 +3354,35 @@ static int img_rebase(int argc, char **argv)
                 goto out;
             }
 
-            blk_new_backing = blk_new_open(out_real_path, NULL,
-                                           options, src_flags, &local_err);
-            g_free(out_real_path);
-            if (!blk_new_backing) {
-                error_reportf_err(local_err,
-                                  "Could not open new backing file '%s': ",
-                                  out_baseimg);
-                ret = -1;
-                goto out;
+            /*
+             * Find out whether we rebase an image on top of a previous image
+             * in its chain.
+             */
+            prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
+            if (prefix_chain_bs) {
+                g_free(out_real_path);
+                blk_new_backing = blk_new(qemu_get_aio_context(),
+                                          BLK_PERM_CONSISTENT_READ,
+                                          BLK_PERM_ALL);
+                ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
+                                    &local_err);
+                if (ret < 0) {
+                    error_reportf_err(local_err,
+                                      "Could not reuse backing file '%s': ",
+                                      out_baseimg);
+                    goto out;
+                }
+            } else {
+                blk_new_backing = blk_new_open(out_real_path, NULL,
+                                               options, src_flags, &local_err);
+                g_free(out_real_path);
+                if (!blk_new_backing) {
+                    error_reportf_err(local_err,
+                                      "Could not open new backing file '%s': ",
+                                      out_baseimg);
+                    ret = -1;
+                    goto out;
+                }
             }
         }
     }
@@ -3448,6 +3458,23 @@ static int img_rebase(int argc, char **argv)
                 continue;
             }
 
+            if (prefix_chain_bs) {
+                /*
+                 * If cluster wasn't changed since prefix_chain, we don't need
+                 * to take action
+                 */
+                ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs,
+                                              offset, n, &n);
+                if (ret < 0) {
+                    error_report("error while reading image metadata: %s",
+                                 strerror(-ret));
+                    goto out;
+                }
+                if (!ret) {
+                    continue;
+                }
+            }
+
             /*
              * Read old and new backing file and take into consideration that
              * backing files may be smaller than the COW image.
diff --git a/rules.mak b/rules.mak
index df45bcffb4..967295dd2b 100644
--- a/rules.mak
+++ b/rules.mak
@@ -144,7 +144,7 @@ cc-option = $(if $(shell $(CC) $1 $2 -S -o /dev/null -xc /dev/null \
 cc-c-option = $(if $(shell $(CC) $1 $2 -c -o /dev/null -xc /dev/null \
                 >/dev/null 2>&1 && echo OK), $2, $3)
 
-VPATH_SUFFIXES = %.c %.h %.S %.cc %.cpp %.m %.mak %.texi %.sh %.rc Kconfig%
+VPATH_SUFFIXES = %.c %.h %.S %.cc %.cpp %.m %.mak %.texi %.sh %.rc Kconfig% %.json.in
 set-vpath = $(if $1,$(foreach PATTERN,$(VPATH_SUFFIXES),$(eval vpath $(PATTERN) $1)))
 
 # install-prog list, dir
@@ -392,3 +392,10 @@ TEXI2MAN = $(call quiet-command, \
 	$(call TEXI2MAN)
 %.8:
 	$(call TEXI2MAN)
+
+GEN_SUBST = $(call quiet-command, \
+	sed -e "s!@libexecdir@!$(libexecdir)!g" < $< > $@, \
+	"GEN","$@")
+
+%.json: %.json.in
+	$(call GEN_SUBST)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 88682cb0a9..c2aaf421da 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1949,7 +1949,8 @@ sub process {
 		}
 
 # no C99 // comments
-		if ($line =~ m{//}) {
+		if ($line =~ m{//} &&
+		    $rawline !~ m{// SPDX-License-Identifier: }) {
 			ERROR("do not use C99 // comments\n" . $herecurr);
 		}
 		# Remove C99 comments.
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 536d7d1520..c1ab86d63e 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -4730,6 +4730,9 @@ static void x86_cpu_reset(CPUState *s)
 
     env->pat = 0x0007040600070406ULL;
     env->msr_ia32_misc_enable = MSR_IA32_MISC_ENABLE_DEFAULT;
+    if (env->features[FEAT_1_ECX] & CPUID_EXT_MONITOR) {
+        env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_MWAIT;
+    }
 
     memset(env->dr, 0, sizeof(env->dr));
     env->dr[6] = DR6_FIXED_1;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index fce6660bac..bd06523a53 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -387,6 +387,7 @@ typedef enum X86Seg {
 #define MSR_IA32_MISC_ENABLE            0x1a0
 /* Indicates good rep/movs microcode on some processors: */
 #define MSR_IA32_MISC_ENABLE_DEFAULT    1
+#define MSR_IA32_MISC_ENABLE_MWAIT      (1ULL << 18)
 
 #define MSR_MTRRphysBase(reg)           (0x200 + 2 * (reg))
 #define MSR_MTRRphysMask(reg)           (0x200 + 2 * (reg) + 1)
diff --git a/target/mips/dsp_helper.c b/target/mips/dsp_helper.c
index 739b69dd45..8c58eeb0bf 100644
--- a/target/mips/dsp_helper.c
+++ b/target/mips/dsp_helper.c
@@ -22,8 +22,10 @@
 #include "exec/helper-proto.h"
 #include "qemu/bitops.h"
 
-/* As the byte ordering doesn't matter, i.e. all columns are treated
-   identically, these unions can be used directly.  */
+/*
+ * As the byte ordering doesn't matter, i.e. all columns are treated
+ * identically, these unions can be used directly.
+ */
 typedef union {
     uint8_t  ub[4];
     int8_t   sb[4];
@@ -1445,9 +1447,15 @@ target_ulong helper_precr_ob_qh(target_ulong rs, target_ulong rt)
     return temp;
 }
 
-#define PRECR_QH_PW(name, var) \
-target_ulong helper_precr_##name##_qh_pw(target_ulong rs, target_ulong rt, \
-                                    uint32_t sa)                      \
+
+/*
+ * In case sa == 0, use rt2, rt0, rs2, rs0.
+ * In case sa != 0, use rt3, rt1, rs3, rs1.
+ */
+#define PRECR_QH_PW(name, var)                                        \
+target_ulong helper_precr_##name##_qh_pw(target_ulong rs,             \
+                                         target_ulong rt,             \
+                                         uint32_t sa)                 \
 {                                                                     \
     uint16_t rs3, rs2, rs1, rs0;                                      \
     uint16_t rt3, rt2, rt1, rt0;                                      \
@@ -1456,8 +1464,6 @@ target_ulong helper_precr_##name##_qh_pw(target_ulong rs, target_ulong rt, \
     MIPSDSP_SPLIT64_16(rs, rs3, rs2, rs1, rs0);                       \
     MIPSDSP_SPLIT64_16(rt, rt3, rt2, rt1, rt0);                       \
                                                                       \
-    /* When sa = 0, we use rt2, rt0, rs2, rs0;                        \
-     * when sa != 0, we use rt3, rt1, rs3, rs1. */                    \
     if (sa == 0) {                                                    \
         tempD = rt2 << var;                                           \
         tempC = rt0 << var;                                           \
@@ -1965,7 +1971,8 @@ SHIFT_PH(shra_r, rnd16_rashift);
 #undef SHIFT_PH
 
 /** DSP Multiply Sub-class insns **/
-/* Return value made up by two 16bits value.
+/*
+ * Return value made up by two 16bits value.
  * FIXME give the macro a better name.
  */
 #define MUL_RETURN32_16_PH(name, func, \
@@ -3274,11 +3281,15 @@ target_ulong helper_dextr_l(target_ulong ac, target_ulong shift,
                             CPUMIPSState *env)
 {
     uint64_t temp[3];
+    target_ulong ret;
 
     shift = shift & 0x3F;
 
     mipsdsp_rndrashift_acc(temp, ac, shift, env);
-    return (temp[1] << 63) | (temp[0] >> 1);
+
+    ret = (temp[1] << 63) | (temp[0] >> 1);
+
+    return ret;
 }
 
 target_ulong helper_dextr_r_l(target_ulong ac, target_ulong shift,
@@ -3286,6 +3297,7 @@ target_ulong helper_dextr_r_l(target_ulong ac, target_ulong shift,
 {
     uint64_t temp[3];
     uint32_t temp128;
+    target_ulong ret;
 
     shift = shift & 0x3F;
     mipsdsp_rndrashift_acc(temp, ac, shift, env);
@@ -3305,7 +3317,9 @@ target_ulong helper_dextr_r_l(target_ulong ac, target_ulong shift,
         set_DSPControl_overflow_flag(1, 23, env);
     }
 
-    return (temp[1] << 63) | (temp[0] >> 1);
+    ret = (temp[1] << 63) | (temp[0] >> 1);
+
+    return ret;
 }
 
 target_ulong helper_dextr_rs_l(target_ulong ac, target_ulong shift,
@@ -3313,6 +3327,7 @@ target_ulong helper_dextr_rs_l(target_ulong ac, target_ulong shift,
 {
     uint64_t temp[3];
     uint32_t temp128;
+    target_ulong ret;
 
     shift = shift & 0x3F;
     mipsdsp_rndrashift_acc(temp, ac, shift, env);
@@ -3338,7 +3353,10 @@ target_ulong helper_dextr_rs_l(target_ulong ac, target_ulong shift,
         }
         set_DSPControl_overflow_flag(1, 23, env);
     }
-    return (temp[1] << 63) | (temp[0] >> 1);
+
+    ret = (temp[1] << 63) | (temp[0] >> 1);
+
+    return ret;
 }
 #endif
 
diff --git a/target/mips/lmi_helper.c b/target/mips/lmi_helper.c
index fb1245b39d..6c645cf679 100644
--- a/target/mips/lmi_helper.c
+++ b/target/mips/lmi_helper.c
@@ -21,9 +21,11 @@
 #include "cpu.h"
 #include "exec/helper-proto.h"
 
-/* If the byte ordering doesn't matter, i.e. all columns are treated
-   identically, then this union can be used directly.  If byte ordering
-   does matter, we generally ignore dumping to memory.  */
+/*
+ * If the byte ordering doesn't matter, i.e. all columns are treated
+ * identically, then this union can be used directly.  If byte ordering
+ * does matter, we generally ignore dumping to memory.
+ */
 typedef union {
     uint8_t  ub[8];
     int8_t   sb[8];
diff --git a/target/mips/msa_helper.c b/target/mips/msa_helper.c
index ee1b1fa5f5..f6e16c2405 100644
--- a/target/mips/msa_helper.c
+++ b/target/mips/msa_helper.c
@@ -805,28 +805,45 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df,         \
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
-    uint32_t i;                                                         \
                                                                         \
     switch (df) {                                                       \
     case DF_BYTE:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
-            pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], pwt->b[i]);  \
-        }                                                               \
+        pwd->b[0]  = msa_ ## func ## _df(df, pws->b[0],  pwt->b[0]);    \
+        pwd->b[1]  = msa_ ## func ## _df(df, pws->b[1],  pwt->b[1]);    \
+        pwd->b[2]  = msa_ ## func ## _df(df, pws->b[2],  pwt->b[2]);    \
+        pwd->b[3]  = msa_ ## func ## _df(df, pws->b[3],  pwt->b[3]);    \
+        pwd->b[4]  = msa_ ## func ## _df(df, pws->b[4],  pwt->b[4]);    \
+        pwd->b[5]  = msa_ ## func ## _df(df, pws->b[5],  pwt->b[5]);    \
+        pwd->b[6]  = msa_ ## func ## _df(df, pws->b[6],  pwt->b[6]);    \
+        pwd->b[7]  = msa_ ## func ## _df(df, pws->b[7],  pwt->b[7]);    \
+        pwd->b[8]  = msa_ ## func ## _df(df, pws->b[8],  pwt->b[8]);    \
+        pwd->b[9]  = msa_ ## func ## _df(df, pws->b[9],  pwt->b[9]);    \
+        pwd->b[10] = msa_ ## func ## _df(df, pws->b[10], pwt->b[10]);   \
+        pwd->b[11] = msa_ ## func ## _df(df, pws->b[11], pwt->b[11]);   \
+        pwd->b[12] = msa_ ## func ## _df(df, pws->b[12], pwt->b[12]);   \
+        pwd->b[13] = msa_ ## func ## _df(df, pws->b[13], pwt->b[13]);   \
+        pwd->b[14] = msa_ ## func ## _df(df, pws->b[14], pwt->b[14]);   \
+        pwd->b[15] = msa_ ## func ## _df(df, pws->b[15], pwt->b[15]);   \
         break;                                                          \
     case DF_HALF:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
-            pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], pwt->h[i]);  \
-        }                                                               \
+        pwd->h[0] = msa_ ## func ## _df(df, pws->h[0], pwt->h[0]);      \
+        pwd->h[1] = msa_ ## func ## _df(df, pws->h[1], pwt->h[1]);      \
+        pwd->h[2] = msa_ ## func ## _df(df, pws->h[2], pwt->h[2]);      \
+        pwd->h[3] = msa_ ## func ## _df(df, pws->h[3], pwt->h[3]);      \
+        pwd->h[4] = msa_ ## func ## _df(df, pws->h[4], pwt->h[4]);      \
+        pwd->h[5] = msa_ ## func ## _df(df, pws->h[5], pwt->h[5]);      \
+        pwd->h[6] = msa_ ## func ## _df(df, pws->h[6], pwt->h[6]);      \
+        pwd->h[7] = msa_ ## func ## _df(df, pws->h[7], pwt->h[7]);      \
         break;                                                          \
     case DF_WORD:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
-            pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], pwt->w[i]);  \
-        }                                                               \
+        pwd->w[0] = msa_ ## func ## _df(df, pws->w[0], pwt->w[0]);      \
+        pwd->w[1] = msa_ ## func ## _df(df, pws->w[1], pwt->w[1]);      \
+        pwd->w[2] = msa_ ## func ## _df(df, pws->w[2], pwt->w[2]);      \
+        pwd->w[3] = msa_ ## func ## _df(df, pws->w[3], pwt->w[3]);      \
         break;                                                          \
     case DF_DOUBLE:                                                     \
-        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
-            pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], pwt->d[i]);  \
-        }                                                               \
+        pwd->d[0] = msa_ ## func ## _df(df, pws->d[0], pwt->d[0]);      \
+        pwd->d[1] = msa_ ## func ## _df(df, pws->d[1], pwt->d[1]);      \
         break;                                                          \
     default:                                                            \
         assert(0);                                                      \
@@ -1012,42 +1029,71 @@ static inline int64_t msa_msubr_q_df(uint32_t df, int64_t dest, int64_t arg1,
 }
 
 #define MSA_TEROP_DF(func) \
-void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd,   \
-                          uint32_t ws, uint32_t wt)                     \
-{                                                                       \
-    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
-    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
-    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
-    uint32_t i;                                                         \
-                                                                        \
-    switch (df) {                                                       \
-    case DF_BYTE:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
-            pwd->b[i] = msa_ ## func ## _df(df, pwd->b[i], pws->b[i],   \
-                                            pwt->b[i]);                 \
-        }                                                               \
-        break;                                                          \
-    case DF_HALF:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
-            pwd->h[i] = msa_ ## func ## _df(df, pwd->h[i], pws->h[i],   \
-                                            pwt->h[i]);                 \
-        }                                                               \
-        break;                                                          \
-    case DF_WORD:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
-            pwd->w[i] = msa_ ## func ## _df(df, pwd->w[i], pws->w[i],   \
-                                            pwt->w[i]);                 \
-        }                                                               \
-        break;                                                          \
-    case DF_DOUBLE:                                                     \
-        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
-            pwd->d[i] = msa_ ## func ## _df(df, pwd->d[i], pws->d[i],   \
-                                            pwt->d[i]);                 \
-        }                                                               \
-        break;                                                          \
-    default:                                                            \
-        assert(0);                                                      \
-    }                                                                   \
+void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd,  \
+                                uint32_t ws, uint32_t wt)                     \
+{                                                                             \
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                                \
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                                \
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                                \
+                                                                              \
+    switch (df) {                                                             \
+    case DF_BYTE:                                                             \
+        pwd->b[0]  = msa_ ## func ## _df(df, pwd->b[0],  pws->b[0],           \
+                                             pwt->b[0]);                      \
+        pwd->b[1]  = msa_ ## func ## _df(df, pwd->b[1],  pws->b[1],           \
+                                             pwt->b[1]);                      \
+        pwd->b[2]  = msa_ ## func ## _df(df, pwd->b[2],  pws->b[2],           \
+                                             pwt->b[2]);                      \
+        pwd->b[3]  = msa_ ## func ## _df(df, pwd->b[3],  pws->b[3],           \
+                                             pwt->b[3]);                      \
+        pwd->b[4]  = msa_ ## func ## _df(df, pwd->b[4],  pws->b[4],           \
+                                             pwt->b[4]);                      \
+        pwd->b[5]  = msa_ ## func ## _df(df, pwd->b[5],  pws->b[5],           \
+                                             pwt->b[5]);                      \
+        pwd->b[6]  = msa_ ## func ## _df(df, pwd->b[6],  pws->b[6],           \
+                                             pwt->b[6]);                      \
+        pwd->b[7]  = msa_ ## func ## _df(df, pwd->b[7],  pws->b[7],           \
+                                             pwt->b[7]);                      \
+        pwd->b[8]  = msa_ ## func ## _df(df, pwd->b[8],  pws->b[8],           \
+                                             pwt->b[8]);                      \
+        pwd->b[9]  = msa_ ## func ## _df(df, pwd->b[9],  pws->b[9],           \
+                                             pwt->b[9]);                      \
+        pwd->b[10] = msa_ ## func ## _df(df, pwd->b[10], pws->b[10],          \
+                                             pwt->b[10]);                     \
+        pwd->b[11] = msa_ ## func ## _df(df, pwd->b[11], pws->b[11],          \
+                                             pwt->b[11]);                     \
+        pwd->b[12] = msa_ ## func ## _df(df, pwd->b[12], pws->b[12],          \
+                                             pwt->b[12]);                     \
+        pwd->b[13] = msa_ ## func ## _df(df, pwd->b[13], pws->b[13],          \
+                                             pwt->b[13]);                     \
+        pwd->b[14] = msa_ ## func ## _df(df, pwd->b[14], pws->b[14],          \
+                                             pwt->b[14]);                     \
+        pwd->b[15] = msa_ ## func ## _df(df, pwd->b[15], pws->b[15],          \
+                                             pwt->b[15]);                     \
+        break;                                                                \
+    case DF_HALF:                                                             \
+        pwd->h[0] = msa_ ## func ## _df(df, pwd->h[0], pws->h[0], pwt->h[0]); \
+        pwd->h[1] = msa_ ## func ## _df(df, pwd->h[1], pws->h[1], pwt->h[1]); \
+        pwd->h[2] = msa_ ## func ## _df(df, pwd->h[2], pws->h[2], pwt->h[2]); \
+        pwd->h[3] = msa_ ## func ## _df(df, pwd->h[3], pws->h[3], pwt->h[3]); \
+        pwd->h[4] = msa_ ## func ## _df(df, pwd->h[4], pws->h[4], pwt->h[4]); \
+        pwd->h[5] = msa_ ## func ## _df(df, pwd->h[5], pws->h[5], pwt->h[5]); \
+        pwd->h[6] = msa_ ## func ## _df(df, pwd->h[6], pws->h[6], pwt->h[6]); \
+        pwd->h[7] = msa_ ## func ## _df(df, pwd->h[7], pws->h[7], pwt->h[7]); \
+        break;                                                                \
+    case DF_WORD:                                                             \
+        pwd->w[0] = msa_ ## func ## _df(df, pwd->w[0], pws->w[0], pwt->w[0]); \
+        pwd->w[1] = msa_ ## func ## _df(df, pwd->w[1], pws->w[1], pwt->w[1]); \
+        pwd->w[2] = msa_ ## func ## _df(df, pwd->w[2], pws->w[2], pwt->w[2]); \
+        pwd->w[3] = msa_ ## func ## _df(df, pwd->w[3], pws->w[3], pwt->w[3]); \
+        break;                                                                \
+    case DF_DOUBLE:                                                           \
+        pwd->d[0] = msa_ ## func ## _df(df, pwd->d[0], pws->d[0], pwt->d[0]); \
+        pwd->d[1] = msa_ ## func ## _df(df, pwd->d[1], pws->d[1], pwt->d[1]); \
+        break;                                                                \
+    default:                                                                  \
+        assert(0);                                                            \
+    }                                                                         \
 }
 
 MSA_TEROP_DF(maddv)
@@ -1167,53 +1213,6 @@ void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \
 #define Rd(pwr, i) (pwr->d[i])
 #define Ld(pwr, i) (pwr->d[i + DF_ELEMENTS(DF_DOUBLE)/2])
 
-#define MSA_DO(DF)                      \
-    do {                                \
-        R##DF(pwx, i) = pwt->DF[2*i];   \
-        L##DF(pwx, i) = pws->DF[2*i];   \
-    } while (0)
-MSA_FN_DF(pckev_df)
-#undef MSA_DO
-
-#define MSA_DO(DF)                      \
-    do {                                \
-        R##DF(pwx, i) = pwt->DF[2*i+1]; \
-        L##DF(pwx, i) = pws->DF[2*i+1]; \
-    } while (0)
-MSA_FN_DF(pckod_df)
-#undef MSA_DO
-
-#define MSA_DO(DF)                      \
-    do {                                \
-        pwx->DF[2*i]   = L##DF(pwt, i); \
-        pwx->DF[2*i+1] = L##DF(pws, i); \
-    } while (0)
-MSA_FN_DF(ilvl_df)
-#undef MSA_DO
-
-#define MSA_DO(DF)                      \
-    do {                                \
-        pwx->DF[2*i]   = R##DF(pwt, i); \
-        pwx->DF[2*i+1] = R##DF(pws, i); \
-    } while (0)
-MSA_FN_DF(ilvr_df)
-#undef MSA_DO
-
-#define MSA_DO(DF)                      \
-    do {                                \
-        pwx->DF[2*i]   = pwt->DF[2*i];  \
-        pwx->DF[2*i+1] = pws->DF[2*i];  \
-    } while (0)
-MSA_FN_DF(ilvev_df)
-#undef MSA_DO
-
-#define MSA_DO(DF)                          \
-    do {                                    \
-        pwx->DF[2*i]   = pwt->DF[2*i+1];    \
-        pwx->DF[2*i+1] = pws->DF[2*i+1];    \
-    } while (0)
-MSA_FN_DF(ilvod_df)
-#undef MSA_DO
 #undef MSA_LOOP_COND
 
 #define MSA_LOOP_COND(DF) \
@@ -1231,6 +1230,314 @@ MSA_FN_DF(vshf_df)
 #undef MSA_LOOP_COND
 #undef MSA_FN_DF
 
+
+void helper_msa_ilvev_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                         uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    switch (df) {
+    case DF_BYTE:
+        pwd->b[15] = pws->b[14];
+        pwd->b[14] = pwt->b[14];
+        pwd->b[13] = pws->b[12];
+        pwd->b[12] = pwt->b[12];
+        pwd->b[11] = pws->b[10];
+        pwd->b[10] = pwt->b[10];
+        pwd->b[9]  = pws->b[8];
+        pwd->b[8]  = pwt->b[8];
+        pwd->b[7]  = pws->b[6];
+        pwd->b[6]  = pwt->b[6];
+        pwd->b[5]  = pws->b[4];
+        pwd->b[4]  = pwt->b[4];
+        pwd->b[3]  = pws->b[2];
+        pwd->b[2]  = pwt->b[2];
+        pwd->b[1]  = pws->b[0];
+        pwd->b[0]  = pwt->b[0];
+        break;
+    case DF_HALF:
+        pwd->h[7] = pws->h[6];
+        pwd->h[6] = pwt->h[6];
+        pwd->h[5] = pws->h[4];
+        pwd->h[4] = pwt->h[4];
+        pwd->h[3] = pws->h[2];
+        pwd->h[2] = pwt->h[2];
+        pwd->h[1] = pws->h[0];
+        pwd->h[0] = pwt->h[0];
+        break;
+    case DF_WORD:
+        pwd->w[3] = pws->w[2];
+        pwd->w[2] = pwt->w[2];
+        pwd->w[1] = pws->w[0];
+        pwd->w[0] = pwt->w[0];
+        break;
+    case DF_DOUBLE:
+        pwd->d[1] = pws->d[0];
+        pwd->d[0] = pwt->d[0];
+        break;
+    default:
+        assert(0);
+    }
+}
+
+void helper_msa_ilvod_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                         uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    switch (df) {
+    case DF_BYTE:
+        pwd->b[0]  = pwt->b[1];
+        pwd->b[1]  = pws->b[1];
+        pwd->b[2]  = pwt->b[3];
+        pwd->b[3]  = pws->b[3];
+        pwd->b[4]  = pwt->b[5];
+        pwd->b[5]  = pws->b[5];
+        pwd->b[6]  = pwt->b[7];
+        pwd->b[7]  = pws->b[7];
+        pwd->b[8]  = pwt->b[9];
+        pwd->b[9]  = pws->b[9];
+        pwd->b[10] = pwt->b[11];
+        pwd->b[11] = pws->b[11];
+        pwd->b[12] = pwt->b[13];
+        pwd->b[13] = pws->b[13];
+        pwd->b[14] = pwt->b[15];
+        pwd->b[15] = pws->b[15];
+        break;
+    case DF_HALF:
+        pwd->h[0] = pwt->h[1];
+        pwd->h[1] = pws->h[1];
+        pwd->h[2] = pwt->h[3];
+        pwd->h[3] = pws->h[3];
+        pwd->h[4] = pwt->h[5];
+        pwd->h[5] = pws->h[5];
+        pwd->h[6] = pwt->h[7];
+        pwd->h[7] = pws->h[7];
+        break;
+    case DF_WORD:
+        pwd->w[0] = pwt->w[1];
+        pwd->w[1] = pws->w[1];
+        pwd->w[2] = pwt->w[3];
+        pwd->w[3] = pws->w[3];
+        break;
+    case DF_DOUBLE:
+        pwd->d[0] = pwt->d[1];
+        pwd->d[1] = pws->d[1];
+        break;
+    default:
+        assert(0);
+    }
+}
+
+void helper_msa_ilvl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                        uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    switch (df) {
+    case DF_BYTE:
+        pwd->b[0]  = pwt->b[8];
+        pwd->b[1]  = pws->b[8];
+        pwd->b[2]  = pwt->b[9];
+        pwd->b[3]  = pws->b[9];
+        pwd->b[4]  = pwt->b[10];
+        pwd->b[5]  = pws->b[10];
+        pwd->b[6]  = pwt->b[11];
+        pwd->b[7]  = pws->b[11];
+        pwd->b[8]  = pwt->b[12];
+        pwd->b[9]  = pws->b[12];
+        pwd->b[10] = pwt->b[13];
+        pwd->b[11] = pws->b[13];
+        pwd->b[12] = pwt->b[14];
+        pwd->b[13] = pws->b[14];
+        pwd->b[14] = pwt->b[15];
+        pwd->b[15] = pws->b[15];
+        break;
+    case DF_HALF:
+        pwd->h[0] = pwt->h[4];
+        pwd->h[1] = pws->h[4];
+        pwd->h[2] = pwt->h[5];
+        pwd->h[3] = pws->h[5];
+        pwd->h[4] = pwt->h[6];
+        pwd->h[5] = pws->h[6];
+        pwd->h[6] = pwt->h[7];
+        pwd->h[7] = pws->h[7];
+        break;
+    case DF_WORD:
+        pwd->w[0] = pwt->w[2];
+        pwd->w[1] = pws->w[2];
+        pwd->w[2] = pwt->w[3];
+        pwd->w[3] = pws->w[3];
+        break;
+    case DF_DOUBLE:
+        pwd->d[0] = pwt->d[1];
+        pwd->d[1] = pws->d[1];
+        break;
+    default:
+        assert(0);
+    }
+}
+
+void helper_msa_ilvr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                        uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    switch (df) {
+    case DF_BYTE:
+        pwd->b[15] = pws->b[7];
+        pwd->b[14] = pwt->b[7];
+        pwd->b[13] = pws->b[6];
+        pwd->b[12] = pwt->b[6];
+        pwd->b[11] = pws->b[5];
+        pwd->b[10] = pwt->b[5];
+        pwd->b[9]  = pws->b[4];
+        pwd->b[8]  = pwt->b[4];
+        pwd->b[7]  = pws->b[3];
+        pwd->b[6]  = pwt->b[3];
+        pwd->b[5]  = pws->b[2];
+        pwd->b[4]  = pwt->b[2];
+        pwd->b[3]  = pws->b[1];
+        pwd->b[2]  = pwt->b[1];
+        pwd->b[1]  = pws->b[0];
+        pwd->b[0]  = pwt->b[0];
+        break;
+    case DF_HALF:
+        pwd->h[7] = pws->h[3];
+        pwd->h[6] = pwt->h[3];
+        pwd->h[5] = pws->h[2];
+        pwd->h[4] = pwt->h[2];
+        pwd->h[3] = pws->h[1];
+        pwd->h[2] = pwt->h[1];
+        pwd->h[1] = pws->h[0];
+        pwd->h[0] = pwt->h[0];
+        break;
+    case DF_WORD:
+        pwd->w[3] = pws->w[1];
+        pwd->w[2] = pwt->w[1];
+        pwd->w[1] = pws->w[0];
+        pwd->w[0] = pwt->w[0];
+        break;
+    case DF_DOUBLE:
+        pwd->d[1] = pws->d[0];
+        pwd->d[0] = pwt->d[0];
+        break;
+    default:
+        assert(0);
+    }
+}
+
+void helper_msa_pckev_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                         uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    switch (df) {
+    case DF_BYTE:
+        pwd->b[15] = pws->b[14];
+        pwd->b[13] = pws->b[10];
+        pwd->b[11] = pws->b[6];
+        pwd->b[9]  = pws->b[2];
+        pwd->b[7]  = pwt->b[14];
+        pwd->b[5]  = pwt->b[10];
+        pwd->b[3]  = pwt->b[6];
+        pwd->b[1]  = pwt->b[2];
+        pwd->b[14] = pws->b[12];
+        pwd->b[10] = pws->b[4];
+        pwd->b[6]  = pwt->b[12];
+        pwd->b[2]  = pwt->b[4];
+        pwd->b[12] = pws->b[8];
+        pwd->b[4]  = pwt->b[8];
+        pwd->b[8]  = pws->b[0];
+        pwd->b[0]  = pwt->b[0];
+        break;
+    case DF_HALF:
+        pwd->h[7] = pws->h[6];
+        pwd->h[5] = pws->h[2];
+        pwd->h[3] = pwt->h[6];
+        pwd->h[1] = pwt->h[2];
+        pwd->h[6] = pws->h[4];
+        pwd->h[2] = pwt->h[4];
+        pwd->h[4] = pws->h[0];
+        pwd->h[0] = pwt->h[0];
+        break;
+    case DF_WORD:
+        pwd->w[3] = pws->w[2];
+        pwd->w[1] = pwt->w[2];
+        pwd->w[2] = pws->w[0];
+        pwd->w[0] = pwt->w[0];
+        break;
+    case DF_DOUBLE:
+        pwd->d[1] = pws->d[0];
+        pwd->d[0] = pwt->d[0];
+        break;
+    default:
+        assert(0);
+    }
+}
+
+void helper_msa_pckod_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                         uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    switch (df) {
+    case DF_BYTE:
+        pwd->b[0]  = pwt->b[1];
+        pwd->b[2]  = pwt->b[5];
+        pwd->b[4]  = pwt->b[9];
+        pwd->b[6]  = pwt->b[13];
+        pwd->b[8]  = pws->b[1];
+        pwd->b[10] = pws->b[5];
+        pwd->b[12] = pws->b[9];
+        pwd->b[14] = pws->b[13];
+        pwd->b[1]  = pwt->b[3];
+        pwd->b[5]  = pwt->b[11];
+        pwd->b[9]  = pws->b[3];
+        pwd->b[13] = pws->b[11];
+        pwd->b[3]  = pwt->b[7];
+        pwd->b[11] = pws->b[7];
+        pwd->b[7]  = pwt->b[15];
+        pwd->b[15] = pws->b[15];
+        break;
+    case DF_HALF:
+        pwd->h[0] = pwt->h[1];
+        pwd->h[2] = pwt->h[5];
+        pwd->h[4] = pws->h[1];
+        pwd->h[6] = pws->h[5];
+        pwd->h[1] = pwt->h[3];
+        pwd->h[5] = pws->h[3];
+        pwd->h[3] = pwt->h[7];
+        pwd->h[7] = pws->h[7];
+        break;
+    case DF_WORD:
+        pwd->w[0] = pwt->w[1];
+        pwd->w[2] = pws->w[1];
+        pwd->w[1] = pwt->w[3];
+        pwd->w[3] = pws->w[3];
+        break;
+    case DF_DOUBLE:
+        pwd->d[0] = pwt->d[1];
+        pwd->d[1] = pws->d[1];
+        break;
+    default:
+        assert(0);
+    }
+}
+
+
 void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t n)
 {
@@ -1537,28 +1844,45 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df,         \
 {                                                                       \
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
-    uint32_t i;                                                         \
                                                                         \
     switch (df) {                                                       \
     case DF_BYTE:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
-            pwd->b[i] = msa_ ## func ## _df(df, pws->b[i]);             \
-        }                                                               \
+        pwd->b[0]  = msa_ ## func ## _df(df, pws->b[0]);                \
+        pwd->b[1]  = msa_ ## func ## _df(df, pws->b[1]);                \
+        pwd->b[2]  = msa_ ## func ## _df(df, pws->b[2]);                \
+        pwd->b[3]  = msa_ ## func ## _df(df, pws->b[3]);                \
+        pwd->b[4]  = msa_ ## func ## _df(df, pws->b[4]);                \
+        pwd->b[5]  = msa_ ## func ## _df(df, pws->b[5]);                \
+        pwd->b[6]  = msa_ ## func ## _df(df, pws->b[6]);                \
+        pwd->b[7]  = msa_ ## func ## _df(df, pws->b[7]);                \
+        pwd->b[8]  = msa_ ## func ## _df(df, pws->b[8]);                \
+        pwd->b[9]  = msa_ ## func ## _df(df, pws->b[9]);                \
+        pwd->b[10] = msa_ ## func ## _df(df, pws->b[10]);               \
+        pwd->b[11] = msa_ ## func ## _df(df, pws->b[11]);               \
+        pwd->b[12] = msa_ ## func ## _df(df, pws->b[12]);               \
+        pwd->b[13] = msa_ ## func ## _df(df, pws->b[13]);               \
+        pwd->b[14] = msa_ ## func ## _df(df, pws->b[14]);               \
+        pwd->b[15] = msa_ ## func ## _df(df, pws->b[15]);               \
         break;                                                          \
     case DF_HALF:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
-            pwd->h[i] = msa_ ## func ## _df(df, pws->h[i]);             \
-        }                                                               \
+        pwd->h[0] = msa_ ## func ## _df(df, pws->h[0]);                 \
+        pwd->h[1] = msa_ ## func ## _df(df, pws->h[1]);                 \
+        pwd->h[2] = msa_ ## func ## _df(df, pws->h[2]);                 \
+        pwd->h[3] = msa_ ## func ## _df(df, pws->h[3]);                 \
+        pwd->h[4] = msa_ ## func ## _df(df, pws->h[4]);                 \
+        pwd->h[5] = msa_ ## func ## _df(df, pws->h[5]);                 \
+        pwd->h[6] = msa_ ## func ## _df(df, pws->h[6]);                 \
+        pwd->h[7] = msa_ ## func ## _df(df, pws->h[7]);                 \
         break;                                                          \
     case DF_WORD:                                                       \
-        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
-            pwd->w[i] = msa_ ## func ## _df(df, pws->w[i]);             \
-        }                                                               \
+        pwd->w[0] = msa_ ## func ## _df(df, pws->w[0]);                 \
+        pwd->w[1] = msa_ ## func ## _df(df, pws->w[1]);                 \
+        pwd->w[2] = msa_ ## func ## _df(df, pws->w[2]);                 \
+        pwd->w[3] = msa_ ## func ## _df(df, pws->w[3]);                 \
         break;                                                          \
     case DF_DOUBLE:                                                     \
-        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
-            pwd->d[i] = msa_ ## func ## _df(df, pws->d[i]);             \
-        }                                                               \
+        pwd->d[0] = msa_ ## func ## _df(df, pws->d[0]);                 \
+        pwd->d[1] = msa_ ## func ## _df(df, pws->d[1]);                 \
         break;                                                          \
     default:                                                            \
         assert(0);                                                      \
diff --git a/target/mips/translate.c b/target/mips/translate.c
index 70552fe543..e37722dfff 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -24357,6 +24357,146 @@ static void decode_opc_special(CPUMIPSState *env, DisasContext *ctx)
  *                     PEXTUW
  */
 
+/*
+ *  PCPYH rd, rt
+ *
+ *    Parallel Copy Halfword
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+---------+---------+---------+-----------+
+ *  |    MMI    |0 0 0 0 0|   rt    |   rd    |  PCPYH  |    MMI3   |
+ *  +-----------+---------+---------+---------+---------+-----------+
+ */
+static void gen_mmi_pcpyh(DisasContext *ctx)
+{
+    uint32_t pd, rt, rd;
+    uint32_t opcode;
+
+    opcode = ctx->opcode;
+
+    pd = extract32(opcode, 21, 5);
+    rt = extract32(opcode, 16, 5);
+    rd = extract32(opcode, 11, 5);
+
+    if (unlikely(pd != 0)) {
+        generate_exception_end(ctx, EXCP_RI);
+    } else if (rd == 0) {
+        /* nop */
+    } else if (rt == 0) {
+        tcg_gen_movi_i64(cpu_gpr[rd], 0);
+        tcg_gen_movi_i64(cpu_mmr[rd], 0);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new();
+        TCGv_i64 t1 = tcg_temp_new();
+        uint64_t mask = (1ULL << 16) - 1;
+
+        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask);
+        tcg_gen_movi_i64(t1, 0);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_shli_i64(t0, t0, 16);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_shli_i64(t0, t0, 16);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_shli_i64(t0, t0, 16);
+        tcg_gen_or_i64(t1, t0, t1);
+
+        tcg_gen_mov_i64(cpu_gpr[rd], t1);
+
+        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask);
+        tcg_gen_movi_i64(t1, 0);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_shli_i64(t0, t0, 16);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_shli_i64(t0, t0, 16);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_shli_i64(t0, t0, 16);
+        tcg_gen_or_i64(t1, t0, t1);
+
+        tcg_gen_mov_i64(cpu_mmr[rd], t1);
+
+        tcg_temp_free(t0);
+        tcg_temp_free(t1);
+    }
+}
+
+/*
+ *  PCPYLD rd, rs, rt
+ *
+ *    Parallel Copy Lower Doubleword
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+---------+---------+---------+-----------+
+ *  |    MMI    |   rs    |   rt    |   rd    | PCPYLD  |    MMI2   |
+ *  +-----------+---------+---------+---------+---------+-----------+
+ */
+static void gen_mmi_pcpyld(DisasContext *ctx)
+{
+    uint32_t rs, rt, rd;
+    uint32_t opcode;
+
+    opcode = ctx->opcode;
+
+    rs = extract32(opcode, 21, 5);
+    rt = extract32(opcode, 16, 5);
+    rd = extract32(opcode, 11, 5);
+
+    if (rd == 0) {
+        /* nop */
+    } else {
+        if (rs == 0) {
+            tcg_gen_movi_i64(cpu_mmr[rd], 0);
+        } else {
+            tcg_gen_mov_i64(cpu_mmr[rd], cpu_gpr[rs]);
+        }
+        if (rt == 0) {
+            tcg_gen_movi_i64(cpu_gpr[rd], 0);
+        } else {
+            if (rd != rt) {
+                tcg_gen_mov_i64(cpu_gpr[rd], cpu_gpr[rt]);
+            }
+        }
+    }
+}
+
+/*
+ *  PCPYUD rd, rs, rt
+ *
+ *    Parallel Copy Upper Doubleword
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+---------+---------+---------+-----------+
+ *  |    MMI    |   rs    |   rt    |   rd    | PCPYUD  |    MMI3   |
+ *  +-----------+---------+---------+---------+---------+-----------+
+ */
+static void gen_mmi_pcpyud(DisasContext *ctx)
+{
+    uint32_t rs, rt, rd;
+    uint32_t opcode;
+
+    opcode = ctx->opcode;
+
+    rs = extract32(opcode, 21, 5);
+    rt = extract32(opcode, 16, 5);
+    rd = extract32(opcode, 11, 5);
+
+    if (rd == 0) {
+        /* nop */
+    } else {
+        if (rs == 0) {
+            tcg_gen_movi_i64(cpu_gpr[rd], 0);
+        } else {
+            tcg_gen_mov_i64(cpu_gpr[rd], cpu_mmr[rs]);
+        }
+        if (rt == 0) {
+            tcg_gen_movi_i64(cpu_mmr[rd], 0);
+        } else {
+            if (rd != rt) {
+                tcg_gen_mov_i64(cpu_mmr[rd], cpu_mmr[rt]);
+            }
+        }
+    }
+}
+
 #endif
 
 
@@ -27371,7 +27511,6 @@ static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
     case MMI_OPC_2_PINTH:     /* TODO: MMI_OPC_2_PINTH */
     case MMI_OPC_2_PMULTW:    /* TODO: MMI_OPC_2_PMULTW */
     case MMI_OPC_2_PDIVW:     /* TODO: MMI_OPC_2_PDIVW */
-    case MMI_OPC_2_PCPYLD:    /* TODO: MMI_OPC_2_PCPYLD */
     case MMI_OPC_2_PMADDH:    /* TODO: MMI_OPC_2_PMADDH */
     case MMI_OPC_2_PHMADH:    /* TODO: MMI_OPC_2_PHMADH */
     case MMI_OPC_2_PAND:      /* TODO: MMI_OPC_2_PAND */
@@ -27386,6 +27525,9 @@ static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
     case MMI_OPC_2_PROT3W:    /* TODO: MMI_OPC_2_PROT3W */
         generate_exception_end(ctx, EXCP_RI); /* TODO: MMI_OPC_CLASS_MMI2 */
         break;
+    case MMI_OPC_2_PCPYLD:
+        gen_mmi_pcpyld(ctx);
+        break;
     default:
         MIPS_INVAL("TX79 MMI class MMI2");
         generate_exception_end(ctx, EXCP_RI);
@@ -27405,14 +27547,18 @@ static void decode_mmi3(CPUMIPSState *env, DisasContext *ctx)
     case MMI_OPC_3_PINTEH:     /* TODO: MMI_OPC_3_PINTEH */
     case MMI_OPC_3_PMULTUW:    /* TODO: MMI_OPC_3_PMULTUW */
     case MMI_OPC_3_PDIVUW:     /* TODO: MMI_OPC_3_PDIVUW */
-    case MMI_OPC_3_PCPYUD:     /* TODO: MMI_OPC_3_PCPYUD */
     case MMI_OPC_3_POR:        /* TODO: MMI_OPC_3_POR */
     case MMI_OPC_3_PNOR:       /* TODO: MMI_OPC_3_PNOR */
     case MMI_OPC_3_PEXCH:      /* TODO: MMI_OPC_3_PEXCH */
-    case MMI_OPC_3_PCPYH:      /* TODO: MMI_OPC_3_PCPYH */
     case MMI_OPC_3_PEXCW:      /* TODO: MMI_OPC_3_PEXCW */
         generate_exception_end(ctx, EXCP_RI); /* TODO: MMI_OPC_CLASS_MMI3 */
         break;
+    case MMI_OPC_3_PCPYH:
+        gen_mmi_pcpyh(ctx);
+        break;
+    case MMI_OPC_3_PCPYUD:
+        gen_mmi_pcpyud(ctx);
+        break;
     default:
         MIPS_INVAL("TX79 MMI class MMI3");
         generate_exception_end(ctx, EXCP_RI);
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 638a6e99c4..02b67a333e 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -180,18 +180,6 @@ DEF_HELPER_3(vmuloub, void, avr, avr, avr)
 DEF_HELPER_3(vmulouh, void, avr, avr, avr)
 DEF_HELPER_3(vmulouw, void, avr, avr, avr)
 DEF_HELPER_3(vmuluwm, void, avr, avr, avr)
-DEF_HELPER_3(vsrab, void, avr, avr, avr)
-DEF_HELPER_3(vsrah, void, avr, avr, avr)
-DEF_HELPER_3(vsraw, void, avr, avr, avr)
-DEF_HELPER_3(vsrad, void, avr, avr, avr)
-DEF_HELPER_3(vsrb, void, avr, avr, avr)
-DEF_HELPER_3(vsrh, void, avr, avr, avr)
-DEF_HELPER_3(vsrw, void, avr, avr, avr)
-DEF_HELPER_3(vsrd, void, avr, avr, avr)
-DEF_HELPER_3(vslb, void, avr, avr, avr)
-DEF_HELPER_3(vslh, void, avr, avr, avr)
-DEF_HELPER_3(vslw, void, avr, avr, avr)
-DEF_HELPER_3(vsld, void, avr, avr, avr)
 DEF_HELPER_3(vslo, void, avr, avr, avr)
 DEF_HELPER_3(vsro, void, avr, avr, avr)
 DEF_HELPER_3(vsrv, void, avr, avr, avr)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 9af779ad38..8ce89f2ad9 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1791,23 +1791,6 @@ VSHIFT(l, 1)
 VSHIFT(r, 0)
 #undef VSHIFT
 
-#define VSL(suffix, element, mask)                                      \
-    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
-    {                                                                   \
-        int i;                                                          \
-                                                                        \
-        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
-            unsigned int shift = b->element[i] & mask;                  \
-                                                                        \
-            r->element[i] = a->element[i] << shift;                     \
-        }                                                               \
-    }
-VSL(b, u8, 0x7)
-VSL(h, u16, 0x0F)
-VSL(w, u32, 0x1F)
-VSL(d, u64, 0x3F)
-#undef VSL
-
 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 {
     int i;
@@ -1815,10 +1798,10 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 
     size = ARRAY_SIZE(r->u8);
     for (i = 0; i < size; i++) {
-        shift = b->u8[i] & 0x7;             /* extract shift value */
-        bytes = (a->u8[i] << 8) +             /* extract adjacent bytes */
-            (((i + 1) < size) ? a->u8[i + 1] : 0);
-        r->u8[i] = (bytes << shift) >> 8;   /* shift and store result */
+        shift = b->VsrB(i) & 0x7;             /* extract shift value */
+        bytes = (a->VsrB(i) << 8) +           /* extract adjacent bytes */
+            (((i + 1) < size) ? a->VsrB(i + 1) : 0);
+        r->VsrB(i) = (bytes << shift) >> 8;   /* shift and store result */
     }
 }
 
@@ -1833,10 +1816,10 @@ void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
      * order will guarantee that computed result is not fed back.
      */
     for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
-        shift = b->u8[i] & 0x7;                 /* extract shift value */
-        bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
+        shift = b->VsrB(i) & 0x7;               /* extract shift value */
+        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
                                                 /* extract adjacent bytes */
-        r->u8[i] = (bytes >> shift) & 0xFF;     /* shift and store result */
+        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
     }
 }
 
@@ -1980,26 +1963,6 @@ VNEG(vnegw, s32)
 VNEG(vnegd, s64)
 #undef VNEG
 
-#define VSR(suffix, element, mask)                                      \
-    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
-    {                                                                   \
-        int i;                                                          \
-                                                                        \
-        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
-            unsigned int shift = b->element[i] & mask;                  \
-            r->element[i] = a->element[i] >> shift;                     \
-        }                                                               \
-    }
-VSR(ab, s8, 0x7)
-VSR(ah, s16, 0xF)
-VSR(aw, s32, 0x1F)
-VSR(ad, s64, 0x3F)
-VSR(b, u8, 0x7)
-VSR(h, u16, 0xF)
-VSR(w, u32, 0x1F)
-VSR(d, u64, 0x3F)
-#undef VSR
-
 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 {
     int sh = (b->VsrB(0xf) >> 3) & 0xf;
@@ -2053,7 +2016,7 @@ void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
     for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
         int64_t t = (int64_t)b->VsrSW(upper + i * 2);
 
-        result.VsrW(i) = 0;
+        result.VsrD(i) = 0;
         for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
             t += a->VsrSW(2 * i + j);
         }
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 02e22e2017..3bf0a46c33 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -75,6 +75,7 @@ static int cap_fixup_hcalls;
 static int cap_htm;             /* Hardware transactional memory support */
 static int cap_mmu_radix;
 static int cap_mmu_hash_v3;
+static int cap_xive;
 static int cap_resize_hpt;
 static int cap_ppc_pvr_compat;
 static int cap_ppc_safe_cache;
@@ -146,6 +147,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
+    cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE);
     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
     kvmppc_get_cpu_characteristics(s);
     cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
@@ -1721,7 +1723,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
             trace_kvm_handle_dcr_write();
             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
         } else {
-            trace_kvm_handle_drc_read();
+            trace_kvm_handle_dcr_read();
             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
         }
         break;
@@ -2478,6 +2480,11 @@ static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
     return 0;
 }
 
+bool kvmppc_has_cap_xive(void)
+{
+    return cap_xive;
+}
+
 static void kvmppc_get_cpu_characteristics(KVMState *s)
 {
     struct kvm_ppc_cpu_char c;
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 22385134b4..45776cad79 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -60,6 +60,7 @@ bool kvmppc_has_cap_fixup_hcalls(void);
 bool kvmppc_has_cap_htm(void);
 bool kvmppc_has_cap_mmu_radix(void);
 bool kvmppc_has_cap_mmu_hash_v3(void);
+bool kvmppc_has_cap_xive(void);
 int kvmppc_get_cap_safe_cache(void);
 int kvmppc_get_cap_safe_bounds_check(void);
 int kvmppc_get_cap_safe_indirect_branch(void);
@@ -316,6 +317,11 @@ static inline bool kvmppc_has_cap_mmu_hash_v3(void)
     return false;
 }
 
+static inline bool kvmppc_has_cap_xive(void)
+{
+    return false;
+}
+
 static inline int kvmppc_get_cap_safe_cache(void)
 {
     return 0;
diff --git a/target/ppc/trace-events b/target/ppc/trace-events
index 7b3cfe11fd..3dc6740706 100644
--- a/target/ppc/trace-events
+++ b/target/ppc/trace-events
@@ -22,7 +22,7 @@ kvm_failed_put_vpa(void) "Warning: Unable to set VPA information to KVM"
 kvm_failed_get_vpa(void) "Warning: Unable to get VPA information from KVM"
 kvm_injected_interrupt(int irq) "injected interrupt %d"
 kvm_handle_dcr_write(void) "handle dcr write"
-kvm_handle_drc_read(void) "handle dcr read"
+kvm_handle_dcr_read(void) "handle dcr read"
 kvm_handle_halt(void) "handle halt"
 kvm_handle_papr_hcall(void) "handle PAPR hypercall"
 kvm_handle_epr(void) "handle epr"
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index 6861f4c5b9..663275b729 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -530,21 +530,21 @@ GEN_VXFORM(vmuleuw, 4, 10);
 GEN_VXFORM(vmulesb, 4, 12);
 GEN_VXFORM(vmulesh, 4, 13);
 GEN_VXFORM(vmulesw, 4, 14);
-GEN_VXFORM(vslb, 2, 4);
-GEN_VXFORM(vslh, 2, 5);
-GEN_VXFORM(vslw, 2, 6);
+GEN_VXFORM_V(vslb, MO_8, tcg_gen_gvec_shlv, 2, 4);
+GEN_VXFORM_V(vslh, MO_16, tcg_gen_gvec_shlv, 2, 5);
+GEN_VXFORM_V(vslw, MO_32, tcg_gen_gvec_shlv, 2, 6);
 GEN_VXFORM(vrlwnm, 2, 6);
 GEN_VXFORM_DUAL(vslw, PPC_ALTIVEC, PPC_NONE, \
                 vrlwnm, PPC_NONE, PPC2_ISA300)
-GEN_VXFORM(vsld, 2, 23);
-GEN_VXFORM(vsrb, 2, 8);
-GEN_VXFORM(vsrh, 2, 9);
-GEN_VXFORM(vsrw, 2, 10);
-GEN_VXFORM(vsrd, 2, 27);
-GEN_VXFORM(vsrab, 2, 12);
-GEN_VXFORM(vsrah, 2, 13);
-GEN_VXFORM(vsraw, 2, 14);
-GEN_VXFORM(vsrad, 2, 15);
+GEN_VXFORM_V(vsld, MO_64, tcg_gen_gvec_shlv, 2, 23);
+GEN_VXFORM_V(vsrb, MO_8, tcg_gen_gvec_shrv, 2, 8);
+GEN_VXFORM_V(vsrh, MO_16, tcg_gen_gvec_shrv, 2, 9);
+GEN_VXFORM_V(vsrw, MO_32, tcg_gen_gvec_shrv, 2, 10);
+GEN_VXFORM_V(vsrd, MO_64, tcg_gen_gvec_shrv, 2, 27);
+GEN_VXFORM_V(vsrab, MO_8, tcg_gen_gvec_sarv, 2, 12);
+GEN_VXFORM_V(vsrah, MO_16, tcg_gen_gvec_sarv, 2, 13);
+GEN_VXFORM_V(vsraw, MO_32, tcg_gen_gvec_sarv, 2, 14);
+GEN_VXFORM_V(vsrad, MO_64, tcg_gen_gvec_sarv, 2, 15);
 GEN_VXFORM(vsrv, 2, 28);
 GEN_VXFORM(vslv, 2, 29);
 GEN_VXFORM(vslo, 6, 16);
diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
index 11d9b75d01..199d22da97 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -227,7 +227,7 @@ static void gen_lxvb16x(DisasContext *ctx)
     tcg_temp_free_i64(xtl);
 }
 
-#define VSX_VECTOR_LOAD_STORE(name, op, indexed)            \
+#define VSX_VECTOR_LOAD(name, op, indexed)                  \
 static void gen_##name(DisasContext *ctx)                   \
 {                                                           \
     int xt;                                                 \
@@ -254,8 +254,6 @@ static void gen_##name(DisasContext *ctx)                   \
     }                                                       \
     xth = tcg_temp_new_i64();                               \
     xtl = tcg_temp_new_i64();                               \
-    get_cpu_vsrh(xth, xt);                                  \
-    get_cpu_vsrl(xtl, xt);                                  \
     gen_set_access_type(ctx, ACCESS_INT);                   \
     EA = tcg_temp_new();                                    \
     if (indexed) {                                          \
@@ -281,10 +279,61 @@ static void gen_##name(DisasContext *ctx)                   \
     tcg_temp_free_i64(xtl);                                 \
 }
 
-VSX_VECTOR_LOAD_STORE(lxv, ld_i64, 0)
-VSX_VECTOR_LOAD_STORE(stxv, st_i64, 0)
-VSX_VECTOR_LOAD_STORE(lxvx, ld_i64, 1)
-VSX_VECTOR_LOAD_STORE(stxvx, st_i64, 1)
+VSX_VECTOR_LOAD(lxv, ld_i64, 0)
+VSX_VECTOR_LOAD(lxvx, ld_i64, 1)
+
+#define VSX_VECTOR_STORE(name, op, indexed)                 \
+static void gen_##name(DisasContext *ctx)                   \
+{                                                           \
+    int xt;                                                 \
+    TCGv EA;                                                \
+    TCGv_i64 xth;                                           \
+    TCGv_i64 xtl;                                           \
+                                                            \
+    if (indexed) {                                          \
+        xt = xT(ctx->opcode);                               \
+    } else {                                                \
+        xt = DQxT(ctx->opcode);                             \
+    }                                                       \
+                                                            \
+    if (xt < 32) {                                          \
+        if (unlikely(!ctx->vsx_enabled)) {                  \
+            gen_exception(ctx, POWERPC_EXCP_VSXU);          \
+            return;                                         \
+        }                                                   \
+    } else {                                                \
+        if (unlikely(!ctx->altivec_enabled)) {              \
+            gen_exception(ctx, POWERPC_EXCP_VPU);           \
+            return;                                         \
+        }                                                   \
+    }                                                       \
+    xth = tcg_temp_new_i64();                               \
+    xtl = tcg_temp_new_i64();                               \
+    get_cpu_vsrh(xth, xt);                                  \
+    get_cpu_vsrl(xtl, xt);                                  \
+    gen_set_access_type(ctx, ACCESS_INT);                   \
+    EA = tcg_temp_new();                                    \
+    if (indexed) {                                          \
+        gen_addr_reg_index(ctx, EA);                        \
+    } else {                                                \
+        gen_addr_imm_index(ctx, EA, 0x0F);                  \
+    }                                                       \
+    if (ctx->le_mode) {                                     \
+        tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_LEQ);   \
+        tcg_gen_addi_tl(EA, EA, 8);                         \
+        tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_LEQ);   \
+    } else {                                                \
+        tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_BEQ);   \
+        tcg_gen_addi_tl(EA, EA, 8);                         \
+        tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_BEQ);   \
+    }                                                       \
+    tcg_temp_free(EA);                                      \
+    tcg_temp_free_i64(xth);                                 \
+    tcg_temp_free_i64(xtl);                                 \
+}
+
+VSX_VECTOR_STORE(stxv, st_i64, 0)
+VSX_VECTOR_STORE(stxvx, st_i64, 1)
 
 #ifdef TARGET_PPC64
 #define VSX_VECTOR_LOAD_STORE_LENGTH(name)                      \
@@ -329,7 +378,6 @@ static void gen_##name(DisasContext *ctx)                         \
         return;                                                   \
     }                                                             \
     xth = tcg_temp_new_i64();                                     \
-    get_cpu_vsrh(xth, rD(ctx->opcode) + 32);                      \
     gen_set_access_type(ctx, ACCESS_INT);                         \
     EA = tcg_temp_new();                                          \
     gen_addr_imm_index(ctx, EA, 0x03);                            \
@@ -513,8 +561,8 @@ static void gen_##name(DisasContext *ctx)                         \
     tcg_temp_free_i64(xth);                                       \
 }
 
-VSX_LOAD_SCALAR_DS(stxsd, st64_i64)
-VSX_LOAD_SCALAR_DS(stxssp, st32fs)
+VSX_STORE_SCALAR_DS(stxsd, st64_i64)
+VSX_STORE_SCALAR_DS(stxssp, st32fs)
 
 static void gen_mfvsrwz(DisasContext *ctx)
 {
@@ -858,8 +906,8 @@ static void glue(gen_, name)(DisasContext *ctx)                  \
         xbh = tcg_temp_new_i64();                                \
         xbl = tcg_temp_new_i64();                                \
         sgm = tcg_temp_new_i64();                                \
-        set_cpu_vsrh(xB(ctx->opcode), xbh);                      \
-        set_cpu_vsrl(xB(ctx->opcode), xbl);                      \
+        get_cpu_vsrh(xbh, xB(ctx->opcode));                      \
+        get_cpu_vsrl(xbl, xB(ctx->opcode));                      \
         tcg_gen_movi_i64(sgm, sgn_mask);                         \
         switch (op) {                                            \
             case OP_ABS: {                                       \
@@ -1192,7 +1240,7 @@ static void gen_xxbrq(DisasContext *ctx)
     tcg_gen_bswap64_i64(xtl, xbh);
     set_cpu_vsrl(xT(ctx->opcode), xtl);
     tcg_gen_mov_i64(xth, t0);
-    set_cpu_vsrl(xT(ctx->opcode), xth);
+    set_cpu_vsrh(xT(ctx->opcode), xth);
 
     tcg_temp_free_i64(t0);
     tcg_temp_free_i64(xth);
@@ -1220,7 +1268,7 @@ static void gen_xxbrw(DisasContext *ctx)
     get_cpu_vsrl(xbl, xB(ctx->opcode));
 
     gen_bswap32x4(xth, xtl, xbh, xbl);
-    set_cpu_vsrl(xT(ctx->opcode), xth);
+    set_cpu_vsrh(xT(ctx->opcode), xth);
     set_cpu_vsrl(xT(ctx->opcode), xtl);
 
     tcg_temp_free_i64(xth);
@@ -1355,13 +1403,13 @@ static void gen_xxspltib(DisasContext *ctx)
     int rt = xT(ctx->opcode);
 
     if (rt < 32) {
-        if (unlikely(!ctx->altivec_enabled)) {
-            gen_exception(ctx, POWERPC_EXCP_VPU);
+        if (unlikely(!ctx->vsx_enabled)) {
+            gen_exception(ctx, POWERPC_EXCP_VSXU);
             return;
         }
     } else {
-        if (unlikely(!ctx->vsx_enabled)) {
-            gen_exception(ctx, POWERPC_EXCP_VSXU);
+        if (unlikely(!ctx->altivec_enabled)) {
+            gen_exception(ctx, POWERPC_EXCP_VPU);
             return;
         }
     }
@@ -1820,7 +1868,7 @@ static void gen_xvxsigdp(DisasContext *ctx)
     tcg_gen_movi_i64(t0, 0x0010000000000000);
     tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, zr, zr, t0);
     tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, nan, zr, t0);
-    tcg_gen_deposit_i64(xth, t0, xbl, 0, 52);
+    tcg_gen_deposit_i64(xtl, t0, xbl, 0, 52);
     set_cpu_vsrl(xT(ctx->opcode), xtl);
 
     tcg_temp_free_i64(t0);
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 2334969f1f..a15b95e97b 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -255,9 +255,6 @@ check-qtest-sparc64-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF)
 check-qtest-sparc64-y += tests/prom-env-test$(EXESUF)
 check-qtest-sparc64-y += tests/boot-serial-test$(EXESUF)
 
-check-qtest-arm-y += tests/tmp105-test$(EXESUF)
-check-qtest-arm-y += tests/pca9552-test$(EXESUF)
-check-qtest-arm-y += tests/ds1338-test$(EXESUF)
 check-qtest-arm-y += tests/microbit-test$(EXESUF)
 check-qtest-arm-y += tests/m25p80-test$(EXESUF)
 check-qtest-arm-y += tests/test-arm-mptimer$(EXESUF)
@@ -682,7 +679,7 @@ libqgraph-obj-y = tests/libqos/qgraph.o
 
 libqos-obj-y = $(libqgraph-obj-y) tests/libqos/pci.o tests/libqos/fw_cfg.o
 libqos-obj-y += tests/libqos/malloc.o
-libqos-obj-y += tests/libqos/i2c.o tests/libqos/libqos.o
+libqos-obj-y += tests/libqos/libqos.o
 libqos-spapr-obj-y = $(libqos-obj-y) tests/libqos/malloc-spapr.o
 libqos-spapr-obj-y += tests/libqos/libqos-spapr.o
 libqos-spapr-obj-y += tests/libqos/rtas.o
@@ -690,14 +687,15 @@ libqos-spapr-obj-y += tests/libqos/pci-spapr.o
 libqos-pc-obj-y = $(libqos-obj-y) tests/libqos/pci-pc.o
 libqos-pc-obj-y += tests/libqos/malloc-pc.o tests/libqos/libqos-pc.o
 libqos-pc-obj-y += tests/libqos/ahci.o
-libqos-omap-obj-y = $(libqos-obj-y) tests/libqos/i2c-omap.o
-libqos-imx-obj-y = $(libqos-obj-y) tests/libqos/i2c-imx.o
 libqos-usb-obj-y = $(libqos-spapr-obj-y) $(libqos-pc-obj-y) tests/libqos/usb.o
 
 # Devices
 qos-test-obj-y = tests/qos-test.o $(libqgraph-obj-y)
 qos-test-obj-y += $(libqos-pc-obj-y) $(libqos-spapr-obj-y)
 qos-test-obj-y += tests/libqos/e1000e.o
+qos-test-obj-y += tests/libqos/i2c.o
+qos-test-obj-y += tests/libqos/i2c-imx.o
+qos-test-obj-y += tests/libqos/i2c-omap.o
 qos-test-obj-y += tests/libqos/sdhci.o
 qos-test-obj-y += tests/libqos/tpci200.o
 qos-test-obj-y += tests/libqos/virtio.o
@@ -713,6 +711,8 @@ qos-test-obj-y += tests/libqos/virtio-serial.o
 
 # Machines
 qos-test-obj-y += tests/libqos/aarch64-xlnx-zcu102-machine.o
+qos-test-obj-y += tests/libqos/arm-imx25-pdk-machine.o
+qos-test-obj-y += tests/libqos/arm-n800-machine.o
 qos-test-obj-y += tests/libqos/arm-raspi2-machine.o
 qos-test-obj-y += tests/libqos/arm-sabrelite-machine.o
 qos-test-obj-y += tests/libqos/arm-smdkc210-machine.o
@@ -723,6 +723,7 @@ qos-test-obj-y += tests/libqos/x86_64_pc-machine.o
 
 # Tests
 qos-test-obj-y += tests/ac97-test.o
+qos-test-obj-y += tests/ds1338-test.o
 qos-test-obj-y += tests/e1000-test.o
 qos-test-obj-y += tests/e1000e-test.o
 qos-test-obj-y += tests/eepro100-test.o
@@ -731,10 +732,12 @@ qos-test-obj-y += tests/ipoctal232-test.o
 qos-test-obj-y += tests/megasas-test.o
 qos-test-obj-y += tests/ne2000-test.o
 qos-test-obj-y += tests/nvme-test.o
+qos-test-obj-y += tests/pca9552-test.o
 qos-test-obj-y += tests/pci-test.o
 qos-test-obj-y += tests/pcnet-test.o
 qos-test-obj-y += tests/sdhci-test.o
 qos-test-obj-y += tests/spapr-phb-test.o
+qos-test-obj-y += tests/tmp105-test.o
 qos-test-obj-y += tests/usb-hcd-ohci-test.o $(libqos-usb-obj-y)
 qos-test-obj-$(CONFIG_VHOST_NET_USER) += tests/vhost-user-test.o $(chardev-obj-y) $(test-io-obj-y)
 qos-test-obj-y += tests/virtio-test.o
@@ -772,9 +775,6 @@ tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y)
 tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \
 	tests/boot-sector.o tests/acpi-utils.o $(libqos-obj-y)
 tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y)
-tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y)
-tests/pca9552-test$(EXESUF): tests/pca9552-test.o $(libqos-omap-obj-y)
-tests/ds1338-test$(EXESUF): tests/ds1338-test.o $(libqos-imx-obj-y)
 tests/microbit-test$(EXESUF): tests/microbit-test.o
 tests/m25p80-test$(EXESUF): tests/m25p80-test.o
 tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y)
diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include
index c0e1bf57a3..aaf5396b85 100644
--- a/tests/docker/Makefile.include
+++ b/tests/docker/Makefile.include
@@ -107,6 +107,7 @@ docker-image-debian-sparc64-cross: docker-image-debian-sid
 docker-image-debian-mips64-cross: docker-image-debian-sid
 docker-image-debian-riscv64-cross: docker-image-debian-sid
 docker-image-debian-powerpc-cross: docker-image-debian-sid
+docker-image-debian-ppc64-cross: docker-image-debian-sid
 docker-image-travis: NOUSER=1
 
 # Specialist build images, sometimes very limited tools
diff --git a/tests/docker/dockerfiles/debian-ppc64-cross.docker b/tests/docker/dockerfiles/debian-ppc64-cross.docker
new file mode 100644
index 0000000000..7f239c322d
--- /dev/null
+++ b/tests/docker/dockerfiles/debian-ppc64-cross.docker
@@ -0,0 +1,11 @@
+#
+# Docker ppc64 cross-compiler target
+#
+# This docker target builds on the debian sid base image which
+# contains cross compilers for Debian "ports" targets.
+FROM qemu:debian-sid
+
+RUN DEBIAN_FRONTEND=noninteractive eatmydata \
+    apt-get install -y --no-install-recommends \
+       gcc-powerpc64-linux-gnu \
+       libc6-dev-ppc64-cross || { echo "Failed to build - see debian-sid.docker notes"; exit 1; }
diff --git a/tests/ds1338-test.c b/tests/ds1338-test.c
index 742dad9113..f6ade9a050 100644
--- a/tests/ds1338-test.c
+++ b/tests/ds1338-test.c
@@ -21,31 +21,22 @@
 #include "libqtest.h"
 #include "libqos/i2c.h"
 
-#define IMX25_I2C_0_BASE 0x43F80000
-
 #define DS1338_ADDR 0x68
 
-static I2CAdapter *i2c;
-static uint8_t addr;
-
 static inline uint8_t bcd2bin(uint8_t x)
 {
     return ((x) & 0x0f) + ((x) >> 4) * 10;
 }
 
-static void send_and_receive(void)
+static void send_and_receive(void *obj, void *data, QGuestAllocator *alloc)
 {
-    uint8_t cmd[1];
+    QI2CDevice *i2cdev = (QI2CDevice *)obj;
+
     uint8_t resp[7];
     time_t now = time(NULL);
     struct tm *tm_ptr = gmtime(&now);
 
-    /* reset the index in the RTC memory */
-    cmd[0] = 0;
-    i2c_send(i2c, addr, cmd, 1);
-
-    /* retrieve the date */
-    i2c_recv(i2c, addr, resp, 7);
+    i2c_read_block(i2cdev, 0, resp, sizeof(resp));
 
     /* check retrieved time againt local time */
     g_assert_cmpuint(bcd2bin(resp[4]), == , tm_ptr->tm_mday);
@@ -53,23 +44,15 @@ static void send_and_receive(void)
     g_assert_cmpuint(2000 + bcd2bin(resp[6]), == , 1900 + tm_ptr->tm_year);
 }
 
-int main(int argc, char **argv)
+static void ds1338_register_nodes(void)
 {
-    QTestState *s = NULL;
-    int ret;
-
-    g_test_init(&argc, &argv, NULL);
-
-    s = qtest_start("-display none -machine imx25-pdk");
-    i2c = imx_i2c_create(s, IMX25_I2C_0_BASE);
-    addr = DS1338_ADDR;
-
-    qtest_add_func("/ds1338/tx-rx", send_and_receive);
-
-    ret = g_test_run();
-
-    qtest_quit(s);
-    g_free(i2c);
-
-    return ret;
+    QOSGraphEdgeOptions opts = {
+        .extra_device_opts = "address=0x68"
+    };
+    add_qi2c_address(&opts, &(QI2CAddress) { DS1338_ADDR });
+
+    qos_node_create_driver("ds1338", i2c_device_create);
+    qos_node_consumes("ds1338", "i2c-bus", &opts);
+    qos_add_test("tx-rx", "ds1338", send_and_receive, NULL);
 }
+libqos_init(ds1338_register_nodes);
diff --git a/tests/libqos/arm-imx25-pdk-machine.c b/tests/libqos/arm-imx25-pdk-machine.c
new file mode 100644
index 0000000000..25066fb8a9
--- /dev/null
+++ b/tests/libqos/arm-imx25-pdk-machine.c
@@ -0,0 +1,92 @@
+/*
+ * libqos driver framework
+ *
+ * Copyright (c) 2019 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "libqos/malloc.h"
+#include "libqos/qgraph.h"
+#include "libqos/i2c.h"
+
+#define ARM_PAGE_SIZE            4096
+#define IMX25_PDK_RAM_START      0x80000000
+#define IMX25_PDK_RAM_END        0x88000000
+
+typedef struct QIMX25PDKMachine QIMX25PDKMachine;
+
+struct QIMX25PDKMachine {
+    QOSGraphObject obj;
+    QGuestAllocator alloc;
+    IMXI2C i2c_1;
+};
+
+static void *imx25_pdk_get_driver(void *object, const char *interface)
+{
+    QIMX25PDKMachine *machine = object;
+    if (!g_strcmp0(interface, "memory")) {
+        return &machine->alloc;
+    }
+
+    fprintf(stderr, "%s not present in arm/imx25_pdk\n", interface);
+    g_assert_not_reached();
+}
+
+static QOSGraphObject *imx25_pdk_get_device(void *obj, const char *device)
+{
+    QIMX25PDKMachine *machine = obj;
+    if (!g_strcmp0(device, "imx.i2c")) {
+        return &machine->i2c_1.obj;
+    }
+
+    fprintf(stderr, "%s not present in arm/imx25_pdk\n", device);
+    g_assert_not_reached();
+}
+
+static void imx25_pdk_destructor(QOSGraphObject *obj)
+{
+    QIMX25PDKMachine *machine = (QIMX25PDKMachine *) obj;
+    alloc_destroy(&machine->alloc);
+}
+
+static void *qos_create_machine_arm_imx25_pdk(QTestState *qts)
+{
+    QIMX25PDKMachine *machine = g_new0(QIMX25PDKMachine, 1);
+
+    alloc_init(&machine->alloc, 0,
+               IMX25_PDK_RAM_START,
+               IMX25_PDK_RAM_END,
+               ARM_PAGE_SIZE);
+    machine->obj.get_device = imx25_pdk_get_device;
+    machine->obj.get_driver = imx25_pdk_get_driver;
+    machine->obj.destructor = imx25_pdk_destructor;
+
+    imx_i2c_init(&machine->i2c_1, qts, 0x43f80000);
+    return &machine->obj;
+}
+
+static void imx25_pdk_register_nodes(void)
+{
+    QOSGraphEdgeOptions edge = {
+        .extra_device_opts = "bus=i2c-bus.0"
+    };
+    qos_node_create_machine("arm/imx25-pdk", qos_create_machine_arm_imx25_pdk);
+    qos_node_contains("arm/imx25-pdk", "imx.i2c", &edge, NULL);
+}
+
+libqos_init(imx25_pdk_register_nodes);
diff --git a/tests/libqos/arm-n800-machine.c b/tests/libqos/arm-n800-machine.c
new file mode 100644
index 0000000000..87279bdb26
--- /dev/null
+++ b/tests/libqos/arm-n800-machine.c
@@ -0,0 +1,92 @@
+/*
+ * libqos driver framework
+ *
+ * Copyright (c) 2019 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "libqos/malloc.h"
+#include "libqos/qgraph.h"
+#include "libqos/i2c.h"
+
+#define ARM_PAGE_SIZE            4096
+#define N800_RAM_START      0x80000000
+#define N800_RAM_END        0x88000000
+
+typedef struct QN800Machine QN800Machine;
+
+struct QN800Machine {
+    QOSGraphObject obj;
+    QGuestAllocator alloc;
+    OMAPI2C i2c_1;
+};
+
+static void *n800_get_driver(void *object, const char *interface)
+{
+    QN800Machine *machine = object;
+    if (!g_strcmp0(interface, "memory")) {
+        return &machine->alloc;
+    }
+
+    fprintf(stderr, "%s not present in arm/n800\n", interface);
+    g_assert_not_reached();
+}
+
+static QOSGraphObject *n800_get_device(void *obj, const char *device)
+{
+    QN800Machine *machine = obj;
+    if (!g_strcmp0(device, "omap_i2c")) {
+        return &machine->i2c_1.obj;
+    }
+
+    fprintf(stderr, "%s not present in arm/n800\n", device);
+    g_assert_not_reached();
+}
+
+static void n800_destructor(QOSGraphObject *obj)
+{
+    QN800Machine *machine = (QN800Machine *) obj;
+    alloc_destroy(&machine->alloc);
+}
+
+static void *qos_create_machine_arm_n800(QTestState *qts)
+{
+    QN800Machine *machine = g_new0(QN800Machine, 1);
+
+    alloc_init(&machine->alloc, 0,
+               N800_RAM_START,
+               N800_RAM_END,
+               ARM_PAGE_SIZE);
+    machine->obj.get_device = n800_get_device;
+    machine->obj.get_driver = n800_get_driver;
+    machine->obj.destructor = n800_destructor;
+
+    omap_i2c_init(&machine->i2c_1, qts, 0x48070000);
+    return &machine->obj;
+}
+
+static void n800_register_nodes(void)
+{
+    QOSGraphEdgeOptions edge = {
+        .extra_device_opts = "bus=i2c-bus.0"
+    };
+    qos_node_create_machine("arm/n800", qos_create_machine_arm_n800);
+    qos_node_contains("arm/n800", "omap_i2c", &edge, NULL);
+}
+
+libqos_init(n800_register_nodes);
diff --git a/tests/libqos/i2c-imx.c b/tests/libqos/i2c-imx.c
index 0945f2ecdc..f33ece55a3 100644
--- a/tests/libqos/i2c-imx.c
+++ b/tests/libqos/i2c-imx.c
@@ -30,13 +30,6 @@ enum IMXI2CDirection {
     IMX_I2C_WRITE,
 };
 
-typedef struct IMXI2C {
-    I2CAdapter parent;
-
-    uint64_t addr;
-} IMXI2C;
-
-
 static void imx_i2c_set_slave_addr(IMXI2C *s, uint8_t addr,
                                    enum IMXI2CDirection direction)
 {
@@ -47,7 +40,7 @@ static void imx_i2c_set_slave_addr(IMXI2C *s, uint8_t addr,
 static void imx_i2c_send(I2CAdapter *i2c, uint8_t addr,
                          const uint8_t *buf, uint16_t len)
 {
-    IMXI2C *s = (IMXI2C *)i2c;
+    IMXI2C *s = container_of(i2c, IMXI2C, parent);
     uint8_t data;
     uint8_t status;
     uint16_t size = 0;
@@ -107,7 +100,7 @@ static void imx_i2c_send(I2CAdapter *i2c, uint8_t addr,
 static void imx_i2c_recv(I2CAdapter *i2c, uint8_t addr,
                          uint8_t *buf, uint16_t len)
 {
-    IMXI2C *s = (IMXI2C *)i2c;
+    IMXI2C *s = container_of(i2c, IMXI2C, parent);
     uint8_t data;
     uint8_t status;
     uint16_t size = 0;
@@ -193,16 +186,31 @@ static void imx_i2c_recv(I2CAdapter *i2c, uint8_t addr,
     g_assert((status & I2SR_IBB) == 0);
 }
 
-I2CAdapter *imx_i2c_create(QTestState *qts, uint64_t addr)
+static void *imx_i2c_get_driver(void *obj, const char *interface)
 {
-    IMXI2C *s = g_malloc0(sizeof(*s));
-    I2CAdapter *i2c = (I2CAdapter *)s;
+    IMXI2C *s = obj;
+    if (!g_strcmp0(interface, "i2c-bus")) {
+        return &s->parent;
+    }
+    fprintf(stderr, "%s not present in imx-i2c\n", interface);
+    g_assert_not_reached();
+}
 
+void imx_i2c_init(IMXI2C *s, QTestState *qts, uint64_t addr)
+{
     s->addr = addr;
 
-    i2c->send = imx_i2c_send;
-    i2c->recv = imx_i2c_recv;
-    i2c->qts = qts;
+    s->obj.get_driver = imx_i2c_get_driver;
 
-    return i2c;
+    s->parent.send = imx_i2c_send;
+    s->parent.recv = imx_i2c_recv;
+    s->parent.qts = qts;
 }
+
+static void imx_i2c_register_nodes(void)
+{
+    qos_node_create_driver("imx.i2c", NULL);
+    qos_node_produces("imx.i2c", "i2c-bus");
+}
+
+libqos_init(imx_i2c_register_nodes);
diff --git a/tests/libqos/i2c-omap.c b/tests/libqos/i2c-omap.c
index 1ef6e7b200..9ae8214fa8 100644
--- a/tests/libqos/i2c-omap.c
+++ b/tests/libqos/i2c-omap.c
@@ -40,12 +40,6 @@ enum OMAPI2CCONBits {
     OMAP_I2C_CON_I2C_EN = 1 << 15,
 };
 
-typedef struct OMAPI2C {
-    I2CAdapter parent;
-
-    uint64_t addr;
-} OMAPI2C;
-
 
 static void omap_i2c_set_slave_addr(OMAPI2C *s, uint8_t addr)
 {
@@ -59,7 +53,7 @@ static void omap_i2c_set_slave_addr(OMAPI2C *s, uint8_t addr)
 static void omap_i2c_send(I2CAdapter *i2c, uint8_t addr,
                           const uint8_t *buf, uint16_t len)
 {
-    OMAPI2C *s = (OMAPI2C *)i2c;
+    OMAPI2C *s = container_of(i2c, OMAPI2C, parent);
     uint16_t data;
 
     omap_i2c_set_slave_addr(s, addr);
@@ -103,8 +97,9 @@ static void omap_i2c_send(I2CAdapter *i2c, uint8_t addr,
 static void omap_i2c_recv(I2CAdapter *i2c, uint8_t addr,
                           uint8_t *buf, uint16_t len)
 {
-    OMAPI2C *s = (OMAPI2C *)i2c;
+    OMAPI2C *s = container_of(i2c, OMAPI2C, parent);
     uint16_t data, stat;
+    uint16_t orig_len = len;
 
     omap_i2c_set_slave_addr(s, addr);
 
@@ -116,16 +111,24 @@ static void omap_i2c_recv(I2CAdapter *i2c, uint8_t addr,
            OMAP_I2C_CON_STT |
            OMAP_I2C_CON_STP;
     qtest_writew(i2c->qts, s->addr + OMAP_I2C_CON, data);
-    data = qtest_readw(i2c->qts, s->addr + OMAP_I2C_CON);
-    g_assert((data & OMAP_I2C_CON_STP) == 0);
 
     data = qtest_readw(i2c->qts, s->addr + OMAP_I2C_STAT);
     g_assert((data & OMAP_I2C_STAT_NACK) == 0);
 
-    data = qtest_readw(i2c->qts, s->addr + OMAP_I2C_CNT);
-    g_assert_cmpuint(data, ==, len);
-
     while (len > 0) {
+        data = qtest_readw(i2c->qts, s->addr + OMAP_I2C_CON);
+        if (len <= 4) {
+            g_assert((data & OMAP_I2C_CON_STP) == 0);
+
+            data = qtest_readw(i2c->qts, s->addr + OMAP_I2C_CNT);
+            g_assert_cmpuint(data, ==, orig_len);
+        } else {
+            g_assert((data & OMAP_I2C_CON_STP) != 0);
+
+            data = qtest_readw(i2c->qts, s->addr + OMAP_I2C_CNT);
+            g_assert_cmpuint(data, ==, len - 4);
+        }
+
         data = qtest_readw(i2c->qts, s->addr + OMAP_I2C_STAT);
         g_assert((data & OMAP_I2C_STAT_RRDY) != 0);
         g_assert((data & OMAP_I2C_STAT_ROVR) == 0);
@@ -152,21 +155,42 @@ static void omap_i2c_recv(I2CAdapter *i2c, uint8_t addr,
     g_assert((data & OMAP_I2C_CON_STP) == 0);
 }
 
-I2CAdapter *omap_i2c_create(QTestState *qts, uint64_t addr)
+static void *omap_i2c_get_driver(void *obj, const char *interface)
+{
+    OMAPI2C *s = obj;
+    if (!g_strcmp0(interface, "i2c-bus")) {
+        return &s->parent;
+    }
+    fprintf(stderr, "%s not present in omap_i2c\n", interface);
+    g_assert_not_reached();
+}
+
+static void omap_i2c_start_hw(QOSGraphObject *object)
 {
-    OMAPI2C *s = g_malloc0(sizeof(*s));
-    I2CAdapter *i2c = (I2CAdapter *)s;
+    OMAPI2C *s = (OMAPI2C *) object;
     uint16_t data;
 
+    /* verify the mmio address by looking for a known signature */
+    data = qtest_readw(s->parent.qts, s->addr + OMAP_I2C_REV);
+    g_assert_cmphex(data, ==, 0x34);
+}
+
+void omap_i2c_init(OMAPI2C *s, QTestState *qts, uint64_t addr)
+{
     s->addr = addr;
 
-    i2c->send = omap_i2c_send;
-    i2c->recv = omap_i2c_recv;
-    i2c->qts = qts;
+    s->obj.get_driver = omap_i2c_get_driver;
+    s->obj.start_hw = omap_i2c_start_hw;
 
-    /* verify the mmio address by looking for a known signature */
-    data = qtest_readw(qts, addr + OMAP_I2C_REV);
-    g_assert_cmphex(data, ==, 0x34);
+    s->parent.send = omap_i2c_send;
+    s->parent.recv = omap_i2c_recv;
+    s->parent.qts = qts;
+}
 
-    return i2c;
+static void omap_i2c_register_nodes(void)
+{
+    qos_node_create_driver("omap_i2c", NULL);
+    qos_node_produces("omap_i2c", "i2c-bus");
 }
+
+libqos_init(omap_i2c_register_nodes);
diff --git a/tests/libqos/i2c.c b/tests/libqos/i2c.c
index 23bc2a3eb2..156114e745 100644
--- a/tests/libqos/i2c.c
+++ b/tests/libqos/i2c.c
@@ -10,14 +10,76 @@
 #include "libqos/i2c.h"
 #include "libqtest.h"
 
-void i2c_send(I2CAdapter *i2c, uint8_t addr,
-              const uint8_t *buf, uint16_t len)
+void i2c_send(QI2CDevice *i2cdev, const uint8_t *buf, uint16_t len)
 {
-    i2c->send(i2c, addr, buf, len);
+    i2cdev->bus->send(i2cdev->bus, i2cdev->addr, buf, len);
 }
 
-void i2c_recv(I2CAdapter *i2c, uint8_t addr,
-              uint8_t *buf, uint16_t len)
+void i2c_recv(QI2CDevice *i2cdev, uint8_t *buf, uint16_t len)
 {
-    i2c->recv(i2c, addr, buf, len);
+    i2cdev->bus->recv(i2cdev->bus, i2cdev->addr, buf, len);
+}
+
+void i2c_read_block(QI2CDevice *i2cdev, uint8_t reg,
+                    uint8_t *buf, uint16_t len)
+{
+    i2c_send(i2cdev, &reg, 1);
+    i2c_recv(i2cdev, buf, len);
+}
+
+void i2c_write_block(QI2CDevice *i2cdev, uint8_t reg,
+                     const uint8_t *buf, uint16_t len)
+{
+    uint8_t *cmd = g_malloc(len + 1);
+    cmd[0] = reg;
+    memcpy(&cmd[1], buf, len);
+    i2c_send(i2cdev, cmd, len + 1);
+    g_free(cmd);
+}
+
+uint8_t i2c_get8(QI2CDevice *i2cdev, uint8_t reg)
+{
+    uint8_t resp[1];
+    i2c_read_block(i2cdev, reg, resp, sizeof(resp));
+    return resp[0];
+}
+
+uint16_t i2c_get16(QI2CDevice *i2cdev, uint8_t reg)
+{
+    uint8_t resp[2];
+    i2c_read_block(i2cdev, reg, resp, sizeof(resp));
+    return (resp[0] << 8) | resp[1];
+}
+
+void i2c_set8(QI2CDevice *i2cdev, uint8_t reg, uint8_t value)
+{
+    i2c_write_block(i2cdev, reg, &value, 1);
+}
+
+void i2c_set16(QI2CDevice *i2cdev, uint8_t reg, uint16_t value)
+{
+    uint8_t data[2];
+
+    data[0] = value >> 8;
+    data[1] = value & 255;
+    i2c_write_block(i2cdev, reg, data, sizeof(data));
+}
+
+void *i2c_device_create(void *i2c_bus, QGuestAllocator *alloc, void *addr)
+{
+    QI2CDevice *i2cdev = g_new0(QI2CDevice, 1);
+
+    i2cdev->bus = i2c_bus;
+    if (addr) {
+        i2cdev->addr = ((QI2CAddress *)addr)->addr;
+    }
+    return &i2cdev->obj;
+}
+
+void add_qi2c_address(QOSGraphEdgeOptions *opts, QI2CAddress *addr)
+{
+    g_assert(addr);
+
+    opts->arg = addr;
+    opts->size_arg = sizeof(QI2CAddress);
 }
diff --git a/tests/libqos/i2c.h b/tests/libqos/i2c.h
index cc01358a9f..945b65b34c 100644
--- a/tests/libqos/i2c.h
+++ b/tests/libqos/i2c.h
@@ -10,6 +10,7 @@
 #define LIBQOS_I2C_H
 
 #include "libqtest.h"
+#include "libqos/qgraph.h"
 
 typedef struct I2CAdapter I2CAdapter;
 struct I2CAdapter {
@@ -21,17 +22,61 @@ struct I2CAdapter {
     QTestState *qts;
 };
 
-#define OMAP2_I2C_1_BASE 0x48070000
+typedef struct QI2CAddress QI2CAddress;
+struct QI2CAddress {
+    uint8_t addr;
+};
+
+typedef struct QI2CDevice QI2CDevice;
+struct QI2CDevice {
+    /*
+     * For now, all devices are simple enough that there is no need for
+     * them to define their own constructor and get_driver functions.
+     * Therefore, QOSGraphObject is included directly in QI2CDevice;
+     * the tests expect to get a QI2CDevice rather than doing something
+     * like obj->get_driver("i2c-device").
+     *
+     * In fact there is no i2c-device interface even, because there are
+     * no generic I2C tests).
+     */
+    QOSGraphObject obj;
+    I2CAdapter *bus;
+    uint8_t addr;
+};
+
+void *i2c_device_create(void *i2c_bus, QGuestAllocator *alloc, void *addr);
+void add_qi2c_address(QOSGraphEdgeOptions *opts, QI2CAddress *addr);
+
+void i2c_send(QI2CDevice *dev, const uint8_t *buf, uint16_t len);
+void i2c_recv(QI2CDevice *dev, uint8_t *buf, uint16_t len);
+
+void i2c_read_block(QI2CDevice *dev, uint8_t reg,
+                    uint8_t *buf, uint16_t len);
+void i2c_write_block(QI2CDevice *dev, uint8_t reg,
+                     const uint8_t *buf, uint16_t len);
+uint8_t i2c_get8(QI2CDevice *dev, uint8_t reg);
+uint16_t i2c_get16(QI2CDevice *dev, uint8_t reg);
+void i2c_set8(QI2CDevice *dev, uint8_t reg, uint8_t value);
+void i2c_set16(QI2CDevice *dev, uint8_t reg, uint16_t value);
+
+/* i2c-omap.c */
+typedef struct OMAPI2C {
+    QOSGraphObject obj;
+    I2CAdapter parent;
+
+    uint64_t addr;
+} OMAPI2C;
+
+void omap_i2c_init(OMAPI2C *s, QTestState *qts, uint64_t addr);
 
-void i2c_send(I2CAdapter *i2c, uint8_t addr,
-              const uint8_t *buf, uint16_t len);
-void i2c_recv(I2CAdapter *i2c, uint8_t addr,
-              uint8_t *buf, uint16_t len);
+/* i2c-imx.c */
+typedef struct IMXI2C {
+    QOSGraphObject obj;
+    I2CAdapter parent;
 
-/* libi2c-omap.c */
-I2CAdapter *omap_i2c_create(QTestState *qts, uint64_t addr);
+    uint64_t addr;
+} IMXI2C;
 
-/* libi2c-imx.c */
-I2CAdapter *imx_i2c_create(QTestState *qts, uint64_t addr);
+void imx_i2c_init(IMXI2C *s, QTestState *qts, uint64_t addr);
 
 #endif
diff --git a/tests/libqos/qgraph.c b/tests/libqos/qgraph.c
index b149caaaa9..7a7ae2a19e 100644
--- a/tests/libqos/qgraph.c
+++ b/tests/libqos/qgraph.c
@@ -632,15 +632,19 @@ void qos_node_create_driver(const char *name, QOSCreateDriverFunc function)
 }
 
 void qos_node_contains(const char *container, const char *contained,
-                       ...)
+                       QOSGraphEdgeOptions *opts, ...)
 {
     va_list va;
-    va_start(va, contained);
-    QOSGraphEdgeOptions *opts;
 
+    if (opts == NULL) {
+        add_edge(container, contained, QEDGE_CONTAINS, NULL);
+        return;
+    }
+
+    va_start(va, opts);
     do {
-        opts = va_arg(va, QOSGraphEdgeOptions *);
         add_edge(container, contained, QEDGE_CONTAINS, opts);
+        opts = va_arg(va, QOSGraphEdgeOptions *);
     } while (opts != NULL);
 
     va_end(va);
diff --git a/tests/libqos/qgraph.h b/tests/libqos/qgraph.h
index e799095b30..3a25dda4b2 100644
--- a/tests/libqos/qgraph.h
+++ b/tests/libqos/qgraph.h
@@ -453,14 +453,16 @@ void qos_node_create_machine_args(const char *name,
 void qos_node_create_driver(const char *name, QOSCreateDriverFunc function);
 
 /**
- * qos_node_contains(): creates an edge of type QEDGE_CONTAINS and
- * adds it to the edge list mapped to @container in the
+ * qos_node_contains(): creates one or more edges of type QEDGE_CONTAINS
+ * and adds them to the edge list mapped to @container in the
  * edge hash table.
  *
- * This edge will have @container as source and @contained as destination.
+ * The edges will have @container as source and @contained as destination.
  *
- * It also has the possibility to add optional NULL-terminated
- * @opts parameters (see %QOSGraphEdgeOptions)
+ * If @opts is NULL, a single edge will be added with no options.
+ * If @opts is non-NULL, the arguments after @contained represent a
+ * NULL-terminated list of %QOSGraphEdgeOptions structs, and an
+ * edge will be added for each of them.
  *
  * This function can be useful when there are multiple devices
  * with the same node name contained in a machine/other node
@@ -480,7 +482,8 @@ void qos_node_create_driver(const char *name, QOSCreateDriverFunc function);
  * For contains, op1.arg and op1.size_arg represent the arg to pass
  * to @contained constructor to properly initialize it.
  */
-void qos_node_contains(const char *container, const char *contained, ...);
+void qos_node_contains(const char *container, const char *contained,
+                       QOSGraphEdgeOptions *opts, ...);
 
 /**
  * qos_node_produces(): creates an edge of type QEDGE_PRODUCES and
diff --git a/tests/libqtest.c b/tests/libqtest.c
index 8ac0c02af4..9b9b5f37fc 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -32,6 +32,7 @@
 
 #define MAX_IRQ 256
 #define SOCKET_TIMEOUT 50
+#define SOCKET_MAX_FDS 16
 
 QTestState *global_qtest;
 
@@ -391,6 +392,40 @@ static void GCC_FMT_ATTR(2, 3) qtest_sendf(QTestState *s, const char *fmt, ...)
     va_end(ap);
 }
 
+/* Sends a message and file descriptors to the socket.
+ * It's needed for qmp-commands like getfd/add-fd */
+static void socket_send_fds(int socket_fd, int *fds, size_t fds_num,
+                            const char *buf, size_t buf_size)
+{
+    ssize_t ret;
+    struct msghdr msg = { 0 };
+    char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)] = { 0 };
+    size_t fdsize = sizeof(int) * fds_num;
+    struct cmsghdr *cmsg;
+    struct iovec iov = { .iov_base = (char *)buf, .iov_len = buf_size };
+
+    msg.msg_iov = &iov;
+    msg.msg_iovlen = 1;
+
+    if (fds && fds_num > 0) {
+        g_assert_cmpuint(fds_num, <, SOCKET_MAX_FDS);
+
+        msg.msg_control = control;
+        msg.msg_controllen = CMSG_SPACE(fdsize);
+
+        cmsg = CMSG_FIRSTHDR(&msg);
+        cmsg->cmsg_len = CMSG_LEN(fdsize);
+        cmsg->cmsg_level = SOL_SOCKET;
+        cmsg->cmsg_type = SCM_RIGHTS;
+        memcpy(CMSG_DATA(cmsg), fds, fdsize);
+    }
+
+    do {
+        ret = sendmsg(socket_fd, &msg, 0);
+    } while (ret < 0 && errno == EINTR);
+    g_assert_cmpint(ret, >, 0);
+}
+
 static GString *qtest_recv_line(QTestState *s)
 {
     GString *line;
@@ -545,7 +580,8 @@ QDict *qtest_qmp_receive(QTestState *s)
  * in the case that they choose to discard all replies up until
  * a particular EVENT is received.
  */
-void qmp_fd_vsend(int fd, const char *fmt, va_list ap)
+void qmp_fd_vsend_fds(int fd, int *fds, size_t fds_num,
+                      const char *fmt, va_list ap)
 {
     QObject *qobj;
 
@@ -569,25 +605,49 @@ void qmp_fd_vsend(int fd, const char *fmt, va_list ap)
             fprintf(stderr, "%s", str);
         }
         /* Send QMP request */
-        socket_send(fd, str, qstring_get_length(qstr));
+        if (fds && fds_num > 0) {
+            socket_send_fds(fd, fds, fds_num, str, qstring_get_length(qstr));
+        } else {
+            socket_send(fd, str, qstring_get_length(qstr));
+        }
 
         qobject_unref(qstr);
         qobject_unref(qobj);
     }
 }
 
+void qmp_fd_vsend(int fd, const char *fmt, va_list ap)
+{
+    qmp_fd_vsend_fds(fd, NULL, 0, fmt, ap);
+}
+
+void qtest_qmp_vsend_fds(QTestState *s, int *fds, size_t fds_num,
+                         const char *fmt, va_list ap)
+{
+    qmp_fd_vsend_fds(s->qmp_fd, fds, fds_num, fmt, ap);
+}
+
 void qtest_qmp_vsend(QTestState *s, const char *fmt, va_list ap)
 {
-    qmp_fd_vsend(s->qmp_fd, fmt, ap);
+    qmp_fd_vsend_fds(s->qmp_fd, NULL, 0, fmt, ap);
 }
 
 QDict *qmp_fdv(int fd, const char *fmt, va_list ap)
 {
-    qmp_fd_vsend(fd, fmt, ap);
+    qmp_fd_vsend_fds(fd, NULL, 0, fmt, ap);
 
     return qmp_fd_receive(fd);
 }
 
+QDict *qtest_vqmp_fds(QTestState *s, int *fds, size_t fds_num,
+                      const char *fmt, va_list ap)
+{
+    qtest_qmp_vsend_fds(s, fds, fds_num, fmt, ap);
+
+    /* Receive reply */
+    return qtest_qmp_receive(s);
+}
+
 QDict *qtest_vqmp(QTestState *s, const char *fmt, va_list ap)
 {
     qtest_qmp_vsend(s, fmt, ap);
@@ -616,6 +676,18 @@ void qmp_fd_send(int fd, const char *fmt, ...)
     va_end(ap);
 }
 
+QDict *qtest_qmp_fds(QTestState *s, int *fds, size_t fds_num,
+                     const char *fmt, ...)
+{
+    va_list ap;
+    QDict *response;
+
+    va_start(ap, fmt);
+    response = qtest_vqmp_fds(s, fds, fds_num, fmt, ap);
+    va_end(ap);
+    return response;
+}
+
 QDict *qtest_qmp(QTestState *s, const char *fmt, ...)
 {
     va_list ap;
@@ -1038,6 +1110,25 @@ QDict *qmp(const char *fmt, ...)
     return response;
 }
 
+void qmp_assert_success(const char *fmt, ...)
+{
+    va_list ap;
+    QDict *response;
+
+    va_start(ap, fmt);
+    response = qtest_vqmp(global_qtest, fmt, ap);
+    va_end(ap);
+
+    g_assert(response);
+    if (!qdict_haskey(response, "return")) {
+        QString *s = qobject_to_json_pretty(QOBJECT(response));
+        g_test_message("%s", qstring_get_str(s));
+        qobject_unref(s);
+    }
+    g_assert(qdict_haskey(response, "return"));
+    qobject_unref(response);
+}
+
 char *hmp(const char *fmt, ...)
 {
     va_list ap;
diff --git a/tests/libqtest.h b/tests/libqtest.h
index a98ea15b7d..cadf1d4a03 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h
@@ -85,6 +85,21 @@ QTestState *qtest_init_with_serial(const char *extra_args, int *sock_fd);
 void qtest_quit(QTestState *s);
 
 /**
+ * qtest_qmp_fds:
+ * @s: #QTestState instance to operate on.
+ * @fds: array of file descriptors
+ * @fds_num: number of elements in @fds
+ * @fmt...: QMP message to send to qemu, formatted like
+ * qobject_from_jsonf_nofail().  See parse_escape() for what's
+ * supported after '%'.
+ *
+ * Sends a QMP message to QEMU with fds and returns the response.
+ */
+QDict *qtest_qmp_fds(QTestState *s, int *fds, size_t fds_num,
+                     const char *fmt, ...)
+    GCC_FMT_ATTR(4, 5);
+
+/**
  * qtest_qmp:
  * @s: #QTestState instance to operate on.
  * @fmt...: QMP message to send to qemu, formatted like
@@ -120,7 +135,23 @@ void qtest_qmp_send_raw(QTestState *s, const char *fmt, ...)
     GCC_FMT_ATTR(2, 3);
 
 /**
- * qtest_qmpv:
+ * qtest_vqmp_fds:
+ * @s: #QTestState instance to operate on.
+ * @fds: array of file descriptors
+ * @fds_num: number of elements in @fds
+ * @fmt: QMP message to send to QEMU, formatted like
+ * qobject_from_jsonf_nofail().  See parse_escape() for what's
+ * supported after '%'.
+ * @ap: QMP message arguments
+ *
+ * Sends a QMP message to QEMU with fds and returns the response.
+ */
+QDict *qtest_vqmp_fds(QTestState *s, int *fds, size_t fds_num,
+                      const char *fmt, va_list ap)
+    GCC_FMT_ATTR(4, 0);
+
+/**
+ * qtest_vqmp:
  * @s: #QTestState instance to operate on.
  * @fmt: QMP message to send to QEMU, formatted like
  * qobject_from_jsonf_nofail().  See parse_escape() for what's
@@ -133,6 +164,22 @@ QDict *qtest_vqmp(QTestState *s, const char *fmt, va_list ap)
     GCC_FMT_ATTR(2, 0);
 
 /**
+ * qtest_qmp_vsend_fds:
+ * @s: #QTestState instance to operate on.
+ * @fds: array of file descriptors
+ * @fds_num: number of elements in @fds
+ * @fmt: QMP message to send to QEMU, formatted like
+ * qobject_from_jsonf_nofail().  See parse_escape() for what's
+ * supported after '%'.
+ * @ap: QMP message arguments
+ *
+ * Sends a QMP message to QEMU and leaves the response in the stream.
+ */
+void qtest_qmp_vsend_fds(QTestState *s, int *fds, size_t fds_num,
+                         const char *fmt, va_list ap)
+    GCC_FMT_ATTR(4, 0);
+
+/**
  * qtest_qmp_vsend:
  * @s: #QTestState instance to operate on.
  * @fmt: QMP message to send to QEMU, formatted like
@@ -619,6 +666,17 @@ static inline void qtest_end(void)
 QDict *qmp(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
 
 /**
+ * qmp_assert_success:
+ * @fmt...: QMP message to send to qemu, formatted like
+ * qobject_from_jsonf_nofail().  See parse_escape() for what's
+ * supported after '%'.
+ *
+ * Sends a QMP message to QEMU and asserts that a 'return' key is present in
+ * the response.
+ */
+void qmp_assert_success(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+
+/*
  * qmp_eventwait:
  * @s: #event event to wait for.
  *
@@ -877,6 +935,8 @@ static inline int64_t clock_step(int64_t step)
 }
 
 QDict *qmp_fd_receive(int fd);
+void qmp_fd_vsend_fds(int fd, int *fds, size_t fds_num,
+                      const char *fmt, va_list ap) GCC_FMT_ATTR(4, 0);
 void qmp_fd_vsend(int fd, const char *fmt, va_list ap) GCC_FMT_ATTR(2, 0);
 void qmp_fd_send(int fd, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
 void qmp_fd_send_raw(int fd, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
diff --git a/tests/migration-test.c b/tests/migration-test.c
index bd3f5c3125..e0407576cb 100644
--- a/tests/migration-test.c
+++ b/tests/migration-test.c
@@ -177,6 +177,21 @@ static void stop_cb(void *opaque, const char *name, QDict *data)
 /*
  * Events can get in the way of responses we are actually waiting for.
  */
+GCC_FMT_ATTR(3, 4)
+static QDict *wait_command_fd(QTestState *who, int fd, const char *command, ...)
+{
+    va_list ap;
+
+    va_start(ap, command);
+    qtest_qmp_vsend_fds(who, &fd, 1, command, ap);
+    va_end(ap);
+
+    return qtest_qmp_receive_success(who, stop_cb, NULL);
+}
+
+/*
+ * Events can get in the way of responses we are actually waiting for.
+ */
 GCC_FMT_ATTR(2, 3)
 static QDict *wait_command(QTestState *who, const char *command, ...)
 {
@@ -480,6 +495,7 @@ static void migrate(QTestState *who, const char *uri, const char *fmt, ...)
     qdict_put_str(args, "uri", uri);
 
     rsp = qmp("{ 'execute': 'migrate', 'arguments': %p}", args);
+
     g_assert(qdict_haskey(rsp, "return"));
     qobject_unref(rsp);
 }
@@ -1027,6 +1043,90 @@ static void test_precopy_tcp(void)
     g_free(uri);
 }
 
+static void test_migrate_fd_proto(void)
+{
+    QTestState *from, *to;
+    int ret;
+    int pair[2];
+    QDict *rsp;
+    const char *error_desc;
+
+    if (test_migrate_start(&from, &to, "defer", false, false)) {
+        return;
+    }
+
+    /*
+     * We want to pick a speed slow enough that the test completes
+     * quickly, but that it doesn't complete precopy even on a slow
+     * machine, so also set the downtime.
+     */
+    /* 1 ms should make it not converge */
+    migrate_set_parameter(from, "downtime-limit", 1);
+    /* 1GB/s */
+    migrate_set_parameter(from, "max-bandwidth", 1000000000);
+
+    /* Wait for the first serial output from the source */
+    wait_for_serial("src_serial");
+
+    /* Create two connected sockets for migration */
+    ret = socketpair(PF_LOCAL, SOCK_STREAM, 0, pair);
+    g_assert_cmpint(ret, ==, 0);
+
+    /* Send the 1st socket to the target */
+    rsp = wait_command_fd(to, pair[0],
+                          "{ 'execute': 'getfd',"
+                          "  'arguments': { 'fdname': 'fd-mig' }}");
+    qobject_unref(rsp);
+    close(pair[0]);
+
+    /* Start incoming migration from the 1st socket */
+    rsp = wait_command(to, "{ 'execute': 'migrate-incoming',"
+                           "  'arguments': { 'uri': 'fd:fd-mig' }}");
+    qobject_unref(rsp);
+
+    /* Send the 2nd socket to the target */
+    rsp = wait_command_fd(from, pair[1],
+                          "{ 'execute': 'getfd',"
+                          "  'arguments': { 'fdname': 'fd-mig' }}");
+    qobject_unref(rsp);
+    close(pair[1]);
+
+    /* Start migration to the 2nd socket*/
+    migrate(from, "fd:fd-mig", "{}");
+
+    wait_for_migration_pass(from);
+
+    /* 300ms should converge */
+    migrate_set_parameter(from, "downtime-limit", 300);
+
+    if (!got_stop) {
+        qtest_qmp_eventwait(from, "STOP");
+    }
+    qtest_qmp_eventwait(to, "RESUME");
+
+    /* Test closing fds */
+    /* We assume, that QEMU removes named fd from its list,
+     * so this should fail */
+    rsp = qtest_qmp(from, "{ 'execute': 'closefd',"
+                          "  'arguments': { 'fdname': 'fd-mig' }}");
+    g_assert_true(qdict_haskey(rsp, "error"));
+    error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc");
+    g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found");
+    qobject_unref(rsp);
+
+    rsp = qtest_qmp(to, "{ 'execute': 'closefd',"
+                        "  'arguments': { 'fdname': 'fd-mig' }}");
+    g_assert_true(qdict_haskey(rsp, "error"));
+    error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc");
+    g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found");
+    qobject_unref(rsp);
+
+    /* Complete migration */
+    wait_for_serial("dest_serial");
+    wait_for_migration_complete(from);
+    test_migrate_end(from, to, true);
+}
+
 int main(int argc, char **argv)
 {
     char template[] = "/tmp/migration-test-XXXXXX";
@@ -1081,6 +1181,7 @@ int main(int argc, char **argv)
     qtest_add_func("/migration/precopy/tcp", test_precopy_tcp);
     /* qtest_add_func("/migration/ignore_shared", test_ignore_shared); */
     qtest_add_func("/migration/xbzrle/unix", test_xbzrle_unix);
+    qtest_add_func("/migration/fd_proto", test_migrate_fd_proto);
 
     ret = g_test_run();
 
diff --git a/tests/pca9552-test.c b/tests/pca9552-test.c
index 5466a67ed7..4b800d3c3e 100644
--- a/tests/pca9552-test.c
+++ b/tests/pca9552-test.c
@@ -10,107 +10,84 @@
 #include "qemu/osdep.h"
 
 #include "libqtest.h"
+#include "libqos/qgraph.h"
 #include "libqos/i2c.h"
 #include "hw/misc/pca9552_regs.h"
 
 #define PCA9552_TEST_ID   "pca9552-test"
 #define PCA9552_TEST_ADDR 0x60
 
-static I2CAdapter *i2c;
-
-static uint8_t pca9552_get8(I2CAdapter *i2c, uint8_t addr, uint8_t reg)
+static void pca9552_init(QI2CDevice *i2cdev)
 {
-    uint8_t resp[1];
-    i2c_send(i2c, addr, &reg, 1);
-    i2c_recv(i2c, addr, resp, 1);
-    return resp[0];
+    /* Switch on LEDs 0 and 12 */
+    i2c_set8(i2cdev, PCA9552_LS0, 0x54);
+    i2c_set8(i2cdev, PCA9552_LS3, 0x54);
 }
 
-static void pca9552_set8(I2CAdapter *i2c, uint8_t addr, uint8_t reg,
-                         uint8_t value)
-{
-    uint8_t cmd[2];
-    uint8_t resp[1];
-
-    cmd[0] = reg;
-    cmd[1] = value;
-    i2c_send(i2c, addr, cmd, 2);
-    i2c_recv(i2c, addr, resp, 1);
-    g_assert_cmphex(resp[0], ==, cmd[1]);
-}
-
-static void receive_autoinc(void)
+static void receive_autoinc(void *obj, void *data, QGuestAllocator *alloc)
 {
+    QI2CDevice *i2cdev = (QI2CDevice *)obj;
     uint8_t resp;
     uint8_t reg = PCA9552_LS0 | PCA9552_AUTOINC;
 
-    i2c_send(i2c, PCA9552_TEST_ADDR, &reg, 1);
+    pca9552_init(i2cdev);
+
+    i2c_send(i2cdev, &reg, 1);
 
     /* PCA9552_LS0 */
-    i2c_recv(i2c, PCA9552_TEST_ADDR, &resp, 1);
+    i2c_recv(i2cdev, &resp, 1);
     g_assert_cmphex(resp, ==, 0x54);
 
     /* PCA9552_LS1 */
-    i2c_recv(i2c, PCA9552_TEST_ADDR, &resp, 1);
+    i2c_recv(i2cdev, &resp, 1);
     g_assert_cmphex(resp, ==, 0x55);
 
     /* PCA9552_LS2 */
-    i2c_recv(i2c, PCA9552_TEST_ADDR, &resp, 1);
+    i2c_recv(i2cdev, &resp, 1);
     g_assert_cmphex(resp, ==, 0x55);
 
     /* PCA9552_LS3 */
-    i2c_recv(i2c, PCA9552_TEST_ADDR, &resp, 1);
+    i2c_recv(i2cdev, &resp, 1);
     g_assert_cmphex(resp, ==, 0x54);
 }
 
-static void send_and_receive(void)
+static void send_and_receive(void *obj, void *data, QGuestAllocator *alloc)
 {
+    QI2CDevice *i2cdev = (QI2CDevice *)obj;
     uint8_t value;
 
-    value = pca9552_get8(i2c, PCA9552_TEST_ADDR, PCA9552_LS0);
+    value = i2c_get8(i2cdev, PCA9552_LS0);
     g_assert_cmphex(value, ==, 0x55);
 
-    value = pca9552_get8(i2c, PCA9552_TEST_ADDR, PCA9552_INPUT0);
+    value = i2c_get8(i2cdev, PCA9552_INPUT0);
     g_assert_cmphex(value, ==, 0x0);
 
-    /* Switch on LED 0 */
-    pca9552_set8(i2c, PCA9552_TEST_ADDR, PCA9552_LS0, 0x54);
-    value = pca9552_get8(i2c, PCA9552_TEST_ADDR, PCA9552_LS0);
+    pca9552_init(i2cdev);
+
+    value = i2c_get8(i2cdev, PCA9552_LS0);
     g_assert_cmphex(value, ==, 0x54);
 
-    value = pca9552_get8(i2c, PCA9552_TEST_ADDR, PCA9552_INPUT0);
+    value = i2c_get8(i2cdev, PCA9552_INPUT0);
     g_assert_cmphex(value, ==, 0x01);
 
-    /* Switch on LED 12 */
-    pca9552_set8(i2c, PCA9552_TEST_ADDR, PCA9552_LS3, 0x54);
-    value = pca9552_get8(i2c, PCA9552_TEST_ADDR, PCA9552_LS3);
+    value = i2c_get8(i2cdev, PCA9552_LS3);
     g_assert_cmphex(value, ==, 0x54);
 
-    value = pca9552_get8(i2c, PCA9552_TEST_ADDR, PCA9552_INPUT1);
+    value = i2c_get8(i2cdev, PCA9552_INPUT1);
     g_assert_cmphex(value, ==, 0x10);
 }
 
-int main(int argc, char **argv)
+static void pca9552_register_nodes(void)
 {
-    QTestState *s = NULL;
-    int ret;
-
-    g_test_init(&argc, &argv, NULL);
-
-    s = qtest_start("-machine n800 "
-                    "-device pca9552,bus=i2c-bus.0,id=" PCA9552_TEST_ID
-                    ",address=0x60");
-    i2c = omap_i2c_create(s, OMAP2_I2C_1_BASE);
-
-    qtest_add_func("/pca9552/tx-rx", send_and_receive);
-    qtest_add_func("/pca9552/rx-autoinc", receive_autoinc);
-
-    ret = g_test_run();
+    QOSGraphEdgeOptions opts = {
+        .extra_device_opts = "address=0x60"
+    };
+    add_qi2c_address(&opts, &(QI2CAddress) { 0x60 });
 
-    if (s) {
-        qtest_quit(s);
-    }
-    g_free(i2c);
+    qos_node_create_driver("pca9552", i2c_device_create);
+    qos_node_consumes("pca9552", "i2c-bus", &opts);
 
-    return ret;
+    qos_add_test("tx-rx", "pca9552", send_and_receive, NULL);
+    qos_add_test("rx-autoinc", "pca9552", receive_autoinc, NULL);
 }
+libqos_init(pca9552_register_nodes);
diff --git a/tests/perf/block/qcow2/convert-blockstatus b/tests/perf/block/qcow2/convert-blockstatus
new file mode 100755
index 0000000000..a1a3c1ef43
--- /dev/null
+++ b/tests/perf/block/qcow2/convert-blockstatus
@@ -0,0 +1,71 @@
+#!/bin/bash
+#
+# Test lseek influence on qcow2 block-status
+#
+# Block layer may recursively check block_status in file child of qcow2, if
+# qcow2 driver returned DATA. There are several test cases to check influence
+# of lseek on block_status performance. To see real difference run on tmpfs.
+#
+# Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved.
+#
+# Tests originally written by Kevin Wolf
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+if [ "$#" -lt 1 ]; then
+    echo "Usage: $0 SOURCE_FILE"
+    exit 1
+fi
+
+ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../../../.." >/dev/null 2>&1 && pwd )"
+QEMU_IMG="$ROOT_DIR/qemu-img"
+QEMU_IO="$ROOT_DIR/qemu-io"
+
+size=1G
+src="$1"
+
+# test-case plain
+
+(
+$QEMU_IMG create -f qcow2 "$src" $size
+for i in $(seq 16384 -1 0); do
+    echo "write $((i * 65536)) 64k"
+done | $QEMU_IO "$src"
+) > /dev/null
+
+echo -n "plain: "
+/usr/bin/time -f %e $QEMU_IMG convert -n "$src" null-co://
+
+# test-case forward
+
+(
+$QEMU_IMG create -f qcow2 "$src" $size
+for i in $(seq 0 2 16384); do
+    echo "write $((i * 65536)) 64k"
+done | $QEMU_IO "$src"
+for i in $(seq 1 2 16384); do
+    echo "write $((i * 65536)) 64k"
+done | $QEMU_IO "$src"
+) > /dev/null
+
+echo -n "forward: "
+/usr/bin/time -f %e $QEMU_IMG convert -n "$src" null-co://
+
+# test-case prealloc
+
+$QEMU_IMG create -f qcow2 -o preallocation=metadata "$src" $size > /dev/null
+
+echo -n "prealloc: "
+/usr/bin/time -f %e $QEMU_IMG convert -n "$src" null-co://
diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051
index a3deb1fcad..200660f977 100755
--- a/tests/qemu-iotests/051
+++ b/tests/qemu-iotests/051
@@ -192,6 +192,30 @@ case "$QEMU_DEFAULT_MACHINE" in
 esac
 
 echo
+echo === Attach to node in non-default iothread ===
+echo
+
+case "$QEMU_DEFAULT_MACHINE" in
+    pc)
+        iothread="-drive file=$TEST_IMG,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on"
+
+        # Can't add a device in the main thread while virtio-scsi0 uses the node
+        run_qemu $iothread -device ide-hd,drive=disk,share-rw=on
+        run_qemu $iothread -device virtio-blk-pci,drive=disk,share-rw=on
+        run_qemu $iothread -device lsi53c895a,id=lsi0 -device scsi-hd,bus=lsi0.0,drive=disk,share-rw=on
+        run_qemu $iothread -device virtio-scsi,id=virtio-scsi1 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on
+
+        # virtio-blk enables the iothread only when the driver initialises the
+        # device, so a second virtio-blk device can't be added even with the
+        # same iothread. virtio-scsi allows this.
+        run_qemu $iothread -device virtio-blk-pci,drive=disk,iohtread=iothread0,share-rw=on
+        run_qemu $iothread -device virtio-scsi,id=virtio-scsi1,iothread=thread0 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on
+        ;;
+     *)
+        ;;
+esac
+
+echo
 echo === Read-only ===
 echo
 
diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out
index 9f1cf22608..8993835b94 100644
--- a/tests/qemu-iotests/051.out
+++ b/tests/qemu-iotests/051.out
@@ -137,6 +137,9 @@ QEMU X.Y.Z monitor - type 'help' for more information
 (qemu) QEMU_PROG: -drive if=virtio: Device needs media, but drive is empty
 
 
+=== Attach to node in non-default iothread ===
+
+
 === Read-only ===
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=virtio,readonly=on
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
index c4743cc31c..2d811c166c 100644
--- a/tests/qemu-iotests/051.pc.out
+++ b/tests/qemu-iotests/051.pc.out
@@ -173,6 +173,33 @@ QEMU X.Y.Z monitor - type 'help' for more information
 (qemu) QEMU_PROG: -device scsi-hd,drive=disk: Device needs media, but drive is empty
 
 
+=== Attach to node in non-default iothread ===
+
+Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device ide-hd,drive=disk,share-rw=on
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) QEMU_PROG: -device ide-hd,drive=disk,share-rw=on: Cannot change iothread of active block backend
+
+Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-blk-pci,drive=disk,share-rw=on
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) QEMU_PROG: -device virtio-blk-pci,drive=disk,share-rw=on: Cannot change iothread of active block backend
+
+Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device lsi53c895a,id=lsi0 -device scsi-hd,bus=lsi0.0,drive=disk,share-rw=on
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) QEMU_PROG: -device scsi-hd,bus=lsi0.0,drive=disk,share-rw=on: HBA does not support iothreads
+
+Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-scsi,id=virtio-scsi1 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) QEMU_PROG: -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on: Cannot change iothread of active block backend
+
+Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-blk-pci,drive=disk,iohtread=iothread0,share-rw=on
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) QEMU_PROG: -device virtio-blk-pci,drive=disk,iohtread=iothread0,share-rw=on: Cannot change iothread of active block backend
+
+Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-scsi,id=virtio-scsi1,iothread=thread0 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) quit
+
+
 === Read-only ===
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=floppy,readonly=on
diff --git a/tests/qemu-iotests/056 b/tests/qemu-iotests/056
index 3df323984d..f40fc11a09 100755
--- a/tests/qemu-iotests/056
+++ b/tests/qemu-iotests/056
@@ -214,7 +214,7 @@ class BackupTest(iotests.QMPTestCase):
         res = self.vm.qmp('query-block-jobs')
         self.assert_qmp(res, 'return[0]/status', 'concluded')
         # Leave zombie job un-dismissed, observe a failure:
-        res = self.qmp_backup_and_wait(serror='Need a root block node',
+        res = self.qmp_backup_and_wait(serror="Node 'drive0' is busy: block device is in use by block job: backup",
                                        device='drive0', format=iotests.imgfmt,
                                        sync='full', target=self.dest_img,
                                        auto_dismiss=False)
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index 89e911400c..b91d8321bb 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -150,10 +150,15 @@ $QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io
 echo
 echo "=== Testing overlap while COW is in flight ==="
 echo
+BACKING_IMG=$TEST_IMG.base
+TEST_IMG=$BACKING_IMG _make_test_img 1G
+
+$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io
+
 # compat=0.10 is required in order to make the following discard actually
 # unallocate the sector rather than make it a zero sector - we want COW, after
 # all.
-IMGOPTS='compat=0.10' _make_test_img 1G
+IMGOPTS='compat=0.10' _make_test_img -b "$BACKING_IMG" 1G
 # Write two clusters, the second one enforces creation of an L2 table after
 # the first data cluster.
 $QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index e42bf8c5a9..0f6b0658a1 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -97,7 +97,10 @@ read 512/512 bytes at offset 0
 
 === Testing overlap while COW is in flight ===
 
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=1073741824
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 backing_file=TEST_DIR/t.IMGFMT.base
 wrote 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 65536/65536 bytes at offset 536870912
diff --git a/tests/qemu-iotests/102 b/tests/qemu-iotests/102
index 749ff66b8a..b898df436f 100755
--- a/tests/qemu-iotests/102
+++ b/tests/qemu-iotests/102
@@ -55,7 +55,7 @@ $QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
 $QEMU_IMG resize -f raw --shrink "$TEST_IMG" $((5 * 64 * 1024))
 
 $QEMU_IO -c map "$TEST_IMG"
-$QEMU_IMG map "$TEST_IMG"
+$QEMU_IMG map "$TEST_IMG" | _filter_qemu_img_map
 
 echo
 echo '=== Testing map on an image file truncated outside of qemu ==='
diff --git a/tests/qemu-iotests/102.out b/tests/qemu-iotests/102.out
index 4401b08fee..cd2fdc7f96 100644
--- a/tests/qemu-iotests/102.out
+++ b/tests/qemu-iotests/102.out
@@ -7,7 +7,8 @@ wrote 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Image resized.
 64 KiB (0x10000) bytes     allocated at offset 0 bytes (0x0)
-Offset          Length          Mapped to       File
+Offset          Length          File
+0               0x10000         TEST_DIR/t.IMGFMT
 
 === Testing map on an image file truncated outside of qemu ===
 
diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out
index 41c7291258..4d71d9dcae 100644
--- a/tests/qemu-iotests/141.out
+++ b/tests/qemu-iotests/141.out
@@ -42,9 +42,9 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
+{"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
-{"return": {}}
 {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}}
diff --git a/tests/qemu-iotests/144.out b/tests/qemu-iotests/144.out
index 55299201e4..a9a8216bea 100644
--- a/tests/qemu-iotests/144.out
+++ b/tests/qemu-iotests/144.out
@@ -14,10 +14,10 @@ Formatting 'TEST_DIR/tmp.qcow2', fmt=qcow2 size=536870912 backing_file=TEST_DIR/
 
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "virtio0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "virtio0"}}
+{"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "virtio0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "virtio0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
 {"return": {}}
-{"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "virtio0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "virtio0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "virtio0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
diff --git a/tests/qemu-iotests/240 b/tests/qemu-iotests/240
index b4cf95096d..5be6b9c0f7 100755
--- a/tests/qemu-iotests/240
+++ b/tests/qemu-iotests/240
@@ -27,6 +27,12 @@ echo "QA output created by $seq"
 
 status=1	# failure is the default!
 
+_cleanup()
+{
+    rm -f "$TEST_DIR/nbd"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
 # get standard environment, filters and checks
 . ./common.rc
 . ./common.filter
@@ -122,6 +128,21 @@ run_qemu <<EOF
 { "execute": "quit"}
 EOF
 
+echo
+echo === Attach a SCSI disks using the same block device as a NBD server ===
+echo
+
+run_qemu <<EOF
+{ "execute": "qmp_capabilities" }
+{ "execute": "blockdev-add", "arguments": {"driver": "null-co", "node-name": "hd0", "read-only": true}}
+{ "execute": "nbd-server-start", "arguments": {"addr":{"type":"unix","data":{"path":"$TEST_DIR/nbd"}}}}
+{ "execute": "nbd-server-add", "arguments": {"device":"hd0"}}
+{ "execute": "object-add", "arguments": {"qom-type": "iothread", "id": "iothread0"}}
+{ "execute": "device_add", "arguments": {"id": "scsi0", "driver": "${virtio_scsi}", "iothread": "iothread0"}}
+{ "execute": "device_add", "arguments": {"id": "scsi-hd0", "driver": "scsi-hd", "drive": "hd0", "bus": "scsi0.0"}}
+{ "execute": "quit"}
+EOF
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/240.out b/tests/qemu-iotests/240.out
index d76392966c..d00df50297 100644
--- a/tests/qemu-iotests/240.out
+++ b/tests/qemu-iotests/240.out
@@ -43,7 +43,20 @@ QMP_VERSION
 {"return": {}}
 {"return": {}}
 {"return": {}}
-{"error": {"class": "GenericError", "desc": "Cannot attach a blockdev that is using a different iothread"}}
+{"error": {"class": "GenericError", "desc": "Cannot change iothread of active block backend"}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+
+=== Attach a SCSI disks using the same block device as a NBD server ===
+
+Testing:
+QMP_VERSION
+{"return": {}}
 {"return": {}}
 {"return": {}}
 {"return": {}}
diff --git a/tests/qemu-iotests/250 b/tests/qemu-iotests/250
new file mode 100755
index 0000000000..c9c0a84a5a
--- /dev/null
+++ b/tests/qemu-iotests/250
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+#
+# Test big discard in qcow2 shrink
+#
+# Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=vsementsov@virtuozzo.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1	# failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+# This test checks that qcow2_process_discards does not truncate a discard
+# request > 2G.
+# To reproduce bug we need to overflow int by one sequential discard, so we
+# need size > 2G, bigger cluster size (as with default 64k we may have maximum
+# of 512M sequential data, corresponding to one L1 entry), and we need some
+# data of the beginning of the disk mapped to the end of file to prevent
+# bdrv_co_truncate(bs->file) call in qcow2_co_truncate(), which might succeed
+# anyway.
+
+disk_usage()
+{
+    du --block-size=1 $1 | awk '{print $1}'
+}
+
+size=2100M
+IMGOPTS="cluster_size=1M,preallocation=metadata"
+
+_make_test_img $size
+$QEMU_IO -c 'discard 0 10M' -c 'discard 2090M 10M' \
+         -c 'write 2090M 10M' -c 'write 0 10M' "$TEST_IMG" | _filter_qemu_io
+
+# Check that our trick with swapping first and last 10M chunks succeeded.
+# Otherwise test may pass even if bdrv_pdiscard() fails in
+# qcow2_process_discards()
+$QEMU_IMG map "$TEST_IMG" | _filter_testdir
+
+before=$(disk_usage "$TEST_IMG")
+$QEMU_IMG resize --shrink "$TEST_IMG" 5M
+after=$(disk_usage "$TEST_IMG")
+
+echo "Disk usage delta: $((before - after))"
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/250.out b/tests/qemu-iotests/250.out
new file mode 100644
index 0000000000..f480fd273b
--- /dev/null
+++ b/tests/qemu-iotests/250.out
@@ -0,0 +1,16 @@
+QA output created by 250
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 preallocation=metadata
+discard 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 10485760/10485760 bytes at offset 2191523840
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 10485760/10485760 bytes at offset 2191523840
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          Mapped to       File
+0               0xa00000        0x82f00000      TEST_DIR/t.qcow2
+0x82a00000      0xa00000        0x500000        TEST_DIR/t.qcow2
+Image resized.
+Disk usage delta: 15728640
+*** done
diff --git a/tests/qemu-iotests/254 b/tests/qemu-iotests/254
new file mode 100755
index 0000000000..33cb80a512
--- /dev/null
+++ b/tests/qemu-iotests/254
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+#
+# Test external snapshot with bitmap copying.
+#
+# Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import iotests
+from iotests import qemu_img_create, file_path, log
+
+disk, top = file_path('disk', 'top')
+size = 1024 * 1024
+
+qemu_img_create('-f', iotests.imgfmt, disk, str(size))
+
+vm = iotests.VM().add_drive(disk, opts='node-name=base')
+vm.launch()
+
+vm.qmp_log('block-dirty-bitmap-add', node='drive0', name='bitmap0')
+
+vm.hmp_qemu_io('drive0', 'write 0 512K')
+
+vm.qmp_log('transaction', indent=2, actions=[
+    {'type': 'blockdev-snapshot-sync',
+     'data': {'device': 'drive0', 'snapshot-file': top,
+              'snapshot-node-name': 'snap'}},
+    {'type': 'block-dirty-bitmap-add',
+     'data': {'node': 'snap', 'name': 'bitmap0'}},
+    {'type': 'block-dirty-bitmap-merge',
+     'data': {'node': 'snap', 'target': 'bitmap0',
+              'bitmaps': [{'node': 'base', 'name': 'bitmap0'}]}}
+], filters=[iotests.filter_qmp_testfiles])
+
+result = vm.qmp('query-block')['return'][0]
+log("query-block: device = {}, node-name = {}, dirty-bitmaps:".format(
+    result['device'], result['inserted']['node-name']))
+log(result['dirty-bitmaps'], indent=2)
+
+vm.shutdown()
diff --git a/tests/qemu-iotests/254.out b/tests/qemu-iotests/254.out
new file mode 100644
index 0000000000..d7394cf002
--- /dev/null
+++ b/tests/qemu-iotests/254.out
@@ -0,0 +1,52 @@
+{"execute": "block-dirty-bitmap-add", "arguments": {"name": "bitmap0", "node": "drive0"}}
+{"return": {}}
+{
+  "execute": "transaction",
+  "arguments": {
+    "actions": [
+      {
+        "data": {
+          "device": "drive0",
+          "snapshot-file": "TEST_DIR/PID-top",
+          "snapshot-node-name": "snap"
+        },
+        "type": "blockdev-snapshot-sync"
+      },
+      {
+        "data": {
+          "name": "bitmap0",
+          "node": "snap"
+        },
+        "type": "block-dirty-bitmap-add"
+      },
+      {
+        "data": {
+          "bitmaps": [
+            {
+              "name": "bitmap0",
+              "node": "base"
+            }
+          ],
+          "node": "snap",
+          "target": "bitmap0"
+        },
+        "type": "block-dirty-bitmap-merge"
+      }
+    ]
+  }
+}
+{
+  "return": {}
+}
+query-block: device = drive0, node-name = snap, dirty-bitmaps:
+[
+  {
+    "busy": false,
+    "count": 524288,
+    "granularity": 65536,
+    "name": "bitmap0",
+    "persistent": false,
+    "recording": true,
+    "status": "active"
+  }
+]
diff --git a/tests/qemu-iotests/255 b/tests/qemu-iotests/255
new file mode 100755
index 0000000000..49433ec122
--- /dev/null
+++ b/tests/qemu-iotests/255
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+#
+# Test commit job graph modifications while requests are active
+#
+# Copyright (C) 2019 Red Hat, Inc.
+#
+# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import iotests
+from iotests import imgfmt
+
+iotests.verify_image_format(supported_fmts=['qcow2'])
+
+def blockdev_create(vm, options):
+    result = vm.qmp_log('blockdev-create',
+                        filters=[iotests.filter_qmp_testfiles],
+                        job_id='job0', options=options)
+
+    if 'return' in result:
+        assert result['return'] == {}
+        vm.run_job('job0')
+    iotests.log("")
+
+iotests.log('Finishing a commit job with background reads')
+iotests.log('============================================')
+iotests.log('')
+
+with iotests.FilePath('t.qcow2') as disk_path, \
+     iotests.FilePath('t.qcow2.mid') as mid_path, \
+     iotests.FilePath('t.qcow2.base') as base_path, \
+     iotests.VM() as vm:
+
+    iotests.log("=== Create backing chain and start VM ===")
+    iotests.log("")
+
+    size = 128 * 1024 * 1024
+    size_str = str(size)
+
+    iotests.create_image(base_path, size)
+    iotests.qemu_img_log('create', '-f', iotests.imgfmt, mid_path, size_str)
+    iotests.qemu_img_log('create', '-f', iotests.imgfmt, disk_path, size_str)
+
+    # Create a backing chain like this:
+    # base <- [throttled: bps-read=4096] <- mid <- overlay
+
+    vm.add_object('throttle-group,x-bps-read=4096,id=throttle0')
+    vm.add_blockdev('file,filename=%s,node-name=base' % (base_path))
+    vm.add_blockdev('throttle,throttle-group=throttle0,file=base,node-name=throttled')
+    vm.add_blockdev('file,filename=%s,node-name=mid-file' % (mid_path))
+    vm.add_blockdev('qcow2,file=mid-file,node-name=mid,backing=throttled')
+    vm.add_drive_raw('if=none,id=overlay,driver=qcow2,file=%s,backing=mid' % (disk_path))
+
+    vm.launch()
+
+    iotests.log("=== Start background read requests ===")
+    iotests.log("")
+
+    def start_requests():
+        vm.hmp_qemu_io('overlay', 'aio_read 0 4k')
+        vm.hmp_qemu_io('overlay', 'aio_read 0 4k')
+
+    start_requests()
+
+    iotests.log("=== Run a commit job ===")
+    iotests.log("")
+
+    result = vm.qmp_log('block-commit', job_id='job0', auto_finalize=False,
+                        device='overlay', top_node='mid')
+
+    vm.run_job('job0', auto_finalize=False, pre_finalize=start_requests,
+                auto_dismiss=True)
+
+    vm.shutdown()
+
+iotests.log('')
+iotests.log('Closing the VM while a job is being cancelled')
+iotests.log('=============================================')
+iotests.log('')
+
+with iotests.FilePath('src.qcow2') as src_path, \
+     iotests.FilePath('dst.qcow2') as dst_path, \
+     iotests.VM() as vm:
+
+    iotests.log('=== Create images and start VM ===')
+    iotests.log('')
+
+    size = 128 * 1024 * 1024
+    size_str = str(size)
+
+    iotests.qemu_img_log('create', '-f', iotests.imgfmt, src_path, size_str)
+    iotests.qemu_img_log('create', '-f', iotests.imgfmt, dst_path, size_str)
+
+    iotests.log(iotests.qemu_io('-f', iotests.imgfmt, '-c', 'write 0 1M',
+                                src_path),
+                filters=[iotests.filter_test_dir, iotests.filter_qemu_io])
+
+    vm.add_object('throttle-group,x-bps-read=4096,id=throttle0')
+
+    vm.add_blockdev('file,node-name=src-file,filename=%s' % (src_path))
+    vm.add_blockdev('%s,node-name=src,file=src-file' % (iotests.imgfmt))
+
+    vm.add_blockdev('file,node-name=dst-file,filename=%s' % (dst_path))
+    vm.add_blockdev('%s,node-name=dst,file=dst-file' % (iotests.imgfmt))
+
+    vm.add_blockdev('throttle,node-name=src-throttled,' +
+                    'throttle-group=throttle0,file=src')
+
+    vm.add_device('virtio-blk,drive=src-throttled')
+
+    vm.launch()
+
+    iotests.log('=== Start a mirror job ===')
+    iotests.log('')
+
+    vm.qmp_log('blockdev-mirror', job_id='job0', device='src-throttled',
+                                  target='dst', sync='full')
+
+    vm.qmp_log('block-job-cancel', device='job0')
+    vm.qmp_log('quit')
+
+    vm.shutdown()
diff --git a/tests/qemu-iotests/255.out b/tests/qemu-iotests/255.out
new file mode 100644
index 0000000000..348909fdef
--- /dev/null
+++ b/tests/qemu-iotests/255.out
@@ -0,0 +1,40 @@
+Finishing a commit job with background reads
+============================================
+
+=== Create backing chain and start VM ===
+
+Formatting 'TEST_DIR/PID-t.qcow2.mid', fmt=qcow2 size=134217728 cluster_size=65536 lazy_refcounts=off refcount_bits=16
+
+Formatting 'TEST_DIR/PID-t.qcow2', fmt=qcow2 size=134217728 cluster_size=65536 lazy_refcounts=off refcount_bits=16
+
+=== Start background read requests ===
+
+=== Run a commit job ===
+
+{"execute": "block-commit", "arguments": {"auto-finalize": false, "device": "overlay", "job-id": "job0", "top-node": "mid"}}
+{"return": {}}
+{"execute": "job-finalize", "arguments": {"id": "job0"}}
+{"return": {}}
+{"data": {"id": "job0", "type": "commit"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"device": "job0", "len": 134217728, "offset": 134217728, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+
+Closing the VM while a job is being cancelled
+=============================================
+
+=== Create images and start VM ===
+
+Formatting 'TEST_DIR/PID-src.qcow2', fmt=qcow2 size=134217728 cluster_size=65536 lazy_refcounts=off refcount_bits=16
+
+Formatting 'TEST_DIR/PID-dst.qcow2', fmt=qcow2 size=134217728 cluster_size=65536 lazy_refcounts=off refcount_bits=16
+
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Start a mirror job ===
+
+{"execute": "blockdev-mirror", "arguments": {"device": "src-throttled", "job-id": "job0", "sync": "full", "target": "dst"}}
+{"return": {}}
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
+{"return": {}}
+{"execute": "quit", "arguments": {}}
+{"return": {}}
diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 95162c6cf9..44ebf24080 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -876,7 +876,6 @@ do
                     fi
                 else
                     mv $tmp.out $seq.out.bad
-                    $diff -w "$reference" "$PWD"/$seq.out.bad
                     status="fail"
                     results="output mismatch (see $seq.out.bad)"
                     printdiff=true
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 2c74deec00..f3b6d601b2 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -262,5 +262,8 @@
 247 rw quick
 248 rw quick
 249 rw auto quick
+250 rw auto quick
 252 rw auto backing quick
 253 rw auto quick
+254 rw auto backing quick
+255 rw auto quick
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 7bde380d96..6bcddd8870 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -126,6 +126,11 @@ def qemu_img_pipe(*args):
         sys.stderr.write('qemu-img received signal %i: %s\n' % (-exitcode, ' '.join(qemu_img_args + list(args))))
     return subp.communicate()[0]
 
+def qemu_img_log(*args):
+    result = qemu_img_pipe(*args)
+    log(result, filters=[filter_testfiles])
+    return result
+
 def img_info_log(filename, filter_path=None, imgopts=False, extra_args=[]):
     args = [ 'info' ]
     if imgopts:
@@ -533,7 +538,8 @@ class VM(qtest.QEMUQtestMachine):
         return result
 
     # Returns None on success, and an error string on failure
-    def run_job(self, job, auto_finalize=True, auto_dismiss=False):
+    def run_job(self, job, auto_finalize=True, auto_dismiss=False,
+                pre_finalize=None):
         error = None
         while True:
             for ev in self.get_qmp_events_filtered(wait=True):
@@ -546,6 +552,8 @@ class VM(qtest.QEMUQtestMachine):
                                 error = j['error']
                                 log('Job failed: %s' % (j['error']))
                     elif status == 'pending' and not auto_finalize:
+                        if pre_finalize:
+                            pre_finalize()
                         self.qmp_log('job-finalize', id=job)
                     elif status == 'concluded' and not auto_dismiss:
                         self.qmp_log('job-dismiss', id=job)
diff --git a/tests/qos-test.c b/tests/qos-test.c
index ae2fb5de1c..01b2a22c08 100644
--- a/tests/qos-test.c
+++ b/tests/qos-test.c
@@ -340,7 +340,8 @@ static void walk_path(QOSGraphNode *orig_path, int len)
     char **path_vec = g_new0(char *, (QOS_PATH_MAX_ELEMENT_SIZE * 2));
     int path_vec_size = 0;
 
-    char *after_cmd = NULL, *before_cmd = NULL, *after_device = NULL;
+    char *after_cmd, *before_cmd, *after_device;
+    GString *after_device_str = g_string_new("");
     char *node_name = orig_path->name, *path_str;
 
     GString *cmd_line = g_string_new("");
@@ -363,9 +364,8 @@ static void walk_path(QOSGraphNode *orig_path, int len)
         /* append node command line + previous edge command line */
         if (path->command_line && etype == QEDGE_CONSUMED_BY) {
             g_string_append(cmd_line, path->command_line);
-            if (after_device) {
-                g_string_append(cmd_line, after_device);
-            }
+            g_string_append(cmd_line, after_device_str->str);
+            g_string_truncate(after_device_str, 0);
         }
 
         path_vec[path_vec_size++] = qos_graph_edge_get_name(path->path_edge);
@@ -382,12 +382,15 @@ static void walk_path(QOSGraphNode *orig_path, int len)
         if (after_cmd) {
             g_string_append(cmd_line2, after_cmd);
         }
+        if (after_device) {
+            g_string_append(after_device_str, after_device);
+        }
     }
 
     path_vec[path_vec_size++] = NULL;
-    if (after_device) {
-        g_string_append(cmd_line, after_device);
-    }
+    g_string_append(cmd_line, after_device_str->str);
+    g_string_free(after_device_str, true);
+
     g_string_append(cmd_line, cmd_line2->str);
     g_string_free(cmd_line2, true);
 
diff --git a/tests/tcg/mips/include/test_utils_128.h b/tests/tcg/mips/include/test_utils_128.h
index cfd7ad3188..a828416d4f 100644
--- a/tests/tcg/mips/include/test_utils_128.h
+++ b/tests/tcg/mips/include/test_utils_128.h
@@ -66,7 +66,7 @@ static inline int32_t check_results(const char *instruction_name,
         }
     }
 
-    printf("PASS: %3d   FAIL: %3d   elapsed time: %5.2f ms\n",
+    printf("\tPASS: %3d \tFAIL: %3d \telapsed time: %5.2f ms\n",
            pass_count, fail_count, elapsed_time);
 
     if (fail_count > 0) {
diff --git a/tests/tcg/mips/include/wrappers_msa.h b/tests/tcg/mips/include/wrappers_msa.h
index 254e215b8a..6feda368dd 100644
--- a/tests/tcg/mips/include/wrappers_msa.h
+++ b/tests/tcg/mips/include/wrappers_msa.h
@@ -24,7 +24,8 @@
 
 
 #define DO_MSA__WD__WS(suffix, mnemonic)                               \
-static inline void do_msa_##suffix(void *input, void *output)          \
+static inline void do_msa_##suffix(const void *input,                  \
+                                   const void *output)                 \
 {                                                                      \
    __asm__ volatile (                                                  \
       "move $t0, %0\n\t"                                               \
@@ -39,7 +40,8 @@ static inline void do_msa_##suffix(void *input, void *output)          \
 }
 
 #define DO_MSA__WD__WD(suffix, mnemonic)                               \
-static inline void do_msa_##suffix(void *input, void *output)          \
+static inline void do_msa_##suffix(const void *input,                  \
+                                   const void *output)                 \
 {                                                                      \
    __asm__ volatile (                                                  \
       "move $t0, %0\n\t"                                               \
@@ -70,8 +72,9 @@ DO_MSA__WD__WS(PCNT_D, pcnt.d)
 
 
 #define DO_MSA__WD__WS_WT(suffix, mnemonic)                            \
-static inline void do_msa_##suffix(void *input1, void *input2,         \
-                                   void *output)                       \
+static inline void do_msa_##suffix(const void *input1,                 \
+                                   const void *input2,                 \
+                                   const void *output)                 \
 {                                                                      \
    __asm__ volatile (                                                  \
       "move $t0, %0\n\t"                                               \
@@ -258,6 +261,142 @@ DO_MSA__WD__WS_WT(SRLR_H, srlr.h)
 DO_MSA__WD__WS_WT(SRLR_W, srlr.w)
 DO_MSA__WD__WS_WT(SRLR_D, srlr.d)
 
+DO_MSA__WD__WS_WT(ADD_A_B, add_a.b)
+DO_MSA__WD__WS_WT(ADD_A_H, add_a.h)
+DO_MSA__WD__WS_WT(ADD_A_W, add_a.w)
+DO_MSA__WD__WS_WT(ADD_A_D, add_a.d)
+
+DO_MSA__WD__WS_WT(ADDS_A_B, adds_a.b)
+DO_MSA__WD__WS_WT(ADDS_A_H, adds_a.h)
+DO_MSA__WD__WS_WT(ADDS_A_W, adds_a.w)
+DO_MSA__WD__WS_WT(ADDS_A_D, adds_a.d)
+
+DO_MSA__WD__WS_WT(ADDS_S_B, adds_s.b)
+DO_MSA__WD__WS_WT(ADDS_S_H, adds_s.h)
+DO_MSA__WD__WS_WT(ADDS_S_W, adds_s.w)
+DO_MSA__WD__WS_WT(ADDS_S_D, adds_s.d)
+
+DO_MSA__WD__WS_WT(ADDS_U_B, adds_u.b)
+DO_MSA__WD__WS_WT(ADDS_U_H, adds_u.h)
+DO_MSA__WD__WS_WT(ADDS_U_W, adds_u.w)
+DO_MSA__WD__WS_WT(ADDS_U_D, adds_u.d)
+
+DO_MSA__WD__WS_WT(ADDV_B, addv.b)
+DO_MSA__WD__WS_WT(ADDV_H, addv.h)
+DO_MSA__WD__WS_WT(ADDV_W, addv.w)
+DO_MSA__WD__WS_WT(ADDV_D, addv.d)
+
+DO_MSA__WD__WS_WT(HADD_S_H, hadd_s.h)
+DO_MSA__WD__WS_WT(HADD_S_W, hadd_s.w)
+DO_MSA__WD__WS_WT(HADD_S_D, hadd_s.d)
+
+DO_MSA__WD__WS_WT(HADD_U_H, hadd_u.h)
+DO_MSA__WD__WS_WT(HADD_U_W, hadd_u.w)
+DO_MSA__WD__WS_WT(HADD_U_D, hadd_u.d)
+
+DO_MSA__WD__WS_WT(AVER_S_B, aver_s.b)
+DO_MSA__WD__WS_WT(AVER_S_H, aver_s.h)
+DO_MSA__WD__WS_WT(AVER_S_W, aver_s.w)
+DO_MSA__WD__WS_WT(AVER_S_D, aver_s.d)
+
+DO_MSA__WD__WS_WT(AVER_U_B, aver_u.b)
+DO_MSA__WD__WS_WT(AVER_U_H, aver_u.h)
+DO_MSA__WD__WS_WT(AVER_U_W, aver_u.w)
+DO_MSA__WD__WS_WT(AVER_U_D, aver_u.d)
+
+DO_MSA__WD__WS_WT(AVE_S_B, ave_s.b)
+DO_MSA__WD__WS_WT(AVE_S_H, ave_s.h)
+DO_MSA__WD__WS_WT(AVE_S_W, ave_s.w)
+DO_MSA__WD__WS_WT(AVE_S_D, ave_s.d)
+
+DO_MSA__WD__WS_WT(AVE_U_B, ave_u.b)
+DO_MSA__WD__WS_WT(AVE_U_H, ave_u.h)
+DO_MSA__WD__WS_WT(AVE_U_W, ave_u.w)
+DO_MSA__WD__WS_WT(AVE_U_D, ave_u.d)
+
+DO_MSA__WD__WS_WT(DIV_S_B, div_s.b)
+DO_MSA__WD__WS_WT(DIV_S_H, div_s.h)
+DO_MSA__WD__WS_WT(DIV_S_W, div_s.w)
+DO_MSA__WD__WS_WT(DIV_S_D, div_s.d)
+
+DO_MSA__WD__WS_WT(DIV_U_B, div_u.b)
+DO_MSA__WD__WS_WT(DIV_U_H, div_u.h)
+DO_MSA__WD__WS_WT(DIV_U_W, div_u.w)
+DO_MSA__WD__WS_WT(DIV_U_D, div_u.d)
+
+DO_MSA__WD__WS_WT(DOTP_S_H, dotp_s.h)
+DO_MSA__WD__WS_WT(DOTP_S_W, dotp_s.w)
+DO_MSA__WD__WS_WT(DOTP_S_D, dotp_s.d)
+
+DO_MSA__WD__WS_WT(DOTP_U_H, dotp_u.h)
+DO_MSA__WD__WS_WT(DOTP_U_W, dotp_u.w)
+DO_MSA__WD__WS_WT(DOTP_U_D, dotp_u.d)
+
+DO_MSA__WD__WS_WT(MOD_S_B, mod_s.b)
+DO_MSA__WD__WS_WT(MOD_S_H, mod_s.h)
+DO_MSA__WD__WS_WT(MOD_S_W, mod_s.w)
+DO_MSA__WD__WS_WT(MOD_S_D, mod_s.d)
+
+DO_MSA__WD__WS_WT(MOD_U_B, mod_u.b)
+DO_MSA__WD__WS_WT(MOD_U_H, mod_u.h)
+DO_MSA__WD__WS_WT(MOD_U_W, mod_u.w)
+DO_MSA__WD__WS_WT(MOD_U_D, mod_u.d)
+
+DO_MSA__WD__WS_WT(MUL_Q_H, mul_q.h)
+DO_MSA__WD__WS_WT(MUL_Q_W, mul_q.w)
+DO_MSA__WD__WS_WT(MULR_Q_H, mulr_q.h)
+DO_MSA__WD__WS_WT(MULR_Q_W, mulr_q.w)
+
+DO_MSA__WD__WS_WT(MULV_B, mulv.b)
+DO_MSA__WD__WS_WT(MULV_H, mulv.h)
+DO_MSA__WD__WS_WT(MULV_W, mulv.w)
+DO_MSA__WD__WS_WT(MULV_D, mulv.d)
+
+DO_MSA__WD__WS_WT(SUBV_B, subv.b)
+DO_MSA__WD__WS_WT(SUBV_H, subv.h)
+DO_MSA__WD__WS_WT(SUBV_W, subv.w)
+DO_MSA__WD__WS_WT(SUBV_D, subv.d)
+
+DO_MSA__WD__WS_WT(SUBS_S_B, subs_s.b)
+DO_MSA__WD__WS_WT(SUBS_S_H, subs_s.h)
+DO_MSA__WD__WS_WT(SUBS_S_W, subs_s.w)
+DO_MSA__WD__WS_WT(SUBS_S_D, subs_s.d)
+
+DO_MSA__WD__WS_WT(SUBS_U_B, subs_u.b)
+DO_MSA__WD__WS_WT(SUBS_U_H, subs_u.h)
+DO_MSA__WD__WS_WT(SUBS_U_W, subs_u.w)
+DO_MSA__WD__WS_WT(SUBS_U_D, subs_u.d)
+
+DO_MSA__WD__WS_WT(ASUB_S_B, asub_s.b)
+DO_MSA__WD__WS_WT(ASUB_S_H, asub_s.h)
+DO_MSA__WD__WS_WT(ASUB_S_W, asub_s.w)
+DO_MSA__WD__WS_WT(ASUB_S_D, asub_s.d)
+
+DO_MSA__WD__WS_WT(ASUB_U_B, asub_u.b)
+DO_MSA__WD__WS_WT(ASUB_U_H, asub_u.h)
+DO_MSA__WD__WS_WT(ASUB_U_W, asub_u.w)
+DO_MSA__WD__WS_WT(ASUB_U_D, asub_u.d)
+
+DO_MSA__WD__WS_WT(SUBSUU_S_B, subsuu_s.b)
+DO_MSA__WD__WS_WT(SUBSUU_S_H, subsuu_s.h)
+DO_MSA__WD__WS_WT(SUBSUU_S_W, subsuu_s.w)
+DO_MSA__WD__WS_WT(SUBSUU_S_D, subsuu_s.d)
+
+DO_MSA__WD__WS_WT(SUBSUS_U_B, subsus_u.b)
+DO_MSA__WD__WS_WT(SUBSUS_U_H, subsus_u.h)
+DO_MSA__WD__WS_WT(SUBSUS_U_W, subsus_u.w)
+DO_MSA__WD__WS_WT(SUBSUS_U_D, subsus_u.d)
+
+DO_MSA__WD__WS_WT(HSUB_S_H, hsub_s.h)
+DO_MSA__WD__WS_WT(HSUB_S_W, hsub_s.w)
+DO_MSA__WD__WS_WT(HSUB_S_D, hsub_s.d)
+
+DO_MSA__WD__WS_WT(HSUB_U_H, hsub_u.h)
+DO_MSA__WD__WS_WT(HSUB_U_W, hsub_u.w)
+DO_MSA__WD__WS_WT(HSUB_U_D, hsub_u.d)
+
+
+
 DO_MSA__WD__WS_WT(BMNZ_V, bmnz.v)
 DO_MSA__WD__WS_WT(BMZ_V, bmz.v)
 
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_b.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_b.c
new file mode 100644
index 0000000000..c5aab141f2
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_b.c
@@ -0,0 +1,153 @@
+/*
+ *  Test program for MSA instruction BCLR.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BCLR.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7f7f7f7f7f7f7f7fULL, 0x7f7f7f7f7f7f7f7fULL, },    /*   0  */
+        { 0xfefefefefefefefeULL, 0xfefefefefefefefeULL, },
+        { 0xfbfbfbfbfbfbfbfbULL, 0xfbfbfbfbfbfbfbfbULL, },
+        { 0xdfdfdfdfdfdfdfdfULL, 0xdfdfdfdfdfdfdfdfULL, },
+        { 0xefefefefefefefefULL, 0xefefefefefefefefULL, },
+        { 0xf7f7f7f7f7f7f7f7ULL, 0xf7f7f7f7f7f7f7f7ULL, },
+        { 0xf7bffef7bffef7bfULL, 0xfef7bffef7bffef7ULL, },
+        { 0xeffd7feffd7feffdULL, 0x7feffd7feffd7fefULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2a2a2a2a2a2a2a2aULL, 0x2a2a2a2a2a2a2a2aULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x8a8a8a8a8a8a8a8aULL, 0x8a8a8a8a8a8a8a8aULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xa2a2a2a2a2a2a2a2ULL, 0xa2a2a2a2a2a2a2a2ULL, },
+        { 0xa2aaaaa2aaaaa2aaULL, 0xaaa2aaaaa2aaaaa2ULL, },
+        { 0xaaa82aaaa82aaaa8ULL, 0x2aaaa82aaaa82aaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5454545454545454ULL, 0x5454545454545454ULL, },
+        { 0x5151515151515151ULL, 0x5151515151515151ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x4545454545454545ULL, 0x4545454545454545ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5515545515545515ULL, 0x5455155455155455ULL, },
+        { 0x4555554555554555ULL, 0x5545555545555545ULL, },
+        { 0x4c4c4c4c4c4c4c4cULL, 0x4c4c4c4c4c4c4c4cULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xc8c8c8c8c8c8c8c8ULL, 0xc8c8c8c8c8c8c8c8ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xc4c4c4c4c4c4c4c4ULL, 0xc4c4c4c4c4c4c4c4ULL, },
+        { 0xc48cccc48cccc48cULL, 0xccc48cccc48cccc4ULL, },
+        { 0xcccc4ccccc4cccccULL, 0x4ccccc4ccccc4cccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3232323232323232ULL, 0x3232323232323232ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x1313131313131313ULL, 0x1313131313131313ULL, },
+        { 0x2323232323232323ULL, 0x2323232323232323ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333323333323333ULL, 0x3233333233333233ULL, },
+        { 0x2331332331332331ULL, 0x3323313323313323ULL, },
+        { 0x630e38630e38630eULL, 0x38630e38630e3863ULL, },    /*  48  */
+        { 0xe28e38e28e38e28eULL, 0x38e28e38e28e38e2ULL, },
+        { 0xe38a38e38a38e38aULL, 0x38e38a38e38a38e3ULL, },
+        { 0xc38e18c38e18c38eULL, 0x18c38e18c38e18c3ULL, },
+        { 0xe38e28e38e28e38eULL, 0x28e38e28e38e28e3ULL, },
+        { 0xe38630e38630e386ULL, 0x30e38630e38630e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38c38e38c38e38cULL, 0x38e38c38e38c38e3ULL, },
+        { 0x1c71471c71471c71ULL, 0x471c71471c71471cULL, },    /*  56  */
+        { 0x1c70c61c70c61c70ULL, 0xc61c70c61c70c61cULL, },
+        { 0x1871c31871c31871ULL, 0xc31871c31871c318ULL, },
+        { 0x1c51c71c51c71c51ULL, 0xc71c51c71c51c71cULL, },
+        { 0x0c61c70c61c70c61ULL, 0xc70c61c70c61c70cULL, },
+        { 0x1471c71471c71471ULL, 0xc71471c71471c714ULL, },
+        { 0x1431c61431c61431ULL, 0xc61431c61431c614ULL, },
+        { 0x0c71470c71470c71ULL, 0x470c71470c71470cULL, },
+        { 0x886aa6cc28625540ULL, 0x4367031ebe73b00cULL, },    /*  64  */
+        { 0x802ae6c408625540ULL, 0x4b67035ade7bb00cULL, },
+        { 0x886aa6c828625540ULL, 0x4b660b5ef673900cULL, },
+        { 0x886aa6cc28605100ULL, 0x4b650a5efc7bb00cULL, },
+        { 0xfaba00634c93c708ULL, 0x1277b31a153752ecULL, },
+        { 0xf3be00634d934708ULL, 0x1277b31a153f52ecULL, },
+        { 0xebba00634d13c708ULL, 0x12f6bb1a153752ecULL, },
+        { 0xfa3e00430d91c308ULL, 0x12f5ba1a153b52fcULL, },
+        { 0xac5aaeaab8cb8b80ULL, 0x2758c6bfab232404ULL, },    /*  72  */
+        { 0xa41aaea299c70b80ULL, 0x2358c6fb8b2b2104ULL, },
+        { 0xac5aaeaab94f8380ULL, 0x27d8867fa3230504ULL, },
+        { 0xac5aae8ab9cd8b80ULL, 0x07d8c6fea92b2114ULL, },
+        { 0x704b164d5e31c24eULL, 0x85718098a942e2a0ULL, },
+        { 0x700f16455e31624eULL, 0x897180d88942e2a0ULL, },
+        { 0x604b16495c31e24eULL, 0x0df08858a142c2a0ULL, },
+        { 0x704f164d1e31e20eULL, 0x8df188d8a942e2a0ULL, },
+    };
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_B(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_B(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_d.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_d.c
new file mode 100644
index 0000000000..11e616e341
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_d.c
@@ -0,0 +1,153 @@
+/*
+ *  Test program for MSA instruction BCLR.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BCLR.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL, },    /*   0  */
+        { 0xfffffffffffffffeULL, 0xfffffffffffffffeULL, },
+        { 0xfffffbffffffffffULL, 0xfffffbffffffffffULL, },
+        { 0xffffffffffdfffffULL, 0xffffffffffdfffffULL, },
+        { 0xffffffffffffefffULL, 0xffffffffffffefffULL, },
+        { 0xfff7ffffffffffffULL, 0xfff7ffffffffffffULL, },
+        { 0xffffffffffffbfffULL, 0xfffffff7ffffffffULL, },
+        { 0xfffdffffffffffffULL, 0xffffffffefffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2aaaaaaaaaaaaaaaULL, 0x2aaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaa8aaaaaULL, 0xaaaaaaaaaa8aaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaa2aaaaaaaaaaaaULL, 0xaaa2aaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaa2aaaaaaaaULL, },
+        { 0xaaa8aaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555555555554ULL, 0x5555555555555554ULL, },
+        { 0x5555515555555555ULL, 0x5555515555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555554555ULL, 0x5555555555554555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555551555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555545555555ULL, },
+        { 0x4cccccccccccccccULL, 0x4cccccccccccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccc8ccccccccccULL, 0xccccc8ccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccc4ccccccccccccULL, 0xccc4ccccccccccccULL, },
+        { 0xcccccccccccc8cccULL, 0xccccccc4ccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333333333332ULL, 0x3333333333333332ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333133333ULL, 0x3333333333133333ULL, },
+        { 0x3333333333332333ULL, 0x3333333333332333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3331333333333333ULL, 0x3333333323333333ULL, },
+        { 0x638e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e2ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38a38e38e38e3ULL, },
+        { 0xe38e38e38e18e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e28e3ULL, },
+        { 0xe38638e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38a38eULL, 0x38e38e30e38e38e3ULL, },
+        { 0xe38c38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x471c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c70ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c31c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c51c71cULL, },
+        { 0x1c71c71c71c70c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71471c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c70c71c71cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7ba00cULL, },    /*  64  */
+        { 0x886ae6cc28625440ULL, 0x4b670b5efe7bb00cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe6bb00cULL, },
+        { 0x886ae6cc28621540ULL, 0x4b670b5efe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f42fcULL, },
+        { 0xfbbe00634d93c608ULL, 0x02f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a152f52fcULL, },
+        { 0xfbbe00634d938708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },    /*  72  */
+        { 0xac5aaeaab9cf8a80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6feab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+        { 0x704f164d5e31a24eULL, 0x8df188d8a942e2a0ULL, },
+    };
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_D(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_D(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_h.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_h.c
new file mode 100644
index 0000000000..dc67b4d67e
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_h.c
@@ -0,0 +1,153 @@
+/*
+ *  Test program for MSA instruction BCLR.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BCLR.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7fff7fff7fff7fffULL, 0x7fff7fff7fff7fffULL, },    /*   0  */
+        { 0xfffefffefffefffeULL, 0xfffefffefffefffeULL, },
+        { 0xfbfffbfffbfffbffULL, 0xfbfffbfffbfffbffULL, },
+        { 0xffdfffdfffdfffdfULL, 0xffdfffdfffdfffdfULL, },
+        { 0xefffefffefffefffULL, 0xefffefffefffefffULL, },
+        { 0xfff7fff7fff7fff7ULL, 0xfff7fff7fff7fff7ULL, },
+        { 0xbffffff7feffbfffULL, 0xfff7feffbffffff7ULL, },
+        { 0xfffdefffff7ffffdULL, 0xefffff7ffffdefffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2aaa2aaa2aaa2aaaULL, 0x2aaa2aaa2aaa2aaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaa8aaa8aaa8aaa8aULL, 0xaa8aaa8aaa8aaa8aULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaa2aaa2aaa2aaa2ULL, 0xaaa2aaa2aaa2aaa2ULL, },
+        { 0xaaaaaaa2aaaaaaaaULL, 0xaaa2aaaaaaaaaaa2ULL, },
+        { 0xaaa8aaaaaa2aaaa8ULL, 0xaaaaaa2aaaa8aaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5554555455545554ULL, 0x5554555455545554ULL, },
+        { 0x5155515551555155ULL, 0x5155515551555155ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x4555455545554555ULL, 0x4555455545554555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x1555555554551555ULL, 0x5555545515555555ULL, },
+        { 0x5555455555555555ULL, 0x4555555555554555ULL, },
+        { 0x4ccc4ccc4ccc4cccULL, 0x4ccc4ccc4ccc4cccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xc8ccc8ccc8ccc8ccULL, 0xc8ccc8ccc8ccc8ccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccc4ccc4ccc4ccc4ULL, 0xccc4ccc4ccc4ccc4ULL, },
+        { 0x8cccccc4cccc8cccULL, 0xccc4cccc8cccccc4ULL, },
+        { 0xcccccccccc4cccccULL, 0xcccccc4cccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3332333233323332ULL, 0x3332333233323332ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3313331333133313ULL, 0x3313331333133313ULL, },
+        { 0x2333233323332333ULL, 0x2333233323332333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333332333333ULL, 0x3333323333333333ULL, },
+        { 0x3331233333333331ULL, 0x2333333333312333ULL, },
+        { 0x638e38e30e38638eULL, 0x38e30e38638e38e3ULL, },    /*  48  */
+        { 0xe38e38e28e38e38eULL, 0x38e28e38e38e38e2ULL, },
+        { 0xe38e38e38a38e38eULL, 0x38e38a38e38e38e3ULL, },
+        { 0xe38e38c38e18e38eULL, 0x38c38e18e38e38c3ULL, },
+        { 0xe38e28e38e38e38eULL, 0x28e38e38e38e28e3ULL, },
+        { 0xe38638e38e30e386ULL, 0x38e38e30e38638e3ULL, },
+        { 0xa38e38e38e38a38eULL, 0x38e38e38a38e38e3ULL, },
+        { 0xe38c28e38e38e38cULL, 0x28e38e38e38c28e3ULL, },
+        { 0x1c71471c71c71c71ULL, 0x471c71c71c71471cULL, },    /*  56  */
+        { 0x1c70c71c71c61c70ULL, 0xc71c71c61c70c71cULL, },
+        { 0x1871c31c71c71871ULL, 0xc31c71c71871c31cULL, },
+        { 0x1c51c71c71c71c51ULL, 0xc71c71c71c51c71cULL, },
+        { 0x0c71c71c61c70c71ULL, 0xc71c61c70c71c71cULL, },
+        { 0x1c71c71471c71c71ULL, 0xc71471c71c71c714ULL, },
+        { 0x1c71c71470c71c71ULL, 0xc71470c71c71c714ULL, },
+        { 0x1c71c71c71471c71ULL, 0xc71c71471c71c71cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5ef67ba00cULL, },    /*  64  */
+        { 0x886ae6c428625440ULL, 0x4b670b5e7e7ba00cULL, },
+        { 0x886ae2cc28625540ULL, 0x4a670b5ef67bb00cULL, },
+        { 0x086ac6cc28601540ULL, 0x4b650a5efe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x1277bb1a153f42fcULL, },
+        { 0xbbbe00634d93c608ULL, 0x1277bb1a153f42fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f73b1a153f52ecULL, },
+        { 0x7bbe00634d918708ULL, 0x12f5ba1a153b52fcULL, },
+        { 0xa85aaeaab9cb8b80ULL, 0x275886ffa32b2514ULL, },    /*  72  */
+        { 0xac5aaea2b9c78a80ULL, 0x2758c2ff2b2b2514ULL, },
+        { 0xa85aaaaa39cf8b80ULL, 0x26d846ffa32b2504ULL, },
+        { 0x2c5a8eaab9cd8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f064d5e31e24eULL, 0x8d7188d8a142e2a0ULL, },
+        { 0x304f16455e31e24eULL, 0x8d7188d82942e2a0ULL, },
+        { 0x704f124d5e31e24eULL, 0x8cf108d8a142e2a0ULL, },
+        { 0x704f164d5e31a24eULL, 0x8df188d8a942e2a0ULL, },
+    };
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_H(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_H(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_w.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_w.c
new file mode 100644
index 0000000000..ab0cce7f8b
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bclr_w.c
@@ -0,0 +1,153 @@
+/*
+ *  Test program for MSA instruction BCLR.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BCLR.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7fffffff7fffffffULL, 0x7fffffff7fffffffULL, },    /*   0  */
+        { 0xfffffffefffffffeULL, 0xfffffffefffffffeULL, },
+        { 0xfffffbfffffffbffULL, 0xfffffbfffffffbffULL, },
+        { 0xffdfffffffdfffffULL, 0xffdfffffffdfffffULL, },
+        { 0xffffefffffffefffULL, 0xffffefffffffefffULL, },
+        { 0xfff7fffffff7ffffULL, 0xfff7fffffff7ffffULL, },
+        { 0xfffffff7ffffbfffULL, 0xfefffffffffffff7ULL, },
+        { 0xeffffffffffdffffULL, 0xffffff7fefffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2aaaaaaa2aaaaaaaULL, 0x2aaaaaaa2aaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaa8aaaaaaa8aaaaaULL, 0xaa8aaaaaaa8aaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaa2aaaaaaa2aaaaULL, 0xaaa2aaaaaaa2aaaaULL, },
+        { 0xaaaaaaa2aaaaaaaaULL, 0xaaaaaaaaaaaaaaa2ULL, },
+        { 0xaaaaaaaaaaa8aaaaULL, 0xaaaaaa2aaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555455555554ULL, 0x5555555455555554ULL, },
+        { 0x5555515555555155ULL, 0x5555515555555155ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555455555554555ULL, 0x5555455555554555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555551555ULL, 0x5455555555555555ULL, },
+        { 0x4555555555555555ULL, 0x5555555545555555ULL, },
+        { 0x4ccccccc4cccccccULL, 0x4ccccccc4cccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccc8ccccccc8ccULL, 0xccccc8ccccccc8ccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccc4ccccccc4ccccULL, 0xccc4ccccccc4ccccULL, },
+        { 0xccccccc4cccc8cccULL, 0xccccccccccccccc4ULL, },
+        { 0xccccccccccccccccULL, 0xcccccc4cccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333233333332ULL, 0x3333333233333332ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3313333333133333ULL, 0x3313333333133333ULL, },
+        { 0x3333233333332333ULL, 0x3333233333332333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3233333333333333ULL, },
+        { 0x2333333333313333ULL, 0x3333333323333333ULL, },
+        { 0x638e38e30e38e38eULL, 0x38e38e38638e38e3ULL, },    /*  48  */
+        { 0xe38e38e28e38e38eULL, 0x38e38e38e38e38e2ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38a38e38e38e3ULL, },
+        { 0xe38e38e38e18e38eULL, 0x38c38e38e38e38e3ULL, },
+        { 0xe38e28e38e38e38eULL, 0x38e38e38e38e28e3ULL, },
+        { 0xe38638e38e30e38eULL, 0x38e38e38e38638e3ULL, },
+        { 0xe38e38e38e38a38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x471c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c70ULL, 0xc71c71c61c71c71cULL, },
+        { 0x1c71c31c71c71871ULL, 0xc71c71c71c71c31cULL, },
+        { 0x1c51c71c71c71c71ULL, 0xc71c71c71c51c71cULL, },
+        { 0x1c71c71c71c70c71ULL, 0xc71c61c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71471c71c71c71cULL, },
+        { 0x1c71c71471c71c71ULL, 0xc61c71c71c71c714ULL, },
+        { 0x0c71c71c71c51c71ULL, 0xc71c71470c71c71cULL, },
+        { 0x886ae6cc28625540ULL, 0x0b670b5efe7ba00cULL, },    /*  64  */
+        { 0x886ae6c428625440ULL, 0x4b670b5eee7bb00cULL, },
+        { 0x886ae2cc28625540ULL, 0x4b670b5efe6bb00cULL, },
+        { 0x886ac6cc28621540ULL, 0x4a670b5efe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f42fcULL, },
+        { 0xfbbe00634d93c608ULL, 0x12f7bb1a053f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a152f52fcULL, },
+        { 0xfbbe00634d938708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },    /*  72  */
+        { 0xac5aaea2b9cf8a80ULL, 0x23d8c6ffab2b2514ULL, },
+        { 0xac5aaaaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5a8eaab9cf8b80ULL, 0x26d8c6ffab2b2514ULL, },
+        { 0x704f064d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
+        { 0x704f16455e31e24eULL, 0x89f188d8a942e2a0ULL, },
+        { 0x704f124d5e31e24eULL, 0x0df188d8a942e2a0ULL, },
+        { 0x704f164d5e31a24eULL, 0x8cf188d8a942e2a0ULL, },
+    };
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_W(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BCLR_W(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_b.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_b.c
new file mode 100644
index 0000000000..1939d1018d
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_b.c
@@ -0,0 +1,153 @@
+/*
+ *  Test program for MSA instruction BNEG.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BNEG.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7f7f7f7f7f7f7f7fULL, 0x7f7f7f7f7f7f7f7fULL, },    /*   0  */
+        { 0xfefefefefefefefeULL, 0xfefefefefefefefeULL, },
+        { 0xfbfbfbfbfbfbfbfbULL, 0xfbfbfbfbfbfbfbfbULL, },
+        { 0xdfdfdfdfdfdfdfdfULL, 0xdfdfdfdfdfdfdfdfULL, },
+        { 0xefefefefefefefefULL, 0xefefefefefefefefULL, },
+        { 0xf7f7f7f7f7f7f7f7ULL, 0xf7f7f7f7f7f7f7f7ULL, },
+        { 0xf7bffef7bffef7bfULL, 0xfef7bffef7bffef7ULL, },
+        { 0xeffd7feffd7feffdULL, 0x7feffd7feffd7fefULL, },
+        { 0x8080808080808080ULL, 0x8080808080808080ULL, },    /*   8  */
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },
+        { 0x0404040404040404ULL, 0x0404040404040404ULL, },
+        { 0x2020202020202020ULL, 0x2020202020202020ULL, },
+        { 0x1010101010101010ULL, 0x1010101010101010ULL, },
+        { 0x0808080808080808ULL, 0x0808080808080808ULL, },
+        { 0x0840010840010840ULL, 0x0108400108400108ULL, },
+        { 0x1002801002801002ULL, 0x8010028010028010ULL, },
+        { 0x2a2a2a2a2a2a2a2aULL, 0x2a2a2a2a2a2a2a2aULL, },    /*  16  */
+        { 0xababababababababULL, 0xababababababababULL, },
+        { 0xaeaeaeaeaeaeaeaeULL, 0xaeaeaeaeaeaeaeaeULL, },
+        { 0x8a8a8a8a8a8a8a8aULL, 0x8a8a8a8a8a8a8a8aULL, },
+        { 0xbabababababababaULL, 0xbabababababababaULL, },
+        { 0xa2a2a2a2a2a2a2a2ULL, 0xa2a2a2a2a2a2a2a2ULL, },
+        { 0xa2eaaba2eaaba2eaULL, 0xaba2eaaba2eaaba2ULL, },
+        { 0xbaa82abaa82abaa8ULL, 0x2abaa82abaa82abaULL, },
+        { 0xd5d5d5d5d5d5d5d5ULL, 0xd5d5d5d5d5d5d5d5ULL, },    /*  24  */
+        { 0x5454545454545454ULL, 0x5454545454545454ULL, },
+        { 0x5151515151515151ULL, 0x5151515151515151ULL, },
+        { 0x7575757575757575ULL, 0x7575757575757575ULL, },
+        { 0x4545454545454545ULL, 0x4545454545454545ULL, },
+        { 0x5d5d5d5d5d5d5d5dULL, 0x5d5d5d5d5d5d5d5dULL, },
+        { 0x5d15545d15545d15ULL, 0x545d15545d15545dULL, },
+        { 0x4557d54557d54557ULL, 0xd54557d54557d545ULL, },
+        { 0x4c4c4c4c4c4c4c4cULL, 0x4c4c4c4c4c4c4c4cULL, },    /*  32  */
+        { 0xcdcdcdcdcdcdcdcdULL, 0xcdcdcdcdcdcdcdcdULL, },
+        { 0xc8c8c8c8c8c8c8c8ULL, 0xc8c8c8c8c8c8c8c8ULL, },
+        { 0xececececececececULL, 0xececececececececULL, },
+        { 0xdcdcdcdcdcdcdcdcULL, 0xdcdcdcdcdcdcdcdcULL, },
+        { 0xc4c4c4c4c4c4c4c4ULL, 0xc4c4c4c4c4c4c4c4ULL, },
+        { 0xc48ccdc48ccdc48cULL, 0xcdc48ccdc48ccdc4ULL, },
+        { 0xdcce4cdcce4cdcceULL, 0x4cdcce4cdcce4cdcULL, },
+        { 0xb3b3b3b3b3b3b3b3ULL, 0xb3b3b3b3b3b3b3b3ULL, },    /*  40  */
+        { 0x3232323232323232ULL, 0x3232323232323232ULL, },
+        { 0x3737373737373737ULL, 0x3737373737373737ULL, },
+        { 0x1313131313131313ULL, 0x1313131313131313ULL, },
+        { 0x2323232323232323ULL, 0x2323232323232323ULL, },
+        { 0x3b3b3b3b3b3b3b3bULL, 0x3b3b3b3b3b3b3b3bULL, },
+        { 0x3b73323b73323b73ULL, 0x323b73323b73323bULL, },
+        { 0x2331b32331b32331ULL, 0xb32331b32331b323ULL, },
+        { 0x630eb8630eb8630eULL, 0xb8630eb8630eb863ULL, },    /*  48  */
+        { 0xe28f39e28f39e28fULL, 0x39e28f39e28f39e2ULL, },
+        { 0xe78a3ce78a3ce78aULL, 0x3ce78a3ce78a3ce7ULL, },
+        { 0xc3ae18c3ae18c3aeULL, 0x18c3ae18c3ae18c3ULL, },
+        { 0xf39e28f39e28f39eULL, 0x28f39e28f39e28f3ULL, },
+        { 0xeb8630eb8630eb86ULL, 0x30eb8630eb8630ebULL, },
+        { 0xebce39ebce39ebceULL, 0x39ebce39ebce39ebULL, },
+        { 0xf38cb8f38cb8f38cULL, 0xb8f38cb8f38cb8f3ULL, },
+        { 0x9cf1479cf1479cf1ULL, 0x479cf1479cf1479cULL, },    /*  56  */
+        { 0x1d70c61d70c61d70ULL, 0xc61d70c61d70c61dULL, },
+        { 0x1875c31875c31875ULL, 0xc31875c31875c318ULL, },
+        { 0x3c51e73c51e73c51ULL, 0xe73c51e73c51e73cULL, },
+        { 0x0c61d70c61d70c61ULL, 0xd70c61d70c61d70cULL, },
+        { 0x1479cf1479cf1479ULL, 0xcf1479cf1479cf14ULL, },
+        { 0x1431c61431c61431ULL, 0xc61431c61431c614ULL, },
+        { 0x0c73470c73470c73ULL, 0x470c73470c73470cULL, },
+        { 0x896ea6dc29667541ULL, 0x43e7031ebe73b11cULL, },    /*  64  */
+        { 0x802ae7c4086ad541ULL, 0x4fe7035adefbb41cULL, },
+        { 0x986ea6c82ae25d41ULL, 0xcb664bdef673901cULL, },
+        { 0x89eaa6ec68605100ULL, 0x6b650a5ffc7fb40dULL, },
+        { 0xfaba40734c97e709ULL, 0x1a77b35a553753ecULL, },
+        { 0xf3fe016b6d9b4709ULL, 0x1677b31e35bf56ecULL, },
+        { 0xebba40674f13cf09ULL, 0x92f6fb9a1d3772ecULL, },
+        { 0xfa3e40430d91c348ULL, 0x32f5ba1b173b56fdULL, },
+        { 0xad5eeebab8cbab81ULL, 0x2f58cebfeb232404ULL, },    /*  72  */
+        { 0xa41aafa299c70b81ULL, 0x2358cefb8bab2104ULL, },
+        { 0xbc5eeeaebb4f8381ULL, 0xa7d9867fa3230504ULL, },
+        { 0xaddaee8af9cd8fc0ULL, 0x07dac7fea92f2115ULL, },
+        { 0x714b565d5f35c24fULL, 0x85718098e94ae3b0ULL, },
+        { 0x780f17457e39624fULL, 0x897180dc89c2e6b0ULL, },
+        { 0x604b56495cb1ea4fULL, 0x0df0c858a14ac2b0ULL, },
+        { 0x71cf566d1e33e60eULL, 0xadf389d9ab46e6a1ULL, },
+    };
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_B(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_B(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_d.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_d.c
new file mode 100644
index 0000000000..8462bb3e14
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_d.c
@@ -0,0 +1,153 @@
+/*
+ *  Test program for MSA instruction BNEG.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BNEG.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL, },    /*   0  */
+        { 0xfffffffffffffffeULL, 0xfffffffffffffffeULL, },
+        { 0xfffffbffffffffffULL, 0xfffffbffffffffffULL, },
+        { 0xffffffffffdfffffULL, 0xffffffffffdfffffULL, },
+        { 0xffffffffffffefffULL, 0xffffffffffffefffULL, },
+        { 0xfff7ffffffffffffULL, 0xfff7ffffffffffffULL, },
+        { 0xffffffffffffbfffULL, 0xfffffff7ffffffffULL, },
+        { 0xfffdffffffffffffULL, 0xffffffffefffffffULL, },
+        { 0x8000000000000000ULL, 0x8000000000000000ULL, },    /*   8  */
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },
+        { 0x0000040000000000ULL, 0x0000040000000000ULL, },
+        { 0x0000000000200000ULL, 0x0000000000200000ULL, },
+        { 0x0000000000001000ULL, 0x0000000000001000ULL, },
+        { 0x0008000000000000ULL, 0x0008000000000000ULL, },
+        { 0x0000000000004000ULL, 0x0000000800000000ULL, },
+        { 0x0002000000000000ULL, 0x0000000010000000ULL, },
+        { 0x2aaaaaaaaaaaaaaaULL, 0x2aaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaabULL, 0xaaaaaaaaaaaaaaabULL, },
+        { 0xaaaaaeaaaaaaaaaaULL, 0xaaaaaeaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaa8aaaaaULL, 0xaaaaaaaaaa8aaaaaULL, },
+        { 0xaaaaaaaaaaaabaaaULL, 0xaaaaaaaaaaaabaaaULL, },
+        { 0xaaa2aaaaaaaaaaaaULL, 0xaaa2aaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaeaaaULL, 0xaaaaaaa2aaaaaaaaULL, },
+        { 0xaaa8aaaaaaaaaaaaULL, 0xaaaaaaaabaaaaaaaULL, },
+        { 0xd555555555555555ULL, 0xd555555555555555ULL, },    /*  24  */
+        { 0x5555555555555554ULL, 0x5555555555555554ULL, },
+        { 0x5555515555555555ULL, 0x5555515555555555ULL, },
+        { 0x5555555555755555ULL, 0x5555555555755555ULL, },
+        { 0x5555555555554555ULL, 0x5555555555554555ULL, },
+        { 0x555d555555555555ULL, 0x555d555555555555ULL, },
+        { 0x5555555555551555ULL, 0x5555555d55555555ULL, },
+        { 0x5557555555555555ULL, 0x5555555545555555ULL, },
+        { 0x4cccccccccccccccULL, 0x4cccccccccccccccULL, },    /*  32  */
+        { 0xcccccccccccccccdULL, 0xcccccccccccccccdULL, },
+        { 0xccccc8ccccccccccULL, 0xccccc8ccccccccccULL, },
+        { 0xccccccccccecccccULL, 0xccccccccccecccccULL, },
+        { 0xccccccccccccdcccULL, 0xccccccccccccdcccULL, },
+        { 0xccc4ccccccccccccULL, 0xccc4ccccccccccccULL, },
+        { 0xcccccccccccc8cccULL, 0xccccccc4ccccccccULL, },
+        { 0xccceccccccccccccULL, 0xccccccccdcccccccULL, },
+        { 0xb333333333333333ULL, 0xb333333333333333ULL, },    /*  40  */
+        { 0x3333333333333332ULL, 0x3333333333333332ULL, },
+        { 0x3333373333333333ULL, 0x3333373333333333ULL, },
+        { 0x3333333333133333ULL, 0x3333333333133333ULL, },
+        { 0x3333333333332333ULL, 0x3333333333332333ULL, },
+        { 0x333b333333333333ULL, 0x333b333333333333ULL, },
+        { 0x3333333333337333ULL, 0x3333333b33333333ULL, },
+        { 0x3331333333333333ULL, 0x3333333323333333ULL, },
+        { 0x638e38e38e38e38eULL, 0xb8e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38fULL, 0x38e38e38e38e38e2ULL, },
+        { 0xe38e3ce38e38e38eULL, 0x38e38a38e38e38e3ULL, },
+        { 0xe38e38e38e18e38eULL, 0x38e38e38e3ae38e3ULL, },
+        { 0xe38e38e38e38f38eULL, 0x38e38e38e38e28e3ULL, },
+        { 0xe38638e38e38e38eULL, 0x38eb8e38e38e38e3ULL, },
+        { 0xe38e38e38e38a38eULL, 0x38e38e30e38e38e3ULL, },
+        { 0xe38c38e38e38e38eULL, 0x38e38e38f38e38e3ULL, },
+        { 0x9c71c71c71c71c71ULL, 0x471c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c70ULL, 0xc71c71c71c71c71dULL, },
+        { 0x1c71c31c71c71c71ULL, 0xc71c75c71c71c71cULL, },
+        { 0x1c71c71c71e71c71ULL, 0xc71c71c71c51c71cULL, },
+        { 0x1c71c71c71c70c71ULL, 0xc71c71c71c71d71cULL, },
+        { 0x1c79c71c71c71c71ULL, 0xc71471c71c71c71cULL, },
+        { 0x1c71c71c71c75c71ULL, 0xc71c71cf1c71c71cULL, },
+        { 0x1c73c71c71c71c71ULL, 0xc71c71c70c71c71cULL, },
+        { 0x886ae6cc28625541ULL, 0x4b670b5efe7ba00cULL, },    /*  64  */
+        { 0x886ae6cc28625440ULL, 0x5b670b5efe7bb00cULL, },
+        { 0x886ae6cc28625541ULL, 0x4b670b5efe6bb00cULL, },
+        { 0x886ae6cc28621540ULL, 0x4b670b5ffe7bb00cULL, },
+        { 0xfbbe00634d93c709ULL, 0x12f7bb1a153f42fcULL, },
+        { 0xfbbe00634d93c608ULL, 0x02f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c709ULL, 0x12f7bb1a152f52fcULL, },
+        { 0xfbbe00634d938708ULL, 0x12f7bb1b153f52fcULL, },
+        { 0xac5aaeaab9cf8b81ULL, 0x27d8c6ffab2b3514ULL, },    /*  72  */
+        { 0xac5aaeaab9cf8a80ULL, 0x37d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b81ULL, 0x27d8c6ffab3b2514ULL, },
+        { 0xac5aaeaab9cfcb80ULL, 0x27d8c6feab2b2514ULL, },
+        { 0x704f164d5e31e24fULL, 0x8df188d8a942f2a0ULL, },
+        { 0x704f164d5e31e34eULL, 0x9df188d8a942e2a0ULL, },
+        { 0x704f164d5e31e24fULL, 0x8df188d8a952e2a0ULL, },
+        { 0x704f164d5e31a24eULL, 0x8df188d9a942e2a0ULL, },
+    };
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_D(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_D(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_h.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_h.c
new file mode 100644
index 0000000000..172d4c0fd1
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_h.c
@@ -0,0 +1,153 @@
+/*
+ *  Test program for MSA instruction BNEG.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BNEG.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7fff7fff7fff7fffULL, 0x7fff7fff7fff7fffULL, },    /*   0  */
+        { 0xfffefffefffefffeULL, 0xfffefffefffefffeULL, },
+        { 0xfbfffbfffbfffbffULL, 0xfbfffbfffbfffbffULL, },
+        { 0xffdfffdfffdfffdfULL, 0xffdfffdfffdfffdfULL, },
+        { 0xefffefffefffefffULL, 0xefffefffefffefffULL, },
+        { 0xfff7fff7fff7fff7ULL, 0xfff7fff7fff7fff7ULL, },
+        { 0xbffffff7feffbfffULL, 0xfff7feffbffffff7ULL, },
+        { 0xfffdefffff7ffffdULL, 0xefffff7ffffdefffULL, },
+        { 0x8000800080008000ULL, 0x8000800080008000ULL, },    /*   8  */
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },
+        { 0x0400040004000400ULL, 0x0400040004000400ULL, },
+        { 0x0020002000200020ULL, 0x0020002000200020ULL, },
+        { 0x1000100010001000ULL, 0x1000100010001000ULL, },
+        { 0x0008000800080008ULL, 0x0008000800080008ULL, },
+        { 0x4000000801004000ULL, 0x0008010040000008ULL, },
+        { 0x0002100000800002ULL, 0x1000008000021000ULL, },
+        { 0x2aaa2aaa2aaa2aaaULL, 0x2aaa2aaa2aaa2aaaULL, },    /*  16  */
+        { 0xaaabaaabaaabaaabULL, 0xaaabaaabaaabaaabULL, },
+        { 0xaeaaaeaaaeaaaeaaULL, 0xaeaaaeaaaeaaaeaaULL, },
+        { 0xaa8aaa8aaa8aaa8aULL, 0xaa8aaa8aaa8aaa8aULL, },
+        { 0xbaaabaaabaaabaaaULL, 0xbaaabaaabaaabaaaULL, },
+        { 0xaaa2aaa2aaa2aaa2ULL, 0xaaa2aaa2aaa2aaa2ULL, },
+        { 0xeaaaaaa2abaaeaaaULL, 0xaaa2abaaeaaaaaa2ULL, },
+        { 0xaaa8baaaaa2aaaa8ULL, 0xbaaaaa2aaaa8baaaULL, },
+        { 0xd555d555d555d555ULL, 0xd555d555d555d555ULL, },    /*  24  */
+        { 0x5554555455545554ULL, 0x5554555455545554ULL, },
+        { 0x5155515551555155ULL, 0x5155515551555155ULL, },
+        { 0x5575557555755575ULL, 0x5575557555755575ULL, },
+        { 0x4555455545554555ULL, 0x4555455545554555ULL, },
+        { 0x555d555d555d555dULL, 0x555d555d555d555dULL, },
+        { 0x1555555d54551555ULL, 0x555d54551555555dULL, },
+        { 0x5557455555d55557ULL, 0x455555d555574555ULL, },
+        { 0x4ccc4ccc4ccc4cccULL, 0x4ccc4ccc4ccc4cccULL, },    /*  32  */
+        { 0xcccdcccdcccdcccdULL, 0xcccdcccdcccdcccdULL, },
+        { 0xc8ccc8ccc8ccc8ccULL, 0xc8ccc8ccc8ccc8ccULL, },
+        { 0xccecccecccecccecULL, 0xccecccecccecccecULL, },
+        { 0xdcccdcccdcccdcccULL, 0xdcccdcccdcccdcccULL, },
+        { 0xccc4ccc4ccc4ccc4ULL, 0xccc4ccc4ccc4ccc4ULL, },
+        { 0x8cccccc4cdcc8cccULL, 0xccc4cdcc8cccccc4ULL, },
+        { 0xcccedccccc4cccceULL, 0xdccccc4ccccedcccULL, },
+        { 0xb333b333b333b333ULL, 0xb333b333b333b333ULL, },    /*  40  */
+        { 0x3332333233323332ULL, 0x3332333233323332ULL, },
+        { 0x3733373337333733ULL, 0x3733373337333733ULL, },
+        { 0x3313331333133313ULL, 0x3313331333133313ULL, },
+        { 0x2333233323332333ULL, 0x2333233323332333ULL, },
+        { 0x333b333b333b333bULL, 0x333b333b333b333bULL, },
+        { 0x7333333b32337333ULL, 0x333b32337333333bULL, },
+        { 0x3331233333b33331ULL, 0x233333b333312333ULL, },
+        { 0x638eb8e30e38638eULL, 0xb8e30e38638eb8e3ULL, },    /*  48  */
+        { 0xe38f38e28e39e38fULL, 0x38e28e39e38f38e2ULL, },
+        { 0xe78e3ce38a38e78eULL, 0x3ce38a38e78e3ce3ULL, },
+        { 0xe3ae38c38e18e3aeULL, 0x38c38e18e3ae38c3ULL, },
+        { 0xf38e28e39e38f38eULL, 0x28e39e38f38e28e3ULL, },
+        { 0xe38638eb8e30e386ULL, 0x38eb8e30e38638ebULL, },
+        { 0xa38e38eb8f38a38eULL, 0x38eb8f38a38e38ebULL, },
+        { 0xe38c28e38eb8e38cULL, 0x28e38eb8e38c28e3ULL, },
+        { 0x9c71471cf1c79c71ULL, 0x471cf1c79c71471cULL, },    /*  56  */
+        { 0x1c70c71d71c61c70ULL, 0xc71d71c61c70c71dULL, },
+        { 0x1871c31c75c71871ULL, 0xc31c75c71871c31cULL, },
+        { 0x1c51c73c71e71c51ULL, 0xc73c71e71c51c73cULL, },
+        { 0x0c71d71c61c70c71ULL, 0xd71c61c70c71d71cULL, },
+        { 0x1c79c71471cf1c79ULL, 0xc71471cf1c79c714ULL, },
+        { 0x5c71c71470c75c71ULL, 0xc71470c75c71c714ULL, },
+        { 0x1c73d71c71471c73ULL, 0xd71c71471c73d71cULL, },
+        { 0x8c6af6cc28665541ULL, 0x4be74b5ef67ba00cULL, },    /*  64  */
+        { 0xc86ae6c4286a5440ULL, 0x4be70f5e7e7ba00cULL, },
+        { 0x8c6ae2cca8625541ULL, 0x4a678b5ef67bb01cULL, },
+        { 0x086ac6cc28601540ULL, 0x4b650a5efe7fb00dULL, },
+        { 0xffbe10634d97c709ULL, 0x1277fb1a1d3f42fcULL, },
+        { 0xbbbe006b4d9bc608ULL, 0x1277bf1a953f42fcULL, },
+        { 0xffbe0463cd93c709ULL, 0x13f73b1a1d3f52ecULL, },
+        { 0x7bbe20634d918708ULL, 0x12f5ba1a153b52fdULL, },
+        { 0xa85abeaab9cb8b81ULL, 0x275886ffa32b3514ULL, },    /*  72  */
+        { 0xec5aaea2b9c78a80ULL, 0x2758c2ff2b2b3514ULL, },
+        { 0xa85aaaaa39cf8b81ULL, 0x26d846ffa32b2504ULL, },
+        { 0x2c5a8eaab9cdcb80ULL, 0x27dac7ffab2f2515ULL, },
+        { 0x744f064d5e35e24fULL, 0x8d71c8d8a142f2a0ULL, },
+        { 0x304f16455e39e34eULL, 0x8d718cd82942f2a0ULL, },
+        { 0x744f124dde31e24fULL, 0x8cf108d8a142e2b0ULL, },
+        { 0xf04f364d5e33a24eULL, 0x8df389d8a946e2a1ULL, },
+    };
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_H(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_H(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_w.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_w.c
new file mode 100644
index 0000000000..23bc0333eb
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bneg_w.c
@@ -0,0 +1,153 @@
+/*
+ *  Test program for MSA instruction BNEG.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BNEG.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x7fffffff7fffffffULL, 0x7fffffff7fffffffULL, },    /*   0  */
+        { 0xfffffffefffffffeULL, 0xfffffffefffffffeULL, },
+        { 0xfffffbfffffffbffULL, 0xfffffbfffffffbffULL, },
+        { 0xffdfffffffdfffffULL, 0xffdfffffffdfffffULL, },
+        { 0xffffefffffffefffULL, 0xffffefffffffefffULL, },
+        { 0xfff7fffffff7ffffULL, 0xfff7fffffff7ffffULL, },
+        { 0xfffffff7ffffbfffULL, 0xfefffffffffffff7ULL, },
+        { 0xeffffffffffdffffULL, 0xffffff7fefffffffULL, },
+        { 0x8000000080000000ULL, 0x8000000080000000ULL, },    /*   8  */
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },
+        { 0x0000040000000400ULL, 0x0000040000000400ULL, },
+        { 0x0020000000200000ULL, 0x0020000000200000ULL, },
+        { 0x0000100000001000ULL, 0x0000100000001000ULL, },
+        { 0x0008000000080000ULL, 0x0008000000080000ULL, },
+        { 0x0000000800004000ULL, 0x0100000000000008ULL, },
+        { 0x1000000000020000ULL, 0x0000008010000000ULL, },
+        { 0x2aaaaaaa2aaaaaaaULL, 0x2aaaaaaa2aaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaabaaaaaaabULL, 0xaaaaaaabaaaaaaabULL, },
+        { 0xaaaaaeaaaaaaaeaaULL, 0xaaaaaeaaaaaaaeaaULL, },
+        { 0xaa8aaaaaaa8aaaaaULL, 0xaa8aaaaaaa8aaaaaULL, },
+        { 0xaaaabaaaaaaabaaaULL, 0xaaaabaaaaaaabaaaULL, },
+        { 0xaaa2aaaaaaa2aaaaULL, 0xaaa2aaaaaaa2aaaaULL, },
+        { 0xaaaaaaa2aaaaeaaaULL, 0xabaaaaaaaaaaaaa2ULL, },
+        { 0xbaaaaaaaaaa8aaaaULL, 0xaaaaaa2abaaaaaaaULL, },
+        { 0xd5555555d5555555ULL, 0xd5555555d5555555ULL, },    /*  24  */
+        { 0x5555555455555554ULL, 0x5555555455555554ULL, },
+        { 0x5555515555555155ULL, 0x5555515555555155ULL, },
+        { 0x5575555555755555ULL, 0x5575555555755555ULL, },
+        { 0x5555455555554555ULL, 0x5555455555554555ULL, },
+        { 0x555d5555555d5555ULL, 0x555d5555555d5555ULL, },
+        { 0x5555555d55551555ULL, 0x545555555555555dULL, },
+        { 0x4555555555575555ULL, 0x555555d545555555ULL, },
+        { 0x4ccccccc4cccccccULL, 0x4ccccccc4cccccccULL, },    /*  32  */
+        { 0xcccccccdcccccccdULL, 0xcccccccdcccccccdULL, },
+        { 0xccccc8ccccccc8ccULL, 0xccccc8ccccccc8ccULL, },
+        { 0xccecccccccecccccULL, 0xccecccccccecccccULL, },
+        { 0xccccdcccccccdcccULL, 0xccccdcccccccdcccULL, },
+        { 0xccc4ccccccc4ccccULL, 0xccc4ccccccc4ccccULL, },
+        { 0xccccccc4cccc8cccULL, 0xcdccccccccccccc4ULL, },
+        { 0xdcccccccccceccccULL, 0xcccccc4cdcccccccULL, },
+        { 0xb3333333b3333333ULL, 0xb3333333b3333333ULL, },    /*  40  */
+        { 0x3333333233333332ULL, 0x3333333233333332ULL, },
+        { 0x3333373333333733ULL, 0x3333373333333733ULL, },
+        { 0x3313333333133333ULL, 0x3313333333133333ULL, },
+        { 0x3333233333332333ULL, 0x3333233333332333ULL, },
+        { 0x333b3333333b3333ULL, 0x333b3333333b3333ULL, },
+        { 0x3333333b33337333ULL, 0x323333333333333bULL, },
+        { 0x2333333333313333ULL, 0x333333b323333333ULL, },
+        { 0x638e38e30e38e38eULL, 0xb8e38e38638e38e3ULL, },    /*  48  */
+        { 0xe38e38e28e38e38fULL, 0x38e38e39e38e38e2ULL, },
+        { 0xe38e3ce38e38e78eULL, 0x38e38a38e38e3ce3ULL, },
+        { 0xe3ae38e38e18e38eULL, 0x38c38e38e3ae38e3ULL, },
+        { 0xe38e28e38e38f38eULL, 0x38e39e38e38e28e3ULL, },
+        { 0xe38638e38e30e38eULL, 0x38eb8e38e38638e3ULL, },
+        { 0xe38e38eb8e38a38eULL, 0x39e38e38e38e38ebULL, },
+        { 0xf38e38e38e3ae38eULL, 0x38e38eb8f38e38e3ULL, },
+        { 0x9c71c71cf1c71c71ULL, 0x471c71c79c71c71cULL, },    /*  56  */
+        { 0x1c71c71d71c71c70ULL, 0xc71c71c61c71c71dULL, },
+        { 0x1c71c31c71c71871ULL, 0xc71c75c71c71c31cULL, },
+        { 0x1c51c71c71e71c71ULL, 0xc73c71c71c51c71cULL, },
+        { 0x1c71d71c71c70c71ULL, 0xc71c61c71c71d71cULL, },
+        { 0x1c79c71c71cf1c71ULL, 0xc71471c71c79c71cULL, },
+        { 0x1c71c71471c75c71ULL, 0xc61c71c71c71c714ULL, },
+        { 0x0c71c71c71c51c71ULL, 0xc71c71470c71c71cULL, },
+        { 0x886af6cc28625541ULL, 0x0b670b5efe7ba00cULL, },    /*  64  */
+        { 0x886ae6c428625440ULL, 0x4f670b5eee7bb00cULL, },
+        { 0x886ae2cc28625541ULL, 0xcb670b5efe6bb00cULL, },
+        { 0x886ac6cc28621540ULL, 0x4a670b5efe7bb00dULL, },
+        { 0xfbbe10634d93c709ULL, 0x52f7bb1a153f42fcULL, },
+        { 0xfbbe006b4d93c608ULL, 0x16f7bb1a053f52fcULL, },
+        { 0xfbbe04634d93c709ULL, 0x92f7bb1a152f52fcULL, },
+        { 0xfbbe20634d938708ULL, 0x13f7bb1a153f52fdULL, },
+        { 0xac5abeaab9cf8b81ULL, 0x67d8c6ffab2b3514ULL, },    /*  72  */
+        { 0xac5aaea2b9cf8a80ULL, 0x23d8c6ffbb2b2514ULL, },
+        { 0xac5aaaaab9cf8b81ULL, 0xa7d8c6ffab3b2514ULL, },
+        { 0xac5a8eaab9cfcb80ULL, 0x26d8c6ffab2b2515ULL, },
+        { 0x704f064d5e31e24fULL, 0xcdf188d8a942f2a0ULL, },
+        { 0x704f16455e31e34eULL, 0x89f188d8b942e2a0ULL, },
+        { 0x704f124d5e31e24fULL, 0x0df188d8a952e2a0ULL, },
+        { 0x704f364d5e31a24eULL, 0x8cf188d8a942e2a1ULL, },
+    };
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_W(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BNEG_W(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_b.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_b.c
new file mode 100644
index 0000000000..f48adbd9a8
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_b.c
@@ -0,0 +1,153 @@
+/*
+ *  Test program for MSA instruction BSET.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BSET.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x8080808080808080ULL, 0x8080808080808080ULL, },    /*   8  */
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },
+        { 0x0404040404040404ULL, 0x0404040404040404ULL, },
+        { 0x2020202020202020ULL, 0x2020202020202020ULL, },
+        { 0x1010101010101010ULL, 0x1010101010101010ULL, },
+        { 0x0808080808080808ULL, 0x0808080808080808ULL, },
+        { 0x0840010840010840ULL, 0x0108400108400108ULL, },
+        { 0x1002801002801002ULL, 0x8010028010028010ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xababababababababULL, 0xababababababababULL, },
+        { 0xaeaeaeaeaeaeaeaeULL, 0xaeaeaeaeaeaeaeaeULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xbabababababababaULL, 0xbabababababababaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaeaabaaeaabaaeaULL, 0xabaaeaabaaeaabaaULL, },
+        { 0xbaaaaabaaaaabaaaULL, 0xaabaaaaabaaaaabaULL, },
+        { 0xd5d5d5d5d5d5d5d5ULL, 0xd5d5d5d5d5d5d5d5ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x7575757575757575ULL, 0x7575757575757575ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5d5d5d5d5d5d5d5dULL, 0x5d5d5d5d5d5d5d5dULL, },
+        { 0x5d55555d55555d55ULL, 0x555d55555d55555dULL, },
+        { 0x5557d55557d55557ULL, 0xd55557d55557d555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xcdcdcdcdcdcdcdcdULL, 0xcdcdcdcdcdcdcdcdULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xececececececececULL, 0xececececececececULL, },
+        { 0xdcdcdcdcdcdcdcdcULL, 0xdcdcdcdcdcdcdcdcULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xcccccdcccccdccccULL, 0xcdcccccdcccccdccULL, },
+        { 0xdcceccdcceccdcceULL, 0xccdcceccdcceccdcULL, },
+        { 0xb3b3b3b3b3b3b3b3ULL, 0xb3b3b3b3b3b3b3b3ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3737373737373737ULL, 0x3737373737373737ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3b3b3b3b3b3b3b3bULL, 0x3b3b3b3b3b3b3b3bULL, },
+        { 0x3b73333b73333b73ULL, 0x333b73333b73333bULL, },
+        { 0x3333b33333b33333ULL, 0xb33333b33333b333ULL, },
+        { 0xe38eb8e38eb8e38eULL, 0xb8e38eb8e38eb8e3ULL, },    /*  48  */
+        { 0xe38f39e38f39e38fULL, 0x39e38f39e38f39e3ULL, },
+        { 0xe78e3ce78e3ce78eULL, 0x3ce78e3ce78e3ce7ULL, },
+        { 0xe3ae38e3ae38e3aeULL, 0x38e3ae38e3ae38e3ULL, },
+        { 0xf39e38f39e38f39eULL, 0x38f39e38f39e38f3ULL, },
+        { 0xeb8e38eb8e38eb8eULL, 0x38eb8e38eb8e38ebULL, },
+        { 0xebce39ebce39ebceULL, 0x39ebce39ebce39ebULL, },
+        { 0xf38eb8f38eb8f38eULL, 0xb8f38eb8f38eb8f3ULL, },
+        { 0x9cf1c79cf1c79cf1ULL, 0xc79cf1c79cf1c79cULL, },    /*  56  */
+        { 0x1d71c71d71c71d71ULL, 0xc71d71c71d71c71dULL, },
+        { 0x1c75c71c75c71c75ULL, 0xc71c75c71c75c71cULL, },
+        { 0x3c71e73c71e73c71ULL, 0xe73c71e73c71e73cULL, },
+        { 0x1c71d71c71d71c71ULL, 0xd71c71d71c71d71cULL, },
+        { 0x1c79cf1c79cf1c79ULL, 0xcf1c79cf1c79cf1cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c73c71c73c71c73ULL, 0xc71c73c71c73c71cULL, },
+        { 0x896ee6dc29667541ULL, 0x4be70b5efe7bb11cULL, },    /*  64  */
+        { 0x886ae7cc286ad541ULL, 0x4fe70b5efefbb41cULL, },
+        { 0x986ee6cc2ae25d41ULL, 0xcb674bdefe7bb01cULL, },
+        { 0x89eae6ec68625540ULL, 0x6b670b5ffe7fb40dULL, },
+        { 0xfbbe40734d97e709ULL, 0x1af7bb5a553f53fcULL, },
+        { 0xfbfe016b6d9bc709ULL, 0x16f7bb1e35bf56fcULL, },
+        { 0xfbbe40674f93cf09ULL, 0x92f7fb9a1d3f72fcULL, },
+        { 0xfbbe40634d93c748ULL, 0x32f7bb1b173f56fdULL, },
+        { 0xad5eeebab9cfab81ULL, 0x2fd8ceffeb2b2514ULL, },    /*  72  */
+        { 0xac5aafaab9cf8b81ULL, 0x27d8ceffabab2514ULL, },
+        { 0xbc5eeeaebbcf8b81ULL, 0xa7d9c6ffab2b2514ULL, },
+        { 0xaddaeeaaf9cf8fc0ULL, 0x27dac7ffab2f2515ULL, },
+        { 0x714f565d5f35e24fULL, 0x8df188d8e94ae3b0ULL, },
+        { 0x784f174d7e39e24fULL, 0x8df188dca9c2e6b0ULL, },
+        { 0x704f564d5eb1ea4fULL, 0x8df1c8d8a94ae2b0ULL, },
+        { 0x71cf566d5e33e64eULL, 0xadf389d9ab46e6a1ULL, },
+    };
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_B(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_B(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_d.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_d.c
new file mode 100644
index 0000000000..787016ae4f
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_d.c
@@ -0,0 +1,153 @@
+/*
+ *  Test program for MSA instruction BSET.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BSET.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x8000000000000000ULL, 0x8000000000000000ULL, },    /*   8  */
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },
+        { 0x0000040000000000ULL, 0x0000040000000000ULL, },
+        { 0x0000000000200000ULL, 0x0000000000200000ULL, },
+        { 0x0000000000001000ULL, 0x0000000000001000ULL, },
+        { 0x0008000000000000ULL, 0x0008000000000000ULL, },
+        { 0x0000000000004000ULL, 0x0000000800000000ULL, },
+        { 0x0002000000000000ULL, 0x0000000010000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaabULL, 0xaaaaaaaaaaaaaaabULL, },
+        { 0xaaaaaeaaaaaaaaaaULL, 0xaaaaaeaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaabaaaULL, 0xaaaaaaaaaaaabaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaeaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaabaaaaaaaULL, },
+        { 0xd555555555555555ULL, 0xd555555555555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555755555ULL, 0x5555555555755555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x555d555555555555ULL, 0x555d555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555d55555555ULL, },
+        { 0x5557555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xcccccccccccccccdULL, 0xcccccccccccccccdULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccecccccULL, 0xccccccccccecccccULL, },
+        { 0xccccccccccccdcccULL, 0xccccccccccccdcccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccceccccccccccccULL, 0xccccccccdcccccccULL, },
+        { 0xb333333333333333ULL, 0xb333333333333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333373333333333ULL, 0x3333373333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x333b333333333333ULL, 0x333b333333333333ULL, },
+        { 0x3333333333337333ULL, 0x3333333b33333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0xb8e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38fULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e3ce38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e3ae38e3ULL, },
+        { 0xe38e38e38e38f38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38eb8e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38f38e38e3ULL, },
+        { 0x9c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71dULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c75c71c71c71cULL, },
+        { 0x1c71c71c71e71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71d71cULL, },
+        { 0x1c79c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c75c71ULL, 0xc71c71cf1c71c71cULL, },
+        { 0x1c73c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x886ae6cc28625541ULL, 0x4b670b5efe7bb00cULL, },    /*  64  */
+        { 0x886ae6cc28625540ULL, 0x5b670b5efe7bb00cULL, },
+        { 0x886ae6cc28625541ULL, 0x4b670b5efe7bb00cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5ffe7bb00cULL, },
+        { 0xfbbe00634d93c709ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c709ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1b153f52fcULL, },
+        { 0xac5aaeaab9cf8b81ULL, 0x27d8c6ffab2b3514ULL, },    /*  72  */
+        { 0xac5aaeaab9cf8b80ULL, 0x37d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cf8b81ULL, 0x27d8c6ffab3b2514ULL, },
+        { 0xac5aaeaab9cfcb80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24fULL, 0x8df188d8a942f2a0ULL, },
+        { 0x704f164d5e31e34eULL, 0x9df188d8a942e2a0ULL, },
+        { 0x704f164d5e31e24fULL, 0x8df188d8a952e2a0ULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d9a942e2a0ULL, },
+    };
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_D(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_D(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_h.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_h.c
new file mode 100644
index 0000000000..e1ff9727ba
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_h.c
@@ -0,0 +1,153 @@
+/*
+ *  Test program for MSA instruction BSET.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BSET.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x8000800080008000ULL, 0x8000800080008000ULL, },    /*   8  */
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },
+        { 0x0400040004000400ULL, 0x0400040004000400ULL, },
+        { 0x0020002000200020ULL, 0x0020002000200020ULL, },
+        { 0x1000100010001000ULL, 0x1000100010001000ULL, },
+        { 0x0008000800080008ULL, 0x0008000800080008ULL, },
+        { 0x4000000801004000ULL, 0x0008010040000008ULL, },
+        { 0x0002100000800002ULL, 0x1000008000021000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaabaaabaaabaaabULL, 0xaaabaaabaaabaaabULL, },
+        { 0xaeaaaeaaaeaaaeaaULL, 0xaeaaaeaaaeaaaeaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xbaaabaaabaaabaaaULL, 0xbaaabaaabaaabaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xeaaaaaaaabaaeaaaULL, 0xaaaaabaaeaaaaaaaULL, },
+        { 0xaaaabaaaaaaaaaaaULL, 0xbaaaaaaaaaaabaaaULL, },
+        { 0xd555d555d555d555ULL, 0xd555d555d555d555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5575557555755575ULL, 0x5575557555755575ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x555d555d555d555dULL, 0x555d555d555d555dULL, },
+        { 0x5555555d55555555ULL, 0x555d55555555555dULL, },
+        { 0x5557555555d55557ULL, 0x555555d555575555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xcccdcccdcccdcccdULL, 0xcccdcccdcccdcccdULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccecccecccecccecULL, 0xccecccecccecccecULL, },
+        { 0xdcccdcccdcccdcccULL, 0xdcccdcccdcccdcccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xcccccccccdccccccULL, 0xcccccdccccccccccULL, },
+        { 0xcccedcccccccccceULL, 0xdccccccccccedcccULL, },
+        { 0xb333b333b333b333ULL, 0xb333b333b333b333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3733373337333733ULL, 0x3733373337333733ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x333b333b333b333bULL, 0x333b333b333b333bULL, },
+        { 0x7333333b33337333ULL, 0x333b33337333333bULL, },
+        { 0x3333333333b33333ULL, 0x333333b333333333ULL, },
+        { 0xe38eb8e38e38e38eULL, 0xb8e38e38e38eb8e3ULL, },    /*  48  */
+        { 0xe38f38e38e39e38fULL, 0x38e38e39e38f38e3ULL, },
+        { 0xe78e3ce38e38e78eULL, 0x3ce38e38e78e3ce3ULL, },
+        { 0xe3ae38e38e38e3aeULL, 0x38e38e38e3ae38e3ULL, },
+        { 0xf38e38e39e38f38eULL, 0x38e39e38f38e38e3ULL, },
+        { 0xe38e38eb8e38e38eULL, 0x38eb8e38e38e38ebULL, },
+        { 0xe38e38eb8f38e38eULL, 0x38eb8f38e38e38ebULL, },
+        { 0xe38e38e38eb8e38eULL, 0x38e38eb8e38e38e3ULL, },
+        { 0x9c71c71cf1c79c71ULL, 0xc71cf1c79c71c71cULL, },    /*  56  */
+        { 0x1c71c71d71c71c71ULL, 0xc71d71c71c71c71dULL, },
+        { 0x1c71c71c75c71c71ULL, 0xc71c75c71c71c71cULL, },
+        { 0x1c71c73c71e71c71ULL, 0xc73c71e71c71c73cULL, },
+        { 0x1c71d71c71c71c71ULL, 0xd71c71c71c71d71cULL, },
+        { 0x1c79c71c71cf1c79ULL, 0xc71c71cf1c79c71cULL, },
+        { 0x5c71c71c71c75c71ULL, 0xc71c71c75c71c71cULL, },
+        { 0x1c73d71c71c71c73ULL, 0xd71c71c71c73d71cULL, },
+        { 0x8c6af6cc28665541ULL, 0x4be74b5efe7bb00cULL, },    /*  64  */
+        { 0xc86ae6cc286a5540ULL, 0x4be70f5efe7bb00cULL, },
+        { 0x8c6ae6cca8625541ULL, 0x4b678b5efe7bb01cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7fb00dULL, },
+        { 0xffbe10634d97c709ULL, 0x12f7fb1a1d3f52fcULL, },
+        { 0xfbbe006b4d9bc708ULL, 0x12f7bf1a953f52fcULL, },
+        { 0xffbe0463cd93c709ULL, 0x13f7bb1a1d3f52fcULL, },
+        { 0xfbbe20634d93c708ULL, 0x12f7bb1a153f52fdULL, },
+        { 0xac5abeaab9cf8b81ULL, 0x27d8c6ffab2b3514ULL, },    /*  72  */
+        { 0xec5aaeaab9cf8b80ULL, 0x27d8c6ffab2b3514ULL, },
+        { 0xac5aaeaab9cf8b81ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0xac5aaeaab9cfcb80ULL, 0x27dac7ffab2f2515ULL, },
+        { 0x744f164d5e35e24fULL, 0x8df1c8d8a942f2a0ULL, },
+        { 0x704f164d5e39e34eULL, 0x8df18cd8a942f2a0ULL, },
+        { 0x744f164dde31e24fULL, 0x8df188d8a942e2b0ULL, },
+        { 0xf04f364d5e33e24eULL, 0x8df389d8a946e2a1ULL, },
+    };
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_H(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_H(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_w.c b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_w.c
new file mode 100644
index 0000000000..51945b2148
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/bit-set/test_msa_bset_w.c
@@ -0,0 +1,153 @@
+/*
+ *  Test program for MSA instruction BSET.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "BSET.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x8000000080000000ULL, 0x8000000080000000ULL, },    /*   8  */
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },
+        { 0x0000040000000400ULL, 0x0000040000000400ULL, },
+        { 0x0020000000200000ULL, 0x0020000000200000ULL, },
+        { 0x0000100000001000ULL, 0x0000100000001000ULL, },
+        { 0x0008000000080000ULL, 0x0008000000080000ULL, },
+        { 0x0000000800004000ULL, 0x0100000000000008ULL, },
+        { 0x1000000000020000ULL, 0x0000008010000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaabaaaaaaabULL, 0xaaaaaaabaaaaaaabULL, },
+        { 0xaaaaaeaaaaaaaeaaULL, 0xaaaaaeaaaaaaaeaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaabaaaaaaabaaaULL, 0xaaaabaaaaaaabaaaULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0xaaaaaaaaaaaaeaaaULL, 0xabaaaaaaaaaaaaaaULL, },
+        { 0xbaaaaaaaaaaaaaaaULL, 0xaaaaaaaabaaaaaaaULL, },
+        { 0xd5555555d5555555ULL, 0xd5555555d5555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5575555555755555ULL, 0x5575555555755555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x555d5555555d5555ULL, 0x555d5555555d5555ULL, },
+        { 0x5555555d55555555ULL, 0x555555555555555dULL, },
+        { 0x5555555555575555ULL, 0x555555d555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xcccccccdcccccccdULL, 0xcccccccdcccccccdULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccecccccccecccccULL, 0xccecccccccecccccULL, },
+        { 0xccccdcccccccdcccULL, 0xccccdcccccccdcccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xcdccccccccccccccULL, },
+        { 0xdcccccccccceccccULL, 0xccccccccdcccccccULL, },
+        { 0xb3333333b3333333ULL, 0xb3333333b3333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333373333333733ULL, 0x3333373333333733ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x333b3333333b3333ULL, 0x333b3333333b3333ULL, },
+        { 0x3333333b33337333ULL, 0x333333333333333bULL, },
+        { 0x3333333333333333ULL, 0x333333b333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0xb8e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38fULL, 0x38e38e39e38e38e3ULL, },
+        { 0xe38e3ce38e38e78eULL, 0x38e38e38e38e3ce3ULL, },
+        { 0xe3ae38e38e38e38eULL, 0x38e38e38e3ae38e3ULL, },
+        { 0xe38e38e38e38f38eULL, 0x38e39e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38eb8e38e38e38e3ULL, },
+        { 0xe38e38eb8e38e38eULL, 0x39e38e38e38e38ebULL, },
+        { 0xf38e38e38e3ae38eULL, 0x38e38eb8f38e38e3ULL, },
+        { 0x9c71c71cf1c71c71ULL, 0xc71c71c79c71c71cULL, },    /*  56  */
+        { 0x1c71c71d71c71c71ULL, 0xc71c71c71c71c71dULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c75c71c71c71cULL, },
+        { 0x1c71c71c71e71c71ULL, 0xc73c71c71c71c71cULL, },
+        { 0x1c71d71c71c71c71ULL, 0xc71c71c71c71d71cULL, },
+        { 0x1c79c71c71cf1c71ULL, 0xc71c71c71c79c71cULL, },
+        { 0x1c71c71c71c75c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x886af6cc28625541ULL, 0x4b670b5efe7bb00cULL, },    /*  64  */
+        { 0x886ae6cc28625540ULL, 0x4f670b5efe7bb00cULL, },
+        { 0x886ae6cc28625541ULL, 0xcb670b5efe7bb00cULL, },
+        { 0x886ae6cc28625540ULL, 0x4b670b5efe7bb00dULL, },
+        { 0xfbbe10634d93c709ULL, 0x52f7bb1a153f52fcULL, },
+        { 0xfbbe006b4d93c708ULL, 0x16f7bb1a153f52fcULL, },
+        { 0xfbbe04634d93c709ULL, 0x92f7bb1a153f52fcULL, },
+        { 0xfbbe20634d93c708ULL, 0x13f7bb1a153f52fdULL, },
+        { 0xac5abeaab9cf8b81ULL, 0x67d8c6ffab2b3514ULL, },    /*  72  */
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffbb2b2514ULL, },
+        { 0xac5aaeaab9cf8b81ULL, 0xa7d8c6ffab3b2514ULL, },
+        { 0xac5aaeaab9cfcb80ULL, 0x27d8c6ffab2b2515ULL, },
+        { 0x704f164d5e31e24fULL, 0xcdf188d8a942f2a0ULL, },
+        { 0x704f164d5e31e34eULL, 0x8df188d8b942e2a0ULL, },
+        { 0x704f164d5e31e24fULL, 0x8df188d8a952e2a0ULL, },
+        { 0x704f364d5e31e24eULL, 0x8df188d8a942e2a1ULL, },
+    };
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_W(b128_pattern[i], b128_pattern[j],
+                          b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_BSET_W(b128_random[i], b128_random[j],
+                          b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                       (PATTERN_INPUTS_SHORT_COUNT)) +
+                                      RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_b.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_b.c
index d2ea54f43d..b603dbe93a 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_b.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADD_A.B
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xc4a968a3a56293ceULL, 0x9a37b229ac6d4374ULL, },
         { 0xe8b930818693738eULL, 0xbe76838659bd6e6cULL, },
         { 0x759116b0ab9e5756ULL, 0x8518bd426c817064ULL, },
+        { 0xc4a968a3a56293ceULL, 0x9a37b229ac6d4374ULL, },
+        { 0xe09e2c9abc623c9cULL, 0xe61ef050ae843cc0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_d.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_d.c
index 56b81f9090..d4130c3bad 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_d.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADD_A.D
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xc3f467a2a46256ceULL, 0x99e73e2701e84274ULL, },
         { 0xe7e42f8135cf8d0eULL, 0xbd7582865538cd6cULL, },
         { 0x749115ea109e1b46ULL, 0x850632416bfc705cULL, },
+        { 0xc3f467a2a46256ceULL, 0x99e73e2701e84274ULL, },
+        { 0xe09e2c9abc63c49cULL, 0xe41cee4ead7a3ac0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_h.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_h.c
index fe3c664997..00428cf672 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_h.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADD_A.H
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xc3f567a3a4629232ULL, 0x99e7b029ab934274ULL, },
         { 0xe7e52f81869372f2ULL, 0xbd76828658436d54ULL, },
         { 0x749116b0abc456aaULL, 0x8506bc0e6bfd705cULL, },
+        { 0xc3f567a3a4629232ULL, 0x99e7b029ab934274ULL, },
+        { 0xe09e2c9abc623b64ULL, 0xe41eee50ad7c3ac0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_w.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_w.c
index 205117ea95..d60b11b85e 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_add_a_w.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADD_A.W
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xc3f467a3a46256ceULL, 0x99e73e27ab91f84cULL, },
         { 0xe7e42f818694378eULL, 0xbd75828658416d54ULL, },
         { 0x749115eaabc5a956ULL, 0x850632426bfc705cULL, },
+        { 0xc3f467a3a46256ceULL, 0x99e73e27ab91f84cULL, },
+        { 0xe09e2c9abc63c49cULL, 0xe41cee50ad7a3ac0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_b.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_b.c
index 6939e91fe4..9a7d00bc22 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_b.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_A.B
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x7f7f687f7f627f7fULL, 0x7f377f297f6d4374ULL, },
         { 0x7f7f307f7f7f737fULL, 0x7f767f7f597f6e6cULL, },
         { 0x757f167f7f7f5756ULL, 0x7f187f426c7f7064ULL, },
+        { 0x7f7f687f7f627f7fULL, 0x7f377f297f6d4374ULL, },
+        { 0x7f7f2c7f7f623c7fULL, 0x7f1e7f507f7f3c7fULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_d.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_d.c
index af0f3d3700..481fadf107 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_d.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_A.D
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL, },
         { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL, },
         { 0x749115ea109e1b46ULL, 0x7fffffffffffffffULL, },
+        { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL, },
+        { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_h.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_h.c
index 4d3774fef2..4c6c2b9000 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_h.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_A.H
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x7fff67a37fff7fffULL, 0x7fff7fff7fff4274ULL, },
         { 0x7fff2f817fff72f2ULL, 0x7fff7fff58436d54ULL, },
         { 0x749116b07fff56aaULL, 0x7fff7fff6bfd705cULL, },
+        { 0x7fff67a37fff7fffULL, 0x7fff7fff7fff4274ULL, },
+        { 0x7fff2c9a7fff3b64ULL, 0x7fff7fff7fff3ac0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_w.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_w.c
index 6f06fdc7cd..f9cb626ede 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_a_w.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_A.W
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x7fffffff7fffffffULL, 0x7fffffff7fffffffULL, },
         { 0x7fffffff7fffffffULL, 0x7fffffff58416d54ULL, },
         { 0x749115ea7fffffffULL, 0x7fffffff6bfc705cULL, },
+        { 0x7fffffff7fffffffULL, 0x7fffffff7fffffffULL, },
+        { 0x7fffffff7fffffffULL, 0x7fffffff7fffffffULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_b.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_b.c
index e6cb9871f5..e241b8b1b1 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_b.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_S.B
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x1c7fc4f7170080ceULL, 0xb4c980d7806d07b4ULL, },
         { 0xf87ffc197f7f377fULL, 0xd8589336a77f92acULL, },
         { 0x6b0d167f7fc4a956ULL, 0x9fe880f2be7f349cULL, },
+        { 0x1c7fc4f7170080ceULL, 0xb4c980d7806d07b4ULL, },
+        { 0x7f7f2c7f7f62c47fULL, 0x80e280b0807fc480ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_d.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_d.c
index 2cda5d9661..ae35a9a804 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_d.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_S.D
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x1ca9c4f818016dceULL, 0xb5ca4fd8546e07b4ULL, },
         { 0xf8b9fd198694378eULL, 0xd9589437a7be92acULL, },
         { 0x6c0d16b0abc5a956ULL, 0xa0e943f2be82359cULL, },
+        { 0x1ca9c4f818016dceULL, 0xb5ca4fd8546e07b4ULL, },
+        { 0x7fffffffffffffffULL, 0x8000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_h.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_h.c
index 5539322423..48e3da7b27 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_h.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_S.H
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x1ca9c4f718008000ULL, 0xb5c98000800007b4ULL, },
         { 0xf8b9fd197fff378eULL, 0xd9589436a7bd92acULL, },
         { 0x6c0d16b07fffa956ULL, 0xa0e88000be81359cULL, },
+        { 0x1ca9c4f718008000ULL, 0xb5c98000800007b4ULL, },
+        { 0x7fff2c9a7fffc49cULL, 0x800080008000c540ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_w.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_w.c
index 4f2cc3862e..77d49b63e0 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_s_w.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_S.W
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x1ca9c4f718016dceULL, 0xb5ca4fd780000000ULL, },
         { 0xf8b9fd197fffffffULL, 0xd9589436a7be92acULL, },
         { 0x6c0d16b07fffffffULL, 0xa0e943f2be82359cULL, },
+        { 0x1ca9c4f718016dceULL, 0xb5ca4fd780000000ULL, },
+        { 0x7fffffff7fffffffULL, 0x8000000080000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_b.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_b.c
index e2d9be38e7..dba9c208ee 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_b.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_U.B
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xffa9c4f7ffffffceULL, 0xb4ffffffff6dffb4ULL, },
         { 0xf8b9fcff8693ff8eULL, 0xd8ff93ffffbdffacULL, },
         { 0xffff16b0abc4ff56ULL, 0x9ffffff2be81ffffULL, },
+        { 0xffa9c4f7ffffffceULL, 0xb4ffffffff6dffb4ULL, },
+        { 0xe09e2c9abc62ff9cULL, 0xffffffffff84ffffULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_d.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_d.c
index 8418c636b6..39aad16a04 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_d.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_U.D
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xb5ca4fd8546e07b4ULL, },
         { 0xf8b9fd198694378eULL, 0xd9589437a7be92acULL, },
         { 0xffffffffffffffffULL, 0xa0e943f2be82359cULL, },
+        { 0xffffffffffffffffULL, 0xb5ca4fd8546e07b4ULL, },
+        { 0xe09e2c9abc63c49cULL, 0xffffffffffffffffULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_h.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_h.c
index 8a3b5c5cf5..40846b87e7 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_h.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_U.H
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xffffc4f7ffffffffULL, 0xb5c9ffffffffffffULL, },
         { 0xf8b9fd198693ffffULL, 0xd9589436ffffffffULL, },
         { 0xffff16b0abc4ffffULL, 0xa0e8ffffbe81ffffULL, },
+        { 0xffffc4f7ffffffffULL, 0xb5c9ffffffffffffULL, },
+        { 0xe09e2c9abc62ffffULL, 0xffffffffffffffffULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_w.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_w.c
index b18bdc3ea0..046d109884 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_adds_u_w.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDS_U.W
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xffffffffffffffffULL, 0xb5ca4fd7ffffffffULL, },
         { 0xf8b9fd198694378eULL, 0xd9589436ffffffffULL, },
         { 0xffffffffabc5a956ULL, 0xa0e943f2be82359cULL, },
+        { 0xffffffffffffffffULL, 0xb5ca4fd7ffffffffULL, },
+        { 0xe09e2c9abc63c49cULL, 0xffffffffffffffffULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_b.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_b.c
index c86c99291e..6107a42f5f 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_b.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDV.B
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x1ca9c4f717006dceULL, 0xb4c94ed7546d07b4ULL, },
         { 0xf8b9fc198693378eULL, 0xd8589336a7bd92acULL, },
         { 0x6b0d16b0abc4a956ULL, 0x9fe843f2be81349cULL, },
+        { 0x1ca9c4f717006dceULL, 0xb4c94ed7546d07b4ULL, },
+        { 0xe09e2c9abc62c49cULL, 0x1ae210b05284c440ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_d.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_d.c
index 0f301515b3..00f3ad7a0e 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_d.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDV.D
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x1ca9c4f818016dceULL, 0xb5ca4fd8546e07b4ULL, },
         { 0xf8b9fd198694378eULL, 0xd9589437a7be92acULL, },
         { 0x6c0d16b0abc5a956ULL, 0xa0e943f2be82359cULL, },
+        { 0x1ca9c4f818016dceULL, 0xb5ca4fd8546e07b4ULL, },
+        { 0xe09e2c9abc63c49cULL, 0x1be311b15285c540ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_h.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_h.c
index c6b4cf697b..2b9ffdcaa2 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_h.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDV.H
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x1ca9c4f718006dceULL, 0xb5c94fd7546d07b4ULL, },
         { 0xf8b9fd198693378eULL, 0xd9589436a7bd92acULL, },
         { 0x6c0d16b0abc4a956ULL, 0xa0e843f2be81359cULL, },
+        { 0x1ca9c4f718006dceULL, 0xb5c94fd7546d07b4ULL, },
+        { 0xe09e2c9abc62c49cULL, 0x1be211b05284c540ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_w.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_w.c
index 2a565e8ed4..93119edbb8 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_addv_w.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction ADDV.W
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x1ca9c4f718016dceULL, 0xb5ca4fd7546e07b4ULL, },
         { 0xf8b9fd198694378eULL, 0xd9589436a7be92acULL, },
         { 0x6c0d16b0abc5a956ULL, 0xa0e943f2be82359cULL, },
+        { 0x1ca9c4f718016dceULL, 0xb5ca4fd7546e07b4ULL, },
+        { 0xe09e2c9abc63c49cULL, 0x1be311b05285c540ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_d.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_d.c
index 7845dc0218..697847dc95 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_d.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction HADD_S.D
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x000000000a8c90f8ULL, 0xffffffffd11ba99fULL, },
         { 0x0000000098b16b8dULL, 0xffffffff8c6d38e4ULL, },
         { 0x00000000bde2dd55ULL, 0xffffffffa330dbd4ULL, },
+        { 0x000000002a1ea1cdULL, 0xffffffff391cadecULL, },
+        { 0x00000000ce80f89bULL, 0xffffffff37346b78ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_h.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_h.c
index ddc2de3ff2..63b95aafe5 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_h.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction HADD_S.H
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xfffbfffbffeaffd9ULL, 0x0018ff9effedffc5ULL, },
         { 0x00daffe200c00022ULL, 0xfff4ffe60024ffeeULL, },
         { 0x002e0079fff1ffeaULL, 0xff84ffa2ffe8ffdeULL, },
+        { 0x00caffc0002dff62ULL, 0xff65ff87ffd4fff6ULL, },
+        { 0x00bf0063008f0030ULL, 0xff7eff60ffebff82ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_w.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_w.c
index 887cd1cd5c..7ff2bdcc63 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_s_w.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction HADD_S.W
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xffffc2a7ffff9c1dULL, 0xffffb0b0ffff8dcbULL, },
         { 0x0000571b0000b371ULL, 0xffff994fffff594eULL, },
         { 0x000070b200002539ULL, 0xffff490bfffffc3eULL, },
+        { 0x00001ef9ffffe9b1ULL, 0xffff54f0ffffce56ULL, },
+        { 0x0000869c0000407fULL, 0xffff16c9ffff8be2ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_d.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_d.c
index f0710f15de..372e205be9 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_d.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction HADD_U.D
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x000000010a8c90f8ULL, 0x00000000d11ba99fULL, },
         { 0x0000000098b16b8dULL, 0x000000018c6d38e4ULL, },
         { 0x00000000bde2dd55ULL, 0x00000000a330dbd4ULL, },
+        { 0x000000012a1ea1cdULL, 0x00000001391cadecULL, },
+        { 0x00000000ce80f89bULL, 0x0000000137346b78ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_h.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_h.c
index fe55d3eaee..9c96d2b875 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_h.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction HADD_U.H
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x00fb00fb00ea00d9ULL, 0x0118019e00ed00c5ULL, },
         { 0x00da00e200c00122ULL, 0x00f400e6012400eeULL, },
         { 0x012e007900f100eaULL, 0x018400a200e801deULL, },
+        { 0x00ca00c0012d0162ULL, 0x0165018700d400f6ULL, },
+        { 0x00bf0063008f0130ULL, 0x017e016000eb0182ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_w.c b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_w.c
index babe04d586..3417fffcdd 100644
--- a/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-add/test_msa_hadd_u_w.c
@@ -1,6 +1,8 @@
 /*
  *  Test program for MSA instruction HADD_U.W
  *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
  *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
  *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0000c2a700019c1dULL, 0x0000b0b000018dcbULL, },
         { 0x0001571b0000b371ULL, 0x0000994f0001594eULL, },
         { 0x000070b200012539ULL, 0x0001490b0000fc3eULL, },
+        { 0x00011ef90000e9b1ULL, 0x000154f00000ce56ULL, },
+        { 0x0000869c0001407fULL, 0x000116c900018be2ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_b.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_b.c
index 675fb90c72..e279a2f2eb 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_S.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0e54e2fb0b00b6e7ULL, 0xdae4a7ebaa3603daULL, },
         { 0xfc5cfe0c43491b47ULL, 0xec2cc91bd35ec9d6ULL, },
         { 0x35060b5855e2d42bULL, 0xcff4a1f9df401aceULL, },
+        { 0x0e54e2fb0b00b6e7ULL, 0xdae4a7ebaa3603daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_d.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_d.c
index e87d414b5f..9969b5976b 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_S.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0e54e27c0c00b6e7ULL, 0xdae527ec2a3703daULL, },
         { 0xfc5cfe8cc34a1bc7ULL, 0xecac4a1bd3df4956ULL, },
         { 0x36068b5855e2d4abULL, 0xd074a1f95f411aceULL, },
+        { 0x0e54e27c0c00b6e7ULL, 0xdae527ec2a3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_h.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_h.c
index c850543587..3029232950 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_S.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0e54e27b0c00b6e7ULL, 0xdae4a7ebaa3603daULL, },
         { 0xfc5cfe8c43491bc7ULL, 0xecacca1bd3dec956ULL, },
         { 0x36060b5855e2d4abULL, 0xd074a1f9df401aceULL, },
+        { 0x0e54e27b0c00b6e7ULL, 0xdae4a7ebaa3603daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_w.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_w.c
index 3220574ca0..70e9a5d5d3 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_s_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_S.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0e54e27b0c00b6e7ULL, 0xdae527ebaa3703daULL, },
         { 0xfc5cfe8c434a1bc7ULL, 0xecac4a1bd3df4956ULL, },
         { 0x36068b5855e2d4abULL, 0xd074a1f9df411aceULL, },
+        { 0x0e54e27b0c00b6e7ULL, 0xdae527ebaa3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_b.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_b.c
index c3f96a6a5f..260bad24df 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_U.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x8e54627b8b80b667ULL, 0x5ae4a7ebaa36835aULL, },
         { 0x7c5c7e8c43499b47ULL, 0x6cac499bd35ec956ULL, },
         { 0xb5860b585562d42bULL, 0x4ff4a1795f409aceULL, },
+        { 0x8e54627b8b80b667ULL, 0x5ae4a7ebaa36835aULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_d.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_d.c
index 3a78629cd8..2d9fb07d82 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_U.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x8e54e27c0c00b6e7ULL, 0x5ae527ec2a3703daULL, },
         { 0x7c5cfe8cc34a1bc7ULL, 0x6cac4a1bd3df4956ULL, },
         { 0xb6068b5855e2d4abULL, 0x5074a1f95f411aceULL, },
+        { 0x8e54e27c0c00b6e7ULL, 0x5ae527ec2a3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_h.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_h.c
index b7db518afb..0b34441851 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_U.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x8e54627b8c00b6e7ULL, 0x5ae4a7ebaa3683daULL, },
         { 0x7c5c7e8c43499bc7ULL, 0x6cac4a1bd3dec956ULL, },
         { 0xb6060b5855e2d4abULL, 0x5074a1f95f409aceULL, },
+        { 0x8e54627b8c00b6e7ULL, 0x5ae4a7ebaa3683daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_w.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_w.c
index 75e2409f1f..f1a4a5fee9 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_ave_u_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVE_U.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x8e54e27b8c00b6e7ULL, 0x5ae527ebaa3703daULL, },
         { 0x7c5cfe8c434a1bc7ULL, 0x6cac4a1bd3df4956ULL, },
         { 0xb6068b5855e2d4abULL, 0x5074a1f95f411aceULL, },
+        { 0x8e54e27b8c00b6e7ULL, 0x5ae527ebaa3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_b.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_b.c
index 59bba28d2e..cc1528d14d 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_S.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0e55e2fc0c00b7e7ULL, 0xdae5a7ecaa3704daULL, },
         { 0xfc5dfe0d434a1c47ULL, 0xec2cca1bd45fc9d6ULL, },
         { 0x36070b5856e2d52bULL, 0xd0f4a2f9df411aceULL, },
+        { 0x0e55e2fc0c00b7e7ULL, 0xdae5a7ecaa3704daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_d.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_d.c
index 435c09f9bf..8070f2080f 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_S.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0e54e27c0c00b6e7ULL, 0xdae527ec2a3703daULL, },
         { 0xfc5cfe8cc34a1bc7ULL, 0xecac4a1bd3df4956ULL, },
         { 0x36068b5855e2d4abULL, 0xd074a1f95f411aceULL, },
+        { 0x0e54e27c0c00b6e7ULL, 0xdae527ec2a3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_h.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_h.c
index 0902e508ec..40db92467d 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_S.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0e55e27c0c00b6e7ULL, 0xdae5a7ecaa3703daULL, },
         { 0xfc5dfe8d434a1bc7ULL, 0xecacca1bd3dfc956ULL, },
         { 0x36070b5855e2d4abULL, 0xd074a1f9df411aceULL, },
+        { 0x0e55e27c0c00b6e7ULL, 0xdae5a7ecaa3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_w.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_w.c
index 31f4553916..bde28505a6 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_s_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_S.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0e54e27c0c00b6e7ULL, 0xdae527ecaa3703daULL, },
         { 0xfc5cfe8d434a1bc7ULL, 0xecac4a1bd3df4956ULL, },
         { 0x36068b5855e2d4abULL, 0xd074a1f9df411aceULL, },
+        { 0x0e54e27c0c00b6e7ULL, 0xdae527ecaa3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_b.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_b.c
index 8aa7ec6a41..9ef7b7c00a 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_U.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x8e55627c8c80b767ULL, 0x5ae5a7ecaa37845aULL, },
         { 0x7c5d7e8d434a9c47ULL, 0x6cac4a9bd45fc956ULL, },
         { 0xb6870b585662d52bULL, 0x50f4a2795f419aceULL, },
+        { 0x8e55627c8c80b767ULL, 0x5ae5a7ecaa37845aULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_d.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_d.c
index 9b16e1250f..307a84466f 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_U.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x8e54e27c0c00b6e7ULL, 0x5ae527ec2a3703daULL, },
         { 0x7c5cfe8cc34a1bc7ULL, 0x6cac4a1bd3df4956ULL, },
         { 0xb6068b5855e2d4abULL, 0x5074a1f95f411aceULL, },
+        { 0x8e54e27c0c00b6e7ULL, 0x5ae527ec2a3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_h.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_h.c
index 191e4acbdc..3eef867b4e 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_U.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x8e55627c8c00b6e7ULL, 0x5ae5a7ecaa3783daULL, },
         { 0x7c5d7e8d434a9bc7ULL, 0x6cac4a1bd3dfc956ULL, },
         { 0xb6070b5855e2d4abULL, 0x5074a1f95f419aceULL, },
+        { 0x8e55627c8c00b6e7ULL, 0x5ae5a7ecaa3783daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_w.c b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_w.c
index e0d6b177c4..baf373035e 100644
--- a/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-average/test_msa_aver_u_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction AVER_U.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x8e54e27c8c00b6e7ULL, 0x5ae527ecaa3703daULL, },
         { 0x7c5cfe8d434a1bc7ULL, 0x6cac4a1bd3df4956ULL, },
         { 0xb6068b5855e2d4abULL, 0x5074a1f95f411aceULL, },
+        { 0x8e54e27c8c00b6e7ULL, 0x5ae527ecaa3703daULL, },
+        { 0x704f164d5e31e24eULL, 0x8df188d8a942e2a0ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_b.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_b.c
index bb884ee752..f1f5e6db5a 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_d.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_d.c
index ef13f7d05d..a87f1c965c 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_h.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_h.c
index 1c43d40ee1..a300ee3d4a 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_w.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_w.c
index 1297d41f29..a3bb29adcb 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_ceq_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_b.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_b.c
index afd5f635f0..2b391c97ff 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_d.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_d.c
index 04d58d103c..44c6fe4817 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_h.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_h.c
index ed1a1e21bd..331745be0c 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_w.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_w.c
index ea4dc1a30b..acccc258ea 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_s_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_b.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_b.c
index 6e4fdd83ec..7421450ded 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_d.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_d.c
index b2b2f557b8..402a54df16 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_h.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_h.c
index b2267752eb..51685c76c0 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_w.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_w.c
index 00e930c0c7..adf744d9f1 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_cle_u_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_b.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_b.c
index 4a52ebe491..aa222fd165 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_d.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_d.c
index cc945cdf8d..f762c4d061 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_h.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_h.c
index b228dfe7f5..c456aef6f0 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_w.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_w.c
index 6cb192a851..33978d178d 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_s_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_b.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_b.c
index b6189d6b72..fc6dc51d38 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_d.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_d.c
index 4f547d8f0b..5394b27d6e 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_h.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_h.c
index 9fcd81c653..0f813d870e 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_w.c b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_w.c
index 8f648afa62..ff11360ba5 100644
--- a/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-compare/test_msa_clt_u_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_b.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_b.c
index 38e3670422..e17bb9a1ef 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_S.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0001fdff00ff03ffULL, 0x000200000000ff00ULL, },
         { 0x000000ff02000001ULL, 0xff00f6002b0000f8ULL, },
         { 0xeaffff0001000009ULL, 0xfa0101fffc010018ULL, },
+        { 0xff000000ffff0000ULL, 0xfe000228010100fcULL, },
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_d.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_d.c
index d92b6953e8..fcab2cd2e5 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_S.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
         { 0x0000000000000000ULL, 0xffffffffffffffffULL, },
         { 0xffffffffffffffe6ULL, 0xfffffffffffffffaULL, },
+        { 0xffffffffffffffffULL, 0xfffffffffffffffeULL, },
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_h.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_h.c
index f191b985b1..83cc02ef3c 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_S.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0000fffd00000003ULL, 0x000000000000ffffULL, },
         { 0x0000000000020000ULL, 0xfffffff600390000ULL, },
         { 0xffe6003900010000ULL, 0xfffa0001fffc0000ULL, },
+        { 0xffff0000ffff0000ULL, 0xfffe000200010000ULL, },
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_w.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_w.c
index 0baaff10f9..c3bc831f03 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_s_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_S.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0000000000000000ULL, 0x0000000000000000ULL, },
         { 0x0000000000000002ULL, 0xffffffff00000039ULL, },
         { 0xffffffe600000001ULL, 0xfffffffafffffffcULL, },
+        { 0xffffffffffffffffULL, 0xfffffffe00000001ULL, },
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_b.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_b.c
index 770544a2de..97557d8f92 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_U.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0101070201040001ULL, 0x0000010101000000ULL, },
         { 0x0000000002000201ULL, 0x01020c020000010dULL, },
         { 0x0000ff0001000109ULL, 0x0700000808010200ULL, },
+        { 0x0000000000000100ULL, 0x0301000000010608ULL, },
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_d.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_d.c
index 9653e7db5c..a77553837b 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_U.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0000000000000001ULL, 0x0000000000000000ULL, },
         { 0x0000000000000000ULL, 0x0000000000000001ULL, },
         { 0x0000000000000000ULL, 0x0000000000000007ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000003ULL, },
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_h.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_h.c
index 3dcd30bee9..2628eb2d99 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_U.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0001000700010000ULL, 0x0000000100010000ULL, },
         { 0x0000000000020002ULL, 0x0001000c00000001ULL, },
         { 0x0000003900010001ULL, 0x0007000000070002ULL, },
+        { 0x0000000000000001ULL, 0x0003000000000006ULL, },
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_w.c b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_w.c
index fd395ef5e9..8de2043663 100644
--- a/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-divide/test_msa_div_u_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DIV_U.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x0000000100000001ULL, 0x0000000000000001ULL, },
         { 0x0000000000000002ULL, 0x0000000100000000ULL, },
         { 0x0000000000000001ULL, 0x0000000700000007ULL, },
+        { 0x0000000000000000ULL, 0x0000000300000000ULL, },
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_d.c b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_d.c
index af8d609bea..d495361bdc 100644
--- a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DOTP_S.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xc17a5d0372a2a622ULL, 0x0afd6368668933a8ULL, },
         { 0xda65cd5e9f696cdcULL, 0xdeeb6bec644a26d0ULL, },
         { 0x1aad30609bff5437ULL, 0xf059a43d01b40370ULL, },
+        { 0xc17a5d0372a2a622ULL, 0x0afd6368668933a8ULL, },
+        { 0x53edf7dbd76122edULL, 0x50347e61c2f51a40ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_h.c b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_h.c
index 40de72ae97..b7850650a5 100644
--- a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DOTP_S.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xf706df16dc8de6b6ULL, 0xf0d31b5827f9f42aULL, },
         { 0xec36ee202172098aULL, 0xd846ec28206404e0ULL, },
         { 0xe9721dc70769091eULL, 0xf8711c48091bf7e4ULL, },
+        { 0xf706df16dc8de6b6ULL, 0xf0d31b5827f9f42aULL, },
+        { 0x4961190d2be51b48ULL, 0x348a3e802e952784ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_w.c b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_w.c
index 2f1d23be6f..57cfd79e99 100644
--- a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_s_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DOTP_S.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xd437b4e8f3b0139fULL, 0x08c7d980187d5896ULL, },
         { 0xc9576c1204f83042ULL, 0xd91d3e4709b06e36ULL, },
         { 0xfe2a6f6923268793ULL, 0x179e9377ef4766beULL, },
+        { 0xd437b4e8f3b0139fULL, 0x08c7d980187d5896ULL, },
+        { 0x33368b8a2619d525ULL, 0x6a47932120c31904ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_d.c b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_d.c
index e998e00410..24f736da86 100644
--- a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DOTP_U.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x8ffb559e72a2a622ULL, 0x8744321b668933a8ULL, },
         { 0x4ab4e3ab9f696cdcULL, 0xd21109f6644a26d0ULL, },
         { 0x8afc46ad9bff5437ULL, 0x1890b25301b40370ULL, },
+        { 0x8ffb559e72a2a622ULL, 0x8744321b668933a8ULL, },
+        { 0x53edf7dbd76122edULL, 0xbe9d5551c2f51a40ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_h.c b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_h.c
index e8db601a74..4d30246067 100644
--- a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DOTP_U.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x670642166b8da1b6ULL, 0xe0d340587bf92d2aULL, },
         { 0x5c36512021725e8aULL, 0x8a465528c764a2e0ULL, },
         { 0xa8721dc73869b21eULL, 0xf27179481e1be5e4ULL, },
+        { 0x670642166b8da1b6ULL, 0xe0d340587bf92d2aULL, },
+        { 0x4961190d2be5df48ULL, 0x308afe8080952b84ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_w.c b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_w.c
index cf5bd13f48..ca96d21165 100644
--- a/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-dot-product/test_msa_dotp_u_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction DOTP_U.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x5ad3b4e8bfaf139fULL, 0x8076d98091fe5896ULL, },
         { 0x4ff36c125a383042ULL, 0x2fe23e4744196e36ULL, },
         { 0x6e796f69cc7c8793ULL, 0x6e879377578266beULL, },
+        { 0x5ad3b4e8bfaf139fULL, 0x8076d98091fe5896ULL, },
+        { 0x33368b8aeab5d525ULL, 0x97d9932138871904ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_b.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_b.c
index 5fa2644c30..1b90f23f2f 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_d.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_d.c
index 9d97982ab5..aa8dca8ac8 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_h.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_h.c
index 3365f726a2..fb9b63c16e 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_w.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_w.c
index b33f4b7d79..1334d80b59 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_a_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_b.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_b.c
index 71e571d0c4..05f8c032c0 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_d.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_d.c
index e088ab99e3..31218c1e45 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_h.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_h.c
index 6d1b81a119..1e4e69b6ed 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_w.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_w.c
index bd64294322..a4fa928d98 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_s_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_b.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_b.c
index 206d907a26..a245349dbe 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_d.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_d.c
index 4dd247f54a..7aac861e33 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_h.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_h.c
index 0e6a7651eb..16c21229bb 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_w.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_w.c
index db61440551..ce439a59f2 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_max_u_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_b.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_b.c
index d2a93a2e44..1dc8c3c75c 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_d.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_d.c
index 69fd3c7662..a82c7ee349 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_h.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_h.c
index 9f45b55539..91f1c23658 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_w.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_w.c
index b08231d65f..8030ed24be 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_a_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_b.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_b.c
index 80b5201be1..92aff1bc5c 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_d.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_d.c
index 0ed319024c..76e5dce863 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_h.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_h.c
index b049054d9f..878ca2be43 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_w.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_w.c
index 2bcd0a00ef..3df7e102c0 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_s_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_b.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_b.c
index 2a06b43379..fdaab02aed 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_b.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_d.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_d.c
index 37924f3038..68e1a92f94 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_d.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_h.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_h.c
index 1846995ce4..f9168b9b95 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_h.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_w.c b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_w.c
index 8b20c05440..749fcefc0c 100644
--- a/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-max-min/test_msa_min_u_w.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 2019  Wave Computing, Inc.
  *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_b.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_b.c
new file mode 100644
index 0000000000..dc1a4edf0d
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_b.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction MOD_S.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_S.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xdedededededededeULL, 0xdedededededededeULL, },
+        { 0xddddddddddddddddULL, 0xddddddddddddddddULL, },
+        { 0xe4aae2e4aae2e4aaULL, 0xe2e4aae2e4aae2e4ULL, },
+        { 0xfeaae3feaae3feaaULL, 0xe3feaae3feaae3feULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2121212121212121ULL, 0x2121212121212121ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x1b551d1b551d1b55ULL, 0x1d1b551d1b551d1bULL, },
+        { 0x01551c01551c0155ULL, 0x1c01551c01551c01ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xe9cccce9cccce9ccULL, 0xcce9cccce9cccce9ULL, },
+        { 0xe8cccce8cccce8ccULL, 0xcce8cccce8cccce8ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x1633331633331633ULL, 0x3316333316333316ULL, },
+        { 0x1733331733331733ULL, 0x3317333317333317ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe3e438e3e438e3e4ULL, 0x38e3e438e3e438e3ULL, },
+        { 0xe3e338e3e338e3e3ULL, 0x38e3e338e3e338e3ULL, },
+        { 0xe3f604e3f604e3f6ULL, 0x04e3f604e3f604e3ULL, },
+        { 0xe3f405e3f405e3f4ULL, 0x05e3f405e3f405e3ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffff38ffff38ffffULL, 0x38ffff38ffff38ffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c1bc71c1bc71c1bULL, 0xc71c1bc71c1bc71cULL, },
+        { 0x1c1cc71c1cc71c1cULL, 0xc71c1cc71c1cc71cULL, },
+        { 0x1c09fb1c09fb1c09ULL, 0xfb1c09fb1c09fb1cULL, },
+        { 0x1c0bfa1c0bfa1c0bULL, 0xfa1c0bfa1c0bfa1cULL, },
+        { 0x1c71ff1c71ff1c71ULL, 0xff1c71ff1c71ff1cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x0028e6cc28621c00ULL, 0x03040b10fe3cb000ULL, },
+        { 0xdc10e6cc28005540ULL, 0x24170b00fe25fa0cULL, },
+        { 0xf81bfccc28001940ULL, 0x4b0d0b0efe39ec0cULL, },
+        { 0xfbbe002f25f5c708ULL, 0x12f7fd1a013f02fcULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xfbbe000d06f5c708ULL, 0x12f7f500151408fcULL, },
+        { 0xfbbe00164df5e508ULL, 0x12f7bb1a153f16fcULL, },
+        { 0xac5afcdee1cfe000ULL, 0x27d8fdffff2b2508ULL, },    /*  72  */
+        { 0xfc18aeaab9cffd00ULL, 0x03fcc6ffff2b2500ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xac0bf0f7b900e5ceULL, 0x27f6c6ffab2b0714ULL, },
+        { 0x704f16190e31e20eULL, 0xd8f1f6d8ff42e200ULL, },
+        { 0x020d164d1131e206ULL, 0xf9facdf2fd03e200ULL, },
+        { 0x1c4f164d1700e24eULL, 0xdbf1fc00fe17e2f0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_B(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_B(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_d.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_d.c
new file mode 100644
index 0000000000..0909793df8
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction MOD_S.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_S.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xdddddddddddddddeULL, 0xdddddddddddddddeULL, },
+        { 0xddddddddddddddddULL, 0xddddddddddddddddULL, },
+        { 0x0000000000000000ULL, 0xe38e38e38e38e38dULL, },
+        { 0xfffffffffffffffdULL, 0xe38e38e38e38e38eULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2222222222222221ULL, 0x2222222222222221ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x0000000000000002ULL, 0x1c71c71c71c71c71ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xe93e93e93e93e93eULL, 0xccccccccccccccccULL, },
+        { 0xe93e93e93e93e93dULL, 0xccccccccccccccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x16c16c16c16c16c1ULL, 0x3333333333333333ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e38e38e38eULL, 0x05b05b05b05b05afULL, },
+        { 0xe38e38e38e38e38eULL, 0x05b05b05b05b05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0x38e38e38e38e38e3ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xfa4fa4fa4fa4fa50ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xfa4fa4fa4fa4fa4fULL, },
+        { 0x1c71c71c71c71c71ULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0xffa2dbefac389060ULL, 0x127fda10bebdb718ULL, },
+        { 0xdc1038216e92c9c0ULL, 0x238e445f53508af8ULL, },
+        { 0xf8b9fd198694378eULL, 0x4b670b5efe7bb00cULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },    /*  72  */
+        { 0xfd40a74bf7d7c5e8ULL, 0x01e950cb80ac7f1cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0xd9589437a7be92acULL, },
+        { 0x019b20633f34191eULL, 0xffbfeb7528bed488ULL, },
+        { 0x1ca9c4f818016dceULL, 0xdda316d7ff992cc8ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_D(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_D(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_h.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_h.c
new file mode 100644
index 0000000000..4d1463fa16
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_h.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction MOD_S.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_S.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xdddedddedddedddeULL, 0xdddedddedddedddeULL, },
+        { 0xddddddddddddddddULL, 0xddddddddddddddddULL, },
+        { 0x0000e38daaaa0000ULL, 0xe38daaaa0000e38dULL, },
+        { 0xfffde38eaaaafffdULL, 0xe38eaaaafffde38eULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2221222122212221ULL, 0x2221222122212221ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x1c711c7255551c71ULL, 0x1c7255551c711c72ULL, },
+        { 0x00021c7155550002ULL, 0x1c71555500021c71ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xe93ecccccccce93eULL, 0xcccccccce93eccccULL, },
+        { 0xe93dcccccccce93dULL, 0xcccccccce93dccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x16c13333333316c1ULL, 0x3333333316c13333ULL, },
+        { 0x16c23333333316c2ULL, 0x3333333316c23333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e3e38ee38eULL, 0x38e3e38ee38e38e3ULL, },
+        { 0xe38e38e3e38de38eULL, 0x38e3e38de38e38e3ULL, },
+        { 0xe38e05aff4a0e38eULL, 0x05aff4a0e38e05afULL, },
+        { 0xe38e05b0f49ee38eULL, 0x05b0f49ee38e05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffff38e3ffffffffULL, 0x38e3ffffffff38e3ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c1c711c71ULL, 0xc71c1c711c71c71cULL, },
+        { 0x1c71c71c1c721c71ULL, 0xc71c1c721c71c71cULL, },
+        { 0x1c71fa500b5f1c71ULL, 0xfa500b5f1c71fa50ULL, },
+        { 0x1c71fa4f0b611c71ULL, 0xfa4f0b611c71fa4fULL, },
+        { 0x1c71ffff71c71c71ULL, 0xffff71c71c71ffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0xffa2ffef28621c48ULL, 0x12820b5efe7bb00cULL, },
+        { 0xdc10e6cc28625540ULL, 0x238f0b5efe7bfa34ULL, },
+        { 0xf8b9fd19286219dcULL, 0x4b670b5efe7beaccULL, },
+        { 0xfbbe00632531c708ULL, 0x12f7ff4e017e0308ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xfbbe00630762c708ULL, 0x12f7f41b153f08d4ULL, },
+        { 0xfbbe00634d93e4baULL, 0x12f7bb1a153f183cULL, },
+        { 0xac5afa46e231e0c0ULL, 0x27d8ffd5febe2514ULL, },    /*  72  */
+        { 0xfd40ffe0b9cffd70ULL, 0x01eac6ffeae82514ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xac5af191b9cfe496ULL, 0x27d8c6ffab2b07b4ULL, },
+        { 0x704f164d0d6de24eULL, 0xd958fa84ffdfe2a0ULL, },
+        { 0x019b0042109ee24eULL, 0xffbbcdbefe3ee2a0ULL, },
+        { 0x1ca9164d1800e24eULL, 0xdda1fadafe17e2a0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_H(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_H(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_w.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_w.c
new file mode 100644
index 0000000000..7e13966358
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_s_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction MOD_S.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_S.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xdddddddedddddddeULL, 0xdddddddedddddddeULL, },
+        { 0xddddddddddddddddULL, 0xddddddddddddddddULL, },
+        { 0xe38e38e4aaaaaaaaULL, 0xe38e38e2e38e38e4ULL, },
+        { 0xfffffffeaaaaaaaaULL, 0xe38e38e3fffffffeULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x2222222122222221ULL, 0x2222222122222221ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x1c71c71b55555555ULL, 0x1c71c71d1c71c71bULL, },
+        { 0x0000000155555555ULL, 0x1c71c71c00000001ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0xe93e93e9ccccccccULL, 0xcccccccce93e93e9ULL, },
+        { 0xe93e93e8ccccccccULL, 0xcccccccce93e93e8ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x16c16c1633333333ULL, 0x3333333316c16c16ULL, },
+        { 0x16c16c1733333333ULL, 0x3333333316c16c17ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e3e38e38e4ULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e3e38e38e3ULL, 0x38e38e38e38e38e3ULL, },
+        { 0xe38e38e3f49f49f6ULL, 0x05b05b04e38e38e3ULL, },
+        { 0xe38e38e3f49f49f4ULL, 0x05b05b05e38e38e3ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffffffffffULL, 0x38e38e38ffffffffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c1c71c71bULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c1c71c71cULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c0b60b609ULL, 0xfa4fa4fb1c71c71cULL, },
+        { 0x1c71c71c0b60b60bULL, 0xfa4fa4fa1c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xffffffff1c71c71cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0xffa2dbf828625540ULL, 0x127fda10fe7bb00cULL, },
+        { 0xdc10382228625540ULL, 0x238e445ffe7bb00cULL, },
+        { 0xf8b9fd1928625540ULL, 0x4b670b5efe7bb00cULL, },
+        { 0xfbbe0063253171c8ULL, 0x12f7bb1a0002f3a4ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xfbbe006307635288ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xfbbe00634d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0xac5aaeaae231e0c0ULL, 0x27d8c6fffe985280ULL, },    /*  72  */
+        { 0xfd40a751b9cf8b80ULL, 0x01e950cbeae91e08ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xac5aaeaab9cf8b80ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d0d6d37ceULL, 0xd9589436ffb8aff4ULL, },
+        { 0x019b205b109e1b46ULL, 0xffbfeb74fe402e90ULL, },
+        { 0x1ca9c4f718016dceULL, 0xdda316d6fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_W(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_S_W(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_b.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_b.c
new file mode 100644
index 0000000000..fbc86996f5
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_b.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction MOD_U.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_U.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x1c711f1c711f1c71ULL, 0x1f1c711f1c711f1cULL, },
+        { 0x031d38031d38031dULL, 0x38031d38031d3803ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x1111111111111111ULL, 0x1111111111111111ULL, },
+        { 0xaa1c02aa1c02aa1cULL, 0x02aa1c02aa1c02aaULL, },
+        { 0x0239aa0239aa0239ULL, 0xaa0239aa0239aa02ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x55551d55551d5555ULL, 0x1d55551d55551d55ULL, },
+        { 0x0155550155550155ULL, 0x5501555501555501ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xcc3e24cc3e24cc3eULL, 0x24cc3e24cc3e24ccULL, },
+        { 0x085b05085b05085bULL, 0x05085b05085b0508ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x1733331733331733ULL, 0x3317333317333317ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x398e38398e38398eULL, 0x38398e38398e3839ULL, },
+        { 0x3939383939383939ULL, 0x3839393839393839ULL, },
+        { 0x178e38178e38178eULL, 0x38178e38178e3817ULL, },
+        { 0x1728051728051728ULL, 0x0517280517280517ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x031d38031d38031dULL, 0x38031d38031d3803ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c711d1c711d1c71ULL, 0x1d1c711d1c711d1cULL, },
+        { 0x1c1c1d1c1c1d1c1cULL, 0x1d1c1c1d1c1c1d1cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c0b2e1c0b2e1c0bULL, 0x2e1c0b2e1c0b2e1cULL, },
+        { 0x1c711f1c711f1c71ULL, 0x1f1c711f1c711f1cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x886ae60628625500ULL, 0x03670b10023c0c0cULL, },
+        { 0x8810382228625540ULL, 0x24670b5e53251c0cULL, },
+        { 0x181b0a3228005540ULL, 0x4b670b5e5539b00cULL, },
+        { 0x7354006325311d08ULL, 0x1229001a153f5200ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f0a00634d933c08ULL, 0x121fbb1a1514080cULL, },
+        { 0x1b2000164d00c708ULL, 0x1206331a153f525cULL, },
+        { 0x245aaeaa190b3600ULL, 0x270a0043ab2b2508ULL, },    /*  72  */
+        { 0xac5aae471f3c8b00ULL, 0x03d80b15032b2514ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b14105b0b8b32ULL, 0x27d83e27022b2514ULL, },
+        { 0x704f164d0e31380eULL, 0x4223041ca9423204ULL, },
+        { 0x704f164d11311b06ULL, 0x0ff1880801033ea0ULL, },
+        { 0x704f164d5e31574eULL, 0x181988d8a9170400ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_B(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_B(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_d.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_d.c
new file mode 100644
index 0000000000..6c3224a71b
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction MOD_U.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_U.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71c71c71c73ULL, },
+        { 0x0000000000000006ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x1111111111111111ULL, 0x1111111111111111ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0x0000000000000001ULL, },
+        { 0x0000000000000004ULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x5555555555555555ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x0000000000000002ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xccccccccccccccccULL, 0x2222222222222223ULL, },
+        { 0x05b05b05b05b05b5ULL, 0x05b05b05b05b05b0ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x05b05b05b05b05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000006ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0x2d82d82d82d82d83ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71c71c71c73ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x886ae6cc28625540ULL, 0x127fda10bebdb718ULL, },
+        { 0x886ae6cc28625540ULL, 0x238e445f53508af8ULL, },
+        { 0x181bd07eca3072f2ULL, 0x4b670b5efe7bb00cULL, },
+        { 0x73531997253171c8ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6351b893c43b88ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x1b1fd3c89130026cULL, 0x12f7bb1a153f52fcULL, },
+        { 0x23efc7de916d3640ULL, 0x27d8c6ffab2b2514ULL, },    /*  72  */
+        { 0xac5aaeaab9cf8b80ULL, 0x01e950cb80ac7f1cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b985d5b9da932ULL, 0x27d8c6ffab2b2514ULL, },
+        { 0x704f164d5e31e24eULL, 0x428a7d79aac73294ULL, },
+        { 0x704f164d5e31e24eULL, 0x092b6b2214879dbcULL, },
+        { 0x704f164d5e31e24eULL, 0x166733d9a7c17364ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_D(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_D(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_h.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_h.c
new file mode 100644
index 0000000000..c9b40d76f8
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_h.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction MOD_U.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_U.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x1c711c7371c71c71ULL, 0x1c7371c71c711c73ULL, },
+        { 0x000638e31c710006ULL, 0x38e31c71000638e3ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x1111111111111111ULL, 0x1111111111111111ULL, },
+        { 0xaaaa00011c72aaaaULL, 0x00011c72aaaa0001ULL, },
+        { 0x0004aaaa38e30004ULL, 0xaaaa38e30004aaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x55551c7255555555ULL, 0x1c72555555551c72ULL, },
+        { 0x0002555555550002ULL, 0x5555555500025555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xcccc22233e94ccccULL, 0x22233e94cccc2223ULL, },
+        { 0x05b505b05b0505b5ULL, 0x05b05b0505b505b0ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x16c23333333316c2ULL, 0x3333333316c23333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e438e38e3838e4ULL, 0x38e38e3838e438e3ULL, },
+        { 0x38e438e338e338e4ULL, 0x38e338e338e438e3ULL, },
+        { 0x16c238e38e3816c2ULL, 0x38e38e3816c238e3ULL, },
+        { 0x16c205b027d216c2ULL, 0x05b027d216c205b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x000638e31c710006ULL, 0x38e31c71000638e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c711c7271c71c71ULL, 0x1c7271c71c711c72ULL, },
+        { 0x1c711c721c721c71ULL, 0x1c721c721c711c72ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c712d830b611c71ULL, 0x2d830b611c712d83ULL, },
+        { 0x1c711c7371c71c71ULL, 0x1c7371c71c711c73ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x886a005028625540ULL, 0x12820b5e14c60a14ULL, },
+        { 0x886a382228625540ULL, 0x238f0b5e53501bbcULL, },
+        { 0x181b07ca28625540ULL, 0x4b670b5e5539b00cULL, },
+        { 0x7354006325311c88ULL, 0x12f7053a153f52fcULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6400634d933b88ULL, 0x12f7bb1a153f08d4ULL, },
+        { 0x1b2000634d93c708ULL, 0x12f73242153f52fcULL, },
+        { 0x23f0aeaa18473640ULL, 0x27d805c1ab2b2514ULL, },    /*  72  */
+        { 0xac5a00411ea98b80ULL, 0x01ea0be501332514ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b128f5b9e8b80ULL, 0x27d83e2701e92514ULL, },
+        { 0x704f164d0d6d37ceULL, 0x428a0070a9423294ULL, },
+        { 0x704f0042109e1b46ULL, 0x093088d814893ca8ULL, },
+        { 0x704f164d5e3156ceULL, 0x166988d8a9420428ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_H(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_H(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_w.c b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_w.c
new file mode 100644
index 0000000000..9f248b85bc
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-modulo/test_msa_mod_u_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction MOD_U.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "MOD_U.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71f1c71c71cULL, },
+        { 0x000000031c71c71dULL, 0x38e38e3800000003ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x1111111111111111ULL, 0x1111111111111111ULL, },
+        { 0xaaaaaaaa1c71c71cULL, 0x00000002aaaaaaaaULL, },
+        { 0x0000000238e38e39ULL, 0xaaaaaaaa00000002ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x5555555555555555ULL, 0x1c71c71d55555555ULL, },
+        { 0x0000000155555555ULL, 0x5555555500000001ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xcccccccc3e93e93eULL, 0x22222224ccccccccULL, },
+        { 0x05b05b085b05b05bULL, 0x05b05b0505b05b08ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x16c16c1733333333ULL, 0x3333333316c16c17ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e38e398e38e38eULL, 0x38e38e3838e38e39ULL, },
+        { 0x38e38e3938e38e39ULL, 0x38e38e3838e38e39ULL, },
+        { 0x16c16c178e38e38eULL, 0x38e38e3816c16c17ULL, },
+        { 0x16c16c1727d27d28ULL, 0x05b05b0516c16c17ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x000000031c71c71dULL, 0x38e38e3800000003ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71d1c71c71cULL, },
+        { 0x1c71c71c1c71c71cULL, 0x1c71c71d1c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x1c71c71c0b60b60bULL, 0x2d82d82e1c71c71cULL, },
+        { 0x1c71c71c71c71c71ULL, 0x1c71c71f1c71c71cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x886ae6cc28625540ULL, 0x127fda1014c31f38ULL, },
+        { 0x886ae6cc28625540ULL, 0x238e445f53508af8ULL, },
+        { 0x181bd07f28625540ULL, 0x4b670b5e5538cd6cULL, },
+        { 0x73531997253171c8ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6351b94d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x1b1fd3c94d93c708ULL, 0x12f7bb1a153f52fcULL, },
+        { 0x23efc7de18463680ULL, 0x27d8c6ffab2b2514ULL, },    /*  72  */
+        { 0xac5aaeaa1ea7fd70ULL, 0x01e950cb01308d34ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b985d5b9da932ULL, 0x27d8c6ff01e84274ULL, },
+        { 0x704f164d0d6d37ceULL, 0x428a7d7aa942e2a0ULL, },
+        { 0x704f164d109e1b46ULL, 0x092b6b2214879dbcULL, },
+        { 0x704f164d5e31e24eULL, 0x166733dba942e2a0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_W(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_MOD_U_W(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_h.c b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_h.c
index f1526087fa..55cf7b4d86 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MUL_Q.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xb69bf1d4cc591b07ULL, 0xdc7e3510397df77dULL, },
         { 0x9712fb9b1db7ec38ULL, 0xbccff56b01071259ULL, },
         { 0xfc43001139150d37ULL, 0xef194023f19aecf4ULL, },
+        { 0xb69bf1d4cc591b07ULL, 0xdc7e3510397df77dULL, },
+        { 0x628a03e2455006e3ULL, 0x65a26eec3ac806bdULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_w.c b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_w.c
index df815ee9da..4b21d0651c 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mul_q_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MUL_Q.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xb69baa39cc590fcdULL, 0xdc7e6df7397c58d9ULL, },
         { 0x9713a7171db7f3a5ULL, 0xbccfb4690107236fULL, },
         { 0xfc439edc3916c1e4ULL, 0xef19389cf19a0fddULL, },
+        { 0xb69baa39cc590fcdULL, 0xdc7e6df7397c58d9ULL, },
+        { 0x628a97e4455157d3ULL, 0x65a1c5e13ac736e1ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_h.c b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_h.c
index fd0a5fa7a8..835105074f 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MULR_Q.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xb69bf1d4cc591b07ULL, 0xdc7f3511397df77eULL, },
         { 0x9713fb9c1db7ec39ULL, 0xbccff56b01081259ULL, },
         { 0xfc44001139160d37ULL, 0xef1a4023f19aecf5ULL, },
+        { 0xb69bf1d4cc591b07ULL, 0xdc7f3511397df77eULL, },
+        { 0x628a03e3455006e4ULL, 0x65a36eec3ac806beULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_w.c b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_w.c
index f28b0d0a20..9e31adaf1b 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulr_q_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MULR_Q.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xb69baa3acc590fcdULL, 0xdc7e6df7397c58daULL, },
         { 0x9713a7171db7f3a6ULL, 0xbccfb46a0107236fULL, },
         { 0xfc439edd3916c1e4ULL, 0xef19389cf19a0fdeULL, },
+        { 0xb69baa3acc590fcdULL, 0xdc7e6df7397c58daULL, },
+        { 0x628a97e4455157d3ULL, 0x65a1c5e23ac736e2ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_b.c b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_b.c
index 6beeda906d..3cb9ee9f3a 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MULV.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x40c6f422ee9fb600ULL, 0x7b583028e316aa80ULL, },
         { 0x80b6c45cb0c20a80ULL, 0x4ff7d850aeb66080ULL, },
         { 0xd0a200c74623ae70ULL, 0xea8758f0dd3e6480ULL, },
+        { 0x40c6f422ee9fb600ULL, 0x7b583028e316aa80ULL, },
+        { 0x0061e429846184c4ULL, 0xa9e1404091048400ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_d.c b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_d.c
index 3205d4b378..8853440ce8 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MULV.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x76a5ab8089e38100ULL, 0xa1019a60d4dad480ULL, },
         { 0xfbe1883aee787980ULL, 0x821d25438dd09f80ULL, },
         { 0xedbf72842143b470ULL, 0x7f8223caefce5580ULL, },
+        { 0x76a5ab8089e38100ULL, 0xa1019a60d4dad480ULL, },
+        { 0x4bb436d5b1e9cfc4ULL, 0x12d1ceb0e31ee400ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_h.c b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_h.c
index e7bd985ae1..b014e8141a 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MULV.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x8fc62522929f8100ULL, 0x7a585f288416d480ULL, },
         { 0x78b6f35cb6c27980ULL, 0xb6f78750ceb69f80ULL, },
         { 0xcfa29fc7d323b470ULL, 0xe587adf0113e5580ULL, },
+        { 0x8fc62522929f8100ULL, 0x7a585f288416d480ULL, },
+        { 0x386153290561cfc4ULL, 0x5ce136403504e400ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_w.c b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_w.c
index 9c318b3fbb..967151cca5 100644
--- a/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-multiply/test_msa_mulv_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction MULV.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0xe8bf252289e38100ULL, 0x91ae5f28d4dad480ULL, },
         { 0xb0f0f35cee787980ULL, 0xd67987508dd09f80ULL, },
         { 0x7abb9fc72143b470ULL, 0x11e5adf0efce5580ULL, },
+        { 0xe8bf252289e38100ULL, 0x91ae5f28d4dad480ULL, },
+        { 0x25775329b1e9cfc4ULL, 0xdfd63640e31ee400ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_b.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_b.c
new file mode 100644
index 0000000000..b67b7cf990
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_b.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction ASUB_S.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_S.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5656565656565656ULL, 0x5656565656565656ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3434343434343434ULL, 0x3434343434343434ULL, },
+        { 0x1c71391c71391c71ULL, 0x391c71391c71391cULL, },
+        { 0x1d72381d72381d72ULL, 0x381d72381d72381dULL, },
+        { 0x0101010101010101ULL, 0x0101010101010101ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5656565656565656ULL, 0x5656565656565656ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x3434343434343434ULL, 0x3434343434343434ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x1d72381d72381d72ULL, 0x381d72381d72381dULL, },
+        { 0x1c71391c71391c71ULL, 0x391c71391c71391cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0x5656565656565656ULL, 0x5656565656565656ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xababababababababULL, 0xababababababababULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8989898989898989ULL, 0x8989898989898989ULL, },
+        { 0x391c8e391c8e391cULL, 0x8e391c8e391c8e39ULL, },
+        { 0x72c71d72c71d72c7ULL, 0x1d72c71d72c71d72ULL, },
+        { 0x5656565656565656ULL, 0x5656565656565656ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xababababababababULL, 0xababababababababULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x8989898989898989ULL, 0x8989898989898989ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x72c71d72c71d72c7ULL, 0x1d72c71d72c71d72ULL, },
+        { 0x391c8e391c8e391cULL, 0x8e391c8e391c8e39ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0x3434343434343434ULL, 0x3434343434343434ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8989898989898989ULL, 0x8989898989898989ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x6767676767676767ULL, 0x6767676767676767ULL, },
+        { 0x173e6c173e6c173eULL, 0x6c173e6c173e6c17ULL, },
+        { 0x50a50550a50550a5ULL, 0x0550a50550a50550ULL, },
+        { 0x3434343434343434ULL, 0x3434343434343434ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x8989898989898989ULL, 0x8989898989898989ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x6767676767676767ULL, 0x6767676767676767ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x50a50550a50550a5ULL, 0x0550a50550a50550ULL, },
+        { 0x173e6c173e6c173eULL, 0x6c173e6c173e6c17ULL, },
+        { 0x1c71391c71391c71ULL, 0x391c71391c71391cULL, },    /*  48  */
+        { 0x1d72381d72381d72ULL, 0x381d72381d72381dULL, },
+        { 0x391c8e391c8e391cULL, 0x8e391c8e391c8e39ULL, },
+        { 0x72c71d72c71d72c7ULL, 0x1d72c71d72c71d72ULL, },
+        { 0x173e6c173e6c173eULL, 0x6c173e6c173e6c17ULL, },
+        { 0x50a50550a50550a5ULL, 0x0550a50550a50550ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x39e37139e37139e3ULL, 0x7139e37139e37139ULL, },
+        { 0x1d72381d72381d72ULL, 0x381d72381d72381dULL, },    /*  56  */
+        { 0x1c71391c71391c71ULL, 0x391c71391c71391cULL, },
+        { 0x72c71d72c71d72c7ULL, 0x1d72c71d72c71d72ULL, },
+        { 0x391c8e391c8e391cULL, 0x8e391c8e391c8e39ULL, },
+        { 0x50a50550a50550a5ULL, 0x0550a50550a50550ULL, },
+        { 0x173e6c173e6c173eULL, 0x6c173e6c173e6c17ULL, },
+        { 0x39e37139e37139e3ULL, 0x7139e37139e37139ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x73ac1a9725cf8e38ULL, 0x39705044173ca210ULL, },
+        { 0x241038226f93cac0ULL, 0x248f455f53507508ULL, },
+        { 0xe81b30813631730eULL, 0xbe7683865539326cULL, },
+        { 0x73ac1a9725cf8e38ULL, 0x39705044173ca210ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f9c52b9943c3c88ULL, 0x151f0b1b6a142d18ULL, },
+        { 0x75911616119e1b46ULL, 0x850633426c03705cULL, },
+        { 0x241038226f93cac0ULL, 0x248f455f53507508ULL, },    /*  72  */
+        { 0x4f9c52b9943c3c88ULL, 0x151f0b1b6a142d18ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc40b68a3a56257ceULL, 0x9a193e2702174374ULL, },
+        { 0xe81b30813631730eULL, 0xbe7683865539326cULL, },
+        { 0x75911616119e1b46ULL, 0x850633426c03705cULL, },
+        { 0xc40b68a3a56257ceULL, 0x9a193e2702174374ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_B(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_B(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_d.c
new file mode 100644
index 0000000000..3db9ca9714
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction ASUB_S.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_S.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555556ULL, 0x5555555555555556ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333333333334ULL, 0x3333333333333334ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e38e38e38e4ULL, },
+        { 0x1c71c71c71c71c72ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555556ULL, 0x5555555555555556ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x3333333333333334ULL, 0x3333333333333334ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x1c71c71c71c71c72ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e38e38e38e4ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0x5555555555555556ULL, 0x5555555555555556ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaabULL, 0xaaaaaaaaaaaaaaabULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8888888888888889ULL, 0x8888888888888889ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x8e38e38e38e38e39ULL, },
+        { 0x71c71c71c71c71c7ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x5555555555555556ULL, 0x5555555555555556ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaaaaaaaaaaaaaabULL, 0xaaaaaaaaaaaaaaabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x8888888888888889ULL, 0x8888888888888889ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x71c71c71c71c71c7ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x8e38e38e38e38e39ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0x3333333333333334ULL, 0x3333333333333334ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8888888888888889ULL, 0x8888888888888889ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x6666666666666667ULL, 0x6666666666666667ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x6c16c16c16c16c17ULL, },
+        { 0x4fa4fa4fa4fa4fa5ULL, 0x05b05b05b05b05b0ULL, },
+        { 0x3333333333333334ULL, 0x3333333333333334ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x8888888888888889ULL, 0x8888888888888889ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x6666666666666667ULL, 0x6666666666666667ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4fa4fa4fa4fa4fa5ULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x6c16c16c16c16c17ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e38e38e38e4ULL, },    /*  48  */
+        { 0x1c71c71c71c71c72ULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x8e38e38e38e38e39ULL, },
+        { 0x71c71c71c71c71c7ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x6c16c16c16c16c17ULL, },
+        { 0x4fa4fa4fa4fa4fa5ULL, 0x05b05b05b05b05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x38e38e38e38e38e3ULL, 0x71c71c71c71c71c7ULL, },
+        { 0x1c71c71c71c71c72ULL, 0x38e38e38e38e38e3ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0x38e38e38e38e38e4ULL, },
+        { 0x71c71c71c71c71c7ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x8e38e38e38e38e39ULL, },
+        { 0x4fa4fa4fa4fa4fa5ULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x6c16c16c16c16c17ULL, },
+        { 0x38e38e38e38e38e3ULL, 0x71c71c71c71c71c7ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x73531997253171c8ULL, 0x386f5044e93c5d10ULL, },
+        { 0x23efc7de916d3640ULL, 0x238e445f53508af8ULL, },
+        { 0xe7e42f8135cf8d0eULL, 0xbd7582865538cd6cULL, },
+        { 0x73531997253171c8ULL, 0x386f5044e93c5d10ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6351b893c43b88ULL, 0x14e10be595ebd218ULL, },
+        { 0x749115ea109e1b46ULL, 0x850632416bfc705cULL, },
+        { 0x23efc7de916d3640ULL, 0x238e445f53508af8ULL, },    /*  72  */
+        { 0x4f6351b893c43b88ULL, 0x14e10be595ebd218ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc3f467a2a46256ceULL, 0x99e73e2701e84274ULL, },
+        { 0xe7e42f8135cf8d0eULL, 0xbd7582865538cd6cULL, },
+        { 0x749115ea109e1b46ULL, 0x850632416bfc705cULL, },
+        { 0xc3f467a2a46256ceULL, 0x99e73e2701e84274ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_D(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_D(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_h.c
new file mode 100644
index 0000000000..fce13434f5
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_h.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction ASUB_S.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_S.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5556555655565556ULL, 0x5556555655565556ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3334333433343334ULL, 0x3334333433343334ULL, },
+        { 0x1c7138e471c71c71ULL, 0x38e471c71c7138e4ULL, },
+        { 0x1c7238e371c81c72ULL, 0x38e371c81c7238e3ULL, },
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5556555655565556ULL, 0x5556555655565556ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x3334333433343334ULL, 0x3334333433343334ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x1c7238e371c81c72ULL, 0x38e371c81c7238e3ULL, },
+        { 0x1c7138e471c71c71ULL, 0x38e471c71c7138e4ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0x5556555655565556ULL, 0x5556555655565556ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaabaaabaaabaaabULL, 0xaaabaaabaaabaaabULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8889888988898889ULL, 0x8889888988898889ULL, },
+        { 0x38e48e391c7238e4ULL, 0x8e391c7238e48e39ULL, },
+        { 0x71c71c72c71d71c7ULL, 0x1c72c71d71c71c72ULL, },
+        { 0x5556555655565556ULL, 0x5556555655565556ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaabaaabaaabaaabULL, 0xaaabaaabaaabaaabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x8889888988898889ULL, 0x8889888988898889ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x71c71c72c71d71c7ULL, 0x1c72c71d71c71c72ULL, },
+        { 0x38e48e391c7238e4ULL, 0x8e391c7238e48e39ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0x3334333433343334ULL, 0x3334333433343334ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8889888988898889ULL, 0x8889888988898889ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x6667666766676667ULL, 0x6667666766676667ULL, },
+        { 0x16c26c173e9416c2ULL, 0x6c173e9416c26c17ULL, },
+        { 0x4fa505b0a4fb4fa5ULL, 0x05b0a4fb4fa505b0ULL, },
+        { 0x3334333433343334ULL, 0x3334333433343334ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x8889888988898889ULL, 0x8889888988898889ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x6667666766676667ULL, 0x6667666766676667ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4fa505b0a4fb4fa5ULL, 0x05b0a4fb4fa505b0ULL, },
+        { 0x16c26c173e9416c2ULL, 0x6c173e9416c26c17ULL, },
+        { 0x1c7138e471c71c71ULL, 0x38e471c71c7138e4ULL, },    /*  48  */
+        { 0x1c7238e371c81c72ULL, 0x38e371c81c7238e3ULL, },
+        { 0x38e48e391c7238e4ULL, 0x8e391c7238e48e39ULL, },
+        { 0x71c71c72c71d71c7ULL, 0x1c72c71d71c71c72ULL, },
+        { 0x16c26c173e9416c2ULL, 0x6c173e9416c26c17ULL, },
+        { 0x4fa505b0a4fb4fa5ULL, 0x05b0a4fb4fa505b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x38e371c7e38f38e3ULL, 0x71c7e38f38e371c7ULL, },
+        { 0x1c7238e371c81c72ULL, 0x38e371c81c7238e3ULL, },    /*  56  */
+        { 0x1c7138e471c71c71ULL, 0x38e471c71c7138e4ULL, },
+        { 0x71c71c72c71d71c7ULL, 0x1c72c71d71c71c72ULL, },
+        { 0x38e48e391c7238e4ULL, 0x8e391c7238e48e39ULL, },
+        { 0x4fa505b0a4fb4fa5ULL, 0x05b0a4fb4fa505b0ULL, },
+        { 0x16c26c173e9416c2ULL, 0x6c173e9416c26c17ULL, },
+        { 0x38e371c7e38f38e3ULL, 0x71c7e38f38e371c7ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x7354199725318e38ULL, 0x3870504416c4a2f0ULL, },
+        { 0x23f038226e93c9c0ULL, 0x238f445f53507508ULL, },
+        { 0xe7e52f8135cf72f2ULL, 0xbd76828655393294ULL, },
+        { 0x7354199725318e38ULL, 0x3870504416c4a2f0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6451b993c43b88ULL, 0x14e10be56a142de8ULL, },
+        { 0x749115ea109e1b46ULL, 0x850632426bfd705cULL, },
+        { 0x23f038226e93c9c0ULL, 0x238f445f53507508ULL, },    /*  72  */
+        { 0x4f6451b993c43b88ULL, 0x14e10be56a142de8ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc3f567a3a46256ceULL, 0x99e73e2701e94274ULL, },
+        { 0xe7e52f8135cf72f2ULL, 0xbd76828655393294ULL, },
+        { 0x749115ea109e1b46ULL, 0x850632426bfd705cULL, },
+        { 0xc3f567a3a46256ceULL, 0x99e73e2701e94274ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_H(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_H(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_w.c
new file mode 100644
index 0000000000..e087f480ab
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_s_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction ASUB_S.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_S.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555655555556ULL, 0x5555555655555556ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x3333333433333334ULL, 0x3333333433333334ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e391c71c71cULL, },
+        { 0x1c71c71d71c71c72ULL, 0x38e38e381c71c71dULL, },
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555655555556ULL, 0x5555555655555556ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x3333333433333334ULL, 0x3333333433333334ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x1c71c71d71c71c72ULL, 0x38e38e381c71c71dULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e391c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0x5555555655555556ULL, 0x5555555655555556ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaabaaaaaaabULL, 0xaaaaaaabaaaaaaabULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8888888988888889ULL, 0x8888888988888889ULL, },
+        { 0x38e38e391c71c71cULL, 0x8e38e38e38e38e39ULL, },
+        { 0x71c71c72c71c71c7ULL, 0x1c71c71d71c71c72ULL, },
+        { 0x5555555655555556ULL, 0x5555555655555556ULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaaaaaabaaaaaaabULL, 0xaaaaaaabaaaaaaabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x8888888988888889ULL, 0x8888888988888889ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x71c71c72c71c71c7ULL, 0x1c71c71d71c71c72ULL, },
+        { 0x38e38e391c71c71cULL, 0x8e38e38e38e38e39ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0x3333333433333334ULL, 0x3333333433333334ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8888888988888889ULL, 0x8888888988888889ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x6666666766666667ULL, 0x6666666766666667ULL, },
+        { 0x16c16c173e93e93eULL, 0x6c16c16c16c16c17ULL, },
+        { 0x4fa4fa50a4fa4fa5ULL, 0x05b05b054fa4fa50ULL, },
+        { 0x3333333433333334ULL, 0x3333333433333334ULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x8888888988888889ULL, 0x8888888988888889ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x6666666766666667ULL, 0x6666666766666667ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4fa4fa50a4fa4fa5ULL, 0x05b05b054fa4fa50ULL, },
+        { 0x16c16c173e93e93eULL, 0x6c16c16c16c16c17ULL, },
+        { 0x1c71c71c71c71c71ULL, 0x38e38e391c71c71cULL, },    /*  48  */
+        { 0x1c71c71d71c71c72ULL, 0x38e38e381c71c71dULL, },
+        { 0x38e38e391c71c71cULL, 0x8e38e38e38e38e39ULL, },
+        { 0x71c71c72c71c71c7ULL, 0x1c71c71d71c71c72ULL, },
+        { 0x16c16c173e93e93eULL, 0x6c16c16c16c16c17ULL, },
+        { 0x4fa4fa50a4fa4fa5ULL, 0x05b05b054fa4fa50ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x38e38e39e38e38e3ULL, 0x71c71c7138e38e39ULL, },
+        { 0x1c71c71d71c71c72ULL, 0x38e38e381c71c71dULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0x38e38e391c71c71cULL, },
+        { 0x71c71c72c71c71c7ULL, 0x1c71c71d71c71c72ULL, },
+        { 0x38e38e391c71c71cULL, 0x8e38e38e38e38e39ULL, },
+        { 0x4fa4fa50a4fa4fa5ULL, 0x05b05b054fa4fa50ULL, },
+        { 0x16c16c173e93e93eULL, 0x6c16c16c16c16c17ULL, },
+        { 0x38e38e39e38e38e3ULL, 0x71c71c7138e38e39ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x73531997253171c8ULL, 0x386f504416c3a2f0ULL, },
+        { 0x23efc7de6e92c9c0ULL, 0x238e445f53508af8ULL, },
+        { 0xe7e42f8135cf8d0eULL, 0xbd7582865538cd6cULL, },
+        { 0x73531997253171c8ULL, 0x386f504416c3a2f0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6351b993c43b88ULL, 0x14e10be56a142de8ULL, },
+        { 0x749115ea109e1b46ULL, 0x850632426bfc705cULL, },
+        { 0x23efc7de6e92c9c0ULL, 0x238e445f53508af8ULL, },    /*  72  */
+        { 0x4f6351b993c43b88ULL, 0x14e10be56a142de8ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc3f467a3a46256ceULL, 0x99e73e2701e84274ULL, },
+        { 0xe7e42f8135cf8d0eULL, 0xbd7582865538cd6cULL, },
+        { 0x749115ea109e1b46ULL, 0x850632426bfc705cULL, },
+        { 0xc3f467a3a46256ceULL, 0x99e73e2701e84274ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_W(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_S_W(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_b.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_b.c
new file mode 100644
index 0000000000..25a859a3d4
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_b.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction ASUB_U.B
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_U.B";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x391c72391c72391cULL, 0x72391c72391c7239ULL, },
+        { 0x8e391d8e391d8e39ULL, 0x1d8e391d8e391d8eULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8e391d8e391d8e39ULL, 0x1d8e391d8e391d8eULL, },
+        { 0x391c72391c72391cULL, 0x72391c72391c7239ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x173e94173e94173eULL, 0x94173e94173e9417ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x173e94173e94173eULL, 0x94173e94173e9417ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x391c72391c72391cULL, 0x72391c72391c7239ULL, },
+        { 0x8e391d8e391d8e39ULL, 0x1d8e391d8e391d8eULL, },
+        { 0x173e94173e94173eULL, 0x94173e94173e9417ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc71d8fc71d8fc71dULL, 0x8fc71d8fc71d8fc7ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x8e391d8e391d8e39ULL, 0x1d8e391d8e391d8eULL, },
+        { 0x391c72391c72391cULL, 0x72391c72391c7239ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x173e94173e94173eULL, 0x94173e94173e9417ULL, },
+        { 0xc71d8fc71d8fc71dULL, 0x8fc71d8fc71d8fc7ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x7354e66925317238ULL, 0x3990b044e93c5ef0ULL, },
+        { 0x24103822916d3640ULL, 0x2471bba153508b08ULL, },
+        { 0x181bd07f36318d0eULL, 0x428a7d7a55393294ULL, },
+        { 0x7354e66925317238ULL, 0x3990b044e93c5ef0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f64ae476c3c3c78ULL, 0x151f0be596142de8ULL, },
+        { 0x8b6f161611621b46ULL, 0x7b0633be9403905cULL, },
+        { 0x24103822916d3640ULL, 0x2471bba153508b08ULL, },    /*  72  */
+        { 0x4f64ae476c3c3c78ULL, 0x151f0be596142de8ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b985d5b9e5732ULL, 0x66193e270217bd8cULL, },
+        { 0x181bd07f36318d0eULL, 0x428a7d7a55393294ULL, },
+        { 0x8b6f161611621b46ULL, 0x7b0633be9403905cULL, },
+        { 0x3c0b985d5b9e5732ULL, 0x66193e270217bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_B(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_B(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_d.c
new file mode 100644
index 0000000000..5506525208
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction ASUB_U.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_U.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x71c71c71c71c71c7ULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71c71c71c72ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x71c71c71c71c71c7ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x93e93e93e93e93e9ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x93e93e93e93e93e9ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x71c71c71c71c71c7ULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x93e93e93e93e93e9ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc71c71c71c71c71dULL, 0x8e38e38e38e38e39ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71c71c71c72ULL, },
+        { 0x38e38e38e38e38e4ULL, 0x71c71c71c71c71c7ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c16c16c16c16c2ULL, 0x93e93e93e93e93e9ULL, },
+        { 0xc71c71c71c71c71dULL, 0x8e38e38e38e38e39ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x73531997253171c8ULL, 0x386f5044e93c5d10ULL, },
+        { 0x23efc7de916d3640ULL, 0x238e445f53508af8ULL, },
+        { 0x181bd07eca3072f2ULL, 0x428a7d79aac73294ULL, },
+        { 0x73531997253171c8ULL, 0x386f5044e93c5d10ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6351b893c43b88ULL, 0x14e10be595ebd218ULL, },
+        { 0x8b6eea15ef61e4baULL, 0x7af9cdbe94038fa4ULL, },
+        { 0x23efc7de916d3640ULL, 0x238e445f53508af8ULL, },    /*  72  */
+        { 0x4f6351b893c43b88ULL, 0x14e10be595ebd218ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b985d5b9da932ULL, 0x6618c1d8fe17bd8cULL, },
+        { 0x181bd07eca3072f2ULL, 0x428a7d79aac73294ULL, },
+        { 0x8b6eea15ef61e4baULL, 0x7af9cdbe94038fa4ULL, },
+        { 0x3c0b985d5b9da932ULL, 0x6618c1d8fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_D(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_D(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_h.c
new file mode 100644
index 0000000000..513f02bc24
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_h.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction ASUB_U.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_U.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x38e471c71c7238e4ULL, 0x71c71c7238e471c7ULL, },
+        { 0x8e391c7238e38e39ULL, 0x1c7238e38e391c72ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8e391c7238e38e39ULL, 0x1c7238e38e391c72ULL, },
+        { 0x38e471c71c7238e4ULL, 0x71c71c7238e471c7ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x16c293e93e9416c2ULL, 0x93e93e9416c293e9ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c293e93e9416c2ULL, 0x93e93e9416c293e9ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e471c71c7238e4ULL, 0x71c71c7238e471c7ULL, },
+        { 0x8e391c7238e38e39ULL, 0x1c7238e38e391c72ULL, },
+        { 0x16c293e93e9416c2ULL, 0x93e93e9416c293e9ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc71d8e391c71c71dULL, 0x8e391c71c71d8e39ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x8e391c7238e38e39ULL, 0x1c7238e38e391c72ULL, },
+        { 0x38e471c71c7238e4ULL, 0x71c71c7238e471c7ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c293e93e9416c2ULL, 0x93e93e9416c293e9ULL, },
+        { 0xc71d8e391c71c71dULL, 0x8e391c71c71d8e39ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x7354e669253171c8ULL, 0x3870afbce93c5d10ULL, },
+        { 0x23f03822916d3640ULL, 0x238fbba153508af8ULL, },
+        { 0x181bd07f35cf8d0eULL, 0x428a7d7a55393294ULL, },
+        { 0x7354e669253171c8ULL, 0x3870afbce93c5d10ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f64ae476c3c3b88ULL, 0x14e10be595ec2de8ULL, },
+        { 0x8b6f15ea109e1b46ULL, 0x7afa324294038fa4ULL, },
+        { 0x23f03822916d3640ULL, 0x238fbba153508af8ULL, },    /*  72  */
+        { 0x4f64ae476c3c3b88ULL, 0x14e10be595ec2de8ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b985d5b9e56ceULL, 0x66193e2701e9bd8cULL, },
+        { 0x181bd07f35cf8d0eULL, 0x428a7d7a55393294ULL, },
+        { 0x8b6f15ea109e1b46ULL, 0x7afa324294038fa4ULL, },
+        { 0x3c0b985d5b9e56ceULL, 0x66193e2701e9bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_H(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_H(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_w.c
new file mode 100644
index 0000000000..974db1fd6e
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_asub_u_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction ASUB_U.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "ASUB_U.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },    /*  16  */
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x38e38e391c71c71cULL, 0x71c71c7238e38e39ULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71d8e38e38eULL, },
+        { 0xaaaaaaaaaaaaaaaaULL, 0xaaaaaaaaaaaaaaaaULL, },    /*  24  */
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x5555555555555555ULL, 0x5555555555555555ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71d8e38e38eULL, },
+        { 0x38e38e391c71c71cULL, 0x71c71c7238e38e39ULL, },
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },    /*  32  */
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x16c16c173e93e93eULL, 0x93e93e9416c16c17ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0xccccccccccccccccULL, 0xccccccccccccccccULL, },    /*  40  */
+        { 0x3333333333333333ULL, 0x3333333333333333ULL, },
+        { 0x7777777777777777ULL, 0x7777777777777777ULL, },
+        { 0x2222222222222222ULL, 0x2222222222222222ULL, },
+        { 0x9999999999999999ULL, 0x9999999999999999ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c16c173e93e93eULL, 0x93e93e9416c16c17ULL, },
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },    /*  48  */
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },
+        { 0x38e38e391c71c71cULL, 0x71c71c7238e38e39ULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71d8e38e38eULL, },
+        { 0x16c16c173e93e93eULL, 0x93e93e9416c16c17ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xc71c71c71c71c71dULL, 0x8e38e38fc71c71c7ULL, },
+        { 0xe38e38e38e38e38eULL, 0x38e38e38e38e38e3ULL, },    /*  56  */
+        { 0x1c71c71c71c71c71ULL, 0xc71c71c71c71c71cULL, },
+        { 0x8e38e38e38e38e39ULL, 0x1c71c71d8e38e38eULL, },
+        { 0x38e38e391c71c71cULL, 0x71c71c7238e38e39ULL, },
+        { 0xb05b05b05b05b05bULL, 0x05b05b05b05b05b0ULL, },
+        { 0x16c16c173e93e93eULL, 0x93e93e9416c16c17ULL, },
+        { 0xc71c71c71c71c71dULL, 0x8e38e38fc71c71c7ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*  64  */
+        { 0x73531997253171c8ULL, 0x386f5044e93c5d10ULL, },
+        { 0x23efc7de916d3640ULL, 0x238e445f53508af8ULL, },
+        { 0x181bd07f35cf8d0eULL, 0x428a7d7a5538cd6cULL, },
+        { 0x73531997253171c8ULL, 0x386f5044e93c5d10ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x4f6351b96c3bc478ULL, 0x14e10be595ebd218ULL, },
+        { 0x8b6eea16109e1b46ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0x23efc7de916d3640ULL, 0x238e445f53508af8ULL, },    /*  72  */
+        { 0x4f6351b96c3bc478ULL, 0x14e10be595ebd218ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x3c0b985d5b9da932ULL, 0x6618c1d901e84274ULL, },
+        { 0x181bd07f35cf8d0eULL, 0x428a7d7a5538cd6cULL, },
+        { 0x8b6eea16109e1b46ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0x3c0b985d5b9da932ULL, 0x6618c1d901e84274ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_W(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_ASUB_U_W(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_d.c
new file mode 100644
index 0000000000..7153bba0d6
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction HSUB_S.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "HSUB_S.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000000055555555ULL, 0x0000000055555555ULL, },
+        { 0xffffffffaaaaaaaaULL, 0xffffffffaaaaaaaaULL, },
+        { 0x0000000033333333ULL, 0x0000000033333333ULL, },
+        { 0xffffffffccccccccULL, 0xffffffffccccccccULL, },
+        { 0x0000000071c71c71ULL, 0x000000001c71c71cULL, },
+        { 0xffffffff8e38e38eULL, 0xffffffffe38e38e3ULL, },
+        { 0x0000000000000001ULL, 0x0000000000000001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000055555556ULL, 0x0000000055555556ULL, },
+        { 0xffffffffaaaaaaabULL, 0xffffffffaaaaaaabULL, },
+        { 0x0000000033333334ULL, 0x0000000033333334ULL, },
+        { 0xffffffffcccccccdULL, 0xffffffffcccccccdULL, },
+        { 0x0000000071c71c72ULL, 0x000000001c71c71dULL, },
+        { 0xffffffff8e38e38fULL, 0xffffffffe38e38e4ULL, },
+        { 0xffffffffaaaaaaabULL, 0xffffffffaaaaaaabULL, },    /*  16  */
+        { 0xffffffffaaaaaaaaULL, 0xffffffffaaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffff55555555ULL, 0xffffffff55555555ULL, },
+        { 0xffffffffdddddddeULL, 0xffffffffdddddddeULL, },
+        { 0xffffffff77777777ULL, 0xffffffff77777777ULL, },
+        { 0x000000001c71c71cULL, 0xffffffffc71c71c7ULL, },
+        { 0xffffffff38e38e39ULL, 0xffffffff8e38e38eULL, },
+        { 0x0000000055555556ULL, 0x0000000055555556ULL, },    /*  24  */
+        { 0x0000000055555555ULL, 0x0000000055555555ULL, },
+        { 0x00000000aaaaaaabULL, 0x00000000aaaaaaabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000088888889ULL, 0x0000000088888889ULL, },
+        { 0x0000000022222222ULL, 0x0000000022222222ULL, },
+        { 0x00000000c71c71c7ULL, 0x0000000071c71c72ULL, },
+        { 0xffffffffe38e38e4ULL, 0x0000000038e38e39ULL, },
+        { 0xffffffffcccccccdULL, 0xffffffffcccccccdULL, },    /*  32  */
+        { 0xffffffffccccccccULL, 0xffffffffccccccccULL, },
+        { 0x0000000022222222ULL, 0x0000000022222222ULL, },
+        { 0xffffffff77777777ULL, 0xffffffff77777777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffff99999999ULL, 0xffffffff99999999ULL, },
+        { 0x000000003e93e93eULL, 0xffffffffe93e93e9ULL, },
+        { 0xffffffff5b05b05bULL, 0xffffffffb05b05b0ULL, },
+        { 0x0000000033333334ULL, 0x0000000033333334ULL, },    /*  40  */
+        { 0x0000000033333333ULL, 0x0000000033333333ULL, },
+        { 0x0000000088888889ULL, 0x0000000088888889ULL, },
+        { 0xffffffffdddddddeULL, 0xffffffffdddddddeULL, },
+        { 0x0000000066666667ULL, 0x0000000066666667ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x00000000a4fa4fa5ULL, 0x000000004fa4fa50ULL, },
+        { 0xffffffffc16c16c2ULL, 0x0000000016c16c17ULL, },
+        { 0xffffffffe38e38e4ULL, 0x0000000038e38e39ULL, },    /*  48  */
+        { 0xffffffffe38e38e3ULL, 0x0000000038e38e38ULL, },
+        { 0x0000000038e38e39ULL, 0x000000008e38e38eULL, },
+        { 0xffffffff8e38e38eULL, 0xffffffffe38e38e3ULL, },
+        { 0x0000000016c16c17ULL, 0x000000006c16c16cULL, },
+        { 0xffffffffb05b05b0ULL, 0x0000000005b05b05ULL, },
+        { 0x0000000055555555ULL, 0x0000000055555555ULL, },
+        { 0xffffffff71c71c72ULL, 0x000000001c71c71cULL, },
+        { 0x000000001c71c71dULL, 0xffffffffc71c71c8ULL, },    /*  56  */
+        { 0x000000001c71c71cULL, 0xffffffffc71c71c7ULL, },
+        { 0x0000000071c71c72ULL, 0x000000001c71c71dULL, },
+        { 0xffffffffc71c71c7ULL, 0xffffffff71c71c72ULL, },
+        { 0x000000004fa4fa50ULL, 0xfffffffffa4fa4fbULL, },
+        { 0xffffffffe93e93e9ULL, 0xffffffff93e93e94ULL, },
+        { 0x000000008e38e38eULL, 0xffffffffe38e38e4ULL, },
+        { 0xffffffffaaaaaaabULL, 0xffffffffaaaaaaabULL, },
+        { 0xffffffff6008918cULL, 0x000000004ceb5b52ULL, },    /*  64  */
+        { 0xffffffff3ad71fc4ULL, 0x000000003627b862ULL, },
+        { 0xffffffffce9b5b4cULL, 0x00000000a03be64aULL, },
+        { 0xffffffff2a39047eULL, 0x00000000a22428beULL, },
+        { 0xffffffffd35bab23ULL, 0x00000000147c0b0eULL, },
+        { 0xffffffffae2a395bULL, 0xfffffffffdb8681eULL, },
+        { 0x0000000041ee74e3ULL, 0x0000000067cc9606ULL, },
+        { 0xffffffff9d8c1e15ULL, 0x0000000069b4d87aULL, },
+        { 0xffffffff83f8596aULL, 0x00000000295d16f3ULL, },    /*  72  */
+        { 0xffffffff5ec6e7a2ULL, 0x0000000012997403ULL, },
+        { 0xfffffffff28b232aULL, 0x000000007cada1ebULL, },
+        { 0xffffffff4e28cc5cULL, 0x000000007e95e45fULL, },
+        { 0x0000000047ecc10dULL, 0xffffffff8f75d8ccULL, },
+        { 0x0000000022bb4f45ULL, 0xffffffff78b235dcULL, },
+        { 0x00000000b67f8acdULL, 0xffffffffe2c663c4ULL, },
+        { 0x00000000121d33ffULL, 0xffffffffe4aea638ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_S_D(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_S_D(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_h.c
new file mode 100644
index 0000000000..37de5cb572
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_h.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction HSUB_S.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "HSUB_S.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0055005500550055ULL, 0x0055005500550055ULL, },
+        { 0xffaaffaaffaaffaaULL, 0xffaaffaaffaaffaaULL, },
+        { 0x0033003300330033ULL, 0x0033003300330033ULL, },
+        { 0xffccffccffccffccULL, 0xffccffccffccffccULL, },
+        { 0x0071001cffc70071ULL, 0x001cffc70071001cULL, },
+        { 0xff8effe30038ff8eULL, 0xffe30038ff8effe3ULL, },
+        { 0x0001000100010001ULL, 0x0001000100010001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0056005600560056ULL, 0x0056005600560056ULL, },
+        { 0xffabffabffabffabULL, 0xffabffabffabffabULL, },
+        { 0x0034003400340034ULL, 0x0034003400340034ULL, },
+        { 0xffcdffcdffcdffcdULL, 0xffcdffcdffcdffcdULL, },
+        { 0x0072001dffc80072ULL, 0x001dffc80072001dULL, },
+        { 0xff8fffe40039ff8fULL, 0xffe40039ff8fffe4ULL, },
+        { 0xffabffabffabffabULL, 0xffabffabffabffabULL, },    /*  16  */
+        { 0xffaaffaaffaaffaaULL, 0xffaaffaaffaaffaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xff55ff55ff55ff55ULL, 0xff55ff55ff55ff55ULL, },
+        { 0xffdeffdeffdeffdeULL, 0xffdeffdeffdeffdeULL, },
+        { 0xff77ff77ff77ff77ULL, 0xff77ff77ff77ff77ULL, },
+        { 0x001cffc7ff72001cULL, 0xffc7ff72001cffc7ULL, },
+        { 0xff39ff8effe3ff39ULL, 0xff8effe3ff39ff8eULL, },
+        { 0x0056005600560056ULL, 0x0056005600560056ULL, },    /*  24  */
+        { 0x0055005500550055ULL, 0x0055005500550055ULL, },
+        { 0x00ab00ab00ab00abULL, 0x00ab00ab00ab00abULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0089008900890089ULL, 0x0089008900890089ULL, },
+        { 0x0022002200220022ULL, 0x0022002200220022ULL, },
+        { 0x00c70072001d00c7ULL, 0x0072001d00c70072ULL, },
+        { 0xffe40039008effe4ULL, 0x0039008effe40039ULL, },
+        { 0xffcdffcdffcdffcdULL, 0xffcdffcdffcdffcdULL, },    /*  32  */
+        { 0xffccffccffccffccULL, 0xffccffccffccffccULL, },
+        { 0x0022002200220022ULL, 0x0022002200220022ULL, },
+        { 0xff77ff77ff77ff77ULL, 0xff77ff77ff77ff77ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xff99ff99ff99ff99ULL, 0xff99ff99ff99ff99ULL, },
+        { 0x003effe9ff94003eULL, 0xffe9ff94003effe9ULL, },
+        { 0xff5bffb00005ff5bULL, 0xffb00005ff5bffb0ULL, },
+        { 0x0034003400340034ULL, 0x0034003400340034ULL, },    /*  40  */
+        { 0x0033003300330033ULL, 0x0033003300330033ULL, },
+        { 0x0089008900890089ULL, 0x0089008900890089ULL, },
+        { 0xffdeffdeffdeffdeULL, 0xffdeffdeffdeffdeULL, },
+        { 0x0067006700670067ULL, 0x0067006700670067ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x00a50050fffb00a5ULL, 0x0050fffb00a50050ULL, },
+        { 0xffc20017006cffc2ULL, 0x0017006cffc20017ULL, },
+        { 0xffe40039ff8fffe4ULL, 0x0039ff8fffe40039ULL, },    /*  48  */
+        { 0xffe30038ff8effe3ULL, 0x0038ff8effe30038ULL, },
+        { 0x0039008effe40039ULL, 0x008effe40039008eULL, },
+        { 0xff8effe3ff39ff8eULL, 0xffe3ff39ff8effe3ULL, },
+        { 0x0017006cffc20017ULL, 0x006cffc20017006cULL, },
+        { 0xffb00005ff5bffb0ULL, 0x0005ff5bffb00005ULL, },
+        { 0x00550055ff560055ULL, 0x0055ff5600550055ULL, },
+        { 0xff72001cffc7ff72ULL, 0x001cffc7ff72001cULL, },
+        { 0x001dffc80072001dULL, 0xffc80072001dffc8ULL, },    /*  56  */
+        { 0x001cffc70071001cULL, 0xffc70071001cffc7ULL, },
+        { 0x0072001d00c70072ULL, 0x001d00c70072001dULL, },
+        { 0xffc7ff72001cffc7ULL, 0xff72001cffc7ff72ULL, },
+        { 0x0050fffb00a50050ULL, 0xfffb00a50050fffbULL, },
+        { 0xffe9ff94003effe9ULL, 0xff94003effe9ff94ULL, },
+        { 0x008effe40039008eULL, 0xffe40039008effe4ULL, },
+        { 0xffabffab00aaffabULL, 0xffab00aaffabffabULL, },
+        { 0xff1e001affc60015ULL, 0xffe4ffadff83ffa4ULL, },    /*  64  */
+        { 0xffcaff830095004dULL, 0x0054fff1ffbfffb4ULL, },
+        { 0xff2e003c005900d5ULL, 0x0073000cffd3ff9cULL, },
+        { 0xff39ff99fff70007ULL, 0x005a0033ffbc0010ULL, },
+        { 0xff910034ffebff87ULL, 0xffabff5dff9a0046ULL, },
+        { 0x003dff9d00baffbfULL, 0x001bffa1ffd60056ULL, },
+        { 0xffa10056007e0047ULL, 0x003affbcffea003eULL, },
+        { 0xffacffb3001cff79ULL, 0x0021ffe3ffd300b2ULL, },
+        { 0xff42ffe2ff57ff4bULL, 0xffc0ff68ff300019ULL, },    /*  72  */
+        { 0xffeeff4b0026ff83ULL, 0x0030ffacff6c0029ULL, },
+        { 0xff520004ffea000bULL, 0x004fffc7ff800011ULL, },
+        { 0xff5dff61ff88ff3dULL, 0x0036ffeeff690085ULL, },
+        { 0x0006004afffcffa2ULL, 0xff26ff2aff2effd6ULL, },
+        { 0x00b2ffb300cbffdaULL, 0xff96ff6eff6affe6ULL, },
+        { 0x0016006c008f0062ULL, 0xffb5ff89ff7effceULL, },
+        { 0x0021ffc9002dff94ULL, 0xff9cffb0ff670042ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_S_H(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_S_H(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_w.c
new file mode 100644
index 0000000000..6e90e1373b
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_s_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction HSUB_S.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "HSUB_S.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0xffffffffffffffffULL, 0xffffffffffffffffULL, },
+        { 0x0000555500005555ULL, 0x0000555500005555ULL, },
+        { 0xffffaaaaffffaaaaULL, 0xffffaaaaffffaaaaULL, },
+        { 0x0000333300003333ULL, 0x0000333300003333ULL, },
+        { 0xffffccccffffccccULL, 0xffffccccffffccccULL, },
+        { 0xffffc71c00001c71ULL, 0x000071c7ffffc71cULL, },
+        { 0x000038e3ffffe38eULL, 0xffff8e38000038e3ULL, },
+        { 0x0000000100000001ULL, 0x0000000100000001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000555600005556ULL, 0x0000555600005556ULL, },
+        { 0xffffaaabffffaaabULL, 0xffffaaabffffaaabULL, },
+        { 0x0000333400003334ULL, 0x0000333400003334ULL, },
+        { 0xffffcccdffffcccdULL, 0xffffcccdffffcccdULL, },
+        { 0xffffc71d00001c72ULL, 0x000071c8ffffc71dULL, },
+        { 0x000038e4ffffe38fULL, 0xffff8e39000038e4ULL, },
+        { 0xffffaaabffffaaabULL, 0xffffaaabffffaaabULL, },    /*  16  */
+        { 0xffffaaaaffffaaaaULL, 0xffffaaaaffffaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffff5555ffff5555ULL, 0xffff5555ffff5555ULL, },
+        { 0xffffdddeffffdddeULL, 0xffffdddeffffdddeULL, },
+        { 0xffff7777ffff7777ULL, 0xffff7777ffff7777ULL, },
+        { 0xffff71c7ffffc71cULL, 0x00001c72ffff71c7ULL, },
+        { 0xffffe38effff8e39ULL, 0xffff38e3ffffe38eULL, },
+        { 0x0000555600005556ULL, 0x0000555600005556ULL, },    /*  24  */
+        { 0x0000555500005555ULL, 0x0000555500005555ULL, },
+        { 0x0000aaab0000aaabULL, 0x0000aaab0000aaabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000888900008889ULL, 0x0000888900008889ULL, },
+        { 0x0000222200002222ULL, 0x0000222200002222ULL, },
+        { 0x00001c72000071c7ULL, 0x0000c71d00001c72ULL, },
+        { 0x00008e39000038e4ULL, 0xffffe38e00008e39ULL, },
+        { 0xffffcccdffffcccdULL, 0xffffcccdffffcccdULL, },    /*  32  */
+        { 0xffffccccffffccccULL, 0xffffccccffffccccULL, },
+        { 0x0000222200002222ULL, 0x0000222200002222ULL, },
+        { 0xffff7777ffff7777ULL, 0xffff7777ffff7777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffff9999ffff9999ULL, 0xffff9999ffff9999ULL, },
+        { 0xffff93e9ffffe93eULL, 0x00003e94ffff93e9ULL, },
+        { 0x000005b0ffffb05bULL, 0xffff5b05000005b0ULL, },
+        { 0x0000333400003334ULL, 0x0000333400003334ULL, },    /*  40  */
+        { 0x0000333300003333ULL, 0x0000333300003333ULL, },
+        { 0x0000888900008889ULL, 0x0000888900008889ULL, },
+        { 0xffffdddeffffdddeULL, 0xffffdddeffffdddeULL, },
+        { 0x0000666700006667ULL, 0x0000666700006667ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xfffffa5000004fa5ULL, 0x0000a4fbfffffa50ULL, },
+        { 0x00006c17000016c2ULL, 0xffffc16c00006c17ULL, },
+        { 0xffffe38fffff8e39ULL, 0x000038e4ffffe38fULL, },    /*  48  */
+        { 0xffffe38effff8e38ULL, 0x000038e3ffffe38eULL, },
+        { 0x000038e4ffffe38eULL, 0x00008e39000038e4ULL, },
+        { 0xffff8e39ffff38e3ULL, 0xffffe38effff8e39ULL, },
+        { 0x000016c2ffffc16cULL, 0x00006c17000016c2ULL, },
+        { 0xffffb05bffff5b05ULL, 0x000005b0ffffb05bULL, },
+        { 0xffffaaabffffaaaaULL, 0x0000aaabffffaaabULL, },
+        { 0x00001c72ffff71c7ULL, 0xffffc71c00001c72ULL, },
+        { 0x00001c72000071c8ULL, 0xffffc71d00001c72ULL, },    /*  56  */
+        { 0x00001c71000071c7ULL, 0xffffc71c00001c71ULL, },
+        { 0x000071c70000c71dULL, 0x00001c72000071c7ULL, },
+        { 0xffffc71c00001c72ULL, 0xffff71c7ffffc71cULL, },
+        { 0x00004fa50000a4fbULL, 0xfffffa5000004fa5ULL, },
+        { 0xffffe93e00003e94ULL, 0xffff93e9ffffe93eULL, },
+        { 0xffffe38e00008e39ULL, 0x000038e4ffffe38eULL, },
+        { 0x0000555500005556ULL, 0xffff555500005555ULL, },
+        { 0xffffa19effffd322ULL, 0x0000400900004e6fULL, },    /*  64  */
+        { 0xffff88070000615aULL, 0x0000904dffffab7fULL, },
+        { 0xffffd9c000009ce2ULL, 0x00008468ffffd967ULL, },
+        { 0xffff721d00004614ULL, 0x0000c28f00001bdbULL, },
+        { 0x000014f2fffff853ULL, 0x0000079900006533ULL, },
+        { 0xfffffb5b0000868bULL, 0x000057ddffffc243ULL, },
+        { 0x00004d140000c213ULL, 0x00004bf8fffff02bULL, },
+        { 0xffffe57100006b45ULL, 0x00008a1f0000329fULL, },
+        { 0xffffc58effff648fULL, 0x00001c7afffffb1fULL, },    /*  72  */
+        { 0xffffabf7fffff2c7ULL, 0x00006cbeffff582fULL, },
+        { 0xfffffdb000002e4fULL, 0x000060d9ffff8617ULL, },
+        { 0xffff960dffffd781ULL, 0x00009f00ffffc88bULL, },
+        { 0x00008983000008f1ULL, 0xffff8293fffff936ULL, },
+        { 0x00006fec00009729ULL, 0xffffd2d7ffff5646ULL, },
+        { 0x0000c1a50000d2b1ULL, 0xffffc6f2ffff842eULL, },
+        { 0x00005a0200007be3ULL, 0x00000519ffffc6a2ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_S_W(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_S_W(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_d.c
new file mode 100644
index 0000000000..57cf9d2e46
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_d.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction HSUB_U.D
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "HSUB_U.D";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0x00000000ffffffffULL, 0x00000000ffffffffULL, },
+        { 0x0000000055555555ULL, 0x0000000055555555ULL, },
+        { 0x00000000aaaaaaaaULL, 0x00000000aaaaaaaaULL, },
+        { 0x0000000033333333ULL, 0x0000000033333333ULL, },
+        { 0x00000000ccccccccULL, 0x00000000ccccccccULL, },
+        { 0x0000000071c71c71ULL, 0x000000001c71c71cULL, },
+        { 0x000000008e38e38eULL, 0x00000000e38e38e3ULL, },
+        { 0xffffffff00000001ULL, 0xffffffff00000001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffff55555556ULL, 0xffffffff55555556ULL, },
+        { 0xffffffffaaaaaaabULL, 0xffffffffaaaaaaabULL, },
+        { 0xffffffff33333334ULL, 0xffffffff33333334ULL, },
+        { 0xffffffffcccccccdULL, 0xffffffffcccccccdULL, },
+        { 0xffffffff71c71c72ULL, 0xffffffff1c71c71dULL, },
+        { 0xffffffff8e38e38fULL, 0xffffffffe38e38e4ULL, },
+        { 0xffffffffaaaaaaabULL, 0xffffffffaaaaaaabULL, },    /*  16  */
+        { 0x00000000aaaaaaaaULL, 0x00000000aaaaaaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000055555555ULL, 0x0000000055555555ULL, },
+        { 0xffffffffdddddddeULL, 0xffffffffdddddddeULL, },
+        { 0x0000000077777777ULL, 0x0000000077777777ULL, },
+        { 0x000000001c71c71cULL, 0xffffffffc71c71c7ULL, },
+        { 0x0000000038e38e39ULL, 0x000000008e38e38eULL, },
+        { 0xffffffff55555556ULL, 0xffffffff55555556ULL, },    /*  24  */
+        { 0x0000000055555555ULL, 0x0000000055555555ULL, },
+        { 0xffffffffaaaaaaabULL, 0xffffffffaaaaaaabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffff88888889ULL, 0xffffffff88888889ULL, },
+        { 0x0000000022222222ULL, 0x0000000022222222ULL, },
+        { 0xffffffffc71c71c7ULL, 0xffffffff71c71c72ULL, },
+        { 0xffffffffe38e38e4ULL, 0x0000000038e38e39ULL, },
+        { 0xffffffffcccccccdULL, 0xffffffffcccccccdULL, },    /*  32  */
+        { 0x00000000ccccccccULL, 0x00000000ccccccccULL, },
+        { 0x0000000022222222ULL, 0x0000000022222222ULL, },
+        { 0x0000000077777777ULL, 0x0000000077777777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000000099999999ULL, 0x0000000099999999ULL, },
+        { 0x000000003e93e93eULL, 0xffffffffe93e93e9ULL, },
+        { 0x000000005b05b05bULL, 0x00000000b05b05b0ULL, },
+        { 0xffffffff33333334ULL, 0xffffffff33333334ULL, },    /*  40  */
+        { 0x0000000033333333ULL, 0x0000000033333333ULL, },
+        { 0xffffffff88888889ULL, 0xffffffff88888889ULL, },
+        { 0xffffffffdddddddeULL, 0xffffffffdddddddeULL, },
+        { 0xffffffff66666667ULL, 0xffffffff66666667ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffffffffa4fa4fa5ULL, 0xffffffff4fa4fa50ULL, },
+        { 0xffffffffc16c16c2ULL, 0x0000000016c16c17ULL, },
+        { 0xffffffffe38e38e4ULL, 0xffffffff38e38e39ULL, },    /*  48  */
+        { 0x00000000e38e38e3ULL, 0x0000000038e38e38ULL, },
+        { 0x0000000038e38e39ULL, 0xffffffff8e38e38eULL, },
+        { 0x000000008e38e38eULL, 0xffffffffe38e38e3ULL, },
+        { 0x0000000016c16c17ULL, 0xffffffff6c16c16cULL, },
+        { 0x00000000b05b05b0ULL, 0x0000000005b05b05ULL, },
+        { 0x0000000055555555ULL, 0xffffffff55555555ULL, },
+        { 0x0000000071c71c72ULL, 0x000000001c71c71cULL, },
+        { 0xffffffff1c71c71dULL, 0xffffffffc71c71c8ULL, },    /*  56  */
+        { 0x000000001c71c71cULL, 0x00000000c71c71c7ULL, },
+        { 0xffffffff71c71c72ULL, 0x000000001c71c71dULL, },
+        { 0xffffffffc71c71c7ULL, 0x0000000071c71c72ULL, },
+        { 0xffffffff4fa4fa50ULL, 0xfffffffffa4fa4fbULL, },
+        { 0xffffffffe93e93e9ULL, 0x0000000093e93e94ULL, },
+        { 0xffffffff8e38e38eULL, 0xffffffffe38e38e4ULL, },
+        { 0xffffffffaaaaaaabULL, 0x00000000aaaaaaabULL, },
+        { 0x000000006008918cULL, 0xffffffff4ceb5b52ULL, },    /*  64  */
+        { 0x000000003ad71fc4ULL, 0x000000003627b862ULL, },
+        { 0xffffffffce9b5b4cULL, 0xffffffffa03be64aULL, },
+        { 0x000000002a39047eULL, 0xffffffffa22428beULL, },
+        { 0x00000000d35bab23ULL, 0xffffffff147c0b0eULL, },
+        { 0x00000000ae2a395bULL, 0xfffffffffdb8681eULL, },
+        { 0x0000000041ee74e3ULL, 0xffffffff67cc9606ULL, },
+        { 0x000000009d8c1e15ULL, 0xffffffff69b4d87aULL, },
+        { 0x0000000083f8596aULL, 0xffffffff295d16f3ULL, },    /*  72  */
+        { 0x000000005ec6e7a2ULL, 0x0000000012997403ULL, },
+        { 0xfffffffff28b232aULL, 0xffffffff7cada1ebULL, },
+        { 0x000000004e28cc5cULL, 0xffffffff7e95e45fULL, },
+        { 0x0000000047ecc10dULL, 0xffffffff8f75d8ccULL, },
+        { 0x0000000022bb4f45ULL, 0x0000000078b235dcULL, },
+        { 0xffffffffb67f8acdULL, 0xffffffffe2c663c4ULL, },
+        { 0x00000000121d33ffULL, 0xffffffffe4aea638ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_U_D(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_U_D(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_h.c
new file mode 100644
index 0000000000..433ff08f60
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_h.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction HSUB_U.H
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "HSUB_U.H";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, },
+        { 0x0055005500550055ULL, 0x0055005500550055ULL, },
+        { 0x00aa00aa00aa00aaULL, 0x00aa00aa00aa00aaULL, },
+        { 0x0033003300330033ULL, 0x0033003300330033ULL, },
+        { 0x00cc00cc00cc00ccULL, 0x00cc00cc00cc00ccULL, },
+        { 0x0071001c00c70071ULL, 0x001c00c70071001cULL, },
+        { 0x008e00e30038008eULL, 0x00e30038008e00e3ULL, },
+        { 0xff01ff01ff01ff01ULL, 0xff01ff01ff01ff01ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xff56ff56ff56ff56ULL, 0xff56ff56ff56ff56ULL, },
+        { 0xffabffabffabffabULL, 0xffabffabffabffabULL, },
+        { 0xff34ff34ff34ff34ULL, 0xff34ff34ff34ff34ULL, },
+        { 0xffcdffcdffcdffcdULL, 0xffcdffcdffcdffcdULL, },
+        { 0xff72ff1dffc8ff72ULL, 0xff1dffc8ff72ff1dULL, },
+        { 0xff8fffe4ff39ff8fULL, 0xffe4ff39ff8fffe4ULL, },
+        { 0xffabffabffabffabULL, 0xffabffabffabffabULL, },    /*  16  */
+        { 0x00aa00aa00aa00aaULL, 0x00aa00aa00aa00aaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0055005500550055ULL, 0x0055005500550055ULL, },
+        { 0xffdeffdeffdeffdeULL, 0xffdeffdeffdeffdeULL, },
+        { 0x0077007700770077ULL, 0x0077007700770077ULL, },
+        { 0x001cffc70072001cULL, 0xffc70072001cffc7ULL, },
+        { 0x0039008effe30039ULL, 0x008effe30039008eULL, },
+        { 0xff56ff56ff56ff56ULL, 0xff56ff56ff56ff56ULL, },    /*  24  */
+        { 0x0055005500550055ULL, 0x0055005500550055ULL, },
+        { 0xffabffabffabffabULL, 0xffabffabffabffabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xff89ff89ff89ff89ULL, 0xff89ff89ff89ff89ULL, },
+        { 0x0022002200220022ULL, 0x0022002200220022ULL, },
+        { 0xffc7ff72001dffc7ULL, 0xff72001dffc7ff72ULL, },
+        { 0xffe40039ff8effe4ULL, 0x0039ff8effe40039ULL, },
+        { 0xffcdffcdffcdffcdULL, 0xffcdffcdffcdffcdULL, },    /*  32  */
+        { 0x00cc00cc00cc00ccULL, 0x00cc00cc00cc00ccULL, },
+        { 0x0022002200220022ULL, 0x0022002200220022ULL, },
+        { 0x0077007700770077ULL, 0x0077007700770077ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0099009900990099ULL, 0x0099009900990099ULL, },
+        { 0x003effe90094003eULL, 0xffe90094003effe9ULL, },
+        { 0x005b00b00005005bULL, 0x00b00005005b00b0ULL, },
+        { 0xff34ff34ff34ff34ULL, 0xff34ff34ff34ff34ULL, },    /*  40  */
+        { 0x0033003300330033ULL, 0x0033003300330033ULL, },
+        { 0xff89ff89ff89ff89ULL, 0xff89ff89ff89ff89ULL, },
+        { 0xffdeffdeffdeffdeULL, 0xffdeffdeffdeffdeULL, },
+        { 0xff67ff67ff67ff67ULL, 0xff67ff67ff67ff67ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffa5ff50fffbffa5ULL, 0xff50fffbffa5ff50ULL, },
+        { 0xffc20017ff6cffc2ULL, 0x0017ff6cffc20017ULL, },
+        { 0xffe4ff39ff8fffe4ULL, 0xff39ff8fffe4ff39ULL, },    /*  48  */
+        { 0x00e30038008e00e3ULL, 0x0038008e00e30038ULL, },
+        { 0x0039ff8effe40039ULL, 0xff8effe40039ff8eULL, },
+        { 0x008effe30039008eULL, 0xffe30039008effe3ULL, },
+        { 0x0017ff6cffc20017ULL, 0xff6cffc20017ff6cULL, },
+        { 0x00b00005005b00b0ULL, 0x0005005b00b00005ULL, },
+        { 0x0055ff5500560055ULL, 0xff5500560055ff55ULL, },
+        { 0x0072001cffc70072ULL, 0x001cffc70072001cULL, },
+        { 0xff1dffc8ff72ff1dULL, 0xffc8ff72ff1dffc8ULL, },    /*  56  */
+        { 0x001c00c70071001cULL, 0x00c70071001c00c7ULL, },
+        { 0xff72001dffc7ff72ULL, 0x001dffc7ff72001dULL, },
+        { 0xffc70072001cffc7ULL, 0x0072001cffc70072ULL, },
+        { 0xff50fffbffa5ff50ULL, 0xfffbffa5ff50fffbULL, },
+        { 0xffe90094003effe9ULL, 0x0094003effe90094ULL, },
+        { 0xff8effe40039ff8eULL, 0xffe40039ff8effe4ULL, },
+        { 0xffab00abffaaffabULL, 0x00abffaaffab00abULL, },
+        { 0x001e001affc60015ULL, 0xffe4ffad008300a4ULL, },    /*  64  */
+        { 0xffca0083ff95004dULL, 0xff54fff100bfffb4ULL, },
+        { 0x002e003cff59ffd5ULL, 0xff73ff0c00d3009cULL, },
+        { 0x00390099fff70007ULL, 0xff5aff3300bc0010ULL, },
+        { 0x0091ff34ffeb0087ULL, 0xffab005dff9a0046ULL, },
+        { 0x003dff9dffba00bfULL, 0xff1b00a1ffd6ff56ULL, },
+        { 0x00a1ff56ff7e0047ULL, 0xff3affbcffea003eULL, },
+        { 0x00acffb3001c0079ULL, 0xff21ffe3ffd3ffb2ULL, },
+        { 0x0042ffe20057004bULL, 0xffc0006800300019ULL, },    /*  72  */
+        { 0xffee004b00260083ULL, 0xff3000ac006cff29ULL, },
+        { 0x00520004ffea000bULL, 0xff4fffc700800011ULL, },
+        { 0x005d00610088003dULL, 0xff36ffee0069ff85ULL, },
+        { 0x0006ff4afffc00a2ULL, 0x0026002a002e00d6ULL, },
+        { 0xffb2ffb3ffcb00daULL, 0xff96006e006affe6ULL, },
+        { 0x0016ff6cff8f0062ULL, 0xffb5ff89007e00ceULL, },
+        { 0x0021ffc9002d0094ULL, 0xff9cffb000670042ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_U_H(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_U_H(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_w.c
new file mode 100644
index 0000000000..3c77c0fcda
--- /dev/null
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_hsub_u_w.c
@@ -0,0 +1,155 @@
+/*
+ *  Test program for MSA instruction HSUB_U.W
+ *
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <sys/time.h>
+#include <stdint.h>
+
+#include "../../../../include/wrappers_msa.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
+
+#define TEST_COUNT_TOTAL (                                                \
+            (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
+            (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
+
+
+int32_t main(void)
+{
+    char *instruction_name = "HSUB_U.W";
+    int32_t ret;
+    uint32_t i, j;
+    struct timeval start, end;
+    double elapsed_time;
+
+    uint64_t b128_result[TEST_COUNT_TOTAL][2];
+    uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },    /*   0  */
+        { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, },
+        { 0x0000555500005555ULL, 0x0000555500005555ULL, },
+        { 0x0000aaaa0000aaaaULL, 0x0000aaaa0000aaaaULL, },
+        { 0x0000333300003333ULL, 0x0000333300003333ULL, },
+        { 0x0000cccc0000ccccULL, 0x0000cccc0000ccccULL, },
+        { 0x0000c71c00001c71ULL, 0x000071c70000c71cULL, },
+        { 0x000038e30000e38eULL, 0x00008e38000038e3ULL, },
+        { 0xffff0001ffff0001ULL, 0xffff0001ffff0001ULL, },    /*   8  */
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffff5556ffff5556ULL, 0xffff5556ffff5556ULL, },
+        { 0xffffaaabffffaaabULL, 0xffffaaabffffaaabULL, },
+        { 0xffff3334ffff3334ULL, 0xffff3334ffff3334ULL, },
+        { 0xffffcccdffffcccdULL, 0xffffcccdffffcccdULL, },
+        { 0xffffc71dffff1c72ULL, 0xffff71c8ffffc71dULL, },
+        { 0xffff38e4ffffe38fULL, 0xffff8e39ffff38e4ULL, },
+        { 0xffffaaabffffaaabULL, 0xffffaaabffffaaabULL, },    /*  16  */
+        { 0x0000aaaa0000aaaaULL, 0x0000aaaa0000aaaaULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000555500005555ULL, 0x0000555500005555ULL, },
+        { 0xffffdddeffffdddeULL, 0xffffdddeffffdddeULL, },
+        { 0x0000777700007777ULL, 0x0000777700007777ULL, },
+        { 0x000071c7ffffc71cULL, 0x00001c72000071c7ULL, },
+        { 0xffffe38e00008e39ULL, 0x000038e3ffffe38eULL, },
+        { 0xffff5556ffff5556ULL, 0xffff5556ffff5556ULL, },    /*  24  */
+        { 0x0000555500005555ULL, 0x0000555500005555ULL, },
+        { 0xffffaaabffffaaabULL, 0xffffaaabffffaaabULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xffff8889ffff8889ULL, 0xffff8889ffff8889ULL, },
+        { 0x0000222200002222ULL, 0x0000222200002222ULL, },
+        { 0x00001c72ffff71c7ULL, 0xffffc71d00001c72ULL, },
+        { 0xffff8e39000038e4ULL, 0xffffe38effff8e39ULL, },
+        { 0xffffcccdffffcccdULL, 0xffffcccdffffcccdULL, },    /*  32  */
+        { 0x0000cccc0000ccccULL, 0x0000cccc0000ccccULL, },
+        { 0x0000222200002222ULL, 0x0000222200002222ULL, },
+        { 0x0000777700007777ULL, 0x0000777700007777ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0x0000999900009999ULL, 0x0000999900009999ULL, },
+        { 0x000093e9ffffe93eULL, 0x00003e94000093e9ULL, },
+        { 0x000005b00000b05bULL, 0x00005b05000005b0ULL, },
+        { 0xffff3334ffff3334ULL, 0xffff3334ffff3334ULL, },    /*  40  */
+        { 0x0000333300003333ULL, 0x0000333300003333ULL, },
+        { 0xffff8889ffff8889ULL, 0xffff8889ffff8889ULL, },
+        { 0xffffdddeffffdddeULL, 0xffffdddeffffdddeULL, },
+        { 0xffff6667ffff6667ULL, 0xffff6667ffff6667ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
+        { 0xfffffa50ffff4fa5ULL, 0xffffa4fbfffffa50ULL, },
+        { 0xffff6c17000016c2ULL, 0xffffc16cffff6c17ULL, },
+        { 0xffffe38fffff8e39ULL, 0xffff38e4ffffe38fULL, },    /*  48  */
+        { 0x0000e38e00008e38ULL, 0x000038e30000e38eULL, },
+        { 0x000038e4ffffe38eULL, 0xffff8e39000038e4ULL, },
+        { 0x00008e39000038e3ULL, 0xffffe38e00008e39ULL, },
+        { 0x000016c2ffffc16cULL, 0xffff6c17000016c2ULL, },
+        { 0x0000b05b00005b05ULL, 0x000005b00000b05bULL, },
+        { 0x0000aaabffffaaaaULL, 0xffffaaab0000aaabULL, },
+        { 0x00001c72000071c7ULL, 0xffffc71c00001c72ULL, },
+        { 0xffff1c72ffff71c8ULL, 0xffffc71dffff1c72ULL, },    /*  56  */
+        { 0x00001c71000071c7ULL, 0x0000c71c00001c71ULL, },
+        { 0xffff71c7ffffc71dULL, 0x00001c72ffff71c7ULL, },
+        { 0xffffc71c00001c72ULL, 0x000071c7ffffc71cULL, },
+        { 0xffff4fa5ffffa4fbULL, 0xfffffa50ffff4fa5ULL, },
+        { 0xffffe93e00003e94ULL, 0x000093e9ffffe93eULL, },
+        { 0xffffe38effff8e39ULL, 0x000038e4ffffe38eULL, },
+        { 0xffff555500005556ULL, 0x00005555ffff5555ULL, },
+        { 0xffffa19effffd322ULL, 0x0000400900004e6fULL, },    /*  64  */
+        { 0x00008807ffff615aULL, 0xffff904d0000ab7fULL, },
+        { 0xffffd9c0ffff9ce2ULL, 0xffff84680000d967ULL, },
+        { 0x0000721dffff4614ULL, 0xffffc28f00001bdbULL, },
+        { 0x000014f2fffff853ULL, 0x00000799ffff6533ULL, },
+        { 0x0000fb5bffff868bULL, 0xffff57ddffffc243ULL, },
+        { 0x00004d14ffffc213ULL, 0xffff4bf8fffff02bULL, },
+        { 0x0000e571ffff6b45ULL, 0xffff8a1fffff329fULL, },
+        { 0xffffc58e0000648fULL, 0x00001c7afffffb1fULL, },    /*  72  */
+        { 0x0000abf7fffff2c7ULL, 0xffff6cbe0000582fULL, },
+        { 0xfffffdb000002e4fULL, 0xffff60d900008617ULL, },
+        { 0x0000960dffffd781ULL, 0xffff9f00ffffc88bULL, },
+        { 0xffff8983000008f1ULL, 0x00008293fffff936ULL, },
+        { 0x00006fecffff9729ULL, 0xffffd2d700005646ULL, },
+        { 0xffffc1a5ffffd2b1ULL, 0xffffc6f20000842eULL, },
+        { 0x00005a02ffff7be3ULL, 0x00000519ffffc6a2ULL, },
+};
+
+    gettimeofday(&start, NULL);
+
+    for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_U_W(b128_pattern[i], b128_pattern[j],
+                           b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
+        for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
+            do_msa_HSUB_U_W(b128_random[i], b128_random[j],
+                           b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
+                                        (PATTERN_INPUTS_SHORT_COUNT)) +
+                                       RANDOM_INPUTS_SHORT_COUNT * i + j]);
+        }
+    }
+
+    gettimeofday(&end, NULL);
+
+    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
+    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
+
+    ret = check_results(instruction_name, TEST_COUNT_TOTAL, elapsed_time,
+                        &b128_result[0][0], &b128_expect[0][0]);
+
+    return ret;
+}
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_b.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_b.c
index 04e6159fc7..8855153947 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_S.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x800b9880809ea980ULL, 0x7fe73e2702e94374ULL, },
         { 0x7fe5307f36cf8d0eULL, 0x808a8080abc73294ULL, },
         { 0x757f16ea117f1b46ULL, 0x80facdbe940390a4ULL, },
+        { 0x7ff5687f7f62577fULL, 0x8019c2d9fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_d.c
index 195137f41f..d1cc4e3c08 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_S.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x8000000000000000ULL, 0x7fffffffffffffffULL, },
         { 0x7fffffffffffffffULL, 0x8000000000000000ULL, },
         { 0x749115ea109e1b46ULL, 0x8000000000000000ULL, },
+        { 0x7fffffffffffffffULL, 0x8000000000000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_h.c
index c57238d31a..e2457937c6 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_S.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x8000985d8000a932ULL, 0x7fff3e2701e94274ULL, },
         { 0x7fff2f8135cf8d0eULL, 0x80008000aac73294ULL, },
         { 0x749115ea109e1b46ULL, 0x8000cdbe94038fa4ULL, },
+        { 0x7fff67a37fff56ceULL, 0x8000c1d9fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_w.c
index 1cded65a7e..460de6a060 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_s_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_S.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x8000000080000000ULL, 0x7fffffff01e84274ULL, },
         { 0x7fffffff35cf8d0eULL, 0x80000000aac73294ULL, },
         { 0x749115ea109e1b46ULL, 0x8000000094038fa4ULL, },
+        { 0x7fffffff7fffffffULL, 0x80000000fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_b.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_b.c
index cb38f033a6..8039643ba3 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_U.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b985d5b9e0032ULL, 0x00003e2702000000ULL, },
         { 0x0000000036008d0eULL, 0x428a7d7a00003294ULL, },
         { 0x0000160011001b46ULL, 0x7b0000be94039000ULL, },
+        { 0x0000000000005700ULL, 0x661900000017bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_d.c
index 2685b2fe7e..4b5830b48d 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_U.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b985d5b9da932ULL, 0x0000000000000000ULL, },
         { 0x0000000000000000ULL, 0x428a7d79aac73294ULL, },
         { 0x0000000000000000ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0x0000000000000000ULL, 0x6618c1d8fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_h.c
index ca6dd38b69..dd3a0d5f86 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_U.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b985d5b9e0000ULL, 0x00003e2701e90000ULL, },
         { 0x0000000035cf8d0eULL, 0x428a7d7a00003294ULL, },
         { 0x000015ea109e1b46ULL, 0x7afa000094038fa4ULL, },
+        { 0x00000000000056ceULL, 0x661900000000bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_w.c
index 42ebddb408..690287546c 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subs_u_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBS_U.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b985d5b9da932ULL, 0x0000000001e84274ULL, },
         { 0x0000000035cf8d0eULL, 0x428a7d7a00000000ULL, },
         { 0x00000000109e1b46ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0x0000000000000000ULL, 0x6618c1d900000000ULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_b.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_b.c
index dac20cc769..370952da25 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUS_U.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b985d5b9ea932ULL, 0x9ae7ffffff004374ULL, },
         { 0xe800308136008d0eULL, 0x428a7d7aab00ff94ULL, },
         { 0x75911600119eff46ULL, 0x7bfacdbe940390a4ULL, },
+        { 0xc40068a3a562ffceULL, 0x66ffc2d9fe17bd8cULL, },
+        { 0x000000000000ff00ULL, 0xffffffffff00ffffULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_d.c
index 4485502c1c..4ba3b59adb 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUS_U.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b985d5b9da932ULL, 0x99e73e2701e84274ULL, },
         { 0xe7e42f8135cf8d0eULL, 0x428a7d79aac73294ULL, },
         { 0x749115ea109e1b46ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0xc3f467a2a46256ceULL, 0x6618c1d8fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0xffffffffffffffffULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_h.c
index 9e99aeefc5..12a28ecbc4 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUS_U.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b985d5b9ea932ULL, 0x99e7ffffffff4274ULL, },
         { 0xe7e52f8135cf8d0eULL, 0x428a7d7aaac7ffffULL, },
         { 0x749115ea109effffULL, 0x7afacdbe94038fa4ULL, },
+        { 0xc3f567a3a462ffffULL, 0x6619c1d9fe17bd8cULL, },
+        { 0x000000000000ffffULL, 0xffffffffffffffffULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_w.c
index 53a9acac1b..fab5452394 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsus_u_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUS_U.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b985d5b9da932ULL, 0x99e73e27ffffffffULL, },
         { 0xe7e42f8135cf8d0eULL, 0x428a7d7aaac73294ULL, },
         { 0x749115ea109e1b46ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0xc3f467a3a46256ceULL, 0x6618c1d9fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0xffffffffffffffffULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_b.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_b.c
index 86fb4f3e26..02245099de 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUU_S.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b7f5d5b7fa932ULL, 0x9ae73e2702e98080ULL, },
         { 0xe8e5808136cf7f0eULL, 0x427f7d7aabc7327fULL, },
         { 0x809116ea119e1b46ULL, 0x7bfacd7f7f037fa4ULL, },
+        { 0xc4f580a3a58057ceULL, 0x6619c2d9fe177f7fULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_d.c
index 45a1eb3094..8bdfe05b79 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUU_S.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b985d5b9da932ULL, 0x99e73e2701e84274ULL, },
         { 0xe7e42f8135cf8d0eULL, 0x428a7d79aac73294ULL, },
         { 0x8000000000000000ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0xc3f467a2a46256ceULL, 0x6618c1d8fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_h.c
index 14ac7def29..420bfc7476 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUU_S.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b7fff5b9ea932ULL, 0x99e73e2701e98000ULL, },
         { 0xe7e5800035cf7fffULL, 0x428a7d7aaac73294ULL, },
         { 0x800015ea109e1b46ULL, 0x7afacdbe7fff7fffULL, },
+        { 0xc3f58000a46256ceULL, 0x6619c1d9fe177fffULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_w.c
index 688f469cd0..321faf9de6 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subsuu_s_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBSUU_S.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b985d5b9da932ULL, 0x99e73e2701e84274ULL, },
         { 0xe7e42f8135cf8d0eULL, 0x428a7d7aaac73294ULL, },
         { 0x80000000109e1b46ULL, 0x7af9cdbe7fffffffULL, },
+        { 0xc3f467a3a46256ceULL, 0x6618c1d9fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_b.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_b.c
index d0964dcd59..6fc4f66480 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_b.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_b.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBV.B
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b985d5b9ea932ULL, 0x9ae73e2702e94374ULL, },
         { 0xe8e5308136cf8d0eULL, 0x428a7d7aabc73294ULL, },
         { 0x759116ea119e1b46ULL, 0x7bfacdbe940390a4ULL, },
+        { 0xc4f568a3a56257ceULL, 0x6619c2d9fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_d.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_d.c
index ec26a8e0c6..32eeff91f7 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_d.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_d.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBV.D
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b985d5b9da932ULL, 0x99e73e2701e84274ULL, },
         { 0xe7e42f8135cf8d0eULL, 0x428a7d79aac73294ULL, },
         { 0x749115ea109e1b46ULL, 0x7af9cdbe94038fa4ULL, },
+        { 0xc3f467a2a46256ceULL, 0x6618c1d8fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_h.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_h.c
index 420422ecf1..e30b195412 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_h.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_h.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBV.H
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
@@ -119,6 +121,8 @@ int32_t main(void)
         { 0x3c0b985d5b9ea932ULL, 0x99e73e2701e94274ULL, },
         { 0xe7e52f8135cf8d0eULL, 0x428a7d7aaac73294ULL, },
         { 0x749115ea109e1b46ULL, 0x7afacdbe94038fa4ULL, },
+        { 0xc3f567a3a46256ceULL, 0x6619c1d9fe17bd8cULL, },
+        { 0x0000000000000000ULL, 0x0000000000000000ULL, },
 };
 
     gettimeofday(&start, NULL);
diff --git a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_w.c b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_w.c
index 3e97005815..09d49cb057 100644
--- a/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_w.c
+++ b/tests/tcg/mips/user/ase/msa/int-subtract/test_msa_subv_w.c
@@ -1,8 +1,10 @@
 /*
  *  Test program for MSA instruction SUBV.W
  *
- *  Copyright (C) 2018  Wave Computing, Inc.
- *  Copyright (C) 2018  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
+ *  Copyright (C) 2019  Wave Computing, Inc.
+ *  Copyright (C) 2019  Aleksandar Markovic <amarkovic@wavecomp.com>
+ *  Copyright (C) 2019  RT-RK Computer Based Systems LLC
+ *  Copyright (C) 2019  Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -23,8 +25,8 @@
 #include <stdint.h>
 
 #include "../../../../include/wrappers_msa.h"
-#include "../../../../include/test_inputs.h"
-#include "../../../../include/test_utils.h"
+#include "../../../../include/test_inputs_128.h"
+#include "../../../../include/test_utils_128.h"
 
 #define TEST_COUNT_TOTAL (                                                \
             (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_b.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_b.c
index d720dc30a5..c61091ec00 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_b.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_b.c
@@ -33,7 +33,7 @@
 
 int32_t main(void)
 {
-    char *instruction_name = "*ILVEV.B";
+    char *instruction_name = "ILVEV.B";
     int32_t ret;
     uint32_t i, j;
     struct timeval start, end;
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_d.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_d.c
index 83239949af..f67ed281e8 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_d.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_d.c
@@ -33,7 +33,7 @@
 
 int32_t main(void)
 {
-    char *instruction_name = "*ILVEV.D";
+    char *instruction_name = "ILVEV.D";
     int32_t ret;
     uint32_t i, j;
     struct timeval start, end;
diff --git a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_h.c b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_h.c
index 3f6fc265d2..e760835fac 100644
--- a/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_h.c
+++ b/tests/tcg/mips/user/ase/msa/interleave/test_msa_ilvev_h.c
@@ -33,7 +33,7 @@
 
 int32_t main(void)
 {
-    char *instruction_name = "*ILVEV.H";
+    char *instruction_name = "ILVEV.H";
     int32_t ret;
     uint32_t i, j;
     struct timeval start, end;
diff --git a/tests/tcg/ppc/Makefile.include b/tests/tcg/ppc/Makefile.include
index b062c30dd3..ae01fb8fad 100644
--- a/tests/tcg/ppc/Makefile.include
+++ b/tests/tcg/ppc/Makefile.include
@@ -1,6 +1,9 @@
 ifeq ($(TARGET_NAME),ppc)
 DOCKER_IMAGE=debian-powerpc-cross
 DOCKER_CROSS_COMPILER=powerpc-linux-gnu-gcc
+else ifeq ($(TARGET_NAME),ppc64)
+DOCKER_IMAGE=debian-ppc64-cross
+DOCKER_CROSS_COMPILER=powerpc64-linux-gnu-gcc
 else ifeq ($(TARGET_NAME),ppc64le)
 DOCKER_IMAGE=debian-ppc64el-cross
 DOCKER_CROSS_COMPILER=powerpc64le-linux-gnu-gcc
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index eda90750eb..12e2ecf517 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -206,7 +206,7 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
 
     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
                               &error_abort);
     s = bs->opaque;
@@ -290,7 +290,7 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive)
     BlockBackend *blk;
     BlockDriverState *bs, *backing;
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
                               &error_abort);
     blk_insert_bs(blk, bs, &error_abort);
@@ -353,7 +353,7 @@ static void test_nested(void)
     BDRVTestState *s, *backing_s;
     enum drain_type outer, inner;
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
                               &error_abort);
     s = bs->opaque;
@@ -402,13 +402,13 @@ static void test_multiparent(void)
     BlockDriverState *bs_a, *bs_b, *backing;
     BDRVTestState *a_s, *b_s, *backing_s;
 
-    blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
                                 &error_abort);
     a_s = bs_a->opaque;
     blk_insert_bs(blk_a, bs_a, &error_abort);
 
-    blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
                                 &error_abort);
     b_s = bs_b->opaque;
@@ -475,13 +475,13 @@ static void test_graph_change_drain_subtree(void)
     BlockDriverState *bs_a, *bs_b, *backing;
     BDRVTestState *a_s, *b_s, *backing_s;
 
-    blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
                                 &error_abort);
     a_s = bs_a->opaque;
     blk_insert_bs(blk_a, bs_a, &error_abort);
 
-    blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
                                 &error_abort);
     b_s = bs_b->opaque;
@@ -555,7 +555,7 @@ static void test_graph_change_drain_all(void)
     BDRVTestState *a_s, *b_s;
 
     /* Create node A with a BlockBackend */
-    blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
                                 &error_abort);
     a_s = bs_a->opaque;
@@ -571,7 +571,7 @@ static void test_graph_change_drain_all(void)
     g_assert_cmpint(a_s->drain_count, ==, 1);
 
     /* Create node B with a BlockBackend */
-    blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
                                 &error_abort);
     b_s = bs_b->opaque;
@@ -672,13 +672,13 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
         goto out;
     }
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
                               &error_abort);
     s = bs->opaque;
     blk_insert_bs(blk, bs, &error_abort);
 
-    blk_set_aio_context(blk, ctx_a);
+    blk_set_aio_context(blk, ctx_a, &error_abort);
     aio_context_acquire(ctx_a);
 
     s->bh_indirection_ctx = ctx_b;
@@ -742,7 +742,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
     }
 
     aio_context_acquire(ctx_a);
-    blk_set_aio_context(blk, qemu_get_aio_context());
+    blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort);
     aio_context_release(ctx_a);
 
     bdrv_unref(bs);
@@ -883,7 +883,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
     bdrv_set_backing_hd(src, src_backing, &error_abort);
     bdrv_unref(src_backing);
 
-    blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_src = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     blk_insert_bs(blk_src, src_overlay, &error_abort);
 
     switch (drain_node) {
@@ -903,15 +903,16 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
     if (use_iothread) {
         iothread = iothread_new();
         ctx = iothread_get_aio_context(iothread);
-        blk_set_aio_context(blk_src, ctx);
+        blk_set_aio_context(blk_src, ctx, &error_abort);
     } else {
         ctx = qemu_get_aio_context();
     }
 
     target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR,
                                   &error_abort);
-    blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk_target = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     blk_insert_bs(blk_target, target, &error_abort);
+    blk_set_allow_aio_context_change(blk_target, true);
 
     aio_context_acquire(ctx);
     tjob = block_job_create("job0", &test_job_driver, NULL, src,
@@ -972,7 +973,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
     g_assert_false(job->job.paused);
     g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
 
-    do_drain_begin(drain_type, target);
+    do_drain_begin_unlocked(drain_type, target);
 
     if (drain_type == BDRV_DRAIN_ALL) {
         /* bdrv_drain_all() drains both src and target */
@@ -983,7 +984,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
     g_assert_true(job->job.paused);
     g_assert_false(job->job.busy); /* The job is paused */
 
-    do_drain_end(drain_type, target);
+    do_drain_end_unlocked(drain_type, target);
 
     if (use_iothread) {
         /* paused is reset in the I/O thread, wait for it */
@@ -1001,7 +1002,8 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
     g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO));
 
     if (use_iothread) {
-        blk_set_aio_context(blk_src, qemu_get_aio_context());
+        blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort);
+        assert(blk_get_aio_context(blk_target) == qemu_get_aio_context());
     }
     aio_context_release(ctx);
 
@@ -1205,7 +1207,7 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete,
                         &error_abort);
     bdrv_attach_child(bs, null_bs, "null-child", &child_file, &error_abort);
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     blk_insert_bs(blk, bs, &error_abort);
 
     /* Referenced by blk now */
@@ -1368,7 +1370,7 @@ static void test_detach_indirect(bool by_parent_cb)
     c = bdrv_new_open_driver(&bdrv_test, "c", BDRV_O_RDWR, &error_abort);
 
     /* blk is a BB for parent-a */
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     blk_insert_bs(blk, parent_a, &error_abort);
     bdrv_unref(parent_a);
 
@@ -1436,12 +1438,6 @@ static void test_detach_indirect(bool by_parent_cb)
     bdrv_unref(parent_b);
     blk_unref(blk);
 
-    /* XXX Once bdrv_close() unref's children instead of just detaching them,
-     * this won't be necessary any more. */
-    bdrv_unref(a);
-    bdrv_unref(a);
-    bdrv_unref(c);
-
     g_assert_cmpint(a->refcnt, ==, 1);
     g_assert_cmpint(b->refcnt, ==, 1);
     g_assert_cmpint(c->refcnt, ==, 1);
@@ -1466,7 +1462,7 @@ static void test_append_to_drained(void)
     BlockDriverState *base, *overlay;
     BDRVTestState *base_s, *overlay_s;
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     base = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
     base_s = base->opaque;
     blk_insert_bs(blk, base, &error_abort);
@@ -1513,16 +1509,16 @@ static void test_set_aio_context(void)
                               &error_abort);
 
     bdrv_drained_begin(bs);
-    bdrv_set_aio_context(bs, ctx_a);
+    bdrv_try_set_aio_context(bs, ctx_a, &error_abort);
 
     aio_context_acquire(ctx_a);
     bdrv_drained_end(bs);
 
     bdrv_drained_begin(bs);
-    bdrv_set_aio_context(bs, ctx_b);
+    bdrv_try_set_aio_context(bs, ctx_b, &error_abort);
     aio_context_release(ctx_a);
     aio_context_acquire(ctx_b);
-    bdrv_set_aio_context(bs, qemu_get_aio_context());
+    bdrv_try_set_aio_context(bs, qemu_get_aio_context(), &error_abort);
     aio_context_release(ctx_b);
     bdrv_drained_end(bs);
 
diff --git a/tests/test-bdrv-graph-mod.c b/tests/test-bdrv-graph-mod.c
index 283dc84869..cfeec36566 100644
--- a/tests/test-bdrv-graph-mod.c
+++ b/tests/test-bdrv-graph-mod.c
@@ -102,7 +102,8 @@ static void test_update_perm_tree(void)
 {
     Error *local_err = NULL;
 
-    BlockBackend *root = blk_new(BLK_PERM_WRITE | BLK_PERM_CONSISTENT_READ,
+    BlockBackend *root = blk_new(qemu_get_aio_context(),
+                                 BLK_PERM_WRITE | BLK_PERM_CONSISTENT_READ,
                                  BLK_PERM_ALL & ~BLK_PERM_WRITE);
     BlockDriverState *bs = no_perm_node("node");
     BlockDriverState *filter = pass_through_node("filter");
@@ -116,7 +117,6 @@ static void test_update_perm_tree(void)
     g_assert_nonnull(local_err);
     error_free(local_err);
 
-    bdrv_unref(bs);
     blk_unref(root);
 }
 
@@ -166,7 +166,7 @@ static void test_update_perm_tree(void)
  */
 static void test_should_update_child(void)
 {
-    BlockBackend *root = blk_new(0, BLK_PERM_ALL);
+    BlockBackend *root = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
     BlockDriverState *bs = no_perm_node("node");
     BlockDriverState *filter = no_perm_node("filter");
     BlockDriverState *target = no_perm_node("target");
diff --git a/tests/test-block-backend.c b/tests/test-block-backend.c
index fd59f02bd0..5b5d6845c0 100644
--- a/tests/test-block-backend.c
+++ b/tests/test-block-backend.c
@@ -37,7 +37,8 @@ static void test_drain_aio_error_flush_cb(void *opaque, int ret)
 
 static void test_drain_aio_error(void)
 {
-    BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    BlockBackend *blk = blk_new(qemu_get_aio_context(),
+                                BLK_PERM_ALL, BLK_PERM_ALL);
     BlockAIOCB *acb;
     bool completed = false;
 
@@ -53,7 +54,8 @@ static void test_drain_aio_error(void)
 
 static void test_drain_all_aio_error(void)
 {
-    BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    BlockBackend *blk = blk_new(qemu_get_aio_context(),
+                                BLK_PERM_ALL, BLK_PERM_ALL);
     BlockAIOCB *acb;
     bool completed = false;
 
diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c
index 59f692892e..79d9cf8a57 100644
--- a/tests/test-block-iothread.c
+++ b/tests/test-block-iothread.c
@@ -336,20 +336,20 @@ static void test_sync_op(const void *opaque)
     BlockDriverState *bs;
     BdrvChild *c;
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
     bs->total_sectors = 65536 / BDRV_SECTOR_SIZE;
     blk_insert_bs(blk, bs, &error_abort);
     c = QLIST_FIRST(&bs->parents);
 
-    blk_set_aio_context(blk, ctx);
+    blk_set_aio_context(blk, ctx, &error_abort);
     aio_context_acquire(ctx);
     t->fn(c);
     if (t->blkfn) {
         t->blkfn(blk);
     }
     aio_context_release(ctx);
-    blk_set_aio_context(blk, qemu_get_aio_context());
+    blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort);
 
     bdrv_unref(bs);
     blk_unref(blk);
@@ -415,7 +415,7 @@ static void test_attach_blockjob(void)
     BlockDriverState *bs;
     TestBlockJob *tjob;
 
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
     blk_insert_bs(blk, bs, &error_abort);
 
@@ -428,7 +428,7 @@ static void test_attach_blockjob(void)
         aio_poll(qemu_get_aio_context(), false);
     }
 
-    blk_set_aio_context(blk, ctx);
+    blk_set_aio_context(blk, ctx, &error_abort);
 
     tjob->n = 0;
     while (tjob->n == 0) {
@@ -436,7 +436,7 @@ static void test_attach_blockjob(void)
     }
 
     aio_context_acquire(ctx);
-    blk_set_aio_context(blk, qemu_get_aio_context());
+    blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort);
     aio_context_release(ctx);
 
     tjob->n = 0;
@@ -444,7 +444,7 @@ static void test_attach_blockjob(void)
         aio_poll(qemu_get_aio_context(), false);
     }
 
-    blk_set_aio_context(blk, ctx);
+    blk_set_aio_context(blk, ctx, &error_abort);
 
     tjob->n = 0;
     while (tjob->n == 0) {
@@ -453,7 +453,7 @@ static void test_attach_blockjob(void)
 
     aio_context_acquire(ctx);
     job_complete_sync(&tjob->common.job, &error_abort);
-    blk_set_aio_context(blk, qemu_get_aio_context());
+    blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort);
     aio_context_release(ctx);
 
     bdrv_unref(bs);
@@ -481,7 +481,7 @@ static void test_propagate_basic(void)
     QDict *options;
 
     /* Create bs_a and its BlockBackend */
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs_a = bdrv_new_open_driver(&bdrv_test, "bs_a", BDRV_O_RDWR, &error_abort);
     blk_insert_bs(blk, bs_a, &error_abort);
 
@@ -497,7 +497,7 @@ static void test_propagate_basic(void)
     bs_verify = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort);
 
     /* Switch the AioContext */
-    blk_set_aio_context(blk, ctx);
+    blk_set_aio_context(blk, ctx, &error_abort);
     g_assert(blk_get_aio_context(blk) == ctx);
     g_assert(bdrv_get_aio_context(bs_a) == ctx);
     g_assert(bdrv_get_aio_context(bs_verify) == ctx);
@@ -505,7 +505,7 @@ static void test_propagate_basic(void)
 
     /* Switch the AioContext back */
     ctx = qemu_get_aio_context();
-    blk_set_aio_context(blk, ctx);
+    blk_set_aio_context(blk, ctx, &error_abort);
     g_assert(blk_get_aio_context(blk) == ctx);
     g_assert(bdrv_get_aio_context(bs_a) == ctx);
     g_assert(bdrv_get_aio_context(bs_verify) == ctx);
@@ -561,11 +561,11 @@ static void test_propagate_diamond(void)
     qdict_put_str(options, "raw", "bs_c");
 
     bs_verify = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort);
-    blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     blk_insert_bs(blk, bs_verify, &error_abort);
 
     /* Switch the AioContext */
-    blk_set_aio_context(blk, ctx);
+    blk_set_aio_context(blk, ctx, &error_abort);
     g_assert(blk_get_aio_context(blk) == ctx);
     g_assert(bdrv_get_aio_context(bs_verify) == ctx);
     g_assert(bdrv_get_aio_context(bs_a) == ctx);
@@ -574,7 +574,7 @@ static void test_propagate_diamond(void)
 
     /* Switch the AioContext back */
     ctx = qemu_get_aio_context();
-    blk_set_aio_context(blk, ctx);
+    blk_set_aio_context(blk, ctx, &error_abort);
     g_assert(blk_get_aio_context(blk) == ctx);
     g_assert(bdrv_get_aio_context(bs_verify) == ctx);
     g_assert(bdrv_get_aio_context(bs_a) == ctx);
@@ -593,7 +593,7 @@ static void test_propagate_mirror(void)
     IOThread *iothread = iothread_new();
     AioContext *ctx = iothread_get_aio_context(iothread);
     AioContext *main_ctx = qemu_get_aio_context();
-    BlockDriverState *src, *target;
+    BlockDriverState *src, *target, *filter;
     BlockBackend *blk;
     Job *job;
     Error *local_err = NULL;
@@ -610,11 +610,13 @@ static void test_propagate_mirror(void)
                  false, "filter_node", MIRROR_COPY_MODE_BACKGROUND,
                  &error_abort);
     job = job_get("job0");
+    filter = bdrv_find_node("filter_node");
 
     /* Change the AioContext of src */
     bdrv_try_set_aio_context(src, ctx, &error_abort);
     g_assert(bdrv_get_aio_context(src) == ctx);
     g_assert(bdrv_get_aio_context(target) == ctx);
+    g_assert(bdrv_get_aio_context(filter) == ctx);
     g_assert(job->aio_context == ctx);
 
     /* Change the AioContext of target */
@@ -623,9 +625,10 @@ static void test_propagate_mirror(void)
     aio_context_release(ctx);
     g_assert(bdrv_get_aio_context(src) == main_ctx);
     g_assert(bdrv_get_aio_context(target) == main_ctx);
+    g_assert(bdrv_get_aio_context(filter) == main_ctx);
 
     /* With a BlockBackend on src, changing target must fail */
-    blk = blk_new(0, BLK_PERM_ALL);
+    blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
     blk_insert_bs(blk, src, &error_abort);
 
     bdrv_try_set_aio_context(target, ctx, &local_err);
@@ -635,6 +638,7 @@ static void test_propagate_mirror(void)
     g_assert(blk_get_aio_context(blk) == main_ctx);
     g_assert(bdrv_get_aio_context(src) == main_ctx);
     g_assert(bdrv_get_aio_context(target) == main_ctx);
+    g_assert(bdrv_get_aio_context(filter) == main_ctx);
 
     /* ...unless we explicitly allow it */
     aio_context_acquire(ctx);
@@ -645,11 +649,12 @@ static void test_propagate_mirror(void)
     g_assert(blk_get_aio_context(blk) == ctx);
     g_assert(bdrv_get_aio_context(src) == ctx);
     g_assert(bdrv_get_aio_context(target) == ctx);
+    g_assert(bdrv_get_aio_context(filter) == ctx);
 
     job_cancel_sync_all();
 
     aio_context_acquire(ctx);
-    blk_set_aio_context(blk, main_ctx);
+    blk_set_aio_context(blk, main_ctx, &error_abort);
     bdrv_try_set_aio_context(target, main_ctx, &error_abort);
     aio_context_release(ctx);
 
@@ -658,6 +663,69 @@ static void test_propagate_mirror(void)
     bdrv_unref(target);
 }
 
+static void test_attach_second_node(void)
+{
+    IOThread *iothread = iothread_new();
+    AioContext *ctx = iothread_get_aio_context(iothread);
+    AioContext *main_ctx = qemu_get_aio_context();
+    BlockBackend *blk;
+    BlockDriverState *bs, *filter;
+    QDict *options;
+
+    blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL);
+    bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
+    blk_insert_bs(blk, bs, &error_abort);
+
+    options = qdict_new();
+    qdict_put_str(options, "driver", "raw");
+    qdict_put_str(options, "file", "base");
+
+    filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort);
+    g_assert(blk_get_aio_context(blk) == ctx);
+    g_assert(bdrv_get_aio_context(bs) == ctx);
+    g_assert(bdrv_get_aio_context(filter) == ctx);
+
+    blk_set_aio_context(blk, main_ctx, &error_abort);
+    g_assert(blk_get_aio_context(blk) == main_ctx);
+    g_assert(bdrv_get_aio_context(bs) == main_ctx);
+    g_assert(bdrv_get_aio_context(filter) == main_ctx);
+
+    bdrv_unref(filter);
+    bdrv_unref(bs);
+    blk_unref(blk);
+}
+
+static void test_attach_preserve_blk_ctx(void)
+{
+    IOThread *iothread = iothread_new();
+    AioContext *ctx = iothread_get_aio_context(iothread);
+    BlockBackend *blk;
+    BlockDriverState *bs;
+
+    blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL);
+    bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
+    bs->total_sectors = 65536 / BDRV_SECTOR_SIZE;
+
+    /* Add node to BlockBackend that has an iothread context assigned */
+    blk_insert_bs(blk, bs, &error_abort);
+    g_assert(blk_get_aio_context(blk) == ctx);
+    g_assert(bdrv_get_aio_context(bs) == ctx);
+
+    /* Remove the node again */
+    blk_remove_bs(blk);
+    g_assert(blk_get_aio_context(blk) == ctx);
+    g_assert(bdrv_get_aio_context(bs) == qemu_get_aio_context());
+
+    /* Re-attach the node */
+    blk_insert_bs(blk, bs, &error_abort);
+    g_assert(blk_get_aio_context(blk) == ctx);
+    g_assert(bdrv_get_aio_context(bs) == ctx);
+
+    blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort);
+    bdrv_unref(bs);
+    blk_unref(blk);
+}
+
 int main(int argc, char **argv)
 {
     int i;
@@ -673,6 +741,8 @@ int main(int argc, char **argv)
     }
 
     g_test_add_func("/attach/blockjob", test_attach_blockjob);
+    g_test_add_func("/attach/second_node", test_attach_second_node);
+    g_test_add_func("/attach/preserve_blk_ctx", test_attach_preserve_blk_ctx);
     g_test_add_func("/propagate/basic", test_propagate_basic);
     g_test_add_func("/propagate/diamond", test_propagate_diamond);
     g_test_add_func("/propagate/mirror", test_propagate_mirror);
diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
index 652d1e8359..8c91980c70 100644
--- a/tests/test-blockjob.c
+++ b/tests/test-blockjob.c
@@ -68,7 +68,7 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id,
 static BlockBackend *create_blk(const char *name)
 {
     /* No I/O is performed on this device */
-    BlockBackend *blk = blk_new(0, BLK_PERM_ALL);
+    BlockBackend *blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
     BlockDriverState *bs;
 
     bs = bdrv_open("null-co://", NULL, NULL, 0, &error_abort);
diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c
index 9cdccb3a47..0b675923f6 100644
--- a/tests/test-thread-pool.c
+++ b/tests/test-thread-pool.c
@@ -27,9 +27,10 @@ static int worker_cb(void *opaque)
 static int long_cb(void *opaque)
 {
     WorkerTestData *data = opaque;
-    atomic_inc(&data->n);
-    g_usleep(2000000);
-    atomic_inc(&data->n);
+    if (atomic_cmpxchg(&data->n, 0, 1) == 0) {
+        g_usleep(2000000);
+        atomic_or(&data->n, 2);
+    }
     return 0;
 }
 
@@ -171,7 +172,7 @@ static void do_test_cancel(bool sync)
     /* Cancel the jobs that haven't been started yet.  */
     num_canceled = 0;
     for (i = 0; i < 100; i++) {
-        if (atomic_cmpxchg(&data[i].n, 0, 3) == 0) {
+        if (atomic_cmpxchg(&data[i].n, 0, 4) == 0) {
             data[i].ret = -ECANCELED;
             if (sync) {
                 bdrv_aio_cancel(data[i].aiocb);
@@ -185,7 +186,7 @@ static void do_test_cancel(bool sync)
     g_assert_cmpint(num_canceled, <, 100);
 
     for (i = 0; i < 100; i++) {
-        if (data[i].aiocb && data[i].n != 3) {
+        if (data[i].aiocb && atomic_read(&data[i].n) < 4) {
             if (sync) {
                 /* Canceling the others will be a blocking operation.  */
                 bdrv_aio_cancel(data[i].aiocb);
@@ -201,13 +202,22 @@ static void do_test_cancel(bool sync)
     }
     g_assert_cmpint(active, ==, 0);
     for (i = 0; i < 100; i++) {
-        if (data[i].n == 3) {
+        g_assert(data[i].aiocb == NULL);
+        switch (data[i].n) {
+        case 0:
+            fprintf(stderr, "Callback not canceled but never started?\n");
+            abort();
+        case 3:
+            /* Couldn't be canceled asynchronously, must have completed.  */
+            g_assert_cmpint(data[i].ret, ==, 0);
+            break;
+        case 4:
+            /* Could be canceled asynchronously, never started.  */
             g_assert_cmpint(data[i].ret, ==, -ECANCELED);
-            g_assert(data[i].aiocb == NULL);
-        } else {
-            g_assert_cmpint(data[i].n, ==, 2);
-            g_assert(data[i].ret == 0 || data[i].ret == -ECANCELED);
-            g_assert(data[i].aiocb == NULL);
+            break;
+        default:
+            fprintf(stderr, "Callback aborted while running?\n");
+            abort();
         }
     }
 }
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 948a42c991..5644cf95ca 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -675,9 +675,9 @@ static void test_groups(void)
     ThrottleGroupMember *tgm1, *tgm2, *tgm3;
 
     /* No actual I/O is performed on these devices */
-    blk1 = blk_new(0, BLK_PERM_ALL);
-    blk2 = blk_new(0, BLK_PERM_ALL);
-    blk3 = blk_new(0, BLK_PERM_ALL);
+    blk1 = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
+    blk2 = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
+    blk3 = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
 
     blkp1 = blk_get_public(blk1);
     blkp2 = blk_get_public(blk2);
diff --git a/tests/tmp105-test.c b/tests/tmp105-test.c
index 34cae7a582..f599309a4a 100644
--- a/tests/tmp105-test.c
+++ b/tests/tmp105-test.c
@@ -10,6 +10,7 @@
 #include "qemu/osdep.h"
 
 #include "libqtest.h"
+#include "libqos/qgraph.h"
 #include "libqos/i2c.h"
 #include "qapi/qmp/qdict.h"
 #include "hw/misc/tmp105_regs.h"
@@ -17,52 +18,6 @@
 #define TMP105_TEST_ID   "tmp105-test"
 #define TMP105_TEST_ADDR 0x49
 
-static I2CAdapter *i2c;
-
-static uint16_t tmp105_get8(I2CAdapter *i2c, uint8_t addr, uint8_t reg)
-{
-    uint8_t resp[1];
-    i2c_send(i2c, addr, &reg, 1);
-    i2c_recv(i2c, addr, resp, 1);
-    return resp[0];
-}
-
-static uint16_t tmp105_get16(I2CAdapter *i2c, uint8_t addr, uint8_t reg)
-{
-    uint8_t resp[2];
-    i2c_send(i2c, addr, &reg, 1);
-    i2c_recv(i2c, addr, resp, 2);
-    return (resp[0] << 8) | resp[1];
-}
-
-static void tmp105_set8(I2CAdapter *i2c, uint8_t addr, uint8_t reg,
-                        uint8_t value)
-{
-    uint8_t cmd[2];
-    uint8_t resp[1];
-
-    cmd[0] = reg;
-    cmd[1] = value;
-    i2c_send(i2c, addr, cmd, 2);
-    i2c_recv(i2c, addr, resp, 1);
-    g_assert_cmphex(resp[0], ==, cmd[1]);
-}
-
-static void tmp105_set16(I2CAdapter *i2c, uint8_t addr, uint8_t reg,
-                         uint16_t value)
-{
-    uint8_t cmd[3];
-    uint8_t resp[2];
-
-    cmd[0] = reg;
-    cmd[1] = value >> 8;
-    cmd[2] = value & 255;
-    i2c_send(i2c, addr, cmd, 3);
-    i2c_recv(i2c, addr, resp, 2);
-    g_assert_cmphex(resp[0], ==, cmd[1]);
-    g_assert_cmphex(resp[1], ==, cmd[2]);
-}
-
 static int qmp_tmp105_get_temperature(const char *id)
 {
     QDict *response;
@@ -87,21 +42,22 @@ static void qmp_tmp105_set_temperature(const char *id, int value)
 }
 
 #define TMP105_PRECISION (1000/16)
-static void send_and_receive(void)
+static void send_and_receive(void *obj, void *data, QGuestAllocator *alloc)
 {
     uint16_t value;
+    QI2CDevice *i2cdev = (QI2CDevice *)obj;
 
     value = qmp_tmp105_get_temperature(TMP105_TEST_ID);
     g_assert_cmpuint(value, ==, 0);
 
-    value = tmp105_get16(i2c, TMP105_TEST_ADDR, TMP105_REG_TEMPERATURE);
+    value = i2c_get16(i2cdev, TMP105_REG_TEMPERATURE);
     g_assert_cmphex(value, ==, 0);
 
     qmp_tmp105_set_temperature(TMP105_TEST_ID, 20000);
     value = qmp_tmp105_get_temperature(TMP105_TEST_ID);
     g_assert_cmpuint(value, ==, 20000);
 
-    value = tmp105_get16(i2c, TMP105_TEST_ADDR, TMP105_REG_TEMPERATURE);
+    value = i2c_get16(i2cdev, TMP105_REG_TEMPERATURE);
     g_assert_cmphex(value, ==, 0x1400);
 
     qmp_tmp105_set_temperature(TMP105_TEST_ID, 20938); /* 20 + 15/16 */
@@ -110,24 +66,27 @@ static void send_and_receive(void)
     g_assert_cmpuint(value, <, 20938 + TMP105_PRECISION/2);
 
     /* Set config */
-    tmp105_set8(i2c, TMP105_TEST_ADDR, TMP105_REG_CONFIG, 0x60);
-    value = tmp105_get8(i2c, TMP105_TEST_ADDR, TMP105_REG_CONFIG);
+    i2c_set8(i2cdev, TMP105_REG_CONFIG, 0x60);
+    value = i2c_get8(i2cdev, TMP105_REG_CONFIG);
     g_assert_cmphex(value, ==, 0x60);
 
-    value = tmp105_get16(i2c, TMP105_TEST_ADDR, TMP105_REG_TEMPERATURE);
+    value = i2c_get16(i2cdev, TMP105_REG_TEMPERATURE);
     g_assert_cmphex(value, ==, 0x14f0);
 
     /* Set precision to 9, 10, 11 bits.  */
-    tmp105_set8(i2c, TMP105_TEST_ADDR, TMP105_REG_CONFIG, 0x00);
-    value = tmp105_get16(i2c, TMP105_TEST_ADDR, TMP105_REG_TEMPERATURE);
+    i2c_set8(i2cdev, TMP105_REG_CONFIG, 0x00);
+    g_assert_cmphex(i2c_get8(i2cdev, TMP105_REG_CONFIG), ==, 0x00);
+    value = i2c_get16(i2cdev, TMP105_REG_TEMPERATURE);
     g_assert_cmphex(value, ==, 0x1480);
 
-    tmp105_set8(i2c, TMP105_TEST_ADDR, TMP105_REG_CONFIG, 0x20);
-    value = tmp105_get16(i2c, TMP105_TEST_ADDR, TMP105_REG_TEMPERATURE);
+    i2c_set8(i2cdev, TMP105_REG_CONFIG, 0x20);
+    g_assert_cmphex(i2c_get8(i2cdev, TMP105_REG_CONFIG), ==, 0x20);
+    value = i2c_get16(i2cdev, TMP105_REG_TEMPERATURE);
     g_assert_cmphex(value, ==, 0x14c0);
 
-    tmp105_set8(i2c, TMP105_TEST_ADDR, TMP105_REG_CONFIG, 0x40);
-    value = tmp105_get16(i2c, TMP105_TEST_ADDR, TMP105_REG_TEMPERATURE);
+    i2c_set8(i2cdev, TMP105_REG_CONFIG, 0x40);
+    g_assert_cmphex(i2c_get8(i2cdev, TMP105_REG_CONFIG), ==, 0x40);
+    value = i2c_get16(i2cdev, TMP105_REG_TEMPERATURE);
     g_assert_cmphex(value, ==, 0x14e0);
 
     /* stored precision remains the same */
@@ -135,32 +94,27 @@ static void send_and_receive(void)
     g_assert_cmpuint(value, >=, 20938 - TMP105_PRECISION/2);
     g_assert_cmpuint(value, <, 20938 + TMP105_PRECISION/2);
 
-    tmp105_set8(i2c, TMP105_TEST_ADDR, TMP105_REG_CONFIG, 0x60);
-    value = tmp105_get16(i2c, TMP105_TEST_ADDR, TMP105_REG_TEMPERATURE);
+    i2c_set8(i2cdev, TMP105_REG_CONFIG, 0x60);
+    g_assert_cmphex(i2c_get8(i2cdev, TMP105_REG_CONFIG), ==, 0x60);
+    value = i2c_get16(i2cdev, TMP105_REG_TEMPERATURE);
     g_assert_cmphex(value, ==, 0x14f0);
 
-    tmp105_set16(i2c, TMP105_TEST_ADDR, TMP105_REG_T_LOW, 0x1234);
-    tmp105_set16(i2c, TMP105_TEST_ADDR, TMP105_REG_T_HIGH, 0x4231);
+    i2c_set16(i2cdev, TMP105_REG_T_LOW, 0x1234);
+    g_assert_cmphex(i2c_get16(i2cdev, TMP105_REG_T_LOW), ==, 0x1234);
+    i2c_set16(i2cdev, TMP105_REG_T_HIGH, 0x4231);
+    g_assert_cmphex(i2c_get16(i2cdev, TMP105_REG_T_HIGH), ==, 0x4231);
 }
 
-int main(int argc, char **argv)
+static void tmp105_register_nodes(void)
 {
-    QTestState *s = NULL;
-    int ret;
-
-    g_test_init(&argc, &argv, NULL);
+    QOSGraphEdgeOptions opts = {
+        .extra_device_opts = "id=" TMP105_TEST_ID ",address=0x49"
+    };
+    add_qi2c_address(&opts, &(QI2CAddress) { 0x49 });
 
-    s = qtest_start("-machine n800 "
-                    "-device tmp105,bus=i2c-bus.0,id=" TMP105_TEST_ID
-                    ",address=0x49");
-    i2c = omap_i2c_create(s, OMAP2_I2C_1_BASE);
+    qos_node_create_driver("tmp105", i2c_device_create);
+    qos_node_consumes("tmp105", "i2c-bus", &opts);
 
-    qtest_add_func("/tmp105/tx-rx", send_and_receive);
-
-    ret = g_test_run();
-
-    qtest_quit(s);
-    g_free(i2c);
-
-    return ret;
+    qos_add_test("tx-rx", "tmp105", send_and_receive, NULL);
 }
+libqos_init(tmp105_register_nodes);
diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c
index 162b31c88d..1e535cb178 100644
--- a/tests/virtio-scsi-test.c
+++ b/tests/virtio-scsi-test.c
@@ -188,6 +188,53 @@ static void test_unaligned_write_same(void *obj, void *data,
     qvirtio_scsi_pci_free(vs);
 }
 
+static void test_iothread_attach_node(void *obj, void *data,
+                                      QGuestAllocator *t_alloc)
+{
+    QVirtioSCSIPCI *scsi_pci = obj;
+    QVirtioSCSI *scsi = &scsi_pci->scsi;
+    QVirtioSCSIQueues *vs;
+    char tmp_path[] = "/tmp/qtest.XXXXXX";
+    int fd;
+    int ret;
+
+    uint8_t buf[512] = { 0 };
+    const uint8_t write_cdb[VIRTIO_SCSI_CDB_SIZE] = {
+        /* WRITE(10) to LBA 0, transfer length 1 */
+        0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00
+    };
+
+    alloc = t_alloc;
+    vs = qvirtio_scsi_init(scsi->vdev);
+
+    /* Create a temporary qcow2 overlay*/
+    fd = mkstemp(tmp_path);
+    g_assert(fd >= 0);
+    close(fd);
+
+    if (!have_qemu_img()) {
+        g_test_message("QTEST_QEMU_IMG not set or qemu-img missing; "
+                       "skipping snapshot test");
+        goto fail;
+    }
+
+    mkqcow2(tmp_path, 64);
+
+    /* Attach the overlay to the null0 node */
+    qmp_assert_success("{'execute': 'blockdev-add', 'arguments': {"
+                       "   'driver': 'qcow2', 'node-name': 'overlay',"
+                       "   'backing': 'null0', 'file': {"
+                       "     'driver': 'file', 'filename': %s}}}", tmp_path);
+
+    /* Send a request to see if the AioContext is still right */
+    ret = virtio_scsi_do_command(vs, write_cdb, NULL, 0, buf, 512, NULL);
+    g_assert_cmphex(ret, ==, 0);
+
+fail:
+    qvirtio_scsi_pci_free(vs);
+    unlink(tmp_path);
+}
+
 static void *virtio_scsi_hotplug_setup(GString *cmd_line, void *arg)
 {
     g_string_append(cmd_line,
@@ -204,6 +251,15 @@ static void *virtio_scsi_setup(GString *cmd_line, void *arg)
     return arg;
 }
 
+static void *virtio_scsi_setup_iothread(GString *cmd_line, void *arg)
+{
+    g_string_append(cmd_line,
+                    " -object iothread,id=thread0"
+                    " -blockdev driver=null-co,node-name=null0"
+                    " -device scsi-hd,drive=null0");
+    return arg;
+}
+
 static void register_virtio_scsi_test(void)
 {
     QOSGraphTestOptions opts = { };
@@ -214,6 +270,13 @@ static void register_virtio_scsi_test(void)
     opts.before = virtio_scsi_setup;
     qos_add_test("unaligned-write-same", "virtio-scsi",
                  test_unaligned_write_same, &opts);
+
+    opts.before = virtio_scsi_setup_iothread;
+    opts.edge = (QOSGraphEdgeOptions) {
+        .extra_device_opts = "iothread=thread0",
+    };
+    qos_add_test("iothread-attach-node", "virtio-scsi-pci",
+                 test_iothread_attach_node, &opts);
 }
 
 libqos_init(register_virtio_scsi_test);
diff --git a/ui/spice-app.c b/ui/spice-app.c
index 925b27b708..30541b1022 100644
--- a/ui/spice-app.c
+++ b/ui/spice-app.c
@@ -157,9 +157,10 @@ static void spice_app_display_early_init(DisplayOptions *opts)
     qemu_opt_set(qopts, "addr", sock_path, &error_abort);
     qemu_opt_set(qopts, "image-compression", "off", &error_abort);
     qemu_opt_set(qopts, "streaming-video", "off", &error_abort);
+#ifdef CONFIG_OPENGL
     qemu_opt_set(qopts, "gl", opts->has_gl ? "on" : "off", &error_abort);
     display_opengl = opts->has_gl;
-
+#endif
     be->u.spiceport.data->fqdn = g_strdup("org.qemu.monitor.qmp.0");
     qemu_chardev_new("org.qemu.monitor.qmp", TYPE_CHARDEV_SPICEPORT,
                      be, NULL, &error_abort);
diff --git a/util/Makefile.objs b/util/Makefile.objs
index c27a923dbe..38178201ff 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -53,7 +53,7 @@ util-obj-y += systemd.o
 util-obj-y += iova-tree.o
 util-obj-$(CONFIG_INOTIFY1) += filemonitor-inotify.o
 util-obj-$(CONFIG_LINUX) += vfio-helpers.o
-util-obj-$(CONFIG_OPENGL) += drm.o
+util-obj-$(CONFIG_POSIX) += drm.o
 util-obj-y += guest-random.o
 
 stub-obj-y += filemonitor-stub.o
diff --git a/vl.c b/vl.c
index 2e69c9fef2..cd1fbc4cdc 100644
--- a/vl.c
+++ b/vl.c
@@ -239,6 +239,7 @@ static struct {
     { .driver = "qxl-vga",              .flag = &default_vga       },
     { .driver = "virtio-vga",           .flag = &default_vga       },
     { .driver = "ati-vga",              .flag = &default_vga       },
+    { .driver = "vhost-user-vga",       .flag = &default_vga       },
 };
 
 static QemuOptsList qemu_rtc_opts = {
@@ -2750,20 +2751,25 @@ static bool object_create_initial(const char *type, QemuOpts *opts)
         exit(0);
     }
 
-    if (g_str_equal(type, "rng-egd") ||
-        g_str_has_prefix(type, "pr-manager-")) {
+    /*
+     * Objects should not be made "delayed" without a reason.  If you
+     * add one, state the reason in a comment!
+     */
+
+    /* Reason: rng-egd property "chardev" */
+    if (g_str_equal(type, "rng-egd")) {
         return false;
     }
 
 #if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
+    /* Reason: cryptodev-vhost-user property "chardev" */
     if (g_str_equal(type, "cryptodev-vhost-user")) {
         return false;
     }
 #endif
 
     /*
-     * return false for concrete netfilters since
-     * they depend on netdevs already existing
+     * Reason: filter-* property "netdev" etc.
      */
     if (g_str_equal(type, "filter-buffer") ||
         g_str_equal(type, "filter-dump") ||
@@ -3575,7 +3581,23 @@ int main(int argc, char **argv, char **envp)
                                                      optarg, true);
                 optarg = qemu_opt_get(accel_opts, "accel");
                 if (!optarg || is_help_option(optarg)) {
-                    printf("Possible accelerators: kvm, xen, hax, tcg\n");
+                    printf("Accelerators supported in QEMU binary:\n");
+                    GSList *el, *accel_list = object_class_get_list(TYPE_ACCEL,
+                                                                    false);
+                    for (el = accel_list; el; el = el->next) {
+                        gchar *typename = g_strdup(object_class_get_name(
+                                                   OBJECT_CLASS(el->data)));
+                        /* omit qtest which is used for tests only */
+                        if (g_strcmp0(typename, ACCEL_CLASS_NAME("qtest")) &&
+                            g_str_has_suffix(typename, ACCEL_CLASS_SUFFIX)) {
+                            gchar **optname = g_strsplit(typename,
+                                                         ACCEL_CLASS_SUFFIX, 0);
+                            printf("%s\n", optname[0]);
+                            g_free(optname);
+                        }
+                        g_free(typename);
+                    }
+                    g_slist_free(accel_list);
                     exit(0);
                 }
                 opts = qemu_opts_create(qemu_find_opts("machine"), NULL,