-rw-r--r--  MAINTAINERS | 24
-rw-r--r--  Makefile | 5
-rw-r--r--  Makefile.objs | 4
-rw-r--r--  accel/tcg/user-exec.c | 75
-rwxr-xr-x  configure | 12
-rw-r--r--  contrib/elf2dmp/pdb.h | 2
-rw-r--r--  contrib/elf2dmp/pe.h | 1
-rw-r--r--  contrib/elf2dmp/qemu_elf.h | 1
-rw-r--r--  contrib/rdmacm-mux/Makefile.objs | 4
-rw-r--r--  contrib/rdmacm-mux/main.c | 798
-rw-r--r--  contrib/rdmacm-mux/rdmacm-mux.h | 61
-rw-r--r--  contrib/vhost-user-blk/vhost-user-blk.c | 1
-rw-r--r--  contrib/vhost-user-scsi/vhost-user-scsi.c | 1
-rw-r--r--  default-configs/ppc64-softmmu.mak | 2
-rw-r--r--  disas.c | 10
-rw-r--r--  disas/microblaze.c | 1
-rw-r--r--  disas/ppc.c | 2
-rw-r--r--  docs/pvrdma.txt | 126
-rw-r--r--  hw/acpi/aml-build.c | 68
-rw-r--r--  hw/acpi/pcihp.c | 47
-rw-r--r--  hw/acpi/piix4.c | 40
-rw-r--r--  hw/arm/virt-acpi-build.c | 40
-rw-r--r--  hw/arm/virt.c | 2
-rw-r--r--  hw/core/machine.c | 2
-rw-r--r--  hw/core/qdev-properties.c | 176
-rw-r--r--  hw/display/virtio-gpu-pci.c | 7
-rw-r--r--  hw/display/virtio-vga.c | 7
-rw-r--r--  hw/i386/acpi-build.c | 52
-rw-r--r--  hw/i386/amd_iommu.c | 2
-rw-r--r--  hw/i386/intel_iommu.c | 72
-rw-r--r--  hw/i386/intel_iommu_internal.h | 3
-rw-r--r--  hw/i386/pc.c | 4
-rw-r--r--  hw/i386/pc_piix.c | 72
-rw-r--r--  hw/i386/pc_q35.c | 4
-rw-r--r--  hw/i386/trace-events | 6
-rw-r--r--  hw/i386/x86-iommu.c | 18
-rw-r--r--  hw/intc/Makefile.objs | 2
-rw-r--r--  hw/intc/spapr_xive.c | 1486
-rw-r--r--  hw/intc/xics_spapr.c | 3
-rw-r--r--  hw/intc/xive.c | 1599
-rw-r--r--  hw/net/vmxnet3.c | 116
-rw-r--r--  hw/net/vmxnet3_defs.h | 133
-rw-r--r--  hw/pci-bridge/gen_pcie_root_port.c | 4
-rw-r--r--  hw/pci-bridge/pci_bridge_dev.c | 31
-rw-r--r--  hw/pci-bridge/pcie_pci_bridge.c | 32
-rw-r--r--  hw/pci-bridge/pcie_root_port.c | 14
-rw-r--r--  hw/pci/pci.c | 4
-rw-r--r--  hw/pci/pci_bridge.c | 2
-rw-r--r--  hw/pci/pci_host.c | 26
-rw-r--r--  hw/pci/pcie.c | 159
-rw-r--r--  hw/pci/pcie_port.c | 5
-rw-r--r--  hw/pci/shpc.c | 25
-rw-r--r--  hw/ppc/e500.c | 18
-rw-r--r--  hw/ppc/mac_newworld.c | 30
-rw-r--r--  hw/ppc/ppc405_boards.c | 4
-rw-r--r--  hw/ppc/ppc405_uc.c | 4
-rw-r--r--  hw/ppc/ppc440_bamboo.c | 5
-rw-r--r--  hw/ppc/sam460ex.c | 2
-rw-r--r--  hw/ppc/spapr.c | 121
-rw-r--r--  hw/ppc/spapr_cpu_core.c | 4
-rw-r--r--  hw/ppc/spapr_iommu.c | 2
-rw-r--r--  hw/ppc/spapr_irq.c | 194
-rw-r--r--  hw/ppc/spapr_pci.c | 33
-rw-r--r--  hw/ppc/spapr_rtas_ddw.c | 19
-rw-r--r--  hw/ppc/spapr_vio.c | 2
-rw-r--r--  hw/ppc/virtex_ml507.c | 2
-rw-r--r--  hw/rdma/rdma_backend.c | 524
-rw-r--r--  hw/rdma/rdma_backend.h | 28
-rw-r--r--  hw/rdma/rdma_backend_defs.h | 19
-rw-r--r--  hw/rdma/rdma_rm.c | 120
-rw-r--r--  hw/rdma/rdma_rm.h | 17
-rw-r--r--  hw/rdma/rdma_rm_defs.h | 21
-rw-r--r--  hw/rdma/rdma_utils.c | 1
-rw-r--r--  hw/rdma/rdma_utils.h | 26
-rw-r--r--  hw/rdma/vmw/pvrdma.h | 10
-rw-r--r--  hw/rdma/vmw/pvrdma_cmd.c | 273
-rw-r--r--  hw/rdma/vmw/pvrdma_dev_ring.c | 29
-rw-r--r--  hw/rdma/vmw/pvrdma_dev_ring.h | 1
-rw-r--r--  hw/rdma/vmw/pvrdma_main.c | 70
-rw-r--r--  hw/rdma/vmw/pvrdma_qp_ops.c | 62
-rw-r--r--  hw/s390x/css.c | 32
-rw-r--r--  hw/s390x/s390-pci-bus.c | 12
-rw-r--r--  hw/smbios/smbios-stub.c | 2
-rw-r--r--  hw/smbios/smbios.c | 3
-rw-r--r--  hw/smbios/smbios_build.h | 4
-rw-r--r--  hw/smbios/smbios_type_38-stub.c | 2
-rw-r--r--  hw/smbios/smbios_type_38.c | 3
-rw-r--r--  hw/vfio/ap.c | 2
-rw-r--r--  hw/vfio/pci.c | 9
-rw-r--r--  hw/virtio/virtio-crypto-pci.c | 7
-rw-r--r--  hw/virtio/virtio-pci.c | 267
-rw-r--r--  hw/virtio/virtio-pci.h | 78
-rw-r--r--  include/elf.h | 55
-rw-r--r--  include/exec/helper-head.h | 13
-rw-r--r--  include/exec/helper-tcg.h | 21
-rw-r--r--  include/exec/poison.h | 1
-rw-r--r--  include/hw/acpi/acpi-defs.h | 19
-rw-r--r--  include/hw/acpi/aml-build.h | 2
-rw-r--r--  include/hw/acpi/pcihp.h | 5
-rw-r--r--  include/hw/boards.h | 1
-rw-r--r--  include/hw/compat.h | 10
-rw-r--r--  include/hw/firmware/smbios.h (renamed from include/hw/smbios/smbios.h) | 0
-rw-r--r--  include/hw/i386/intel_iommu.h | 1
-rw-r--r--  include/hw/i386/pc.h | 5
-rw-r--r--  include/hw/i386/x86-iommu.h | 4
-rw-r--r--  include/hw/pci/pci.h | 13
-rw-r--r--  include/hw/pci/pci_bridge.h | 6
-rw-r--r--  include/hw/pci/pcie.h | 11
-rw-r--r--  include/hw/pci/pcie_port.h | 4
-rw-r--r--  include/hw/pci/pcie_regs.h | 23
-rw-r--r--  include/hw/pci/shpc.h | 10
-rw-r--r--  include/hw/ppc/openpic.h | 2
-rw-r--r--  include/hw/ppc/spapr.h | 25
-rw-r--r--  include/hw/ppc/spapr_irq.h | 12
-rw-r--r--  include/hw/ppc/spapr_xive.h | 52
-rw-r--r--  include/hw/ppc/xics.h | 4
-rw-r--r--  include/hw/ppc/xive.h | 429
-rw-r--r--  include/hw/ppc/xive_regs.h | 235
-rw-r--r--  include/hw/qdev-properties.h | 8
-rw-r--r--  include/hw/s390x/tod.h | 2
-rw-r--r--  include/hw/smbios/ipmi.h | 15
-rw-r--r--  include/qemu/vfio-helpers.h | 1
-rw-r--r--  include/sysemu/sysemu.h | 1
-rw-r--r--  include/sysemu/whpx.h | 1
-rw-r--r--  linux-user/host/riscv32/hostdep.h | 11
-rw-r--r--  linux-user/host/riscv64/hostdep.h | 34
-rw-r--r--  linux-user/host/riscv64/safe-syscall.inc.S | 77
-rw-r--r--  qapi/common.json | 42
-rw-r--r--  qapi/qapi-schema.json | 1
-rw-r--r--  qapi/rdma.json | 38
-rw-r--r--  qemu-deprecated.texi | 2
-rw-r--r--  target/i386/sev.c | 3
-rw-r--r--  target/i386/whp-dispatch.h | 1
-rw-r--r--  target/ppc/cpu.h | 18
-rw-r--r--  target/ppc/translate.c | 60
-rw-r--r--  target/ppc/translate/vmx-ops.inc.c | 2
-rw-r--r--  target/ppc/translate_init.inc.c | 6
-rw-r--r--  target/riscv/fpu_helper.c | 1
-rw-r--r--  tcg/riscv/tcg-target.h | 177
-rw-r--r--  tcg/riscv/tcg-target.inc.c | 1949
-rw-r--r--  tcg/tcg-op.c | 2
-rw-r--r--  tcg/tcg-op.h | 1
-rw-r--r--  tcg/tcg-opc.h | 7
-rw-r--r--  tcg/tcg.c | 620
-rw-r--r--  tcg/tcg.h | 31
-rw-r--r--  tests/acceptance/virtio_version.py | 176
-rw-r--r--  tests/acpi-utils.c | 50
-rw-r--r--  tests/acpi-utils.h | 5
-rw-r--r--  tests/bios-tables-test.c | 24
-rw-r--r--  tests/cpu-plug-test.c | 4
-rw-r--r--  tests/fp/platform.h | 1
-rw-r--r--  tests/tpm-util.h | 1
-rw-r--r--  tests/vhost-user-bridge.c | 2
-rw-r--r--  tests/vmgenid-test.c | 8
-rw-r--r--  util/qemu-thread-common.h | 1
-rw-r--r--  vl.c | 17
156 files changed, 10615 insertions, 1203 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 317eff6cec..5a56b6e848 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -266,7 +266,8 @@ S: Supported
 F: target/riscv/
 F: hw/riscv/
 F: include/hw/riscv/
-F: disas/riscv.c
+F: linux-user/host/riscv32/
+F: linux-user/host/riscv64/
 
 S390
 M: Richard Henderson <rth@twiddle.net>
@@ -1011,6 +1012,14 @@ F: tests/libqos/*spapr*
 F: tests/rtas*
 F: tests/libqos/rtas*
 
+XIVE
+M: David Gibson <david@gibson.dropbear.id.au>
+M: Cédric Le Goater <clg@kaod.org>
+L: qemu-ppc@nongnu.org
+S: Supported
+F: hw/*/*xive*
+F: include/hw/*/*xive*
+
 virtex_ml507
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 L: qemu-ppc@nongnu.org
@@ -1262,7 +1271,7 @@ M: Michael S. Tsirkin <mst@redhat.com>
 M: Igor Mammedov <imammedo@redhat.com>
 S: Supported
 F: include/hw/acpi/*
-F: include/hw/smbios/*
+F: include/hw/firmware/smbios.h
 F: hw/mem/*
 F: hw/acpi/*
 F: hw/smbios/*
@@ -2164,6 +2173,15 @@ S: Odd Fixes
 F: tcg/ppc/
 F: disas/ppc.c
 
+RISC-V
+M: Michael Clark <mjc@sifive.com>
+M: Palmer Dabbelt <palmer@sifive.com>
+M: Alistair Francis <Alistair.Francis@wdc.com>
+L: qemu-riscv@nongnu.org
+S: Maintained
+F: tcg/riscv/
+F: disas/riscv.c
+
 S390 target
 M: Richard Henderson <rth@twiddle.net>
 S: Maintained
@@ -2404,6 +2422,8 @@ S: Maintained
 F: hw/rdma/*
 F: hw/rdma/vmw/*
 F: docs/pvrdma.txt
+F: contrib/rdmacm-mux/*
+F: qapi/rdma.json
 
 Build and test automation
 -------------------------
diff --git a/Makefile b/Makefile
index c8b9efdad4..dd53965f77 100644
--- a/Makefile
+++ b/Makefile
@@ -67,7 +67,7 @@ CONFIG_ALL=y
 -include config-all-devices.mak
 -include config-all-disas.mak
 
-config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/pc-bios
+config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/pc-bios $(SRC_PATH)/VERSION
 	@echo $@ is out-of-date, running configure
 	@# TODO: The next lines include code which supports a smooth
 	@# transition from old configurations without config.status.
@@ -362,6 +362,7 @@ dummy := $(call unnest-vars,, \
                 elf2dmp-obj-y \
                 ivshmem-client-obj-y \
                 ivshmem-server-obj-y \
+                rdmacm-mux-obj-y \
                 libvhost-user-obj-y \
                 vhost-user-scsi-obj-y \
                 vhost-user-blk-obj-y \
@@ -579,6 +580,8 @@ vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y) libvhost-user.a
 	$(call LINK, $^)
 vhost-user-blk$(EXESUF): $(vhost-user-blk-obj-y) libvhost-user.a
 	$(call LINK, $^)
+rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS)
+	$(call LINK, $^)
 
 module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
 	$(call quiet-command,$(PYTHON) $< $@ \
diff --git a/Makefile.objs b/Makefile.objs
index 56af0347d3..bc5b8a8442 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -1,5 +1,6 @@
 QAPI_MODULES = block-core block char common crypto introspect job migration
-QAPI_MODULES += misc net rocker run-state sockets tpm trace transaction ui
+QAPI_MODULES += misc net rdma rocker run-state sockets tpm trace transaction
+QAPI_MODULES += ui
 
 #######################################################################
 # Common libraries for tools and emulators
@@ -133,6 +134,7 @@ vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS)
 vhost-user-scsi.o-libs := $(LIBISCSI_LIBS)
 vhost-user-scsi-obj-y = contrib/vhost-user-scsi/
 vhost-user-blk-obj-y = contrib/vhost-user-blk/
+rdmacm-mux-obj-y = contrib/rdmacm-mux/
 
 ######################################################################
 trace-events-subdirs =
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index cd75829cf2..941295ea49 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -571,6 +571,81 @@ int cpu_signal_handler(int host_signum, void *pinfo,
     return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
 }
 
+#elif defined(__riscv)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+    ucontext_t *uc = puc;
+    greg_t pc = uc->uc_mcontext.__gregs[REG_PC];
+    uint32_t insn = *(uint32_t *)pc;
+    int is_write = 0;
+
+    /* Detect a store by reading the instruction at the program
+       counter.  Note: we currently only generate 32-bit
+       instructions, so we only detect 32-bit stores here. */
+    switch (((insn >> 0) & 0b11)) {
+    case 3:
+        switch (((insn >> 2) & 0b11111)) {
+        case 8:
+            switch (((insn >> 12) & 0b111)) {
+            case 0: /* sb */
+            case 1: /* sh */
+            case 2: /* sw */
+            case 3: /* sd */
+            case 4: /* sq */
+                is_write = 1;
+                break;
+            default:
+                break;
+            }
+            break;
+        case 9:
+            switch (((insn >> 12) & 0b111)) {
+            case 2: /* fsw */
+            case 3: /* fsd */
+            case 4: /* fsq */
+                is_write = 1;
+                break;
+            default:
+                break;
+            }
+            break;
+        default:
+            break;
+        }
+    }
+
+    /* Check for compressed instructions */
+    switch (((insn >> 13) & 0b111)) {
+    case 7:
+        switch (insn & 0b11) {
+        case 0: /* c.sd */
+        case 2: /* c.sdsp */
+            is_write = 1;
+            break;
+        default:
+            break;
+        }
+        break;
+    case 6:
+        switch (insn & 0b11) {
+        case 0: /* c.sw */
+        case 3: /* c.swsp */
+            is_write = 1;
+            break;
+        default:
+            break;
+        }
+        break;
+    default:
+        break;
+    }
+
+    return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
+}
+
 #else
 
 #error host CPU specific signal handler needed
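
The decode above keys off the standard RISC-V encoding fields: bits [1:0]
select the 32-bit instruction space (0b11), bits [6:2] carry the major
opcode (8 = STORE, 9 = STORE-FP) and bits [14:12] the access width. A
minimal standalone sketch of the same classification, not part of the
patch, with the instruction encodings taken from the RISC-V ISA manual:

/* Standalone sketch, not QEMU code: checks that the decode above
 * classifies a 32-bit integer store as a write.  Encodings follow the
 * RISC-V ISA manual. */
#include <assert.h>
#include <stdint.h>

static int insn_is_write(uint32_t insn)
{
    if ((insn & 0b11) == 3 && ((insn >> 2) & 0b11111) == 8) {
        return ((insn >> 12) & 0b111) <= 4; /* sb/sh/sw/sd/sq */
    }
    return 0;
}

int main(void)
{
    assert(insn_is_write(0x00A12023));  /* sw a0, 0(sp) */
    assert(!insn_is_write(0x00012503)); /* lw a0, 0(sp): load, not store */
    return 0;
}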
diff --git a/configure b/configure
index 224d3071ac..79375affc1 100755
--- a/configure
+++ b/configure
@@ -710,6 +710,12 @@ elif check_define __s390__ ; then
   else
     cpu="s390"
   fi
+elif check_define __riscv ; then
+  if check_define _LP64 ; then
+    cpu="riscv64"
+  else
+    cpu="riscv32"
+  fi
 elif check_define __arm__ ; then
   cpu="arm"
 elif check_define __aarch64__ ; then
@@ -722,7 +728,7 @@ ARCH=
 # Normalise host CPU name and set ARCH.
 # Note that this case should only have supported host CPUs, not guests.
 case "$cpu" in
-  ppc|ppc64|s390|s390x|sparc64|x32)
+  ppc|ppc64|s390|s390x|sparc64|x32|riscv32|riscv64)
     cpu="$cpu"
     supported_cpu="yes"
     eval "cross_cc_${cpu}=\$host_cc"
@@ -6937,6 +6943,8 @@ elif test "$ARCH" = "x86_64" -o "$ARCH" = "x32" ; then
   QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/i386 $QEMU_INCLUDES"
 elif test "$ARCH" = "ppc64" ; then
   QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/ppc $QEMU_INCLUDES"
+elif test "$ARCH" = "riscv32" -o "$ARCH" = "riscv64" ; then
+  QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/riscv $QEMU_INCLUDES"
 else
   QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/\$(ARCH) $QEMU_INCLUDES"
 fi
@@ -7433,7 +7441,7 @@ for i in $ARCH $TARGET_BASE_ARCH ; do
   ppc*)
     disas_config "PPC"
   ;;
-  riscv)
+  riscv*)
     disas_config "RISCV"
   ;;
   s390*)
diff --git a/contrib/elf2dmp/pdb.h b/contrib/elf2dmp/pdb.h
index 4351a2dd61..a3a3cac2c1 100644
--- a/contrib/elf2dmp/pdb.h
+++ b/contrib/elf2dmp/pdb.h
@@ -8,8 +8,6 @@
 #ifndef PDB_H
 #define PDB_H
 
-#include <stdint.h>
-#include <stdlib.h>
 
 typedef struct GUID {
     unsigned int Data1;
diff --git a/contrib/elf2dmp/pe.h b/contrib/elf2dmp/pe.h
index 374e06a9c5..dafb26afbb 100644
--- a/contrib/elf2dmp/pe.h
+++ b/contrib/elf2dmp/pe.h
@@ -8,7 +8,6 @@
 #ifndef PE_H
 #define PE_H
 
-#include <stdint.h>
 
 typedef struct IMAGE_DOS_HEADER {
     uint16_t  e_magic;      /* 0x00: MZ Header signature */
diff --git a/contrib/elf2dmp/qemu_elf.h b/contrib/elf2dmp/qemu_elf.h
index d85d6558fa..86e6e688fb 100644
--- a/contrib/elf2dmp/qemu_elf.h
+++ b/contrib/elf2dmp/qemu_elf.h
@@ -8,7 +8,6 @@
 #ifndef QEMU_ELF_H
 #define QEMU_ELF_H
 
-#include <stdint.h>
 #include <elf.h>
 
 typedef struct QEMUCPUSegment {
diff --git a/contrib/rdmacm-mux/Makefile.objs b/contrib/rdmacm-mux/Makefile.objs
new file mode 100644
index 0000000000..be3eacb6f7
--- /dev/null
+++ b/contrib/rdmacm-mux/Makefile.objs
@@ -0,0 +1,4 @@
+ifdef CONFIG_PVRDMA
+CFLAGS += -libumad -Wno-format-truncation
+rdmacm-mux-obj-y = main.o
+endif
diff --git a/contrib/rdmacm-mux/main.c b/contrib/rdmacm-mux/main.c
new file mode 100644
index 0000000000..835a7f9214
--- /dev/null
+++ b/contrib/rdmacm-mux/main.c
@@ -0,0 +1,798 @@
+/*
+ * QEMU paravirtual RDMA - rdmacm-mux implementation
+ *
+ * Copyright (C) 2018 Oracle
+ * Copyright (C) 2018 Red Hat Inc
+ *
+ * Authors:
+ *     Yuval Shaia <yuval.shaia@oracle.com>
+ *     Marcel Apfelbaum <marcel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "sys/poll.h"
+#include "sys/ioctl.h"
+#include "pthread.h"
+#include "syslog.h"
+
+#include "infiniband/verbs.h"
+#include "infiniband/umad.h"
+#include "infiniband/umad_types.h"
+#include "infiniband/umad_sa.h"
+#include "infiniband/umad_cm.h"
+
+#include "rdmacm-mux.h"
+
+#define SCALE_US 1000
+#define COMMID_TTL 2 /* How many timeouts a MAD session context is kept */
+#define SLEEP_SECS 5 /* Used both by poll() and the receiver thread */
+#define SERVER_LISTEN_BACKLOG 10
+#define MAX_CLIENTS 4096
+#define MAD_RMPP_VERSION 0
+#define MAD_METHOD_MASK0 0x8
+
+#define IB_USER_MAD_LONGS_PER_METHOD_MASK (128 / (8 * sizeof(long)))
+
+#define CM_REQ_DGID_POS      80
+#define CM_SIDR_REQ_DGID_POS 44
+
+/* The below can be overridden by command line parameters */
+#define UNIX_SOCKET_PATH "/var/run/rdmacm-mux"
+#define RDMA_PORT_NUM 1
+
+typedef struct RdmaCmServerArgs {
+    char unix_socket_path[PATH_MAX];
+    char rdma_dev_name[NAME_MAX];
+    int rdma_port_num;
+} RdmaCMServerArgs;
+
+typedef struct CommId2FdEntry {
+    int fd;
+    int ttl; /* Starts at COMMID_TTL, decremented each timeout, freed at 0 */
+    __be64 gid_ifid;
+} CommId2FdEntry;
+
+typedef struct RdmaCmUMadAgent {
+    int port_id;
+    int agent_id;
+    GHashTable *gid2fd; /* Used to find fd of a given gid */
+    GHashTable *commid2fd; /* Used to find fd of a given comm_id */
+} RdmaCmUMadAgent;
+
+typedef struct RdmaCmServer {
+    bool run;
+    RdmaCMServerArgs args;
+    struct pollfd fds[MAX_CLIENTS];
+    int nfds;
+    RdmaCmUMadAgent umad_agent;
+    pthread_t umad_recv_thread;
+    pthread_rwlock_t lock;
+} RdmaCMServer;
+
+static RdmaCMServer server = {0};
+
+static void usage(const char *progname)
+{
+    printf("Usage: %s [OPTION]...\n"
+           "Start a RDMA-CM multiplexer\n"
+           "\n"
+           "\t-h                    Show this help\n"
+           "\t-d rdma-device-name   Name of RDMA device to register with\n"
+           "\t-s unix-socket-path   Path to unix socket to listen on (default %s)\n"
+           "\t-p rdma-device-port   Port number of RDMA device to register with (default %d)\n",
+           progname, UNIX_SOCKET_PATH, RDMA_PORT_NUM);
+}
+
+static void help(const char *progname)
+{
+    fprintf(stderr, "Try '%s -h' for more information.\n", progname);
+}
+
+static void parse_args(int argc, char *argv[])
+{
+    int c;
+    char unix_socket_path[PATH_MAX];
+
+    strcpy(server.args.rdma_dev_name, "");
+    strcpy(unix_socket_path, UNIX_SOCKET_PATH);
+    server.args.rdma_port_num = RDMA_PORT_NUM;
+
+    while ((c = getopt(argc, argv, "hs:d:p:")) != -1) {
+        switch (c) {
+        case 'h':
+            usage(argv[0]);
+            exit(0);
+
+        case 'd':
+            strncpy(server.args.rdma_dev_name, optarg, NAME_MAX - 1);
+            break;
+
+        case 's':
+            /* This is temporary; the final name is built below */
+            strncpy(unix_socket_path, optarg, PATH_MAX - 1);
+            break;
+
+        case 'p':
+            server.args.rdma_port_num = atoi(optarg);
+            break;
+
+        default:
+            help(argv[0]);
+            exit(1);
+        }
+    }
+
+    if (!strcmp(server.args.rdma_dev_name, "")) {
+        fprintf(stderr, "Missing RDMA device name\n");
+        help(argv[0]);
+        exit(1);
+    }
+
+    /* Build unique unix-socket file name */
+    snprintf(server.args.unix_socket_path, PATH_MAX, "%s-%s-%d",
+             unix_socket_path, server.args.rdma_dev_name,
+             server.args.rdma_port_num);
+
+    syslog(LOG_INFO, "unix_socket_path=%s", server.args.unix_socket_path);
+    syslog(LOG_INFO, "rdma-device-name=%s", server.args.rdma_dev_name);
+    syslog(LOG_INFO, "rdma-device-port=%d", server.args.rdma_port_num);
+}
+
+static void hash_tbl_alloc(void)
+{
+
+    server.umad_agent.gid2fd = g_hash_table_new_full(g_int64_hash,
+                                                     g_int64_equal,
+                                                     g_free, g_free);
+    server.umad_agent.commid2fd = g_hash_table_new_full(g_int_hash,
+                                                        g_int_equal,
+                                                        g_free, g_free);
+}
+
+static void hash_tbl_free(void)
+{
+    if (server.umad_agent.commid2fd) {
+        g_hash_table_destroy(server.umad_agent.commid2fd);
+    }
+    if (server.umad_agent.gid2fd) {
+        g_hash_table_destroy(server.umad_agent.gid2fd);
+    }
+}
+
+
+static int _hash_tbl_search_fd_by_ifid(__be64 *gid_ifid)
+{
+    int *fd;
+
+    fd = g_hash_table_lookup(server.umad_agent.gid2fd, gid_ifid);
+    if (!fd) {
+        /* Let's try IPv4 */
+        *gid_ifid |= 0x00000000ffff0000;
+        fd = g_hash_table_lookup(server.umad_agent.gid2fd, gid_ifid);
+    }
+
+    return fd ? *fd : 0;
+}
+
+static int hash_tbl_search_fd_by_ifid(int *fd, __be64 *gid_ifid)
+{
+    pthread_rwlock_rdlock(&server.lock);
+    *fd = _hash_tbl_search_fd_by_ifid(gid_ifid);
+    pthread_rwlock_unlock(&server.lock);
+
+    if (!*fd) {
+        syslog(LOG_WARNING, "Can't find a match for ifid 0x%llx\n", *gid_ifid);
+        return -ENOENT;
+    }
+
+    return 0;
+}
+
+static int hash_tbl_search_fd_by_comm_id(uint32_t comm_id, int *fd,
+                                         __be64 *gid_idid)
+{
+    CommId2FdEntry *fde;
+
+    pthread_rwlock_rdlock(&server.lock);
+    fde = g_hash_table_lookup(server.umad_agent.commid2fd, &comm_id);
+    pthread_rwlock_unlock(&server.lock);
+
+    if (!fde) {
+        syslog(LOG_WARNING, "Can't find matching for comm_id 0x%x\n", comm_id);
+        return -ENOENT;
+    }
+
+    *fd = fde->fd;
+    *gid_idid = fde->gid_ifid;
+
+    return 0;
+}
+
+static RdmaCmMuxErrCode add_fd_ifid_pair(int fd, __be64 gid_ifid)
+{
+    int fd1;
+
+    pthread_rwlock_wrlock(&server.lock);
+
+    fd1 = _hash_tbl_search_fd_by_ifid(&gid_ifid);
+    if (fd1) { /* record already exists - an error */
+        pthread_rwlock_unlock(&server.lock);
+        return fd == fd1 ? RDMACM_MUX_ERR_CODE_EEXIST :
+                           RDMACM_MUX_ERR_CODE_EACCES;
+    }
+
+    g_hash_table_insert(server.umad_agent.gid2fd, g_memdup(&gid_ifid,
+                        sizeof(gid_ifid)), g_memdup(&fd, sizeof(fd)));
+
+    pthread_rwlock_unlock(&server.lock);
+
+    syslog(LOG_INFO, "0x%lx registered on socket %d",
+           be64toh((uint64_t)gid_ifid), fd);
+
+    return RDMACM_MUX_ERR_CODE_OK;
+}
+
+static RdmaCmMuxErrCode delete_fd_ifid_pair(int fd, __be64 gid_ifid)
+{
+    int fd1;
+
+    pthread_rwlock_wrlock(&server.lock);
+
+    fd1 = _hash_tbl_search_fd_by_ifid(&gid_ifid);
+    if (!fd1) { /* record does not exist - an error */
+        pthread_rwlock_unlock(&server.lock);
+        return RDMACM_MUX_ERR_CODE_ENOTFOUND;
+    }
+
+    g_hash_table_remove(server.umad_agent.gid2fd, g_memdup(&gid_ifid,
+                        sizeof(gid_ifid)));
+    pthread_rwlock_unlock(&server.lock);
+
+    syslog(LOG_INFO, "0x%lx unregistered on socket %d",
+           be64toh((uint64_t)gid_ifid), fd);
+
+    return RDMACM_MUX_ERR_CODE_OK;
+}
+
+static void hash_tbl_save_fd_comm_id_pair(int fd, uint32_t comm_id,
+                                          uint64_t gid_ifid)
+{
+    CommId2FdEntry fde = {fd, COMMID_TTL, gid_ifid};
+
+    pthread_rwlock_wrlock(&server.lock);
+    g_hash_table_insert(server.umad_agent.commid2fd,
+                        g_memdup(&comm_id, sizeof(comm_id)),
+                        g_memdup(&fde, sizeof(fde)));
+    pthread_rwlock_unlock(&server.lock);
+}
+
+static gboolean remove_old_comm_ids(gpointer key, gpointer value,
+                                    gpointer user_data)
+{
+    CommId2FdEntry *fde = (CommId2FdEntry *)value;
+
+    return !fde->ttl--;
+}
+
+static gboolean remove_entry_from_gid2fd(gpointer key, gpointer value,
+                                         gpointer user_data)
+{
+    if (*(int *)value == *(int *)user_data) {
+        syslog(LOG_INFO, "0x%lx unregistered on socket %d",
+               be64toh(*(uint64_t *)key), *(int *)value);
+        return true;
+    }
+
+    return false;
+}
+
+static void hash_tbl_remove_fd_ifid_pair(int fd)
+{
+    pthread_rwlock_wrlock(&server.lock);
+    g_hash_table_foreach_remove(server.umad_agent.gid2fd,
+                                remove_entry_from_gid2fd, (gpointer)&fd);
+    pthread_rwlock_unlock(&server.lock);
+}
+
+static int get_fd(const char *mad, int *fd, __be64 *gid_ifid)
+{
+    struct umad_hdr *hdr = (struct umad_hdr *)mad;
+    char *data = (char *)hdr + sizeof(*hdr);
+    int32_t comm_id = 0;
+    uint16_t attr_id = be16toh(hdr->attr_id);
+    int rc = 0;
+
+    switch (attr_id) {
+    case UMAD_CM_ATTR_REQ:
+        memcpy(gid_ifid, data + CM_REQ_DGID_POS, sizeof(*gid_ifid));
+        rc = hash_tbl_search_fd_by_ifid(fd, gid_ifid);
+        break;
+
+    case UMAD_CM_ATTR_SIDR_REQ:
+        memcpy(gid_ifid, data + CM_SIDR_REQ_DGID_POS, sizeof(*gid_ifid));
+        rc = hash_tbl_search_fd_by_ifid(fd, gid_ifid);
+        break;
+
+    case UMAD_CM_ATTR_REP:
+        /* Fall through */
+    case UMAD_CM_ATTR_REJ:
+        /* Fall through */
+    case UMAD_CM_ATTR_DREQ:
+        /* Fall through */
+    case UMAD_CM_ATTR_DREP:
+        /* Fall through */
+    case UMAD_CM_ATTR_RTU:
+        data += sizeof(comm_id);
+        /* Fall through */
+    case UMAD_CM_ATTR_SIDR_REP:
+        memcpy(&comm_id, data, sizeof(comm_id));
+        if (comm_id) {
+            rc = hash_tbl_search_fd_by_comm_id(comm_id, fd, gid_ifid);
+        }
+        break;
+
+    default:
+        rc = -EINVAL;
+        syslog(LOG_WARNING, "Unsupported attr_id 0x%x\n", attr_id);
+    }
+
+    syslog(LOG_DEBUG, "mad_to_vm: %d 0x%x 0x%x\n", *fd, attr_id, comm_id);
+
+    return rc;
+}
+
+static void *umad_recv_thread_func(void *args)
+{
+    int rc;
+    RdmaCmMuxMsg msg = {0};
+    int fd = -2;
+
+    msg.hdr.msg_type = RDMACM_MUX_MSG_TYPE_REQ;
+    msg.hdr.op_code = RDMACM_MUX_OP_CODE_MAD;
+
+    while (server.run) {
+        do {
+            msg.umad_len = sizeof(msg.umad.mad);
+            rc = umad_recv(server.umad_agent.port_id, &msg.umad, &msg.umad_len,
+                           SLEEP_SECS * SCALE_US);
+            if ((rc == -EIO) || (rc == -EINVAL)) {
+                syslog(LOG_CRIT, "Fatal error while trying to read MAD");
+            }
+
+            if (rc == -ETIMEDOUT) {
+                g_hash_table_foreach_remove(server.umad_agent.commid2fd,
+                                            remove_old_comm_ids, NULL);
+            }
+        } while (rc && server.run);
+
+        if (server.run) {
+            rc = get_fd(msg.umad.mad, &fd, &msg.hdr.sgid.global.interface_id);
+            if (rc) {
+                continue;
+            }
+
+            send(fd, &msg, sizeof(msg), 0);
+        }
+    }
+
+    return NULL;
+}
+
+static int read_and_process(int fd)
+{
+    int rc;
+    RdmaCmMuxMsg msg = {0};
+    struct umad_hdr *hdr;
+    uint32_t *comm_id = 0;
+    uint16_t attr_id;
+
+    rc = recv(fd, &msg, sizeof(msg), 0);
+    syslog(LOG_DEBUG, "Socket %d, recv %d\n", fd, rc);
+
+    if (rc < 0 && errno != EWOULDBLOCK) {
+        syslog(LOG_ERR, "Fail to read from socket %d\n", fd);
+        return -EIO;
+    }
+
+    if (!rc) {
+        syslog(LOG_ERR, "Fail to read from socket %d\n", fd);
+        return -EPIPE;
+    }
+
+    if (msg.hdr.msg_type != RDMACM_MUX_MSG_TYPE_REQ) {
+        syslog(LOG_WARNING, "Got non-request message (%d) from socket %d\n",
+               msg.hdr.msg_type, fd);
+        return -EPERM;
+    }
+
+    switch (msg.hdr.op_code) {
+    case RDMACM_MUX_OP_CODE_REG:
+        rc = add_fd_ifid_pair(fd, msg.hdr.sgid.global.interface_id);
+        break;
+
+    case RDMACM_MUX_OP_CODE_UNREG:
+        rc = delete_fd_ifid_pair(fd, msg.hdr.sgid.global.interface_id);
+        break;
+
+    case RDMACM_MUX_OP_CODE_MAD:
+        /* If this is REQ or REP then store the comm_id/fd pair to be used
+         * later for other messages where the gid is unknown */
+        hdr = (struct umad_hdr *)msg.umad.mad;
+        attr_id = be16toh(hdr->attr_id);
+        if ((attr_id == UMAD_CM_ATTR_REQ) || (attr_id == UMAD_CM_ATTR_DREQ) ||
+            (attr_id == UMAD_CM_ATTR_SIDR_REQ) ||
+            (attr_id == UMAD_CM_ATTR_REP) || (attr_id == UMAD_CM_ATTR_DREP)) {
+            comm_id = (uint32_t *)(msg.umad.mad + sizeof(*hdr));
+            hash_tbl_save_fd_comm_id_pair(fd, *comm_id,
+                                          msg.hdr.sgid.global.interface_id);
+        }
+
+        syslog(LOG_DEBUG, "vm_to_mad: %d 0x%x 0x%x\n", fd, attr_id,
+               comm_id ? *comm_id : 0);
+        rc = umad_send(server.umad_agent.port_id, server.umad_agent.agent_id,
+                       &msg.umad, msg.umad_len, 1, 0);
+        if (rc) {
+            syslog(LOG_ERR,
+                  "Fail to send MAD message (0x%x) from socket %d, err=%d",
+                  attr_id, fd, rc);
+        }
+        break;
+
+    default:
+        syslog(LOG_ERR, "Got invalid op_code (%d) from socket %d",
+               msg.hdr.msg_type, fd);
+        rc = RDMACM_MUX_ERR_CODE_EINVAL;
+    }
+
+    msg.hdr.msg_type = RDMACM_MUX_MSG_TYPE_RESP;
+    msg.hdr.err_code = rc;
+    rc = send(fd, &msg, sizeof(msg), 0);
+
+    return rc == sizeof(msg) ? 0 : -EPIPE;
+}
+
+static int accept_all(void)
+{
+    int fd, rc = 0;
+
+    pthread_rwlock_wrlock(&server.lock);
+
+    do {
+        if ((server.nfds + 1) > MAX_CLIENTS) {
+            syslog(LOG_WARNING, "Too many clients (%d)", server.nfds);
+            rc = -EIO;
+            goto out;
+        }
+
+        fd = accept(server.fds[0].fd, NULL, NULL);
+        if (fd < 0) {
+            if (errno != EWOULDBLOCK) {
+                syslog(LOG_WARNING, "accept() failed");
+                rc = -EIO;
+                goto out;
+            }
+            break;
+        }
+
+        syslog(LOG_INFO, "Client connected on socket %d\n", fd);
+        server.fds[server.nfds].fd = fd;
+        server.fds[server.nfds].events = POLLIN;
+        server.nfds++;
+    } while (fd != -1);
+
+out:
+    pthread_rwlock_unlock(&server.lock);
+    return rc;
+}
+
+static void compress_fds(void)
+{
+    int i, j;
+    int closed = 0;
+
+    pthread_rwlock_wrlock(&server.lock);
+
+    for (i = 1; i < server.nfds; i++) {
+        if (!server.fds[i].fd) {
+            closed++;
+            for (j = i; j < server.nfds - 1; j++) {
+                server.fds[j] = server.fds[j + 1];
+            }
+        }
+    }
+
+    server.nfds -= closed;
+
+    pthread_rwlock_unlock(&server.lock);
+}
+
+static void close_fd(int idx)
+{
+    close(server.fds[idx].fd);
+    syslog(LOG_INFO, "Socket %d closed\n", server.fds[idx].fd);
+    hash_tbl_remove_fd_ifid_pair(server.fds[idx].fd);
+    server.fds[idx].fd = 0;
+}
+
+static void run(void)
+{
+    int rc, nfds, i;
+    bool compress = false;
+
+    syslog(LOG_INFO, "Service started");
+
+    while (server.run) {
+        rc = poll(server.fds, server.nfds, SLEEP_SECS * SCALE_US);
+        if (rc < 0) {
+            if (errno != EINTR) {
+                syslog(LOG_WARNING, "poll() failed");
+            }
+            continue;
+        }
+
+        if (rc == 0) {
+            continue;
+        }
+
+        nfds = server.nfds;
+        for (i = 0; i < nfds; i++) {
+            syslog(LOG_DEBUG, "pollfd[%d]: revents 0x%x, events 0x%x\n", i,
+                   server.fds[i].revents, server.fds[i].events);
+            if (server.fds[i].revents == 0) {
+                continue;
+            }
+
+            if (server.fds[i].revents != POLLIN) {
+                if (i == 0) {
+                    syslog(LOG_NOTICE, "Unexpected poll() event (0x%x)\n",
+                           server.fds[i].revents);
+                } else {
+                    close_fd(i);
+                    compress = true;
+                }
+                continue;
+            }
+
+            if (i == 0) {
+                rc = accept_all();
+                if (rc) {
+                    continue;
+                }
+            } else {
+                rc = read_and_process(server.fds[i].fd);
+                if (rc) {
+                    close_fd(i);
+                    compress = true;
+                }
+            }
+        }
+
+        if (compress) {
+            compress = false;
+            compress_fds();
+        }
+    }
+}
+
+static void fini_listener(void)
+{
+    int i;
+
+    if (server.fds[0].fd <= 0) {
+        return;
+    }
+
+    for (i = server.nfds - 1; i >= 0; i--) {
+        if (server.fds[i].fd) {
+            close(server.fds[i].fd);
+        }
+    }
+
+    unlink(server.args.unix_socket_path);
+}
+
+static void fini_umad(void)
+{
+    if (server.umad_agent.agent_id) {
+        umad_unregister(server.umad_agent.port_id, server.umad_agent.agent_id);
+    }
+
+    if (server.umad_agent.port_id) {
+        umad_close_port(server.umad_agent.port_id);
+    }
+
+    hash_tbl_free();
+}
+
+static void fini(void)
+{
+    if (server.umad_recv_thread) {
+        pthread_join(server.umad_recv_thread, NULL);
+        server.umad_recv_thread = 0;
+    }
+    fini_umad();
+    fini_listener();
+    pthread_rwlock_destroy(&server.lock);
+
+    syslog(LOG_INFO, "Service going down");
+}
+
+static int init_listener(void)
+{
+    struct sockaddr_un sun;
+    int rc, on = 1;
+
+    server.fds[0].fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (server.fds[0].fd < 0) {
+        syslog(LOG_ALERT, "socket() failed");
+        return -EIO;
+    }
+
+    rc = setsockopt(server.fds[0].fd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
+                    sizeof(on));
+    if (rc < 0) {
+        syslog(LOG_ALERT, "setsockopt() failed");
+        rc = -EIO;
+        goto err;
+    }
+
+    rc = ioctl(server.fds[0].fd, FIONBIO, (char *)&on);
+    if (rc < 0) {
+        syslog(LOG_ALERT, "ioctl() failed");
+        rc = -EIO;
+        goto err;
+    }
+
+    if (strlen(server.args.unix_socket_path) >= sizeof(sun.sun_path)) {
+        syslog(LOG_ALERT,
+               "Invalid unix_socket_path, size must be less than %ld\n",
+               sizeof(sun.sun_path));
+        rc = -EINVAL;
+        goto err;
+    }
+
+    sun.sun_family = AF_UNIX;
+    rc = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s",
+                  server.args.unix_socket_path);
+    if (rc < 0 || rc >= sizeof(sun.sun_path)) {
+        syslog(LOG_ALERT, "Could not copy unix socket path\n");
+        rc = -EINVAL;
+        goto err;
+    }
+
+    rc = bind(server.fds[0].fd, (struct sockaddr *)&sun, sizeof(sun));
+    if (rc < 0) {
+        syslog(LOG_ALERT, "bind() failed");
+        rc = -EIO;
+        goto err;
+    }
+
+    rc = listen(server.fds[0].fd, SERVER_LISTEN_BACKLOG);
+    if (rc < 0) {
+        syslog(LOG_ALERT, "listen() failed");
+        rc = -EIO;
+        goto err;
+    }
+
+    server.fds[0].events = POLLIN;
+    server.nfds = 1;
+    server.run = true;
+
+    return 0;
+
+err:
+    close(server.fds[0].fd);
+    return rc;
+}
+
+static int init_umad(void)
+{
+    long method_mask[IB_USER_MAD_LONGS_PER_METHOD_MASK];
+
+    server.umad_agent.port_id = umad_open_port(server.args.rdma_dev_name,
+                                               server.args.rdma_port_num);
+
+    if (server.umad_agent.port_id < 0) {
+        syslog(LOG_WARNING, "umad_open_port() failed");
+        return -EIO;
+    }
+
+    memset(&method_mask, 0, sizeof(method_mask));
+    method_mask[0] = MAD_METHOD_MASK0;
+    server.umad_agent.agent_id = umad_register(server.umad_agent.port_id,
+                                               UMAD_CLASS_CM,
+                                               UMAD_SA_CLASS_VERSION,
+                                               MAD_RMPP_VERSION, method_mask);
+    if (server.umad_agent.agent_id < 0) {
+        syslog(LOG_WARNING, "umad_register() failed");
+        return -EIO;
+    }
+
+    hash_tbl_alloc();
+
+    return 0;
+}
+
+static void signal_handler(int sig, siginfo_t *siginfo, void *context)
+{
+    static bool warned;
+
+    /* Prevent stop if clients are connected */
+    if (server.nfds != 1) {
+        if (!warned) {
+            syslog(LOG_WARNING,
+                   "Can't stop while active client exist, resend SIGINT to overid");
+            warned = true;
+            return;
+        }
+    }
+
+    if (sig == SIGINT) {
+        server.run = false;
+        fini();
+    }
+
+    exit(0);
+}
+
+static int init(void)
+{
+    int rc;
+    struct sigaction sig = {0};
+
+    rc = init_listener();
+    if (rc) {
+        return rc;
+    }
+
+    rc = init_umad();
+    if (rc) {
+        return rc;
+    }
+
+    pthread_rwlock_init(&server.lock, 0);
+
+    rc = pthread_create(&server.umad_recv_thread, NULL, umad_recv_thread_func,
+                        NULL);
+    if (rc) {
+        syslog(LOG_ERR, "Fail to create UMAD receiver thread (%d)\n", rc);
+        return rc;
+    }
+
+    sig.sa_sigaction = &signal_handler;
+    sig.sa_flags = SA_SIGINFO;
+    rc = sigaction(SIGINT, &sig, NULL);
+    if (rc < 0) {
+        syslog(LOG_ERR, "Fail to install SIGINT handler (%d)\n", errno);
+        return rc;
+    }
+
+    return 0;
+}
+
+int main(int argc, char *argv[])
+{
+    int rc;
+
+    memset(&server, 0, sizeof(server));
+
+    parse_args(argc, argv);
+
+    rc = init();
+    if (rc) {
+        syslog(LOG_ERR, "Fail to initialize server (%d)\n", rc);
+        rc = -EAGAIN;
+        goto out;
+    }
+
+    run();
+
+out:
+    fini();
+
+    return rc;
+}
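
The commid2fd table in main.c ages its entries: each umad_recv() timeout
(every SLEEP_SECS of MAD silence) sweeps the table with
remove_old_comm_ids(), so a comm_id/fd mapping with COMMID_TTL of 2
survives two idle sweeps and is dropped on the third. A self-contained
GLib sketch of that mechanic, not part of the patch:

#include <glib.h>
#include <stdio.h>

typedef struct { int fd; int ttl; } Entry;

/* Same predicate as remove_old_comm_ids(): true (remove) once the
 * counter has already reached zero, decrementing as a side effect. */
static gboolean expired(gpointer key, gpointer value, gpointer user_data)
{
    Entry *e = value;
    return !e->ttl--;
}

int main(void)
{
    GHashTable *tbl = g_hash_table_new_full(g_int_hash, g_int_equal,
                                            g_free, g_free);
    guint32 comm_id = 0x1234;
    Entry e = { .fd = 7, .ttl = 2 }; /* COMMID_TTL */

    g_hash_table_insert(tbl, g_memdup(&comm_id, sizeof(comm_id)),
                        g_memdup(&e, sizeof(e)));

    /* With TTL 2 the entry survives two idle sweeps and is dropped on
     * the third: sizes printed are 1, 1, 0. */
    for (int i = 0; i < 3; i++) {
        g_hash_table_foreach_remove(tbl, expired, NULL);
        printf("after sweep %d: %u entries\n", i + 1,
               g_hash_table_size(tbl));
    }

    g_hash_table_destroy(tbl);
    return 0;
}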
diff --git a/contrib/rdmacm-mux/rdmacm-mux.h b/contrib/rdmacm-mux/rdmacm-mux.h
new file mode 100644
index 0000000000..942a802c47
--- /dev/null
+++ b/contrib/rdmacm-mux/rdmacm-mux.h
@@ -0,0 +1,61 @@
+/*
+ * QEMU paravirtual RDMA - rdmacm-mux declarations
+ *
+ * Copyright (C) 2018 Oracle
+ * Copyright (C) 2018 Red Hat Inc
+ *
+ * Authors:
+ *     Yuval Shaia <yuval.shaia@oracle.com>
+ *     Marcel Apfelbaum <marcel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef RDMACM_MUX_H
+#define RDMACM_MUX_H
+
+#include "linux/if.h"
+#include "infiniband/verbs.h"
+#include "infiniband/umad.h"
+#include "rdma/rdma_user_cm.h"
+
+typedef enum RdmaCmMuxMsgType {
+    RDMACM_MUX_MSG_TYPE_REQ   = 0,
+    RDMACM_MUX_MSG_TYPE_RESP  = 1,
+} RdmaCmMuxMsgType;
+
+typedef enum RdmaCmMuxOpCode {
+    RDMACM_MUX_OP_CODE_REG   = 0,
+    RDMACM_MUX_OP_CODE_UNREG = 1,
+    RDMACM_MUX_OP_CODE_MAD   = 2,
+} RdmaCmMuxOpCode;
+
+typedef enum RdmaCmMuxErrCode {
+    RDMACM_MUX_ERR_CODE_OK        = 0,
+    RDMACM_MUX_ERR_CODE_EINVAL    = 1,
+    RDMACM_MUX_ERR_CODE_EEXIST    = 2,
+    RDMACM_MUX_ERR_CODE_EACCES    = 3,
+    RDMACM_MUX_ERR_CODE_ENOTFOUND = 4,
+} RdmaCmMuxErrCode;
+
+typedef struct RdmaCmMuxHdr {
+    RdmaCmMuxMsgType msg_type;
+    RdmaCmMuxOpCode op_code;
+    union ibv_gid sgid;
+    RdmaCmMuxErrCode err_code;
+} RdmaCmUHdr;
+
+typedef struct RdmaCmUMad {
+    struct ib_user_mad hdr;
+    char mad[RDMA_MAX_PRIVATE_DATA];
+} RdmaCmUMad;
+
+typedef struct RdmaCmMuxMsg {
+    RdmaCmUHdr hdr;
+    int umad_len;
+    RdmaCmUMad umad;
+} RdmaCmMuxMsg;
+
+#endif
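
The header defines a simple request/response protocol: a client sends an
RdmaCmMuxMsg with msg_type REQ and one of the op codes, and the mux
replies with msg_type RESP and an err_code (see read_and_process() in
main.c above). A hypothetical client sketch, not part of the patch,
assuming the mux serves device mlx5_0 port 1 so the socket path is
/var/run/rdmacm-mux-mlx5_0-1:

/* Hypothetical client sketch, not part of the patch: registers a GID
 * with rdmacm-mux using the message format above.  The socket path
 * assumes the mux was started with -d mlx5_0 -p 1. */
#include <errno.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

#include "rdmacm-mux.h"

static int rdmacm_mux_register_gid(const union ibv_gid *gid)
{
    struct sockaddr_un addr = { .sun_family = AF_UNIX };
    RdmaCmMuxMsg msg = {0};
    int fd;

    fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) {
        return -errno;
    }

    strncpy(addr.sun_path, "/var/run/rdmacm-mux-mlx5_0-1",
            sizeof(addr.sun_path) - 1);
    if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        close(fd);
        return -errno;
    }

    /* A REQ/REG message binds the given source GID to this socket */
    msg.hdr.msg_type = RDMACM_MUX_MSG_TYPE_REQ;
    msg.hdr.op_code = RDMACM_MUX_OP_CODE_REG;
    msg.hdr.sgid = *gid;
    if (send(fd, &msg, sizeof(msg), 0) != sizeof(msg)) {
        close(fd);
        return -EPIPE;
    }

    /* The mux answers every request with a RESP carrying err_code */
    if (recv(fd, &msg, sizeof(msg), 0) != sizeof(msg) ||
        msg.hdr.err_code != RDMACM_MUX_ERR_CODE_OK) {
        close(fd);
        return -EPERM;
    }

    return fd; /* keep open: MADs for this GID arrive on this socket */
}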
diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c
index 571f114a56..858221ad95 100644
--- a/contrib/vhost-user-blk/vhost-user-blk.c
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
@@ -20,7 +20,6 @@
 #include "contrib/libvhost-user/libvhost-user-glib.h"
 #include "contrib/libvhost-user/libvhost-user.h"
 
-#include <glib.h>
 
 struct virtio_blk_inhdr {
     unsigned char status;
diff --git a/contrib/vhost-user-scsi/vhost-user-scsi.c b/contrib/vhost-user-scsi/vhost-user-scsi.c
index 02c29019d1..496dd6e693 100644
--- a/contrib/vhost-user-scsi/vhost-user-scsi.c
+++ b/contrib/vhost-user-scsi/vhost-user-scsi.c
@@ -16,7 +16,6 @@
 #include "contrib/libvhost-user/libvhost-user-glib.h"
 #include "standard-headers/linux/virtio_scsi.h"
 
-#include <glib.h>
 
 #define VUS_ISCSI_INITIATOR "iqn.2016-11.com.nutanix:vhost-user-scsi"
 
diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index aec2855750..7f34ad0528 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -16,6 +16,8 @@ CONFIG_VIRTIO_VGA=y
 CONFIG_XICS=$(CONFIG_PSERIES)
 CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
 CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
+CONFIG_XIVE=$(CONFIG_PSERIES)
+CONFIG_XIVE_SPAPR=$(CONFIG_PSERIES)
 CONFIG_MEM_DEVICE=y
 CONFIG_DIMM=y
 CONFIG_SPAPR_RNG=y
diff --git a/disas.c b/disas.c
index f9c517b358..d9aa713a40 100644
--- a/disas.c
+++ b/disas.c
@@ -522,8 +522,14 @@ void disas(FILE *out, void *code, unsigned long size)
 # ifdef _ARCH_PPC64
     s.info.cap_mode = CS_MODE_64;
 # endif
-#elif defined(__riscv__)
-    print_insn = print_insn_riscv;
+#elif defined(__riscv) && defined(CONFIG_RISCV_DIS)
+#if defined(_ILP32) || (__riscv_xlen == 32)
+    print_insn = print_insn_riscv32;
+#elif defined(_LP64)
+    print_insn = print_insn_riscv64;
+#else
+#error unsupported RISC-V ABI
+#endif
 #elif defined(__aarch64__) && defined(CONFIG_ARM_A64_DIS)
     print_insn = print_insn_arm_a64;
     s.info.cap_arch = CS_ARCH_ARM64;
diff --git a/disas/microblaze.c b/disas/microblaze.c
index 598ecbc89d..c23605043a 100644
--- a/disas/microblaze.c
+++ b/disas/microblaze.c
@@ -176,7 +176,6 @@ enum microblaze_instr_type {
 #define REG_TLBSX 36869 /* MMU: TLB Search Index reg */
 
 /* alternate names for gen purpose regs */
-#define REG_SP  1 /* stack pointer */
 #define REG_ROSDP 2 /* read-only small data pointer */
 #define REG_RWSDP 13 /* read-write small data pointer */
 
diff --git a/disas/ppc.c b/disas/ppc.c
index 5ab9c35a84..da1140ba2b 100644
--- a/disas/ppc.c
+++ b/disas/ppc.c
@@ -3734,6 +3734,8 @@ const struct powerpc_opcode powerpc_opcodes[] = {
 { "addmeo.", XO(31,234,1,1), XORB_MASK, PPCCOM,		{ RT, RA } },
 { "ameo.",   XO(31,234,1,1), XORB_MASK, PWRCOM,		{ RT, RA } },
 
+{ "addex",   XO(31,170,0,0), XO_MASK,   POWER9,         { RT, RA, RB } },
+
 { "mullw",   XO(31,235,0,0), XO_MASK,	PPCCOM,		{ RT, RA, RB } },
 { "muls",    XO(31,235,0,0), XO_MASK,	PWRCOM,		{ RT, RA, RB } },
 { "mullw.",  XO(31,235,0,1), XO_MASK,	PPCCOM,		{ RT, RA, RB } },
diff --git a/docs/pvrdma.txt b/docs/pvrdma.txt
index 5599318159..5175251b47 100644
--- a/docs/pvrdma.txt
+++ b/docs/pvrdma.txt
@@ -9,8 +9,9 @@ It works with its Linux Kernel driver AS IS, no need for any special guest
 modifications.
 
 While it complies with the VMware device, it can also communicate with bare
-metal RDMA-enabled machines and does not require an RDMA HCA in the host, it
-can work with Soft-RoCE (rxe).
+metal RDMA-enabled machines as peers.
+
+It does not require an RDMA HCA in the host: it can work with Soft-RoCE (rxe).
 
 It does not require the whole guest RAM to be pinned allowing memory
 over-commit and, even if not implemented yet, migration support will be
@@ -78,29 +79,116 @@ the required RDMA libraries.
 
 3. Usage
 ========
+
+
+3.1 VM Memory settings
+======================
Currently the device works only with memory-backed RAM
and it must be marked as "shared":
    -m 1G \
    -object memory-backend-ram,id=mb1,size=1G,share \
    -numa node,memdev=mb1 \
 
-The pvrdma device is composed of two functions:
- - Function 0 is a vmxnet Ethernet Device which is redundant in Guest
-   but is required to pass the ibdevice GID using its MAC.
-   Examples:
-     For an rxe backend using eth0 interface it will use its mac:
-       -device vmxnet3,addr=<slot>.0,multifunction=on,mac=<eth0 MAC>
-     For an SRIOV VF, we take the Ethernet Interface exposed by it:
-       -device vmxnet3,multifunction=on,mac=<RoCE eth MAC>
- - Function 1 is the actual device:
-       -device pvrdma,addr=<slot>.1,backend-dev=<ibdevice>,backend-gid-idx=<gid>,backend-port=<port>
-   where the ibdevice can be rxe or RDMA VF (e.g. mlx5_4)
- Note: Pay special attention that the GID at backend-gid-idx matches vmxnet's MAC.
- The rules of conversion are part of the RoCE spec, but since manual conversion
- is not required, spotting problems is not hard:
-    Example: GID: fe80:0000:0000:0000:7efe:90ff:fecb:743a
-             MAC: 7c:fe:90:cb:74:3a
-    Note the difference between the first byte of the MAC and the GID.
+
+3.2 MAD Multiplexer
+===================
+MAD Multiplexer is a service that exposes a MAD-like interface to VMs in
+order to overcome the limitation that only a single entity can register
+with the MAD layer to send and receive RDMA-CM MAD packets.
+
+To build rdmacm-mux run
+# make rdmacm-mux
+
+The application accepts 3 command line arguments and exposes a UNIX socket
+to pass control and data to it.
+-d rdma-device-name  Name of RDMA device to register with
+-s unix-socket-path  Path to unix socket to listen on (default /var/run/rdmacm-mux)
+-p rdma-device-port  Port number of RDMA device to register with (default 1)
+The final UNIX socket file name is a concatenation of the 3 arguments, so,
+for example, for device mlx5_0 on port 2 the socket
+/var/run/rdmacm-mux-mlx5_0-2 will be created.
+
+pvrdma requires this service.
+
+Please refer to contrib/rdmacm-mux for more details.
+
+
+3.3 Service exposed by libvirt daemon
+=====================================
+The RDMA device's GID table is controlled by updating the addresses of the
+device's Ethernet function.
+Usually the first GID entry is determined by the MAC address, the second by
+the first IPv6 address and the third by the IPv4 address. Other entries can
+be added by adding more IP addresses. The reverse also applies:
+whenever an address is removed, the corresponding GID entry is removed.
+The process is done by the network and RDMA stacks. Whenever an address is
+added the ib_core driver is notified and calls the device driver's add_gid
+function which in turn updates the device.
+To support this, the pvrdma device hooks into the create_bind and
+destroy_bind HW commands triggered by the pvrdma driver in the guest.
+
+Whenever a change is made to the pvrdma port's GID table a special QMP
+message is sent to be processed by libvirt, which updates the address of
+the backend Ethernet device.
+
+pvrdma requires the libvirt daemon to be running.
+
+
+3.4 PCI devices settings
+========================
+A RoCE device exposes two functions - an Ethernet function and an RDMA
+function.
+To support this, the pvrdma device is composed of two PCI functions: an
+Ethernet device of type vmxnet3 on PCI function 0 and a PVRDMA device on
+PCI function 1. The Ethernet function can be used for other Ethernet
+purposes such as IP.
+
+
+3.5 Device parameters
+=====================
+- netdev: Specifies the Ethernet device function name on the host, for
+  example enp175s0f0. For a Soft-RoCE device (rxe) this is the Ethernet
+  device used to create it.
+- ibdev: The IB device name on the host, for example rxe0, mlx5_0 etc.
+- mad-chardev: The name of the MAD multiplexer char device.
+- ibport: In case of a multi-port device (such as a Mellanox HCA) this
+  specifies the port to use. If not set, port 1 is used.
+- dev-caps-max-mr-size: The maximum size of MR.
+- dev-caps-max-qp:      Maximum number of QPs.
+- dev-caps-max-sge:     Maximum number of SGE elements in WR.
+- dev-caps-max-cq:      Maximum number of CQs.
+- dev-caps-max-mr:      Maximum number of MRs.
+- dev-caps-max-pd:      Maximum number of PDs.
+- dev-caps-max-ah:      Maximum number of AHs.
+
+Notes:
+- The first 3 parameters are mandatory settings, the rest have defaults.
+- The parameters prefixed by dev-caps define upper limits, but the final
+  values are adjusted by the backend device's limitations.
+- netdev can be extracted from ibdev's sysfs
+  (/sys/class/infiniband/<ibdev>/device/net/)
+
+
+3.6 Example
+===========
+Define bridge device with vmxnet3 network backend:
+<interface type='bridge'>
+  <mac address='56:b4:44:e9:62:dc'/>
+  <source bridge='bridge1'/>
+  <model type='vmxnet3'/>
+  <address type='pci' domain='0x0000' bus='0x00' slot='0x10' function='0x0' multifunction='on'/>
+</interface>
+
+Define pvrdma device:
+<qemu:commandline>
+  <qemu:arg value='-object'/>
+  <qemu:arg value='memory-backend-ram,id=mb1,size=1G,share'/>
+  <qemu:arg value='-numa'/>
+  <qemu:arg value='node,memdev=mb1'/>
+  <qemu:arg value='-chardev'/>
+  <qemu:arg value='socket,path=/var/run/rdmacm-mux-rxe0-1,id=mads'/>
+  <qemu:arg value='-device'/>
+  <qemu:arg value='pvrdma,addr=10.1,ibdev=rxe0,netdev=bridge0,mad-chardev=mads'/>
+</qemu:commandline>
 
 
 
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 1e43cd736d..555c24f21d 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1589,6 +1589,74 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre)
     g_array_free(tables->vmgenid, mfre);
 }
 
+/*
+ * ACPI spec 5.2.5.3 Root System Description Pointer (RSDP).
+ * (Revision 1.0 or later)
+ */
+void
+build_rsdp(GArray *tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data)
+{
+    int tbl_off = tbl->len; /* Table offset in the RSDP file */
+
+    switch (rsdp_data->revision) {
+    case 0:
+        /* With ACPI 1.0, we must have an RSDT pointer */
+        g_assert(rsdp_data->rsdt_tbl_offset);
+        break;
+    case 2:
+        /* With ACPI 2.0+, we must have an XSDT pointer */
+        g_assert(rsdp_data->xsdt_tbl_offset);
+        break;
+    default:
+        /* Only revisions 0 (ACPI 1.0) and 2 (ACPI 2.0+) are valid for RSDP */
+        g_assert_not_reached();
+    }
+
+    bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, tbl, 16,
+                             true /* fseg memory */);
+
+    g_array_append_vals(tbl, "RSD PTR ", 8); /* Signature */
+    build_append_int_noprefix(tbl, 0, 1); /* Checksum */
+    g_array_append_vals(tbl, rsdp_data->oem_id, 6); /* OEMID */
+    build_append_int_noprefix(tbl, rsdp_data->revision, 1); /* Revision */
+    build_append_int_noprefix(tbl, 0, 4); /* RsdtAddress */
+    if (rsdp_data->rsdt_tbl_offset) {
+        /* RSDT address to be filled by guest linker */
+        bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE,
+                                       tbl_off + 16, 4,
+                                       ACPI_BUILD_TABLE_FILE,
+                                       *rsdp_data->rsdt_tbl_offset);
+    }
+
+    /* Checksum to be filled by guest linker */
+    bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE,
+                                    tbl_off, 20, /* ACPI rev 1.0 RSDP size */
+                                    8);
+
+    if (rsdp_data->revision == 0) {
+        /* ACPI 1.0 RSDP, we're done */
+        return;
+    }
+
+    build_append_int_noprefix(tbl, 36, 4); /* Length */
+
+    /* XSDT address to be filled by guest linker */
+    build_append_int_noprefix(tbl, 0, 8); /* XsdtAddress */
+    /* We already validated our xsdt pointer */
+    bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE,
+                                   tbl_off + 24, 8,
+                                   ACPI_BUILD_TABLE_FILE,
+                                   *rsdp_data->xsdt_tbl_offset);
+
+    build_append_int_noprefix(tbl, 0, 1); /* Extended Checksum */
+    build_append_int_noprefix(tbl, 0, 3); /* Reserved */
+
+    /* Extended checksum to be filled by Guest linker */
+    bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE,
+                                    tbl_off, 36, /* ACPI rev 2.0 RSDP size */
+                                    32);
+}
+
 /* Build rsdt table */
 void
 build_rsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets,
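
For reference, the byte layout that build_rsdp() appends field by field
corresponds to the following packed struct; illustration only, with the
offsets derived from the appends above (they match the ACPI spec RSDP):

#include <stdint.h>

/* Illustration only: the RSDP layout produced by build_rsdp() above.
 * The builder appends each field individually; this struct just names
 * the resulting offsets. */
struct rsdp_layout {
    char     signature[8];  /* "RSD PTR "                      offset  0 */
    uint8_t  checksum;      /* covers the first 20 bytes       offset  8 */
    char     oem_id[6];     /*                                 offset  9 */
    uint8_t  revision;      /* 0 = ACPI 1.0, 2 = ACPI 2.0+     offset 15 */
    uint32_t rsdt_address;  /* patched in by the guest linker  offset 16 */
    /* ACPI 1.0 RSDP ends here (20 bytes); revision 2 continues: */
    uint32_t length;        /* 36                              offset 20 */
    uint64_t xsdt_address;  /* patched in by the guest linker  offset 24 */
    uint8_t  ext_checksum;  /* covers all 36 bytes             offset 32 */
    uint8_t  reserved[3];   /*                                 offset 33 */
} __attribute__((packed));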
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index 80d42e12ff..7bc7a72340 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -30,6 +30,7 @@
 #include "hw/hw.h"
 #include "hw/i386/pc.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
 #include "hw/acpi/acpi.h"
 #include "sysemu/sysemu.h"
 #include "exec/address-spaces.h"
@@ -153,6 +154,7 @@ static bool acpi_pcihp_pc_no_hotplug(AcpiPciHpState *s, PCIDevice *dev)
 
 static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slots)
 {
+    HotplugHandler *hotplug_ctrl;
     BusChild *kid, *next;
     int slot = ctz32(slots);
     PCIBus *bus = acpi_pcihp_find_hotplug_bus(s, bsel);
@@ -170,7 +172,8 @@ static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slo
         PCIDevice *dev = PCI_DEVICE(qdev);
         if (PCI_SLOT(dev->devfn) == slot) {
             if (!acpi_pcihp_pc_no_hotplug(s, dev)) {
-                object_unparent(OBJECT(qdev));
+                hotplug_ctrl = qdev_get_hotplug_handler(qdev);
+                hotplug_handler_unplug(hotplug_ctrl, qdev, &error_abort);
             }
         }
     }
@@ -217,25 +220,48 @@ void acpi_pcihp_reset(AcpiPciHpState *s)
     acpi_pcihp_update(s);
 }
 
-void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
-                               DeviceState *dev, Error **errp)
+void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev,
+                                   DeviceState *dev, Error **errp)
 {
-    PCIDevice *pdev = PCI_DEVICE(dev);
-    int slot = PCI_SLOT(pdev->devfn);
-    int bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev));
-    if (bsel < 0) {
+    /* Only hotplugged devices need the hotplug capability. */
+    if (dev->hotplugged &&
+        acpi_pcihp_get_bsel(pci_get_bus(PCI_DEVICE(dev))) < 0) {
         error_setg(errp, "Unsupported bus. Bus doesn't have property '"
                    ACPI_PCIHP_PROP_BSEL "' set");
         return;
     }
+}
+
+void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
+                               DeviceState *dev, Error **errp)
+{
+    PCIDevice *pdev = PCI_DEVICE(dev);
+    int slot = PCI_SLOT(pdev->devfn);
+    int bsel;
 
     /* Don't send event when device is enabled during qemu machine creation:
      * it is present on boot, no hotplug event is necessary. We do send an
      * event when the device is disabled later. */
     if (!dev->hotplugged) {
+        /*
+         * Overwrite the default hotplug handler with the ACPI PCI one
+         * for cold plugged bridges only.
+         */
+        if (!s->legacy_piix &&
+            object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
+            PCIBus *sec = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
+
+            qbus_set_hotplug_handler(BUS(sec), DEVICE(hotplug_dev),
+                                     &error_abort);
+            /* We don't have to overwrite any other hotplug handler yet */
+            assert(QLIST_EMPTY(&sec->child));
+        }
+
         return;
     }
 
+    bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev));
+    g_assert(bsel >= 0);
     s->acpi_pcihp_pci_status[bsel].up |= (1U << slot);
     acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS);
 }
@@ -243,6 +269,13 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
 void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
                                  DeviceState *dev, Error **errp)
 {
+    object_unparent(OBJECT(dev));
+}
+
+void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
+                                         AcpiPciHpState *s, DeviceState *dev,
+                                         Error **errp)
+{
     PCIDevice *pdev = PCI_DEVICE(dev);
     int slot = PCI_SLOT(pdev->devfn);
     int bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev));
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index e330f24c71..88f9a9ec09 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -173,6 +173,7 @@ static int vmstate_acpi_post_load(void *opaque, int version_id)
     PIIX4PMState *s = opaque;
 
     pm_io_space_update(s);
+    smbus_io_space_update(s);
     return 0;
 }
 
@@ -370,6 +371,18 @@ static void piix4_pm_powerdown_req(Notifier *n, void *opaque)
     acpi_pm1_evt_power_down(&s->ar);
 }
 
+static void piix4_device_pre_plug_cb(HotplugHandler *hotplug_dev,
+                                    DeviceState *dev, Error **errp)
+{
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+        acpi_pcihp_device_pre_plug_cb(hotplug_dev, dev, errp);
+    } else if (!object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) &&
+               !object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+        error_setg(errp, "acpi: device pre plug request for not supported"
+                   " device type: %s", object_get_typename(OBJECT(dev)));
+    }
+}
+
 static void piix4_device_plug_cb(HotplugHandler *hotplug_dev,
                                  DeviceState *dev, Error **errp)
 {
@@ -392,8 +405,7 @@ static void piix4_device_plug_cb(HotplugHandler *hotplug_dev,
             acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
         }
     } else {
-        error_setg(errp, "acpi: device plug request for not supported device"
-                   " type: %s", object_get_typename(OBJECT(dev)));
+        g_assert_not_reached();
     }
 }
 
@@ -407,8 +419,8 @@ static void piix4_device_unplug_request_cb(HotplugHandler *hotplug_dev,
         acpi_memory_unplug_request_cb(hotplug_dev, &s->acpi_memory_hotplug,
                                       dev, errp);
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
-        acpi_pcihp_device_unplug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev,
-                                    errp);
+        acpi_pcihp_device_unplug_request_cb(hotplug_dev, &s->acpi_pci_hotplug,
+                                            dev, errp);
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) &&
                !s->cpu_hotplug_legacy) {
         acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
@@ -426,6 +438,9 @@ static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev,
     if (s->acpi_memory_hotplug.is_enabled &&
         object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
         acpi_memory_unplug_cb(&s->acpi_memory_hotplug, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+        acpi_pcihp_device_unplug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev,
+                                    errp);
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) &&
                !s->cpu_hotplug_legacy) {
         acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp);
@@ -435,15 +450,6 @@ static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev,
     }
 }
 
-static void piix4_update_bus_hotplug(PCIBus *pci_bus, void *opaque)
-{
-    PIIX4PMState *s = opaque;
-
-    /* pci_bus cannot outlive PIIX4PMState, because /machine keeps it alive
-     * and it's not hot-unpluggable */
-    qbus_set_hotplug_handler(BUS(pci_bus), DEVICE(s), &error_abort);
-}
-
 static void piix4_pm_machine_ready(Notifier *n, void *opaque)
 {
     PIIX4PMState *s = container_of(n, PIIX4PMState, machine_ready);
@@ -457,12 +463,6 @@ static void piix4_pm_machine_ready(Notifier *n, void *opaque)
     pci_conf[0x63] = 0x60;
     pci_conf[0x67] = (memory_region_present(io_as, 0x3f8) ? 0x08 : 0) |
         (memory_region_present(io_as, 0x2f8) ? 0x90 : 0);
-
-    if (s->use_acpi_pci_hotplug) {
-        pci_for_each_bus(pci_get_bus(d), piix4_update_bus_hotplug, s);
-    } else {
-        piix4_update_bus_hotplug(pci_get_bus(d), s);
-    }
 }
 
 static void piix4_pm_add_propeties(PIIX4PMState *s)
@@ -536,6 +536,7 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp)
 
     piix4_acpi_system_hot_add_init(pci_address_space_io(dev),
                                    pci_get_bus(dev), s);
+    qbus_set_hotplug_handler(BUS(pci_get_bus(dev)), DEVICE(s), &error_abort);
 
     piix4_pm_add_propeties(s);
 }
@@ -702,6 +703,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data)
      */
     dc->user_creatable = false;
     dc->hotpluggable = false;
+    hc->pre_plug = piix4_device_pre_plug_cb;
     hc->plug = piix4_device_plug_cb;
     hc->unplug_request = piix4_device_unplug_request_cb;
     hc->unplug = piix4_device_unplug_cb;
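
A minimal sketch of the four-phase HotplugHandler contract that piix4 now
implements in full (the my_* names are hypothetical; HotplugHandlerClass and
its pre_plug/plug/unplug_request/unplug fields are the real QOM API):

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "hw/qdev-core.h"
    #include "hw/hotplug.h"

    static void my_pre_plug_cb(HotplugHandler *hotplug_dev,
                               DeviceState *dev, Error **errp)
    {
        /* validate dev before it is realized; set errp to reject it */
    }

    static void my_plug_cb(HotplugHandler *hotplug_dev,
                           DeviceState *dev, Error **errp)
    {
        /* dev is realized; raise the guest-visible hotplug event here */
    }

    static void my_unplug_request_cb(HotplugHandler *hotplug_dev,
                                     DeviceState *dev, Error **errp)
    {
        /* only notify the guest; dev stays until the guest ejects it */
    }

    static void my_unplug_cb(HotplugHandler *hotplug_dev,
                             DeviceState *dev, Error **errp)
    {
        object_unparent(OBJECT(dev)); /* final teardown, as pcihp does above */
    }

    static void my_class_init(ObjectClass *klass, void *data)
    {
        HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);

        hc->pre_plug       = my_pre_plug_cb;
        hc->plug           = my_plug_cb;
        hc->unplug_request = my_unplug_request_cb;
        hc->unplug         = my_unplug_cb;
    }
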
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 5785fb697c..95fad6f0ce 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -366,36 +366,6 @@ static void acpi_dsdt_add_power_button(Aml *scope)
     aml_append(scope, dev);
 }
 
-/* RSDP */
-static GArray *
-build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned xsdt_tbl_offset)
-{
-    AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp);
-    unsigned xsdt_pa_size = sizeof(rsdp->xsdt_physical_address);
-    unsigned xsdt_pa_offset =
-        (char *)&rsdp->xsdt_physical_address - rsdp_table->data;
-
-    bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, rsdp_table, 16,
-                             true /* fseg memory */);
-
-    memcpy(&rsdp->signature, "RSD PTR ", sizeof(rsdp->signature));
-    memcpy(rsdp->oem_id, ACPI_BUILD_APPNAME6, sizeof(rsdp->oem_id));
-    rsdp->length = cpu_to_le32(sizeof(*rsdp));
-    rsdp->revision = 0x02;
-
-    /* Address to be filled by Guest linker */
-    bios_linker_loader_add_pointer(linker,
-        ACPI_BUILD_RSDP_FILE, xsdt_pa_offset, xsdt_pa_size,
-        ACPI_BUILD_TABLE_FILE, xsdt_tbl_offset);
-
-    /* Checksum to be filled by Guest linker */
-    bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE,
-        (char *)rsdp - rsdp_table->data, sizeof *rsdp,
-        (char *)&rsdp->checksum - rsdp_table->data);
-
-    return rsdp_table;
-}
-
 static void
 build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
 {
@@ -854,7 +824,15 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
     build_xsdt(tables_blob, tables->linker, table_offsets, NULL, NULL);
 
     /* RSDP is in FSEG memory, so allocate it separately */
-    build_rsdp(tables->rsdp, tables->linker, xsdt);
+    {
+        AcpiRsdpData rsdp_data = {
+            .revision = 2,
+            .oem_id = ACPI_BUILD_APPNAME6,
+            .xsdt_tbl_offset = &xsdt,
+            .rsdt_tbl_offset = NULL,
+        };
+        build_rsdp(tables->rsdp, tables->linker, &rsdp_data);
+    }
 
     /* Cleanup memory that's no longer used. */
     g_array_free(table_offsets, true);
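
The initializers above imply a shared descriptor along these lines (declared
for the common build_rsdp() in hw/acpi/aml-build.c per the diffstat; the field
comments are an editorial reading of the two call sites, not the header):

    typedef struct AcpiRsdpData {
        uint8_t revision;          /* 0: ACPI 1.0 RSDP; 2: extended RSDP */
        const char *oem_id;        /* 6-char OEM ID, e.g. ACPI_BUILD_APPNAME6 */
        unsigned *rsdt_tbl_offset; /* RSDT offset, or NULL when revision >= 2 */
        unsigned *xsdt_tbl_offset; /* XSDT offset, or NULL when revision == 0 */
    } AcpiRsdpData;
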
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 5b678237b7..c2641e56ea 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -55,7 +55,7 @@
 #include "hw/intc/arm_gic.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "kvm_arm.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
 #include "qapi/visitor.h"
 #include "standard-headers/linux/input.h"
 #include "hw/arm/smmuv3.h"
diff --git a/hw/core/machine.c b/hw/core/machine.c
index c51423b647..4439ea663f 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -653,8 +653,10 @@ static void machine_class_base_init(ObjectClass *oc, void *data)
 static void machine_initfn(Object *obj)
 {
     MachineState *ms = MACHINE(obj);
+    MachineClass *mc = MACHINE_GET_CLASS(obj);
 
     ms->kernel_irqchip_allowed = true;
+    ms->kernel_irqchip_split = mc->default_kernel_irqchip_split;
     ms->kvm_shadow_mem = -1;
     ms->dump_guest_core = true;
     ms->mem_merge = true;
diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index bd84c4ea4c..943dc2654b 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -1297,3 +1297,179 @@ const PropertyInfo qdev_prop_off_auto_pcibar = {
     .set = set_enum,
     .set_default_value = set_default_value_enum,
 };
+
+/* --- PCIELinkSpeed 2_5/5/8/16 -- */
+
+static void get_prop_pcielinkspeed(Object *obj, Visitor *v, const char *name,
+                                   void *opaque, Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    PCIExpLinkSpeed *p = qdev_get_prop_ptr(dev, prop);
+    int speed;
+
+    switch (*p) {
+    case QEMU_PCI_EXP_LNK_2_5GT:
+        speed = PCIE_LINK_SPEED_2_5;
+        break;
+    case QEMU_PCI_EXP_LNK_5GT:
+        speed = PCIE_LINK_SPEED_5;
+        break;
+    case QEMU_PCI_EXP_LNK_8GT:
+        speed = PCIE_LINK_SPEED_8;
+        break;
+    case QEMU_PCI_EXP_LNK_16GT:
+        speed = PCIE_LINK_SPEED_16;
+        break;
+    default:
+        /* Unreachable */
+        abort();
+    }
+
+    visit_type_enum(v, prop->name, &speed, prop->info->enum_table, errp);
+}
+
+static void set_prop_pcielinkspeed(Object *obj, Visitor *v, const char *name,
+                                   void *opaque, Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    PCIExpLinkSpeed *p = qdev_get_prop_ptr(dev, prop);
+    int speed;
+    Error *local_err = NULL;
+
+    if (dev->realized) {
+        qdev_prop_set_after_realize(dev, name, errp);
+        return;
+    }
+
+    visit_type_enum(v, prop->name, &speed, prop->info->enum_table, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    switch (speed) {
+    case PCIE_LINK_SPEED_2_5:
+        *p = QEMU_PCI_EXP_LNK_2_5GT;
+        break;
+    case PCIE_LINK_SPEED_5:
+        *p = QEMU_PCI_EXP_LNK_5GT;
+        break;
+    case PCIE_LINK_SPEED_8:
+        *p = QEMU_PCI_EXP_LNK_8GT;
+        break;
+    case PCIE_LINK_SPEED_16:
+        *p = QEMU_PCI_EXP_LNK_16GT;
+        break;
+    default:
+        /* Unreachable */
+        abort();
+    }
+}
+
+const PropertyInfo qdev_prop_pcie_link_speed = {
+    .name = "PCIELinkSpeed",
+    .description = "2_5/5/8/16",
+    .enum_table = &PCIELinkSpeed_lookup,
+    .get = get_prop_pcielinkspeed,
+    .set = set_prop_pcielinkspeed,
+    .set_default_value = set_default_value_enum,
+};
+
+/* --- PCIELinkWidth 1/2/4/8/12/16/32 -- */
+
+static void get_prop_pcielinkwidth(Object *obj, Visitor *v, const char *name,
+                                   void *opaque, Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    PCIExpLinkWidth *p = qdev_get_prop_ptr(dev, prop);
+    int width;
+
+    switch (*p) {
+    case QEMU_PCI_EXP_LNK_X1:
+        width = PCIE_LINK_WIDTH_1;
+        break;
+    case QEMU_PCI_EXP_LNK_X2:
+        width = PCIE_LINK_WIDTH_2;
+        break;
+    case QEMU_PCI_EXP_LNK_X4:
+        width = PCIE_LINK_WIDTH_4;
+        break;
+    case QEMU_PCI_EXP_LNK_X8:
+        width = PCIE_LINK_WIDTH_8;
+        break;
+    case QEMU_PCI_EXP_LNK_X12:
+        width = PCIE_LINK_WIDTH_12;
+        break;
+    case QEMU_PCI_EXP_LNK_X16:
+        width = PCIE_LINK_WIDTH_16;
+        break;
+    case QEMU_PCI_EXP_LNK_X32:
+        width = PCIE_LINK_WIDTH_32;
+        break;
+    default:
+        /* Unreachable */
+        abort();
+    }
+
+    visit_type_enum(v, prop->name, &width, prop->info->enum_table, errp);
+}
+
+static void set_prop_pcielinkwidth(Object *obj, Visitor *v, const char *name,
+                                   void *opaque, Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    PCIExpLinkWidth *p = qdev_get_prop_ptr(dev, prop);
+    int width;
+    Error *local_err = NULL;
+
+    if (dev->realized) {
+        qdev_prop_set_after_realize(dev, name, errp);
+        return;
+    }
+
+    visit_type_enum(v, prop->name, &width, prop->info->enum_table, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    switch (width) {
+    case PCIE_LINK_WIDTH_1:
+        *p = QEMU_PCI_EXP_LNK_X1;
+        break;
+    case PCIE_LINK_WIDTH_2:
+        *p = QEMU_PCI_EXP_LNK_X2;
+        break;
+    case PCIE_LINK_WIDTH_4:
+        *p = QEMU_PCI_EXP_LNK_X4;
+        break;
+    case PCIE_LINK_WIDTH_8:
+        *p = QEMU_PCI_EXP_LNK_X8;
+        break;
+    case PCIE_LINK_WIDTH_12:
+        *p = QEMU_PCI_EXP_LNK_X12;
+        break;
+    case PCIE_LINK_WIDTH_16:
+        *p = QEMU_PCI_EXP_LNK_X16;
+        break;
+    case PCIE_LINK_WIDTH_32:
+        *p = QEMU_PCI_EXP_LNK_X32;
+        break;
+    default:
+        /* Unreachable */
+        abort();
+    }
+}
+
+const PropertyInfo qdev_prop_pcie_link_width = {
+    .name = "PCIELinkWidth",
+    .description = "1/2/4/8/12/16/32",
+    .enum_table = &PCIELinkWidth_lookup,
+    .get = get_prop_pcielinkwidth,
+    .set = set_prop_pcielinkwidth,
+    .set_default_value = set_default_value_enum,
+};
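
Devices declare properties of these new types through companion macros; a
sketch of expected usage, assuming DEFINE_PROP_PCIE_LINK_SPEED/WIDTH wrappers
land in qdev-properties.h alongside the PropertyInfo definitions above (see
hw/pci/pcie_port.c in the diffstat for the real consumer):

    static Property pcie_slot_props[] = {
        DEFINE_PROP_PCIE_LINK_SPEED("x-speed", PCIESlot,
                                    speed, QEMU_PCI_EXP_LNK_2_5GT),
        DEFINE_PROP_PCIE_LINK_WIDTH("x-width", PCIESlot,
                                    width, QEMU_PCI_EXP_LNK_X1),
        DEFINE_PROP_END_OF_LIST(),
    };
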
diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c
index cece4aa495..faf76a8bc4 100644
--- a/hw/display/virtio-gpu-pci.c
+++ b/hw/display/virtio-gpu-pci.c
@@ -69,9 +69,8 @@ static void virtio_gpu_initfn(Object *obj)
                                 TYPE_VIRTIO_GPU);
 }
 
-static const TypeInfo virtio_gpu_pci_info = {
-    .name = TYPE_VIRTIO_GPU_PCI,
-    .parent = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_gpu_pci_info = {
+    .generic_name = TYPE_VIRTIO_GPU_PCI,
     .instance_size = sizeof(VirtIOGPUPCI),
     .instance_init = virtio_gpu_initfn,
     .class_init = virtio_gpu_pci_class_init,
@@ -79,6 +78,6 @@ static const TypeInfo virtio_gpu_pci_info = {
 
 static void virtio_gpu_pci_register_types(void)
 {
-    type_register_static(&virtio_gpu_pci_info);
+    virtio_pci_types_register(&virtio_gpu_pci_info);
 }
 type_init(virtio_gpu_pci_register_types)
diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c
index ab2e369b28..8db4d916f2 100644
--- a/hw/display/virtio-vga.c
+++ b/hw/display/virtio-vga.c
@@ -207,9 +207,8 @@ static void virtio_vga_inst_initfn(Object *obj)
                                 TYPE_VIRTIO_GPU);
 }
 
-static TypeInfo virtio_vga_info = {
-    .name          = TYPE_VIRTIO_VGA,
-    .parent        = TYPE_VIRTIO_PCI,
+static VirtioPCIDeviceTypeInfo virtio_vga_info = {
+    .generic_name  = TYPE_VIRTIO_VGA,
     .instance_size = sizeof(struct VirtIOVGA),
     .instance_init = virtio_vga_inst_initfn,
     .class_init    = virtio_vga_class_init,
@@ -217,7 +216,7 @@ static TypeInfo virtio_vga_info = {
 
 static void virtio_vga_register_types(void)
 {
-    type_register_static(&virtio_vga_info);
+    virtio_pci_types_register(&virtio_vga_info);
 }
 
 type_init(virtio_vga_register_types)
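
Both conversions feed virtio_pci_types_register(), which registers the
generic, transitional and non-transitional variants of a device from one
descriptor (see the virtio-pci.h changes in the diffstat). An assumed shape
of that descriptor, showing only the fields the two call sites above exercise
(the real header may carry more):

    typedef struct VirtioPCIDeviceTypeInfo {
        const char *generic_name;          /* e.g. TYPE_VIRTIO_GPU_PCI */
        const char *transitional_name;     /* legacy+modern variant, if any */
        const char *non_transitional_name; /* modern-only variant, if any */
        size_t instance_size;
        void (*instance_init)(Object *obj);
        void (*class_init)(ObjectClass *klass, void *data);
    } VirtioPCIDeviceTypeInfo;
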
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 236a20eaa8..14f757fc36 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2426,7 +2426,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
     IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu);
 
     assert(iommu);
-    if (iommu->intr_supported) {
+    if (x86_iommu_ir_supported(iommu)) {
         dmar_flags |= 0x1;      /* Flags: 0x1: INT_REMAP */
     }
 
@@ -2499,7 +2499,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
      * When interrupt remapping is supported, we add a special IVHD device
      * for type IO-APIC.
      */
-    if (x86_iommu_get_default()->intr_supported) {
+    if (x86_iommu_ir_supported(x86_iommu_get_default())) {
         ivhd_table_len += 8;
     }
     /* IVHD length */
@@ -2535,7 +2535,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
      * Linux IOMMU driver checks for the special IVHD device (type IO-APIC).
      * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059'
      */
-    if (x86_iommu_get_default()->intr_supported) {
+    if (x86_iommu_ir_supported(x86_iommu_get_default())) {
         build_append_int_noprefix(table_data,
                                  (0x1ull << 56) |           /* type IOAPIC */
                                  (IOAPIC_SB_DEVID << 40) |  /* IOAPIC devid */
@@ -2547,32 +2547,6 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
                  "IVRS", table_data->len - iommu_start, 1, NULL, NULL);
 }
 
-static GArray *
-build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset)
-{
-    AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp);
-    unsigned rsdt_pa_size = sizeof(rsdp->rsdt_physical_address);
-    unsigned rsdt_pa_offset =
-        (char *)&rsdp->rsdt_physical_address - rsdp_table->data;
-
-    bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, rsdp_table, 16,
-                             true /* fseg memory */);
-
-    memcpy(&rsdp->signature, "RSD PTR ", 8);
-    memcpy(rsdp->oem_id, ACPI_BUILD_APPNAME6, 6);
-    /* Address to be filled by Guest linker */
-    bios_linker_loader_add_pointer(linker,
-        ACPI_BUILD_RSDP_FILE, rsdt_pa_offset, rsdt_pa_size,
-        ACPI_BUILD_TABLE_FILE, rsdt_tbl_offset);
-
-    /* Checksum to be filled by Guest linker */
-    bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE,
-        (char *)rsdp - rsdp_table->data, sizeof *rsdp,
-        (char *)&rsdp->checksum - rsdp_table->data);
-
-    return rsdp_table;
-}
-
 typedef
 struct AcpiBuildState {
     /* Copy of table in RAM (for patching). */
@@ -2729,7 +2703,25 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
                slic_oem.id, slic_oem.table_id);
 
     /* RSDP is in FSEG memory, so allocate it separately */
-    build_rsdp(tables->rsdp, tables->linker, rsdt);
+    {
+        AcpiRsdpData rsdp_data = {
+            .revision = 0,
+            .oem_id = ACPI_BUILD_APPNAME6,
+            .xsdt_tbl_offset = NULL,
+            .rsdt_tbl_offset = &rsdt,
+        };
+        build_rsdp(tables->rsdp, tables->linker, &rsdp_data);
+        if (!pcmc->rsdp_in_ram) {
+            /* We used to allocate some extra space for RSDP revision 2 but
+             * only used the RSDP revision 0 space. The extra bytes were
+             * zeroed out and not used.
+             * Here we continue wasting those extra 16 bytes to make sure we
+             * don't break migration for machine types 2.2 and older due to
+             * RSDP blob size mismatch.
+             */
+            build_append_int_noprefix(tables->rsdp, 0, 16);
+        }
+    }
 
     /* We'll expose it all to Guest so we want to reduce
      * chance of size changes.
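
The 16 bytes in the migration-compat comment above follow from the standard
ACPI layouts: a revision-0 RSDP is 20 bytes and the extended revision-2
descriptor is 36, so padding the rev-0 blob by 16 keeps its size unchanged.
A compile-time restatement of that arithmetic (illustrative only):

    /* rev-2 RSDP (36 bytes) minus rev-0 RSDP (20 bytes) = the 16-byte pad */
    QEMU_BUILD_BUG_ON(36 - 20 != 16);
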
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 353a810e6b..8ad707aba0 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1233,7 +1233,7 @@ static int amdvi_int_remap_msi(AMDVIState *iommu,
     }
 
     /* validate that we are configured with intremap=on */
-    if (!X86_IOMMU_DEVICE(iommu)->intr_supported) {
+    if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) {
         trace_amdvi_err("Interrupt remapping is enabled in the guest but "
                         "not in the host. Use intremap=on to enable interrupt "
                         "remapping in amd-iommu.");
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index d97bcbc2f7..8b72735650 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -524,7 +524,6 @@ static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
 
     addr = s->root + index * sizeof(*re);
     if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) {
-        trace_vtd_re_invalid(re->rsvd, re->val);
         re->val = 0;
         return -VTD_FR_ROOT_TABLE_INV;
     }
@@ -545,7 +544,6 @@ static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index,
     /* we have checked that root entry is present */
     addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce);
     if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) {
-        trace_vtd_re_invalid(root->rsvd, root->val);
         return -VTD_FR_CONTEXT_TABLE_INV;
     }
     ce->lo = le64_to_cpu(ce->lo);
@@ -630,16 +628,20 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu,
         break;
     case VTD_CONTEXT_TT_DEV_IOTLB:
         if (!x86_iommu->dt_supported) {
+            error_report_once("%s: DT specified but not supported", __func__);
             return false;
         }
         break;
     case VTD_CONTEXT_TT_PASS_THROUGH:
         if (!x86_iommu->pt_supported) {
+            error_report_once("%s: PT specified but not supported", __func__);
             return false;
         }
         break;
     default:
         /* Unknown type */
+        error_report_once("%s: unknown ce type: %"PRIu32, __func__,
+                          vtd_ce_get_type(ce));
         return false;
     }
     return true;
@@ -1003,7 +1005,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
     }
 
     if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(s->aw_bits))) {
-        trace_vtd_re_invalid(re.rsvd, re.val);
+        error_report_once("%s: invalid root entry: rsvd=0x%"PRIx64
+                          ", val=0x%"PRIx64" (reserved nonzero)",
+                          __func__, re.rsvd, re.val);
         return -VTD_FR_ROOT_ENTRY_RSVD;
     }
 
@@ -1020,19 +1024,23 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
 
     if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
                (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->aw_bits))) {
-        trace_vtd_ce_invalid(ce->hi, ce->lo);
+        error_report_once("%s: invalid context entry: hi=%"PRIx64
+                          ", lo=%"PRIx64" (reserved nonzero)",
+                          __func__, ce->hi, ce->lo);
         return -VTD_FR_CONTEXT_ENTRY_RSVD;
     }
 
     /* Check if the programming of context-entry is valid */
     if (!vtd_is_level_supported(s, vtd_ce_get_level(ce))) {
-        trace_vtd_ce_invalid(ce->hi, ce->lo);
+        error_report_once("%s: invalid context entry: hi=%"PRIx64
+                          ", lo=%"PRIx64" (level %d not supported)",
+                          __func__, ce->hi, ce->lo, vtd_ce_get_level(ce));
         return -VTD_FR_CONTEXT_ENTRY_INV;
     }
 
     /* Do translation type check */
     if (!vtd_ce_type_check(x86_iommu, ce)) {
-        trace_vtd_ce_invalid(ce->hi, ce->lo);
+        /* Errors dumped in vtd_ce_type_check() */
         return -VTD_FR_CONTEXT_ENTRY_INV;
     }
 
@@ -1878,7 +1886,9 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
 {
     if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) ||
         (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) {
-        trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
+        error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64
+                          " (reserved nonzero)", __func__, inv_desc->hi,
+                          inv_desc->lo);
         return false;
     }
     if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) {
@@ -1901,7 +1911,9 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
         /* Interrupt flag */
         vtd_generate_completion_event(s);
     } else {
-        trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
+        error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64
+                          " (unknown type)", __func__, inv_desc->hi,
+                          inv_desc->lo);
         return false;
     }
     return true;
@@ -1913,7 +1925,9 @@ static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
     uint16_t sid, fmask;
 
     if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) {
-        trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
+        error_report_once("%s: invalid cc inv desc: hi=%"PRIx64", lo=%"PRIx64
+                          " (reserved nonzero)", __func__, inv_desc->hi,
+                          inv_desc->lo);
         return false;
     }
     switch (inv_desc->lo & VTD_INV_DESC_CC_G) {
@@ -1932,7 +1946,9 @@ static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
         break;
 
     default:
-        trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
+        error_report_once("%s: invalid cc inv desc: hi=%"PRIx64", lo=%"PRIx64
+                          " (invalid type)", __func__, inv_desc->hi,
+                          inv_desc->lo);
         return false;
     }
     return true;
@@ -1946,7 +1962,9 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
 
     if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) ||
         (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) {
-        trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
+        error_report_once("%s: invalid iotlb inv desc: hi=0x%"PRIx64
+                          ", lo=0x%"PRIx64" (reserved bits unzero)\n",
+                          __func__, inv_desc->hi, inv_desc->lo);
         return false;
     }
 
@@ -1965,14 +1983,20 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
         addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi);
         am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi);
         if (am > VTD_MAMV) {
-            trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
+            error_report_once("%s: invalid iotlb inv desc: hi=0x%"PRIx64
+                              ", lo=0x%"PRIx64" (am=%u > VTD_MAMV=%u)\n",
+                              __func__, inv_desc->hi, inv_desc->lo,
+                              am, (unsigned)VTD_MAMV);
             return false;
         }
         vtd_iotlb_page_invalidate(s, domain_id, addr, am);
         break;
 
     default:
-        trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
+        error_report_once("%s: invalid iotlb inv desc: hi=0x%"PRIx64
+                          ", lo=0x%"PRIx64" (type mismatch: 0x%llx)\n",
+                          __func__, inv_desc->hi, inv_desc->lo,
+                          inv_desc->lo & VTD_INV_DESC_IOTLB_G);
         return false;
     }
     return true;
@@ -2012,7 +2036,9 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
 
     if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
         (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
-        trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
+        error_report_once("%s: invalid dev-iotlb inv desc: hi=%"PRIx64
+                          ", lo=%"PRIx64" (reserved nonzero)", __func__,
+                          inv_desc->hi, inv_desc->lo);
         return false;
     }
 
@@ -2103,7 +2129,9 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
         break;
 
     default:
-        trace_vtd_inv_desc_invalid(inv_desc.hi, inv_desc.lo);
+        error_report_once("%s: invalid inv desc: hi=%"PRIx64", lo=%"PRIx64
+                          " (unknown type)", __func__, inv_desc.hi,
+                          inv_desc.lo);
         return false;
     }
     s->iq_head++;
@@ -2540,7 +2568,7 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
                           __func__, pci_bus_num(vtd_as->bus),
                           VTD_PCI_SLOT(vtd_as->devfn),
                           VTD_PCI_FUNC(vtd_as->devfn),
-                          iotlb.iova);
+                          addr);
     }
 
     return iotlb;
@@ -2628,9 +2656,10 @@ static Property vtd_properties[] = {
     DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
                             ON_OFF_AUTO_AUTO),
     DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
-    DEFINE_PROP_UINT8("x-aw-bits", IntelIOMMUState, aw_bits,
+    DEFINE_PROP_UINT8("aw-bits", IntelIOMMUState, aw_bits,
                       VTD_HOST_ADDRESS_WIDTH),
     DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
+    DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -3119,6 +3148,9 @@ static void vtd_init(IntelIOMMUState *s)
     s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
              VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
              VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
+    if (s->dma_drain) {
+        s->cap |= VTD_CAP_DRAIN;
+    }
     if (s->aw_bits == VTD_HOST_AW_48BIT) {
         s->cap |= VTD_CAP_SAGAW_48bit;
     }
@@ -3137,7 +3169,7 @@ static void vtd_init(IntelIOMMUState *s)
     vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits);
     vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->aw_bits);
 
-    if (x86_iommu->intr_supported) {
+    if (x86_iommu_ir_supported(x86_iommu)) {
         s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
         if (s->intr_eim == ON_OFF_AUTO_ON) {
             s->ecap |= VTD_ECAP_EIM;
@@ -3238,14 +3270,14 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
 {
     X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
 
-    if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu->intr_supported) {
+    if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu_ir_supported(x86_iommu)) {
         error_setg(errp, "eim=on cannot be selected without intremap=on");
         return false;
     }
 
     if (s->intr_eim == ON_OFF_AUTO_AUTO) {
         s->intr_eim = (kvm_irqchip_in_kernel() || s->buggy_eim)
-                      && x86_iommu->intr_supported ?
+                      && x86_iommu_ir_supported(x86_iommu) ?
                                               ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
     }
     if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
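
The conversions above trade per-event trace points for error_report_once(),
which logs only the first occurrence at each call site and is therefore safe
on guest-triggerable error paths. A minimal usage sketch (check_desc() is
hypothetical; the macro itself comes from qemu/error-report.h):

    #include "qemu/osdep.h"
    #include "qemu/error-report.h"

    static bool check_desc(uint64_t hi, uint64_t lo)
    {
        if (hi || lo) {
            /* printed once, no matter how often the guest trips this */
            error_report_once("invalid desc: hi=0x%"PRIx64" lo=0x%"PRIx64,
                              hi, lo);
            return false;
        }
        return true;
    }
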
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index d084099ed9..00e9edbc66 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -203,6 +203,9 @@
 #define VTD_CAP_MAMV                (VTD_MAMV << 48)
 #define VTD_CAP_PSI                 (1ULL << 39)
 #define VTD_CAP_SLLPS               ((1ULL << 34) | (1ULL << 35))
+#define VTD_CAP_DRAIN_WRITE         (1ULL << 54)
+#define VTD_CAP_DRAIN_READ          (1ULL << 55)
+#define VTD_CAP_DRAIN               (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
 #define VTD_CAP_CM                  (1ULL << 7)
 
 /* Supported Adjusted Guest Address Widths */
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 115bc2825c..f248662e97 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -37,7 +37,7 @@
 #include "hw/pci/pci_bus.h"
 #include "hw/nvram/fw_cfg.h"
 #include "hw/timer/hpet.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
 #include "hw/loader.h"
 #include "elf.h"
 #include "multiboot.h"
@@ -1244,7 +1244,7 @@ void pc_machine_done(Notifier *notifier, void *data)
     if (pcms->apic_id_limit > 255 && !xen_enabled()) {
         IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
 
-        if (!iommu || !iommu->x86_iommu.intr_supported ||
+        if (!iommu || !x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu)) ||
             iommu->intr_eim != ON_OFF_AUTO_ON) {
             error_report("current -smp configuration requires "
                          "Extended Interrupt Mode enabled. "
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 6981cfa740..7f1cb527b5 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -30,7 +30,7 @@
 #include "hw/i386/pc.h"
 #include "hw/i386/apic.h"
 #include "hw/display/ramfb.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_ids.h"
 #include "hw/usb.h"
@@ -368,7 +368,7 @@ static void pc_compat_1_2(MachineState *machine)
     x86_cpu_change_kvm_default("kvm-pv-eoi", NULL);
 }
 
-/* PC compat function for pc-0.10 to pc-0.13 */
+/* PC compat function for pc-0.12 and pc-0.13 */
 static void pc_compat_0_13(MachineState *machine)
 {
     pc_compat_1_2(machine);
@@ -834,6 +834,7 @@ static void pc_i440fx_0_15_machine_options(MachineClass *m)
 {
     pc_i440fx_1_0_machine_options(m);
     m->hw_version = "0.15";
+    m->deprecation_reason = "use a newer machine type instead";
     SET_MACHINE_COMPAT(m, PC_COMPAT_0_15);
 }
 
@@ -951,73 +952,6 @@ static void pc_i440fx_0_12_machine_options(MachineClass *m)
 DEFINE_I440FX_MACHINE(v0_12, "pc-0.12", pc_compat_0_13,
                       pc_i440fx_0_12_machine_options);
 
-
-#define PC_COMPAT_0_11 \
-        PC_CPU_MODEL_IDS("0.11") \
-        {\
-            .driver   = "virtio-blk-pci",\
-            .property = "vectors",\
-            .value    = stringify(0),\
-        },{\
-            .driver   = TYPE_PCI_DEVICE,\
-            .property = "rombar",\
-            .value    = stringify(0),\
-        },{\
-            .driver   = "ide-drive",\
-            .property = "ver",\
-            .value    = "0.11",\
-        },{\
-            .driver   = "scsi-disk",\
-            .property = "ver",\
-            .value    = "0.11",\
-        },
-
-static void pc_i440fx_0_11_machine_options(MachineClass *m)
-{
-    pc_i440fx_0_12_machine_options(m);
-    m->hw_version = "0.11";
-    m->deprecation_reason = "use a newer machine type instead";
-    SET_MACHINE_COMPAT(m, PC_COMPAT_0_11);
-}
-
-DEFINE_I440FX_MACHINE(v0_11, "pc-0.11", pc_compat_0_13,
-                      pc_i440fx_0_11_machine_options);
-
-
-#define PC_COMPAT_0_10 \
-    PC_CPU_MODEL_IDS("0.10") \
-    {\
-        .driver   = "virtio-blk-pci",\
-        .property = "class",\
-        .value    = stringify(PCI_CLASS_STORAGE_OTHER),\
-    },{\
-        .driver   = "virtio-serial-pci",\
-        .property = "class",\
-        .value    = stringify(PCI_CLASS_DISPLAY_OTHER),\
-    },{\
-        .driver   = "virtio-net-pci",\
-        .property = "vectors",\
-        .value    = stringify(0),\
-    },{\
-        .driver   = "ide-drive",\
-        .property = "ver",\
-        .value    = "0.10",\
-    },{\
-        .driver   = "scsi-disk",\
-        .property = "ver",\
-        .value    = "0.10",\
-    },
-
-static void pc_i440fx_0_10_machine_options(MachineClass *m)
-{
-    pc_i440fx_0_11_machine_options(m);
-    m->hw_version = "0.10";
-    SET_MACHINE_COMPAT(m, PC_COMPAT_0_10);
-}
-
-DEFINE_I440FX_MACHINE(v0_10, "pc-0.10", pc_compat_0_13,
-                      pc_i440fx_0_10_machine_options);
-
 typedef struct {
     uint16_t gpu_device_id;
     uint16_t pch_device_id;
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 58459bdab5..0a3f3f18e4 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -47,7 +47,7 @@
 #include "hw/i386/amd_iommu.h"
 #include "hw/i386/intel_iommu.h"
 #include "hw/display/ramfb.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
 #include "hw/ide/pci.h"
 #include "hw/ide/ahci.h"
 #include "hw/usb.h"
@@ -304,6 +304,7 @@ static void pc_q35_machine_options(MachineClass *m)
     m->units_per_default_bus = 1;
     m->default_machine_opts = "firmware=bios-256k.bin";
     m->default_display = "std";
+    m->default_kernel_irqchip_split = true;
     m->no_floppy = 1;
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE);
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE);
@@ -323,6 +324,7 @@ DEFINE_Q35_MACHINE(v4_0, "pc-q35-4.0", NULL,
 static void pc_q35_3_1_machine_options(MachineClass *m)
 {
     pc_q35_4_0_machine_options(m);
+    m->default_kernel_irqchip_split = false;
     m->alias = NULL;
     SET_MACHINE_COMPAT(m, PC_COMPAT_3_1);
 }
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 6ac347d18c..77244fc384 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -5,19 +5,15 @@ x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC inv
 
 # hw/i386/intel_iommu.c
 vtd_inv_desc(const char *type, uint64_t hi, uint64_t lo) "invalidate desc type %s high 0x%"PRIx64" low 0x%"PRIx64
-vtd_inv_desc_invalid(uint64_t hi, uint64_t lo) "invalid inv desc hi 0x%"PRIx64" lo 0x%"PRIx64
 vtd_inv_desc_cc_domain(uint16_t domain) "context invalidate domain 0x%"PRIx16
 vtd_inv_desc_cc_global(void) "context invalidate globally"
 vtd_inv_desc_cc_device(uint8_t bus, uint8_t dev, uint8_t fn) "context invalidate device %02"PRIx8":%02"PRIx8".%02"PRIx8
 vtd_inv_desc_cc_devices(uint16_t sid, uint16_t fmask) "context invalidate devices sid 0x%"PRIx16" fmask 0x%"PRIx16
-vtd_inv_desc_cc_invalid(uint64_t hi, uint64_t lo) "invalid context-cache desc hi 0x%"PRIx64" lo 0x%"PRIx64
 vtd_inv_desc_iotlb_global(void) "iotlb invalidate global"
 vtd_inv_desc_iotlb_domain(uint16_t domain) "iotlb invalidate whole domain 0x%"PRIx16
 vtd_inv_desc_iotlb_pages(uint16_t domain, uint64_t addr, uint8_t mask) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8
-vtd_inv_desc_iotlb_invalid(uint64_t hi, uint64_t lo) "invalid iotlb desc hi 0x%"PRIx64" lo 0x%"PRIx64
 vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write addr 0x%"PRIx64" data 0x%"PRIx32
 vtd_inv_desc_wait_irq(const char *msg) "%s"
-vtd_inv_desc_wait_invalid(uint64_t hi, uint64_t lo) "invalid wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
 vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
 vtd_inv_desc_iec(uint32_t granularity, uint32_t index, uint32_t mask) "granularity 0x%"PRIx32" index 0x%"PRIx32" mask 0x%"PRIx32
 vtd_inv_qi_enable(bool enable) "enabled %d"
@@ -27,9 +23,7 @@ vtd_inv_qi_tail(uint16_t head) "write tail %d"
 vtd_inv_qi_fetch(void) ""
 vtd_context_cache_reset(void) ""
 vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
-vtd_re_invalid(uint64_t hi, uint64_t lo) "invalid root entry hi 0x%"PRIx64" lo 0x%"PRIx64
 vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
-vtd_ce_invalid(uint64_t hi, uint64_t lo) "invalid context entry hi 0x%"PRIx64" lo 0x%"PRIx64
 vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
 vtd_iotlb_page_update(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page update sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
 vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen) "IOTLB context hit bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index abc3c03158..d1534c1ae0 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -112,6 +112,7 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
     PCMachineState *pcms =
         PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE));
     QLIST_INIT(&x86_iommu->iec_notifiers);
+    bool irq_all_kernel = kvm_irqchip_in_kernel() && !kvm_irqchip_is_split();
 
     if (!pcms || !pcms->bus) {
         error_setg(errp, "Machine-type '%s' not supported by IOMMU",
@@ -119,9 +120,14 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    /* If the user didn't specify IR, choose a default value for it */
+    if (x86_iommu->intr_supported == ON_OFF_AUTO_AUTO) {
+        x86_iommu->intr_supported = irq_all_kernel ?
+            ON_OFF_AUTO_OFF : ON_OFF_AUTO_ON;
+    }
+
     /* Both Intel and AMD IOMMU IR only support "kernel-irqchip={off|split}" */
-    if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() &&
-        !kvm_irqchip_is_split()) {
+    if (x86_iommu_ir_supported(x86_iommu) && irq_all_kernel) {
         error_setg(errp, "Interrupt Remapping cannot work with "
                          "kernel-irqchip=on, please use 'split|off'.");
         return;
@@ -135,7 +141,8 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
 }
 
 static Property x86_iommu_properties[] = {
-    DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false),
+    DEFINE_PROP_ON_OFF_AUTO("intremap", X86IOMMUState,
+                            intr_supported, ON_OFF_AUTO_AUTO),
     DEFINE_PROP_BOOL("device-iotlb", X86IOMMUState, dt_supported, false),
     DEFINE_PROP_BOOL("pt", X86IOMMUState, pt_supported, true),
     DEFINE_PROP_END_OF_LIST(),
@@ -148,6 +155,11 @@ static void x86_iommu_class_init(ObjectClass *klass, void *data)
     dc->props = x86_iommu_properties;
 }
 
+bool x86_iommu_ir_supported(X86IOMMUState *s)
+{
+    return s->intr_supported == ON_OFF_AUTO_ON;
+}
+
 static const TypeInfo x86_iommu_info = {
     .name          = TYPE_X86_IOMMU_DEVICE,
     .parent        = TYPE_SYS_BUS_DEVICE,
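
Note that intr_supported is now an OnOffAuto enum rather than a bool, so a
raw truth test such as the old "if (iommu->intr_supported)" would also fire
for ON_OFF_AUTO_OFF (a nonzero value); every reader is therefore converted to
the accessor. An illustrative caller, not taken from the patch:

    #include "qemu/osdep.h"
    #include "hw/i386/x86-iommu.h"

    /* safe only after realize has resolved ON_OFF_AUTO_AUTO */
    static bool iommu_ir_enabled(X86IOMMUState *s)
    {
        return x86_iommu_ir_supported(s);
    }
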
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 0e9963f5ee..301a8e972d 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -37,6 +37,8 @@ obj-$(CONFIG_SH4) += sh_intc.o
 obj-$(CONFIG_XICS) += xics.o
 obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
 obj-$(CONFIG_XICS_KVM) += xics_kvm.o
+obj-$(CONFIG_XIVE) += xive.o
+obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
 obj-$(CONFIG_POWERNV) += xics_pnv.o
 obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
 obj-$(CONFIG_S390_FLIC) += s390_flic.o
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
new file mode 100644
index 0000000000..0e39c90cbd
--- /dev/null
+++ b/hw/intc/spapr_xive.c
@@ -0,0 +1,1486 @@
+/*
+ * QEMU PowerPC sPAPR XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "target/ppc/cpu.h"
+#include "sysemu/cpus.h"
+#include "monitor/monitor.h"
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_xive.h"
+#include "hw/ppc/xive.h"
+#include "hw/ppc/xive_regs.h"
+
+/*
+ * XIVE Virtualization Controller BAR and Thread Management BAR that we
+ * use for the ESB pages and the TIMA pages
+ */
+#define SPAPR_XIVE_VC_BASE   0x0006010000000000ull
+#define SPAPR_XIVE_TM_BASE   0x0006030203180000ull
+
+/*
+ * The allocation of VP blocks is a complex operation in OPAL and the
+ * VP identifiers have a relation with the number of HW chips, the
+ * size of the VP blocks, VP grouping, etc. The QEMU sPAPR XIVE
+ * controller model does not have the same constraints and can use a
+ * simple mapping scheme of the CPU vcpu_id
+ *
+ * These identifiers are never returned to the OS.
+ */
+
+#define SPAPR_XIVE_NVT_BASE 0x400
+
+/*
+ * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
+ * to the controller block id. It can nevertheless be changed
+ * for testing purposes.
+ */
+#define SPAPR_XIVE_BLOCK_ID 0x0
+
+/*
+ * sPAPR NVT and END indexing helpers
+ */
+static uint32_t spapr_xive_nvt_to_target(uint8_t nvt_blk, uint32_t nvt_idx)
+{
+    return nvt_idx - SPAPR_XIVE_NVT_BASE;
+}
+
+static void spapr_xive_cpu_to_nvt(PowerPCCPU *cpu,
+                                  uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
+{
+    assert(cpu);
+
+    if (out_nvt_blk) {
+        *out_nvt_blk = SPAPR_XIVE_BLOCK_ID;
+    }
+
+    if (out_nvt_idx) {
+        *out_nvt_idx = SPAPR_XIVE_NVT_BASE + cpu->vcpu_id;
+    }
+}
+
+static int spapr_xive_target_to_nvt(uint32_t target,
+                                    uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
+{
+    PowerPCCPU *cpu = spapr_find_cpu(target);
+
+    if (!cpu) {
+        return -1;
+    }
+
+    spapr_xive_cpu_to_nvt(cpu, out_nvt_blk, out_nvt_idx);
+    return 0;
+}
+
+/*
+ * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
+ * priorities per CPU
+ */
+static void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
+                                  uint8_t *out_end_blk, uint32_t *out_end_idx)
+{
+    assert(cpu);
+
+    if (out_end_blk) {
+        *out_end_blk = SPAPR_XIVE_BLOCK_ID;
+    }
+
+    if (out_end_idx) {
+        *out_end_idx = (cpu->vcpu_id << 3) + prio;
+    }
+}
+
+static int spapr_xive_target_to_end(uint32_t target, uint8_t prio,
+                                    uint8_t *out_end_blk, uint32_t *out_end_idx)
+{
+    PowerPCCPU *cpu = spapr_find_cpu(target);
+
+    if (!cpu) {
+        return -1;
+    }
+
+    spapr_xive_cpu_to_end(cpu, prio, out_end_blk, out_end_idx);
+    return 0;
+}
+
+/*
+ * On sPAPR machines, use a simplified output for the XIVE END
+ * structure, dumping only the information related to the OS EQ.
+ */
+static void spapr_xive_end_pic_print_info(sPAPRXive *xive, XiveEND *end,
+                                          Monitor *mon)
+{
+    uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+    uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
+    uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
+    uint32_t qentries = 1 << (qsize + 10);
+    uint32_t nvt = xive_get_field32(END_W6_NVT_INDEX, end->w6);
+    uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
+
+    monitor_printf(mon, "%3d/%d % 6d/%5d ^%d",
+                   spapr_xive_nvt_to_target(0, nvt),
+                   priority, qindex, qentries, qgen);
+
+    xive_end_queue_pic_print_info(end, 6, mon);
+    monitor_printf(mon, "]");
+}
+
+void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
+{
+    XiveSource *xsrc = &xive->source;
+    int i;
+
+    monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
+
+    for (i = 0; i < xive->nr_irqs; i++) {
+        uint8_t pq = xive_source_esb_get(xsrc, i);
+        XiveEAS *eas = &xive->eat[i];
+
+        if (!xive_eas_is_valid(eas)) {
+            continue;
+        }
+
+        monitor_printf(mon, "  %08x %s %c%c%c %s %08x ", i,
+                       xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI",
+                       pq & XIVE_ESB_VAL_P ? 'P' : '-',
+                       pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
+                       xsrc->status[i] & XIVE_STATUS_ASSERTED ? 'A' : ' ',
+                       xive_eas_is_masked(eas) ? "M" : " ",
+                       (int) xive_get_field64(EAS_END_DATA, eas->w));
+
+        if (!xive_eas_is_masked(eas)) {
+            uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+            XiveEND *end;
+
+            assert(end_idx < xive->nr_ends);
+            end = &xive->endt[end_idx];
+
+            if (xive_end_is_valid(end)) {
+                spapr_xive_end_pic_print_info(xive, end, mon);
+            }
+        }
+        monitor_printf(mon, "\n");
+    }
+}
+
+static void spapr_xive_map_mmio(sPAPRXive *xive)
+{
+    sysbus_mmio_map(SYS_BUS_DEVICE(xive), 0, xive->vc_base);
+    sysbus_mmio_map(SYS_BUS_DEVICE(xive), 1, xive->end_base);
+    sysbus_mmio_map(SYS_BUS_DEVICE(xive), 2, xive->tm_base);
+}
+
+/*
+ * When a Virtual Processor is scheduled to run on a HW thread, the
+ * hypervisor pushes its identifier in the OS CAM line. Emulate the
+ * same behavior under QEMU.
+ */
+void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx)
+{
+    uint8_t  nvt_blk;
+    uint32_t nvt_idx;
+    uint32_t nvt_cam;
+
+    spapr_xive_cpu_to_nvt(POWERPC_CPU(tctx->cs), &nvt_blk, &nvt_idx);
+
+    nvt_cam = cpu_to_be32(TM_QW1W2_VO | xive_nvt_cam_line(nvt_blk, nvt_idx));
+    memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &nvt_cam, 4);
+}
+
+static void spapr_xive_end_reset(XiveEND *end)
+{
+    memset(end, 0, sizeof(*end));
+
+    /* switch off the escalation and notification ESBs */
+    end->w1 = cpu_to_be32(END_W1_ESe_Q | END_W1_ESn_Q);
+}
+
+static void spapr_xive_reset(void *dev)
+{
+    sPAPRXive *xive = SPAPR_XIVE(dev);
+    int i;
+
+    /*
+     * The XiveSource has its own reset handler, which masks off all
+     * IRQs (!P|Q)
+     */
+
+    /* Mask all valid EASs in the IRQ number space. */
+    for (i = 0; i < xive->nr_irqs; i++) {
+        XiveEAS *eas = &xive->eat[i];
+        if (xive_eas_is_valid(eas)) {
+            eas->w = cpu_to_be64(EAS_VALID | EAS_MASKED);
+        } else {
+            eas->w = 0;
+        }
+    }
+
+    /* Clear all ENDs */
+    for (i = 0; i < xive->nr_ends; i++) {
+        spapr_xive_end_reset(&xive->endt[i]);
+    }
+}
+
+static void spapr_xive_instance_init(Object *obj)
+{
+    sPAPRXive *xive = SPAPR_XIVE(obj);
+
+    object_initialize(&xive->source, sizeof(xive->source), TYPE_XIVE_SOURCE);
+    object_property_add_child(obj, "source", OBJECT(&xive->source), NULL);
+
+    object_initialize(&xive->end_source, sizeof(xive->end_source),
+                      TYPE_XIVE_END_SOURCE);
+    object_property_add_child(obj, "end_source", OBJECT(&xive->end_source),
+                              NULL);
+}
+
+static void spapr_xive_realize(DeviceState *dev, Error **errp)
+{
+    sPAPRXive *xive = SPAPR_XIVE(dev);
+    XiveSource *xsrc = &xive->source;
+    XiveENDSource *end_xsrc = &xive->end_source;
+    Error *local_err = NULL;
+
+    if (!xive->nr_irqs) {
+        error_setg(errp, "Number of interrupt needs to be greater 0");
+        return;
+    }
+
+    if (!xive->nr_ends) {
+        error_setg(errp, "Number of interrupt needs to be greater 0");
+        return;
+    }
+
+    /*
+     * Initialize the internal sources, for IPIs and virtual devices.
+     */
+    object_property_set_int(OBJECT(xsrc), xive->nr_irqs, "nr-irqs",
+                            &error_fatal);
+    object_property_add_const_link(OBJECT(xsrc), "xive", OBJECT(xive),
+                                   &error_fatal);
+    object_property_set_bool(OBJECT(xsrc), true, "realized", &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    /*
+     * Initialize the END ESB source
+     */
+    object_property_set_int(OBJECT(end_xsrc), xive->nr_irqs, "nr-ends",
+                            &error_fatal);
+    object_property_add_const_link(OBJECT(end_xsrc), "xive", OBJECT(xive),
+                                   &error_fatal);
+    object_property_set_bool(OBJECT(end_xsrc), true, "realized", &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    /* Set the mapping address of the END ESB pages after the source ESBs */
+    xive->end_base = xive->vc_base + (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+
+    /*
+     * Allocate the routing tables
+     */
+    xive->eat = g_new0(XiveEAS, xive->nr_irqs);
+    xive->endt = g_new0(XiveEND, xive->nr_ends);
+
+    /* TIMA initialization */
+    memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive,
+                          "xive.tima", 4ull << TM_SHIFT);
+
+    /* Define all XIVE MMIO regions on SysBus */
+    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio);
+    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio);
+    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio);
+
+    /* Map all regions */
+    spapr_xive_map_mmio(xive);
+
+    qemu_register_reset(spapr_xive_reset, dev);
+}
+
+static int spapr_xive_get_eas(XiveRouter *xrtr, uint8_t eas_blk,
+                              uint32_t eas_idx, XiveEAS *eas)
+{
+    sPAPRXive *xive = SPAPR_XIVE(xrtr);
+
+    if (eas_idx >= xive->nr_irqs) {
+        return -1;
+    }
+
+    *eas = xive->eat[eas_idx];
+    return 0;
+}
+
+static int spapr_xive_get_end(XiveRouter *xrtr,
+                              uint8_t end_blk, uint32_t end_idx, XiveEND *end)
+{
+    sPAPRXive *xive = SPAPR_XIVE(xrtr);
+
+    if (end_idx >= xive->nr_ends) {
+        return -1;
+    }
+
+    memcpy(end, &xive->endt[end_idx], sizeof(XiveEND));
+    return 0;
+}
+
+static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk,
+                                uint32_t end_idx, XiveEND *end,
+                                uint8_t word_number)
+{
+    sPAPRXive *xive = SPAPR_XIVE(xrtr);
+
+    if (end_idx >= xive->nr_ends) {
+        return -1;
+    }
+
+    memcpy(&xive->endt[end_idx], end, sizeof(XiveEND));
+    return 0;
+}
+
+static int spapr_xive_get_nvt(XiveRouter *xrtr,
+                              uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt)
+{
+    uint32_t vcpu_id = spapr_xive_nvt_to_target(nvt_blk, nvt_idx);
+    PowerPCCPU *cpu = spapr_find_cpu(vcpu_id);
+
+    if (!cpu) {
+        /* TODO: should we assert() if we cannot find an NVT? */
+        return -1;
+    }
+
+    /*
+     * sPAPR does not maintain an NVT table. Return that the NVT is
+     * valid if we have found a matching CPU.
+     */
+    nvt->w0 = cpu_to_be32(NVT_W0_VALID);
+    return 0;
+}
+
+static int spapr_xive_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk,
+                                uint32_t nvt_idx, XiveNVT *nvt,
+                                uint8_t word_number)
+{
+    /*
+     * We don't need to write back to the NVTs because the sPAPR
+     * machine should never hit a non-scheduled NVT. It should never
+     * get called.
+     */
+    g_assert_not_reached();
+}
+
+static const VMStateDescription vmstate_spapr_xive_end = {
+    .name = TYPE_SPAPR_XIVE "/end",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField []) {
+        VMSTATE_UINT32(w0, XiveEND),
+        VMSTATE_UINT32(w1, XiveEND),
+        VMSTATE_UINT32(w2, XiveEND),
+        VMSTATE_UINT32(w3, XiveEND),
+        VMSTATE_UINT32(w4, XiveEND),
+        VMSTATE_UINT32(w5, XiveEND),
+        VMSTATE_UINT32(w6, XiveEND),
+        VMSTATE_UINT32(w7, XiveEND),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static const VMStateDescription vmstate_spapr_xive_eas = {
+    .name = TYPE_SPAPR_XIVE "/eas",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField []) {
+        VMSTATE_UINT64(w, XiveEAS),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static const VMStateDescription vmstate_spapr_xive = {
+    .name = TYPE_SPAPR_XIVE,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
+        VMSTATE_STRUCT_VARRAY_POINTER_UINT32(eat, sPAPRXive, nr_irqs,
+                                     vmstate_spapr_xive_eas, XiveEAS),
+        VMSTATE_STRUCT_VARRAY_POINTER_UINT32(endt, sPAPRXive, nr_ends,
+                                             vmstate_spapr_xive_end, XiveEND),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static Property spapr_xive_properties[] = {
+    DEFINE_PROP_UINT32("nr-irqs", sPAPRXive, nr_irqs, 0),
+    DEFINE_PROP_UINT32("nr-ends", sPAPRXive, nr_ends, 0),
+    DEFINE_PROP_UINT64("vc-base", sPAPRXive, vc_base, SPAPR_XIVE_VC_BASE),
+    DEFINE_PROP_UINT64("tm-base", sPAPRXive, tm_base, SPAPR_XIVE_TM_BASE),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void spapr_xive_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass);
+
+    dc->desc    = "sPAPR XIVE Interrupt Controller";
+    dc->props   = spapr_xive_properties;
+    dc->realize = spapr_xive_realize;
+    dc->vmsd    = &vmstate_spapr_xive;
+
+    xrc->get_eas = spapr_xive_get_eas;
+    xrc->get_end = spapr_xive_get_end;
+    xrc->write_end = spapr_xive_write_end;
+    xrc->get_nvt = spapr_xive_get_nvt;
+    xrc->write_nvt = spapr_xive_write_nvt;
+}
+
+static const TypeInfo spapr_xive_info = {
+    .name = TYPE_SPAPR_XIVE,
+    .parent = TYPE_XIVE_ROUTER,
+    .instance_init = spapr_xive_instance_init,
+    .instance_size = sizeof(sPAPRXive),
+    .class_init = spapr_xive_class_init,
+};
+
+static void spapr_xive_register_types(void)
+{
+    type_register_static(&spapr_xive_info);
+}
+
+type_init(spapr_xive_register_types)
+
+bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi)
+{
+    XiveSource *xsrc = &xive->source;
+
+    if (lisn >= xive->nr_irqs) {
+        return false;
+    }
+
+    xive->eat[lisn].w |= cpu_to_be64(EAS_VALID);
+    xive_source_irq_set(xsrc, lisn, lsi);
+    return true;
+}
+
+bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn)
+{
+    XiveSource *xsrc = &xive->source;
+
+    if (lisn >= xive->nr_irqs) {
+        return false;
+    }
+
+    xive->eat[lisn].w &= cpu_to_be64(~EAS_VALID);
+    xive_source_irq_set(xsrc, lisn, false);
+    return true;
+}
+
+qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn)
+{
+    XiveSource *xsrc = &xive->source;
+
+    if (lisn >= xive->nr_irqs) {
+        return NULL;
+    }
+
+    /* The sPAPR machine/device should have claimed the IRQ before */
+    assert(xive_eas_is_valid(&xive->eat[lisn]));
+
+    return xive_source_qirq(xsrc, lisn);
+}
+
+/*
+ * XIVE hcalls
+ *
+ * The terminology used by the XIVE hcalls is the following:
+ *
+ *   TARGET vCPU number
+ *   EQ     Event Queue assigned by OS to receive event data
+ *   ESB    page for source interrupt management
+ *   LISN   Logical Interrupt Source Number identifying a source in the
+ *          machine
+ *   EISN   Effective Interrupt Source Number used by guest OS to
+ *          identify source in the guest
+ *
+ * The EAS, END, NVT structures are not exposed.
+ */
+
+/*
+ * Linux hosts under OPAL reserve priority 7 for their own escalation
+ * interrupts (DD2.X POWER9). So we only allow the guest to use
+ * priorities [0..6].
+ */
+static bool spapr_xive_priority_is_reserved(uint8_t priority)
+{
+    switch (priority) {
+    case 0 ... 6:
+        return false;
+    case 7: /* OPAL escalation queue */
+    default:
+        return true;
+    }
+}
+
+/*
+ * The H_INT_GET_SOURCE_INFO hcall() is used to obtain the logical
+ * real address of the MMIO page through which the Event State Buffer
+ * entry associated with the value of the "lisn" parameter is managed.
+ *
+ * Parameters:
+ * Input
+ * - R4: "flags"
+ *         Bits 0-63 reserved
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ *       "ibm,xive-lisn-ranges" properties, or as returned by the
+ *       ibm,query-interrupt-source-number RTAS call, or as returned
+ *       by the H_ALLOCATE_VAS_WINDOW hcall
+ *
+ * Output
+ * - R4: "flags"
+ *         Bits 0-59: Reserved
+ *         Bit 60: H_INT_ESB must be used for Event State Buffer
+ *                 management
+ *         Bit 61: 1 == LSI  0 == MSI
+ *         Bit 62: the full function page supports trigger
+ *         Bit 63: Store EOI Supported
+ * - R5: Logical Real address of full function Event State Buffer
+ *       management page, -1 if H_INT_ESB hcall flag is set to 1.
+ * - R6: Logical Real Address of trigger only Event State Buffer
+ *       management page or -1.
+ * - R7: Power of 2 page size for the ESB management pages returned in
+ *       R5 and R6.
+ */
+
+#define SPAPR_XIVE_SRC_H_INT_ESB     PPC_BIT(60) /* ESB manage with H_INT_ESB */
+#define SPAPR_XIVE_SRC_LSI           PPC_BIT(61) /* Virtual LSI type */
+#define SPAPR_XIVE_SRC_TRIGGER       PPC_BIT(62) /* Trigger and management
+                                                    on same page */
+#define SPAPR_XIVE_SRC_STORE_EOI     PPC_BIT(63) /* Store EOI support */
+
+static target_ulong h_int_get_source_info(PowerPCCPU *cpu,
+                                          sPAPRMachineState *spapr,
+                                          target_ulong opcode,
+                                          target_ulong *args)
+{
+    sPAPRXive *xive = spapr->xive;
+    XiveSource *xsrc = &xive->source;
+    target_ulong flags  = args[0];
+    target_ulong lisn   = args[1];
+
+    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+        return H_FUNCTION;
+    }
+
+    if (flags) {
+        return H_PARAMETER;
+    }
+
+    if (lisn >= xive->nr_irqs) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+                      lisn);
+        return H_P2;
+    }
+
+    if (!xive_eas_is_valid(&xive->eat[lisn])) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+                      lisn);
+        return H_P2;
+    }
+
+    /*
+     * All sources are emulated under the main XIVE object and share
+     * the same characteristics.
+     */
+    args[0] = 0;
+    if (!xive_source_esb_has_2page(xsrc)) {
+        args[0] |= SPAPR_XIVE_SRC_TRIGGER;
+    }
+    if (xsrc->esb_flags & XIVE_SRC_STORE_EOI) {
+        args[0] |= SPAPR_XIVE_SRC_STORE_EOI;
+    }
+
+    /*
+     * Force the use of the H_INT_ESB hcall in case of an LSI
+     * interrupt. This is necessary under KVM to re-trigger the
+     * interrupt if the level is still asserted
+     */
+    if (xive_source_irq_is_lsi(xsrc, lisn)) {
+        args[0] |= SPAPR_XIVE_SRC_H_INT_ESB | SPAPR_XIVE_SRC_LSI;
+    }
+
+    if (!(args[0] & SPAPR_XIVE_SRC_H_INT_ESB)) {
+        args[1] = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn);
+    } else {
+        args[1] = -1;
+    }
+
+    if (xive_source_esb_has_2page(xsrc) &&
+        !(args[0] & SPAPR_XIVE_SRC_H_INT_ESB)) {
+        args[2] = xive->vc_base + xive_source_esb_page(xsrc, lisn);
+    } else {
+        args[2] = -1;
+    }
+
+    if (xive_source_esb_has_2page(xsrc)) {
+        args[3] = xsrc->esb_shift - 1;
+    } else {
+        args[3] = xsrc->esb_shift;
+    }
+
+    return H_SUCCESS;
+}
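+
+/*
+ * Guest-side decoding sketch (illustrative; "retbuf" is a hypothetical
+ * name for the hcall return area, i.e. args[] on return):
+ *
+ *   uint64_t flags      = retbuf[0];
+ *   bool use_h_int_esb  = flags & SPAPR_XIVE_SRC_H_INT_ESB;
+ *   bool is_lsi         = flags & SPAPR_XIVE_SRC_LSI;
+ *   uint64_t esb_mgmt   = retbuf[1];   (-1 when H_INT_ESB must be used)
+ *   uint64_t esb_trig   = retbuf[2];   (-1 unless a separate trigger
+ *                                       page exists)
+ *   uint64_t page_shift = retbuf[3];   (ESB page size is 1ull << page_shift)
+ */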
+
+/*
+ * The H_INT_SET_SOURCE_CONFIG hcall() is used to assign a Logical
+ * Interrupt Source to a target. The Logical Interrupt Source is
+ * designated with the "lisn" parameter and the target is designated
+ * with the "target" and "priority" parameters.  Upon return from the
+ * hcall(), no additional interrupts will be directed to the old EQ.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ *         Bits 0-61: Reserved
+ *         Bit 62: set the "eisn" in the EAS
+ *         Bit 63: masks the interrupt source in the hardware interrupt
+ *       control structure. An interrupt masked by this mechanism will
+ *       be dropped, but its source state bits will still be
+ *       set. There is no race-free way of unmasking and restoring the
+ *       source. Thus this should only be used for interrupts that are
+ *       also masked at the source, and only in cases where the
+ *       interrupt is not meant to be used for a long time, for
+ *       example because no valid target exists for it
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ *       "ibm,xive-lisn-ranges" properties, or as returned by the
+ *       ibm,query-interrupt-source-number RTAS call, or as returned by
+ *       the H_ALLOCATE_VAS_WINDOW hcall
+ * - R6: "target" is per "ibm,ppc-interrupt-server#s" or
+ *       "ibm,ppc-interrupt-gserver#s"
+ * - R7: "priority" is a valid priority not in
+ *       "ibm,plat-res-int-priorities"
+ * - R8: "eisn" is the guest EISN associated with the "lisn"
+ *
+ * Output:
+ * - None
+ */
+
+#define SPAPR_XIVE_SRC_SET_EISN PPC_BIT(62)
+#define SPAPR_XIVE_SRC_MASK     PPC_BIT(63)
+
+static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
+                                            sPAPRMachineState *spapr,
+                                            target_ulong opcode,
+                                            target_ulong *args)
+{
+    sPAPRXive *xive = spapr->xive;
+    XiveEAS eas, new_eas;
+    target_ulong flags    = args[0];
+    target_ulong lisn     = args[1];
+    target_ulong target   = args[2];
+    target_ulong priority = args[3];
+    target_ulong eisn     = args[4];
+    uint8_t end_blk;
+    uint32_t end_idx;
+
+    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+        return H_FUNCTION;
+    }
+
+    if (flags & ~(SPAPR_XIVE_SRC_SET_EISN | SPAPR_XIVE_SRC_MASK)) {
+        return H_PARAMETER;
+    }
+
+    if (lisn >= xive->nr_irqs) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+                      lisn);
+        return H_P2;
+    }
+
+    eas = xive->eat[lisn];
+    if (!xive_eas_is_valid(&eas)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+                      lisn);
+        return H_P2;
+    }
+
+    /* priority 0xff is used to reset the EAS */
+    if (priority == 0xff) {
+        new_eas.w = cpu_to_be64(EAS_VALID | EAS_MASKED);
+        goto out;
+    }
+
+    if (flags & SPAPR_XIVE_SRC_MASK) {
+        new_eas.w = eas.w | cpu_to_be64(EAS_MASKED);
+    } else {
+        new_eas.w = eas.w & cpu_to_be64(~EAS_MASKED);
+    }
+
+    if (spapr_xive_priority_is_reserved(priority)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
+                      " is reserved\n", priority);
+        return H_P4;
+    }
+
+    /*
+     * Validate that "target" is part of the list of threads allocated
+     * to the partition. For that, find the END corresponding to the
+     * target.
+     */
+    if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) {
+        return H_P3;
+    }
+
+    new_eas.w = xive_set_field64(EAS_END_BLOCK, new_eas.w, end_blk);
+    new_eas.w = xive_set_field64(EAS_END_INDEX, new_eas.w, end_idx);
+
+    if (flags & SPAPR_XIVE_SRC_SET_EISN) {
+        new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn);
+    }
+
+out:
+    xive->eat[lisn] = new_eas;
+    return H_SUCCESS;
+}
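+
+/*
+ * Usage sketch (illustrative): routing LISN 5 to target 0, priority 6,
+ * with guest EISN 0x10 would amount to:
+ *
+ *   args[] = { SPAPR_XIVE_SRC_SET_EISN, 5, 0, 6, 0x10 };
+ *   h_int_set_source_config(cpu, spapr, H_INT_SET_SOURCE_CONFIG, args);
+ *
+ * whereas a priority of 0xff in args[3] resets the EAS to a valid but
+ * masked state.
+ */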
+
+/*
+ * The H_INT_GET_SOURCE_CONFIG hcall() is used to determine which
+ * target/priority pair is assigned to the specified Logical Interrupt
+ * Source.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ *         Bits 0-63 Reserved
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ *       "ibm,xive-lisn-ranges" properties, or as returned by the
+ *       ibm,query-interrupt-source-number RTAS call, or as
+ *       returned by the H_ALLOCATE_VAS_WINDOW hcall
+ *
+ * Output:
+ * - R4: Target to which the specified Logical Interrupt Source is
+ *       assigned
+ * - R5: Priority to which the specified Logical Interrupt Source is
+ *       assigned
+ * - R6: EISN for the specified Logical Interrupt Source (this will be
+ *       equivalent to the LISN if not changed by H_INT_SET_SOURCE_CONFIG)
+ */
+static target_ulong h_int_get_source_config(PowerPCCPU *cpu,
+                                            sPAPRMachineState *spapr,
+                                            target_ulong opcode,
+                                            target_ulong *args)
+{
+    sPAPRXive *xive = spapr->xive;
+    target_ulong flags = args[0];
+    target_ulong lisn = args[1];
+    XiveEAS eas;
+    XiveEND *end;
+    uint8_t nvt_blk;
+    uint32_t end_idx, nvt_idx;
+
+    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+        return H_FUNCTION;
+    }
+
+    if (flags) {
+        return H_PARAMETER;
+    }
+
+    if (lisn >= xive->nr_irqs) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+                      lisn);
+        return H_P2;
+    }
+
+    eas = xive->eat[lisn];
+    if (!xive_eas_is_valid(&eas)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+                      lisn);
+        return H_P2;
+    }
+
+    /* EAS_END_BLOCK is unused on sPAPR */
+    end_idx = xive_get_field64(EAS_END_INDEX, eas.w);
+
+    assert(end_idx < xive->nr_ends);
+    end = &xive->endt[end_idx];
+
+    nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6);
+    nvt_idx = xive_get_field32(END_W6_NVT_INDEX, end->w6);
+    args[0] = spapr_xive_nvt_to_target(nvt_blk, nvt_idx);
+
+    if (xive_eas_is_masked(&eas)) {
+        args[1] = 0xff;
+    } else {
+        args[1] = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
+    }
+
+    args[2] = xive_get_field64(EAS_END_DATA, eas.w);
+
+    return H_SUCCESS;
+}
+
+/*
+ * The H_INT_GET_QUEUE_INFO hcall() is used to get the logical real
+ * address of the notification management page associated with the
+ * specified target and priority.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ *         Bits 0-63 Reserved
+ * - R5: "target" is per "ibm,ppc-interrupt-server#s" or
+ *       "ibm,ppc-interrupt-gserver#s"
+ * - R6: "priority" is a valid priority not in
+ *       "ibm,plat-res-int-priorities"
+ *
+ * Output:
+ * - R4: Logical real address of notification page
+ * - R5: Power of 2 page size of the notification page
+ */
+static target_ulong h_int_get_queue_info(PowerPCCPU *cpu,
+                                         sPAPRMachineState *spapr,
+                                         target_ulong opcode,
+                                         target_ulong *args)
+{
+    sPAPRXive *xive = spapr->xive;
+    XiveENDSource *end_xsrc = &xive->end_source;
+    target_ulong flags = args[0];
+    target_ulong target = args[1];
+    target_ulong priority = args[2];
+    XiveEND *end;
+    uint8_t end_blk;
+    uint32_t end_idx;
+
+    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+        return H_FUNCTION;
+    }
+
+    if (flags) {
+        return H_PARAMETER;
+    }
+
+    /*
+     * H_STATE should be returned if an H_INT_RESET is in progress.
+     * This is not needed when running the emulation under QEMU
+     */
+
+    if (spapr_xive_priority_is_reserved(priority)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
+                      " is reserved\n", priority);
+        return H_P3;
+    }
+
+    /*
+     * Validate that "target" is part of the list of threads allocated
+     * to the partition. For that, find the END corresponding to the
+     * target.
+     */
+    if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) {
+        return H_P2;
+    }
+
+    assert(end_idx < xive->nr_ends);
+    end = &xive->endt[end_idx];
+
+    args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx;
+    if (xive_end_is_enqueue(end)) {
+        args[1] = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
+    } else {
+        args[1] = 0;
+    }
+
+    return H_SUCCESS;
+}
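+
+/*
+ * Illustrative note: args[0] returns the END ESB management page
+ * address and args[1] a power of 2, so args[1] == 16 denotes a 64K EQ
+ * (1ull << 16 bytes), i.e. a stored END_W0_QSIZE of 4.
+ */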
+
+/*
+ * The H_INT_SET_QUEUE_CONFIG hcall() is used to set or reset an EQ for
+ * a given "target" and "priority".  It is also used to set the
+ * notification config associated with the EQ.  An EQ size of 0 is
+ * used to reset the EQ config for a given target and priority. If
+ * resetting the EQ config, the END associated with the given "target"
+ * and "priority" will be changed to disable queueing.
+ *
+ * Upon return from the hcall(), no additional interrupts will be
+ * directed to the old EQ (if one was set). The old EQ (if one was
+ * set) should be investigated for interrupts that occurred prior to
+ * or during the hcall().
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ *         Bits 0-62: Reserved
+ *         Bit 63: Unconditional Notify (n) per the XIVE spec
+ * - R5: "target" is per "ibm,ppc-interrupt-server#s" or
+ *       "ibm,ppc-interrupt-gserver#s"
+ * - R6: "priority" is a valid priority not in
+ *       "ibm,plat-res-int-priorities"
+ * - R7: "eventQueue": The logical real address of the start of the EQ
+ * - R8: "eventQueueSize": The power of 2 EQ size per "ibm,xive-eq-sizes"
+ *
+ * Output:
+ * - None
+ */
+
+#define SPAPR_XIVE_END_ALWAYS_NOTIFY PPC_BIT(63)
+
+static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
+                                           sPAPRMachineState *spapr,
+                                           target_ulong opcode,
+                                           target_ulong *args)
+{
+    sPAPRXive *xive = spapr->xive;
+    target_ulong flags = args[0];
+    target_ulong target = args[1];
+    target_ulong priority = args[2];
+    target_ulong qpage = args[3];
+    target_ulong qsize = args[4];
+    XiveEND end;
+    uint8_t end_blk, nvt_blk;
+    uint32_t end_idx, nvt_idx;
+
+    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+        return H_FUNCTION;
+    }
+
+    if (flags & ~SPAPR_XIVE_END_ALWAYS_NOTIFY) {
+        return H_PARAMETER;
+    }
+
+    /*
+     * H_STATE should be returned if an H_INT_RESET is in progress.
+     * This is not needed when running the emulation under QEMU
+     */
+
+    if (spapr_xive_priority_is_reserved(priority)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
+                      " is reserved\n", priority);
+        return H_P3;
+    }
+
+    /*
+     * Validate that "target" is part of the list of threads allocated
+     * to the partition. For that, find the END corresponding to the
+     * target.
+     */
+
+    if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) {
+        return H_P2;
+    }
+
+    assert(end_idx < xive->nr_ends);
+    memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND));
+
+    switch (qsize) {
+    case 12:
+    case 16:
+    case 21:
+    case 24:
+        end.w2 = cpu_to_be32((qpage >> 32) & 0x0fffffff);
+        end.w3 = cpu_to_be32(qpage & 0xffffffff);
+        end.w0 |= cpu_to_be32(END_W0_ENQUEUE);
+        end.w0 = xive_set_field32(END_W0_QSIZE, end.w0, qsize - 12);
+        break;
+    case 0:
+        /* reset queue and disable queueing */
+        spapr_xive_end_reset(&end);
+        goto out;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid EQ size %"PRIx64"\n",
+                      qsize);
+        return H_P5;
+    }
+
+    if (qsize) {
+        hwaddr plen = 1 << qsize;
+        void *eq;
+
+        /*
+         * Validate the guest EQ. We should also check that the queue
+         * has been zeroed by the OS.
+         */
+        eq = address_space_map(CPU(cpu)->as, qpage, &plen, true,
+                               MEMTXATTRS_UNSPECIFIED);
+        if (plen != 1 << qsize) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to map EQ @0x%"
+                          HWADDR_PRIx "\n", qpage);
+            return H_P4;
+        }
+        address_space_unmap(CPU(cpu)->as, eq, plen, true, plen);
+    }
+
+    /* "target" should have been validated above */
+    if (spapr_xive_target_to_nvt(target, &nvt_blk, &nvt_idx)) {
+        g_assert_not_reached();
+    }
+
+    /*
+     * Ensure the priority and target are correctly set (they will not
+     * be right after allocation)
+     */
+    end.w6 = xive_set_field32(END_W6_NVT_BLOCK, 0ul, nvt_blk) |
+        xive_set_field32(END_W6_NVT_INDEX, 0ul, nvt_idx);
+    end.w7 = xive_set_field32(END_W7_F0_PRIORITY, 0ul, priority);
+
+    if (flags & SPAPR_XIVE_END_ALWAYS_NOTIFY) {
+        end.w0 |= cpu_to_be32(END_W0_UCOND_NOTIFY);
+    } else {
+        end.w0 &= cpu_to_be32((uint32_t)~END_W0_UCOND_NOTIFY);
+    }
+
+    /*
+     * The generation bit for the END starts at 1 and the END page
+     * offset counter starts at 0.
+     */
+    end.w1 = cpu_to_be32(END_W1_GENERATION) |
+        xive_set_field32(END_W1_PAGE_OFF, 0ul, 0ul);
+    end.w0 |= cpu_to_be32(END_W0_VALID);
+
+    /*
+     * TODO: issue syncs required to ensure all in-flight interrupts
+     * are complete on the old END
+     */
+
+out:
+    /* Update END */
+    memcpy(&xive->endt[end_idx], &end, sizeof(XiveEND));
+    return H_SUCCESS;
+}
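+
+/*
+ * Queue size encoding summary (from the switch above): the hcall takes
+ * the EQ size as a power of 2 and the END stores it rebased on 4K:
+ *
+ *   qsize 12 (4K)   ->  END_W0_QSIZE 0
+ *   qsize 16 (64K)  ->  END_W0_QSIZE 4
+ *   qsize 21 (2M)   ->  END_W0_QSIZE 9
+ *   qsize 24 (16M)  ->  END_W0_QSIZE 12
+ *   qsize 0         ->  reset the END and disable queueing
+ */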
+
+/*
+ * The H_INT_GET_QUEUE_CONFIG hcall() is used to get an EQ for a given
+ * target and priority.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ *         Bits 0-62: Reserved
+ *         Bit 63: Debug: Return debug data
+ * - R5: "target" is per "ibm,ppc-interrupt-server#s" or
+ *       "ibm,ppc-interrupt-gserver#s"
+ * - R6: "priority" is a valid priority not in
+ *       "ibm,plat-res-int-priorities"
+ *
+ * Output:
+ * - R4: "flags":
+ *       Bits 0-61: Reserved
+ *       Bit 62: The value of Event Queue Generation Number (g) per
+ *              the XIVE spec if "Debug" = 1
+ *       Bit 63: The value of Unconditional Notify (n) per the XIVE spec
+ * - R5: The logical real address of the start of the EQ
+ * - R6: The power of 2 EQ size per "ibm,xive-eq-sizes"
+ * - R7: The value of Event Queue Offset Counter per XIVE spec
+ *       if "Debug" = 1, else 0
+ *
+ */
+
+#define SPAPR_XIVE_END_DEBUG     PPC_BIT(63)
+
+static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
+                                           sPAPRMachineState *spapr,
+                                           target_ulong opcode,
+                                           target_ulong *args)
+{
+    sPAPRXive *xive = spapr->xive;
+    target_ulong flags = args[0];
+    target_ulong target = args[1];
+    target_ulong priority = args[2];
+    XiveEND *end;
+    uint8_t end_blk;
+    uint32_t end_idx;
+
+    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+        return H_FUNCTION;
+    }
+
+    if (flags & ~SPAPR_XIVE_END_DEBUG) {
+        return H_PARAMETER;
+    }
+
+    /*
+     * H_STATE should be returned if an H_INT_RESET is in progress.
+     * This is not needed when running the emulation under QEMU
+     */
+
+    if (spapr_xive_priority_is_reserved(priority)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
+                      " is reserved\n", priority);
+        return H_P3;
+    }
+
+    /*
+     * Validate that "target" is part of the list of threads allocated
+     * to the partition. For that, find the END corresponding to the
+     * target.
+     */
+    if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) {
+        return H_P2;
+    }
+
+    assert(end_idx < xive->nr_ends);
+    end = &xive->endt[end_idx];
+
+    args[0] = 0;
+    if (xive_end_is_notify(end)) {
+        args[0] |= SPAPR_XIVE_END_ALWAYS_NOTIFY;
+    }
+
+    if (xive_end_is_enqueue(end)) {
+        args[1] = (uint64_t) (be32_to_cpu(end->w2) & 0x0fffffff) << 32
+            | be32_to_cpu(end->w3);
+        args[2] = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
+    } else {
+        args[1] = 0;
+        args[2] = 0;
+    }
+
+    /* TODO: do we need any locking on the END ? */
+    if (flags & SPAPR_XIVE_END_DEBUG) {
+        /* Load the event queue generation number into the return flags */
+        args[0] |= (uint64_t)xive_get_field32(END_W1_GENERATION, end->w1) << 62;
+
+        /* Load R7 with the event queue offset counter */
+        args[3] = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+    } else {
+        args[3] = 0;
+    }
+
+    return H_SUCCESS;
+}
+
+/*
+ * The H_INT_SET_OS_REPORTING_LINE hcall() is used to set the
+ * reporting cache line pair for the calling thread.  The reporting
+ * cache lines will contain the OS interrupt context when the OS
+ * issues a CI store byte to @TIMA+0xC10 to acknowledge the OS
+ * interrupt. The reporting cache lines can be reset by inputting -1
+ * in "reportingLine".  Issuing the CI store byte without reporting
+ * cache lines registered will result in the data not being accessible
+ * to the OS.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ *         Bits 0-63: Reserved
+ * - R5: "reportingLine": The logical real address of the reporting cache
+ *       line pair
+ *
+ * Output:
+ * - None
+ */
+static target_ulong h_int_set_os_reporting_line(PowerPCCPU *cpu,
+                                                sPAPRMachineState *spapr,
+                                                target_ulong opcode,
+                                                target_ulong *args)
+{
+    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+        return H_FUNCTION;
+    }
+
+    /*
+     * H_STATE should be returned if an H_INT_RESET is in progress.
+     * This is not needed when running the emulation under QEMU
+     */
+
+    /* TODO: H_INT_SET_OS_REPORTING_LINE */
+    return H_FUNCTION;
+}
+
+/*
+ * The H_INT_GET_OS_REPORTING_LINE hcall() is used to get the logical
+ * real address of the reporting cache line pair set for the input
+ * "target".  If no reporting cache line pair has been set, -1 is
+ * returned.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ *         Bits 0-63: Reserved
+ * - R5: "target" is per "ibm,ppc-interrupt-server#s" or
+ *       "ibm,ppc-interrupt-gserver#s"
+ * - R6: "reportingLine": The logical real address of the reporting
+ *        cache line pair
+ *
+ * Output:
+ * - R4: The logical real address of the reporting line if set, else -1
+ */
+static target_ulong h_int_get_os_reporting_line(PowerPCCPU *cpu,
+                                                sPAPRMachineState *spapr,
+                                                target_ulong opcode,
+                                                target_ulong *args)
+{
+    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+        return H_FUNCTION;
+    }
+
+    /*
+     * H_STATE should be returned if an H_INT_RESET is in progress.
+     * This is not needed when running the emulation under QEMU
+     */
+
+    /* TODO: H_INT_GET_OS_REPORTING_LINE */
+    return H_FUNCTION;
+}
+
+/*
+ * The H_INT_ESB hcall() is used to issue a load or store to the ESB
+ * page for the input "lisn".  This hcall is only supported for LISNs
+ * that have the ESB hcall flag set to 1 when returned from hcall()
+ * H_INT_GET_SOURCE_INFO.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ *         Bits 0-62: Reserved
+ *         bit 63: Store: Store=1, store operation, else load operation
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ *       "ibm,xive-lisn-ranges" properties, or as returned by the
+ *       ibm,query-interrupt-source-number RTAS call, or as
+ *       returned by the H_ALLOCATE_VAS_WINDOW hcall
+ * - R6: "esbOffset" is the offset into the ESB page for the load or
+ *       store operation
+ * - R7: "storeData" is the data to write for a store operation
+ *
+ * Output:
+ * - R4: The value of the load if load operation, else -1
+ */
+
+#define SPAPR_XIVE_ESB_STORE PPC_BIT(63)
+
+static target_ulong h_int_esb(PowerPCCPU *cpu,
+                              sPAPRMachineState *spapr,
+                              target_ulong opcode,
+                              target_ulong *args)
+{
+    sPAPRXive *xive = spapr->xive;
+    XiveEAS eas;
+    target_ulong flags  = args[0];
+    target_ulong lisn   = args[1];
+    target_ulong offset = args[2];
+    target_ulong data   = args[3];
+    hwaddr mmio_addr;
+    XiveSource *xsrc = &xive->source;
+
+    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+        return H_FUNCTION;
+    }
+
+    if (flags & ~SPAPR_XIVE_ESB_STORE) {
+        return H_PARAMETER;
+    }
+
+    if (lisn >= xive->nr_irqs) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+                      lisn);
+        return H_P2;
+    }
+
+    eas = xive->eat[lisn];
+    if (!xive_eas_is_valid(&eas)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+                      lisn);
+        return H_P2;
+    }
+
+    if (offset > (1ull << xsrc->esb_shift)) {
+        return H_P3;
+    }
+
+    mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
+
+    if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
+                      (flags & SPAPR_XIVE_ESB_STORE))) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
+                      HWADDR_PRIx "\n", mmio_addr);
+        return H_HARDWARE;
+    }
+    args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
+    return H_SUCCESS;
+}
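+
+/*
+ * Guest usage sketch (illustrative): an LSI source reported with the
+ * H_INT_ESB flag set EOIs with a load from its management page:
+ *
+ *   args[] = { 0, lisn, XIVE_ESB_LOAD_EOI, 0 };    flags 0 selects a load
+ *   h_int_esb(cpu, spapr, H_INT_ESB, args);        args[0] holds the
+ *                                                  load value (0 or 1)
+ */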
+
+/*
+ * The H_INT_SYNC hcall() is used to issue hardware syncs that will
+ * ensure any in flight events for the input lisn are in the event
+ * queue.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ *         Bits 0-63: Reserved
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ *       "ibm,xive-lisn-ranges" properties, or as returned by the
+ *       ibm,query-interrupt-source-number RTAS call, or as
+ *       returned by the H_ALLOCATE_VAS_WINDOW hcall
+ *
+ * Output:
+ * - None
+ */
+static target_ulong h_int_sync(PowerPCCPU *cpu,
+                               sPAPRMachineState *spapr,
+                               target_ulong opcode,
+                               target_ulong *args)
+{
+    sPAPRXive *xive = spapr->xive;
+    XiveEAS eas;
+    target_ulong flags = args[0];
+    target_ulong lisn = args[1];
+
+    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+        return H_FUNCTION;
+    }
+
+    if (flags) {
+        return H_PARAMETER;
+    }
+
+    if (lisn >= xive->nr_irqs) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+                      lisn);
+        return H_P2;
+    }
+
+    eas = xive->eat[lisn];
+    if (!xive_eas_is_valid(&eas)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+                      lisn);
+        return H_P2;
+    }
+
+    /*
+     * H_STATE should be returned if an H_INT_RESET is in progress.
+     * This is not needed when running the emulation under QEMU
+     */
+
+    /* This is not real hardware. Nothing to be done */
+    return H_SUCCESS;
+}
+
+/*
+ * The H_INT_RESET hcall() is used to reset all of the partition's
+ * interrupt exploitation structures to their initial state.  This
+ * means losing all previously set interrupt state set via
+ * H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ *         Bits 0-63: Reserved
+ *
+ * Output:
+ * - None
+ */
+static target_ulong h_int_reset(PowerPCCPU *cpu,
+                                sPAPRMachineState *spapr,
+                                target_ulong opcode,
+                                target_ulong *args)
+{
+    sPAPRXive *xive = spapr->xive;
+    target_ulong flags   = args[0];
+
+    if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+        return H_FUNCTION;
+    }
+
+    if (flags) {
+        return H_PARAMETER;
+    }
+
+    device_reset(DEVICE(xive));
+    return H_SUCCESS;
+}
+
+void spapr_xive_hcall_init(sPAPRMachineState *spapr)
+{
+    spapr_register_hypercall(H_INT_GET_SOURCE_INFO, h_int_get_source_info);
+    spapr_register_hypercall(H_INT_SET_SOURCE_CONFIG, h_int_set_source_config);
+    spapr_register_hypercall(H_INT_GET_SOURCE_CONFIG, h_int_get_source_config);
+    spapr_register_hypercall(H_INT_GET_QUEUE_INFO, h_int_get_queue_info);
+    spapr_register_hypercall(H_INT_SET_QUEUE_CONFIG, h_int_set_queue_config);
+    spapr_register_hypercall(H_INT_GET_QUEUE_CONFIG, h_int_get_queue_config);
+    spapr_register_hypercall(H_INT_SET_OS_REPORTING_LINE,
+                             h_int_set_os_reporting_line);
+    spapr_register_hypercall(H_INT_GET_OS_REPORTING_LINE,
+                             h_int_get_os_reporting_line);
+    spapr_register_hypercall(H_INT_ESB, h_int_esb);
+    spapr_register_hypercall(H_INT_SYNC, h_int_sync);
+    spapr_register_hypercall(H_INT_RESET, h_int_reset);
+}
+
+void spapr_dt_xive(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt,
+                   uint32_t phandle)
+{
+    sPAPRXive *xive = spapr->xive;
+    int node;
+    uint64_t timas[2 * 2];
+    /* Interrupt number ranges for the IPIs */
+    uint32_t lisn_ranges[] = {
+        cpu_to_be32(0),
+        cpu_to_be32(nr_servers),
+    };
+    /*
+     * EQ size - the sizes of pages supported by the system (4K, 64K,
+     * 2M, 16M). We only advertise 64K for the moment.
+     */
+    uint32_t eq_sizes[] = {
+        cpu_to_be32(16), /* 64K */
+    };
+    /*
+     * The following array is in sync with the reserved priorities
+     * defined by the 'spapr_xive_priority_is_reserved' routine.
+     */
+    uint32_t plat_res_int_priorities[] = {
+        cpu_to_be32(7),    /* start */
+        cpu_to_be32(0xf8), /* count */
+    };
+    gchar *nodename;
+
+    /* Thread Interrupt Management Area : User (ring 3) and OS (ring 2) */
+    timas[0] = cpu_to_be64(xive->tm_base +
+                           XIVE_TM_USER_PAGE * (1ull << TM_SHIFT));
+    timas[1] = cpu_to_be64(1ull << TM_SHIFT);
+    timas[2] = cpu_to_be64(xive->tm_base +
+                           XIVE_TM_OS_PAGE * (1ull << TM_SHIFT));
+    timas[3] = cpu_to_be64(1ull << TM_SHIFT);
+
+    nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
+                           xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
+    _FDT(node = fdt_add_subnode(fdt, 0, nodename));
+    g_free(nodename);
+
+    _FDT(fdt_setprop_string(fdt, node, "device_type", "power-ivpe"));
+    _FDT(fdt_setprop(fdt, node, "reg", timas, sizeof(timas)));
+
+    _FDT(fdt_setprop_string(fdt, node, "compatible", "ibm,power-ivpe"));
+    _FDT(fdt_setprop(fdt, node, "ibm,xive-eq-sizes", eq_sizes,
+                     sizeof(eq_sizes)));
+    _FDT(fdt_setprop(fdt, node, "ibm,xive-lisn-ranges", lisn_ranges,
+                     sizeof(lisn_ranges)));
+
+    /* For Linux to link the LSIs to the interrupt controller. */
+    _FDT(fdt_setprop(fdt, node, "interrupt-controller", NULL, 0));
+    _FDT(fdt_setprop_cell(fdt, node, "#interrupt-cells", 2));
+
+    /* For SLOF */
+    _FDT(fdt_setprop_cell(fdt, node, "linux,phandle", phandle));
+    _FDT(fdt_setprop_cell(fdt, node, "phandle", phandle));
+
+    /*
+     * The "ibm,plat-res-int-priorities" property defines the priority
+     * ranges reserved by the hypervisor
+     */
+    _FDT(fdt_setprop(fdt, 0, "ibm,plat-res-int-priorities",
+                     plat_res_int_priorities, sizeof(plat_res_int_priorities)));
+}
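+
+/*
+ * Resulting device tree node (illustrative; addresses and phandle
+ * depend on the machine layout):
+ *
+ *   interrupt-controller@... {
+ *       device_type = "power-ivpe";
+ *       compatible = "ibm,power-ivpe";
+ *       reg = < user TIMA page/size  OS TIMA page/size >;
+ *       ibm,xive-eq-sizes = <16>;              64K only for now
+ *       ibm,xive-lisn-ranges = <0 nr_servers>;
+ *       interrupt-controller;
+ *       #interrupt-cells = <2>;
+ *       linux,phandle = <...>;
+ *       phandle = <...>;
+ *   };
+ */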
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index 2e27b92b87..f67d3c80bf 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -244,7 +244,8 @@ void xics_spapr_init(sPAPRMachineState *spapr)
     spapr_register_hypercall(H_IPOLL, h_ipoll);
 }
 
-void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle)
+void spapr_dt_xics(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt,
+                   uint32_t phandle)
 {
     uint32_t interrupt_server_ranges_prop[] = {
         0, cpu_to_be32(nr_servers),
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
new file mode 100644
index 0000000000..ea33494338
--- /dev/null
+++ b/hw/intc/xive.c
@@ -0,0 +1,1599 @@
+/*
+ * QEMU PowerPC XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "target/ppc/cpu.h"
+#include "sysemu/cpus.h"
+#include "sysemu/dma.h"
+#include "hw/qdev-properties.h"
+#include "monitor/monitor.h"
+#include "hw/ppc/xive.h"
+#include "hw/ppc/xive_regs.h"
+
+/*
+ * XIVE Thread Interrupt Management context
+ */
+
+/*
+ * Convert a priority number to an Interrupt Pending Buffer (IPB)
+ * register, which indicates a pending interrupt at the priority
+ * corresponding to the bit number
+ */
+static uint8_t priority_to_ipb(uint8_t priority)
+{
+    return priority > XIVE_PRIORITY_MAX ?
+        0 : 1 << (XIVE_PRIORITY_MAX - priority);
+}
+
+/*
+ * Convert an Interrupt Pending Buffer (IPB) register to a Pending
+ * Interrupt Priority Register (PIPR), which contains the priority of
+ * the most favored pending notification.
+ */
+static uint8_t ipb_to_pipr(uint8_t ipb)
+{
+    return ipb ? clz32((uint32_t)ipb << 24) : 0xff;
+}
+
+static void ipb_update(uint8_t *regs, uint8_t priority)
+{
+    regs[TM_IPB] |= priority_to_ipb(priority);
+    regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
+}
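+
+/*
+ * Worked example (assuming XIVE_PRIORITY_MAX == 7): priority 2 maps to
+ * IPB bit 0x20 and priority 5 to 0x04. With both pending, IPB == 0x24
+ * and ipb_to_pipr(0x24) == 2, the most favored priority.
+ */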
+
+static uint8_t exception_mask(uint8_t ring)
+{
+    switch (ring) {
+    case TM_QW1_OS:
+        return TM_QW1_NSR_EO;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
+{
+    uint8_t *regs = &tctx->regs[ring];
+    uint8_t nsr = regs[TM_NSR];
+    uint8_t mask = exception_mask(ring);
+
+    qemu_irq_lower(tctx->output);
+
+    if (regs[TM_NSR] & mask) {
+        uint8_t cppr = regs[TM_PIPR];
+
+        regs[TM_CPPR] = cppr;
+
+        /* Reset the pending buffer bit */
+        regs[TM_IPB] &= ~priority_to_ipb(cppr);
+        regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
+
+        /* Drop Exception bit */
+        regs[TM_NSR] &= ~mask;
+    }
+
+    return (nsr << 8) | regs[TM_CPPR];
+}
+
+static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
+{
+    uint8_t *regs = &tctx->regs[ring];
+
+    if (regs[TM_PIPR] < regs[TM_CPPR]) {
+        regs[TM_NSR] |= exception_mask(ring);
+        qemu_irq_raise(tctx->output);
+    }
+}
+
+static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
+{
+    if (cppr > XIVE_PRIORITY_MAX) {
+        cppr = 0xff;
+    }
+
+    tctx->regs[ring + TM_CPPR] = cppr;
+
+    /* CPPR has changed, check if we need to raise a pending exception */
+    xive_tctx_notify(tctx, ring);
+}
+
+/*
+ * XIVE Thread Interrupt Management Area (TIMA)
+ */
+
+/*
+ * Define an access map for each page of the TIMA that we will use in
+ * the memory region ops to filter values when doing loads and stores
+ * of raw registers values
+ *
+ * Registers accessibility bits :
+ *
+ *    0x0 - no access
+ *    0x1 - write only
+ *    0x2 - read only
+ *    0x3 - read/write
+ */
+
+static const uint8_t xive_tm_hw_view[] = {
+    /* QW-0 User */   3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0,
+    /* QW-1 OS   */   3, 3, 3, 3,   3, 3, 0, 3,   3, 3, 3, 3,   0, 0, 0, 0,
+    /* QW-2 POOL */   0, 0, 3, 3,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0,
+    /* QW-3 PHYS */   3, 3, 3, 3,   0, 3, 0, 3,   3, 0, 0, 3,   3, 3, 3, 0,
+};
+
+static const uint8_t xive_tm_hv_view[] = {
+    /* QW-0 User */   3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0,
+    /* QW-1 OS   */   3, 3, 3, 3,   3, 3, 0, 3,   3, 3, 3, 3,   0, 0, 0, 0,
+    /* QW-2 POOL */   0, 0, 3, 3,   0, 0, 0, 0,   0, 3, 3, 3,   0, 0, 0, 0,
+    /* QW-3 PHYS */   3, 3, 3, 3,   0, 3, 0, 3,   3, 0, 0, 3,   0, 0, 0, 0,
+};
+
+static const uint8_t xive_tm_os_view[] = {
+    /* QW-0 User */   3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0,
+    /* QW-1 OS   */   2, 3, 2, 2,   2, 2, 0, 2,   0, 0, 0, 0,   0, 0, 0, 0,
+    /* QW-2 POOL */   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,
+    /* QW-3 PHYS */   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,
+};
+
+static const uint8_t xive_tm_user_view[] = {
+    /* QW-0 User */   3, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,
+    /* QW-1 OS   */   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,
+    /* QW-2 POOL */   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,
+    /* QW-3 PHYS */   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,
+};
+
+/*
+ * Overall TIMA access map for the thread interrupt management context
+ * registers
+ */
+static const uint8_t *xive_tm_views[] = {
+    [XIVE_TM_HW_PAGE]   = xive_tm_hw_view,
+    [XIVE_TM_HV_PAGE]   = xive_tm_hv_view,
+    [XIVE_TM_OS_PAGE]   = xive_tm_os_view,
+    [XIVE_TM_USER_PAGE] = xive_tm_user_view,
+};
+
+/*
+ * Computes a register access mask for a given offset in the TIMA
+ */
+static uint64_t xive_tm_mask(hwaddr offset, unsigned size, bool write)
+{
+    uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
+    uint8_t reg_offset = offset & 0x3F;
+    uint8_t reg_mask = write ? 0x1 : 0x2;
+    uint64_t mask = 0x0;
+    int i;
+
+    for (i = 0; i < size; i++) {
+        if (xive_tm_views[page_offset][reg_offset + i] & reg_mask) {
+            mask |= (uint64_t) 0xff << (8 * (size - i - 1));
+        }
+    }
+
+    return mask;
+}
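+
+/*
+ * Example (assuming XIVE_TM_OS_PAGE == 2 and TM_SHIFT == 16 per the
+ * XIVE headers): a 4-byte store at the OS ring word 0 only reaches
+ * TM_CPPR, the single writable byte of that word in xive_tm_os_view:
+ *
+ *   xive_tm_mask((2 << 16) | 0x10, 4, true) == 0x00ff0000
+ */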
+
+static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
+                              unsigned size)
+{
+    uint8_t ring_offset = offset & 0x30;
+    uint8_t reg_offset = offset & 0x3F;
+    uint64_t mask = xive_tm_mask(offset, size, true);
+    int i;
+
+    /*
+     * Only 4- or 8-byte stores are allowed and the User ring is
+     * excluded
+     */
+    if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%"
+                      HWADDR_PRIx"\n", offset);
+        return;
+    }
+
+    /*
+     * Use the register offset for the raw values and filter out
+     * reserved values
+     */
+    for (i = 0; i < size; i++) {
+        uint8_t byte_mask = (mask >> (8 * (size - i - 1)));
+        if (byte_mask) {
+            tctx->regs[reg_offset + i] = (value >> (8 * (size - i - 1))) &
+                byte_mask;
+        }
+    }
+}
+
+static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size)
+{
+    uint8_t ring_offset = offset & 0x30;
+    uint8_t reg_offset = offset & 0x3F;
+    uint64_t mask = xive_tm_mask(offset, size, false);
+    uint64_t ret;
+    int i;
+
+    /*
+     * Only 4- or 8-byte loads are allowed and the User ring is
+     * excluded
+     */
+    if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%"
+                      HWADDR_PRIx"\n", offset);
+        return -1;
+    }
+
+    /* Use the register offset for the raw values */
+    ret = 0;
+    for (i = 0; i < size; i++) {
+        ret |= (uint64_t) tctx->regs[reg_offset + i] << (8 * (size - i - 1));
+    }
+
+    /* filter out reserved values */
+    return ret & mask;
+}
+
+/*
+ * The TM context is mapped twice within each page. Stores and loads
+ * to the first mapping below 2K write and read the specified values
+ * without modification. The second mapping above 2K performs specific
+ * state changes (side effects) in addition to setting/returning the
+ * interrupt management area context of the processor thread.
+ */
+static uint64_t xive_tm_ack_os_reg(XiveTCTX *tctx, hwaddr offset, unsigned size)
+{
+    return xive_tctx_accept(tctx, TM_QW1_OS);
+}
+
+static void xive_tm_set_os_cppr(XiveTCTX *tctx, hwaddr offset,
+                                uint64_t value, unsigned size)
+{
+    xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
+}
+
+/*
+ * Adjust the IPB to allow a CPU to process event queues of other
+ * priorities during one physical interrupt cycle.
+ */
+static void xive_tm_set_os_pending(XiveTCTX *tctx, hwaddr offset,
+                                   uint64_t value, unsigned size)
+{
+    ipb_update(&tctx->regs[TM_QW1_OS], value & 0xff);
+    xive_tctx_notify(tctx, TM_QW1_OS);
+}
+
+/*
+ * Define a mapping of "special" operations depending on the TIMA page
+ * offset and the size of the operation.
+ */
+typedef struct XiveTmOp {
+    uint8_t  page_offset;
+    uint32_t op_offset;
+    unsigned size;
+    void     (*write_handler)(XiveTCTX *tctx, hwaddr offset, uint64_t value,
+                              unsigned size);
+    uint64_t (*read_handler)(XiveTCTX *tctx, hwaddr offset, unsigned size);
+} XiveTmOp;
+
+static const XiveTmOp xive_tm_operations[] = {
+    /*
+     * MMIOs below 2K : raw values and special operations without side
+     * effects
+     */
+    { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,   1, xive_tm_set_os_cppr, NULL },
+
+    /* MMIOs above 2K : special operations with side effects */
+    { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,     2, NULL, xive_tm_ack_os_reg },
+    { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL },
+};
+
+static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write)
+{
+    uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
+    uint32_t op_offset = offset & 0xFFF;
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(xive_tm_operations); i++) {
+        const XiveTmOp *xto = &xive_tm_operations[i];
+
+        /* Accesses done from a more privileged TIMA page are allowed */
+        if (xto->page_offset >= page_offset &&
+            xto->op_offset == op_offset &&
+            xto->size == size &&
+            ((write && xto->write_handler) || (!write && xto->read_handler))) {
+            return xto;
+        }
+    }
+    return NULL;
+}
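+
+/*
+ * Illustrative note (assuming TIMA page indices HW=0, HV=1, OS=2,
+ * USER=3): the "xto->page_offset >= page_offset" test lets an access
+ * from a more privileged page reach an op defined for a less
+ * privileged view, e.g. a TM_SPC_ACK_OS_REG load from the HV page (1)
+ * matches the XIVE_TM_OS_PAGE (2) entry.
+ */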
+
+/*
+ * TIMA MMIO handlers
+ */
+static void xive_tm_write(void *opaque, hwaddr offset,
+                          uint64_t value, unsigned size)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
+    XiveTCTX *tctx = XIVE_TCTX(cpu->intc);
+    const XiveTmOp *xto;
+
+    /*
+     * TODO: check V bit in Q[0-3]W2, check PTER bit associated with CPU
+     */
+
+    /*
+     * First, check for special operations in the 2K region
+     */
+    if (offset & 0x800) {
+        xto = xive_tm_find_op(offset, size, true);
+        if (!xto) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA"
+                          " @%"HWADDR_PRIx"\n", offset);
+        } else {
+            xto->write_handler(tctx, offset, value, size);
+        }
+        return;
+    }
+
+    /*
+     * Then, check for special operations in the region below 2K.
+     */
+    xto = xive_tm_find_op(offset, size, true);
+    if (xto) {
+        xto->write_handler(tctx, offset, value, size);
+        return;
+    }
+
+    /*
+     * Finish with raw access to the register values
+     */
+    xive_tm_raw_write(tctx, offset, value, size);
+}
+
+static uint64_t xive_tm_read(void *opaque, hwaddr offset, unsigned size)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
+    XiveTCTX *tctx = XIVE_TCTX(cpu->intc);
+    const XiveTmOp *xto;
+
+    /*
+     * TODO: check V bit in Q[0-3]W2, check PTER bit associated with CPU
+     */
+
+    /*
+     * First, check for special operations in the 2K region
+     */
+    if (offset & 0x800) {
+        xto = xive_tm_find_op(offset, size, false);
+        if (!xto) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA"
+                          " @%"HWADDR_PRIx"\n", offset);
+            return -1;
+        }
+        return xto->read_handler(tctx, offset, size);
+    }
+
+    /*
+     * Then, check for special operations in the region below 2K.
+     */
+    xto = xive_tm_find_op(offset, size, false);
+    if (xto) {
+        return xto->read_handler(tctx, offset, size);
+    }
+
+    /*
+     * Finish with raw access to the register values
+     */
+    return xive_tm_raw_read(tctx, offset, size);
+}
+
+const MemoryRegionOps xive_tm_ops = {
+    .read = xive_tm_read,
+    .write = xive_tm_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+};
+
+static inline uint32_t xive_tctx_word2(uint8_t *ring)
+{
+    return *((uint32_t *) &ring[TM_WORD2]);
+}
+
+static char *xive_tctx_ring_print(uint8_t *ring)
+{
+    uint32_t w2 = xive_tctx_word2(ring);
+
+    return g_strdup_printf("%02x   %02x  %02x    %02x   %02x  "
+                   "%02x  %02x   %02x  %08x",
+                   ring[TM_NSR], ring[TM_CPPR], ring[TM_IPB], ring[TM_LSMFB],
+                   ring[TM_ACK_CNT], ring[TM_INC], ring[TM_AGE], ring[TM_PIPR],
+                   be32_to_cpu(w2));
+}
+
+static const char * const xive_tctx_ring_names[] = {
+    "USER", "OS", "POOL", "PHYS",
+};
+
+void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
+{
+    int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
+    int i;
+
+    monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
+                   "  W2\n", cpu_index);
+
+    for (i = 0; i < XIVE_TM_RING_COUNT; i++) {
+        char *s = xive_tctx_ring_print(&tctx->regs[i * XIVE_TM_RING_SIZE]);
+        monitor_printf(mon, "CPU[%04x]: %4s    %s\n", cpu_index,
+                       xive_tctx_ring_names[i], s);
+        g_free(s);
+    }
+}
+
+static void xive_tctx_reset(void *dev)
+{
+    XiveTCTX *tctx = XIVE_TCTX(dev);
+
+    memset(tctx->regs, 0, sizeof(tctx->regs));
+
+    /* Set some defaults */
+    tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
+    tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
+    tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
+
+    /*
+     * Initialize PIPR to 0xFF to avoid phantom interrupts when the
+     * CPPR is first set.
+     */
+    tctx->regs[TM_QW1_OS + TM_PIPR] =
+        ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
+}
+
+static void xive_tctx_realize(DeviceState *dev, Error **errp)
+{
+    XiveTCTX *tctx = XIVE_TCTX(dev);
+    PowerPCCPU *cpu;
+    CPUPPCState *env;
+    Object *obj;
+    Error *local_err = NULL;
+
+    obj = object_property_get_link(OBJECT(dev), "cpu", &local_err);
+    if (!obj) {
+        error_propagate(errp, local_err);
+        error_prepend(errp, "required link 'cpu' not found: ");
+        return;
+    }
+
+    cpu = POWERPC_CPU(obj);
+    tctx->cs = CPU(obj);
+
+    env = &cpu->env;
+    switch (PPC_INPUT(env)) {
+    case PPC_FLAGS_INPUT_POWER7:
+        tctx->output = env->irq_inputs[POWER7_INPUT_INT];
+        break;
+
+    default:
+        error_setg(errp, "XIVE interrupt controller does not support "
+                   "this CPU bus model");
+        return;
+    }
+
+    qemu_register_reset(xive_tctx_reset, dev);
+}
+
+static void xive_tctx_unrealize(DeviceState *dev, Error **errp)
+{
+    qemu_unregister_reset(xive_tctx_reset, dev);
+}
+
+static const VMStateDescription vmstate_xive_tctx = {
+    .name = TYPE_XIVE_TCTX,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_BUFFER(regs, XiveTCTX),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static void xive_tctx_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->desc = "XIVE Interrupt Thread Context";
+    dc->realize = xive_tctx_realize;
+    dc->unrealize = xive_tctx_unrealize;
+    dc->vmsd = &vmstate_xive_tctx;
+}
+
+static const TypeInfo xive_tctx_info = {
+    .name          = TYPE_XIVE_TCTX,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(XiveTCTX),
+    .class_init    = xive_tctx_class_init,
+};
+
+Object *xive_tctx_create(Object *cpu, XiveRouter *xrtr, Error **errp)
+{
+    Error *local_err = NULL;
+    Object *obj;
+
+    obj = object_new(TYPE_XIVE_TCTX);
+    object_property_add_child(cpu, TYPE_XIVE_TCTX, obj, &error_abort);
+    object_unref(obj);
+    object_property_add_const_link(obj, "cpu", cpu, &error_abort);
+    object_property_set_bool(obj, true, "realized", &local_err);
+    if (local_err) {
+        goto error;
+    }
+
+    return obj;
+
+error:
+    object_unparent(obj);
+    error_propagate(errp, local_err);
+    return NULL;
+}
+
+/*
+ * XIVE ESB helpers
+ */
+
+static uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
+{
+    uint8_t old_pq = *pq & 0x3;
+
+    *pq &= ~0x3;
+    *pq |= value & 0x3;
+
+    return old_pq;
+}
+
+static bool xive_esb_trigger(uint8_t *pq)
+{
+    uint8_t old_pq = *pq & 0x3;
+
+    switch (old_pq) {
+    case XIVE_ESB_RESET:
+        xive_esb_set(pq, XIVE_ESB_PENDING);
+        return true;
+    case XIVE_ESB_PENDING:
+    case XIVE_ESB_QUEUED:
+        xive_esb_set(pq, XIVE_ESB_QUEUED);
+        return false;
+    case XIVE_ESB_OFF:
+        xive_esb_set(pq, XIVE_ESB_OFF);
+        return false;
+    default:
+         g_assert_not_reached();
+    }
+}
+
+static bool xive_esb_eoi(uint8_t *pq)
+{
+    uint8_t old_pq = *pq & 0x3;
+
+    switch (old_pq) {
+    case XIVE_ESB_RESET:
+    case XIVE_ESB_PENDING:
+        xive_esb_set(pq, XIVE_ESB_RESET);
+        return false;
+    case XIVE_ESB_QUEUED:
+        xive_esb_set(pq, XIVE_ESB_PENDING);
+        return true;
+    case XIVE_ESB_OFF:
+        xive_esb_set(pq, XIVE_ESB_OFF);
+        return false;
+    default:
+         g_assert_not_reached();
+    }
+}
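+
+/*
+ * PQ state machine summary of the two helpers above:
+ *
+ *   PQ           trigger -> PQ' (notify)     EOI -> PQ' (notify)
+ *   00 RESET     10 PENDING (yes)            00 RESET   (no)
+ *   10 PENDING   11 QUEUED  (no)             00 RESET   (no)
+ *   11 QUEUED    11 QUEUED  (no)             10 PENDING (yes)
+ *   01 OFF       01 OFF     (no)             01 OFF     (no)
+ */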
+
+/*
+ * XIVE Interrupt Source (or IVSE)
+ */
+
+uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
+{
+    assert(srcno < xsrc->nr_irqs);
+
+    return xsrc->status[srcno] & 0x3;
+}
+
+uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
+{
+    assert(srcno < xsrc->nr_irqs);
+
+    return xive_esb_set(&xsrc->status[srcno], pq);
+}
+
+/*
+ * Returns whether the event notification should be forwarded.
+ */
+static bool xive_source_lsi_trigger(XiveSource *xsrc, uint32_t srcno)
+{
+    uint8_t old_pq = xive_source_esb_get(xsrc, srcno);
+
+    xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
+
+    switch (old_pq) {
+    case XIVE_ESB_RESET:
+        xive_source_esb_set(xsrc, srcno, XIVE_ESB_PENDING);
+        return true;
+    default:
+        return false;
+    }
+}
+
+/*
+ * Returns whether the event notification should be forwarded.
+ */
+static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
+{
+    bool ret;
+
+    assert(srcno < xsrc->nr_irqs);
+
+    ret = xive_esb_trigger(&xsrc->status[srcno]);
+
+    if (xive_source_irq_is_lsi(xsrc, srcno) &&
+        xive_source_esb_get(xsrc, srcno) == XIVE_ESB_QUEUED) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "XIVE: queued an event on LSI IRQ %d\n", srcno);
+    }
+
+    return ret;
+}
+
+/*
+ * Returns whether the event notification should be forwarded.
+ */
+static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
+{
+    bool ret;
+
+    assert(srcno < xsrc->nr_irqs);
+
+    ret = xive_esb_eoi(&xsrc->status[srcno]);
+
+    /*
+     * LSI sources do not set the Q bit but they can still be
+     * asserted, in which case we should forward a new event
+     * notification
+     */
+    if (xive_source_irq_is_lsi(xsrc, srcno) &&
+        xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
+        ret = xive_source_lsi_trigger(xsrc, srcno);
+    }
+
+    return ret;
+}
+
+/*
+ * Forward the source event notification to the Router
+ */
+static void xive_source_notify(XiveSource *xsrc, int srcno)
+{
+    XiveNotifierClass *xnc = XIVE_NOTIFIER_GET_CLASS(xsrc->xive);
+
+    if (xnc->notify) {
+        xnc->notify(xsrc->xive, srcno);
+    }
+}
+
+/*
+ * In a two-page ESB MMIO setting, the even page is the trigger page
+ * and the odd page is for management
+ */
+static inline bool addr_is_even(hwaddr addr, uint32_t shift)
+{
+    return !((addr >> shift) & 1);
+}
+
+static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
+{
+    return xive_source_esb_has_2page(xsrc) &&
+        addr_is_even(addr, xsrc->esb_shift - 1);
+}
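+
+/*
+ * Address decoding sketch (assuming XIVE_ESB_64K_2PAGE == 17, i.e. two
+ * 64K pages per source): srcno = addr >> esb_shift, so
+ *
+ *   addr 0x20000  ->  srcno 1, even 64K page  ->  trigger page
+ *   addr 0x30000  ->  srcno 1, odd 64K page   ->  management page
+ */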
+
+/*
+ * ESB MMIO loads
+ *                      Trigger page    Management/EOI page
+ *
+ * ESB MMIO setting     2 pages         1 or 2 pages
+ *
+ * 0x000 .. 0x3FF       -1              EOI and return 0|1
+ * 0x400 .. 0x7FF       -1              EOI and return 0|1
+ * 0x800 .. 0xBFF       -1              return PQ
+ * 0xC00 .. 0xCFF       -1              return PQ and atomically PQ=00
+ * 0xD00 .. 0xDFF       -1              return PQ and atomically PQ=01
+ * 0xE00 .. 0xEFF       -1              return PQ and atomically PQ=10
+ * 0xF00 .. 0xFFF       -1              return PQ and atomically PQ=11
+ */
+static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
+{
+    XiveSource *xsrc = XIVE_SOURCE(opaque);
+    uint32_t offset = addr & 0xFFF;
+    uint32_t srcno = addr >> xsrc->esb_shift;
+    uint64_t ret = -1;
+
+    /* In a two-page ESB MMIO setting, the trigger page should not be read */
+    if (xive_source_is_trigger_page(xsrc, addr)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "XIVE: invalid load on IRQ %d trigger page at "
+                      "0x%"HWADDR_PRIx"\n", srcno, addr);
+        return -1;
+    }
+
+    switch (offset) {
+    case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
+        ret = xive_source_esb_eoi(xsrc, srcno);
+
+        /* Forward the source event notification for routing */
+        if (ret) {
+            xive_source_notify(xsrc, srcno);
+        }
+        break;
+
+    case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
+        ret = xive_source_esb_get(xsrc, srcno);
+        break;
+
+    case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
+    case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
+    case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
+    case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
+        ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
+                      offset);
+    }
+
+    return ret;
+}
+
+/*
+ * ESB MMIO stores
+ *                      Trigger page    Management/EOI page
+ *
+ * ESB MMIO setting     2 pages         1 or 2 pages
+ *
+ * 0x000 .. 0x3FF       Trigger         Trigger
+ * 0x400 .. 0x7FF       Trigger         EOI
+ * 0x800 .. 0xBFF       Trigger         undefined
+ * 0xC00 .. 0xCFF       Trigger         PQ=00
+ * 0xD00 .. 0xDFF       Trigger         PQ=01
+ * 0xE00 .. 0xEFF       Trigger         PQ=10
+ * 0xF00 .. 0xFFF       Trigger         PQ=11
+ */
+static void xive_source_esb_write(void *opaque, hwaddr addr,
+                                  uint64_t value, unsigned size)
+{
+    XiveSource *xsrc = XIVE_SOURCE(opaque);
+    uint32_t offset = addr & 0xFFF;
+    uint32_t srcno = addr >> xsrc->esb_shift;
+    bool notify = false;
+
+    /* In a two-page ESB MMIO setting, the trigger page only triggers */
+    if (xive_source_is_trigger_page(xsrc, addr)) {
+        notify = xive_source_esb_trigger(xsrc, srcno);
+        goto out;
+    }
+
+    switch (offset) {
+    case 0 ... 0x3FF:
+        notify = xive_source_esb_trigger(xsrc, srcno);
+        break;
+
+    case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
+        if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "XIVE: invalid Store EOI for IRQ %d\n", srcno);
+            return;
+        }
+
+        notify = xive_source_esb_eoi(xsrc, srcno);
+        break;
+
+    case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
+    case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
+    case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
+    case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
+        xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
+                      offset);
+        return;
+    }
+
+out:
+    /* Forward the source event notification for routing */
+    if (notify) {
+        xive_source_notify(xsrc, srcno);
+    }
+}
+
+static const MemoryRegionOps xive_source_esb_ops = {
+    .read = xive_source_esb_read,
+    .write = xive_source_esb_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+static void xive_source_set_irq(void *opaque, int srcno, int val)
+{
+    XiveSource *xsrc = XIVE_SOURCE(opaque);
+    bool notify = false;
+
+    if (xive_source_irq_is_lsi(xsrc, srcno)) {
+        if (val) {
+            notify = xive_source_lsi_trigger(xsrc, srcno);
+        } else {
+            xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
+        }
+    } else {
+        if (val) {
+            notify = xive_source_esb_trigger(xsrc, srcno);
+        }
+    }
+
+    /* Forward the source event notification for routing */
+    if (notify) {
+        xive_source_notify(xsrc, srcno);
+    }
+}
+
+void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon)
+{
+    int i;
+
+    for (i = 0; i < xsrc->nr_irqs; i++) {
+        uint8_t pq = xive_source_esb_get(xsrc, i);
+
+        if (pq == XIVE_ESB_OFF) {
+            continue;
+        }
+
+        monitor_printf(mon, "  %08x %s %c%c%c\n", i + offset,
+                       xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI",
+                       pq & XIVE_ESB_VAL_P ? 'P' : '-',
+                       pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
+                       xsrc->status[i] & XIVE_STATUS_ASSERTED ? 'A' : ' ');
+    }
+}
+
+static void xive_source_reset(void *dev)
+{
+    XiveSource *xsrc = XIVE_SOURCE(dev);
+
+    /* Do not clear the LSI bitmap */
+
+    /* PQs are initialized to 0b01 (Q=1), which corresponds to "ints off" */
+    memset(xsrc->status, XIVE_ESB_OFF, xsrc->nr_irqs);
+}
+
+static void xive_source_realize(DeviceState *dev, Error **errp)
+{
+    XiveSource *xsrc = XIVE_SOURCE(dev);
+    Object *obj;
+    Error *local_err = NULL;
+
+    obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
+    if (!obj) {
+        error_propagate(errp, local_err);
+        error_prepend(errp, "required link 'xive' not found: ");
+        return;
+    }
+
+    xsrc->xive = XIVE_NOTIFIER(obj);
+
+    if (!xsrc->nr_irqs) {
+        error_setg(errp, "Number of interrupt needs to be greater than 0");
+        return;
+    }
+
+    if (xsrc->esb_shift != XIVE_ESB_4K &&
+        xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
+        xsrc->esb_shift != XIVE_ESB_64K &&
+        xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
+        error_setg(errp, "Invalid ESB shift setting");
+        return;
+    }
+
+    xsrc->status = g_malloc0(xsrc->nr_irqs);
+    xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
+
+    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
+                          &xive_source_esb_ops, xsrc, "xive.esb",
+                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
+
+    xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc,
+                                     xsrc->nr_irqs);
+
+    qemu_register_reset(xive_source_reset, dev);
+}
+
+static const VMStateDescription vmstate_xive_source = {
+    .name = TYPE_XIVE_SOURCE,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
+        VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+/*
+ * The default XIVE interrupt source setting for the ESB MMIOs is two
+ * 64k pages without Store EOI, to be in sync with KVM.
+ */
+static Property xive_source_properties[] = {
+    DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
+    DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
+    DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xive_source_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->desc    = "XIVE Interrupt Source";
+    dc->props   = xive_source_properties;
+    dc->realize = xive_source_realize;
+    dc->vmsd    = &vmstate_xive_source;
+}
+
+static const TypeInfo xive_source_info = {
+    .name          = TYPE_XIVE_SOURCE,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(XiveSource),
+    .class_init    = xive_source_class_init,
+};
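
A hedged usage sketch of how a machine model of this era might wire such a source, satisfying the "xive" link checked in realize above (the QOM calls match the QEMU 3.x API; "xive" and "nr_irqs" are assumed to exist in the caller):

Object *obj = object_new(TYPE_XIVE_SOURCE);

object_property_add_child(OBJECT(xive), "source", obj, &error_abort);
object_property_add_const_link(obj, "xive", OBJECT(xive), &error_abort);
object_property_set_int(obj, nr_irqs, "nr-irqs", &error_abort);
object_property_set_bool(obj, true, "realized", &error_fatal);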
+
+/*
+ * XiveEND helpers
+ */
+
+void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon)
+{
+    uint64_t qaddr_base = (uint64_t) (be32_to_cpu(end->w2) & 0x0fffffff)
+        << 32 | be32_to_cpu(end->w3);
+    uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
+    uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+    uint32_t qentries = 1 << (qsize + 10);
+    int i;
+
+    /*
+     * print out the [ (qindex - (width - 1)) .. (qindex) ] window
+     */
+    monitor_printf(mon, " [ ");
+    qindex = (qindex - (width - 1)) & (qentries - 1);
+    for (i = 0; i < width; i++) {
+        uint64_t qaddr = qaddr_base + (qindex << 2);
+        uint32_t qdata = -1;
+
+        if (dma_memory_read(&address_space_memory, qaddr, &qdata,
+                            sizeof(qdata))) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
+                          HWADDR_PRIx "\n", qaddr);
+            return;
+        }
+        monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "",
+                       be32_to_cpu(qdata));
+        qindex = (qindex + 1) & (qentries - 1);
+    }
+}
+
+void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
+{
+    uint64_t qaddr_base = (uint64_t) (be32_to_cpu(end->w2) & 0x0fffffff)
+        << 32 | be32_to_cpu(end->w3);
+    uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+    uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
+    uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
+    uint32_t qentries = 1 << (qsize + 10);
+
+    uint32_t nvt = xive_get_field32(END_W6_NVT_INDEX, end->w6);
+    uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
+
+    if (!xive_end_is_valid(end)) {
+        return;
+    }
+
+    monitor_printf(mon, "  %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64
+                   "% 6d/%5d ^%d", end_idx,
+                   xive_end_is_valid(end)    ? 'v' : '-',
+                   xive_end_is_enqueue(end)  ? 'q' : '-',
+                   xive_end_is_notify(end)   ? 'n' : '-',
+                   xive_end_is_backlog(end)  ? 'b' : '-',
+                   xive_end_is_escalate(end) ? 'e' : '-',
+                   priority, nvt, qaddr_base, qindex, qentries, qgen);
+
+    xive_end_queue_pic_print_info(end, 6, mon);
+    monitor_printf(mon, "]\n");
+}
+
+static void xive_end_enqueue(XiveEND *end, uint32_t data)
+{
+    uint64_t qaddr_base = (uint64_t) (be32_to_cpu(end->w2) & 0x0fffffff)
+        << 32 | be32_to_cpu(end->w3);
+    uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
+    uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+    uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
+
+    uint64_t qaddr = qaddr_base + (qindex << 2);
+    uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
+    uint32_t qentries = 1 << (qsize + 10);
+
+    if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
+                      HWADDR_PRIx "\n", qaddr);
+        return;
+    }
+
+    qindex = (qindex + 1) & (qentries - 1);
+    if (qindex == 0) {
+        qgen ^= 1;
+        end->w1 = xive_set_field32(END_W1_GENERATION, end->w1, qgen);
+    }
+    end->w1 = xive_set_field32(END_W1_PAGE_OFF, end->w1, qindex);
+}
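
The queue uses the top bit of each entry as a generation flag: the producer stamps entries with the current generation and flips it on wrap, so a consumer can spot fresh entries without a shared counter. A minimal consumer sketch under those assumptions (not QEMU code; be32toh is glibc's):

#include <endian.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct {
    uint32_t *ring;     /* qentries big-endian slots */
    uint32_t entries;   /* power of two: 1 << (qsize + 10) */
    uint32_t index;
    uint32_t gen;       /* toggles 0/1 on wrap, like END_W1_GENERATION */
} EqSketch;

static bool eq_sketch_pop(EqSketch *q, uint32_t *data)
{
    uint32_t qdata = be32toh(q->ring[q->index]);

    if ((qdata >> 31) != q->gen) {
        return false;                /* producer has not written it yet */
    }
    *data = qdata & 0x7fffffff;
    q->index = (q->index + 1) & (q->entries - 1);
    if (q->index == 0) {
        q->gen ^= 1;                 /* wrapped: expect the flipped bit */
    }
    return true;
}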
+
+/*
+ * XIVE Router (aka. Virtualization Controller or IVRE)
+ */
+
+int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
+                        XiveEAS *eas)
+{
+    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+    return xrc->get_eas(xrtr, eas_blk, eas_idx, eas);
+}
+
+int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                        XiveEND *end)
+{
+    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+    return xrc->get_end(xrtr, end_blk, end_idx, end);
+}
+
+int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                          XiveEND *end, uint8_t word_number)
+{
+    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+    return xrc->write_end(xrtr, end_blk, end_idx, end, word_number);
+}
+
+int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
+                        XiveNVT *nvt)
+{
+    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+    return xrc->get_nvt(xrtr, nvt_blk, nvt_idx, nvt);
+}
+
+int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
+                          XiveNVT *nvt, uint8_t word_number)
+{
+    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+    return xrc->write_nvt(xrtr, nvt_blk, nvt_idx, nvt, word_number);
+}
+
+/*
+ * The thread context register words are in big-endian format.
+ */
+static int xive_presenter_tctx_match(XiveTCTX *tctx, uint8_t format,
+                                     uint8_t nvt_blk, uint32_t nvt_idx,
+                                     bool cam_ignore, uint32_t logic_serv)
+{
+    uint32_t cam = xive_nvt_cam_line(nvt_blk, nvt_idx);
+    uint32_t qw2w2 = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
+    uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
+    uint32_t qw0w2 = xive_tctx_word2(&tctx->regs[TM_QW0_USER]);
+
+    /*
+     * TODO (PowerNV): ignore mode. The low order bits of the NVT
+     * identifier are ignored in the "CAM" match.
+     */
+
+    if (format == 0) {
+        if (cam_ignore) {
+            /*
+             * F=0 & i=1: Logical server notification (bits ignored at
+             * the end of the NVT identifier)
+             */
+            qemu_log_mask(LOG_UNIMP, "XIVE: no support for LS NVT %x/%x\n",
+                          nvt_blk, nvt_idx);
+            return -1;
+        }
+
+        /* F=0 & i=0: Specific NVT notification */
+
+        /* TODO (PowerNV) : PHYS ring */
+
+        /* HV POOL ring */
+        if ((be32_to_cpu(qw2w2) & TM_QW2W2_VP) &&
+            cam == xive_get_field32(TM_QW2W2_POOL_CAM, qw2w2)) {
+            return TM_QW2_HV_POOL;
+        }
+
+        /* OS ring */
+        if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
+            cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) {
+            return TM_QW1_OS;
+        }
+    } else {
+        /* F=1 : User level Event-Based Branch (EBB) notification */
+
+        /* USER ring */
+        if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
+            (cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) &&
+            (be32_to_cpu(qw0w2) & TM_QW0W2_VU) &&
+            (logic_serv == xive_get_field32(TM_QW0W2_LOGIC_SERV, qw0w2))) {
+            return TM_QW0_USER;
+        }
+    }
+    return -1;
+}
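
The CAM compare above reduces to packing the NVT block and index into a single word and comparing it against the ring's CAM field. A sketch of that packing, assuming a 19-bit index field (the real split lives in xive_nvt_cam_line(), which is not shown in this diff):

/* Illustrative only: the actual field widths come from xive_regs.h */
static uint32_t cam_line_sketch(uint8_t nvt_blk, uint32_t nvt_idx)
{
    return ((uint32_t)nvt_blk << 19) | (nvt_idx & ((1u << 19) - 1));
}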
+
+typedef struct XiveTCTXMatch {
+    XiveTCTX *tctx;
+    uint8_t ring;
+} XiveTCTXMatch;
+
+static bool xive_presenter_match(XiveRouter *xrtr, uint8_t format,
+                                 uint8_t nvt_blk, uint32_t nvt_idx,
+                                 bool cam_ignore, uint8_t priority,
+                                 uint32_t logic_serv, XiveTCTXMatch *match)
+{
+    CPUState *cs;
+
+    /*
+     * TODO (PowerNV): handle chip_id overwrite of block field for
+     * hardwired CAM compares
+     */
+
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        XiveTCTX *tctx = XIVE_TCTX(cpu->intc);
+        int ring;
+
+        /*
+         * HW checks that the CPU is enabled in the Physical Thread
+         * Enable Register (PTER).
+         */
+
+        /*
+         * Check the thread context CAM lines and record matches. We
+         * will handle CPU exception delivery later
+         */
+        ring = xive_presenter_tctx_match(tctx, format, nvt_blk, nvt_idx,
+                                         cam_ignore, logic_serv);
+        /*
+         * Save the context and keep scanning to catch duplicates,
+         * which we don't support yet.
+         */
+        if (ring != -1) {
+            if (match->tctx) {
+                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread "
+                              "context NVT %x/%x\n", nvt_blk, nvt_idx);
+                return false;
+            }
+
+            match->ring = ring;
+            match->tctx = tctx;
+        }
+    }
+
+    if (!match->tctx) {
+        qemu_log_mask(LOG_UNIMP, "XIVE: NVT %x/%x is not dispatched\n",
+                      nvt_blk, nvt_idx);
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * This is our simple Xive Presenter Engine model. It is merged into the
+ * Router as it does not require an extra object.
+ *
+ * It receives notification requests sent by the IVRE and looks for a
+ * matching NVT (or more) dispatched on the processor threads. In case
+ * of a single NVT notification, the process is abbreviated and the
+ * thread is signaled if a match is found. In case of a logical server
+ * notification (bits ignored at the end of the NVT identifier), the
+ * IVPE and IVRE select a winning thread using different filters. This
+ * involves 2 or 3 exchanges on the PowerBus that the model does not
+ * support.
+ *
+ * The parameters represent what is sent on the PowerBus
+ */
+static void xive_presenter_notify(XiveRouter *xrtr, uint8_t format,
+                                  uint8_t nvt_blk, uint32_t nvt_idx,
+                                  bool cam_ignore, uint8_t priority,
+                                  uint32_t logic_serv)
+{
+    XiveNVT nvt;
+    XiveTCTXMatch match = { .tctx = NULL, .ring = 0 };
+    bool found;
+
+    /* NVT cache lookup */
+    if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVT %x/%x\n",
+                      nvt_blk, nvt_idx);
+        return;
+    }
+
+    if (!xive_nvt_is_valid(&nvt)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is invalid\n",
+                      nvt_blk, nvt_idx);
+        return;
+    }
+
+    found = xive_presenter_match(xrtr, format, nvt_blk, nvt_idx, cam_ignore,
+                                 priority, logic_serv, &match);
+    if (found) {
+        ipb_update(&match.tctx->regs[match.ring], priority);
+        xive_tctx_notify(match.tctx, match.ring);
+        return;
+    }
+
+    /* Record the IPB in the associated NVT structure */
+    ipb_update((uint8_t *) &nvt.w4, priority);
+    xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
+
+    /*
+     * If no matching NVT is dispatched on a HW thread:
+     * - update the NVT structure if backlog is activated
+     * - escalate (ESe PQ bits and EAS in w4-5) if escalation is
+     *   activated
+     */
+}
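
ipb_update() records a pending priority as a bit in the 8-bit IPB and refreshes the derived PIPR (the most favored pending priority). A sketch of that mapping, assuming priority 0 is the most favored and maps to the most significant IPB bit:

#include <stdint.h>

#define PRIO_MAX 7

static uint8_t sketch_priority_to_ipb(uint8_t priority)
{
    return priority > PRIO_MAX ? 0 : 1 << (PRIO_MAX - priority);
}

/* Most favored (lowest-numbered) pending priority, 0xFF when none */
static uint8_t sketch_ipb_to_pipr(uint8_t ipb)
{
    uint8_t prio;

    for (prio = 0; prio <= PRIO_MAX; prio++) {
        if (ipb & (1 << (PRIO_MAX - prio))) {
            return prio;
        }
    }
    return 0xFF;
}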
+
+/*
+ * An END trigger can come from an event trigger (IPI or HW) or from
+ * another chip. We don't model the PowerBus, but the END trigger
+ * message carries the same parameters as the function below.
+ */
+static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
+                                   uint32_t end_idx, uint32_t end_data)
+{
+    XiveEND end;
+    uint8_t priority;
+    uint8_t format;
+
+    /* END cache lookup */
+    if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
+                      end_idx);
+        return;
+    }
+
+    if (!xive_end_is_valid(&end)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
+                      end_blk, end_idx);
+        return;
+    }
+
+    if (xive_end_is_enqueue(&end)) {
+        xive_end_enqueue(&end, end_data);
+        /* Enqueuing event data modifies the EQ toggle and index */
+        xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
+    }
+
+    /*
+     * The W7 format depends on the F bit in W6. It defines the type
+     * of the notification:
+     *
+     *   F=0 : single or multiple NVT notification
+     *   F=1 : User level Event-Based Branch (EBB) notification, no
+     *         priority
+     */
+    format = xive_get_field32(END_W6_FORMAT_BIT, end.w6);
+    priority = xive_get_field32(END_W7_F0_PRIORITY, end.w7);
+
+    /* The END is masked */
+    if (format == 0 && priority == 0xff) {
+        return;
+    }
+
+    /*
+     * Check the END ESn (Event State Buffer for notification) for
+     * even further coalescing in the Router
+     */
+    if (!xive_end_is_notify(&end)) {
+        uint8_t pq = xive_get_field32(END_W1_ESn, end.w1);
+        bool notify = xive_esb_trigger(&pq);
+
+        if (pq != xive_get_field32(END_W1_ESn, end.w1)) {
+            end.w1 = xive_set_field32(END_W1_ESn, end.w1, pq);
+            xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
+        }
+
+        /* ESn[Q]=1 : end of notification */
+        if (!notify) {
+            return;
+        }
+    }
+
+    /*
+     * Follows IVPE notification
+     */
+    xive_presenter_notify(xrtr, format,
+                          xive_get_field32(END_W6_NVT_BLOCK, end.w6),
+                          xive_get_field32(END_W6_NVT_INDEX, end.w6),
+                          xive_get_field32(END_W7_F0_IGNORE, end.w7),
+                          priority,
+                          xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7));
+
+    /* TODO: Auto EOI. */
+}
+
+static void xive_router_notify(XiveNotifier *xn, uint32_t lisn)
+{
+    XiveRouter *xrtr = XIVE_ROUTER(xn);
+    uint8_t eas_blk = XIVE_SRCNO_BLOCK(lisn);
+    uint32_t eas_idx = XIVE_SRCNO_INDEX(lisn);
+    XiveEAS eas;
+
+    /* EAS cache lookup */
+    if (xive_router_get_eas(xrtr, eas_blk, eas_idx, &eas)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
+        return;
+    }
+
+    /*
+     * The IVRE checks the State Bit Cache at this point. We skip the
+     * SBC lookup because the state bits of the sources are modeled
+     * internally in QEMU.
+     */
+
+    if (!xive_eas_is_valid(&eas)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
+        return;
+    }
+
+    if (xive_eas_is_masked(&eas)) {
+        /* Notification completed */
+        return;
+    }
+
+    /*
+     * The event trigger becomes an END trigger
+     */
+    xive_router_end_notify(xrtr,
+                           xive_get_field64(EAS_END_BLOCK, eas.w),
+                           xive_get_field64(EAS_END_INDEX, eas.w),
+                           xive_get_field64(EAS_END_DATA,  eas.w));
+}
+
+static void xive_router_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass);
+
+    dc->desc    = "XIVE Router Engine";
+    xnc->notify = xive_router_notify;
+}
+
+static const TypeInfo xive_router_info = {
+    .name          = TYPE_XIVE_ROUTER,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .abstract      = true,
+    .class_size    = sizeof(XiveRouterClass),
+    .class_init    = xive_router_class_init,
+    .interfaces    = (InterfaceInfo[]) {
+        { TYPE_XIVE_NOTIFIER },
+        { }
+    }
+};
+
+void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon)
+{
+    if (!xive_eas_is_valid(eas)) {
+        return;
+    }
+
+    monitor_printf(mon, "  %08x %s end:%02x/%04x data:%08x\n",
+                   lisn, xive_eas_is_masked(eas) ? "M" : " ",
+                   (uint8_t)  xive_get_field64(EAS_END_BLOCK, eas->w),
+                   (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
+                   (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
+}
+
+/*
+ * END ESB MMIO loads
+ */
+static uint64_t xive_end_source_read(void *opaque, hwaddr addr, unsigned size)
+{
+    XiveENDSource *xsrc = XIVE_END_SOURCE(opaque);
+    uint32_t offset = addr & 0xFFF;
+    uint8_t end_blk;
+    uint32_t end_idx;
+    XiveEND end;
+    uint32_t end_esmask;
+    uint8_t pq;
+    uint64_t ret = -1;
+
+    end_blk = xsrc->block_id;
+    end_idx = addr >> (xsrc->esb_shift + 1);
+
+    if (xive_router_get_end(xsrc->xrtr, end_blk, end_idx, &end)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
+                      end_idx);
+        return -1;
+    }
+
+    if (!xive_end_is_valid(&end)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
+                      end_blk, end_idx);
+        return -1;
+    }
+
+    end_esmask = addr_is_even(addr, xsrc->esb_shift) ? END_W1_ESn : END_W1_ESe;
+    pq = xive_get_field32(end_esmask, end.w1);
+
+    switch (offset) {
+    case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
+        ret = xive_esb_eoi(&pq);
+
+        /* Forward the source event notification for routing ?? */
+        break;
+
+    case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
+        ret = pq;
+        break;
+
+    case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
+    case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
+    case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
+    case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
+        ret = xive_esb_set(&pq, (offset >> 8) & 0x3);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid END ESB load addr %d\n",
+                      offset);
+        return -1;
+    }
+
+    if (pq != xive_get_field32(end_esmask, end.w1)) {
+        end.w1 = xive_set_field32(end_esmask, end.w1, pq);
+        xive_router_write_end(xsrc->xrtr, end_blk, end_idx, &end, 1);
+    }
+
+    return ret;
+}
+
+/*
+ * END ESB MMIO stores are invalid
+ */
+static void xive_end_source_write(void *opaque, hwaddr addr,
+                                  uint64_t value, unsigned size)
+{
+    qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr 0x%"
+                  HWADDR_PRIx"\n", addr);
+}
+
+static const MemoryRegionOps xive_end_source_ops = {
+    .read = xive_end_source_read,
+    .write = xive_end_source_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+static void xive_end_source_realize(DeviceState *dev, Error **errp)
+{
+    XiveENDSource *xsrc = XIVE_END_SOURCE(dev);
+    Object *obj;
+    Error *local_err = NULL;
+
+    obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
+    if (!obj) {
+        error_propagate(errp, local_err);
+        error_prepend(errp, "required link 'xive' not found: ");
+        return;
+    }
+
+    xsrc->xrtr = XIVE_ROUTER(obj);
+
+    if (!xsrc->nr_ends) {
+        error_setg(errp, "Number of interrupt needs to be greater than 0");
+        return;
+    }
+
+    if (xsrc->esb_shift != XIVE_ESB_4K &&
+        xsrc->esb_shift != XIVE_ESB_64K) {
+        error_setg(errp, "Invalid ESB shift setting");
+        return;
+    }
+
+    /*
+     * Each END is assigned an even/odd pair of MMIO pages, the even page
+     * manages the ESn field while the odd page manages the ESe field.
+     */
+    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
+                          &xive_end_source_ops, xsrc, "xive.end",
+                          (1ull << (xsrc->esb_shift + 1)) * xsrc->nr_ends);
+}
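
Each END therefore owns an even/odd pair of ESB pages: bit esb_shift of the offset selects the field (even page ESn, odd page ESe) and the higher bits select the END. A sketch of the decode performed by xive_end_source_read() above:

#include <stdbool.h>
#include <stdint.h>

static void end_esb_decode_sketch(uint64_t addr, unsigned esb_shift,
                                  uint32_t *end_idx, bool *is_escalation)
{
    *end_idx = addr >> (esb_shift + 1);        /* which END pair */
    *is_escalation = (addr >> esb_shift) & 1;  /* odd page -> ESe */
}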
+
+static Property xive_end_source_properties[] = {
+    DEFINE_PROP_UINT8("block-id", XiveENDSource, block_id, 0),
+    DEFINE_PROP_UINT32("nr-ends", XiveENDSource, nr_ends, 0),
+    DEFINE_PROP_UINT32("shift", XiveENDSource, esb_shift, XIVE_ESB_64K),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xive_end_source_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->desc    = "XIVE END Source";
+    dc->props   = xive_end_source_properties;
+    dc->realize = xive_end_source_realize;
+}
+
+static const TypeInfo xive_end_source_info = {
+    .name          = TYPE_XIVE_END_SOURCE,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(XiveENDSource),
+    .class_init    = xive_end_source_class_init,
+};
+
+/*
+ * XIVE Fabric
+ */
+static const TypeInfo xive_fabric_info = {
+    .name = TYPE_XIVE_NOTIFIER,
+    .parent = TYPE_INTERFACE,
+    .class_size = sizeof(XiveNotifierClass),
+};
+
+static void xive_register_types(void)
+{
+    type_register_static(&xive_source_info);
+    type_register_static(&xive_fabric_info);
+    type_register_static(&xive_router_info);
+    type_register_static(&xive_end_source_info);
+    type_register_static(&xive_tctx_info);
+}
+
+type_init(xive_register_types)
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 3648630386..54746a4030 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -18,7 +18,6 @@
 #include "qemu/osdep.h"
 #include "hw/hw.h"
 #include "hw/pci/pci.h"
-#include "net/net.h"
 #include "net/tap.h"
 #include "net/checksum.h"
 #include "sysemu/sysemu.h"
@@ -29,6 +28,7 @@
 #include "migration/register.h"
 
 #include "vmxnet3.h"
+#include "vmxnet3_defs.h"
 #include "vmxnet_debug.h"
 #include "vmware_utils.h"
 #include "net_tx_pkt.h"
@@ -131,23 +131,11 @@ typedef struct VMXNET3Class {
     DeviceRealize parent_dc_realize;
 } VMXNET3Class;
 
-#define TYPE_VMXNET3 "vmxnet3"
-#define VMXNET3(obj) OBJECT_CHECK(VMXNET3State, (obj), TYPE_VMXNET3)
-
 #define VMXNET3_DEVICE_CLASS(klass) \
     OBJECT_CLASS_CHECK(VMXNET3Class, (klass), TYPE_VMXNET3)
 #define VMXNET3_DEVICE_GET_CLASS(obj) \
     OBJECT_GET_CLASS(VMXNET3Class, (obj), TYPE_VMXNET3)
 
-/* Cyclic ring abstraction */
-typedef struct {
-    hwaddr pa;
-    uint32_t size;
-    uint32_t cell_size;
-    uint32_t next;
-    uint8_t gen;
-} Vmxnet3Ring;
-
 static inline void vmxnet3_ring_init(PCIDevice *d,
 				     Vmxnet3Ring *ring,
                                      hwaddr pa,
@@ -245,108 +233,6 @@ vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
               descr->rsvd, descr->dtype, descr->ext1, descr->btype);
 }
 
-/* Device state and helper functions */
-#define VMXNET3_RX_RINGS_PER_QUEUE (2)
-
-typedef struct {
-    Vmxnet3Ring tx_ring;
-    Vmxnet3Ring comp_ring;
-
-    uint8_t intr_idx;
-    hwaddr tx_stats_pa;
-    struct UPT1_TxStats txq_stats;
-} Vmxnet3TxqDescr;
-
-typedef struct {
-    Vmxnet3Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE];
-    Vmxnet3Ring comp_ring;
-    uint8_t intr_idx;
-    hwaddr rx_stats_pa;
-    struct UPT1_RxStats rxq_stats;
-} Vmxnet3RxqDescr;
-
-typedef struct {
-    bool is_masked;
-    bool is_pending;
-    bool is_asserted;
-} Vmxnet3IntState;
-
-typedef struct {
-        PCIDevice parent_obj;
-        NICState *nic;
-        NICConf conf;
-        MemoryRegion bar0;
-        MemoryRegion bar1;
-        MemoryRegion msix_bar;
-
-        Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES];
-        Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES];
-
-        /* Whether MSI-X support was installed successfully */
-        bool msix_used;
-        hwaddr drv_shmem;
-        hwaddr temp_shared_guest_driver_memory;
-
-        uint8_t txq_num;
-
-        /* This boolean tells whether RX packet being indicated has to */
-        /* be split into head and body chunks from different RX rings  */
-        bool rx_packets_compound;
-
-        bool rx_vlan_stripping;
-        bool lro_supported;
-
-        uint8_t rxq_num;
-
-        /* Network MTU */
-        uint32_t mtu;
-
-        /* Maximum number of fragments for indicated TX packets */
-        uint32_t max_tx_frags;
-
-        /* Maximum number of fragments for indicated RX packets */
-        uint16_t max_rx_frags;
-
-        /* Index for events interrupt */
-        uint8_t event_int_idx;
-
-        /* Whether automatic interrupts masking enabled */
-        bool auto_int_masking;
-
-        bool peer_has_vhdr;
-
-        /* TX packets to QEMU interface */
-        struct NetTxPkt *tx_pkt;
-        uint32_t offload_mode;
-        uint32_t cso_or_gso_size;
-        uint16_t tci;
-        bool needs_vlan;
-
-        struct NetRxPkt *rx_pkt;
-
-        bool tx_sop;
-        bool skip_current_tx_pkt;
-
-        uint32_t device_active;
-        uint32_t last_command;
-
-        uint32_t link_status_and_speed;
-
-        Vmxnet3IntState interrupt_states[VMXNET3_MAX_INTRS];
-
-        uint32_t temp_mac;   /* To store the low part first */
-
-        MACAddr perm_mac;
-        uint32_t vlan_table[VMXNET3_VFT_SIZE];
-        uint32_t rx_mode;
-        MACAddr *mcast_list;
-        uint32_t mcast_list_len;
-        uint32_t mcast_list_buff_size; /* needed for live migration. */
-
-        /* Compatibility flags for migration */
-        uint32_t compat_flags;
-} VMXNET3State;
-
 /* Interrupt management */
 
 /*
diff --git a/hw/net/vmxnet3_defs.h b/hw/net/vmxnet3_defs.h
new file mode 100644
index 0000000000..6c19d29b12
--- /dev/null
+++ b/hw/net/vmxnet3_defs.h
@@ -0,0 +1,133 @@
+/*
+ * QEMU VMWARE VMXNET3 paravirtual NIC
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Dmitry Fleytman <dmitry@daynix.com>
+ * Tamir Shomer <tamirs@daynix.com>
+ * Yan Vugenfirer <yan@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "net/net.h"
+#include "hw/net/vmxnet3.h"
+
+#define TYPE_VMXNET3 "vmxnet3"
+#define VMXNET3(obj) OBJECT_CHECK(VMXNET3State, (obj), TYPE_VMXNET3)
+
+/* Device state and helper functions */
+#define VMXNET3_RX_RINGS_PER_QUEUE (2)
+
+/* Cyclic ring abstraction */
+typedef struct {
+    hwaddr pa;
+    uint32_t size;
+    uint32_t cell_size;
+    uint32_t next;
+    uint8_t gen;
+} Vmxnet3Ring;
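
The gen field flips each time next wraps, letting producer and consumer agree on descriptor ownership without extra shared state. A sketch of the advance step under that assumption (the device-facing logic stays in vmxnet3.c):

/* Illustrative sketch using the Vmxnet3Ring fields above */
static void vmxnet3_ring_inc_sketch(Vmxnet3Ring *ring)
{
    if (++ring->next >= ring->size) {
        ring->next = 0;
        ring->gen ^= 1;     /* wrap: flip the generation bit */
    }
}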
+
+typedef struct {
+    Vmxnet3Ring tx_ring;
+    Vmxnet3Ring comp_ring;
+
+    uint8_t intr_idx;
+    hwaddr tx_stats_pa;
+    struct UPT1_TxStats txq_stats;
+} Vmxnet3TxqDescr;
+
+typedef struct {
+    Vmxnet3Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE];
+    Vmxnet3Ring comp_ring;
+    uint8_t intr_idx;
+    hwaddr rx_stats_pa;
+    struct UPT1_RxStats rxq_stats;
+} Vmxnet3RxqDescr;
+
+typedef struct {
+    bool is_masked;
+    bool is_pending;
+    bool is_asserted;
+} Vmxnet3IntState;
+
+typedef struct {
+        PCIDevice parent_obj;
+        NICState *nic;
+        NICConf conf;
+        MemoryRegion bar0;
+        MemoryRegion bar1;
+        MemoryRegion msix_bar;
+
+        Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES];
+        Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES];
+
+        /* Whether MSI-X support was installed successfully */
+        bool msix_used;
+        hwaddr drv_shmem;
+        hwaddr temp_shared_guest_driver_memory;
+
+        uint8_t txq_num;
+
+        /* This boolean tells whether RX packet being indicated has to */
+        /* be split into head and body chunks from different RX rings  */
+        bool rx_packets_compound;
+
+        bool rx_vlan_stripping;
+        bool lro_supported;
+
+        uint8_t rxq_num;
+
+        /* Network MTU */
+        uint32_t mtu;
+
+        /* Maximum number of fragments for indicated TX packets */
+        uint32_t max_tx_frags;
+
+        /* Maximum number of fragments for indicated RX packets */
+        uint16_t max_rx_frags;
+
+        /* Index for events interrupt */
+        uint8_t event_int_idx;
+
+        /* Whether automatic interrupts masking enabled */
+        bool auto_int_masking;
+
+        bool peer_has_vhdr;
+
+        /* TX packets to QEMU interface */
+        struct NetTxPkt *tx_pkt;
+        uint32_t offload_mode;
+        uint32_t cso_or_gso_size;
+        uint16_t tci;
+        bool needs_vlan;
+
+        struct NetRxPkt *rx_pkt;
+
+        bool tx_sop;
+        bool skip_current_tx_pkt;
+
+        uint32_t device_active;
+        uint32_t last_command;
+
+        uint32_t link_status_and_speed;
+
+        Vmxnet3IntState interrupt_states[VMXNET3_MAX_INTRS];
+
+        uint32_t temp_mac;   /* To store the low part first */
+
+        MACAddr perm_mac;
+        uint32_t vlan_table[VMXNET3_VFT_SIZE];
+        uint32_t rx_mode;
+        MACAddr *mcast_list;
+        uint32_t mcast_list_len;
+        uint32_t mcast_list_buff_size; /* needed for live migration. */
+
+        /* Compatibility flags for migration */
+        uint32_t compat_flags;
+} VMXNET3State;
diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c
index 299de429ec..9766edb445 100644
--- a/hw/pci-bridge/gen_pcie_root_port.c
+++ b/hw/pci-bridge/gen_pcie_root_port.c
@@ -124,6 +124,10 @@ static Property gen_rp_props[] = {
                      res_reserve.mem_pref_32, -1),
     DEFINE_PROP_SIZE("pref64-reserve", GenPCIERootPort,
                      res_reserve.mem_pref_64, -1),
+    DEFINE_PROP_PCIE_LINK_SPEED("x-speed", PCIESlot,
+                                speed, PCIE_LINK_SPEED_16),
+    DEFINE_PROP_PCIE_LINK_WIDTH("x-width", PCIESlot,
+                                width, PCIE_LINK_WIDTH_32),
     DEFINE_PROP_END_OF_LIST()
 };
 
diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c
index 97a8e8b6a4..ff6b8323da 100644
--- a/hw/pci-bridge/pci_bridge_dev.c
+++ b/hw/pci-bridge/pci_bridge_dev.c
@@ -206,31 +206,39 @@ static const VMStateDescription pci_bridge_dev_vmstate = {
     }
 };
 
-static void pci_bridge_dev_hotplug_cb(HotplugHandler *hotplug_dev,
-                                      DeviceState *dev, Error **errp)
+void pci_bridge_dev_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+                            Error **errp)
 {
     PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
 
     if (!shpc_present(pci_hotplug_dev)) {
         error_setg(errp, "standard hotplug controller has been disabled for "
-                   "this %s", TYPE_PCI_BRIDGE_DEV);
+                   "this %s", object_get_typename(OBJECT(hotplug_dev)));
         return;
     }
-    shpc_device_hotplug_cb(hotplug_dev, dev, errp);
+    shpc_device_plug_cb(hotplug_dev, dev, errp);
 }
 
-static void pci_bridge_dev_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
-                                                 DeviceState *dev,
-                                                 Error **errp)
+void pci_bridge_dev_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+                              Error **errp)
+{
+    PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
+
+    g_assert(shpc_present(pci_hotplug_dev));
+    shpc_device_unplug_cb(hotplug_dev, dev, errp);
+}
+
+void pci_bridge_dev_unplug_request_cb(HotplugHandler *hotplug_dev,
+                                      DeviceState *dev, Error **errp)
 {
     PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
 
     if (!shpc_present(pci_hotplug_dev)) {
         error_setg(errp, "standard hotplug controller has been disabled for "
-                   "this %s", TYPE_PCI_BRIDGE_DEV);
+                   "this %s", object_get_typename(OBJECT(hotplug_dev)));
         return;
     }
-    shpc_device_hot_unplug_request_cb(hotplug_dev, dev, errp);
+    shpc_device_unplug_request_cb(hotplug_dev, dev, errp);
 }
 
 static void pci_bridge_dev_class_init(ObjectClass *klass, void *data)
@@ -251,8 +259,9 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data)
     dc->props = pci_bridge_dev_properties;
     dc->vmsd = &pci_bridge_dev_vmstate;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
-    hc->plug = pci_bridge_dev_hotplug_cb;
-    hc->unplug_request = pci_bridge_dev_hot_unplug_request_cb;
+    hc->plug = pci_bridge_dev_plug_cb;
+    hc->unplug = pci_bridge_dev_unplug_cb;
+    hc->unplug_request = pci_bridge_dev_unplug_request_cb;
 }
 
 static const TypeInfo pci_bridge_dev_info = {
diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
index 04cf5a6a92..d491b40d04 100644
--- a/hw/pci-bridge/pcie_pci_bridge.c
+++ b/hw/pci-bridge/pcie_pci_bridge.c
@@ -137,33 +137,6 @@ static const VMStateDescription pcie_pci_bridge_dev_vmstate = {
         }
 };
 
-static void pcie_pci_bridge_hotplug_cb(HotplugHandler *hotplug_dev,
-                                      DeviceState *dev, Error **errp)
-{
-    PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
-
-    if (!shpc_present(pci_hotplug_dev)) {
-        error_setg(errp, "standard hotplug controller has been disabled for "
-                   "this %s", TYPE_PCIE_PCI_BRIDGE_DEV);
-        return;
-    }
-    shpc_device_hotplug_cb(hotplug_dev, dev, errp);
-}
-
-static void pcie_pci_bridge_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
-                                                 DeviceState *dev,
-                                                 Error **errp)
-{
-    PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
-
-    if (!shpc_present(pci_hotplug_dev)) {
-        error_setg(errp, "standard hotplug controller has been disabled for "
-                   "this %s", TYPE_PCIE_PCI_BRIDGE_DEV);
-        return;
-    }
-    shpc_device_hot_unplug_request_cb(hotplug_dev, dev, errp);
-}
-
 static void pcie_pci_bridge_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
@@ -180,8 +153,9 @@ static void pcie_pci_bridge_class_init(ObjectClass *klass, void *data)
     dc->props = pcie_pci_bridge_dev_properties;
     dc->reset = &pcie_pci_bridge_reset;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
-    hc->plug = pcie_pci_bridge_hotplug_cb;
-    hc->unplug_request = pcie_pci_bridge_hot_unplug_request_cb;
+    hc->plug = pci_bridge_dev_plug_cb;
+    hc->unplug = pci_bridge_dev_unplug_cb;
+    hc->unplug_request = pci_bridge_dev_unplug_request_cb;
 }
 
 static const TypeInfo pcie_pci_bridge_info = {
diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c
index 45f9e8cd4a..34ad76743c 100644
--- a/hw/pci-bridge/pcie_root_port.c
+++ b/hw/pci-bridge/pcie_root_port.c
@@ -140,6 +140,19 @@ static Property rp_props[] = {
     DEFINE_PROP_END_OF_LIST()
 };
 
+static void rp_instance_post_init(Object *obj)
+{
+    PCIESlot *s = PCIE_SLOT(obj);
+
+    if (!s->speed) {
+        s->speed = QEMU_PCI_EXP_LNK_2_5GT;
+    }
+
+    if (!s->width) {
+        s->width = QEMU_PCI_EXP_LNK_X1;
+    }
+}
+
 static void rp_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -157,6 +170,7 @@ static void rp_class_init(ObjectClass *klass, void *data)
 static const TypeInfo rp_info = {
     .name          = TYPE_PCIE_ROOT_PORT,
     .parent        = TYPE_PCIE_SLOT,
+    .instance_post_init = rp_instance_post_init,
     .class_init    = rp_class_init,
     .abstract      = true,
     .class_size = sizeof(PCIERootPortClass),
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index efb5ce196f..d831fa0a36 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1353,6 +1353,10 @@ uint32_t pci_default_read_config(PCIDevice *d,
 {
     uint32_t val = 0;
 
+    if (pci_is_express_downstream_port(d) &&
+        ranges_overlap(address, len, d->exp.exp_cap + PCI_EXP_LNKSTA, 2)) {
+        pcie_sync_bridge_lnk(d);
+    }
     memcpy(&val, d->config + address, len);
     return le32_to_cpu(val);
 }
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index ee9dff2d3a..b9143ac88b 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -241,9 +241,9 @@ void pci_bridge_update_mappings(PCIBridge *br)
      * while another accesses an unaffected region. */
     memory_region_transaction_begin();
     pci_bridge_region_del(br, br->windows);
+    pci_bridge_region_cleanup(br, w);
     br->windows = pci_bridge_region_init(br);
     memory_region_transaction_commit();
-    pci_bridge_region_cleanup(br, w);
 }
 
 /* default write_config function for PCI-to-PCI bridge */
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index 5eaa935cb5..5f5345dbac 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -20,6 +20,7 @@
 
 #include "qemu/osdep.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
 #include "hw/pci/pci_host.h"
 #include "hw/pci/pci_bus.h"
 #include "trace.h"
@@ -50,9 +51,29 @@ static inline PCIDevice *pci_dev_find_by_addr(PCIBus *bus, uint32_t addr)
     return pci_find_device(bus, bus_num, devfn);
 }
 
+static void pci_adjust_config_limit(PCIBus *bus, uint32_t *limit)
+{
+    if (*limit > PCI_CONFIG_SPACE_SIZE) {
+        if (!pci_bus_is_express(bus)) {
+            *limit = PCI_CONFIG_SPACE_SIZE;
+            return;
+        }
+
+        if (!pci_bus_is_root(bus)) {
+            PCIDevice *bridge = pci_bridge_get_device(bus);
+            pci_adjust_config_limit(pci_get_bus(bridge), limit);
+        }
+    }
+}
+
 void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
                                   uint32_t limit, uint32_t val, uint32_t len)
 {
+    pci_adjust_config_limit(pci_get_bus(pci_dev), &limit);
+    if (limit <= addr) {
+        return;
+    }
+
     assert(len <= 4);
     /* non-zero functions are only exposed when function 0 is present,
      * allowing direct removal of unexposed functions.
@@ -71,6 +92,11 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
 {
     uint32_t ret;
 
+    pci_adjust_config_limit(pci_get_bus(pci_dev), &limit);
+    if (limit <= addr) {
+        return ~0x0;
+    }
+
     assert(len <= 4);
     /* non-zero functions are only exposed when function 0 is present,
      * allowing direct removal of unexposed functions.
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 6c91bd44a0..2d3d8a047b 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -27,6 +27,7 @@
 #include "hw/pci/msi.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/pci/pcie_regs.h"
+#include "hw/pci/pcie_port.h"
 #include "qemu/range.h"
 
 //#define DEBUG_PCIE
@@ -68,11 +69,12 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version)
     pci_set_long(exp_cap + PCI_EXP_LNKCAP,
                  (port << PCI_EXP_LNKCAP_PN_SHIFT) |
                  PCI_EXP_LNKCAP_ASPMS_0S |
-                 PCI_EXP_LNK_MLW_1 |
-                 PCI_EXP_LNK_LS_25);
+                 QEMU_PCI_EXP_LNKCAP_MLW(QEMU_PCI_EXP_LNK_X1) |
+                 QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT));
 
     pci_set_word(exp_cap + PCI_EXP_LNKSTA,
-                 PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25);
+                 QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1) |
+                 QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT));
 
     if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
         pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
@@ -86,6 +88,76 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version)
     pci_set_word(cmask + PCI_EXP_LNKSTA, 0);
 }
 
+static void pcie_cap_fill_slot_lnk(PCIDevice *dev)
+{
+    PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT);
+    uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+
+    /* Skip anything that isn't a PCIESlot */
+    if (!s) {
+        return;
+    }
+
+    /* Clear and fill LNKCAP from what was configured above */
+    pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP,
+                                 PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS);
+    pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+                               QEMU_PCI_EXP_LNKCAP_MLW(s->width) |
+                               QEMU_PCI_EXP_LNKCAP_MLS(s->speed));
+
+    /*
+     * Link bandwidth notification is required for all root ports and
+     * downstream ports supporting links wider than x1 or multiple link
+     * speeds.
+     */
+    if (s->width > QEMU_PCI_EXP_LNK_X1 ||
+        s->speed > QEMU_PCI_EXP_LNK_2_5GT) {
+        pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+                                   PCI_EXP_LNKCAP_LBNC);
+    }
+
+    if (s->speed > QEMU_PCI_EXP_LNK_2_5GT) {
+        /*
+         * Hot-plug capable downstream ports and downstream ports supporting
+         * link speeds greater than 5GT/s must hardwire PCI_EXP_LNKCAP_DLLLARC
+         * to 1b.  PCI_EXP_LNKCAP_DLLLARC implies PCI_EXP_LNKSTA_DLLLA, which
+         * we also hardwire to 1b here.  2.5GT/s hot-plug slots should also
+         * technically implement this, but it's not done here for compatibility.
+         */
+        pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+                                   PCI_EXP_LNKCAP_DLLLARC);
+        pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
+                                   PCI_EXP_LNKSTA_DLLLA);
+
+        /*
+         * Target Link Speed defaults to the highest link speed supported by
+         * the component.  2.5GT/s devices are permitted to hardwire to zero.
+         */
+        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKCTL2,
+                                     PCI_EXP_LNKCTL2_TLS);
+        pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKCTL2,
+                                   QEMU_PCI_EXP_LNKCAP_MLS(s->speed) &
+                                   PCI_EXP_LNKCTL2_TLS);
+    }
+
+    /*
+     * 2.5 & 5.0GT/s can be fully described by LNKCAP, but 8.0GT/s is
+     * actually a reference to the highest bit supported in this register.
+     * We assume the device supports all link speeds.
+     */
+    if (s->speed > QEMU_PCI_EXP_LNK_5GT) {
+        pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP2, ~0U);
+        pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
+                                   PCI_EXP_LNKCAP2_SLS_2_5GB |
+                                   PCI_EXP_LNKCAP2_SLS_5_0GB |
+                                   PCI_EXP_LNKCAP2_SLS_8_0GB);
+        if (s->speed > QEMU_PCI_EXP_LNK_8GT) {
+            pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
+                                       PCI_EXP_LNKCAP2_SLS_16_0GB);
+        }
+    }
+}
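
The QEMU_PCI_EXP_LNKCAP_* helpers used above presumably encode the enum values directly into the PCIe register fields: Supported Link Speeds occupies LNKCAP bits 3:0 and Maximum Link Width bits 9:4. A sketch under those assumptions:

#include <stdint.h>

/* PCIe spec field positions: SLS = bits 3:0, MLW = bits 9:4 */
static uint32_t lnkcap_sketch(uint32_t speed /* 1=2.5, 2=5, 3=8, 4=16 GT/s */,
                              uint32_t width /* 1, 2, 4, ... 32 lanes */)
{
    return (speed & 0xf) | ((width & 0x3f) << 4);
}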
+
 int pcie_cap_init(PCIDevice *dev, uint8_t offset,
                   uint8_t type, uint8_t port,
                   Error **errp)
@@ -107,6 +179,9 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset,
     /* Filling values common with v1 */
     pcie_cap_v1_fill(dev, port, type, PCI_EXP_FLAGS_VER2);
 
+    /* Fill link speed and width options */
+    pcie_cap_fill_slot_lnk(dev);
+
     /* Filling v2 specific values */
     pci_set_long(exp_cap + PCI_EXP_DEVCAP2,
                  PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP);
@@ -315,9 +390,8 @@ static void pcie_cap_slot_event(PCIDevice *dev, PCIExpressHotPlugEvent event)
     hotplug_event_notify(dev);
 }
 
-static void pcie_cap_slot_hotplug_common(PCIDevice *hotplug_dev,
-                                         DeviceState *dev,
-                                         uint8_t **exp_cap, Error **errp)
+static void pcie_cap_slot_plug_common(PCIDevice *hotplug_dev, DeviceState *dev,
+                                      uint8_t **exp_cap, Error **errp)
 {
     *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap;
     uint16_t sltsta = pci_get_word(*exp_cap + PCI_EXP_SLTSTA);
@@ -331,13 +405,13 @@ static void pcie_cap_slot_hotplug_common(PCIDevice *hotplug_dev,
     }
 }
 
-void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
-                              Error **errp)
+void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+                           Error **errp)
 {
     uint8_t *exp_cap;
     PCIDevice *pci_dev = PCI_DEVICE(dev);
 
-    pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
+    pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
 
     /* Don't send event when device is enabled during qemu machine creation:
      * it is present on boot, no hotplug event is necessary. We do send an
@@ -345,6 +419,10 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
     if (!dev->hotplugged) {
         pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
                                    PCI_EXP_SLTSTA_PDS);
+        if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
+            pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
+                                       PCI_EXP_LNKSTA_DLLLA);
+        }
         return;
     }
 
@@ -355,24 +433,36 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
     if (pci_get_function_0(pci_dev)) {
         pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
                                    PCI_EXP_SLTSTA_PDS);
+        if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
+            pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
+                                       PCI_EXP_LNKSTA_DLLLA);
+        }
         pcie_cap_slot_event(PCI_DEVICE(hotplug_dev),
                             PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP);
     }
 }
 
-static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
+void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+                             Error **errp)
 {
     object_unparent(OBJECT(dev));
 }
 
-void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
-                                         DeviceState *dev, Error **errp)
+static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
+{
+    HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(DEVICE(dev));
+
+    hotplug_handler_unplug(hotplug_ctrl, DEVICE(dev), &error_abort);
+}
+
+void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
+                                     DeviceState *dev, Error **errp)
 {
     uint8_t *exp_cap;
     PCIDevice *pci_dev = PCI_DEVICE(dev);
     PCIBus *bus = pci_get_bus(pci_dev);
 
-    pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
+    pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
 
     /* In case user cancel the operation of multi-function hot-add,
      * remove the function that is unexposed to guest individually,
@@ -531,6 +621,10 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
 
         pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
                                      PCI_EXP_SLTSTA_PDS);
+        if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
+            pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
+                                         PCI_EXP_LNKSTA_DLLLA);
+        }
         pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
                                        PCI_EXP_SLTSTA_PDC);
     }
@@ -728,6 +822,45 @@ void pcie_add_capability(PCIDevice *dev,
     memset(dev->cmask + offset, 0xFF, size);
 }
 
+/*
+ * Sync the PCIe Link Status negotiated speed and width of a bridge with the
+ * downstream device.  If downstream device is not present, re-write with the
+ * Link Capability fields.  Limit width and speed to bridge capabilities for
+ * compatibility.  Use config_read to access the downstream device since it
+ * could be an assigned device with volatile link information.
+ */
+void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
+{
+    PCIBridge *br = PCI_BRIDGE(bridge_dev);
+    PCIBus *bus = pci_bridge_get_sec_bus(br);
+    PCIDevice *target = bus->devices[0];
+    uint8_t *exp_cap = bridge_dev->config + bridge_dev->exp.exp_cap;
+    uint16_t lnksta, lnkcap = pci_get_word(exp_cap + PCI_EXP_LNKCAP);
+
+    if (!target || !target->exp.exp_cap) {
+        lnksta = lnkcap;
+    } else {
+        lnksta = target->config_read(target,
+                                     target->exp.exp_cap + PCI_EXP_LNKSTA,
+                                     sizeof(lnksta));
+
+        if ((lnksta & PCI_EXP_LNKSTA_NLW) > (lnkcap & PCI_EXP_LNKCAP_MLW)) {
+            lnksta &= ~PCI_EXP_LNKSTA_NLW;
+            lnksta |= lnkcap & PCI_EXP_LNKCAP_MLW;
+        }
+
+        if ((lnksta & PCI_EXP_LNKSTA_CLS) > (lnkcap & PCI_EXP_LNKCAP_SLS)) {
+            lnksta &= ~PCI_EXP_LNKSTA_CLS;
+            lnksta |= lnkcap & PCI_EXP_LNKCAP_SLS;
+        }
+    }
+
+    pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
+                                 PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW);
+    pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, lnksta &
+                               (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW));
+}
+
 /**************************************************************************
  * pci express extended capability helper functions
  */
diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
index 6432b9ac1f..bc07abc31b 100644
--- a/hw/pci/pcie_port.c
+++ b/hw/pci/pcie_port.c
@@ -154,8 +154,9 @@ static void pcie_slot_class_init(ObjectClass *oc, void *data)
     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
 
     dc->props = pcie_slot_props;
-    hc->plug = pcie_cap_slot_hotplug_cb;
-    hc->unplug_request = pcie_cap_slot_hot_unplug_request_cb;
+    hc->plug = pcie_cap_slot_plug_cb;
+    hc->unplug = pcie_cap_slot_unplug_cb;
+    hc->unplug_request = pcie_cap_slot_unplug_request_cb;
 }
 
 static const TypeInfo pcie_slot_type_info = {
diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
index 96a43d2f70..45053b39b9 100644
--- a/hw/pci/shpc.c
+++ b/hw/pci/shpc.c
@@ -238,6 +238,7 @@ static void shpc_invalid_command(SHPCDevice *shpc)
 
 static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot)
 {
+    HotplugHandler *hotplug_ctrl;
     int devfn;
     int pci_slot = SHPC_IDX_TO_PCI(slot);
     for (devfn = PCI_DEVFN(pci_slot, 0);
@@ -245,7 +246,9 @@ static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot)
          ++devfn) {
         PCIDevice *affected_dev = shpc->sec_bus->devices[devfn];
         if (affected_dev) {
-            object_unparent(OBJECT(affected_dev));
+            hotplug_ctrl = qdev_get_hotplug_handler(DEVICE(affected_dev));
+            hotplug_handler_unplug(hotplug_ctrl, DEVICE(affected_dev),
+                                   &error_abort);
         }
     }
 }
@@ -482,8 +485,8 @@ static const MemoryRegionOps shpc_mmio_ops = {
         .max_access_size = 4,
     },
 };
-static void shpc_device_hotplug_common(PCIDevice *affected_dev, int *slot,
-                                       SHPCDevice *shpc, Error **errp)
+static void shpc_device_plug_common(PCIDevice *affected_dev, int *slot,
+                                    SHPCDevice *shpc, Error **errp)
 {
     int pci_slot = PCI_SLOT(affected_dev->devfn);
     *slot = SHPC_PCI_TO_IDX(pci_slot);
@@ -497,7 +500,7 @@ static void shpc_device_hotplug_common(PCIDevice *affected_dev, int *slot,
     }
 }
 
-void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+void shpc_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
                             Error **errp)
 {
     Error *local_err = NULL;
@@ -505,7 +508,7 @@ void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
     SHPCDevice *shpc = pci_hotplug_dev->shpc;
     int slot;
 
-    shpc_device_hotplug_common(PCI_DEVICE(dev), &slot, shpc, &local_err);
+    shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
@@ -540,8 +543,14 @@ void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
     shpc_interrupt_update(pci_hotplug_dev);
 }
 
-void shpc_device_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
-                                       DeviceState *dev, Error **errp)
+void shpc_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+                           Error **errp)
+{
+    object_unparent(OBJECT(dev));
+}
+
+void shpc_device_unplug_request_cb(HotplugHandler *hotplug_dev,
+                                   DeviceState *dev, Error **errp)
 {
     Error *local_err = NULL;
     PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
@@ -550,7 +559,7 @@ void shpc_device_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
     uint8_t led;
     int slot;
 
-    shpc_device_hotplug_common(PCI_DEVICE(dev), &slot, shpc, &local_err);
+    shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index e6747fce28..b20fea0dfc 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -685,7 +685,7 @@ static void ppce500_cpu_reset(void *opaque)
 }
 
 static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms,
-                                           qemu_irq **irqs)
+                                           IrqLines  *irqs)
 {
     DeviceState *dev;
     SysBusDevice *s;
@@ -705,7 +705,7 @@ static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms,
     k = 0;
     for (i = 0; i < smp_cpus; i++) {
         for (j = 0; j < OPENPIC_OUTPUT_NB; j++) {
-            sysbus_connect_irq(s, k++, irqs[i][j]);
+            sysbus_connect_irq(s, k++, irqs[i].irq[j]);
         }
     }
 
@@ -713,7 +713,7 @@ static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms,
 }
 
 static DeviceState *ppce500_init_mpic_kvm(const PPCE500MachineClass *pmc,
-                                          qemu_irq **irqs, Error **errp)
+                                          IrqLines *irqs, Error **errp)
 {
     Error *err = NULL;
     DeviceState *dev;
@@ -742,7 +742,7 @@ static DeviceState *ppce500_init_mpic_kvm(const PPCE500MachineClass *pmc,
 
 static DeviceState *ppce500_init_mpic(PPCE500MachineState *pms,
                                       MemoryRegion *ccsr,
-                                      qemu_irq **irqs)
+                                      IrqLines *irqs)
 {
     MachineState *machine = MACHINE(pms);
     const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms);
@@ -806,15 +806,14 @@ void ppce500_init(MachineState *machine)
     /* irq num for pin INTA, INTB, INTC and INTD is 1, 2, 3 and
      * 4 respectively */
     unsigned int pci_irq_nrs[PCI_NUM_PINS] = {1, 2, 3, 4};
-    qemu_irq **irqs;
+    IrqLines *irqs;
     DeviceState *dev, *mpicdev;
     CPUPPCState *firstenv = NULL;
     MemoryRegion *ccsr_addr_space;
     SysBusDevice *s;
     PPCE500CCSRState *ccsr;
 
-    irqs = g_malloc0(smp_cpus * sizeof(qemu_irq *));
-    irqs[0] = g_malloc0(smp_cpus * sizeof(qemu_irq) * OPENPIC_OUTPUT_NB);
+    irqs = g_new0(IrqLines, smp_cpus);
     for (i = 0; i < smp_cpus; i++) {
         PowerPCCPU *cpu;
         CPUState *cs;
@@ -834,10 +833,9 @@ void ppce500_init(MachineState *machine)
             firstenv = env;
         }
 
-        irqs[i] = irqs[0] + (i * OPENPIC_OUTPUT_NB);
         input = (qemu_irq *)env->irq_inputs;
-        irqs[i][OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT];
-        irqs[i][OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT];
+        irqs[i].irq[OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT];
+        irqs[i].irq[OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT];
         env->spr_cb[SPR_BOOKE_PIR].default_value = cs->cpu_index = i;
         env->mpic_iack = pmc->ccsrbar_base + MPC8544_MPIC_REGS_OFFSET + 0xa0;
 
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 7e45afae7c..bb19eaba36 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -115,7 +115,7 @@ static void ppc_core99_init(MachineState *machine)
     PowerPCCPU *cpu = NULL;
     CPUPPCState *env = NULL;
     char *filename;
-    qemu_irq **openpic_irqs;
+    IrqLines *openpic_irqs;
     int linux_boot, i, j, k;
     MemoryRegion *ram = g_new(MemoryRegion, 1), *bios = g_new(MemoryRegion, 1);
     hwaddr kernel_base, initrd_base, cmdline_base = 0;
@@ -248,41 +248,37 @@ static void ppc_core99_init(MachineState *machine)
     memory_region_add_subregion(get_system_memory(), 0xf8000000,
                                 sysbus_mmio_get_region(s, 0));
 
-    openpic_irqs = g_malloc0(smp_cpus * sizeof(qemu_irq *));
-    openpic_irqs[0] =
-        g_malloc0(smp_cpus * sizeof(qemu_irq) * OPENPIC_OUTPUT_NB);
+    openpic_irqs = g_new0(IrqLines, smp_cpus);
     for (i = 0; i < smp_cpus; i++) {
         /* Mac99 IRQ connection between OpenPIC outputs pins
          * and PowerPC input pins
          */
         switch (PPC_INPUT(env)) {
         case PPC_FLAGS_INPUT_6xx:
-            openpic_irqs[i] = openpic_irqs[0] + (i * OPENPIC_OUTPUT_NB);
-            openpic_irqs[i][OPENPIC_OUTPUT_INT] =
+            openpic_irqs[i].irq[OPENPIC_OUTPUT_INT] =
                 ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT];
-            openpic_irqs[i][OPENPIC_OUTPUT_CINT] =
+            openpic_irqs[i].irq[OPENPIC_OUTPUT_CINT] =
                 ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT];
-            openpic_irqs[i][OPENPIC_OUTPUT_MCK] =
+            openpic_irqs[i].irq[OPENPIC_OUTPUT_MCK] =
                 ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_MCP];
             /* Not connected ? */
-            openpic_irqs[i][OPENPIC_OUTPUT_DEBUG] = NULL;
+            openpic_irqs[i].irq[OPENPIC_OUTPUT_DEBUG] = NULL;
             /* Check this */
-            openpic_irqs[i][OPENPIC_OUTPUT_RESET] =
+            openpic_irqs[i].irq[OPENPIC_OUTPUT_RESET] =
                 ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_HRESET];
             break;
 #if defined(TARGET_PPC64)
         case PPC_FLAGS_INPUT_970:
-            openpic_irqs[i] = openpic_irqs[0] + (i * OPENPIC_OUTPUT_NB);
-            openpic_irqs[i][OPENPIC_OUTPUT_INT] =
+            openpic_irqs[i].irq[OPENPIC_OUTPUT_INT] =
                 ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_INT];
-            openpic_irqs[i][OPENPIC_OUTPUT_CINT] =
+            openpic_irqs[i].irq[OPENPIC_OUTPUT_CINT] =
                 ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_INT];
-            openpic_irqs[i][OPENPIC_OUTPUT_MCK] =
+            openpic_irqs[i].irq[OPENPIC_OUTPUT_MCK] =
                 ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_MCP];
             /* Not connected ? */
-            openpic_irqs[i][OPENPIC_OUTPUT_DEBUG] = NULL;
+            openpic_irqs[i].irq[OPENPIC_OUTPUT_DEBUG] = NULL;
             /* Check this */
-            openpic_irqs[i][OPENPIC_OUTPUT_RESET] =
+            openpic_irqs[i].irq[OPENPIC_OUTPUT_RESET] =
                 ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_HRESET];
             break;
 #endif /* defined(TARGET_PPC64) */
@@ -299,7 +295,7 @@ static void ppc_core99_init(MachineState *machine)
     k = 0;
     for (i = 0; i < smp_cpus; i++) {
         for (j = 0; j < OPENPIC_OUTPUT_NB; j++) {
-            sysbus_connect_irq(s, k++, openpic_irqs[i][j]);
+            sysbus_connect_irq(s, k++, openpic_irqs[i].irq[j]);
         }
     }
     g_free(openpic_irqs);
diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c
index 1b0a0a8ba3..f47b15f10e 100644
--- a/hw/ppc/ppc405_boards.c
+++ b/hw/ppc/ppc405_boards.c
@@ -149,7 +149,7 @@ static void ref405ep_init(MachineState *machine)
     MemoryRegion *bios;
     MemoryRegion *sram = g_new(MemoryRegion, 1);
     ram_addr_t bdloc;
-    MemoryRegion *ram_memories = g_malloc(2 * sizeof(*ram_memories));
+    MemoryRegion *ram_memories = g_new(MemoryRegion, 2);
     hwaddr ram_bases[2], ram_sizes[2];
     target_ulong sram_size;
     long bios_size;
@@ -448,7 +448,7 @@ static void taihu_405ep_init(MachineState *machine)
     qemu_irq *pic;
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *bios;
-    MemoryRegion *ram_memories = g_malloc(2 * sizeof(*ram_memories));
+    MemoryRegion *ram_memories = g_new(MemoryRegion, 2);
     MemoryRegion *ram = g_malloc0(sizeof(*ram));
     hwaddr ram_bases[2], ram_sizes[2];
     long bios_size;
diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c
index 5c58415cf1..e1aadf126d 100644
--- a/hw/ppc/ppc405_uc.c
+++ b/hw/ppc/ppc405_uc.c
@@ -1519,7 +1519,7 @@ CPUPPCState *ppc405cr_init(MemoryRegion *address_space_mem,
     /* OBP arbitrer */
     ppc4xx_opba_init(0xef600600);
     /* Universal interrupt controller */
-    irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB);
+    irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
     irqs[PPCUIC_OUTPUT_INT] =
         ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
     irqs[PPCUIC_OUTPUT_CINT] =
@@ -1877,7 +1877,7 @@ CPUPPCState *ppc405ep_init(MemoryRegion *address_space_mem,
     /* Initialize timers */
     ppc_booke_timers_init(cpu, sysclk, 0);
     /* Universal interrupt controller */
-    irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB);
+    irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
     irqs[PPCUIC_OUTPUT_INT] =
         ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
     irqs[PPCUIC_OUTPUT_CINT] =
diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c
index f5720f979e..b8aa55d526 100644
--- a/hw/ppc/ppc440_bamboo.c
+++ b/hw/ppc/ppc440_bamboo.c
@@ -169,8 +169,7 @@ static void bamboo_init(MachineState *machine)
     unsigned int pci_irq_nrs[4] = { 28, 27, 26, 25 };
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *isa = g_new(MemoryRegion, 1);
-    MemoryRegion *ram_memories
-        = g_malloc(PPC440EP_SDRAM_NR_BANKS * sizeof(*ram_memories));
+    MemoryRegion *ram_memories = g_new(MemoryRegion, PPC440EP_SDRAM_NR_BANKS);
     hwaddr ram_bases[PPC440EP_SDRAM_NR_BANKS];
     hwaddr ram_sizes[PPC440EP_SDRAM_NR_BANKS];
     qemu_irq *pic;
@@ -200,7 +199,7 @@ static void bamboo_init(MachineState *machine)
     ppc_dcr_init(env, NULL, NULL);
 
     /* interrupt controller */
-    irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB);
+    irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
     irqs[PPCUIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
     irqs[PPCUIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT];
     pic = ppcuic_init(env, irqs, 0x0C0, 0, 1);
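The g_malloc0-to-g_new0 conversions in these 4xx board files are behaviour-preserving: g_new0(T, n) allocates n zero-initialized elements of type T and, unlike the open-coded sizeof arithmetic, lets the compiler check the element type against the destination pointer. A minimal sketch of the equivalence:

    /* Equivalent allocations; the second form is type-checked */
    qemu_irq *a = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB);
    qemu_irq *b = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);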
diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c
index 5aac58f36e..4b051c0950 100644
--- a/hw/ppc/sam460ex.c
+++ b/hw/ppc/sam460ex.c
@@ -430,7 +430,7 @@ static void sam460ex_init(MachineState *machine)
     ppc4xx_plb_init(env);
 
     /* interrupt controllers */
-    irqs = g_malloc0(sizeof(*irqs) * PPCUIC_OUTPUT_NB);
+    irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
     irqs[PPCUIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
     irqs[PPCUIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT];
     uic[0] = ppcuic_init(env, irqs, 0xc0, 0, 1);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 55be0f56cb..19a07c5c9d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -150,7 +150,7 @@ static void pre_2_10_vmstate_unregister_dummy_icp(int i)
                        (void *)(uintptr_t) i);
 }
 
-static int xics_max_server_number(sPAPRMachineState *spapr)
+int spapr_max_server_number(sPAPRMachineState *spapr)
 {
     assert(spapr->vsmt);
     return DIV_ROUND_UP(max_cpus * spapr->vsmt, smp_threads);
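A worked example of the renamed helper, with illustrative values only: for max_cpus = 16, a VSMT of 8 and smp_threads = 4, the computation is

    /* Illustrative values, not taken from any real configuration */
    nr_servers = DIV_ROUND_UP(16 * 8, 4);   /* = 32 interrupt servers */

and both the XICS and XIVE backends size their per-server state from this figure.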
@@ -889,8 +889,6 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
     /* ibm,associativity-lookup-arrays */
     buf_len = (nr_nodes * 4 + 2) * sizeof(uint32_t);
     cur_index = int_buf = g_malloc0(buf_len);
-
-    cur_index = int_buf;
     int_buf[0] = cpu_to_be32(nr_nodes);
     int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */
     cur_index += 2;
@@ -1033,7 +1031,7 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
         cpu_to_be32(0),
         cpu_to_be32(0),
         cpu_to_be32(0),
-        cpu_to_be32(nb_numa_nodes ? nb_numa_nodes - 1 : 0),
+        cpu_to_be32(nb_numa_nodes ? nb_numa_nodes : 1),
     };
 
     _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas"));
@@ -1097,15 +1095,18 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
     spapr_dt_rtas_tokens(fdt, rtas);
 }
 
-/* Prepare ibm,arch-vec-5-platform-support, which indicates the MMU features
- * that the guest may request and thus the valid values for bytes 24..26 of
- * option vector 5: */
-static void spapr_dt_ov5_platform_support(void *fdt, int chosen)
+/*
+ * Prepare ibm,arch-vec-5-platform-support, which indicates the MMU
+ * and the XIVE features that the guest may request and thus the valid
+ * values for bytes 23..26 of option vector 5:
+ */
+static void spapr_dt_ov5_platform_support(sPAPRMachineState *spapr, void *fdt,
+                                          int chosen)
 {
     PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
 
     char val[2 * 4] = {
-        23, 0x00, /* Xive mode, filled in below. */
+        23, spapr->irq->ov5, /* Xive mode. */
         24, 0x00, /* Hash/Radix, filled in below. */
         25, 0x00, /* Hash options: Segment Tables == no, GTSE == no. */
         26, 0x40, /* Radix options: GTSE == yes. */
@@ -1113,7 +1114,11 @@ static void spapr_dt_ov5_platform_support(void *fdt, int chosen)
 
     if (!ppc_check_compat(first_ppc_cpu, CPU_POWERPC_LOGICAL_3_00, 0,
                           first_ppc_cpu->compat_pvr)) {
-        /* If we're in a pre POWER9 compat mode then the guest should do hash */
+        /*
+         * If we're in a pre-POWER9 compat mode then the guest should
+         * use hash and the legacy interrupt mode
+         */
+        val[1] = 0x00; /* XICS */
         val[3] = 0x00; /* Hash */
     } else if (kvm_enabled()) {
         if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) {
@@ -1191,7 +1196,7 @@ static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt)
         _FDT(fdt_setprop_string(fdt, chosen, "stdout-path", stdout_path));
     }
 
-    spapr_dt_ov5_platform_support(fdt, chosen);
+    spapr_dt_ov5_platform_support(spapr, fdt, chosen);
 
     g_free(stdout_path);
     g_free(bootlist);
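The val[] array above encodes (byte index, value) pairs for option vector 5 of the ibm,client-architecture-support negotiation. Byte 23 now advertises the interrupt controller mode; the ov5 constants used here are presumably single-byte flags along these lines (a sketch; values inferred from the sPAPRIrq definitions later in this patch, not shown in this hunk):

    /* Presumed OV5 byte-23 encodings for the interrupt controller mode */
    #define SPAPR_OV5_XIVE_LEGACY    0x0    /* XICS only              */
    #define SPAPR_OV5_XIVE_EXPLOIT   0x40   /* XIVE exploitation mode */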
@@ -1270,7 +1275,8 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr,
     _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2));
 
     /* /interrupt controller */
-    spapr_dt_xics(xics_max_server_number(spapr), fdt, PHANDLE_XICP);
+    spapr->irq->dt_populate(spapr, spapr_max_server_number(spapr), fdt,
+                          PHANDLE_XICP);
 
     ret = spapr_populate_memory(spapr, fdt);
     if (ret < 0) {
@@ -1290,7 +1296,8 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr,
     }
 
     QLIST_FOREACH(phb, &spapr->phbs, list) {
-        ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt, smc->irq->nr_msis);
+        ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt,
+                                    spapr->irq->nr_msis);
         if (ret < 0) {
             error_report("couldn't setup PCI devices in fdt");
             exit(1);
@@ -1620,6 +1627,12 @@ static void spapr_machine_reset(void)
 
     qemu_devices_reset();
 
+    /*
+     * This fixes up some of the default configuration of the XIVE
+     * devices. It must be called after the machine devices have been
+     * reset.
+     */
+    spapr_irq_reset(spapr, &error_fatal);
+
     /* DRC reset may cause a device to be unplugged. This will cause troubles
      * if this device is used by another device (eg, a running vhost backend
      * will crash QEMU if the DIMM holding the vring goes away). To avoid such
@@ -1731,14 +1744,6 @@ static int spapr_post_load(void *opaque, int version_id)
         return err;
     }
 
-    if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
-        CPUState *cs;
-        CPU_FOREACH(cs) {
-            PowerPCCPU *cpu = POWERPC_CPU(cs);
-            icp_resend(ICP(cpu->intc));
-        }
-    }
-
     /* In earlier versions, there was no separate qdev for the PAPR
      * RTC, so the RTC offset was stored directly in sPAPREnvironment.
      * So when migrating from those versions, poke the incoming offset
@@ -1759,6 +1764,11 @@ static int spapr_post_load(void *opaque, int version_id)
         }
     }
 
+    err = spapr_irq_post_load(spapr, version_id);
+    if (err) {
+        return err;
+    }
+
     return err;
 }
 
@@ -2466,15 +2476,10 @@ static void spapr_init_cpus(sPAPRMachineState *spapr)
         boot_cores_nr = possible_cpus->len;
     }
 
-    /* VSMT must be set in order to be able to compute VCPU ids, ie to
-     * call xics_max_server_number() or spapr_vcpu_id().
-     */
-    spapr_set_vsmt_mode(spapr, &error_fatal);
-
     if (smc->pre_2_10_has_unused_icps) {
         int i;
 
-        for (i = 0; i < xics_max_server_number(spapr); i++) {
+        for (i = 0; i < spapr_max_server_number(spapr); i++) {
             /* Dummy entries get deregistered when real ICPState objects
              * are registered during CPU core hotplug.
              */
@@ -2593,8 +2598,14 @@ static void spapr_machine_init(MachineState *machine)
     /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
     load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
 
+    /*
+     * VSMT must be set in order to be able to compute VCPU ids, i.e. to
+     * call spapr_max_server_number() or spapr_vcpu_id().
+     */
+    spapr_set_vsmt_mode(spapr, &error_fatal);
+
     /* Set up Interrupt Controller before we create the VCPUs */
-    smc->irq->init(spapr, &error_fatal);
+    spapr_irq_init(spapr, &error_fatal);
 
     /* Set up containers for ibm,client-architecture-support negotiated options
      */
@@ -2621,6 +2632,17 @@ static void spapr_machine_init(MachineState *machine)
     /* advertise support for ibm,dynamic-memory-v2 */
     spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2);
 
+    /* advertise XIVE on POWER9 machines */
+    if (spapr->irq->ov5 & SPAPR_OV5_XIVE_EXPLOIT) {
+        if (ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
+                                  0, spapr->max_compat_pvr)) {
+            spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
+        } else {
+            error_report("XIVE-only machines require a POWER9 CPU");
+            exit(1);
+        }
+    }
+
     /* init CPUs */
     spapr_init_cpus(spapr);
 
@@ -3031,9 +3053,38 @@ static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name,
     visit_type_uint32(v, name, (uint32_t *)opaque, errp);
 }
 
+static char *spapr_get_ic_mode(Object *obj, Error **errp)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+
+    if (spapr->irq == &spapr_irq_xics_legacy) {
+        return g_strdup("legacy");
+    } else if (spapr->irq == &spapr_irq_xics) {
+        return g_strdup("xics");
+    } else if (spapr->irq == &spapr_irq_xive) {
+        return g_strdup("xive");
+    }
+    g_assert_not_reached();
+}
+
+static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+
+    /* The legacy IRQ backend cannot be set */
+    if (strcmp(value, "xics") == 0) {
+        spapr->irq = &spapr_irq_xics;
+    } else if (strcmp(value, "xive") == 0) {
+        spapr->irq = &spapr_irq_xive;
+    } else {
+        error_setg(errp, "Bad value for \"ic-mode\" property");
+    }
+}
+
 static void spapr_instance_init(Object *obj)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 
     spapr->htab_fd = -1;
     spapr->use_hotplug_event_source = true;
@@ -3067,6 +3118,14 @@ static void spapr_instance_init(Object *obj)
                                     " the host's SMT mode", &error_abort);
     object_property_add_bool(obj, "vfio-no-msix-emulation",
                              spapr_get_msix_emulation, NULL, NULL);
+
+    /* The machine class defines the default interrupt controller mode */
+    spapr->irq = smc->irq;
+    object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
+                            spapr_set_ic_mode, NULL);
+    object_property_set_description(obj, "ic-mode",
+                 "Specifies the interrupt controller mode (xics, xive)",
+                 NULL);
 }
 
 static void spapr_machine_finalizefn(Object *obj)
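With the getter/setter registered in spapr_instance_init() above, the backend becomes user-selectable at machine creation; only xics and xive are accepted, while legacy is report-only. An illustrative invocation:

    qemu-system-ppc64 -machine pseries,ic-mode=xive ...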
@@ -3789,9 +3848,8 @@ static void spapr_pic_print_info(InterruptStatsProvider *obj,
                                  Monitor *mon)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
-    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 
-    smc->irq->print_info(spapr, mon);
+    spapr->irq->print_info(spapr, mon);
 }
 
 int spapr_get_vcpu_id(PowerPCCPU *cpu)
@@ -3873,7 +3931,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     hc->unplug = spapr_machine_device_unplug;
 
     smc->dr_lmb_enabled = true;
-    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
+    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0");
     mc->has_hotpluggable_cpus = true;
     smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED;
     fwc->get_dev_path = spapr_get_fw_dev_path;
@@ -3970,6 +4028,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
 {
     spapr_machine_4_0_class_options(mc);
     SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_3_1);
+    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
 }
 
 DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 2398ce62c0..82666436e9 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -11,7 +11,6 @@
 #include "hw/ppc/spapr_cpu_core.h"
 #include "target/ppc/cpu.h"
 #include "hw/ppc/spapr.h"
-#include "hw/ppc/xics.h" /* for icp_create() - to be removed */
 #include "hw/boards.h"
 #include "qapi/error.h"
 #include "sysemu/cpus.h"
@@ -233,8 +232,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     qemu_register_reset(spapr_cpu_reset, cpu);
     spapr_cpu_reset(cpu);
 
-    cpu->intc = icp_create(OBJECT(cpu), spapr->icp_type, XICS_FABRIC(spapr),
-                           &local_err);
+    cpu->intc = spapr->irq->cpu_intc_create(spapr, OBJECT(cpu), &local_err);
     if (local_err) {
         goto error_unregister;
     }
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 1b0880ac9e..b56466f89a 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -93,7 +93,7 @@ static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
 
     if (!table) {
         *fd = -1;
-        table = g_malloc0(nb_table * sizeof(uint64_t));
+        table = g_new0(uint64_t, nb_table);
     }
 
     trace_spapr_iommu_new_table(liobn, table, *fd);
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index e77b94cc68..7b3b5afec2 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -12,6 +12,7 @@
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_xive.h"
 #include "hw/ppc/xics.h"
 #include "sysemu/kvm.h"
 
@@ -93,15 +94,9 @@ error:
 static void spapr_irq_init_xics(sPAPRMachineState *spapr, Error **errp)
 {
     MachineState *machine = MACHINE(spapr);
-    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
-    int nr_irqs = smc->irq->nr_irqs;
+    int nr_irqs = spapr->irq->nr_irqs;
     Error *local_err = NULL;
 
-    /* Initialize the MSI IRQ allocator. */
-    if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
-        spapr_irq_msi_init(spapr, smc->irq->nr_msis);
-    }
-
     if (kvm_enabled()) {
         if (machine_kernel_irqchip_allowed(machine) &&
             !xics_kvm_init(spapr, &local_err)) {
@@ -195,6 +190,24 @@ static void spapr_irq_print_info_xics(sPAPRMachineState *spapr, Monitor *mon)
     ics_pic_print_info(spapr->ics, mon);
 }
 
+static Object *spapr_irq_cpu_intc_create_xics(sPAPRMachineState *spapr,
+                                              Object *cpu, Error **errp)
+{
+    return icp_create(cpu, spapr->icp_type, XICS_FABRIC(spapr), errp);
+}
+
+static int spapr_irq_post_load_xics(sPAPRMachineState *spapr, int version_id)
+{
+    if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
+        CPUState *cs;
+        CPU_FOREACH(cs) {
+            PowerPCCPU *cpu = POWERPC_CPU(cs);
+            icp_resend(ICP(cpu->intc));
+        }
+    }
+    return 0;
+}
+
 #define SPAPR_IRQ_XICS_NR_IRQS     0x1000
 #define SPAPR_IRQ_XICS_NR_MSIS     \
     (XICS_IRQ_BASE + SPAPR_IRQ_XICS_NR_IRQS - SPAPR_IRQ_MSI)
@@ -202,37 +215,184 @@ static void spapr_irq_print_info_xics(sPAPRMachineState *spapr, Monitor *mon)
 sPAPRIrq spapr_irq_xics = {
     .nr_irqs     = SPAPR_IRQ_XICS_NR_IRQS,
     .nr_msis     = SPAPR_IRQ_XICS_NR_MSIS,
+    .ov5         = SPAPR_OV5_XIVE_LEGACY,
 
     .init        = spapr_irq_init_xics,
     .claim       = spapr_irq_claim_xics,
     .free        = spapr_irq_free_xics,
     .qirq        = spapr_qirq_xics,
     .print_info  = spapr_irq_print_info_xics,
+    .dt_populate = spapr_dt_xics,
+    .cpu_intc_create = spapr_irq_cpu_intc_create_xics,
+    .post_load   = spapr_irq_post_load_xics,
+};
+
+/*
+ * XIVE IRQ backend.
+ */
+static void spapr_irq_init_xive(sPAPRMachineState *spapr, Error **errp)
+{
+    MachineState *machine = MACHINE(spapr);
+    uint32_t nr_servers = spapr_max_server_number(spapr);
+    DeviceState *dev;
+    int i;
+
+    /* KVM XIVE device not yet available */
+    if (kvm_enabled()) {
+        if (machine_kernel_irqchip_required(machine)) {
+            error_setg(errp, "kernel_irqchip requested. no KVM XIVE support");
+            return;
+        }
+    }
+
+    dev = qdev_create(NULL, TYPE_SPAPR_XIVE);
+    qdev_prop_set_uint32(dev, "nr-irqs", spapr->irq->nr_irqs);
+    /*
+     * Allocate 8 XIVE END structures per CPU, one for each of the
+     * eight available priorities.
+     */
+    qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3);
+    qdev_init_nofail(dev);
+
+    spapr->xive = SPAPR_XIVE(dev);
+
+    /* Enable the CPU IPIs */
+    for (i = 0; i < nr_servers; ++i) {
+        spapr_xive_irq_claim(spapr->xive, SPAPR_IRQ_IPI + i, false);
+    }
+
+    spapr_xive_hcall_init(spapr);
+}
+
+static int spapr_irq_claim_xive(sPAPRMachineState *spapr, int irq, bool lsi,
+                                Error **errp)
+{
+    if (!spapr_xive_irq_claim(spapr->xive, irq, lsi)) {
+        error_setg(errp, "IRQ %d is invalid", irq);
+        return -1;
+    }
+    return 0;
+}
+
+static void spapr_irq_free_xive(sPAPRMachineState *spapr, int irq, int num)
+{
+    int i;
+
+    for (i = irq; i < irq + num; ++i) {
+        spapr_xive_irq_free(spapr->xive, i);
+    }
+}
+
+static qemu_irq spapr_qirq_xive(sPAPRMachineState *spapr, int irq)
+{
+    return spapr_xive_qirq(spapr->xive, irq);
+}
+
+static void spapr_irq_print_info_xive(sPAPRMachineState *spapr,
+                                      Monitor *mon)
+{
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+        xive_tctx_pic_print_info(XIVE_TCTX(cpu->intc), mon);
+    }
+
+    spapr_xive_pic_print_info(spapr->xive, mon);
+}
+
+static Object *spapr_irq_cpu_intc_create_xive(sPAPRMachineState *spapr,
+                                              Object *cpu, Error **errp)
+{
+    Object *obj = xive_tctx_create(cpu, XIVE_ROUTER(spapr->xive), errp);
+
+    /*
+     * (TCG) Set the OS CAM line early for hotplugged CPUs, as they
+     * don't benefit from the reset of the XIVE IRQ backend
+     */
+    spapr_xive_set_tctx_os_cam(XIVE_TCTX(obj));
+    return obj;
+}
+
+static int spapr_irq_post_load_xive(sPAPRMachineState *spapr, int version_id)
+{
+    return 0;
+}
+
+static void spapr_irq_reset_xive(sPAPRMachineState *spapr, Error **errp)
+{
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+        /* (TCG) Set the OS CAM line of the thread interrupt context. */
+        spapr_xive_set_tctx_os_cam(XIVE_TCTX(cpu->intc));
+    }
+}
+
+/*
+ * XIVE uses the full IRQ number space. Set it to 8K to be compatible
+ * with XICS.
+ */
+
+#define SPAPR_IRQ_XIVE_NR_IRQS     0x2000
+#define SPAPR_IRQ_XIVE_NR_MSIS     (SPAPR_IRQ_XIVE_NR_IRQS - SPAPR_IRQ_MSI)
+
+sPAPRIrq spapr_irq_xive = {
+    .nr_irqs     = SPAPR_IRQ_XIVE_NR_IRQS,
+    .nr_msis     = SPAPR_IRQ_XIVE_NR_MSIS,
+    .ov5         = SPAPR_OV5_XIVE_EXPLOIT,
+
+    .init        = spapr_irq_init_xive,
+    .claim       = spapr_irq_claim_xive,
+    .free        = spapr_irq_free_xive,
+    .qirq        = spapr_qirq_xive,
+    .print_info  = spapr_irq_print_info_xive,
+    .dt_populate = spapr_dt_xive,
+    .cpu_intc_create = spapr_irq_cpu_intc_create_xive,
+    .post_load   = spapr_irq_post_load_xive,
+    .reset       = spapr_irq_reset_xive,
 };
 
 /*
  * sPAPR IRQ frontend routines for devices
  */
+void spapr_irq_init(sPAPRMachineState *spapr, Error **errp)
+{
+    /* Initialize the MSI IRQ allocator. */
+    if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+        spapr_irq_msi_init(spapr, spapr->irq->nr_msis);
+    }
+
+    spapr->irq->init(spapr, errp);
+}
 
 int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp)
 {
-    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
-
-    return smc->irq->claim(spapr, irq, lsi, errp);
+    return spapr->irq->claim(spapr, irq, lsi, errp);
 }
 
 void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num)
 {
-    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
-
-    smc->irq->free(spapr, irq, num);
+    spapr->irq->free(spapr, irq, num);
 }
 
 qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq)
 {
-    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    return spapr->irq->qirq(spapr, irq);
+}
+
+int spapr_irq_post_load(sPAPRMachineState *spapr, int version_id)
+{
+    return spapr->irq->post_load(spapr, version_id);
+}
 
-    return smc->irq->qirq(spapr, irq);
+void spapr_irq_reset(sPAPRMachineState *spapr, Error **errp)
+{
+    if (spapr->irq->reset) {
+        spapr->irq->reset(spapr, errp);
+    }
 }
 
 /*
@@ -295,10 +455,14 @@ int spapr_irq_find(sPAPRMachineState *spapr, int num, bool align, Error **errp)
 sPAPRIrq spapr_irq_xics_legacy = {
     .nr_irqs     = SPAPR_IRQ_XICS_LEGACY_NR_IRQS,
     .nr_msis     = SPAPR_IRQ_XICS_LEGACY_NR_IRQS,
+    .ov5         = SPAPR_OV5_XIVE_LEGACY,
 
     .init        = spapr_irq_init_xics,
     .claim       = spapr_irq_claim_xics,
     .free        = spapr_irq_free_xics,
     .qirq        = spapr_qirq_xics,
     .print_info  = spapr_irq_print_info_xics,
+    .dt_populate = spapr_dt_xics,
+    .cpu_intc_create = spapr_irq_cpu_intc_create_xics,
+    .post_load   = spapr_irq_post_load_xics,
 };
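For reference, the sPAPRIrq descriptor that spapr_irq_xics, spapr_irq_xive and spapr_irq_xics_legacy all instantiate can be reconstructed from the fields initialized above; a sketch of the presumed layout (the authoritative definition is in the sPAPR headers, which this diff does not show):

    typedef struct sPAPRIrq {
        uint32_t nr_irqs;
        uint32_t nr_msis;
        uint8_t  ov5;

        void     (*init)(sPAPRMachineState *spapr, Error **errp);
        int      (*claim)(sPAPRMachineState *spapr, int irq, bool lsi,
                          Error **errp);
        void     (*free)(sPAPRMachineState *spapr, int irq, int num);
        qemu_irq (*qirq)(sPAPRMachineState *spapr, int irq);
        void     (*print_info)(sPAPRMachineState *spapr, Monitor *mon);
        void     (*dt_populate)(sPAPRMachineState *spapr, uint32_t nr_servers,
                                void *fdt, uint32_t phandle);
        Object  *(*cpu_intc_create)(sPAPRMachineState *spapr, Object *cpu,
                                    Error **errp);
        int      (*post_load)(sPAPRMachineState *spapr, int version_id);
        void     (*reset)(sPAPRMachineState *spapr, Error **errp);
    } sPAPRIrq;

Only reset is optional (spapr_irq_reset() checks it for NULL); the XICS backends simply leave it unset.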
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 2374d55fc1..bfb02ee96b 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1370,18 +1370,9 @@ static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
 /* Callback to be called during DRC release. */
 void spapr_phb_remove_pci_device_cb(DeviceState *dev)
 {
-    /* some version guests do not wait for completion of a device
-     * cleanup (generally done asynchronously by the kernel) before
-     * signaling to QEMU that the device is safe, but instead sleep
-     * for some 'safe' period of time. unfortunately on a busy host
-     * this sleep isn't guaranteed to be long enough, resulting in
-     * bad things like IRQ lines being left asserted during final
-     * device removal. to deal with this we call reset just prior
-     * to finalizing the device, which will put the device back into
-     * an 'idle' state, as the device cleanup code expects.
-     */
-    pci_device_reset(PCI_DEVICE(dev));
-    object_unparent(OBJECT(dev));
+    HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);
+
+    hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
 }
 
 static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb,
@@ -1490,6 +1481,23 @@ out:
     }
 }
 
+static void spapr_pci_unplug(HotplugHandler *plug_handler,
+                             DeviceState *plugged_dev, Error **errp)
+{
+    /* Some guest versions do not wait for completion of a device
+     * cleanup (generally done asynchronously by the kernel) before
+     * signaling to QEMU that the device is safe to remove, but instead
+     * sleep for some 'safe' period of time. Unfortunately on a busy
+     * host this sleep isn't guaranteed to be long enough, resulting
+     * in bad things like IRQ lines being left asserted during final
+     * device removal. To deal with this we call reset just prior to
+     * finalizing the device, which will put the device back into an
+     * 'idle' state, as the device cleanup code expects.
+     */
+    pci_device_reset(PCI_DEVICE(plugged_dev));
+    object_unparent(OBJECT(plugged_dev));
+}
+
 static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
                                      DeviceState *plugged_dev, Error **errp)
 {
@@ -1965,6 +1973,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data)
     dc->user_creatable = true;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
     hp->plug = spapr_pci_plug;
+    hp->unplug = spapr_pci_unplug;
     hp->unplug_request = spapr_pci_unplug_request;
 }
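The new hp->unplug callback is reached through the generic dispatcher used in spapr_phb_remove_pci_device_cb() above; its core is roughly the following (a sketch of the hw/core hotplug helper, stated from memory rather than taken from this diff):

    void hotplug_handler_unplug(HotplugHandler *plug_handler,
                                DeviceState *plugged_dev, Error **errp)
    {
        HotplugHandlerClass *hdc = HOTPLUG_HANDLER_GET_CLASS(plug_handler);

        if (hdc->unplug) {
            hdc->unplug(plug_handler, plugged_dev, errp);
        }
    }

Routing the DRC release through hotplug_handler_unplug() keeps the PHB's reset-then-unparent logic in one place instead of open-coding it in the callback.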
 
diff --git a/hw/ppc/spapr_rtas_ddw.c b/hw/ppc/spapr_rtas_ddw.c
index 329feb148f..cb8a410359 100644
--- a/hw/ppc/spapr_rtas_ddw.c
+++ b/hw/ppc/spapr_rtas_ddw.c
@@ -96,9 +96,8 @@ static void rtas_ibm_query_pe_dma_window(PowerPCCPU *cpu,
                                          uint32_t nret, target_ulong rets)
 {
     sPAPRPHBState *sphb;
-    uint64_t buid, max_window_size;
+    uint64_t buid;
     uint32_t avail, addr, pgmask = 0;
-    MachineState *machine = MACHINE(spapr);
 
     if ((nargs != 3) || (nret != 5)) {
         goto param_error_exit;
@@ -114,27 +113,15 @@ static void rtas_ibm_query_pe_dma_window(PowerPCCPU *cpu,
     /* Translate page mask to LoPAPR format */
     pgmask = spapr_page_mask_to_query_mask(sphb->page_size_mask);
 
-    /*
-     * This is "Largest contiguous block of TCEs allocated specifically
-     * for (that is, are reserved for) this PE".
-     * Return the maximum number as maximum supported RAM size was in 4K pages.
-     */
-    if (machine->ram_size == machine->maxram_size) {
-        max_window_size = machine->ram_size;
-    } else {
-        max_window_size = machine->device_memory->base +
-                          memory_region_size(&machine->device_memory->mr);
-    }
-
     avail = SPAPR_PCI_DMA_MAX_WINDOWS - spapr_phb_get_active_win_num(sphb);
 
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
     rtas_st(rets, 1, avail);
-    rtas_st(rets, 2, max_window_size >> SPAPR_TCE_PAGE_SHIFT);
+    rtas_st(rets, 2, 0x80000000); /* The largest window we can possibly have */
     rtas_st(rets, 3, pgmask);
     rtas_st(rets, 4, 0); /* DMA migration mask, not supported */
 
-    trace_spapr_iommu_ddw_query(buid, addr, avail, max_window_size, pgmask);
+    trace_spapr_iommu_ddw_query(buid, addr, avail, 0x80000000, pgmask);
     return;
 
 param_error_exit:
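The hard-coded 0x80000000 returned in rets[2] is a page count, not a byte count: 2^31 TCE pages of 4 KiB each (SPAPR_TCE_PAGE_SHIFT is 12) describe an 8 TiB window, larger than any window the guest could actually back with RAM:

    /* 2^31 pages << 12 = 2^43 bytes = 8 TiB maximum DMA window */
    uint64_t max_window_bytes = (uint64_t)0x80000000 << SPAPR_TCE_PAGE_SHIFT;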
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 840d4a3c45..7e8a9ad093 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -730,7 +730,7 @@ void spapr_dt_vdevice(VIOsPAPRBus *bus, void *fdt)
     }
 
     /* Copy out into an array of pointers */
-    qdevs = g_malloc(sizeof(qdev) * num);
+    qdevs = g_new(DeviceState *, num);
     num = 0;
     QTAILQ_FOREACH(kid, &bus->bus.children, sibling) {
         qdevs[num++] = kid->child;
diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c
index ee9b4b4490..5177120574 100644
--- a/hw/ppc/virtex_ml507.c
+++ b/hw/ppc/virtex_ml507.c
@@ -105,7 +105,7 @@ static PowerPCCPU *ppc440_init_xilinx(ram_addr_t *ram_size,
     ppc_dcr_init(env, NULL, NULL);
 
     /* interrupt controller */
-    irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB);
+    irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
     irqs[PPCUIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
     irqs[PPCUIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT];
     ppcuic_init(env, irqs, 0x0C0, 0, 1);
diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index d7a4bbd91f..c28bfbd44d 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -15,10 +15,18 @@
 
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
+#include "sysemu/sysemu.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qapi-events-rdma.h"
 
 #include <infiniband/verbs.h>
+#include <infiniband/umad_types.h>
+#include <infiniband/umad.h>
+#include <rdma/rdma_user_cm.h>
 
+#include "contrib/rdmacm-mux/rdmacm-mux.h"
 #include "trace.h"
 #include "rdma_utils.h"
 #include "rdma_rm.h"
@@ -29,27 +37,46 @@
 #define VENDOR_ERR_TOO_MANY_SGES    0x202
 #define VENDOR_ERR_NOMEM            0x203
 #define VENDOR_ERR_QP0              0x204
-#define VENDOR_ERR_NO_SGE           0x205
+#define VENDOR_ERR_INV_NUM_SGE      0x205
 #define VENDOR_ERR_MAD_SEND         0x206
 #define VENDOR_ERR_INVLKEY          0x207
 #define VENDOR_ERR_MR_SMALL         0x208
+#define VENDOR_ERR_INV_MAD_BUFF     0x209
 
 #define THR_NAME_LEN 16
 #define THR_POLL_TO  5000
 
+#define MAD_HDR_SIZE sizeof(struct ibv_grh)
+
 typedef struct BackendCtx {
-    uint64_t req_id;
     void *up_ctx;
     bool is_tx_req;
+    struct ibv_sge sge; /* Used to save MAD recv buffer */
 } BackendCtx;
 
-static void (*comp_handler)(int status, unsigned int vendor_err, void *ctx);
+struct backend_umad {
+    struct ib_user_mad hdr;
+    char mad[RDMA_MAX_PRIVATE_DATA];
+};
+
+static void (*comp_handler)(void *ctx, struct ibv_wc *wc);
 
-static void dummy_comp_handler(int status, unsigned int vendor_err, void *ctx)
+static void dummy_comp_handler(void *ctx, struct ibv_wc *wc)
 {
     pr_err("No completion handler is registered\n");
 }
 
+static inline void complete_work(enum ibv_wc_status status, uint32_t vendor_err,
+                                 void *ctx)
+{
+    struct ibv_wc wc = {0};
+
+    wc.status = status;
+    wc.vendor_err = vendor_err;
+
+    comp_handler(ctx, &wc);
+}
+
 static void poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
 {
     int i, ne;
@@ -74,7 +101,7 @@ static void poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
             }
             pr_dbg("Processing %s CQE\n", bctx->is_tx_req ? "send" : "recv");
 
-            comp_handler(wc[i].status, wc[i].vendor_err, bctx->up_ctx);
+            comp_handler(bctx->up_ctx, &wc[i]);
 
             rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
             g_free(bctx);
@@ -146,6 +173,77 @@ static void *comp_handler_thread(void *arg)
     return NULL;
 }
 
+static inline void disable_rdmacm_mux_async(RdmaBackendDev *backend_dev)
+{
+    atomic_set(&backend_dev->rdmacm_mux.can_receive, 0);
+}
+
+static inline void enable_rdmacm_mux_async(RdmaBackendDev *backend_dev)
+{
+    atomic_set(&backend_dev->rdmacm_mux.can_receive, sizeof(RdmaCmMuxMsg));
+}
+
+static inline int rdmacm_mux_can_process_async(RdmaBackendDev *backend_dev)
+{
+    return atomic_read(&backend_dev->rdmacm_mux.can_receive);
+}
+
+static int check_mux_op_status(CharBackend *mad_chr_be)
+{
+    RdmaCmMuxMsg msg = {0};
+    int ret;
+
+    pr_dbg("Reading response\n");
+    ret = qemu_chr_fe_read_all(mad_chr_be, (uint8_t *)&msg, sizeof(msg));
+    if (ret != sizeof(msg)) {
+        pr_dbg("Invalid message size %d, expecting %ld\n", ret, sizeof(msg));
+        return -EIO;
+    }
+
+    pr_dbg("msg_type=%d\n", msg.hdr.msg_type);
+    pr_dbg("op_code=%d\n", msg.hdr.op_code);
+    pr_dbg("err_code=%d\n", msg.hdr.err_code);
+
+    if (msg.hdr.msg_type != RDMACM_MUX_MSG_TYPE_RESP) {
+        pr_dbg("Invalid message type %d\n", msg.hdr.msg_type);
+        return -EIO;
+    }
+
+    if (msg.hdr.err_code != RDMACM_MUX_ERR_CODE_OK) {
+        pr_dbg("Operation failed in mux, error code %d\n", msg.hdr.err_code);
+        return -EIO;
+    }
+
+    return 0;
+}
+
+static int exec_rdmacm_mux_req(RdmaBackendDev *backend_dev, RdmaCmMuxMsg *msg)
+{
+    int rc = 0;
+
+    pr_dbg("Executing request %d\n", msg->hdr.op_code);
+
+    msg->hdr.msg_type = RDMACM_MUX_MSG_TYPE_REQ;
+    disable_rdmacm_mux_async(backend_dev);
+    rc = qemu_chr_fe_write(backend_dev->rdmacm_mux.chr_be,
+                           (const uint8_t *)msg, sizeof(*msg));
+    if (rc != sizeof(*msg)) {
+        enable_rdmacm_mux_async(backend_dev);
+        pr_dbg("Fail to send request to rdmacm_mux (rc=%d)\n", rc);
+        return -EIO;
+    }
+
+    rc = check_mux_op_status(backend_dev->rdmacm_mux.chr_be);
+    if (rc) {
+        pr_dbg("Failed to execute rdmacm_mux request %d (rc=%d)\n",
+               msg->hdr.op_code, rc);
+    }
+
+    enable_rdmacm_mux_async(backend_dev);
+
+    return rc;
+}
+
 static void stop_backend_thread(RdmaBackendThread *thread)
 {
     thread->run = false;
@@ -168,8 +266,8 @@ static void start_comp_thread(RdmaBackendDev *backend_dev)
                        comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
 }
 
-void rdma_backend_register_comp_handler(void (*handler)(int status,
-                                        unsigned int vendor_err, void *ctx))
+void rdma_backend_register_comp_handler(void (*handler)(void *ctx,
+                                                         struct ibv_wc *wc))
 {
     comp_handler = handler;
 }
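exec_rdmacm_mux_req() and check_mux_op_status() together implement a synchronous request/response exchange with the external rdmacm-mux daemon over the shared chardev: async delivery is masked, the request is written, and the function blocks reading the response back. The wire message declared in contrib/rdmacm-mux/rdmacm-mux.h is presumably shaped roughly like this sketch (field names inferred from their uses above; the umad member mirrors struct backend_umad below):

    /* Sketch of the mux wire format; see contrib/rdmacm-mux/rdmacm-mux.h */
    typedef struct RdmaCmMuxMsg {
        struct {
            uint32_t      msg_type;  /* RDMACM_MUX_MSG_TYPE_REQ or _RESP  */
            uint32_t      op_code;   /* _REG, _UNREG or _MAD              */
            union ibv_gid sgid;      /* source GID the operation targets  */
            uint32_t      err_code;  /* RDMACM_MUX_ERR_CODE_OK on success */
        } hdr;
        int umad_len;                /* valid bytes in umad.mad           */
        struct backend_umad umad;    /* ib_user_mad header + MAD payload  */
    } RdmaCmMuxMsg;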
@@ -286,11 +384,73 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
     return 0;
 }
 
+static int mad_send(RdmaBackendDev *backend_dev, uint8_t sgid_idx,
+                    union ibv_gid *sgid, struct ibv_sge *sge, uint32_t num_sge)
+{
+    RdmaCmMuxMsg msg = {0};
+    char *hdr, *data;
+    int ret;
+
+    pr_dbg("num_sge=%d\n", num_sge);
+
+    if (num_sge != 2) {
+        return -EINVAL;
+    }
+
+    msg.hdr.op_code = RDMACM_MUX_OP_CODE_MAD;
+    memcpy(msg.hdr.sgid.raw, sgid->raw, sizeof(msg.hdr.sgid));
+
+    msg.umad_len = sge[0].length + sge[1].length;
+    pr_dbg("umad_len=%d\n", msg.umad_len);
+
+    if (msg.umad_len > sizeof(msg.umad.mad)) {
+        return -ENOMEM;
+    }
+
+    msg.umad.hdr.addr.qpn = htobe32(1);
+    msg.umad.hdr.addr.grh_present = 1;
+    pr_dbg("sgid_idx=%d\n", sgid_idx);
+    pr_dbg("sgid=0x%llx\n", sgid->global.interface_id);
+    msg.umad.hdr.addr.gid_index = sgid_idx;
+    memcpy(msg.umad.hdr.addr.gid, sgid->raw, sizeof(msg.umad.hdr.addr.gid));
+    msg.umad.hdr.addr.hop_limit = 0xFF;
+
+    hdr = rdma_pci_dma_map(backend_dev->dev, sge[0].addr, sge[0].length);
+    if (!hdr) {
+        pr_dbg("Fail to map to sge[0]\n");
+        return -ENOMEM;
+    }
+    data = rdma_pci_dma_map(backend_dev->dev, sge[1].addr, sge[1].length);
+    if (!data) {
+        pr_dbg("Fail to map to sge[1]\n");
+        rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length);
+        return -ENOMEM;
+    }
+
+    pr_dbg_buf("mad_hdr", hdr, sge[0].length);
+    pr_dbg_buf("mad_data", data, sge[1].length);
+
+    memcpy(&msg.umad.mad[0], hdr, sge[0].length);
+    memcpy(&msg.umad.mad[sge[0].length], data, sge[1].length);
+
+    rdma_pci_dma_unmap(backend_dev->dev, data, sge[1].length);
+    rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length);
+
+    ret = exec_rdmacm_mux_req(backend_dev, &msg);
+    if (ret) {
+        pr_dbg("Fail to send MAD to rdma_umadmux (%d)\n", ret);
+        return -EIO;
+    }
+
+    return 0;
+}
+
 void rdma_backend_post_send(RdmaBackendDev *backend_dev,
                             RdmaBackendQP *qp, uint8_t qp_type,
                             struct ibv_sge *sge, uint32_t num_sge,
-                            union ibv_gid *dgid, uint32_t dqpn,
-                            uint32_t dqkey, void *ctx)
+                            uint8_t sgid_idx, union ibv_gid *sgid,
+                            union ibv_gid *dgid, uint32_t dqpn, uint32_t dqkey,
+                            void *ctx)
 {
     BackendCtx *bctx;
     struct ibv_sge new_sge[MAX_SGE];
@@ -301,19 +461,23 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
     if (!qp->ibqp) { /* This field does not get initialized for QP0 and QP1 */
         if (qp_type == IBV_QPT_SMI) {
             pr_dbg("QP0 unsupported\n");
-            comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
+            complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
         } else if (qp_type == IBV_QPT_GSI) {
             pr_dbg("QP1\n");
-            comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
+            rc = mad_send(backend_dev, sgid_idx, sgid, sge, num_sge);
+            if (rc) {
+                complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
+            } else {
+                complete_work(IBV_WC_SUCCESS, 0, ctx);
+            }
         }
-        pr_dbg("qp->ibqp is NULL for qp_type %d!!!\n", qp_type);
         return;
     }
 
     pr_dbg("num_sge=%d\n", num_sge);
-    if (!num_sge) {
-        pr_dbg("num_sge=0\n");
-        comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NO_SGE, ctx);
+    if (!num_sge || num_sge > MAX_SGE) {
+        pr_dbg("invalid num_sge=%d\n", num_sge);
+        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_NUM_SGE, ctx);
         return;
     }
 
@@ -324,20 +488,23 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
     rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
     if (unlikely(rc)) {
         pr_dbg("Failed to allocate cqe_ctx\n");
-        comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
+        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
         goto out_free_bctx;
     }
 
     rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge);
     if (rc) {
         pr_dbg("Error: Failed to build host SGE array\n");
-        comp_handler(IBV_WC_GENERAL_ERR, rc, ctx);
+        complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
         goto out_dealloc_cqe_ctx;
     }
 
     if (qp_type == IBV_QPT_UD) {
-        wr.wr.ud.ah = create_ah(backend_dev, qp->ibpd,
-                                backend_dev->backend_gid_idx, dgid);
+        wr.wr.ud.ah = create_ah(backend_dev, qp->ibpd, sgid_idx, dgid);
+        if (!wr.wr.ud.ah) {
+            complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
+            goto out_dealloc_cqe_ctx;
+        }
         wr.wr.ud.remote_qpn = dqpn;
         wr.wr.ud.remote_qkey = dqkey;
     }
@@ -353,7 +520,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
     if (rc) {
         pr_dbg("Fail (%d, %d) to post send WQE to qpn %d\n", rc, errno,
                 qp->ibqp->qp_num);
-        comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
+        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
         goto out_dealloc_cqe_ctx;
     }
 
@@ -366,6 +533,48 @@ out_free_bctx:
     g_free(bctx);
 }
 
+static unsigned int save_mad_recv_buffer(RdmaBackendDev *backend_dev,
+                                         struct ibv_sge *sge, uint32_t num_sge,
+                                         void *ctx)
+{
+    BackendCtx *bctx;
+    int rc;
+    uint32_t bctx_id;
+
+    if (num_sge != 1) {
+        pr_dbg("Invalid num_sge (%d), expecting 1\n", num_sge);
+        return VENDOR_ERR_INV_NUM_SGE;
+    }
+
+    if (sge[0].length < RDMA_MAX_PRIVATE_DATA + MAD_HDR_SIZE) {
+        pr_dbg("Buffer too small for MAD\n");
+        return VENDOR_ERR_INV_MAD_BUFF;
+    }
+
+    pr_dbg("addr=0x%" PRIx64"\n", sge[0].addr);
+    pr_dbg("length=%d\n", sge[0].length);
+    pr_dbg("lkey=%d\n", sge[0].lkey);
+
+    bctx = g_malloc0(sizeof(*bctx));
+
+    rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
+    if (unlikely(rc)) {
+        g_free(bctx);
+        pr_dbg("Fail to allocate cqe_ctx\n");
+        return VENDOR_ERR_NOMEM;
+    }
+
+    pr_dbg("bctx_id %d, bctx %p, ctx %p\n", bctx_id, bctx, ctx);
+    bctx->up_ctx = ctx;
+    bctx->sge = *sge;
+
+    qemu_mutex_lock(&backend_dev->recv_mads_list.lock);
+    qlist_append_int(backend_dev->recv_mads_list.list, bctx_id);
+    qemu_mutex_unlock(&backend_dev->recv_mads_list.lock);
+
+    return 0;
+}
+
 void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
                             RdmaDeviceResources *rdma_dev_res,
                             RdmaBackendQP *qp, uint8_t qp_type,
@@ -380,19 +589,22 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
     if (!qp->ibqp) { /* This field does not get initialized for QP0 and QP1 */
         if (qp_type == IBV_QPT_SMI) {
             pr_dbg("QP0 unsupported\n");
-            comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
+            complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
         }
         if (qp_type == IBV_QPT_GSI) {
             pr_dbg("QP1\n");
-            comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
+            rc = save_mad_recv_buffer(backend_dev, sge, num_sge, ctx);
+            if (rc) {
+                complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
+            }
         }
         return;
     }
 
     pr_dbg("num_sge=%d\n", num_sge);
-    if (!num_sge) {
-        pr_dbg("num_sge=0\n");
-        comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NO_SGE, ctx);
+    if (!num_sge || num_sge > MAX_SGE) {
+        pr_dbg("invalid num_sge=%d\n", num_sge);
+        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_NUM_SGE, ctx);
         return;
     }
 
@@ -403,14 +615,14 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
     rc = rdma_rm_alloc_cqe_ctx(rdma_dev_res, &bctx_id, bctx);
     if (unlikely(rc)) {
         pr_dbg("Failed to allocate cqe_ctx\n");
-        comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
+        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
         goto out_free_bctx;
     }
 
     rc = build_host_sge_array(rdma_dev_res, new_sge, sge, num_sge);
     if (rc) {
         pr_dbg("Error: Failed to build host SGE array\n");
-        comp_handler(IBV_WC_GENERAL_ERR, rc, ctx);
+        complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
         goto out_dealloc_cqe_ctx;
     }
 
@@ -422,7 +634,7 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
     if (rc) {
         pr_dbg("Fail (%d, %d) to post recv WQE to qpn %d\n", rc, errno,
                 qp->ibqp->qp_num);
-        comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
+        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
         goto out_dealloc_cqe_ctx;
     }
 
@@ -513,7 +725,6 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
 
     switch (qp_type) {
     case IBV_QPT_GSI:
-        pr_dbg("QP1 unsupported\n");
         return 0;
 
     case IBV_QPT_RC:
@@ -594,9 +805,9 @@ int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
 }
 
 int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
-                              uint8_t qp_type, union ibv_gid *dgid,
-                              uint32_t dqpn, uint32_t rq_psn, uint32_t qkey,
-                              bool use_qkey)
+                              uint8_t qp_type, uint8_t sgid_idx,
+                              union ibv_gid *dgid, uint32_t dqpn,
+                              uint32_t rq_psn, uint32_t qkey, bool use_qkey)
 {
     struct ibv_qp_attr attr = {0};
     union ibv_gid ibv_gid = {
@@ -608,13 +819,15 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
     attr.qp_state = IBV_QPS_RTR;
     attr_mask = IBV_QP_STATE;
 
+    qp->sgid_idx = sgid_idx;
+
     switch (qp_type) {
     case IBV_QPT_RC:
         pr_dbg("dgid=0x%" PRIx64 ",%" PRIx64 "\n",
                be64_to_cpu(ibv_gid.global.subnet_prefix),
                be64_to_cpu(ibv_gid.global.interface_id));
         pr_dbg("dqpn=0x%x\n", dqpn);
-        pr_dbg("sgid_idx=%d\n", backend_dev->backend_gid_idx);
+        pr_dbg("sgid_idx=%d\n", qp->sgid_idx);
         pr_dbg("sport_num=%d\n", backend_dev->port_num);
         pr_dbg("rq_psn=0x%x\n", rq_psn);
 
@@ -626,7 +839,7 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
         attr.ah_attr.is_global      = 1;
         attr.ah_attr.grh.hop_limit  = 1;
         attr.ah_attr.grh.dgid       = ibv_gid;
-        attr.ah_attr.grh.sgid_index = backend_dev->backend_gid_idx;
+        attr.ah_attr.grh.sgid_index = qp->sgid_idx;
         attr.rq_psn                 = rq_psn;
 
         attr_mask |= IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN |
@@ -635,8 +848,8 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
         break;
 
     case IBV_QPT_UD:
+        pr_dbg("qkey=0x%x\n", qkey);
         if (use_qkey) {
-            pr_dbg("qkey=0x%x\n", qkey);
             attr.qkey = qkey;
             attr_mask |= IBV_QP_QKEY;
         }
@@ -744,23 +957,224 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
     return 0;
 }
 
+static inline void build_mad_hdr(struct ibv_grh *grh, union ibv_gid *sgid,
+                                 union ibv_gid *my_gid, int paylen)
+{
+    grh->paylen = htons(paylen);
+    grh->sgid = *sgid;
+    grh->dgid = *my_gid;
+
+    pr_dbg("paylen=%d (net=0x%x)\n", paylen, grh->paylen);
+    pr_dbg("dgid=0x%llx\n", my_gid->global.interface_id);
+    pr_dbg("sgid=0x%llx\n", sgid->global.interface_id);
+}
+
+static void process_incoming_mad_req(RdmaBackendDev *backend_dev,
+                                     RdmaCmMuxMsg *msg)
+{
+    QObject *o_ctx_id;
+    unsigned long cqe_ctx_id;
+    BackendCtx *bctx;
+    char *mad;
+
+    pr_dbg("umad_len=%d\n", msg->umad_len);
+
+#ifdef PVRDMA_DEBUG
+    struct umad_hdr *hdr = (struct umad_hdr *)&msg->umad.mad;
+    pr_dbg("bv %x cls %x cv %x mtd %x st %d tid %" PRIx64 " at %x atm %x\n",
+           hdr->base_version, hdr->mgmt_class, hdr->class_version,
+           hdr->method, hdr->status, be64toh(hdr->tid),
+           hdr->attr_id, hdr->attr_mod);
+#endif
+
+    qemu_mutex_lock(&backend_dev->recv_mads_list.lock);
+    o_ctx_id = qlist_pop(backend_dev->recv_mads_list.list);
+    qemu_mutex_unlock(&backend_dev->recv_mads_list.lock);
+    if (!o_ctx_id) {
+        pr_dbg("No more free MADs buffers, waiting for a while\n");
+        sleep(THR_POLL_TO);
+        return;
+    }
+
+    cqe_ctx_id = qnum_get_uint(qobject_to(QNum, o_ctx_id));
+    bctx = rdma_rm_get_cqe_ctx(backend_dev->rdma_dev_res, cqe_ctx_id);
+    if (unlikely(!bctx)) {
+        pr_dbg("Error: Fail to find ctx for %ld\n", cqe_ctx_id);
+        return;
+    }
+
+    pr_dbg("id %ld, bctx %p, ctx %p\n", cqe_ctx_id, bctx, bctx->up_ctx);
+
+    mad = rdma_pci_dma_map(backend_dev->dev, bctx->sge.addr,
+                           bctx->sge.length);
+    if (!mad || bctx->sge.length < msg->umad_len + MAD_HDR_SIZE) {
+        if (mad) {
+            rdma_pci_dma_unmap(backend_dev->dev, mad, bctx->sge.length);
+        }
+        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_MAD_BUFF,
+                      bctx->up_ctx);
+    } else {
+        struct ibv_wc wc = {0};
+        pr_dbg_buf("mad", msg->umad.mad, msg->umad_len);
+        memset(mad, 0, bctx->sge.length);
+        build_mad_hdr((struct ibv_grh *)mad,
+                      (union ibv_gid *)&msg->umad.hdr.addr.gid, &msg->hdr.sgid,
+                      msg->umad_len);
+        memcpy(&mad[MAD_HDR_SIZE], msg->umad.mad, msg->umad_len);
+        rdma_pci_dma_unmap(backend_dev->dev, mad, bctx->sge.length);
+
+        wc.byte_len = msg->umad_len;
+        wc.status = IBV_WC_SUCCESS;
+        wc.wc_flags = IBV_WC_GRH;
+        comp_handler(bctx->up_ctx, &wc);
+    }
+
+    g_free(bctx);
+    rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, cqe_ctx_id);
+}
+
+static inline int rdmacm_mux_can_receive(void *opaque)
+{
+    RdmaBackendDev *backend_dev = (RdmaBackendDev *)opaque;
+
+    return rdmacm_mux_can_process_async(backend_dev);
+}
+
+static void rdmacm_mux_read(void *opaque, const uint8_t *buf, int size)
+{
+    RdmaBackendDev *backend_dev = (RdmaBackendDev *)opaque;
+    RdmaCmMuxMsg *msg = (RdmaCmMuxMsg *)buf;
+
+    pr_dbg("Got %d bytes\n", size);
+    pr_dbg("msg_type=%d\n", msg->hdr.msg_type);
+    pr_dbg("op_code=%d\n", msg->hdr.op_code);
+
+    if (msg->hdr.msg_type != RDMACM_MUX_MSG_TYPE_REQ &&
+        msg->hdr.op_code != RDMACM_MUX_OP_CODE_MAD) {
+            pr_dbg("Error: Not a MAD request, skipping\n");
+            return;
+    }
+    process_incoming_mad_req(backend_dev, msg);
+}
+
+static int mad_init(RdmaBackendDev *backend_dev, CharBackend *mad_chr_be)
+{
+    int ret;
+
+    backend_dev->rdmacm_mux.chr_be = mad_chr_be;
+
+    ret = qemu_chr_fe_backend_connected(backend_dev->rdmacm_mux.chr_be);
+    if (!ret) {
+        pr_dbg("Missing chardev for MAD multiplexer\n");
+        return -EIO;
+    }
+
+    qemu_mutex_init(&backend_dev->recv_mads_list.lock);
+    backend_dev->recv_mads_list.list = qlist_new();
+
+    enable_rdmacm_mux_async(backend_dev);
+
+    qemu_chr_fe_set_handlers(backend_dev->rdmacm_mux.chr_be,
+                             rdmacm_mux_can_receive, rdmacm_mux_read, NULL,
+                             NULL, backend_dev, NULL, true);
+
+    return 0;
+}
+
+static void mad_fini(RdmaBackendDev *backend_dev)
+{
+    pr_dbg("Stopping MAD\n");
+    disable_rdmacm_mux_async(backend_dev);
+    qemu_chr_fe_disconnect(backend_dev->rdmacm_mux.chr_be);
+    qlist_destroy_obj(QOBJECT(backend_dev->recv_mads_list.list));
+    qemu_mutex_destroy(&backend_dev->recv_mads_list.lock);
+}
+
+int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev,
+                               union ibv_gid *gid)
+{
+    union ibv_gid sgid;
+    int ret;
+    int i = 0;
+
+    pr_dbg("0x%llx, 0x%llx\n",
+           (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix),
+           (long long unsigned int)be64_to_cpu(gid->global.interface_id));
+
+    do {
+        ret = ibv_query_gid(backend_dev->context, backend_dev->port_num, i,
+                            &sgid);
+        i++;
+    } while (!ret && (memcmp(&sgid, gid, sizeof(*gid))));
+
+    pr_dbg("gid_index=%d\n", i - 1);
+
+    return ret ? ret : i - 1;
+}
+
+int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname,
+                         union ibv_gid *gid)
+{
+    RdmaCmMuxMsg msg = {0};
+    int ret;
+
+    pr_dbg("0x%llx, 0x%llx\n",
+           (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix),
+           (long long unsigned int)be64_to_cpu(gid->global.interface_id));
+
+    msg.hdr.op_code = RDMACM_MUX_OP_CODE_REG;
+    memcpy(msg.hdr.sgid.raw, gid->raw, sizeof(msg.hdr.sgid));
+
+    ret = exec_rdmacm_mux_req(backend_dev, &msg);
+    if (ret) {
+        pr_dbg("Fail to register GID to rdma_umadmux (%d)\n", ret);
+        return -EIO;
+    }
+
+    qapi_event_send_rdma_gid_status_changed(ifname, true,
+                                            gid->global.subnet_prefix,
+                                            gid->global.interface_id);
+
+    return ret;
+}
+
+int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname,
+                         union ibv_gid *gid)
+{
+    RdmaCmMuxMsg msg = {0};
+    int ret;
+
+    pr_dbg("0x%llx, 0x%llx\n",
+           (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix),
+           (long long unsigned int)be64_to_cpu(gid->global.interface_id));
+
+    msg.hdr.op_code = RDMACM_MUX_OP_CODE_UNREG;
+    memcpy(msg.hdr.sgid.raw, gid->raw, sizeof(msg.hdr.sgid));
+
+    ret = exec_rdmacm_mux_req(backend_dev, &msg);
+    if (ret) {
+        pr_dbg("Fail to unregister GID from rdma_umadmux (%d)\n", ret);
+        return -EIO;
+    }
+
+    qapi_event_send_rdma_gid_status_changed(ifname, false,
+                                            gid->global.subnet_prefix,
+                                            gid->global.interface_id);
+
+    return 0;
+}
+
 int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
                       RdmaDeviceResources *rdma_dev_res,
                       const char *backend_device_name, uint8_t port_num,
-                      uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr,
+                      struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be,
                       Error **errp)
 {
     int i;
     int ret = 0;
     int num_ibv_devices;
     struct ibv_device **dev_list;
-    struct ibv_port_attr port_attr;
 
     memset(backend_dev, 0, sizeof(*backend_dev));
 
     backend_dev->dev = pdev;
-
-    backend_dev->backend_gid_idx = backend_gid_idx;
     backend_dev->port_num = port_num;
     backend_dev->rdma_dev_res = rdma_dev_res;
 
@@ -797,9 +1211,9 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
         backend_dev->ib_dev = *dev_list;
     }
 
-    pr_dbg("Using backend device %s, port %d, gid_idx %d\n",
-           ibv_get_device_name(backend_dev->ib_dev),
-           backend_dev->port_num, backend_dev->backend_gid_idx);
+    pr_dbg("Using backend device %s, port %d\n",
+           ibv_get_device_name(backend_dev->ib_dev), backend_dev->port_num);
+    pr_dbg("uverb device %s\n", backend_dev->ib_dev->dev_name);
 
     backend_dev->context = ibv_open_device(backend_dev->ib_dev);
     if (!backend_dev->context) {
@@ -816,20 +1230,6 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
     }
     pr_dbg("dev->backend_dev.channel=%p\n", backend_dev->channel);
 
-    ret = ibv_query_port(backend_dev->context, backend_dev->port_num,
-                         &port_attr);
-    if (ret) {
-        error_setg(errp, "Error %d from ibv_query_port", ret);
-        ret = -EIO;
-        goto out_destroy_comm_channel;
-    }
-
-    if (backend_dev->backend_gid_idx >= port_attr.gid_tbl_len) {
-        error_setg(errp, "Invalid backend_gid_idx, should be less than %d",
-                   port_attr.gid_tbl_len);
-        goto out_destroy_comm_channel;
-    }
-
     ret = init_device_caps(backend_dev, dev_attr);
     if (ret) {
         error_setg(errp, "Failed to initialize device capabilities");
@@ -837,18 +1237,13 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
         goto out_destroy_comm_channel;
     }
 
-    ret = ibv_query_gid(backend_dev->context, backend_dev->port_num,
-                         backend_dev->backend_gid_idx, &backend_dev->gid);
+    ret = mad_init(backend_dev, mad_chr_be);
     if (ret) {
-        error_setg(errp, "Failed to query gid %d",
-                   backend_dev->backend_gid_idx);
+        error_setg(errp, "Fail to initialize mad");
         ret = -EIO;
         goto out_destroy_comm_channel;
     }
-    pr_dbg("subnet_prefix=0x%" PRIx64 "\n",
-           be64_to_cpu(backend_dev->gid.global.subnet_prefix));
-    pr_dbg("interface_id=0x%" PRIx64 "\n",
-           be64_to_cpu(backend_dev->gid.global.interface_id));
 
     backend_dev->comp_thread.run = false;
     backend_dev->comp_thread.is_running = false;
@@ -886,6 +1281,7 @@ void rdma_backend_stop(RdmaBackendDev *backend_dev)
 void rdma_backend_fini(RdmaBackendDev *backend_dev)
 {
     rdma_backend_stop(backend_dev);
+    mad_fini(backend_dev);
     g_hash_table_destroy(ah_hash);
     ibv_destroy_comp_channel(backend_dev->channel);
     ibv_close_device(backend_dev->context);
diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
index 86e8fe8ab6..8cae40f827 100644
--- a/hw/rdma/rdma_backend.h
+++ b/hw/rdma/rdma_backend.h
@@ -17,6 +17,8 @@
 #define RDMA_BACKEND_H
 
 #include "qapi/error.h"
+#include "chardev/char-fe.h"
+
 #include "rdma_rm_defs.h"
 #include "rdma_backend_defs.h"
 
@@ -26,14 +28,9 @@ enum ibv_special_qp_type {
     IBV_QPT_GSI = 1,
 };
 
-static inline union ibv_gid *rdma_backend_gid(RdmaBackendDev *dev)
-{
-    return &dev->gid;
-}
-
 static inline uint32_t rdma_backend_qpn(const RdmaBackendQP *qp)
 {
-    return qp->ibqp ? qp->ibqp->qp_num : 0;
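+    /* The GSI QP is not backed by a real QP; report the well-known QP1 */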
+    return qp->ibqp ? qp->ibqp->qp_num : 1;
 }
 
 static inline uint32_t rdma_backend_mr_lkey(const RdmaBackendMR *mr)
@@ -49,13 +46,19 @@ static inline uint32_t rdma_backend_mr_rkey(const RdmaBackendMR *mr)
 int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
                       RdmaDeviceResources *rdma_dev_res,
                       const char *backend_device_name, uint8_t port_num,
-                      uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr,
+                      struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be,
                       Error **errp);
 void rdma_backend_fini(RdmaBackendDev *backend_dev);
+int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname,
+                         union ibv_gid *gid);
+int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname,
+                         union ibv_gid *gid);
+int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev,
+                               union ibv_gid *gid);
 void rdma_backend_start(RdmaBackendDev *backend_dev);
 void rdma_backend_stop(RdmaBackendDev *backend_dev);
-void rdma_backend_register_comp_handler(void (*handler)(int status,
-                                        unsigned int vendor_err, void *ctx));
+void rdma_backend_register_comp_handler(void (*handler)(void *ctx,
+                                                        struct ibv_wc *wc));
 void rdma_backend_unregister_comp_handler(void);
 
 int rdma_backend_query_port(RdmaBackendDev *backend_dev,
@@ -80,9 +83,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
 int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
                                uint8_t qp_type, uint32_t qkey);
 int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
-                              uint8_t qp_type, union ibv_gid *dgid,
-                              uint32_t dqpn, uint32_t rq_psn, uint32_t qkey,
-                              bool use_qkey);
+                              uint8_t qp_type, uint8_t sgid_idx,
+                              union ibv_gid *dgid, uint32_t dqpn,
+                              uint32_t rq_psn, uint32_t qkey, bool use_qkey);
 int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type,
                               uint32_t sq_psn, uint32_t qkey, bool use_qkey);
 int rdma_backend_query_qp(RdmaBackendQP *qp, struct ibv_qp_attr *attr,
@@ -92,6 +95,7 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp);
 void rdma_backend_post_send(RdmaBackendDev *backend_dev,
                             RdmaBackendQP *qp, uint8_t qp_type,
                             struct ibv_sge *sge, uint32_t num_sge,
+                            uint8_t sgid_idx, union ibv_gid *sgid,
                             union ibv_gid *dgid, uint32_t dqpn, uint32_t dqkey,
                             void *ctx);
 void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
index 7404f64002..1e5c3dd3bf 100644
--- a/hw/rdma/rdma_backend_defs.h
+++ b/hw/rdma/rdma_backend_defs.h
@@ -16,8 +16,10 @@
 #ifndef RDMA_BACKEND_DEFS_H
 #define RDMA_BACKEND_DEFS_H
 
-#include <infiniband/verbs.h>
 #include "qemu/thread.h"
+#include "chardev/char-fe.h"
+#include <infiniband/verbs.h>
+#include "contrib/rdmacm-mux/rdmacm-mux.h"
 
 typedef struct RdmaDeviceResources RdmaDeviceResources;
 
@@ -28,17 +30,27 @@ typedef struct RdmaBackendThread {
     bool is_running; /* Set by the thread to report its status */
 } RdmaBackendThread;
 
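+/* Receive buffers posted by the guest, consumed when MADs arrive */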
+typedef struct RecvMadList {
+    QemuMutex lock;
+    QList *list;
+} RecvMadList;
+
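+/* Connection to the external rdmacm-mux service over a chardev */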
+typedef struct RdmaCmMux {
+    CharBackend *chr_be;
+    int can_receive;
+} RdmaCmMux;
+
 typedef struct RdmaBackendDev {
     struct ibv_device_attr dev_attr;
     RdmaBackendThread comp_thread;
-    union ibv_gid gid;
     PCIDevice *dev;
     RdmaDeviceResources *rdma_dev_res;
     struct ibv_device *ib_dev;
     struct ibv_context *context;
     struct ibv_comp_channel *channel;
     uint8_t port_num;
-    uint8_t backend_gid_idx;
+    RecvMadList recv_mads_list;
+    RdmaCmMux rdmacm_mux;
 } RdmaBackendDev;
 
 typedef struct RdmaBackendPD {
@@ -58,6 +70,7 @@ typedef struct RdmaBackendCQ {
 typedef struct RdmaBackendQP {
     struct ibv_pd *ibpd;
     struct ibv_qp *ibqp;
+    uint8_t sgid_idx;
 } RdmaBackendQP;
 
 #endif
diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index 8d59a42cd1..f5b1295890 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -43,7 +43,7 @@ static inline void res_tbl_free(RdmaRmResTbl *tbl)
 {
     qemu_mutex_destroy(&tbl->lock);
     g_free(tbl->tbl);
-    bitmap_zero_extend(tbl->bitmap, tbl->tbl_sz, 0);
+    g_free(tbl->bitmap);
 }
 
 static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
@@ -263,7 +263,7 @@ int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
     }
 
     cq->opaque = opaque;
-    cq->notify = false;
+    cq->notify = CNT_CLEAR;
 
     rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe);
     if (rc) {
@@ -291,7 +291,10 @@ void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
         return;
     }
 
-    cq->notify = notify;
+    if (cq->notify != CNT_SET) {
+        cq->notify = notify ? CNT_ARM : CNT_CLEAR;
+    }
+
     pr_dbg("notify=%d\n", cq->notify);
 }
 
@@ -349,6 +352,11 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
         return -EINVAL;
     }
 
+    if (qp_type == IBV_QPT_GSI) {
+        scq->notify = CNT_SET;
+        rcq->notify = CNT_SET;
+    }
+
     qp = res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn);
     if (!qp) {
         return -ENOMEM;
@@ -383,7 +391,7 @@ out_dealloc_qp:
 }
 
 int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
-                      uint32_t qp_handle, uint32_t attr_mask,
+                      uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
                       union ibv_gid *dgid, uint32_t dqpn,
                       enum ibv_qp_state qp_state, uint32_t qkey,
                       uint32_t rq_psn, uint32_t sq_psn)
@@ -392,6 +400,7 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
     int ret;
 
     pr_dbg("qpn=0x%x\n", qp_handle);
+    pr_dbg("qkey=0x%x\n", qkey);
 
     qp = rdma_rm_get_qp(dev_res, qp_handle);
     if (!qp) {
@@ -422,9 +431,19 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
         }
 
         if (qp->qp_state == IBV_QPS_RTR) {
+            /* Get backend gid index */
+            pr_dbg("Guest sgid_idx=%d\n", sgid_idx);
+            sgid_idx = rdma_rm_get_backend_gid_index(dev_res, backend_dev,
+                                                     sgid_idx);
+            if (sgid_idx <= 0) { /* TODO: also check against backend max_sgid */
+                pr_dbg("Failed to get backend sgid_idx for sgid_idx %d\n",
+                       sgid_idx);
+                return -EIO;
+            }
+
             ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
-                                            qp->qp_type, dgid, dqpn, rq_psn,
-                                            qkey, attr_mask & IBV_QP_QKEY);
+                                            qp->qp_type, sgid_idx, dgid, dqpn,
+                                            rq_psn, qkey,
+                                            attr_mask & IBV_QP_QKEY);
             if (ret) {
                 return -EIO;
             }
@@ -515,11 +534,93 @@ void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
     res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
 }
 
+int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+                    const char *ifname, union ibv_gid *gid, int gid_idx)
+{
+    int rc;
+
+    rc = rdma_backend_add_gid(backend_dev, ifname, gid);
+    if (rc) {
+        pr_dbg("Fail to add gid\n");
+        return -EINVAL;
+    }
+
+    memcpy(&dev_res->port.gid_tbl[gid_idx].gid, gid, sizeof(*gid));
+
+    return 0;
+}
+
+int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+                    const char *ifname, int gid_idx)
+{
+    int rc;
+
+    if (!dev_res->port.gid_tbl[gid_idx].gid.global.interface_id) {
+        return 0;
+    }
+
+    rc = rdma_backend_del_gid(backend_dev, ifname,
+                              &dev_res->port.gid_tbl[gid_idx].gid);
+    if (rc) {
+        pr_dbg("Fail to delete gid\n");
+        return -EINVAL;
+    }
+
+    memset(dev_res->port.gid_tbl[gid_idx].gid.raw, 0,
+           sizeof(dev_res->port.gid_tbl[gid_idx].gid));
+    dev_res->port.gid_tbl[gid_idx].backend_gid_index = -1;
+
+    return 0;
+}
+
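+/*
+ * Translate a guest sgid_idx into the backend device's gid index,
+ * resolving it lazily on first use and caching the result.
+ */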
+int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
+                                  RdmaBackendDev *backend_dev, int sgid_idx)
+{
+    if (unlikely(sgid_idx < 0 || sgid_idx >= MAX_PORT_GIDS)) {
+        pr_dbg("Got invalid sgid_idx %d\n", sgid_idx);
+        return -EINVAL;
+    }
+
+    if (unlikely(dev_res->port.gid_tbl[sgid_idx].backend_gid_index == -1)) {
+        dev_res->port.gid_tbl[sgid_idx].backend_gid_index =
+            rdma_backend_get_gid_index(backend_dev,
+                                       &dev_res->port.gid_tbl[sgid_idx].gid);
+    }
+
+    pr_dbg("backend_gid_index=%d\n",
+           dev_res->port.gid_tbl[sgid_idx].backend_gid_index);
+
+    return dev_res->port.gid_tbl[sgid_idx].backend_gid_index;
+}
+
 static void destroy_qp_hash_key(gpointer data)
 {
     g_bytes_unref(data);
 }
 
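+/* Start with the port down and every GID slot unresolved (-1) */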
+static void init_ports(RdmaDeviceResources *dev_res)
+{
+    int i;
+
+    memset(&dev_res->port, 0, sizeof(dev_res->port));
+
+    dev_res->port.state = IBV_PORT_DOWN;
+    for (i = 0; i < MAX_PORT_GIDS; i++) {
+        dev_res->port.gid_tbl[i].backend_gid_index = -1;
+    }
+}
+
+static void fini_ports(RdmaDeviceResources *dev_res,
+                       RdmaBackendDev *backend_dev, const char *ifname)
+{
+    int i;
+
+    dev_res->port.state = IBV_PORT_DOWN;
+    for (i = 0; i < MAX_PORT_GIDS; i++) {
+        rdma_rm_del_gid(dev_res, backend_dev, ifname, i);
+    }
+}
+
 int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
                  Error **errp)
 {
@@ -537,11 +638,16 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
                        dev_attr->max_qp_wr, sizeof(void *));
     res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
 
+    init_ports(dev_res);
+
     return 0;
 }
 
-void rdma_rm_fini(RdmaDeviceResources *dev_res)
+void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+                  const char *ifname)
 {
+    fini_ports(dev_res, backend_dev, ifname);
+
     res_tbl_free(&dev_res->uc_tbl);
     res_tbl_free(&dev_res->cqe_ctx_tbl);
     res_tbl_free(&dev_res->qp_tbl);
diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h
index b4e04cc7b4..3c602c04c0 100644
--- a/hw/rdma/rdma_rm.h
+++ b/hw/rdma/rdma_rm.h
@@ -22,7 +22,8 @@
 
 int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
                  Error **errp);
-void rdma_rm_fini(RdmaDeviceResources *dev_res);
+void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+                  const char *ifname);
 
 int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                      uint32_t *pd_handle, uint32_t ctx_handle);
@@ -55,7 +56,7 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                      uint32_t recv_cq_handle, void *opaque, uint32_t *qpn);
 RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn);
 int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
-                      uint32_t qp_handle, uint32_t attr_mask,
+                      uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
                       union ibv_gid *dgid, uint32_t dqpn,
                       enum ibv_qp_state qp_state, uint32_t qkey,
                       uint32_t rq_psn, uint32_t sq_psn);
@@ -69,4 +70,16 @@ int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
 void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);
 void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);
 
+int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+                    const char *ifname, union ibv_gid *gid, int gid_idx);
+int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+                    const char *ifname, int gid_idx);
+int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
+                                  RdmaBackendDev *backend_dev, int sgid_idx);
+static inline union ibv_gid *rdma_rm_get_gid(RdmaDeviceResources *dev_res,
+                                             int sgid_idx)
+{
+    return &dev_res->port.gid_tbl[sgid_idx].gid;
+}
+
 #endif
diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
index 7228151239..0ba61d1838 100644
--- a/hw/rdma/rdma_rm_defs.h
+++ b/hw/rdma/rdma_rm_defs.h
@@ -18,8 +18,8 @@
 
 #include "rdma_backend_defs.h"
 
-#define MAX_PORTS             1
-#define MAX_PORT_GIDS         1
+#define MAX_PORTS             1 /* Do not change - we support only one port */
+#define MAX_PORT_GIDS         255
 #define MAX_GIDS              MAX_PORT_GIDS
 #define MAX_PORT_PKEYS        1
 #define MAX_PKEYS             MAX_PORT_PKEYS
@@ -49,10 +49,16 @@ typedef struct RdmaRmPD {
     uint32_t ctx_handle;
 } RdmaRmPD;
 
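+/*
+ * CQ interrupt policy: CNT_CLEAR raises no interrupt on completion,
+ * CNT_ARM fires once and then drops back to CNT_CLEAR, and CNT_SET
+ * (used for the GSI QP's CQs) fires on every completion.
+ */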
+typedef enum CQNotificationType {
+    CNT_CLEAR,
+    CNT_ARM,
+    CNT_SET,
+} CQNotificationType;
+
 typedef struct RdmaRmCQ {
     RdmaBackendCQ backend_cq;
     void *opaque;
-    bool notify;
+    CQNotificationType notify;
 } RdmaRmCQ;
 
 /* MR (DMA region) */
@@ -80,13 +86,18 @@ typedef struct RdmaRmQP {
     enum ibv_qp_state qp_state;
 } RdmaRmQP;
 
+typedef struct RdmaRmGid {
+    union ibv_gid gid;
+    int backend_gid_index;
+} RdmaRmGid;
+
 typedef struct RdmaRmPort {
-    union ibv_gid gid_tbl[MAX_PORT_GIDS];
+    RdmaRmGid gid_tbl[MAX_PORT_GIDS];
     enum ibv_port_state state;
 } RdmaRmPort;
 
 typedef struct RdmaDeviceResources {
-    RdmaRmPort ports[MAX_PORTS];
+    RdmaRmPort port;
     RdmaRmResTbl pd_tbl;
     RdmaRmResTbl mr_tbl;
     RdmaRmResTbl uc_tbl;
diff --git a/hw/rdma/rdma_utils.c b/hw/rdma/rdma_utils.c
index dc23f158f3..4fbea8cde2 100644
--- a/hw/rdma/rdma_utils.c
+++ b/hw/rdma/rdma_utils.c
@@ -13,6 +13,7 @@
  *
  */
 
+#include "qemu/osdep.h"
 #include "rdma_utils.h"
 
 #ifdef PVRDMA_DEBUG
diff --git a/hw/rdma/rdma_utils.h b/hw/rdma/rdma_utils.h
index 04c7c2ef5b..4490ea0b94 100644
--- a/hw/rdma/rdma_utils.h
+++ b/hw/rdma/rdma_utils.h
@@ -17,9 +17,9 @@
 #ifndef RDMA_UTILS_H
 #define RDMA_UTILS_H
 
-#include "qemu/osdep.h"
 #include "hw/pci/pci.h"
 #include "sysemu/dma.h"
+#include "stdio.h"
 
 #define pr_info(fmt, ...) \
     fprintf(stdout, "%s: %-20s (%3d): " fmt, "rdma",  __func__, __LINE__,\
@@ -40,12 +40,36 @@ extern unsigned long pr_dbg_cnt;
 #define pr_dbg(fmt, ...) \
     fprintf(stdout, "%lx %ld: %-20s (%3d): " fmt, pthread_self(), pr_dbg_cnt++, \
             __func__, __LINE__, ## __VA_ARGS__)
+
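+/* Dump len bytes of buf as space-separated hex, prefixed with title */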
+#define pr_dbg_buf(title, buf, len) \
+do { \
+    int i; \
+    char *b = g_malloc0(len * 3 + 1); \
+    char b1[4]; \
+    for (i = 0; i < len; i++) { \
+        sprintf(b1, "%.2X ", buf[i] & 0x000000FF); \
+        strcat(b, b1); \
+    } \
+    pr_dbg("%s (%d): %s\n", title, len, b); \
+    g_free(b); \
+} while (0)
+
 #else
 #define init_pr_dbg(void)
 #define pr_dbg(fmt, ...)
+#define pr_dbg_buf(title, buf, len)
 #endif
 
 void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen);
 void rdma_pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len);
 
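+/*
+ * Derive a modified EUI-64 interface ID from a 48-bit MAC address:
+ * insert FF:FE between the OUI and NIC halves and flip the
+ * universal/local bit (RFC 4291, appendix A).
+ */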
+static inline void addrconf_addr_eui48(uint8_t *eui, const char *addr)
+{
+    memcpy(eui, addr, 3);
+    eui[3] = 0xFF;
+    eui[4] = 0xFE;
+    memcpy(eui + 5, addr + 3, 3);
+    eui[0] ^= 2;
+}
+
 #endif
diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h
index e2d9f93cdf..ffae36986e 100644
--- a/hw/rdma/vmw/pvrdma.h
+++ b/hw/rdma/vmw/pvrdma.h
@@ -17,8 +17,11 @@
 #define PVRDMA_PVRDMA_H
 
 #include "qemu/units.h"
+#include "qemu/notify.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/msix.h"
+#include "chardev/char-fe.h"
+#include "hw/net/vmxnet3_defs.h"
 
 #include "../rdma_backend_defs.h"
 #include "../rdma_rm_defs.h"
@@ -51,7 +54,7 @@
 #define PVRDMA_FW_VERSION    14
 
 /* Some defaults */
-#define PVRDMA_PKEY          0x7FFF
+#define PVRDMA_PKEY          0xFFFF
 
 typedef struct DSRInfo {
     dma_addr_t dma;
@@ -78,11 +81,14 @@ typedef struct PVRDMADev {
     int interrupt_mask;
     struct ibv_device_attr dev_attr;
     uint64_t node_guid;
+    char *backend_eth_device_name;
     char *backend_device_name;
-    uint8_t backend_gid_idx;
     uint8_t backend_port_num;
     RdmaBackendDev backend_dev;
     RdmaDeviceResources rdma_dev_res;
+    CharBackend mad_chr;
+    VMXNET3State *func0;
+    Notifier shutdown_notifier;
 } PVRDMADev;
 #define PVRDMA_DEV(dev) OBJECT_CHECK(PVRDMADev, (dev), PVRDMA_HW_NAME)
 
diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
index 4faeb21631..89920887bf 100644
--- a/hw/rdma/vmw/pvrdma_cmd.c
+++ b/hw/rdma/vmw/pvrdma_cmd.c
@@ -128,6 +128,9 @@ static int query_port(PVRDMADev *dev, union pvrdma_cmd_req *req,
     struct pvrdma_port_attr attrs = {0};
 
     pr_dbg("port=%d\n", cmd->port_num);
+    if (cmd->port_num > MAX_PORTS) {
+        return -EINVAL;
+    }
 
     if (rdma_backend_query_port(&dev->backend_dev,
                                 (struct ibv_port_attr *)&attrs)) {
@@ -135,11 +138,9 @@ static int query_port(PVRDMADev *dev, union pvrdma_cmd_req *req,
     }
 
     memset(resp, 0, sizeof(*resp));
-    resp->hdr.response = cmd->hdr.response;
-    resp->hdr.ack = PVRDMA_CMD_QUERY_PORT_RESP;
-    resp->hdr.err = 0;
 
-    resp->attrs.state = attrs.state;
+    resp->attrs.state = dev->func0->device_active ? attrs.state :
+                                                    PVRDMA_PORT_DOWN;
     resp->attrs.max_mtu = attrs.max_mtu;
     resp->attrs.active_mtu = attrs.active_mtu;
     resp->attrs.phys_state = attrs.phys_state;
@@ -159,12 +160,16 @@ static int query_pkey(PVRDMADev *dev, union pvrdma_cmd_req *req,
     struct pvrdma_cmd_query_pkey_resp *resp = &rsp->query_pkey_resp;
 
     pr_dbg("port=%d\n", cmd->port_num);
+    if (cmd->port_num > MAX_PORTS) {
+        return -EINVAL;
+    }
+
     pr_dbg("index=%d\n", cmd->index);
+    if (cmd->index > MAX_PKEYS) {
+        return -EINVAL;
+    }
 
     memset(resp, 0, sizeof(*resp));
-    resp->hdr.response = cmd->hdr.response;
-    resp->hdr.ack = PVRDMA_CMD_QUERY_PKEY_RESP;
-    resp->hdr.err = 0;
 
     resp->pkey = PVRDMA_PKEY;
     pr_dbg("pkey=0x%x\n", resp->pkey);
@@ -177,17 +182,15 @@ static int create_pd(PVRDMADev *dev, union pvrdma_cmd_req *req,
 {
     struct pvrdma_cmd_create_pd *cmd = &req->create_pd;
     struct pvrdma_cmd_create_pd_resp *resp = &rsp->create_pd_resp;
+    int rc;
 
     pr_dbg("context=0x%x\n", cmd->ctx_handle ? cmd->ctx_handle : 0);
 
     memset(resp, 0, sizeof(*resp));
-    resp->hdr.response = cmd->hdr.response;
-    resp->hdr.ack = PVRDMA_CMD_CREATE_PD_RESP;
-    resp->hdr.err = rdma_rm_alloc_pd(&dev->rdma_dev_res, &dev->backend_dev,
-                                     &resp->pd_handle, cmd->ctx_handle);
+    rc = rdma_rm_alloc_pd(&dev->rdma_dev_res, &dev->backend_dev,
+                          &resp->pd_handle, cmd->ctx_handle);
 
-    pr_dbg("ret=%d\n", resp->hdr.err);
-    return resp->hdr.err;
+    return rc;
 }
 
 static int destroy_pd(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -209,10 +212,9 @@ static int create_mr(PVRDMADev *dev, union pvrdma_cmd_req *req,
     struct pvrdma_cmd_create_mr_resp *resp = &rsp->create_mr_resp;
     PCIDevice *pci_dev = PCI_DEVICE(dev);
     void *host_virt = NULL;
+    int rc = 0;
 
     memset(resp, 0, sizeof(*resp));
-    resp->hdr.response = cmd->hdr.response;
-    resp->hdr.ack = PVRDMA_CMD_CREATE_MR_RESP;
 
     pr_dbg("pd_handle=%d\n", cmd->pd_handle);
     pr_dbg("access_flags=0x%x\n", cmd->access_flags);
@@ -223,22 +225,18 @@ static int create_mr(PVRDMADev *dev, union pvrdma_cmd_req *req,
                                        cmd->length);
         if (!host_virt) {
             pr_dbg("Failed to map to pdir\n");
-            resp->hdr.err = -EINVAL;
-            goto out;
+            return -EINVAL;
         }
     }
 
-    resp->hdr.err = rdma_rm_alloc_mr(&dev->rdma_dev_res, cmd->pd_handle,
-                                     cmd->start, cmd->length, host_virt,
-                                     cmd->access_flags, &resp->mr_handle,
-                                     &resp->lkey, &resp->rkey);
-    if (host_virt && !resp->hdr.err) {
+    rc = rdma_rm_alloc_mr(&dev->rdma_dev_res, cmd->pd_handle, cmd->start,
+                          cmd->length, host_virt, cmd->access_flags,
+                          &resp->mr_handle, &resp->lkey, &resp->rkey);
+    if (rc && host_virt) {
         munmap(host_virt, cmd->length);
     }
 
-out:
-    pr_dbg("ret=%d\n", resp->hdr.err);
-    return resp->hdr.err;
+    return rc;
 }
 
 static int destroy_mr(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -261,6 +259,11 @@ static int create_cq_ring(PCIDevice *pci_dev , PvrdmaRing **ring,
     int rc = -EINVAL;
     char ring_name[MAX_RING_NAME_SZ];
 
+    if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) {
+        pr_dbg("invalid nchunks: %d\n", nchunks);
+        return rc;
+    }
+
     pr_dbg("pdir_dma=0x%llx\n", (long long unsigned int)pdir_dma);
     dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
     if (!dir) {
@@ -310,34 +313,43 @@ out:
     return rc;
 }
 
+static void destroy_cq_ring(PvrdmaRing *ring)
+{
+    pvrdma_ring_free(ring);
+    /* ring_state was in slot 1, not 0, so we need to jump back */
+    rdma_pci_dma_unmap(ring->dev, --ring->ring_state, TARGET_PAGE_SIZE);
+    g_free(ring);
+}
+
 static int create_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
                      union pvrdma_cmd_resp *rsp)
 {
     struct pvrdma_cmd_create_cq *cmd = &req->create_cq;
     struct pvrdma_cmd_create_cq_resp *resp = &rsp->create_cq_resp;
     PvrdmaRing *ring = NULL;
+    int rc;
 
     memset(resp, 0, sizeof(*resp));
-    resp->hdr.response = cmd->hdr.response;
-    resp->hdr.ack = PVRDMA_CMD_CREATE_CQ_RESP;
 
     resp->cqe = cmd->cqe;
 
-    resp->hdr.err = create_cq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma,
-                                   cmd->nchunks, cmd->cqe);
-    if (resp->hdr.err) {
-        goto out;
+    rc = create_cq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma, cmd->nchunks,
+                        cmd->cqe);
+    if (rc) {
+        return rc;
     }
 
     pr_dbg("ring=%p\n", ring);
 
-    resp->hdr.err = rdma_rm_alloc_cq(&dev->rdma_dev_res, &dev->backend_dev,
-                                     cmd->cqe, &resp->cq_handle, ring);
+    rc = rdma_rm_alloc_cq(&dev->rdma_dev_res, &dev->backend_dev, cmd->cqe,
+                          &resp->cq_handle, ring);
+    if (rc) {
+        destroy_cq_ring(ring);
+    }
+
     resp->cqe = cmd->cqe;
 
-out:
-    pr_dbg("ret=%d\n", resp->hdr.err);
-    return resp->hdr.err;
+    return rc;
 }
 
 static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -356,10 +368,7 @@ static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
     }
 
     ring = (PvrdmaRing *)cq->opaque;
-    pvrdma_ring_free(ring);
-    /* ring_state was in slot 1, not 0 so need to jump back */
-    rdma_pci_dma_unmap(PCI_DEVICE(dev), --ring->ring_state, TARGET_PAGE_SIZE);
-    g_free(ring);
+    destroy_cq_ring(ring);
 
     rdma_rm_dealloc_cq(&dev->rdma_dev_res, cmd->cq_handle);
 
@@ -377,6 +386,12 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
     char ring_name[MAX_RING_NAME_SZ];
     uint32_t wqe_sz;
 
+    if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES
+        || !rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES) {
+        pr_dbg("invalid pages: %d, %d\n", spages, rpages);
+        return rc;
+    }
+
     pr_dbg("pdir_dma=0x%llx\n", (long long unsigned int)pdir_dma);
     dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
     if (!dir) {
@@ -451,36 +466,49 @@ out:
     return rc;
 }
 
+static void destroy_qp_rings(PvrdmaRing *ring)
+{
+    pr_dbg("sring=%p\n", &ring[0]);
+    pvrdma_ring_free(&ring[0]);
+    pr_dbg("rring=%p\n", &ring[1]);
+    pvrdma_ring_free(&ring[1]);
+
+    rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
+    g_free(ring);
+}
+
 static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
                      union pvrdma_cmd_resp *rsp)
 {
     struct pvrdma_cmd_create_qp *cmd = &req->create_qp;
     struct pvrdma_cmd_create_qp_resp *resp = &rsp->create_qp_resp;
     PvrdmaRing *rings = NULL;
+    int rc;
 
     memset(resp, 0, sizeof(*resp));
-    resp->hdr.response = cmd->hdr.response;
-    resp->hdr.ack = PVRDMA_CMD_CREATE_QP_RESP;
 
     pr_dbg("total_chunks=%d\n", cmd->total_chunks);
     pr_dbg("send_chunks=%d\n", cmd->send_chunks);
 
-    resp->hdr.err = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings,
-                                    cmd->max_send_wr, cmd->max_send_sge,
-                                    cmd->send_chunks, cmd->max_recv_wr,
-                                    cmd->max_recv_sge, cmd->total_chunks -
-                                    cmd->send_chunks - 1);
-    if (resp->hdr.err) {
-        goto out;
+    rc = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings,
+                         cmd->max_send_wr, cmd->max_send_sge, cmd->send_chunks,
+                         cmd->max_recv_wr, cmd->max_recv_sge,
+                         cmd->total_chunks - cmd->send_chunks - 1);
+    if (rc) {
+        return rc;
     }
 
     pr_dbg("rings=%p\n", rings);
 
-    resp->hdr.err = rdma_rm_alloc_qp(&dev->rdma_dev_res, cmd->pd_handle,
-                                     cmd->qp_type, cmd->max_send_wr,
-                                     cmd->max_send_sge, cmd->send_cq_handle,
-                                     cmd->max_recv_wr, cmd->max_recv_sge,
-                                     cmd->recv_cq_handle, rings, &resp->qpn);
+    rc = rdma_rm_alloc_qp(&dev->rdma_dev_res, cmd->pd_handle, cmd->qp_type,
+                          cmd->max_send_wr, cmd->max_send_sge,
+                          cmd->send_cq_handle, cmd->max_recv_wr,
+                          cmd->max_recv_sge, cmd->recv_cq_handle, rings,
+                          &resp->qpn);
+    if (rc) {
+        destroy_qp_rings(rings);
+        return rc;
+    }
 
     resp->max_send_wr = cmd->max_send_wr;
     resp->max_recv_wr = cmd->max_recv_wr;
@@ -488,32 +516,31 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
     resp->max_recv_sge = cmd->max_recv_sge;
     resp->max_inline_data = cmd->max_inline_data;
 
-out:
-    pr_dbg("ret=%d\n", resp->hdr.err);
-    return resp->hdr.err;
+    return 0;
 }
 
 static int modify_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
                      union pvrdma_cmd_resp *rsp)
 {
     struct pvrdma_cmd_modify_qp *cmd = &req->modify_qp;
+    int rc;
 
     pr_dbg("qp_handle=%d\n", cmd->qp_handle);
 
     memset(rsp, 0, sizeof(*rsp));
-    rsp->hdr.response = cmd->hdr.response;
-    rsp->hdr.ack = PVRDMA_CMD_MODIFY_QP_RESP;
-
-    rsp->hdr.err = rdma_rm_modify_qp(&dev->rdma_dev_res, &dev->backend_dev,
-                                 cmd->qp_handle, cmd->attr_mask,
-                                 (union ibv_gid *)&cmd->attrs.ah_attr.grh.dgid,
-                                 cmd->attrs.dest_qp_num,
-                                 (enum ibv_qp_state)cmd->attrs.qp_state,
-                                 cmd->attrs.qkey, cmd->attrs.rq_psn,
-                                 cmd->attrs.sq_psn);
-
-    pr_dbg("ret=%d\n", rsp->hdr.err);
-    return rsp->hdr.err;
+
+    /* No need to verify sgid_index since it is u8 */
+
+    rc = rdma_rm_modify_qp(&dev->rdma_dev_res, &dev->backend_dev,
+                           cmd->qp_handle, cmd->attr_mask,
+                           cmd->attrs.ah_attr.grh.sgid_index,
+                           (union ibv_gid *)&cmd->attrs.ah_attr.grh.dgid,
+                           cmd->attrs.dest_qp_num,
+                           (enum ibv_qp_state)cmd->attrs.qp_state,
+                           cmd->attrs.qkey, cmd->attrs.rq_psn,
+                           cmd->attrs.sq_psn);
+
+    return rc;
 }
 
 static int query_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -522,21 +549,18 @@ static int query_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
     struct pvrdma_cmd_query_qp *cmd = &req->query_qp;
     struct pvrdma_cmd_query_qp_resp *resp = &rsp->query_qp_resp;
     struct ibv_qp_init_attr init_attr;
+    int rc;
 
     pr_dbg("qp_handle=%d\n", cmd->qp_handle);
     pr_dbg("attr_mask=0x%x\n", cmd->attr_mask);
 
     memset(rsp, 0, sizeof(*rsp));
-    rsp->hdr.response = cmd->hdr.response;
-    rsp->hdr.ack = PVRDMA_CMD_QUERY_QP_RESP;
 
-    rsp->hdr.err = rdma_rm_query_qp(&dev->rdma_dev_res, &dev->backend_dev,
-                                    cmd->qp_handle,
-                                    (struct ibv_qp_attr *)&resp->attrs,
-                                    cmd->attr_mask, &init_attr);
+    rc = rdma_rm_query_qp(&dev->rdma_dev_res, &dev->backend_dev, cmd->qp_handle,
+                          (struct ibv_qp_attr *)&resp->attrs, cmd->attr_mask,
+                          &init_attr);
 
-    pr_dbg("ret=%d\n", rsp->hdr.err);
-    return rsp->hdr.err;
+    return rc;
 }
 
 static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -555,13 +579,7 @@ static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
     rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);
 
     ring = (PvrdmaRing *)qp->opaque;
-    pr_dbg("sring=%p\n", &ring[0]);
-    pvrdma_ring_free(&ring[0]);
-    pr_dbg("rring=%p\n", &ring[1]);
-    pvrdma_ring_free(&ring[1]);
-
-    rdma_pci_dma_unmap(PCI_DEVICE(dev), ring->ring_state, TARGET_PAGE_SIZE);
-    g_free(ring);
+    destroy_qp_rings(ring);
 
     return 0;
 }
@@ -570,10 +588,8 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
                        union pvrdma_cmd_resp *rsp)
 {
     struct pvrdma_cmd_create_bind *cmd = &req->create_bind;
-#ifdef PVRDMA_DEBUG
-    __be64 *subnet = (__be64 *)&cmd->new_gid[0];
-    __be64 *if_id = (__be64 *)&cmd->new_gid[8];
-#endif
+    int rc;
+    union ibv_gid *gid = (union ibv_gid *)&cmd->new_gid;
 
     pr_dbg("index=%d\n", cmd->index);
 
@@ -582,26 +598,20 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
     }
 
     pr_dbg("gid[%d]=0x%llx,0x%llx\n", cmd->index,
-           (long long unsigned int)be64_to_cpu(*subnet),
-           (long long unsigned int)be64_to_cpu(*if_id));
-
-    /* Driver forces to one port only */
-    memcpy(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw, &cmd->new_gid,
-           sizeof(cmd->new_gid));
+           (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix),
+           (long long unsigned int)be64_to_cpu(gid->global.interface_id));
 
-    /* TODO: Since drivers stores node_guid at load_dsr phase then this
-     * assignment is not relevant, i need to figure out a way how to
-     * retrieve MAC of our netdev */
-    dev->node_guid = dev->rdma_dev_res.ports[0].gid_tbl[0].global.interface_id;
-    pr_dbg("dev->node_guid=0x%llx\n",
-           (long long unsigned int)be64_to_cpu(dev->node_guid));
+    rc = rdma_rm_add_gid(&dev->rdma_dev_res, &dev->backend_dev,
+                         dev->backend_eth_device_name, gid, cmd->index);
 
-    return 0;
+    return rc;
 }
 
 static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
                         union pvrdma_cmd_resp *rsp)
 {
+    int rc;
+
     struct pvrdma_cmd_destroy_bind *cmd = &req->destroy_bind;
 
     pr_dbg("index=%d\n", cmd->index);
@@ -610,10 +620,10 @@ static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
         return -EINVAL;
     }
 
-    memset(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw, 0,
-           sizeof(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw));
+    rc = rdma_rm_del_gid(&dev->rdma_dev_res, &dev->backend_dev,
+                        dev->backend_eth_device_name, cmd->index);
 
-    return 0;
+    return rc;
 }
 
 static int create_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -621,18 +631,14 @@ static int create_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
 {
     struct pvrdma_cmd_create_uc *cmd = &req->create_uc;
     struct pvrdma_cmd_create_uc_resp *resp = &rsp->create_uc_resp;
+    int rc;
 
     pr_dbg("pfn=%d\n", cmd->pfn);
 
     memset(resp, 0, sizeof(*resp));
-    resp->hdr.response = cmd->hdr.response;
-    resp->hdr.ack = PVRDMA_CMD_CREATE_UC_RESP;
-    resp->hdr.err = rdma_rm_alloc_uc(&dev->rdma_dev_res, cmd->pfn,
-                                     &resp->ctx_handle);
-
-    pr_dbg("ret=%d\n", resp->hdr.err);
+    rc = rdma_rm_alloc_uc(&dev->rdma_dev_res, cmd->pfn, &resp->ctx_handle);
 
-    return 0;
+    return rc;
 }
 
 static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -646,30 +652,32 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
 
     return 0;
 }
+
 struct cmd_handler {
     uint32_t cmd;
+    uint32_t ack;
     int (*exec)(PVRDMADev *dev, union pvrdma_cmd_req *req,
             union pvrdma_cmd_resp *rsp);
 };
 
 static struct cmd_handler cmd_handlers[] = {
-    {PVRDMA_CMD_QUERY_PORT, query_port},
-    {PVRDMA_CMD_QUERY_PKEY, query_pkey},
-    {PVRDMA_CMD_CREATE_PD, create_pd},
-    {PVRDMA_CMD_DESTROY_PD, destroy_pd},
-    {PVRDMA_CMD_CREATE_MR, create_mr},
-    {PVRDMA_CMD_DESTROY_MR, destroy_mr},
-    {PVRDMA_CMD_CREATE_CQ, create_cq},
-    {PVRDMA_CMD_RESIZE_CQ, NULL},
-    {PVRDMA_CMD_DESTROY_CQ, destroy_cq},
-    {PVRDMA_CMD_CREATE_QP, create_qp},
-    {PVRDMA_CMD_MODIFY_QP, modify_qp},
-    {PVRDMA_CMD_QUERY_QP, query_qp},
-    {PVRDMA_CMD_DESTROY_QP, destroy_qp},
-    {PVRDMA_CMD_CREATE_UC, create_uc},
-    {PVRDMA_CMD_DESTROY_UC, destroy_uc},
-    {PVRDMA_CMD_CREATE_BIND, create_bind},
-    {PVRDMA_CMD_DESTROY_BIND, destroy_bind},
+    {PVRDMA_CMD_QUERY_PORT,   PVRDMA_CMD_QUERY_PORT_RESP,        query_port},
+    {PVRDMA_CMD_QUERY_PKEY,   PVRDMA_CMD_QUERY_PKEY_RESP,        query_pkey},
+    {PVRDMA_CMD_CREATE_PD,    PVRDMA_CMD_CREATE_PD_RESP,         create_pd},
+    {PVRDMA_CMD_DESTROY_PD,   PVRDMA_CMD_DESTROY_PD_RESP_NOOP,   destroy_pd},
+    {PVRDMA_CMD_CREATE_MR,    PVRDMA_CMD_CREATE_MR_RESP,         create_mr},
+    {PVRDMA_CMD_DESTROY_MR,   PVRDMA_CMD_DESTROY_MR_RESP_NOOP,   destroy_mr},
+    {PVRDMA_CMD_CREATE_CQ,    PVRDMA_CMD_CREATE_CQ_RESP,         create_cq},
+    {PVRDMA_CMD_RESIZE_CQ,    PVRDMA_CMD_RESIZE_CQ_RESP,         NULL},
+    {PVRDMA_CMD_DESTROY_CQ,   PVRDMA_CMD_DESTROY_CQ_RESP_NOOP,   destroy_cq},
+    {PVRDMA_CMD_CREATE_QP,    PVRDMA_CMD_CREATE_QP_RESP,         create_qp},
+    {PVRDMA_CMD_MODIFY_QP,    PVRDMA_CMD_MODIFY_QP_RESP,         modify_qp},
+    {PVRDMA_CMD_QUERY_QP,     PVRDMA_CMD_QUERY_QP_RESP,          query_qp},
+    {PVRDMA_CMD_DESTROY_QP,   PVRDMA_CMD_DESTROY_QP_RESP,        destroy_qp},
+    {PVRDMA_CMD_CREATE_UC,    PVRDMA_CMD_CREATE_UC_RESP,         create_uc},
+    {PVRDMA_CMD_DESTROY_UC,   PVRDMA_CMD_DESTROY_UC_RESP_NOOP,   destroy_uc},
+    {PVRDMA_CMD_CREATE_BIND,  PVRDMA_CMD_CREATE_BIND_RESP_NOOP,  create_bind},
+    {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind},
 };
 
 int execute_command(PVRDMADev *dev)
@@ -692,7 +700,12 @@ int execute_command(PVRDMADev *dev)
     }
 
     err = cmd_handlers[dsr_info->req->hdr.cmd].exec(dev, dsr_info->req,
-                            dsr_info->rsp);
+                                                    dsr_info->rsp);
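+    /*
+     * The response header is filled here for all commands; a negative
+     * errno from the handler is flipped to the positive error code
+     * reported to the guest.
+     */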
+    dsr_info->rsp->hdr.response = dsr_info->req->hdr.response;
+    dsr_info->rsp->hdr.ack = cmd_handlers[dsr_info->req->hdr.cmd].ack;
+    dsr_info->rsp->hdr.err = err < 0 ? -err : 0;
+    pr_dbg("rsp->hdr.err=%d\n", dsr_info->rsp->hdr.err);
+
 out:
     set_reg_val(dev, PVRDMA_REG_ERR, err);
     post_interrupt(dev, INTR_VEC_CMD_RING);
diff --git a/hw/rdma/vmw/pvrdma_dev_ring.c b/hw/rdma/vmw/pvrdma_dev_ring.c
index 01247fc041..e8e5b502f6 100644
--- a/hw/rdma/vmw/pvrdma_dev_ring.c
+++ b/hw/rdma/vmw/pvrdma_dev_ring.c
@@ -73,23 +73,16 @@ out:
 
 void *pvrdma_ring_next_elem_read(PvrdmaRing *ring)
 {
+    int e;
     unsigned int idx = 0, offset;
 
-    /*
-    pr_dbg("%s: t=%d, h=%d\n", ring->name, ring->ring_state->prod_tail,
-           ring->ring_state->cons_head);
-    */
-
-    if (!pvrdma_idx_ring_has_data(ring->ring_state, ring->max_elems, &idx)) {
+    e = pvrdma_idx_ring_has_data(ring->ring_state, ring->max_elems, &idx);
+    if (e <= 0) {
         pr_dbg("No more data in ring\n");
         return NULL;
     }
 
     offset = idx * ring->elem_sz;
-    /*
-    pr_dbg("idx=%d\n", idx);
-    pr_dbg("offset=%d\n", offset);
-    */
     return ring->pages[offset / TARGET_PAGE_SIZE] + (offset % TARGET_PAGE_SIZE);
 }
 
@@ -105,20 +98,20 @@ void pvrdma_ring_read_inc(PvrdmaRing *ring)
 
 void *pvrdma_ring_next_elem_write(PvrdmaRing *ring)
 {
-    unsigned int idx, offset, tail;
+    int idx;
+    unsigned int offset, tail;
 
-    /*
-    pr_dbg("%s: t=%d, h=%d\n", ring->name, ring->ring_state->prod_tail,
-           ring->ring_state->cons_head);
-    */
-
-    if (!pvrdma_idx_ring_has_space(ring->ring_state, ring->max_elems, &tail)) {
+    idx = pvrdma_idx_ring_has_space(ring->ring_state, ring->max_elems, &tail);
+    if (idx <= 0) {
         pr_dbg("CQ is full\n");
         return NULL;
     }
 
     idx = pvrdma_idx(&ring->ring_state->prod_tail, ring->max_elems);
-    /* TODO: tail == idx */
+    if (idx < 0 || tail != idx) {
+        pr_dbg("invalid idx\n");
+        return NULL;
+    }
 
     offset = idx * ring->elem_sz;
     return ring->pages[offset / TARGET_PAGE_SIZE] + (offset % TARGET_PAGE_SIZE);
diff --git a/hw/rdma/vmw/pvrdma_dev_ring.h b/hw/rdma/vmw/pvrdma_dev_ring.h
index 411d244603..5f2a0cf9b9 100644
--- a/hw/rdma/vmw/pvrdma_dev_ring.h
+++ b/hw/rdma/vmw/pvrdma_dev_ring.h
@@ -16,7 +16,6 @@
 #ifndef PVRDMA_DEV_RING_H
 #define PVRDMA_DEV_RING_H
 
-#include "qemu/typedefs.h"
 
 #define MAX_RING_NAME_SZ 32
 
diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
index ca5fa8d981..838ad8a949 100644
--- a/hw/rdma/vmw/pvrdma_main.c
+++ b/hw/rdma/vmw/pvrdma_main.c
@@ -24,6 +24,7 @@
 #include "hw/qdev-properties.h"
 #include "cpu.h"
 #include "trace.h"
+#include "sysemu/sysemu.h"
 
 #include "../rdma_rm.h"
 #include "../rdma_backend.h"
@@ -36,9 +37,9 @@
 #include "pvrdma_qp_ops.h"
 
 static Property pvrdma_dev_properties[] = {
-    DEFINE_PROP_STRING("backend-dev", PVRDMADev, backend_device_name),
-    DEFINE_PROP_UINT8("backend-port", PVRDMADev, backend_port_num, 1),
-    DEFINE_PROP_UINT8("backend-gid-idx", PVRDMADev, backend_gid_idx, 0),
+    DEFINE_PROP_STRING("netdev", PVRDMADev, backend_eth_device_name),
+    DEFINE_PROP_STRING("ibdev", PVRDMADev, backend_device_name),
+    DEFINE_PROP_UINT8("ibport", PVRDMADev, backend_port_num, 1),
     DEFINE_PROP_UINT64("dev-caps-max-mr-size", PVRDMADev, dev_attr.max_mr_size,
                        MAX_MR_SIZE),
     DEFINE_PROP_INT32("dev-caps-max-qp", PVRDMADev, dev_attr.max_qp, MAX_QP),
@@ -51,6 +52,7 @@ static Property pvrdma_dev_properties[] = {
     DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev,
                       dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM),
     DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH),
+    DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr),
     DEFINE_PROP_END_OF_LIST(),
 };
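+
+/*
+ * Example invocation (hypothetical ids and interface names; see
+ * docs/pvrdma.txt for the authoritative command line):
+ *
+ *   -chardev socket,path=/var/run/rdmacm-mux-mlx5_0-1,id=mads
+ *   -device pvrdma,ibdev=mlx5_0,ibport=1,netdev=ens3,mad-chardev=mads
+ */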
 
@@ -263,7 +265,7 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
     dsr->caps.sys_image_guid = 0;
     pr_dbg("sys_image_guid=%" PRIx64 "\n", dsr->caps.sys_image_guid);
 
-    dsr->caps.node_guid = cpu_to_be64(dev->node_guid);
+    dsr->caps.node_guid = dev->node_guid;
     pr_dbg("node_guid=%" PRIx64 "\n", be64_to_cpu(dsr->caps.node_guid));
 
     dsr->caps.phys_port_cnt = MAX_PORTS;
@@ -275,17 +277,6 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
     pr_dbg("Initialized\n");
 }
 
-static void init_ports(PVRDMADev *dev, Error **errp)
-{
-    int i;
-
-    memset(dev->rdma_dev_res.ports, 0, sizeof(dev->rdma_dev_res.ports));
-
-    for (i = 0; i < MAX_PORTS; i++) {
-        dev->rdma_dev_res.ports[i].state = IBV_PORT_DOWN;
-    }
-}
-
 static void uninit_msix(PCIDevice *pdev, int used_vectors)
 {
     PVRDMADev *dev = PVRDMA_DEV(pdev);
@@ -334,7 +325,8 @@ static void pvrdma_fini(PCIDevice *pdev)
 
     pvrdma_qp_ops_fini();
 
-    rdma_rm_fini(&dev->rdma_dev_res);
+    rdma_rm_fini(&dev->rdma_dev_res, &dev->backend_dev,
+                 dev->backend_eth_device_name);
 
     rdma_backend_fini(&dev->backend_dev);
 
@@ -343,6 +335,9 @@ static void pvrdma_fini(PCIDevice *pdev)
     if (msix_enabled(pdev)) {
         uninit_msix(pdev, RDMA_MAX_INTRS);
     }
+
+    pr_dbg("Device %s %x.%x is down\n", pdev->name, PCI_SLOT(pdev->devfn),
+           PCI_FUNC(pdev->devfn));
 }
 
 static void pvrdma_stop(PVRDMADev *dev)
@@ -368,13 +363,11 @@ static int unquiesce_device(PVRDMADev *dev)
     return 0;
 }
 
-static int reset_device(PVRDMADev *dev)
+static void reset_device(PVRDMADev *dev)
 {
     pvrdma_stop(dev);
 
     pr_dbg("Device reset complete\n");
-
-    return 0;
 }
 
 static uint64_t regs_read(void *opaque, hwaddr addr, unsigned size)
@@ -455,6 +448,11 @@ static const MemoryRegionOps regs_ops = {
     },
 };
 
+static uint64_t uar_read(void *opaque, hwaddr addr, unsigned size)
+{
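+    /* The UAR is write-only; reads have no effect and return all-ones */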
+    return 0xffffffff;
+}
+
 static void uar_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
 {
     PVRDMADev *dev = opaque;
@@ -496,6 +494,7 @@ static void uar_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
 }
 
 static const MemoryRegionOps uar_ops = {
+    .read = uar_read,
     .write = uar_write,
     .endianness = DEVICE_LITTLE_ENDIAN,
     .impl = {
@@ -570,12 +569,21 @@ static int pvrdma_check_ram_shared(Object *obj, void *opaque)
     return 0;
 }
 
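+/*
+ * Tear the device down on VM shutdown so that anything registered with
+ * the external rdmacm-mux service (e.g. GIDs) gets released.
+ */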
+static void pvrdma_shutdown_notifier(Notifier *n, void *opaque)
+{
+    PVRDMADev *dev = container_of(n, PVRDMADev, shutdown_notifier);
+    PCIDevice *pci_dev = PCI_DEVICE(dev);
+
+    pvrdma_fini(pci_dev);
+}
+
 static void pvrdma_realize(PCIDevice *pdev, Error **errp)
 {
-    int rc;
+    int rc = 0;
     PVRDMADev *dev = PVRDMA_DEV(pdev);
     Object *memdev_root;
     bool ram_shared = false;
+    PCIDevice *func0;
 
     init_pr_dbg();
 
@@ -587,6 +595,20 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
         return;
     }
 
+    func0 = pci_get_function_0(pdev);
+    /* Fail early unless the function-0 device on this slot is vmxnet3 */
+    if (strcmp(object_get_typename(&func0->qdev.parent_obj), TYPE_VMXNET3)) {
+        pr_dbg("func0 type is %s\n",
+               object_get_typename(&func0->qdev.parent_obj));
+        error_setg(errp, "Device on %x.0 must be %s", PCI_SLOT(pdev->devfn),
+                   TYPE_VMXNET3);
+        return;
+    }
+    dev->func0 = VMXNET3(func0);
+
+    addrconf_addr_eui48((unsigned char *)&dev->node_guid,
+                        (const char *)&dev->func0->conf.macaddr.a);
+
     memdev_root = object_resolve_path("/objects", NULL);
     if (memdev_root) {
         object_child_foreach(memdev_root, pvrdma_check_ram_shared, &ram_shared);
@@ -613,7 +635,7 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
 
     rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res,
                            dev->backend_device_name, dev->backend_port_num,
-                           dev->backend_gid_idx, &dev->dev_attr, errp);
+                           &dev->dev_attr, &dev->mad_chr, errp);
     if (rc) {
         goto out;
     }
@@ -623,15 +645,17 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
         goto out;
     }
 
-    init_ports(dev, errp);
-
     rc = pvrdma_qp_ops_init();
     if (rc) {
         goto out;
     }
 
+    dev->shutdown_notifier.notify = pvrdma_shutdown_notifier;
+    qemu_register_shutdown_notifier(&dev->shutdown_notifier);
+
 out:
     if (rc) {
+        pvrdma_fini(pdev);
         error_append_hint(errp, "Device fail to load\n");
     }
 }
diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
index c668afd0ed..300471a4c9 100644
--- a/hw/rdma/vmw/pvrdma_qp_ops.c
+++ b/hw/rdma/vmw/pvrdma_qp_ops.c
@@ -47,7 +47,7 @@ typedef struct PvrdmaRqWqe {
  * 3. Interrupt host
  */
 static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
-                           struct pvrdma_cqe *cqe)
+                           struct pvrdma_cqe *cqe, struct ibv_wc *wc)
 {
     struct pvrdma_cqe *cqe1;
     struct pvrdma_cqne *cqne;
@@ -66,6 +66,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
     pr_dbg("Writing CQE\n");
     cqe1 = pvrdma_ring_next_elem_write(ring);
     if (unlikely(!cqe1)) {
+        pr_dbg("No CQEs in ring\n");
         return -EINVAL;
     }
 
@@ -73,8 +74,20 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
     cqe1->wr_id = cqe->wr_id;
     cqe1->qp = cqe->qp;
     cqe1->opcode = cqe->opcode;
-    cqe1->status = cqe->status;
-    cqe1->vendor_err = cqe->vendor_err;
+    cqe1->status = wc->status;
+    cqe1->byte_len = wc->byte_len;
+    cqe1->src_qp = wc->src_qp;
+    cqe1->wc_flags = wc->wc_flags;
+    cqe1->vendor_err = wc->vendor_err;
+
+    pr_dbg("wr_id=%" PRIx64 "\n", cqe1->wr_id);
+    pr_dbg("qp=0x%lx\n", cqe1->qp);
+    pr_dbg("opcode=%d\n", cqe1->opcode);
+    pr_dbg("status=%d\n", cqe1->status);
+    pr_dbg("byte_len=%d\n", cqe1->byte_len);
+    pr_dbg("src_qp=%d\n", cqe1->src_qp);
+    pr_dbg("wc_flags=%d\n", cqe1->wc_flags);
+    pr_dbg("vendor_err=%d\n", cqe1->vendor_err);
 
     pvrdma_ring_write_inc(ring);
 
@@ -89,26 +102,22 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
     pvrdma_ring_write_inc(&dev->dsr_info.cq);
 
     pr_dbg("cq->notify=%d\n", cq->notify);
-    if (cq->notify) {
-        cq->notify = false;
+    if (cq->notify != CNT_CLEAR) {
+        if (cq->notify == CNT_ARM) {
+            cq->notify = CNT_CLEAR;
+        }
         post_interrupt(dev, INTR_VEC_CMD_COMPLETION_Q);
     }
 
     return 0;
 }
 
-static void pvrdma_qp_ops_comp_handler(int status, unsigned int vendor_err,
-                                       void *ctx)
+static void pvrdma_qp_ops_comp_handler(void *ctx, struct ibv_wc *wc)
 {
     CompHandlerCtx *comp_ctx = (CompHandlerCtx *)ctx;
 
-    pr_dbg("cq_handle=%d\n", comp_ctx->cq_handle);
-    pr_dbg("wr_id=%" PRIx64 "\n", comp_ctx->cqe.wr_id);
-    pr_dbg("status=%d\n", status);
-    pr_dbg("vendor_err=0x%x\n", vendor_err);
-    comp_ctx->cqe.status = status;
-    comp_ctx->cqe.vendor_err = vendor_err;
-    pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, &comp_ctx->cqe);
+    pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, &comp_ctx->cqe, wc);
+
     g_free(ctx);
 }
 
@@ -129,6 +138,8 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
     RdmaRmQP *qp;
     PvrdmaSqWqe *wqe;
     PvrdmaRing *ring;
+    int sgid_idx;
+    union ibv_gid *sgid;
 
     pr_dbg("qp_handle=0x%x\n", qp_handle);
 
@@ -152,10 +163,28 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
         comp_ctx->cq_handle = qp->send_cq_handle;
         comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
         comp_ctx->cqe.qp = qp_handle;
-        comp_ctx->cqe.opcode = wqe->hdr.opcode;
+        comp_ctx->cqe.opcode = IBV_WC_SEND;
+
+        sgid = rdma_rm_get_gid(&dev->rdma_dev_res, wqe->hdr.wr.ud.av.gid_index);
+        if (!sgid) {
+            pr_dbg("Fail to get gid for idx %d\n", wqe->hdr.wr.ud.av.gid_index);
+            return -EIO;
+        }
+        pr_dbg("sgid_id=%d, sgid=0x%llx\n", wqe->hdr.wr.ud.av.gid_index,
+               sgid->global.interface_id);
+
+        sgid_idx = rdma_rm_get_backend_gid_index(&dev->rdma_dev_res,
+                                                 &dev->backend_dev,
+                                                 wqe->hdr.wr.ud.av.gid_index);
+        if (sgid_idx <= 0) {
+            pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n",
+                   wqe->hdr.wr.ud.av.gid_index);
+            return -EIO;
+        }
 
         rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type,
                                (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
+                               sgid_idx, sgid,
                                (union ibv_gid *)wqe->hdr.wr.ud.av.dgid,
                                wqe->hdr.wr.ud.remote_qpn,
                                wqe->hdr.wr.ud.remote_qkey, comp_ctx);
@@ -194,8 +223,9 @@ int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
         comp_ctx = g_malloc(sizeof(CompHandlerCtx));
         comp_ctx->dev = dev;
         comp_ctx->cq_handle = qp->recv_cq_handle;
-        comp_ctx->cqe.qp = qp_handle;
         comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
+        comp_ctx->cqe.qp = qp_handle;
+        comp_ctx->cqe.opcode = IBV_WC_RECV;
 
         rdma_backend_post_recv(&dev->backend_dev, &dev->rdma_dev_res,
                                &qp->backend_qp, qp->qp_type,
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 04ec5cc970..f92b046cd3 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -1290,9 +1290,19 @@ void copy_scsw_to_guest(SCSW *dest, const SCSW *src)
 static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src)
 {
     int i;
-
-    copy_pmcw_to_guest(&dest->pmcw, &src->pmcw);
-    copy_scsw_to_guest(&dest->scsw, &src->scsw);
+    /*
+     * We copy the PMCW and SCSW in and out of local variables to
+     * avoid taking the address of members of a packed struct.
+     */
+    PMCW src_pmcw, dest_pmcw;
+    SCSW src_scsw, dest_scsw;
+
+    src_pmcw = src->pmcw;
+    copy_pmcw_to_guest(&dest_pmcw, &src_pmcw);
+    dest->pmcw = dest_pmcw;
+    src_scsw = src->scsw;
+    copy_scsw_to_guest(&dest_scsw, &src_scsw);
+    dest->scsw = dest_scsw;
     dest->mba = cpu_to_be64(src->mba);
     for (i = 0; i < ARRAY_SIZE(dest->mda); i++) {
         dest->mda[i] = src->mda[i];
@@ -1339,9 +1349,19 @@ static void copy_scsw_from_guest(SCSW *dest, const SCSW *src)
 static void copy_schib_from_guest(SCHIB *dest, const SCHIB *src)
 {
     int i;
-
-    copy_pmcw_from_guest(&dest->pmcw, &src->pmcw);
-    copy_scsw_from_guest(&dest->scsw, &src->scsw);
+    /*
+     * We copy the PMCW and SCSW in and out of local variables to
+     * avoid taking the address of members of a packed struct.
+     */
+    PMCW src_pmcw, dest_pmcw;
+    SCSW src_scsw, dest_scsw;
+
+    src_pmcw = src->pmcw;
+    copy_pmcw_from_guest(&dest_pmcw, &src_pmcw);
+    dest->pmcw = dest_pmcw;
+    src_scsw = src->scsw;
+    copy_scsw_from_guest(&dest_scsw, &src_scsw);
+    dest->scsw = dest_scsw;
     dest->mba = be64_to_cpu(src->mba);
     for (i = 0; i < ARRAY_SIZE(dest->mda); i++) {
         dest->mda[i] = src->mda[i];
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index f7458445c0..15759b6514 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -823,8 +823,8 @@ static bool s390_pci_alloc_idx(S390pciState *s, S390PCIBusDevice *pbdev)
     return true;
 }
 
-static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev,
-                                  DeviceState *dev, Error **errp)
+static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+                              Error **errp)
 {
     PCIDevice *pdev = NULL;
     S390PCIBusDevice *pbdev = NULL;
@@ -932,8 +932,8 @@ static void s390_pcihost_timer_cb(void *opaque)
     qdev_unplug(DEVICE(pbdev), NULL);
 }
 
-static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev,
-                                    DeviceState *dev, Error **errp)
+static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
+                                Error **errp)
 {
     PCIDevice *pci_dev = NULL;
     PCIBus *bus;
@@ -1041,8 +1041,8 @@ static void s390_pcihost_class_init(ObjectClass *klass, void *data)
 
     dc->reset = s390_pcihost_reset;
     dc->realize = s390_pcihost_realize;
-    hc->plug = s390_pcihost_hot_plug;
-    hc->unplug = s390_pcihost_hot_unplug;
+    hc->plug = s390_pcihost_plug;
+    hc->unplug = s390_pcihost_unplug;
     msi_nonbroken = true;
 }
 
diff --git a/hw/smbios/smbios-stub.c b/hw/smbios/smbios-stub.c
index d3a385441a..64e5ba93ec 100644
--- a/hw/smbios/smbios-stub.c
+++ b/hw/smbios/smbios-stub.c
@@ -23,7 +23,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
 
 void smbios_entry_add(QemuOpts *opts, Error **errp)
 {
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index 04811279a0..818be8a838 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -24,11 +24,10 @@
 #include "sysemu/sysemu.h"
 #include "qemu/uuid.h"
 #include "sysemu/cpus.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
 #include "hw/loader.h"
 #include "exec/cpu-common.h"
 #include "smbios_build.h"
-#include "hw/smbios/ipmi.h"
 
 /* legacy structures and constants for <= 2.0 machines */
 struct smbios_header {
diff --git a/hw/smbios/smbios_build.h b/hw/smbios/smbios_build.h
index 93b360d520..56b5a1e3f3 100644
--- a/hw/smbios/smbios_build.h
+++ b/hw/smbios/smbios_build.h
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
  * Copyright (C) 2013 Red Hat, Inc.
+ * Copyright (c) 2015,2016 Corey Minyard, MontaVista Software, LLC
  *
  * Authors:
  *  Alex Williamson <alex.williamson@hp.com>
@@ -96,4 +97,7 @@ extern unsigned smbios_table_cnt;
         smbios_table_cnt++;                                               \
     } while (0)
 
+/* IPMI SMBIOS firmware handling */
+void smbios_build_type_38_table(void);
+
 #endif /* QEMU_SMBIOS_BUILD_H */
diff --git a/hw/smbios/smbios_type_38-stub.c b/hw/smbios/smbios_type_38-stub.c
index 5b83c9b1f1..14b53d004b 100644
--- a/hw/smbios/smbios_type_38-stub.c
+++ b/hw/smbios/smbios_type_38-stub.c
@@ -8,7 +8,7 @@
  */
 
 #include "qemu/osdep.h"
-#include "hw/smbios/ipmi.h"
+#include "smbios_build.h"
 
 void smbios_build_type_38_table(void)
 {
diff --git a/hw/smbios/smbios_type_38.c b/hw/smbios/smbios_type_38.c
index 56e8609c00..0c08f282de 100644
--- a/hw/smbios/smbios_type_38.c
+++ b/hw/smbios/smbios_type_38.c
@@ -9,8 +9,7 @@
 
 #include "qemu/osdep.h"
 #include "hw/ipmi/ipmi.h"
-#include "hw/smbios/ipmi.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
 #include "qemu/error-report.h"
 #include "smbios_build.h"
 
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 0a25f5e096..6166ccd47a 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -10,9 +10,9 @@
  * directory.
  */
 
+#include "qemu/osdep.h"
 #include <linux/vfio.h>
 #include <sys/ioctl.h>
-#include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "hw/sysbus.h"
 #include "hw/vfio/vfio.h"
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 5c7bd96984..c0cb1ec289 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1897,15 +1897,10 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
                                    PCI_EXP_TYPE_ENDPOINT << 4,
                                    PCI_EXP_FLAGS_TYPE);
             vfio_add_emulated_long(vdev, pos + PCI_EXP_LNKCAP,
-                                   PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25, ~0);
+                           QEMU_PCI_EXP_LNKCAP_MLW(QEMU_PCI_EXP_LNK_X1) |
+                           QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), ~0);
             vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0);
         }
-
-        /* Mark the Link Status bits as emulated to allow virtual negotiation */
-        vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKSTA,
-                               pci_get_word(vdev->pdev.config + pos +
-                                            PCI_EXP_LNKSTA),
-                               PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS);
     }
 
     /*
diff --git a/hw/virtio/virtio-crypto-pci.c b/hw/virtio/virtio-crypto-pci.c
index bf64996e48..8cc3fa3ef7 100644
--- a/hw/virtio/virtio-crypto-pci.c
+++ b/hw/virtio/virtio-crypto-pci.c
@@ -64,9 +64,8 @@ static void virtio_crypto_initfn(Object *obj)
                                 TYPE_VIRTIO_CRYPTO);
 }
 
-static const TypeInfo virtio_crypto_pci_info = {
-    .name          = TYPE_VIRTIO_CRYPTO_PCI,
-    .parent        = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_crypto_pci_info = {
+    .generic_name  = TYPE_VIRTIO_CRYPTO_PCI,
     .instance_size = sizeof(VirtIOCryptoPCI),
     .instance_init = virtio_crypto_initfn,
     .class_init    = virtio_crypto_pci_class_init,
@@ -74,6 +73,6 @@ static const TypeInfo virtio_crypto_pci_info = {
 
 static void virtio_crypto_pci_register_types(void)
 {
-    type_register_static(&virtio_crypto_pci_info);
+    virtio_pci_types_register(&virtio_crypto_pci_info);
 }
 type_init(virtio_crypto_pci_register_types)
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index a954799267..d05066deb8 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1119,9 +1119,11 @@ static void virtio_9p_pci_instance_init(Object *obj)
                                 TYPE_VIRTIO_9P);
 }
 
-static const TypeInfo virtio_9p_pci_info = {
-    .name          = TYPE_VIRTIO_9P_PCI,
-    .parent        = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_9p_pci_info = {
+    .base_name              = TYPE_VIRTIO_9P_PCI,
+    .generic_name           = "virtio-9p-pci",
+    .transitional_name      = "virtio-9p-pci-transitional",
+    .non_transitional_name  = "virtio-9p-pci-non-transitional",
     .instance_size = sizeof(V9fsPCIState),
     .instance_init = virtio_9p_pci_instance_init,
     .class_init    = virtio_9p_pci_class_init,
@@ -1877,9 +1879,6 @@ static void virtio_pci_reset(DeviceState *qdev)
 static Property virtio_pci_properties[] = {
     DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags,
                     VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false),
-    DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy,
-                            ON_OFF_AUTO_AUTO),
-    DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false),
     DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags,
                     VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true),
     DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags,
@@ -1939,13 +1938,123 @@ static const TypeInfo virtio_pci_info = {
     .class_init    = virtio_pci_class_init,
     .class_size    = sizeof(VirtioPCIClass),
     .abstract      = true,
-    .interfaces = (InterfaceInfo[]) {
-        { INTERFACE_PCIE_DEVICE },
-        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
-        { }
-    },
 };
 
+static Property virtio_pci_generic_properties[] = {
+    DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy,
+                            ON_OFF_AUTO_AUTO),
+    DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_pci_base_class_init(ObjectClass *klass, void *data)
+{
+    const VirtioPCIDeviceTypeInfo *t = data;
+    if (t->class_init) {
+        t->class_init(klass, NULL);
+    }
+}
+
+static void virtio_pci_generic_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->props = virtio_pci_generic_properties;
+}
+
+/* Used when the generic type and the base type are the same */
+static void virtio_pci_generic_base_class_init(ObjectClass *klass, void *data)
+{
+    virtio_pci_base_class_init(klass, data);
+    virtio_pci_generic_class_init(klass, NULL);
+}
+
+static void virtio_pci_transitional_instance_init(Object *obj)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);
+
+    proxy->disable_legacy = ON_OFF_AUTO_OFF;
+    proxy->disable_modern = false;
+}
+
+static void virtio_pci_non_transitional_instance_init(Object *obj)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);
+
+    proxy->disable_legacy = ON_OFF_AUTO_ON;
+    proxy->disable_modern = false;
+}
+
+void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t)
+{
+    TypeInfo base_type_info = {
+        .name          = t->base_name,
+        .parent        = t->parent ? t->parent : TYPE_VIRTIO_PCI,
+        .instance_size = t->instance_size,
+        .instance_init = t->instance_init,
+        .class_init    = virtio_pci_base_class_init,
+        .class_data    = (void *)t,
+        .abstract      = true,
+    };
+    TypeInfo generic_type_info = {
+        .name = t->generic_name,
+        .parent = base_type_info.name,
+        .class_init = virtio_pci_generic_class_init,
+        .interfaces = (InterfaceInfo[]) {
+            { INTERFACE_PCIE_DEVICE },
+            { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+            { }
+        },
+    };
+
+    if (!base_type_info.name) {
+        /* No base type -> register a single generic device type */
+        base_type_info.name = t->generic_name;
+        base_type_info.class_init = virtio_pci_generic_base_class_init;
+        base_type_info.interfaces = generic_type_info.interfaces;
+        base_type_info.abstract = false;
+        generic_type_info.name = NULL;
+        assert(!t->non_transitional_name);
+        assert(!t->transitional_name);
+    }
+
+    type_register(&base_type_info);
+    if (generic_type_info.name) {
+        type_register(&generic_type_info);
+    }
+
+    if (t->non_transitional_name) {
+        const TypeInfo non_transitional_type_info = {
+            .name          = t->non_transitional_name,
+            .parent        = base_type_info.name,
+            .instance_init = virtio_pci_non_transitional_instance_init,
+            .interfaces = (InterfaceInfo[]) {
+                { INTERFACE_PCIE_DEVICE },
+                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+                { }
+            },
+        };
+        type_register(&non_transitional_type_info);
+    }
+
+    if (t->transitional_name) {
+        const TypeInfo transitional_type_info = {
+            .name          = t->transitional_name,
+            .parent        = base_type_info.name,
+            .instance_init = virtio_pci_transitional_instance_init,
+            .interfaces = (InterfaceInfo[]) {
+                /*
+                 * Transitional virtio devices work only as Conventional PCI
+                 * devices because they require PIO ports.
+                 */
+                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+                { }
+            },
+        };
+        type_register(&transitional_type_info);
+    }
+}
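
Taken together, one VirtioPCIDeviceTypeInfo now fans out into up to four QOM types. A minimal sketch for a hypothetical device, following the conversions below (all "virtio-foo" names and helpers are illustrative):

    static const VirtioPCIDeviceTypeInfo virtio_foo_pci_info = {
        .base_name             = "virtio-foo-pci-base",  /* abstract parent */
        .generic_name          = "virtio-foo-pci",
        .transitional_name     = "virtio-foo-pci-transitional",
        .non_transitional_name = "virtio-foo-pci-non-transitional",
        .instance_size = sizeof(VirtIOFooPCI),          /* hypothetical */
        .instance_init = virtio_foo_pci_instance_init,  /* hypothetical */
        .class_init    = virtio_foo_pci_class_init,     /* hypothetical */
    };

    static void virtio_foo_pci_register(void)
    {
        virtio_pci_types_register(&virtio_foo_pci_info);
    }
    type_init(virtio_foo_pci_register)

Given the instance_init functions above, "-device virtio-foo-pci-transitional" then behaves roughly like "-device virtio-foo-pci,disable-legacy=off,disable-modern=off", except that the dedicated type also drops the PCIe interface, since transitional devices need PIO ports.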
+
 /* virtio-blk-pci */
 
 static Property virtio_blk_pci_properties[] = {
@@ -1995,9 +2104,11 @@ static void virtio_blk_pci_instance_init(Object *obj)
                               "bootindex", &error_abort);
 }
 
-static const TypeInfo virtio_blk_pci_info = {
-    .name          = TYPE_VIRTIO_BLK_PCI,
-    .parent        = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_blk_pci_info = {
+    .base_name              = TYPE_VIRTIO_BLK_PCI,
+    .generic_name           = "virtio-blk-pci",
+    .transitional_name      = "virtio-blk-pci-transitional",
+    .non_transitional_name  = "virtio-blk-pci-non-transitional",
     .instance_size = sizeof(VirtIOBlkPCI),
     .instance_init = virtio_blk_pci_instance_init,
     .class_init    = virtio_blk_pci_class_init,
@@ -2051,9 +2162,11 @@ static void vhost_user_blk_pci_instance_init(Object *obj)
                               "bootindex", &error_abort);
 }
 
-static const TypeInfo vhost_user_blk_pci_info = {
-    .name           = TYPE_VHOST_USER_BLK_PCI,
-    .parent         = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo vhost_user_blk_pci_info = {
+    .base_name               = TYPE_VHOST_USER_BLK_PCI,
+    .generic_name            = "vhost-user-blk-pci",
+    .transitional_name       = "vhost-user-blk-pci-transitional",
+    .non_transitional_name   = "vhost-user-blk-pci-non-transitional",
     .instance_size  = sizeof(VHostUserBlkPCI),
     .instance_init  = vhost_user_blk_pci_instance_init,
     .class_init     = vhost_user_blk_pci_class_init,
@@ -2119,9 +2232,11 @@ static void virtio_scsi_pci_instance_init(Object *obj)
                                 TYPE_VIRTIO_SCSI);
 }
 
-static const TypeInfo virtio_scsi_pci_info = {
-    .name          = TYPE_VIRTIO_SCSI_PCI,
-    .parent        = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_scsi_pci_info = {
+    .base_name              = TYPE_VIRTIO_SCSI_PCI,
+    .generic_name           = "virtio-scsi-pci",
+    .transitional_name      = "virtio-scsi-pci-transitional",
+    .non_transitional_name  = "virtio-scsi-pci-non-transitional",
     .instance_size = sizeof(VirtIOSCSIPCI),
     .instance_init = virtio_scsi_pci_instance_init,
     .class_init    = virtio_scsi_pci_class_init,
@@ -2174,9 +2289,11 @@ static void vhost_scsi_pci_instance_init(Object *obj)
                               "bootindex", &error_abort);
 }
 
-static const TypeInfo vhost_scsi_pci_info = {
-    .name          = TYPE_VHOST_SCSI_PCI,
-    .parent        = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo vhost_scsi_pci_info = {
+    .base_name             = TYPE_VHOST_SCSI_PCI,
+    .generic_name          = "vhost-scsi-pci",
+    .transitional_name     = "vhost-scsi-pci-transitional",
+    .non_transitional_name = "vhost-scsi-pci-non-transitional",
     .instance_size = sizeof(VHostSCSIPCI),
     .instance_init = vhost_scsi_pci_instance_init,
     .class_init    = vhost_scsi_pci_class_init,
@@ -2229,9 +2346,11 @@ static void vhost_user_scsi_pci_instance_init(Object *obj)
                               "bootindex", &error_abort);
 }
 
-static const TypeInfo vhost_user_scsi_pci_info = {
-    .name          = TYPE_VHOST_USER_SCSI_PCI,
-    .parent        = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo vhost_user_scsi_pci_info = {
+    .base_name             = TYPE_VHOST_USER_SCSI_PCI,
+    .generic_name          = "vhost-user-scsi-pci",
+    .transitional_name     = "vhost-user-scsi-pci-transitional",
+    .non_transitional_name = "vhost-user-scsi-pci-non-transitional",
     .instance_size = sizeof(VHostUserSCSIPCI),
     .instance_init = vhost_user_scsi_pci_instance_init,
     .class_init    = vhost_user_scsi_pci_class_init,
@@ -2277,9 +2396,11 @@ static void vhost_vsock_pci_instance_init(Object *obj)
                                 TYPE_VHOST_VSOCK);
 }
 
-static const TypeInfo vhost_vsock_pci_info = {
-    .name          = TYPE_VHOST_VSOCK_PCI,
-    .parent        = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo vhost_vsock_pci_info = {
+    .base_name             = TYPE_VHOST_VSOCK_PCI,
+    .generic_name          = "vhost-vsock-pci",
+    .transitional_name     = "vhost-vsock-pci-transitional",
+    .non_transitional_name = "vhost-vsock-pci-non-transitional",
     .instance_size = sizeof(VHostVSockPCI),
     .instance_init = vhost_vsock_pci_instance_init,
     .class_init    = vhost_vsock_pci_class_init,
@@ -2334,9 +2455,11 @@ static void virtio_balloon_pci_instance_init(Object *obj)
                               "guest-stats-polling-interval", &error_abort);
 }
 
-static const TypeInfo virtio_balloon_pci_info = {
-    .name          = TYPE_VIRTIO_BALLOON_PCI,
-    .parent        = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_balloon_pci_info = {
+    .base_name             = TYPE_VIRTIO_BALLOON_PCI,
+    .generic_name          = "virtio-balloon-pci",
+    .transitional_name     = "virtio-balloon-pci-transitional",
+    .non_transitional_name = "virtio-balloon-pci-non-transitional",
     .instance_size = sizeof(VirtIOBalloonPCI),
     .instance_init = virtio_balloon_pci_instance_init,
     .class_init    = virtio_balloon_pci_class_init,
@@ -2407,9 +2530,11 @@ static void virtio_serial_pci_instance_init(Object *obj)
                                 TYPE_VIRTIO_SERIAL);
 }
 
-static const TypeInfo virtio_serial_pci_info = {
-    .name          = TYPE_VIRTIO_SERIAL_PCI,
-    .parent        = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_serial_pci_info = {
+    .base_name             = TYPE_VIRTIO_SERIAL_PCI,
+    .generic_name          = "virtio-serial-pci",
+    .transitional_name     = "virtio-serial-pci-transitional",
+    .non_transitional_name = "virtio-serial-pci-non-transitional",
     .instance_size = sizeof(VirtIOSerialPCI),
     .instance_init = virtio_serial_pci_instance_init,
     .class_init    = virtio_serial_pci_class_init,
@@ -2462,9 +2587,11 @@ static void virtio_net_pci_instance_init(Object *obj)
                               "bootindex", &error_abort);
 }
 
-static const TypeInfo virtio_net_pci_info = {
-    .name          = TYPE_VIRTIO_NET_PCI,
-    .parent        = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_net_pci_info = {
+    .base_name             = TYPE_VIRTIO_NET_PCI,
+    .generic_name          = "virtio-net-pci",
+    .transitional_name     = "virtio-net-pci-transitional",
+    .non_transitional_name = "virtio-net-pci-non-transitional",
     .instance_size = sizeof(VirtIONetPCI),
     .instance_init = virtio_net_pci_instance_init,
     .class_init    = virtio_net_pci_class_init,
@@ -2513,9 +2640,11 @@ static void virtio_rng_initfn(Object *obj)
                                 TYPE_VIRTIO_RNG);
 }
 
-static const TypeInfo virtio_rng_pci_info = {
-    .name          = TYPE_VIRTIO_RNG_PCI,
-    .parent        = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_rng_pci_info = {
+    .base_name             = TYPE_VIRTIO_RNG_PCI,
+    .generic_name          = "virtio-rng-pci",
+    .transitional_name     = "virtio-rng-pci-transitional",
+    .non_transitional_name = "virtio-rng-pci-non-transitional",
     .instance_size = sizeof(VirtIORngPCI),
     .instance_init = virtio_rng_initfn,
     .class_init    = virtio_rng_pci_class_init,
@@ -2605,24 +2734,24 @@ static const TypeInfo virtio_input_hid_pci_info = {
     .abstract      = true,
 };
 
-static const TypeInfo virtio_keyboard_pci_info = {
-    .name          = TYPE_VIRTIO_KEYBOARD_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_keyboard_pci_info = {
+    .generic_name  = TYPE_VIRTIO_KEYBOARD_PCI,
     .parent        = TYPE_VIRTIO_INPUT_HID_PCI,
     .class_init    = virtio_input_hid_kbd_pci_class_init,
     .instance_size = sizeof(VirtIOInputHIDPCI),
     .instance_init = virtio_keyboard_initfn,
 };
 
-static const TypeInfo virtio_mouse_pci_info = {
-    .name          = TYPE_VIRTIO_MOUSE_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_mouse_pci_info = {
+    .generic_name  = TYPE_VIRTIO_MOUSE_PCI,
     .parent        = TYPE_VIRTIO_INPUT_HID_PCI,
     .class_init    = virtio_input_hid_mouse_pci_class_init,
     .instance_size = sizeof(VirtIOInputHIDPCI),
     .instance_init = virtio_mouse_initfn,
 };
 
-static const TypeInfo virtio_tablet_pci_info = {
-    .name          = TYPE_VIRTIO_TABLET_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_tablet_pci_info = {
+    .generic_name  = TYPE_VIRTIO_TABLET_PCI,
     .parent        = TYPE_VIRTIO_INPUT_HID_PCI,
     .instance_size = sizeof(VirtIOInputHIDPCI),
     .instance_init = virtio_tablet_initfn,
@@ -2637,8 +2766,11 @@ static void virtio_host_initfn(Object *obj)
                                 TYPE_VIRTIO_INPUT_HOST);
 }
 
-static const TypeInfo virtio_host_pci_info = {
-    .name          = TYPE_VIRTIO_INPUT_HOST_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_host_pci_info = {
+    .base_name             = TYPE_VIRTIO_INPUT_HOST_PCI,
+    .generic_name          = "virtio-input-host-pci",
+    .transitional_name     = "virtio-input-host-pci-transitional",
+    .non_transitional_name = "virtio-input-host-pci-non-transitional",
     .parent        = TYPE_VIRTIO_INPUT_PCI,
     .instance_size = sizeof(VirtIOInputHostPCI),
     .instance_init = virtio_host_initfn,
@@ -2692,36 +2824,39 @@ static const TypeInfo virtio_pci_bus_info = {
 
 static void virtio_pci_register_types(void)
 {
-    type_register_static(&virtio_rng_pci_info);
+    /* Base types: */
+    type_register_static(&virtio_pci_bus_info);
+    type_register_static(&virtio_pci_info);
     type_register_static(&virtio_input_pci_info);
     type_register_static(&virtio_input_hid_pci_info);
-    type_register_static(&virtio_keyboard_pci_info);
-    type_register_static(&virtio_mouse_pci_info);
-    type_register_static(&virtio_tablet_pci_info);
+
+    /* Implementations: */
+    virtio_pci_types_register(&virtio_rng_pci_info);
+    virtio_pci_types_register(&virtio_keyboard_pci_info);
+    virtio_pci_types_register(&virtio_mouse_pci_info);
+    virtio_pci_types_register(&virtio_tablet_pci_info);
 #ifdef CONFIG_LINUX
-    type_register_static(&virtio_host_pci_info);
+    virtio_pci_types_register(&virtio_host_pci_info);
 #endif
-    type_register_static(&virtio_pci_bus_info);
-    type_register_static(&virtio_pci_info);
 #ifdef CONFIG_VIRTFS
-    type_register_static(&virtio_9p_pci_info);
+    virtio_pci_types_register(&virtio_9p_pci_info);
 #endif
-    type_register_static(&virtio_blk_pci_info);
+    virtio_pci_types_register(&virtio_blk_pci_info);
 #if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
-    type_register_static(&vhost_user_blk_pci_info);
+    virtio_pci_types_register(&vhost_user_blk_pci_info);
 #endif
-    type_register_static(&virtio_scsi_pci_info);
-    type_register_static(&virtio_balloon_pci_info);
-    type_register_static(&virtio_serial_pci_info);
-    type_register_static(&virtio_net_pci_info);
+    virtio_pci_types_register(&virtio_scsi_pci_info);
+    virtio_pci_types_register(&virtio_balloon_pci_info);
+    virtio_pci_types_register(&virtio_serial_pci_info);
+    virtio_pci_types_register(&virtio_net_pci_info);
 #ifdef CONFIG_VHOST_SCSI
-    type_register_static(&vhost_scsi_pci_info);
+    virtio_pci_types_register(&vhost_scsi_pci_info);
 #endif
 #if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
-    type_register_static(&vhost_user_scsi_pci_info);
+    virtio_pci_types_register(&vhost_user_scsi_pci_info);
 #endif
 #ifdef CONFIG_VHOST_VSOCK
-    type_register_static(&vhost_vsock_pci_info);
+    virtio_pci_types_register(&vhost_vsock_pci_info);
 #endif
 }
 
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index 813082b0d7..29b4216107 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -216,7 +216,7 @@ static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy)
 /*
  * virtio-scsi-pci: This extends VirtioPCIProxy.
  */
-#define TYPE_VIRTIO_SCSI_PCI "virtio-scsi-pci"
+#define TYPE_VIRTIO_SCSI_PCI "virtio-scsi-pci-base"
 #define VIRTIO_SCSI_PCI(obj) \
         OBJECT_CHECK(VirtIOSCSIPCI, (obj), TYPE_VIRTIO_SCSI_PCI)
 
@@ -229,7 +229,7 @@ struct VirtIOSCSIPCI {
 /*
  * vhost-scsi-pci: This extends VirtioPCIProxy.
  */
-#define TYPE_VHOST_SCSI_PCI "vhost-scsi-pci"
+#define TYPE_VHOST_SCSI_PCI "vhost-scsi-pci-base"
 #define VHOST_SCSI_PCI(obj) \
         OBJECT_CHECK(VHostSCSIPCI, (obj), TYPE_VHOST_SCSI_PCI)
 
@@ -239,7 +239,7 @@ struct VHostSCSIPCI {
 };
 #endif
 
-#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci"
+#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci-base"
 #define VHOST_USER_SCSI_PCI(obj) \
         OBJECT_CHECK(VHostUserSCSIPCI, (obj), TYPE_VHOST_USER_SCSI_PCI)
 
@@ -252,7 +252,7 @@ struct VHostUserSCSIPCI {
 /*
  * vhost-user-blk-pci: This extends VirtioPCIProxy.
  */
-#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci"
+#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci-base"
 #define VHOST_USER_BLK_PCI(obj) \
         OBJECT_CHECK(VHostUserBlkPCI, (obj), TYPE_VHOST_USER_BLK_PCI)
 
@@ -265,7 +265,7 @@ struct VHostUserBlkPCI {
 /*
  * virtio-blk-pci: This extends VirtioPCIProxy.
  */
-#define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci"
+#define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci-base"
 #define VIRTIO_BLK_PCI(obj) \
         OBJECT_CHECK(VirtIOBlkPCI, (obj), TYPE_VIRTIO_BLK_PCI)
 
@@ -277,7 +277,7 @@ struct VirtIOBlkPCI {
 /*
  * virtio-balloon-pci: This extends VirtioPCIProxy.
  */
-#define TYPE_VIRTIO_BALLOON_PCI "virtio-balloon-pci"
+#define TYPE_VIRTIO_BALLOON_PCI "virtio-balloon-pci-base"
 #define VIRTIO_BALLOON_PCI(obj) \
         OBJECT_CHECK(VirtIOBalloonPCI, (obj), TYPE_VIRTIO_BALLOON_PCI)
 
@@ -289,7 +289,7 @@ struct VirtIOBalloonPCI {
 /*
  * virtio-serial-pci: This extends VirtioPCIProxy.
  */
-#define TYPE_VIRTIO_SERIAL_PCI "virtio-serial-pci"
+#define TYPE_VIRTIO_SERIAL_PCI "virtio-serial-pci-base"
 #define VIRTIO_SERIAL_PCI(obj) \
         OBJECT_CHECK(VirtIOSerialPCI, (obj), TYPE_VIRTIO_SERIAL_PCI)
 
@@ -301,7 +301,7 @@ struct VirtIOSerialPCI {
 /*
  * virtio-net-pci: This extends VirtioPCIProxy.
  */
-#define TYPE_VIRTIO_NET_PCI "virtio-net-pci"
+#define TYPE_VIRTIO_NET_PCI "virtio-net-pci-base"
 #define VIRTIO_NET_PCI(obj) \
         OBJECT_CHECK(VirtIONetPCI, (obj), TYPE_VIRTIO_NET_PCI)
 
@@ -316,7 +316,7 @@ struct VirtIONetPCI {
 
 #ifdef CONFIG_VIRTFS
 
-#define TYPE_VIRTIO_9P_PCI "virtio-9p-pci"
+#define TYPE_VIRTIO_9P_PCI "virtio-9p-pci-base"
 #define VIRTIO_9P_PCI(obj) \
         OBJECT_CHECK(V9fsPCIState, (obj), TYPE_VIRTIO_9P_PCI)
 
@@ -330,7 +330,7 @@ typedef struct V9fsPCIState {
 /*
  * virtio-rng-pci: This extends VirtioPCIProxy.
  */
-#define TYPE_VIRTIO_RNG_PCI "virtio-rng-pci"
+#define TYPE_VIRTIO_RNG_PCI "virtio-rng-pci-base"
 #define VIRTIO_RNG_PCI(obj) \
         OBJECT_CHECK(VirtIORngPCI, (obj), TYPE_VIRTIO_RNG_PCI)
 
@@ -365,7 +365,7 @@ struct VirtIOInputHIDPCI {
 
 #ifdef CONFIG_LINUX
 
-#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci"
+#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci-base"
 #define VIRTIO_INPUT_HOST_PCI(obj) \
         OBJECT_CHECK(VirtIOInputHostPCI, (obj), TYPE_VIRTIO_INPUT_HOST_PCI)
 
@@ -392,7 +392,7 @@ struct VirtIOGPUPCI {
 /*
  * vhost-vsock-pci: This extends VirtioPCIProxy.
  */
-#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci"
+#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci-base"
 #define VHOST_VSOCK_PCI(obj) \
         OBJECT_CHECK(VHostVSockPCI, (obj), TYPE_VHOST_VSOCK_PCI)
 
@@ -417,4 +417,58 @@ struct VirtIOCryptoPCI {
 /* Virtio ABI version, if we increment this, we break the guest driver. */
 #define VIRTIO_PCI_ABI_VERSION          0
 
+/* Input for virtio_pci_types_register() */
+typedef struct VirtioPCIDeviceTypeInfo {
+    /*
+     * Common base class for the subclasses below.
+     *
+     * Required only if transitional_name or non_transitional_name is set.
+     *
+     * We need a separate base type instead of making all types
+     * inherit from generic_name for two reasons:
+     * 1) generic_name implements INTERFACE_PCIE_DEVICE, but
+     *    transitional_name does not.
+     * 2) generic_name has the "disable-legacy" and "disable-modern"
+ *    properties, transitional_name and non_transitional_name don't.
+     */
+    const char *base_name;
+    /*
+     * Generic device type.  Optional.
+     *
+     * Supports both transitional and non-transitional modes,
+     * using the disable-legacy and disable-modern properties.
+     * If disable-legacy=auto, (non-)transitional mode is selected
+     * depending on the bus where the device is plugged.
+     *
+     * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE,
+     * but PCI Express is supported only in non-transitional mode.
+     *
+     * The only type implemented by QEMU 3.1 and older.
+     */
+    const char *generic_name;
+    /*
+     * The transitional device type.  Optional.
+     *
+     * Implements INTERFACE_CONVENTIONAL_PCI_DEVICE only, as
+     * transitional devices require legacy PIO ports.
+     */
+    const char *transitional_name;
+    /*
+     * The non-transitional device type.  Optional.
+     *
+     * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE.
+     */
+    const char *non_transitional_name;
+
+    /* Parent type.  If NULL, TYPE_VIRTIO_PCI is used */
+    const char *parent;
+
+    /* Same as TypeInfo fields: */
+    size_t instance_size;
+    void (*instance_init)(Object *obj);
+    void (*class_init)(ObjectClass *klass, void *data);
+} VirtioPCIDeviceTypeInfo;
+
+/* Register virtio-pci type(s).  @t must be static. */
+void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t);
+
 #endif
diff --git a/include/elf.h b/include/elf.h
index c151164b63..0ac7911b7b 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -1338,6 +1338,61 @@ typedef struct {
 #define R_IA64_DTPREL64LSB	0xb7	/* @dtprel(sym + add), data8 LSB */
 #define R_IA64_LTOFF_DTPREL22	0xba	/* @ltoff(@dtprel(s+a)), imm22 */
 
+/* RISC-V relocations.  */
+#define R_RISCV_NONE          0
+#define R_RISCV_32            1
+#define R_RISCV_64            2
+#define R_RISCV_RELATIVE      3
+#define R_RISCV_COPY          4
+#define R_RISCV_JUMP_SLOT     5
+#define R_RISCV_TLS_DTPMOD32  6
+#define R_RISCV_TLS_DTPMOD64  7
+#define R_RISCV_TLS_DTPREL32  8
+#define R_RISCV_TLS_DTPREL64  9
+#define R_RISCV_TLS_TPREL32   10
+#define R_RISCV_TLS_TPREL64   11
+#define R_RISCV_BRANCH        16
+#define R_RISCV_JAL           17
+#define R_RISCV_CALL          18
+#define R_RISCV_CALL_PLT      19
+#define R_RISCV_GOT_HI20      20
+#define R_RISCV_TLS_GOT_HI20  21
+#define R_RISCV_TLS_GD_HI20   22
+#define R_RISCV_PCREL_HI20    23
+#define R_RISCV_PCREL_LO12_I  24
+#define R_RISCV_PCREL_LO12_S  25
+#define R_RISCV_HI20          26
+#define R_RISCV_LO12_I        27
+#define R_RISCV_LO12_S        28
+#define R_RISCV_TPREL_HI20    29
+#define R_RISCV_TPREL_LO12_I  30
+#define R_RISCV_TPREL_LO12_S  31
+#define R_RISCV_TPREL_ADD     32
+#define R_RISCV_ADD8          33
+#define R_RISCV_ADD16         34
+#define R_RISCV_ADD32         35
+#define R_RISCV_ADD64         36
+#define R_RISCV_SUB8          37
+#define R_RISCV_SUB16         38
+#define R_RISCV_SUB32         39
+#define R_RISCV_SUB64         40
+#define R_RISCV_GNU_VTINHERIT 41
+#define R_RISCV_GNU_VTENTRY   42
+#define R_RISCV_ALIGN         43
+#define R_RISCV_RVC_BRANCH    44
+#define R_RISCV_RVC_JUMP      45
+#define R_RISCV_RVC_LUI       46
+#define R_RISCV_GPREL_I       47
+#define R_RISCV_GPREL_S       48
+#define R_RISCV_TPREL_I       49
+#define R_RISCV_TPREL_S       50
+#define R_RISCV_RELAX         51
+#define R_RISCV_SUB6          52
+#define R_RISCV_SET6          53
+#define R_RISCV_SET8          54
+#define R_RISCV_SET16         55
+#define R_RISCV_SET32         56
+
 typedef struct elf32_rel {
   Elf32_Addr	r_offset;
   Elf32_Word	r_info;
diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h
index 276dd5afce..ab4f8b6623 100644
--- a/include/exec/helper-head.h
+++ b/include/exec/helper-head.h
@@ -108,6 +108,19 @@
 #define dh_is_signed_env dh_is_signed_ptr
 #define dh_is_signed(t) dh_is_signed_##t
 
+#define dh_callflag_i32  0
+#define dh_callflag_s32  0
+#define dh_callflag_int  0
+#define dh_callflag_i64  0
+#define dh_callflag_s64  0
+#define dh_callflag_f16  0
+#define dh_callflag_f32  0
+#define dh_callflag_f64  0
+#define dh_callflag_ptr  0
+#define dh_callflag_void 0
+#define dh_callflag_noreturn TCG_CALL_NO_RETURN
+#define dh_callflag(t) glue(dh_callflag_, dh_alias(t))
+
 #define dh_sizemask(t, n) \
   ((dh_is_64bit(t) << (n*2)) | (dh_is_signed(t) << (n*2+1)))
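
The practical effect (consumed by the helper-tcg.h hunk below) is that a helper declared with a noreturn return type now picks up TCG_CALL_NO_RETURN in its call flags automatically. An illustrative declaration in a target's helper.h ("raise_error" is a made-up name):

    DEF_HELPER_FLAGS_2(raise_error, TCG_CALL_NO_WG, noreturn, env, i32)

    /* via DEF_HELPER_FLAGS_2, .flags becomes
     *   TCG_CALL_NO_WG | dh_callflag(noreturn)
     * == TCG_CALL_NO_WG | TCG_CALL_NO_RETURN
     */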
 
diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h
index b3bdb0c399..268e0f804b 100644
--- a/include/exec/helper-tcg.h
+++ b/include/exec/helper-tcg.h
@@ -11,36 +11,43 @@
 #define str(s) #s
 
 #define DEF_HELPER_FLAGS_0(NAME, FLAGS, ret) \
-  { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \
+  { .func = HELPER(NAME), .name = str(NAME), \
+    .flags = FLAGS | dh_callflag(ret), \
     .sizemask = dh_sizemask(ret, 0) },
 
 #define DEF_HELPER_FLAGS_1(NAME, FLAGS, ret, t1) \
-  { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \
+  { .func = HELPER(NAME), .name = str(NAME), \
+    .flags = FLAGS | dh_callflag(ret), \
     .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) },
 
 #define DEF_HELPER_FLAGS_2(NAME, FLAGS, ret, t1, t2) \
-  { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \
+  { .func = HELPER(NAME), .name = str(NAME), \
+    .flags = FLAGS | dh_callflag(ret), \
     .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
     | dh_sizemask(t2, 2) },
 
 #define DEF_HELPER_FLAGS_3(NAME, FLAGS, ret, t1, t2, t3) \
-  { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \
+  { .func = HELPER(NAME), .name = str(NAME), \
+    .flags = FLAGS | dh_callflag(ret), \
     .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
     | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) },
 
 #define DEF_HELPER_FLAGS_4(NAME, FLAGS, ret, t1, t2, t3, t4) \
-  { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \
+  { .func = HELPER(NAME), .name = str(NAME), \
+    .flags = FLAGS | dh_callflag(ret), \
     .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
     | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) },
 
 #define DEF_HELPER_FLAGS_5(NAME, FLAGS, ret, t1, t2, t3, t4, t5) \
-  { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \
+  { .func = HELPER(NAME), .name = str(NAME), \
+    .flags = FLAGS | dh_callflag(ret), \
     .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
     | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \
     | dh_sizemask(t5, 5) },
 
 #define DEF_HELPER_FLAGS_6(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6) \
-  { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \
+  { .func = HELPER(NAME), .name = str(NAME), \
+    .flags = FLAGS | dh_callflag(ret), \
     .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
     | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \
     | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) },
diff --git a/include/exec/poison.h b/include/exec/poison.h
index 32d53789f8..ecdc83c147 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -79,6 +79,7 @@
 #pragma GCC poison CONFIG_MOXIE_DIS
 #pragma GCC poison CONFIG_NIOS2_DIS
 #pragma GCC poison CONFIG_PPC_DIS
+#pragma GCC poison CONFIG_RISCV_DIS
 #pragma GCC poison CONFIG_S390_DIS
 #pragma GCC poison CONFIG_SH4_DIS
 #pragma GCC poison CONFIG_SPARC_DIS
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index af8e023968..5021cb9e79 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -40,18 +40,13 @@ enum {
     ACPI_FADT_F_LOW_POWER_S0_IDLE_CAPABLE,
 };
 
-struct AcpiRsdpDescriptor {        /* Root System Descriptor Pointer */
-    uint64_t signature;              /* ACPI signature, contains "RSD PTR " */
-    uint8_t  checksum;               /* To make sum of struct == 0 */
-    uint8_t  oem_id [6];             /* OEM identification */
-    uint8_t  revision;               /* Must be 0 for 1.0, 2 for 2.0 */
-    uint32_t rsdt_physical_address;  /* 32-bit physical address of RSDT */
-    uint32_t length;                 /* XSDT Length in bytes including hdr */
-    uint64_t xsdt_physical_address;  /* 64-bit physical address of XSDT */
-    uint8_t  extended_checksum;      /* Checksum of entire table */
-    uint8_t  reserved [3];           /* Reserved field must be 0 */
-} QEMU_PACKED;
-typedef struct AcpiRsdpDescriptor AcpiRsdpDescriptor;
+typedef struct AcpiRsdpData {
+    uint8_t oem_id[6]; /* OEM identification */
+    uint8_t revision;  /* Must be 0 for 1.0, 2 for 2.0 */
+
+    unsigned *rsdt_tbl_offset;
+    unsigned *xsdt_tbl_offset;
+} AcpiRsdpData;
 
 /* Table structure from Linux kernel (the ACPI tables are under the
    BSD license) */
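
The packed wire-format struct gives way to a small build-time descriptor: instead of poking fields into a memory image, callers describe the RSDP and let build_rsdp() (declared in the aml-build.h hunk below) emit the table plus the checksum/pointer commands for the BIOS linker. A plausible caller, sketched ("BOCHS " is the OEM ID the x86 boards use):

    unsigned rsdt_offset = 0;     /* filled in by the table build code */

    AcpiRsdpData rsdp_data = {
        .revision = 0,            /* ACPI 1.0: RSDT pointer only */
        .oem_id = "BOCHS ",       /* exactly 6 bytes */
        .rsdt_tbl_offset = &rsdt_offset,
        .xsdt_tbl_offset = NULL,
    };
    build_rsdp(tables->rsdp, tables->linker, &rsdp_data);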
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 6c36903c0a..1a563ad756 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -388,6 +388,8 @@ void acpi_add_table(GArray *table_offsets, GArray *table_data);
 void acpi_build_tables_init(AcpiBuildTables *tables);
 void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre);
 void
+build_rsdp(GArray *tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data);
+void
 build_rsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets,
            const char *oem_id, const char *oem_table_id);
 void
diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h
index 8a65f99fc8..8bc4a4c01d 100644
--- a/include/hw/acpi/pcihp.h
+++ b/include/hw/acpi/pcihp.h
@@ -56,10 +56,15 @@ typedef struct AcpiPciHpState {
 void acpi_pcihp_init(Object *owner, AcpiPciHpState *, PCIBus *root,
                      MemoryRegion *address_space_io, bool bridges_enabled);
 
+void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev,
+                                   DeviceState *dev, Error **errp);
 void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
                                DeviceState *dev, Error **errp);
 void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
                                  DeviceState *dev, Error **errp);
+void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
+                                         AcpiPciHpState *s, DeviceState *dev,
+                                         Error **errp);
 
 /* Called on reset */
 void acpi_pcihp_reset(AcpiPciHpState *s);
diff --git a/include/hw/boards.h b/include/hw/boards.h
index f82f28468b..362384815e 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -195,6 +195,7 @@ struct MachineClass {
     const char *hw_version;
     ram_addr_t default_ram_size;
     const char *default_cpu_type;
+    bool default_kernel_irqchip_split;
     bool option_rom_has_mr;
     bool rom_file_has_mr;
     int minimum_page_bits;
diff --git a/include/hw/compat.h b/include/hw/compat.h
index 70958328fe..3ca85b037c 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -2,7 +2,15 @@
 #define HW_COMPAT_H
 
 #define HW_COMPAT_3_1 \
-    /* empty */
+    {\
+        .driver   = "pcie-root-port",\
+        .property = "x-speed",\
+        .value    = "2_5",\
+    },{\
+        .driver   = "pcie-root-port",\
+        .property = "x-width",\
+        .value    = "1",\
+    },
 
 #define HW_COMPAT_3_0 \
     /* empty */
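
These compat entries pin the new pcie-root-port x-speed/x-width properties to their old effective values (2.5 GT/s, x1) on 3.1-era machine types, so guests migrated from 3.1 see unchanged link fields while newer machines can default to faster links. A sketch of how a machine type consumes this, using the SET_MACHINE_COMPAT pattern of this era (the function names are illustrative):

    static void pc_i440fx_3_1_machine_options(MachineClass *m)
    {
        pc_i440fx_4_0_machine_options(m);      /* inherit newer defaults */
        m->alias = NULL;                       /* "pc" points at the newest */
        SET_MACHINE_COMPAT(m, PC_COMPAT_3_1);  /* pulls in HW_COMPAT_3_1 */
    }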
diff --git a/include/hw/smbios/smbios.h b/include/hw/firmware/smbios.h
index eeb5a4d7b6..eeb5a4d7b6 100644
--- a/include/hw/smbios/smbios.h
+++ b/include/hw/firmware/smbios.h
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index ed4e758273..a321cc9691 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -245,6 +245,7 @@ struct IntelIOMMUState {
     OnOffAuto intr_eim;             /* Toggle for EIM capability */
     bool buggy_eim;                 /* Force buggy EIM unless eim=off */
     uint8_t aw_bits;                /* Host/IOVA address width (in bits) */
+    bool dma_drain;                 /* Whether DMA r/w draining enabled */
 
     /*
      * Protects IOMMU states in general.  Currently it protects the
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 9d29c4b1df..c7c0c944e8 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -296,6 +296,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
 
 #define PC_COMPAT_3_1 \
     HW_COMPAT_3_1 \
+    {\
+        .driver   = "intel-iommu",\
+        .property = "dma-drain",\
+        .value    = "off",\
+    },
 
 #define PC_COMPAT_3_0 \
     HW_COMPAT_3_0 \
diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
index 2b22a579a3..dcd9719a2c 100644
--- a/include/hw/i386/x86-iommu.h
+++ b/include/hw/i386/x86-iommu.h
@@ -74,13 +74,15 @@ typedef struct IEC_Notifier IEC_Notifier;
 
 struct X86IOMMUState {
     SysBusDevice busdev;
-    bool intr_supported;        /* Whether vIOMMU supports IR */
+    OnOffAuto intr_supported;   /* Whether vIOMMU supports IR */
     bool dt_supported;          /* Whether vIOMMU supports DT */
     bool pt_supported;          /* Whether vIOMMU supports pass-through */
     IommuType type;             /* IOMMU type - AMD/Intel     */
     QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */
 };
 
+bool x86_iommu_ir_supported(X86IOMMUState *s);
+
 /* Generic IRQ entry information when interrupt remapping is enabled */
 struct X86IOMMUIrq {
     /* Used by both IOAPIC/MSI interrupt remapping */
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index e6514bba23..eb12fa112e 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -737,6 +737,19 @@ static inline int pci_is_express(const PCIDevice *d)
     return d->cap_present & QEMU_PCI_CAP_EXPRESS;
 }
 
+static inline int pci_is_express_downstream_port(const PCIDevice *d)
+{
+    uint8_t type;
+
+    if (!pci_is_express(d) || !d->exp.exp_cap) {
+        return 0;
+    }
+
+    type = pcie_cap_get_type(d);
+
+    return type == PCI_EXP_TYPE_DOWNSTREAM || type == PCI_EXP_TYPE_ROOT_PORT;
+}
+
 static inline uint32_t pci_config_size(const PCIDevice *d)
 {
     return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
index cdff7edfd1..ba488818d2 100644
--- a/include/hw/pci/pci_bridge.h
+++ b/include/hw/pci/pci_bridge.h
@@ -99,6 +99,12 @@ void pci_bridge_reset(DeviceState *qdev);
 void pci_bridge_initfn(PCIDevice *pci_dev, const char *typename);
 void pci_bridge_exitfn(PCIDevice *pci_dev);
 
+void pci_bridge_dev_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+                            Error **errp);
+void pci_bridge_dev_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+                              Error **errp);
+void pci_bridge_dev_unplug_request_cb(HotplugHandler *hotplug_dev,
+                                      DeviceState *dev, Error **errp);
 
 /*
  * before qdev initialization(qdev_init()), this function sets bus_name and
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index b71e369703..cd318646a2 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -126,13 +126,16 @@ uint16_t pcie_find_capability(PCIDevice *dev, uint16_t cap_id);
 void pcie_add_capability(PCIDevice *dev,
                          uint16_t cap_id, uint8_t cap_ver,
                          uint16_t offset, uint16_t size);
+void pcie_sync_bridge_lnk(PCIDevice *dev);
 
 void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn);
 void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num);
 void pcie_ats_init(PCIDevice *dev, uint16_t offset);
 
-void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
-                              Error **errp);
-void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
-                                         DeviceState *dev, Error **errp);
+void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+                           Error **errp);
+void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+                             Error **errp);
+void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
+                                     DeviceState *dev, Error **errp);
 #endif /* QEMU_PCIE_H */
diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h
index 0736014bfd..df242a0caf 100644
--- a/include/hw/pci/pcie_port.h
+++ b/include/hw/pci/pcie_port.h
@@ -49,6 +49,10 @@ struct PCIESlot {
     /* pci express switch port with slot */
     uint8_t     chassis;
     uint16_t    slot;
+
+    PCIExpLinkSpeed speed;
+    PCIExpLinkWidth width;
+
     QLIST_ENTRY(PCIESlot) next;
 };
 
diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h
index a95522a13b..ad4e7808b8 100644
--- a/include/hw/pci/pcie_regs.h
+++ b/include/hw/pci/pcie_regs.h
@@ -34,10 +34,29 @@
 
 /* PCI_EXP_LINK{CAP, STA} */
 /* link speed */
-#define PCI_EXP_LNK_LS_25               1
+typedef enum PCIExpLinkSpeed {
+    QEMU_PCI_EXP_LNK_2_5GT = 1,
+    QEMU_PCI_EXP_LNK_5GT,
+    QEMU_PCI_EXP_LNK_8GT,
+    QEMU_PCI_EXP_LNK_16GT,
+} PCIExpLinkSpeed;
+
+#define QEMU_PCI_EXP_LNKCAP_MLS(speed)  (speed)
+#define QEMU_PCI_EXP_LNKSTA_CLS         QEMU_PCI_EXP_LNKCAP_MLS
+
+typedef enum PCIExpLinkWidth {
+    QEMU_PCI_EXP_LNK_X1 = 1,
+    QEMU_PCI_EXP_LNK_X2 = 2,
+    QEMU_PCI_EXP_LNK_X4 = 4,
+    QEMU_PCI_EXP_LNK_X8 = 8,
+    QEMU_PCI_EXP_LNK_X12 = 12,
+    QEMU_PCI_EXP_LNK_X16 = 16,
+    QEMU_PCI_EXP_LNK_X32 = 32,
+} PCIExpLinkWidth;
 
 #define PCI_EXP_LNK_MLW_SHIFT           ctz32(PCI_EXP_LNKCAP_MLW)
-#define PCI_EXP_LNK_MLW_1               (1 << PCI_EXP_LNK_MLW_SHIFT)
+#define QEMU_PCI_EXP_LNKCAP_MLW(width)  ((width) << PCI_EXP_LNK_MLW_SHIFT)
+#define QEMU_PCI_EXP_LNKSTA_NLW         QEMU_PCI_EXP_LNKCAP_MLW
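
The enums use the register encodings directly, so composing a Link Capabilities value is a matter of OR-ing the two helpers; for example, a hypothetical 8 GT/s x8 port:

    uint32_t lnkcap = QEMU_PCI_EXP_LNKCAP_MLW(QEMU_PCI_EXP_LNK_X8) |
                      QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_8GT);
    /* MLS occupies bits 3:0 (8GT/s == 3) and MLW bits 9:4 (x8 == 8),
     * so lnkcap == 0x83.  The LNKSTA aliases build the Current Link
     * Speed / Negotiated Link Width fields the same way. */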
 
 /* PCI_EXP_LINKCAP */
 #define PCI_EXP_LNKCAP_ASPMS_SHIFT      ctz32(PCI_EXP_LNKCAP_ASPMS)
diff --git a/include/hw/pci/shpc.h b/include/hw/pci/shpc.h
index ee19fecf61..18f6ec1cd5 100644
--- a/include/hw/pci/shpc.h
+++ b/include/hw/pci/shpc.h
@@ -45,10 +45,12 @@ void shpc_free(PCIDevice *dev);
 void shpc_cap_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int len);
 
 
-void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
-                            Error **errp);
-void shpc_device_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
-                                       DeviceState *dev, Error **errp);
+void shpc_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+                         Error **errp);
+void shpc_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+                           Error **errp);
+void shpc_device_unplug_request_cb(HotplugHandler *hotplug_dev,
+                                   DeviceState *dev, Error **errp);
 
 extern VMStateInfo shpc_vmstate_info;
 #define SHPC_VMSTATE(_field, _type,  _test) \
diff --git a/include/hw/ppc/openpic.h b/include/hw/ppc/openpic.h
index 5eb982197d..dad08fe9be 100644
--- a/include/hw/ppc/openpic.h
+++ b/include/hw/ppc/openpic.h
@@ -20,6 +20,8 @@ enum {
     OPENPIC_OUTPUT_NB,
 };
 
+typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines;
+
 #define OPENPIC_MODEL_RAVEN       0
 #define OPENPIC_MODEL_FSL_MPIC_20 1
 #define OPENPIC_MODEL_FSL_MPIC_42 2
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 6279711fe8..2c77a8ba88 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -16,6 +16,7 @@ typedef struct sPAPREventLogEntry sPAPREventLogEntry;
 typedef struct sPAPREventSource sPAPREventSource;
 typedef struct sPAPRPendingHPT sPAPRPendingHPT;
 typedef struct ICSState ICSState;
+typedef struct sPAPRXive sPAPRXive;
 
 #define HPTE64_V_HPTE_DIRTY     0x0000000000000040ULL
 #define SPAPR_ENTRY_POINT       0x100
@@ -175,6 +176,8 @@ struct sPAPRMachineState {
     const char *icp_type;
     int32_t irq_map_nr;
     unsigned long *irq_map;
+    sPAPRXive  *xive;
+    sPAPRIrq *irq;
 
     bool cmd_line_caps[SPAPR_CAP_NUM];
     sPAPRCapabilities def, eff, mig;
@@ -450,7 +453,20 @@ struct sPAPRMachineState {
 #define H_INVALIDATE_PID        0x378
 #define H_REGISTER_PROC_TBL     0x37C
 #define H_SIGNAL_SYS_RESET      0x380
-#define MAX_HCALL_OPCODE        H_SIGNAL_SYS_RESET
+
+#define H_INT_GET_SOURCE_INFO   0x3A8
+#define H_INT_SET_SOURCE_CONFIG 0x3AC
+#define H_INT_GET_SOURCE_CONFIG 0x3B0
+#define H_INT_GET_QUEUE_INFO    0x3B4
+#define H_INT_SET_QUEUE_CONFIG  0x3B8
+#define H_INT_GET_QUEUE_CONFIG  0x3BC
+#define H_INT_SET_OS_REPORTING_LINE 0x3C0
+#define H_INT_GET_OS_REPORTING_LINE 0x3C4
+#define H_INT_ESB               0x3C8
+#define H_INT_SYNC              0x3CC
+#define H_INT_RESET             0x3D0
+
+#define MAX_HCALL_OPCODE        H_INT_RESET
 
 /* The hcalls above are standardized in PAPR and implemented by pHyp
  * as well.
@@ -737,6 +753,7 @@ int spapr_hpt_shift_for_ramsize(uint64_t ramsize);
 void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
                           Error **errp);
 void spapr_clear_pending_events(sPAPRMachineState *spapr);
+int spapr_max_server_number(sPAPRMachineState *spapr);
 
 /* CPU and LMB DRC release callbacks. */
 void spapr_core_release(DeviceState *dev);
@@ -808,5 +825,11 @@ int spapr_caps_post_migration(sPAPRMachineState *spapr);
 
 void spapr_check_pagesize(sPAPRMachineState *spapr, hwaddr pagesize,
                           Error **errp);
+/*
+ * XIVE definitions
+ */
+#define SPAPR_OV5_XIVE_LEGACY   0x0
+#define SPAPR_OV5_XIVE_EXPLOIT  0x40
+#define SPAPR_OV5_XIVE_BOTH     0x80 /* Only to advertise on the platform */
 
 #endif /* HW_SPAPR_H */
diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
index a467ce696e..b34d5a0038 100644
--- a/include/hw/ppc/spapr_irq.h
+++ b/include/hw/ppc/spapr_irq.h
@@ -13,6 +13,7 @@
 /*
  * IRQ range offsets per device type
  */
+#define SPAPR_IRQ_IPI        0x0
 #define SPAPR_IRQ_EPOW       0x1000  /* XICS_IRQ_BASE offset */
 #define SPAPR_IRQ_HOTPLUG    0x1001
 #define SPAPR_IRQ_VIO        0x1100  /* 256 VIO devices */
@@ -32,20 +33,31 @@ void spapr_irq_msi_reset(sPAPRMachineState *spapr);
 typedef struct sPAPRIrq {
     uint32_t    nr_irqs;
     uint32_t    nr_msis;
+    uint8_t     ov5;
 
     void (*init)(sPAPRMachineState *spapr, Error **errp);
     int (*claim)(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
     void (*free)(sPAPRMachineState *spapr, int irq, int num);
     qemu_irq (*qirq)(sPAPRMachineState *spapr, int irq);
     void (*print_info)(sPAPRMachineState *spapr, Monitor *mon);
+    void (*dt_populate)(sPAPRMachineState *spapr, uint32_t nr_servers,
+                        void *fdt, uint32_t phandle);
+    Object *(*cpu_intc_create)(sPAPRMachineState *spapr, Object *cpu,
+                               Error **errp);
+    int (*post_load)(sPAPRMachineState *spapr, int version_id);
+    void (*reset)(sPAPRMachineState *spapr, Error **errp);
 } sPAPRIrq;
 
 extern sPAPRIrq spapr_irq_xics;
 extern sPAPRIrq spapr_irq_xics_legacy;
+extern sPAPRIrq spapr_irq_xive;
 
+void spapr_irq_init(sPAPRMachineState *spapr, Error **errp);
 int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
 void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num);
 qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq);
+int spapr_irq_post_load(sPAPRMachineState *spapr, int version_id);
+void spapr_irq_reset(sPAPRMachineState *spapr, Error **errp);
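
With the backend now hanging off the machine state (the new spapr->irq pointer above), the spapr_irq_* entry points can stay thin dispatchers into the vtable. A sketch of the expected shape, assuming the real implementations live in hw/ppc/spapr_irq.c:

    void spapr_irq_reset(sPAPRMachineState *spapr, Error **errp)
    {
        if (spapr->irq->reset) {
            spapr->irq->reset(spapr, errp);
        }
    }

    qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq)
    {
        return spapr->irq->qirq(spapr, irq);
    }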
 
 /*
  * XICS legacy routines
diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
new file mode 100644
index 0000000000..728735dbcf
--- /dev/null
+++ b/include/hw/ppc/spapr_xive.h
@@ -0,0 +1,52 @@
+/*
+ * QEMU PowerPC sPAPR XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_SPAPR_XIVE_H
+#define PPC_SPAPR_XIVE_H
+
+#include "hw/ppc/xive.h"
+
+#define TYPE_SPAPR_XIVE "spapr-xive"
+#define SPAPR_XIVE(obj) OBJECT_CHECK(sPAPRXive, (obj), TYPE_SPAPR_XIVE)
+
+typedef struct sPAPRXive {
+    XiveRouter    parent;
+
+    /* Internal interrupt source for IPIs and virtual devices */
+    XiveSource    source;
+    hwaddr        vc_base;
+
+    /* END ESB MMIOs */
+    XiveENDSource end_source;
+    hwaddr        end_base;
+
+    /* Routing table */
+    XiveEAS       *eat;
+    uint32_t      nr_irqs;
+    XiveEND       *endt;
+    uint32_t      nr_ends;
+
+    /* TIMA mapping address */
+    hwaddr        tm_base;
+    MemoryRegion  tm_mmio;
+} sPAPRXive;
+
+bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi);
+bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn);
+void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
+qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn);
+
+typedef struct sPAPRMachineState sPAPRMachineState;
+
+void spapr_xive_hcall_init(sPAPRMachineState *spapr);
+void spapr_dt_xive(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt,
+                   uint32_t phandle);
+void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx);
+
+#endif /* PPC_SPAPR_XIVE_H */
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 9958443d19..14afda198c 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -181,8 +181,6 @@ typedef struct XICSFabricClass {
     ICPState *(*icp_get)(XICSFabric *xi, int server);
 } XICSFabricClass;
 
-void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle);
-
 ICPState *xics_icp_get(XICSFabric *xi, int server);
 
 /* Internal XICS interfaces */
@@ -204,6 +202,8 @@ void icp_resend(ICPState *ss);
 
 typedef struct sPAPRMachineState sPAPRMachineState;
 
+void spapr_dt_xics(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt,
+                   uint32_t phandle);
 int xics_kvm_init(sPAPRMachineState *spapr, Error **errp);
 void xics_spapr_init(sPAPRMachineState *spapr);
 
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
new file mode 100644
index 0000000000..18cd114eb2
--- /dev/null
+++ b/include/hw/ppc/xive.h
@@ -0,0 +1,429 @@
+/*
+ * QEMU PowerPC XIVE interrupt controller model
+ *
+ *
+ * The POWER9 processor comes with a new interrupt controller, called
+ * XIVE as "eXternal Interrupt Virtualization Engine".
+ *
+ * = Overall architecture
+ *
+ *
+ *              XIVE Interrupt Controller
+ *              +------------------------------------+      IPIs
+ *              | +---------+ +---------+ +--------+ |    +-------+
+ *              | |VC       | |CQ       | |PC      |----> | CORES |
+ *              | |     esb | |         | |        |----> |       |
+ *              | |     eas | |  Bridge | |   tctx |----> |       |
+ *              | |SC   end | |         | |    nvt | |    |       |
+ *  +------+    | +---------+ +----+----+ +--------+ |    +-+-+-+-+
+ *  | RAM  |    +------------------|-----------------+      | | |
+ *  |      |                       |                        | | |
+ *  |      |                       |                        | | |
+ *  |      |  +--------------------v------------------------v-v-v--+    other
+ *  |      <--+                     Power Bus                      +--> chips
+ *  |  esb |  +---------+-----------------------+------------------+
+ *  |  eas |            |                       |
+ *  |  end |         +--|------+                |
+ *  |  nvt |       +----+----+ |           +----+----+
+ *  +------+       |SC       | |           |SC       |
+ *                 |         | |           |         |
+ *                 | PQ-bits | |           | PQ-bits |
+ *                 | local   |-+           |  in VC  |
+ *                 +---------+             +---------+
+ *                    PCIe                 NX,NPU,CAPI
+ *
+ *                   SC: Source Controller (aka. IVSE)
+ *                   VC: Virtualization Controller (aka. IVRE)
+ *                   PC: Presentation Controller (aka. IVPE)
+ *                   CQ: Common Queue (Bridge)
+ *
+ *              PQ-bits: 2-bit source state machine (P:pending Q:queued)
+ *                  esb: Event State Buffer (Array of PQ bits in an IVSE)
+ *                  eas: Event Assignment Structure
+ *                  end: Event Notification Descriptor
+ *                  nvt: Notification Virtual Target
+ *                 tctx: Thread interrupt Context
+ *
+ *
+ * The XIVE IC is composed of three sub-engines :
+ *
+ * - Interrupt Virtualization Source Engine (IVSE), or Source
+ *   Controller (SC). These are found in PCI PHBs, in the PSI host
+ *   bridge controller, but also inside the main controller for the
+ *   core IPIs and other sub-chips (NX, CAP, NPU) of the
+ *   chip/processor. They are configured to feed the IVRE with events.
+ *
+ * - Interrupt Virtualization Routing Engine (IVRE) or Virtualization
+ *   Controller (VC). Its job is to match an event source with an
+ *   Event Notification Descriptor (END).
+ *
+ * - Interrupt Virtualization Presentation Engine (IVPE) or
+ *   Presentation Controller (PC). It maintains the interrupt context
+ *   state of each thread and handles the delivery of the external
+ *   exception to the thread.
+ *
+ * In XIVE 1.0, the sub-engines used to be referred to as:
+ *
+ *   SC     Source Controller
+ *   VC     Virtualization Controller
+ *   PC     Presentation Controller
+ *   CQ     Common Queue (PowerBUS Bridge)
+ *
+ *
+ * = XIVE internal tables
+ *
+ * Each of the sub-engines uses a set of tables to redirect exceptions
+ * from event sources to CPU threads.
+ *
+ *                                           +-------+
+ *   User or OS                              |  EQ   |
+ *       or                          +------>|entries|
+ *   Hypervisor                      |       |  ..   |
+ *     Memory                        |       +-------+
+ *                                   |           ^
+ *                                   |           |
+ *              +-------------------------------------------------+
+ *                                   |           |
+ *   Hypervisor      +------+    +---+--+    +---+--+   +------+
+ *     Memory        | ESB  |    | EAT  |    | ENDT |   | NVTT |
+ *    (skiboot)      +----+-+    +----+-+    +----+-+   +------+
+ *                     ^  |        ^  |        ^  |       ^
+ *                     |  |        |  |        |  |       |
+ *              +-------------------------------------------------+
+ *                     |  |        |  |        |  |       |
+ *                     |  |        |  |        |  |       |
+ *                +----|--|--------|--|--------|--|-+   +-|-----+    +------+
+ *                |    |  |        |  |        |  | |   | | tctx|    |Thread|
+ *   IPI or   --> |    +  v        +  v        +  v |---| +  .. |----->     |
+ *  HW events --> |                                 |   |       |    |      |
+ *    IVSE        |             IVRE                |   | IVPE  |    +------+
+ *                +---------------------------------+   +-------+
+ *
+ *
+ *
+ * Each IVSE has a 2-bit state machine per source, P for pending and Q
+ * for queued, which controls whether events may be triggered. The state
+ * bits are stored in an Event State Buffer (ESB) array and can be
+ * controlled by MMIOs.
+ *
+ * If the event is let through, the IVRE looks up the Event Assignment
+ * Structure (EAS) table to find the Event Notification Descriptor (END)
+ * configured for the source. Each Event Notification Descriptor defines
+ * a notification path to a CPU and an in-memory Event Queue, into which
+ * event data is enqueued for the OS to pull.
+ *
+ * The IVPE determines if a Notification Virtual Target (NVT) can
+ * handle the event by scanning the thread contexts of the VCPUs
+ * dispatched on the processor HW threads. It maintains the state of
+ * the thread interrupt context (TCTX) of each thread in a NVT table.
+ *
+ * = Acronyms
+ *
+ *          Description                     In XIVE 1.0, used to be referred as
+ *
+ *   EAS    Event Assignment Structure      IVE   Interrupt Virt. Entry
+ *   EAT    Event Assignment Table          IVT   Interrupt Virt. Table
+ *   ENDT   Event Notif. Descriptor Table   EQDT  Event Queue Desc. Table
+ *   EQ     Event Queue                     same
+ *   ESB    Event State Buffer              SBE   State Bit Entry
+ *   NVT    Notif. Virtual Target           VPD   Virtual Processor Desc.
+ *   NVTT   Notif. Virtual Target Table     VPDT  Virtual Processor Desc. Table
+ *   TCTX   Thread interrupt Context
+ *
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef PPC_XIVE_H
+#define PPC_XIVE_H
+
+#include "hw/qdev-core.h"
+#include "hw/sysbus.h"
+#include "hw/ppc/xive_regs.h"
+
+/*
+ * XIVE Fabric (Interface between Source and Router)
+ */
+
+typedef struct XiveNotifier {
+    Object parent;
+} XiveNotifier;
+
+#define TYPE_XIVE_NOTIFIER "xive-notifier"
+#define XIVE_NOTIFIER(obj)                                     \
+    OBJECT_CHECK(XiveNotifier, (obj), TYPE_XIVE_NOTIFIER)
+#define XIVE_NOTIFIER_CLASS(klass)                                     \
+    OBJECT_CLASS_CHECK(XiveNotifierClass, (klass), TYPE_XIVE_NOTIFIER)
+#define XIVE_NOTIFIER_GET_CLASS(obj)                                   \
+    OBJECT_GET_CLASS(XiveNotifierClass, (obj), TYPE_XIVE_NOTIFIER)
+
+typedef struct XiveNotifierClass {
+    InterfaceClass parent;
+    void (*notify)(XiveNotifier *xn, uint32_t lisn);
+} XiveNotifierClass;
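
A source holds a pointer to its XiveNotifier (the xive field of XiveSource below) and calls through the interface class when an event fires; a router then implements .notify. A minimal sketch of the calling side (function name illustrative):

    static void xive_source_notify(XiveSource *xsrc, int srcno)
    {
        XiveNotifierClass *xnc = XIVE_NOTIFIER_GET_CLASS(xsrc->xive);

        if (xnc->notify) {
            xnc->notify(xsrc->xive, srcno);
        }
    }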
+
+/*
+ * XIVE Interrupt Source
+ */
+
+#define TYPE_XIVE_SOURCE "xive-source"
+#define XIVE_SOURCE(obj) OBJECT_CHECK(XiveSource, (obj), TYPE_XIVE_SOURCE)
+
+/*
+ * XIVE Interrupt Source characteristics, which define how the ESBs are
+ * controlled.
+ */
+#define XIVE_SRC_H_INT_ESB     0x1 /* ESB managed with hcall H_INT_ESB */
+#define XIVE_SRC_STORE_EOI     0x2 /* Store EOI supported */
+
+typedef struct XiveSource {
+    DeviceState parent;
+
+    /* IRQs */
+    uint32_t        nr_irqs;
+    qemu_irq        *qirqs;
+    unsigned long   *lsi_map;
+
+    /* PQ bits and LSI assertion bit */
+    uint8_t         *status;
+
+    /* ESB memory region */
+    uint64_t        esb_flags;
+    uint32_t        esb_shift;
+    MemoryRegion    esb_mmio;
+
+    XiveNotifier    *xive;
+} XiveSource;
+
+/*
+ * ESB MMIO setting. It can be one page, covering both source
+ * triggering and source management, or two different pages. See below
+ * for the magic values.
+ */
+#define XIVE_ESB_4K          12 /* PSI HB only */
+#define XIVE_ESB_4K_2PAGE    13
+#define XIVE_ESB_64K         16
+#define XIVE_ESB_64K_2PAGE   17
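+
+/*
+ * With XIVE_ESB_64K_2PAGE, for instance, each source is given a 128K
+ * ESB region: the even 64K page for triggers and the odd 64K page for
+ * management (see the helpers below).
+ */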
+
+static inline bool xive_source_esb_has_2page(XiveSource *xsrc)
+{
+    return xsrc->esb_shift == XIVE_ESB_64K_2PAGE ||
+        xsrc->esb_shift == XIVE_ESB_4K_2PAGE;
+}
+
+/* The trigger page is always the first/even page */
+static inline hwaddr xive_source_esb_page(XiveSource *xsrc, uint32_t srcno)
+{
+    assert(srcno < xsrc->nr_irqs);
+    return (1ull << xsrc->esb_shift) * srcno;
+}
+
+/* In a two-page ESB MMIO setting, the odd page is for management */
+static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno)
+{
+    hwaddr addr = xive_source_esb_page(xsrc, srcno);
+
+    if (xive_source_esb_has_2page(xsrc)) {
+        addr += (1 << (xsrc->esb_shift - 1));
+    }
+
+    return addr;
+}
+
+/*
+ * Each interrupt source has a 2-bit state machine which can be
+ * controlled by MMIO. P indicates that an interrupt is pending (has
+ * been sent to a queue and is waiting for an EOI). Q indicates that
+ * the interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism to guarantee that a given
+ * interrupt occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ */
+#define XIVE_STATUS_ASSERTED  0x4  /* Extra bit for LSI */
+#define XIVE_ESB_VAL_P        0x2
+#define XIVE_ESB_VAL_Q        0x1
+
+#define XIVE_ESB_RESET        0x0
+#define XIVE_ESB_PENDING      XIVE_ESB_VAL_P
+#define XIVE_ESB_QUEUED       (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q)
+#define XIVE_ESB_OFF          XIVE_ESB_VAL_Q
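+
+/*
+ * An illustrative cycle: a trigger moves a RESET (00) source to
+ * PENDING (10) and lets the event through; a trigger received while
+ * PENDING moves the source to QUEUED (11) and is coalesced; the EOI
+ * clears P and, if Q was set, the interrupt is re-triggered.
+ */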
+
+/*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * The following offsets into the ESB MMIO allow reading or
+ * manipulating the PQ bits. They must be used with an 8-byte load
+ * instruction.
+ * They all return the previous state of the interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a store and
+ * some ESBs support doing a trigger via a separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI      0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI       0x000 /* Load */
+#define XIVE_ESB_GET            0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00      0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01      0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10      0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11      0xf00 /* Load */
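+
+/*
+ * A load at the XIVE_ESB_SET_PQ_01 offset, for instance, atomically
+ * parks the source in the OFF (01) state and returns its previous PQ
+ * bits.
+ */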
+
+uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno);
+uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq);
+
+void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset,
+                                Monitor *mon);
+
+static inline qemu_irq xive_source_qirq(XiveSource *xsrc, uint32_t srcno)
+{
+    assert(srcno < xsrc->nr_irqs);
+    return xsrc->qirqs[srcno];
+}
+
+static inline bool xive_source_irq_is_lsi(XiveSource *xsrc, uint32_t srcno)
+{
+    assert(srcno < xsrc->nr_irqs);
+    return test_bit(srcno, xsrc->lsi_map);
+}
+
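+/*
+ * Only level-triggered (LSI) sources need to be recorded in the
+ * bitmap; sources left unmarked are message-based (MSI) sources.
+ */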
+static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno,
+                                       bool lsi)
+{
+    assert(srcno < xsrc->nr_irqs);
+    if (lsi) {
+        bitmap_set(xsrc->lsi_map, srcno, 1);
+    }
+}
+
+/*
+ * XIVE Router
+ */
+
+typedef struct XiveRouter {
+    SysBusDevice    parent;
+} XiveRouter;
+
+#define TYPE_XIVE_ROUTER "xive-router"
+#define XIVE_ROUTER(obj)                                \
+    OBJECT_CHECK(XiveRouter, (obj), TYPE_XIVE_ROUTER)
+#define XIVE_ROUTER_CLASS(klass)                                        \
+    OBJECT_CLASS_CHECK(XiveRouterClass, (klass), TYPE_XIVE_ROUTER)
+#define XIVE_ROUTER_GET_CLASS(obj)                              \
+    OBJECT_GET_CLASS(XiveRouterClass, (obj), TYPE_XIVE_ROUTER)
+
+typedef struct XiveRouterClass {
+    SysBusDeviceClass parent;
+
+    /* XIVE table accessors */
+    int (*get_eas)(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
+                   XiveEAS *eas);
+    int (*get_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                   XiveEND *end);
+    int (*write_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                     XiveEND *end, uint8_t word_number);
+    int (*get_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
+                   XiveNVT *nvt);
+    int (*write_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
+                     XiveNVT *nvt, uint8_t word_number);
+} XiveRouterClass;
+
+void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
+
+int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
+                        XiveEAS *eas);
+int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                        XiveEND *end);
+int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                          XiveEND *end, uint8_t word_number);
+int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
+                        XiveNVT *nvt);
+int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
+                          XiveNVT *nvt, uint8_t word_number);
+
+
+/*
+ * XIVE END ESBs
+ */
+
+#define TYPE_XIVE_END_SOURCE "xive-end-source"
+#define XIVE_END_SOURCE(obj) \
+    OBJECT_CHECK(XiveENDSource, (obj), TYPE_XIVE_END_SOURCE)
+
+typedef struct XiveENDSource {
+    DeviceState parent;
+
+    uint32_t        nr_ends;
+    uint8_t         block_id;
+
+    /* ESB memory region */
+    uint32_t        esb_shift;
+    MemoryRegion    esb_mmio;
+
+    XiveRouter      *xrtr;
+} XiveENDSource;
+
+/*
+ * For legacy compatibility, the exceptions define up to 256 different
+ * priorities. P9 implements only 9 levels: 8 active levels [0 - 7]
+ * and the least favored level 0xFF.
+ */
+#define XIVE_PRIORITY_MAX  7
+
+void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon);
+void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon);
+
+/*
+ * XIVE Thread interrupt Management (TM) context
+ */
+
+#define TYPE_XIVE_TCTX "xive-tctx"
+#define XIVE_TCTX(obj) OBJECT_CHECK(XiveTCTX, (obj), TYPE_XIVE_TCTX)
+
+/*
+ * XIVE Thread interrupt Management register rings:
+ *
+ *   QW-0  User       event-based exception state
+ *   QW-1  O/S        OS context for priority management, interrupt acks
+ *   QW-2  Pool       hypervisor pool context for virtual processors dispatched
+ *   QW-3  Physical   physical thread context and security context
+ */
+#define XIVE_TM_RING_COUNT      4
+#define XIVE_TM_RING_SIZE       0x10
+
+typedef struct XiveTCTX {
+    DeviceState parent_obj;
+
+    CPUState    *cs;
+    qemu_irq    output;
+
+    uint8_t     regs[XIVE_TM_RING_COUNT * XIVE_TM_RING_SIZE];
+} XiveTCTX;
+
+/*
+ * XIVE Thread Interrupt Management Area (TIMA)
+ *
+ * This region gives access to the registers of the thread interrupt
+ * management context. It is four pages wide, each page providing a
+ * different view of the registers. The page with the lowest offset is
+ * the most privileged and gives access to the entire context.
+ */
+#define XIVE_TM_HW_PAGE         0x0
+#define XIVE_TM_HV_PAGE         0x1
+#define XIVE_TM_OS_PAGE         0x2
+#define XIVE_TM_USER_PAGE       0x3
+
+extern const MemoryRegionOps xive_tm_ops;
+
+void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon);
+Object *xive_tctx_create(Object *cpu, XiveRouter *xrtr, Error **errp);
+
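+/*
+ * The CAM line value packs the NVT block number above a 19-bit NVT
+ * index.
+ */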
+static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx)
+{
+    return (nvt_blk << 19) | nvt_idx;
+}
+
+#endif /* PPC_XIVE_H */
diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
new file mode 100644
index 0000000000..bf36678a24
--- /dev/null
+++ b/include/hw/ppc/xive_regs.h
@@ -0,0 +1,235 @@
+/*
+ * QEMU PowerPC XIVE internal structure definitions
+ *
+ *
+ * The XIVE structures are accessed by the HW and their format is
+ * architected to be big-endian. Some macros are provided to ease
+ * access to the different fields.
+ *
+ *
+ * Copyright (c) 2016-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_XIVE_REGS_H
+#define PPC_XIVE_REGS_H
+
+/*
+ * Interrupt source number encoding on PowerBUS
+ */
+#define XIVE_SRCNO_BLOCK(srcno) (((srcno) >> 28) & 0xf)
+#define XIVE_SRCNO_INDEX(srcno) ((srcno) & 0x0fffffff)
+#define XIVE_SRCNO(blk, idx)    ((uint32_t)(blk) << 28 | (idx))
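+
+/*
+ * For example, srcno 0x10000020 decomposes into block 0x1 and index
+ * 0x20, and XIVE_SRCNO(0x1, 0x20) rebuilds the same number.
+ */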
+
+#define TM_SHIFT                16
+
+/* TM register offsets */
+#define TM_QW0_USER             0x000 /* All rings */
+#define TM_QW1_OS               0x010 /* Ring 0..2 */
+#define TM_QW2_HV_POOL          0x020 /* Ring 0..1 */
+#define TM_QW3_HV_PHYS          0x030 /* Ring 0..1 */
+
+/* Byte offsets inside a QW             QW0 QW1 QW2 QW3 */
+#define TM_NSR                  0x0  /*  +   +   -   +  */
+#define TM_CPPR                 0x1  /*  -   +   -   +  */
+#define TM_IPB                  0x2  /*  -   +   +   +  */
+#define TM_LSMFB                0x3  /*  -   +   +   +  */
+#define TM_ACK_CNT              0x4  /*  -   +   -   -  */
+#define TM_INC                  0x5  /*  -   +   -   +  */
+#define TM_AGE                  0x6  /*  -   +   -   +  */
+#define TM_PIPR                 0x7  /*  -   +   -   +  */
+
+#define TM_WORD0                0x0
+#define TM_WORD1                0x4
+
+/*
+ * QW word 2 contains the valid bit at the top and other fields
+ * depending on the QW.
+ */
+#define TM_WORD2                0x8
+#define   TM_QW0W2_VU           PPC_BIT32(0)
+#define   TM_QW0W2_LOGIC_SERV   PPC_BITMASK32(1, 31) /* XX 2,31 ? */
+#define   TM_QW1W2_VO           PPC_BIT32(0)
+#define   TM_QW1W2_OS_CAM       PPC_BITMASK32(8, 31)
+#define   TM_QW2W2_VP           PPC_BIT32(0)
+#define   TM_QW2W2_POOL_CAM     PPC_BITMASK32(8, 31)
+#define   TM_QW3W2_VT           PPC_BIT32(0)
+#define   TM_QW3W2_LP           PPC_BIT32(6)
+#define   TM_QW3W2_LE           PPC_BIT32(7)
+#define   TM_QW3W2_T            PPC_BIT32(31)
+
+/*
+ * In addition to normal loads to "peek" and writes (only when invalid)
+ * using 4- and 8-byte accesses, the above registers support these
+ * "special" byte operations:
+ *
+ *   - Byte load from QW0[NSR] - User level NSR (EBB)
+ *   - Byte store to QW0[NSR] - User level NSR (EBB)
+ *   - Byte load/store to QW1[CPPR] and QW3[CPPR] - CPPR access
+ *   - Byte load from QW3[TM_WORD2] - Read VT||00000||LP||LE on thrd 0
+ *                                    otherwise VT||0000000
+ *   - Byte store to QW3[TM_WORD2] - Set VT bit (and LP/LE if present)
+ *
+ * Then we have all these "special" CI ops at these offsets that trigger
+ * all sorts of side effects:
+ */
+#define TM_SPC_ACK_EBB          0x800   /* Load8 ack EBB to reg */
+#define TM_SPC_ACK_OS_REG       0x810   /* Load16 ack OS irq to reg */
+#define TM_SPC_PUSH_USR_CTX     0x808   /* Store32 Push/Validate user context */
+#define TM_SPC_PULL_USR_CTX     0x808   /* Load32 Pull/Invalidate user
+                                         * context */
+#define TM_SPC_SET_OS_PENDING   0x812   /* Store8 Set OS irq pending bit */
+#define TM_SPC_PULL_OS_CTX      0x818   /* Load32/Load64 Pull/Invalidate OS
+                                         * context to reg */
+#define TM_SPC_PULL_POOL_CTX    0x828   /* Load32/Load64 Pull/Invalidate Pool
+                                         * context to reg */
+#define TM_SPC_ACK_HV_REG       0x830   /* Load16 ack HV irq to reg */
+#define TM_SPC_PULL_USR_CTX_OL  0xc08   /* Store8 Pull/Inval usr ctx to odd
+                                         * line */
+#define TM_SPC_ACK_OS_EL        0xc10   /* Store8 ack OS irq to even line */
+#define TM_SPC_ACK_HV_POOL_EL   0xc20   /* Store8 ack HV evt pool to even
+                                         * line */
+#define TM_SPC_ACK_HV_EL        0xc30   /* Store8 ack HV irq to even line */
+/* XXX more... */
+
+/* NSR fields for the various QW ack types */
+#define TM_QW0_NSR_EB           PPC_BIT8(0)
+#define TM_QW1_NSR_EO           PPC_BIT8(0)
+#define TM_QW3_NSR_HE           PPC_BITMASK8(0, 1)
+#define  TM_QW3_NSR_HE_NONE     0
+#define  TM_QW3_NSR_HE_POOL     1
+#define  TM_QW3_NSR_HE_PHYS     2
+#define  TM_QW3_NSR_HE_LSI      3
+#define TM_QW3_NSR_I            PPC_BIT8(2)
+#define TM_QW3_NSR_GRP_LVL      PPC_BITMASK8(3, 7)
+
+/*
+ * EAS (Event Assignment Structure)
+ *
+ * One per interrupt source. Targets an interrupt to a given Event
+ * Notification Descriptor (END) and provides the corresponding
+ * logical interrupt number (END data)
+ */
+typedef struct XiveEAS {
+        /*
+         * Use a single 64-bit definition to make it easier to perform
+         * atomic updates
+         */
+        uint64_t        w;
+#define EAS_VALID       PPC_BIT(0)
+#define EAS_END_BLOCK   PPC_BITMASK(4, 7)        /* Destination END block# */
+#define EAS_END_INDEX   PPC_BITMASK(8, 31)       /* Destination END index */
+#define EAS_MASKED      PPC_BIT(32)              /* Masked */
+#define EAS_END_DATA    PPC_BITMASK(33, 63)      /* Data written to the END */
+} XiveEAS;
+
+#define xive_eas_is_valid(eas)   (be64_to_cpu((eas)->w) & EAS_VALID)
+#define xive_eas_is_masked(eas)  (be64_to_cpu((eas)->w) & EAS_MASKED)
+
+static inline uint64_t xive_get_field64(uint64_t mask, uint64_t word)
+{
+    return (be64_to_cpu(word) & mask) >> ctz64(mask);
+}
+
+static inline uint64_t xive_set_field64(uint64_t mask, uint64_t word,
+                                        uint64_t value)
+{
+    uint64_t tmp =
+        (be64_to_cpu(word) & ~mask) | ((value << ctz64(mask)) & mask);
+    return cpu_to_be64(tmp);
+}
+
+static inline uint32_t xive_get_field32(uint32_t mask, uint32_t word)
+{
+    return (be32_to_cpu(word) & mask) >> ctz32(mask);
+}
+
+static inline uint32_t xive_set_field32(uint32_t mask, uint32_t word,
+                                        uint32_t value)
+{
+    uint32_t tmp =
+        (be32_to_cpu(word) & ~mask) | ((value << ctz32(mask)) & mask);
+    return cpu_to_be32(tmp);
+}
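+
+/*
+ * For example, the destination END index of an EAS can be extracted
+ * with:
+ *
+ *     uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+ *
+ * which handles both the byte swap and the field shift.
+ */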
+
+/* Event Notification Descriptor (END) */
+typedef struct XiveEND {
+        uint32_t        w0;
+#define END_W0_VALID             PPC_BIT32(0) /* "v" bit */
+#define END_W0_ENQUEUE           PPC_BIT32(1) /* "q" bit */
+#define END_W0_UCOND_NOTIFY      PPC_BIT32(2) /* "n" bit */
+#define END_W0_BACKLOG           PPC_BIT32(3) /* "b" bit */
+#define END_W0_PRECL_ESC_CTL     PPC_BIT32(4) /* "p" bit */
+#define END_W0_ESCALATE_CTL      PPC_BIT32(5) /* "e" bit */
+#define END_W0_UNCOND_ESCALATE   PPC_BIT32(6) /* "u" bit - DD2.0 */
+#define END_W0_SILENT_ESCALATE   PPC_BIT32(7) /* "s" bit - DD2.0 */
+#define END_W0_QSIZE             PPC_BITMASK32(12, 15)
+#define END_W0_SW0               PPC_BIT32(16)
+#define END_W0_FIRMWARE          END_W0_SW0 /* Owned by FW */
+#define END_QSIZE_4K             0
+#define END_QSIZE_64K            4
+#define END_W0_HWDEP             PPC_BITMASK32(24, 31)
+        uint32_t        w1;
+#define END_W1_ESn               PPC_BITMASK32(0, 1)
+#define END_W1_ESn_P             PPC_BIT32(0)
+#define END_W1_ESn_Q             PPC_BIT32(1)
+#define END_W1_ESe               PPC_BITMASK32(2, 3)
+#define END_W1_ESe_P             PPC_BIT32(2)
+#define END_W1_ESe_Q             PPC_BIT32(3)
+#define END_W1_GENERATION        PPC_BIT32(9)
+#define END_W1_PAGE_OFF          PPC_BITMASK32(10, 31)
+        uint32_t        w2;
+#define END_W2_MIGRATION_REG     PPC_BITMASK32(0, 3)
+#define END_W2_OP_DESC_HI        PPC_BITMASK32(4, 31)
+        uint32_t        w3;
+#define END_W3_OP_DESC_LO        PPC_BITMASK32(0, 31)
+        uint32_t        w4;
+#define END_W4_ESC_END_BLOCK     PPC_BITMASK32(4, 7)
+#define END_W4_ESC_END_INDEX     PPC_BITMASK32(8, 31)
+        uint32_t        w5;
+#define END_W5_ESC_END_DATA      PPC_BITMASK32(1, 31)
+        uint32_t        w6;
+#define END_W6_FORMAT_BIT        PPC_BIT32(8)
+#define END_W6_NVT_BLOCK         PPC_BITMASK32(9, 12)
+#define END_W6_NVT_INDEX         PPC_BITMASK32(13, 31)
+        uint32_t        w7;
+#define END_W7_F0_IGNORE         PPC_BIT32(0)
+#define END_W7_F0_BLK_GROUPING   PPC_BIT32(1)
+#define END_W7_F0_PRIORITY       PPC_BITMASK32(8, 15)
+#define END_W7_F1_WAKEZ          PPC_BIT32(0)
+#define END_W7_F1_LOG_SERVER_ID  PPC_BITMASK32(1, 31)
+} XiveEND;
+
+#define xive_end_is_valid(end)    (be32_to_cpu((end)->w0) & END_W0_VALID)
+#define xive_end_is_enqueue(end)  (be32_to_cpu((end)->w0) & END_W0_ENQUEUE)
+#define xive_end_is_notify(end)   (be32_to_cpu((end)->w0) & END_W0_UCOND_NOTIFY)
+#define xive_end_is_backlog(end)  (be32_to_cpu((end)->w0) & END_W0_BACKLOG)
+#define xive_end_is_escalate(end) (be32_to_cpu((end)->w0) & END_W0_ESCALATE_CTL)
+
+/* Notification Virtual Target (NVT) */
+typedef struct XiveNVT {
+        uint32_t        w0;
+#define NVT_W0_VALID             PPC_BIT32(0)
+        uint32_t        w1;
+        uint32_t        w2;
+        uint32_t        w3;
+        uint32_t        w4;
+        uint32_t        w5;
+        uint32_t        w6;
+        uint32_t        w7;
+        uint32_t        w8;
+#define NVT_W8_GRP_VALID         PPC_BIT32(0)
+        uint32_t        w9;
+        uint32_t        wa;
+        uint32_t        wb;
+        uint32_t        wc;
+        uint32_t        wd;
+        uint32_t        we;
+        uint32_t        wf;
+} XiveNVT;
+
+#define xive_nvt_is_valid(nvt)    (be32_to_cpu((nvt)->w0) & NVT_W0_VALID)
+
+#endif /* PPC_XIVE_REGS_H */
diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index 3ab9cd2eb6..b6758c852e 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -36,6 +36,8 @@ extern const PropertyInfo qdev_prop_uuid;
 extern const PropertyInfo qdev_prop_arraylen;
 extern const PropertyInfo qdev_prop_link;
 extern const PropertyInfo qdev_prop_off_auto_pcibar;
+extern const PropertyInfo qdev_prop_pcie_link_speed;
+extern const PropertyInfo qdev_prop_pcie_link_width;
 
 #define DEFINE_PROP(_name, _state, _field, _prop, _type) { \
         .name      = (_name),                                    \
@@ -217,6 +219,12 @@ extern const PropertyInfo qdev_prop_off_auto_pcibar;
 #define DEFINE_PROP_OFF_AUTO_PCIBAR(_n, _s, _f, _d) \
     DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_off_auto_pcibar, \
                         OffAutoPCIBAR)
+#define DEFINE_PROP_PCIE_LINK_SPEED(_n, _s, _f, _d) \
+    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pcie_link_speed, \
+                        PCIExpLinkSpeed)
+#define DEFINE_PROP_PCIE_LINK_WIDTH(_n, _s, _f, _d) \
+    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pcie_link_width, \
+                        PCIExpLinkWidth)
 
 #define DEFINE_PROP_UUID(_name, _state, _field) {                  \
         .name      = (_name),                                      \
diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h
index cbd7552e7a..47ef9de869 100644
--- a/include/hw/s390x/tod.h
+++ b/include/hw/s390x/tod.h
@@ -56,7 +56,7 @@ typedef struct S390TODClass {
 /* Converts ns to s390's clock format */
 static inline uint64_t time2tod(uint64_t ns)
 {
-    return (ns << 9) / 125 + (((ns & 0xff10000000000000ull) / 125) << 9);
+    return (ns << 9) / 125 + (((ns & 0xff80000000000000ull) / 125) << 9);
 }
 
 /* Converts s390's clock format to ns */
diff --git a/include/hw/smbios/ipmi.h b/include/hw/smbios/ipmi.h
deleted file mode 100644
index 1c9aae38f2..0000000000
--- a/include/hw/smbios/ipmi.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * IPMI SMBIOS firmware handling
- *
- * Copyright (c) 2015,2016 Corey Minyard, MontaVista Software, LLC
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#ifndef QEMU_SMBIOS_IPMI_H
-#define QEMU_SMBIOS_IPMI_H
-
-void smbios_build_type_38_table(void);
-
-#endif /* QEMU_SMBIOS_IPMI_H */
diff --git a/include/qemu/vfio-helpers.h b/include/qemu/vfio-helpers.h
index ce7e7b057f..1f057c2b9e 100644
--- a/include/qemu/vfio-helpers.h
+++ b/include/qemu/vfio-helpers.h
@@ -12,7 +12,6 @@
 
 #ifndef QEMU_VFIO_HELPERS_H
 #define QEMU_VFIO_HELPERS_H
-#include "qemu/typedefs.h"
 
 typedef struct QEMUVFIOState QEMUVFIOState;
 
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index c8efdeb376..e0d15da937 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -62,6 +62,7 @@ void qemu_register_wakeup_support(void);
 void qemu_system_shutdown_request(ShutdownCause reason);
 void qemu_system_powerdown_request(void);
 void qemu_register_powerdown_notifier(Notifier *notifier);
+void qemu_register_shutdown_notifier(Notifier *notifier);
 void qemu_system_debug_request(void);
 void qemu_system_vmstop_request(RunState reason);
 void qemu_system_vmstop_request_prepare(void);
diff --git a/include/sysemu/whpx.h b/include/sysemu/whpx.h
index 89592ae4fa..d200ee01d0 100644
--- a/include/sysemu/whpx.h
+++ b/include/sysemu/whpx.h
@@ -13,7 +13,6 @@
 #ifndef QEMU_WHPX_H
 #define QEMU_WHPX_H
 
-#include "config-host.h"
 #include "qemu-common.h"
 
 int whpx_init_vcpu(CPUState *cpu);
diff --git a/linux-user/host/riscv32/hostdep.h b/linux-user/host/riscv32/hostdep.h
new file mode 100644
index 0000000000..adf9edbf2d
--- /dev/null
+++ b/linux-user/host/riscv32/hostdep.h
@@ -0,0 +1,11 @@
+/*
+ * hostdep.h : things which are dependent on the host architecture
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef RISCV32_HOSTDEP_H
+#define RISCV32_HOSTDEP_H
+
+#endif
diff --git a/linux-user/host/riscv64/hostdep.h b/linux-user/host/riscv64/hostdep.h
new file mode 100644
index 0000000000..865f0fb9ff
--- /dev/null
+++ b/linux-user/host/riscv64/hostdep.h
@@ -0,0 +1,34 @@
+/*
+ * hostdep.h : things which are dependent on the host architecture
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef RISCV64_HOSTDEP_H
+#define RISCV64_HOSTDEP_H
+
+/* We have a safe-syscall.inc.S */
+#define HAVE_SAFE_SYSCALL
+
+#ifndef __ASSEMBLER__
+
+/* These are defined by the safe-syscall.inc.S file */
+extern char safe_syscall_start[];
+extern char safe_syscall_end[];
+
+/* Adjust the signal context to rewind out of safe-syscall if we're in it */
+static inline void rewind_if_in_safe_syscall(void *puc)
+{
+    ucontext_t *uc = puc;
+    unsigned long *pcreg = &uc->uc_mcontext.__gregs[REG_PC];
+
+    if (*pcreg > (uintptr_t)safe_syscall_start
+        && *pcreg < (uintptr_t)safe_syscall_end) {
+        *pcreg = (uintptr_t)safe_syscall_start;
+    }
+}
+
+#endif /* __ASSEMBLER__ */
+
+#endif
diff --git a/linux-user/host/riscv64/safe-syscall.inc.S b/linux-user/host/riscv64/safe-syscall.inc.S
new file mode 100644
index 0000000000..9ca3fbfd1e
--- /dev/null
+++ b/linux-user/host/riscv64/safe-syscall.inc.S
@@ -0,0 +1,77 @@
+/*
+ * safe-syscall.inc.S : host-specific assembly fragment
+ * to handle signals occurring at the same time as system calls.
+ * This is intended to be included by linux-user/safe-syscall.S
+ *
+ * Written by Richard Henderson <rth@twiddle.net>
+ * Copyright (C) 2018 Linaro, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+	.global safe_syscall_base
+	.global safe_syscall_start
+	.global safe_syscall_end
+	.type	safe_syscall_base, @function
+	.type	safe_syscall_start, @function
+	.type	safe_syscall_end, @function
+
+	/*
+	 * This is the entry point for making a system call. The calling
+	 * convention here is that of a C varargs function with the
+	 * first argument an 'int *' to the signal_pending flag, the
+	 * second one the system call number (as a 'long'), and all further
+	 * arguments being syscall arguments (also 'long').
+	 * We return a long which is the syscall's return value, which
+	 * may be negative-errno on failure. Conversion to the
+	 * -1-and-errno-set convention is done by the calling wrapper.
+	 */
+safe_syscall_base:
+	.cfi_startproc
+	/*
+	 * The syscall calling convention is nearly the same as C:
+	 * we enter with a0 == *signal_pending
+	 *               a1 == syscall number
+	 *               a2 ... a7 == syscall arguments
+	 *               and return the result in a0
+	 * and the syscall instruction needs
+	 *               a7 == syscall number
+	 *               a0 ... a5 == syscall arguments
+	 *               and returns the result in a0
+	 * Shuffle everything around appropriately.
+	 */
+	mv	t0, a0		/* signal_pending pointer */
+	mv	t1, a1		/* syscall number */
+	mv	a0, a2		/* syscall arguments */
+	mv	a1, a3
+	mv	a2, a4
+	mv	a3, a5
+	mv	a4, a6
+	mv	a5, a7
+	mv	a7, t1
+
+	/*
+	 * This next sequence of code works in conjunction with the
+	 * rewind_if_in_safe_syscall(). If a signal is taken
+	 * and the interrupted PC is anywhere between 'safe_syscall_start'
+	 * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
+	 * The code sequence must therefore be able to cope with this, and
+	 * the syscall instruction must be the final one in the sequence.
+	 */
+safe_syscall_start:
+	/* If signal_pending is non-zero, don't do the call */
+	lw	t1, 0(t0)
+	bnez	t1, 0f
+	scall
+safe_syscall_end:
+	/* code path for having successfully executed the syscall */
+	ret
+
+0:
+	/* code path when we didn't execute the syscall */
+	li	a0, -TARGET_ERESTARTSYS
+	ret
+	.cfi_endproc
+
+	.size	safe_syscall_base, .-safe_syscall_base
diff --git a/qapi/common.json b/qapi/common.json
index 021174f04e..99d313ef3b 100644
--- a/qapi/common.json
+++ b/qapi/common.json
@@ -128,6 +128,48 @@
   'data': [ 'off', 'auto', 'bar0', 'bar1', 'bar2', 'bar3', 'bar4', 'bar5' ] }
 
 ##
+# @PCIELinkSpeed:
+#
+# An enumeration of PCIe link speeds in units of GT/s
+#
+# @2_5: 2.5GT/s
+#
+# @5: 5.0GT/s
+#
+# @8: 8.0GT/s
+#
+# @16: 16.0GT/s
+#
+# Since: 4.0
+##
+{ 'enum': 'PCIELinkSpeed',
+  'data': [ '2_5', '5', '8', '16' ] }
+
+##
+# @PCIELinkWidth:
+#
+# An enumeration of PCIe link width
+#
+# @1: x1
+#
+# @2: x2
+#
+# @4: x4
+#
+# @8: x8
+#
+# @12: x12
+#
+# @16: x16
+#
+# @32: x32
+#
+# Since: 4.0
+##
+{ 'enum': 'PCIELinkWidth',
+  'data': [ '1', '2', '4', '8', '12', '16', '32' ] }
+
+##
 # @SysEmuTarget:
 #
 # The comprehensive enumeration of QEMU system emulation ("softmmu")
diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
index 65b6dc2f6f..3bbdfcee84 100644
--- a/qapi/qapi-schema.json
+++ b/qapi/qapi-schema.json
@@ -86,6 +86,7 @@
 { 'include': 'char.json' }
 { 'include': 'job.json' }
 { 'include': 'net.json' }
+{ 'include': 'rdma.json' }
 { 'include': 'rocker.json' }
 { 'include': 'tpm.json' }
 { 'include': 'ui.json' }
diff --git a/qapi/rdma.json b/qapi/rdma.json
new file mode 100644
index 0000000000..b58105b1b6
--- /dev/null
+++ b/qapi/rdma.json
@@ -0,0 +1,38 @@
+# -*- Mode: Python -*-
+#
+
+##
+# = RDMA device
+##
+
+##
+# @RDMA_GID_STATUS_CHANGED:
+#
+# Emitted when the guest driver adds or deletes a GID to or from the device
+#
+# @netdev: RoCE Network Device name
+#
+# @gid-status: Add or delete indication
+#
+# @subnet-prefix: Subnet Prefix
+#
+# @interface-id: Interface ID
+#
+# Since: 4.0
+#
+# Example:
+#
+# <- {"timestamp": {"seconds": 1541579657, "microseconds": 986760},
+#     "event": "RDMA_GID_STATUS_CHANGED",
+#     "data":
+#         {"netdev": "bridge0",
+#         "interface-id": 15880512517475447892,
+#         "gid-status": true,
+#         "subnet-prefix": 33022}}
+#
+##
+{ 'event': 'RDMA_GID_STATUS_CHANGED',
+  'data': { 'netdev'        : 'str',
+            'gid-status'    : 'bool',
+            'subnet-prefix' : 'uint64',
+            'interface-id'  : 'uint64' } }
diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi
index e362d37225..c3735b698e 100644
--- a/qemu-deprecated.texi
+++ b/qemu-deprecated.texi
@@ -134,7 +134,7 @@ their usecases.
 
 @section System emulator machines
 
-@subsection pc-0.10 and pc-0.11 (since 3.0)
+@subsection pc-0.12, pc-0.13, pc-0.14 and pc-0.15 (since 4.0)
 
 These machine types are very old and likely can not be used for live migration
 from old QEMU versions anymore. A newer machine type should be used instead.
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 2395171acf..20b2d325d8 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -11,12 +11,13 @@
  *
  */
 
+#include "qemu/osdep.h"
+
 #include <linux/kvm.h>
 #include <linux/psp-sev.h>
 
 #include <sys/ioctl.h>
 
-#include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qom/object_interfaces.h"
 #include "qemu/base64.h"
diff --git a/target/i386/whp-dispatch.h b/target/i386/whp-dispatch.h
index d8d3485976..4ae3cc8fa5 100644
--- a/target/i386/whp-dispatch.h
+++ b/target/i386/whp-dispatch.h
@@ -1,5 +1,4 @@
 #include "windows.h"
-#include <stdbool.h>
 
 #include <WinHvPlatform.h>
 #include <WinHvEmulation.h>
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index ab68abe8a2..d5f99f1fc7 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -70,26 +70,14 @@
 #define PPC_ELF_MACHINE     EM_PPC
 #endif
 
-#define PPC_BIT(bit)            (0x8000000000000000UL >> (bit))
-#define PPC_BIT32(bit)          (0x80000000UL >> (bit))
-#define PPC_BIT8(bit)           (0x80UL >> (bit))
+#define PPC_BIT(bit)            (0x8000000000000000ULL >> (bit))
+#define PPC_BIT32(bit)          (0x80000000 >> (bit))
+#define PPC_BIT8(bit)           (0x80 >> (bit))
 #define PPC_BITMASK(bs, be)     ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
 #define PPC_BITMASK32(bs, be)   ((PPC_BIT32(bs) - PPC_BIT32(be)) | \
                                  PPC_BIT32(bs))
 #define PPC_BITMASK8(bs, be)    ((PPC_BIT8(bs) - PPC_BIT8(be)) | PPC_BIT8(bs))
 
-#if HOST_LONG_BITS == 32
-# define MASK_TO_LSH(m)          (__builtin_ffsll(m) - 1)
-#elif HOST_LONG_BITS == 64
-# define MASK_TO_LSH(m)          (__builtin_ffsl(m) - 1)
-#else
-# error Unknown sizeof long
-#endif
-
-#define GETFIELD(m, v)          (((v) & (m)) >> MASK_TO_LSH(m))
-#define SETFIELD(m, v, val)                             \
-        (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_TO_LSH(m)) & (m)))
-
 /*****************************************************************************/
 /* Exception vectors definitions                                             */
 enum {
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 2b37910248..96894ab9a8 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -849,7 +849,7 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
 
 static inline void gen_op_arith_compute_ca32(DisasContext *ctx,
                                              TCGv res, TCGv arg0, TCGv arg1,
-                                             int sub)
+                                             TCGv ca32, int sub)
 {
     TCGv t0;
 
@@ -864,13 +864,14 @@ static inline void gen_op_arith_compute_ca32(DisasContext *ctx,
         tcg_gen_xor_tl(t0, arg0, arg1);
     }
     tcg_gen_xor_tl(t0, t0, res);
-    tcg_gen_extract_tl(cpu_ca32, t0, 32, 1);
+    tcg_gen_extract_tl(ca32, t0, 32, 1);
     tcg_temp_free(t0);
 }
 
 /* Common add function */
 static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
-                                    TCGv arg2, bool add_ca, bool compute_ca,
+                                    TCGv arg2, TCGv ca, TCGv ca32,
+                                    bool add_ca, bool compute_ca,
                                     bool compute_ov, bool compute_rc0)
 {
     TCGv t0 = ret;
@@ -888,29 +889,29 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
             tcg_gen_xor_tl(t1, arg1, arg2);        /* add without carry */
             tcg_gen_add_tl(t0, arg1, arg2);
             if (add_ca) {
-                tcg_gen_add_tl(t0, t0, cpu_ca);
+                tcg_gen_add_tl(t0, t0, ca);
             }
-            tcg_gen_xor_tl(cpu_ca, t0, t1);        /* bits changed w/ carry */
+            tcg_gen_xor_tl(ca, t0, t1);        /* bits changed w/ carry */
             tcg_temp_free(t1);
-            tcg_gen_extract_tl(cpu_ca, cpu_ca, 32, 1);
+            tcg_gen_extract_tl(ca, ca, 32, 1);
             if (is_isa300(ctx)) {
-                tcg_gen_mov_tl(cpu_ca32, cpu_ca);
+                tcg_gen_mov_tl(ca32, ca);
             }
         } else {
             TCGv zero = tcg_const_tl(0);
             if (add_ca) {
-                tcg_gen_add2_tl(t0, cpu_ca, arg1, zero, cpu_ca, zero);
-                tcg_gen_add2_tl(t0, cpu_ca, t0, cpu_ca, arg2, zero);
+                tcg_gen_add2_tl(t0, ca, arg1, zero, ca, zero);
+                tcg_gen_add2_tl(t0, ca, t0, ca, arg2, zero);
             } else {
-                tcg_gen_add2_tl(t0, cpu_ca, arg1, zero, arg2, zero);
+                tcg_gen_add2_tl(t0, ca, arg1, zero, arg2, zero);
             }
-            gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 0);
+            gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, ca32, 0);
             tcg_temp_free(zero);
         }
     } else {
         tcg_gen_add_tl(t0, arg1, arg2);
         if (add_ca) {
-            tcg_gen_add_tl(t0, t0, cpu_ca);
+            tcg_gen_add_tl(t0, t0, ca);
         }
     }
 
@@ -927,40 +928,44 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
     }
 }
 /* Add functions with two operands */
-#define GEN_INT_ARITH_ADD(name, opc3, add_ca, compute_ca, compute_ov)         \
+#define GEN_INT_ARITH_ADD(name, opc3, ca, add_ca, compute_ca, compute_ov)     \
 static void glue(gen_, name)(DisasContext *ctx)                               \
 {                                                                             \
     gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)],                           \
                      cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)],      \
+                     ca, glue(ca, 32),                                        \
                      add_ca, compute_ca, compute_ov, Rc(ctx->opcode));        \
 }
 /* Add functions with one operand and one immediate */
-#define GEN_INT_ARITH_ADD_CONST(name, opc3, const_val,                        \
+#define GEN_INT_ARITH_ADD_CONST(name, opc3, const_val, ca,                    \
                                 add_ca, compute_ca, compute_ov)               \
 static void glue(gen_, name)(DisasContext *ctx)                               \
 {                                                                             \
     TCGv t0 = tcg_const_tl(const_val);                                        \
     gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)],                           \
                      cpu_gpr[rA(ctx->opcode)], t0,                            \
+                     ca, glue(ca, 32),                                        \
                      add_ca, compute_ca, compute_ov, Rc(ctx->opcode));        \
     tcg_temp_free(t0);                                                        \
 }
 
 /* add  add.  addo  addo. */
-GEN_INT_ARITH_ADD(add, 0x08, 0, 0, 0)
-GEN_INT_ARITH_ADD(addo, 0x18, 0, 0, 1)
+GEN_INT_ARITH_ADD(add, 0x08, cpu_ca, 0, 0, 0)
+GEN_INT_ARITH_ADD(addo, 0x18, cpu_ca, 0, 0, 1)
 /* addc  addc.  addco  addco. */
-GEN_INT_ARITH_ADD(addc, 0x00, 0, 1, 0)
-GEN_INT_ARITH_ADD(addco, 0x10, 0, 1, 1)
+GEN_INT_ARITH_ADD(addc, 0x00, cpu_ca, 0, 1, 0)
+GEN_INT_ARITH_ADD(addco, 0x10, cpu_ca, 0, 1, 1)
 /* adde  adde.  addeo  addeo. */
-GEN_INT_ARITH_ADD(adde, 0x04, 1, 1, 0)
-GEN_INT_ARITH_ADD(addeo, 0x14, 1, 1, 1)
+GEN_INT_ARITH_ADD(adde, 0x04, cpu_ca, 1, 1, 0)
+GEN_INT_ARITH_ADD(addeo, 0x14, cpu_ca, 1, 1, 1)
 /* addme  addme.  addmeo  addmeo.  */
-GEN_INT_ARITH_ADD_CONST(addme, 0x07, -1LL, 1, 1, 0)
-GEN_INT_ARITH_ADD_CONST(addmeo, 0x17, -1LL, 1, 1, 1)
+GEN_INT_ARITH_ADD_CONST(addme, 0x07, -1LL, cpu_ca, 1, 1, 0)
+GEN_INT_ARITH_ADD_CONST(addmeo, 0x17, -1LL, cpu_ca, 1, 1, 1)
+/* addex */
+GEN_INT_ARITH_ADD(addex, 0x05, cpu_ov, 1, 1, 0);
 /* addze  addze.  addzeo  addzeo.*/
-GEN_INT_ARITH_ADD_CONST(addze, 0x06, 0, 1, 1, 0)
-GEN_INT_ARITH_ADD_CONST(addzeo, 0x16, 0, 1, 1, 1)
+GEN_INT_ARITH_ADD_CONST(addze, 0x06, 0, cpu_ca, 1, 1, 0)
+GEN_INT_ARITH_ADD_CONST(addzeo, 0x16, 0, cpu_ca, 1, 1, 1)
 /* addi */
 static void gen_addi(DisasContext *ctx)
 {
@@ -979,7 +984,7 @@ static inline void gen_op_addic(DisasContext *ctx, bool compute_rc0)
 {
     TCGv c = tcg_const_tl(SIMM(ctx->opcode));
     gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
-                     c, 0, 1, 0, compute_rc0);
+                     c, cpu_ca, cpu_ca32, 0, 1, 0, compute_rc0);
     tcg_temp_free(c);
 }
 
@@ -1432,13 +1437,13 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1,
             zero = tcg_const_tl(0);
             tcg_gen_add2_tl(t0, cpu_ca, arg2, zero, cpu_ca, zero);
             tcg_gen_add2_tl(t0, cpu_ca, t0, cpu_ca, inv1, zero);
-            gen_op_arith_compute_ca32(ctx, t0, inv1, arg2, 0);
+            gen_op_arith_compute_ca32(ctx, t0, inv1, arg2, cpu_ca32, 0);
             tcg_temp_free(zero);
             tcg_temp_free(inv1);
         } else {
             tcg_gen_setcond_tl(TCG_COND_GEU, cpu_ca, arg2, arg1);
             tcg_gen_sub_tl(t0, arg2, arg1);
-            gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 1);
+            gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, cpu_ca32, 1);
         }
     } else if (add_ca) {
         /* Since we're ignoring carry-out, we can simplify the
@@ -7087,6 +7092,7 @@ GEN_INT_ARITH_ADD(adde, 0x04, 1, 1, 0)
 GEN_INT_ARITH_ADD(addeo, 0x14, 1, 1, 1)
 GEN_INT_ARITH_ADD_CONST(addme, 0x07, -1LL, 1, 1, 0)
 GEN_INT_ARITH_ADD_CONST(addmeo, 0x17, -1LL, 1, 1, 1)
+GEN_HANDLER_E(addex, 0x1F, 0x0A, 0x05, 0x00000000, PPC_NONE, PPC2_ISA300),
 GEN_INT_ARITH_ADD_CONST(addze, 0x06, 0, 1, 1, 0)
 GEN_INT_ARITH_ADD_CONST(addzeo, 0x16, 0, 1, 1, 1)
 
diff --git a/target/ppc/translate/vmx-ops.inc.c b/target/ppc/translate/vmx-ops.inc.c
index 139f80cb24..84e05fb827 100644
--- a/target/ppc/translate/vmx-ops.inc.c
+++ b/target/ppc/translate/vmx-ops.inc.c
@@ -143,7 +143,7 @@ GEN_VXFORM(vaddsws, 0, 14),
 GEN_VXFORM_DUAL(vsububs, bcdadd, 0, 24, PPC_ALTIVEC, PPC_NONE),
 GEN_VXFORM_DUAL(vsubuhs, bcdsub, 0, 25, PPC_ALTIVEC, PPC_NONE),
 GEN_VXFORM(vsubuws, 0, 26),
-GEN_VXFORM_DUAL(vsubsbs, bcdtrunc, 0, 28, PPC_NONE, PPC2_ISA300),
+GEN_VXFORM_DUAL(vsubsbs, bcdtrunc, 0, 28, PPC_ALTIVEC, PPC2_ISA300),
 GEN_VXFORM(vsubshs, 0, 29),
 GEN_VXFORM_DUAL(vsubsws, xpnd04_2, 0, 30, PPC_ALTIVEC, PPC_NONE),
 GEN_VXFORM_207(vadduqm, 0, 4),
diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c
index 168d0cec28..03f1d34a97 100644
--- a/target/ppc/translate_init.inc.c
+++ b/target/ppc/translate_init.inc.c
@@ -9081,13 +9081,13 @@ static void init_ppc_proc(PowerPCCPU *cpu)
             nb_tlb *= 2;
         switch (env->tlb_type) {
         case TLB_6XX:
-            env->tlb.tlb6 = g_malloc0(nb_tlb * sizeof(ppc6xx_tlb_t));
+            env->tlb.tlb6 = g_new0(ppc6xx_tlb_t, nb_tlb);
             break;
         case TLB_EMB:
-            env->tlb.tlbe = g_malloc0(nb_tlb * sizeof(ppcemb_tlb_t));
+            env->tlb.tlbe = g_new0(ppcemb_tlb_t, nb_tlb);
             break;
         case TLB_MAS:
-            env->tlb.tlbm = g_malloc0(nb_tlb * sizeof(ppcmas_tlb_t));
+            env->tlb.tlbm = g_new0(ppcmas_tlb_t, nb_tlb);
             break;
         }
         /* Pre-compute some useful values */
diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c
index fdb87d8d82..01b45ca0ae 100644
--- a/target/riscv/fpu_helper.c
+++ b/target/riscv/fpu_helper.c
@@ -17,7 +17,6 @@
  */
 
 #include "qemu/osdep.h"
-#include <stdlib.h>
 #include "cpu.h"
 #include "qemu/host-utils.h"
 #include "exec/exec-all.h"
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
new file mode 100644
index 0000000000..60918cacb4
--- /dev/null
+++ b/tcg/riscv/tcg-target.h
@@ -0,0 +1,177 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2018 SiFive, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef RISCV_TCG_TARGET_H
+#define RISCV_TCG_TARGET_H
+
+#if __riscv_xlen == 32
+# define TCG_TARGET_REG_BITS 32
+#elif __riscv_xlen == 64
+# define TCG_TARGET_REG_BITS 64
+#endif
+
+#define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 20
+#define TCG_TARGET_NB_REGS 32
+
+typedef enum {
+    TCG_REG_ZERO,
+    TCG_REG_RA,
+    TCG_REG_SP,
+    TCG_REG_GP,
+    TCG_REG_TP,
+    TCG_REG_T0,
+    TCG_REG_T1,
+    TCG_REG_T2,
+    TCG_REG_S0,
+    TCG_REG_S1,
+    TCG_REG_A0,
+    TCG_REG_A1,
+    TCG_REG_A2,
+    TCG_REG_A3,
+    TCG_REG_A4,
+    TCG_REG_A5,
+    TCG_REG_A6,
+    TCG_REG_A7,
+    TCG_REG_S2,
+    TCG_REG_S3,
+    TCG_REG_S4,
+    TCG_REG_S5,
+    TCG_REG_S6,
+    TCG_REG_S7,
+    TCG_REG_S8,
+    TCG_REG_S9,
+    TCG_REG_S10,
+    TCG_REG_S11,
+    TCG_REG_T3,
+    TCG_REG_T4,
+    TCG_REG_T5,
+    TCG_REG_T6,
+
+    /* aliases */
+    TCG_AREG0          = TCG_REG_S0,
+    TCG_GUEST_BASE_REG = TCG_REG_S1,
+    TCG_REG_TMP0       = TCG_REG_T6,
+    TCG_REG_TMP1       = TCG_REG_T5,
+    TCG_REG_TMP2       = TCG_REG_T4,
+} TCGReg;
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK              TCG_REG_SP
+#define TCG_TARGET_STACK_ALIGN          16
+#define TCG_TARGET_CALL_ALIGN_ARGS      1
+#define TCG_TARGET_CALL_STACK_OFFSET    0
+
+/* optional instructions */
+#define TCG_TARGET_HAS_goto_ptr         1
+#define TCG_TARGET_HAS_movcond_i32      0
+#define TCG_TARGET_HAS_div_i32          1
+#define TCG_TARGET_HAS_rem_i32          1
+#define TCG_TARGET_HAS_div2_i32         0
+#define TCG_TARGET_HAS_rot_i32          0
+#define TCG_TARGET_HAS_deposit_i32      0
+#define TCG_TARGET_HAS_extract_i32      0
+#define TCG_TARGET_HAS_sextract_i32     0
+#define TCG_TARGET_HAS_add2_i32         1
+#define TCG_TARGET_HAS_sub2_i32         1
+#define TCG_TARGET_HAS_mulu2_i32        0
+#define TCG_TARGET_HAS_muls2_i32        0
+#define TCG_TARGET_HAS_muluh_i32        (TCG_TARGET_REG_BITS == 32)
+#define TCG_TARGET_HAS_mulsh_i32        (TCG_TARGET_REG_BITS == 32)
+#define TCG_TARGET_HAS_ext8s_i32        1
+#define TCG_TARGET_HAS_ext16s_i32       1
+#define TCG_TARGET_HAS_ext8u_i32        1
+#define TCG_TARGET_HAS_ext16u_i32       1
+#define TCG_TARGET_HAS_bswap16_i32      0
+#define TCG_TARGET_HAS_bswap32_i32      0
+#define TCG_TARGET_HAS_not_i32          1
+#define TCG_TARGET_HAS_neg_i32          1
+#define TCG_TARGET_HAS_andc_i32         0
+#define TCG_TARGET_HAS_orc_i32          0
+#define TCG_TARGET_HAS_eqv_i32          0
+#define TCG_TARGET_HAS_nand_i32         0
+#define TCG_TARGET_HAS_nor_i32          0
+#define TCG_TARGET_HAS_clz_i32          0
+#define TCG_TARGET_HAS_ctz_i32          0
+#define TCG_TARGET_HAS_ctpop_i32        0
+#define TCG_TARGET_HAS_direct_jump      0
+#define TCG_TARGET_HAS_brcond2          1
+#define TCG_TARGET_HAS_setcond2         1
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_movcond_i64      0
+#define TCG_TARGET_HAS_div_i64          1
+#define TCG_TARGET_HAS_rem_i64          1
+#define TCG_TARGET_HAS_div2_i64         0
+#define TCG_TARGET_HAS_rot_i64          0
+#define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_extract_i64      0
+#define TCG_TARGET_HAS_sextract_i64     0
+#define TCG_TARGET_HAS_extrl_i64_i32    1
+#define TCG_TARGET_HAS_extrh_i64_i32    1
+#define TCG_TARGET_HAS_ext8s_i64        1
+#define TCG_TARGET_HAS_ext16s_i64       1
+#define TCG_TARGET_HAS_ext32s_i64       1
+#define TCG_TARGET_HAS_ext8u_i64        1
+#define TCG_TARGET_HAS_ext16u_i64       1
+#define TCG_TARGET_HAS_ext32u_i64       1
+#define TCG_TARGET_HAS_bswap16_i64      0
+#define TCG_TARGET_HAS_bswap32_i64      0
+#define TCG_TARGET_HAS_bswap64_i64      0
+#define TCG_TARGET_HAS_not_i64          1
+#define TCG_TARGET_HAS_neg_i64          1
+#define TCG_TARGET_HAS_andc_i64         0
+#define TCG_TARGET_HAS_orc_i64          0
+#define TCG_TARGET_HAS_eqv_i64          0
+#define TCG_TARGET_HAS_nand_i64         0
+#define TCG_TARGET_HAS_nor_i64          0
+#define TCG_TARGET_HAS_clz_i64          0
+#define TCG_TARGET_HAS_ctz_i64          0
+#define TCG_TARGET_HAS_ctpop_i64        0
+#define TCG_TARGET_HAS_add2_i64         1
+#define TCG_TARGET_HAS_sub2_i64         1
+#define TCG_TARGET_HAS_mulu2_i64        0
+#define TCG_TARGET_HAS_muls2_i64        0
+#define TCG_TARGET_HAS_muluh_i64        1
+#define TCG_TARGET_HAS_mulsh_i64        1
+#endif
+
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+    __builtin___clear_cache((char *)start, (char *)stop);
+}
+
+/* not defined -- call should be eliminated at compile time */
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
+
+#define TCG_TARGET_DEFAULT_MO (0)
+
+#ifdef CONFIG_SOFTMMU
+#define TCG_TARGET_NEED_LDST_LABELS
+#endif
+#define TCG_TARGET_NEED_POOL_LABELS
+
+#define TCG_TARGET_HAS_MEMORY_BSWAP 0
+
+#endif
diff --git a/tcg/riscv/tcg-target.inc.c b/tcg/riscv/tcg-target.inc.c
new file mode 100644
index 0000000000..6cf8de32b5
--- /dev/null
+++ b/tcg/riscv/tcg-target.inc.c
@@ -0,0 +1,1949 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2018 SiFive, Inc
+ * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
+ * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Based on i386/tcg-target.c and mips/tcg-target.c
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "tcg-pool.inc.c"
+
+#ifdef CONFIG_DEBUG_TCG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+    "zero",
+    "ra",
+    "sp",
+    "gp",
+    "tp",
+    "t0",
+    "t1",
+    "t2",
+    "s0",
+    "s1",
+    "a0",
+    "a1",
+    "a2",
+    "a3",
+    "a4",
+    "a5",
+    "a6",
+    "a7",
+    "s2",
+    "s3",
+    "s4",
+    "s5",
+    "s6",
+    "s7",
+    "s8",
+    "s9",
+    "s10",
+    "s11",
+    "t3",
+    "t4",
+    "t5",
+    "t6"
+};
+#endif
+
+static const int tcg_target_reg_alloc_order[] = {
+    /* Call saved registers */
+    /* TCG_REG_S0 reserved for TCG_AREG0 */
+    TCG_REG_S1,
+    TCG_REG_S2,
+    TCG_REG_S3,
+    TCG_REG_S4,
+    TCG_REG_S5,
+    TCG_REG_S6,
+    TCG_REG_S7,
+    TCG_REG_S8,
+    TCG_REG_S9,
+    TCG_REG_S10,
+    TCG_REG_S11,
+
+    /* Call clobbered registers */
+    TCG_REG_T0,
+    TCG_REG_T1,
+    TCG_REG_T2,
+    TCG_REG_T3,
+    TCG_REG_T4,
+    TCG_REG_T5,
+    TCG_REG_T6,
+
+    /* Argument registers */
+    TCG_REG_A0,
+    TCG_REG_A1,
+    TCG_REG_A2,
+    TCG_REG_A3,
+    TCG_REG_A4,
+    TCG_REG_A5,
+    TCG_REG_A6,
+    TCG_REG_A7,
+};
+
+static const int tcg_target_call_iarg_regs[] = {
+    TCG_REG_A0,
+    TCG_REG_A1,
+    TCG_REG_A2,
+    TCG_REG_A3,
+    TCG_REG_A4,
+    TCG_REG_A5,
+    TCG_REG_A6,
+    TCG_REG_A7,
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+    TCG_REG_A0,
+    TCG_REG_A1,
+};
+
+#define TCG_CT_CONST_ZERO  0x100
+#define TCG_CT_CONST_S12   0x200
+#define TCG_CT_CONST_N12   0x400
+#define TCG_CT_CONST_M12   0x800
+
+static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        return sextract32(val, pos, len);
+    } else {
+        return sextract64(val, pos, len);
+    }
+}
+
+/* parse target specific constraints */
+static const char *target_parse_constraint(TCGArgConstraint *ct,
+                                           const char *ct_str, TCGType type)
+{
+    switch (*ct_str++) {
+    case 'r':
+        ct->ct |= TCG_CT_REG;
+        ct->u.regs = 0xffffffff;
+        break;
+    case 'L':
+        /* qemu_ld/qemu_st constraint */
+        ct->ct |= TCG_CT_REG;
+        ct->u.regs = 0xffffffff;
+        /* qemu_ld/qemu_st uses TCG_REG_TMP0 */
+#if defined(CONFIG_SOFTMMU)
+        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
+        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
+        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
+        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[3]);
+        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[4]);
+#endif
+        break;
+    case 'I':
+        ct->ct |= TCG_CT_CONST_S12;
+        break;
+    case 'N':
+        ct->ct |= TCG_CT_CONST_N12;
+        break;
+    case 'M':
+        ct->ct |= TCG_CT_CONST_M12;
+        break;
+    case 'Z':
+        /* we can use a zero immediate as a zero register argument. */
+        ct->ct |= TCG_CT_CONST_ZERO;
+        break;
+    default:
+        return NULL;
+    }
+    return ct_str;
+}
+
+/* test if a constant matches the constraint */
+static int tcg_target_const_match(tcg_target_long val, TCGType type,
+                                  const TCGArgConstraint *arg_ct)
+{
+    int ct = arg_ct->ct;
+    if (ct & TCG_CT_CONST) {
+        return 1;
+    }
+    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+        return 1;
+    }
+    if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) {
+        return 1;
+    }
+    if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) {
+        return 1;
+    }
+    if ((ct & TCG_CT_CONST_M12) && val >= -0xfff && val <= 0xfff) {
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * RISC-V Base ISA opcodes (IM)
+ */
+
+typedef enum {
+    OPC_ADD = 0x33,
+    OPC_ADDI = 0x13,
+    OPC_AND = 0x7033,
+    OPC_ANDI = 0x7013,
+    OPC_AUIPC = 0x17,
+    OPC_BEQ = 0x63,
+    OPC_BGE = 0x5063,
+    OPC_BGEU = 0x7063,
+    OPC_BLT = 0x4063,
+    OPC_BLTU = 0x6063,
+    OPC_BNE = 0x1063,
+    OPC_DIV = 0x2004033,
+    OPC_DIVU = 0x2005033,
+    OPC_JAL = 0x6f,
+    OPC_JALR = 0x67,
+    OPC_LB = 0x3,
+    OPC_LBU = 0x4003,
+    OPC_LD = 0x3003,
+    OPC_LH = 0x1003,
+    OPC_LHU = 0x5003,
+    OPC_LUI = 0x37,
+    OPC_LW = 0x2003,
+    OPC_LWU = 0x6003,
+    OPC_MUL = 0x2000033,
+    OPC_MULH = 0x2001033,
+    OPC_MULHSU = 0x2002033,
+    OPC_MULHU = 0x2003033,
+    OPC_OR = 0x6033,
+    OPC_ORI = 0x6013,
+    OPC_REM = 0x2006033,
+    OPC_REMU = 0x2007033,
+    OPC_SB = 0x23,
+    OPC_SD = 0x3023,
+    OPC_SH = 0x1023,
+    OPC_SLL = 0x1033,
+    OPC_SLLI = 0x1013,
+    OPC_SLT = 0x2033,
+    OPC_SLTI = 0x2013,
+    OPC_SLTIU = 0x3013,
+    OPC_SLTU = 0x3033,
+    OPC_SRA = 0x40005033,
+    OPC_SRAI = 0x40005013,
+    OPC_SRL = 0x5033,
+    OPC_SRLI = 0x5013,
+    OPC_SUB = 0x40000033,
+    OPC_SW = 0x2023,
+    OPC_XOR = 0x4033,
+    OPC_XORI = 0x4013,
+
+#if TCG_TARGET_REG_BITS == 64
+    OPC_ADDIW = 0x1b,
+    OPC_ADDW = 0x3b,
+    OPC_DIVUW = 0x200503b,
+    OPC_DIVW = 0x200403b,
+    OPC_MULW = 0x200003b,
+    OPC_REMUW = 0x200703b,
+    OPC_REMW = 0x200603b,
+    OPC_SLLIW = 0x101b,
+    OPC_SLLW = 0x103b,
+    OPC_SRAIW = 0x4000501b,
+    OPC_SRAW = 0x4000503b,
+    OPC_SRLIW = 0x501b,
+    OPC_SRLW = 0x503b,
+    OPC_SUBW = 0x4000003b,
+#else
+    /* Simplify code throughout by defining aliases for RV32.  */
+    OPC_ADDIW = OPC_ADDI,
+    OPC_ADDW = OPC_ADD,
+    OPC_DIVUW = OPC_DIVU,
+    OPC_DIVW = OPC_DIV,
+    OPC_MULW = OPC_MUL,
+    OPC_REMUW = OPC_REMU,
+    OPC_REMW = OPC_REM,
+    OPC_SLLIW = OPC_SLLI,
+    OPC_SLLW = OPC_SLL,
+    OPC_SRAIW = OPC_SRAI,
+    OPC_SRAW = OPC_SRA,
+    OPC_SRLIW = OPC_SRLI,
+    OPC_SRLW = OPC_SRL,
+    OPC_SUBW = OPC_SUB,
+#endif
+
+    OPC_FENCE = 0x0000000f,
+} RISCVInsn;
+
+/*
+ * RISC-V immediate and instruction encoders (excludes 16-bit RVC)
+ */
+
+/* Type-R */
+
+static int32_t encode_r(RISCVInsn opc, TCGReg rd, TCGReg rs1, TCGReg rs2)
+{
+    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20;
+}
+
+/* Type-I */
+
+static int32_t encode_imm12(uint32_t imm)
+{
+    return (imm & 0xfff) << 20;
+}
+
+static int32_t encode_i(RISCVInsn opc, TCGReg rd, TCGReg rs1, uint32_t imm)
+{
+    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | encode_imm12(imm);
+}
+
+/* Type-S */
+
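+/*
+ * The S-type immediate splits in two: imm[11:5] lands in insn bits
+ * 31:25 and imm[4:0] in bits 11:7.
+ */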
+static int32_t encode_simm12(uint32_t imm)
+{
+    int32_t ret = 0;
+
+    ret |= (imm & 0xFE0) << 20;
+    ret |= (imm & 0x1F) << 7;
+
+    return ret;
+}
+
+static int32_t encode_s(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm)
+{
+    return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_simm12(imm);
+}
+
+/* Type-SB */
+
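+/*
+ * The SB-type immediate is scattered across the instruction: imm[12]
+ * lands in insn bit 31, imm[10:5] in bits 30:25, imm[4:1] in bits 11:8
+ * and imm[11] in bit 7 (imm[0] is always zero).
+ */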
+static int32_t encode_sbimm12(uint32_t imm)
+{
+    int32_t ret = 0;
+
+    ret |= (imm & 0x1000) << 19;
+    ret |= (imm & 0x7e0) << 20;
+    ret |= (imm & 0x1e) << 7;
+    ret |= (imm & 0x800) >> 4;
+
+    return ret;
+}
+
+static int32_t encode_sb(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm)
+{
+    return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_sbimm12(imm);
+}
+
+/* Type-U */
+
+static int32_t encode_uimm20(uint32_t imm)
+{
+    return imm & 0xfffff000;
+}
+
+static int32_t encode_u(RISCVInsn opc, TCGReg rd, uint32_t imm)
+{
+    return opc | (rd & 0x1f) << 7 | encode_uimm20(imm);
+}
+
+/* Type-UJ */
+
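+/*
+ * The UJ-type immediate is likewise scattered: imm[20] lands in insn
+ * bit 31, imm[10:1] in bits 30:21, imm[11] in bit 20 and imm[19:12]
+ * in bits 19:12.
+ */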
+static int32_t encode_ujimm20(uint32_t imm)
+{
+    int32_t ret = 0;
+
+    ret |= (imm & 0x0007fe) << (21 - 1);
+    ret |= (imm & 0x000800) << (20 - 11);
+    ret |= (imm & 0x0ff000) << (12 - 12);
+    ret |= (imm & 0x100000) << (31 - 20);
+
+    return ret;
+}
+
+static int32_t encode_uj(RISCVInsn opc, TCGReg rd, uint32_t imm)
+{
+    return opc | (rd & 0x1f) << 7 | encode_ujimm20(imm);
+}
+
+/*
+ * RISC-V instruction emitters
+ */
+
+static void tcg_out_opc_reg(TCGContext *s, RISCVInsn opc,
+                            TCGReg rd, TCGReg rs1, TCGReg rs2)
+{
+    tcg_out32(s, encode_r(opc, rd, rs1, rs2));
+}
+
+static void tcg_out_opc_imm(TCGContext *s, RISCVInsn opc,
+                            TCGReg rd, TCGReg rs1, TCGArg imm)
+{
+    tcg_out32(s, encode_i(opc, rd, rs1, imm));
+}
+
+static void tcg_out_opc_store(TCGContext *s, RISCVInsn opc,
+                              TCGReg rs1, TCGReg rs2, uint32_t imm)
+{
+    tcg_out32(s, encode_s(opc, rs1, rs2, imm));
+}
+
+static void tcg_out_opc_branch(TCGContext *s, RISCVInsn opc,
+                               TCGReg rs1, TCGReg rs2, uint32_t imm)
+{
+    tcg_out32(s, encode_sb(opc, rs1, rs2, imm));
+}
+
+static void tcg_out_opc_upper(TCGContext *s, RISCVInsn opc,
+                              TCGReg rd, uint32_t imm)
+{
+    tcg_out32(s, encode_u(opc, rd, imm));
+}
+
+static void tcg_out_opc_jump(TCGContext *s, RISCVInsn opc,
+                             TCGReg rd, uint32_t imm)
+{
+    tcg_out32(s, encode_uj(opc, rd, imm));
+}
+
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+{
+    int i;
+    for (i = 0; i < count; ++i) {
+        p[i] = encode_i(OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0);
+    }
+}
+
+/*
+ * Relocations
+ */
+
+static bool reloc_sbimm12(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+{
+    intptr_t offset = (intptr_t)target - (intptr_t)code_ptr;
+
+    if (offset == sextreg(offset, 1, 12) << 1) {
+        code_ptr[0] |= encode_sbimm12(offset);
+        return true;
+    }
+
+    return false;
+}
+
+static bool reloc_jimm20(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+{
+    intptr_t offset = (intptr_t)target - (intptr_t)code_ptr;
+
+    if (offset == sextreg(offset, 1, 20) << 1) {
+        code_ptr[0] |= encode_ujimm20(offset);
+        return true;
+    }
+
+    return false;
+}
+
+static bool reloc_call(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+{
+    intptr_t offset = (intptr_t)target - (intptr_t)code_ptr;
+    int32_t lo = sextreg(offset, 0, 12);
+    int32_t hi = offset - lo;
+
+    if (offset == hi + lo) {
+        code_ptr[0] |= encode_uimm20(hi);
+        code_ptr[1] |= encode_imm12(lo);
+        return true;
+    }
+
+    return false;
+}
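+
+/*
+ * For illustration (offset chosen arbitrarily): reloc_call() splits a
+ * pc-relative offset so that AUIPC's upper 20 bits plus the following
+ * instruction's sign-extended 12-bit immediate recombine exactly.
+ * With offset = 0x12345987: lo = sextreg(offset, 0, 12) = -0x679 and
+ * hi = offset - lo = 0x12346000; AUIPC adds hi to the pc and the
+ * paired ADDI/JALR/LD adds lo, restoring the full offset.
+ */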
+
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+                        intptr_t value, intptr_t addend)
+{
+    uint32_t insn = *code_ptr;
+    intptr_t diff;
+    bool short_jmp;
+
+    tcg_debug_assert(addend == 0);
+
+    switch (type) {
+    case R_RISCV_BRANCH:
+        diff = value - (uintptr_t)code_ptr;
+        short_jmp = diff == sextreg(diff, 0, 12);
+        if (short_jmp) {
+            return reloc_sbimm12(code_ptr, (tcg_insn_unit *)value);
+        } else {
+            /* Invert the condition */
+            insn = insn ^ (1 << 12);
+            /* Clear the offset */
+            insn &= 0x01fff07f;
+            /* Set the offset to branch over the following JAL (+8) */
+            insn |= encode_sbimm12(8);
+
+            /* Move forward */
+            code_ptr[0] = insn;
+
+            /* Overwrite the NOP with jal x0,value */
+            diff = value - (uintptr_t)(code_ptr + 1);
+            insn = encode_uj(OPC_JAL, TCG_REG_ZERO, diff);
+            code_ptr[1] = insn;
+
+            return true;
+        }
+        break;
+    case R_RISCV_JAL:
+        return reloc_jimm20(code_ptr, (tcg_insn_unit *)value);
+        break;
+    case R_RISCV_CALL:
+        return reloc_call(code_ptr, (tcg_insn_unit *)value);
+        break;
+    default:
+        tcg_abort();
+    }
+}
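+
+/*
+ * Sketch of the R_RISCV_BRANCH rewrite above in pseudo-assembly, for
+ * a label L outside the conditional branch's +-4KiB range:
+ *
+ *     beq  a0, a1, L              bne  a0, a1, .+8
+ *     nop                   =>    jal  zero, L
+ *
+ * The condition is inverted by flipping instruction bit 12 (the low
+ * bit of funct3), and the nop that follows every unresolved branch
+ * is overwritten with the unconditional jal.
+ */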
+
+/*
+ * TCG intrinsics
+ */
+
+static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+{
+    if (ret == arg) {
+        return;
+    }
+    switch (type) {
+    case TCG_TYPE_I32:
+    case TCG_TYPE_I64:
+        tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
+                         tcg_target_long val)
+{
+    tcg_target_long lo, hi, tmp;
+    int shift, ret;
+
+    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
+        val = (int32_t)val;
+    }
+
+    lo = sextreg(val, 0, 12);
+    if (val == lo) {
+        tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, lo);
+        return;
+    }
+
+    hi = val - lo;
+    if (TCG_TARGET_REG_BITS == 32 || val == (int32_t)val) {
+        tcg_out_opc_upper(s, OPC_LUI, rd, hi);
+        if (lo != 0) {
+            tcg_out_opc_imm(s, OPC_ADDIW, rd, rd, lo);
+        }
+        return;
+    }
+
+    /* We can only be here if TCG_TARGET_REG_BITS != 32 */
+    tmp = tcg_pcrel_diff(s, (void *)val);
+    if (tmp == (int32_t)tmp) {
+        tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
+        tcg_out_opc_imm(s, OPC_ADDI, rd, rd, 0);
+        ret = reloc_call(s->code_ptr - 2, (tcg_insn_unit *)val);
+        tcg_debug_assert(ret == true);
+        return;
+    }
+
+    /* Look for a single 20-bit section.  */
+    shift = ctz64(val);
+    tmp = val >> shift;
+    if (tmp == sextreg(tmp, 0, 20)) {
+        tcg_out_opc_upper(s, OPC_LUI, rd, tmp << 12);
+        if (shift > 12) {
+            tcg_out_opc_imm(s, OPC_SLLI, rd, rd, shift - 12);
+        } else {
+            tcg_out_opc_imm(s, OPC_SRAI, rd, rd, 12 - shift);
+        }
+        return;
+    }
+
+    /* Look for a few high zero bits, with lots of bits set in the middle.  */
+    shift = clz64(val);
+    tmp = val << shift;
+    if (tmp == sextreg(tmp, 12, 20) << 12) {
+        tcg_out_opc_upper(s, OPC_LUI, rd, tmp);
+        tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift);
+        return;
+    } else if (tmp == sextreg(tmp, 0, 12)) {
+        tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, tmp);
+        tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift);
+        return;
+    }
+
+    /* Drop into the constant pool.  */
+    new_pool_label(s, val, R_RISCV_CALL, s->code_ptr, 0);
+    tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
+    tcg_out_opc_imm(s, OPC_LD, rd, rd, 0);
+}
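+
+/*
+ * As an arbitrary example of the "single 20-bit section" path above:
+ * val = 0x123450000000 has ctz64(val) = 28 and val >> 28 = 0x12345,
+ * which fits in 20 signed bits, so it is materialized as
+ *     lui  rd, 0x12345000        # rd = 0x12345 << 12
+ *     slli rd, rd, 16            # shift (28) - 12
+ * Constants matching none of the patterns fall through to the
+ * constant pool and are loaded with AUIPC+LD.
+ */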
+
+static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+    tcg_out_opc_imm(s, OPC_ANDI, ret, arg, 0xff);
+}
+
+static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+    tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16);
+    tcg_out_opc_imm(s, OPC_SRLIW, ret, ret, 16);
+}
+
+static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+    tcg_out_opc_imm(s, OPC_SLLI, ret, arg, 32);
+    tcg_out_opc_imm(s, OPC_SRLI, ret, ret, 32);
+}
+
+static void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+    tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 24);
+    tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 24);
+}
+
+static void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+    tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16);
+    tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 16);
+}
+
+static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+    tcg_out_opc_imm(s, OPC_ADDIW, ret, arg, 0);
+}
+
+static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
+                         TCGReg addr, intptr_t offset)
+{
+    intptr_t imm12 = sextreg(offset, 0, 12);
+
+    if (offset != imm12) {
+        intptr_t diff = offset - (uintptr_t)s->code_ptr;
+
+        if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
+            imm12 = sextreg(diff, 0, 12);
+            tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
+            if (addr != TCG_REG_ZERO) {
+                tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr);
+            }
+        }
+        addr = TCG_REG_TMP2;
+    }
+
+    switch (opc) {
+    case OPC_SB:
+    case OPC_SH:
+    case OPC_SW:
+    case OPC_SD:
+        tcg_out_opc_store(s, opc, addr, data, imm12);
+        break;
+    case OPC_LB:
+    case OPC_LBU:
+    case OPC_LH:
+    case OPC_LHU:
+    case OPC_LW:
+    case OPC_LWU:
+    case OPC_LD:
+        tcg_out_opc_imm(s, opc, data, addr, imm12);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
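+
+/*
+ * Sketch of the large-offset path above, with an arbitrary offset:
+ * for offset = 0x12345, imm12 = sextreg(0x345, 0, 12) = 0x345, so
+ * the remainder 0x12000 is built in TCG_REG_TMP2, the base register
+ * is added in, and the access uses the 12-bit displacement:
+ *     lui  tmp2, 0x12000
+ *     add  tmp2, tmp2, addr
+ *     ld   data, 0x345(tmp2)
+ */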
+
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
+                       TCGReg arg1, intptr_t arg2)
+{
+    bool is32bit = (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32);
+    tcg_out_ldst(s, is32bit ? OPC_LW : OPC_LD, arg, arg1, arg2);
+}
+
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+                       TCGReg arg1, intptr_t arg2)
+{
+    bool is32bit = (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32);
+    tcg_out_ldst(s, is32bit ? OPC_SW : OPC_SD, arg, arg1, arg2);
+}
+
+static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+                        TCGReg base, intptr_t ofs)
+{
+    if (val == 0) {
+        tcg_out_st(s, type, TCG_REG_ZERO, base, ofs);
+        return true;
+    }
+    return false;
+}
+
+static void tcg_out_addsub2(TCGContext *s,
+                            TCGReg rl, TCGReg rh,
+                            TCGReg al, TCGReg ah,
+                            TCGArg bl, TCGArg bh,
+                            bool cbl, bool cbh, bool is_sub, bool is32bit)
+{
+    const RISCVInsn opc_add = is32bit ? OPC_ADDW : OPC_ADD;
+    const RISCVInsn opc_addi = is32bit ? OPC_ADDIW : OPC_ADDI;
+    const RISCVInsn opc_sub = is32bit ? OPC_SUBW : OPC_SUB;
+    TCGReg th = TCG_REG_TMP1;
+
+    /* If we have a negative constant such that negating it would
+       make the high part zero, we can (usually) eliminate one insn.  */
+    if (cbl && cbh && bh == -1 && bl != 0) {
+        bl = -bl;
+        bh = 0;
+        is_sub = !is_sub;
+    }
+
+    /* By operating on the high part first, we get to use the final
+       carry operation to move back from the temporary.  */
+    if (!cbh) {
+        tcg_out_opc_reg(s, (is_sub ? opc_sub : opc_add), th, ah, bh);
+    } else if (bh != 0 || ah == rl) {
+        tcg_out_opc_imm(s, opc_addi, th, ah, (is_sub ? -bh : bh));
+    } else {
+        th = ah;
+    }
+
+    /* Note that tcg optimization should eliminate the bl == 0 case.  */
+    if (is_sub) {
+        if (cbl) {
+            tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, al, bl);
+            tcg_out_opc_imm(s, opc_addi, rl, al, -bl);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, al, bl);
+            tcg_out_opc_reg(s, opc_sub, rl, al, bl);
+        }
+        tcg_out_opc_reg(s, opc_sub, rh, th, TCG_REG_TMP0);
+    } else {
+        if (cbl) {
+            tcg_out_opc_imm(s, opc_addi, rl, al, bl);
+            tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, rl, bl);
+        } else if (rl == al && rl == bl) {
+            tcg_out_opc_imm(s, OPC_SLTI, TCG_REG_TMP0, al, 0);
+            tcg_out_opc_reg(s, opc_add, rl, al, bl);
+        } else {
+            tcg_out_opc_reg(s, opc_add, rl, al, bl);
+            tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0,
+                            rl, (rl == bl ? al : bl));
+        }
+        tcg_out_opc_reg(s, opc_add, rh, th, TCG_REG_TMP0);
+    }
+}
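+
+/*
+ * The carry/borrow above comes from unsigned compares.  For a 32-bit
+ * add with register operands (and rl != bl), the sequence is roughly:
+ *     addw rl, al, bl
+ *     sltu tmp0, rl, bl          # carry iff the sum wrapped below bl
+ *     addw rh, th, tmp0
+ * E.g. al = 0xffffffff, bl = 2 gives rl = 1, and sltu(1, 2) = 1 is
+ * the carry propagated into the high part.
+ */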
+
+static const struct {
+    RISCVInsn op;
+    bool swap;
+} tcg_brcond_to_riscv[] = {
+    [TCG_COND_EQ] =  { OPC_BEQ,  false },
+    [TCG_COND_NE] =  { OPC_BNE,  false },
+    [TCG_COND_LT] =  { OPC_BLT,  false },
+    [TCG_COND_GE] =  { OPC_BGE,  false },
+    [TCG_COND_LE] =  { OPC_BGE,  true  },
+    [TCG_COND_GT] =  { OPC_BLT,  true  },
+    [TCG_COND_LTU] = { OPC_BLTU, false },
+    [TCG_COND_GEU] = { OPC_BGEU, false },
+    [TCG_COND_LEU] = { OPC_BGEU, true  },
+    [TCG_COND_GTU] = { OPC_BLTU, true  }
+};
+
+static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
+                           TCGReg arg2, TCGLabel *l)
+{
+    RISCVInsn op = tcg_brcond_to_riscv[cond].op;
+
+    tcg_debug_assert(op != 0);
+
+    if (tcg_brcond_to_riscv[cond].swap) {
+        TCGReg t = arg1;
+        arg1 = arg2;
+        arg2 = t;
+    }
+
+    if (l->has_value) {
+        intptr_t diff = tcg_pcrel_diff(s, l->u.value_ptr);
+        if (diff == sextreg(diff, 0, 12)) {
+            tcg_out_opc_branch(s, op, arg1, arg2, diff);
+        } else {
+            /* Invert the conditional branch.  */
+            tcg_out_opc_branch(s, op ^ (1 << 12), arg1, arg2, 8);
+            tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, diff - 4);
+        }
+    } else {
+        tcg_out_reloc(s, s->code_ptr, R_RISCV_BRANCH, l, 0);
+        tcg_out_opc_branch(s, op, arg1, arg2, 0);
+        /* NOP to allow patching later */
+        tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0);
+    }
+}
+
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
+                            TCGReg arg1, TCGReg arg2)
+{
+    switch (cond) {
+    case TCG_COND_EQ:
+        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
+        tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1);
+        break;
+    case TCG_COND_NE:
+        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
+        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret);
+        break;
+    case TCG_COND_LT:
+        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
+        break;
+    case TCG_COND_GE:
+        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
+        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
+        break;
+    case TCG_COND_LE:
+        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
+        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
+        break;
+    case TCG_COND_GT:
+        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
+        break;
+    case TCG_COND_LTU:
+        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
+        break;
+    case TCG_COND_GEU:
+        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
+        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
+        break;
+    case TCG_COND_LEU:
+        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
+        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
+        break;
+    case TCG_COND_GTU:
+        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
+        break;
+    default:
+        g_assert_not_reached();
+        break;
+    }
+}
+
+static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
+                            TCGReg bl, TCGReg bh, TCGLabel *l)
+{
+    /* todo */
+    g_assert_not_reached();
+}
+
+static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
+                             TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh)
+{
+    /* todo */
+    g_assert_not_reached();
+}
+
+static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
+{
+    ptrdiff_t offset = tcg_pcrel_diff(s, target);
+    tcg_debug_assert(offset == sextreg(offset, 1, 20) << 1);
+    tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset);
+}
+
+static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail)
+{
+    TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
+    ptrdiff_t offset = tcg_pcrel_diff(s, arg);
+    int ret;
+
+    if (offset == sextreg(offset, 1, 20) << 1) {
+        /* short jump: -1048576 to 1048574 (+-1MiB) */
+        tcg_out_opc_jump(s, OPC_JAL, link, offset);
+    } else if (TCG_TARGET_REG_BITS == 32 ||
+        offset == sextreg(offset, 1, 31) << 1) {
+        /* long jump: -2147483648 to 2147483646 (+-2GiB) */
+        tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0);
+        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0);
+        ret = reloc_call(s->code_ptr - 2, arg);
+        tcg_debug_assert(ret == true);
+    } else if (TCG_TARGET_REG_BITS == 64) {
+        /* far jump: 64-bit */
+        tcg_target_long imm = sextreg((tcg_target_long)arg, 0, 12);
+        tcg_target_long base = (tcg_target_long)arg - imm;
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base);
+        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm);
+    } else {
+        g_assert_not_reached();
+    }
+}
+
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg)
+{
+    tcg_out_call_int(s, arg, false);
+}
+
+static void tcg_out_mb(TCGContext *s, TCGArg a0)
+{
+    tcg_insn_unit insn = OPC_FENCE;
+
+    if (a0 & TCG_MO_LD_LD) {
+        insn |= 0x02200000;
+    }
+    if (a0 & TCG_MO_ST_LD) {
+        insn |= 0x01200000;
+    }
+    if (a0 & TCG_MO_LD_ST) {
+        insn |= 0x02100000;
+    }
+    if (a0 & TCG_MO_ST_ST) {
+        insn |= 0x01100000;
+    }
+    tcg_out32(s, insn);
+}
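+
+/*
+ * FENCE encodes its predecessor set in insn[27:24] (PI,PO,PR,PW) and
+ * its successor set in insn[23:20] (SI,SO,SR,SW).  For example,
+ * TCG_MO_LD_LD maps to "fence r,r": PR (bit 25) | SR (bit 21) =
+ * 0x02200000, while TCG_MO_ST_ST maps to "fence w,w": PW (bit 24) |
+ * SW (bit 20) = 0x01100000.
+ */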
+
+/*
+ * Load/store and TLB
+ */
+
+#if defined(CONFIG_SOFTMMU)
+#include "tcg-ldst.inc.c"
+
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ *                                     TCGMemOpIdx oi, uintptr_t ra)
+ */
+static void * const qemu_ld_helpers[16] = {
+    [MO_UB]   = helper_ret_ldub_mmu,
+    [MO_SB]   = helper_ret_ldsb_mmu,
+    [MO_LEUW] = helper_le_lduw_mmu,
+    [MO_LESW] = helper_le_ldsw_mmu,
+    [MO_LEUL] = helper_le_ldul_mmu,
+#if TCG_TARGET_REG_BITS == 64
+    [MO_LESL] = helper_le_ldsl_mmu,
+#endif
+    [MO_LEQ]  = helper_le_ldq_mmu,
+    [MO_BEUW] = helper_be_lduw_mmu,
+    [MO_BESW] = helper_be_ldsw_mmu,
+    [MO_BEUL] = helper_be_ldul_mmu,
+#if TCG_TARGET_REG_BITS == 64
+    [MO_BESL] = helper_be_ldsl_mmu,
+#endif
+    [MO_BEQ]  = helper_be_ldq_mmu,
+};
+
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ *                                     uintxx_t val, TCGMemOpIdx oi,
+ *                                     uintptr_t ra)
+ */
+static void * const qemu_st_helpers[16] = {
+    [MO_UB]   = helper_ret_stb_mmu,
+    [MO_LEUW] = helper_le_stw_mmu,
+    [MO_LEUL] = helper_le_stl_mmu,
+    [MO_LEQ]  = helper_le_stq_mmu,
+    [MO_BEUW] = helper_be_stw_mmu,
+    [MO_BEUL] = helper_be_stl_mmu,
+    [MO_BEQ]  = helper_be_stq_mmu,
+};
+
+static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
+                             TCGReg addrh, TCGMemOpIdx oi,
+                             tcg_insn_unit **label_ptr, bool is_load)
+{
+    TCGMemOp opc = get_memop(oi);
+    unsigned s_bits = opc & MO_SIZE;
+    unsigned a_bits = get_alignment_bits(opc);
+    target_ulong mask;
+    int mem_index = get_mmuidx(oi);
+    int cmp_off
+        = (is_load
+           ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
+           : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
+    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+    RISCVInsn load_cmp_op = (TARGET_LONG_BITS == 64 ? OPC_LD :
+                             TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW);
+    RISCVInsn load_add_op = TCG_TARGET_REG_BITS == 64 ? OPC_LD : OPC_LW;
+    TCGReg base = TCG_AREG0;
+
+    /* We don't support oversize guests */
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+        g_assert_not_reached();
+    }
+
+    /* We don't support unaligned accesses. */
+    if (a_bits < s_bits) {
+        a_bits = s_bits;
+    }
+    mask = (target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
+
+    /* Compensate for very large offsets.  */
+    if (add_off >= 0x1000) {
+        int adj;
+        base = TCG_REG_TMP2;
+        if (cmp_off <= 2 * 0x7ff) {
+            /* 0xfff does not fit in a signed 12-bit ADDI immediate
+               (it would sign-extend to -1), so split the adjustment
+               across two ADDIs.  */
+            adj = 2 * 0x7ff;
+            tcg_out_opc_imm(s, OPC_ADDI, base, TCG_AREG0, 0x7ff);
+            tcg_out_opc_imm(s, OPC_ADDI, base, base, 0x7ff);
+        } else {
+            adj = cmp_off - sextreg(cmp_off, 0, 12);
+            tcg_debug_assert(add_off - adj >= -0x1000
+                             && add_off - adj < 0x1000);
+
+            tcg_out_opc_upper(s, OPC_LUI, base, adj);
+            tcg_out_opc_reg(s, OPC_ADD, base, base, TCG_AREG0);
+        }
+        add_off -= adj;
+        cmp_off -= adj;
+    }
+
+    /* Extract the page index.  */
+    if (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS < 12) {
+        tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP0, addrl,
+                        TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+        tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP0, TCG_REG_TMP0,
+                        MAKE_64BIT_MASK(CPU_TLB_ENTRY_BITS, CPU_TLB_BITS));
+    } else if (TARGET_PAGE_BITS >= 12) {
+        tcg_out_opc_upper(s, OPC_LUI, TCG_REG_TMP0,
+                          MAKE_64BIT_MASK(TARGET_PAGE_BITS, CPU_TLB_BITS));
+        tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP0, TCG_REG_TMP0, addrl);
+        tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP0, TCG_REG_TMP0,
+                        TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+    } else {
+        tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP0, addrl, TARGET_PAGE_BITS);
+        tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP0, TCG_REG_TMP0,
+                        MAKE_64BIT_MASK(0, CPU_TLB_BITS));
+        tcg_out_opc_imm(s, OPC_SLLI, TCG_REG_TMP0, TCG_REG_TMP0,
+                        CPU_TLB_ENTRY_BITS);
+    }
+
+    /* Add that to the base address to index the tlb.  */
+    tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, base, TCG_REG_TMP0);
+    base = TCG_REG_TMP2;
+
+    /* Load the tlb comparator and the addend.  */
+    tcg_out_ldst(s, load_cmp_op, TCG_REG_TMP0, base, cmp_off);
+    tcg_out_ldst(s, load_add_op, TCG_REG_TMP2, base, add_off);
+
+    /* Clear the non-page, non-alignment bits from the address.  */
+    if (mask == sextreg(mask, 0, 12)) {
+        tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addrl, mask);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, mask);
+        tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addrl);
+    }
+
+    /* Compare masked address with the TLB entry. */
+    label_ptr[0] = s->code_ptr;
+    tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
+    /* NOP to allow patching later */
+    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0);
+    /* TODO: Move this out of line
+     * see:
+     *   https://lists.nongnu.org/archive/html/qemu-devel/2018-11/msg02234.html
+     */
+
+    /* TLB Hit - translate address using addend.  */
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+        tcg_out_ext32u(s, TCG_REG_TMP0, addrl);
+        addrl = TCG_REG_TMP0;
+    }
+    tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addrl);
+}
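+
+/*
+ * Roughly, the sequence emitted above for a 64-bit guest on a 64-bit
+ * host is (registers abbreviated; offsets depend on CPUArchState):
+ *     srli tmp0, addr, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS
+ *     andi tmp0, tmp0, <index mask>
+ *     add  tmp2, env, tmp0
+ *     ld   tmp0, <cmp_off>(tmp2)     # tlb comparator
+ *     ld   tmp2, <add_off>(tmp2)     # tlb addend
+ *     andi tmp1, addr, <page/align mask>
+ *     bne  tmp0, tmp1, <slow path>   # patched via R_RISCV_BRANCH
+ *     nop
+ *     add  tmp0, tmp2, addr          # host address on TLB hit
+ */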
+
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
+                                TCGType ext,
+                                TCGReg datalo, TCGReg datahi,
+                                TCGReg addrlo, TCGReg addrhi,
+                                void *raddr, tcg_insn_unit **label_ptr)
+{
+    TCGLabelQemuLdst *label = new_ldst_label(s);
+
+    label->is_ld = is_ld;
+    label->oi = oi;
+    label->type = ext;
+    label->datalo_reg = datalo;
+    label->datahi_reg = datahi;
+    label->addrlo_reg = addrlo;
+    label->addrhi_reg = addrhi;
+    label->raddr = raddr;
+    label->label_ptr[0] = label_ptr[0];
+}
+
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    TCGMemOpIdx oi = l->oi;
+    TCGMemOp opc = get_memop(oi);
+    TCGReg a0 = tcg_target_call_iarg_regs[0];
+    TCGReg a1 = tcg_target_call_iarg_regs[1];
+    TCGReg a2 = tcg_target_call_iarg_regs[2];
+    TCGReg a3 = tcg_target_call_iarg_regs[3];
+
+    /* We don't support oversize guests */
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+        g_assert_not_reached();
+    }
+
+    /* resolve label address */
+    patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, (intptr_t) s->code_ptr, 0);
+
+    /* call load helper */
+    tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
+    tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
+    tcg_out_movi(s, TCG_TYPE_PTR, a2, oi);
+    tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr);
+
+    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
+    tcg_out_mov(s, (opc & MO_SIZE) == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
+                l->datalo_reg, a0);
+
+    tcg_out_goto(s, l->raddr);
+}
+
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    TCGMemOpIdx oi = l->oi;
+    TCGMemOp opc = get_memop(oi);
+    TCGMemOp s_bits = opc & MO_SIZE;
+    TCGReg a0 = tcg_target_call_iarg_regs[0];
+    TCGReg a1 = tcg_target_call_iarg_regs[1];
+    TCGReg a2 = tcg_target_call_iarg_regs[2];
+    TCGReg a3 = tcg_target_call_iarg_regs[3];
+    TCGReg a4 = tcg_target_call_iarg_regs[4];
+
+    /* We don't support oversize guests */
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+        g_assert_not_reached();
+    }
+
+    /* resolve label address */
+    patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, (intptr_t) s->code_ptr, 0);
+
+    /* call store helper */
+    tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
+    tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
+    tcg_out_mov(s, TCG_TYPE_PTR, a2, l->datalo_reg);
+    switch (s_bits) {
+    case MO_8:
+        tcg_out_ext8u(s, a2, a2);
+        break;
+    case MO_16:
+        tcg_out_ext16u(s, a2, a2);
+        break;
+    default:
+        break;
+    }
+    tcg_out_movi(s, TCG_TYPE_PTR, a3, oi);
+    tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr);
+
+    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
+
+    tcg_out_goto(s, l->raddr);
+}
+#endif /* CONFIG_SOFTMMU */
+
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
+                                   TCGReg base, TCGMemOp opc, bool is_64)
+{
+    const TCGMemOp bswap = opc & MO_BSWAP;
+
+    /* We don't yet handle byteswapping, assert */
+    g_assert(!bswap);
+
+    switch (opc & (MO_SSIZE)) {
+    case MO_UB:
+        tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
+        break;
+    case MO_SB:
+        tcg_out_opc_imm(s, OPC_LB, lo, base, 0);
+        break;
+    case MO_UW:
+        tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
+        break;
+    case MO_SW:
+        tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
+        break;
+    case MO_UL:
+        if (TCG_TARGET_REG_BITS == 64 && is_64) {
+            tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
+            break;
+        }
+        /* FALLTHRU */
+    case MO_SL:
+        tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
+        break;
+    case MO_Q:
+        /* Prefer to load from offset 0 first, but allow for overlap.  */
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
+        } else if (lo != base) {
+            tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
+            tcg_out_opc_imm(s, OPC_LW, hi, base, 4);
+        } else {
+            tcg_out_opc_imm(s, OPC_LW, hi, base, 4);
+            tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
+{
+    TCGReg addr_regl, addr_regh __attribute__((unused));
+    TCGReg data_regl, data_regh;
+    TCGMemOpIdx oi;
+    TCGMemOp opc;
+#if defined(CONFIG_SOFTMMU)
+    tcg_insn_unit *label_ptr[1];
+#endif
+    TCGReg base = TCG_REG_TMP0;
+
+    data_regl = *args++;
+    data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
+    addr_regl = *args++;
+    addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
+    oi = *args++;
+    opc = get_memop(oi);
+
+#if defined(CONFIG_SOFTMMU)
+    tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1);
+    tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
+    add_qemu_ldst_label(s, 1, oi,
+                        (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+                        data_regl, data_regh, addr_regl, addr_regh,
+                        s->code_ptr, label_ptr);
+#else
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+        tcg_out_ext32u(s, base, addr_regl);
+        addr_regl = base;
+    }
+
+    if (guest_base == 0) {
+        tcg_out_opc_reg(s, OPC_ADD, base, addr_regl, TCG_REG_ZERO);
+    } else {
+        tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl);
+    }
+    tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
+#endif
+}
+
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
+                                   TCGReg base, TCGMemOp opc)
+{
+    const TCGMemOp bswap = opc & MO_BSWAP;
+
+    /* We don't yet handle byteswapping, assert */
+    g_assert(!bswap);
+
+    switch (opc & MO_SIZE) {
+    case MO_8:
+        tcg_out_opc_store(s, OPC_SB, base, lo, 0);
+        break;
+    case MO_16:
+        tcg_out_opc_store(s, OPC_SH, base, lo, 0);
+        break;
+    case MO_32:
+        tcg_out_opc_store(s, OPC_SW, base, lo, 0);
+        break;
+    case MO_64:
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_opc_store(s, OPC_SD, base, lo, 0);
+        } else {
+            tcg_out_opc_store(s, OPC_SW, base, lo, 0);
+            tcg_out_opc_store(s, OPC_SW, base, hi, 4);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
+{
+    TCGReg addr_regl, addr_regh __attribute__((unused));
+    TCGReg data_regl, data_regh;
+    TCGMemOpIdx oi;
+    TCGMemOp opc;
+#if defined(CONFIG_SOFTMMU)
+    tcg_insn_unit *label_ptr[1];
+#endif
+    TCGReg base = TCG_REG_TMP0;
+
+    data_regl = *args++;
+    data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
+    addr_regl = *args++;
+    addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
+    oi = *args++;
+    opc = get_memop(oi);
+
+#if defined(CONFIG_SOFTMMU)
+    tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0);
+    tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
+    add_qemu_ldst_label(s, 0, oi,
+                        (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+                        data_regl, data_regh, addr_regl, addr_regh,
+                        s->code_ptr, label_ptr);
+#else
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+        tcg_out_ext32u(s, base, addr_regl);
+        addr_regl = base;
+    }
+
+    if (guest_base == 0) {
+        tcg_out_opc_reg(s, OPC_ADD, base, addr_regl, TCG_REG_ZERO);
+    } else {
+        tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl);
+    }
+    tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
+#endif
+}
+
+static tcg_insn_unit *tb_ret_addr;
+
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+                       const TCGArg *args, const int *const_args)
+{
+    TCGArg a0 = args[0];
+    TCGArg a1 = args[1];
+    TCGArg a2 = args[2];
+    int c2 = const_args[2];
+
+    switch (opc) {
+    case INDEX_op_exit_tb:
+        /* Reuse the zeroing that exists for goto_ptr.  */
+        if (a0 == 0) {
+            tcg_out_call_int(s, s->code_gen_epilogue, true);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0);
+            tcg_out_call_int(s, tb_ret_addr, true);
+        }
+        break;
+
+    case INDEX_op_goto_tb:
+        assert(s->tb_jmp_insn_offset == 0);
+        /* indirect jump method */
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
+                   (uintptr_t)(s->tb_jmp_target_addr + a0));
+        tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+        set_jmp_reset_offset(s, a0);
+        break;
+
+    case INDEX_op_goto_ptr:
+        tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0);
+        break;
+
+    case INDEX_op_br:
+        tcg_out_reloc(s, s->code_ptr, R_RISCV_JAL, arg_label(a0), 0);
+        tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
+        break;
+
+    case INDEX_op_ld8u_i32:
+    case INDEX_op_ld8u_i64:
+        tcg_out_ldst(s, OPC_LBU, a0, a1, a2);
+        break;
+    case INDEX_op_ld8s_i32:
+    case INDEX_op_ld8s_i64:
+        tcg_out_ldst(s, OPC_LB, a0, a1, a2);
+        break;
+    case INDEX_op_ld16u_i32:
+    case INDEX_op_ld16u_i64:
+        tcg_out_ldst(s, OPC_LHU, a0, a1, a2);
+        break;
+    case INDEX_op_ld16s_i32:
+    case INDEX_op_ld16s_i64:
+        tcg_out_ldst(s, OPC_LH, a0, a1, a2);
+        break;
+    case INDEX_op_ld32u_i64:
+        tcg_out_ldst(s, OPC_LWU, a0, a1, a2);
+        break;
+    case INDEX_op_ld_i32:
+    case INDEX_op_ld32s_i64:
+        tcg_out_ldst(s, OPC_LW, a0, a1, a2);
+        break;
+    case INDEX_op_ld_i64:
+        tcg_out_ldst(s, OPC_LD, a0, a1, a2);
+        break;
+
+    case INDEX_op_st8_i32:
+    case INDEX_op_st8_i64:
+        tcg_out_ldst(s, OPC_SB, a0, a1, a2);
+        break;
+    case INDEX_op_st16_i32:
+    case INDEX_op_st16_i64:
+        tcg_out_ldst(s, OPC_SH, a0, a1, a2);
+        break;
+    case INDEX_op_st_i32:
+    case INDEX_op_st32_i64:
+        tcg_out_ldst(s, OPC_SW, a0, a1, a2);
+        break;
+    case INDEX_op_st_i64:
+        tcg_out_ldst(s, OPC_SD, a0, a1, a2);
+        break;
+
+    case INDEX_op_add_i32:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_ADDW, a0, a1, a2);
+        }
+        break;
+    case INDEX_op_add_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_ADDI, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_ADD, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_sub_i32:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, -a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SUBW, a0, a1, a2);
+        }
+        break;
+    case INDEX_op_sub_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_ADDI, a0, a1, -a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SUB, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_and_i32:
+    case INDEX_op_and_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_AND, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_or_i32:
+    case INDEX_op_or_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_OR, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_xor_i32:
+    case INDEX_op_xor_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_not_i32:
+    case INDEX_op_not_i64:
+        tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1);
+        break;
+
+    case INDEX_op_neg_i32:
+        tcg_out_opc_reg(s, OPC_SUBW, a0, TCG_REG_ZERO, a1);
+        break;
+    case INDEX_op_neg_i64:
+        tcg_out_opc_reg(s, OPC_SUB, a0, TCG_REG_ZERO, a1);
+        break;
+
+    case INDEX_op_mul_i32:
+        tcg_out_opc_reg(s, OPC_MULW, a0, a1, a2);
+        break;
+    case INDEX_op_mul_i64:
+        tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2);
+        break;
+
+    case INDEX_op_div_i32:
+        tcg_out_opc_reg(s, OPC_DIVW, a0, a1, a2);
+        break;
+    case INDEX_op_div_i64:
+        tcg_out_opc_reg(s, OPC_DIV, a0, a1, a2);
+        break;
+
+    case INDEX_op_divu_i32:
+        tcg_out_opc_reg(s, OPC_DIVUW, a0, a1, a2);
+        break;
+    case INDEX_op_divu_i64:
+        tcg_out_opc_reg(s, OPC_DIVU, a0, a1, a2);
+        break;
+
+    case INDEX_op_rem_i32:
+        tcg_out_opc_reg(s, OPC_REMW, a0, a1, a2);
+        break;
+    case INDEX_op_rem_i64:
+        tcg_out_opc_reg(s, OPC_REM, a0, a1, a2);
+        break;
+
+    case INDEX_op_remu_i32:
+        tcg_out_opc_reg(s, OPC_REMUW, a0, a1, a2);
+        break;
+    case INDEX_op_remu_i64:
+        tcg_out_opc_reg(s, OPC_REMU, a0, a1, a2);
+        break;
+
+    case INDEX_op_shl_i32:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2);
+        }
+        break;
+    case INDEX_op_shl_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_shr_i32:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2);
+        }
+        break;
+    case INDEX_op_shr_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_sar_i32:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2);
+        }
+        break;
+    case INDEX_op_sar_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_add2_i32:
+        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+                        const_args[4], const_args[5], false, true);
+        break;
+    case INDEX_op_add2_i64:
+        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+                        const_args[4], const_args[5], false, false);
+        break;
+    case INDEX_op_sub2_i32:
+        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+                        const_args[4], const_args[5], true, true);
+        break;
+    case INDEX_op_sub2_i64:
+        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+                        const_args[4], const_args[5], true, false);
+        break;
+
+    case INDEX_op_brcond_i32:
+    case INDEX_op_brcond_i64:
+        tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
+        break;
+    case INDEX_op_brcond2_i32:
+        tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5]));
+        break;
+
+    case INDEX_op_setcond_i32:
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], a0, a1, a2);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]);
+        break;
+
+    case INDEX_op_qemu_ld_i32:
+        tcg_out_qemu_ld(s, args, false);
+        break;
+    case INDEX_op_qemu_ld_i64:
+        tcg_out_qemu_ld(s, args, true);
+        break;
+    case INDEX_op_qemu_st_i32:
+        tcg_out_qemu_st(s, args, false);
+        break;
+    case INDEX_op_qemu_st_i64:
+        tcg_out_qemu_st(s, args, true);
+        break;
+
+    case INDEX_op_ext8u_i32:
+    case INDEX_op_ext8u_i64:
+        tcg_out_ext8u(s, a0, a1);
+        break;
+
+    case INDEX_op_ext16u_i32:
+    case INDEX_op_ext16u_i64:
+        tcg_out_ext16u(s, a0, a1);
+        break;
+
+    case INDEX_op_ext32u_i64:
+    case INDEX_op_extu_i32_i64:
+        tcg_out_ext32u(s, a0, a1);
+        break;
+
+    case INDEX_op_ext8s_i32:
+    case INDEX_op_ext8s_i64:
+        tcg_out_ext8s(s, a0, a1);
+        break;
+
+    case INDEX_op_ext16s_i32:
+    case INDEX_op_ext16s_i64:
+        tcg_out_ext16s(s, a0, a1);
+        break;
+
+    case INDEX_op_ext32s_i64:
+    case INDEX_op_extrl_i64_i32:
+    case INDEX_op_ext_i32_i64:
+        tcg_out_ext32s(s, a0, a1);
+        break;
+
+    case INDEX_op_extrh_i64_i32:
+        tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32);
+        break;
+
+    case INDEX_op_mulsh_i32:
+    case INDEX_op_mulsh_i64:
+        tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2);
+        break;
+
+    case INDEX_op_muluh_i32:
+    case INDEX_op_muluh_i64:
+        tcg_out_opc_reg(s, OPC_MULHU, a0, a1, a2);
+        break;
+
+    case INDEX_op_mb:
+        tcg_out_mb(s, a0);
+        break;
+
+    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
+    case INDEX_op_mov_i64:
+    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
+    case INDEX_op_movi_i64:
+    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+{
+    static const TCGTargetOpDef r
+        = { .args_ct_str = { "r" } };
+    static const TCGTargetOpDef r_r
+        = { .args_ct_str = { "r", "r" } };
+    static const TCGTargetOpDef rZ_r
+        = { .args_ct_str = { "rZ", "r" } };
+    static const TCGTargetOpDef rZ_rZ
+        = { .args_ct_str = { "rZ", "rZ" } };
+    static const TCGTargetOpDef rZ_rZ_rZ_rZ
+        = { .args_ct_str = { "rZ", "rZ", "rZ", "rZ" } };
+    static const TCGTargetOpDef r_r_ri
+        = { .args_ct_str = { "r", "r", "ri" } };
+    static const TCGTargetOpDef r_r_rI
+        = { .args_ct_str = { "r", "r", "rI" } };
+    static const TCGTargetOpDef r_rZ_rN
+        = { .args_ct_str = { "r", "rZ", "rN" } };
+    static const TCGTargetOpDef r_rZ_rZ
+        = { .args_ct_str = { "r", "rZ", "rZ" } };
+    static const TCGTargetOpDef r_rZ_rZ_rZ_rZ
+        = { .args_ct_str = { "r", "rZ", "rZ", "rZ", "rZ" } };
+    static const TCGTargetOpDef r_L
+        = { .args_ct_str = { "r", "L" } };
+    static const TCGTargetOpDef r_r_L
+        = { .args_ct_str = { "r", "r", "L" } };
+    static const TCGTargetOpDef r_L_L
+        = { .args_ct_str = { "r", "L", "L" } };
+    static const TCGTargetOpDef r_r_L_L
+        = { .args_ct_str = { "r", "r", "L", "L" } };
+    static const TCGTargetOpDef LZ_L
+        = { .args_ct_str = { "LZ", "L" } };
+    static const TCGTargetOpDef LZ_L_L
+        = { .args_ct_str = { "LZ", "L", "L" } };
+    static const TCGTargetOpDef LZ_LZ_L
+        = { .args_ct_str = { "LZ", "LZ", "L" } };
+    static const TCGTargetOpDef LZ_LZ_L_L
+        = { .args_ct_str = { "LZ", "LZ", "L", "L" } };
+    static const TCGTargetOpDef r_r_rZ_rZ_rM_rM
+        = { .args_ct_str = { "r", "r", "rZ", "rZ", "rM", "rM" } };
+
+    switch (op) {
+    case INDEX_op_goto_ptr:
+        return &r;
+
+    case INDEX_op_ld8u_i32:
+    case INDEX_op_ld8s_i32:
+    case INDEX_op_ld16u_i32:
+    case INDEX_op_ld16s_i32:
+    case INDEX_op_ld_i32:
+    case INDEX_op_not_i32:
+    case INDEX_op_neg_i32:
+    case INDEX_op_ld8u_i64:
+    case INDEX_op_ld8s_i64:
+    case INDEX_op_ld16u_i64:
+    case INDEX_op_ld16s_i64:
+    case INDEX_op_ld32s_i64:
+    case INDEX_op_ld32u_i64:
+    case INDEX_op_ld_i64:
+    case INDEX_op_not_i64:
+    case INDEX_op_neg_i64:
+    case INDEX_op_ext8u_i32:
+    case INDEX_op_ext8u_i64:
+    case INDEX_op_ext16u_i32:
+    case INDEX_op_ext16u_i64:
+    case INDEX_op_ext32u_i64:
+    case INDEX_op_extu_i32_i64:
+    case INDEX_op_ext8s_i32:
+    case INDEX_op_ext8s_i64:
+    case INDEX_op_ext16s_i32:
+    case INDEX_op_ext16s_i64:
+    case INDEX_op_ext32s_i64:
+    case INDEX_op_extrl_i64_i32:
+    case INDEX_op_extrh_i64_i32:
+    case INDEX_op_ext_i32_i64:
+        return &r_r;
+
+    case INDEX_op_st8_i32:
+    case INDEX_op_st16_i32:
+    case INDEX_op_st_i32:
+    case INDEX_op_st8_i64:
+    case INDEX_op_st16_i64:
+    case INDEX_op_st32_i64:
+    case INDEX_op_st_i64:
+        return &rZ_r;
+
+    case INDEX_op_add_i32:
+    case INDEX_op_and_i32:
+    case INDEX_op_or_i32:
+    case INDEX_op_xor_i32:
+    case INDEX_op_add_i64:
+    case INDEX_op_and_i64:
+    case INDEX_op_or_i64:
+    case INDEX_op_xor_i64:
+        return &r_r_rI;
+
+    case INDEX_op_sub_i32:
+    case INDEX_op_sub_i64:
+        return &r_rZ_rN;
+
+    case INDEX_op_mul_i32:
+    case INDEX_op_mulsh_i32:
+    case INDEX_op_muluh_i32:
+    case INDEX_op_div_i32:
+    case INDEX_op_divu_i32:
+    case INDEX_op_rem_i32:
+    case INDEX_op_remu_i32:
+    case INDEX_op_setcond_i32:
+    case INDEX_op_mul_i64:
+    case INDEX_op_mulsh_i64:
+    case INDEX_op_muluh_i64:
+    case INDEX_op_div_i64:
+    case INDEX_op_divu_i64:
+    case INDEX_op_rem_i64:
+    case INDEX_op_remu_i64:
+    case INDEX_op_setcond_i64:
+        return &r_rZ_rZ;
+
+    case INDEX_op_shl_i32:
+    case INDEX_op_shr_i32:
+    case INDEX_op_sar_i32:
+    case INDEX_op_shl_i64:
+    case INDEX_op_shr_i64:
+    case INDEX_op_sar_i64:
+        return &r_r_ri;
+
+    case INDEX_op_brcond_i32:
+    case INDEX_op_brcond_i64:
+        return &rZ_rZ;
+
+    case INDEX_op_add2_i32:
+    case INDEX_op_add2_i64:
+    case INDEX_op_sub2_i32:
+    case INDEX_op_sub2_i64:
+        return &r_r_rZ_rZ_rM_rM;
+
+    case INDEX_op_brcond2_i32:
+        return &rZ_rZ_rZ_rZ;
+
+    case INDEX_op_setcond2_i32:
+        return &r_rZ_rZ_rZ_rZ;
+
+    case INDEX_op_qemu_ld_i32:
+        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
+    case INDEX_op_qemu_st_i32:
+        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &LZ_L : &LZ_L_L;
+    case INDEX_op_qemu_ld_i64:
+        return TCG_TARGET_REG_BITS == 64 ? &r_L
+               : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
+               : &r_r_L_L;
+    case INDEX_op_qemu_st_i64:
+        return TCG_TARGET_REG_BITS == 64 ? &LZ_L
+               : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &LZ_LZ_L
+               : &LZ_LZ_L_L;
+
+    default:
+        return NULL;
+    }
+}
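+
+/*
+ * The constraint strings above use the letters registered earlier in
+ * this file: "r" is any general register and "L" the subset that is
+ * safe around the qemu_ld/st slow path; an appended "Z" also accepts
+ * constant zero (emitted as x0).  "I", "N" and "M" are immediate
+ * classes (roughly: fits in signed 12 bits, negation fits, and the
+ * variant used by add2/sub2), so e.g. "rI" lets add pick ADDI or ADD.
+ */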
+
+static const int tcg_target_callee_save_regs[] = {
+    TCG_REG_S0,       /* used for the global env (TCG_AREG0) */
+    TCG_REG_S1,
+    TCG_REG_S2,
+    TCG_REG_S3,
+    TCG_REG_S4,
+    TCG_REG_S5,
+    TCG_REG_S6,
+    TCG_REG_S7,
+    TCG_REG_S8,
+    TCG_REG_S9,
+    TCG_REG_S10,
+    TCG_REG_S11,
+    TCG_REG_RA,       /* should be last for ABI compliance */
+};
+
+/* Stack frame parameters.  */
+#define REG_SIZE   (TCG_TARGET_REG_BITS / 8)
+#define SAVE_SIZE  ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
+#define TEMP_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
+#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
+                     + TCG_TARGET_STACK_ALIGN - 1) \
+                    & -TCG_TARGET_STACK_ALIGN)
+#define SAVE_OFS   (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)
+
+/* We're expecting to be able to use an immediate for frame allocation.  */
+QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);
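+
+/*
+ * With the usual values (TCG_STATIC_CALL_ARGS_SIZE = 128,
+ * CPU_TEMP_BUF_NLONGS = 128, 13 saved registers on RV64) this is
+ * 128 + 1024 + 104 = 1256 bytes, rounded up to 1264 -- comfortably
+ * within ADDI's +-2KiB range, which the assertion above guarantees.
+ */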
+
+/* Generate global QEMU prologue and epilogue code */
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+    int i;
+
+    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
+
+    /* TB prologue */
+    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
+    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+    }
+
+#if !defined(CONFIG_SOFTMMU)
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
+    tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+#endif
+
+    /* Call generated code */
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);
+
+    /* Return path for goto_ptr. Set return value to 0 */
+    s->code_gen_epilogue = s->code_ptr;
+    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);
+
+    /* TB epilogue */
+    tb_ret_addr = s->code_ptr;
+    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+    }
+
+    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
+    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0);
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
+    }
+
+    tcg_target_call_clobber_regs = -1u;
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S10);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S11);
+
+    s->reserved_regs = 0;
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
+}
+
+typedef struct {
+    DebugFrameHeader h;
+    uint8_t fde_def_cfa[4];
+    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
+} DebugFrame;
+
+#define ELF_HOST_MACHINE EM_RISCV
+
+static const DebugFrame debug_frame = {
+    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
+    .h.cie.id = -1,
+    .h.cie.version = 1,
+    .h.cie.code_align = 1,
+    .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
+    .h.cie.return_column = TCG_REG_RA,
+
+    /* Total FDE size does not include the "len" member.  */
+    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+    .fde_def_cfa = {
+        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
+        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
+        (FRAME_SIZE >> 7)
+    },
+    .fde_reg_ofs = {
+        0x80 + 9,  12,                  /* DW_CFA_offset, s1,  -96 */
+        0x80 + 18, 11,                  /* DW_CFA_offset, s2,  -88 */
+        0x80 + 19, 10,                  /* DW_CFA_offset, s3,  -80 */
+        0x80 + 20, 9,                   /* DW_CFA_offset, s4,  -72 */
+        0x80 + 21, 8,                   /* DW_CFA_offset, s5,  -64 */
+        0x80 + 22, 7,                   /* DW_CFA_offset, s6,  -56 */
+        0x80 + 23, 6,                   /* DW_CFA_offset, s7,  -48 */
+        0x80 + 24, 5,                   /* DW_CFA_offset, s8,  -40 */
+        0x80 + 25, 4,                   /* DW_CFA_offset, s9,  -32 */
+        0x80 + 26, 3,                   /* DW_CFA_offset, s10, -24 */
+        0x80 + 27, 2,                   /* DW_CFA_offset, s11, -16 */
+        0x80 + 1 , 1,                   /* DW_CFA_offset, ra,  -8 */
+    }
+};
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 38652db32c..1bd7ef24af 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -240,6 +240,7 @@ void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
     if (cond == TCG_COND_ALWAYS) {
         tcg_gen_br(l);
     } else if (cond != TCG_COND_NEVER) {
+        l->refs++;
         tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l));
     }
 }
@@ -1405,6 +1406,7 @@ void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
     if (cond == TCG_COND_ALWAYS) {
         tcg_gen_br(l);
     } else if (cond != TCG_COND_NEVER) {
+        l->refs++;
         if (TCG_TARGET_REG_BITS == 32) {
             tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
                               TCGV_HIGH(arg1), TCGV_LOW(arg2),
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index db4e9188f4..7007ec0d4d 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -260,6 +260,7 @@ static inline void gen_set_label(TCGLabel *l)
 
 static inline void tcg_gen_br(TCGLabel *l)
 {
+    l->refs++;
     tcg_gen_op1(INDEX_op_br, label_arg(l));
 }
 
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index e3a43aabb6..7a8a3edb5b 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -191,9 +191,10 @@ DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
 /* QEMU specific */
 DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS,
     TCG_OPF_NOT_PRESENT)
-DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
-DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
-DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr))
+DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
+DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
+DEF(goto_ptr, 0, 1, 0,
+    TCG_OPF_BB_EXIT | TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr))
 
 DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1,
     TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 963cb37892..c54b119020 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1887,7 +1887,21 @@ static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
 };
 
-void tcg_dump_ops(TCGContext *s)
+static inline bool tcg_regset_single(TCGRegSet d)
+{
+    return (d & (d - 1)) == 0;
+}
+
+static inline TCGReg tcg_regset_first(TCGRegSet d)
+{
+    if (TCG_TARGET_NB_REGS <= 32) {
+        return ctz32(d);
+    } else {
+        return ctz64(d);
+    }
+}
+
+static void tcg_dump_ops(TCGContext *s, bool have_prefs)
 {
     char buf[128];
     TCGOp *op;
@@ -1902,6 +1916,7 @@ void tcg_dump_ops(TCGContext *s)
         def = &tcg_op_defs[c];
 
         if (c == INDEX_op_insn_start) {
+            nb_oargs = 0;
             col += qemu_log("\n ----");
 
             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
@@ -2021,12 +2036,15 @@ void tcg_dump_ops(TCGContext *s)
                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
             }
         }
-        if (op->life) {
-            unsigned life = op->life;
 
-            for (; col < 48; ++col) {
+        if (have_prefs || op->life) {
+            for (; col < 40; ++col) {
                 putc(' ', qemu_logfile);
             }
+        }
+
+        if (op->life) {
+            unsigned life = op->life;
 
             if (life & (SYNC_ARG * 3)) {
                 qemu_log("  sync:");
@@ -2046,6 +2064,33 @@ void tcg_dump_ops(TCGContext *s)
                 }
             }
         }
+
+        if (have_prefs) {
+            for (i = 0; i < nb_oargs; ++i) {
+                TCGRegSet set = op->output_pref[i];
+
+                if (i == 0) {
+                    qemu_log("  pref=");
+                } else {
+                    qemu_log(",");
+                }
+                if (set == 0) {
+                    qemu_log("none");
+                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
+                    qemu_log("all");
+#ifdef CONFIG_DEBUG_TCG
+                } else if (tcg_regset_single(set)) {
+                    TCGReg reg = tcg_regset_first(set);
+                    qemu_log("%s", tcg_target_reg_names[reg]);
+#endif
+                } else if (TCG_TARGET_NB_REGS <= 32) {
+                    qemu_log("%#x", (uint32_t)set);
+                } else {
+                    qemu_log("%#" PRIx64, (uint64_t)set);
+                }
+            }
+        }
+
         qemu_log("\n");
     }
 }
@@ -2171,6 +2216,26 @@ static void process_op_defs(TCGContext *s)
 
 void tcg_op_remove(TCGContext *s, TCGOp *op)
 {
+    TCGLabel *label;
+
+    switch (op->opc) {
+    case INDEX_op_br:
+        label = arg_label(op->args[0]);
+        label->refs--;
+        break;
+    case INDEX_op_brcond_i32:
+    case INDEX_op_brcond_i64:
+        label = arg_label(op->args[3]);
+        label->refs--;
+        break;
+    case INDEX_op_brcond2_i32:
+        label = arg_label(op->args[5]);
+        label->refs--;
+        break;
+    default:
+        break;
+    }
+
     QTAILQ_REMOVE(&s->ops, op, link);
     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
     s->nb_ops--;
@@ -2219,43 +2284,181 @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
     return new_op;
 }
 
+/* Reachable analysis : remove unreachable code.  */
+static void reachable_code_pass(TCGContext *s)
+{
+    TCGOp *op, *op_next;
+    bool dead = false;
+
+    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
+        bool remove = dead;
+        TCGLabel *label;
+        int call_flags;
+
+        switch (op->opc) {
+        case INDEX_op_set_label:
+            label = arg_label(op->args[0]);
+            if (label->refs == 0) {
+                /*
+                 * While there is an occasional backward branch, virtually
+                 * all branches generated by the translators are forward.
+                 * Which means that generally we will have already removed
+                 * all references to a label by the time we reach it, and
+                 * there is little to be gained by iterating.
+                 */
+                remove = true;
+            } else {
+                /* Once we see a label, insns become live again.  */
+                dead = false;
+                remove = false;
+
+                /*
+                 * Optimization can fold conditional branches to unconditional.
+                 * If we find a label with one reference which is preceded by
+                 * an unconditional branch to it, remove both.  This needed to
+                 * wait until the dead code in between them was removed.
+                 */
+                if (label->refs == 1) {
+                    TCGOp *op_prev = QTAILQ_PREV(op, TCGOpHead, link);
+                    if (op_prev->opc == INDEX_op_br &&
+                        label == arg_label(op_prev->args[0])) {
+                        tcg_op_remove(s, op_prev);
+                        remove = true;
+                    }
+                }
+            }
+            break;
+
+        case INDEX_op_br:
+        case INDEX_op_exit_tb:
+        case INDEX_op_goto_ptr:
+            /* Unconditional branches; everything following is dead.  */
+            dead = true;
+            break;
+
+        case INDEX_op_call:
+            /* Notice noreturn helper calls, raising exceptions.  */
+            call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
+            if (call_flags & TCG_CALL_NO_RETURN) {
+                dead = true;
+            }
+            break;
+
+        case INDEX_op_insn_start:
+            /* Never remove -- we need to keep these for unwind.  */
+            remove = false;
+            break;
+
+        default:
+            break;
+        }
+
+        if (remove) {
+            tcg_op_remove(s, op);
+        }
+    }
+}
+
 #define TS_DEAD  1
 #define TS_MEM   2
 
 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
 
+/* For liveness_pass_1, the register preferences for a given temp.  */
+static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
+{
+    return ts->state_ptr;
+}
+
+/* For liveness_pass_1, reset the preferences for a given temp to the
+ * maximal regset for its type.
+ */
+static inline void la_reset_pref(TCGTemp *ts)
+{
+    *la_temp_pref(ts)
+        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
+}
+
 /* liveness analysis: end of function: all temps are dead, and globals
    should be in memory. */
-static void tcg_la_func_end(TCGContext *s)
+static void la_func_end(TCGContext *s, int ng, int nt)
 {
-    int ng = s->nb_globals;
-    int nt = s->nb_temps;
     int i;
 
     for (i = 0; i < ng; ++i) {
         s->temps[i].state = TS_DEAD | TS_MEM;
+        la_reset_pref(&s->temps[i]);
     }
     for (i = ng; i < nt; ++i) {
         s->temps[i].state = TS_DEAD;
+        la_reset_pref(&s->temps[i]);
     }
 }
 
 /* liveness analysis: end of basic block: all temps are dead, globals
    and local temps should be in memory. */
-static void tcg_la_bb_end(TCGContext *s)
+static void la_bb_end(TCGContext *s, int ng, int nt)
 {
-    int ng = s->nb_globals;
-    int nt = s->nb_temps;
     int i;
 
     for (i = 0; i < ng; ++i) {
         s->temps[i].state = TS_DEAD | TS_MEM;
+        la_reset_pref(&s->temps[i]);
     }
     for (i = ng; i < nt; ++i) {
         s->temps[i].state = (s->temps[i].temp_local
                              ? TS_DEAD | TS_MEM
                              : TS_DEAD);
+        la_reset_pref(&s->temps[i]);
+    }
+}
+
+/* liveness analysis: sync globals back to memory.  */
+static void la_global_sync(TCGContext *s, int ng)
+{
+    int i;
+
+    for (i = 0; i < ng; ++i) {
+        int state = s->temps[i].state;
+        s->temps[i].state = state | TS_MEM;
+        if (state == TS_DEAD) {
+            /* If the global was previously dead, reset prefs.  */
+            la_reset_pref(&s->temps[i]);
+        }
+    }
+}
+
+/* liveness analysis: sync globals back to memory and kill.  */
+static void la_global_kill(TCGContext *s, int ng)
+{
+    int i;
+
+    for (i = 0; i < ng; i++) {
+        s->temps[i].state = TS_DEAD | TS_MEM;
+        la_reset_pref(&s->temps[i]);
+    }
+}
+
+/* liveness analysis: for live temps crossing a call, restrict the
+   register preferences to call-preserved registers.  */
+static void la_cross_call(TCGContext *s, int nt)
+{
+    TCGRegSet mask = ~tcg_target_call_clobber_regs;
+    int i;
+
+    for (i = 0; i < nt; i++) {
+        TCGTemp *ts = &s->temps[i];
+        if (!(ts->state & TS_DEAD)) {
+            TCGRegSet *pset = la_temp_pref(ts);
+            TCGRegSet set = *pset;
+
+            set &= mask;
+            /* If the combination is not possible, restart.  */
+            if (set == 0) {
+                set = tcg_target_available_regs[ts->type] & mask;
+            }
+            *pset = set;
+        }
     }
 }
 
@@ -2265,16 +2468,25 @@ static void tcg_la_bb_end(TCGContext *s)
 static void liveness_pass_1(TCGContext *s)
 {
     int nb_globals = s->nb_globals;
+    int nb_temps = s->nb_temps;
     TCGOp *op, *op_prev;
+    TCGRegSet *prefs;
+    int i;
+
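+    /* Point each temp's state_ptr at its slot in a scratch array holding
+       the register preferences computed during this pass. */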
+    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
+    for (i = 0; i < nb_temps; ++i) {
+        s->temps[i].state_ptr = prefs + i;
+    }
 
-    tcg_la_func_end(s);
+    /* ??? Should be redundant with the exit_tb that ends the TB.  */
+    la_func_end(s, nb_globals, nb_temps);
 
     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, TCGOpHead, link, op_prev) {
-        int i, nb_iargs, nb_oargs;
+        int nb_iargs, nb_oargs;
         TCGOpcode opc_new, opc_new2;
         bool have_opc_new2;
         TCGLifeData arg_life = 0;
-        TCGTemp *arg_ts;
+        TCGTemp *ts;
         TCGOpcode opc = op->opc;
         const TCGOpDef *def = &tcg_op_defs[opc];
 
@@ -2282,6 +2494,7 @@ static void liveness_pass_1(TCGContext *s)
         case INDEX_op_call:
             {
                 int call_flags;
+                int nb_call_regs;
 
                 nb_oargs = TCGOP_CALLO(op);
                 nb_iargs = TCGOP_CALLI(op);
@@ -2290,53 +2503,74 @@ static void liveness_pass_1(TCGContext *s)
                 /* pure functions can be removed if their result is unused */
                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                     for (i = 0; i < nb_oargs; i++) {
-                        arg_ts = arg_temp(op->args[i]);
-                        if (arg_ts->state != TS_DEAD) {
+                        ts = arg_temp(op->args[i]);
+                        if (ts->state != TS_DEAD) {
                             goto do_not_remove_call;
                         }
                     }
                     goto do_remove;
-                } else {
-                do_not_remove_call:
+                }
+            do_not_remove_call:
 
-                    /* output args are dead */
-                    for (i = 0; i < nb_oargs; i++) {
-                        arg_ts = arg_temp(op->args[i]);
-                        if (arg_ts->state & TS_DEAD) {
-                            arg_life |= DEAD_ARG << i;
-                        }
-                        if (arg_ts->state & TS_MEM) {
-                            arg_life |= SYNC_ARG << i;
-                        }
-                        arg_ts->state = TS_DEAD;
+                /* Output args are dead.  */
+                for (i = 0; i < nb_oargs; i++) {
+                    ts = arg_temp(op->args[i]);
+                    if (ts->state & TS_DEAD) {
+                        arg_life |= DEAD_ARG << i;
                     }
+                    if (ts->state & TS_MEM) {
+                        arg_life |= SYNC_ARG << i;
+                    }
+                    ts->state = TS_DEAD;
+                    la_reset_pref(ts);
 
-                    if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
-                                        TCG_CALL_NO_READ_GLOBALS))) {
-                        /* globals should go back to memory */
-                        for (i = 0; i < nb_globals; i++) {
-                            s->temps[i].state = TS_DEAD | TS_MEM;
-                        }
-                    } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
-                        /* globals should be synced to memory */
-                        for (i = 0; i < nb_globals; i++) {
-                            s->temps[i].state |= TS_MEM;
-                        }
+                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
+                    op->output_pref[i] = 0;
+                }
+
+                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
+                                    TCG_CALL_NO_READ_GLOBALS))) {
+                    la_global_kill(s, nb_globals);
+                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
+                    la_global_sync(s, nb_globals);
+                }
+
+                /* Record arguments that die in this helper.  */
+                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+                    ts = arg_temp(op->args[i]);
+                    if (ts && ts->state & TS_DEAD) {
+                        arg_life |= DEAD_ARG << i;
                     }
+                }
 
-                    /* record arguments that die in this helper */
-                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
-                        arg_ts = arg_temp(op->args[i]);
-                        if (arg_ts && arg_ts->state & TS_DEAD) {
-                            arg_life |= DEAD_ARG << i;
-                        }
+                /* For all live registers, remove call-clobbered prefs.  */
+                la_cross_call(s, nb_temps);
+
+                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
+
+                /* Input arguments are live for preceding opcodes.  */
+                for (i = 0; i < nb_iargs; i++) {
+                    ts = arg_temp(op->args[i + nb_oargs]);
+                    if (ts && ts->state & TS_DEAD) {
+                        /* For those arguments that die, and will be allocated
+                         * in registers, clear the register set for that arg,
+                         * to be filled in below.  For args that will be on
+                         * the stack, reset to any available reg.
+                         */
+                        *la_temp_pref(ts)
+                            = (i < nb_call_regs ? 0 :
+                               tcg_target_available_regs[ts->type]);
+                        ts->state &= ~TS_DEAD;
                     }
-                    /* input arguments are live for preceding opcodes */
-                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
-                        arg_ts = arg_temp(op->args[i]);
-                        if (arg_ts) {
-                            arg_ts->state &= ~TS_DEAD;
-                        }
+                }
+
+                /* For each input argument, add its input register to prefs.
+                   If a temp is used once, this produces a single set bit.  */
+                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
+                    ts = arg_temp(op->args[i + nb_oargs]);
+                    if (ts) {
+                        tcg_regset_set_reg(*la_temp_pref(ts),
+                                           tcg_target_call_iarg_regs[i]);
                     }
                 }
             }
@@ -2345,7 +2579,9 @@ static void liveness_pass_1(TCGContext *s)
             break;
         case INDEX_op_discard:
             /* mark the temporary as dead */
-            arg_temp(op->args[0])->state = TS_DEAD;
+            ts = arg_temp(op->args[0]);
+            ts->state = TS_DEAD;
+            la_reset_pref(ts);
             break;
 
         case INDEX_op_add2_i32:
@@ -2440,43 +2676,96 @@ static void liveness_pass_1(TCGContext *s)
                         goto do_not_remove;
                     }
                 }
-            do_remove:
-                tcg_op_remove(s, op);
-            } else {
-            do_not_remove:
-                /* output args are dead */
-                for (i = 0; i < nb_oargs; i++) {
-                    arg_ts = arg_temp(op->args[i]);
-                    if (arg_ts->state & TS_DEAD) {
-                        arg_life |= DEAD_ARG << i;
-                    }
-                    if (arg_ts->state & TS_MEM) {
-                        arg_life |= SYNC_ARG << i;
-                    }
-                    arg_ts->state = TS_DEAD;
+                goto do_remove;
+            }
+            goto do_not_remove;
+
+        do_remove:
+            tcg_op_remove(s, op);
+            break;
+
+        do_not_remove:
+            for (i = 0; i < nb_oargs; i++) {
+                ts = arg_temp(op->args[i]);
+
+                /* Remember the preference of the uses that followed.  */
+                op->output_pref[i] = *la_temp_pref(ts);
+
+                /* Output args are dead.  */
+                if (ts->state & TS_DEAD) {
+                    arg_life |= DEAD_ARG << i;
+                }
+                if (ts->state & TS_MEM) {
+                    arg_life |= SYNC_ARG << i;
                 }
+                ts->state = TS_DEAD;
+                la_reset_pref(ts);
+            }
 
-                /* if end of basic block, update */
-                if (def->flags & TCG_OPF_BB_END) {
-                    tcg_la_bb_end(s);
-                } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
-                    /* globals should be synced to memory */
-                    for (i = 0; i < nb_globals; i++) {
-                        s->temps[i].state |= TS_MEM;
-                    }
+            /* If end of basic block, update.  */
+            if (def->flags & TCG_OPF_BB_EXIT) {
+                la_func_end(s, nb_globals, nb_temps);
+            } else if (def->flags & TCG_OPF_BB_END) {
+                la_bb_end(s, nb_globals, nb_temps);
+            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+                la_global_sync(s, nb_globals);
+                if (def->flags & TCG_OPF_CALL_CLOBBER) {
+                    la_cross_call(s, nb_temps);
                 }
+            }
 
-                /* record arguments that die in this opcode */
-                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
-                    arg_ts = arg_temp(op->args[i]);
-                    if (arg_ts->state & TS_DEAD) {
-                        arg_life |= DEAD_ARG << i;
-                    }
+            /* Record arguments that die in this opcode.  */
+            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+                ts = arg_temp(op->args[i]);
+                if (ts->state & TS_DEAD) {
+                    arg_life |= DEAD_ARG << i;
+                }
+            }
+
+            /* Input arguments are live for preceding opcodes.  */
+            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+                ts = arg_temp(op->args[i]);
+                if (ts->state & TS_DEAD) {
+                    /* For operands that were dead, initially allow
+                       all regs for the type.  */
+                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
+                    ts->state &= ~TS_DEAD;
+                }
+            }
+
+            /* Incorporate constraints for this operand.  */
+            switch (opc) {
+            case INDEX_op_mov_i32:
+            case INDEX_op_mov_i64:
+                /* Note that these are TCG_OPF_NOT_PRESENT and do not
+                   have proper constraints.  That said, we special-case
+                   moves in order to propagate preferences backward.  */
+                if (IS_DEAD_ARG(1)) {
+                    *la_temp_pref(arg_temp(op->args[0]))
+                        = *la_temp_pref(arg_temp(op->args[1]));
                 }
-                /* input arguments are live for preceding opcodes */
+                break;
+
+            default:
                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
-                    arg_temp(op->args[i])->state &= ~TS_DEAD;
+                    const TCGArgConstraint *ct = &def->args_ct[i];
+                    TCGRegSet set, *pset;
+
+                    ts = arg_temp(op->args[i]);
+                    pset = la_temp_pref(ts);
+                    set = *pset;
+
+                    set &= ct->u.regs;
+                    if (ct->ct & TCG_CT_IALIAS) {
+                        set &= op->output_pref[ct->alias_index];
+                    }
+                    /* If the combination is not possible, restart.  */
+                    if (set == 0) {
+                        set = ct->u.regs;
+                    }
+                    *pset = set;
                 }
+                break;
             }
             break;
         }
@@ -2727,7 +3016,7 @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
     s->current_frame_offset += sizeof(tcg_target_long);
 }
 
-static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);
+static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
 
 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
    mark it free; otherwise mark it dead.  */
@@ -2755,8 +3044,8 @@ static inline void temp_dead(TCGContext *s, TCGTemp *ts)
    register needs to be allocated to store a constant.  If 'free_or_dead'
    is non-zero, subsequently release the temporary; if it is positive, the
    temp is dead; if it is negative, the temp is free.  */
-static void temp_sync(TCGContext *s, TCGTemp *ts,
-                      TCGRegSet allocated_regs, int free_or_dead)
+static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
+                      TCGRegSet preferred_regs, int free_or_dead)
 {
     if (ts->fixed_reg) {
         return;
@@ -2776,7 +3065,7 @@ static void temp_sync(TCGContext *s, TCGTemp *ts,
                 break;
             }
             temp_load(s, ts, tcg_target_available_regs[ts->type],
-                      allocated_regs);
+                      allocated_regs, preferred_regs);
             /* fallthrough */
 
         case TEMP_VAL_REG:
@@ -2803,35 +3092,76 @@ static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
 {
     TCGTemp *ts = s->reg_to_temp[reg];
     if (ts != NULL) {
-        temp_sync(s, ts, allocated_regs, -1);
+        temp_sync(s, ts, allocated_regs, 0, -1);
     }
 }
 
-/* Allocate a register belonging to reg1 & ~reg2 */
-static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
-                            TCGRegSet allocated_regs, bool rev)
+/**
+ * tcg_reg_alloc:
+ * @required_regs: Set of registers in which we must allocate.
+ * @allocated_regs: Set of registers which must be avoided.
+ * @preferred_regs: Set of registers we should prefer.
+ * @rev: True if we search the registers in "indirect" order.
+ *
+ * The allocated register must be in @required_regs & ~@allocated_regs,
+ * but if we can put it in @preferred_regs we may save a move later.
+ */
+static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
+                            TCGRegSet allocated_regs,
+                            TCGRegSet preferred_regs, bool rev)
 {
-    int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
+    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
+    TCGRegSet reg_ct[2];
     const int *order;
-    TCGReg reg;
-    TCGRegSet reg_ct;
 
-    reg_ct = desired_regs & ~allocated_regs;
+    reg_ct[1] = required_regs & ~allocated_regs;
+    tcg_debug_assert(reg_ct[1] != 0);
+    reg_ct[0] = reg_ct[1] & preferred_regs;
+
+    /* Skip the preferred_regs option if it cannot be satisfied,
+       or if the preference made no difference.  */
+    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
+
     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
 
-    /* first try free registers */
-    for(i = 0; i < n; i++) {
-        reg = order[i];
-        if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
-            return reg;
+    /* Try free registers, preferences first.  */
+    for (j = f; j < 2; j++) {
+        TCGRegSet set = reg_ct[j];
+
+        if (tcg_regset_single(set)) {
+            /* One register in the set.  */
+            TCGReg reg = tcg_regset_first(set);
+            if (s->reg_to_temp[reg] == NULL) {
+                return reg;
+            }
+        } else {
+            for (i = 0; i < n; i++) {
+                TCGReg reg = order[i];
+                if (s->reg_to_temp[reg] == NULL &&
+                    tcg_regset_test_reg(set, reg)) {
+                    return reg;
+                }
+            }
+        }
     }
 
-    /* XXX: do better spill choice */
-    for(i = 0; i < n; i++) {
-        reg = order[i];
-        if (tcg_regset_test_reg(reg_ct, reg)) {
+    /* We must spill something.  */
+    for (j = f; j < 2; j++) {
+        TCGRegSet set = reg_ct[j];
+
+        if (tcg_regset_single(set)) {
+            /* One register in the set.  */
+            TCGReg reg = tcg_regset_first(set);
             tcg_reg_free(s, reg, allocated_regs);
             return reg;
+        } else {
+            for (i = 0; i < n; i++) {
+                TCGReg reg = order[i];
+                if (tcg_regset_test_reg(set, reg)) {
+                    tcg_reg_free(s, reg, allocated_regs);
+                    return reg;
+                }
+            }
         }
     }
 
@@ -2841,7 +3171,7 @@ static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
 /* Make sure the temporary is in a register.  If needed, allocate the register
    from DESIRED while avoiding ALLOCATED.  */
 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
-                      TCGRegSet allocated_regs)
+                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
 {
     TCGReg reg;
 
@@ -2849,12 +3179,14 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
     case TEMP_VAL_REG:
         return;
     case TEMP_VAL_CONST:
-        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
+        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
+                            preferred_regs, ts->indirect_base);
         tcg_out_movi(s, ts->type, reg, ts->val);
         ts->mem_coherent = 0;
         break;
     case TEMP_VAL_MEM:
-        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
+        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
+                            preferred_regs, ts->indirect_base);
         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
         ts->mem_coherent = 1;
         break;
@@ -2924,7 +3256,8 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
 }
 
 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
-                                  tcg_target_ulong val, TCGLifeData arg_life)
+                                  tcg_target_ulong val, TCGLifeData arg_life,
+                                  TCGRegSet preferred_regs)
 {
     if (ots->fixed_reg) {
         /* For fixed registers, we do not do any constant propagation.  */
@@ -2940,7 +3273,7 @@ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
     ots->val = val;
     ots->mem_coherent = 0;
     if (NEED_SYNC_ARG(0)) {
-        temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0));
+        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
     } else if (IS_DEAD_ARG(0)) {
         temp_dead(s, ots);
     }
@@ -2951,17 +3284,18 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
     TCGTemp *ots = arg_temp(op->args[0]);
     tcg_target_ulong val = op->args[1];
 
-    tcg_reg_alloc_do_movi(s, ots, val, op->life);
+    tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
 }
 
 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
 {
     const TCGLifeData arg_life = op->life;
-    TCGRegSet allocated_regs;
+    TCGRegSet allocated_regs, preferred_regs;
     TCGTemp *ts, *ots;
     TCGType otype, itype;
 
     allocated_regs = s->reserved_regs;
+    preferred_regs = op->output_pref[0];
     ots = arg_temp(op->args[0]);
     ts = arg_temp(op->args[1]);
 
@@ -2975,7 +3309,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
         if (IS_DEAD_ARG(1)) {
             temp_dead(s, ts);
         }
-        tcg_reg_alloc_do_movi(s, ots, val, arg_life);
+        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
         return;
     }
 
@@ -2984,7 +3318,8 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
        the SOURCE value into its own register first, that way we
        don't have to reload SOURCE the next time it is used. */
     if (ts->val_type == TEMP_VAL_MEM) {
-        temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
+        temp_load(s, ts, tcg_target_available_regs[itype],
+                  allocated_regs, preferred_regs);
     }
 
     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
@@ -3014,7 +3349,8 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
                    input one. */
                 tcg_regset_set_reg(allocated_regs, ts->reg);
                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
-                                         allocated_regs, ots->indirect_base);
+                                         allocated_regs, preferred_regs,
+                                         ots->indirect_base);
             }
             tcg_out_mov(s, otype, ots->reg, ts->reg);
         }
@@ -3022,7 +3358,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
         ots->mem_coherent = 0;
         s->reg_to_temp[ots->reg] = ots;
         if (NEED_SYNC_ARG(0)) {
-            temp_sync(s, ots, allocated_regs, 0);
+            temp_sync(s, ots, allocated_regs, 0, 0);
         }
     }
 }
@@ -3054,6 +3390,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
 
     /* satisfy input constraints */ 
     for (k = 0; k < nb_iargs; k++) {
+        TCGRegSet i_preferred_regs, o_preferred_regs;
+
         i = def->sorted_args[nb_oargs + k];
         arg = op->args[i];
         arg_ct = &def->args_ct[i];
@@ -3064,17 +3402,18 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
             /* constant is OK for instruction */
             const_args[i] = 1;
             new_args[i] = ts->val;
-            goto iarg_end;
+            continue;
         }
 
-        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);
-
+        i_preferred_regs = o_preferred_regs = 0;
         if (arg_ct->ct & TCG_CT_IALIAS) {
+            o_preferred_regs = op->output_pref[arg_ct->alias_index];
             if (ts->fixed_reg) {
                 /* if fixed register, we must allocate a new register
                    if the alias is not the same register */
-                if (arg != op->args[arg_ct->alias_index])
+                if (arg != op->args[arg_ct->alias_index]) {
                     goto allocate_in_reg;
+                }
             } else {
                 /* if the input is aliased to an output and if it is
                    not dead after the instruction, we must allocate
@@ -3082,33 +3421,42 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
                 if (!IS_DEAD_ARG(i)) {
                     goto allocate_in_reg;
                 }
+
                 /* check if the current register has already been allocated
                    for another input aliased to an output */
-                int k2, i2;
-                for (k2 = 0 ; k2 < k ; k2++) {
-                    i2 = def->sorted_args[nb_oargs + k2];
-                    if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
-                        (new_args[i2] == ts->reg)) {
-                        goto allocate_in_reg;
+                if (ts->val_type == TEMP_VAL_REG) {
+                    int k2, i2;
+                    reg = ts->reg;
+                    for (k2 = 0 ; k2 < k ; k2++) {
+                        i2 = def->sorted_args[nb_oargs + k2];
+                        if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
+                            reg == new_args[i2]) {
+                            goto allocate_in_reg;
+                        }
                     }
                 }
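+                /* The input dies and aliases the output, so loading it
+                   directly into the output's preferred register may save
+                   a move later. */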
+                i_preferred_regs = o_preferred_regs;
             }
         }
+
+        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
         reg = ts->reg;
+
         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
             /* nothing to do : the constraint is satisfied */
         } else {
         allocate_in_reg:
             /* allocate a new register matching the constraint 
                and move the temporary register into it */
+            temp_load(s, ts, tcg_target_available_regs[ts->type],
+                      i_allocated_regs, 0);
             reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
-                                ts->indirect_base);
+                                o_preferred_regs, ts->indirect_base);
             tcg_out_mov(s, ts->type, reg, ts->reg);
         }
         new_args[i] = reg;
         const_args[i] = 0;
         tcg_regset_set_reg(i_allocated_regs, reg);
-    iarg_end: ;
     }
     
     /* mark dead temporaries and free the associated registers */
@@ -3147,7 +3495,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
             } else if (arg_ct->ct & TCG_CT_NEWREG) {
                 reg = tcg_reg_alloc(s, arg_ct->u.regs,
                                     i_allocated_regs | o_allocated_regs,
-                                    ts->indirect_base);
+                                    op->output_pref[k], ts->indirect_base);
             } else {
                 /* if fixed register, we try to use it */
                 reg = ts->reg;
@@ -3156,7 +3504,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
                     goto oarg_end;
                 }
                 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
-                                    ts->indirect_base);
+                                    op->output_pref[k], ts->indirect_base);
             }
             tcg_regset_set_reg(o_allocated_regs, reg);
             /* if a fixed register is used, then a move will be done afterwards */
@@ -3192,7 +3540,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
             tcg_out_mov(s, ts->type, ts->reg, reg);
         }
         if (NEED_SYNC_ARG(i)) {
-            temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
+            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
         } else if (IS_DEAD_ARG(i)) {
             temp_dead(s, ts);
         }
@@ -3248,7 +3596,7 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
         if (arg != TCG_CALL_DUMMY_ARG) {
             ts = arg_temp(arg);
             temp_load(s, ts, tcg_target_available_regs[ts->type],
-                      s->reserved_regs);
+                      s->reserved_regs, 0);
             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
         }
 #ifndef TCG_TARGET_STACK_GROWSUP
@@ -3263,17 +3611,18 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
         if (arg != TCG_CALL_DUMMY_ARG) {
             ts = arg_temp(arg);
             reg = tcg_target_call_iarg_regs[i];
-            tcg_reg_free(s, reg, allocated_regs);
 
             if (ts->val_type == TEMP_VAL_REG) {
                 if (ts->reg != reg) {
+                    tcg_reg_free(s, reg, allocated_regs);
                     tcg_out_mov(s, ts->type, reg, ts->reg);
                 }
             } else {
                 TCGRegSet arg_set = 0;
 
+                tcg_reg_free(s, reg, allocated_regs);
                 tcg_regset_set_reg(arg_set, reg);
-                temp_load(s, ts, arg_set, allocated_regs);
+                temp_load(s, ts, arg_set, allocated_regs, 0);
             }
 
             tcg_regset_set_reg(allocated_regs, reg);
@@ -3326,7 +3675,7 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
             ts->mem_coherent = 0;
             s->reg_to_temp[reg] = ts;
             if (NEED_SYNC_ARG(i)) {
-                temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
+                temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
             } else if (IS_DEAD_ARG(i)) {
                 temp_dead(s, ts);
             }
@@ -3476,7 +3825,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
                  && qemu_log_in_addr_range(tb->pc))) {
         qemu_log_lock();
         qemu_log("OP:\n");
-        tcg_dump_ops(s);
+        tcg_dump_ops(s, false);
         qemu_log("\n");
         qemu_log_unlock();
     }
@@ -3495,6 +3844,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
     atomic_set(&prof->la_time, prof->la_time - profile_getclock());
 #endif
 
+    reachable_code_pass(s);
     liveness_pass_1(s);
 
     if (s->nb_indirects > 0) {
@@ -3503,7 +3853,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
                      && qemu_log_in_addr_range(tb->pc))) {
             qemu_log_lock();
             qemu_log("OP before indirect lowering:\n");
-            tcg_dump_ops(s);
+            tcg_dump_ops(s, false);
             qemu_log("\n");
             qemu_log_unlock();
         }
@@ -3524,7 +3874,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
                  && qemu_log_in_addr_range(tb->pc))) {
         qemu_log_lock();
         qemu_log("OP after optimization and liveness analysis:\n");
-        tcg_dump_ops(s);
+        tcg_dump_ops(s, true);
         qemu_log("\n");
         qemu_log_unlock();
     }
diff --git a/tcg/tcg.h b/tcg/tcg.h
index ade692fdf5..3a629991ca 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -244,7 +244,8 @@ typedef struct TCGRelocation {
 
 typedef struct TCGLabel {
     unsigned has_value : 1;
-    unsigned id : 31;
+    unsigned id : 15;
+    unsigned refs : 16;
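+    /* refs: the number of branch opcodes that currently target this label */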
     union {
         uintptr_t value;
         tcg_insn_unit *value_ptr;
@@ -462,11 +463,13 @@ typedef TCGv_ptr TCGv_env;
 /* call flags */
 /* Helper does not read globals (either directly or through an exception). It
    implies TCG_CALL_NO_WRITE_GLOBALS. */
-#define TCG_CALL_NO_READ_GLOBALS    0x0010
+#define TCG_CALL_NO_READ_GLOBALS    0x0001
 /* Helper does not write globals */
-#define TCG_CALL_NO_WRITE_GLOBALS   0x0020
+#define TCG_CALL_NO_WRITE_GLOBALS   0x0002
 /* Helper can be safely suppressed if the return value is not used. */
-#define TCG_CALL_NO_SIDE_EFFECTS    0x0040
+#define TCG_CALL_NO_SIDE_EFFECTS    0x0004
+/* Helper is QEMU_NORETURN.  */
+#define TCG_CALL_NO_RETURN          0x0008
 
 /* convenience version of most used call flags */
 #define TCG_CALL_NO_RWG         TCG_CALL_NO_READ_GLOBALS
@@ -616,6 +619,9 @@ typedef struct TCGOp {
 
     /* Arguments for the opcode.  */
     TCGArg args[MAX_OPC_PARAM];
+
+    /* Register preferences for the output(s).  */
+    TCGRegSet output_pref[2];
 } TCGOp;
 
 #define TCGOP_CALLI(X)    (X)->param1
@@ -1024,20 +1030,22 @@ typedef struct TCGArgConstraint {
 
 /* Bits for TCGOpDef->flags, 8 bits available.  */
 enum {
+    /* Instruction exits the translation block.  */
+    TCG_OPF_BB_EXIT      = 0x01,
     /* Instruction defines the end of a basic block.  */
-    TCG_OPF_BB_END       = 0x01,
+    TCG_OPF_BB_END       = 0x02,
     /* Instruction clobbers call registers and potentially updates globals.  */
-    TCG_OPF_CALL_CLOBBER = 0x02,
+    TCG_OPF_CALL_CLOBBER = 0x04,
     /* Instruction has side effects: it cannot be removed if its outputs
        are not used, and might trigger exceptions.  */
-    TCG_OPF_SIDE_EFFECTS = 0x04,
+    TCG_OPF_SIDE_EFFECTS = 0x08,
     /* Instruction operands are 64-bits (otherwise 32-bits).  */
-    TCG_OPF_64BIT        = 0x08,
+    TCG_OPF_64BIT        = 0x10,
     /* Instruction is optional and not implemented by the host, or insn
        is generic and should not be implemented by the host.  */
-    TCG_OPF_NOT_PRESENT  = 0x10,
+    TCG_OPF_NOT_PRESENT  = 0x20,
     /* Instruction operands are vectors.  */
-    TCG_OPF_VECTOR       = 0x20,
+    TCG_OPF_VECTOR       = 0x40,
 };
 
 typedef struct TCGOpDef {
@@ -1076,9 +1084,6 @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc);
 
 void tcg_optimize(TCGContext *s);
 
-/* only used for debugging purposes */
-void tcg_dump_ops(TCGContext *s);
-
 TCGv_i32 tcg_const_i32(int32_t val);
 TCGv_i64 tcg_const_i64(int64_t val);
 TCGv_i32 tcg_const_local_i32(int32_t val);
diff --git a/tests/acceptance/virtio_version.py b/tests/acceptance/virtio_version.py
new file mode 100644
index 0000000000..ce990250d8
--- /dev/null
+++ b/tests/acceptance/virtio_version.py
@@ -0,0 +1,176 @@
+"""
+Check compatibility of virtio device types
+"""
+# Copyright (c) 2018 Red Hat, Inc.
+#
+# Author:
+#  Eduardo Habkost <ehabkost@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+import sys
+import os
+
+sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", "scripts"))
+from qemu import QEMUMachine
+from avocado_qemu import Test
+
+# Virtio Device IDs:
+VIRTIO_NET = 1
+VIRTIO_BLOCK = 2
+VIRTIO_CONSOLE = 3
+VIRTIO_RNG = 4
+VIRTIO_BALLOON = 5
+VIRTIO_RPMSG = 7
+VIRTIO_SCSI = 8
+VIRTIO_9P = 9
+VIRTIO_RPROC_SERIAL = 11
+VIRTIO_CAIF = 12
+VIRTIO_GPU = 16
+VIRTIO_INPUT = 18
+VIRTIO_VSOCK = 19
+VIRTIO_CRYPTO = 20
+
+PCI_VENDOR_ID_REDHAT_QUMRANET = 0x1af4
+
+# Device IDs for legacy/transitional devices:
+PCI_LEGACY_DEVICE_IDS = {
+    VIRTIO_NET:     0x1000,
+    VIRTIO_BLOCK:   0x1001,
+    VIRTIO_BALLOON: 0x1002,
+    VIRTIO_CONSOLE: 0x1003,
+    VIRTIO_SCSI:    0x1004,
+    VIRTIO_RNG:     0x1005,
+    VIRTIO_9P:      0x1009,
+    VIRTIO_VSOCK:   0x1012,
+}
+
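+# Modern (non-transitional) virtio-pci device IDs are the virtio device
+# ID plus 0x1040, per the virtio 1.0 specification: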
+def pci_modern_device_id(virtio_devid):
+    return virtio_devid + 0x1040
+
+def devtype_implements(vm, devtype, implements):
+    return devtype in [d['name'] for d in
+                       vm.command('qom-list-types', implements=implements)]
+
+def get_pci_interfaces(vm, devtype):
+    interfaces = ('pci-express-device', 'conventional-pci-device')
+    return [i for i in interfaces if devtype_implements(vm, devtype, i)]
+
+class VirtioVersionCheck(Test):
+    """
+    Check if virtio-version-specific device types result in the
+    same device tree created by `disable-modern` and
+    `disable-legacy`.
+
+    :avocado: enable
+    :avocado: tags=x86_64
+    """
+
+    # Just in case there are failures, show a larger diff:
+    maxDiff = 4096
+
+    def run_device(self, devtype, opts=None, machine='pc'):
+        """
+        Run QEMU with `-device DEVTYPE`, return device info from `query-pci`
+        """
+        with QEMUMachine(self.qemu_bin) as vm:
+            vm.set_machine(machine)
+            if opts:
+                devtype += ',' + opts
+            vm.add_args('-device', '%s,id=devfortest' % (devtype))
+            vm.add_args('-S')
+            vm.launch()
+
+            pcibuses = vm.command('query-pci')
+            alldevs = [dev for bus in pcibuses for dev in bus['devices']]
+            devfortest = [dev for dev in alldevs
+                          if dev['qdev_id'] == 'devfortest']
+            return devfortest[0], get_pci_interfaces(vm, devtype)
+
+
+    def assert_devids(self, dev, devid, non_transitional=False):
+        self.assertEqual(dev['id']['vendor'], PCI_VENDOR_ID_REDHAT_QUMRANET)
+        self.assertEqual(dev['id']['device'], devid)
+        if non_transitional:
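+            # non-transitional devices must use the modern ID window
+            # (0x1040-0x107f) and a subsystem ID of at least 0x40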
+            self.assertTrue(0x1040 <= dev['id']['device'] <= 0x107f)
+            self.assertGreaterEqual(dev['id']['subsystem'], 0x40)
+
+    def check_all_variants(self, qemu_devtype, virtio_devid):
+        """Check if a virtio device type and its variants behave as expected"""
+        # Force modern mode:
+        dev_modern, _ = self.run_device(qemu_devtype,
+                                       'disable-modern=off,disable-legacy=on')
+        self.assert_devids(dev_modern, pci_modern_device_id(virtio_devid),
+                           non_transitional=True)
+
+        # <prefix>-non-transitional device types should be 100% equivalent to
+        # <prefix>,disable-modern=off,disable-legacy=on
+        dev_1_0, nt_ifaces = self.run_device('%s-non-transitional' % (qemu_devtype))
+        self.assertEqual(dev_modern, dev_1_0)
+
+        # Force transitional mode:
+        dev_trans, _ = self.run_device(qemu_devtype,
+                                      'disable-modern=off,disable-legacy=off')
+        self.assert_devids(dev_trans, PCI_LEGACY_DEVICE_IDS[virtio_devid])
+
+        # Force legacy mode:
+        dev_legacy, _ = self.run_device(qemu_devtype,
+                                       'disable-modern=on,disable-legacy=off')
+        self.assert_devids(dev_legacy, PCI_LEGACY_DEVICE_IDS[virtio_devid])
+
+        # No options: default to transitional on PC machine-type:
+        no_opts_pc, generic_ifaces = self.run_device(qemu_devtype)
+        self.assertEqual(dev_trans, no_opts_pc)
+
+        # TODO: check if plugging into a PCI Express bus will make the
+        #       device non-transitional
+        #no_opts_q35 = self.run_device(qemu_devtype, machine='q35')
+        #self.assertEqual(dev_modern, no_opts_q35)
+
+        # <prefix>-transitional device types should be 100% equivalent to
+        # <prefix>,disable-modern=off,disable-legacy=off
+        dev_trans_new, trans_ifaces = self.run_device('%s-transitional' % (qemu_devtype))
+        self.assertEqual(dev_trans, dev_trans_new)
+
+        # ensure the interface information is correct:
+        self.assertIn('conventional-pci-device', generic_ifaces)
+        self.assertIn('pci-express-device', generic_ifaces)
+
+        self.assertIn('conventional-pci-device', nt_ifaces)
+        self.assertIn('pci-express-device', nt_ifaces)
+
+        self.assertIn('conventional-pci-device', trans_ifaces)
+        self.assertNotIn('pci-express-device', trans_ifaces)
+
+
+    def test_conventional_devs(self):
+        self.check_all_variants('virtio-net-pci', VIRTIO_NET)
+        # virtio-blk requires 'driver' parameter
+        #self.check_all_variants('virtio-blk-pci', VIRTIO_BLOCK)
+        self.check_all_variants('virtio-serial-pci', VIRTIO_CONSOLE)
+        self.check_all_variants('virtio-rng-pci', VIRTIO_RNG)
+        self.check_all_variants('virtio-balloon-pci', VIRTIO_BALLOON)
+        self.check_all_variants('virtio-scsi-pci', VIRTIO_SCSI)
+        # virtio-9p requires 'fsdev' parameter
+        #self.check_all_variants('virtio-9p-pci', VIRTIO_9P)
+
+    def check_modern_only(self, qemu_devtype, virtio_devid):
+        """Check if a modern-only virtio device type behaves as expected"""
+        # Force modern mode:
+        dev_modern, _ = self.run_device(qemu_devtype,
+                                       'disable-modern=off,disable-legacy=on')
+        self.assert_devids(dev_modern, pci_modern_device_id(virtio_devid),
+                           non_transitional=True)
+
+        # No options: should be modern anyway
+        dev_no_opts, ifaces = self.run_device(qemu_devtype)
+        self.assertEqual(dev_modern, dev_no_opts)
+
+        self.assertIn('conventional-pci-device', ifaces)
+        self.assertIn('pci-express-device', ifaces)
+
+    def test_modern_only_devs(self):
+        self.check_modern_only('virtio-vga', VIRTIO_GPU)
+        self.check_modern_only('virtio-gpu-pci', VIRTIO_GPU)
+        self.check_modern_only('virtio-mouse-pci', VIRTIO_INPUT)
+        self.check_modern_only('virtio-tablet-pci', VIRTIO_INPUT)
+        self.check_modern_only('virtio-keyboard-pci', VIRTIO_INPUT)
diff --git a/tests/acpi-utils.c b/tests/acpi-utils.c
index 6dc8ca1a8c..17abcc43a4 100644
--- a/tests/acpi-utils.c
+++ b/tests/acpi-utils.c
@@ -15,7 +15,6 @@
 #include "qemu/osdep.h"
 #include <glib/gstdio.h>
 #include "qemu-common.h"
-#include "hw/smbios/smbios.h"
 #include "qemu/bitmap.h"
 #include "acpi-utils.h"
 #include "boot-sector.h"
@@ -52,15 +51,44 @@ uint32_t acpi_find_rsdp_address(QTestState *qts)
     return off;
 }
 
-void acpi_parse_rsdp_table(QTestState *qts, uint32_t addr,
-                           AcpiRsdpDescriptor *rsdp_table)
+uint32_t acpi_get_rsdt_address(uint8_t *rsdp_table)
 {
-    ACPI_READ_FIELD(qts, rsdp_table->signature, addr);
-    ACPI_ASSERT_CMP64(rsdp_table->signature, "RSD PTR ");
-
-    ACPI_READ_FIELD(qts, rsdp_table->checksum, addr);
-    ACPI_READ_ARRAY(qts, rsdp_table->oem_id, addr);
-    ACPI_READ_FIELD(qts, rsdp_table->revision, addr);
-    ACPI_READ_FIELD(qts, rsdp_table->rsdt_physical_address, addr);
-    ACPI_READ_FIELD(qts, rsdp_table->length, addr);
+    uint32_t rsdt_physical_address;
+
+    memcpy(&rsdt_physical_address, &rsdp_table[16 /* RsdtAddress offset */], 4);
+    return le32_to_cpu(rsdt_physical_address);
+}
+
+uint64_t acpi_get_xsdt_address(uint8_t *rsdp_table)
+{
+    uint64_t xsdt_physical_address;
+    uint8_t revision = rsdp_table[15 /* Revision offset */];
+
+    /* We must have revision 2 if we're looking for an XSDT pointer */
+    g_assert(revision == 2);
+
+    memcpy(&xsdt_physical_address, &rsdp_table[24 /* XsdtAddress offset */], 8);
+    return le64_to_cpu(xsdt_physical_address);
+}
+
+void acpi_parse_rsdp_table(QTestState *qts, uint32_t addr, uint8_t *rsdp_table)
+{
+    uint8_t revision;
+
+    /* Read mandatory revision 0 table data (20 bytes) first */
+    qtest_memread(qts, addr, rsdp_table, 20);
+    revision = rsdp_table[15 /* Revision offset */];
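+    /* Only revisions 0 (ACPI 1.0) and 2 (ACPI 2.0+) are defined for the
+       RSDP; any other value is unexpected. */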
+
+    switch (revision) {
+    case 0: /* ACPI 1.0 RSDP */
+        break;
+    case 2: /* ACPI 2.0+ RSDP */
+        /* Read the rest of the RSDP table */
+        qtest_memread(qts, addr + 20, rsdp_table + 20, 16);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    ACPI_ASSERT_CMP64(*((uint64_t *)(rsdp_table)), "RSD PTR ");
 }
diff --git a/tests/acpi-utils.h b/tests/acpi-utils.h
index 791bd5410e..c5b0e12aa2 100644
--- a/tests/acpi-utils.h
+++ b/tests/acpi-utils.h
@@ -74,7 +74,8 @@ typedef struct {
 
 uint8_t acpi_calc_checksum(const uint8_t *data, int len);
 uint32_t acpi_find_rsdp_address(QTestState *qts);
-void acpi_parse_rsdp_table(QTestState *qts, uint32_t addr,
-                           AcpiRsdpDescriptor *rsdp_table);
+uint32_t acpi_get_rsdt_address(uint8_t *rsdp_table);
+uint64_t acpi_get_xsdt_address(uint8_t *rsdp_table);
+void acpi_parse_rsdp_table(QTestState *qts, uint32_t addr, uint8_t *rsdp_table);
 
 #endif  /* TEST_ACPI_UTILS_H */
diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index bac4b0171b..d455b2abfc 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -13,7 +13,7 @@
 #include "qemu/osdep.h"
 #include <glib/gstdio.h>
 #include "qemu-common.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
 #include "qemu/bitmap.h"
 #include "acpi-utils.h"
 #include "boot-sector.h"
@@ -27,7 +27,7 @@ typedef struct {
     const char *machine;
     const char *variant;
     uint32_t rsdp_addr;
-    AcpiRsdpDescriptor rsdp_table;
+    uint8_t rsdp_table[36 /* ACPI 2.0+ RSDP size */];
     AcpiRsdtDescriptorRev1 rsdt_table;
     uint32_t dsdt_addr;
     uint32_t facs_addr;
@@ -86,19 +86,31 @@ static void test_acpi_rsdp_address(test_data *data)
 
 static void test_acpi_rsdp_table(test_data *data)
 {
-    AcpiRsdpDescriptor *rsdp_table = &data->rsdp_table;
+    uint8_t *rsdp_table = data->rsdp_table, revision;
     uint32_t addr = data->rsdp_addr;
 
     acpi_parse_rsdp_table(data->qts, addr, rsdp_table);
+    revision = rsdp_table[15 /* Revision offset */];
 
-    /* rsdp checksum is not for the whole table, but for the first 20 bytes */
-    g_assert(!acpi_calc_checksum((uint8_t *)rsdp_table, 20));
+    switch (revision) {
+    case 0: /* ACPI 1.0 RSDP */
+        /* With revision 0, the checksum covers only the first 20 bytes */
+        g_assert(!acpi_calc_checksum(rsdp_table, 20));
+        break;
+    case 2: /* ACPI 2.0+ RSDP */
+        /* With revision 2, we have 2 checksums */
+        g_assert(!acpi_calc_checksum(rsdp_table, 20));
+        g_assert(!acpi_calc_checksum(rsdp_table, 36));
+        break;
+    default:
+        g_assert_not_reached();
+    }
 }
 
 static void test_acpi_rsdt_table(test_data *data)
 {
     AcpiRsdtDescriptorRev1 *rsdt_table = &data->rsdt_table;
-    uint32_t addr = le32_to_cpu(data->rsdp_table.rsdt_physical_address);
+    uint32_t addr = acpi_get_rsdt_address(data->rsdp_table);
     uint32_t *tables;
     int tables_nr;
     uint8_t checksum;
diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c
index f4a677d238..668f00144e 100644
--- a/tests/cpu-plug-test.c
+++ b/tests/cpu-plug-test.c
@@ -157,9 +157,7 @@ static void add_pc_test_case(const char *mname)
         (strcmp(mname, "pc-0.15") == 0) ||
         (strcmp(mname, "pc-0.14") == 0) ||
         (strcmp(mname, "pc-0.13") == 0) ||
-        (strcmp(mname, "pc-0.12") == 0) ||
-        (strcmp(mname, "pc-0.11") == 0) ||
-        (strcmp(mname, "pc-0.10") == 0)) {
+        (strcmp(mname, "pc-0.12") == 0)) {
         path = g_strdup_printf("cpu-plug/%s/init/%ux%ux%u&maxcpus=%u",
                                mname, data->sockets, data->cores,
                                data->threads, data->maxcpus);
diff --git a/tests/fp/platform.h b/tests/fp/platform.h
index c20ba70baa..f8c423dde3 100644
--- a/tests/fp/platform.h
+++ b/tests/fp/platform.h
@@ -29,7 +29,6 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-#include "config-host.h"
 
 #ifndef HOST_WORDS_BIGENDIAN
 #define LITTLEENDIAN 1
diff --git a/tests/tpm-util.h b/tests/tpm-util.h
index 9e98bc5124..5755698ad2 100644
--- a/tests/tpm-util.h
+++ b/tests/tpm-util.h
@@ -13,7 +13,6 @@
 #ifndef TESTS_TPM_UTIL_H
 #define TESTS_TPM_UTIL_H
 
-#include "qemu/osdep.h"
 #include "io/channel-socket.h"
 
 typedef void (tx_func)(QTestState *s,
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 0cf8d0baca..0033b61f2e 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -29,8 +29,8 @@
 
 #define _FILE_OFFSET_BITS 64
 
-#include "qemu/atomic.h"
 #include "qemu/osdep.h"
+#include "qemu/atomic.h"
 #include "qemu/iov.h"
 #include "standard-headers/linux/virtio_net.h"
 #include "contrib/libvhost-user/libvhost-user.h"
diff --git a/tests/vmgenid-test.c b/tests/vmgenid-test.c
index 84449ce8ae..1c1d435bbd 100644
--- a/tests/vmgenid-test.c
+++ b/tests/vmgenid-test.c
@@ -35,7 +35,7 @@ static uint32_t acpi_find_vgia(QTestState *qts)
 {
     uint32_t rsdp_offset;
     uint32_t guid_offset = 0;
-    AcpiRsdpDescriptor rsdp_table;
+    uint8_t rsdp_table[36 /* ACPI 2.0+ RSDP size */];
     uint32_t rsdt, rsdt_table_length;
     AcpiRsdtDescriptorRev1 rsdt_table;
     size_t tables_nr;
@@ -52,9 +52,11 @@ static uint32_t acpi_find_vgia(QTestState *qts)
 
     g_assert_cmphex(rsdp_offset, <, RSDP_ADDR_INVALID);
 
-    acpi_parse_rsdp_table(qts, rsdp_offset, &rsdp_table);
+    acpi_parse_rsdp_table(qts, rsdp_offset, rsdp_table);
+
+    rsdt = acpi_get_rsdt_address(rsdp_table);
+    g_assert(rsdt);
 
-    rsdt = le32_to_cpu(rsdp_table.rsdt_physical_address);
     /* read the header */
     ACPI_READ_TABLE_HEADER(qts, &rsdt_table, rsdt);
     ACPI_ASSERT_CMP(rsdt_table.signature, "RSDT");
diff --git a/util/qemu-thread-common.h b/util/qemu-thread-common.h
index a0ea7c0d92..2af6b12085 100644
--- a/util/qemu-thread-common.h
+++ b/util/qemu-thread-common.h
@@ -13,7 +13,6 @@
 #ifndef QEMU_THREAD_COMMON_H
 #define QEMU_THREAD_COMMON_H
 
-#include "qemu/typedefs.h"
 #include "qemu/thread.h"
 #include "trace.h"
 
diff --git a/vl.c b/vl.c
index f5c8ef973c..8353d3c718 100644
--- a/vl.c
+++ b/vl.c
@@ -61,7 +61,7 @@ int main(int argc, char **argv)
 #include "hw/display/vga.h"
 #include "hw/bt.h"
 #include "sysemu/watchdog.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
 #include "hw/acpi/acpi.h"
 #include "hw/xen/xen.h"
 #include "hw/qdev.h"
@@ -1577,6 +1577,8 @@ static NotifierList suspend_notifiers =
     NOTIFIER_LIST_INITIALIZER(suspend_notifiers);
 static NotifierList wakeup_notifiers =
     NOTIFIER_LIST_INITIALIZER(wakeup_notifiers);
+static NotifierList shutdown_notifiers =
+    NOTIFIER_LIST_INITIALIZER(shutdown_notifiers);
 static uint32_t wakeup_reason_mask = ~(1 << QEMU_WAKEUP_REASON_NONE);
 
 ShutdownCause qemu_shutdown_requested_get(void)
@@ -1828,6 +1830,12 @@ static void qemu_system_powerdown(void)
     notifier_list_notify(&powerdown_notifiers, NULL);
 }
 
+static void qemu_system_shutdown(ShutdownCause cause)
+{
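+    /* Emit the QAPI SHUTDOWN event first, then run any registered
+       shutdown notifiers with the cause. */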
+    qapi_event_send_shutdown(shutdown_caused_by_guest(cause), cause);
+    notifier_list_notify(&shutdown_notifiers, &cause);
+}
+
 void qemu_system_powerdown_request(void)
 {
     trace_qemu_system_powerdown_request();
@@ -1840,6 +1848,11 @@ void qemu_register_powerdown_notifier(Notifier *notifier)
     notifier_list_add(&powerdown_notifiers, notifier);
 }
 
+void qemu_register_shutdown_notifier(Notifier *notifier)
+{
+    notifier_list_add(&shutdown_notifiers, notifier);
+}
+
 void qemu_system_debug_request(void)
 {
     debug_requested = 1;
@@ -1867,7 +1880,7 @@ static bool main_loop_should_exit(void)
     request = qemu_shutdown_requested();
     if (request) {
         qemu_kill_report();
-        qapi_event_send_shutdown(shutdown_caused_by_guest(request), request);
+        qemu_system_shutdown(request);
         if (no_shutdown) {
             vm_stop(RUN_STATE_SHUTDOWN);
         } else {