summary refs log tree commit diff stats
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/9pfs/virtio-9p.c23
-rw-r--r--hw/Makefile.objs4
-rw-r--r--hw/ac97.c3
-rw-r--r--hw/apic-msidef.h30
-rw-r--r--hw/apic.c38
-rw-r--r--hw/apic.h6
-rw-r--r--hw/apic_common.c16
-rw-r--r--hw/apic_internal.h1
-rw-r--r--hw/arm-misc.h2
-rw-r--r--hw/arm/Makefile.objs3
-rw-r--r--hw/arm_boot.c46
-rw-r--r--hw/arm_gic.c2
-rw-r--r--hw/block-common.c64
-rw-r--r--hw/block-common.h79
-rw-r--r--hw/bt-l2cap.c11
-rw-r--r--hw/cadence_gem.c14
-rw-r--r--hw/cirrus_vga.c8
-rw-r--r--hw/dp8393x.c9
-rw-r--r--hw/e1000.c15
-rw-r--r--hw/eepro100.c28
-rw-r--r--hw/es1370.c3
-rw-r--r--hw/escc.c1
-rw-r--r--hw/esp-pci.c518
-rw-r--r--hw/esp.c319
-rw-r--r--hw/esp.h119
-rw-r--r--hw/etraxfs_eth.c10
-rw-r--r--hw/exynos4210.c37
-rw-r--r--hw/exynos4210.h3
-rw-r--r--hw/exynos4210_i2c.c334
-rw-r--r--hw/exynos4210_mct.c4
-rw-r--r--hw/exynos4210_pwm.c2
-rw-r--r--hw/exynos4210_rtc.c592
-rw-r--r--hw/exynos4_boards.c2
-rw-r--r--hw/fdc.c244
-rw-r--r--hw/fdc.h10
-rw-r--r--hw/hd-geometry.c157
-rw-r--r--hw/highbank.c2
-rw-r--r--hw/i386/Makefile.objs2
-rw-r--r--hw/ide.h4
-rw-r--r--hw/ide/ahci.c33
-rw-r--r--hw/ide/ahci.h3
-rw-r--r--hw/ide/atapi.c31
-rw-r--r--hw/ide/cmd646.c10
-rw-r--r--hw/ide/core.c50
-rw-r--r--hw/ide/ich.c6
-rw-r--r--hw/ide/internal.h8
-rw-r--r--hw/ide/macio.c4
-rw-r--r--hw/ide/piix.c7
-rw-r--r--hw/ide/qdev.c42
-rw-r--r--hw/ide/via.c7
-rw-r--r--hw/imx.h34
-rw-r--r--hw/imx_avic.c408
-rw-r--r--hw/imx_ccm.c321
-rw-r--r--hw/imx_serial.c467
-rw-r--r--hw/imx_timer.c689
-rw-r--r--hw/integratorcp.c2
-rw-r--r--hw/intel-hda.c3
-rw-r--r--hw/ioh3420.c8
-rw-r--r--hw/ivshmem.c81
-rw-r--r--hw/kzm.c154
-rw-r--r--hw/lan9118.c16
-rw-r--r--hw/lance.c4
-rw-r--r--hw/lsi53c895a.c55
-rw-r--r--hw/mcf5208.c2
-rw-r--r--hw/mcf_fec.c9
-rw-r--r--hw/megasas.c2209
-rw-r--r--hw/mfi.h1249
-rw-r--r--hw/milkymist-minimac2.c8
-rw-r--r--hw/mips_jazz.c2
-rw-r--r--hw/mips_mipssim.c2
-rw-r--r--hw/mips_r4k.c2
-rw-r--r--hw/mipsnet.c8
-rw-r--r--hw/msi.c17
-rw-r--r--hw/msi.h1
-rw-r--r--hw/msix.c290
-rw-r--r--hw/msix.h19
-rw-r--r--hw/musicpal.c8
-rw-r--r--hw/ne2000-isa.c4
-rw-r--r--hw/ne2000.c21
-rw-r--r--hw/ne2000.h4
-rw-r--r--hw/omap.h8
-rw-r--r--hw/opencores_eth.c10
-rw-r--r--hw/openrisc/Makefile.objs3
-rw-r--r--hw/openrisc_pic.c60
-rw-r--r--hw/openrisc_sim.c150
-rw-r--r--hw/openrisc_timer.c101
-rw-r--r--hw/pc.c99
-rw-r--r--hw/pc_piix.c36
-rw-r--r--hw/pci.c74
-rw-r--r--hw/pci.h67
-rw-r--r--hw/pci_bridge.c3
-rw-r--r--hw/pci_bridge.h2
-rw-r--r--hw/pci_bridge_dev.c13
-rw-r--r--hw/pci_ids.h4
-rw-r--r--hw/pci_internals.h3
-rw-r--r--hw/pcnet-pci.c9
-rw-r--r--hw/pcnet.c6
-rw-r--r--hw/pcnet.h6
-rw-r--r--hw/piix_pci.c20
-rw-r--r--hw/pl011.c4
-rw-r--r--hw/ppc/Makefile.objs2
-rw-r--r--hw/ppce500_spin.c2
-rw-r--r--hw/qdev-dma.h10
-rw-r--r--hw/qdev-monitor.c4
-rw-r--r--hw/qdev-properties.c323
-rw-r--r--hw/qdev.c7
-rw-r--r--hw/qdev.h26
-rw-r--r--hw/qxl.c138
-rw-r--r--hw/qxl.h8
-rw-r--r--hw/rtl8139.c39
-rw-r--r--hw/s390-virtio-bus.c1
-rw-r--r--hw/scsi-bus.c292
-rw-r--r--hw/scsi-defs.h19
-rw-r--r--hw/scsi-disk.c870
-rw-r--r--hw/scsi-generic.c13
-rw-r--r--hw/scsi.h15
-rw-r--r--hw/sh_serial.c6
-rw-r--r--hw/smc91c111.c8
-rw-r--r--hw/spapr.c3
-rw-r--r--hw/spapr.h18
-rw-r--r--hw/spapr_iommu.c246
-rw-r--r--hw/spapr_llan.c69
-rw-r--r--hw/spapr_pci.c17
-rw-r--r--hw/spapr_pci.h1
-rw-r--r--hw/spapr_vio.c281
-rw-r--r--hw/spapr_vio.h73
-rw-r--r--hw/spapr_vscsi.c26
-rw-r--r--hw/spapr_vty.c2
-rw-r--r--hw/stellaris_enet.c8
-rw-r--r--hw/sun4m.c18
-rw-r--r--hw/usb.h6
-rw-r--r--hw/usb/Makefile.objs1
-rw-r--r--hw/usb/bus.c13
-rw-r--r--hw/usb/core.c23
-rw-r--r--hw/usb/dev-network.c10
-rw-r--r--hw/usb/dev-storage.c21
-rw-r--r--hw/usb/dev-uas.c779
-rw-r--r--hw/usb/hcd-ehci.c246
-rw-r--r--hw/usb/hcd-ohci.c93
-rw-r--r--hw/usb/hcd-uhci.c21
-rw-r--r--hw/usb/host-linux.c94
-rw-r--r--hw/usb/libhw.c21
-rw-r--r--hw/usb/redirect.c2
-rw-r--r--hw/vexpress.c15
-rw-r--r--hw/vga-isa-mm.c5
-rw-r--r--hw/vga-isa.c8
-rw-r--r--hw/vga-pci.c8
-rw-r--r--hw/vga.c27
-rw-r--r--hw/vga_int.h10
-rw-r--r--hw/vhost.c4
-rw-r--r--hw/vhost_net.c26
-rw-r--r--hw/vhost_net.h2
-rw-r--r--hw/virtio-balloon.c4
-rw-r--r--hw/virtio-blk.c25
-rw-r--r--hw/virtio-blk.h2
-rw-r--r--hw/virtio-net.c26
-rw-r--r--hw/virtio-pci.c131
-rw-r--r--hw/virtio-pci.h1
-rw-r--r--hw/virtio-scsi.c113
-rw-r--r--hw/virtio-serial-bus.c10
-rw-r--r--hw/virtio.c49
-rw-r--r--hw/virtio.h5
-rw-r--r--hw/vmware_vga.c22
-rw-r--r--hw/watchdog.c2
-rw-r--r--hw/wdt_i6300esb.c4
-rw-r--r--hw/xen-host-pci-device.c396
-rw-r--r--hw/xen-host-pci-device.h55
-rw-r--r--hw/xen_backend.c6
-rw-r--r--hw/xen_backend.h1
-rw-r--r--hw/xen_common.h9
-rw-r--r--hw/xen_console.c5
-rw-r--r--hw/xen_devconfig.c10
-rw-r--r--hw/xen_disk.c6
-rw-r--r--hw/xen_nic.c16
-rw-r--r--hw/xen_platform.c8
-rw-r--r--hw/xen_pt.c849
-rw-r--r--hw/xen_pt.h301
-rw-r--r--hw/xen_pt_config_init.c1869
-rw-r--r--hw/xen_pt_msi.c620
-rw-r--r--hw/xenfb.c13
-rw-r--r--hw/xgmac.c8
-rw-r--r--hw/xilinx_axienet.c9
-rw-r--r--hw/xilinx_ethlite.c8
-rw-r--r--hw/xio3130_downstream.c8
-rw-r--r--hw/xio3130_upstream.c8
-rw-r--r--hw/xtensa_lx60.c8
-rw-r--r--hw/xtensa_sim.c5
187 files changed, 16252 insertions, 2081 deletions
diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index c633fb9b7e..4b52540116 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -983,11 +983,16 @@ static void v9fs_attach(void *opaque)
     err += offset;
     trace_v9fs_attach_return(pdu->tag, pdu->id,
                              qid.type, qid.version, qid.path);
-    s->root_fid = fid;
-    /* disable migration */
-    error_set(&s->migration_blocker, QERR_VIRTFS_FEATURE_BLOCKS_MIGRATION,
-              s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
-    migrate_add_blocker(s->migration_blocker);
+    /*
+     * disable migration if we haven't done already.
+     * attach could get called multiple times for the same export.
+     */
+    if (!s->migration_blocker) {
+        s->root_fid = fid;
+        error_set(&s->migration_blocker, QERR_VIRTFS_FEATURE_BLOCKS_MIGRATION,
+                  s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
+        migrate_add_blocker(s->migration_blocker);
+    }
 out:
     put_fid(pdu, fidp);
 out_nofid:
@@ -1648,7 +1653,7 @@ out:
  * with qemu_iovec_destroy().
  */
 static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
-                                    uint64_t skip, size_t size,
+                                    size_t skip, size_t size,
                                     bool is_write)
 {
     QEMUIOVector elem;
@@ -1665,7 +1670,7 @@ static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
 
     qemu_iovec_init_external(&elem, iov, niov);
     qemu_iovec_init(qiov, niov);
-    qemu_iovec_copy(qiov, &elem, skip, size);
+    qemu_iovec_concat(qiov, &elem, skip, size);
 }
 
 static void v9fs_read(void *opaque)
@@ -1715,7 +1720,7 @@ static void v9fs_read(void *opaque)
         qemu_iovec_init(&qiov, qiov_full.niov);
         do {
             qemu_iovec_reset(&qiov);
-            qemu_iovec_copy(&qiov, &qiov_full, count, qiov_full.size - count);
+            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
             if (0) {
                 print_sg(qiov.iov, qiov.niov);
             }
@@ -1970,7 +1975,7 @@ static void v9fs_write(void *opaque)
     qemu_iovec_init(&qiov, qiov_full.niov);
     do {
         qemu_iovec_reset(&qiov);
-        qemu_iovec_copy(&qiov, &qiov_full, total, qiov_full.size - total);
+        qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
         if (0) {
             print_sg(qiov.iov, qiov.niov);
         }
diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index 3d7725934f..12cc141796 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -86,7 +86,9 @@ hw-obj-$(CONFIG_OPENCORES_ETH) += opencores_eth.o
 
 # SCSI layer
 hw-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o
+hw-obj-$(CONFIG_MEGASAS_SCSI_PCI) += megasas.o
 hw-obj-$(CONFIG_ESP) += esp.o
+hw-obj-$(CONFIG_ESP_PCI) += esp-pci.o
 
 hw-obj-y += sysbus.o isa-bus.o
 hw-obj-y += qdev-addr.o
@@ -137,7 +139,7 @@ common-obj-$(CONFIG_MAX111X) += max111x.o
 common-obj-$(CONFIG_DS1338) += ds1338.o
 common-obj-y += i2c.o smbus.o smbus_eeprom.o
 common-obj-y += eeprom93xx.o
-common-obj-y += scsi-disk.o cdrom.o
+common-obj-y += scsi-disk.o cdrom.o hd-geometry.o block-common.o
 common-obj-y += scsi-generic.o scsi-bus.o
 common-obj-y += hid.o
 common-obj-$(CONFIG_SSI) += ssi.o
diff --git a/hw/ac97.c b/hw/ac97.c
index e791b9d3e6..0f561fa5c1 100644
--- a/hw/ac97.c
+++ b/hw/ac97.c
@@ -1319,13 +1319,12 @@ static int ac97_initfn (PCIDevice *dev)
     return 0;
 }
 
-static int ac97_exitfn (PCIDevice *dev)
+static void ac97_exitfn (PCIDevice *dev)
 {
     AC97LinkState *s = DO_UPCAST (AC97LinkState, dev, dev);
 
     memory_region_destroy (&s->io_nam);
     memory_region_destroy (&s->io_nabm);
-    return 0;
 }
 
 int ac97_init (PCIBus *bus)
diff --git a/hw/apic-msidef.h b/hw/apic-msidef.h
new file mode 100644
index 0000000000..6e2eb71f2f
--- /dev/null
+++ b/hw/apic-msidef.h
@@ -0,0 +1,30 @@
+#ifndef HW_APIC_MSIDEF_H
+#define HW_APIC_MSIDEF_H
+
+/*
+ * Intel APIC constants: from include/asm/msidef.h
+ */
+
+/*
+ * Shifts for MSI data
+ */
+
+#define MSI_DATA_VECTOR_SHIFT           0
+#define  MSI_DATA_VECTOR_MASK           0x000000ff
+
+#define MSI_DATA_DELIVERY_MODE_SHIFT    8
+#define MSI_DATA_LEVEL_SHIFT            14
+#define MSI_DATA_TRIGGER_SHIFT          15
+
+/*
+ * Shift/mask fields for msi address
+ */
+
+#define MSI_ADDR_DEST_MODE_SHIFT        2
+
+#define MSI_ADDR_REDIRECTION_SHIFT      3
+
+#define MSI_ADDR_DEST_ID_SHIFT          12
+#define  MSI_ADDR_DEST_ID_MASK          0x00ffff0
+
+#endif /* HW_APIC_MSIDEF_H */
diff --git a/hw/apic.c b/hw/apic.c
index 5fbf01c278..385555eb43 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -16,6 +16,7 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, see <http://www.gnu.org/licenses/>
  */
+#include "qemu-thread.h"
 #include "apic_internal.h"
 #include "apic.h"
 #include "ioapic.h"
@@ -23,19 +24,10 @@
 #include "host-utils.h"
 #include "trace.h"
 #include "pc.h"
+#include "apic-msidef.h"
 
 #define MAX_APIC_WORDS 8
 
-/* Intel APIC constants: from include/asm/msidef.h */
-#define MSI_DATA_VECTOR_SHIFT		0
-#define MSI_DATA_VECTOR_MASK		0x000000ff
-#define MSI_DATA_DELIVERY_MODE_SHIFT	8
-#define MSI_DATA_TRIGGER_SHIFT		15
-#define MSI_DATA_LEVEL_SHIFT		14
-#define MSI_ADDR_DEST_MODE_SHIFT	2
-#define MSI_ADDR_DEST_ID_SHIFT		12
-#define	MSI_ADDR_DEST_ID_MASK		0x00ffff0
-
 #define SYNC_FROM_VAPIC                 0x1
 #define SYNC_TO_VAPIC                   0x2
 #define SYNC_ISR_IRR_TO_VAPIC           0x4
@@ -370,11 +362,10 @@ static void apic_update_irq(APICCommonState *s)
     if (!(s->spurious_vec & APIC_SV_ENABLE)) {
         return;
     }
-    if (apic_irq_pending(s) > 0) {
+    if (!qemu_cpu_is_self(s->cpu_env)) {
+        cpu_interrupt(s->cpu_env, CPU_INTERRUPT_POLL);
+    } else if (apic_irq_pending(s) > 0) {
         cpu_interrupt(s->cpu_env, CPU_INTERRUPT_HARD);
-    } else if (apic_accept_pic_intr(&s->busdev.qdev) &&
-               pic_get_output(isa_pic)) {
-        apic_deliver_pic_intr(&s->busdev.qdev, 1);
     }
 }
 
@@ -544,6 +535,15 @@ static void apic_deliver(DeviceState *d, uint8_t dest, uint8_t dest_mode,
     apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
+static bool apic_check_pic(APICCommonState *s)
+{
+    if (!apic_accept_pic_intr(&s->busdev.qdev) || !pic_get_output(isa_pic)) {
+        return false;
+    }
+    apic_deliver_pic_intr(&s->busdev.qdev, 1);
+    return true;
+}
+
 int apic_get_interrupt(DeviceState *d)
 {
     APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
@@ -569,7 +569,12 @@ int apic_get_interrupt(DeviceState *d)
     reset_bit(s->irr, intno);
     set_bit(s->isr, intno);
     apic_sync_vapic(s, SYNC_TO_VAPIC);
+
+    /* re-inject if there is still a pending PIC interrupt */
+    apic_check_pic(s);
+
     apic_update_irq(s);
+
     return intno;
 }
 
@@ -809,8 +814,11 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
         {
             int n = index - 0x32;
             s->lvt[n] = val;
-            if (n == APIC_LVT_TIMER)
+            if (n == APIC_LVT_TIMER) {
                 apic_timer_update(s, qemu_get_clock_ns(vm_clock));
+            } else if (n == APIC_LVT_LINT0 && apic_check_pic(s)) {
+                apic_update_irq(s);
+            }
         }
         break;
     case 0x38:
diff --git a/hw/apic.h b/hw/apic.h
index 62179cebee..1d48e027c3 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -20,9 +20,13 @@ void apic_init_reset(DeviceState *s);
 void apic_sipi(DeviceState *s);
 void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip,
                                    TPRAccess access);
+void apic_poll_irq(DeviceState *d);
+void apic_designate_bsp(DeviceState *d);
 
 /* pc.c */
-int cpu_is_bsp(CPUX86State *env);
 DeviceState *cpu_get_current_apic(void);
 
+/* cpu.c */
+bool cpu_is_bsp(X86CPU *cpu);
+
 #endif
diff --git a/hw/apic_common.c b/hw/apic_common.c
index 60b82596e7..58e63b00da 100644
--- a/hw/apic_common.c
+++ b/hw/apic_common.c
@@ -43,8 +43,8 @@ uint64_t cpu_get_apic_base(DeviceState *d)
         trace_cpu_get_apic_base((uint64_t)s->apicbase);
         return s->apicbase;
     } else {
-        trace_cpu_get_apic_base(0);
-        return 0;
+        trace_cpu_get_apic_base(MSR_IA32_APICBASE_BSP);
+        return MSR_IA32_APICBASE_BSP;
     }
 }
 
@@ -201,13 +201,23 @@ void apic_init_reset(DeviceState *d)
     s->timer_expiry = -1;
 }
 
+void apic_designate_bsp(DeviceState *d)
+{
+    if (d == NULL) {
+        return;
+    }
+
+    APICCommonState *s = APIC_COMMON(d);
+    s->apicbase |= MSR_IA32_APICBASE_BSP;
+}
+
 static void apic_reset_common(DeviceState *d)
 {
     APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
     APICCommonClass *info = APIC_COMMON_GET_CLASS(s);
     bool bsp;
 
-    bsp = cpu_is_bsp(s->cpu_env);
+    bsp = cpu_is_bsp(x86_env_get_cpu(s->cpu_env));
     s->apicbase = 0xfee00000 |
         (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
 
diff --git a/hw/apic_internal.h b/hw/apic_internal.h
index 60a6a8bdae..4d8ff490ce 100644
--- a/hw/apic_internal.h
+++ b/hw/apic_internal.h
@@ -141,7 +141,6 @@ void apic_report_irq_delivered(int delivered);
 bool apic_next_timer(APICCommonState *s, int64_t current_time);
 void apic_enable_tpr_access_reporting(DeviceState *d, bool enable);
 void apic_enable_vapic(DeviceState *d, target_phys_addr_t paddr);
-void apic_poll_irq(DeviceState *d);
 
 void vapic_report_tpr_access(DeviceState *dev, void *cpu, target_ulong ip,
                              TPRAccess access);
diff --git a/hw/arm-misc.h b/hw/arm-misc.h
index 1f96229d3c..bdd8fecc99 100644
--- a/hw/arm-misc.h
+++ b/hw/arm-misc.h
@@ -25,7 +25,7 @@ qemu_irq *armv7m_init(MemoryRegion *address_space_mem,
 
 /* arm_boot.c */
 struct arm_boot_info {
-    int ram_size;
+    uint64_t ram_size;
     const char *kernel_filename;
     const char *kernel_cmdline;
     const char *initrd_filename;
diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
index 88ff47d95e..c413780784 100644
--- a/hw/arm/Makefile.objs
+++ b/hw/arm/Makefile.objs
@@ -11,6 +11,7 @@ obj-y += realview_gic.o realview.o arm_sysctl.o arm11mpcore.o a9mpcore.o
 obj-y += exynos4210_gic.o exynos4210_combiner.o exynos4210.o
 obj-y += exynos4_boards.o exynos4210_uart.o exynos4210_pwm.o
 obj-y += exynos4210_pmu.o exynos4210_mct.o exynos4210_fimd.o
+obj-y += exynos4210_rtc.o exynos4210_i2c.o
 obj-y += arm_l2x0.o
 obj-y += arm_mptimer.o a15mpcore.o
 obj-y += armv7m.o armv7m_nvic.o stellaris.o pl022.o stellaris_enet.o
@@ -34,6 +35,8 @@ obj-y += framebuffer.o
 obj-y += vexpress.o
 obj-y += strongarm.o
 obj-y += collie.o
+obj-y += imx_serial.o imx_ccm.o imx_timer.o imx_avic.o
+obj-y += kzm.o
 obj-y += pl041.o lm4549.o
 obj-$(CONFIG_FDT) += ../device_tree.o
 
diff --git a/hw/arm_boot.c b/hw/arm_boot.c
index a1e6ddbc1c..a6e9143662 100644
--- a/hw/arm_boot.c
+++ b/hw/arm_boot.c
@@ -216,11 +216,12 @@ static void set_kernel_args_old(const struct arm_boot_info *info)
 static int load_dtb(target_phys_addr_t addr, const struct arm_boot_info *binfo)
 {
 #ifdef CONFIG_FDT
-    uint32_t mem_reg_property[] = { cpu_to_be32(binfo->loader_start),
-                                    cpu_to_be32(binfo->ram_size) };
+    uint32_t *mem_reg_property;
+    uint32_t mem_reg_propsize;
     void *fdt = NULL;
     char *filename;
     int size, rc;
+    uint32_t acells, scells, hival;
 
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, binfo->dtb_filename);
     if (!filename) {
@@ -236,8 +237,36 @@ static int load_dtb(target_phys_addr_t addr, const struct arm_boot_info *binfo)
     }
     g_free(filename);
 
+    acells = qemu_devtree_getprop_cell(fdt, "/", "#address-cells");
+    scells = qemu_devtree_getprop_cell(fdt, "/", "#size-cells");
+    if (acells == 0 || scells == 0) {
+        fprintf(stderr, "dtb file invalid (#address-cells or #size-cells 0)\n");
+        return -1;
+    }
+
+    mem_reg_propsize = acells + scells;
+    mem_reg_property = g_new0(uint32_t, mem_reg_propsize);
+    mem_reg_property[acells - 1] = cpu_to_be32(binfo->loader_start);
+    hival = cpu_to_be32(binfo->loader_start >> 32);
+    if (acells > 1) {
+        mem_reg_property[acells - 2] = hival;
+    } else if (hival != 0) {
+        fprintf(stderr, "qemu: dtb file not compatible with "
+                "RAM start address > 4GB\n");
+        exit(1);
+    }
+    mem_reg_property[acells + scells - 1] = cpu_to_be32(binfo->ram_size);
+    hival = cpu_to_be32(binfo->ram_size >> 32);
+    if (scells > 1) {
+        mem_reg_property[acells + scells - 2] = hival;
+    } else if (hival != 0) {
+        fprintf(stderr, "qemu: dtb file not compatible with "
+                "RAM size > 4GB\n");
+        exit(1);
+    }
+
     rc = qemu_devtree_setprop(fdt, "/memory", "reg", mem_reg_property,
-                               sizeof(mem_reg_property));
+                              mem_reg_propsize * sizeof(uint32_t));
     if (rc < 0) {
         fprintf(stderr, "couldn't set /memory/reg\n");
     }
@@ -357,7 +386,7 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
     if (kernel_size < 0) {
         entry = info->loader_start + KERNEL_LOAD_ADDR;
         kernel_size = load_image_targphys(info->kernel_filename, entry,
-                                          ram_size - KERNEL_LOAD_ADDR);
+                                          info->ram_size - KERNEL_LOAD_ADDR);
         is_linux = 1;
     }
     if (kernel_size < 0) {
@@ -371,7 +400,8 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
             initrd_size = load_image_targphys(info->initrd_filename,
                                               info->loader_start
                                               + INITRD_LOAD_ADDR,
-                                              ram_size - INITRD_LOAD_ADDR);
+                                              info->ram_size
+                                              - INITRD_LOAD_ADDR);
             if (initrd_size < 0) {
                 fprintf(stderr, "qemu: could not load initrd '%s'\n",
                         info->initrd_filename);
@@ -398,6 +428,12 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
             bootloader[5] = dtb_start;
         } else {
             bootloader[5] = info->loader_start + KERNEL_ARGS_ADDR;
+            if (info->ram_size >= (1ULL << 32)) {
+                fprintf(stderr, "qemu: RAM size must be less than 4GB to boot"
+                        " Linux kernel using ATAGS (try passing a device tree"
+                        " using -dtb)\n");
+                exit(1);
+            }
         }
         bootloader[6] = entry;
         for (n = 0; n < sizeof(bootloader) / 4; n++) {
diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index ec22322930..186ac66f00 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -25,7 +25,7 @@
 
 #ifdef DEBUG_GIC
 #define DPRINTF(fmt, ...) \
-do { printf("arm_gic: " fmt , ## __VA_ARGS__); } while (0)
+do { fprintf(stderr, "arm_gic: " fmt , ## __VA_ARGS__); } while (0)
 #else
 #define DPRINTF(fmt, ...) do {} while(0)
 #endif
diff --git a/hw/block-common.c b/hw/block-common.c
new file mode 100644
index 0000000000..f0196d78dc
--- /dev/null
+++ b/hw/block-common.c
@@ -0,0 +1,64 @@
+/*
+ * Common code for block device models
+ *
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "blockdev.h"
+#include "hw/block-common.h"
+#include "qemu-error.h"
+
+void blkconf_serial(BlockConf *conf, char **serial)
+{
+    DriveInfo *dinfo;
+
+    if (!*serial) {
+        /* try to fall back to value set with legacy -drive serial=... */
+        dinfo = drive_get_by_blockdev(conf->bs);
+        if (dinfo->serial) {
+            *serial = g_strdup(dinfo->serial);
+        }
+    }
+}
+
+int blkconf_geometry(BlockConf *conf, int *ptrans,
+                     unsigned cyls_max, unsigned heads_max, unsigned secs_max)
+{
+    DriveInfo *dinfo;
+
+    if (!conf->cyls && !conf->heads && !conf->secs) {
+        /* try to fall back to value set with legacy -drive cyls=... */
+        dinfo = drive_get_by_blockdev(conf->bs);
+        conf->cyls  = dinfo->cyls;
+        conf->heads = dinfo->heads;
+        conf->secs  = dinfo->secs;
+        if (ptrans) {
+            *ptrans = dinfo->trans;
+        }
+    }
+    if (!conf->cyls && !conf->heads && !conf->secs) {
+        hd_geometry_guess(conf->bs,
+                          &conf->cyls, &conf->heads, &conf->secs,
+                          ptrans);
+    } else if (ptrans && *ptrans == BIOS_ATA_TRANSLATION_AUTO) {
+        *ptrans = hd_bios_chs_auto_trans(conf->cyls, conf->heads, conf->secs);
+    }
+    if (conf->cyls || conf->heads || conf->secs) {
+        if (conf->cyls < 1 || conf->cyls > cyls_max) {
+            error_report("cyls must be between 1 and %u", cyls_max);
+            return -1;
+        }
+        if (conf->heads < 1 || conf->heads > heads_max) {
+            error_report("heads must be between 1 and %u", heads_max);
+            return -1;
+        }
+        if (conf->secs < 1 || conf->secs > secs_max) {
+            error_report("secs must be between 1 and %u", secs_max);
+            return -1;
+        }
+    }
+    return 0;
+}
diff --git a/hw/block-common.h b/hw/block-common.h
new file mode 100644
index 0000000000..bb808f7f56
--- /dev/null
+++ b/hw/block-common.h
@@ -0,0 +1,79 @@
+/*
+ * Common code for block device models
+ *
+ * Copyright (C) 2012 Red Hat, Inc.
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_BLOCK_COMMON_H
+#define HW_BLOCK_COMMON_H
+
+#include "qemu-common.h"
+
+/* Configuration */
+
+typedef struct BlockConf {
+    BlockDriverState *bs;
+    uint16_t physical_block_size;
+    uint16_t logical_block_size;
+    uint16_t min_io_size;
+    uint32_t opt_io_size;
+    int32_t bootindex;
+    uint32_t discard_granularity;
+    /* geometry, not all devices use this */
+    uint32_t cyls, heads, secs;
+} BlockConf;
+
+static inline unsigned int get_physical_block_exp(BlockConf *conf)
+{
+    unsigned int exp = 0, size;
+
+    for (size = conf->physical_block_size;
+        size > conf->logical_block_size;
+        size >>= 1) {
+        exp++;
+    }
+
+    return exp;
+}
+
+#define DEFINE_BLOCK_PROPERTIES(_state, _conf)                          \
+    DEFINE_PROP_DRIVE("drive", _state, _conf.bs),                       \
+    DEFINE_PROP_BLOCKSIZE("logical_block_size", _state,                 \
+                          _conf.logical_block_size, 512),               \
+    DEFINE_PROP_BLOCKSIZE("physical_block_size", _state,                \
+                          _conf.physical_block_size, 512),              \
+    DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0),  \
+    DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0),    \
+    DEFINE_PROP_INT32("bootindex", _state, _conf.bootindex, -1),        \
+    DEFINE_PROP_UINT32("discard_granularity", _state, \
+                       _conf.discard_granularity, 0)
+
+#define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf)      \
+    DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0),  \
+    DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0), \
+    DEFINE_PROP_UINT32("secs", _state, _conf.secs, 0)
+
+/* Configuration helpers */
+
+void blkconf_serial(BlockConf *conf, char **serial);
+int blkconf_geometry(BlockConf *conf, int *trans,
+                     unsigned cyls_max, unsigned heads_max, unsigned secs_max);
+
+/* Hard disk geometry */
+
+#define BIOS_ATA_TRANSLATION_AUTO   0
+#define BIOS_ATA_TRANSLATION_NONE   1
+#define BIOS_ATA_TRANSLATION_LBA    2
+#define BIOS_ATA_TRANSLATION_LARGE  3
+#define BIOS_ATA_TRANSLATION_RECHS  4
+
+void hd_geometry_guess(BlockDriverState *bs,
+                       uint32_t *pcyls, uint32_t *pheads, uint32_t *psecs,
+                       int *ptrans);
+int hd_bios_chs_auto_trans(uint32_t cyls, uint32_t heads, uint32_t secs);
+
+#endif
diff --git a/hw/bt-l2cap.c b/hw/bt-l2cap.c
index 2ccba6071c..cb43ee7733 100644
--- a/hw/bt-l2cap.c
+++ b/hw/bt-l2cap.c
@@ -1000,7 +1000,8 @@ static void l2cap_iframe_in(struct l2cap_chan_s *ch, uint16_t cid,
             /* TODO: Signal an error? */
             return;
         }
-        return l2cap_sframe_in(ch, le16_to_cpup((void *) hdr->data));
+        l2cap_sframe_in(ch, le16_to_cpup((void *) hdr->data));
+        return;
     }
 
     switch (hdr->data[1] >> 6) {	/* SAR */
@@ -1010,7 +1011,8 @@ static void l2cap_iframe_in(struct l2cap_chan_s *ch, uint16_t cid,
         if (len - 4 > ch->mps)
             goto len_error;
 
-        return ch->params.sdu_in(ch->params.opaque, hdr->data + 2, len - 4);
+        ch->params.sdu_in(ch->params.opaque, hdr->data + 2, len - 4);
+        break;
 
     case L2CAP_SAR_START:
         if (ch->len_total || len < 6)
@@ -1033,7 +1035,8 @@ static void l2cap_iframe_in(struct l2cap_chan_s *ch, uint16_t cid,
             goto len_error;
 
         memcpy(ch->sdu + ch->len_cur, hdr->data + 2, len - 4);
-        return ch->params.sdu_in(ch->params.opaque, ch->sdu, ch->len_total);
+        ch->params.sdu_in(ch->params.opaque, ch->sdu, ch->len_total);
+        break;
 
     case L2CAP_SAR_CONT:
         if (!ch->len_total || ch->len_cur + len - 4 >= ch->len_total)
@@ -1136,7 +1139,7 @@ static void l2cap_bframe_submit(struct bt_l2cap_conn_params_s *parms)
 {
     struct l2cap_chan_s *chan = (struct l2cap_chan_s *) parms;
 
-    return l2cap_pdu_submit(chan->l2cap);
+    l2cap_pdu_submit(chan->l2cap);
 }
 
 #if 0
diff --git a/hw/cadence_gem.c b/hw/cadence_gem.c
index dbde3920d0..967f62513e 100644
--- a/hw/cadence_gem.c
+++ b/hw/cadence_gem.c
@@ -339,8 +339,8 @@ typedef struct {
     uint8_t phy_loop; /* Are we in phy loopback? */
 
     /* The current DMA descriptor pointers */
-    target_phys_addr_t rx_desc_addr;
-    target_phys_addr_t tx_desc_addr;
+    uint32_t rx_desc_addr;
+    uint32_t tx_desc_addr;
 
 } GemState;
 
@@ -405,7 +405,7 @@ static void phy_update_link(GemState *s)
     }
 }
 
-static int gem_can_receive(VLANClientState *nc)
+static int gem_can_receive(NetClientState *nc)
 {
     GemState *s;
 
@@ -602,7 +602,7 @@ static int gem_mac_address_filter(GemState *s, const uint8_t *packet)
  * gem_receive:
  * Fit a packet handed to us by QEMU into the receive descriptor ring.
  */
-static ssize_t gem_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     unsigned    desc[2];
     target_phys_addr_t packet_desc_addr, last_desc_addr;
@@ -1146,7 +1146,7 @@ static const MemoryRegionOps gem_ops = {
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
-static void gem_cleanup(VLANClientState *nc)
+static void gem_cleanup(NetClientState *nc)
 {
     GemState *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -1154,14 +1154,14 @@ static void gem_cleanup(VLANClientState *nc)
     s->nic = NULL;
 }
 
-static void gem_set_link(VLANClientState *nc)
+static void gem_set_link(NetClientState *nc)
 {
     DB_PRINT("\n");
     phy_update_link(DO_UPCAST(NICState, nc, nc)->opaque);
 }
 
 static NetClientInfo net_gem_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = gem_can_receive,
     .receive = gem_receive,
diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
index afedaa43d3..623dd688d9 100644
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c
@@ -43,6 +43,8 @@
 //#define DEBUG_CIRRUS
 //#define DEBUG_BITBLT
 
+#define VGA_RAM_SIZE (8192 * 1024)
+
 /***************************************
  *
  *  definitions
@@ -2891,7 +2893,8 @@ static int vga_initfn(ISADevice *dev)
     ISACirrusVGAState *d = DO_UPCAST(ISACirrusVGAState, dev, dev);
     VGACommonState *s = &d->cirrus_vga.vga;
 
-    vga_common_init(s, VGA_RAM_SIZE);
+    s->vram_size_mb = VGA_RAM_SIZE >> 20;
+    vga_common_init(s);
     cirrus_init_common(&d->cirrus_vga, CIRRUS_ID_CLGD5430, 0,
                        isa_address_space(dev));
     s->ds = graphic_console_init(s->update, s->invalidate,
@@ -2933,7 +2936,8 @@ static int pci_cirrus_vga_initfn(PCIDevice *dev)
      int16_t device_id = pc->device_id;
 
      /* setup VGA */
-     vga_common_init(&s->vga, VGA_RAM_SIZE);
+     s->vga.vram_size_mb = VGA_RAM_SIZE >> 20;
+     vga_common_init(&s->vga);
      cirrus_init_common(s, device_id, 1, pci_address_space(dev));
      s->vga.ds = graphic_console_init(s->vga.update, s->vga.invalidate,
                                       s->vga.screen_dump, s->vga.text_update,
diff --git a/hw/dp8393x.c b/hw/dp8393x.c
index 017d0742ae..4fa6eccba4 100644
--- a/hw/dp8393x.c
+++ b/hw/dp8393x.c
@@ -673,7 +673,7 @@ static const MemoryRegionOps dp8393x_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static int nic_can_receive(VLANClientState *nc)
+static int nic_can_receive(NetClientState *nc)
 {
     dp8393xState *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -722,7 +722,7 @@ static int receive_filter(dp8393xState *s, const uint8_t * buf, int size)
     return -1;
 }
 
-static ssize_t nic_receive(VLANClientState *nc, const uint8_t * buf, size_t size)
+static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t size)
 {
     dp8393xState *s = DO_UPCAST(NICState, nc, nc)->opaque;
     uint16_t data[10];
@@ -858,7 +858,7 @@ static void nic_reset(void *opaque)
     dp8393x_update_irq(s);
 }
 
-static void nic_cleanup(VLANClientState *nc)
+static void nic_cleanup(NetClientState *nc)
 {
     dp8393xState *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -872,7 +872,7 @@ static void nic_cleanup(VLANClientState *nc)
 }
 
 static NetClientInfo net_dp83932_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = nic_can_receive,
     .receive = nic_receive,
@@ -899,7 +899,6 @@ void dp83932_init(NICInfo *nd, target_phys_addr_t base, int it_shift,
     s->regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux */
 
     s->conf.macaddr = nd->macaddr;
-    s->conf.vlan = nd->vlan;
     s->conf.peer = nd->netdev;
 
     s->nic = qemu_new_nic(&net_dp83932_info, &s->conf, nd->model, nd->name, s);
diff --git a/hw/e1000.c b/hw/e1000.c
index 4573f1301e..ae8a6c5523 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -720,7 +720,7 @@ receive_filter(E1000State *s, const uint8_t *buf, int size)
 }
 
 static void
-e1000_set_link_status(VLANClientState *nc)
+e1000_set_link_status(NetClientState *nc)
 {
     E1000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
     uint32_t old_status = s->mac_reg[STATUS];
@@ -754,7 +754,7 @@ static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
 }
 
 static int
-e1000_can_receive(VLANClientState *nc)
+e1000_can_receive(NetClientState *nc)
 {
     E1000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -770,7 +770,7 @@ static uint64_t rx_desc_base(E1000State *s)
 }
 
 static ssize_t
-e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
+e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     E1000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
     struct e1000_rx_desc desc;
@@ -1185,14 +1185,14 @@ e1000_mmio_setup(E1000State *d)
 }
 
 static void
-e1000_cleanup(VLANClientState *nc)
+e1000_cleanup(NetClientState *nc)
 {
     E1000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
     s->nic = NULL;
 }
 
-static int
+static void
 pci_e1000_uninit(PCIDevice *dev)
 {
     E1000State *d = DO_UPCAST(E1000State, dev, dev);
@@ -1201,12 +1201,11 @@ pci_e1000_uninit(PCIDevice *dev)
     qemu_free_timer(d->autoneg_timer);
     memory_region_destroy(&d->mmio);
     memory_region_destroy(&d->io);
-    qemu_del_vlan_client(&d->nic->nc);
-    return 0;
+    qemu_del_net_client(&d->nic->nc);
 }
 
 static NetClientInfo net_e1000_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = e1000_can_receive,
     .receive = e1000_receive,
diff --git a/hw/eepro100.c b/hw/eepro100.c
index 6279ae36ec..50d117e35e 100644
--- a/hw/eepro100.c
+++ b/hw/eepro100.c
@@ -1596,10 +1596,17 @@ static void eepro100_write(void *opaque, target_phys_addr_t addr,
     EEPRO100State *s = opaque;
 
     switch (size) {
-    case 1: return eepro100_write1(s, addr, data);
-    case 2: return eepro100_write2(s, addr, data);
-    case 4: return eepro100_write4(s, addr, data);
-    default: abort();
+    case 1:
+        eepro100_write1(s, addr, data);
+        break;
+    case 2:
+        eepro100_write2(s, addr, data);
+        break;
+    case 4:
+        eepro100_write4(s, addr, data);
+        break;
+    default:
+        abort();
     }
 }
 
@@ -1609,7 +1616,7 @@ static const MemoryRegionOps eepro100_ops = {
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
-static int nic_can_receive(VLANClientState *nc)
+static int nic_can_receive(NetClientState *nc)
 {
     EEPRO100State *s = DO_UPCAST(NICState, nc, nc)->opaque;
     TRACE(RXTX, logout("%p\n", s));
@@ -1619,7 +1626,7 @@ static int nic_can_receive(VLANClientState *nc)
 #endif
 }
 
-static ssize_t nic_receive(VLANClientState *nc, const uint8_t * buf, size_t size)
+static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t size)
 {
     /* TODO:
      * - Magic packets should set bit 30 in power management driver register.
@@ -1824,14 +1831,14 @@ static const VMStateDescription vmstate_eepro100 = {
     }
 };
 
-static void nic_cleanup(VLANClientState *nc)
+static void nic_cleanup(NetClientState *nc)
 {
     EEPRO100State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
     s->nic = NULL;
 }
 
-static int pci_nic_uninit(PCIDevice *pci_dev)
+static void pci_nic_uninit(PCIDevice *pci_dev)
 {
     EEPRO100State *s = DO_UPCAST(EEPRO100State, dev, pci_dev);
 
@@ -1840,12 +1847,11 @@ static int pci_nic_uninit(PCIDevice *pci_dev)
     memory_region_destroy(&s->flash_bar);
     vmstate_unregister(&pci_dev->qdev, s->vmstate, s);
     eeprom93xx_free(&pci_dev->qdev, s->eeprom);
-    qemu_del_vlan_client(&s->nic->nc);
-    return 0;
+    qemu_del_net_client(&s->nic->nc);
 }
 
 static NetClientInfo net_eepro100_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = nic_can_receive,
     .receive = nic_receive,
diff --git a/hw/es1370.c b/hw/es1370.c
index 573f747362..e34234c350 100644
--- a/hw/es1370.c
+++ b/hw/es1370.c
@@ -1018,12 +1018,11 @@ static int es1370_initfn (PCIDevice *dev)
     return 0;
 }
 
-static int es1370_exitfn (PCIDevice *dev)
+static void es1370_exitfn (PCIDevice *dev)
 {
     ES1370State *s = DO_UPCAST (ES1370State, dev, dev);
 
     memory_region_destroy (&s->io);
-    return 0;
 }
 
 int es1370_init (PCIBus *bus)
diff --git a/hw/escc.c b/hw/escc.c
index 4d8a8e8886..e1f5e73ba2 100644
--- a/hw/escc.c
+++ b/hw/escc.c
@@ -905,7 +905,6 @@ static Property escc_properties[] = {
     DEFINE_PROP_UINT32("frequency", SerialState, frequency,   0),
     DEFINE_PROP_UINT32("it_shift",  SerialState, it_shift,    0),
     DEFINE_PROP_UINT32("disabled",  SerialState, disabled,    0),
-    DEFINE_PROP_UINT32("disabled",  SerialState, disabled,    0),
     DEFINE_PROP_UINT32("chnBtype",  SerialState, chn[0].type, 0),
     DEFINE_PROP_UINT32("chnAtype",  SerialState, chn[1].type, 0),
     DEFINE_PROP_CHR("chrB", SerialState, chn[0].chr),
diff --git a/hw/esp-pci.c b/hw/esp-pci.c
new file mode 100644
index 0000000000..170e007be9
--- /dev/null
+++ b/hw/esp-pci.c
@@ -0,0 +1,518 @@
+/*
+ * QEMU ESP/NCR53C9x emulation
+ *
+ * Copyright (c) 2005-2006 Fabrice Bellard
+ * Copyright (c) 2012 Herve Poussineau
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "pci.h"
+#include "eeprom93xx.h"
+#include "esp.h"
+#include "trace.h"
+#include "qemu-log.h"
+
+#define TYPE_AM53C974_DEVICE "am53c974"
+
+#define DMA_CMD   0x0
+#define DMA_STC   0x1
+#define DMA_SPA   0x2
+#define DMA_WBC   0x3
+#define DMA_WAC   0x4
+#define DMA_STAT  0x5
+#define DMA_SMDLA 0x6
+#define DMA_WMAC  0x7
+
+#define DMA_CMD_MASK   0x03
+#define DMA_CMD_DIAG   0x04
+#define DMA_CMD_MDL    0x10
+#define DMA_CMD_INTE_P 0x20
+#define DMA_CMD_INTE_D 0x40
+#define DMA_CMD_DIR    0x80
+
+#define DMA_STAT_PWDN    0x01
+#define DMA_STAT_ERROR   0x02
+#define DMA_STAT_ABORT   0x04
+#define DMA_STAT_DONE    0x08
+#define DMA_STAT_SCSIINT 0x10
+#define DMA_STAT_BCMBLT  0x20
+
+#define SBAC_STATUS 0x1000
+
+typedef struct PCIESPState {
+    PCIDevice dev;
+    MemoryRegion io;
+    uint32_t dma_regs[8];
+    uint32_t sbac;
+    ESPState esp;
+} PCIESPState;
+
+static void esp_pci_handle_idle(PCIESPState *pci, uint32_t val)
+{
+    trace_esp_pci_dma_idle(val);
+    esp_dma_enable(&pci->esp, 0, 0);
+}
+
+static void esp_pci_handle_blast(PCIESPState *pci, uint32_t val)
+{
+    trace_esp_pci_dma_blast(val);
+    qemu_log_mask(LOG_UNIMP, "am53c974: cmd BLAST not implemented\n");
+}
+
+static void esp_pci_handle_abort(PCIESPState *pci, uint32_t val)
+{
+    trace_esp_pci_dma_abort(val);
+    if (pci->esp.current_req) {
+        scsi_req_cancel(pci->esp.current_req);
+    }
+}
+
+static void esp_pci_handle_start(PCIESPState *pci, uint32_t val)
+{
+    trace_esp_pci_dma_start(val);
+
+    pci->dma_regs[DMA_WBC] = pci->dma_regs[DMA_STC];
+    pci->dma_regs[DMA_WAC] = pci->dma_regs[DMA_SPA];
+    pci->dma_regs[DMA_WMAC] = pci->dma_regs[DMA_SMDLA];
+
+    pci->dma_regs[DMA_STAT] &= ~(DMA_STAT_BCMBLT | DMA_STAT_SCSIINT
+                               | DMA_STAT_DONE | DMA_STAT_ABORT
+                               | DMA_STAT_ERROR | DMA_STAT_PWDN);
+
+    esp_dma_enable(&pci->esp, 0, 1);
+}
+
+static void esp_pci_dma_write(PCIESPState *pci, uint32_t saddr, uint32_t val)
+{
+    trace_esp_pci_dma_write(saddr, pci->dma_regs[saddr], val);
+    switch (saddr) {
+    case DMA_CMD:
+        pci->dma_regs[saddr] = val;
+        switch (val & DMA_CMD_MASK) {
+        case 0x0: /* IDLE */
+            esp_pci_handle_idle(pci, val);
+            break;
+        case 0x1: /* BLAST */
+            esp_pci_handle_blast(pci, val);
+            break;
+        case 0x2: /* ABORT */
+            esp_pci_handle_abort(pci, val);
+            break;
+        case 0x3: /* START */
+            esp_pci_handle_start(pci, val);
+            break;
+        default: /* can't happen */
+            abort();
+        }
+        break;
+    case DMA_STC:
+    case DMA_SPA:
+    case DMA_SMDLA:
+        pci->dma_regs[saddr] = val;
+        break;
+    case DMA_STAT:
+        if (!(pci->sbac & SBAC_STATUS)) {
+            /* clear some bits on write */
+            uint32_t mask = DMA_STAT_ERROR | DMA_STAT_ABORT | DMA_STAT_DONE;
+            pci->dma_regs[DMA_STAT] &= ~(val & mask);
+        }
+        break;
+    default:
+        trace_esp_pci_error_invalid_write_dma(val, saddr);
+        return;
+    }
+}
+
+static uint32_t esp_pci_dma_read(PCIESPState *pci, uint32_t saddr)
+{
+    uint32_t val;
+
+    val = pci->dma_regs[saddr];
+    if (saddr == DMA_STAT) {
+        if (pci->esp.rregs[ESP_RSTAT] & STAT_INT) {
+            val |= DMA_STAT_SCSIINT;
+        }
+        if (pci->sbac & SBAC_STATUS) {
+            pci->dma_regs[DMA_STAT] &= ~(DMA_STAT_ERROR | DMA_STAT_ABORT |
+                                         DMA_STAT_DONE);
+        }
+    }
+
+    trace_esp_pci_dma_read(saddr, val);
+    return val;
+}
+
+static void esp_pci_io_write(void *opaque, target_phys_addr_t addr,
+                             uint64_t val, unsigned int size)
+{
+    PCIESPState *pci = opaque;
+
+    if (size < 4 || addr & 3) {
+        /* need to upgrade request: we only support 4-bytes accesses */
+        uint32_t current = 0, mask;
+        int shift;
+
+        if (addr < 0x40) {
+            current = pci->esp.wregs[addr >> 2];
+        } else if (addr < 0x60) {
+            current = pci->dma_regs[(addr - 0x40) >> 2];
+        } else if (addr < 0x74) {
+            current = pci->sbac;
+        }
+
+        shift = (4 - size) * 8;
+        mask = (~(uint32_t)0 << shift) >> shift;
+
+        shift = ((4 - (addr & 3)) & 3) * 8;
+        val <<= shift;
+        val |= current & ~(mask << shift);
+        addr &= ~3;
+        size = 4;
+    }
+
+    if (addr < 0x40) {
+        /* SCSI core reg */
+        esp_reg_write(&pci->esp, addr >> 2, val);
+    } else if (addr < 0x60) {
+        /* PCI DMA CCB */
+        esp_pci_dma_write(pci, (addr - 0x40) >> 2, val);
+    } else if (addr == 0x70) {
+        /* DMA SCSI Bus and control */
+        trace_esp_pci_sbac_write(pci->sbac, val);
+        pci->sbac = val;
+    } else {
+        trace_esp_pci_error_invalid_write((int)addr);
+    }
+}
+
+static uint64_t esp_pci_io_read(void *opaque, target_phys_addr_t addr,
+                                unsigned int size)
+{
+    PCIESPState *pci = opaque;
+    uint32_t ret;
+
+    if (addr < 0x40) {
+        /* SCSI core reg */
+        ret = esp_reg_read(&pci->esp, addr >> 2);
+    } else if (addr < 0x60) {
+        /* PCI DMA CCB */
+        ret = esp_pci_dma_read(pci, (addr - 0x40) >> 2);
+    } else if (addr == 0x70) {
+        /* DMA SCSI Bus and control */
+        trace_esp_pci_sbac_read(pci->sbac);
+        ret = pci->sbac;
+    } else {
+        /* Invalid region */
+        trace_esp_pci_error_invalid_read((int)addr);
+        ret = 0;
+    }
+
+    /* give only requested data */
+    ret >>= (addr & 3) * 8;
+    ret &= ~(~(uint64_t)0 << (8 * size));
+
+    return ret;
+}
+
+static void esp_pci_dma_memory_rw(PCIESPState *pci, uint8_t *buf, int len,
+                                  DMADirection dir)
+{
+    dma_addr_t addr;
+    DMADirection expected_dir;
+
+    if (pci->dma_regs[DMA_CMD] & DMA_CMD_DIR) {
+        expected_dir = DMA_DIRECTION_FROM_DEVICE;
+    } else {
+        expected_dir = DMA_DIRECTION_TO_DEVICE;
+    }
+
+    if (dir != expected_dir) {
+        trace_esp_pci_error_invalid_dma_direction();
+        return;
+    }
+
+    if (pci->dma_regs[DMA_STAT] & DMA_CMD_MDL) {
+        qemu_log_mask(LOG_UNIMP, "am53c974: MDL transfer not implemented\n");
+    }
+
+    addr = pci->dma_regs[DMA_SPA];
+    if (pci->dma_regs[DMA_WBC] < len) {
+        len = pci->dma_regs[DMA_WBC];
+    }
+
+    pci_dma_rw(&pci->dev, addr, buf, len, dir);
+
+    /* update status registers */
+    pci->dma_regs[DMA_WBC] -= len;
+    pci->dma_regs[DMA_WAC] += len;
+}
+
+static void esp_pci_dma_memory_read(void *opaque, uint8_t *buf, int len)
+{
+    PCIESPState *pci = opaque;
+    esp_pci_dma_memory_rw(pci, buf, len, DMA_DIRECTION_TO_DEVICE);
+}
+
+static void esp_pci_dma_memory_write(void *opaque, uint8_t *buf, int len)
+{
+    PCIESPState *pci = opaque;
+    esp_pci_dma_memory_rw(pci, buf, len, DMA_DIRECTION_FROM_DEVICE);
+}
+
+static const MemoryRegionOps esp_pci_io_ops = {
+    .read = esp_pci_io_read,
+    .write = esp_pci_io_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 4,
+    },
+};
+
+static void esp_pci_hard_reset(DeviceState *dev)
+{
+    PCIESPState *pci = DO_UPCAST(PCIESPState, dev.qdev, dev);
+    esp_hard_reset(&pci->esp);
+    pci->dma_regs[DMA_CMD] &= ~(DMA_CMD_DIR | DMA_CMD_INTE_D | DMA_CMD_INTE_P
+                              | DMA_CMD_MDL | DMA_CMD_DIAG | DMA_CMD_MASK);
+    pci->dma_regs[DMA_WBC] &= ~0xffff;
+    pci->dma_regs[DMA_WAC] = 0xffffffff;
+    pci->dma_regs[DMA_STAT] &= ~(DMA_STAT_BCMBLT | DMA_STAT_SCSIINT
+                               | DMA_STAT_DONE | DMA_STAT_ABORT
+                               | DMA_STAT_ERROR);
+    pci->dma_regs[DMA_WMAC] = 0xfffffffd;
+}
+
+static const VMStateDescription vmstate_esp_pci_scsi = {
+    .name = "pciespscsi",
+    .version_id = 0,
+    .minimum_version_id = 0,
+    .minimum_version_id_old = 0,
+    .fields = (VMStateField[]) {
+        VMSTATE_PCI_DEVICE(dev, PCIESPState),
+        VMSTATE_BUFFER_UNSAFE(dma_regs, PCIESPState, 0, 8 * sizeof(uint32_t)),
+        VMSTATE_STRUCT(esp, PCIESPState, 0, vmstate_esp, ESPState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void esp_pci_command_complete(SCSIRequest *req, uint32_t status,
+                                     size_t resid)
+{
+    ESPState *s = req->hba_private;
+    PCIESPState *pci = container_of(s, PCIESPState, esp);
+
+    esp_command_complete(req, status, resid);
+    pci->dma_regs[DMA_WBC] = 0;
+    pci->dma_regs[DMA_STAT] |= DMA_STAT_DONE;
+}
+
+static const struct SCSIBusInfo esp_pci_scsi_info = {
+    .tcq = false,
+    .max_target = ESP_MAX_DEVS,
+    .max_lun = 7,
+
+    .transfer_data = esp_transfer_data,
+    .complete = esp_pci_command_complete,
+    .cancel = esp_request_cancelled,
+};
+
+static int esp_pci_scsi_init(PCIDevice *dev)
+{
+    PCIESPState *pci = DO_UPCAST(PCIESPState, dev, dev);
+    ESPState *s = &pci->esp;
+    uint8_t *pci_conf;
+
+    pci_conf = pci->dev.config;
+
+    /* Interrupt pin A */
+    pci_conf[PCI_INTERRUPT_PIN] = 0x01;
+
+    s->dma_memory_read = esp_pci_dma_memory_read;
+    s->dma_memory_write = esp_pci_dma_memory_write;
+    s->dma_opaque = pci;
+    s->chip_id = TCHI_AM53C974;
+    memory_region_init_io(&pci->io, &esp_pci_io_ops, pci, "esp-io", 0x80);
+
+    pci_register_bar(&pci->dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &pci->io);
+    s->irq = pci->dev.irq[0];
+
+    scsi_bus_new(&s->bus, &dev->qdev, &esp_pci_scsi_info);
+    if (!dev->qdev.hotplugged) {
+        return scsi_bus_legacy_handle_cmdline(&s->bus);
+    }
+    return 0;
+}
+
+static void esp_pci_scsi_uninit(PCIDevice *d)
+{
+    PCIESPState *pci = DO_UPCAST(PCIESPState, dev, d);
+
+    memory_region_destroy(&pci->io);
+}
+
+static void esp_pci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+    k->init = esp_pci_scsi_init;
+    k->exit = esp_pci_scsi_uninit;
+    k->vendor_id = PCI_VENDOR_ID_AMD;
+    k->device_id = PCI_DEVICE_ID_AMD_SCSI;
+    k->revision = 0x10;
+    k->class_id = PCI_CLASS_STORAGE_SCSI;
+    dc->desc = "AMD Am53c974 PCscsi-PCI SCSI adapter";
+    dc->reset = esp_pci_hard_reset;
+    dc->vmsd = &vmstate_esp_pci_scsi;
+}
+
+static const TypeInfo esp_pci_info = {
+    .name = TYPE_AM53C974_DEVICE,
+    .parent = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(PCIESPState),
+    .class_init = esp_pci_class_init,
+};
+
+typedef struct {
+    PCIESPState pci;
+    eeprom_t *eeprom;
+} DC390State;
+
+#define TYPE_DC390_DEVICE "dc390"
+#define DC390(obj) \
+    OBJECT_CHECK(DC390State, obj, TYPE_DC390_DEVICE)
+
+#define EE_ADAPT_SCSI_ID 64
+#define EE_MODE2         65
+#define EE_DELAY         66
+#define EE_TAG_CMD_NUM   67
+#define EE_ADAPT_OPTIONS 68
+#define EE_BOOT_SCSI_ID  69
+#define EE_BOOT_SCSI_LUN 70
+#define EE_CHKSUM1       126
+#define EE_CHKSUM2       127
+
+#define EE_ADAPT_OPTION_F6_F8_AT_BOOT   0x01
+#define EE_ADAPT_OPTION_BOOT_FROM_CDROM 0x02
+#define EE_ADAPT_OPTION_INT13           0x04
+#define EE_ADAPT_OPTION_SCAM_SUPPORT    0x08
+
+
+static uint32_t dc390_read_config(PCIDevice *dev, uint32_t addr, int l)
+{
+    DC390State *pci = DC390(dev);
+    uint32_t val;
+
+    val = pci_default_read_config(dev, addr, l);
+
+    if (addr == 0x00 && l == 1) {
+        /* First byte of address space is AND-ed with EEPROM DO line */
+        if (!eeprom93xx_read(pci->eeprom)) {
+            val &= ~0xff;
+        }
+    }
+
+    return val;
+}
+
+static void dc390_write_config(PCIDevice *dev,
+                               uint32_t addr, uint32_t val, int l)
+{
+    DC390State *pci = DC390(dev);
+    if (addr == 0x80) {
+        /* EEPROM write */
+        int eesk = val & 0x80 ? 1 : 0;
+        int eedi = val & 0x40 ? 1 : 0;
+        eeprom93xx_write(pci->eeprom, 1, eesk, eedi);
+    } else if (addr == 0xc0) {
+        /* EEPROM CS low */
+        eeprom93xx_write(pci->eeprom, 0, 0, 0);
+    } else {
+        pci_default_write_config(dev, addr, val, l);
+    }
+}
+
+static int dc390_scsi_init(PCIDevice *dev)
+{
+    DC390State *pci = DC390(dev);
+    uint8_t *contents;
+    uint16_t chksum = 0;
+    int i, ret;
+
+    /* init base class */
+    ret = esp_pci_scsi_init(dev);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* EEPROM */
+    pci->eeprom = eeprom93xx_new(DEVICE(dev), 64);
+
+    /* set default eeprom values */
+    contents = (uint8_t *)eeprom93xx_data(pci->eeprom);
+
+    for (i = 0; i < 16; i++) {
+        contents[i * 2] = 0x57;
+        contents[i * 2 + 1] = 0x00;
+    }
+    contents[EE_ADAPT_SCSI_ID] = 7;
+    contents[EE_MODE2] = 0x0f;
+    contents[EE_TAG_CMD_NUM] = 0x04;
+    contents[EE_ADAPT_OPTIONS] = EE_ADAPT_OPTION_F6_F8_AT_BOOT
+                               | EE_ADAPT_OPTION_BOOT_FROM_CDROM
+                               | EE_ADAPT_OPTION_INT13;
+
+    /* update eeprom checksum */
+    for (i = 0; i < EE_CHKSUM1; i += 2) {
+        chksum += contents[i] + (((uint16_t)contents[i + 1]) << 8);
+    }
+    chksum = 0x1234 - chksum;
+    contents[EE_CHKSUM1] = chksum & 0xff;
+    contents[EE_CHKSUM2] = chksum >> 8;
+
+    return 0;
+}
+
+static void dc390_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+    k->init = dc390_scsi_init;
+    k->config_read = dc390_read_config;
+    k->config_write = dc390_write_config;
+    dc->desc = "Tekram DC-390 SCSI adapter";
+}
+
+static const TypeInfo dc390_info = {
+    .name = "dc390",
+    .parent = TYPE_AM53C974_DEVICE,
+    .instance_size = sizeof(DC390State),
+    .class_init = dc390_class_init,
+};
+
+static void esp_pci_register_types(void)
+{
+    type_register_static(&esp_pci_info);
+    type_register_static(&dc390_info);
+}
+
+type_init(esp_pci_register_types)
diff --git a/hw/esp.c b/hw/esp.c
index 8d73e56886..52c46e615f 100644
--- a/hw/esp.c
+++ b/hw/esp.c
@@ -2,6 +2,7 @@
  * QEMU ESP/NCR53C9x emulation
  *
  * Copyright (c) 2005-2006 Fabrice Bellard
+ * Copyright (c) 2012 Herve Poussineau
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -23,9 +24,9 @@
  */
 
 #include "sysbus.h"
-#include "scsi.h"
 #include "esp.h"
 #include "trace.h"
+#include "qemu-log.h"
 
 /*
  * On Sparc32, this is the ESP (NCR53C90) part of chip STP2000 (Master I/O),
@@ -35,116 +36,6 @@
  * http://www.ibiblio.org/pub/historic-linux/early-ports/Sparc/NCR/NCR53C9X.txt
  */
 
-#define ESP_ERROR(fmt, ...)                                             \
-    do { printf("ESP ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0)
-
-#define ESP_REGS 16
-#define TI_BUFSZ 16
-
-typedef struct ESPState ESPState;
-
-struct ESPState {
-    SysBusDevice busdev;
-    MemoryRegion iomem;
-    uint8_t rregs[ESP_REGS];
-    uint8_t wregs[ESP_REGS];
-    qemu_irq irq;
-    uint32_t it_shift;
-    int32_t ti_size;
-    uint32_t ti_rptr, ti_wptr;
-    uint32_t status;
-    uint32_t dma;
-    uint8_t ti_buf[TI_BUFSZ];
-    SCSIBus bus;
-    SCSIDevice *current_dev;
-    SCSIRequest *current_req;
-    uint8_t cmdbuf[TI_BUFSZ];
-    uint32_t cmdlen;
-    uint32_t do_cmd;
-
-    /* The amount of data left in the current DMA transfer.  */
-    uint32_t dma_left;
-    /* The size of the current DMA transfer.  Zero if no transfer is in
-       progress.  */
-    uint32_t dma_counter;
-    int dma_enabled;
-
-    uint32_t async_len;
-    uint8_t *async_buf;
-
-    ESPDMAMemoryReadWriteFunc dma_memory_read;
-    ESPDMAMemoryReadWriteFunc dma_memory_write;
-    void *dma_opaque;
-    void (*dma_cb)(ESPState *s);
-};
-
-#define ESP_TCLO   0x0
-#define ESP_TCMID  0x1
-#define ESP_FIFO   0x2
-#define ESP_CMD    0x3
-#define ESP_RSTAT  0x4
-#define ESP_WBUSID 0x4
-#define ESP_RINTR  0x5
-#define ESP_WSEL   0x5
-#define ESP_RSEQ   0x6
-#define ESP_WSYNTP 0x6
-#define ESP_RFLAGS 0x7
-#define ESP_WSYNO  0x7
-#define ESP_CFG1   0x8
-#define ESP_RRES1  0x9
-#define ESP_WCCF   0x9
-#define ESP_RRES2  0xa
-#define ESP_WTEST  0xa
-#define ESP_CFG2   0xb
-#define ESP_CFG3   0xc
-#define ESP_RES3   0xd
-#define ESP_TCHI   0xe
-#define ESP_RES4   0xf
-
-#define CMD_DMA 0x80
-#define CMD_CMD 0x7f
-
-#define CMD_NOP      0x00
-#define CMD_FLUSH    0x01
-#define CMD_RESET    0x02
-#define CMD_BUSRESET 0x03
-#define CMD_TI       0x10
-#define CMD_ICCS     0x11
-#define CMD_MSGACC   0x12
-#define CMD_PAD      0x18
-#define CMD_SATN     0x1a
-#define CMD_SEL      0x41
-#define CMD_SELATN   0x42
-#define CMD_SELATNS  0x43
-#define CMD_ENSEL    0x44
-
-#define STAT_DO 0x00
-#define STAT_DI 0x01
-#define STAT_CD 0x02
-#define STAT_ST 0x03
-#define STAT_MO 0x06
-#define STAT_MI 0x07
-#define STAT_PIO_MASK 0x06
-
-#define STAT_TC 0x10
-#define STAT_PE 0x20
-#define STAT_GE 0x40
-#define STAT_INT 0x80
-
-#define BUSID_DID 0x07
-
-#define INTR_FC 0x08
-#define INTR_BS 0x10
-#define INTR_DC 0x20
-#define INTR_RST 0x80
-
-#define SEQ_0 0x0
-#define SEQ_CD 0x4
-
-#define CFG1_RESREPT 0x40
-
-#define TCHI_FAS100A 0x4
-
 static void esp_raise_irq(ESPState *s)
 {
     if (!(s->rregs[ESP_RSTAT] & STAT_INT)) {
@@ -163,11 +54,8 @@ static void esp_lower_irq(ESPState *s)
     }
 }
 
-static void esp_dma_enable(void *opaque, int irq, int level)
+void esp_dma_enable(ESPState *s, int irq, int level)
 {
-    DeviceState *d = opaque;
-    ESPState *s = container_of(d, ESPState, busdev.qdev);
-
     if (level) {
         s->dma_enabled = 1;
         trace_esp_dma_enable();
@@ -181,9 +69,9 @@ static void esp_dma_enable(void *opaque, int irq, int level)
     }
 }
 
-static void esp_request_cancelled(SCSIRequest *req)
+void esp_request_cancelled(SCSIRequest *req)
 {
-    ESPState *s = DO_UPCAST(ESPState, busdev.qdev, req->bus->qbus.parent);
+    ESPState *s = req->hba_private;
 
     if (req == s->current_req) {
         scsi_req_unref(s->current_req);
@@ -239,7 +127,7 @@ static void do_busid_cmd(ESPState *s, uint8_t *buf, uint8_t busid)
     trace_esp_do_busid_cmd(busid);
     lun = busid & 7;
     current_lun = scsi_device_find(&s->bus, 0, s->current_dev->id, lun);
-    s->current_req = scsi_req_new(current_lun, 0, lun, buf, NULL);
+    s->current_req = scsi_req_new(current_lun, 0, lun, buf, s);
     datalen = scsi_req_enqueue(s->current_req);
     s->ti_size = datalen;
     if (datalen != 0) {
@@ -270,7 +158,7 @@ static void handle_satn(ESPState *s)
     uint8_t buf[32];
     int len;
 
-    if (!s->dma_enabled) {
+    if (s->dma && !s->dma_enabled) {
         s->dma_cb = handle_satn;
         return;
     }
@@ -284,7 +172,7 @@ static void handle_s_without_atn(ESPState *s)
     uint8_t buf[32];
     int len;
 
-    if (!s->dma_enabled) {
+    if (s->dma && !s->dma_enabled) {
         s->dma_cb = handle_s_without_atn;
         return;
     }
@@ -296,7 +184,7 @@ static void handle_s_without_atn(ESPState *s)
 
 static void handle_satn_stop(ESPState *s)
 {
-    if (!s->dma_enabled) {
+    if (s->dma && !s->dma_enabled) {
         s->dma_cb = handle_satn_stop;
         return;
     }
@@ -390,10 +278,10 @@ static void esp_do_dma(ESPState *s)
     esp_dma_done(s);
 }
 
-static void esp_command_complete(SCSIRequest *req, uint32_t status,
+void esp_command_complete(SCSIRequest *req, uint32_t status,
                                  size_t resid)
 {
-    ESPState *s = DO_UPCAST(ESPState, busdev.qdev, req->bus->qbus.parent);
+    ESPState *s = req->hba_private;
 
     trace_esp_command_complete();
     if (s->ti_size != 0) {
@@ -415,9 +303,9 @@ static void esp_command_complete(SCSIRequest *req, uint32_t status,
     }
 }
 
-static void esp_transfer_data(SCSIRequest *req, uint32_t len)
+void esp_transfer_data(SCSIRequest *req, uint32_t len)
 {
-    ESPState *s = DO_UPCAST(ESPState, busdev.qdev, req->bus->qbus.parent);
+    ESPState *s = req->hba_private;
 
     trace_esp_transfer_data(s->dma_left, s->ti_size);
     s->async_len = len;
@@ -435,6 +323,11 @@ static void handle_ti(ESPState *s)
 {
     uint32_t dmalen, minlen;
 
+    if (s->dma && !s->dma_enabled) {
+        s->dma_cb = handle_ti;
+        return;
+    }
+
     dmalen = s->rregs[ESP_TCLO] | (s->rregs[ESP_TCMID] << 8);
     if (dmalen==0) {
       dmalen=0x10000;
@@ -462,13 +355,11 @@ static void handle_ti(ESPState *s)
     }
 }
 
-static void esp_hard_reset(DeviceState *d)
+void esp_hard_reset(ESPState *s)
 {
-    ESPState *s = container_of(d, ESPState, busdev.qdev);
-
     memset(s->rregs, 0, ESP_REGS);
     memset(s->wregs, 0, ESP_REGS);
-    s->rregs[ESP_TCHI] = TCHI_FAS100A; // Indicate fas100a
+    s->rregs[ESP_TCHI] = s->chip_id;
     s->ti_size = 0;
     s->ti_rptr = 0;
     s->ti_wptr = 0;
@@ -479,40 +370,23 @@ static void esp_hard_reset(DeviceState *d)
     s->rregs[ESP_CFG1] = 7;
 }
 
-static void esp_soft_reset(DeviceState *d)
+static void esp_soft_reset(ESPState *s)
 {
-    ESPState *s = container_of(d, ESPState, busdev.qdev);
-
     qemu_irq_lower(s->irq);
-    esp_hard_reset(d);
+    esp_hard_reset(s);
 }
 
-static void parent_esp_reset(void *opaque, int irq, int level)
+static void parent_esp_reset(ESPState *s, int irq, int level)
 {
     if (level) {
-        esp_soft_reset(opaque);
-    }
-}
-
-static void esp_gpio_demux(void *opaque, int irq, int level)
-{
-    switch (irq) {
-    case 0:
-        parent_esp_reset(opaque, irq, level);
-        break;
-    case 1:
-        esp_dma_enable(opaque, irq, level);
-        break;
+        esp_soft_reset(s);
     }
 }
 
-static uint64_t esp_mem_read(void *opaque, target_phys_addr_t addr,
-                             unsigned size)
+uint64_t esp_reg_read(ESPState *s, uint32_t saddr)
 {
-    ESPState *s = opaque;
-    uint32_t saddr, old_val;
+    uint32_t old_val;
 
-    saddr = addr >> s->it_shift;
     trace_esp_mem_readb(saddr, s->rregs[saddr]);
     switch (saddr) {
     case ESP_FIFO:
@@ -520,7 +394,8 @@ static uint64_t esp_mem_read(void *opaque, target_phys_addr_t addr,
             s->ti_size--;
             if ((s->rregs[ESP_RSTAT] & STAT_PIO_MASK) == 0) {
                 /* Data out.  */
-                ESP_ERROR("PIO data read not implemented\n");
+                qemu_log_mask(LOG_UNIMP,
+                              "esp: PIO data read not implemented\n");
                 s->rregs[ESP_FIFO] = 0;
             } else {
                 s->rregs[ESP_FIFO] = s->ti_buf[s->ti_rptr++];
@@ -548,13 +423,8 @@ static uint64_t esp_mem_read(void *opaque, target_phys_addr_t addr,
     return s->rregs[saddr];
 }
 
-static void esp_mem_write(void *opaque, target_phys_addr_t addr,
-                          uint64_t val, unsigned size)
+void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val)
 {
-    ESPState *s = opaque;
-    uint32_t saddr;
-
-    saddr = addr >> s->it_shift;
     trace_esp_mem_writeb(saddr, s->wregs[saddr], val);
     switch (saddr) {
     case ESP_TCLO:
@@ -565,7 +435,7 @@ static void esp_mem_write(void *opaque, target_phys_addr_t addr,
         if (s->do_cmd) {
             s->cmdbuf[s->cmdlen++] = val & 0xff;
         } else if (s->ti_size == TI_BUFSZ - 1) {
-            ESP_ERROR("fifo overrun\n");
+            trace_esp_error_fifo_overrun();
         } else {
             s->ti_size++;
             s->ti_buf[s->ti_wptr++] = val & 0xff;
@@ -594,7 +464,7 @@ static void esp_mem_write(void *opaque, target_phys_addr_t addr,
             break;
         case CMD_RESET:
             trace_esp_mem_writeb_cmd_reset(val);
-            esp_soft_reset(&s->busdev.qdev);
+            esp_soft_reset(s);
             break;
         case CMD_BUSRESET:
             trace_esp_mem_writeb_cmd_bus_reset(val);
@@ -628,6 +498,9 @@ static void esp_mem_write(void *opaque, target_phys_addr_t addr,
         case CMD_SATN:
             trace_esp_mem_writeb_cmd_satn(val);
             break;
+        case CMD_RSTATN:
+            trace_esp_mem_writeb_cmd_rstatn(val);
+            break;
         case CMD_SEL:
             trace_esp_mem_writeb_cmd_sel(val);
             handle_s_without_atn(s);
@@ -644,8 +517,13 @@ static void esp_mem_write(void *opaque, target_phys_addr_t addr,
             trace_esp_mem_writeb_cmd_ensel(val);
             s->rregs[ESP_RINTR] = 0;
             break;
+        case CMD_DISSEL:
+            trace_esp_mem_writeb_cmd_dissel(val);
+            s->rregs[ESP_RINTR] = 0;
+            esp_raise_irq(s);
+            break;
         default:
-            ESP_ERROR("Unhandled ESP command (%2.2x)\n", (unsigned)val);
+            trace_esp_error_unhandled_command(val);
             break;
         }
         break;
@@ -660,7 +538,7 @@ static void esp_mem_write(void *opaque, target_phys_addr_t addr,
         s->rregs[saddr] = val;
         break;
     default:
-        ESP_ERROR("invalid write of 0x%02x at [0x%x]\n", (unsigned)val, saddr);
+        trace_esp_error_invalid_write(val, saddr);
         return;
     }
     s->wregs[saddr] = val;
@@ -672,14 +550,7 @@ static bool esp_mem_accepts(void *opaque, target_phys_addr_t addr,
     return (size == 1) || (is_write && size == 4);
 }
 
-static const MemoryRegionOps esp_mem_ops = {
-    .read = esp_mem_read,
-    .write = esp_mem_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
-    .valid.accepts = esp_mem_accepts,
-};
-
-static const VMStateDescription vmstate_esp = {
+const VMStateDescription vmstate_esp = {
     .name ="esp",
     .version_id = 3,
     .minimum_version_id = 3,
@@ -701,6 +572,40 @@ static const VMStateDescription vmstate_esp = {
     }
 };
 
+typedef struct {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+    uint32_t it_shift;
+    ESPState esp;
+} SysBusESPState;
+
+static void sysbus_esp_mem_write(void *opaque, target_phys_addr_t addr,
+                                 uint64_t val, unsigned int size)
+{
+    SysBusESPState *sysbus = opaque;
+    uint32_t saddr;
+
+    saddr = addr >> sysbus->it_shift;
+    esp_reg_write(&sysbus->esp, saddr, val);
+}
+
+static uint64_t sysbus_esp_mem_read(void *opaque, target_phys_addr_t addr,
+                                    unsigned int size)
+{
+    SysBusESPState *sysbus = opaque;
+    uint32_t saddr;
+
+    saddr = addr >> sysbus->it_shift;
+    return esp_reg_read(&sysbus->esp, saddr);
+}
+
+static const MemoryRegionOps sysbus_esp_mem_ops = {
+    .read = sysbus_esp_mem_read,
+    .write = sysbus_esp_mem_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid.accepts = esp_mem_accepts,
+};
+
 void esp_init(target_phys_addr_t espaddr, int it_shift,
               ESPDMAMemoryReadWriteFunc dma_memory_read,
               ESPDMAMemoryReadWriteFunc dma_memory_write,
@@ -709,14 +614,16 @@ void esp_init(target_phys_addr_t espaddr, int it_shift,
 {
     DeviceState *dev;
     SysBusDevice *s;
+    SysBusESPState *sysbus;
     ESPState *esp;
 
     dev = qdev_create(NULL, "esp");
-    esp = DO_UPCAST(ESPState, busdev.qdev, dev);
+    sysbus = DO_UPCAST(SysBusESPState, busdev.qdev, dev);
+    esp = &sysbus->esp;
     esp->dma_memory_read = dma_memory_read;
     esp->dma_memory_write = dma_memory_write;
     esp->dma_opaque = dma_opaque;
-    esp->it_shift = it_shift;
+    sysbus->it_shift = it_shift;
     /* XXX for now until rc4030 has been changed to use DMA enable signal */
     esp->dma_enabled = 1;
     qdev_init_nofail(dev);
@@ -737,48 +644,78 @@ static const struct SCSIBusInfo esp_scsi_info = {
     .cancel = esp_request_cancelled
 };
 
-static int esp_init1(SysBusDevice *dev)
+static void sysbus_esp_gpio_demux(void *opaque, int irq, int level)
 {
-    ESPState *s = FROM_SYSBUS(ESPState, dev);
+    DeviceState *d = opaque;
+    SysBusESPState *sysbus = container_of(d, SysBusESPState, busdev.qdev);
+    ESPState *s = &sysbus->esp;
+
+    switch (irq) {
+    case 0:
+        parent_esp_reset(s, irq, level);
+        break;
+    case 1:
+        esp_dma_enable(opaque, irq, level);
+        break;
+    }
+}
+
+static int sysbus_esp_init(SysBusDevice *dev)
+{
+    SysBusESPState *sysbus = FROM_SYSBUS(SysBusESPState, dev);
+    ESPState *s = &sysbus->esp;
 
     sysbus_init_irq(dev, &s->irq);
-    assert(s->it_shift != -1);
+    assert(sysbus->it_shift != -1);
 
-    memory_region_init_io(&s->iomem, &esp_mem_ops, s,
-                          "esp", ESP_REGS << s->it_shift);
-    sysbus_init_mmio(dev, &s->iomem);
+    s->chip_id = TCHI_FAS100A;
+    memory_region_init_io(&sysbus->iomem, &sysbus_esp_mem_ops, sysbus,
+                          "esp", ESP_REGS << sysbus->it_shift);
+    sysbus_init_mmio(dev, &sysbus->iomem);
 
-    qdev_init_gpio_in(&dev->qdev, esp_gpio_demux, 2);
+    qdev_init_gpio_in(&dev->qdev, sysbus_esp_gpio_demux, 2);
 
     scsi_bus_new(&s->bus, &dev->qdev, &esp_scsi_info);
     return scsi_bus_legacy_handle_cmdline(&s->bus);
 }
 
-static Property esp_properties[] = {
-    {.name = NULL},
+static void sysbus_esp_hard_reset(DeviceState *dev)
+{
+    SysBusESPState *sysbus = DO_UPCAST(SysBusESPState, busdev.qdev, dev);
+    esp_hard_reset(&sysbus->esp);
+}
+
+static const VMStateDescription vmstate_sysbus_esp_scsi = {
+    .name = "sysbusespscsi",
+    .version_id = 0,
+    .minimum_version_id = 0,
+    .minimum_version_id_old = 0,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT(esp, SysBusESPState, 0, vmstate_esp, ESPState),
+        VMSTATE_END_OF_LIST()
+    }
 };
 
-static void esp_class_init(ObjectClass *klass, void *data)
+static void sysbus_esp_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-    k->init = esp_init1;
-    dc->reset = esp_hard_reset;
-    dc->vmsd = &vmstate_esp;
-    dc->props = esp_properties;
+    k->init = sysbus_esp_init;
+    dc->reset = sysbus_esp_hard_reset;
+    dc->vmsd = &vmstate_sysbus_esp_scsi;
 }
 
-static TypeInfo esp_info = {
+static const TypeInfo sysbus_esp_info = {
     .name          = "esp",
     .parent        = TYPE_SYS_BUS_DEVICE,
-    .instance_size = sizeof(ESPState),
-    .class_init    = esp_class_init,
+    .instance_size = sizeof(SysBusESPState),
+    .class_init    = sysbus_esp_class_init,
 };
 
 static void esp_register_types(void)
 {
-    type_register_static(&esp_info);
+    type_register_static(&sysbus_esp_info);
 }
 
 type_init(esp_register_types)
diff --git a/hw/esp.h b/hw/esp.h
index 62bfd4d129..fa855e2fdf 100644
--- a/hw/esp.h
+++ b/hw/esp.h
@@ -1,6 +1,8 @@
 #ifndef QEMU_HW_ESP_H
 #define QEMU_HW_ESP_H
 
+#include "scsi.h"
+
 /* esp.c */
 #define ESP_MAX_DEVS 7
 typedef void (*ESPDMAMemoryReadWriteFunc)(void *opaque, uint8_t *buf, int len);
@@ -10,4 +12,121 @@ void esp_init(target_phys_addr_t espaddr, int it_shift,
               void *dma_opaque, qemu_irq irq, qemu_irq *reset,
               qemu_irq *dma_enable);
 
+#define ESP_REGS 16
+#define TI_BUFSZ 16
+
+typedef struct ESPState ESPState;
+
+struct ESPState {
+    uint8_t rregs[ESP_REGS];
+    uint8_t wregs[ESP_REGS];
+    qemu_irq irq;
+    uint8_t chip_id;
+    int32_t ti_size;
+    uint32_t ti_rptr, ti_wptr;
+    uint32_t status;
+    uint32_t dma;
+    uint8_t ti_buf[TI_BUFSZ];
+    SCSIBus bus;
+    SCSIDevice *current_dev;
+    SCSIRequest *current_req;
+    uint8_t cmdbuf[TI_BUFSZ];
+    uint32_t cmdlen;
+    uint32_t do_cmd;
+
+    /* The amount of data left in the current DMA transfer.  */
+    uint32_t dma_left;
+    /* The size of the current DMA transfer.  Zero if no transfer is in
+       progress.  */
+    uint32_t dma_counter;
+    int dma_enabled;
+
+    uint32_t async_len;
+    uint8_t *async_buf;
+
+    ESPDMAMemoryReadWriteFunc dma_memory_read;
+    ESPDMAMemoryReadWriteFunc dma_memory_write;
+    void *dma_opaque;
+    void (*dma_cb)(ESPState *s);
+};
+
+#define ESP_TCLO   0x0
+#define ESP_TCMID  0x1
+#define ESP_FIFO   0x2
+#define ESP_CMD    0x3
+#define ESP_RSTAT  0x4
+#define ESP_WBUSID 0x4
+#define ESP_RINTR  0x5
+#define ESP_WSEL   0x5
+#define ESP_RSEQ   0x6
+#define ESP_WSYNTP 0x6
+#define ESP_RFLAGS 0x7
+#define ESP_WSYNO  0x7
+#define ESP_CFG1   0x8
+#define ESP_RRES1  0x9
+#define ESP_WCCF   0x9
+#define ESP_RRES2  0xa
+#define ESP_WTEST  0xa
+#define ESP_CFG2   0xb
+#define ESP_CFG3   0xc
+#define ESP_RES3   0xd
+#define ESP_TCHI   0xe
+#define ESP_RES4   0xf
+
+#define CMD_DMA 0x80
+#define CMD_CMD 0x7f
+
+#define CMD_NOP      0x00
+#define CMD_FLUSH    0x01
+#define CMD_RESET    0x02
+#define CMD_BUSRESET 0x03
+#define CMD_TI       0x10
+#define CMD_ICCS     0x11
+#define CMD_MSGACC   0x12
+#define CMD_PAD      0x18
+#define CMD_SATN     0x1a
+#define CMD_RSTATN   0x1b
+#define CMD_SEL      0x41
+#define CMD_SELATN   0x42
+#define CMD_SELATNS  0x43
+#define CMD_ENSEL    0x44
+#define CMD_DISSEL   0x45
+
+#define STAT_DO 0x00
+#define STAT_DI 0x01
+#define STAT_CD 0x02
+#define STAT_ST 0x03
+#define STAT_MO 0x06
+#define STAT_MI 0x07
+#define STAT_PIO_MASK 0x06
+
+#define STAT_TC 0x10
+#define STAT_PE 0x20
+#define STAT_GE 0x40
+#define STAT_INT 0x80
+
+#define BUSID_DID 0x07
+
+#define INTR_FC 0x08
+#define INTR_BS 0x10
+#define INTR_DC 0x20
+#define INTR_RST 0x80
+
+#define SEQ_0 0x0
+#define SEQ_CD 0x4
+
+#define CFG1_RESREPT 0x40
+
+#define TCHI_FAS100A 0x4
+#define TCHI_AM53C974 0x12
+
+void esp_dma_enable(ESPState *s, int irq, int level);
+void esp_request_cancelled(SCSIRequest *req);
+void esp_command_complete(SCSIRequest *req, uint32_t status, size_t resid);
+void esp_transfer_data(SCSIRequest *req, uint32_t len);
+void esp_hard_reset(ESPState *s);
+uint64_t esp_reg_read(ESPState *s, uint32_t saddr);
+void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val);
+extern const VMStateDescription vmstate_esp;
+
 #endif
diff --git a/hw/etraxfs_eth.c b/hw/etraxfs_eth.c
index 16a0637a4a..b124f5bb3a 100644
--- a/hw/etraxfs_eth.c
+++ b/hw/etraxfs_eth.c
@@ -507,12 +507,12 @@ static int eth_match_groupaddr(struct fs_eth *eth, const unsigned char *sa)
 	return match;
 }
 
-static int eth_can_receive(VLANClientState *nc)
+static int eth_can_receive(NetClientState *nc)
 {
 	return 1;
 }
 
-static ssize_t eth_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t eth_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
 	unsigned char sa_bcast[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 	struct fs_eth *eth = DO_UPCAST(NICState, nc, nc)->opaque;
@@ -549,7 +549,7 @@ static int eth_tx_push(void *opaque, unsigned char *buf, int len, bool eop)
 	return len;
 }
 
-static void eth_set_link(VLANClientState *nc)
+static void eth_set_link(NetClientState *nc)
 {
 	struct fs_eth *eth = DO_UPCAST(NICState, nc, nc)->opaque;
 	D(printf("%s %d\n", __func__, nc->link_down));
@@ -566,7 +566,7 @@ static const MemoryRegionOps eth_ops = {
 	}
 };
 
-static void eth_cleanup(VLANClientState *nc)
+static void eth_cleanup(NetClientState *nc)
 {
 	struct fs_eth *eth = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -579,7 +579,7 @@ static void eth_cleanup(VLANClientState *nc)
 }
 
 static NetClientInfo net_etraxfs_info = {
-	.type = NET_CLIENT_TYPE_NIC,
+	.type = NET_CLIENT_OPTIONS_KIND_NIC,
 	.size = sizeof(NICState),
 	.can_receive = eth_can_receive,
 	.receive = eth_receive,
diff --git a/hw/exynos4210.c b/hw/exynos4210.c
index 9c20b3f22d..00d4db8871 100644
--- a/hw/exynos4210.c
+++ b/hw/exynos4210.c
@@ -33,9 +33,19 @@
 /* PWM */
 #define EXYNOS4210_PWM_BASE_ADDR       0x139D0000
 
+/* RTC */
+#define EXYNOS4210_RTC_BASE_ADDR       0x10070000
+
 /* MCT */
 #define EXYNOS4210_MCT_BASE_ADDR       0x10050000
 
+/* I2C */
+#define EXYNOS4210_I2C_SHIFT           0x00010000
+#define EXYNOS4210_I2C_BASE_ADDR       0x13860000
+/* Interrupt Group of External Interrupt Combiner for I2C */
+#define EXYNOS4210_I2C_INTG            27
+#define EXYNOS4210_HDMI_INTG           16
+
 /* UART's definitions */
 #define EXYNOS4210_UART0_BASE_ADDR     0x13800000
 #define EXYNOS4210_UART1_BASE_ADDR     0x13810000
@@ -216,7 +226,7 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
     /* mirror of iROM */
     memory_region_init_alias(&s->irom_alias_mem, "exynos4210.irom_alias",
                              &s->irom_mem,
-                             EXYNOS4210_IROM_BASE_ADDR,
+                             0,
                              EXYNOS4210_IROM_SIZE);
     memory_region_set_readonly(&s->irom_alias_mem, true);
     memory_region_add_subregion(system_mem, EXYNOS4210_IROM_MIRROR_BASE_ADDR,
@@ -258,6 +268,11 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
                           s->irq_table[exynos4210_get_irq(22, 3)],
                           s->irq_table[exynos4210_get_irq(22, 4)],
                           NULL);
+    /* RTC */
+    sysbus_create_varargs("exynos4210.rtc", EXYNOS4210_RTC_BASE_ADDR,
+                          s->irq_table[exynos4210_get_irq(23, 0)],
+                          s->irq_table[exynos4210_get_irq(23, 1)],
+                          NULL);
 
     /* Multi Core Timer */
     dev = qdev_create(NULL, "exynos4210.mct");
@@ -275,6 +290,26 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
             s->irq_table[exynos4210_get_irq(35, 3)]);
     sysbus_mmio_map(busdev, 0, EXYNOS4210_MCT_BASE_ADDR);
 
+    /*** I2C ***/
+    for (n = 0; n < EXYNOS4210_I2C_NUMBER; n++) {
+        uint32_t addr = EXYNOS4210_I2C_BASE_ADDR + EXYNOS4210_I2C_SHIFT * n;
+        qemu_irq i2c_irq;
+
+        if (n < 8) {
+            i2c_irq = s->irq_table[exynos4210_get_irq(EXYNOS4210_I2C_INTG, n)];
+        } else {
+            i2c_irq = s->irq_table[exynos4210_get_irq(EXYNOS4210_HDMI_INTG, 1)];
+        }
+
+        dev = qdev_create(NULL, "exynos4210.i2c");
+        qdev_init_nofail(dev);
+        busdev = sysbus_from_qdev(dev);
+        sysbus_connect_irq(busdev, 0, i2c_irq);
+        sysbus_mmio_map(busdev, 0, addr);
+        s->i2c_if[n] = (i2c_bus *)qdev_get_child_bus(dev, "i2c");
+    }
+
+
     /*** UARTs ***/
     exynos4210_uart_create(EXYNOS4210_UART0_BASE_ADDR,
                            EXYNOS4210_UART0_FIFO_SIZE, 0, NULL,
diff --git a/hw/exynos4210.h b/hw/exynos4210.h
index 9b1ae4c8b1..a43ba3aedc 100644
--- a/hw/exynos4210.h
+++ b/hw/exynos4210.h
@@ -74,6 +74,8 @@
 #define EXYNOS4210_EXT_GIC_NIRQ     (160-32)
 #define EXYNOS4210_INT_GIC_NIRQ     64
 
+#define EXYNOS4210_I2C_NUMBER               9
+
 typedef struct Exynos4210Irq {
     qemu_irq int_combiner_irq[EXYNOS4210_MAX_INT_COMBINER_IN_IRQ];
     qemu_irq ext_combiner_irq[EXYNOS4210_MAX_EXT_COMBINER_IN_IRQ];
@@ -95,6 +97,7 @@ typedef struct Exynos4210State {
     MemoryRegion dram1_mem;
     MemoryRegion boot_secondary;
     MemoryRegion bootreg_mem;
+    i2c_bus *i2c_if[EXYNOS4210_I2C_NUMBER];
 } Exynos4210State;
 
 void exynos4210_write_secondary(ARMCPU *cpu,
diff --git a/hw/exynos4210_i2c.c b/hw/exynos4210_i2c.c
new file mode 100644
index 0000000000..3f72a5c464
--- /dev/null
+++ b/hw/exynos4210_i2c.c
@@ -0,0 +1,334 @@
+/*
+ *  Exynos4210 I2C Bus Serial Interface Emulation
+ *
+ *  Copyright (C) 2012 Samsung Electronics Co Ltd.
+ *    Maksim Kozlov, <m.kozlov@samsung.com>
+ *    Igor Mitsyanko, <i.mitsyanko@samsung.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu-timer.h"
+#include "sysbus.h"
+#include "i2c.h"
+
+#ifndef EXYNOS4_I2C_DEBUG
+#define EXYNOS4_I2C_DEBUG                 0
+#endif
+
+#define TYPE_EXYNOS4_I2C                  "exynos4210.i2c"
+#define EXYNOS4_I2C(obj)                  \
+    OBJECT_CHECK(Exynos4210I2CState, (obj), TYPE_EXYNOS4_I2C)
+
+/* Exynos4210 I2C memory map */
+#define EXYNOS4_I2C_MEM_SIZE              0x14
+#define I2CCON_ADDR                       0x00  /* control register */
+#define I2CSTAT_ADDR                      0x04  /* control/status register */
+#define I2CADD_ADDR                       0x08  /* address register */
+#define I2CDS_ADDR                        0x0c  /* data shift register */
+#define I2CLC_ADDR                        0x10  /* line control register */
+
+#define I2CCON_ACK_GEN                    (1 << 7)
+#define I2CCON_INTRS_EN                   (1 << 5)
+#define I2CCON_INT_PEND                   (1 << 4)
+
+#define EXYNOS4_I2C_MODE(reg)             (((reg) >> 6) & 3)
+#define I2C_IN_MASTER_MODE(reg)           (((reg) >> 6) & 2)
+#define I2CMODE_MASTER_Rx                 0x2
+#define I2CMODE_MASTER_Tx                 0x3
+#define I2CSTAT_LAST_BIT                  (1 << 0)
+#define I2CSTAT_OUTPUT_EN                 (1 << 4)
+#define I2CSTAT_START_BUSY                (1 << 5)
+
+
+#if EXYNOS4_I2C_DEBUG
+#define DPRINT(fmt, args...)              \
+    do { fprintf(stderr, "QEMU I2C: "fmt, ## args); } while (0)
+
+static const char *exynos4_i2c_get_regname(unsigned offset)
+{
+    switch (offset) {
+    case I2CCON_ADDR:
+        return "I2CCON";
+    case I2CSTAT_ADDR:
+        return "I2CSTAT";
+    case I2CADD_ADDR:
+        return "I2CADD";
+    case I2CDS_ADDR:
+        return "I2CDS";
+    case I2CLC_ADDR:
+        return "I2CLC";
+    default:
+        return "[?]";
+    }
+}
+
+#else
+#define DPRINT(fmt, args...)              do { } while (0)
+#endif
+
+typedef struct Exynos4210I2CState {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+    i2c_bus *bus;
+    qemu_irq irq;
+
+    uint8_t i2ccon;
+    uint8_t i2cstat;
+    uint8_t i2cadd;
+    uint8_t i2cds;
+    uint8_t i2clc;
+    bool scl_free;
+} Exynos4210I2CState;
+
+static inline void exynos4210_i2c_raise_interrupt(Exynos4210I2CState *s)
+{
+    if (s->i2ccon & I2CCON_INTRS_EN) {
+        s->i2ccon |= I2CCON_INT_PEND;
+        qemu_irq_raise(s->irq);
+    }
+}
+
+static void exynos4210_i2c_data_receive(void *opaque)
+{
+    Exynos4210I2CState *s = (Exynos4210I2CState *)opaque;
+    int ret;
+
+    s->i2cstat &= ~I2CSTAT_LAST_BIT;
+    s->scl_free = false;
+    ret = i2c_recv(s->bus);
+    if (ret < 0 && (s->i2ccon & I2CCON_ACK_GEN)) {
+        s->i2cstat |= I2CSTAT_LAST_BIT;  /* Data is not acknowledged */
+    } else {
+        s->i2cds = ret;
+    }
+    exynos4210_i2c_raise_interrupt(s);
+}
+
+static void exynos4210_i2c_data_send(void *opaque)
+{
+    Exynos4210I2CState *s = (Exynos4210I2CState *)opaque;
+
+    s->i2cstat &= ~I2CSTAT_LAST_BIT;
+    s->scl_free = false;
+    if (i2c_send(s->bus, s->i2cds) < 0 && (s->i2ccon & I2CCON_ACK_GEN)) {
+        s->i2cstat |= I2CSTAT_LAST_BIT;
+    }
+    exynos4210_i2c_raise_interrupt(s);
+}
+
+static uint64_t exynos4210_i2c_read(void *opaque, target_phys_addr_t offset,
+                                 unsigned size)
+{
+    Exynos4210I2CState *s = (Exynos4210I2CState *)opaque;
+    uint8_t value;
+
+    switch (offset) {
+    case I2CCON_ADDR:
+        value = s->i2ccon;
+        break;
+    case I2CSTAT_ADDR:
+        value = s->i2cstat;
+        break;
+    case I2CADD_ADDR:
+        value = s->i2cadd;
+        break;
+    case I2CDS_ADDR:
+        value = s->i2cds;
+        s->scl_free = true;
+        if (EXYNOS4_I2C_MODE(s->i2cstat) == I2CMODE_MASTER_Rx &&
+               (s->i2cstat & I2CSTAT_START_BUSY) &&
+               !(s->i2ccon & I2CCON_INT_PEND)) {
+            exynos4210_i2c_data_receive(s);
+        }
+        break;
+    case I2CLC_ADDR:
+        value = s->i2clc;
+        break;
+    default:
+        value = 0;
+        DPRINT("ERROR: Bad read offset 0x%x\n", (unsigned int)offset);
+        break;
+    }
+
+    DPRINT("read %s [0x%02x] -> 0x%02x\n", exynos4_i2c_get_regname(offset),
+            (unsigned int)offset, value);
+    return value;
+}
+
+static void exynos4210_i2c_write(void *opaque, target_phys_addr_t offset,
+                              uint64_t value, unsigned size)
+{
+    Exynos4210I2CState *s = (Exynos4210I2CState *)opaque;
+    uint8_t v = value & 0xff;
+
+    DPRINT("write %s [0x%02x] <- 0x%02x\n", exynos4_i2c_get_regname(offset),
+            (unsigned int)offset, v);
+
+    switch (offset) {
+    case I2CCON_ADDR:
+        s->i2ccon = (v & ~I2CCON_INT_PEND) | (s->i2ccon & I2CCON_INT_PEND);
+        if ((s->i2ccon & I2CCON_INT_PEND) && !(v & I2CCON_INT_PEND)) {
+            s->i2ccon &= ~I2CCON_INT_PEND;
+            qemu_irq_lower(s->irq);
+            if (!(s->i2ccon & I2CCON_INTRS_EN)) {
+                s->i2cstat &= ~I2CSTAT_START_BUSY;
+            }
+
+            if (s->i2cstat & I2CSTAT_START_BUSY) {
+                if (s->scl_free) {
+                    if (EXYNOS4_I2C_MODE(s->i2cstat) == I2CMODE_MASTER_Tx) {
+                        exynos4210_i2c_data_send(s);
+                    } else if (EXYNOS4_I2C_MODE(s->i2cstat) ==
+                            I2CMODE_MASTER_Rx) {
+                        exynos4210_i2c_data_receive(s);
+                    }
+                } else {
+                    s->i2ccon |= I2CCON_INT_PEND;
+                    qemu_irq_raise(s->irq);
+                }
+            }
+        }
+        break;
+    case I2CSTAT_ADDR:
+        s->i2cstat =
+                (s->i2cstat & I2CSTAT_START_BUSY) | (v & ~I2CSTAT_START_BUSY);
+
+        if (!(s->i2cstat & I2CSTAT_OUTPUT_EN)) {
+            s->i2cstat &= ~I2CSTAT_START_BUSY;
+            s->scl_free = true;
+            qemu_irq_lower(s->irq);
+            break;
+        }
+
+        /* Nothing to do if in i2c slave mode */
+        if (!I2C_IN_MASTER_MODE(s->i2cstat)) {
+            break;
+        }
+
+        if (v & I2CSTAT_START_BUSY) {
+            s->i2cstat &= ~I2CSTAT_LAST_BIT;
+            s->i2cstat |= I2CSTAT_START_BUSY;    /* Line is busy */
+            s->scl_free = false;
+
+            /* Generate start bit and send slave address */
+            if (i2c_start_transfer(s->bus, s->i2cds >> 1, s->i2cds & 0x1) &&
+                    (s->i2ccon & I2CCON_ACK_GEN)) {
+                s->i2cstat |= I2CSTAT_LAST_BIT;
+            } else if (EXYNOS4_I2C_MODE(s->i2cstat) == I2CMODE_MASTER_Rx) {
+                exynos4210_i2c_data_receive(s);
+            }
+            exynos4210_i2c_raise_interrupt(s);
+        } else {
+            i2c_end_transfer(s->bus);
+            if (!(s->i2ccon & I2CCON_INT_PEND)) {
+                s->i2cstat &= ~I2CSTAT_START_BUSY;
+            }
+            s->scl_free = true;
+        }
+        break;
+    case I2CADD_ADDR:
+        if ((s->i2cstat & I2CSTAT_OUTPUT_EN) == 0) {
+            s->i2cadd = v;
+        }
+        break;
+    case I2CDS_ADDR:
+        if (s->i2cstat & I2CSTAT_OUTPUT_EN) {
+            s->i2cds = v;
+            s->scl_free = true;
+            if (EXYNOS4_I2C_MODE(s->i2cstat) == I2CMODE_MASTER_Tx &&
+                    (s->i2cstat & I2CSTAT_START_BUSY) &&
+                    !(s->i2ccon & I2CCON_INT_PEND)) {
+                exynos4210_i2c_data_send(s);
+            }
+        }
+        break;
+    case I2CLC_ADDR:
+        s->i2clc = v;
+        break;
+    default:
+        DPRINT("ERROR: Bad write offset 0x%x\n", (unsigned int)offset);
+        break;
+    }
+}
+
+static const MemoryRegionOps exynos4210_i2c_ops = {
+    .read = exynos4210_i2c_read,
+    .write = exynos4210_i2c_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription exynos4210_i2c_vmstate = {
+    .name = TYPE_EXYNOS4_I2C,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(i2ccon, Exynos4210I2CState),
+        VMSTATE_UINT8(i2cstat, Exynos4210I2CState),
+        VMSTATE_UINT8(i2cds, Exynos4210I2CState),
+        VMSTATE_UINT8(i2cadd, Exynos4210I2CState),
+        VMSTATE_UINT8(i2clc, Exynos4210I2CState),
+        VMSTATE_BOOL(scl_free, Exynos4210I2CState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void exynos4210_i2c_reset(DeviceState *d)
+{
+    Exynos4210I2CState *s = EXYNOS4_I2C(d);
+
+    s->i2ccon  = 0x00;
+    s->i2cstat = 0x00;
+    s->i2cds   = 0xFF;
+    s->i2clc   = 0x00;
+    s->i2cadd  = 0xFF;
+    s->scl_free = true;
+}
+
+static int exynos4210_i2c_realize(SysBusDevice *dev)
+{
+    Exynos4210I2CState *s = EXYNOS4_I2C(dev);
+
+    memory_region_init_io(&s->iomem, &exynos4210_i2c_ops, s, TYPE_EXYNOS4_I2C,
+                          EXYNOS4_I2C_MEM_SIZE);
+    sysbus_init_mmio(dev, &s->iomem);
+    sysbus_init_irq(dev, &s->irq);
+    s->bus = i2c_init_bus(&dev->qdev, "i2c");
+    return 0;
+}
+
+static void exynos4210_i2c_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *sbdc = SYS_BUS_DEVICE_CLASS(klass);
+
+    dc->vmsd = &exynos4210_i2c_vmstate;
+    dc->reset = exynos4210_i2c_reset;
+    sbdc->init = exynos4210_i2c_realize;
+}
+
+static const TypeInfo exynos4210_i2c_type_info = {
+    .name = TYPE_EXYNOS4_I2C,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(Exynos4210I2CState),
+    .class_init = exynos4210_i2c_class_init,
+};
+
+static void exynos4210_i2c_register_types(void)
+{
+    type_register_static(&exynos4210_i2c_type_info);
+}
+
+type_init(exynos4210_i2c_register_types)
diff --git a/hw/exynos4210_mct.c b/hw/exynos4210_mct.c
index 7474fcf802..7a22b1f900 100644
--- a/hw/exynos4210_mct.c
+++ b/hw/exynos4210_mct.c
@@ -376,10 +376,6 @@ static uint64_t exynos4210_gfrc_get_count(Exynos4210MCTGT *s)
 {
     uint64_t count = 0;
     count = ptimer_get_count(s->ptimer_frc);
-    if (!count) {
-        /* Timer event was generated and s->reg.cnt holds adequate value */
-        return s->reg.cnt;
-    }
     count = s->count - count;
     return s->reg.cnt + count;
 }
diff --git a/hw/exynos4210_pwm.c b/hw/exynos4210_pwm.c
index 6243e59c48..0c228280a9 100644
--- a/hw/exynos4210_pwm.c
+++ b/hw/exynos4210_pwm.c
@@ -200,7 +200,7 @@ static void exynos4210_pwm_tick(void *opaque)
         ptimer_run(p->timer[id].ptimer, 1);
     } else {
         /* stop timer, set status to STOP, see Basic Timer Operation */
-        p->reg_tcon = ~TCON_TIMER_START(id);
+        p->reg_tcon &= ~TCON_TIMER_START(id);
         ptimer_stop(p->timer[id].ptimer);
     }
 }
diff --git a/hw/exynos4210_rtc.c b/hw/exynos4210_rtc.c
new file mode 100644
index 0000000000..42a4ddc327
--- /dev/null
+++ b/hw/exynos4210_rtc.c
@@ -0,0 +1,592 @@
+/*
+ * Samsung exynos4210 Real Time Clock
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *  Ogurtsov Oleg <o.ogurtsov@samsung.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+/* Description:
+ * Register RTCCON:
+ *  CLKSEL Bit[1] not used
+ *  CLKOUTEN Bit[9] not used
+ */
+
+#include "sysbus.h"
+#include "qemu-timer.h"
+#include "qemu-common.h"
+#include "ptimer.h"
+
+#include "hw.h"
+#include "qemu-timer.h"
+#include "sysemu.h"
+
+#include "exynos4210.h"
+
+#define DEBUG_RTC 0
+
+#if DEBUG_RTC
+#define DPRINTF(fmt, ...) \
+        do { fprintf(stdout, "RTC: [%24s:%5d] " fmt, __func__, __LINE__, \
+                ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#define     EXYNOS4210_RTC_REG_MEM_SIZE     0x0100
+
+#define     INTP            0x0030
+#define     RTCCON          0x0040
+#define     TICCNT          0x0044
+#define     RTCALM          0x0050
+#define     ALMSEC          0x0054
+#define     ALMMIN          0x0058
+#define     ALMHOUR         0x005C
+#define     ALMDAY          0x0060
+#define     ALMMON          0x0064
+#define     ALMYEAR         0x0068
+#define     BCDSEC          0x0070
+#define     BCDMIN          0x0074
+#define     BCDHOUR         0x0078
+#define     BCDDAY          0x007C
+#define     BCDDAYWEEK      0x0080
+#define     BCDMON          0x0084
+#define     BCDYEAR         0x0088
+#define     CURTICNT        0x0090
+
+#define     TICK_TIMER_ENABLE   0x0100
+#define     TICNT_THRESHHOLD    2
+
+
+#define     RTC_ENABLE          0x0001
+
+#define     INTP_TICK_ENABLE    0x0001
+#define     INTP_ALM_ENABLE     0x0002
+
+#define     ALARM_INT_ENABLE    0x0040
+
+#define     RTC_BASE_FREQ       32768
+
+typedef struct Exynos4210RTCState {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+
+    /* registers */
+    uint32_t    reg_intp;
+    uint32_t    reg_rtccon;
+    uint32_t    reg_ticcnt;
+    uint32_t    reg_rtcalm;
+    uint32_t    reg_almsec;
+    uint32_t    reg_almmin;
+    uint32_t    reg_almhour;
+    uint32_t    reg_almday;
+    uint32_t    reg_almmon;
+    uint32_t    reg_almyear;
+    uint32_t    reg_curticcnt;
+
+    ptimer_state    *ptimer;        /* tick timer */
+    ptimer_state    *ptimer_1Hz;    /* clock timer */
+    uint32_t        freq;
+
+    qemu_irq        tick_irq;   /* Time Tick Generator irq */
+    qemu_irq        alm_irq;    /* alarm irq */
+
+    struct tm   current_tm;     /* current time */
+} Exynos4210RTCState;
+
+#define TICCKSEL(value) ((value & (0x0F << 4)) >> 4)
+
+/*** VMState ***/
+static const VMStateDescription vmstate_exynos4210_rtc_state = {
+    .name = "exynos4210.rtc",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(reg_intp, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_rtccon, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_ticcnt, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_rtcalm, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_almsec, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_almmin, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_almhour, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_almday, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_almmon, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_almyear, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_curticcnt, Exynos4210RTCState),
+        VMSTATE_PTIMER(ptimer, Exynos4210RTCState),
+        VMSTATE_PTIMER(ptimer_1Hz, Exynos4210RTCState),
+        VMSTATE_UINT32(freq, Exynos4210RTCState),
+        VMSTATE_INT32(current_tm.tm_sec, Exynos4210RTCState),
+        VMSTATE_INT32(current_tm.tm_min, Exynos4210RTCState),
+        VMSTATE_INT32(current_tm.tm_hour, Exynos4210RTCState),
+        VMSTATE_INT32(current_tm.tm_wday, Exynos4210RTCState),
+        VMSTATE_INT32(current_tm.tm_mday, Exynos4210RTCState),
+        VMSTATE_INT32(current_tm.tm_mon, Exynos4210RTCState),
+        VMSTATE_INT32(current_tm.tm_year, Exynos4210RTCState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+#define BCD3DIGITS(x) \
+    ((uint32_t)to_bcd((uint8_t)(x % 100)) + \
+    ((uint32_t)to_bcd((uint8_t)((x % 1000) / 100)) << 8))
+
+static void check_alarm_raise(Exynos4210RTCState *s)
+{
+    unsigned int alarm_raise = 0;
+    struct tm stm = s->current_tm;
+
+    if ((s->reg_rtcalm & 0x01) &&
+        (to_bcd((uint8_t)stm.tm_sec) == (uint8_t)s->reg_almsec)) {
+        alarm_raise = 1;
+    }
+    if ((s->reg_rtcalm & 0x02) &&
+        (to_bcd((uint8_t)stm.tm_min) == (uint8_t)s->reg_almmin)) {
+        alarm_raise = 1;
+    }
+    if ((s->reg_rtcalm & 0x04) &&
+        (to_bcd((uint8_t)stm.tm_hour) == (uint8_t)s->reg_almhour)) {
+        alarm_raise = 1;
+    }
+    if ((s->reg_rtcalm & 0x08) &&
+        (to_bcd((uint8_t)stm.tm_mday) == (uint8_t)s->reg_almday)) {
+        alarm_raise = 1;
+    }
+    if ((s->reg_rtcalm & 0x10) &&
+         (to_bcd((uint8_t)stm.tm_mon) == (uint8_t)s->reg_almmon)) {
+        alarm_raise = 1;
+    }
+    if ((s->reg_rtcalm & 0x20) &&
+        (BCD3DIGITS(stm.tm_year) == s->reg_almyear)) {
+        alarm_raise = 1;
+    }
+
+    if (alarm_raise) {
+        DPRINTF("ALARM IRQ\n");
+        /* set irq status */
+        s->reg_intp |= INTP_ALM_ENABLE;
+        qemu_irq_raise(s->alm_irq);
+    }
+}
+
+/*
+ * RTC update frequency
+ * Parameters:
+ *     reg_value - current RTCCON register or his new value
+ */
+static void exynos4210_rtc_update_freq(Exynos4210RTCState *s,
+                                       uint32_t reg_value)
+{
+    uint32_t freq;
+
+    freq = s->freq;
+    /* set frequncy for time generator */
+    s->freq = RTC_BASE_FREQ / (1 << TICCKSEL(reg_value));
+
+    if (freq != s->freq) {
+        ptimer_set_freq(s->ptimer, s->freq);
+        DPRINTF("freq=%dHz\n", s->freq);
+    }
+}
+
+/* month is between 0 and 11. */
+static int get_days_in_month(int month, int year)
+{
+    static const int days_tab[12] = {
+        31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+    };
+    int d;
+    if ((unsigned)month >= 12) {
+        return 31;
+    }
+    d = days_tab[month];
+    if (month == 1) {
+        if ((year % 4) == 0 && ((year % 100) != 0 || (year % 400) == 0)) {
+            d++;
+        }
+    }
+    return d;
+}
+
+/* update 'tm' to the next second */
+static void rtc_next_second(struct tm *tm)
+{
+    int days_in_month;
+
+    tm->tm_sec++;
+    if ((unsigned)tm->tm_sec >= 60) {
+        tm->tm_sec = 0;
+        tm->tm_min++;
+        if ((unsigned)tm->tm_min >= 60) {
+            tm->tm_min = 0;
+            tm->tm_hour++;
+            if ((unsigned)tm->tm_hour >= 24) {
+                tm->tm_hour = 0;
+                /* next day */
+                tm->tm_wday++;
+                if ((unsigned)tm->tm_wday >= 7) {
+                    tm->tm_wday = 0;
+                }
+                days_in_month = get_days_in_month(tm->tm_mon,
+                                                  tm->tm_year + 1900);
+                tm->tm_mday++;
+                if (tm->tm_mday < 1) {
+                    tm->tm_mday = 1;
+                } else if (tm->tm_mday > days_in_month) {
+                    tm->tm_mday = 1;
+                    tm->tm_mon++;
+                    if (tm->tm_mon >= 12) {
+                        tm->tm_mon = 0;
+                        tm->tm_year++;
+                    }
+                }
+            }
+        }
+    }
+}
+
+/*
+ * tick handler
+ */
+static void exynos4210_rtc_tick(void *opaque)
+{
+    Exynos4210RTCState *s = (Exynos4210RTCState *)opaque;
+
+    DPRINTF("TICK IRQ\n");
+    /* set irq status */
+    s->reg_intp |= INTP_TICK_ENABLE;
+    /* raise IRQ */
+    qemu_irq_raise(s->tick_irq);
+
+    /* restart timer */
+    ptimer_set_count(s->ptimer, s->reg_ticcnt);
+    ptimer_run(s->ptimer, 1);
+}
+
+/*
+ * 1Hz clock handler
+ */
+static void exynos4210_rtc_1Hz_tick(void *opaque)
+{
+    Exynos4210RTCState *s = (Exynos4210RTCState *)opaque;
+
+    rtc_next_second(&s->current_tm);
+    /* DPRINTF("1Hz tick\n"); */
+
+    /* raise IRQ */
+    if (s->reg_rtcalm & ALARM_INT_ENABLE) {
+        check_alarm_raise(s);
+    }
+
+    ptimer_set_count(s->ptimer_1Hz, RTC_BASE_FREQ);
+    ptimer_run(s->ptimer_1Hz, 1);
+}
+
+/*
+ * RTC Read
+ */
+static uint64_t exynos4210_rtc_read(void *opaque, target_phys_addr_t offset,
+        unsigned size)
+{
+    uint32_t value = 0;
+    Exynos4210RTCState *s = (Exynos4210RTCState *)opaque;
+
+    switch (offset) {
+    case INTP:
+        value = s->reg_intp;
+        break;
+    case RTCCON:
+        value = s->reg_rtccon;
+        break;
+    case TICCNT:
+        value = s->reg_ticcnt;
+        break;
+    case RTCALM:
+        value = s->reg_rtcalm;
+        break;
+    case ALMSEC:
+        value = s->reg_almsec;
+        break;
+    case ALMMIN:
+        value = s->reg_almmin;
+        break;
+    case ALMHOUR:
+        value = s->reg_almhour;
+        break;
+    case ALMDAY:
+        value = s->reg_almday;
+        break;
+    case ALMMON:
+        value = s->reg_almmon;
+        break;
+    case ALMYEAR:
+        value = s->reg_almyear;
+        break;
+
+    case BCDSEC:
+        value = (uint32_t)to_bcd((uint8_t)s->current_tm.tm_sec);
+        break;
+    case BCDMIN:
+        value = (uint32_t)to_bcd((uint8_t)s->current_tm.tm_min);
+        break;
+    case BCDHOUR:
+        value = (uint32_t)to_bcd((uint8_t)s->current_tm.tm_hour);
+        break;
+    case BCDDAYWEEK:
+        value = (uint32_t)to_bcd((uint8_t)s->current_tm.tm_wday);
+        break;
+    case BCDDAY:
+        value = (uint32_t)to_bcd((uint8_t)s->current_tm.tm_mday);
+        break;
+    case BCDMON:
+        value = (uint32_t)to_bcd((uint8_t)s->current_tm.tm_mon + 1);
+        break;
+    case BCDYEAR:
+        value = BCD3DIGITS(s->current_tm.tm_year);
+        break;
+
+    case CURTICNT:
+        s->reg_curticcnt = ptimer_get_count(s->ptimer);
+        value = s->reg_curticcnt;
+        break;
+
+    default:
+        fprintf(stderr,
+                "[exynos4210.rtc: bad read offset " TARGET_FMT_plx "]\n",
+                offset);
+        break;
+    }
+    return value;
+}
+
+/*
+ * RTC Write
+ */
+static void exynos4210_rtc_write(void *opaque, target_phys_addr_t offset,
+        uint64_t value, unsigned size)
+{
+    Exynos4210RTCState *s = (Exynos4210RTCState *)opaque;
+
+    switch (offset) {
+    case INTP:
+        if (value & INTP_ALM_ENABLE) {
+            qemu_irq_lower(s->alm_irq);
+            s->reg_intp &= (~INTP_ALM_ENABLE);
+        }
+        if (value & INTP_TICK_ENABLE) {
+            qemu_irq_lower(s->tick_irq);
+            s->reg_intp &= (~INTP_TICK_ENABLE);
+        }
+        break;
+    case RTCCON:
+        if (value & RTC_ENABLE) {
+            exynos4210_rtc_update_freq(s, value);
+        }
+        if ((value & RTC_ENABLE) > (s->reg_rtccon & RTC_ENABLE)) {
+            /* clock timer */
+            ptimer_set_count(s->ptimer_1Hz, RTC_BASE_FREQ);
+            ptimer_run(s->ptimer_1Hz, 1);
+            DPRINTF("run clock timer\n");
+        }
+        if ((value & RTC_ENABLE) < (s->reg_rtccon & RTC_ENABLE)) {
+            /* tick timer */
+            ptimer_stop(s->ptimer);
+            /* clock timer */
+            ptimer_stop(s->ptimer_1Hz);
+            DPRINTF("stop all timers\n");
+        }
+        if (value & RTC_ENABLE) {
+            if ((value & TICK_TIMER_ENABLE) >
+                (s->reg_rtccon & TICK_TIMER_ENABLE) &&
+                (s->reg_ticcnt)) {
+                ptimer_set_count(s->ptimer, s->reg_ticcnt);
+                ptimer_run(s->ptimer, 1);
+                DPRINTF("run tick timer\n");
+            }
+            if ((value & TICK_TIMER_ENABLE) <
+                (s->reg_rtccon & TICK_TIMER_ENABLE)) {
+                ptimer_stop(s->ptimer);
+            }
+        }
+        s->reg_rtccon = value;
+        break;
+    case TICCNT:
+        if (value > TICNT_THRESHHOLD) {
+            s->reg_ticcnt = value;
+        } else {
+            fprintf(stderr,
+                    "[exynos4210.rtc: bad TICNT value %u ]\n",
+                    (uint32_t)value);
+        }
+        break;
+
+    case RTCALM:
+        s->reg_rtcalm = value;
+        break;
+    case ALMSEC:
+        s->reg_almsec = (value & 0x7f);
+        break;
+    case ALMMIN:
+        s->reg_almmin = (value & 0x7f);
+        break;
+    case ALMHOUR:
+        s->reg_almhour = (value & 0x3f);
+        break;
+    case ALMDAY:
+        s->reg_almday = (value & 0x3f);
+        break;
+    case ALMMON:
+        s->reg_almmon = (value & 0x1f);
+        break;
+    case ALMYEAR:
+        s->reg_almyear = (value & 0x0fff);
+        break;
+
+    case BCDSEC:
+        if (s->reg_rtccon & RTC_ENABLE) {
+            s->current_tm.tm_sec = (int)from_bcd((uint8_t)value);
+        }
+        break;
+    case BCDMIN:
+        if (s->reg_rtccon & RTC_ENABLE) {
+            s->current_tm.tm_min = (int)from_bcd((uint8_t)value);
+        }
+        break;
+    case BCDHOUR:
+        if (s->reg_rtccon & RTC_ENABLE) {
+            s->current_tm.tm_hour = (int)from_bcd((uint8_t)value);
+        }
+        break;
+    case BCDDAYWEEK:
+        if (s->reg_rtccon & RTC_ENABLE) {
+            s->current_tm.tm_wday = (int)from_bcd((uint8_t)value);
+        }
+        break;
+    case BCDDAY:
+        if (s->reg_rtccon & RTC_ENABLE) {
+            s->current_tm.tm_mday = (int)from_bcd((uint8_t)value);
+        }
+        break;
+    case BCDMON:
+        if (s->reg_rtccon & RTC_ENABLE) {
+            s->current_tm.tm_mon = (int)from_bcd((uint8_t)value) - 1;
+        }
+        break;
+    case BCDYEAR:
+        if (s->reg_rtccon & RTC_ENABLE) {
+            /* 3 digits */
+            s->current_tm.tm_year = (int)from_bcd((uint8_t)value) +
+                    (int)from_bcd((uint8_t)((value >> 8) & 0x0f)) * 100;
+        }
+        break;
+
+    default:
+        fprintf(stderr,
+                "[exynos4210.rtc: bad write offset " TARGET_FMT_plx "]\n",
+                offset);
+        break;
+
+    }
+}
+
+/*
+ * Set default values to timer fields and registers
+ */
+static void exynos4210_rtc_reset(DeviceState *d)
+{
+    Exynos4210RTCState *s = (Exynos4210RTCState *)d;
+
+    qemu_get_timedate(&s->current_tm, 0);
+
+    DPRINTF("Get time from host: %d-%d-%d %2d:%02d:%02d\n",
+            s->current_tm.tm_year, s->current_tm.tm_mon, s->current_tm.tm_mday,
+            s->current_tm.tm_hour, s->current_tm.tm_min, s->current_tm.tm_sec);
+
+    s->reg_intp = 0;
+    s->reg_rtccon = 0;
+    s->reg_ticcnt = 0;
+    s->reg_rtcalm = 0;
+    s->reg_almsec = 0;
+    s->reg_almmin = 0;
+    s->reg_almhour = 0;
+    s->reg_almday = 0;
+    s->reg_almmon = 0;
+    s->reg_almyear = 0;
+
+    s->reg_curticcnt = 0;
+
+    exynos4210_rtc_update_freq(s, s->reg_rtccon);
+    ptimer_stop(s->ptimer);
+    ptimer_stop(s->ptimer_1Hz);
+}
+
+static const MemoryRegionOps exynos4210_rtc_ops = {
+    .read = exynos4210_rtc_read,
+    .write = exynos4210_rtc_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+/*
+ * RTC timer initialization
+ */
+static int exynos4210_rtc_init(SysBusDevice *dev)
+{
+    Exynos4210RTCState *s = FROM_SYSBUS(Exynos4210RTCState, dev);
+    QEMUBH *bh;
+
+    bh = qemu_bh_new(exynos4210_rtc_tick, s);
+    s->ptimer = ptimer_init(bh);
+    ptimer_set_freq(s->ptimer, RTC_BASE_FREQ);
+    exynos4210_rtc_update_freq(s, 0);
+
+    bh = qemu_bh_new(exynos4210_rtc_1Hz_tick, s);
+    s->ptimer_1Hz = ptimer_init(bh);
+    ptimer_set_freq(s->ptimer_1Hz, RTC_BASE_FREQ);
+
+    sysbus_init_irq(dev, &s->alm_irq);
+    sysbus_init_irq(dev, &s->tick_irq);
+
+    memory_region_init_io(&s->iomem, &exynos4210_rtc_ops, s, "exynos4210-rtc",
+            EXYNOS4210_RTC_REG_MEM_SIZE);
+    sysbus_init_mmio(dev, &s->iomem);
+
+    return 0;
+}
+
+static void exynos4210_rtc_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+
+    k->init = exynos4210_rtc_init;
+    dc->reset = exynos4210_rtc_reset;
+    dc->vmsd = &vmstate_exynos4210_rtc_state;
+}
+
+static const TypeInfo exynos4210_rtc_info = {
+    .name          = "exynos4210.rtc",
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(Exynos4210RTCState),
+    .class_init    = exynos4210_rtc_class_init,
+};
+
+static void exynos4210_rtc_register_types(void)
+{
+    type_register_static(&exynos4210_rtc_info);
+}
+
+type_init(exynos4210_rtc_register_types)
diff --git a/hw/exynos4_boards.c b/hw/exynos4_boards.c
index e5c2a5f388..4bb0a60cb1 100644
--- a/hw/exynos4_boards.c
+++ b/hw/exynos4_boards.c
@@ -81,7 +81,7 @@ static void lan9215_init(uint32_t base, qemu_irq irq)
     SysBusDevice *s;
 
     /* This should be a 9215 but the 9118 is close enough */
-    if (nd_table[0].vlan) {
+    if (nd_table[0].used) {
         qemu_check_nic_model(&nd_table[0], "lan9118");
         dev = qdev_create(NULL, "lan9118");
         qdev_set_nic_properties(dev, &nd_table[0]);
diff --git a/hw/fdc.c b/hw/fdc.c
index 5b3224b39b..08830c1ba2 100644
--- a/hw/fdc.c
+++ b/hw/fdc.c
@@ -52,6 +52,113 @@
 /********************************************************/
 /* Floppy drive emulation                               */
 
+typedef enum FDriveRate {
+    FDRIVE_RATE_500K = 0x00,  /* 500 Kbps */
+    FDRIVE_RATE_300K = 0x01,  /* 300 Kbps */
+    FDRIVE_RATE_250K = 0x02,  /* 250 Kbps */
+    FDRIVE_RATE_1M   = 0x03,  /*   1 Mbps */
+} FDriveRate;
+
+typedef struct FDFormat {
+    FDriveType drive;
+    uint8_t last_sect;
+    uint8_t max_track;
+    uint8_t max_head;
+    FDriveRate rate;
+} FDFormat;
+
+static const FDFormat fd_formats[] = {
+    /* First entry is default format */
+    /* 1.44 MB 3"1/2 floppy disks */
+    { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
+    { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
+    { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
+    { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
+    { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
+    { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
+    { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
+    { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
+    /* 2.88 MB 3"1/2 floppy disks */
+    { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
+    { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
+    { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
+    { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
+    { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
+    /* 720 kB 3"1/2 floppy disks */
+    { FDRIVE_DRV_144,  9, 80, 1, FDRIVE_RATE_250K, },
+    { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
+    { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
+    { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
+    { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
+    { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
+    /* 1.2 MB 5"1/4 floppy disks */
+    { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
+    { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
+    { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
+    { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
+    { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
+    /* 720 kB 5"1/4 floppy disks */
+    { FDRIVE_DRV_120,  9, 80, 1, FDRIVE_RATE_250K, },
+    { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
+    /* 360 kB 5"1/4 floppy disks */
+    { FDRIVE_DRV_120,  9, 40, 1, FDRIVE_RATE_300K, },
+    { FDRIVE_DRV_120,  9, 40, 0, FDRIVE_RATE_300K, },
+    { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
+    { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
+    /* 320 kB 5"1/4 floppy disks */
+    { FDRIVE_DRV_120,  8, 40, 1, FDRIVE_RATE_250K, },
+    { FDRIVE_DRV_120,  8, 40, 0, FDRIVE_RATE_250K, },
+    /* 360 kB must match 5"1/4 better than 3"1/2... */
+    { FDRIVE_DRV_144,  9, 80, 0, FDRIVE_RATE_250K, },
+    /* end */
+    { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
+};
+
+static void pick_geometry(BlockDriverState *bs, int *nb_heads,
+                          int *max_track, int *last_sect,
+                          FDriveType drive_in, FDriveType *drive,
+                          FDriveRate *rate)
+{
+    const FDFormat *parse;
+    uint64_t nb_sectors, size;
+    int i, first_match, match;
+
+    bdrv_get_geometry(bs, &nb_sectors);
+    match = -1;
+    first_match = -1;
+    for (i = 0; ; i++) {
+        parse = &fd_formats[i];
+        if (parse->drive == FDRIVE_DRV_NONE) {
+            break;
+        }
+        if (drive_in == parse->drive ||
+            drive_in == FDRIVE_DRV_NONE) {
+            size = (parse->max_head + 1) * parse->max_track *
+                parse->last_sect;
+            if (nb_sectors == size) {
+                match = i;
+                break;
+            }
+            if (first_match == -1) {
+                first_match = i;
+            }
+        }
+    }
+    if (match == -1) {
+        if (first_match == -1) {
+            match = 1;
+        } else {
+            match = first_match;
+        }
+        parse = &fd_formats[match];
+    }
+    *nb_heads = parse->max_head + 1;
+    *max_track = parse->max_track;
+    *last_sect = parse->last_sect;
+    *drive = parse->drive;
+    *rate = parse->rate;
+}
+
 #define GET_CUR_DRV(fdctrl) ((fdctrl)->cur_drv)
 #define SET_CUR_DRV(fdctrl, drive) ((fdctrl)->cur_drv = (drive))
 
@@ -153,8 +260,12 @@ static int fd_seek(FDrive *drv, uint8_t head, uint8_t track, uint8_t sect,
         }
 #endif
         drv->head = head;
-        if (drv->track != track)
+        if (drv->track != track) {
+            if (drv->bs != NULL && bdrv_is_inserted(drv->bs)) {
+                drv->media_changed = 0;
+            }
             ret = 1;
+        }
         drv->track = track;
         drv->sect = sect;
     }
@@ -170,9 +281,7 @@ static int fd_seek(FDrive *drv, uint8_t head, uint8_t track, uint8_t sect,
 static void fd_recalibrate(FDrive *drv)
 {
     FLOPPY_DPRINTF("recalibrate\n");
-    drv->head = 0;
-    drv->track = 0;
-    drv->sect = 1;
+    fd_seek(drv, 0, 0, 1, 1);
 }
 
 /* Revalidate a disk drive after a disk change */
@@ -185,13 +294,10 @@ static void fd_revalidate(FDrive *drv)
     FLOPPY_DPRINTF("revalidate\n");
     if (drv->bs != NULL) {
         ro = bdrv_is_read_only(drv->bs);
-        bdrv_get_floppy_geometry_hint(drv->bs, &nb_heads, &max_track,
-                                      &last_sect, drv->drive, &drive, &rate);
+        pick_geometry(drv->bs, &nb_heads, &max_track,
+                      &last_sect, drv->drive, &drive, &rate);
         if (!bdrv_is_inserted(drv->bs)) {
             FLOPPY_DPRINTF("No disk in drive\n");
-        } else if (nb_heads != 0 && max_track != 0 && last_sect != 0) {
-            FLOPPY_DPRINTF("User defined disk (%d %d %d)\n",
-                           nb_heads - 1, max_track, last_sect);
         } else {
             FLOPPY_DPRINTF("Floppy disk (%d h %d t %d s) %s\n", nb_heads,
                            max_track, last_sect, ro ? "ro" : "rw");
@@ -305,6 +411,9 @@ enum {
 };
 
 enum {
+    FD_SR0_DS0      = 0x01,
+    FD_SR0_DS1      = 0x02,
+    FD_SR0_HEAD     = 0x04,
     FD_SR0_EQPMT    = 0x10,
     FD_SR0_SEEK     = 0x20,
     FD_SR0_ABNTERM  = 0x40,
@@ -711,14 +820,6 @@ static void fdctrl_raise_irq(FDCtrl *fdctrl, uint8_t status0)
         qemu_set_irq(fdctrl->irq, 1);
         fdctrl->sra |= FD_SRA_INTPEND;
     }
-    if (status0 & FD_SR0_SEEK) {
-        FDrive *cur_drv;
-        /* A seek clears the disk change line (if a disk is inserted) */
-        cur_drv = get_cur_drv(fdctrl);
-        if (cur_drv->bs != NULL && bdrv_is_inserted(cur_drv->bs)) {
-            cur_drv->media_changed = 0;
-        }
-    }
 
     fdctrl->reset_sensei = 0;
     fdctrl->status0 = status0;
@@ -978,14 +1079,15 @@ static void fdctrl_reset_fifo(FDCtrl *fdctrl)
 }
 
 /* Set FIFO status for the host to read */
-static void fdctrl_set_fifo(FDCtrl *fdctrl, int fifo_len, int do_irq)
+static void fdctrl_set_fifo(FDCtrl *fdctrl, int fifo_len, uint8_t status0)
 {
     fdctrl->data_dir = FD_DIR_READ;
     fdctrl->data_len = fifo_len;
     fdctrl->data_pos = 0;
     fdctrl->msr |= FD_MSR_CMDBUSY | FD_MSR_RQM | FD_MSR_DIO;
-    if (do_irq)
-        fdctrl_raise_irq(fdctrl, 0x00);
+    if (status0) {
+        fdctrl_raise_irq(fdctrl, status0);
+    }
 }
 
 /* Set an error: unimplemented/unknown command */
@@ -997,7 +1099,10 @@ static void fdctrl_unimplemented(FDCtrl *fdctrl, int direction)
     fdctrl_set_fifo(fdctrl, 1, 0);
 }
 
-/* Seek to next sector */
+/* Seek to next sector
+ * returns 0 when end of track reached (for DBL_SIDES on head 1)
+ * otherwise returns 1
+ */
 static int fdctrl_seek_to_next_sect(FDCtrl *fdctrl, FDrive *cur_drv)
 {
     FLOPPY_DPRINTF("seek to next sector (%d %02x %02x => %d)\n",
@@ -1005,30 +1110,39 @@ static int fdctrl_seek_to_next_sect(FDCtrl *fdctrl, FDrive *cur_drv)
                    fd_sector(cur_drv));
     /* XXX: cur_drv->sect >= cur_drv->last_sect should be an
        error in fact */
-    if (cur_drv->sect >= cur_drv->last_sect ||
-        cur_drv->sect == fdctrl->eot) {
-        cur_drv->sect = 1;
+    uint8_t new_head = cur_drv->head;
+    uint8_t new_track = cur_drv->track;
+    uint8_t new_sect = cur_drv->sect;
+
+    int ret = 1;
+
+    if (new_sect >= cur_drv->last_sect ||
+        new_sect == fdctrl->eot) {
+        new_sect = 1;
         if (FD_MULTI_TRACK(fdctrl->data_state)) {
-            if (cur_drv->head == 0 &&
+            if (new_head == 0 &&
                 (cur_drv->flags & FDISK_DBL_SIDES) != 0) {
-                cur_drv->head = 1;
+                new_head = 1;
             } else {
-                cur_drv->head = 0;
-                cur_drv->track++;
-                if ((cur_drv->flags & FDISK_DBL_SIDES) == 0)
-                    return 0;
+                new_head = 0;
+                new_track++;
+                if ((cur_drv->flags & FDISK_DBL_SIDES) == 0) {
+                    ret = 0;
+                }
             }
         } else {
-            cur_drv->track++;
-            return 0;
+            new_track++;
+            ret = 0;
+        }
+        if (ret == 1) {
+            FLOPPY_DPRINTF("seek to next track (%d %02x %02x => %d)\n",
+                    new_head, new_track, new_sect, fd_sector(cur_drv));
         }
-        FLOPPY_DPRINTF("seek to next track (%d %02x %02x => %d)\n",
-                       cur_drv->head, cur_drv->track,
-                       cur_drv->sect, fd_sector(cur_drv));
     } else {
-        cur_drv->sect++;
+        new_sect++;
     }
-    return 1;
+    fd_seek(cur_drv, new_head, new_track, new_sect, 1);
+    return ret;
 }
 
 /* Callback for transfer end (stop or abort) */
@@ -1038,10 +1152,12 @@ static void fdctrl_stop_transfer(FDCtrl *fdctrl, uint8_t status0,
     FDrive *cur_drv;
 
     cur_drv = get_cur_drv(fdctrl);
+    fdctrl->status0 = status0 | FD_SR0_SEEK | (cur_drv->head << 2) |
+                      GET_CUR_DRV(fdctrl);
+
     FLOPPY_DPRINTF("transfer status: %02x %02x %02x (%02x)\n",
-                   status0, status1, status2,
-                   status0 | (cur_drv->head << 2) | GET_CUR_DRV(fdctrl));
-    fdctrl->fifo[0] = status0 | (cur_drv->head << 2) | GET_CUR_DRV(fdctrl);
+                   status0, status1, status2, fdctrl->status0);
+    fdctrl->fifo[0] = fdctrl->status0;
     fdctrl->fifo[1] = status1;
     fdctrl->fifo[2] = status2;
     fdctrl->fifo[3] = cur_drv->track;
@@ -1054,7 +1170,7 @@ static void fdctrl_stop_transfer(FDCtrl *fdctrl, uint8_t status0,
     }
     fdctrl->msr |= FD_MSR_RQM | FD_MSR_DIO;
     fdctrl->msr &= ~FD_MSR_NONDMA;
-    fdctrl_set_fifo(fdctrl, 7, 1);
+    fdctrl_set_fifo(fdctrl, 7, fdctrl->status0);
 }
 
 /* Prepare a data transfer (either DMA or FIFO) */
@@ -1169,7 +1285,7 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction)
     if (direction != FD_DIR_WRITE)
         fdctrl->msr |= FD_MSR_DIO;
     /* IO based transfer: calculate len */
-    fdctrl_raise_irq(fdctrl, 0x00);
+    fdctrl_raise_irq(fdctrl, FD_SR0_SEEK);
 
     return;
 }
@@ -1598,16 +1714,18 @@ static void fdctrl_handle_sense_interrupt_status(FDCtrl *fdctrl, int direction)
 {
     FDrive *cur_drv = get_cur_drv(fdctrl);
 
-    if(fdctrl->reset_sensei > 0) {
+    if (fdctrl->reset_sensei > 0) {
         fdctrl->fifo[0] =
             FD_SR0_RDYCHG + FD_RESET_SENSEI_COUNT - fdctrl->reset_sensei;
         fdctrl->reset_sensei--;
+    } else if (!(fdctrl->sra & FD_SRA_INTPEND)) {
+        fdctrl->fifo[0] = FD_SR0_INVCMD;
+        fdctrl_set_fifo(fdctrl, 1, 0);
+        return;
     } else {
-        /* XXX: status0 handling is broken for read/write
-           commands, so we do this hack. It should be suppressed
-           ASAP */
         fdctrl->fifo[0] =
-            FD_SR0_SEEK | (cur_drv->head << 2) | GET_CUR_DRV(fdctrl);
+                (fdctrl->status0 & ~(FD_SR0_HEAD | FD_SR0_DS1 | FD_SR0_DS0))
+                | GET_CUR_DRV(fdctrl);
     }
 
     fdctrl->fifo[1] = cur_drv->track;
@@ -1626,11 +1744,7 @@ static void fdctrl_handle_seek(FDCtrl *fdctrl, int direction)
     /* The seek command just sends step pulses to the drive and doesn't care if
      * there is a medium inserted of if it's banging the head against the drive.
      */
-    if (fdctrl->fifo[2] > cur_drv->max_track) {
-        cur_drv->track = cur_drv->max_track;
-    } else {
-        cur_drv->track = fdctrl->fifo[2];
-    }
+    fd_seek(cur_drv, cur_drv->head, fdctrl->fifo[2], cur_drv->sect, 1);
     /* Raise Interrupt */
     fdctrl_raise_irq(fdctrl, FD_SR0_SEEK);
 }
@@ -1688,32 +1802,35 @@ static void fdctrl_handle_drive_specification_command(FDCtrl *fdctrl, int direct
     }
 }
 
-static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction)
+static void fdctrl_handle_relative_seek_in(FDCtrl *fdctrl, int direction)
 {
     FDrive *cur_drv;
 
     SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK);
     cur_drv = get_cur_drv(fdctrl);
     if (fdctrl->fifo[2] + cur_drv->track >= cur_drv->max_track) {
-        cur_drv->track = cur_drv->max_track - 1;
+        fd_seek(cur_drv, cur_drv->head, cur_drv->max_track - 1,
+                cur_drv->sect, 1);
     } else {
-        cur_drv->track += fdctrl->fifo[2];
+        fd_seek(cur_drv, cur_drv->head,
+                cur_drv->track + fdctrl->fifo[2], cur_drv->sect, 1);
     }
     fdctrl_reset_fifo(fdctrl);
     /* Raise Interrupt */
     fdctrl_raise_irq(fdctrl, FD_SR0_SEEK);
 }
 
-static void fdctrl_handle_relative_seek_in(FDCtrl *fdctrl, int direction)
+static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction)
 {
     FDrive *cur_drv;
 
     SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK);
     cur_drv = get_cur_drv(fdctrl);
     if (fdctrl->fifo[2] > cur_drv->track) {
-        cur_drv->track = 0;
+        fd_seek(cur_drv, cur_drv->head, 0, cur_drv->sect, 1);
     } else {
-        cur_drv->track -= fdctrl->fifo[2];
+        fd_seek(cur_drv, cur_drv->head,
+                cur_drv->track - fdctrl->fifo[2], cur_drv->sect, 1);
     }
     fdctrl_reset_fifo(fdctrl);
     /* Raise Interrupt */
@@ -2046,18 +2163,13 @@ static int sun4m_fdc_init1(SysBusDevice *dev)
     return fdctrl_init_common(fdctrl);
 }
 
-void fdc_get_bs(BlockDriverState *bs[], ISADevice *dev)
+FDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i)
 {
-    FDCtrlISABus *isa = DO_UPCAST(FDCtrlISABus, busdev, dev);
-    FDCtrl *fdctrl = &isa->state;
-    int i;
+    FDCtrlISABus *isa = DO_UPCAST(FDCtrlISABus, busdev, fdc);
 
-    for (i = 0; i < MAX_FD; i++) {
-        bs[i] = fdctrl->drives[i].bs;
-    }
+    return isa->state.drives[i].drive;
 }
 
-
 static const VMStateDescription vmstate_isa_fdc ={
     .name = "fdc",
     .version_id = 2,
diff --git a/hw/fdc.h b/hw/fdc.h
index 1b32b17bef..b5c9f31074 100644
--- a/hw/fdc.h
+++ b/hw/fdc.h
@@ -6,11 +6,19 @@
 /* fdc.c */
 #define MAX_FD 2
 
+typedef enum FDriveType {
+    FDRIVE_DRV_144  = 0x00,   /* 1.44 MB 3"5 drive      */
+    FDRIVE_DRV_288  = 0x01,   /* 2.88 MB 3"5 drive      */
+    FDRIVE_DRV_120  = 0x02,   /* 1.2  MB 5"25 drive     */
+    FDRIVE_DRV_NONE = 0x03,   /* No drive connected     */
+} FDriveType;
+
 ISADevice *fdctrl_init_isa(ISABus *bus, DriveInfo **fds);
 void fdctrl_init_sysbus(qemu_irq irq, int dma_chann,
                         target_phys_addr_t mmio_base, DriveInfo **fds);
 void sun4m_fdctrl_init(qemu_irq irq, target_phys_addr_t io_base,
                        DriveInfo **fds, qemu_irq *fdc_tc);
-void fdc_get_bs(BlockDriverState *bs[], ISADevice *dev);
+
+FDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i);
 
 #endif
diff --git a/hw/hd-geometry.c b/hw/hd-geometry.c
new file mode 100644
index 0000000000..1cdb9fb753
--- /dev/null
+++ b/hw/hd-geometry.c
@@ -0,0 +1,157 @@
+/*
+ * Hard disk geometry utilities
+ *
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "block.h"
+#include "hw/block-common.h"
+#include "trace.h"
+
+struct partition {
+        uint8_t boot_ind;           /* 0x80 - active */
+        uint8_t head;               /* starting head */
+        uint8_t sector;             /* starting sector */
+        uint8_t cyl;                /* starting cylinder */
+        uint8_t sys_ind;            /* What partition type */
+        uint8_t end_head;           /* end head */
+        uint8_t end_sector;         /* end sector */
+        uint8_t end_cyl;            /* end cylinder */
+        uint32_t start_sect;        /* starting sector counting from 0 */
+        uint32_t nr_sects;          /* nr of sectors in partition */
+} QEMU_PACKED;
+
+/* try to guess the disk logical geometry from the MSDOS partition table.
+   Return 0 if OK, -1 if could not guess */
+static int guess_disk_lchs(BlockDriverState *bs,
+                           int *pcylinders, int *pheads, int *psectors)
+{
+    uint8_t buf[BDRV_SECTOR_SIZE];
+    int i, heads, sectors, cylinders;
+    struct partition *p;
+    uint32_t nr_sects;
+    uint64_t nb_sectors;
+
+    bdrv_get_geometry(bs, &nb_sectors);
+
+    /**
+     * The function will be invoked during startup not only in sync I/O mode,
+     * but also in async I/O mode. So the I/O throttling function has to
+     * be disabled temporarily here, not permanently.
+     */
+    if (bdrv_read_unthrottled(bs, 0, buf, 1) < 0) {
+        return -1;
+    }
+    /* test msdos magic */
+    if (buf[510] != 0x55 || buf[511] != 0xaa) {
+        return -1;
+    }
+    for (i = 0; i < 4; i++) {
+        p = ((struct partition *)(buf + 0x1be)) + i;
+        nr_sects = le32_to_cpu(p->nr_sects);
+        if (nr_sects && p->end_head) {
+            /* We make the assumption that the partition terminates on
+               a cylinder boundary */
+            heads = p->end_head + 1;
+            sectors = p->end_sector & 63;
+            if (sectors == 0) {
+                continue;
+            }
+            cylinders = nb_sectors / (heads * sectors);
+            if (cylinders < 1 || cylinders > 16383) {
+                continue;
+            }
+            *pheads = heads;
+            *psectors = sectors;
+            *pcylinders = cylinders;
+            trace_hd_geometry_lchs_guess(bs, cylinders, heads, sectors);
+            return 0;
+        }
+    }
+    return -1;
+}
+
+static void guess_chs_for_size(BlockDriverState *bs,
+                uint32_t *pcyls, uint32_t *pheads, uint32_t *psecs)
+{
+    uint64_t nb_sectors;
+    int cylinders;
+
+    bdrv_get_geometry(bs, &nb_sectors);
+
+    cylinders = nb_sectors / (16 * 63);
+    if (cylinders > 16383) {
+        cylinders = 16383;
+    } else if (cylinders < 2) {
+        cylinders = 2;
+    }
+    *pcyls = cylinders;
+    *pheads = 16;
+    *psecs = 63;
+}
+
+void hd_geometry_guess(BlockDriverState *bs,
+                       uint32_t *pcyls, uint32_t *pheads, uint32_t *psecs,
+                       int *ptrans)
+{
+    int cylinders, heads, secs, translation;
+
+    if (guess_disk_lchs(bs, &cylinders, &heads, &secs) < 0) {
+        /* no LCHS guess: use a standard physical disk geometry  */
+        guess_chs_for_size(bs, pcyls, pheads, psecs);
+        translation = hd_bios_chs_auto_trans(*pcyls, *pheads, *psecs);
+    } else if (heads > 16) {
+        /* LCHS guess with heads > 16 means that a BIOS LBA
+           translation was active, so a standard physical disk
+           geometry is OK */
+        guess_chs_for_size(bs, pcyls, pheads, psecs);
+        translation = *pcyls * *pheads <= 131072
+            ? BIOS_ATA_TRANSLATION_LARGE
+            : BIOS_ATA_TRANSLATION_LBA;
+    } else {
+        /* LCHS guess with heads <= 16: use as physical geometry */
+        *pcyls = cylinders;
+        *pheads = heads;
+        *psecs = secs;
+        /* disable any translation to be in sync with
+           the logical geometry */
+        translation = BIOS_ATA_TRANSLATION_NONE;
+    }
+    if (ptrans) {
+        *ptrans = translation;
+    }
+    trace_hd_geometry_guess(bs, *pcyls, *pheads, *psecs, translation);
+}
+
+int hd_bios_chs_auto_trans(uint32_t cyls, uint32_t heads, uint32_t secs)
+{
+    return cyls <= 1024 && heads <= 16 && secs <= 63
+        ? BIOS_ATA_TRANSLATION_NONE
+        : BIOS_ATA_TRANSLATION_LBA;
+}
diff --git a/hw/highbank.c b/hw/highbank.c
index 4bdea5df7d..11aa1312c0 100644
--- a/hw/highbank.c
+++ b/hw/highbank.c
@@ -284,7 +284,7 @@ static void highbank_init(ram_addr_t ram_size,
 
     sysbus_create_simple("sysbus-ahci", 0xffe08000, pic[83]);
 
-    if (nd_table[0].vlan) {
+    if (nd_table[0].used) {
         qemu_check_nic_model(&nd_table[0], "xgmac");
         dev = qdev_create(NULL, "xgmac");
         qdev_set_nic_properties(dev, &nd_table[0]);
diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index eb171b7c47..8c764bbfef 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -7,6 +7,8 @@ obj-y += debugcon.o multiboot.o
 obj-y += pc_piix.o
 obj-y += pc_sysfw.o
 obj-$(CONFIG_XEN) += xen_platform.o xen_apic.o
+obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen-host-pci-device.o
+obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pt.o xen_pt_config_init.o xen_pt_msi.o
 obj-y += kvm/
 obj-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
 
diff --git a/hw/ide.h b/hw/ide.h
index 0b18c9016b..2db4079f68 100644
--- a/hw/ide.h
+++ b/hw/ide.h
@@ -29,7 +29,9 @@ void mmio_ide_init (target_phys_addr_t membase, target_phys_addr_t membase2,
                     qemu_irq irq, int shift,
                     DriveInfo *hd0, DriveInfo *hd1);
 
-void ide_get_bs(BlockDriverState *bs[], BusState *qbus);
+int ide_get_geometry(BusState *bus, int unit,
+                     int16_t *cyls, int8_t *heads, int8_t *secs);
+int ide_get_bios_chs_trans(BusState *bus, int unit);
 
 /* ide/core.c */
 void ide_drive_get(DriveInfo **hd, int max_bus);
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index e275e68934..efea93f0b4 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -588,7 +588,7 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis)
     AHCIPortRegs *pr = &ad->port_regs;
     uint8_t *d2h_fis;
     int i;
-    target_phys_addr_t cmd_len = 0x80;
+    dma_addr_t cmd_len = 0x80;
     int cmd_mapped = 0;
 
     if (!ad->res_fis || !(pr->cmd & PORT_CMD_FIS_RX)) {
@@ -598,7 +598,8 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis)
     if (!cmd_fis) {
         /* map cmd_fis */
         uint64_t tbl_addr = le64_to_cpu(ad->cur_cmd->tbl_addr);
-        cmd_fis = cpu_physical_memory_map(tbl_addr, &cmd_len, 0);
+        cmd_fis = dma_memory_map(ad->hba->dma, tbl_addr, &cmd_len,
+                                 DMA_DIRECTION_TO_DEVICE);
         cmd_mapped = 1;
     }
 
@@ -630,7 +631,8 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis)
     ahci_trigger_irq(ad->hba, ad, PORT_IRQ_D2H_REG_FIS);
 
     if (cmd_mapped) {
-        cpu_physical_memory_unmap(cmd_fis, cmd_len, 0, cmd_len);
+        dma_memory_unmap(ad->hba->dma, cmd_fis, cmd_len,
+                         DMA_DIRECTION_TO_DEVICE, cmd_len);
     }
 }
 
@@ -640,8 +642,8 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist)
     uint32_t opts = le32_to_cpu(cmd->opts);
     uint64_t prdt_addr = le64_to_cpu(cmd->tbl_addr) + 0x80;
     int sglist_alloc_hint = opts >> AHCI_CMD_HDR_PRDT_LEN;
-    target_phys_addr_t prdt_len = (sglist_alloc_hint * sizeof(AHCI_SG));
-    target_phys_addr_t real_prdt_len = prdt_len;
+    dma_addr_t prdt_len = (sglist_alloc_hint * sizeof(AHCI_SG));
+    dma_addr_t real_prdt_len = prdt_len;
     uint8_t *prdt;
     int i;
     int r = 0;
@@ -652,7 +654,8 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist)
     }
 
     /* map PRDT */
-    if (!(prdt = cpu_physical_memory_map(prdt_addr, &prdt_len, 0))){
+    if (!(prdt = dma_memory_map(ad->hba->dma, prdt_addr, &prdt_len,
+                                DMA_DIRECTION_TO_DEVICE))){
         DPRINTF(ad->port_no, "map failed\n");
         return -1;
     }
@@ -667,7 +670,7 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist)
     if (sglist_alloc_hint > 0) {
         AHCI_SG *tbl = (AHCI_SG *)prdt;
 
-        qemu_sglist_init(sglist, sglist_alloc_hint);
+        qemu_sglist_init(sglist, sglist_alloc_hint, ad->hba->dma);
         for (i = 0; i < sglist_alloc_hint; i++) {
             /* flags_size is zero-based */
             qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr),
@@ -676,7 +679,8 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist)
     }
 
 out:
-    cpu_physical_memory_unmap(prdt, prdt_len, 0, prdt_len);
+    dma_memory_unmap(ad->hba->dma, prdt, prdt_len,
+                     DMA_DIRECTION_TO_DEVICE, prdt_len);
     return r;
 }
 
@@ -786,7 +790,7 @@ static int handle_cmd(AHCIState *s, int port, int slot)
     uint64_t tbl_addr;
     AHCICmdHdr *cmd;
     uint8_t *cmd_fis;
-    target_phys_addr_t cmd_len;
+    dma_addr_t cmd_len;
 
     if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
         /* Engine currently busy, try again later */
@@ -808,7 +812,8 @@ static int handle_cmd(AHCIState *s, int port, int slot)
     tbl_addr = le64_to_cpu(cmd->tbl_addr);
 
     cmd_len = 0x80;
-    cmd_fis = cpu_physical_memory_map(tbl_addr, &cmd_len, 1);
+    cmd_fis = dma_memory_map(s->dma, tbl_addr, &cmd_len,
+                             DMA_DIRECTION_FROM_DEVICE);
 
     if (!cmd_fis) {
         DPRINTF(port, "error: guest passed us an invalid cmd fis\n");
@@ -934,7 +939,8 @@ static int handle_cmd(AHCIState *s, int port, int slot)
     }
 
 out:
-    cpu_physical_memory_unmap(cmd_fis, cmd_len, 1, cmd_len);
+    dma_memory_unmap(s->dma, cmd_fis, cmd_len, DMA_DIRECTION_FROM_DEVICE,
+                     cmd_len);
 
     if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
         /* async command, complete later */
@@ -1114,11 +1120,12 @@ static const IDEDMAOps ahci_dma_ops = {
     .reset = ahci_dma_reset,
 };
 
-void ahci_init(AHCIState *s, DeviceState *qdev, int ports)
+void ahci_init(AHCIState *s, DeviceState *qdev, DMAContext *dma, int ports)
 {
     qemu_irq *irqs;
     int i;
 
+    s->dma = dma;
     s->ports = ports;
     s->dev = g_malloc0(sizeof(AHCIDevice) * ports);
     ahci_reg_init(s);
@@ -1187,7 +1194,7 @@ static void sysbus_ahci_reset(DeviceState *dev)
 static int sysbus_ahci_init(SysBusDevice *dev)
 {
     SysbusAHCIState *s = FROM_SYSBUS(SysbusAHCIState, dev);
-    ahci_init(&s->ahci, &dev->qdev, s->num_ports);
+    ahci_init(&s->ahci, &dev->qdev, NULL, s->num_ports);
 
     sysbus_init_mmio(dev, &s->ahci.mem);
     sysbus_init_irq(dev, &s->ahci.irq);
diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h
index ec1b6a5f66..1200a56ada 100644
--- a/hw/ide/ahci.h
+++ b/hw/ide/ahci.h
@@ -299,6 +299,7 @@ typedef struct AHCIState {
     uint32_t idp_index;     /* Current IDP index */
     int ports;
     qemu_irq irq;
+    DMAContext *dma;
 } AHCIState;
 
 typedef struct AHCIPCIState {
@@ -329,7 +330,7 @@ typedef struct NCQFrame {
     uint8_t reserved10;
 } QEMU_PACKED NCQFrame;
 
-void ahci_init(AHCIState *s, DeviceState *qdev, int ports);
+void ahci_init(AHCIState *s, DeviceState *qdev, DMAContext *dma, int ports);
 void ahci_uninit(AHCIState *s);
 
 void ahci_reset(AHCIState *s);
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 5919cf52d8..f7f714c726 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -956,6 +956,36 @@ static void cmd_read_cdvd_capacity(IDEState *s, uint8_t* buf)
     ide_atapi_cmd_reply(s, 8, 8);
 }
 
+static void cmd_read_disc_information(IDEState *s, uint8_t* buf)
+{
+    uint8_t type = buf[1] & 7;
+    uint32_t max_len = ube16_to_cpu(buf + 7);
+
+    /* Types 1/2 are only defined for Blu-Ray.  */
+    if (type != 0) {
+        ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
+                            ASC_INV_FIELD_IN_CMD_PACKET);
+        return;
+    }
+
+    memset(buf, 0, 34);
+    buf[1] = 32;
+    buf[2] = 0xe; /* last session complete, disc finalized */
+    buf[3] = 1;   /* first track on disc */
+    buf[4] = 1;   /* # of sessions */
+    buf[5] = 1;   /* first track of last session */
+    buf[6] = 1;   /* last track of last session */
+    buf[7] = 0x20; /* unrestricted use */
+    buf[8] = 0x00; /* CD-ROM or DVD-ROM */
+    /* 9-10-11: most significant byte corresponding bytes 4-5-6 */
+    /* 12-23: not meaningful for CD-ROM or DVD-ROM */
+    /* 24-31: disc bar code */
+    /* 32: disc application code */
+    /* 33: number of OPC tables */
+
+    ide_atapi_cmd_reply(s, 34, max_len);
+}
+
 static void cmd_read_dvd_structure(IDEState *s, uint8_t* buf)
 {
     int max_len;
@@ -1045,6 +1075,7 @@ static const struct {
     [ 0x43 ] = { cmd_read_toc_pma_atip,             CHECK_READY },
     [ 0x46 ] = { cmd_get_configuration,             ALLOW_UA },
     [ 0x4a ] = { cmd_get_event_status_notification, ALLOW_UA },
+    [ 0x51 ] = { cmd_read_disc_information,         CHECK_READY },
     [ 0x5a ] = { cmd_mode_sense, /* (10) */         0 },
     [ 0xa8 ] = { cmd_read, /* (12) */               CHECK_READY },
     [ 0xad ] = { cmd_read_dvd_structure,            CHECK_READY },
diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c
index bf8ece4708..e0b9443496 100644
--- a/hw/ide/cmd646.c
+++ b/hw/ide/cmd646.c
@@ -94,12 +94,12 @@ static void cmd646_data_write(void *opaque, target_phys_addr_t addr,
     CMD646BAR *cmd646bar = opaque;
 
     if (size == 1) {
-        return ide_ioport_write(cmd646bar->bus, addr, data);
+        ide_ioport_write(cmd646bar->bus, addr, data);
     } else if (addr == 0) {
         if (size == 2) {
-            return ide_data_writew(cmd646bar->bus, addr, data);
+            ide_data_writew(cmd646bar->bus, addr, data);
         } else {
-            return ide_data_writel(cmd646bar->bus, addr, data);
+            ide_data_writel(cmd646bar->bus, addr, data);
         }
     }
 }
@@ -295,7 +295,7 @@ static int pci_cmd646_ide_initfn(PCIDevice *dev)
     return 0;
 }
 
-static int pci_cmd646_ide_exitfn(PCIDevice *dev)
+static void pci_cmd646_ide_exitfn(PCIDevice *dev)
 {
     PCIIDEState *d = DO_UPCAST(PCIIDEState, dev, dev);
     unsigned i;
@@ -309,8 +309,6 @@ static int pci_cmd646_ide_exitfn(PCIDevice *dev)
         memory_region_destroy(&d->cmd646_bar[i].data);
     }
     memory_region_destroy(&d->bmdma_bar);
-
-    return 0;
 }
 
 void pci_cmd646_ide_init(PCIBus *bus, DriveInfo **hd_table,
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 71d4d7732a..d65ef3d58d 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -30,6 +30,7 @@
 #include "qemu-timer.h"
 #include "sysemu.h"
 #include "dma.h"
+#include "hw/block-common.h"
 #include "blockdev.h"
 
 #include <hw/ide/internal.h>
@@ -1924,31 +1925,20 @@ static const BlockDevOps ide_cd_block_ops = {
 
 int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind,
                    const char *version, const char *serial, const char *model,
-                   uint64_t wwn)
+                   uint64_t wwn,
+                   uint32_t cylinders, uint32_t heads, uint32_t secs,
+                   int chs_trans)
 {
-    int cylinders, heads, secs;
     uint64_t nb_sectors;
 
     s->bs = bs;
     s->drive_kind = kind;
 
     bdrv_get_geometry(bs, &nb_sectors);
-    bdrv_guess_geometry(bs, &cylinders, &heads, &secs);
-    if (cylinders < 1 || cylinders > 16383) {
-        error_report("cyls must be between 1 and 16383");
-        return -1;
-    }
-    if (heads < 1 || heads > 16) {
-        error_report("heads must be between 1 and 16");
-        return -1;
-    }
-    if (secs < 1 || secs > 63) {
-        error_report("secs must be between 1 and 63");
-        return -1;
-    }
     s->cylinders = cylinders;
     s->heads = heads;
     s->sectors = secs;
+    s->chs_trans = chs_trans;
     s->nb_sectors = nb_sectors;
     s->wwn = wwn;
     /* The SMART values should be preserved across power cycles
@@ -2075,17 +2065,39 @@ void ide_init2(IDEBus *bus, qemu_irq irq)
 void ide_init2_with_non_qdev_drives(IDEBus *bus, DriveInfo *hd0,
                                     DriveInfo *hd1, qemu_irq irq)
 {
-    int i;
+    int i, trans;
     DriveInfo *dinfo;
+    uint32_t cyls, heads, secs;
 
     for(i = 0; i < 2; i++) {
         dinfo = i == 0 ? hd0 : hd1;
         ide_init1(bus, i);
         if (dinfo) {
+            cyls  = dinfo->cyls;
+            heads = dinfo->heads;
+            secs  = dinfo->secs;
+            trans = dinfo->trans;
+            if (!cyls && !heads && !secs) {
+                hd_geometry_guess(dinfo->bdrv, &cyls, &heads, &secs, &trans);
+            } else if (trans == BIOS_ATA_TRANSLATION_AUTO) {
+                trans = hd_bios_chs_auto_trans(cyls, heads, secs);
+            }
+            if (cyls < 1 || cyls > 65535) {
+                error_report("cyls must be between 1 and 65535");
+                exit(1);
+            }
+            if (heads < 1 || heads > 16) {
+                error_report("heads must be between 1 and 16");
+                exit(1);
+            }
+            if (secs < 1 || secs > 255) {
+                error_report("secs must be between 1 and 255");
+                exit(1);
+            }
             if (ide_init_drive(&bus->ifs[i], dinfo->bdrv,
-                               dinfo->media_cd ? IDE_CD : IDE_HD, NULL,
-                               *dinfo->serial ? dinfo->serial : NULL,
-                               NULL, 0) < 0) {
+                               dinfo->media_cd ? IDE_CD : IDE_HD,
+                               NULL, dinfo->serial, NULL, 0,
+                               cyls, heads, secs, trans) < 0) {
                 error_report("Can't set up IDE drive %s", dinfo->id);
                 exit(1);
             }
diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index e3eaaea882..272b7734b5 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -98,7 +98,7 @@ static int pci_ich9_ahci_init(PCIDevice *dev)
     uint8_t *sata_cap;
     d = DO_UPCAST(struct AHCIPCIState, card, dev);
 
-    ahci_init(&d->ahci, &dev->qdev, 6);
+    ahci_init(&d->ahci, &dev->qdev, pci_dma_context(dev), 6);
 
     pci_config_set_prog_interface(d->card.config, AHCI_PROGMODE_MAJOR_REV_1);
 
@@ -132,15 +132,13 @@ static int pci_ich9_ahci_init(PCIDevice *dev)
     return 0;
 }
 
-static int pci_ich9_uninit(PCIDevice *dev)
+static void pci_ich9_uninit(PCIDevice *dev)
 {
     struct AHCIPCIState *d;
     d = DO_UPCAST(struct AHCIPCIState, card, dev);
 
     msi_uninit(dev);
     ahci_uninit(&d->ahci);
-
-    return 0;
 }
 
 static void ich_ahci_class_init(ObjectClass *klass, void *data)
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index 1a02f57bf5..7170bd9cd0 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -11,6 +11,7 @@
 #include "iorange.h"
 #include "dma.h"
 #include "sysemu.h"
+#include "hw/block-common.h"
 #include "hw/scsi-defs.h"
 
 /* debug IDE devices */
@@ -344,7 +345,7 @@ struct IDEState {
     uint8_t unit;
     /* ide config */
     IDEDriveKind drive_kind;
-    int cylinders, heads, sectors;
+    int cylinders, heads, sectors, chs_trans;
     int64_t nb_sectors;
     int mult_sectors;
     int identify_set;
@@ -474,6 +475,7 @@ struct IDEDevice {
     DeviceState qdev;
     uint32_t unit;
     BlockConf conf;
+    int chs_trans;
     char *version;
     char *serial;
     char *model;
@@ -545,7 +547,9 @@ uint32_t ide_data_readl(void *opaque, uint32_t addr);
 
 int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind,
                    const char *version, const char *serial, const char *model,
-                   uint64_t wwn);
+                   uint64_t wwn,
+                   uint32_t cylinders, uint32_t heads, uint32_t secs,
+                   int chs_trans);
 void ide_init2(IDEBus *bus, qemu_irq irq);
 void ide_init2_with_non_qdev_drives(IDEBus *bus, DriveInfo *hd0,
                                     DriveInfo *hd1, qemu_irq irq);
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 7b38d9e683..848cb31429 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -76,7 +76,7 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
 
     s->io_buffer_size = io->len;
 
-    qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1);
+    qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1, NULL);
     qemu_sglist_add(&s->sg, io->addr, io->len);
     io->addr += io->len;
     io->len = 0;
@@ -133,7 +133,7 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
     s->io_buffer_index = 0;
     s->io_buffer_size = io->len;
 
-    qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1);
+    qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1, NULL);
     qemu_sglist_add(&s->sg, io->addr, io->len);
     io->addr += io->len;
     io->len = 0;
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index f5a74c293a..4ded9ee13d 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -73,7 +73,8 @@ static void bmdma_write(void *opaque, target_phys_addr_t addr,
 #endif
     switch(addr & 3) {
     case 0:
-        return bmdma_cmd_writeb(bm, val);
+        bmdma_cmd_writeb(bm, val);
+        break;
     case 2:
         bm->status = (val & 0x60) | (bm->status & 1) | (bm->status & ~val & 0x06);
         break;
@@ -200,7 +201,7 @@ PCIDevice *pci_piix3_xen_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn)
     return dev;
 }
 
-static int pci_piix_ide_exitfn(PCIDevice *dev)
+static void pci_piix_ide_exitfn(PCIDevice *dev)
 {
     PCIIDEState *d = DO_UPCAST(PCIIDEState, dev, dev);
     unsigned i;
@@ -212,8 +213,6 @@ static int pci_piix_ide_exitfn(PCIDevice *dev)
         memory_region_destroy(&d->bmdma[i].addr_ioport);
     }
     memory_region_destroy(&d->bmdma_bar);
-
-    return 0;
 }
 
 /* hd_table must contain 4 block drivers */
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index c122395401..5ea9b8f4b2 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -21,6 +21,7 @@
 #include "qemu-error.h"
 #include <hw/ide/internal.h>
 #include "blockdev.h"
+#include "hw/block-common.h"
 #include "sysemu.h"
 
 /* --------------------------------- */
@@ -111,11 +112,24 @@ IDEDevice *ide_create_drive(IDEBus *bus, int unit, DriveInfo *drive)
     return DO_UPCAST(IDEDevice, qdev, dev);
 }
 
-void ide_get_bs(BlockDriverState *bs[], BusState *qbus)
+int ide_get_geometry(BusState *bus, int unit,
+                     int16_t *cyls, int8_t *heads, int8_t *secs)
 {
-    IDEBus *bus = DO_UPCAST(IDEBus, qbus, qbus);
-    bs[0] = bus->master ? bus->master->conf.bs : NULL;
-    bs[1] = bus->slave  ? bus->slave->conf.bs  : NULL;
+    IDEState *s = &DO_UPCAST(IDEBus, qbus, bus)->ifs[unit];
+
+    if (s->drive_kind != IDE_HD || !s->bs) {
+        return -1;
+    }
+
+    *cyls = s->cylinders;
+    *heads = s->heads;
+    *secs = s->sectors;
+    return 0;
+}
+
+int ide_get_bios_chs_trans(BusState *bus, int unit)
+{
+    return DO_UPCAST(IDEBus, qbus, bus)->ifs[unit].chs_trans;
 }
 
 /* --------------------------------- */
@@ -128,25 +142,22 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind)
 {
     IDEBus *bus = DO_UPCAST(IDEBus, qbus, dev->qdev.parent_bus);
     IDEState *s = bus->ifs + dev->unit;
-    const char *serial;
-    DriveInfo *dinfo;
 
     if (dev->conf.discard_granularity && dev->conf.discard_granularity != 512) {
         error_report("discard_granularity must be 512 for ide");
         return -1;
     }
 
-    serial = dev->serial;
-    if (!serial) {
-        /* try to fall back to value set with legacy -drive serial=... */
-        dinfo = drive_get_by_blockdev(dev->conf.bs);
-        if (*dinfo->serial) {
-            serial = dinfo->serial;
-        }
+    blkconf_serial(&dev->conf, &dev->serial);
+    if (kind != IDE_CD
+        && blkconf_geometry(&dev->conf, &dev->chs_trans, 65536, 16, 255) < 0) {
+        return -1;
     }
 
     if (ide_init_drive(s, dev->conf.bs, kind,
-                       dev->version, serial, dev->model, dev->wwn) < 0) {
+                       dev->version, dev->serial, dev->model, dev->wwn,
+                       dev->conf.cyls, dev->conf.heads, dev->conf.secs,
+                       dev->chs_trans) < 0) {
         return -1;
     }
 
@@ -189,6 +200,9 @@ static int ide_drive_initfn(IDEDevice *dev)
 
 static Property ide_hd_properties[] = {
     DEFINE_IDE_DEV_PROPERTIES(),
+    DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf),
+    DEFINE_PROP_BIOS_CHS_TRANS("bios-chs-trans",
+                IDEDrive, dev.chs_trans, BIOS_ATA_TRANSLATION_AUTO),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/ide/via.c b/hw/ide/via.c
index eec5136019..b20e4f094e 100644
--- a/hw/ide/via.c
+++ b/hw/ide/via.c
@@ -74,7 +74,8 @@ static void bmdma_write(void *opaque, target_phys_addr_t addr,
 #endif
     switch (addr & 3) {
     case 0:
-        return bmdma_cmd_writeb(bm, val);
+        bmdma_cmd_writeb(bm, val);
+        break;
     case 2:
         bm->status = (val & 0x60) | (bm->status & 1) | (bm->status & ~val & 0x06);
         break;
@@ -189,7 +190,7 @@ static int vt82c686b_ide_initfn(PCIDevice *dev)
     return 0;
 }
 
-static int vt82c686b_ide_exitfn(PCIDevice *dev)
+static void vt82c686b_ide_exitfn(PCIDevice *dev)
 {
     PCIIDEState *d = DO_UPCAST(PCIIDEState, dev, dev);
     unsigned i;
@@ -201,8 +202,6 @@ static int vt82c686b_ide_exitfn(PCIDevice *dev)
         memory_region_destroy(&d->bmdma[i].addr_ioport);
     }
     memory_region_destroy(&d->bmdma_bar);
-
-    return 0;
 }
 
 void vt82c686b_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn)
diff --git a/hw/imx.h b/hw/imx.h
new file mode 100644
index 0000000000..ccf586fefe
--- /dev/null
+++ b/hw/imx.h
@@ -0,0 +1,34 @@
+/*
+ * i.MX31 emulation
+ *
+ * Copyright (C) 2012 Peter Chubb
+ * NICTA
+ *
+ * This code is released under the GPL, version 2.0 or later
+ * See the file `../COPYING' for details.
+ */
+
+#ifndef IMX_H
+#define IMX_H
+
+void imx_serial_create(int uart, const target_phys_addr_t addr, qemu_irq irq);
+
+typedef enum  {
+    NOCLK,
+    MCU,
+    HSP,
+    IPG,
+    CLK_32k
+} IMXClk;
+
+uint32_t imx_clock_frequency(DeviceState *s, IMXClk clock);
+
+void imx_timerp_create(const target_phys_addr_t addr,
+                      qemu_irq irq,
+                      DeviceState *ccm);
+void imx_timerg_create(const target_phys_addr_t addr,
+                      qemu_irq irq,
+                      DeviceState *ccm);
+
+
+#endif /* IMX_H */
diff --git a/hw/imx_avic.c b/hw/imx_avic.c
new file mode 100644
index 0000000000..4f010e8ee2
--- /dev/null
+++ b/hw/imx_avic.c
@@ -0,0 +1,408 @@
+/*
+ * i.MX31 Vectored Interrupt Controller
+ *
+ * Note this is NOT the PL192 provided by ARM, but
+ * a custom implementation by Freescale.
+ *
+ * Copyright (c) 2008 OKL
+ * Copyright (c) 2011 NICTA Pty Ltd
+ * Originally Written by Hans Jiang
+ *
+ * This code is licenced under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ *
+ * TODO: implement vectors.
+ */
+
+#include "hw.h"
+#include "sysbus.h"
+#include "host-utils.h"
+
+#define DEBUG_INT 1
+#undef DEBUG_INT /* comment out for debugging */
+
+#ifdef DEBUG_INT
+#define DPRINTF(fmt, args...) \
+do { printf("imx_avic: " fmt , ##args); } while (0)
+#else
+#define DPRINTF(fmt, args...) do {} while (0)
+#endif
+
+/*
+ * Define to 1 for messages about attempts to
+ * access unimplemented registers or similar.
+ */
+#define DEBUG_IMPLEMENTATION 1
+#if DEBUG_IMPLEMENTATION
+#  define IPRINTF(fmt, args...) \
+    do  { fprintf(stderr, "imx_avic: " fmt, ##args); } while (0)
+#else
+#  define IPRINTF(fmt, args...) do {} while (0)
+#endif
+
+#define IMX_AVIC_NUM_IRQS 64
+
+/* Interrupt Control Bits */
+#define ABFLAG (1<<25)
+#define ABFEN (1<<24)
+#define NIDIS (1<<22) /* Normal Interrupt disable */
+#define FIDIS (1<<21) /* Fast interrupt disable */
+#define NIAD  (1<<20) /* Normal Interrupt Arbiter Rise ARM level */
+#define FIAD  (1<<19) /* Fast Interrupt Arbiter Rise ARM level */
+#define NM    (1<<18) /* Normal interrupt mode */
+
+
+#define PRIO_PER_WORD (sizeof(uint32_t) * 8 / 4)
+#define PRIO_WORDS (IMX_AVIC_NUM_IRQS/PRIO_PER_WORD)
+
+typedef struct {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+    uint64_t pending;
+    uint64_t enabled;
+    uint64_t is_fiq;
+    uint32_t intcntl;
+    uint32_t intmask;
+    qemu_irq irq;
+    qemu_irq fiq;
+    uint32_t prio[PRIO_WORDS]; /* Priorities are 4-bits each */
+} IMXAVICState;
+
+static const VMStateDescription vmstate_imx_avic = {
+    .name = "imx-avic",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(pending, IMXAVICState),
+        VMSTATE_UINT64(enabled, IMXAVICState),
+        VMSTATE_UINT64(is_fiq, IMXAVICState),
+        VMSTATE_UINT32(intcntl, IMXAVICState),
+        VMSTATE_UINT32(intmask, IMXAVICState),
+        VMSTATE_UINT32_ARRAY(prio, IMXAVICState, PRIO_WORDS),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+
+
+static inline int imx_avic_prio(IMXAVICState *s, int irq)
+{
+    uint32_t word = irq / PRIO_PER_WORD;
+    uint32_t part = 4 * (irq % PRIO_PER_WORD);
+    return 0xf & (s->prio[word] >> part);
+}
+
+static inline void imx_avic_set_prio(IMXAVICState *s, int irq, int prio)
+{
+    uint32_t word = irq / PRIO_PER_WORD;
+    uint32_t part = 4 * (irq % PRIO_PER_WORD);
+    uint32_t mask = ~(0xf << part);
+    s->prio[word] &= mask;
+    s->prio[word] |= prio << part;
+}
+
+/* Update interrupts.  */
+static void imx_avic_update(IMXAVICState *s)
+{
+    int i;
+    uint64_t new = s->pending & s->enabled;
+    uint64_t flags;
+
+    flags = new & s->is_fiq;
+    qemu_set_irq(s->fiq, !!flags);
+
+    flags = new & ~s->is_fiq;
+    if (!flags || (s->intmask == 0x1f)) {
+        qemu_set_irq(s->irq, !!flags);
+        return;
+    }
+
+    /*
+     * Take interrupt if there's a pending interrupt with
+     * priority higher than the value of intmask
+     */
+    for (i = 0; i < IMX_AVIC_NUM_IRQS; i++) {
+        if (flags & (1UL << i)) {
+            if (imx_avic_prio(s, i) > s->intmask) {
+                qemu_set_irq(s->irq, 1);
+                return;
+            }
+        }
+    }
+    qemu_set_irq(s->irq, 0);
+}
+
+static void imx_avic_set_irq(void *opaque, int irq, int level)
+{
+    IMXAVICState *s = (IMXAVICState *)opaque;
+
+    if (level) {
+        DPRINTF("Raising IRQ %d, prio %d\n",
+                irq, imx_avic_prio(s, irq));
+        s->pending |= (1ULL << irq);
+    } else {
+        DPRINTF("Clearing IRQ %d, prio %d\n",
+                irq, imx_avic_prio(s, irq));
+        s->pending &= ~(1ULL << irq);
+    }
+
+    imx_avic_update(s);
+}
+
+
+static uint64_t imx_avic_read(void *opaque,
+                             target_phys_addr_t offset, unsigned size)
+{
+    IMXAVICState *s = (IMXAVICState *)opaque;
+
+
+    DPRINTF("read(offset = 0x%x)\n", offset >> 2);
+    switch (offset >> 2) {
+    case 0: /* INTCNTL */
+        return s->intcntl;
+
+    case 1: /* Normal Interrupt Mask Register, NIMASK */
+        return s->intmask;
+
+    case 2: /* Interrupt Enable Number Register, INTENNUM */
+    case 3: /* Interrupt Disable Number Register, INTDISNUM */
+        return 0;
+
+    case 4: /* Interrupt Enabled Number Register High */
+        return s->enabled >> 32;
+
+    case 5: /* Interrupt Enabled Number Register Low */
+        return s->enabled & 0xffffffffULL;
+
+    case 6: /* Interrupt Type Register High */
+        return s->is_fiq >> 32;
+
+    case 7: /* Interrupt Type Register Low */
+        return s->is_fiq & 0xffffffffULL;
+
+    case 8: /* Normal Interrupt Priority Register 7 */
+    case 9: /* Normal Interrupt Priority Register 6 */
+    case 10:/* Normal Interrupt Priority Register 5 */
+    case 11:/* Normal Interrupt Priority Register 4 */
+    case 12:/* Normal Interrupt Priority Register 3 */
+    case 13:/* Normal Interrupt Priority Register 2 */
+    case 14:/* Normal Interrupt Priority Register 1 */
+    case 15:/* Normal Interrupt Priority Register 0 */
+        return s->prio[15-(offset>>2)];
+
+    case 16: /* Normal interrupt vector and status register */
+    {
+        /*
+         * This returns the highest priority
+         * outstanding interrupt.  Where there is more than
+         * one pending IRQ with the same priority,
+         * take the highest numbered one.
+         */
+        uint64_t flags = s->pending & s->enabled & ~s->is_fiq;
+        int i;
+        int prio = -1;
+        int irq = -1;
+        for (i = 63; i >= 0; --i) {
+            if (flags & (1ULL<<i)) {
+                int irq_prio = imx_avic_prio(s, i);
+                if (irq_prio > prio) {
+                    irq = i;
+                    prio = irq_prio;
+                }
+            }
+        }
+        if (irq >= 0) {
+            imx_avic_set_irq(s, irq, 0);
+            return irq << 16 | prio;
+        }
+        return 0xffffffffULL;
+    }
+    case 17:/* Fast Interrupt vector and status register */
+    {
+        uint64_t flags = s->pending & s->enabled & s->is_fiq;
+        int i = ctz64(flags);
+        if (i < 64) {
+            imx_avic_set_irq(opaque, i, 0);
+            return i;
+        }
+        return 0xffffffffULL;
+    }
+    case 18:/* Interrupt source register high */
+        return s->pending >> 32;
+
+    case 19:/* Interrupt source register low */
+        return s->pending & 0xffffffffULL;
+
+    case 20:/* Interrupt Force Register high */
+    case 21:/* Interrupt Force Register low */
+        return 0;
+
+    case 22:/* Normal Interrupt Pending Register High */
+        return (s->pending & s->enabled & ~s->is_fiq) >> 32;
+
+    case 23:/* Normal Interrupt Pending Register Low */
+        return (s->pending & s->enabled & ~s->is_fiq) & 0xffffffffULL;
+
+    case 24: /* Fast Interrupt Pending Register High  */
+        return (s->pending & s->enabled & s->is_fiq) >> 32;
+
+    case 25: /* Fast Interrupt Pending Register Low  */
+        return (s->pending & s->enabled & s->is_fiq) & 0xffffffffULL;
+
+    case 0x40:            /* AVIC vector 0, use for WFI WAR */
+        return 0x4;
+
+    default:
+        IPRINTF("imx_avic_read: Bad offset 0x%x\n", (int)offset);
+        return 0;
+    }
+}
+
+static void imx_avic_write(void *opaque, target_phys_addr_t offset,
+                          uint64_t val, unsigned size)
+{
+    IMXAVICState *s = (IMXAVICState *)opaque;
+
+    /* Vector Registers not yet supported */
+    if (offset >= 0x100 && offset <= 0x2fc) {
+        IPRINTF("imx_avic_write to vector register %d ignored\n",
+                (unsigned int)((offset - 0x100) >> 2));
+        return;
+    }
+
+    DPRINTF("imx_avic_write(0x%x) = %x\n",
+            (unsigned int)offset>>2, (unsigned int)val);
+    switch (offset >> 2) {
+    case 0: /* Interrupt Control Register, INTCNTL */
+        s->intcntl = val & (ABFEN | NIDIS | FIDIS | NIAD | FIAD | NM);
+        if (s->intcntl & ABFEN) {
+            s->intcntl &= ~(val & ABFLAG);
+        }
+        break;
+
+    case 1: /* Normal Interrupt Mask Register, NIMASK */
+        s->intmask = val & 0x1f;
+        break;
+
+    case 2: /* Interrupt Enable Number Register, INTENNUM */
+        DPRINTF("enable(%d)\n", (int)val);
+        val &= 0x3f;
+        s->enabled |= (1ULL << val);
+        break;
+
+    case 3: /* Interrupt Disable Number Register, INTDISNUM */
+        DPRINTF("disable(%d)\n", (int)val);
+        val &= 0x3f;
+        s->enabled &= ~(1ULL << val);
+        break;
+
+    case 4: /* Interrupt Enable Number Register High */
+        s->enabled = (s->enabled & 0xffffffffULL) | (val << 32);
+        break;
+
+    case 5: /* Interrupt Enable Number Register Low */
+        s->enabled = (s->enabled & 0xffffffff00000000ULL) | val;
+        break;
+
+    case 6: /* Interrupt Type Register High */
+        s->is_fiq = (s->is_fiq & 0xffffffffULL) | (val << 32);
+        break;
+
+    case 7: /* Interrupt Type Register Low */
+        s->is_fiq = (s->is_fiq & 0xffffffff00000000ULL) | val;
+        break;
+
+    case 8: /* Normal Interrupt Priority Register 7 */
+    case 9: /* Normal Interrupt Priority Register 6 */
+    case 10:/* Normal Interrupt Priority Register 5 */
+    case 11:/* Normal Interrupt Priority Register 4 */
+    case 12:/* Normal Interrupt Priority Register 3 */
+    case 13:/* Normal Interrupt Priority Register 2 */
+    case 14:/* Normal Interrupt Priority Register 1 */
+    case 15:/* Normal Interrupt Priority Register 0 */
+        s->prio[15-(offset>>2)] = val;
+        break;
+
+        /* Read-only registers, writes ignored */
+    case 16:/* Normal Interrupt Vector and Status register */
+    case 17:/* Fast Interrupt vector and status register */
+    case 18:/* Interrupt source register high */
+    case 19:/* Interrupt source register low */
+        return;
+
+    case 20:/* Interrupt Force Register high */
+        s->pending = (s->pending & 0xffffffffULL) | (val << 32);
+        break;
+
+    case 21:/* Interrupt Force Register low */
+        s->pending = (s->pending & 0xffffffff00000000ULL) | val;
+        break;
+
+    case 22:/* Normal Interrupt Pending Register High */
+    case 23:/* Normal Interrupt Pending Register Low */
+    case 24: /* Fast Interrupt Pending Register High  */
+    case 25: /* Fast Interrupt Pending Register Low  */
+        return;
+
+    default:
+        IPRINTF("imx_avic_write: Bad offset %x\n", (int)offset);
+    }
+    imx_avic_update(s);
+}
+
+static const MemoryRegionOps imx_avic_ops = {
+    .read = imx_avic_read,
+    .write = imx_avic_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static void imx_avic_reset(DeviceState *dev)
+{
+    IMXAVICState *s = container_of(dev, IMXAVICState, busdev.qdev);
+    s->pending = 0;
+    s->enabled = 0;
+    s->is_fiq = 0;
+    s->intmask = 0x1f;
+    s->intcntl = 0;
+    memset(s->prio, 0, sizeof s->prio);
+}
+
+static int imx_avic_init(SysBusDevice *dev)
+{
+    IMXAVICState *s = FROM_SYSBUS(IMXAVICState, dev);;
+
+    memory_region_init_io(&s->iomem, &imx_avic_ops, s, "imx_avic", 0x1000);
+    sysbus_init_mmio(dev, &s->iomem);
+
+    qdev_init_gpio_in(&dev->qdev, imx_avic_set_irq, IMX_AVIC_NUM_IRQS);
+    sysbus_init_irq(dev, &s->irq);
+    sysbus_init_irq(dev, &s->fiq);
+
+    return 0;
+}
+
+
+static void imx_avic_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+    k->init = imx_avic_init;
+    dc->vmsd = &vmstate_imx_avic;
+    dc->reset = imx_avic_reset;
+    dc->desc = "i.MX Advanced Vector Interrupt Controller";
+}
+
+static const TypeInfo imx_avic_info = {
+    .name = "imx_avic",
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(IMXAVICState),
+    .class_init = imx_avic_class_init,
+};
+
+static void imx_avic_register_types(void)
+{
+    type_register_static(&imx_avic_info);
+}
+
+type_init(imx_avic_register_types)
diff --git a/hw/imx_ccm.c b/hw/imx_ccm.c
new file mode 100644
index 0000000000..10952c6ea1
--- /dev/null
+++ b/hw/imx_ccm.c
@@ -0,0 +1,321 @@
+/*
+ * IMX31 Clock Control Module
+ *
+ * Copyright (C) 2012 NICTA
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * To get the timer frequencies right, we need to emulate at least part of
+ * the CCM.
+ */
+
+#include "hw.h"
+#include "sysbus.h"
+#include "sysemu.h"
+#include "imx.h"
+
+#define CKIH_FREQ 26000000 /* 26MHz crystal input */
+#define CKIL_FREQ    32768 /* nominal 32khz clock */
+
+
+//#define DEBUG_CCM 1
+#ifdef DEBUG_CCM
+#define DPRINTF(fmt, args...) \
+do { printf("imx_ccm: " fmt , ##args); } while (0)
+#else
+#define DPRINTF(fmt, args...) do {} while (0)
+#endif
+
+static int imx_ccm_post_load(void *opaque, int version_id);
+
+typedef struct {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+
+    uint32_t ccmr;
+    uint32_t pdr0;
+    uint32_t pdr1;
+    uint32_t mpctl;
+    uint32_t spctl;
+    uint32_t cgr[3];
+    uint32_t pmcr0;
+    uint32_t pmcr1;
+
+    /* Frequencies precalculated on register changes */
+    uint32_t pll_refclk_freq;
+    uint32_t mcu_clk_freq;
+    uint32_t hsp_clk_freq;
+    uint32_t ipg_clk_freq;
+} IMXCCMState;
+
+static const VMStateDescription vmstate_imx_ccm = {
+    .name = "imx-ccm",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(ccmr, IMXCCMState),
+        VMSTATE_UINT32(pdr0, IMXCCMState),
+        VMSTATE_UINT32(pdr1, IMXCCMState),
+        VMSTATE_UINT32(mpctl, IMXCCMState),
+        VMSTATE_UINT32(spctl, IMXCCMState),
+        VMSTATE_UINT32_ARRAY(cgr, IMXCCMState, 3),
+        VMSTATE_UINT32(pmcr0, IMXCCMState),
+        VMSTATE_UINT32(pmcr1, IMXCCMState),
+        VMSTATE_UINT32(pll_refclk_freq, IMXCCMState),
+    },
+    .post_load = imx_ccm_post_load,
+};
+
+/* CCMR */
+#define CCMR_FPME (1<<0)
+#define CCMR_MPE  (1<<3)
+#define CCMR_MDS  (1<<7)
+#define CCMR_FPMF (1<<26)
+#define CCMR_PRCS (3<<1)
+
+/* PDR0 */
+#define PDR0_MCU_PODF_SHIFT (0)
+#define PDR0_MCU_PODF_MASK (0x7)
+#define PDR0_MAX_PODF_SHIFT (3)
+#define PDR0_MAX_PODF_MASK (0x7)
+#define PDR0_IPG_PODF_SHIFT (6)
+#define PDR0_IPG_PODF_MASK (0x3)
+#define PDR0_NFC_PODF_SHIFT (8)
+#define PDR0_NFC_PODF_MASK (0x7)
+#define PDR0_HSP_PODF_SHIFT (11)
+#define PDR0_HSP_PODF_MASK (0x7)
+#define PDR0_PER_PODF_SHIFT (16)
+#define PDR0_PER_PODF_MASK (0x1f)
+#define PDR0_CSI_PODF_SHIFT (23)
+#define PDR0_CSI_PODF_MASK (0x1ff)
+
+#define EXTRACT(value, name) (((value) >> PDR0_##name##_PODF_SHIFT) \
+                              & PDR0_##name##_PODF_MASK)
+#define INSERT(value, name) (((value) & PDR0_##name##_PODF_MASK) << \
+                             PDR0_##name##_PODF_SHIFT)
+/* PLL control registers */
+#define PD(v) (((v) >> 26) & 0xf)
+#define MFD(v) (((v) >> 16) & 0x3ff)
+#define MFI(v) (((v) >> 10) & 0xf);
+#define MFN(v) ((v) & 0x3ff)
+
+#define PLL_PD(x)               (((x) & 0xf) << 26)
+#define PLL_MFD(x)              (((x) & 0x3ff) << 16)
+#define PLL_MFI(x)              (((x) & 0xf) << 10)
+#define PLL_MFN(x)              (((x) & 0x3ff) << 0)
+
+uint32_t imx_clock_frequency(DeviceState *dev, IMXClk clock)
+{
+    IMXCCMState *s = container_of(dev, IMXCCMState, busdev.qdev);
+
+    switch (clock) {
+    case NOCLK:
+        return 0;
+    case MCU:
+        return s->mcu_clk_freq;
+    case HSP:
+        return s->hsp_clk_freq;
+    case IPG:
+        return s->ipg_clk_freq;
+    case CLK_32k:
+        return CKIL_FREQ;
+    }
+    return 0;
+}
+
+/*
+ * Calculate PLL output frequency
+ */
+static uint32_t calc_pll(uint32_t pllreg, uint32_t base_freq)
+{
+    int32_t mfn = MFN(pllreg);  /* Numerator */
+    uint32_t mfi = MFI(pllreg); /* Integer part */
+    uint32_t mfd = 1 + MFD(pllreg); /* Denominator */
+    uint32_t pd = 1 + PD(pllreg);   /* Pre-divider */
+
+    if (mfi < 5) {
+        mfi = 5;
+    }
+    /* mfn is 10-bit signed twos-complement */
+    mfn <<= 32 - 10;
+    mfn >>= 32 - 10;
+
+    return ((2 * (base_freq >> 10) * (mfi * mfd + mfn)) /
+            (mfd * pd)) << 10;
+}
+
+static void update_clocks(IMXCCMState *s)
+{
+    /*
+     * If we ever emulate more clocks, this should switch to a data-driven
+     * approach
+     */
+
+    if ((s->ccmr & CCMR_PRCS) == 1) {
+        s->pll_refclk_freq = CKIL_FREQ * 1024;
+    } else {
+        s->pll_refclk_freq = CKIH_FREQ;
+    }
+
+    /* ipg_clk_arm aka MCU clock */
+    if ((s->ccmr & CCMR_MDS) || !(s->ccmr & CCMR_MPE)) {
+        s->mcu_clk_freq = s->pll_refclk_freq;
+    } else {
+        s->mcu_clk_freq = calc_pll(s->mpctl, s->pll_refclk_freq);
+    }
+
+    /* High-speed clock */
+    s->hsp_clk_freq = s->mcu_clk_freq / (1 + EXTRACT(s->pdr0, HSP));
+    s->ipg_clk_freq = s->hsp_clk_freq / (1 + EXTRACT(s->pdr0, IPG));
+
+    DPRINTF("Clocks: mcu %uMHz, HSP %uMHz, IPG %uHz\n",
+            s->mcu_clk_freq / 1000000,
+            s->hsp_clk_freq / 1000000,
+            s->ipg_clk_freq);
+}
+
+static void imx_ccm_reset(DeviceState *dev)
+{
+    IMXCCMState *s = container_of(dev, IMXCCMState, busdev.qdev);
+
+    s->ccmr = 0x074b0b7b;
+    s->pdr0 = 0xff870b48;
+    s->pdr1 = 0x49fcfe7f;
+    s->mpctl = PLL_PD(1) | PLL_MFD(0) | PLL_MFI(6) | PLL_MFN(0);
+    s->cgr[0] = s->cgr[1] = s->cgr[2] = 0xffffffff;
+    s->spctl = PLL_PD(1) | PLL_MFD(4) | PLL_MFI(0xc) | PLL_MFN(1);
+    s->pmcr0 = 0x80209828;
+
+    update_clocks(s);
+}
+
+static uint64_t imx_ccm_read(void *opaque, target_phys_addr_t offset,
+                                unsigned size)
+{
+    IMXCCMState *s = (IMXCCMState *)opaque;
+
+    DPRINTF("read(offset=%x)", offset >> 2);
+    switch (offset >> 2) {
+    case 0: /* CCMR */
+        DPRINTF(" ccmr = 0x%x\n", s->ccmr);
+        return s->ccmr;
+    case 1:
+        DPRINTF(" pdr0 = 0x%x\n", s->pdr0);
+        return s->pdr0;
+    case 2:
+        DPRINTF(" pdr1 = 0x%x\n", s->pdr1);
+        return s->pdr1;
+    case 4:
+        DPRINTF(" mpctl = 0x%x\n", s->mpctl);
+        return s->mpctl;
+    case 6:
+        DPRINTF(" spctl = 0x%x\n", s->spctl);
+        return s->spctl;
+    case 8:
+        DPRINTF(" cgr0 = 0x%x\n", s->cgr[0]);
+        return s->cgr[0];
+    case 9:
+        DPRINTF(" cgr1 = 0x%x\n", s->cgr[1]);
+        return s->cgr[1];
+    case 10:
+        DPRINTF(" cgr2 = 0x%x\n", s->cgr[2]);
+        return s->cgr[2];
+    case 18: /* LTR1 */
+        return 0x00004040;
+    case 23:
+        DPRINTF(" pcmr0 = 0x%x\n", s->pmcr0);
+        return s->pmcr0;
+    }
+    DPRINTF(" return 0\n");
+    return 0;
+}
+
+static void imx_ccm_write(void *opaque, target_phys_addr_t offset,
+                          uint64_t value, unsigned size)
+{
+    IMXCCMState *s = (IMXCCMState *)opaque;
+
+    DPRINTF("write(offset=%x, value = %x)\n",
+            offset >> 2, (unsigned int)value);
+    switch (offset >> 2) {
+    case 0:
+        s->ccmr = CCMR_FPMF | (value & 0x3b6fdfff);
+        break;
+    case 1:
+        s->pdr0 = value & 0xff9f3fff;
+        break;
+    case 2:
+        s->pdr1 = value;
+        break;
+    case 4:
+        s->mpctl = value & 0xbfff3fff;
+        break;
+    case 6:
+        s->spctl = value & 0xbfff3fff;
+        break;
+    case 8:
+        s->cgr[0] = value;
+        return;
+    case 9:
+        s->cgr[1] = value;
+        return;
+    case 10:
+        s->cgr[2] = value;
+        return;
+
+    default:
+        return;
+    }
+    update_clocks(s);
+}
+
+static const struct MemoryRegionOps imx_ccm_ops = {
+    .read = imx_ccm_read,
+    .write = imx_ccm_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static int imx_ccm_init(SysBusDevice *dev)
+{
+    IMXCCMState *s = FROM_SYSBUS(typeof(*s), dev);
+
+    memory_region_init_io(&s->iomem, &imx_ccm_ops, s, "imx_ccm", 0x1000);
+    sysbus_init_mmio(dev, &s->iomem);
+
+    return 0;
+}
+
+static int imx_ccm_post_load(void *opaque, int version_id)
+{
+    IMXCCMState *s = (IMXCCMState *)opaque;
+
+    update_clocks(s);
+    return 0;
+}
+
+static void imx_ccm_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
+
+    sbc->init = imx_ccm_init;
+    dc->reset = imx_ccm_reset;
+    dc->vmsd = &vmstate_imx_ccm;
+    dc->desc = "i.MX Clock Control Module";
+}
+
+static TypeInfo imx_ccm_info = {
+    .name = "imx_ccm",
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(IMXCCMState),
+    .class_init = imx_ccm_class_init,
+};
+
+static void imx_ccm_register_types(void)
+{
+    type_register_static(&imx_ccm_info);
+}
+
+type_init(imx_ccm_register_types)
diff --git a/hw/imx_serial.c b/hw/imx_serial.c
new file mode 100644
index 0000000000..d4eae430f5
--- /dev/null
+++ b/hw/imx_serial.c
@@ -0,0 +1,467 @@
+/*
+ * IMX31 UARTS
+ *
+ * Copyright (c) 2008 OKL
+ * Originally Written by Hans Jiang
+ * Copyright (c) 2011 NICTA Pty Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * This is a `bare-bones' implementation of the IMX series serial ports.
+ * TODO:
+ *  -- implement FIFOs.  The real hardware has 32 word transmit
+ *                       and receive FIFOs; we currently use a 1-char buffer
+ *  -- implement DMA
+ *  -- implement BAUD-rate and modem lines, for when the backend
+ *     is a real serial device.
+ */
+
+#include "hw.h"
+#include "sysbus.h"
+#include "sysemu.h"
+#include "qemu-char.h"
+#include "imx.h"
+
+//#define DEBUG_SERIAL 1
+#ifdef DEBUG_SERIAL
+#define DPRINTF(fmt, args...) \
+do { printf("imx_serial: " fmt , ##args); } while (0)
+#else
+#define DPRINTF(fmt, args...) do {} while (0)
+#endif
+
+/*
+ * Define to 1 for messages about attempts to
+ * access unimplemented registers or similar.
+ */
+//#define DEBUG_IMPLEMENTATION 1
+#ifdef DEBUG_IMPLEMENTATION
+#  define IPRINTF(fmt, args...) \
+    do  { fprintf(stderr, "imx_serial: " fmt, ##args); } while (0)
+#else
+#  define IPRINTF(fmt, args...) do {} while (0)
+#endif
+
+typedef struct {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+    int32_t readbuff;
+
+    uint32_t usr1;
+    uint32_t usr2;
+    uint32_t ucr1;
+    uint32_t ucr2;
+    uint32_t uts1;
+
+    /*
+     * The registers below are implemented just so that the
+     * guest OS sees what it has written
+     */
+    uint32_t onems;
+    uint32_t ufcr;
+    uint32_t ubmr;
+    uint32_t ubrc;
+    uint32_t ucr3;
+
+    qemu_irq irq;
+    CharDriverState *chr;
+} IMXSerialState;
+
+static const VMStateDescription vmstate_imx_serial = {
+    .name = "imx-serial",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT32(readbuff, IMXSerialState),
+        VMSTATE_UINT32(usr1, IMXSerialState),
+        VMSTATE_UINT32(usr2, IMXSerialState),
+        VMSTATE_UINT32(ucr1, IMXSerialState),
+        VMSTATE_UINT32(uts1, IMXSerialState),
+        VMSTATE_UINT32(onems, IMXSerialState),
+        VMSTATE_UINT32(ufcr, IMXSerialState),
+        VMSTATE_UINT32(ubmr, IMXSerialState),
+        VMSTATE_UINT32(ubrc, IMXSerialState),
+        VMSTATE_UINT32(ucr3, IMXSerialState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+
+#define URXD_CHARRDY    (1<<15)   /* character read is valid */
+#define URXD_ERR        (1<<14)   /* Character has error */
+#define URXD_BRK        (1<<11)   /* Break received */
+
+#define USR1_PARTYER    (1<<15)   /* Parity Error */
+#define USR1_RTSS       (1<<14)   /* RTS pin status */
+#define USR1_TRDY       (1<<13)   /* Tx ready */
+#define USR1_RTSD       (1<<12)   /* RTS delta: pin changed state */
+#define USR1_ESCF       (1<<11)   /* Escape sequence interrupt */
+#define USR1_FRAMERR    (1<<10)   /* Framing error  */
+#define USR1_RRDY       (1<<9)    /* receiver ready */
+#define USR1_AGTIM      (1<<8)    /* Aging timer interrupt */
+#define USR1_DTRD       (1<<7)    /* DTR changed */
+#define USR1_RXDS       (1<<6)    /* Receiver is idle */
+#define USR1_AIRINT     (1<<5)    /* Aysnch IR interrupt */
+#define USR1_AWAKE      (1<<4)    /* Falling edge detected on RXd pin */
+
+#define USR2_ADET       (1<<15)   /* Autobaud complete */
+#define USR2_TXFE       (1<<14)   /* Transmit FIFO empty */
+#define USR2_DTRF       (1<<13)   /* DTR/DSR transition */
+#define USR2_IDLE       (1<<12)   /* UART has been idle for too long */
+#define USR2_ACST       (1<<11)   /* Autobaud counter stopped */
+#define USR2_RIDELT     (1<<10)   /* Ring Indicator delta */
+#define USR2_RIIN       (1<<9)    /* Ring Indicator Input */
+#define USR2_IRINT      (1<<8)    /* Serial Infrared Interrupt */
+#define USR2_WAKE       (1<<7)    /* Start bit detected */
+#define USR2_DCDDELT    (1<<6)    /* Data Carrier Detect delta */
+#define USR2_DCDIN      (1<<5)    /* Data Carrier Detect Input */
+#define USR2_RTSF       (1<<4)    /* RTS transition */
+#define USR2_TXDC       (1<<3)    /* Transmission complete */
+#define USR2_BRCD       (1<<2)    /* Break condition detected */
+#define USR2_ORE        (1<<1)    /* Overrun error */
+#define USR2_RDR        (1<<0)    /* Receive data ready */
+
+#define UCR1_TRDYEN     (1<<13)   /* Tx Ready Interrupt Enable */
+#define UCR1_RRDYEN     (1<<9)    /* Rx Ready Interrupt Enable */
+#define UCR1_TXMPTYEN   (1<<6)    /* Tx Empty Interrupt Enable */
+#define UCR1_UARTEN     (1<<0)    /* UART Enable */
+
+#define UCR2_TXEN       (1<<2)    /* Transmitter enable */
+#define UCR2_RXEN       (1<<1)    /* Receiver enable */
+#define UCR2_SRST       (1<<0)    /* Reset complete */
+
+#define UTS1_TXEMPTY    (1<<6)
+#define UTS1_RXEMPTY    (1<<5)
+#define UTS1_TXFULL     (1<<4)
+#define UTS1_RXFULL     (1<<3)
+
+static void imx_update(IMXSerialState *s)
+{
+    uint32_t flags;
+
+    flags = (s->usr1 & s->ucr1) & (USR1_TRDY|USR1_RRDY);
+    if (!(s->ucr1 & UCR1_TXMPTYEN)) {
+        flags &= ~USR1_TRDY;
+    }
+
+    qemu_set_irq(s->irq, !!flags);
+}
+
+static void imx_serial_reset(IMXSerialState *s)
+{
+
+    s->usr1 = USR1_TRDY | USR1_RXDS;
+    /*
+     * Fake attachment of a terminal: assert RTS.
+     */
+    s->usr1 |= USR1_RTSS;
+    s->usr2 = USR2_TXFE | USR2_TXDC | USR2_DCDIN;
+    s->uts1 = UTS1_RXEMPTY | UTS1_TXEMPTY;
+    s->ucr1 = 0;
+    s->ucr2 = UCR2_SRST;
+    s->ucr3 = 0x700;
+    s->ubmr = 0;
+    s->ubrc = 4;
+    s->readbuff = URXD_ERR;
+}
+
+static void imx_serial_reset_at_boot(DeviceState *dev)
+{
+    IMXSerialState *s = container_of(dev, IMXSerialState, busdev.qdev);
+
+    imx_serial_reset(s);
+
+    /*
+     * enable the uart on boot, so messages from the linux decompresser
+     * are visible.  On real hardware this is done by the boot rom
+     * before anything else is loaded.
+     */
+    s->ucr1 = UCR1_UARTEN;
+    s->ucr2 = UCR2_TXEN;
+
+}
+
+static uint64_t imx_serial_read(void *opaque, target_phys_addr_t offset,
+                                unsigned size)
+{
+    IMXSerialState *s = (IMXSerialState *)opaque;
+    uint32_t c;
+
+    DPRINTF("read(offset=%x)\n", offset >> 2);
+    switch (offset >> 2) {
+    case 0x0: /* URXD */
+        c = s->readbuff;
+        if (!(s->uts1 & UTS1_RXEMPTY)) {
+            /* Character is valid */
+            c |= URXD_CHARRDY;
+            s->usr1 &= ~USR1_RRDY;
+            s->usr2 &= ~USR2_RDR;
+            s->uts1 |= UTS1_RXEMPTY;
+            imx_update(s);
+            qemu_chr_accept_input(s->chr);
+        }
+        return c;
+
+    case 0x20: /* UCR1 */
+        return s->ucr1;
+
+    case 0x21: /* UCR2 */
+        return s->ucr2;
+
+    case 0x25: /* USR1 */
+        return s->usr1;
+
+    case 0x26: /* USR2 */
+        return s->usr2;
+
+    case 0x2A: /* BRM Modulator */
+        return s->ubmr;
+
+    case 0x2B: /* Baud Rate Count */
+        return s->ubrc;
+
+    case 0x2d: /* Test register */
+        return s->uts1;
+
+    case 0x24: /* UFCR */
+        return s->ufcr;
+
+    case 0x2c:
+        return s->onems;
+
+    case 0x22: /* UCR3 */
+        return s->ucr3;
+
+    case 0x23: /* UCR4 */
+    case 0x29: /* BRM Incremental */
+        return 0x0; /* TODO */
+
+    default:
+        IPRINTF("imx_serial_read: bad offset: 0x%x\n", (int)offset);
+        return 0;
+    }
+}
+
+static void imx_serial_write(void *opaque, target_phys_addr_t offset,
+                      uint64_t value, unsigned size)
+{
+    IMXSerialState *s = (IMXSerialState *)opaque;
+    unsigned char ch;
+
+    DPRINTF("write(offset=%x, value = %x) to %s\n",
+            offset >> 2,
+            (unsigned int)value, s->chr ? s->chr->label : "NODEV");
+
+    switch (offset >> 2) {
+    case 0x10: /* UTXD */
+        ch = value;
+        if (s->ucr2 & UCR2_TXEN) {
+            if (s->chr) {
+                qemu_chr_fe_write(s->chr, &ch, 1);
+            }
+            s->usr1 &= ~USR1_TRDY;
+            imx_update(s);
+            s->usr1 |= USR1_TRDY;
+            imx_update(s);
+        }
+        break;
+
+    case 0x20: /* UCR1 */
+        s->ucr1 = value & 0xffff;
+        DPRINTF("write(ucr1=%x)\n", (unsigned int)value);
+        imx_update(s);
+        break;
+
+    case 0x21: /* UCR2 */
+        /*
+         * Only a few bits in control register 2 are implemented as yet.
+         * If it's intended to use a real serial device as a back-end, this
+         * register will have to be implemented more fully.
+         */
+        if (!(value & UCR2_SRST)) {
+            imx_serial_reset(s);
+            imx_update(s);
+            value |= UCR2_SRST;
+        }
+        if (value & UCR2_RXEN) {
+            if (!(s->ucr2 & UCR2_RXEN)) {
+                qemu_chr_accept_input(s->chr);
+            }
+        }
+        s->ucr2 = value & 0xffff;
+        break;
+
+    case 0x25: /* USR1 */
+        value &= USR1_AWAKE | USR1_AIRINT | USR1_DTRD | USR1_AGTIM |
+            USR1_FRAMERR | USR1_ESCF | USR1_RTSD | USR1_PARTYER;
+        s->usr1 &= ~value;
+        break;
+
+    case 0x26: /* USR2 */
+       /*
+        * Writing 1 to some bits clears them; all other
+        * values are ignored
+        */
+        value &= USR2_ADET | USR2_DTRF | USR2_IDLE | USR2_ACST |
+            USR2_RIDELT | USR2_IRINT | USR2_WAKE |
+            USR2_DCDDELT | USR2_RTSF | USR2_BRCD | USR2_ORE;
+        s->usr2 &= ~value;
+        break;
+
+        /*
+         * Linux expects to see what it writes to these registers
+         * We don't currently alter the baud rate
+         */
+    case 0x29: /* UBIR */
+        s->ubrc = value & 0xffff;
+        break;
+
+    case 0x2a: /* UBMR */
+        s->ubmr = value & 0xffff;
+        break;
+
+    case 0x2c: /* One ms reg */
+        s->onems = value & 0xffff;
+        break;
+
+    case 0x24: /* FIFO control register */
+        s->ufcr = value & 0xffff;
+        break;
+
+    case 0x22: /* UCR3 */
+        s->ucr3 = value & 0xffff;
+        break;
+
+    case 0x2d: /* UTS1 */
+    case 0x23: /* UCR4 */
+        IPRINTF("Unimplemented Register %x written to\n", offset >> 2);
+        /* TODO */
+        break;
+
+    default:
+        IPRINTF("imx_serial_write: Bad offset 0x%x\n", (int)offset);
+    }
+}
+
+static int imx_can_receive(void *opaque)
+{
+    IMXSerialState *s = (IMXSerialState *)opaque;
+    return !(s->usr1 & USR1_RRDY);
+}
+
+static void imx_put_data(void *opaque, uint32_t value)
+{
+    IMXSerialState *s = (IMXSerialState *)opaque;
+    DPRINTF("received char\n");
+    s->usr1 |= USR1_RRDY;
+    s->usr2 |= USR2_RDR;
+    s->uts1 &= ~UTS1_RXEMPTY;
+    s->readbuff = value;
+    imx_update(s);
+}
+
+static void imx_receive(void *opaque, const uint8_t *buf, int size)
+{
+    imx_put_data(opaque, *buf);
+}
+
+static void imx_event(void *opaque, int event)
+{
+    if (event == CHR_EVENT_BREAK) {
+        imx_put_data(opaque, URXD_BRK);
+    }
+}
+
+
+static const struct MemoryRegionOps imx_serial_ops = {
+    .read = imx_serial_read,
+    .write = imx_serial_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static int imx_serial_init(SysBusDevice *dev)
+{
+    IMXSerialState *s = FROM_SYSBUS(IMXSerialState, dev);
+
+
+    memory_region_init_io(&s->iomem, &imx_serial_ops, s, "imx-serial", 0x1000);
+    sysbus_init_mmio(dev, &s->iomem);
+    sysbus_init_irq(dev, &s->irq);
+
+    if (s->chr) {
+        qemu_chr_add_handlers(s->chr, imx_can_receive, imx_receive,
+                              imx_event, s);
+    } else {
+        DPRINTF("No char dev for uart at 0x%lx\n",
+                (unsigned long)s->iomem.ram_addr);
+    }
+
+    return 0;
+}
+
+void imx_serial_create(int uart, const target_phys_addr_t addr, qemu_irq irq)
+{
+    DeviceState *dev;
+    SysBusDevice *bus;
+    CharDriverState *chr;
+    const char chr_name[] = "serial";
+    char label[ARRAY_SIZE(chr_name) + 1];
+
+    dev = qdev_create(NULL, "imx-serial");
+
+    if (uart >= MAX_SERIAL_PORTS) {
+        hw_error("Cannot assign uart %d: QEMU supports only %d ports\n",
+                 uart, MAX_SERIAL_PORTS);
+    }
+    chr = serial_hds[uart];
+    if (!chr) {
+        snprintf(label, ARRAY_SIZE(label), "%s%d", chr_name, uart);
+        chr = qemu_chr_new(label, "null", NULL);
+        if (!(chr)) {
+            hw_error("Can't assign serial port to imx-uart%d.\n", uart);
+        }
+    }
+
+    qdev_prop_set_chr(dev, "chardev", chr);
+    bus = sysbus_from_qdev(dev);
+    qdev_init_nofail(dev);
+    if (addr != (target_phys_addr_t)-1) {
+        sysbus_mmio_map(bus, 0, addr);
+    }
+    sysbus_connect_irq(bus, 0, irq);
+
+}
+
+
+static Property imx32_serial_properties[] = {
+    DEFINE_PROP_CHR("chardev", IMXSerialState, chr),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void imx_serial_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+
+    k->init = imx_serial_init;
+    dc->vmsd = &vmstate_imx_serial;
+    dc->reset = imx_serial_reset_at_boot;
+    dc->desc = "i.MX series UART";
+    dc->props = imx32_serial_properties;
+}
+
+static TypeInfo imx_serial_info = {
+    .name = "imx-serial",
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(IMXSerialState),
+    .class_init = imx_serial_class_init,
+};
+
+static void imx_serial_register_types(void)
+{
+    type_register_static(&imx_serial_info);
+}
+
+type_init(imx_serial_register_types)
diff --git a/hw/imx_timer.c b/hw/imx_timer.c
new file mode 100644
index 0000000000..16215ccf04
--- /dev/null
+++ b/hw/imx_timer.c
@@ -0,0 +1,689 @@
+/*
+ * IMX31 Timer
+ *
+ * Copyright (c) 2008 OK Labs
+ * Copyright (c) 2011 NICTA Pty Ltd
+ * Originally Written by Hans Jiang
+ * Updated by Peter Chubb
+ *
+ * This code is licenced under GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "hw.h"
+#include "qemu-timer.h"
+#include "ptimer.h"
+#include "sysbus.h"
+#include "imx.h"
+
+//#define DEBUG_TIMER 1
+#ifdef DEBUG_TIMER
+#  define DPRINTF(fmt, args...) \
+      do { printf("imx_timer: " fmt , ##args); } while (0)
+#else
+#  define DPRINTF(fmt, args...) do {} while (0)
+#endif
+
+/*
+ * Define to 1 for messages about attempts to
+ * access unimplemented registers or similar.
+ */
+#define DEBUG_IMPLEMENTATION 1
+#if DEBUG_IMPLEMENTATION
+#  define IPRINTF(fmt, args...)                                         \
+    do  { fprintf(stderr, "imx_timer: " fmt, ##args); } while (0)
+#else
+#  define IPRINTF(fmt, args...) do {} while (0)
+#endif
+
+/*
+ * GPT : General purpose timer
+ *
+ * This timer counts up continuously while it is enabled, resetting itself
+ * to 0 when it reaches TIMER_MAX (in freerun mode) or when it
+ * reaches the value of ocr1 (in periodic mode).  WE simulate this using a
+ * QEMU ptimer counting down from ocr1 and reloading from ocr1 in
+ * periodic mode, or counting from ocr1 to zero, then TIMER_MAX - ocr1.
+ * waiting_rov is set when counting from TIMER_MAX.
+ *
+ * In the real hardware, there are three comparison registers that can
+ * trigger interrupts, and compare channel 1 can be used to
+ * force-reset the timer. However, this is a `bare-bones'
+ * implementation: only what Linux 3.x uses has been implemented
+ * (free-running timer from 0 to OCR1 or TIMER_MAX) .
+ */
+
+
+#define TIMER_MAX  0XFFFFFFFFUL
+
+/* Control register.  Not all of these bits have any effect (yet) */
+#define GPT_CR_EN     (1 << 0)  /* GPT Enable */
+#define GPT_CR_ENMOD  (1 << 1)  /* GPT Enable Mode */
+#define GPT_CR_DBGEN  (1 << 2)  /* GPT Debug mode enable */
+#define GPT_CR_WAITEN (1 << 3)  /* GPT Wait Mode Enable  */
+#define GPT_CR_DOZEN  (1 << 4)  /* GPT Doze mode enable */
+#define GPT_CR_STOPEN (1 << 5)  /* GPT Stop Mode Enable */
+#define GPT_CR_CLKSRC_SHIFT (6)
+#define GPT_CR_CLKSRC_MASK  (0x7)
+
+#define GPT_CR_FRR    (1 << 9)  /* Freerun or Restart */
+#define GPT_CR_SWR    (1 << 15) /* Software Reset */
+#define GPT_CR_IM1    (3 << 16) /* Input capture channel 1 mode (2 bits) */
+#define GPT_CR_IM2    (3 << 18) /* Input capture channel 2 mode (2 bits) */
+#define GPT_CR_OM1    (7 << 20) /* Output Compare Channel 1 Mode (3 bits) */
+#define GPT_CR_OM2    (7 << 23) /* Output Compare Channel 2 Mode (3 bits) */
+#define GPT_CR_OM3    (7 << 26) /* Output Compare Channel 3 Mode (3 bits) */
+#define GPT_CR_FO1    (1 << 29) /* Force Output Compare Channel 1 */
+#define GPT_CR_FO2    (1 << 30) /* Force Output Compare Channel 2 */
+#define GPT_CR_FO3    (1 << 31) /* Force Output Compare Channel 3 */
+
+#define GPT_SR_OF1  (1 << 0)
+#define GPT_SR_ROV  (1 << 5)
+
+#define GPT_IR_OF1IE  (1 << 0)
+#define GPT_IR_ROVIE  (1 << 5)
+
+typedef struct {
+    SysBusDevice busdev;
+    ptimer_state *timer;
+    MemoryRegion iomem;
+    DeviceState *ccm;
+
+    uint32_t cr;
+    uint32_t pr;
+    uint32_t sr;
+    uint32_t ir;
+    uint32_t ocr1;
+    uint32_t cnt;
+
+    uint32_t waiting_rov;
+    qemu_irq irq;
+} IMXTimerGState;
+
+static const VMStateDescription vmstate_imx_timerg = {
+    .name = "imx-timerg",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField[]) {
+        VMSTATE_UINT32(cr, IMXTimerGState),
+        VMSTATE_UINT32(pr, IMXTimerGState),
+        VMSTATE_UINT32(sr, IMXTimerGState),
+        VMSTATE_UINT32(ir, IMXTimerGState),
+        VMSTATE_UINT32(ocr1, IMXTimerGState),
+        VMSTATE_UINT32(cnt, IMXTimerGState),
+        VMSTATE_UINT32(waiting_rov, IMXTimerGState),
+        VMSTATE_PTIMER(timer, IMXTimerGState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const IMXClk imx_timerg_clocks[] = {
+    NOCLK,    /* 000 No clock source */
+    IPG,      /* 001 ipg_clk, 532MHz*/
+    IPG,      /* 010 ipg_clk_highfreq */
+    NOCLK,    /* 011 not defined */
+    CLK_32k,  /* 100 ipg_clk_32k */
+    NOCLK,    /* 101 not defined */
+    NOCLK,    /* 110 not defined */
+    NOCLK,    /* 111 not defined */
+};
+
+
+static void imx_timerg_set_freq(IMXTimerGState *s)
+{
+    int clksrc;
+    uint32_t freq;
+
+    clksrc = (s->cr >> GPT_CR_CLKSRC_SHIFT) & GPT_CR_CLKSRC_MASK;
+    freq = imx_clock_frequency(s->ccm, imx_timerg_clocks[clksrc]) / (1 + s->pr);
+
+    DPRINTF("Setting gtimer clksrc %d to frequency %d\n", clksrc, freq);
+    if (freq) {
+        ptimer_set_freq(s->timer, freq);
+    }
+}
+
+static void imx_timerg_update(IMXTimerGState *s)
+{
+    uint32_t flags = s->sr & s->ir & (GPT_SR_OF1 | GPT_SR_ROV);
+
+    DPRINTF("g-timer SR: %s %s IR=%s %s, %s\n",
+            s->sr & GPT_SR_OF1 ? "OF1" : "",
+            s->sr & GPT_SR_ROV ? "ROV" : "",
+            s->ir & GPT_SR_OF1 ? "OF1" : "",
+            s->ir & GPT_SR_ROV ? "ROV" : "",
+            s->cr & GPT_CR_EN ? "CR_EN" : "Not Enabled");
+
+
+    qemu_set_irq(s->irq, (s->cr & GPT_CR_EN) && flags);
+}
+
+static uint32_t imx_timerg_update_counts(IMXTimerGState *s)
+{
+    uint64_t target = s->waiting_rov ? TIMER_MAX : s->ocr1;
+    uint64_t cnt = ptimer_get_count(s->timer);
+    s->cnt = target - cnt;
+    return s->cnt;
+}
+
+static void imx_timerg_reload(IMXTimerGState *s, uint32_t timeout)
+{
+    uint64_t diff_cnt;
+
+    if (!(s->cr & GPT_CR_FRR)) {
+        IPRINTF("IMX_timerg_reload --- called in reset-mode\n");
+        return;
+    }
+
+    /*
+     * For small timeouts, qemu sometimes runs too slow.
+     * Better deliver a late interrupt than none.
+     *
+     * In Reset mode (FRR bit clear)
+     * the ptimer reloads itself from OCR1;
+     * in free-running mode we need to fake
+     * running from 0 to ocr1 to TIMER_MAX
+     */
+    if (timeout > s->cnt) {
+        diff_cnt = timeout - s->cnt;
+    } else {
+        diff_cnt = 0;
+    }
+    ptimer_set_count(s->timer, diff_cnt);
+}
+
+static uint64_t imx_timerg_read(void *opaque, target_phys_addr_t offset,
+                                unsigned size)
+{
+    IMXTimerGState *s = (IMXTimerGState *)opaque;
+
+    DPRINTF("g-read(offset=%x)", offset >> 2);
+    switch (offset >> 2) {
+    case 0: /* Control Register */
+        DPRINTF(" cr = %x\n", s->cr);
+        return s->cr;
+
+    case 1: /* prescaler */
+        DPRINTF(" pr = %x\n", s->pr);
+        return s->pr;
+
+    case 2: /* Status Register */
+        DPRINTF(" sr = %x\n", s->sr);
+        return s->sr;
+
+    case 3: /* Interrupt Register */
+        DPRINTF(" ir = %x\n", s->ir);
+        return s->ir;
+
+    case 4: /* Output Compare Register 1 */
+        DPRINTF(" ocr1 = %x\n", s->ocr1);
+        return s->ocr1;
+
+
+    case 9: /* cnt */
+        imx_timerg_update_counts(s);
+        DPRINTF(" cnt = %x\n", s->cnt);
+        return s->cnt;
+    }
+
+    IPRINTF("imx_timerg_read: Bad offset %x\n",
+            (int)offset >> 2);
+    return 0;
+}
+
+static void imx_timerg_reset(DeviceState *dev)
+{
+    IMXTimerGState *s = container_of(dev, IMXTimerGState, busdev.qdev);
+
+    /*
+     * Soft reset doesn't touch some bits; hard reset clears them
+     */
+    s->cr &= ~(GPT_CR_EN|GPT_CR_DOZEN|GPT_CR_WAITEN|GPT_CR_DBGEN);
+    s->sr = 0;
+    s->pr = 0;
+    s->ir = 0;
+    s->cnt = 0;
+    s->ocr1 = TIMER_MAX;
+    ptimer_stop(s->timer);
+    ptimer_set_limit(s->timer, TIMER_MAX, 1);
+    imx_timerg_set_freq(s);
+}
+
+static void imx_timerg_write(void *opaque, target_phys_addr_t offset,
+                             uint64_t value, unsigned size)
+{
+    IMXTimerGState *s = (IMXTimerGState *)opaque;
+    DPRINTF("g-write(offset=%x, value = 0x%x)\n", (unsigned int)offset >> 2,
+            (unsigned int)value);
+
+    switch (offset >> 2) {
+    case 0: {
+        uint32_t oldcr = s->cr;
+        /* CR */
+        if (value & GPT_CR_SWR) { /* force reset */
+            value &= ~GPT_CR_SWR;
+            imx_timerg_reset(&s->busdev.qdev);
+            imx_timerg_update(s);
+        }
+
+        s->cr = value & ~0x7c00;
+        imx_timerg_set_freq(s);
+        if ((oldcr ^ value) & GPT_CR_EN) {
+            if (value & GPT_CR_EN) {
+                if (value & GPT_CR_ENMOD) {
+                    ptimer_set_count(s->timer, s->ocr1);
+                    s->cnt = 0;
+                }
+                ptimer_run(s->timer,
+                           (value & GPT_CR_FRR) && (s->ocr1 != TIMER_MAX));
+            } else {
+                ptimer_stop(s->timer);
+            };
+        }
+        return;
+    }
+
+    case 1: /* Prescaler */
+        s->pr = value & 0xfff;
+        imx_timerg_set_freq(s);
+        return;
+
+    case 2: /* SR */
+        /*
+         * No point in implementing the status register bits to do with
+         * external interrupt sources.
+         */
+        value &= GPT_SR_OF1 | GPT_SR_ROV;
+        s->sr &= ~value;
+        imx_timerg_update(s);
+        return;
+
+    case 3: /* IR -- interrupt register */
+        s->ir = value & 0x3f;
+        imx_timerg_update(s);
+        return;
+
+    case 4: /* OCR1 -- output compare register */
+        /* In non-freerun mode, reset count when this register is written */
+        if (!(s->cr & GPT_CR_FRR)) {
+            s->waiting_rov = 0;
+            ptimer_set_limit(s->timer, value, 1);
+        } else {
+            imx_timerg_update_counts(s);
+            if (value > s->cnt) {
+                s->waiting_rov = 0;
+                imx_timerg_reload(s, value);
+            } else {
+                s->waiting_rov = 1;
+                imx_timerg_reload(s, TIMER_MAX - s->cnt);
+            }
+        }
+        s->ocr1 = value;
+        return;
+
+    default:
+        IPRINTF("imx_timerg_write: Bad offset %x\n",
+                (int)offset >> 2);
+    }
+}
+
+static void imx_timerg_timeout(void *opaque)
+{
+    IMXTimerGState *s = (IMXTimerGState *)opaque;
+
+    DPRINTF("imx_timerg_timeout, waiting rov=%d\n", s->waiting_rov);
+    if (s->cr & GPT_CR_FRR) {
+        /*
+         * Free running timer from 0 -> TIMERMAX
+         * Generates interrupt at TIMER_MAX and at cnt==ocr1
+         * If ocr1 == TIMER_MAX, then no need to reload timer.
+         */
+        if (s->ocr1 == TIMER_MAX) {
+            DPRINTF("s->ocr1 == TIMER_MAX, FRR\n");
+            s->sr |= GPT_SR_OF1 | GPT_SR_ROV;
+            imx_timerg_update(s);
+            return;
+        }
+
+        if (s->waiting_rov) {
+            /*
+             * We were waiting for cnt==TIMER_MAX
+             */
+            s->sr |= GPT_SR_ROV;
+            s->waiting_rov = 0;
+            s->cnt = 0;
+            imx_timerg_reload(s, s->ocr1);
+        } else {
+            /* Must have got a cnt==ocr1 timeout. */
+            s->sr |= GPT_SR_OF1;
+            s->cnt = s->ocr1;
+            s->waiting_rov = 1;
+            imx_timerg_reload(s, TIMER_MAX);
+        }
+        imx_timerg_update(s);
+        return;
+    }
+
+    s->sr |= GPT_SR_OF1;
+    imx_timerg_update(s);
+}
+
+static const MemoryRegionOps imx_timerg_ops = {
+    .read = imx_timerg_read,
+    .write = imx_timerg_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+
+static int imx_timerg_init(SysBusDevice *dev)
+{
+    IMXTimerGState *s = FROM_SYSBUS(IMXTimerGState, dev);
+    QEMUBH *bh;
+
+    sysbus_init_irq(dev, &s->irq);
+    memory_region_init_io(&s->iomem, &imx_timerg_ops,
+                          s, "imxg-timer",
+                          0x00001000);
+    sysbus_init_mmio(dev, &s->iomem);
+
+    bh = qemu_bh_new(imx_timerg_timeout, s);
+    s->timer = ptimer_init(bh);
+
+    /* Hard reset resets extra bits in CR */
+    s->cr = 0;
+    return 0;
+}
+
+
+
+/*
+ * EPIT: Enhanced periodic interrupt timer
+ */
+
+#define CR_EN       (1 << 0)
+#define CR_ENMOD    (1 << 1)
+#define CR_OCIEN    (1 << 2)
+#define CR_RLD      (1 << 3)
+#define CR_PRESCALE_SHIFT (4)
+#define CR_PRESCALE_MASK  (0xfff)
+#define CR_SWR      (1 << 16)
+#define CR_IOVW     (1 << 17)
+#define CR_DBGEN    (1 << 18)
+#define CR_EPIT     (1 << 19)
+#define CR_DOZEN    (1 << 20)
+#define CR_STOPEN   (1 << 21)
+#define CR_CLKSRC_SHIFT (24)
+#define CR_CLKSRC_MASK  (0x3 << CR_CLKSRC_SHIFT)
+
+
+/*
+ * Exact clock frequencies vary from board to board.
+ * These are typical.
+ */
+static const IMXClk imx_timerp_clocks[] =  {
+    0,        /* disabled */
+    IPG, /* ipg_clk, ~532MHz */
+    IPG, /* ipg_clk_highfreq */
+    CLK_32k,    /* ipg_clk_32k -- ~32kHz */
+};
+
+typedef struct {
+    SysBusDevice busdev;
+    ptimer_state *timer;
+    MemoryRegion iomem;
+    DeviceState *ccm;
+
+    uint32_t cr;
+    uint32_t lr;
+    uint32_t cmp;
+
+    uint32_t freq;
+    int int_level;
+    qemu_irq irq;
+} IMXTimerPState;
+
+/*
+ * Update interrupt status
+ */
+static void imx_timerp_update(IMXTimerPState *s)
+{
+    if (s->int_level && (s->cr & CR_OCIEN)) {
+        qemu_irq_raise(s->irq);
+    } else {
+        qemu_irq_lower(s->irq);
+    }
+}
+
+static void imx_timerp_reset(DeviceState *dev)
+{
+    IMXTimerPState *s = container_of(dev, IMXTimerPState, busdev.qdev);
+
+    s->cr = 0;
+    s->lr = TIMER_MAX;
+    s->int_level = 0;
+    s->cmp = 0;
+    ptimer_stop(s->timer);
+    ptimer_set_count(s->timer, TIMER_MAX);
+}
+
+static uint64_t imx_timerp_read(void *opaque, target_phys_addr_t offset,
+                                unsigned size)
+{
+    IMXTimerPState *s = (IMXTimerPState *)opaque;
+
+    DPRINTF("p-read(offset=%x)", offset >> 2);
+    switch (offset >> 2) {
+    case 0: /* Control Register */
+        DPRINTF("cr %x\n", s->cr);
+        return s->cr;
+
+    case 1: /* Status Register */
+        DPRINTF("int_level %x\n", s->int_level);
+        return s->int_level;
+
+    case 2: /* LR - ticks*/
+        DPRINTF("lr %x\n", s->lr);
+        return s->lr;
+
+    case 3: /* CMP */
+        DPRINTF("cmp %x\n", s->cmp);
+        return s->cmp;
+
+    case 4: /* CNT */
+        return ptimer_get_count(s->timer);
+    }
+    IPRINTF("imx_timerp_read: Bad offset %x\n",
+            (int)offset >> 2);
+    return 0;
+}
+
+static void set_timerp_freq(IMXTimerPState *s)
+{
+    int clksrc;
+    unsigned prescaler;
+    uint32_t freq;
+
+    clksrc = (s->cr & CR_CLKSRC_MASK) >> CR_CLKSRC_SHIFT;
+    prescaler = 1 + ((s->cr >> CR_PRESCALE_SHIFT) & CR_PRESCALE_MASK);
+    freq = imx_clock_frequency(s->ccm, imx_timerp_clocks[clksrc]) / prescaler;
+
+    s->freq = freq;
+    DPRINTF("Setting ptimer frequency to %u\n", freq);
+
+    if (freq) {
+        ptimer_set_freq(s->timer, freq);
+    }
+}
+
+static void imx_timerp_write(void *opaque, target_phys_addr_t offset,
+                             uint64_t value, unsigned size)
+{
+    IMXTimerPState *s = (IMXTimerPState *)opaque;
+    DPRINTF("p-write(offset=%x, value = %x)\n", (unsigned int)offset >> 2,
+            (unsigned int)value);
+
+    switch (offset >> 2) {
+    case 0: /* CR */
+        if (value & CR_SWR) {
+            imx_timerp_reset(&s->busdev.qdev);
+            value &= ~CR_SWR;
+        }
+        s->cr = value & 0x03ffffff;
+        set_timerp_freq(s);
+
+        if (s->freq && (s->cr & CR_EN)) {
+            if (!(s->cr & CR_ENMOD)) {
+                ptimer_set_count(s->timer, s->lr);
+            }
+            ptimer_run(s->timer, 0);
+        } else {
+            ptimer_stop(s->timer);
+        }
+        break;
+
+    case 1: /* SR - ACK*/
+        s->int_level = 0;
+        imx_timerp_update(s);
+        break;
+
+    case 2: /* LR - set ticks */
+        s->lr = value;
+        ptimer_set_limit(s->timer, value, !!(s->cr & CR_IOVW));
+        break;
+
+    case 3: /* CMP */
+        s->cmp = value;
+        if (value) {
+            IPRINTF(
+                "Values for EPIT comparison other than zero not supported\n"
+            );
+        }
+        break;
+
+    default:
+        IPRINTF("imx_timerp_write: Bad offset %x\n",
+                   (int)offset >> 2);
+    }
+}
+
+static void imx_timerp_tick(void *opaque)
+{
+    IMXTimerPState *s = (IMXTimerPState *)opaque;
+
+   DPRINTF("imxp tick\n");
+    if (!(s->cr & CR_RLD)) {
+        ptimer_set_count(s->timer, TIMER_MAX);
+    }
+    s->int_level = 1;
+    imx_timerp_update(s);
+}
+
+void imx_timerp_create(const target_phys_addr_t addr,
+                              qemu_irq irq,
+                              DeviceState *ccm)
+{
+    IMXTimerPState *pp;
+    DeviceState *dev;
+
+    dev = sysbus_create_simple("imx_timerp", addr, irq);
+    pp = container_of(dev, IMXTimerPState, busdev.qdev);
+    pp->ccm = ccm;
+}
+
+static const MemoryRegionOps imx_timerp_ops = {
+  .read = imx_timerp_read,
+  .write = imx_timerp_write,
+  .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription vmstate_imx_timerp = {
+    .name = "imx-timerp",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField[]) {
+        VMSTATE_UINT32(cr, IMXTimerPState),
+        VMSTATE_UINT32(lr, IMXTimerPState),
+        VMSTATE_UINT32(cmp, IMXTimerPState),
+        VMSTATE_UINT32(freq, IMXTimerPState),
+        VMSTATE_INT32(int_level, IMXTimerPState),
+        VMSTATE_PTIMER(timer, IMXTimerPState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static int imx_timerp_init(SysBusDevice *dev)
+{
+    IMXTimerPState *s = FROM_SYSBUS(IMXTimerPState, dev);
+    QEMUBH *bh;
+
+    DPRINTF("imx_timerp_init\n");
+
+    sysbus_init_irq(dev, &s->irq);
+    memory_region_init_io(&s->iomem, &imx_timerp_ops,
+                          s, "imxp-timer",
+                          0x00001000);
+    sysbus_init_mmio(dev, &s->iomem);
+
+    bh = qemu_bh_new(imx_timerp_tick, s);
+    s->timer = ptimer_init(bh);
+
+    return 0;
+}
+
+
+void imx_timerg_create(const target_phys_addr_t addr,
+                              qemu_irq irq,
+                              DeviceState *ccm)
+{
+    IMXTimerGState *pp;
+    DeviceState *dev;
+
+    dev = sysbus_create_simple("imx_timerg", addr, irq);
+    pp = container_of(dev, IMXTimerGState, busdev.qdev);
+    pp->ccm = ccm;
+}
+
+static void imx_timerg_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc  = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+    k->init = imx_timerg_init;
+    dc->vmsd = &vmstate_imx_timerg;
+    dc->reset = imx_timerg_reset;
+    dc->desc = "i.MX general timer";
+}
+
+static void imx_timerp_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc  = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+    k->init = imx_timerp_init;
+    dc->vmsd = &vmstate_imx_timerp;
+    dc->reset = imx_timerp_reset;
+    dc->desc = "i.MX periodic timer";
+}
+
+static const TypeInfo imx_timerp_info = {
+    .name = "imx_timerp",
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(IMXTimerPState),
+    .class_init = imx_timerp_class_init,
+};
+
+static const TypeInfo imx_timerg_info = {
+    .name = "imx_timerg",
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(IMXTimerGState),
+    .class_init = imx_timerg_class_init,
+};
+
+static void imx_timer_register_types(void)
+{
+    type_register_static(&imx_timerp_info);
+    type_register_static(&imx_timerg_info);
+}
+
+type_init(imx_timer_register_types)
diff --git a/hw/integratorcp.c b/hw/integratorcp.c
index deacbf4d0d..d0e2e9068e 100644
--- a/hw/integratorcp.c
+++ b/hw/integratorcp.c
@@ -493,7 +493,7 @@ static void integratorcp_init(ram_addr_t ram_size,
     sysbus_create_simple("pl050_keyboard", 0x18000000, pic[3]);
     sysbus_create_simple("pl050_mouse", 0x19000000, pic[4]);
     sysbus_create_varargs("pl181", 0x1c000000, pic[23], pic[24], NULL);
-    if (nd_table[0].vlan)
+    if (nd_table[0].used)
         smc91c111_init(&nd_table[0], 0xc8000000, pic[27]);
 
     sysbus_create_simple("pl110", 0xc0000000, pic[22]);
diff --git a/hw/intel-hda.c b/hw/intel-hda.c
index 31fe1c54f6..127e81888b 100644
--- a/hw/intel-hda.c
+++ b/hw/intel-hda.c
@@ -1149,13 +1149,12 @@ static int intel_hda_init(PCIDevice *pci)
     return 0;
 }
 
-static int intel_hda_exit(PCIDevice *pci)
+static void intel_hda_exit(PCIDevice *pci)
 {
     IntelHDAState *d = DO_UPCAST(IntelHDAState, pci, pci);
 
     msi_uninit(&d->pci);
     memory_region_destroy(&d->mmio);
-    return 0;
 }
 
 static int intel_hda_post_load(void *opaque, int version)
diff --git a/hw/ioh3420.c b/hw/ioh3420.c
index 0a2601cac4..94a537c9b3 100644
--- a/hw/ioh3420.c
+++ b/hw/ioh3420.c
@@ -96,7 +96,6 @@ static int ioh3420_initfn(PCIDevice *d)
     PCIEPort *p = DO_UPCAST(PCIEPort, br, br);
     PCIESlot *s = DO_UPCAST(PCIESlot, port, p);
     int rc;
-    int tmp;
 
     rc = pci_bridge_initfn(d);
     if (rc < 0) {
@@ -144,12 +143,11 @@ err_pcie_cap:
 err_msi:
     msi_uninit(d);
 err_bridge:
-    tmp = pci_bridge_exitfn(d);
-    assert(!tmp);
+    pci_bridge_exitfn(d);
     return rc;
 }
 
-static int ioh3420_exitfn(PCIDevice *d)
+static void ioh3420_exitfn(PCIDevice *d)
 {
     PCIBridge* br = DO_UPCAST(PCIBridge, dev, d);
     PCIEPort *p = DO_UPCAST(PCIEPort, br, br);
@@ -159,7 +157,7 @@ static int ioh3420_exitfn(PCIDevice *d)
     pcie_chassis_del_slot(s);
     pcie_cap_exit(d);
     msi_uninit(d);
-    return pci_bridge_exitfn(d);
+    pci_bridge_exitfn(d);
 }
 
 PCIESlot *ioh3420_init(PCIBus *bus, int devfn, bool multifunction,
diff --git a/hw/ivshmem.c b/hw/ivshmem.c
index 05559b639c..0c58161565 100644
--- a/hw/ivshmem.c
+++ b/hw/ivshmem.c
@@ -23,6 +23,7 @@
 #include "kvm.h"
 #include "migration.h"
 #include "qerror.h"
+#include "event_notifier.h"
 
 #include <sys/mman.h>
 #include <sys/types.h>
@@ -45,7 +46,7 @@
 
 typedef struct Peer {
     int nb_eventfds;
-    int *eventfds;
+    EventNotifier *eventfds;
 } Peer;
 
 typedef struct EventfdEntry {
@@ -63,14 +64,12 @@ typedef struct IVShmemState {
     CharDriverState *server_chr;
     MemoryRegion ivshmem_mmio;
 
-    pcibus_t mmio_addr;
     /* We might need to register the BAR before we actually have the memory.
      * So prepare a container MemoryRegion for the BAR immediately and
      * add a subregion when we have the memory.
      */
     MemoryRegion bar;
     MemoryRegion ivshmem;
-    MemoryRegion msix_bar;
     uint64_t ivshmem_size; /* size of shared memory region */
     int shm_fd; /* shared memory file descriptor */
 
@@ -168,7 +167,6 @@ static void ivshmem_io_write(void *opaque, target_phys_addr_t addr,
 {
     IVShmemState *s = opaque;
 
-    uint64_t write_one = 1;
     uint16_t dest = val >> 16;
     uint16_t vector = val & 0xff;
 
@@ -194,12 +192,8 @@ static void ivshmem_io_write(void *opaque, target_phys_addr_t addr,
 
             /* check doorbell range */
             if (vector < s->peers[dest].nb_eventfds) {
-                IVSHMEM_DPRINTF("Writing %" PRId64 " to VM %d on vector %d\n",
-                                                    write_one, dest, vector);
-                if (write(s->peers[dest].eventfds[vector],
-                                                    &(write_one), 8) != 8) {
-                    IVSHMEM_DPRINTF("error writing to eventfd\n");
-                }
+                IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
+                event_notifier_set(&s->peers[dest].eventfds[vector]);
             }
             break;
         default:
@@ -279,12 +273,13 @@ static void fake_irqfd(void *opaque, const uint8_t *buf, int size) {
     msix_notify(pdev, entry->vector);
 }
 
-static CharDriverState* create_eventfd_chr_device(void * opaque, int eventfd,
-                                                                    int vector)
+static CharDriverState* create_eventfd_chr_device(void * opaque, EventNotifier *n,
+                                                  int vector)
 {
     /* create a event character device based on the passed eventfd */
     IVShmemState *s = opaque;
     CharDriverState * chr;
+    int eventfd = event_notifier_get_fd(n);
 
     chr = qemu_chr_open_eventfd(eventfd);
 
@@ -347,16 +342,39 @@ static void create_shared_memory_BAR(IVShmemState *s, int fd) {
     pci_register_bar(&s->dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar);
 }
 
+static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
+{
+    memory_region_add_eventfd(&s->ivshmem_mmio,
+                              DOORBELL,
+                              4,
+                              true,
+                              (posn << 16) | i,
+                              &s->peers[posn].eventfds[i]);
+}
+
+static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
+{
+    memory_region_del_eventfd(&s->ivshmem_mmio,
+                              DOORBELL,
+                              4,
+                              true,
+                              (posn << 16) | i,
+                              &s->peers[posn].eventfds[i]);
+}
+
 static void close_guest_eventfds(IVShmemState *s, int posn)
 {
     int i, guest_curr_max;
 
     guest_curr_max = s->peers[posn].nb_eventfds;
 
+    memory_region_transaction_begin();
     for (i = 0; i < guest_curr_max; i++) {
-        kvm_set_ioeventfd_mmio(s->peers[posn].eventfds[i],
-                    s->mmio_addr + DOORBELL, (posn << 16) | i, 0, 4);
-        close(s->peers[posn].eventfds[i]);
+        ivshmem_del_eventfd(s, posn, i);
+    }
+    memory_region_transaction_commit();
+    for (i = 0; i < guest_curr_max; i++) {
+        event_notifier_cleanup(&s->peers[posn].eventfds[i]);
     }
 
     g_free(s->peers[posn].eventfds);
@@ -369,12 +387,7 @@ static void setup_ioeventfds(IVShmemState *s) {
 
     for (i = 0; i <= s->max_peer; i++) {
         for (j = 0; j < s->peers[i].nb_eventfds; j++) {
-            memory_region_add_eventfd(&s->ivshmem_mmio,
-                                      DOORBELL,
-                                      4,
-                                      true,
-                                      (i << 16) | j,
-                                      s->peers[i].eventfds[j]);
+            ivshmem_add_eventfd(s, i, j);
         }
     }
 }
@@ -476,14 +489,14 @@ static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
 
     if (guest_max_eventfd == 0) {
         /* one eventfd per MSI vector */
-        s->peers[incoming_posn].eventfds = (int *) g_malloc(s->vectors *
-                                                                sizeof(int));
+        s->peers[incoming_posn].eventfds = g_new(EventNotifier, s->vectors);
     }
 
     /* this is an eventfd for a particular guest VM */
     IVSHMEM_DPRINTF("eventfds[%ld][%d] = %d\n", incoming_posn,
                                             guest_max_eventfd, incoming_fd);
-    s->peers[incoming_posn].eventfds[guest_max_eventfd] = incoming_fd;
+    event_notifier_init_fd(&s->peers[incoming_posn].eventfds[guest_max_eventfd],
+                           incoming_fd);
 
     /* increment count for particular guest */
     s->peers[incoming_posn].nb_eventfds++;
@@ -495,15 +508,12 @@ static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
 
     if (incoming_posn == s->vm_id) {
         s->eventfd_chr[guest_max_eventfd] = create_eventfd_chr_device(s,
-                   s->peers[s->vm_id].eventfds[guest_max_eventfd],
+                   &s->peers[s->vm_id].eventfds[guest_max_eventfd],
                    guest_max_eventfd);
     }
 
     if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
-        if (kvm_set_ioeventfd_mmio(incoming_fd, s->mmio_addr + DOORBELL,
-                        (incoming_posn << 16) | guest_max_eventfd, 1, 4) < 0) {
-            fprintf(stderr, "ivshmem: ioeventfd not available\n");
-        }
+        ivshmem_add_eventfd(s, incoming_posn, guest_max_eventfd);
     }
 
     return;
@@ -563,16 +573,13 @@ static uint64_t ivshmem_get_size(IVShmemState * s) {
 
 static void ivshmem_setup_msi(IVShmemState * s)
 {
-    memory_region_init(&s->msix_bar, "ivshmem-msix", 4096);
-    if (!msix_init(&s->dev, s->vectors, &s->msix_bar, 1, 0)) {
-        pci_register_bar(&s->dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY,
-                         &s->msix_bar);
-        IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
-    } else {
+    if (msix_init_exclusive_bar(&s->dev, s->vectors, 1)) {
         IVSHMEM_DPRINTF("msix initialization failed\n");
         exit(1);
     }
 
+    IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
+
     /* allocate QEMU char devices for receiving interrupts */
     s->eventfd_table = g_malloc0(s->vectors * sizeof(EventfdEntry));
 
@@ -764,7 +771,7 @@ static int pci_ivshmem_init(PCIDevice *dev)
     return 0;
 }
 
-static int pci_ivshmem_uninit(PCIDevice *dev)
+static void pci_ivshmem_uninit(PCIDevice *dev)
 {
     IVShmemState *s = DO_UPCAST(IVShmemState, dev, dev);
 
@@ -779,8 +786,6 @@ static int pci_ivshmem_uninit(PCIDevice *dev)
     memory_region_destroy(&s->ivshmem);
     memory_region_destroy(&s->bar);
     unregister_savevm(&dev->qdev, "ivshmem", s);
-
-    return 0;
 }
 
 static Property ivshmem_properties[] = {
diff --git a/hw/kzm.c b/hw/kzm.c
new file mode 100644
index 0000000000..6a5e9dfaca
--- /dev/null
+++ b/hw/kzm.c
@@ -0,0 +1,154 @@
+/*
+ * KZM Board System emulation.
+ *
+ * Copyright (c) 2008 OKL and 2011 NICTA
+ * Written by Hans at OK-Labs
+ * Updated by Peter Chubb.
+ *
+ * This code is licenced under the GPL, version 2 or later.
+ * See the file `COPYING' in the top level directory.
+ *
+ * It (partially) emulates a Kyoto Microcomputer
+ * KZM-ARM11-01 evaluation board, with a Freescale
+ * i.MX31 SoC
+ */
+
+#include "sysbus.h"
+#include "exec-memory.h"
+#include "hw.h"
+#include "arm-misc.h"
+#include "devices.h"
+#include "net.h"
+#include "sysemu.h"
+#include "boards.h"
+#include "pc.h" /* for the FPGA UART that emulates a 16550 */
+#include "imx.h"
+
+    /* Memory map for Kzm Emulation Baseboard:
+     * 0x00000000-0x00003fff 16k secure ROM       IGNORED
+     * 0x00004000-0x00407fff Reserved             IGNORED
+     * 0x00404000-0x00407fff ROM                  IGNORED
+     * 0x00408000-0x0fffffff Reserved             IGNORED
+     * 0x10000000-0x1fffbfff RAM aliasing         IGNORED
+     * 0x1fffc000-0x1fffffff RAM                  EMULATED
+     * 0x20000000-0x2fffffff Reserved             IGNORED
+     * 0x30000000-0x7fffffff I.MX31 Internal Register Space
+     *   0x43f00000 IO_AREA0
+     *   0x43f90000 UART1                         EMULATED
+     *   0x43f94000 UART2                         EMULATED
+     *   0x68000000 AVIC                          EMULATED
+     *   0x53f80000 CCM                           EMULATED
+     *   0x53f94000 PIT 1                         EMULATED
+     *   0x53f98000 PIT 2                         EMULATED
+     *   0x53f90000 GPT                           EMULATED
+     * 0x80000000-0x87ffffff RAM                  EMULATED
+     * 0x88000000-0x8fffffff RAM Aliasing         EMULATED
+     * 0xa0000000-0xafffffff NAND Flash           IGNORED
+     * 0xb0000000-0xb3ffffff Unavailable          IGNORED
+     * 0xb4000000-0xb4000fff 8-bit free space     IGNORED
+     * 0xb4001000-0xb400100f Board control        IGNORED
+     *  0xb4001003           DIP switch
+     * 0xb4001010-0xb400101f 7-segment LED        IGNORED
+     * 0xb4001020-0xb400102f LED                  IGNORED
+     * 0xb4001030-0xb400103f LED                  IGNORED
+     * 0xb4001040-0xb400104f FPGA, UART           EMULATED
+     * 0xb4001050-0xb400105f FPGA, UART           EMULATED
+     * 0xb4001060-0xb40fffff FPGA                 IGNORED
+     * 0xb6000000-0xb61fffff LAN controller       EMULATED
+     * 0xb6200000-0xb62fffff FPGA NAND Controller IGNORED
+     * 0xb6300000-0xb7ffffff Free                 IGNORED
+     * 0xb8000000-0xb8004fff Memory control registers IGNORED
+     * 0xc0000000-0xc3ffffff PCMCIA/CF            IGNORED
+     * 0xc4000000-0xffffffff Reserved             IGNORED
+     */
+
+#define KZM_RAMADDRESS (0x80000000)
+#define KZM_FPGA       (0xb4001040)
+
+static struct arm_boot_info kzm_binfo = {
+    .loader_start = KZM_RAMADDRESS,
+    .board_id = 1722,
+};
+
+static void kzm_init(ram_addr_t ram_size,
+                     const char *boot_device,
+                     const char *kernel_filename, const char *kernel_cmdline,
+                     const char *initrd_filename, const char *cpu_model)
+{
+    ARMCPU *cpu;
+    MemoryRegion *address_space_mem = get_system_memory();
+    MemoryRegion *ram = g_new(MemoryRegion, 1);
+    MemoryRegion *sram = g_new(MemoryRegion, 1);
+    MemoryRegion *ram_alias = g_new(MemoryRegion, 1);
+    qemu_irq *cpu_pic;
+    DeviceState *dev;
+    DeviceState *ccm;
+
+    if (!cpu_model) {
+        cpu_model = "arm1136";
+    }
+
+    cpu = cpu_arm_init(cpu_model);
+    if (!cpu) {
+        fprintf(stderr, "Unable to find CPU definition\n");
+        exit(1);
+    }
+
+    /* On a real system, the first 16k is a `secure boot rom' */
+
+    memory_region_init_ram(ram, "kzm.ram", ram_size);
+    vmstate_register_ram_global(ram);
+    memory_region_add_subregion(address_space_mem, KZM_RAMADDRESS, ram);
+
+    memory_region_init_alias(ram_alias, "ram.alias", ram, 0, ram_size);
+    memory_region_add_subregion(address_space_mem, 0x88000000, ram_alias);
+
+    memory_region_init_ram(sram, "kzm.sram", 0x4000);
+    memory_region_add_subregion(address_space_mem, 0x1FFFC000, sram);
+
+    cpu_pic = arm_pic_init_cpu(cpu);
+    dev = sysbus_create_varargs("imx_avic", 0x68000000,
+                                cpu_pic[ARM_PIC_CPU_IRQ],
+                                cpu_pic[ARM_PIC_CPU_FIQ], NULL);
+
+
+    imx_serial_create(0, 0x43f90000, qdev_get_gpio_in(dev, 45));
+    imx_serial_create(1, 0x43f94000, qdev_get_gpio_in(dev, 32));
+
+    ccm = sysbus_create_simple("imx_ccm", 0x53f80000, NULL);
+
+    imx_timerp_create(0x53f94000, qdev_get_gpio_in(dev, 28), ccm);
+    imx_timerp_create(0x53f98000, qdev_get_gpio_in(dev, 27), ccm);
+    imx_timerg_create(0x53f90000, qdev_get_gpio_in(dev, 29), ccm);
+
+    if (nd_table[0].used) {
+        lan9118_init(&nd_table[0], 0xb6000000, qdev_get_gpio_in(dev, 52));
+    }
+
+    if (serial_hds[2]) { /* touchscreen */
+        serial_mm_init(address_space_mem, KZM_FPGA+0x10, 0,
+                       qdev_get_gpio_in(dev, 52),
+                       14745600, serial_hds[2],
+                       DEVICE_NATIVE_ENDIAN);
+    }
+
+    kzm_binfo.ram_size = ram_size;
+    kzm_binfo.kernel_filename = kernel_filename;
+    kzm_binfo.kernel_cmdline = kernel_cmdline;
+    kzm_binfo.initrd_filename = initrd_filename;
+    kzm_binfo.nb_cpus = 1;
+    arm_load_kernel(cpu, &kzm_binfo);
+}
+
+static QEMUMachine kzm_machine = {
+    .name = "kzm",
+    .desc = "ARM KZM Emulation Baseboard (ARM1136)",
+    .init = kzm_init,
+};
+
+static void kzm_machine_init(void)
+{
+    qemu_register_machine(&kzm_machine);
+}
+
+machine_init(kzm_machine_init)
diff --git a/hw/lan9118.c b/hw/lan9118.c
index 7b4fe87fca..ff0a50be19 100644
--- a/hw/lan9118.c
+++ b/hw/lan9118.c
@@ -384,7 +384,7 @@ static void phy_update_link(lan9118_state *s)
     phy_update_irq(s);
 }
 
-static void lan9118_set_link(VLANClientState *nc)
+static void lan9118_set_link(NetClientState *nc)
 {
     phy_update_link(DO_UPCAST(NICState, nc, nc)->opaque);
 }
@@ -456,7 +456,7 @@ static void lan9118_reset(DeviceState *d)
     lan9118_reload_eeprom(s);
 }
 
-static int lan9118_can_receive(VLANClientState *nc)
+static int lan9118_can_receive(NetClientState *nc)
 {
     return 1;
 }
@@ -509,7 +509,7 @@ static int lan9118_filter(lan9118_state *s, const uint8_t *addr)
     }
 }
 
-static ssize_t lan9118_receive(VLANClientState *nc, const uint8_t *buf,
+static ssize_t lan9118_receive(NetClientState *nc, const uint8_t *buf,
                                size_t size)
 {
     lan9118_state *s = DO_UPCAST(NICState, nc, nc)->opaque;
@@ -1166,9 +1166,11 @@ static void lan9118_16bit_mode_write(void *opaque, target_phys_addr_t offset,
 {
     switch (size) {
     case 2:
-        return lan9118_writew(opaque, offset, (uint32_t)val);
+        lan9118_writew(opaque, offset, (uint32_t)val);
+        return;
     case 4:
-        return lan9118_writel(opaque, offset, val, size);
+        lan9118_writel(opaque, offset, val, size);
+        return;
     }
 
     hw_error("lan9118_write: Bad size 0x%x\n", size);
@@ -1302,7 +1304,7 @@ static const MemoryRegionOps lan9118_16bit_mem_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void lan9118_cleanup(VLANClientState *nc)
+static void lan9118_cleanup(NetClientState *nc)
 {
     lan9118_state *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -1310,7 +1312,7 @@ static void lan9118_cleanup(VLANClientState *nc)
 }
 
 static NetClientInfo net_lan9118_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = lan9118_can_receive,
     .receive = lan9118_receive,
diff --git a/hw/lance.c b/hw/lance.c
index ce3d46c17b..9b98bb849a 100644
--- a/hw/lance.c
+++ b/hw/lance.c
@@ -85,7 +85,7 @@ static const MemoryRegionOps lance_mem_ops = {
     },
 };
 
-static void lance_cleanup(VLANClientState *nc)
+static void lance_cleanup(NetClientState *nc)
 {
     PCNetState *d = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -93,7 +93,7 @@ static void lance_cleanup(VLANClientState *nc)
 }
 
 static NetClientInfo net_lance_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = pcnet_can_receive,
     .receive = pcnet_receive,
diff --git a/hw/lsi53c895a.c b/hw/lsi53c895a.c
index 2fe141d24e..34afe96742 100644
--- a/hw/lsi53c895a.c
+++ b/hw/lsi53c895a.c
@@ -282,8 +282,6 @@ static inline int lsi_irq_on_rsl(LSIState *s)
 
 static void lsi_soft_reset(LSIState *s)
 {
-    lsi_request *p;
-
     DPRINTF("Reset\n");
     s->carry = 0;
 
@@ -350,15 +348,8 @@ static void lsi_soft_reset(LSIState *s)
     s->sbc = 0;
     s->csbc = 0;
     s->sbr = 0;
-    while (!QTAILQ_EMPTY(&s->queue)) {
-        p = QTAILQ_FIRST(&s->queue);
-        QTAILQ_REMOVE(&s->queue, p, next);
-        g_free(p);
-    }
-    if (s->current) {
-        g_free(s->current);
-        s->current = NULL;
-    }
+    assert(QTAILQ_EMPTY(&s->queue));
+    assert(!s->current);
 }
 
 static int lsi_dma_40bit(LSIState *s)
@@ -650,23 +641,24 @@ static lsi_request *lsi_find_by_tag(LSIState *s, uint32_t tag)
     return NULL;
 }
 
+static void lsi_request_free(LSIState *s, lsi_request *p)
+{
+    if (p == s->current) {
+        s->current = NULL;
+    } else {
+        QTAILQ_REMOVE(&s->queue, p, next);
+    }
+    g_free(p);
+}
+
 static void lsi_request_cancelled(SCSIRequest *req)
 {
     LSIState *s = DO_UPCAST(LSIState, dev.qdev, req->bus->qbus.parent);
     lsi_request *p = req->hba_private;
 
-    if (s->current && req == s->current->req) {
-        scsi_req_unref(req);
-        g_free(s->current);
-        s->current = NULL;
-        return;
-    }
-
-    if (p) {
-        QTAILQ_REMOVE(&s->queue, p, next);
-        scsi_req_unref(req);
-        g_free(p);
-    }
+    req->hba_private = NULL;
+    lsi_request_free(s, p);
+    scsi_req_unref(req);
 }
 
 /* Record that data is available for a queued command.  Returns zero if
@@ -714,10 +706,10 @@ static void lsi_command_complete(SCSIRequest *req, uint32_t status, size_t resid
         lsi_set_phase(s, PHASE_ST);
     }
 
-    if (s->current && req == s->current->req) {
-        scsi_req_unref(s->current->req);
-        g_free(s->current);
-        s->current = NULL;
+    if (req->hba_private == s->current) {
+        req->hba_private = NULL;
+        lsi_request_free(s, s->current);
+        scsi_req_unref(req);
     }
     lsi_resume_script(s);
 }
@@ -728,7 +720,8 @@ static void lsi_transfer_data(SCSIRequest *req, uint32_t len)
     LSIState *s = DO_UPCAST(LSIState, dev.qdev, req->bus->qbus.parent);
     int out;
 
-    if (s->waiting == 1 || !s->current || req->hba_private != s->current ||
+    assert(req->hba_private);
+    if (s->waiting == 1 || req->hba_private != s->current ||
         (lsi_irq_on_rsl(s) && !(s->scntl1 & LSI_SCNTL1_CON))) {
         if (lsi_queue_req(s, req, len)) {
             return;
@@ -1738,7 +1731,7 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val)
             lsi_execute_script(s);
         }
         if (val & LSI_ISTAT0_SRST) {
-            lsi_soft_reset(s);
+            qdev_reset_all(&s->dev.qdev);
         }
         break;
     case 0x16: /* MBOX0 */
@@ -2071,15 +2064,13 @@ static const VMStateDescription vmstate_lsi_scsi = {
     }
 };
 
-static int lsi_scsi_uninit(PCIDevice *d)
+static void lsi_scsi_uninit(PCIDevice *d)
 {
     LSIState *s = DO_UPCAST(LSIState, dev, d);
 
     memory_region_destroy(&s->mmio_io);
     memory_region_destroy(&s->ram_io);
     memory_region_destroy(&s->io_io);
-
-    return 0;
 }
 
 static const struct SCSIBusInfo lsi_scsi_info = {
diff --git a/hw/mcf5208.c b/hw/mcf5208.c
index d3ebe8d9ad..ee25b1b387 100644
--- a/hw/mcf5208.c
+++ b/hw/mcf5208.c
@@ -236,7 +236,7 @@ static void mcf5208evb_init(ram_addr_t ram_size,
         fprintf(stderr, "Too many NICs\n");
         exit(1);
     }
-    if (nd_table[0].vlan)
+    if (nd_table[0].used)
         mcf_fec_init(address_space_mem, &nd_table[0],
                      0xfc030000, pic + 36);
 
diff --git a/hw/mcf_fec.c b/hw/mcf_fec.c
index ae37bef0f0..2fec5bc73e 100644
--- a/hw/mcf_fec.c
+++ b/hw/mcf_fec.c
@@ -351,13 +351,13 @@ static void mcf_fec_write(void *opaque, target_phys_addr_t addr,
     mcf_fec_update(s);
 }
 
-static int mcf_fec_can_receive(VLANClientState *nc)
+static int mcf_fec_can_receive(NetClientState *nc)
 {
     mcf_fec_state *s = DO_UPCAST(NICState, nc, nc)->opaque;
     return s->rx_enabled;
 }
 
-static ssize_t mcf_fec_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t mcf_fec_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     mcf_fec_state *s = DO_UPCAST(NICState, nc, nc)->opaque;
     mcf_fec_bd bd;
@@ -439,7 +439,7 @@ static const MemoryRegionOps mcf_fec_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void mcf_fec_cleanup(VLANClientState *nc)
+static void mcf_fec_cleanup(NetClientState *nc)
 {
     mcf_fec_state *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -450,7 +450,7 @@ static void mcf_fec_cleanup(VLANClientState *nc)
 }
 
 static NetClientInfo net_mcf_fec_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = mcf_fec_can_receive,
     .receive = mcf_fec_receive,
@@ -472,7 +472,6 @@ void mcf_fec_init(MemoryRegion *sysmem, NICInfo *nd,
     memory_region_add_subregion(sysmem, base, &s->iomem);
 
     s->conf.macaddr = nd->macaddr;
-    s->conf.vlan = nd->vlan;
     s->conf.peer = nd->netdev;
 
     s->nic = qemu_new_nic(&net_mcf_fec_info, &s->conf, nd->model, nd->name, s);
diff --git a/hw/megasas.c b/hw/megasas.c
new file mode 100644
index 0000000000..c35a15db4f
--- /dev/null
+++ b/hw/megasas.c
@@ -0,0 +1,2209 @@
+/*
+ * QEMU MegaRAID SAS 8708EM2 Host Bus Adapter emulation
+ * Based on the linux driver code at drivers/scsi/megaraid
+ *
+ * Copyright (c) 2009-2012 Hannes Reinecke, SUSE Labs
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw.h"
+#include "pci.h"
+#include "dma.h"
+#include "msix.h"
+#include "iov.h"
+#include "scsi.h"
+#include "scsi-defs.h"
+#include "block_int.h"
+#include "trace.h"
+
+#include "mfi.h"
+
+#define MEGASAS_VERSION "1.70"
+#define MEGASAS_MAX_FRAMES 2048         /* Firmware limit at 65535 */
+#define MEGASAS_DEFAULT_FRAMES 1000     /* Windows requires this */
+#define MEGASAS_MAX_SGE 128             /* Firmware limit */
+#define MEGASAS_DEFAULT_SGE 80
+#define MEGASAS_MAX_SECTORS 0xFFFF      /* No real limit */
+#define MEGASAS_MAX_ARRAYS 128
+
+#define NAA_LOCALLY_ASSIGNED_ID 0x3ULL
+#define IEEE_COMPANY_LOCALLY_ASSIGNED 0x525400
+
+#define MEGASAS_FLAG_USE_JBOD      0
+#define MEGASAS_MASK_USE_JBOD      (1 << MEGASAS_FLAG_USE_JBOD)
+#define MEGASAS_FLAG_USE_MSIX      1
+#define MEGASAS_MASK_USE_MSIX      (1 << MEGASAS_FLAG_USE_MSIX)
+#define MEGASAS_FLAG_USE_QUEUE64   2
+#define MEGASAS_MASK_USE_QUEUE64   (1 << MEGASAS_FLAG_USE_QUEUE64)
+
+static const char *mfi_frame_desc[] = {
+    "MFI init", "LD Read", "LD Write", "LD SCSI", "PD SCSI",
+    "MFI Doorbell", "MFI Abort", "MFI SMP", "MFI Stop"};
+
+typedef struct MegasasCmd {
+    uint32_t index;
+    uint16_t flags;
+    uint16_t count;
+    uint64_t context;
+
+    target_phys_addr_t pa;
+    target_phys_addr_t pa_size;
+    union mfi_frame *frame;
+    SCSIRequest *req;
+    QEMUSGList qsg;
+    void *iov_buf;
+    size_t iov_size;
+    size_t iov_offset;
+    struct MegasasState *state;
+} MegasasCmd;
+
+typedef struct MegasasState {
+    PCIDevice dev;
+    MemoryRegion mmio_io;
+    MemoryRegion port_io;
+    MemoryRegion queue_io;
+    uint32_t frame_hi;
+
+    int fw_state;
+    uint32_t fw_sge;
+    uint32_t fw_cmds;
+    uint32_t flags;
+    int fw_luns;
+    int intr_mask;
+    int doorbell;
+    int busy;
+
+    MegasasCmd *event_cmd;
+    int event_locale;
+    int event_class;
+    int event_count;
+    int shutdown_event;
+    int boot_event;
+
+    uint64_t sas_addr;
+
+    uint64_t reply_queue_pa;
+    void *reply_queue;
+    int reply_queue_len;
+    int reply_queue_head;
+    int reply_queue_tail;
+    uint64_t consumer_pa;
+    uint64_t producer_pa;
+
+    MegasasCmd frames[MEGASAS_MAX_FRAMES];
+
+    SCSIBus bus;
+} MegasasState;
+
+#define MEGASAS_INTR_DISABLED_MASK 0xFFFFFFFF
+
+static bool megasas_intr_enabled(MegasasState *s)
+{
+    if ((s->intr_mask & MEGASAS_INTR_DISABLED_MASK) !=
+        MEGASAS_INTR_DISABLED_MASK) {
+        return true;
+    }
+    return false;
+}
+
+static bool megasas_use_queue64(MegasasState *s)
+{
+    return s->flags & MEGASAS_MASK_USE_QUEUE64;
+}
+
+static bool megasas_use_msix(MegasasState *s)
+{
+    return s->flags & MEGASAS_MASK_USE_MSIX;
+}
+
+static bool megasas_is_jbod(MegasasState *s)
+{
+    return s->flags & MEGASAS_MASK_USE_JBOD;
+}
+
+static void megasas_frame_set_cmd_status(unsigned long frame, uint8_t v)
+{
+    stb_phys(frame + offsetof(struct mfi_frame_header, cmd_status), v);
+}
+
+static void megasas_frame_set_scsi_status(unsigned long frame, uint8_t v)
+{
+    stb_phys(frame + offsetof(struct mfi_frame_header, scsi_status), v);
+}
+
+/*
+ * Context is considered opaque, but the HBA firmware is running
+ * in little endian mode. So convert it to little endian, too.
+ */
+static uint64_t megasas_frame_get_context(unsigned long frame)
+{
+    return ldq_le_phys(frame + offsetof(struct mfi_frame_header, context));
+}
+
+static bool megasas_frame_is_ieee_sgl(MegasasCmd *cmd)
+{
+    return cmd->flags & MFI_FRAME_IEEE_SGL;
+}
+
+static bool megasas_frame_is_sgl64(MegasasCmd *cmd)
+{
+    return cmd->flags & MFI_FRAME_SGL64;
+}
+
+static bool megasas_frame_is_sense64(MegasasCmd *cmd)
+{
+    return cmd->flags & MFI_FRAME_SENSE64;
+}
+
+static uint64_t megasas_sgl_get_addr(MegasasCmd *cmd,
+                                     union mfi_sgl *sgl)
+{
+    uint64_t addr;
+
+    if (megasas_frame_is_ieee_sgl(cmd)) {
+        addr = le64_to_cpu(sgl->sg_skinny->addr);
+    } else if (megasas_frame_is_sgl64(cmd)) {
+        addr = le64_to_cpu(sgl->sg64->addr);
+    } else {
+        addr = le32_to_cpu(sgl->sg32->addr);
+    }
+    return addr;
+}
+
+static uint32_t megasas_sgl_get_len(MegasasCmd *cmd,
+                                    union mfi_sgl *sgl)
+{
+    uint32_t len;
+
+    if (megasas_frame_is_ieee_sgl(cmd)) {
+        len = le32_to_cpu(sgl->sg_skinny->len);
+    } else if (megasas_frame_is_sgl64(cmd)) {
+        len = le32_to_cpu(sgl->sg64->len);
+    } else {
+        len = le32_to_cpu(sgl->sg32->len);
+    }
+    return len;
+}
+
+static union mfi_sgl *megasas_sgl_next(MegasasCmd *cmd,
+                                       union mfi_sgl *sgl)
+{
+    uint8_t *next = (uint8_t *)sgl;
+
+    if (megasas_frame_is_ieee_sgl(cmd)) {
+        next += sizeof(struct mfi_sg_skinny);
+    } else if (megasas_frame_is_sgl64(cmd)) {
+        next += sizeof(struct mfi_sg64);
+    } else {
+        next += sizeof(struct mfi_sg32);
+    }
+
+    if (next >= (uint8_t *)cmd->frame + cmd->pa_size) {
+        return NULL;
+    }
+    return (union mfi_sgl *)next;
+}
+
+static void megasas_soft_reset(MegasasState *s);
+
+static int megasas_map_sgl(MegasasState *s, MegasasCmd *cmd, union mfi_sgl *sgl)
+{
+    int i;
+    int iov_count = 0;
+    size_t iov_size = 0;
+
+    cmd->flags = le16_to_cpu(cmd->frame->header.flags);
+    iov_count = cmd->frame->header.sge_count;
+    if (iov_count > MEGASAS_MAX_SGE) {
+        trace_megasas_iovec_sgl_overflow(cmd->index, iov_count,
+                                         MEGASAS_MAX_SGE);
+        return iov_count;
+    }
+    qemu_sglist_init(&cmd->qsg, iov_count, pci_dma_context(&s->dev));
+    for (i = 0; i < iov_count; i++) {
+        dma_addr_t iov_pa, iov_size_p;
+
+        if (!sgl) {
+            trace_megasas_iovec_sgl_underflow(cmd->index, i);
+            goto unmap;
+        }
+        iov_pa = megasas_sgl_get_addr(cmd, sgl);
+        iov_size_p = megasas_sgl_get_len(cmd, sgl);
+        if (!iov_pa || !iov_size_p) {
+            trace_megasas_iovec_sgl_invalid(cmd->index, i,
+                                            iov_pa, iov_size_p);
+            goto unmap;
+        }
+        qemu_sglist_add(&cmd->qsg, iov_pa, iov_size_p);
+        sgl = megasas_sgl_next(cmd, sgl);
+        iov_size += (size_t)iov_size_p;
+    }
+    if (cmd->iov_size > iov_size) {
+        trace_megasas_iovec_overflow(cmd->index, iov_size, cmd->iov_size);
+    } else if (cmd->iov_size < iov_size) {
+        trace_megasas_iovec_underflow(cmd->iov_size, iov_size, cmd->iov_size);
+    }
+    cmd->iov_offset = 0;
+    return 0;
+unmap:
+    qemu_sglist_destroy(&cmd->qsg);
+    return iov_count - i;
+}
+
+static void megasas_unmap_sgl(MegasasCmd *cmd)
+{
+    qemu_sglist_destroy(&cmd->qsg);
+    cmd->iov_offset = 0;
+}
+
+/*
+ * passthrough sense and io sense are at the same offset
+ */
+static int megasas_build_sense(MegasasCmd *cmd, uint8_t *sense_ptr,
+    uint8_t sense_len)
+{
+    uint32_t pa_hi = 0, pa_lo;
+    target_phys_addr_t pa;
+
+    if (sense_len > cmd->frame->header.sense_len) {
+        sense_len = cmd->frame->header.sense_len;
+    }
+    if (sense_len) {
+        pa_lo = le32_to_cpu(cmd->frame->pass.sense_addr_lo);
+        if (megasas_frame_is_sense64(cmd)) {
+            pa_hi = le32_to_cpu(cmd->frame->pass.sense_addr_hi);
+        }
+        pa = ((uint64_t) pa_hi << 32) | pa_lo;
+        cpu_physical_memory_write(pa, sense_ptr, sense_len);
+        cmd->frame->header.sense_len = sense_len;
+    }
+    return sense_len;
+}
+
+static void megasas_write_sense(MegasasCmd *cmd, SCSISense sense)
+{
+    uint8_t sense_buf[SCSI_SENSE_BUF_SIZE];
+    uint8_t sense_len = 18;
+
+    memset(sense_buf, 0, sense_len);
+    sense_buf[0] = 0xf0;
+    sense_buf[2] = sense.key;
+    sense_buf[7] = 10;
+    sense_buf[12] = sense.asc;
+    sense_buf[13] = sense.ascq;
+    megasas_build_sense(cmd, sense_buf, sense_len);
+}
+
+static void megasas_copy_sense(MegasasCmd *cmd)
+{
+    uint8_t sense_buf[SCSI_SENSE_BUF_SIZE];
+    uint8_t sense_len;
+
+    sense_len = scsi_req_get_sense(cmd->req, sense_buf,
+                                   SCSI_SENSE_BUF_SIZE);
+    megasas_build_sense(cmd, sense_buf, sense_len);
+}
+
+/*
+ * Format an INQUIRY CDB
+ */
+static int megasas_setup_inquiry(uint8_t *cdb, int pg, int len)
+{
+    memset(cdb, 0, 6);
+    cdb[0] = INQUIRY;
+    if (pg > 0) {
+        cdb[1] = 0x1;
+        cdb[2] = pg;
+    }
+    cdb[3] = (len >> 8) & 0xff;
+    cdb[4] = (len & 0xff);
+    return len;
+}
+
+/*
+ * Encode lba and len into a READ_16/WRITE_16 CDB
+ */
+static void megasas_encode_lba(uint8_t *cdb, uint64_t lba,
+                               uint32_t len, bool is_write)
+{
+    memset(cdb, 0x0, 16);
+    if (is_write) {
+        cdb[0] = WRITE_16;
+    } else {
+        cdb[0] = READ_16;
+    }
+    cdb[2] = (lba >> 56) & 0xff;
+    cdb[3] = (lba >> 48) & 0xff;
+    cdb[4] = (lba >> 40) & 0xff;
+    cdb[5] = (lba >> 32) & 0xff;
+    cdb[6] = (lba >> 24) & 0xff;
+    cdb[7] = (lba >> 16) & 0xff;
+    cdb[8] = (lba >> 8) & 0xff;
+    cdb[9] = (lba) & 0xff;
+    cdb[10] = (len >> 24) & 0xff;
+    cdb[11] = (len >> 16) & 0xff;
+    cdb[12] = (len >> 8) & 0xff;
+    cdb[13] = (len) & 0xff;
+}
+
+/*
+ * Utility functions
+ */
+static uint64_t megasas_fw_time(void)
+{
+    struct tm curtime;
+    uint64_t bcd_time;
+
+    qemu_get_timedate(&curtime, 0);
+    bcd_time = ((uint64_t)curtime.tm_sec & 0xff) << 48 |
+        ((uint64_t)curtime.tm_min & 0xff)  << 40 |
+        ((uint64_t)curtime.tm_hour & 0xff) << 32 |
+        ((uint64_t)curtime.tm_mday & 0xff) << 24 |
+        ((uint64_t)curtime.tm_mon & 0xff)  << 16 |
+        ((uint64_t)(curtime.tm_year + 1900) & 0xffff);
+
+    return bcd_time;
+}
+
+/*
+ * Default disk sata address
+ * 0x1221 is the magic number as
+ * present in real hardware,
+ * so use it here, too.
+ */
+static uint64_t megasas_get_sata_addr(uint16_t id)
+{
+    uint64_t addr = (0x1221ULL << 48);
+    return addr & (id << 24);
+}
+
+/*
+ * Frame handling
+ */
+static int megasas_next_index(MegasasState *s, int index, int limit)
+{
+    index++;
+    if (index == limit) {
+        index = 0;
+    }
+    return index;
+}
+
+static MegasasCmd *megasas_lookup_frame(MegasasState *s,
+    target_phys_addr_t frame)
+{
+    MegasasCmd *cmd = NULL;
+    int num = 0, index;
+
+    index = s->reply_queue_head;
+
+    while (num < s->fw_cmds) {
+        if (s->frames[index].pa && s->frames[index].pa == frame) {
+            cmd = &s->frames[index];
+            break;
+        }
+        index = megasas_next_index(s, index, s->fw_cmds);
+        num++;
+    }
+
+    return cmd;
+}
+
+static MegasasCmd *megasas_next_frame(MegasasState *s,
+    target_phys_addr_t frame)
+{
+    MegasasCmd *cmd = NULL;
+    int num = 0, index;
+
+    cmd = megasas_lookup_frame(s, frame);
+    if (cmd) {
+        trace_megasas_qf_found(cmd->index, cmd->pa);
+        return cmd;
+    }
+    index = s->reply_queue_head;
+    num = 0;
+    while (num < s->fw_cmds) {
+        if (!s->frames[index].pa) {
+            cmd = &s->frames[index];
+            break;
+        }
+        index = megasas_next_index(s, index, s->fw_cmds);
+        num++;
+    }
+    if (!cmd) {
+        trace_megasas_qf_failed(frame);
+    }
+    trace_megasas_qf_new(index, cmd);
+    return cmd;
+}
+
+static MegasasCmd *megasas_enqueue_frame(MegasasState *s,
+    target_phys_addr_t frame, uint64_t context, int count)
+{
+    MegasasCmd *cmd = NULL;
+    int frame_size = MFI_FRAME_SIZE * 16;
+    target_phys_addr_t frame_size_p = frame_size;
+
+    cmd = megasas_next_frame(s, frame);
+    /* All frames busy */
+    if (!cmd) {
+        return NULL;
+    }
+    if (!cmd->pa) {
+        cmd->pa = frame;
+        /* Map all possible frames */
+        cmd->frame = cpu_physical_memory_map(frame, &frame_size_p, 0);
+        if (frame_size_p != frame_size) {
+            trace_megasas_qf_map_failed(cmd->index, (unsigned long)frame);
+            if (cmd->frame) {
+                cpu_physical_memory_unmap(cmd->frame, frame_size_p, 0, 0);
+                cmd->frame = NULL;
+                cmd->pa = 0;
+            }
+            s->event_count++;
+            return NULL;
+        }
+        cmd->pa_size = frame_size_p;
+        cmd->context = context;
+        if (!megasas_use_queue64(s)) {
+            cmd->context &= (uint64_t)0xFFFFFFFF;
+        }
+    }
+    cmd->count = count;
+    s->busy++;
+
+    trace_megasas_qf_enqueue(cmd->index, cmd->count, cmd->context,
+                             s->reply_queue_head, s->busy);
+
+    return cmd;
+}
+
+static void megasas_complete_frame(MegasasState *s, uint64_t context)
+{
+    int tail, queue_offset;
+
+    /* Decrement busy count */
+    s->busy--;
+
+    if (s->reply_queue_pa) {
+        /*
+         * Put command on the reply queue.
+         * Context is opaque, but emulation is running in
+         * little endian. So convert it.
+         */
+        tail = s->reply_queue_head;
+        if (megasas_use_queue64(s)) {
+            queue_offset = tail * sizeof(uint64_t);
+            stq_le_phys(s->reply_queue_pa + queue_offset, context);
+        } else {
+            queue_offset = tail * sizeof(uint32_t);
+            stl_le_phys(s->reply_queue_pa + queue_offset, context);
+        }
+        s->reply_queue_head = megasas_next_index(s, tail, s->fw_cmds);
+        trace_megasas_qf_complete(context, tail, queue_offset,
+                                  s->busy, s->doorbell);
+    }
+
+    if (megasas_intr_enabled(s)) {
+        /* Notify HBA */
+        s->doorbell++;
+        if (s->doorbell == 1) {
+            if (msix_enabled(&s->dev)) {
+                trace_megasas_msix_raise(0);
+                msix_notify(&s->dev, 0);
+            } else {
+                trace_megasas_irq_raise();
+                qemu_irq_raise(s->dev.irq[0]);
+            }
+        }
+    } else {
+        trace_megasas_qf_complete_noirq(context);
+    }
+}
+
+static void megasas_reset_frames(MegasasState *s)
+{
+    int i;
+    MegasasCmd *cmd;
+
+    for (i = 0; i < s->fw_cmds; i++) {
+        cmd = &s->frames[i];
+        if (cmd->pa) {
+            cpu_physical_memory_unmap(cmd->frame, cmd->pa_size, 0, 0);
+            cmd->frame = NULL;
+            cmd->pa = 0;
+        }
+    }
+}
+
+static void megasas_abort_command(MegasasCmd *cmd)
+{
+    if (cmd->req) {
+        scsi_req_cancel(cmd->req);
+        cmd->req = NULL;
+    }
+}
+
+static int megasas_init_firmware(MegasasState *s, MegasasCmd *cmd)
+{
+    uint32_t pa_hi, pa_lo;
+    target_phys_addr_t iq_pa, initq_size;
+    struct mfi_init_qinfo *initq;
+    uint32_t flags;
+    int ret = MFI_STAT_OK;
+
+    pa_lo = le32_to_cpu(cmd->frame->init.qinfo_new_addr_lo);
+    pa_hi = le32_to_cpu(cmd->frame->init.qinfo_new_addr_hi);
+    iq_pa = (((uint64_t) pa_hi << 32) | pa_lo);
+    trace_megasas_init_firmware((uint64_t)iq_pa);
+    initq_size = sizeof(*initq);
+    initq = cpu_physical_memory_map(iq_pa, &initq_size, 0);
+    if (!initq || initq_size != sizeof(*initq)) {
+        trace_megasas_initq_map_failed(cmd->index);
+        s->event_count++;
+        ret = MFI_STAT_MEMORY_NOT_AVAILABLE;
+        goto out;
+    }
+    s->reply_queue_len = le32_to_cpu(initq->rq_entries) & 0xFFFF;
+    if (s->reply_queue_len > s->fw_cmds) {
+        trace_megasas_initq_mismatch(s->reply_queue_len, s->fw_cmds);
+        s->event_count++;
+        ret = MFI_STAT_INVALID_PARAMETER;
+        goto out;
+    }
+    pa_lo = le32_to_cpu(initq->rq_addr_lo);
+    pa_hi = le32_to_cpu(initq->rq_addr_hi);
+    s->reply_queue_pa = ((uint64_t) pa_hi << 32) | pa_lo;
+    pa_lo = le32_to_cpu(initq->ci_addr_lo);
+    pa_hi = le32_to_cpu(initq->ci_addr_hi);
+    s->consumer_pa = ((uint64_t) pa_hi << 32) | pa_lo;
+    pa_lo = le32_to_cpu(initq->pi_addr_lo);
+    pa_hi = le32_to_cpu(initq->pi_addr_hi);
+    s->producer_pa = ((uint64_t) pa_hi << 32) | pa_lo;
+    s->reply_queue_head = ldl_le_phys(s->producer_pa);
+    s->reply_queue_tail = ldl_le_phys(s->consumer_pa);
+    flags = le32_to_cpu(initq->flags);
+    if (flags & MFI_QUEUE_FLAG_CONTEXT64) {
+        s->flags |= MEGASAS_MASK_USE_QUEUE64;
+    }
+    trace_megasas_init_queue((unsigned long)s->reply_queue_pa,
+                             s->reply_queue_len, s->reply_queue_head,
+                             s->reply_queue_tail, flags);
+    megasas_reset_frames(s);
+    s->fw_state = MFI_FWSTATE_OPERATIONAL;
+out:
+    if (initq) {
+        cpu_physical_memory_unmap(initq, initq_size, 0, 0);
+    }
+    return ret;
+}
+
+static int megasas_map_dcmd(MegasasState *s, MegasasCmd *cmd)
+{
+    dma_addr_t iov_pa, iov_size;
+
+    cmd->flags = le16_to_cpu(cmd->frame->header.flags);
+    if (!cmd->frame->header.sge_count) {
+        trace_megasas_dcmd_zero_sge(cmd->index);
+        cmd->iov_size = 0;
+        return 0;
+    } else if (cmd->frame->header.sge_count > 1) {
+        trace_megasas_dcmd_invalid_sge(cmd->index,
+                                       cmd->frame->header.sge_count);
+        cmd->iov_size = 0;
+        return -1;
+    }
+    iov_pa = megasas_sgl_get_addr(cmd, &cmd->frame->dcmd.sgl);
+    iov_size = megasas_sgl_get_len(cmd, &cmd->frame->dcmd.sgl);
+    qemu_sglist_init(&cmd->qsg, 1, pci_dma_context(&s->dev));
+    qemu_sglist_add(&cmd->qsg, iov_pa, iov_size);
+    cmd->iov_size = iov_size;
+    return cmd->iov_size;
+}
+
+static void megasas_finish_dcmd(MegasasCmd *cmd, uint32_t iov_size)
+{
+    trace_megasas_finish_dcmd(cmd->index, iov_size);
+
+    if (cmd->frame->header.sge_count) {
+        qemu_sglist_destroy(&cmd->qsg);
+    }
+    if (iov_size > cmd->iov_size) {
+        if (megasas_frame_is_ieee_sgl(cmd)) {
+            cmd->frame->dcmd.sgl.sg_skinny->len = cpu_to_le32(iov_size);
+        } else if (megasas_frame_is_sgl64(cmd)) {
+            cmd->frame->dcmd.sgl.sg64->len = cpu_to_le32(iov_size);
+        } else {
+            cmd->frame->dcmd.sgl.sg32->len = cpu_to_le32(iov_size);
+        }
+    }
+    cmd->iov_size = 0;
+    return;
+}
+
+static int megasas_ctrl_get_info(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_ctrl_info info;
+    size_t dcmd_size = sizeof(info);
+    BusChild *kid;
+    int num_ld_disks = 0;
+    uint16_t sdev_id;
+
+    memset(&info, 0x0, cmd->iov_size);
+    if (cmd->iov_size < dcmd_size) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            dcmd_size);
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    info.pci.vendor = cpu_to_le16(PCI_VENDOR_ID_LSI_LOGIC);
+    info.pci.device = cpu_to_le16(PCI_DEVICE_ID_LSI_SAS1078);
+    info.pci.subvendor = cpu_to_le16(PCI_VENDOR_ID_LSI_LOGIC);
+    info.pci.subdevice = cpu_to_le16(0x1013);
+
+    /*
+     * For some reason the firmware supports
+     * only up to 8 device ports.
+     * Despite supporting a far larger number
+     * of devices for the physical devices.
+     * So just display the first 8 devices
+     * in the device port list, independent
+     * of how many logical devices are actually
+     * present.
+     */
+    info.host.type = MFI_INFO_HOST_PCIE;
+    info.device.type = MFI_INFO_DEV_SAS3G;
+    info.device.port_count = 8;
+    QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {
+        SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child);
+
+        if (num_ld_disks < 8) {
+            sdev_id = ((sdev->id & 0xFF) >> 8) | (sdev->lun & 0xFF);
+            info.device.port_addr[num_ld_disks] =
+                cpu_to_le64(megasas_get_sata_addr(sdev_id));
+        }
+        num_ld_disks++;
+    }
+
+    memcpy(info.product_name, "MegaRAID SAS 8708EM2", 20);
+    snprintf(info.serial_number, 32, "QEMU%08lx",
+             (unsigned long)s & 0xFFFFFFFF);
+    snprintf(info.package_version, 0x60, "%s-QEMU", QEMU_VERSION);
+    memcpy(info.image_component[0].name, "APP", 3);
+    memcpy(info.image_component[0].version, MEGASAS_VERSION "-QEMU", 9);
+    memcpy(info.image_component[0].build_date, __DATE__, 11);
+    memcpy(info.image_component[0].build_time, __TIME__, 8);
+    info.image_component_count = 1;
+    if (s->dev.has_rom) {
+        uint8_t biosver[32];
+        uint8_t *ptr;
+
+        ptr = memory_region_get_ram_ptr(&s->dev.rom);
+        memcpy(biosver, ptr + 0x41, 31);
+        qemu_put_ram_ptr(ptr);
+        memcpy(info.image_component[1].name, "BIOS", 4);
+        memcpy(info.image_component[1].version, biosver,
+               strlen((const char *)biosver));
+        info.image_component_count++;
+    }
+    info.current_fw_time = cpu_to_le32(megasas_fw_time());
+    info.max_arms = 32;
+    info.max_spans = 8;
+    info.max_arrays = MEGASAS_MAX_ARRAYS;
+    info.max_lds = s->fw_luns;
+    info.max_cmds = cpu_to_le16(s->fw_cmds);
+    info.max_sg_elements = cpu_to_le16(s->fw_sge);
+    info.max_request_size = cpu_to_le32(MEGASAS_MAX_SECTORS);
+    info.lds_present = cpu_to_le16(num_ld_disks);
+    info.pd_present = cpu_to_le16(num_ld_disks);
+    info.pd_disks_present = cpu_to_le16(num_ld_disks);
+    info.hw_present = cpu_to_le32(MFI_INFO_HW_NVRAM |
+                                   MFI_INFO_HW_MEM |
+                                   MFI_INFO_HW_FLASH);
+    info.memory_size = cpu_to_le16(512);
+    info.nvram_size = cpu_to_le16(32);
+    info.flash_size = cpu_to_le16(16);
+    info.raid_levels = cpu_to_le32(MFI_INFO_RAID_0);
+    info.adapter_ops = cpu_to_le32(MFI_INFO_AOPS_RBLD_RATE |
+                                    MFI_INFO_AOPS_SELF_DIAGNOSTIC |
+                                    MFI_INFO_AOPS_MIXED_ARRAY);
+    info.ld_ops = cpu_to_le32(MFI_INFO_LDOPS_DISK_CACHE_POLICY |
+                               MFI_INFO_LDOPS_ACCESS_POLICY |
+                               MFI_INFO_LDOPS_IO_POLICY |
+                               MFI_INFO_LDOPS_WRITE_POLICY |
+                               MFI_INFO_LDOPS_READ_POLICY);
+    info.max_strips_per_io = cpu_to_le16(s->fw_sge);
+    info.stripe_sz_ops.min = 3;
+    info.stripe_sz_ops.max = ffs(MEGASAS_MAX_SECTORS + 1) - 1;
+    info.properties.pred_fail_poll_interval = cpu_to_le16(300);
+    info.properties.intr_throttle_cnt = cpu_to_le16(16);
+    info.properties.intr_throttle_timeout = cpu_to_le16(50);
+    info.properties.rebuild_rate = 30;
+    info.properties.patrol_read_rate = 30;
+    info.properties.bgi_rate = 30;
+    info.properties.cc_rate = 30;
+    info.properties.recon_rate = 30;
+    info.properties.cache_flush_interval = 4;
+    info.properties.spinup_drv_cnt = 2;
+    info.properties.spinup_delay = 6;
+    info.properties.ecc_bucket_size = 15;
+    info.properties.ecc_bucket_leak_rate = cpu_to_le16(1440);
+    info.properties.expose_encl_devices = 1;
+    info.properties.OnOffProperties = cpu_to_le32(MFI_CTRL_PROP_EnableJBOD);
+    info.pd_ops = cpu_to_le32(MFI_INFO_PDOPS_FORCE_ONLINE |
+                               MFI_INFO_PDOPS_FORCE_OFFLINE);
+    info.pd_mix_support = cpu_to_le32(MFI_INFO_PDMIX_SAS |
+                                       MFI_INFO_PDMIX_SATA |
+                                       MFI_INFO_PDMIX_LD);
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_mfc_get_defaults(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_defaults info;
+    size_t dcmd_size = sizeof(struct mfi_defaults);
+
+    memset(&info, 0x0, dcmd_size);
+    if (cmd->iov_size < dcmd_size) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            dcmd_size);
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    info.sas_addr = cpu_to_le64(s->sas_addr);
+    info.stripe_size = 3;
+    info.flush_time = 4;
+    info.background_rate = 30;
+    info.allow_mix_in_enclosure = 1;
+    info.allow_mix_in_ld = 1;
+    info.direct_pd_mapping = 1;
+    /* Enable for BIOS support */
+    info.bios_enumerate_lds = 1;
+    info.disable_ctrl_r = 1;
+    info.expose_enclosure_devices = 1;
+    info.disable_preboot_cli = 1;
+    info.cluster_disable = 1;
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_get_bios_info(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_bios_data info;
+    size_t dcmd_size = sizeof(info);
+
+    memset(&info, 0x0, dcmd_size);
+    if (cmd->iov_size < dcmd_size) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            dcmd_size);
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+    info.continue_on_error = 1;
+    info.verbose = 1;
+    if (megasas_is_jbod(s)) {
+        info.expose_all_drives = 1;
+    }
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_get_fw_time(MegasasState *s, MegasasCmd *cmd)
+{
+    uint64_t fw_time;
+    size_t dcmd_size = sizeof(fw_time);
+
+    fw_time = cpu_to_le64(megasas_fw_time());
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)&fw_time, dcmd_size, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_set_fw_time(MegasasState *s, MegasasCmd *cmd)
+{
+    uint64_t fw_time;
+
+    /* This is a dummy; setting of firmware time is not allowed */
+    memcpy(&fw_time, cmd->frame->dcmd.mbox, sizeof(fw_time));
+
+    trace_megasas_dcmd_set_fw_time(cmd->index, fw_time);
+    fw_time = cpu_to_le64(megasas_fw_time());
+    return MFI_STAT_OK;
+}
+
+static int megasas_event_info(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_evt_log_state info;
+    size_t dcmd_size = sizeof(info);
+
+    memset(&info, 0, dcmd_size);
+
+    info.newest_seq_num = cpu_to_le32(s->event_count);
+    info.shutdown_seq_num = cpu_to_le32(s->shutdown_event);
+    info.boot_seq_num = cpu_to_le32(s->boot_event);
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_event_wait(MegasasState *s, MegasasCmd *cmd)
+{
+    union mfi_evt event;
+
+    if (cmd->iov_size < sizeof(struct mfi_evt_detail)) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            sizeof(struct mfi_evt_detail));
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+    s->event_count = cpu_to_le32(cmd->frame->dcmd.mbox[0]);
+    event.word = cpu_to_le32(cmd->frame->dcmd.mbox[4]);
+    s->event_locale = event.members.locale;
+    s->event_class = event.members.class;
+    s->event_cmd = cmd;
+    /* Decrease busy count; event frame doesn't count here */
+    s->busy--;
+    cmd->iov_size = sizeof(struct mfi_evt_detail);
+    return MFI_STAT_INVALID_STATUS;
+}
+
+static int megasas_dcmd_pd_get_list(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_pd_list info;
+    size_t dcmd_size = sizeof(info);
+    BusChild *kid;
+    uint32_t offset, dcmd_limit, num_pd_disks = 0, max_pd_disks;
+    uint16_t sdev_id;
+
+    memset(&info, 0, dcmd_size);
+    offset = 8;
+    dcmd_limit = offset + sizeof(struct mfi_pd_address);
+    if (cmd->iov_size < dcmd_limit) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            dcmd_limit);
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    max_pd_disks = (cmd->iov_size - offset) / sizeof(struct mfi_pd_address);
+    if (max_pd_disks > s->fw_luns) {
+        max_pd_disks = s->fw_luns;
+    }
+
+    QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {
+        SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child);
+
+        sdev_id = ((sdev->id & 0xFF) >> 8) | (sdev->lun & 0xFF);
+        info.addr[num_pd_disks].device_id = cpu_to_le16(sdev_id);
+        info.addr[num_pd_disks].encl_device_id = 0xFFFF;
+        info.addr[num_pd_disks].encl_index = 0;
+        info.addr[num_pd_disks].slot_number = (sdev->id & 0xFF);
+        info.addr[num_pd_disks].scsi_dev_type = sdev->type;
+        info.addr[num_pd_disks].connect_port_bitmap = 0x1;
+        info.addr[num_pd_disks].sas_addr[0] =
+            cpu_to_le64(megasas_get_sata_addr(sdev_id));
+        num_pd_disks++;
+        offset += sizeof(struct mfi_pd_address);
+    }
+    trace_megasas_dcmd_pd_get_list(cmd->index, num_pd_disks,
+                                   max_pd_disks, offset);
+
+    info.size = cpu_to_le32(offset);
+    info.count = cpu_to_le32(num_pd_disks);
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)&info, offset, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_pd_list_query(MegasasState *s, MegasasCmd *cmd)
+{
+    uint16_t flags;
+
+    /* mbox0 contains flags */
+    flags = le16_to_cpu(cmd->frame->dcmd.mbox[0]);
+    trace_megasas_dcmd_pd_list_query(cmd->index, flags);
+    if (flags == MR_PD_QUERY_TYPE_ALL ||
+        megasas_is_jbod(s)) {
+        return megasas_dcmd_pd_get_list(s, cmd);
+    }
+
+    return MFI_STAT_OK;
+}
+
+static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun,
+                                      MegasasCmd *cmd)
+{
+    struct mfi_pd_info *info = cmd->iov_buf;
+    size_t dcmd_size = sizeof(struct mfi_pd_info);
+    BlockConf *conf = &sdev->conf;
+    uint64_t pd_size;
+    uint16_t sdev_id = ((sdev->id & 0xFF) >> 8) | (lun & 0xFF);
+    uint8_t cmdbuf[6];
+    SCSIRequest *req;
+    size_t len, resid;
+
+    if (!cmd->iov_buf) {
+        cmd->iov_buf = g_malloc(dcmd_size);
+        memset(cmd->iov_buf, 0, dcmd_size);
+        info = cmd->iov_buf;
+        info->inquiry_data[0] = 0x7f; /* Force PQual 0x3, PType 0x1f */
+        info->vpd_page83[0] = 0x7f;
+        megasas_setup_inquiry(cmdbuf, 0, sizeof(info->inquiry_data));
+        req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd);
+        if (!req) {
+            trace_megasas_dcmd_req_alloc_failed(cmd->index,
+                                                "PD get info std inquiry");
+            g_free(cmd->iov_buf);
+            cmd->iov_buf = NULL;
+            return MFI_STAT_FLASH_ALLOC_FAIL;
+        }
+        trace_megasas_dcmd_internal_submit(cmd->index,
+                                           "PD get info std inquiry", lun);
+        len = scsi_req_enqueue(req);
+        if (len > 0) {
+            cmd->iov_size = len;
+            scsi_req_continue(req);
+        }
+        return MFI_STAT_INVALID_STATUS;
+    } else if (info->inquiry_data[0] != 0x7f && info->vpd_page83[0] == 0x7f) {
+        megasas_setup_inquiry(cmdbuf, 0x83, sizeof(info->vpd_page83));
+        req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd);
+        if (!req) {
+            trace_megasas_dcmd_req_alloc_failed(cmd->index,
+                                                "PD get info vpd inquiry");
+            return MFI_STAT_FLASH_ALLOC_FAIL;
+        }
+        trace_megasas_dcmd_internal_submit(cmd->index,
+                                           "PD get info vpd inquiry", lun);
+        len = scsi_req_enqueue(req);
+        if (len > 0) {
+            cmd->iov_size = len;
+            scsi_req_continue(req);
+        }
+        return MFI_STAT_INVALID_STATUS;
+    }
+    /* Finished, set FW state */
+    if ((info->inquiry_data[0] >> 5) == 0) {
+        if (megasas_is_jbod(cmd->state)) {
+            info->fw_state = cpu_to_le16(MFI_PD_STATE_SYSTEM);
+        } else {
+            info->fw_state = cpu_to_le16(MFI_PD_STATE_ONLINE);
+        }
+    } else {
+        info->fw_state = cpu_to_le16(MFI_PD_STATE_OFFLINE);
+    }
+
+    info->ref.v.device_id = cpu_to_le16(sdev_id);
+    info->state.ddf.pd_type = cpu_to_le16(MFI_PD_DDF_TYPE_IN_VD|
+                                          MFI_PD_DDF_TYPE_INTF_SAS);
+    bdrv_get_geometry(conf->bs, &pd_size);
+    info->raw_size = cpu_to_le64(pd_size);
+    info->non_coerced_size = cpu_to_le64(pd_size);
+    info->coerced_size = cpu_to_le64(pd_size);
+    info->encl_device_id = 0xFFFF;
+    info->slot_number = (sdev->id & 0xFF);
+    info->path_info.count = 1;
+    info->path_info.sas_addr[0] =
+        cpu_to_le64(megasas_get_sata_addr(sdev_id));
+    info->connected_port_bitmap = 0x1;
+    info->device_speed = 1;
+    info->link_speed = 1;
+    resid = dma_buf_read(cmd->iov_buf, dcmd_size, &cmd->qsg);
+    g_free(cmd->iov_buf);
+    cmd->iov_size = dcmd_size - resid;
+    cmd->iov_buf = NULL;
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_pd_get_info(MegasasState *s, MegasasCmd *cmd)
+{
+    size_t dcmd_size = sizeof(struct mfi_pd_info);
+    uint16_t pd_id;
+    SCSIDevice *sdev = NULL;
+    int retval = MFI_STAT_DEVICE_NOT_FOUND;
+
+    if (cmd->iov_size < dcmd_size) {
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    /* mbox0 has the ID */
+    pd_id = le16_to_cpu(cmd->frame->dcmd.mbox[0]);
+    sdev = scsi_device_find(&s->bus, 0, pd_id, 0);
+    trace_megasas_dcmd_pd_get_info(cmd->index, pd_id);
+
+    if (sdev) {
+        /* Submit inquiry */
+        retval = megasas_pd_get_info_submit(sdev, pd_id, cmd);
+    }
+
+    return retval;
+}
+
+static int megasas_dcmd_ld_get_list(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_ld_list info;
+    size_t dcmd_size = sizeof(info), resid;
+    uint32_t num_ld_disks = 0, max_ld_disks = s->fw_luns;
+    uint64_t ld_size;
+    BusChild *kid;
+
+    memset(&info, 0, dcmd_size);
+    if (cmd->iov_size < dcmd_size) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            dcmd_size);
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    if (megasas_is_jbod(s)) {
+        max_ld_disks = 0;
+    }
+    QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {
+        SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child);
+        BlockConf *conf = &sdev->conf;
+
+        if (num_ld_disks >= max_ld_disks) {
+            break;
+        }
+        /* Logical device size is in blocks */
+        bdrv_get_geometry(conf->bs, &ld_size);
+        info.ld_list[num_ld_disks].ld.v.target_id = sdev->id;
+        info.ld_list[num_ld_disks].state = MFI_LD_STATE_OPTIMAL;
+        info.ld_list[num_ld_disks].size = cpu_to_le64(ld_size);
+        num_ld_disks++;
+    }
+    info.ld_count = cpu_to_le32(num_ld_disks);
+    trace_megasas_dcmd_ld_get_list(cmd->index, num_ld_disks, max_ld_disks);
+
+    resid = dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg);
+    cmd->iov_size = dcmd_size - resid;
+    return MFI_STAT_OK;
+}
+
+static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun,
+                                      MegasasCmd *cmd)
+{
+    struct mfi_ld_info *info = cmd->iov_buf;
+    size_t dcmd_size = sizeof(struct mfi_ld_info);
+    uint8_t cdb[6];
+    SCSIRequest *req;
+    ssize_t len, resid;
+    BlockConf *conf = &sdev->conf;
+    uint16_t sdev_id = ((sdev->id & 0xFF) >> 8) | (lun & 0xFF);
+    uint64_t ld_size;
+
+    if (!cmd->iov_buf) {
+        cmd->iov_buf = g_malloc(dcmd_size);
+        memset(cmd->iov_buf, 0x0, dcmd_size);
+        info = cmd->iov_buf;
+        megasas_setup_inquiry(cdb, 0x83, sizeof(info->vpd_page83));
+        req = scsi_req_new(sdev, cmd->index, lun, cdb, cmd);
+        if (!req) {
+            trace_megasas_dcmd_req_alloc_failed(cmd->index,
+                                                "LD get info vpd inquiry");
+            g_free(cmd->iov_buf);
+            cmd->iov_buf = NULL;
+            return MFI_STAT_FLASH_ALLOC_FAIL;
+        }
+        trace_megasas_dcmd_internal_submit(cmd->index,
+                                           "LD get info vpd inquiry", lun);
+        len = scsi_req_enqueue(req);
+        if (len > 0) {
+            cmd->iov_size = len;
+            scsi_req_continue(req);
+        }
+        return MFI_STAT_INVALID_STATUS;
+    }
+
+    info->ld_config.params.state = MFI_LD_STATE_OPTIMAL;
+    info->ld_config.properties.ld.v.target_id = lun;
+    info->ld_config.params.stripe_size = 3;
+    info->ld_config.params.num_drives = 1;
+    info->ld_config.params.is_consistent = 1;
+    /* Logical device size is in blocks */
+    bdrv_get_geometry(conf->bs, &ld_size);
+    info->size = cpu_to_le64(ld_size);
+    memset(info->ld_config.span, 0, sizeof(info->ld_config.span));
+    info->ld_config.span[0].start_block = 0;
+    info->ld_config.span[0].num_blocks = info->size;
+    info->ld_config.span[0].array_ref = cpu_to_le16(sdev_id);
+
+    resid = dma_buf_read(cmd->iov_buf, dcmd_size, &cmd->qsg);
+    g_free(cmd->iov_buf);
+    cmd->iov_size = dcmd_size - resid;
+    cmd->iov_buf = NULL;
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_ld_get_info(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_ld_info info;
+    size_t dcmd_size = sizeof(info);
+    uint16_t ld_id;
+    uint32_t max_ld_disks = s->fw_luns;
+    SCSIDevice *sdev = NULL;
+    int retval = MFI_STAT_DEVICE_NOT_FOUND;
+
+    if (cmd->iov_size < dcmd_size) {
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    /* mbox0 has the ID */
+    ld_id = le16_to_cpu(cmd->frame->dcmd.mbox[0]);
+    trace_megasas_dcmd_ld_get_info(cmd->index, ld_id);
+
+    if (megasas_is_jbod(s)) {
+        return MFI_STAT_DEVICE_NOT_FOUND;
+    }
+
+    if (ld_id < max_ld_disks) {
+        sdev = scsi_device_find(&s->bus, 0, ld_id, 0);
+    }
+
+    if (sdev) {
+        retval = megasas_ld_get_info_submit(sdev, ld_id, cmd);
+    }
+
+    return retval;
+}
+
+static int megasas_dcmd_cfg_read(MegasasState *s, MegasasCmd *cmd)
+{
+    uint8_t data[4096];
+    struct mfi_config_data *info;
+    int num_pd_disks = 0, array_offset, ld_offset;
+    BusChild *kid;
+
+    if (cmd->iov_size > 4096) {
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {
+        num_pd_disks++;
+    }
+    info = (struct mfi_config_data *)&data;
+    /*
+     * Array mapping:
+     * - One array per SCSI device
+     * - One logical drive per SCSI device
+     *   spanning the entire device
+     */
+    info->array_count = num_pd_disks;
+    info->array_size = sizeof(struct mfi_array) * num_pd_disks;
+    info->log_drv_count = num_pd_disks;
+    info->log_drv_size = sizeof(struct mfi_ld_config) * num_pd_disks;
+    info->spares_count = 0;
+    info->spares_size = sizeof(struct mfi_spare);
+    info->size = sizeof(struct mfi_config_data) + info->array_size +
+        info->log_drv_size;
+    if (info->size > 4096) {
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    array_offset = sizeof(struct mfi_config_data);
+    ld_offset = array_offset + sizeof(struct mfi_array) * num_pd_disks;
+
+    QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {
+        SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child);
+        BlockConf *conf = &sdev->conf;
+        uint16_t sdev_id = ((sdev->id & 0xFF) >> 8) | (sdev->lun & 0xFF);
+        struct mfi_array *array;
+        struct mfi_ld_config *ld;
+        uint64_t pd_size;
+        int i;
+
+        array = (struct mfi_array *)(data + array_offset);
+        bdrv_get_geometry(conf->bs, &pd_size);
+        array->size = cpu_to_le64(pd_size);
+        array->num_drives = 1;
+        array->array_ref = cpu_to_le16(sdev_id);
+        array->pd[0].ref.v.device_id = cpu_to_le16(sdev_id);
+        array->pd[0].ref.v.seq_num = 0;
+        array->pd[0].fw_state = MFI_PD_STATE_ONLINE;
+        array->pd[0].encl.pd = 0xFF;
+        array->pd[0].encl.slot = (sdev->id & 0xFF);
+        for (i = 1; i < MFI_MAX_ROW_SIZE; i++) {
+            array->pd[i].ref.v.device_id = 0xFFFF;
+            array->pd[i].ref.v.seq_num = 0;
+            array->pd[i].fw_state = MFI_PD_STATE_UNCONFIGURED_GOOD;
+            array->pd[i].encl.pd = 0xFF;
+            array->pd[i].encl.slot = 0xFF;
+        }
+        array_offset += sizeof(struct mfi_array);
+        ld = (struct mfi_ld_config *)(data + ld_offset);
+        memset(ld, 0, sizeof(struct mfi_ld_config));
+        ld->properties.ld.v.target_id = (sdev->id & 0xFF);
+        ld->properties.default_cache_policy = MR_LD_CACHE_READ_AHEAD |
+            MR_LD_CACHE_READ_ADAPTIVE;
+        ld->properties.current_cache_policy = MR_LD_CACHE_READ_AHEAD |
+            MR_LD_CACHE_READ_ADAPTIVE;
+        ld->params.state = MFI_LD_STATE_OPTIMAL;
+        ld->params.stripe_size = 3;
+        ld->params.num_drives = 1;
+        ld->params.span_depth = 1;
+        ld->params.is_consistent = 1;
+        ld->span[0].start_block = 0;
+        ld->span[0].num_blocks = cpu_to_le64(pd_size);
+        ld->span[0].array_ref = cpu_to_le16(sdev_id);
+        ld_offset += sizeof(struct mfi_ld_config);
+    }
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)data, info->size, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_get_properties(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_ctrl_props info;
+    size_t dcmd_size = sizeof(info);
+
+    memset(&info, 0x0, dcmd_size);
+    if (cmd->iov_size < dcmd_size) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            dcmd_size);
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+    info.pred_fail_poll_interval = cpu_to_le16(300);
+    info.intr_throttle_cnt = cpu_to_le16(16);
+    info.intr_throttle_timeout = cpu_to_le16(50);
+    info.rebuild_rate = 30;
+    info.patrol_read_rate = 30;
+    info.bgi_rate = 30;
+    info.cc_rate = 30;
+    info.recon_rate = 30;
+    info.cache_flush_interval = 4;
+    info.spinup_drv_cnt = 2;
+    info.spinup_delay = 6;
+    info.ecc_bucket_size = 15;
+    info.ecc_bucket_leak_rate = cpu_to_le16(1440);
+    info.expose_encl_devices = 1;
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_cache_flush(MegasasState *s, MegasasCmd *cmd)
+{
+    qemu_aio_flush();
+    return MFI_STAT_OK;
+}
+
+static int megasas_ctrl_shutdown(MegasasState *s, MegasasCmd *cmd)
+{
+    s->fw_state = MFI_FWSTATE_READY;
+    return MFI_STAT_OK;
+}
+
+static int megasas_cluster_reset_ld(MegasasState *s, MegasasCmd *cmd)
+{
+    return MFI_STAT_INVALID_DCMD;
+}
+
+static int megasas_dcmd_set_properties(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_ctrl_props info;
+    size_t dcmd_size = sizeof(info);
+
+    if (cmd->iov_size < dcmd_size) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            dcmd_size);
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+    dma_buf_write((uint8_t *)&info, cmd->iov_size, &cmd->qsg);
+    trace_megasas_dcmd_unsupported(cmd->index, cmd->iov_size);
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_dummy(MegasasState *s, MegasasCmd *cmd)
+{
+    trace_megasas_dcmd_dummy(cmd->index, cmd->iov_size);
+    return MFI_STAT_OK;
+}
+
+static const struct dcmd_cmd_tbl_t {
+    int opcode;
+    const char *desc;
+    int (*func)(MegasasState *s, MegasasCmd *cmd);
+} dcmd_cmd_tbl[] = {
+    { MFI_DCMD_CTRL_MFI_HOST_MEM_ALLOC, "CTRL_HOST_MEM_ALLOC",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_GET_INFO, "CTRL_GET_INFO",
+      megasas_ctrl_get_info },
+    { MFI_DCMD_CTRL_GET_PROPERTIES, "CTRL_GET_PROPERTIES",
+      megasas_dcmd_get_properties },
+    { MFI_DCMD_CTRL_SET_PROPERTIES, "CTRL_SET_PROPERTIES",
+      megasas_dcmd_set_properties },
+    { MFI_DCMD_CTRL_ALARM_GET, "CTRL_ALARM_GET",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_ALARM_ENABLE, "CTRL_ALARM_ENABLE",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_ALARM_DISABLE, "CTRL_ALARM_DISABLE",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_ALARM_SILENCE, "CTRL_ALARM_SILENCE",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_ALARM_TEST, "CTRL_ALARM_TEST",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_EVENT_GETINFO, "CTRL_EVENT_GETINFO",
+      megasas_event_info },
+    { MFI_DCMD_CTRL_EVENT_GET, "CTRL_EVENT_GET",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_EVENT_WAIT, "CTRL_EVENT_WAIT",
+      megasas_event_wait },
+    { MFI_DCMD_CTRL_SHUTDOWN, "CTRL_SHUTDOWN",
+      megasas_ctrl_shutdown },
+    { MFI_DCMD_HIBERNATE_STANDBY, "CTRL_STANDBY",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_GET_TIME, "CTRL_GET_TIME",
+      megasas_dcmd_get_fw_time },
+    { MFI_DCMD_CTRL_SET_TIME, "CTRL_SET_TIME",
+      megasas_dcmd_set_fw_time },
+    { MFI_DCMD_CTRL_BIOS_DATA_GET, "CTRL_BIOS_DATA_GET",
+      megasas_dcmd_get_bios_info },
+    { MFI_DCMD_CTRL_FACTORY_DEFAULTS, "CTRL_FACTORY_DEFAULTS",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_MFC_DEFAULTS_GET, "CTRL_MFC_DEFAULTS_GET",
+      megasas_mfc_get_defaults },
+    { MFI_DCMD_CTRL_MFC_DEFAULTS_SET, "CTRL_MFC_DEFAULTS_SET",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_CACHE_FLUSH, "CTRL_CACHE_FLUSH",
+      megasas_cache_flush },
+    { MFI_DCMD_PD_GET_LIST, "PD_GET_LIST",
+      megasas_dcmd_pd_get_list },
+    { MFI_DCMD_PD_LIST_QUERY, "PD_LIST_QUERY",
+      megasas_dcmd_pd_list_query },
+    { MFI_DCMD_PD_GET_INFO, "PD_GET_INFO",
+      megasas_dcmd_pd_get_info },
+    { MFI_DCMD_PD_STATE_SET, "PD_STATE_SET",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_PD_REBUILD, "PD_REBUILD",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_PD_BLINK, "PD_BLINK",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_PD_UNBLINK, "PD_UNBLINK",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_LD_GET_LIST, "LD_GET_LIST",
+      megasas_dcmd_ld_get_list},
+    { MFI_DCMD_LD_GET_INFO, "LD_GET_INFO",
+      megasas_dcmd_ld_get_info },
+    { MFI_DCMD_LD_GET_PROP, "LD_GET_PROP",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_LD_SET_PROP, "LD_SET_PROP",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_LD_DELETE, "LD_DELETE",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CFG_READ, "CFG_READ",
+      megasas_dcmd_cfg_read },
+    { MFI_DCMD_CFG_ADD, "CFG_ADD",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CFG_CLEAR, "CFG_CLEAR",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CFG_FOREIGN_READ, "CFG_FOREIGN_READ",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CFG_FOREIGN_IMPORT, "CFG_FOREIGN_IMPORT",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_BBU_STATUS, "BBU_STATUS",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_BBU_CAPACITY_INFO, "BBU_CAPACITY_INFO",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_BBU_DESIGN_INFO, "BBU_DESIGN_INFO",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_BBU_PROP_GET, "BBU_PROP_GET",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CLUSTER, "CLUSTER",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CLUSTER_RESET_ALL, "CLUSTER_RESET_ALL",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CLUSTER_RESET_LD, "CLUSTER_RESET_LD",
+      megasas_cluster_reset_ld },
+    { -1, NULL, NULL }
+};
+
+static int megasas_handle_dcmd(MegasasState *s, MegasasCmd *cmd)
+{
+    int opcode, len;
+    int retval = 0;
+    const struct dcmd_cmd_tbl_t *cmdptr = dcmd_cmd_tbl;
+
+    opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
+    trace_megasas_handle_dcmd(cmd->index, opcode);
+    len = megasas_map_dcmd(s, cmd);
+    if (len < 0) {
+        return MFI_STAT_MEMORY_NOT_AVAILABLE;
+    }
+    while (cmdptr->opcode != -1 && cmdptr->opcode != opcode) {
+        cmdptr++;
+    }
+    if (cmdptr->opcode == -1) {
+        trace_megasas_dcmd_unhandled(cmd->index, opcode, len);
+        retval = megasas_dcmd_dummy(s, cmd);
+    } else {
+        trace_megasas_dcmd_enter(cmd->index, cmdptr->desc, len);
+        retval = cmdptr->func(s, cmd);
+    }
+    if (retval != MFI_STAT_INVALID_STATUS) {
+        megasas_finish_dcmd(cmd, len);
+    }
+    return retval;
+}
+
+static int megasas_finish_internal_dcmd(MegasasCmd *cmd,
+                                        SCSIRequest *req)
+{
+    int opcode;
+    int retval = MFI_STAT_OK;
+    int lun = req->lun;
+
+    opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
+    scsi_req_unref(req);
+    trace_megasas_dcmd_internal_finish(cmd->index, opcode, lun);
+    switch (opcode) {
+    case MFI_DCMD_PD_GET_INFO:
+        retval = megasas_pd_get_info_submit(req->dev, lun, cmd);
+        break;
+    case MFI_DCMD_LD_GET_INFO:
+        retval = megasas_ld_get_info_submit(req->dev, lun, cmd);
+        break;
+    default:
+        trace_megasas_dcmd_internal_invalid(cmd->index, opcode);
+        retval = MFI_STAT_INVALID_DCMD;
+        break;
+    }
+    if (retval != MFI_STAT_INVALID_STATUS) {
+        megasas_finish_dcmd(cmd, cmd->iov_size);
+    }
+    return retval;
+}
+
+static int megasas_enqueue_req(MegasasCmd *cmd, bool is_write)
+{
+    int len;
+
+    len = scsi_req_enqueue(cmd->req);
+    if (len < 0) {
+        len = -len;
+    }
+    if (len > 0) {
+        if (len > cmd->iov_size) {
+            if (is_write) {
+                trace_megasas_iov_write_overflow(cmd->index, len,
+                                                 cmd->iov_size);
+            } else {
+                trace_megasas_iov_read_overflow(cmd->index, len,
+                                                cmd->iov_size);
+            }
+        }
+        if (len < cmd->iov_size) {
+            if (is_write) {
+                trace_megasas_iov_write_underflow(cmd->index, len,
+                                                  cmd->iov_size);
+            } else {
+                trace_megasas_iov_read_underflow(cmd->index, len,
+                                                 cmd->iov_size);
+            }
+            cmd->iov_size = len;
+        }
+        scsi_req_continue(cmd->req);
+    }
+    return len;
+}
+
+static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd,
+                               bool is_logical)
+{
+    uint8_t *cdb;
+    int len;
+    bool is_write;
+    struct SCSIDevice *sdev = NULL;
+
+    cdb = cmd->frame->pass.cdb;
+
+    if (cmd->frame->header.target_id < s->fw_luns) {
+        sdev = scsi_device_find(&s->bus, 0, cmd->frame->header.target_id,
+                                cmd->frame->header.lun_id);
+    }
+    cmd->iov_size = le32_to_cpu(cmd->frame->header.data_len);
+    trace_megasas_handle_scsi(mfi_frame_desc[cmd->frame->header.frame_cmd],
+                              is_logical, cmd->frame->header.target_id,
+                              cmd->frame->header.lun_id, sdev, cmd->iov_size);
+
+    if (!sdev || (megasas_is_jbod(s) && is_logical)) {
+        trace_megasas_scsi_target_not_present(
+            mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical,
+            cmd->frame->header.target_id, cmd->frame->header.lun_id);
+        return MFI_STAT_DEVICE_NOT_FOUND;
+    }
+
+    if (cmd->frame->header.cdb_len > 16) {
+        trace_megasas_scsi_invalid_cdb_len(
+                mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical,
+                cmd->frame->header.target_id, cmd->frame->header.lun_id,
+                cmd->frame->header.cdb_len);
+        megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE));
+        cmd->frame->header.scsi_status = CHECK_CONDITION;
+        s->event_count++;
+        return MFI_STAT_SCSI_DONE_WITH_ERROR;
+    }
+
+    if (megasas_map_sgl(s, cmd, &cmd->frame->pass.sgl)) {
+        megasas_write_sense(cmd, SENSE_CODE(TARGET_FAILURE));
+        cmd->frame->header.scsi_status = CHECK_CONDITION;
+        s->event_count++;
+        return MFI_STAT_SCSI_DONE_WITH_ERROR;
+    }
+
+    cmd->req = scsi_req_new(sdev, cmd->index,
+                            cmd->frame->header.lun_id, cdb, cmd);
+    if (!cmd->req) {
+        trace_megasas_scsi_req_alloc_failed(
+                mfi_frame_desc[cmd->frame->header.frame_cmd],
+                cmd->frame->header.target_id, cmd->frame->header.lun_id);
+        megasas_write_sense(cmd, SENSE_CODE(NO_SENSE));
+        cmd->frame->header.scsi_status = BUSY;
+        s->event_count++;
+        return MFI_STAT_SCSI_DONE_WITH_ERROR;
+    }
+
+    is_write = (cmd->req->cmd.mode == SCSI_XFER_TO_DEV);
+    len = megasas_enqueue_req(cmd, is_write);
+    if (len > 0) {
+        if (is_write) {
+            trace_megasas_scsi_write_start(cmd->index, len);
+        } else {
+            trace_megasas_scsi_read_start(cmd->index, len);
+        }
+    } else {
+        trace_megasas_scsi_nodata(cmd->index);
+    }
+    return MFI_STAT_INVALID_STATUS;
+}
+
+static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd)
+{
+    uint32_t lba_count, lba_start_hi, lba_start_lo;
+    uint64_t lba_start;
+    bool is_write = (cmd->frame->header.frame_cmd == MFI_CMD_LD_WRITE);
+    uint8_t cdb[16];
+    int len;
+    struct SCSIDevice *sdev = NULL;
+
+    lba_count = le32_to_cpu(cmd->frame->io.header.data_len);
+    lba_start_lo = le32_to_cpu(cmd->frame->io.lba_lo);
+    lba_start_hi = le32_to_cpu(cmd->frame->io.lba_hi);
+    lba_start = ((uint64_t)lba_start_hi << 32) | lba_start_lo;
+
+    if (cmd->frame->header.target_id < s->fw_luns) {
+        sdev = scsi_device_find(&s->bus, 0, cmd->frame->header.target_id,
+                                cmd->frame->header.lun_id);
+    }
+
+    trace_megasas_handle_io(cmd->index,
+                            mfi_frame_desc[cmd->frame->header.frame_cmd],
+                            cmd->frame->header.target_id,
+                            cmd->frame->header.lun_id,
+                            (unsigned long)lba_start, (unsigned long)lba_count);
+    if (!sdev) {
+        trace_megasas_io_target_not_present(cmd->index,
+            mfi_frame_desc[cmd->frame->header.frame_cmd],
+            cmd->frame->header.target_id, cmd->frame->header.lun_id);
+        return MFI_STAT_DEVICE_NOT_FOUND;
+    }
+
+    if (cmd->frame->header.cdb_len > 16) {
+        trace_megasas_scsi_invalid_cdb_len(
+            mfi_frame_desc[cmd->frame->header.frame_cmd], 1,
+            cmd->frame->header.target_id, cmd->frame->header.lun_id,
+            cmd->frame->header.cdb_len);
+        megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE));
+        cmd->frame->header.scsi_status = CHECK_CONDITION;
+        s->event_count++;
+        return MFI_STAT_SCSI_DONE_WITH_ERROR;
+    }
+
+    cmd->iov_size = lba_count * sdev->blocksize;
+    if (megasas_map_sgl(s, cmd, &cmd->frame->io.sgl)) {
+        megasas_write_sense(cmd, SENSE_CODE(TARGET_FAILURE));
+        cmd->frame->header.scsi_status = CHECK_CONDITION;
+        s->event_count++;
+        return MFI_STAT_SCSI_DONE_WITH_ERROR;
+    }
+
+    megasas_encode_lba(cdb, lba_start, lba_count, is_write);
+    cmd->req = scsi_req_new(sdev, cmd->index,
+                            cmd->frame->header.lun_id, cdb, cmd);
+    if (!cmd->req) {
+        trace_megasas_scsi_req_alloc_failed(
+            mfi_frame_desc[cmd->frame->header.frame_cmd],
+            cmd->frame->header.target_id, cmd->frame->header.lun_id);
+        megasas_write_sense(cmd, SENSE_CODE(NO_SENSE));
+        cmd->frame->header.scsi_status = BUSY;
+        s->event_count++;
+        return MFI_STAT_SCSI_DONE_WITH_ERROR;
+    }
+    len = megasas_enqueue_req(cmd, is_write);
+    if (len > 0) {
+        if (is_write) {
+            trace_megasas_io_write_start(cmd->index, lba_start, lba_count, len);
+        } else {
+            trace_megasas_io_read_start(cmd->index, lba_start, lba_count, len);
+        }
+    }
+    return MFI_STAT_INVALID_STATUS;
+}
+
+static int megasas_finish_internal_command(MegasasCmd *cmd,
+                                           SCSIRequest *req, size_t resid)
+{
+    int retval = MFI_STAT_INVALID_CMD;
+
+    if (cmd->frame->header.frame_cmd == MFI_CMD_DCMD) {
+        cmd->iov_size -= resid;
+        retval = megasas_finish_internal_dcmd(cmd, req);
+    }
+    return retval;
+}
+
+static QEMUSGList *megasas_get_sg_list(SCSIRequest *req)
+{
+    MegasasCmd *cmd = req->hba_private;
+
+    if (cmd->frame->header.frame_cmd == MFI_CMD_DCMD) {
+        return NULL;
+    } else {
+        return &cmd->qsg;
+    }
+}
+
+static void megasas_xfer_complete(SCSIRequest *req, uint32_t len)
+{
+    MegasasCmd *cmd = req->hba_private;
+    uint8_t *buf;
+    uint32_t opcode;
+
+    trace_megasas_io_complete(cmd->index, len);
+
+    if (cmd->frame->header.frame_cmd != MFI_CMD_DCMD) {
+        scsi_req_continue(req);
+        return;
+    }
+
+    buf = scsi_req_get_buf(req);
+    opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
+    if (opcode == MFI_DCMD_PD_GET_INFO && cmd->iov_buf) {
+        struct mfi_pd_info *info = cmd->iov_buf;
+
+        if (info->inquiry_data[0] == 0x7f) {
+            memset(info->inquiry_data, 0, sizeof(info->inquiry_data));
+            memcpy(info->inquiry_data, buf, len);
+        } else if (info->vpd_page83[0] == 0x7f) {
+            memset(info->vpd_page83, 0, sizeof(info->vpd_page83));
+            memcpy(info->vpd_page83, buf, len);
+        }
+        scsi_req_continue(req);
+    } else if (opcode == MFI_DCMD_LD_GET_INFO) {
+        struct mfi_ld_info *info = cmd->iov_buf;
+
+        if (cmd->iov_buf) {
+            memcpy(info->vpd_page83, buf, sizeof(info->vpd_page83));
+            scsi_req_continue(req);
+        }
+    }
+}
+
+static void megasas_command_complete(SCSIRequest *req, uint32_t status,
+                                     size_t resid)
+{
+    MegasasCmd *cmd = req->hba_private;
+    uint8_t cmd_status = MFI_STAT_OK;
+
+    trace_megasas_command_complete(cmd->index, status, resid);
+
+    if (cmd->req != req) {
+        /*
+         * Internal command complete
+         */
+        cmd_status = megasas_finish_internal_command(cmd, req, resid);
+        if (cmd_status == MFI_STAT_INVALID_STATUS) {
+            return;
+        }
+    } else {
+        req->status = status;
+        trace_megasas_scsi_complete(cmd->index, req->status,
+                                    cmd->iov_size, req->cmd.xfer);
+        if (req->status != GOOD) {
+            cmd_status = MFI_STAT_SCSI_DONE_WITH_ERROR;
+        }
+        if (req->status == CHECK_CONDITION) {
+            megasas_copy_sense(cmd);
+        }
+
+        megasas_unmap_sgl(cmd);
+        cmd->frame->header.scsi_status = req->status;
+        scsi_req_unref(cmd->req);
+        cmd->req = NULL;
+    }
+    cmd->frame->header.cmd_status = cmd_status;
+    megasas_complete_frame(cmd->state, cmd->context);
+}
+
+static void megasas_command_cancel(SCSIRequest *req)
+{
+    MegasasCmd *cmd = req->hba_private;
+
+    if (cmd) {
+        megasas_abort_command(cmd);
+    } else {
+        scsi_req_unref(req);
+    }
+}
+
+static int megasas_handle_abort(MegasasState *s, MegasasCmd *cmd)
+{
+    uint64_t abort_ctx = le64_to_cpu(cmd->frame->abort.abort_context);
+    target_phys_addr_t abort_addr, addr_hi, addr_lo;
+    MegasasCmd *abort_cmd;
+
+    addr_hi = le32_to_cpu(cmd->frame->abort.abort_mfi_addr_hi);
+    addr_lo = le32_to_cpu(cmd->frame->abort.abort_mfi_addr_lo);
+    abort_addr = ((uint64_t)addr_hi << 32) | addr_lo;
+
+    abort_cmd = megasas_lookup_frame(s, abort_addr);
+    if (!abort_cmd) {
+        trace_megasas_abort_no_cmd(cmd->index, abort_ctx);
+        s->event_count++;
+        return MFI_STAT_OK;
+    }
+    if (!megasas_use_queue64(s)) {
+        abort_ctx &= (uint64_t)0xFFFFFFFF;
+    }
+    if (abort_cmd->context != abort_ctx) {
+        trace_megasas_abort_invalid_context(cmd->index, abort_cmd->index,
+                                            abort_cmd->context);
+        s->event_count++;
+        return MFI_STAT_ABORT_NOT_POSSIBLE;
+    }
+    trace_megasas_abort_frame(cmd->index, abort_cmd->index);
+    megasas_abort_command(abort_cmd);
+    if (!s->event_cmd || abort_cmd != s->event_cmd) {
+        s->event_cmd = NULL;
+    }
+    s->event_count++;
+    return MFI_STAT_OK;
+}
+
+static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr,
+                                 uint32_t frame_count)
+{
+    uint8_t frame_status = MFI_STAT_INVALID_CMD;
+    uint64_t frame_context;
+    MegasasCmd *cmd;
+
+    /*
+     * Always read 64bit context, top bits will be
+     * masked out if required in megasas_enqueue_frame()
+     */
+    frame_context = megasas_frame_get_context(frame_addr);
+
+    cmd = megasas_enqueue_frame(s, frame_addr, frame_context, frame_count);
+    if (!cmd) {
+        /* reply queue full */
+        trace_megasas_frame_busy(frame_addr);
+        megasas_frame_set_scsi_status(frame_addr, BUSY);
+        megasas_frame_set_cmd_status(frame_addr, MFI_STAT_SCSI_DONE_WITH_ERROR);
+        megasas_complete_frame(s, frame_context);
+        s->event_count++;
+        return;
+    }
+    switch (cmd->frame->header.frame_cmd) {
+    case MFI_CMD_INIT:
+        frame_status = megasas_init_firmware(s, cmd);
+        break;
+    case MFI_CMD_DCMD:
+        frame_status = megasas_handle_dcmd(s, cmd);
+        break;
+    case MFI_CMD_ABORT:
+        frame_status = megasas_handle_abort(s, cmd);
+        break;
+    case MFI_CMD_PD_SCSI_IO:
+        frame_status = megasas_handle_scsi(s, cmd, 0);
+        break;
+    case MFI_CMD_LD_SCSI_IO:
+        frame_status = megasas_handle_scsi(s, cmd, 1);
+        break;
+    case MFI_CMD_LD_READ:
+    case MFI_CMD_LD_WRITE:
+        frame_status = megasas_handle_io(s, cmd);
+        break;
+    default:
+        trace_megasas_unhandled_frame_cmd(cmd->index,
+                                          cmd->frame->header.frame_cmd);
+        s->event_count++;
+        break;
+    }
+    if (frame_status != MFI_STAT_INVALID_STATUS) {
+        if (cmd->frame) {
+            cmd->frame->header.cmd_status = frame_status;
+        } else {
+            megasas_frame_set_cmd_status(frame_addr, frame_status);
+        }
+        megasas_complete_frame(s, cmd->context);
+    }
+}
+
+static uint64_t megasas_mmio_read(void *opaque, target_phys_addr_t addr,
+                                  unsigned size)
+{
+    MegasasState *s = opaque;
+    uint32_t retval = 0;
+
+    switch (addr) {
+    case MFI_IDB:
+        retval = 0;
+        break;
+    case MFI_OMSG0:
+    case MFI_OSP0:
+        retval = (megasas_use_msix(s) ? MFI_FWSTATE_MSIX_SUPPORTED : 0) |
+            (s->fw_state & MFI_FWSTATE_MASK) |
+            ((s->fw_sge & 0xff) << 16) |
+            (s->fw_cmds & 0xFFFF);
+        break;
+    case MFI_OSTS:
+        if (megasas_intr_enabled(s) && s->doorbell) {
+            retval = MFI_1078_RM | 1;
+        }
+        break;
+    case MFI_OMSK:
+        retval = s->intr_mask;
+        break;
+    case MFI_ODCR0:
+        retval = s->doorbell;
+        break;
+    default:
+        trace_megasas_mmio_invalid_readl(addr);
+        break;
+    }
+    trace_megasas_mmio_readl(addr, retval);
+    return retval;
+}
+
+static void megasas_mmio_write(void *opaque, target_phys_addr_t addr,
+                               uint64_t val, unsigned size)
+{
+    MegasasState *s = opaque;
+    uint64_t frame_addr;
+    uint32_t frame_count;
+    int i;
+
+    trace_megasas_mmio_writel(addr, val);
+    switch (addr) {
+    case MFI_IDB:
+        if (val & MFI_FWINIT_ABORT) {
+            /* Abort all pending cmds */
+            for (i = 0; i < s->fw_cmds; i++) {
+                megasas_abort_command(&s->frames[i]);
+            }
+        }
+        if (val & MFI_FWINIT_READY) {
+            /* move to FW READY */
+            megasas_soft_reset(s);
+        }
+        if (val & MFI_FWINIT_MFIMODE) {
+            /* discard MFIs */
+        }
+        break;
+    case MFI_OMSK:
+        s->intr_mask = val;
+        if (!megasas_intr_enabled(s) && !msix_enabled(&s->dev)) {
+            trace_megasas_irq_lower();
+            qemu_irq_lower(s->dev.irq[0]);
+        }
+        if (megasas_intr_enabled(s)) {
+            trace_megasas_intr_enabled();
+        } else {
+            trace_megasas_intr_disabled();
+        }
+        break;
+    case MFI_ODCR0:
+        s->doorbell = 0;
+        if (s->producer_pa && megasas_intr_enabled(s)) {
+            /* Update reply queue pointer */
+            trace_megasas_qf_update(s->reply_queue_head, s->busy);
+            stl_le_phys(s->producer_pa, s->reply_queue_head);
+            if (!msix_enabled(&s->dev)) {
+                trace_megasas_irq_lower();
+                qemu_irq_lower(s->dev.irq[0]);
+            }
+        }
+        break;
+    case MFI_IQPH:
+        /* Received high 32 bits of a 64 bit MFI frame address */
+        s->frame_hi = val;
+        break;
+    case MFI_IQPL:
+        /* Received low 32 bits of a 64 bit MFI frame address */
+    case MFI_IQP:
+        /* Received 32 bit MFI frame address */
+        frame_addr = (val & ~0x1F);
+        /* Add possible 64 bit offset */
+        frame_addr |= ((uint64_t)s->frame_hi << 32);
+        s->frame_hi = 0;
+        frame_count = (val >> 1) & 0xF;
+        megasas_handle_frame(s, frame_addr, frame_count);
+        break;
+    default:
+        trace_megasas_mmio_invalid_writel(addr, val);
+        break;
+    }
+}
+
+static const MemoryRegionOps megasas_mmio_ops = {
+    .read = megasas_mmio_read,
+    .write = megasas_mmio_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    }
+};
+
+static uint64_t megasas_port_read(void *opaque, target_phys_addr_t addr,
+                                  unsigned size)
+{
+    return megasas_mmio_read(opaque, addr & 0xff, size);
+}
+
+static void megasas_port_write(void *opaque, target_phys_addr_t addr,
+                               uint64_t val, unsigned size)
+{
+    megasas_mmio_write(opaque, addr & 0xff, val, size);
+}
+
+static const MemoryRegionOps megasas_port_ops = {
+    .read = megasas_port_read,
+    .write = megasas_port_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    }
+};
+
+static uint64_t megasas_queue_read(void *opaque, target_phys_addr_t addr,
+                                   unsigned size)
+{
+    return 0;
+}
+
+static const MemoryRegionOps megasas_queue_ops = {
+    .read = megasas_queue_read,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    }
+};
+
+static void megasas_soft_reset(MegasasState *s)
+{
+    int i;
+    MegasasCmd *cmd;
+
+    trace_megasas_reset();
+    for (i = 0; i < s->fw_cmds; i++) {
+        cmd = &s->frames[i];
+        megasas_abort_command(cmd);
+    }
+    megasas_reset_frames(s);
+    s->reply_queue_len = s->fw_cmds;
+    s->reply_queue_pa = 0;
+    s->consumer_pa = 0;
+    s->producer_pa = 0;
+    s->fw_state = MFI_FWSTATE_READY;
+    s->doorbell = 0;
+    s->intr_mask = MEGASAS_INTR_DISABLED_MASK;
+    s->frame_hi = 0;
+    s->flags &= ~MEGASAS_MASK_USE_QUEUE64;
+    s->event_count++;
+    s->boot_event = s->event_count;
+}
+
+static void megasas_scsi_reset(DeviceState *dev)
+{
+    MegasasState *s = DO_UPCAST(MegasasState, dev.qdev, dev);
+
+    megasas_soft_reset(s);
+}
+
+static const VMStateDescription vmstate_megasas = {
+    .name = "megasas",
+    .version_id = 0,
+    .minimum_version_id = 0,
+    .minimum_version_id_old = 0,
+    .fields      = (VMStateField[]) {
+        VMSTATE_PCI_DEVICE(dev, MegasasState),
+
+        VMSTATE_INT32(fw_state, MegasasState),
+        VMSTATE_INT32(intr_mask, MegasasState),
+        VMSTATE_INT32(doorbell, MegasasState),
+        VMSTATE_UINT64(reply_queue_pa, MegasasState),
+        VMSTATE_UINT64(consumer_pa, MegasasState),
+        VMSTATE_UINT64(producer_pa, MegasasState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void megasas_scsi_uninit(PCIDevice *d)
+{
+    MegasasState *s = DO_UPCAST(MegasasState, dev, d);
+
+#ifdef USE_MSIX
+    msix_uninit(&s->dev, &s->mmio_io);
+#endif
+    memory_region_destroy(&s->mmio_io);
+    memory_region_destroy(&s->port_io);
+    memory_region_destroy(&s->queue_io);
+}
+
+static const struct SCSIBusInfo megasas_scsi_info = {
+    .tcq = true,
+    .max_target = MFI_MAX_LD,
+    .max_lun = 255,
+
+    .transfer_data = megasas_xfer_complete,
+    .get_sg_list = megasas_get_sg_list,
+    .complete = megasas_command_complete,
+    .cancel = megasas_command_cancel,
+};
+
+static int megasas_scsi_init(PCIDevice *dev)
+{
+    MegasasState *s = DO_UPCAST(MegasasState, dev, dev);
+    uint8_t *pci_conf;
+    int i, bar_type;
+
+    pci_conf = s->dev.config;
+
+    /* PCI latency timer = 0 */
+    pci_conf[PCI_LATENCY_TIMER] = 0;
+    /* Interrupt pin 1 */
+    pci_conf[PCI_INTERRUPT_PIN] = 0x01;
+
+    memory_region_init_io(&s->mmio_io, &megasas_mmio_ops, s,
+                          "megasas-mmio", 0x4000);
+    memory_region_init_io(&s->port_io, &megasas_port_ops, s,
+                          "megasas-io", 256);
+    memory_region_init_io(&s->queue_io, &megasas_queue_ops, s,
+                          "megasas-queue", 0x40000);
+
+#ifdef USE_MSIX
+    /* MSI-X support is currently broken */
+    if (megasas_use_msix(s) &&
+        msix_init(&s->dev, 15, &s->mmio_io, 0, 0x2000)) {
+        s->flags &= ~MEGASAS_MASK_USE_MSIX;
+    }
+#else
+    s->flags &= ~MEGASAS_MASK_USE_MSIX;
+#endif
+
+    bar_type = PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64;
+    pci_register_bar(&s->dev, 0, bar_type, &s->mmio_io);
+    pci_register_bar(&s->dev, 2, PCI_BASE_ADDRESS_SPACE_IO, &s->port_io);
+    pci_register_bar(&s->dev, 3, bar_type, &s->queue_io);
+
+    if (megasas_use_msix(s)) {
+        msix_vector_use(&s->dev, 0);
+    }
+
+    if (!s->sas_addr) {
+        s->sas_addr = ((NAA_LOCALLY_ASSIGNED_ID << 24) |
+                       IEEE_COMPANY_LOCALLY_ASSIGNED) << 36;
+        s->sas_addr |= (pci_bus_num(dev->bus) << 16);
+        s->sas_addr |= (PCI_SLOT(dev->devfn) << 8);
+        s->sas_addr |= PCI_FUNC(dev->devfn);
+    }
+    if (s->fw_sge >= MEGASAS_MAX_SGE - MFI_PASS_FRAME_SIZE) {
+        s->fw_sge = MEGASAS_MAX_SGE - MFI_PASS_FRAME_SIZE;
+    } else if (s->fw_sge >= 128 - MFI_PASS_FRAME_SIZE) {
+        s->fw_sge = 128 - MFI_PASS_FRAME_SIZE;
+    } else {
+        s->fw_sge = 64 - MFI_PASS_FRAME_SIZE;
+    }
+    if (s->fw_cmds > MEGASAS_MAX_FRAMES) {
+        s->fw_cmds = MEGASAS_MAX_FRAMES;
+    }
+    trace_megasas_init(s->fw_sge, s->fw_cmds,
+                       megasas_use_msix(s) ? "MSI-X" : "INTx",
+                       megasas_is_jbod(s) ? "jbod" : "raid");
+    s->fw_luns = (MFI_MAX_LD > MAX_SCSI_DEVS) ?
+        MAX_SCSI_DEVS : MFI_MAX_LD;
+    s->producer_pa = 0;
+    s->consumer_pa = 0;
+    for (i = 0; i < s->fw_cmds; i++) {
+        s->frames[i].index = i;
+        s->frames[i].context = -1;
+        s->frames[i].pa = 0;
+        s->frames[i].state = s;
+    }
+
+    scsi_bus_new(&s->bus, &dev->qdev, &megasas_scsi_info);
+    scsi_bus_legacy_handle_cmdline(&s->bus);
+    return 0;
+}
+
+static Property megasas_properties[] = {
+    DEFINE_PROP_UINT32("max_sge", MegasasState, fw_sge,
+                       MEGASAS_DEFAULT_SGE),
+    DEFINE_PROP_UINT32("max_cmds", MegasasState, fw_cmds,
+                       MEGASAS_DEFAULT_FRAMES),
+    DEFINE_PROP_HEX64("sas_address", MegasasState, sas_addr, 0),
+#ifdef USE_MSIX
+    DEFINE_PROP_BIT("use_msix", MegasasState, flags,
+                    MEGASAS_FLAG_USE_MSIX, false),
+#endif
+    DEFINE_PROP_BIT("use_jbod", MegasasState, flags,
+                    MEGASAS_FLAG_USE_JBOD, false),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void megasas_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
+
+    pc->init = megasas_scsi_init;
+    pc->exit = megasas_scsi_uninit;
+    pc->vendor_id = PCI_VENDOR_ID_LSI_LOGIC;
+    pc->device_id = PCI_DEVICE_ID_LSI_SAS1078;
+    pc->subsystem_vendor_id = PCI_VENDOR_ID_LSI_LOGIC;
+    pc->subsystem_id = 0x1013;
+    pc->class_id = PCI_CLASS_STORAGE_RAID;
+    dc->props = megasas_properties;
+    dc->reset = megasas_scsi_reset;
+    dc->vmsd = &vmstate_megasas;
+    dc->desc = "LSI MegaRAID SAS 1078";
+}
+
+static const TypeInfo megasas_info = {
+    .name  = "megasas",
+    .parent = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(MegasasState),
+    .class_init = megasas_class_init,
+};
+
+static void megasas_register_types(void)
+{
+    type_register_static(&megasas_info);
+}
+
+type_init(megasas_register_types)
diff --git a/hw/mfi.h b/hw/mfi.h
new file mode 100644
index 0000000000..436b6906b1
--- /dev/null
+++ b/hw/mfi.h
@@ -0,0 +1,1249 @@
+/*
+ * NetBSD header file, copied from
+ * http://gitorious.org/freebsd/freebsd/blobs/HEAD/sys/dev/mfi/mfireg.h
+ */
+/*-
+ * Copyright (c) 2006 IronPort Systems
+ * Copyright (c) 2007 LSI Corp.
+ * Copyright (c) 2007 Rajesh Prabhakaran.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef MFI_REG_H
+#define MFI_REG_H
+
+/*
+ * MegaRAID SAS MFI firmware definitions
+ */
+
+/*
+ * Start with the register set.  All registers are 32 bits wide.
+ * The usual Intel IOP style setup.
+ */
+#define MFI_IMSG0 0x10    /* Inbound message 0 */
+#define MFI_IMSG1 0x14    /* Inbound message 1 */
+#define MFI_OMSG0 0x18    /* Outbound message 0 */
+#define MFI_OMSG1 0x1c    /* Outbound message 1 */
+#define MFI_IDB   0x20    /* Inbound doorbell */
+#define MFI_ISTS  0x24    /* Inbound interrupt status */
+#define MFI_IMSK  0x28    /* Inbound interrupt mask */
+#define MFI_ODB   0x2c    /* Outbound doorbell */
+#define MFI_OSTS  0x30    /* Outbound interrupt status */
+#define MFI_OMSK  0x34    /* Outbound interrupt mask */
+#define MFI_IQP   0x40    /* Inbound queue port */
+#define MFI_OQP   0x44    /* Outbound queue port */
+
+/*
+ * 1078 specific related register
+ */
+#define MFI_ODR0        0x9c            /* outbound doorbell register0 */
+#define MFI_ODCR0       0xa0            /* outbound doorbell clear register0  */
+#define MFI_OSP0        0xb0            /* outbound scratch pad0  */
+#define MFI_IQPL        0xc0            /* Inbound queue port (low bytes)  */
+#define MFI_IQPH        0xc4            /* Inbound queue port (high bytes)  */
+#define MFI_DIAG        0xf8            /* Host diag */
+#define MFI_SEQ         0xfc            /* Sequencer offset */
+#define MFI_1078_EIM    0x80000004      /* 1078 enable intrrupt mask  */
+#define MFI_RMI         0x2             /* reply message interrupt  */
+#define MFI_1078_RM     0x80000000      /* reply 1078 message interrupt  */
+#define MFI_ODC         0x4             /* outbound doorbell change interrupt */
+
+/*
+ * gen2 specific changes
+ */
+#define MFI_GEN2_EIM    0x00000005      /* gen2 enable interrupt mask */
+#define MFI_GEN2_RM     0x00000001      /* reply gen2 message interrupt */
+
+/*
+ * skinny specific changes
+ */
+#define MFI_SKINNY_IDB  0x00    /* Inbound doorbell is at 0x00 for skinny */
+#define MFI_SKINNY_RM   0x00000001      /* reply skinny message interrupt */
+
+/* Bits for MFI_OSTS */
+#define MFI_OSTS_INTR_VALID     0x00000002
+
+/*
+ * Firmware state values.  Found in OMSG0 during initialization.
+ */
+#define MFI_FWSTATE_MASK                0xf0000000
+#define MFI_FWSTATE_UNDEFINED           0x00000000
+#define MFI_FWSTATE_BB_INIT             0x10000000
+#define MFI_FWSTATE_FW_INIT             0x40000000
+#define MFI_FWSTATE_WAIT_HANDSHAKE      0x60000000
+#define MFI_FWSTATE_FW_INIT_2           0x70000000
+#define MFI_FWSTATE_DEVICE_SCAN         0x80000000
+#define MFI_FWSTATE_BOOT_MSG_PENDING    0x90000000
+#define MFI_FWSTATE_FLUSH_CACHE         0xa0000000
+#define MFI_FWSTATE_READY               0xb0000000
+#define MFI_FWSTATE_OPERATIONAL         0xc0000000
+#define MFI_FWSTATE_FAULT               0xf0000000
+#define MFI_FWSTATE_MAXSGL_MASK         0x00ff0000
+#define MFI_FWSTATE_MAXCMD_MASK         0x0000ffff
+#define MFI_FWSTATE_MSIX_SUPPORTED      0x04000000
+#define MFI_FWSTATE_HOSTMEMREQD_MASK    0x08000000
+
+/*
+ * Control bits to drive the card to ready state.  These go into the IDB
+ * register.
+ */
+#define MFI_FWINIT_ABORT        0x00000001 /* Abort all pending commands */
+#define MFI_FWINIT_READY        0x00000002 /* Move from operational to ready */
+#define MFI_FWINIT_MFIMODE      0x00000004 /* unknown */
+#define MFI_FWINIT_CLEAR_HANDSHAKE 0x00000008 /* Respond to WAIT_HANDSHAKE */
+#define MFI_FWINIT_HOTPLUG      0x00000010
+#define MFI_FWINIT_STOP_ADP     0x00000020 /* Move to operational, stop */
+#define MFI_FWINIT_ADP_RESET    0x00000040 /* Reset ADP */
+
+/* MFI Commands */
+typedef enum {
+    MFI_CMD_INIT = 0x00,
+    MFI_CMD_LD_READ,
+    MFI_CMD_LD_WRITE,
+    MFI_CMD_LD_SCSI_IO,
+    MFI_CMD_PD_SCSI_IO,
+    MFI_CMD_DCMD,
+    MFI_CMD_ABORT,
+    MFI_CMD_SMP,
+    MFI_CMD_STP
+} mfi_cmd_t;
+
+/* Direct commands */
+typedef enum {
+    MFI_DCMD_CTRL_MFI_HOST_MEM_ALLOC =  0x0100e100,
+    MFI_DCMD_CTRL_GET_INFO =            0x01010000,
+    MFI_DCMD_CTRL_GET_PROPERTIES =      0x01020100,
+    MFI_DCMD_CTRL_SET_PROPERTIES =      0x01020200,
+    MFI_DCMD_CTRL_ALARM =               0x01030000,
+    MFI_DCMD_CTRL_ALARM_GET =           0x01030100,
+    MFI_DCMD_CTRL_ALARM_ENABLE =        0x01030200,
+    MFI_DCMD_CTRL_ALARM_DISABLE =       0x01030300,
+    MFI_DCMD_CTRL_ALARM_SILENCE =       0x01030400,
+    MFI_DCMD_CTRL_ALARM_TEST =          0x01030500,
+    MFI_DCMD_CTRL_EVENT_GETINFO =       0x01040100,
+    MFI_DCMD_CTRL_EVENT_CLEAR =         0x01040200,
+    MFI_DCMD_CTRL_EVENT_GET =           0x01040300,
+    MFI_DCMD_CTRL_EVENT_COUNT =         0x01040400,
+    MFI_DCMD_CTRL_EVENT_WAIT =          0x01040500,
+    MFI_DCMD_CTRL_SHUTDOWN =            0x01050000,
+    MFI_DCMD_HIBERNATE_STANDBY =        0x01060000,
+    MFI_DCMD_CTRL_GET_TIME =            0x01080101,
+    MFI_DCMD_CTRL_SET_TIME =            0x01080102,
+    MFI_DCMD_CTRL_BIOS_DATA_GET =       0x010c0100,
+    MFI_DCMD_CTRL_BIOS_DATA_SET =       0x010c0200,
+    MFI_DCMD_CTRL_FACTORY_DEFAULTS =    0x010d0000,
+    MFI_DCMD_CTRL_MFC_DEFAULTS_GET =    0x010e0201,
+    MFI_DCMD_CTRL_MFC_DEFAULTS_SET =    0x010e0202,
+    MFI_DCMD_CTRL_CACHE_FLUSH =         0x01101000,
+    MFI_DCMD_PD_GET_LIST =              0x02010000,
+    MFI_DCMD_PD_LIST_QUERY =            0x02010100,
+    MFI_DCMD_PD_GET_INFO =              0x02020000,
+    MFI_DCMD_PD_STATE_SET =             0x02030100,
+    MFI_DCMD_PD_REBUILD =               0x02040100,
+    MFI_DCMD_PD_BLINK =                 0x02070100,
+    MFI_DCMD_PD_UNBLINK =               0x02070200,
+    MFI_DCMD_LD_GET_LIST =              0x03010000,
+    MFI_DCMD_LD_GET_INFO =              0x03020000,
+    MFI_DCMD_LD_GET_PROP =              0x03030000,
+    MFI_DCMD_LD_SET_PROP =              0x03040000,
+    MFI_DCMD_LD_DELETE =                0x03090000,
+    MFI_DCMD_CFG_READ =                 0x04010000,
+    MFI_DCMD_CFG_ADD =                  0x04020000,
+    MFI_DCMD_CFG_CLEAR =                0x04030000,
+    MFI_DCMD_CFG_FOREIGN_READ =         0x04060100,
+    MFI_DCMD_CFG_FOREIGN_IMPORT =       0x04060400,
+    MFI_DCMD_BBU_STATUS =               0x05010000,
+    MFI_DCMD_BBU_CAPACITY_INFO =        0x05020000,
+    MFI_DCMD_BBU_DESIGN_INFO =          0x05030000,
+    MFI_DCMD_BBU_PROP_GET =             0x05050100,
+    MFI_DCMD_CLUSTER =                  0x08000000,
+    MFI_DCMD_CLUSTER_RESET_ALL =        0x08010100,
+    MFI_DCMD_CLUSTER_RESET_LD =         0x08010200
+} mfi_dcmd_t;
+
+/* Modifiers for MFI_DCMD_CTRL_FLUSHCACHE */
+#define MFI_FLUSHCACHE_CTRL     0x01
+#define MFI_FLUSHCACHE_DISK     0x02
+
+/* Modifiers for MFI_DCMD_CTRL_SHUTDOWN */
+#define MFI_SHUTDOWN_SPINDOWN   0x01
+
+/*
+ * MFI Frame flags
+ */
+typedef enum {
+    MFI_FRAME_DONT_POST_IN_REPLY_QUEUE =        0x0001,
+    MFI_FRAME_SGL64 =                           0x0002,
+    MFI_FRAME_SENSE64 =                         0x0004,
+    MFI_FRAME_DIR_WRITE =                       0x0008,
+    MFI_FRAME_DIR_READ =                        0x0010,
+    MFI_FRAME_IEEE_SGL =                        0x0020,
+} mfi_frame_flags;
+
+/* MFI Status codes */
+typedef enum {
+    MFI_STAT_OK =                       0x00,
+    MFI_STAT_INVALID_CMD,
+    MFI_STAT_INVALID_DCMD,
+    MFI_STAT_INVALID_PARAMETER,
+    MFI_STAT_INVALID_SEQUENCE_NUMBER,
+    MFI_STAT_ABORT_NOT_POSSIBLE,
+    MFI_STAT_APP_HOST_CODE_NOT_FOUND,
+    MFI_STAT_APP_IN_USE,
+    MFI_STAT_APP_NOT_INITIALIZED,
+    MFI_STAT_ARRAY_INDEX_INVALID,
+    MFI_STAT_ARRAY_ROW_NOT_EMPTY,
+    MFI_STAT_CONFIG_RESOURCE_CONFLICT,
+    MFI_STAT_DEVICE_NOT_FOUND,
+    MFI_STAT_DRIVE_TOO_SMALL,
+    MFI_STAT_FLASH_ALLOC_FAIL,
+    MFI_STAT_FLASH_BUSY,
+    MFI_STAT_FLASH_ERROR =              0x10,
+    MFI_STAT_FLASH_IMAGE_BAD,
+    MFI_STAT_FLASH_IMAGE_INCOMPLETE,
+    MFI_STAT_FLASH_NOT_OPEN,
+    MFI_STAT_FLASH_NOT_STARTED,
+    MFI_STAT_FLUSH_FAILED,
+    MFI_STAT_HOST_CODE_NOT_FOUNT,
+    MFI_STAT_LD_CC_IN_PROGRESS,
+    MFI_STAT_LD_INIT_IN_PROGRESS,
+    MFI_STAT_LD_LBA_OUT_OF_RANGE,
+    MFI_STAT_LD_MAX_CONFIGURED,
+    MFI_STAT_LD_NOT_OPTIMAL,
+    MFI_STAT_LD_RBLD_IN_PROGRESS,
+    MFI_STAT_LD_RECON_IN_PROGRESS,
+    MFI_STAT_LD_WRONG_RAID_LEVEL,
+    MFI_STAT_MAX_SPARES_EXCEEDED,
+    MFI_STAT_MEMORY_NOT_AVAILABLE =     0x20,
+    MFI_STAT_MFC_HW_ERROR,
+    MFI_STAT_NO_HW_PRESENT,
+    MFI_STAT_NOT_FOUND,
+    MFI_STAT_NOT_IN_ENCL,
+    MFI_STAT_PD_CLEAR_IN_PROGRESS,
+    MFI_STAT_PD_TYPE_WRONG,
+    MFI_STAT_PR_DISABLED,
+    MFI_STAT_ROW_INDEX_INVALID,
+    MFI_STAT_SAS_CONFIG_INVALID_ACTION,
+    MFI_STAT_SAS_CONFIG_INVALID_DATA,
+    MFI_STAT_SAS_CONFIG_INVALID_PAGE,
+    MFI_STAT_SAS_CONFIG_INVALID_TYPE,
+    MFI_STAT_SCSI_DONE_WITH_ERROR,
+    MFI_STAT_SCSI_IO_FAILED,
+    MFI_STAT_SCSI_RESERVATION_CONFLICT,
+    MFI_STAT_SHUTDOWN_FAILED =          0x30,
+    MFI_STAT_TIME_NOT_SET,
+    MFI_STAT_WRONG_STATE,
+    MFI_STAT_LD_OFFLINE,
+    MFI_STAT_PEER_NOTIFICATION_REJECTED,
+    MFI_STAT_PEER_NOTIFICATION_FAILED,
+    MFI_STAT_RESERVATION_IN_PROGRESS,
+    MFI_STAT_I2C_ERRORS_DETECTED,
+    MFI_STAT_PCI_ERRORS_DETECTED,
+    MFI_STAT_DIAG_FAILED,
+    MFI_STAT_BOOT_MSG_PENDING,
+    MFI_STAT_FOREIGN_CONFIG_INCOMPLETE,
+    MFI_STAT_INVALID_SGL,
+    MFI_STAT_UNSUPPORTED_HW,
+    MFI_STAT_CC_SCHEDULE_DISABLED,
+    MFI_STAT_PD_COPYBACK_IN_PROGRESS,
+    MFI_STAT_MULTIPLE_PDS_IN_ARRAY =    0x40,
+    MFI_STAT_FW_DOWNLOAD_ERROR,
+    MFI_STAT_FEATURE_SECURITY_NOT_ENABLED,
+    MFI_STAT_LOCK_KEY_ALREADY_EXISTS,
+    MFI_STAT_LOCK_KEY_BACKUP_NOT_ALLOWED,
+    MFI_STAT_LOCK_KEY_VERIFY_NOT_ALLOWED,
+    MFI_STAT_LOCK_KEY_VERIFY_FAILED,
+    MFI_STAT_LOCK_KEY_REKEY_NOT_ALLOWED,
+    MFI_STAT_LOCK_KEY_INVALID,
+    MFI_STAT_LOCK_KEY_ESCROW_INVALID,
+    MFI_STAT_LOCK_KEY_BACKUP_REQUIRED,
+    MFI_STAT_SECURE_LD_EXISTS,
+    MFI_STAT_LD_SECURE_NOT_ALLOWED,
+    MFI_STAT_REPROVISION_NOT_ALLOWED,
+    MFI_STAT_PD_SECURITY_TYPE_WRONG,
+    MFI_STAT_LD_ENCRYPTION_TYPE_INVALID,
+    MFI_STAT_CONFIG_FDE_NON_FDE_MIX_NOT_ALLOWED = 0x50,
+    MFI_STAT_CONFIG_LD_ENCRYPTION_TYPE_MIX_NOT_ALLOWED,
+    MFI_STAT_SECRET_KEY_NOT_ALLOWED,
+    MFI_STAT_PD_HW_ERRORS_DETECTED,
+    MFI_STAT_LD_CACHE_PINNED,
+    MFI_STAT_POWER_STATE_SET_IN_PROGRESS,
+    MFI_STAT_POWER_STATE_SET_BUSY,
+    MFI_STAT_POWER_STATE_WRONG,
+    MFI_STAT_PR_NO_AVAILABLE_PD_FOUND,
+    MFI_STAT_CTRL_RESET_REQUIRED,
+    MFI_STAT_LOCK_KEY_EKM_NO_BOOT_AGENT,
+    MFI_STAT_SNAP_NO_SPACE,
+    MFI_STAT_SNAP_PARTIAL_FAILURE,
+    MFI_STAT_UPGRADE_KEY_INCOMPATIBLE,
+    MFI_STAT_PFK_INCOMPATIBLE,
+    MFI_STAT_PD_MAX_UNCONFIGURED,
+    MFI_STAT_IO_METRICS_DISABLED =      0x60,
+    MFI_STAT_AEC_NOT_STOPPED,
+    MFI_STAT_PI_TYPE_WRONG,
+    MFI_STAT_LD_PD_PI_INCOMPATIBLE,
+    MFI_STAT_PI_NOT_ENABLED,
+    MFI_STAT_LD_BLOCK_SIZE_MISMATCH,
+    MFI_STAT_INVALID_STATUS =           0xFF
+} mfi_status_t;
+
+/* Event classes */
+typedef enum {
+    MFI_EVT_CLASS_DEBUG =      -2,
+    MFI_EVT_CLASS_PROGRESS =   -1,
+    MFI_EVT_CLASS_INFO =        0,
+    MFI_EVT_CLASS_WARNING =     1,
+    MFI_EVT_CLASS_CRITICAL =    2,
+    MFI_EVT_CLASS_FATAL =       3,
+    MFI_EVT_CLASS_DEAD =        4
+} mfi_evt_class_t;
+
+/* Event locales */
+typedef enum {
+    MFI_EVT_LOCALE_LD =         0x0001,
+    MFI_EVT_LOCALE_PD =         0x0002,
+    MFI_EVT_LOCALE_ENCL =       0x0004,
+    MFI_EVT_LOCALE_BBU =        0x0008,
+    MFI_EVT_LOCALE_SAS =        0x0010,
+    MFI_EVT_LOCALE_CTRL =       0x0020,
+    MFI_EVT_LOCALE_CONFIG =     0x0040,
+    MFI_EVT_LOCALE_CLUSTER =    0x0080,
+    MFI_EVT_LOCALE_ALL =        0xffff
+} mfi_evt_locale_t;
+
+/* Event args */
+typedef enum {
+    MR_EVT_ARGS_NONE =          0x00,
+    MR_EVT_ARGS_CDB_SENSE,
+    MR_EVT_ARGS_LD,
+    MR_EVT_ARGS_LD_COUNT,
+    MR_EVT_ARGS_LD_LBA,
+    MR_EVT_ARGS_LD_OWNER,
+    MR_EVT_ARGS_LD_LBA_PD_LBA,
+    MR_EVT_ARGS_LD_PROG,
+    MR_EVT_ARGS_LD_STATE,
+    MR_EVT_ARGS_LD_STRIP,
+    MR_EVT_ARGS_PD,
+    MR_EVT_ARGS_PD_ERR,
+    MR_EVT_ARGS_PD_LBA,
+    MR_EVT_ARGS_PD_LBA_LD,
+    MR_EVT_ARGS_PD_PROG,
+    MR_EVT_ARGS_PD_STATE,
+    MR_EVT_ARGS_PCI,
+    MR_EVT_ARGS_RATE,
+    MR_EVT_ARGS_STR,
+    MR_EVT_ARGS_TIME,
+    MR_EVT_ARGS_ECC,
+    MR_EVT_ARGS_LD_PROP,
+    MR_EVT_ARGS_PD_SPARE,
+    MR_EVT_ARGS_PD_INDEX,
+    MR_EVT_ARGS_DIAG_PASS,
+    MR_EVT_ARGS_DIAG_FAIL,
+    MR_EVT_ARGS_PD_LBA_LBA,
+    MR_EVT_ARGS_PORT_PHY,
+    MR_EVT_ARGS_PD_MISSING,
+    MR_EVT_ARGS_PD_ADDRESS,
+    MR_EVT_ARGS_BITMAP,
+    MR_EVT_ARGS_CONNECTOR,
+    MR_EVT_ARGS_PD_PD,
+    MR_EVT_ARGS_PD_FRU,
+    MR_EVT_ARGS_PD_PATHINFO,
+    MR_EVT_ARGS_PD_POWER_STATE,
+    MR_EVT_ARGS_GENERIC,
+} mfi_evt_args;
+
+/* Event codes */
+#define MR_EVT_CFG_CLEARED                          0x0004
+#define MR_EVT_CTRL_SHUTDOWN                        0x002a
+#define MR_EVT_LD_STATE_CHANGE                      0x0051
+#define MR_EVT_PD_INSERTED                          0x005b
+#define MR_EVT_PD_REMOVED                           0x0070
+#define MR_EVT_PD_STATE_CHANGED                     0x0072
+#define MR_EVT_LD_CREATED                           0x008a
+#define MR_EVT_LD_DELETED                           0x008b
+#define MR_EVT_FOREIGN_CFG_IMPORTED                 0x00db
+#define MR_EVT_LD_OFFLINE                           0x00fc
+#define MR_EVT_CTRL_HOST_BUS_SCAN_REQUESTED         0x0152
+
+typedef enum {
+    MR_LD_CACHE_WRITE_BACK =            0x01,
+    MR_LD_CACHE_WRITE_ADAPTIVE =        0x02,
+    MR_LD_CACHE_READ_AHEAD =            0x04,
+    MR_LD_CACHE_READ_ADAPTIVE =         0x08,
+    MR_LD_CACHE_WRITE_CACHE_BAD_BBU =   0x10,
+    MR_LD_CACHE_ALLOW_WRITE_CACHE =     0x20,
+    MR_LD_CACHE_ALLOW_READ_CACHE =      0x40
+} mfi_ld_cache;
+
+typedef enum {
+    MR_PD_CACHE_UNCHANGED  =    0,
+    MR_PD_CACHE_ENABLE =        1,
+    MR_PD_CACHE_DISABLE =       2
+} mfi_pd_cache;
+
+typedef enum {
+    MR_PD_QUERY_TYPE_ALL =              0,
+    MR_PD_QUERY_TYPE_STATE =            1,
+    MR_PD_QUERY_TYPE_POWER_STATE =      2,
+    MR_PD_QUERY_TYPE_MEDIA_TYPE =       3,
+    MR_PD_QUERY_TYPE_SPEED =            4,
+    MR_PD_QUERY_TYPE_EXPOSED_TO_HOST =  5, /*query for system drives */
+} mfi_pd_query_type;
+
+/*
+ * Other propertities and definitions
+ */
+#define MFI_MAX_PD_CHANNELS     2
+#define MFI_MAX_LD_CHANNELS     2
+#define MFI_MAX_CHANNELS        (MFI_MAX_PD_CHANNELS + MFI_MAX_LD_CHANNELS)
+#define MFI_MAX_CHANNEL_DEVS  128
+#define MFI_DEFAULT_ID         -1
+#define MFI_MAX_LUN             8
+#define MFI_MAX_LD             64
+
+#define MFI_FRAME_SIZE         64
+#define MFI_MBOX_SIZE          12
+
+/* Firmware flashing can take 40s */
+#define MFI_POLL_TIMEOUT_SECS  50
+
+/* Allow for speedier math calculations */
+#define MFI_SECTOR_LEN        512
+
+/* Scatter Gather elements */
+struct mfi_sg32 {
+    uint32_t addr;
+    uint32_t len;
+} QEMU_PACKED;
+
+struct mfi_sg64 {
+    uint64_t addr;
+    uint32_t len;
+} QEMU_PACKED;
+
+struct mfi_sg_skinny {
+    uint64_t addr;
+    uint32_t len;
+    uint32_t flag;
+} QEMU_PACKED;
+
+union mfi_sgl {
+    struct mfi_sg32 sg32[1];
+    struct mfi_sg64 sg64[1];
+    struct mfi_sg_skinny sg_skinny[1];
+} QEMU_PACKED;
+
+/* Message frames.  All messages have a common header */
+struct mfi_frame_header {
+    uint8_t frame_cmd;
+    uint8_t sense_len;
+    uint8_t cmd_status;
+    uint8_t scsi_status;
+    uint8_t target_id;
+    uint8_t lun_id;
+    uint8_t cdb_len;
+    uint8_t sge_count;
+    uint64_t context;
+    uint16_t flags;
+    uint16_t timeout;
+    uint32_t data_len;
+} QEMU_PACKED;
+
+struct mfi_init_frame {
+    struct mfi_frame_header header;
+    uint32_t qinfo_new_addr_lo;
+    uint32_t qinfo_new_addr_hi;
+    uint32_t qinfo_old_addr_lo;
+    uint32_t qinfo_old_addr_hi;
+    uint32_t reserved[6];
+};
+
+#define MFI_IO_FRAME_SIZE 40
+struct mfi_io_frame {
+    struct mfi_frame_header header;
+    uint32_t sense_addr_lo;
+    uint32_t sense_addr_hi;
+    uint32_t lba_lo;
+    uint32_t lba_hi;
+    union mfi_sgl sgl;
+} QEMU_PACKED;
+
+#define MFI_PASS_FRAME_SIZE 48
+struct mfi_pass_frame {
+    struct mfi_frame_header header;
+    uint32_t sense_addr_lo;
+    uint32_t sense_addr_hi;
+    uint8_t cdb[16];
+    union mfi_sgl sgl;
+} QEMU_PACKED;
+
+#define MFI_DCMD_FRAME_SIZE 40
+struct mfi_dcmd_frame {
+    struct mfi_frame_header header;
+    uint32_t opcode;
+    uint8_t mbox[MFI_MBOX_SIZE];
+    union mfi_sgl sgl;
+} QEMU_PACKED;
+
+struct mfi_abort_frame {
+    struct mfi_frame_header header;
+    uint64_t abort_context;
+    uint32_t abort_mfi_addr_lo;
+    uint32_t abort_mfi_addr_hi;
+    uint32_t reserved1[6];
+} QEMU_PACKED;
+
+struct mfi_smp_frame {
+    struct mfi_frame_header header;
+    uint64_t sas_addr;
+    union {
+        struct mfi_sg32 sg32[2];
+        struct mfi_sg64 sg64[2];
+    } sgl;
+} QEMU_PACKED;
+
+struct mfi_stp_frame {
+    struct mfi_frame_header header;
+    uint16_t fis[10];
+    uint32_t stp_flags;
+    union {
+        struct mfi_sg32 sg32[2];
+        struct mfi_sg64 sg64[2];
+    } sgl;
+} QEMU_PACKED;
+
+union mfi_frame {
+    struct mfi_frame_header header;
+    struct mfi_init_frame init;
+    struct mfi_io_frame io;
+    struct mfi_pass_frame pass;
+    struct mfi_dcmd_frame dcmd;
+    struct mfi_abort_frame abort;
+    struct mfi_smp_frame smp;
+    struct mfi_stp_frame stp;
+    uint64_t raw[8];
+    uint8_t bytes[MFI_FRAME_SIZE];
+};
+
+#define MFI_SENSE_LEN 128
+struct mfi_sense {
+    uint8_t     data[MFI_SENSE_LEN];
+};
+
+#define MFI_QUEUE_FLAG_CONTEXT64 0x00000002
+
+/* The queue init structure that is passed with the init message */
+struct mfi_init_qinfo {
+    uint32_t flags;
+    uint32_t rq_entries;
+    uint32_t rq_addr_lo;
+    uint32_t rq_addr_hi;
+    uint32_t pi_addr_lo;
+    uint32_t pi_addr_hi;
+    uint32_t ci_addr_lo;
+    uint32_t ci_addr_hi;
+} QEMU_PACKED;
+
+/* Controller properties */
+struct mfi_ctrl_props {
+    uint16_t seq_num;
+    uint16_t pred_fail_poll_interval;
+    uint16_t intr_throttle_cnt;
+    uint16_t intr_throttle_timeout;
+    uint8_t rebuild_rate;
+    uint8_t patrol_read_rate;
+    uint8_t bgi_rate;
+    uint8_t cc_rate;
+    uint8_t recon_rate;
+    uint8_t cache_flush_interval;
+    uint8_t spinup_drv_cnt;
+    uint8_t spinup_delay;
+    uint8_t cluster_enable;
+    uint8_t coercion_mode;
+    uint8_t alarm_enable;
+    uint8_t disable_auto_rebuild;
+    uint8_t disable_battery_warn;
+    uint8_t ecc_bucket_size;
+    uint16_t ecc_bucket_leak_rate;
+    uint8_t restore_hotspare_on_insertion;
+    uint8_t expose_encl_devices;
+    uint8_t maintainPdFailHistory;
+    uint8_t disallowHostRequestReordering;
+    uint8_t abortCCOnError;
+    uint8_t loadBalanceMode;
+    uint8_t disableAutoDetectBackplane;
+    uint8_t snapVDSpace;
+    uint32_t OnOffProperties;
+/* set TRUE to disable copyBack (0=copyback enabled) */
+#define MFI_CTRL_PROP_CopyBackDisabled           (1 << 0)
+#define MFI_CTRL_PROP_SMARTerEnabled             (1 << 1)
+#define MFI_CTRL_PROP_PRCorrectUnconfiguredAreas (1 << 2)
+#define MFI_CTRL_PROP_UseFdeOnly                 (1 << 3)
+#define MFI_CTRL_PROP_DisableNCQ                 (1 << 4)
+#define MFI_CTRL_PROP_SSDSMARTerEnabled          (1 << 5)
+#define MFI_CTRL_PROP_SSDPatrolReadEnabled       (1 << 6)
+#define MFI_CTRL_PROP_EnableSpinDownUnconfigured (1 << 7)
+#define MFI_CTRL_PROP_AutoEnhancedImport         (1 << 8)
+#define MFI_CTRL_PROP_EnableSecretKeyControl     (1 << 9)
+#define MFI_CTRL_PROP_DisableOnlineCtrlReset     (1 << 10)
+#define MFI_CTRL_PROP_AllowBootWithPinnedCache   (1 << 11)
+#define MFI_CTRL_PROP_DisableSpinDownHS          (1 << 12)
+#define MFI_CTRL_PROP_EnableJBOD                 (1 << 13)
+
+    uint8_t autoSnapVDSpace; /* % of source LD to be
+                              * reserved for auto snapshot
+                              * in snapshot repository, for
+                              * metadata and user data
+                              * 1=5%, 2=10%, 3=15% and so on
+                              */
+    uint8_t viewSpace;       /* snapshot writeable VIEWs
+                              * capacity as a % of source LD
+                              * capacity. 0=READ only
+                              * 1=5%, 2=10%, 3=15% and so on
+                              */
+    uint16_t spinDownTime;    /* # of idle minutes before device
+                               * is spun down (0=use FW defaults)
+                               */
+    uint8_t reserved[24];
+} QEMU_PACKED;
+
+/* PCI information about the card. */
+struct mfi_info_pci {
+    uint16_t vendor;
+    uint16_t device;
+    uint16_t subvendor;
+    uint16_t subdevice;
+    uint8_t reserved[24];
+} QEMU_PACKED;
+
+/* Host (front end) interface information */
+struct mfi_info_host {
+    uint8_t type;
+#define MFI_INFO_HOST_PCIX      0x01
+#define MFI_INFO_HOST_PCIE      0x02
+#define MFI_INFO_HOST_ISCSI     0x04
+#define MFI_INFO_HOST_SAS3G     0x08
+    uint8_t reserved[6];
+    uint8_t port_count;
+    uint64_t port_addr[8];
+} QEMU_PACKED;
+
+/* Device (back end) interface information */
+struct mfi_info_device {
+    uint8_t type;
+#define MFI_INFO_DEV_SPI        0x01
+#define MFI_INFO_DEV_SAS3G      0x02
+#define MFI_INFO_DEV_SATA1      0x04
+#define MFI_INFO_DEV_SATA3G     0x08
+#define MFI_INFO_DEV_PCIE       0x10
+    uint8_t reserved[6];
+    uint8_t port_count;
+    uint64_t port_addr[8];
+} QEMU_PACKED;
+
+/* Firmware component information */
+struct mfi_info_component {
+    char name[8];
+    char version[32];
+    char build_date[16];
+    char build_time[16];
+} QEMU_PACKED;
+
+/* Controller default settings */
+struct mfi_defaults {
+    uint64_t sas_addr;
+    uint8_t phy_polarity;
+    uint8_t background_rate;
+    uint8_t stripe_size;
+    uint8_t flush_time;
+    uint8_t write_back;
+    uint8_t read_ahead;
+    uint8_t cache_when_bbu_bad;
+    uint8_t cached_io;
+    uint8_t smart_mode;
+    uint8_t alarm_disable;
+    uint8_t coercion;
+    uint8_t zrc_config;
+    uint8_t dirty_led_shows_drive_activity;
+    uint8_t bios_continue_on_error;
+    uint8_t spindown_mode;
+    uint8_t allowed_device_types;
+    uint8_t allow_mix_in_enclosure;
+    uint8_t allow_mix_in_ld;
+    uint8_t allow_sata_in_cluster;
+    uint8_t max_chained_enclosures;
+    uint8_t disable_ctrl_r;
+    uint8_t enable_web_bios;
+    uint8_t phy_polarity_split;
+    uint8_t direct_pd_mapping;
+    uint8_t bios_enumerate_lds;
+    uint8_t restored_hot_spare_on_insertion;
+    uint8_t expose_enclosure_devices;
+    uint8_t maintain_pd_fail_history;
+    uint8_t disable_puncture;
+    uint8_t zero_based_enumeration;
+    uint8_t disable_preboot_cli;
+    uint8_t show_drive_led_on_activity;
+    uint8_t cluster_disable;
+    uint8_t sas_disable;
+    uint8_t auto_detect_backplane;
+    uint8_t fde_only;
+    uint8_t delay_during_post;
+    uint8_t resv[19];
+} QEMU_PACKED;
+
+/* Controller default settings */
+struct mfi_bios_data {
+    uint16_t boot_target_id;
+    uint8_t do_not_int_13;
+    uint8_t continue_on_error;
+    uint8_t verbose;
+    uint8_t geometry;
+    uint8_t expose_all_drives;
+    uint8_t reserved[56];
+    uint8_t check_sum;
+} QEMU_PACKED;
+
+/* SAS (?) controller info, returned from MFI_DCMD_CTRL_GETINFO. */
+struct mfi_ctrl_info {
+    struct mfi_info_pci pci;
+    struct mfi_info_host host;
+    struct mfi_info_device device;
+
+    /* Firmware components that are present and active. */
+    uint32_t image_check_word;
+    uint32_t image_component_count;
+    struct mfi_info_component image_component[8];
+
+    /* Firmware components that have been flashed but are inactive */
+    uint32_t pending_image_component_count;
+    struct mfi_info_component pending_image_component[8];
+
+    uint8_t max_arms;
+    uint8_t max_spans;
+    uint8_t max_arrays;
+    uint8_t max_lds;
+    char product_name[80];
+    char serial_number[32];
+    uint32_t hw_present;
+#define MFI_INFO_HW_BBU         0x01
+#define MFI_INFO_HW_ALARM       0x02
+#define MFI_INFO_HW_NVRAM       0x04
+#define MFI_INFO_HW_UART        0x08
+#define MFI_INFO_HW_MEM         0x10
+#define MFI_INFO_HW_FLASH       0x20
+    uint32_t current_fw_time;
+    uint16_t max_cmds;
+    uint16_t max_sg_elements;
+    uint32_t max_request_size;
+    uint16_t lds_present;
+    uint16_t lds_degraded;
+    uint16_t lds_offline;
+    uint16_t pd_present;
+    uint16_t pd_disks_present;
+    uint16_t pd_disks_pred_failure;
+    uint16_t pd_disks_failed;
+    uint16_t nvram_size;
+    uint16_t memory_size;
+    uint16_t flash_size;
+    uint16_t ram_correctable_errors;
+    uint16_t ram_uncorrectable_errors;
+    uint8_t cluster_allowed;
+    uint8_t cluster_active;
+    uint16_t max_strips_per_io;
+
+    uint32_t raid_levels;
+#define MFI_INFO_RAID_0         0x01
+#define MFI_INFO_RAID_1         0x02
+#define MFI_INFO_RAID_5         0x04
+#define MFI_INFO_RAID_1E        0x08
+#define MFI_INFO_RAID_6         0x10
+
+    uint32_t adapter_ops;
+#define MFI_INFO_AOPS_RBLD_RATE         0x0001
+#define MFI_INFO_AOPS_CC_RATE           0x0002
+#define MFI_INFO_AOPS_BGI_RATE          0x0004
+#define MFI_INFO_AOPS_RECON_RATE        0x0008
+#define MFI_INFO_AOPS_PATROL_RATE       0x0010
+#define MFI_INFO_AOPS_ALARM_CONTROL     0x0020
+#define MFI_INFO_AOPS_CLUSTER_SUPPORTED 0x0040
+#define MFI_INFO_AOPS_BBU               0x0080
+#define MFI_INFO_AOPS_SPANNING_ALLOWED  0x0100
+#define MFI_INFO_AOPS_DEDICATED_SPARES  0x0200
+#define MFI_INFO_AOPS_REVERTIBLE_SPARES 0x0400
+#define MFI_INFO_AOPS_FOREIGN_IMPORT    0x0800
+#define MFI_INFO_AOPS_SELF_DIAGNOSTIC   0x1000
+#define MFI_INFO_AOPS_MIXED_ARRAY       0x2000
+#define MFI_INFO_AOPS_GLOBAL_SPARES     0x4000
+
+    uint32_t ld_ops;
+#define MFI_INFO_LDOPS_READ_POLICY      0x01
+#define MFI_INFO_LDOPS_WRITE_POLICY     0x02
+#define MFI_INFO_LDOPS_IO_POLICY        0x04
+#define MFI_INFO_LDOPS_ACCESS_POLICY    0x08
+#define MFI_INFO_LDOPS_DISK_CACHE_POLICY 0x10
+
+    struct {
+        uint8_t min;
+        uint8_t max;
+        uint8_t reserved[2];
+    } QEMU_PACKED stripe_sz_ops;
+
+    uint32_t pd_ops;
+#define MFI_INFO_PDOPS_FORCE_ONLINE     0x01
+#define MFI_INFO_PDOPS_FORCE_OFFLINE    0x02
+#define MFI_INFO_PDOPS_FORCE_REBUILD    0x04
+
+    uint32_t pd_mix_support;
+#define MFI_INFO_PDMIX_SAS              0x01
+#define MFI_INFO_PDMIX_SATA             0x02
+#define MFI_INFO_PDMIX_ENCL             0x04
+#define MFI_INFO_PDMIX_LD               0x08
+#define MFI_INFO_PDMIX_SATA_CLUSTER     0x10
+
+    uint8_t ecc_bucket_count;
+    uint8_t reserved2[11];
+    struct mfi_ctrl_props properties;
+    char package_version[0x60];
+    uint8_t pad[0x800 - 0x6a0];
+} QEMU_PACKED;
+
+/* keep track of an event. */
+union mfi_evt {
+    struct {
+        uint16_t locale;
+        uint8_t reserved;
+        int8_t class;
+    } members;
+    uint32_t word;
+} QEMU_PACKED;
+
+/* event log state. */
+struct mfi_evt_log_state {
+    uint32_t newest_seq_num;
+    uint32_t oldest_seq_num;
+    uint32_t clear_seq_num;
+    uint32_t shutdown_seq_num;
+    uint32_t boot_seq_num;
+} QEMU_PACKED;
+
+struct mfi_progress {
+    uint16_t progress;
+    uint16_t elapsed_seconds;
+} QEMU_PACKED;
+
+struct mfi_evt_ld {
+    uint16_t target_id;
+    uint8_t ld_index;
+    uint8_t reserved;
+} QEMU_PACKED;
+
+struct mfi_evt_pd {
+    uint16_t device_id;
+    uint8_t enclosure_index;
+    uint8_t slot_number;
+} QEMU_PACKED;
+
+/* event detail, returned from MFI_DCMD_CTRL_EVENT_WAIT. */
+struct mfi_evt_detail {
+    uint32_t seq;
+    uint32_t time;
+    uint32_t code;
+    union mfi_evt class;
+    uint8_t arg_type;
+    uint8_t reserved1[15];
+
+    union {
+        struct {
+            struct mfi_evt_pd pd;
+            uint8_t cdb_len;
+            uint8_t sense_len;
+            uint8_t reserved[2];
+            uint8_t cdb[16];
+            uint8_t sense[64];
+        } cdb_sense;
+
+        struct mfi_evt_ld ld;
+
+        struct {
+            struct mfi_evt_ld ld;
+            uint64_t count;
+        } ld_count;
+
+        struct {
+            uint64_t lba;
+            struct mfi_evt_ld ld;
+        } ld_lba;
+
+        struct {
+            struct mfi_evt_ld ld;
+            uint32_t pre_owner;
+            uint32_t new_owner;
+        } ld_owner;
+
+        struct {
+            uint64_t ld_lba;
+            uint64_t pd_lba;
+            struct mfi_evt_ld ld;
+            struct mfi_evt_pd pd;
+        } ld_lba_pd_lba;
+
+        struct {
+            struct mfi_evt_ld ld;
+            struct mfi_progress prog;
+        } ld_prog;
+
+        struct {
+            struct mfi_evt_ld ld;
+            uint32_t prev_state;
+            uint32_t new_state;
+        } ld_state;
+
+        struct {
+            uint64_t strip;
+            struct mfi_evt_ld ld;
+        } ld_strip;
+
+        struct mfi_evt_pd pd;
+
+        struct {
+            struct mfi_evt_pd pd;
+            uint32_t err;
+        } pd_err;
+
+        struct {
+            uint64_t lba;
+            struct mfi_evt_pd pd;
+        } pd_lba;
+
+        struct {
+            uint64_t lba;
+            struct mfi_evt_pd pd;
+            struct mfi_evt_ld ld;
+        } pd_lba_ld;
+
+        struct {
+            struct mfi_evt_pd pd;
+            struct mfi_progress prog;
+        } pd_prog;
+
+        struct {
+            struct mfi_evt_pd ld;
+            uint32_t prev_state;
+            uint32_t new_state;
+        } pd_state;
+
+        struct {
+            uint16_t venderId;
+            uint16_t deviceId;
+            uint16_t subVenderId;
+            uint16_t subDeviceId;
+        } pci;
+
+        uint32_t rate;
+
+        char str[96];
+
+        struct {
+            uint32_t rtc;
+            uint16_t elapsedSeconds;
+        } time;
+
+        struct {
+            uint32_t ecar;
+            uint32_t elog;
+            char str[64];
+        } ecc;
+
+        uint8_t b[96];
+        uint16_t s[48];
+        uint32_t w[24];
+        uint64_t d[12];
+    } args;
+
+    char description[128];
+} QEMU_PACKED;
+
+struct mfi_evt_list {
+    uint32_t count;
+    uint32_t reserved;
+    struct mfi_evt_detail event[1];
+} QEMU_PACKED;
+
+union mfi_pd_ref {
+    struct {
+        uint16_t device_id;
+        uint16_t seq_num;
+    } v;
+    uint32_t ref;
+} QEMU_PACKED;
+
+union mfi_pd_ddf_type {
+    struct {
+        uint16_t pd_type;
+#define MFI_PD_DDF_TYPE_FORCED_PD_GUID (1 << 0)
+#define MFI_PD_DDF_TYPE_IN_VD          (1 << 1)
+#define MFI_PD_DDF_TYPE_IS_GLOBAL_SPARE (1 << 2)
+#define MFI_PD_DDF_TYPE_IS_SPARE        (1 << 3)
+#define MFI_PD_DDF_TYPE_IS_FOREIGN      (1 << 4)
+#define MFI_PD_DDF_TYPE_INTF_SPI        (1 << 12)
+#define MFI_PD_DDF_TYPE_INTF_SAS        (1 << 13)
+#define MFI_PD_DDF_TYPE_INTF_SATA1      (1 << 14)
+#define MFI_PD_DDF_TYPE_INTF_SATA3G     (1 << 15)
+        uint16_t reserved;
+    } ddf;
+    struct {
+        uint32_t reserved;
+    } non_disk;
+    uint32_t type;
+} QEMU_PACKED;
+
+struct mfi_pd_progress {
+    uint32_t active;
+#define PD_PROGRESS_ACTIVE_REBUILD (1 << 0)
+#define PD_PROGRESS_ACTIVE_PATROL  (1 << 1)
+#define PD_PROGRESS_ACTIVE_CLEAR   (1 << 2)
+    struct mfi_progress rbld;
+    struct mfi_progress patrol;
+    struct mfi_progress clear;
+    struct mfi_progress reserved[4];
+} QEMU_PACKED;
+
+struct mfi_pd_info {
+    union mfi_pd_ref ref;
+    uint8_t inquiry_data[96];
+    uint8_t vpd_page83[64];
+    uint8_t not_supported;
+    uint8_t scsi_dev_type;
+    uint8_t connected_port_bitmap;
+    uint8_t device_speed;
+    uint32_t media_err_count;
+    uint32_t other_err_count;
+    uint32_t pred_fail_count;
+    uint32_t last_pred_fail_event_seq_num;
+    uint16_t fw_state;
+    uint8_t disable_for_removal;
+    uint8_t link_speed;
+    union mfi_pd_ddf_type state;
+    struct {
+        uint8_t count;
+        uint8_t is_path_broken;
+        uint8_t reserved[6];
+        uint64_t sas_addr[4];
+    } path_info;
+    uint64_t raw_size;
+    uint64_t non_coerced_size;
+    uint64_t coerced_size;
+    uint16_t encl_device_id;
+    uint8_t encl_index;
+    uint8_t slot_number;
+    struct mfi_pd_progress prog_info;
+    uint8_t bad_block_table_full;
+    uint8_t unusable_in_current_config;
+    uint8_t vpd_page83_ext[64];
+    uint8_t reserved[512-358];
+} QEMU_PACKED;
+
+struct mfi_pd_address {
+    uint16_t device_id;
+    uint16_t encl_device_id;
+    uint8_t encl_index;
+    uint8_t slot_number;
+    uint8_t scsi_dev_type;
+    uint8_t connect_port_bitmap;
+    uint64_t sas_addr[2];
+} QEMU_PACKED;
+
+#define MFI_MAX_SYS_PDS 240
+struct mfi_pd_list {
+    uint32_t size;
+    uint32_t count;
+    struct mfi_pd_address addr[MFI_MAX_SYS_PDS];
+} QEMU_PACKED;
+
+union mfi_ld_ref {
+    struct {
+        uint8_t target_id;
+        uint8_t reserved;
+        uint16_t seq;
+    } v;
+    uint32_t ref;
+} QEMU_PACKED;
+
+struct mfi_ld_list {
+    uint32_t ld_count;
+    uint32_t reserved1;
+    struct {
+        union mfi_ld_ref ld;
+        uint8_t state;
+        uint8_t reserved2[3];
+        uint64_t size;
+    } ld_list[MFI_MAX_LD];
+} QEMU_PACKED;
+
+enum mfi_ld_access {
+    MFI_LD_ACCESS_RW =          0,
+    MFI_LD_ACCSSS_RO =          2,
+    MFI_LD_ACCESS_BLOCKED =     3,
+};
+#define MFI_LD_ACCESS_MASK      3
+
+enum mfi_ld_state {
+    MFI_LD_STATE_OFFLINE =              0,
+    MFI_LD_STATE_PARTIALLY_DEGRADED =   1,
+    MFI_LD_STATE_DEGRADED =             2,
+    MFI_LD_STATE_OPTIMAL =              3
+};
+
+enum mfi_syspd_state {
+    MFI_PD_STATE_UNCONFIGURED_GOOD =    0x00,
+    MFI_PD_STATE_UNCONFIGURED_BAD =     0x01,
+    MFI_PD_STATE_HOT_SPARE =            0x02,
+    MFI_PD_STATE_OFFLINE =              0x10,
+    MFI_PD_STATE_FAILED =               0x11,
+    MFI_PD_STATE_REBUILD =              0x14,
+    MFI_PD_STATE_ONLINE =               0x18,
+    MFI_PD_STATE_COPYBACK =             0x20,
+    MFI_PD_STATE_SYSTEM =               0x40
+};
+
+struct mfi_ld_props {
+    union mfi_ld_ref ld;
+    char name[16];
+    uint8_t default_cache_policy;
+    uint8_t access_policy;
+    uint8_t disk_cache_policy;
+    uint8_t current_cache_policy;
+    uint8_t no_bgi;
+    uint8_t reserved[7];
+} QEMU_PACKED;
+
+struct mfi_ld_params {
+    uint8_t primary_raid_level;
+    uint8_t raid_level_qualifier;
+    uint8_t secondary_raid_level;
+    uint8_t stripe_size;
+    uint8_t num_drives;
+    uint8_t span_depth;
+    uint8_t state;
+    uint8_t init_state;
+    uint8_t is_consistent;
+    uint8_t reserved[23];
+} QEMU_PACKED;
+
+struct mfi_ld_progress {
+    uint32_t            active;
+#define MFI_LD_PROGRESS_CC      (1<<0)
+#define MFI_LD_PROGRESS_BGI     (1<<1)
+#define MFI_LD_PROGRESS_FGI     (1<<2)
+#define MFI_LD_PORGRESS_RECON   (1<<3)
+    struct mfi_progress cc;
+    struct mfi_progress bgi;
+    struct mfi_progress fgi;
+    struct mfi_progress recon;
+    struct mfi_progress reserved[4];
+} QEMU_PACKED;
+
+struct mfi_span {
+    uint64_t start_block;
+    uint64_t num_blocks;
+    uint16_t array_ref;
+    uint8_t reserved[6];
+} QEMU_PACKED;
+
+#define MFI_MAX_SPAN_DEPTH      8
+struct mfi_ld_config {
+    struct mfi_ld_props properties;
+    struct mfi_ld_params params;
+    struct mfi_span span[MFI_MAX_SPAN_DEPTH];
+} QEMU_PACKED;
+
+struct mfi_ld_info {
+    struct mfi_ld_config ld_config;
+    uint64_t size;
+    struct mfi_ld_progress progress;
+    uint16_t cluster_owner;
+    uint8_t reconstruct_active;
+    uint8_t reserved1[1];
+    uint8_t vpd_page83[64];
+    uint8_t reserved2[16];
+} QEMU_PACKED;
+
+union mfi_spare_type {
+    uint8_t flags;
+#define MFI_SPARE_IS_DEDICATED (1 << 0)
+#define MFI_SPARE_IS_REVERTABLE (1 << 1)
+#define MFI_SPARE_IS_ENCL_AFFINITY (1 << 2)
+    uint8_t type;
+} QEMU_PACKED;
+
+#define MFI_MAX_ARRAYS 16
+struct mfi_spare {
+    union mfi_pd_ref ref;
+    union mfi_spare_type spare_type;
+    uint8_t reserved[2];
+    uint8_t array_count;
+    uint16_t array_refd[MFI_MAX_ARRAYS];
+} QEMU_PACKED;
+
+#define MFI_MAX_ROW_SIZE 32
+struct mfi_array {
+    uint64_t size;
+    uint8_t num_drives;
+    uint8_t reserved;
+    uint16_t array_ref;
+    uint8_t pad[20];
+    struct {
+        union mfi_pd_ref ref;
+        uint16_t fw_state; /* enum mfi_syspd_state */
+        struct {
+            uint8_t pd;
+            uint8_t slot;
+        } encl;
+    } pd[MFI_MAX_ROW_SIZE];
+} QEMU_PACKED;
+
+struct mfi_config_data {
+    uint32_t size;
+    uint16_t array_count;
+    uint16_t array_size;
+    uint16_t log_drv_count;
+    uint16_t log_drv_size;
+    uint16_t spares_count;
+    uint16_t spares_size;
+    uint8_t reserved[16];
+    /*
+      struct mfi_array  array[];
+      struct mfi_ld_config ld[];
+      struct mfi_spare  spare[];
+    */
+} QEMU_PACKED;
+
+#define MFI_SCSI_MAX_TARGETS  128
+#define MFI_SCSI_MAX_LUNS       8
+#define MFI_SCSI_INITIATOR_ID 255
+#define MFI_SCSI_MAX_CMDS       8
+#define MFI_SCSI_MAX_CDB_LEN   16
+
+#endif /* MFI_REG_H */
diff --git a/hw/milkymist-minimac2.c b/hw/milkymist-minimac2.c
index 70bf336add..b483a02f21 100644
--- a/hw/milkymist-minimac2.c
+++ b/hw/milkymist-minimac2.c
@@ -278,7 +278,7 @@ static void update_rx_interrupt(MilkymistMinimac2State *s)
     }
 }
 
-static ssize_t minimac2_rx(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t minimac2_rx(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     MilkymistMinimac2State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -408,7 +408,7 @@ static const MemoryRegionOps minimac2_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static int minimac2_can_rx(VLANClientState *nc)
+static int minimac2_can_rx(NetClientState *nc)
 {
     MilkymistMinimac2State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -422,7 +422,7 @@ static int minimac2_can_rx(VLANClientState *nc)
     return 0;
 }
 
-static void minimac2_cleanup(VLANClientState *nc)
+static void minimac2_cleanup(NetClientState *nc)
 {
     MilkymistMinimac2State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -448,7 +448,7 @@ static void milkymist_minimac2_reset(DeviceState *d)
 }
 
 static NetClientInfo net_milkymist_minimac2_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = minimac2_can_rx,
     .receive = minimac2_rx,
diff --git a/hw/mips_jazz.c b/hw/mips_jazz.c
index bf1b799c4d..db927f14d0 100644
--- a/hw/mips_jazz.c
+++ b/hw/mips_jazz.c
@@ -239,7 +239,7 @@ static void mips_jazz_init(MemoryRegion *address_space,
             dp83932_init(nd, 0x80001000, 2, get_system_memory(), rc4030[4],
                          rc4030_opaque, rc4030_dma_memory_rw);
             break;
-        } else if (strcmp(nd->model, "?") == 0) {
+        } else if (is_help_option(nd->model)) {
             fprintf(stderr, "qemu: Supported NICs: dp83932\n");
             exit(1);
         } else {
diff --git a/hw/mips_mipssim.c b/hw/mips_mipssim.c
index eb03047433..830f635597 100644
--- a/hw/mips_mipssim.c
+++ b/hw/mips_mipssim.c
@@ -217,7 +217,7 @@ mips_mipssim_init (ram_addr_t ram_size,
     if (serial_hds[0])
         serial_init(0x3f8, env->irq[4], 115200, serial_hds[0]);
 
-    if (nd_table[0].vlan)
+    if (nd_table[0].used)
         /* MIPSnet uses the MIPS CPU INT0, which is interrupt 2. */
         mipsnet_init(0x4200, env->irq[2], &nd_table[0]);
 }
diff --git a/hw/mips_r4k.c b/hw/mips_r4k.c
index d68599965a..967a76e533 100644
--- a/hw/mips_r4k.c
+++ b/hw/mips_r4k.c
@@ -283,7 +283,7 @@ void mips_r4k_init (ram_addr_t ram_size,
 
     isa_vga_init(isa_bus);
 
-    if (nd_table[0].vlan)
+    if (nd_table[0].used)
         isa_ne2000_init(isa_bus, 0x300, 9, &nd_table[0]);
 
     ide_drive_get(hd, MAX_IDE_BUS);
diff --git a/hw/mipsnet.c b/hw/mipsnet.c
index 31072463f4..28063b1106 100644
--- a/hw/mipsnet.c
+++ b/hw/mipsnet.c
@@ -62,7 +62,7 @@ static int mipsnet_buffer_full(MIPSnetState *s)
     return 0;
 }
 
-static int mipsnet_can_receive(VLANClientState *nc)
+static int mipsnet_can_receive(NetClientState *nc)
 {
     MIPSnetState *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -71,7 +71,7 @@ static int mipsnet_can_receive(VLANClientState *nc)
     return !mipsnet_buffer_full(s);
 }
 
-static ssize_t mipsnet_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t mipsnet_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     MIPSnetState *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -209,7 +209,7 @@ static const VMStateDescription vmstate_mipsnet = {
     }
 };
 
-static void mipsnet_cleanup(VLANClientState *nc)
+static void mipsnet_cleanup(NetClientState *nc)
 {
     MIPSnetState *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -217,7 +217,7 @@ static void mipsnet_cleanup(VLANClientState *nc)
 }
 
 static NetClientInfo net_mipsnet_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = mipsnet_can_receive,
     .receive = mipsnet_receive,
diff --git a/hw/msi.c b/hw/msi.c
index 52332041e7..e2273a09ae 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -105,6 +105,23 @@ static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
     return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
 }
 
+/*
+ * Special API for POWER to configure the vectors through
+ * a side channel. Should never be used by devices.
+ */
+void msi_set_message(PCIDevice *dev, MSIMessage msg)
+{
+    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
+    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
+
+    if (msi64bit) {
+        pci_set_quad(dev->config + msi_address_lo_off(dev), msg.address);
+    } else {
+        pci_set_long(dev->config + msi_address_lo_off(dev), msg.address);
+    }
+    pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
+}
+
 bool msi_enabled(const PCIDevice *dev)
 {
     return msi_present(dev) &&
diff --git a/hw/msi.h b/hw/msi.h
index 75747abc25..6ec1f99f80 100644
--- a/hw/msi.h
+++ b/hw/msi.h
@@ -31,6 +31,7 @@ struct MSIMessage {
 
 extern bool msi_supported;
 
+void msi_set_message(PCIDevice *dev, MSIMessage msg);
 bool msi_enabled(const PCIDevice *dev);
 int msi_init(struct PCIDevice *dev, uint8_t offset,
              unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);
diff --git a/hw/msix.c b/hw/msix.c
index ded3c55b92..800fc32f0b 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -27,17 +27,9 @@
 #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
 #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
 
-/* How much space does an MSIX table need. */
-/* The spec requires giving the table structure
- * a 4K aligned region all by itself. */
-#define MSIX_PAGE_SIZE 0x1000
-/* Reserve second half of the page for pending bits */
-#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
-#define MSIX_MAX_ENTRIES 32
-
 static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
 {
-    uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
+    uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
     MSIMessage msg;
 
     msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
@@ -45,62 +37,17 @@ static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
     return msg;
 }
 
-/* Add MSI-X capability to the config space for the device. */
-/* Given a bar and its size, add MSI-X table on top of it
- * and fill MSI-X capability in the config space.
- * Original bar size must be a power of 2 or 0.
- * New bar size is returned. */
-static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
-                           unsigned bar_nr, unsigned bar_size)
-{
-    int config_offset;
-    uint8_t *config;
-    uint32_t new_size;
-
-    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
-        return -EINVAL;
-    if (bar_size > 0x80000000)
-        return -ENOSPC;
-
-    /* Add space for MSI-X structures */
-    if (!bar_size) {
-        new_size = MSIX_PAGE_SIZE;
-    } else if (bar_size < MSIX_PAGE_SIZE) {
-        bar_size = MSIX_PAGE_SIZE;
-        new_size = MSIX_PAGE_SIZE * 2;
-    } else {
-        new_size = bar_size * 2;
-    }
-
-    pdev->msix_bar_size = new_size;
-    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX,
-                                       0, MSIX_CAP_LENGTH);
-    if (config_offset < 0)
-        return config_offset;
-    config = pdev->config + config_offset;
-
-    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
-    /* Table on top of BAR */
-    pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr);
-    /* Pending bits on top of that */
-    pci_set_long(config + PCI_MSIX_PBA, (bar_size + MSIX_PAGE_PENDING) |
-                 bar_nr);
-    pdev->msix_cap = config_offset;
-    /* Make flags bit writable. */
-    pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
-	    MSIX_MASKALL_MASK;
-    pdev->msix_function_masked = true;
-    return 0;
-}
-
-static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr,
-                               unsigned size)
+/*
+ * Special API for POWER to configure the vectors through
+ * a side channel. Should never be used by devices.
+ */
+void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
 {
-    PCIDevice *dev = opaque;
-    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;
-    void *page = dev->msix_table_page;
+    uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
 
-    return pci_get_long(page + offset);
+    pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
+    pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
+    table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
 }
 
 static uint8_t msix_pending_mask(int vector)
@@ -110,7 +57,7 @@ static uint8_t msix_pending_mask(int vector)
 
 static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
 {
-    return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8;
+    return dev->msix_pba + vector / 8;
 }
 
 static int msix_is_pending(PCIDevice *dev, int vector)
@@ -131,7 +78,7 @@ static void msix_clr_pending(PCIDevice *dev, int vector)
 static bool msix_vector_masked(PCIDevice *dev, int vector, bool fmask)
 {
     unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
-    return fmask || dev->msix_table_page[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
+    return fmask || dev->msix_table[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
 }
 
 static bool msix_is_masked(PCIDevice *dev, int vector)
@@ -210,27 +157,30 @@ void msix_write_config(PCIDevice *dev, uint32_t addr,
     }
 }
 
-static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
-                            uint64_t val, unsigned size)
+static uint64_t msix_table_mmio_read(void *opaque, target_phys_addr_t addr,
+                                     unsigned size)
 {
     PCIDevice *dev = opaque;
-    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;
-    int vector = offset / PCI_MSIX_ENTRY_SIZE;
-    bool was_masked;
 
-    /* MSI-X page includes a read-only PBA and a writeable Vector Control. */
-    if (vector >= dev->msix_entries_nr) {
-        return;
-    }
+    return pci_get_long(dev->msix_table + addr);
+}
+
+static void msix_table_mmio_write(void *opaque, target_phys_addr_t addr,
+                                  uint64_t val, unsigned size)
+{
+    PCIDevice *dev = opaque;
+    int vector = addr / PCI_MSIX_ENTRY_SIZE;
+    bool was_masked;
 
     was_masked = msix_is_masked(dev, vector);
-    pci_set_long(dev->msix_table_page + offset, val);
+    pci_set_long(dev->msix_table + addr, val);
     msix_handle_mask_update(dev, vector, was_masked);
 }
 
-static const MemoryRegionOps msix_mmio_ops = {
-    .read = msix_mmio_read,
-    .write = msix_mmio_write,
+static const MemoryRegionOps msix_table_mmio_ops = {
+    .read = msix_table_mmio_read,
+    .write = msix_table_mmio_write,
+    /* TODO: MSIX should be LITTLE_ENDIAN. */
     .endianness = DEVICE_NATIVE_ENDIAN,
     .valid = {
         .min_access_size = 4,
@@ -238,17 +188,24 @@ static const MemoryRegionOps msix_mmio_ops = {
     },
 };
 
-static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar)
+static uint64_t msix_pba_mmio_read(void *opaque, target_phys_addr_t addr,
+                                   unsigned size)
 {
-    uint8_t *config = d->config + d->msix_cap;
-    uint32_t table = pci_get_long(config + PCI_MSIX_TABLE);
-    uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1);
-    /* TODO: for assigned devices, we'll want to make it possible to map
-     * pending bits separately in case they are in a separate bar. */
+    PCIDevice *dev = opaque;
 
-    memory_region_add_subregion(bar, offset, &d->msix_mmio);
+    return pci_get_long(dev->msix_pba + addr);
 }
 
+static const MemoryRegionOps msix_pba_mmio_ops = {
+    .read = msix_pba_mmio_read,
+    /* TODO: MSIX should be LITTLE_ENDIAN. */
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
 static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
 {
     int vector;
@@ -258,52 +215,119 @@ static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
             vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
         bool was_masked = msix_is_masked(dev, vector);
 
-        dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
+        dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
         msix_handle_mask_update(dev, vector, was_masked);
     }
 }
 
-/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
- * modified, it should be retrieved with msix_bar_size. */
+/* Initialize the MSI-X structures */
 int msix_init(struct PCIDevice *dev, unsigned short nentries,
-              MemoryRegion *bar,
-              unsigned bar_nr, unsigned bar_size)
+              MemoryRegion *table_bar, uint8_t table_bar_nr,
+              unsigned table_offset, MemoryRegion *pba_bar,
+              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos)
 {
-    int ret;
+    int cap;
+    unsigned table_size, pba_size;
+    uint8_t *config;
 
     /* Nothing to do if MSI is not supported by interrupt controller */
     if (!msi_supported) {
         return -ENOTSUP;
     }
-    if (nentries > MSIX_MAX_ENTRIES)
+
+    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
         return -EINVAL;
+    }
 
-    dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES *
-                                        sizeof *dev->msix_entry_used);
+    table_size = nentries * PCI_MSIX_ENTRY_SIZE;
+    pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
 
-    dev->msix_table_page = g_malloc0(MSIX_PAGE_SIZE);
-    msix_mask_all(dev, nentries);
+    /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */
+    if ((table_bar_nr == pba_bar_nr &&
+         ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
+        table_offset + table_size > memory_region_size(table_bar) ||
+        pba_offset + pba_size > memory_region_size(pba_bar) ||
+        (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
+        return -EINVAL;
+    }
 
-    memory_region_init_io(&dev->msix_mmio, &msix_mmio_ops, dev,
-                          "msix", MSIX_PAGE_SIZE);
+    cap = pci_add_capability(dev, PCI_CAP_ID_MSIX, cap_pos, MSIX_CAP_LENGTH);
+    if (cap < 0) {
+        return cap;
+    }
+
+    dev->msix_cap = cap;
+    dev->cap_present |= QEMU_PCI_CAP_MSIX;
+    config = dev->config + cap;
 
+    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
     dev->msix_entries_nr = nentries;
-    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
-    if (ret)
-        goto err_config;
+    dev->msix_function_masked = true;
+
+    pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar_nr);
+    pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar_nr);
+
+    /* Make flags bit writable. */
+    dev->wmask[cap + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
+                                             MSIX_MASKALL_MASK;
+
+    dev->msix_table = g_malloc0(table_size);
+    dev->msix_pba = g_malloc0(pba_size);
+    dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
+
+    msix_mask_all(dev, nentries);
+
+    memory_region_init_io(&dev->msix_table_mmio, &msix_table_mmio_ops, dev,
+                          "msix-table", table_size);
+    memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio);
+    memory_region_init_io(&dev->msix_pba_mmio, &msix_pba_mmio_ops, dev,
+                          "msix-pba", pba_size);
+    memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
 
-    dev->cap_present |= QEMU_PCI_CAP_MSIX;
-    msix_mmio_setup(dev, bar);
     return 0;
+}
 
-err_config:
-    dev->msix_entries_nr = 0;
-    memory_region_destroy(&dev->msix_mmio);
-    g_free(dev->msix_table_page);
-    dev->msix_table_page = NULL;
-    g_free(dev->msix_entry_used);
-    dev->msix_entry_used = NULL;
-    return ret;
+int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
+                            uint8_t bar_nr)
+{
+    int ret;
+    char *name;
+
+    /*
+     * Migration compatibility dictates that this remains a 4k
+     * BAR with the vector table in the lower half and PBA in
+     * the upper half.  Do not use these elsewhere!
+     */
+#define MSIX_EXCLUSIVE_BAR_SIZE 4096
+#define MSIX_EXCLUSIVE_BAR_TABLE_OFFSET 0
+#define MSIX_EXCLUSIVE_BAR_PBA_OFFSET (MSIX_EXCLUSIVE_BAR_SIZE / 2)
+#define MSIX_EXCLUSIVE_CAP_OFFSET 0
+
+    if (nentries * PCI_MSIX_ENTRY_SIZE > MSIX_EXCLUSIVE_BAR_PBA_OFFSET) {
+        return -EINVAL;
+    }
+
+    if (asprintf(&name, "%s-msix", dev->name) == -1) {
+        return -ENOMEM;
+    }
+
+    memory_region_init(&dev->msix_exclusive_bar, name, MSIX_EXCLUSIVE_BAR_SIZE);
+
+    free(name);
+
+    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr,
+                    MSIX_EXCLUSIVE_BAR_TABLE_OFFSET, &dev->msix_exclusive_bar,
+                    bar_nr, MSIX_EXCLUSIVE_BAR_PBA_OFFSET,
+                    MSIX_EXCLUSIVE_CAP_OFFSET);
+    if (ret) {
+        memory_region_destroy(&dev->msix_exclusive_bar);
+        return ret;
+    }
+
+    pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY,
+                     &dev->msix_exclusive_bar);
+
+    return 0;
 }
 
 static void msix_free_irq_entries(PCIDevice *dev)
@@ -317,23 +341,35 @@ static void msix_free_irq_entries(PCIDevice *dev)
 }
 
 /* Clean up resources for the device. */
-int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
+void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
 {
     if (!msix_present(dev)) {
-        return 0;
+        return;
     }
     pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
     dev->msix_cap = 0;
     msix_free_irq_entries(dev);
     dev->msix_entries_nr = 0;
-    memory_region_del_subregion(bar, &dev->msix_mmio);
-    memory_region_destroy(&dev->msix_mmio);
-    g_free(dev->msix_table_page);
-    dev->msix_table_page = NULL;
+    memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
+    memory_region_destroy(&dev->msix_pba_mmio);
+    g_free(dev->msix_pba);
+    dev->msix_pba = NULL;
+    memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
+    memory_region_destroy(&dev->msix_table_mmio);
+    g_free(dev->msix_table);
+    dev->msix_table = NULL;
     g_free(dev->msix_entry_used);
     dev->msix_entry_used = NULL;
     dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
-    return 0;
+    return;
+}
+
+void msix_uninit_exclusive_bar(PCIDevice *dev)
+{
+    if (msix_present(dev)) {
+        msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar);
+        memory_region_destroy(&dev->msix_exclusive_bar);
+    }
 }
 
 void msix_save(PCIDevice *dev, QEMUFile *f)
@@ -344,8 +380,8 @@ void msix_save(PCIDevice *dev, QEMUFile *f)
         return;
     }
 
-    qemu_put_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
-    qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
+    qemu_put_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
+    qemu_put_buffer(f, dev->msix_pba, (n + 7) / 8);
 }
 
 /* Should be called after restoring the config space. */
@@ -359,8 +395,8 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
     }
 
     msix_free_irq_entries(dev);
-    qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
-    qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
+    qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
+    qemu_get_buffer(f, dev->msix_pba, (n + 7) / 8);
     msix_update_function_masked(dev);
 
     for (vector = 0; vector < n; vector++) {
@@ -382,13 +418,6 @@ int msix_enabled(PCIDevice *dev)
          MSIX_ENABLE_MASK);
 }
 
-/* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */
-uint32_t msix_bar_size(PCIDevice *dev)
-{
-    return (dev->cap_present & QEMU_PCI_CAP_MSIX) ?
-        dev->msix_bar_size : 0;
-}
-
 /* Send an MSI-X message */
 void msix_notify(PCIDevice *dev, unsigned vector)
 {
@@ -414,7 +443,8 @@ void msix_reset(PCIDevice *dev)
     msix_free_irq_entries(dev);
     dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
 	    ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
-    memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
+    memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
+    memset(dev->msix_pba, 0, QEMU_ALIGN_UP(dev->msix_entries_nr, 64) / 8);
     msix_mask_all(dev, dev->msix_entries_nr);
 }
 
diff --git a/hw/msix.h b/hw/msix.h
index 50aee8221a..15211cb592 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -4,14 +4,19 @@
 #include "qemu-common.h"
 #include "pci.h"
 
-int msix_init(PCIDevice *pdev, unsigned short nentries,
-              MemoryRegion *bar,
-              unsigned bar_nr, unsigned bar_size);
+void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
+int msix_init(PCIDevice *dev, unsigned short nentries,
+              MemoryRegion *table_bar, uint8_t table_bar_nr,
+              unsigned table_offset, MemoryRegion *pba_bar,
+              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos);
+int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
+                            uint8_t bar_nr);
 
-void msix_write_config(PCIDevice *pci_dev, uint32_t address,
-                       uint32_t val, int len);
+void msix_write_config(PCIDevice *dev, uint32_t address, uint32_t val, int len);
 
-int msix_uninit(PCIDevice *d, MemoryRegion *bar);
+void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar,
+                 MemoryRegion *pba_bar);
+void msix_uninit_exclusive_bar(PCIDevice *dev);
 
 unsigned int msix_nr_vectors_allocated(const PCIDevice *dev);
 
@@ -21,8 +26,6 @@ void msix_load(PCIDevice *dev, QEMUFile *f);
 int msix_enabled(PCIDevice *dev);
 int msix_present(PCIDevice *dev);
 
-uint32_t msix_bar_size(PCIDevice *dev);
-
 int msix_vector_use(PCIDevice *dev, unsigned vector);
 void msix_vector_unuse(PCIDevice *dev, unsigned vector);
 void msix_unuse_all_vectors(PCIDevice *dev);
diff --git a/hw/musicpal.c b/hw/musicpal.c
index f14f20d689..ad725b5599 100644
--- a/hw/musicpal.c
+++ b/hw/musicpal.c
@@ -182,12 +182,12 @@ static void eth_rx_desc_get(uint32_t addr, mv88w8618_rx_desc *desc)
     le32_to_cpus(&desc->next);
 }
 
-static int eth_can_receive(VLANClientState *nc)
+static int eth_can_receive(NetClientState *nc)
 {
     return 1;
 }
 
-static ssize_t eth_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t eth_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     mv88w8618_eth_state *s = DO_UPCAST(NICState, nc, nc)->opaque;
     uint32_t desc_addr;
@@ -366,7 +366,7 @@ static const MemoryRegionOps mv88w8618_eth_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void eth_cleanup(VLANClientState *nc)
+static void eth_cleanup(NetClientState *nc)
 {
     mv88w8618_eth_state *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -374,7 +374,7 @@ static void eth_cleanup(VLANClientState *nc)
 }
 
 static NetClientInfo net_mv88w8618_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = eth_can_receive,
     .receive = eth_receive,
diff --git a/hw/ne2000-isa.c b/hw/ne2000-isa.c
index a4a783ab89..69982a9abb 100644
--- a/hw/ne2000-isa.c
+++ b/hw/ne2000-isa.c
@@ -36,7 +36,7 @@ typedef struct ISANE2000State {
     NE2000State ne2000;
 } ISANE2000State;
 
-static void isa_ne2000_cleanup(VLANClientState *nc)
+static void isa_ne2000_cleanup(NetClientState *nc)
 {
     NE2000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -44,7 +44,7 @@ static void isa_ne2000_cleanup(VLANClientState *nc)
 }
 
 static NetClientInfo net_ne2000_isa_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = ne2000_can_receive,
     .receive = ne2000_receive,
diff --git a/hw/ne2000.c b/hw/ne2000.c
index d02e60c4a6..15605c478f 100644
--- a/hw/ne2000.c
+++ b/hw/ne2000.c
@@ -165,7 +165,7 @@ static int ne2000_buffer_full(NE2000State *s)
     return 0;
 }
 
-int ne2000_can_receive(VLANClientState *nc)
+int ne2000_can_receive(NetClientState *nc)
 {
     NE2000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -176,7 +176,7 @@ int ne2000_can_receive(VLANClientState *nc)
 
 #define MIN_BUF_SIZE 60
 
-ssize_t ne2000_receive(VLANClientState *nc, const uint8_t *buf, size_t size_)
+ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
 {
     NE2000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
     int size = size_;
@@ -677,15 +677,15 @@ static void ne2000_write(void *opaque, target_phys_addr_t addr,
     NE2000State *s = opaque;
 
     if (addr < 0x10 && size == 1) {
-        return ne2000_ioport_write(s, addr, data);
+        ne2000_ioport_write(s, addr, data);
     } else if (addr == 0x10) {
         if (size <= 2) {
-            return ne2000_asic_ioport_write(s, addr, data);
+            ne2000_asic_ioport_write(s, addr, data);
         } else {
-            return ne2000_asic_ioport_writel(s, addr, data);
+            ne2000_asic_ioport_writel(s, addr, data);
         }
     } else if (addr == 0x1f && size == 1) {
-        return ne2000_reset_ioport_write(s, addr, data);
+        ne2000_reset_ioport_write(s, addr, data);
     }
 }
 
@@ -703,7 +703,7 @@ void ne2000_setup_io(NE2000State *s, unsigned size)
     memory_region_init_io(&s->io, &ne2000_ops, s, "ne2000", size);
 }
 
-static void ne2000_cleanup(VLANClientState *nc)
+static void ne2000_cleanup(NetClientState *nc)
 {
     NE2000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -711,7 +711,7 @@ static void ne2000_cleanup(VLANClientState *nc)
 }
 
 static NetClientInfo net_ne2000_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = ne2000_can_receive,
     .receive = ne2000_receive,
@@ -744,14 +744,13 @@ static int pci_ne2000_init(PCIDevice *pci_dev)
     return 0;
 }
 
-static int pci_ne2000_exit(PCIDevice *pci_dev)
+static void pci_ne2000_exit(PCIDevice *pci_dev)
 {
     PCINE2000State *d = DO_UPCAST(PCINE2000State, dev, pci_dev);
     NE2000State *s = &d->ne2000;
 
     memory_region_destroy(&s->io);
-    qemu_del_vlan_client(&s->nic->nc);
-    return 0;
+    qemu_del_net_client(&s->nic->nc);
 }
 
 static Property ne2000_properties[] = {
diff --git a/hw/ne2000.h b/hw/ne2000.h
index 5fee052194..1e7ab073e3 100644
--- a/hw/ne2000.h
+++ b/hw/ne2000.h
@@ -31,5 +31,5 @@ typedef struct NE2000State {
 void ne2000_setup_io(NE2000State *s, unsigned size);
 extern const VMStateDescription vmstate_ne2000;
 void ne2000_reset(NE2000State *s);
-int ne2000_can_receive(VLANClientState *vc);
-ssize_t ne2000_receive(VLANClientState *vc, const uint8_t *buf, size_t size_);
+int ne2000_can_receive(NetClientState *nc);
+ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_);
diff --git a/hw/omap.h b/hw/omap.h
index 3d98941b72..413851bc34 100644
--- a/hw/omap.h
+++ b/hw/omap.h
@@ -942,13 +942,7 @@ struct omap_mpu_state_s *omap2420_mpu_init(MemoryRegion *sysmem,
                 unsigned long sdram_size,
                 const char *core);
 
-# if TARGET_PHYS_ADDR_BITS == 32
-#  define OMAP_FMT_plx "%#08x"
-# elif TARGET_PHYS_ADDR_BITS == 64
-#  define OMAP_FMT_plx "%#08" PRIx64
-# else
-#  error TARGET_PHYS_ADDR_BITS undefined
-# endif
+#define OMAP_FMT_plx "%#08" TARGET_PRIxPHYS
 
 uint32_t omap_badwidth_read8(void *opaque, target_phys_addr_t addr);
 void omap_badwidth_write8(void *opaque, target_phys_addr_t addr,
diff --git a/hw/opencores_eth.c b/hw/opencores_eth.c
index 350f73173a..8c15969e2b 100644
--- a/hw/opencores_eth.c
+++ b/hw/opencores_eth.c
@@ -311,7 +311,7 @@ static void open_eth_int_source_write(OpenEthState *s,
             s->regs[INT_SOURCE] & s->regs[INT_MASK]);
 }
 
-static void open_eth_set_link_status(VLANClientState *nc)
+static void open_eth_set_link_status(NetClientState *nc)
 {
     OpenEthState *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -342,7 +342,7 @@ static void open_eth_reset(void *opaque)
     open_eth_set_link_status(&s->nic->nc);
 }
 
-static int open_eth_can_receive(VLANClientState *nc)
+static int open_eth_can_receive(NetClientState *nc)
 {
     OpenEthState *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -351,7 +351,7 @@ static int open_eth_can_receive(VLANClientState *nc)
         (rx_desc(s)->len_flags & RXD_E);
 }
 
-static ssize_t open_eth_receive(VLANClientState *nc,
+static ssize_t open_eth_receive(NetClientState *nc,
         const uint8_t *buf, size_t size)
 {
     OpenEthState *s = DO_UPCAST(NICState, nc, nc)->opaque;
@@ -462,12 +462,12 @@ static ssize_t open_eth_receive(VLANClientState *nc,
     return size;
 }
 
-static void open_eth_cleanup(VLANClientState *nc)
+static void open_eth_cleanup(NetClientState *nc)
 {
 }
 
 static NetClientInfo net_open_eth_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = open_eth_can_receive,
     .receive = open_eth_receive,
diff --git a/hw/openrisc/Makefile.objs b/hw/openrisc/Makefile.objs
new file mode 100644
index 0000000000..38ff8f5d6d
--- /dev/null
+++ b/hw/openrisc/Makefile.objs
@@ -0,0 +1,3 @@
+obj-y = openrisc_pic.o openrisc_sim.o openrisc_timer.o
+
+obj-y := $(addprefix ../,$(obj-y))
diff --git a/hw/openrisc_pic.c b/hw/openrisc_pic.c
new file mode 100644
index 0000000000..aaeb9a9171
--- /dev/null
+++ b/hw/openrisc_pic.c
@@ -0,0 +1,60 @@
+/*
+ * OpenRISC Programmable Interrupt Controller support.
+ *
+ * Copyright (c) 2011-2012 Jia Liu <proljc@gmail.com>
+ *                         Feng Gao <gf91597@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw.h"
+#include "cpu.h"
+
+/* OpenRISC pic handler */
+static void openrisc_pic_cpu_handler(void *opaque, int irq, int level)
+{
+    OpenRISCCPU *cpu = (OpenRISCCPU *)opaque;
+    int i;
+    uint32_t irq_bit = 1 << irq;
+
+    if (irq > 31 || irq < 0) {
+        return;
+    }
+
+    if (level) {
+        cpu->env.picsr |= irq_bit;
+    } else {
+        cpu->env.picsr &= ~irq_bit;
+    }
+
+    for (i = 0; i < 32; i++) {
+        if ((cpu->env.picsr && (1 << i)) && (cpu->env.picmr && (1 << i))) {
+            cpu_interrupt(&cpu->env, CPU_INTERRUPT_HARD);
+        } else {
+            cpu_reset_interrupt(&cpu->env, CPU_INTERRUPT_HARD);
+            cpu->env.picsr &= ~(1 << i);
+        }
+    }
+}
+
+void cpu_openrisc_pic_init(OpenRISCCPU *cpu)
+{
+    int i;
+    qemu_irq *qi;
+    qi = qemu_allocate_irqs(openrisc_pic_cpu_handler, cpu, NR_IRQS);
+
+    for (i = 0; i < NR_IRQS; i++) {
+        cpu->env.irq[i] = qi[i];
+    }
+}
diff --git a/hw/openrisc_sim.c b/hw/openrisc_sim.c
new file mode 100644
index 0000000000..55e97f0959
--- /dev/null
+++ b/hw/openrisc_sim.c
@@ -0,0 +1,150 @@
+/*
+ * OpenRISC simulator for use as an IIS.
+ *
+ * Copyright (c) 2011-2012 Jia Liu <proljc@gmail.com>
+ *                         Feng Gao <gf91597@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw.h"
+#include "boards.h"
+#include "elf.h"
+#include "pc.h"
+#include "loader.h"
+#include "exec-memory.h"
+#include "sysemu.h"
+#include "sysbus.h"
+#include "qtest.h"
+
+#define KERNEL_LOAD_ADDR 0x100
+
+static void main_cpu_reset(void *opaque)
+{
+    OpenRISCCPU *cpu = opaque;
+
+    cpu_reset(CPU(cpu));
+}
+
+static void openrisc_sim_net_init(MemoryRegion *address_space,
+                                  target_phys_addr_t base,
+                                  target_phys_addr_t descriptors,
+                                  qemu_irq irq, NICInfo *nd)
+{
+    DeviceState *dev;
+    SysBusDevice *s;
+
+    dev = qdev_create(NULL, "open_eth");
+    qdev_set_nic_properties(dev, nd);
+    qdev_init_nofail(dev);
+
+    s = sysbus_from_qdev(dev);
+    sysbus_connect_irq(s, 0, irq);
+    memory_region_add_subregion(address_space, base,
+                                sysbus_mmio_get_region(s, 0));
+    memory_region_add_subregion(address_space, descriptors,
+                                sysbus_mmio_get_region(s, 1));
+}
+
+static void cpu_openrisc_load_kernel(ram_addr_t ram_size,
+                                     const char *kernel_filename,
+                                     OpenRISCCPU *cpu)
+{
+    long kernel_size;
+    uint64_t elf_entry;
+    target_phys_addr_t entry;
+
+    if (kernel_filename && !qtest_enabled()) {
+        kernel_size = load_elf(kernel_filename, NULL, NULL,
+                               &elf_entry, NULL, NULL, 1, ELF_MACHINE, 1);
+        entry = elf_entry;
+        if (kernel_size < 0) {
+            kernel_size = load_uimage(kernel_filename,
+                                      &entry, NULL, NULL);
+        }
+        if (kernel_size < 0) {
+            kernel_size = load_image_targphys(kernel_filename,
+                                              KERNEL_LOAD_ADDR,
+                                              ram_size - KERNEL_LOAD_ADDR);
+            entry = KERNEL_LOAD_ADDR;
+        }
+
+        if (kernel_size < 0) {
+            qemu_log("QEMU: couldn't load the kernel '%s'\n",
+                    kernel_filename);
+            exit(1);
+        }
+    }
+
+    cpu->env.pc = entry;
+}
+
+static void openrisc_sim_init(ram_addr_t ram_size,
+                              const char *boot_device,
+                              const char *kernel_filename,
+                              const char *kernel_cmdline,
+                              const char *initrd_filename,
+                              const char *cpu_model)
+{
+   OpenRISCCPU *cpu = NULL;
+    MemoryRegion *ram;
+    int n;
+
+    if (!cpu_model) {
+        cpu_model = "or1200";
+    }
+
+    for (n = 0; n < smp_cpus; n++) {
+        cpu = cpu_openrisc_init(cpu_model);
+        if (cpu == NULL) {
+            qemu_log("Unable to find CPU defineition!\n");
+            exit(1);
+        }
+        qemu_register_reset(main_cpu_reset, cpu);
+        main_cpu_reset(cpu);
+    }
+
+    ram = g_malloc(sizeof(*ram));
+    memory_region_init_ram(ram, "openrisc.ram", ram_size);
+    vmstate_register_ram_global(ram);
+    memory_region_add_subregion(get_system_memory(), 0, ram);
+
+    cpu_openrisc_pic_init(cpu);
+    cpu_openrisc_clock_init(cpu);
+
+    serial_mm_init(get_system_memory(), 0x90000000, 0, cpu->env.irq[2],
+                   115200, serial_hds[0], DEVICE_NATIVE_ENDIAN);
+
+    if (nd_table[0].used) {
+        openrisc_sim_net_init(get_system_memory(), 0x92000000,
+                              0x92000400, cpu->env.irq[4], nd_table);
+    }
+
+    cpu_openrisc_load_kernel(ram_size, kernel_filename, cpu);
+}
+
+static QEMUMachine openrisc_sim_machine = {
+    .name = "or32-sim",
+    .desc = "or32 simulation",
+    .init = openrisc_sim_init,
+    .max_cpus = 1,
+    .is_default = 1,
+};
+
+static void openrisc_sim_machine_init(void)
+{
+    qemu_register_machine(&openrisc_sim_machine);
+}
+
+machine_init(openrisc_sim_machine_init);
diff --git a/hw/openrisc_timer.c b/hw/openrisc_timer.c
new file mode 100644
index 0000000000..7916e61d24
--- /dev/null
+++ b/hw/openrisc_timer.c
@@ -0,0 +1,101 @@
+/*
+ * QEMU OpenRISC timer support
+ *
+ * Copyright (c) 2011-2012 Jia Liu <proljc@gmail.com>
+ *                         Zhizhou Zhang <etouzh@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "hw.h"
+#include "qemu-timer.h"
+
+#define TIMER_FREQ    (20 * 1000 * 1000)    /* 20MHz */
+
+/* The time when TTCR changes */
+static uint64_t last_clk;
+static int is_counting;
+
+void cpu_openrisc_count_update(OpenRISCCPU *cpu)
+{
+    uint64_t now, next;
+    uint32_t wait;
+
+    now = qemu_get_clock_ns(vm_clock);
+    if (!is_counting) {
+        qemu_del_timer(cpu->env.timer);
+        last_clk = now;
+        return;
+    }
+
+    cpu->env.ttcr += (uint32_t)muldiv64(now - last_clk, TIMER_FREQ,
+                                        get_ticks_per_sec());
+    last_clk = now;
+
+    if ((cpu->env.ttmr & TTMR_TP) <= (cpu->env.ttcr & TTMR_TP)) {
+        wait = TTMR_TP - (cpu->env.ttcr & TTMR_TP) + 1;
+        wait += cpu->env.ttmr & TTMR_TP;
+    } else {
+        wait = (cpu->env.ttmr & TTMR_TP) - (cpu->env.ttcr & TTMR_TP);
+    }
+
+    next = now + muldiv64(wait, get_ticks_per_sec(), TIMER_FREQ);
+    qemu_mod_timer(cpu->env.timer, next);
+}
+
+void cpu_openrisc_count_start(OpenRISCCPU *cpu)
+{
+    is_counting = 1;
+    cpu_openrisc_count_update(cpu);
+}
+
+void cpu_openrisc_count_stop(OpenRISCCPU *cpu)
+{
+    is_counting = 0;
+    cpu_openrisc_count_update(cpu);
+}
+
+static void openrisc_timer_cb(void *opaque)
+{
+    OpenRISCCPU *cpu = opaque;
+
+    if ((cpu->env.ttmr & TTMR_IE) &&
+         qemu_timer_expired(cpu->env.timer, qemu_get_clock_ns(vm_clock))) {
+        cpu->env.ttmr |= TTMR_IP;
+        cpu->env.interrupt_request |= CPU_INTERRUPT_TIMER;
+    }
+
+    switch (cpu->env.ttmr & TTMR_M) {
+    case TIMER_NONE:
+        break;
+    case TIMER_INTR:
+        cpu->env.ttcr = 0;
+        cpu_openrisc_count_start(cpu);
+        break;
+    case TIMER_SHOT:
+        cpu_openrisc_count_stop(cpu);
+        break;
+    case TIMER_CONT:
+        cpu_openrisc_count_start(cpu);
+        break;
+    }
+}
+
+void cpu_openrisc_clock_init(OpenRISCCPU *cpu)
+{
+    cpu->env.timer = qemu_new_timer_ns(vm_clock, &openrisc_timer_cb, cpu);
+    cpu->env.ttmr = 0x00000000;
+    cpu->env.ttcr = 0x00000000;
+}
diff --git a/hw/pc.c b/hw/pc.c
index c7e9ab3ee1..81c391cd6a 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -44,10 +44,12 @@
 #include "kvm.h"
 #include "xen.h"
 #include "blockdev.h"
+#include "hw/block-common.h"
 #include "ui/qemu-spice.h"
 #include "memory.h"
 #include "exec-memory.h"
 #include "arch_init.h"
+#include "bitmap.h"
 
 /* output Bochs bios info messages */
 //#define DEBUG_BIOS
@@ -216,11 +218,9 @@ static int cmos_get_fd_drive_type(FDriveType fd0)
     return val;
 }
 
-static void cmos_init_hd(int type_ofs, int info_ofs, BlockDriverState *hd,
-                         ISADevice *s)
+static void cmos_init_hd(ISADevice *s, int type_ofs, int info_ofs,
+                         int16_t cylinders, int8_t heads, int8_t sectors)
 {
-    int cylinders, heads, sectors;
-    bdrv_get_geometry_hint(hd, &cylinders, &heads, &sectors);
     rtc_set_memory(s, type_ofs, 47);
     rtc_set_memory(s, info_ofs, cylinders);
     rtc_set_memory(s, info_ofs + 1, cylinders >> 8);
@@ -281,48 +281,42 @@ static int pc_boot_set(void *opaque, const char *boot_device)
 
 typedef struct pc_cmos_init_late_arg {
     ISADevice *rtc_state;
-    BusState *idebus0, *idebus1;
+    BusState *idebus[2];
 } pc_cmos_init_late_arg;
 
 static void pc_cmos_init_late(void *opaque)
 {
     pc_cmos_init_late_arg *arg = opaque;
     ISADevice *s = arg->rtc_state;
+    int16_t cylinders;
+    int8_t heads, sectors;
     int val;
-    BlockDriverState *hd_table[4];
-    int i;
-
-    ide_get_bs(hd_table, arg->idebus0);
-    ide_get_bs(hd_table + 2, arg->idebus1);
+    int i, trans;
 
-    rtc_set_memory(s, 0x12, (hd_table[0] ? 0xf0 : 0) | (hd_table[1] ? 0x0f : 0));
-    if (hd_table[0])
-        cmos_init_hd(0x19, 0x1b, hd_table[0], s);
-    if (hd_table[1])
-        cmos_init_hd(0x1a, 0x24, hd_table[1], s);
+    val = 0;
+    if (ide_get_geometry(arg->idebus[0], 0,
+                         &cylinders, &heads, &sectors) >= 0) {
+        cmos_init_hd(s, 0x19, 0x1b, cylinders, heads, sectors);
+        val |= 0xf0;
+    }
+    if (ide_get_geometry(arg->idebus[0], 1,
+                         &cylinders, &heads, &sectors) >= 0) {
+        cmos_init_hd(s, 0x1a, 0x24, cylinders, heads, sectors);
+        val |= 0x0f;
+    }
+    rtc_set_memory(s, 0x12, val);
 
     val = 0;
     for (i = 0; i < 4; i++) {
-        if (hd_table[i]) {
-            int cylinders, heads, sectors, translation;
-            /* NOTE: bdrv_get_geometry_hint() returns the physical
-                geometry.  It is always such that: 1 <= sects <= 63, 1
-                <= heads <= 16, 1 <= cylinders <= 16383. The BIOS
-                geometry can be different if a translation is done. */
-            translation = bdrv_get_translation_hint(hd_table[i]);
-            if (translation == BIOS_ATA_TRANSLATION_AUTO) {
-                bdrv_get_geometry_hint(hd_table[i], &cylinders, &heads, &sectors);
-                if (cylinders <= 1024 && heads <= 16 && sectors <= 63) {
-                    /* No translation. */
-                    translation = 0;
-                } else {
-                    /* LBA translation. */
-                    translation = 1;
-                }
-            } else {
-                translation--;
-            }
-            val |= translation << (i * 2);
+        /* NOTE: ide_get_geometry() returns the physical
+           geometry.  It is always such that: 1 <= sects <= 63, 1
+           <= heads <= 16, 1 <= cylinders <= 16383. The BIOS
+           geometry can be different if a translation is done. */
+        if (ide_get_geometry(arg->idebus[i / 2], i % 2,
+                             &cylinders, &heads, &sectors) >= 0) {
+            trans = ide_get_bios_chs_trans(arg->idebus[i / 2], i % 2) - 1;
+            assert((trans & ~3) == 0);
+            val |= trans << (i * 2);
         }
     }
     rtc_set_memory(s, 0x39, val);
@@ -335,10 +329,8 @@ void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
                   ISADevice *floppy, BusState *idebus0, BusState *idebus1,
                   ISADevice *s)
 {
-    int val, nb, nb_heads, max_track, last_sect, i;
+    int val, nb, i;
     FDriveType fd_type[2] = { FDRIVE_DRV_NONE, FDRIVE_DRV_NONE };
-    FDriveRate rate;
-    BlockDriverState *fd[MAX_FD];
     static pc_cmos_init_late_arg arg;
 
     /* various important CMOS locations needed by PC/Bochs bios */
@@ -381,13 +373,8 @@ void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
 
     /* floppy type */
     if (floppy) {
-        fdc_get_bs(fd, floppy);
         for (i = 0; i < 2; i++) {
-            if (fd[i]) {
-                bdrv_get_floppy_geometry_hint(fd[i], &nb_heads, &max_track,
-                                              &last_sect, FDRIVE_DRV_NONE,
-                                              &fd_type[i], &rate);
-            }
+            fd_type[i] = isa_fdc_get_drive_type(floppy, i);
         }
     }
     val = (cmos_get_fd_drive_type(fd_type[0]) << 4) |
@@ -418,8 +405,8 @@ void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
 
     /* hard drives */
     arg.rtc_state = s;
-    arg.idebus0 = idebus0;
-    arg.idebus1 = idebus1;
+    arg.idebus[0] = idebus0;
+    arg.idebus[1] = idebus1;
     qemu_register_reset(pc_cmos_init_late, &arg);
 }
 
@@ -639,7 +626,7 @@ static void *bochs_bios_init(void)
     numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
     for (i = 0; i < max_cpus; i++) {
         for (j = 0; j < nb_numa_nodes; j++) {
-            if (node_cpumask[j] & (1 << i)) {
+            if (test_bit(i, node_cpumask[j])) {
                 numa_fw_cfg[i + 1] = cpu_to_le64(j);
                 break;
             }
@@ -871,12 +858,6 @@ void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
     nb_ne2k++;
 }
 
-int cpu_is_bsp(CPUX86State *env)
-{
-    /* We hard-wire the BSP to the first CPU. */
-    return env->cpu_index == 0;
-}
-
 DeviceState *cpu_get_current_apic(void)
 {
     if (cpu_single_env) {
@@ -924,15 +905,6 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
     }
 }
 
-static void pc_cpu_reset(void *opaque)
-{
-    X86CPU *cpu = opaque;
-    CPUX86State *env = &cpu->env;
-
-    cpu_reset(CPU(cpu));
-    env->halted = !cpu_is_bsp(env);
-}
-
 static X86CPU *pc_new_cpu(const char *cpu_model)
 {
     X86CPU *cpu;
@@ -947,8 +919,7 @@ static X86CPU *pc_new_cpu(const char *cpu_model)
     if ((env->cpuid_features & CPUID_APIC) || smp_cpus > 1) {
         env->apic_state = apic_init(env, env->cpuid_apic_id);
     }
-    qemu_register_reset(pc_cpu_reset, cpu);
-    pc_cpu_reset(cpu);
+    cpu_reset(CPU(cpu));
     return cpu;
 }
 
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index eae258cefd..0c0096fd7e 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -349,8 +349,8 @@ static void pc_xen_hvm_init(ram_addr_t ram_size,
 }
 #endif
 
-static QEMUMachine pc_machine_v1_1 = {
-    .name = "pc-1.1",
+static QEMUMachine pc_machine_v1_2 = {
+    .name = "pc-1.2",
     .alias = "pc",
     .desc = "Standard PC",
     .init = pc_init_pci,
@@ -358,7 +358,38 @@ static QEMUMachine pc_machine_v1_1 = {
     .is_default = 1,
 };
 
+#define PC_COMPAT_1_1 \
+        {\
+            .driver   = "VGA",\
+            .property = "vgamem_mb",\
+            .value    = stringify(8),\
+        },{\
+            .driver   = "vmware-svga",\
+            .property = "vgamem_mb",\
+            .value    = stringify(8),\
+        },{\
+            .driver   = "qxl-vga",\
+            .property = "vgamem_mb",\
+            .value    = stringify(8),\
+        },{\
+            .driver   = "qxl",\
+            .property = "vgamem_mb",\
+            .value    = stringify(8),\
+        }
+
+static QEMUMachine pc_machine_v1_1 = {
+    .name = "pc-1.1",
+    .desc = "Standard PC",
+    .init = pc_init_pci,
+    .max_cpus = 255,
+    .compat_props = (GlobalProperty[]) {
+        PC_COMPAT_1_1,
+        { /* end of list */ }
+    },
+};
+
 #define PC_COMPAT_1_0 \
+        PC_COMPAT_1_1,\
         {\
             .driver   = "pc-sysfw",\
             .property = "rom_only",\
@@ -612,6 +643,7 @@ static QEMUMachine xenfv_machine = {
 
 static void pc_machine_init(void)
 {
+    qemu_register_machine(&pc_machine_v1_2);
     qemu_register_machine(&pc_machine_v1_1);
     qemu_register_machine(&pc_machine_v1_0);
     qemu_register_machine(&pc_machine_v0_15);
diff --git a/hw/pci.c b/hw/pci.c
index bdfb3d6540..4d95984807 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -775,6 +775,9 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
         return NULL;
     }
     pci_dev->bus = bus;
+    if (bus->dma_context_fn) {
+        pci_dev->dma = bus->dma_context_fn(bus, bus->dma_context_opaque, devfn);
+    }
     pci_dev->devfn = devfn;
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     pci_dev->irq_state = 0;
@@ -846,15 +849,14 @@ static int pci_unregister_device(DeviceState *dev)
 {
     PCIDevice *pci_dev = PCI_DEVICE(dev);
     PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
-    int ret = 0;
-
-    if (pc->exit)
-        ret = pc->exit(pci_dev);
-    if (ret)
-        return ret;
 
     pci_unregister_io_regions(pci_dev);
     pci_del_option_rom(pci_dev);
+
+    if (pc->exit) {
+        pc->exit(pci_dev);
+    }
+
     do_pci_unregister_device(pci_dev);
     return 0;
 }
@@ -1076,6 +1078,49 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
     pci_change_irq_level(pci_dev, irq_num, change);
 }
 
+/* Special hooks used by device assignment */
+void pci_bus_set_route_irq_fn(PCIBus *bus, pci_route_irq_fn route_intx_to_irq)
+{
+    assert(!bus->parent_dev);
+    bus->route_intx_to_irq = route_intx_to_irq;
+}
+
+PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
+{
+    PCIBus *bus;
+
+    do {
+         bus = dev->bus;
+         pin = bus->map_irq(dev, pin);
+         dev = bus->parent_dev;
+    } while (dev);
+    assert(bus->route_intx_to_irq);
+    return bus->route_intx_to_irq(bus->irq_opaque, pin);
+}
+
+void pci_bus_fire_intx_routing_notifier(PCIBus *bus)
+{
+    PCIDevice *dev;
+    PCIBus *sec;
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
+        dev = bus->devices[i];
+        if (dev && dev->intx_routing_notifier) {
+            dev->intx_routing_notifier(dev);
+        }
+        QLIST_FOREACH(sec, &bus->child, sibling) {
+            pci_bus_fire_intx_routing_notifier(sec);
+        }
+    }
+}
+
+void pci_device_set_intx_routing_notifier(PCIDevice *dev,
+                                          PCIINTxRoutingNotifier notifier)
+{
+    dev->intx_routing_notifier = notifier;
+}
+
 /***********************************************************/
 /* monitor info on PCI */
 
@@ -1144,7 +1189,9 @@ static const pci_class_desc pci_class_descriptions[] =
 };
 
 static void pci_for_each_device_under_bus(PCIBus *bus,
-                                          void (*fn)(PCIBus *b, PCIDevice *d))
+                                          void (*fn)(PCIBus *b, PCIDevice *d,
+                                                     void *opaque),
+                                          void *opaque)
 {
     PCIDevice *d;
     int devfn;
@@ -1152,18 +1199,19 @@ static void pci_for_each_device_under_bus(PCIBus *bus,
     for(devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
         d = bus->devices[devfn];
         if (d) {
-            fn(bus, d);
+            fn(bus, d, opaque);
         }
     }
 }
 
 void pci_for_each_device(PCIBus *bus, int bus_num,
-                         void (*fn)(PCIBus *b, PCIDevice *d))
+                         void (*fn)(PCIBus *b, PCIDevice *d, void *opaque),
+                         void *opaque)
 {
     bus = pci_find_bus_nr(bus, bus_num);
 
     if (bus) {
-        pci_for_each_device_under_bus(bus, fn);
+        pci_for_each_device_under_bus(bus, fn, opaque);
     }
 }
 
@@ -2021,6 +2069,12 @@ static void pci_device_class_init(ObjectClass *klass, void *data)
     k->props = pci_props;
 }
 
+void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque)
+{
+    bus->dma_context_fn = fn;
+    bus->dma_context_opaque = opaque;
+}
+
 static TypeInfo pci_device_type_info = {
     .name = TYPE_PCI_DEVICE,
     .parent = TYPE_DEVICE,
diff --git a/hw/pci.h b/hw/pci.h
index 7f223c01e1..4b6ab3d190 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -85,7 +85,7 @@ typedef uint32_t PCIConfigReadFunc(PCIDevice *pci_dev,
                                    uint32_t address, int len);
 typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
                                 pcibus_t addr, pcibus_t size, int type);
-typedef int PCIUnregisterFunc(PCIDevice *pci_dev);
+typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
 
 typedef struct PCIIORegion {
     pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
@@ -141,6 +141,15 @@ enum {
 #define PCI_DEVICE_GET_CLASS(obj) \
      OBJECT_GET_CLASS(PCIDeviceClass, (obj), TYPE_PCI_DEVICE)
 
+typedef struct PCIINTxRoute {
+    enum {
+        PCI_INTX_ENABLED,
+        PCI_INTX_INVERTED,
+        PCI_INTX_DISABLED,
+    } mode;
+    int irq;
+} PCIINTxRoute;
+
 typedef struct PCIDeviceClass {
     DeviceClass parent_class;
 
@@ -173,12 +182,14 @@ typedef struct PCIDeviceClass {
     const char *romfile;
 } PCIDeviceClass;
 
+typedef void (*PCIINTxRoutingNotifier)(PCIDevice *dev);
 typedef int (*MSIVectorUseNotifier)(PCIDevice *dev, unsigned int vector,
                                       MSIMessage msg);
 typedef void (*MSIVectorReleaseNotifier)(PCIDevice *dev, unsigned int vector);
 
 struct PCIDevice {
     DeviceState qdev;
+
     /* PCI config space */
     uint8_t *config;
 
@@ -200,6 +211,7 @@ struct PCIDevice {
     int32_t devfn;
     char name[64];
     PCIIORegion io_regions[PCI_NUM_REGIONS];
+    DMAContext *dma;
 
     /* do not access the following fields */
     PCIConfigReadFunc *config_read;
@@ -220,14 +232,16 @@ struct PCIDevice {
     /* MSI-X entries */
     int msix_entries_nr;
 
-    /* Space to store MSIX table */
-    uint8_t *msix_table_page;
-    /* MMIO index used to map MSIX table and pending bit entries. */
-    MemoryRegion msix_mmio;
+    /* Space to store MSIX table & pending bit array */
+    uint8_t *msix_table;
+    uint8_t *msix_pba;
+    /* MemoryRegion container for msix exclusive BAR setup */
+    MemoryRegion msix_exclusive_bar;
+    /* Memory Regions for MSIX table and pending bit entries. */
+    MemoryRegion msix_table_mmio;
+    MemoryRegion msix_pba_mmio;
     /* Reference-count for entries actually in use by driver. */
     unsigned *msix_entry_used;
-    /* Region including the MSI-X table */
-    uint32_t msix_bar_size;
     /* MSIX function mask set or MSIX disabled */
     bool msix_function_masked;
     /* Version id needed for VMState */
@@ -248,6 +262,9 @@ struct PCIDevice {
     MemoryRegion rom;
     uint32_t rom_bar;
 
+    /* INTx routing notifier */
+    PCIINTxRoutingNotifier intx_routing_notifier;
+
     /* MSI-X notifiers */
     MSIVectorUseNotifier msix_vector_use_notifier;
     MSIVectorReleaseNotifier msix_vector_release_notifier;
@@ -276,6 +293,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev);
 
 typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
 typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
+typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
 
 typedef enum {
     PCI_HOTPLUG_DISABLED,
@@ -304,6 +322,11 @@ PCIBus *pci_register_bus(DeviceState *parent, const char *name,
                          MemoryRegion *address_space_mem,
                          MemoryRegion *address_space_io,
                          uint8_t devfn_min, int nirq);
+void pci_bus_set_route_irq_fn(PCIBus *, pci_route_irq_fn);
+PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin);
+void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
+void pci_device_set_intx_routing_notifier(PCIDevice *dev,
+                                          PCIINTxRoutingNotifier notifier);
 void pci_device_reset(PCIDevice *dev);
 void pci_bus_reset(PCIBus *bus);
 
@@ -312,7 +335,9 @@ PCIDevice *pci_nic_init(NICInfo *nd, const char *default_model,
 PCIDevice *pci_nic_init_nofail(NICInfo *nd, const char *default_model,
                                const char *default_devaddr);
 int pci_bus_num(PCIBus *s);
-void pci_for_each_device(PCIBus *bus, int bus_num, void (*fn)(PCIBus *bus, PCIDevice *d));
+void pci_for_each_device(PCIBus *bus, int bus_num,
+                         void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque),
+                         void *opaque);
 PCIBus *pci_find_root_bus(int domain);
 int pci_find_domain(const PCIBus *bus);
 PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn);
@@ -324,6 +349,10 @@ int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp,
 
 void pci_device_deassert_intx(PCIDevice *dev);
 
+typedef DMAContext *(*PCIDMAContextFunc)(PCIBus *, void *, int);
+
+void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque);
+
 static inline void
 pci_set_byte(uint8_t *config, uint8_t val)
 {
@@ -558,10 +587,15 @@ static inline uint32_t pci_config_size(const PCIDevice *d)
 }
 
 /* DMA access functions */
+static inline DMAContext *pci_dma_context(PCIDevice *dev)
+{
+    return dev->dma;
+}
+
 static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
                              void *buf, dma_addr_t len, DMADirection dir)
 {
-    cpu_physical_memory_rw(addr, buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
+    dma_memory_rw(pci_dma_context(dev), addr, buf, len, dir);
     return 0;
 }
 
@@ -581,12 +615,12 @@ static inline int pci_dma_write(PCIDevice *dev, dma_addr_t addr,
     static inline uint##_bits##_t ld##_l##_pci_dma(PCIDevice *dev,      \
                                                    dma_addr_t addr)     \
     {                                                                   \
-        return ld##_l##_phys(addr);                                     \
+        return ld##_l##_dma(pci_dma_context(dev), addr);                \
     }                                                                   \
     static inline void st##_s##_pci_dma(PCIDevice *dev,                 \
-                          dma_addr_t addr, uint##_bits##_t val)         \
+                                        dma_addr_t addr, uint##_bits##_t val) \
     {                                                                   \
-        st##_s##_phys(addr, val);                                       \
+        st##_s##_dma(pci_dma_context(dev), addr, val);                  \
     }
 
 PCI_DMA_DEFINE_LDST(ub, b, 8);
@@ -602,25 +636,22 @@ PCI_DMA_DEFINE_LDST(q_be, q_be, 64);
 static inline void *pci_dma_map(PCIDevice *dev, dma_addr_t addr,
                                 dma_addr_t *plen, DMADirection dir)
 {
-    target_phys_addr_t len = *plen;
     void *buf;
 
-    buf = cpu_physical_memory_map(addr, &len, dir == DMA_DIRECTION_FROM_DEVICE);
-    *plen = len;
+    buf = dma_memory_map(pci_dma_context(dev), addr, plen, dir);
     return buf;
 }
 
 static inline void pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len,
                                  DMADirection dir, dma_addr_t access_len)
 {
-    cpu_physical_memory_unmap(buffer, len, dir == DMA_DIRECTION_FROM_DEVICE,
-                              access_len);
+    dma_memory_unmap(pci_dma_context(dev), buffer, len, dir, access_len);
 }
 
 static inline void pci_dma_sglist_init(QEMUSGList *qsg, PCIDevice *dev,
                                        int alloc_hint)
 {
-    qemu_sglist_init(qsg, alloc_hint);
+    qemu_sglist_init(qsg, alloc_hint, pci_dma_context(dev));
 }
 
 extern const VMStateDescription vmstate_pci_device;
diff --git a/hw/pci_bridge.c b/hw/pci_bridge.c
index 0916276c4d..5c6455f6fa 100644
--- a/hw/pci_bridge.c
+++ b/hw/pci_bridge.c
@@ -333,7 +333,7 @@ int pci_bridge_initfn(PCIDevice *dev)
 }
 
 /* default qdev clean up function for PCI-to-PCI bridge */
-int pci_bridge_exitfn(PCIDevice *pci_dev)
+void pci_bridge_exitfn(PCIDevice *pci_dev)
 {
     PCIBridge *s = DO_UPCAST(PCIBridge, dev, pci_dev);
     assert(QLIST_EMPTY(&s->sec_bus.child));
@@ -342,7 +342,6 @@ int pci_bridge_exitfn(PCIDevice *pci_dev)
     memory_region_destroy(&s->address_space_mem);
     memory_region_destroy(&s->address_space_io);
     /* qbus_free() is called automatically by qdev_free() */
-    return 0;
 }
 
 /*
diff --git a/hw/pci_bridge.h b/hw/pci_bridge.h
index 84411a69dc..a00accc172 100644
--- a/hw/pci_bridge.h
+++ b/hw/pci_bridge.h
@@ -44,7 +44,7 @@ void pci_bridge_reset_reg(PCIDevice *dev);
 void pci_bridge_reset(DeviceState *qdev);
 
 int pci_bridge_initfn(PCIDevice *pci_dev);
-int pci_bridge_exitfn(PCIDevice *pci_dev);
+void pci_bridge_exitfn(PCIDevice *pci_dev);
 
 
 /*
diff --git a/hw/pci_bridge_dev.c b/hw/pci_bridge_dev.c
index 1cc1d2049c..f7063961a0 100644
--- a/hw/pci_bridge_dev.c
+++ b/hw/pci_bridge_dev.c
@@ -52,7 +52,8 @@ static int pci_bridge_dev_initfn(PCIDevice *dev)
 {
     PCIBridge *br = DO_UPCAST(PCIBridge, dev, dev);
     PCIBridgeDev *bridge_dev = DO_UPCAST(PCIBridgeDev, bridge, br);
-    int err, ret;
+    int err;
+
     pci_bridge_map_irq(br, NULL, pci_bridge_dev_map_irq_fn);
     err = pci_bridge_initfn(dev);
     if (err) {
@@ -86,26 +87,22 @@ slotid_error:
     shpc_cleanup(dev, &bridge_dev->bar);
 shpc_error:
     memory_region_destroy(&bridge_dev->bar);
-    ret = pci_bridge_exitfn(dev);
-    assert(!ret);
+    pci_bridge_exitfn(dev);
 bridge_error:
     return err;
 }
 
-static int pci_bridge_dev_exitfn(PCIDevice *dev)
+static void pci_bridge_dev_exitfn(PCIDevice *dev)
 {
     PCIBridge *br = DO_UPCAST(PCIBridge, dev, dev);
     PCIBridgeDev *bridge_dev = DO_UPCAST(PCIBridgeDev, bridge, br);
-    int ret;
     if (msi_present(dev)) {
         msi_uninit(dev);
     }
     slotid_cap_cleanup(dev);
     shpc_cleanup(dev, &bridge_dev->bar);
     memory_region_destroy(&bridge_dev->bar);
-    ret = pci_bridge_exitfn(dev);
-    assert(!ret);
-    return 0;
+    pci_bridge_exitfn(dev);
 }
 
 static void pci_bridge_dev_write_config(PCIDevice *d,
diff --git a/hw/pci_ids.h b/hw/pci_ids.h
index e8235a7d05..301bf1cd86 100644
--- a/hw/pci_ids.h
+++ b/hw/pci_ids.h
@@ -15,6 +15,7 @@
 
 #define PCI_CLASS_STORAGE_SCSI           0x0100
 #define PCI_CLASS_STORAGE_IDE            0x0101
+#define PCI_CLASS_STORAGE_RAID           0x0104
 #define PCI_CLASS_STORAGE_SATA           0x0106
 #define PCI_CLASS_STORAGE_OTHER          0x0180
 
@@ -47,6 +48,7 @@
 
 #define PCI_VENDOR_ID_LSI_LOGIC          0x1000
 #define PCI_DEVICE_ID_LSI_53C895A        0x0012
+#define PCI_DEVICE_ID_LSI_SAS1078        0x0060
 
 #define PCI_VENDOR_ID_DEC                0x1011
 #define PCI_DEVICE_ID_DEC_21154          0x0026
@@ -57,6 +59,7 @@
 
 #define PCI_VENDOR_ID_AMD                0x1022
 #define PCI_DEVICE_ID_AMD_LANCE          0x2000
+#define PCI_DEVICE_ID_AMD_SCSI           0x2020
 
 #define PCI_VENDOR_ID_TI                 0x104c
 
@@ -118,6 +121,7 @@
 #define PCI_DEVICE_ID_INTEL_82801I_UHCI6 0x2939
 #define PCI_DEVICE_ID_INTEL_82801I_EHCI1 0x293a
 #define PCI_DEVICE_ID_INTEL_82801I_EHCI2 0x293c
+#define PCI_DEVICE_ID_INTEL_82599_SFP_VF 0x10ed
 
 #define PCI_VENDOR_ID_XEN               0x5853
 #define PCI_DEVICE_ID_XEN_PLATFORM      0x0001
diff --git a/hw/pci_internals.h b/hw/pci_internals.h
index 399c6d475c..c931b64b46 100644
--- a/hw/pci_internals.h
+++ b/hw/pci_internals.h
@@ -17,9 +17,12 @@
 
 struct PCIBus {
     BusState qbus;
+    PCIDMAContextFunc dma_context_fn;
+    void *dma_context_opaque;
     uint8_t devfn_min;
     pci_set_irq_fn set_irq;
     pci_map_irq_fn map_irq;
+    pci_route_irq_fn route_intx_to_irq;
     pci_hotplug_fn hotplug;
     DeviceState *hotplug_qdev;
     void *irq_opaque;
diff --git a/hw/pcnet-pci.c b/hw/pcnet-pci.c
index 34d73aaea1..48fd447996 100644
--- a/hw/pcnet-pci.c
+++ b/hw/pcnet-pci.c
@@ -264,14 +264,14 @@ static void pci_physical_memory_read(void *dma_opaque, target_phys_addr_t addr,
     pci_dma_read(dma_opaque, addr, buf, len);
 }
 
-static void pci_pcnet_cleanup(VLANClientState *nc)
+static void pci_pcnet_cleanup(NetClientState *nc)
 {
     PCNetState *d = DO_UPCAST(NICState, nc, nc)->opaque;
 
     pcnet_common_cleanup(d);
 }
 
-static int pci_pcnet_uninit(PCIDevice *dev)
+static void pci_pcnet_uninit(PCIDevice *dev)
 {
     PCIPCNetState *d = DO_UPCAST(PCIPCNetState, pci_dev, dev);
 
@@ -279,12 +279,11 @@ static int pci_pcnet_uninit(PCIDevice *dev)
     memory_region_destroy(&d->io_bar);
     qemu_del_timer(d->state.poll_timer);
     qemu_free_timer(d->state.poll_timer);
-    qemu_del_vlan_client(&d->state.nic->nc);
-    return 0;
+    qemu_del_net_client(&d->state.nic->nc);
 }
 
 static NetClientInfo net_pci_pcnet_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = pcnet_can_receive,
     .receive = pcnet_receive,
diff --git a/hw/pcnet.c b/hw/pcnet.c
index d769b08b78..40820b3632 100644
--- a/hw/pcnet.c
+++ b/hw/pcnet.c
@@ -1004,7 +1004,7 @@ static int pcnet_tdte_poll(PCNetState *s)
     return !!(CSR_CXST(s) & 0x8000);
 }
 
-int pcnet_can_receive(VLANClientState *nc)
+int pcnet_can_receive(NetClientState *nc)
 {
     PCNetState *s = DO_UPCAST(NICState, nc, nc)->opaque;
     if (CSR_STOP(s) || CSR_SPND(s))
@@ -1015,7 +1015,7 @@ int pcnet_can_receive(VLANClientState *nc)
 
 #define MIN_BUF_SIZE 60
 
-ssize_t pcnet_receive(VLANClientState *nc, const uint8_t *buf, size_t size_)
+ssize_t pcnet_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
 {
     PCNetState *s = DO_UPCAST(NICState, nc, nc)->opaque;
     int is_padr = 0, is_bcast = 0, is_ladr = 0;
@@ -1197,7 +1197,7 @@ ssize_t pcnet_receive(VLANClientState *nc, const uint8_t *buf, size_t size_)
     return size_;
 }
 
-void pcnet_set_link_status(VLANClientState *nc)
+void pcnet_set_link_status(NetClientState *nc)
 {
     PCNetState *d = DO_UPCAST(NICState, nc, nc)->opaque;
 
diff --git a/hw/pcnet.h b/hw/pcnet.h
index 803a2cc1ec..d0af54a46a 100644
--- a/hw/pcnet.h
+++ b/hw/pcnet.h
@@ -57,9 +57,9 @@ uint32_t pcnet_ioport_readw(void *opaque, uint32_t addr);
 void pcnet_ioport_writel(void *opaque, uint32_t addr, uint32_t val);
 uint32_t pcnet_ioport_readl(void *opaque, uint32_t addr);
 uint32_t pcnet_bcr_readw(PCNetState *s, uint32_t rap);
-int pcnet_can_receive(VLANClientState *nc);
-ssize_t pcnet_receive(VLANClientState *nc, const uint8_t *buf, size_t size_);
-void pcnet_set_link_status(VLANClientState *nc);
+int pcnet_can_receive(NetClientState *nc);
+ssize_t pcnet_receive(NetClientState *nc, const uint8_t *buf, size_t size_);
+void pcnet_set_link_status(NetClientState *nc);
 void pcnet_common_cleanup(PCNetState *d);
 int pcnet_common_init(DeviceState *dev, PCNetState *s, NetClientInfo *info);
 extern const VMStateDescription vmstate_pcnet;
diff --git a/hw/piix_pci.c b/hw/piix_pci.c
index 09e84f59b6..c497a014af 100644
--- a/hw/piix_pci.c
+++ b/hw/piix_pci.c
@@ -89,6 +89,7 @@ struct PCII440FXState {
 #define I440FX_SMRAM    0x72
 
 static void piix3_set_irq(void *opaque, int pirq, int level);
+static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pci_intx);
 static void piix3_write_config_xen(PCIDevice *dev,
                                uint32_t address, uint32_t val, int len);
 
@@ -315,6 +316,7 @@ static PCIBus *i440fx_common_init(const char *device_name,
                 pci_create_simple_multifunction(b, -1, true, "PIIX3"));
         pci_bus_irqs(b, piix3_set_irq, pci_slot_get_pirq, piix3,
                 PIIX_NUM_PIRQS);
+        pci_bus_set_route_irq_fn(b, piix3_route_intx_pin_to_irq);
     }
     piix3->pic = pic;
     *isa_bus = DO_UPCAST(ISABus, qbus,
@@ -386,6 +388,22 @@ static void piix3_set_irq(void *opaque, int pirq, int level)
     piix3_set_irq_level(piix3, pirq, level);
 }
 
+static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pin)
+{
+    PIIX3State *piix3 = opaque;
+    int irq = piix3->dev.config[PIIX_PIRQC + pin];
+    PCIINTxRoute route;
+
+    if (irq < PIIX_NUM_PIC_IRQS) {
+        route.mode = PCI_INTX_ENABLED;
+        route.irq = irq;
+    } else {
+        route.mode = PCI_INTX_DISABLED;
+        route.irq = -1;
+    }
+    return route;
+}
+
 /* irq routing is changed. so rebuild bitmap */
 static void piix3_update_irq_levels(PIIX3State *piix3)
 {
@@ -405,6 +423,8 @@ static void piix3_write_config(PCIDevice *dev,
     if (ranges_overlap(address, len, PIIX_PIRQC, 4)) {
         PIIX3State *piix3 = DO_UPCAST(PIIX3State, dev, dev);
         int pic_irq;
+
+        pci_bus_fire_intx_routing_notifier(piix3->dev.bus);
         piix3_update_irq_levels(piix3);
         for (pic_irq = 0; pic_irq < PIIX_NUM_PIC_IRQS; pic_irq++) {
             piix3_set_irq_pic(piix3, pic_irq);
diff --git a/hw/pl011.c b/hw/pl011.c
index 8a5a8f554a..3245702df0 100644
--- a/hw/pl011.c
+++ b/hw/pl011.c
@@ -78,7 +78,9 @@ static uint64_t pl011_read(void *opaque, target_phys_addr_t offset,
         if (s->read_count == s->read_trigger - 1)
             s->int_level &= ~ PL011_INT_RX;
         pl011_update(s);
-        qemu_chr_accept_input(s->chr);
+        if (s->chr) {
+            qemu_chr_accept_input(s->chr);
+        }
         return c;
     case 1: /* UARTCR */
         return 0;
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index d18dbaf6cc..aa4bbeb664 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -10,7 +10,7 @@ obj-y += ppc_newworld.o
 # IBM pSeries (sPAPR)
 obj-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
 obj-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
-obj-$(CONFIG_PSERIES) += spapr_pci.o pci-hotplug.o
+obj-$(CONFIG_PSERIES) += spapr_pci.o pci-hotplug.o spapr_iommu.o
 # PowerPC 4xx boards
 obj-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
 obj-y += ppc440_bamboo.o
diff --git a/hw/ppce500_spin.c b/hw/ppce500_spin.c
index fddf2197a9..c5b8e051ec 100644
--- a/hw/ppce500_spin.c
+++ b/hw/ppce500_spin.c
@@ -40,7 +40,7 @@ typedef struct spin_info {
     uint32_t resv;
     uint32_t pir;
     uint64_t reserved;
-} __attribute__ ((packed)) SpinInfo;
+} QEMU_PACKED SpinInfo;
 
 typedef struct spin_state {
     SysBusDevice busdev;
diff --git a/hw/qdev-dma.h b/hw/qdev-dma.h
new file mode 100644
index 0000000000..6812735e3d
--- /dev/null
+++ b/hw/qdev-dma.h
@@ -0,0 +1,10 @@
+/*
+ * Support for dma_addr_t typed properties
+ *
+ * Copyright (C) 2012 David Gibson, IBM Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#define DEFINE_PROP_DMAADDR(_n, _s, _f, _d)                               \
+    DEFINE_PROP_HEX64(_n, _s, _f, _d)
diff --git a/hw/qdev-monitor.c b/hw/qdev-monitor.c
index 7915b4500d..b22a37a00c 100644
--- a/hw/qdev-monitor.c
+++ b/hw/qdev-monitor.c
@@ -138,13 +138,13 @@ int qdev_device_help(QemuOpts *opts)
     ObjectClass *klass;
 
     driver = qemu_opt_get(opts, "driver");
-    if (driver && !strcmp(driver, "?")) {
+    if (driver && is_help_option(driver)) {
         bool show_no_user = false;
         object_class_foreach(qdev_print_devinfo, TYPE_DEVICE, false, &show_no_user);
         return 1;
     }
 
-    if (!driver || !qemu_opt_get(opts, "?")) {
+    if (!driver || !qemu_opt_has_help_opt(opts)) {
         return 0;
     }
 
diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c
index 099a7aa96f..8aca0d43fe 100644
--- a/hw/qdev-properties.c
+++ b/hw/qdev-properties.c
@@ -2,6 +2,8 @@
 #include "qdev.h"
 #include "qerror.h"
 #include "blockdev.h"
+#include "hw/block-common.h"
+#include "net/hub.h"
 
 void *qdev_get_prop_ptr(DeviceState *dev, Property *prop)
 {
@@ -10,6 +12,78 @@ void *qdev_get_prop_ptr(DeviceState *dev, Property *prop)
     return ptr;
 }
 
+static void get_pointer(Object *obj, Visitor *v, Property *prop,
+                        const char *(*print)(void *ptr),
+                        const char *name, Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    void **ptr = qdev_get_prop_ptr(dev, prop);
+    char *p;
+
+    p = (char *) (*ptr ? print(*ptr) : "");
+    visit_type_str(v, &p, name, errp);
+}
+
+static void set_pointer(Object *obj, Visitor *v, Property *prop,
+                        int (*parse)(DeviceState *dev, const char *str,
+                                     void **ptr),
+                        const char *name, Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Error *local_err = NULL;
+    void **ptr = qdev_get_prop_ptr(dev, prop);
+    char *str;
+    int ret;
+
+    if (dev->state != DEV_STATE_CREATED) {
+        error_set(errp, QERR_PERMISSION_DENIED);
+        return;
+    }
+
+    visit_type_str(v, &str, name, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    if (!*str) {
+        g_free(str);
+        *ptr = NULL;
+        return;
+    }
+    ret = parse(dev, str, ptr);
+    error_set_from_qdev_prop_error(errp, ret, dev, prop, str);
+    g_free(str);
+}
+
+static void get_enum(Object *obj, Visitor *v, void *opaque,
+                     const char *name, Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    int *ptr = qdev_get_prop_ptr(dev, prop);
+
+    visit_type_enum(v, ptr, prop->info->enum_table,
+                    prop->info->name, prop->name, errp);
+}
+
+static void set_enum(Object *obj, Visitor *v, void *opaque,
+                     const char *name, Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    int *ptr = qdev_get_prop_ptr(dev, prop);
+
+    if (dev->state != DEV_STATE_CREATED) {
+        error_set(errp, QERR_PERMISSION_DENIED);
+        return;
+    }
+
+    visit_type_enum(v, ptr, prop->info->enum_table,
+                    prop->info->name, prop->name, errp);
+}
+
+/* Bit */
+
 static uint32_t qdev_get_prop_mask(Property *prop)
 {
     assert(prop->info == &qdev_prop_bit);
@@ -26,8 +100,6 @@ static void bit_prop_set(DeviceState *dev, Property *props, bool val)
         *p &= ~mask;
 }
 
-/* Bit */
-
 static int print_bit(DeviceState *dev, Property *prop, char *dest, size_t len)
 {
     uint32_t *p = qdev_get_prop_ptr(dev, prop);
@@ -435,48 +507,6 @@ static const char *print_drive(void *ptr)
     return bdrv_get_device_name(ptr);
 }
 
-static void get_pointer(Object *obj, Visitor *v, Property *prop,
-                        const char *(*print)(void *ptr),
-                        const char *name, Error **errp)
-{
-    DeviceState *dev = DEVICE(obj);
-    void **ptr = qdev_get_prop_ptr(dev, prop);
-    char *p;
-
-    p = (char *) (*ptr ? print(*ptr) : "");
-    visit_type_str(v, &p, name, errp);
-}
-
-static void set_pointer(Object *obj, Visitor *v, Property *prop,
-                        int (*parse)(DeviceState *dev, const char *str, void **ptr),
-                        const char *name, Error **errp)
-{
-    DeviceState *dev = DEVICE(obj);
-    Error *local_err = NULL;
-    void **ptr = qdev_get_prop_ptr(dev, prop);
-    char *str;
-    int ret;
-
-    if (dev->state != DEV_STATE_CREATED) {
-        error_set(errp, QERR_PERMISSION_DENIED);
-        return;
-    }
-
-    visit_type_str(v, &str, name, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
-    if (!*str) {
-        g_free(str);
-        *ptr = NULL;
-        return;
-    }
-    ret = parse(dev, str, ptr);
-    error_set_from_qdev_prop_error(errp, ret, dev, prop, str);
-    g_free(str);
-}
-
 static void get_drive(Object *obj, Visitor *v, void *opaque,
                       const char *name, Error **errp)
 {
@@ -554,7 +584,7 @@ PropertyInfo qdev_prop_chr = {
 
 static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
 {
-    VLANClientState *netdev = qemu_find_netdev(str);
+    NetClientState *netdev = qemu_find_netdev(str);
 
     if (netdev == NULL) {
         return -ENOENT;
@@ -568,7 +598,7 @@ static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
 
 static const char *print_netdev(void *ptr)
 {
-    VLANClientState *netdev = ptr;
+    NetClientState *netdev = ptr;
 
     return netdev->name ? netdev->name : "";
 }
@@ -595,13 +625,16 @@ PropertyInfo qdev_prop_netdev = {
 
 static int print_vlan(DeviceState *dev, Property *prop, char *dest, size_t len)
 {
-    VLANState **ptr = qdev_get_prop_ptr(dev, prop);
+    NetClientState **ptr = qdev_get_prop_ptr(dev, prop);
 
     if (*ptr) {
-        return snprintf(dest, len, "%d", (*ptr)->id);
-    } else {
-        return snprintf(dest, len, "<null>");
+        int id;
+        if (!net_hub_id_for_client(*ptr, &id)) {
+            return snprintf(dest, len, "%d", id);
+        }
     }
+
+    return snprintf(dest, len, "<null>");
 }
 
 static void get_vlan(Object *obj, Visitor *v, void *opaque,
@@ -609,11 +642,17 @@ static void get_vlan(Object *obj, Visitor *v, void *opaque,
 {
     DeviceState *dev = DEVICE(obj);
     Property *prop = opaque;
-    VLANState **ptr = qdev_get_prop_ptr(dev, prop);
-    int64_t id;
+    NetClientState **ptr = qdev_get_prop_ptr(dev, prop);
+    int32_t id = -1;
+
+    if (*ptr) {
+        int hub_id;
+        if (!net_hub_id_for_client(*ptr, &hub_id)) {
+            id = hub_id;
+        }
+    }
 
-    id = *ptr ? (*ptr)->id : -1;
-    visit_type_int64(v, &id, name, errp);
+    visit_type_int32(v, &id, name, errp);
 }
 
 static void set_vlan(Object *obj, Visitor *v, void *opaque,
@@ -621,17 +660,17 @@ static void set_vlan(Object *obj, Visitor *v, void *opaque,
 {
     DeviceState *dev = DEVICE(obj);
     Property *prop = opaque;
-    VLANState **ptr = qdev_get_prop_ptr(dev, prop);
+    NetClientState **ptr = qdev_get_prop_ptr(dev, prop);
     Error *local_err = NULL;
-    int64_t id;
-    VLANState *vlan;
+    int32_t id;
+    NetClientState *hubport;
 
     if (dev->state != DEV_STATE_CREATED) {
         error_set(errp, QERR_PERMISSION_DENIED);
         return;
     }
 
-    visit_type_int64(v, &id, name, &local_err);
+    visit_type_int32(v, &id, name, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
@@ -640,13 +679,14 @@ static void set_vlan(Object *obj, Visitor *v, void *opaque,
         *ptr = NULL;
         return;
     }
-    vlan = qemu_find_vlan(id, 1);
-    if (!vlan) {
+
+    hubport = net_hub_port_find(id);
+    if (!hubport) {
         error_set(errp, QERR_INVALID_PARAMETER_VALUE,
                   name, prop->info->name);
         return;
     }
-    *ptr = vlan;
+    *ptr = hubport;
 }
 
 PropertyInfo qdev_prop_vlan = {
@@ -735,7 +775,6 @@ PropertyInfo qdev_prop_macaddr = {
     .set   = set_mac,
 };
 
-
 /* --- lost tick policy --- */
 
 static const char *lost_tick_policy_table[LOST_TICK_MAX+1] = {
@@ -748,33 +787,6 @@ static const char *lost_tick_policy_table[LOST_TICK_MAX+1] = {
 
 QEMU_BUILD_BUG_ON(sizeof(LostTickPolicy) != sizeof(int));
 
-static void get_enum(Object *obj, Visitor *v, void *opaque,
-                     const char *name, Error **errp)
-{
-    DeviceState *dev = DEVICE(obj);
-    Property *prop = opaque;
-    int *ptr = qdev_get_prop_ptr(dev, prop);
-
-    visit_type_enum(v, ptr, prop->info->enum_table,
-                    prop->info->name, prop->name, errp);
-}
-
-static void set_enum(Object *obj, Visitor *v, void *opaque,
-                     const char *name, Error **errp)
-{
-    DeviceState *dev = DEVICE(obj);
-    Property *prop = opaque;
-    int *ptr = qdev_get_prop_ptr(dev, prop);
-
-    if (dev->state != DEV_STATE_CREATED) {
-        error_set(errp, QERR_PERMISSION_DENIED);
-        return;
-    }
-
-    visit_type_enum(v, ptr, prop->info->enum_table,
-                    prop->info->name, prop->name, errp);
-}
-
 PropertyInfo qdev_prop_losttickpolicy = {
     .name  = "LostTickPolicy",
     .enum_table  = lost_tick_policy_table,
@@ -782,6 +794,21 @@ PropertyInfo qdev_prop_losttickpolicy = {
     .set   = set_enum,
 };
 
+/* --- BIOS CHS translation */
+
+static const char *bios_chs_trans_table[] = {
+    [BIOS_ATA_TRANSLATION_AUTO] = "auto",
+    [BIOS_ATA_TRANSLATION_NONE] = "none",
+    [BIOS_ATA_TRANSLATION_LBA]  = "lba",
+};
+
+PropertyInfo qdev_prop_bios_chs_trans = {
+    .name = "bios-chs-trans",
+    .enum_table = bios_chs_trans_table,
+    .get = get_enum,
+    .set = set_enum,
+};
+
 /* --- pci address --- */
 
 /*
@@ -899,6 +926,113 @@ PropertyInfo qdev_prop_blocksize = {
     .set   = set_blocksize,
 };
 
+/* --- pci host address --- */
+
+static void get_pci_host_devaddr(Object *obj, Visitor *v, void *opaque,
+                                 const char *name, Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    PCIHostDeviceAddress *addr = qdev_get_prop_ptr(dev, prop);
+    char buffer[] = "xxxx:xx:xx.x";
+    char *p = buffer;
+    int rc = 0;
+
+    rc = snprintf(buffer, sizeof(buffer), "%04x:%02x:%02x.%d",
+                  addr->domain, addr->bus, addr->slot, addr->function);
+    assert(rc == sizeof(buffer) - 1);
+
+    visit_type_str(v, &p, name, errp);
+}
+
+/*
+ * Parse [<domain>:]<bus>:<slot>.<func>
+ *   if <domain> is not supplied, it's assumed to be 0.
+ */
+static void set_pci_host_devaddr(Object *obj, Visitor *v, void *opaque,
+                                 const char *name, Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    PCIHostDeviceAddress *addr = qdev_get_prop_ptr(dev, prop);
+    Error *local_err = NULL;
+    char *str, *p;
+    char *e;
+    unsigned long val;
+    unsigned long dom = 0, bus = 0;
+    unsigned int slot = 0, func = 0;
+
+    if (dev->state != DEV_STATE_CREATED) {
+        error_set(errp, QERR_PERMISSION_DENIED);
+        return;
+    }
+
+    visit_type_str(v, &str, name, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    p = str;
+    val = strtoul(p, &e, 16);
+    if (e == p || *e != ':') {
+        goto inval;
+    }
+    bus = val;
+
+    p = e + 1;
+    val = strtoul(p, &e, 16);
+    if (e == p) {
+        goto inval;
+    }
+    if (*e == ':') {
+        dom = bus;
+        bus = val;
+        p = e + 1;
+        val = strtoul(p, &e, 16);
+        if (e == p) {
+            goto inval;
+        }
+    }
+    slot = val;
+
+    if (*e != '.') {
+        goto inval;
+    }
+    p = e + 1;
+    val = strtoul(p, &e, 10);
+    if (e == p) {
+        goto inval;
+    }
+    func = val;
+
+    if (dom > 0xffff || bus > 0xff || slot > 0x1f || func > 7) {
+        goto inval;
+    }
+
+    if (*e) {
+        goto inval;
+    }
+
+    addr->domain = dom;
+    addr->bus = bus;
+    addr->slot = slot;
+    addr->function = func;
+
+    g_free(str);
+    return;
+
+inval:
+    error_set_from_qdev_prop_error(errp, EINVAL, dev, prop, str);
+    g_free(str);
+}
+
+PropertyInfo qdev_prop_pci_host_devaddr = {
+    .name = "pci-host-devaddr",
+    .get = get_pci_host_devaddr,
+    .set = set_pci_host_devaddr,
+};
+
 /* --- public helpers --- */
 
 static Property *qdev_prop_walk(Property *props, const char *name)
@@ -1016,7 +1150,7 @@ void qdev_prop_set_uint64(DeviceState *dev, const char *name, uint64_t value)
     assert_no_error(errp);
 }
 
-void qdev_prop_set_string(DeviceState *dev, const char *name, char *value)
+void qdev_prop_set_string(DeviceState *dev, const char *name, const char *value)
 {
     Error *errp = NULL;
     object_property_set_str(OBJECT(dev), value, name, &errp);
@@ -1052,7 +1186,7 @@ void qdev_prop_set_chr(DeviceState *dev, const char *name, CharDriverState *valu
     assert_no_error(errp);
 }
 
-void qdev_prop_set_netdev(DeviceState *dev, const char *name, VLANClientState *value)
+void qdev_prop_set_netdev(DeviceState *dev, const char *name, NetClientState *value)
 {
     Error *errp = NULL;
     assert(!value || value->name);
@@ -1061,13 +1195,6 @@ void qdev_prop_set_netdev(DeviceState *dev, const char *name, VLANClientState *v
     assert_no_error(errp);
 }
 
-void qdev_prop_set_vlan(DeviceState *dev, const char *name, VLANState *value)
-{
-    Error *errp = NULL;
-    object_property_set_int(OBJECT(dev), value ? value->id : -1, name, &errp);
-    assert_no_error(errp);
-}
-
 void qdev_prop_set_macaddr(DeviceState *dev, const char *name, uint8_t *value)
 {
     Error *errp = NULL;
diff --git a/hw/qdev.c b/hw/qdev.c
index a6c4c02947..b5b74b9135 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -258,9 +258,10 @@ int qdev_simple_unplug_cb(DeviceState *dev)
    way is somewhat unclean, and best avoided.  */
 void qdev_init_nofail(DeviceState *dev)
 {
+    const char *typename = object_get_typename(OBJECT(dev));
+
     if (qdev_init(dev) < 0) {
-        error_report("Initialization of device %s failed",
-                     object_get_typename(OBJECT(dev)));
+        error_report("Initialization of device %s failed", typename);
         exit(1);
     }
 }
@@ -319,8 +320,6 @@ void qdev_connect_gpio_out(DeviceState * dev, int n, qemu_irq pin)
 void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd)
 {
     qdev_prop_set_macaddr(dev, "mac", nd->macaddr.a);
-    if (nd->vlan)
-        qdev_prop_set_vlan(dev, "vlan", nd->vlan);
     if (nd->netdev)
         qdev_prop_set_netdev(dev, "netdev", nd->netdev);
     if (nd->nvectors != DEV_NVECTORS_UNSPECIFIED &&
diff --git a/hw/qdev.h b/hw/qdev.h
index ae1d2812bf..d699194418 100644
--- a/hw/qdev.h
+++ b/hw/qdev.h
@@ -78,12 +78,6 @@ struct DeviceState {
     int alias_required_for_version;
 };
 
-/*
- * This callback is used to create Open Firmware device path in accordance with
- * OF spec http://forthworks.com/standards/of1275.pdf. Indicidual bus bindings
- * can be found here http://playground.sun.com/1275/bindings/.
- */
-
 #define TYPE_BUS "bus"
 #define BUS(obj) OBJECT_CHECK(BusState, (obj), TYPE_BUS)
 #define BUS_CLASS(klass) OBJECT_CLASS_CHECK(BusClass, (klass), TYPE_BUS)
@@ -95,6 +89,11 @@ struct BusClass {
     /* FIXME first arg should be BusState */
     void (*print_dev)(Monitor *mon, DeviceState *dev, int indent);
     char *(*get_dev_path)(DeviceState *dev);
+    /*
+     * This callback is used to create Open Firmware device path in accordance
+     * with OF spec http://forthworks.com/standards/of1275.pdf. Individual bus
+     * bindings can be found at http://playground.sun.com/1275/bindings/.
+     */
     char *(*get_fw_dev_path)(DeviceState *dev);
     int (*reset)(BusState *bus);
 };
@@ -232,11 +231,13 @@ extern PropertyInfo qdev_prop_chr;
 extern PropertyInfo qdev_prop_ptr;
 extern PropertyInfo qdev_prop_macaddr;
 extern PropertyInfo qdev_prop_losttickpolicy;
+extern PropertyInfo qdev_prop_bios_chs_trans;
 extern PropertyInfo qdev_prop_drive;
 extern PropertyInfo qdev_prop_netdev;
 extern PropertyInfo qdev_prop_vlan;
 extern PropertyInfo qdev_prop_pci_devfn;
 extern PropertyInfo qdev_prop_blocksize;
+extern PropertyInfo qdev_prop_pci_host_devaddr;
 
 #define DEFINE_PROP(_name, _state, _field, _prop, _type) { \
         .name      = (_name),                                    \
@@ -288,9 +289,9 @@ extern PropertyInfo qdev_prop_blocksize;
 #define DEFINE_PROP_STRING(_n, _s, _f)             \
     DEFINE_PROP(_n, _s, _f, qdev_prop_string, char*)
 #define DEFINE_PROP_NETDEV(_n, _s, _f)             \
-    DEFINE_PROP(_n, _s, _f, qdev_prop_netdev, VLANClientState*)
+    DEFINE_PROP(_n, _s, _f, qdev_prop_netdev, NetClientState*)
 #define DEFINE_PROP_VLAN(_n, _s, _f)             \
-    DEFINE_PROP(_n, _s, _f, qdev_prop_vlan, VLANState*)
+    DEFINE_PROP(_n, _s, _f, qdev_prop_vlan, NetClientState*)
 #define DEFINE_PROP_DRIVE(_n, _s, _f) \
     DEFINE_PROP(_n, _s, _f, qdev_prop_drive, BlockDriverState *)
 #define DEFINE_PROP_MACADDR(_n, _s, _f)         \
@@ -298,8 +299,12 @@ extern PropertyInfo qdev_prop_blocksize;
 #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
     DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
                         LostTickPolicy)
+#define DEFINE_PROP_BIOS_CHS_TRANS(_n, _s, _f, _d) \
+    DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_bios_chs_trans, int)
 #define DEFINE_PROP_BLOCKSIZE(_n, _s, _f, _d) \
     DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_blocksize, uint16_t)
+#define DEFINE_PROP_PCI_HOST_DEVADDR(_n, _s, _f) \
+    DEFINE_PROP(_n, _s, _f, qdev_prop_pci_host_devaddr, PCIHostDeviceAddress)
 
 #define DEFINE_PROP_END_OF_LIST()               \
     {}
@@ -313,10 +318,9 @@ void qdev_prop_set_uint16(DeviceState *dev, const char *name, uint16_t value);
 void qdev_prop_set_uint32(DeviceState *dev, const char *name, uint32_t value);
 void qdev_prop_set_int32(DeviceState *dev, const char *name, int32_t value);
 void qdev_prop_set_uint64(DeviceState *dev, const char *name, uint64_t value);
-void qdev_prop_set_string(DeviceState *dev, const char *name, char *value);
+void qdev_prop_set_string(DeviceState *dev, const char *name, const char *value);
 void qdev_prop_set_chr(DeviceState *dev, const char *name, CharDriverState *value);
-void qdev_prop_set_netdev(DeviceState *dev, const char *name, VLANClientState *value);
-void qdev_prop_set_vlan(DeviceState *dev, const char *name, VLANState *value);
+void qdev_prop_set_netdev(DeviceState *dev, const char *name, NetClientState *value);
 int qdev_prop_set_drive(DeviceState *dev, const char *name, BlockDriverState *value) QEMU_WARN_UNUSED_RESULT;
 void qdev_prop_set_drive_nofail(DeviceState *dev, const char *name, BlockDriverState *value);
 void qdev_prop_set_macaddr(DeviceState *dev, const char *name, uint8_t *value);
diff --git a/hw/qxl.c b/hw/qxl.c
index 3da3399934..c2dd3b471b 100644
--- a/hw/qxl.c
+++ b/hw/qxl.c
@@ -30,7 +30,7 @@
 /*
  * NOTE: SPICE_RING_PROD_ITEM accesses memory on the pci bar and as
  * such can be changed by the guest, so to avoid a guest trigerrable
- * abort we just set qxl_guest_bug and set the return to NULL. Still
+ * abort we just qxl_set_guest_bug and set the return to NULL. Still
  * it may happen as a result of emulator bug as well.
  */
 #undef SPICE_RING_PROD_ITEM
@@ -40,7 +40,7 @@
         uint32_t prod = (r)->prod & SPICE_RING_INDEX_MASK(r);           \
         typeof(&(r)->items[prod]) m_item = &(r)->items[prod];           \
         if (!((uint8_t*)m_item >= (uint8_t*)(start) && (uint8_t*)(m_item + 1) <= (uint8_t*)(end))) { \
-            qxl_guest_bug(qxl, "SPICE_RING_PROD_ITEM indices mismatch " \
+            qxl_set_guest_bug(qxl, "SPICE_RING_PROD_ITEM indices mismatch " \
                           "! %p <= %p < %p", (uint8_t *)start,          \
                           (uint8_t *)m_item, (uint8_t *)end);           \
             ret = NULL;                                                 \
@@ -56,7 +56,7 @@
         uint32_t cons = (r)->cons & SPICE_RING_INDEX_MASK(r);           \
         typeof(&(r)->items[cons]) m_item = &(r)->items[cons];           \
         if (!((uint8_t*)m_item >= (uint8_t*)(start) && (uint8_t*)(m_item + 1) <= (uint8_t*)(end))) { \
-            qxl_guest_bug(qxl, "SPICE_RING_CONS_ITEM indices mismatch " \
+            qxl_set_guest_bug(qxl, "SPICE_RING_CONS_ITEM indices mismatch " \
                           "! %p <= %p < %p", (uint8_t *)start,          \
                           (uint8_t *)m_item, (uint8_t *)end);           \
             ret = NULL;                                                 \
@@ -114,20 +114,16 @@ static QXLMode qxl_modes[] = {
     QXL_MODE_EX(1600, 1200),
     QXL_MODE_EX(1680, 1050),
     QXL_MODE_EX(1920, 1080),
-#if VGA_RAM_SIZE >= (16 * 1024 * 1024)
     /* these modes need more than 8 MB video memory */
     QXL_MODE_EX(1920, 1200),
     QXL_MODE_EX(1920, 1440),
     QXL_MODE_EX(2048, 1536),
     QXL_MODE_EX(2560, 1440),
     QXL_MODE_EX(2560, 1600),
-#endif
-#if VGA_RAM_SIZE >= (32 * 1024 * 1024)
     /* these modes need more than 16 MB video memory */
     QXL_MODE_EX(2560, 2048),
     QXL_MODE_EX(2800, 2100),
     QXL_MODE_EX(3200, 2400),
-#endif
 };
 
 static PCIQXLDevice *qxl0;
@@ -138,9 +134,10 @@ static void qxl_reset_memslots(PCIQXLDevice *d);
 static void qxl_reset_surfaces(PCIQXLDevice *d);
 static void qxl_ring_set_dirty(PCIQXLDevice *qxl);
 
-void qxl_guest_bug(PCIQXLDevice *qxl, const char *msg, ...)
+void qxl_set_guest_bug(PCIQXLDevice *qxl, const char *msg, ...)
 {
     qxl_send_events(qxl, QXL_INTERRUPT_ERROR);
+    qxl->guest_bug = 1;
     if (qxl->guestdebug) {
         va_list ap;
         va_start(ap, msg);
@@ -151,6 +148,10 @@ void qxl_guest_bug(PCIQXLDevice *qxl, const char *msg, ...)
     }
 }
 
+static void qxl_clear_guest_bug(PCIQXLDevice *qxl)
+{
+    qxl->guest_bug = 0;
+}
 
 void qxl_spice_update_area(PCIQXLDevice *qxl, uint32_t surface_id,
                            struct QXLRect *area, struct QXLRect *dirty_rects,
@@ -279,6 +280,7 @@ static inline uint32_t msb_mask(uint32_t val)
 static ram_addr_t qxl_rom_size(void)
 {
     uint32_t rom_size = sizeof(QXLRom) + sizeof(QXLModes) + sizeof(qxl_modes);
+
     rom_size = MAX(rom_size, TARGET_PAGE_SIZE);
     rom_size = msb_mask(rom_size * 2 - 1);
     return rom_size;
@@ -291,8 +293,8 @@ static void init_qxl_rom(PCIQXLDevice *d)
     uint32_t ram_header_size;
     uint32_t surface0_area_size;
     uint32_t num_pages;
-    uint32_t fb, maxfb = 0;
-    int i;
+    uint32_t fb;
+    int i, n;
 
     memset(rom, 0, d->rom_size);
 
@@ -307,26 +309,25 @@ static void init_qxl_rom(PCIQXLDevice *d)
     rom->slots_end     = NUM_MEMSLOTS - 1;
     rom->n_surfaces    = cpu_to_le32(NUM_SURFACES);
 
-    modes->n_modes     = cpu_to_le32(ARRAY_SIZE(qxl_modes));
-    for (i = 0; i < modes->n_modes; i++) {
+    for (i = 0, n = 0; i < ARRAY_SIZE(qxl_modes); i++) {
         fb = qxl_modes[i].y_res * qxl_modes[i].stride;
-        if (maxfb < fb) {
-            maxfb = fb;
+        if (fb > d->vgamem_size) {
+            continue;
         }
-        modes->modes[i].id          = cpu_to_le32(i);
-        modes->modes[i].x_res       = cpu_to_le32(qxl_modes[i].x_res);
-        modes->modes[i].y_res       = cpu_to_le32(qxl_modes[i].y_res);
-        modes->modes[i].bits        = cpu_to_le32(qxl_modes[i].bits);
-        modes->modes[i].stride      = cpu_to_le32(qxl_modes[i].stride);
-        modes->modes[i].x_mili      = cpu_to_le32(qxl_modes[i].x_mili);
-        modes->modes[i].y_mili      = cpu_to_le32(qxl_modes[i].y_mili);
-        modes->modes[i].orientation = cpu_to_le32(qxl_modes[i].orientation);
-    }
-    if (maxfb < VGA_RAM_SIZE && d->id == 0)
-        maxfb = VGA_RAM_SIZE;
+        modes->modes[n].id          = cpu_to_le32(i);
+        modes->modes[n].x_res       = cpu_to_le32(qxl_modes[i].x_res);
+        modes->modes[n].y_res       = cpu_to_le32(qxl_modes[i].y_res);
+        modes->modes[n].bits        = cpu_to_le32(qxl_modes[i].bits);
+        modes->modes[n].stride      = cpu_to_le32(qxl_modes[i].stride);
+        modes->modes[n].x_mili      = cpu_to_le32(qxl_modes[i].x_mili);
+        modes->modes[n].y_mili      = cpu_to_le32(qxl_modes[i].y_mili);
+        modes->modes[n].orientation = cpu_to_le32(qxl_modes[i].orientation);
+        n++;
+    }
+    modes->n_modes     = cpu_to_le32(n);
 
     ram_header_size    = ALIGN(sizeof(QXLRam), 4096);
-    surface0_area_size = ALIGN(maxfb, 4096);
+    surface0_area_size = ALIGN(d->vgamem_size, 4096);
     num_pages          = d->vga.vram_size;
     num_pages         -= ram_header_size;
     num_pages         -= surface0_area_size;
@@ -411,7 +412,8 @@ static int qxl_track_command(PCIQXLDevice *qxl, struct QXLCommandExt *ext)
         uint32_t id = le32_to_cpu(cmd->surface_id);
 
         if (id >= NUM_SURFACES) {
-            qxl_guest_bug(qxl, "QXL_CMD_SURFACE id %d >= %d", id, NUM_SURFACES);
+            qxl_set_guest_bug(qxl, "QXL_CMD_SURFACE id %d >= %d", id,
+                              NUM_SURFACES);
             return 1;
         }
         qemu_mutex_lock(&qxl->track_lock);
@@ -571,7 +573,7 @@ static int interface_get_command(QXLInstance *sin, struct QXLCommandExt *ext)
     case QXL_MODE_NATIVE:
     case QXL_MODE_UNDEFINED:
         ring = &qxl->ram->cmd_ring;
-        if (SPICE_RING_IS_EMPTY(ring)) {
+        if (qxl->guest_bug || SPICE_RING_IS_EMPTY(ring)) {
             return false;
         }
         SPICE_RING_CONS_ITEM(qxl, ring, cmd);
@@ -931,6 +933,7 @@ static void qxl_enter_vga_mode(PCIQXLDevice *d)
     qemu_spice_create_host_primary(&d->ssd);
     d->mode = QXL_MODE_VGA;
     memset(&d->ssd.dirty, 0, sizeof(d->ssd.dirty));
+    vga_dirty_log_start(&d->vga);
 }
 
 static void qxl_exit_vga_mode(PCIQXLDevice *d)
@@ -939,6 +942,7 @@ static void qxl_exit_vga_mode(PCIQXLDevice *d)
         return;
     }
     trace_qxl_exit_vga_mode(d->id);
+    vga_dirty_log_stop(&d->vga);
     qxl_destroy_primary(d, QXL_SYNC);
 }
 
@@ -977,6 +981,8 @@ static void qxl_soft_reset(PCIQXLDevice *d)
 {
     trace_qxl_soft_reset(d->id);
     qxl_check_state(d);
+    qxl_clear_guest_bug(d);
+    d->current_async = QXL_UNDEFINED_IO;
 
     if (d->id == 0) {
         qxl_enter_vga_mode(d);
@@ -1061,12 +1067,12 @@ static int qxl_add_memslot(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta,
     trace_qxl_memslot_add_guest(d->id, slot_id, guest_start, guest_end);
 
     if (slot_id >= NUM_MEMSLOTS) {
-        qxl_guest_bug(d, "%s: slot_id >= NUM_MEMSLOTS %d >= %d", __func__,
+        qxl_set_guest_bug(d, "%s: slot_id >= NUM_MEMSLOTS %d >= %d", __func__,
                       slot_id, NUM_MEMSLOTS);
         return 1;
     }
     if (guest_start > guest_end) {
-        qxl_guest_bug(d, "%s: guest_start > guest_end 0x%" PRIx64
+        qxl_set_guest_bug(d, "%s: guest_start > guest_end 0x%" PRIx64
                          " > 0x%" PRIx64, __func__, guest_start, guest_end);
         return 1;
     }
@@ -1091,7 +1097,7 @@ static int qxl_add_memslot(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta,
         break;
     }
     if (i == ARRAY_SIZE(regions)) {
-        qxl_guest_bug(d, "%s: finished loop without match", __func__);
+        qxl_set_guest_bug(d, "%s: finished loop without match", __func__);
         return 1;
     }
 
@@ -1105,7 +1111,7 @@ static int qxl_add_memslot(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta,
         break;
     default:
         /* should not happen */
-        qxl_guest_bug(d, "%s: pci_region = %d", __func__, pci_region);
+        qxl_set_guest_bug(d, "%s: pci_region = %d", __func__, pci_region);
         return 1;
     }
 
@@ -1156,21 +1162,24 @@ void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, int group_id)
         return (void *)(intptr_t)offset;
     case MEMSLOT_GROUP_GUEST:
         if (slot >= NUM_MEMSLOTS) {
-            qxl_guest_bug(qxl, "slot too large %d >= %d", slot, NUM_MEMSLOTS);
+            qxl_set_guest_bug(qxl, "slot too large %d >= %d", slot,
+                              NUM_MEMSLOTS);
             return NULL;
         }
         if (!qxl->guest_slots[slot].active) {
-            qxl_guest_bug(qxl, "inactive slot %d\n", slot);
+            qxl_set_guest_bug(qxl, "inactive slot %d\n", slot);
             return NULL;
         }
         if (offset < qxl->guest_slots[slot].delta) {
-            qxl_guest_bug(qxl, "slot %d offset %"PRIu64" < delta %"PRIu64"\n",
+            qxl_set_guest_bug(qxl,
+                          "slot %d offset %"PRIu64" < delta %"PRIu64"\n",
                           slot, offset, qxl->guest_slots[slot].delta);
             return NULL;
         }
         offset -= qxl->guest_slots[slot].delta;
         if (offset > qxl->guest_slots[slot].size) {
-            qxl_guest_bug(qxl, "slot %d offset %"PRIu64" > size %"PRIu64"\n",
+            qxl_set_guest_bug(qxl,
+                          "slot %d offset %"PRIu64" > size %"PRIu64"\n",
                           slot, offset, qxl->guest_slots[slot].size);
             return NULL;
         }
@@ -1190,9 +1199,19 @@ static void qxl_create_guest_primary(PCIQXLDevice *qxl, int loadvm,
 {
     QXLDevSurfaceCreate surface;
     QXLSurfaceCreate *sc = &qxl->guest_primary.surface;
+    int size;
+    int requested_height = le32_to_cpu(sc->height);
+    int requested_stride = le32_to_cpu(sc->stride);
+
+    size = abs(requested_stride) * requested_height;
+    if (size > qxl->vgamem_size) {
+        qxl_set_guest_bug(qxl, "%s: requested primary larger then framebuffer"
+                               " size", __func__);
+        return;
+    }
 
     if (qxl->mode == QXL_MODE_NATIVE) {
-        qxl_guest_bug(qxl, "%s: nop since already in QXL_MODE_NATIVE",
+        qxl_set_guest_bug(qxl, "%s: nop since already in QXL_MODE_NATIVE",
                       __func__);
     }
     qxl_exit_vga_mode(qxl);
@@ -1291,6 +1310,10 @@ static void ioport_write(void *opaque, target_phys_addr_t addr,
     qxl_async_io async = QXL_SYNC;
     uint32_t orig_io_port = io_port;
 
+    if (d->guest_bug && !io_port == QXL_IO_RESET) {
+        return;
+    }
+
     switch (io_port) {
     case QXL_IO_RESET:
     case QXL_IO_SET_MODE:
@@ -1342,7 +1365,7 @@ async_common:
         async = QXL_ASYNC;
         qemu_mutex_lock(&d->async_lock);
         if (d->current_async != QXL_UNDEFINED_IO) {
-            qxl_guest_bug(d, "%d async started before last (%d) complete",
+            qxl_set_guest_bug(d, "%d async started before last (%d) complete",
                 io_port, d->current_async);
             qemu_mutex_unlock(&d->async_lock);
             return;
@@ -1403,11 +1426,12 @@ async_common:
         break;
     case QXL_IO_MEMSLOT_ADD:
         if (val >= NUM_MEMSLOTS) {
-            qxl_guest_bug(d, "QXL_IO_MEMSLOT_ADD: val out of range");
+            qxl_set_guest_bug(d, "QXL_IO_MEMSLOT_ADD: val out of range");
             break;
         }
         if (d->guest_slots[val].active) {
-            qxl_guest_bug(d, "QXL_IO_MEMSLOT_ADD: memory slot already active");
+            qxl_set_guest_bug(d,
+                        "QXL_IO_MEMSLOT_ADD: memory slot already active");
             break;
         }
         d->guest_slots[val].slot = d->ram->mem_slot;
@@ -1415,14 +1439,14 @@ async_common:
         break;
     case QXL_IO_MEMSLOT_DEL:
         if (val >= NUM_MEMSLOTS) {
-            qxl_guest_bug(d, "QXL_IO_MEMSLOT_DEL: val out of range");
+            qxl_set_guest_bug(d, "QXL_IO_MEMSLOT_DEL: val out of range");
             break;
         }
         qxl_del_memslot(d, val);
         break;
     case QXL_IO_CREATE_PRIMARY:
         if (val != 0) {
-            qxl_guest_bug(d, "QXL_IO_CREATE_PRIMARY (async=%d): val != 0",
+            qxl_set_guest_bug(d, "QXL_IO_CREATE_PRIMARY (async=%d): val != 0",
                           async);
             goto cancel_async;
         }
@@ -1431,7 +1455,7 @@ async_common:
         break;
     case QXL_IO_DESTROY_PRIMARY:
         if (val != 0) {
-            qxl_guest_bug(d, "QXL_IO_DESTROY_PRIMARY (async=%d): val != 0",
+            qxl_set_guest_bug(d, "QXL_IO_DESTROY_PRIMARY (async=%d): val != 0",
                           async);
             goto cancel_async;
         }
@@ -1443,7 +1467,7 @@ async_common:
         break;
     case QXL_IO_DESTROY_SURFACE_WAIT:
         if (val >= NUM_SURFACES) {
-            qxl_guest_bug(d, "QXL_IO_DESTROY_SURFACE (async=%d):"
+            qxl_set_guest_bug(d, "QXL_IO_DESTROY_SURFACE (async=%d):"
                              "%" PRIu64 " >= NUM_SURFACES", async, val);
             goto cancel_async;
         }
@@ -1467,7 +1491,7 @@ async_common:
         qxl_spice_destroy_surfaces(d, async);
         break;
     default:
-        qxl_guest_bug(d, "%s: unexpected ioport=0x%x\n", __func__, io_port);
+        qxl_set_guest_bug(d, "%s: unexpected ioport=0x%x\n", __func__, io_port);
     }
     return;
 cancel_async:
@@ -1694,14 +1718,20 @@ static DisplayChangeListener display_listener = {
     .dpy_refresh = display_refresh,
 };
 
-static void qxl_init_ramsize(PCIQXLDevice *qxl, uint32_t ram_min_mb)
+static void qxl_init_ramsize(PCIQXLDevice *qxl)
 {
-    /* vga ram (bar 0) */
+    /* vga mode framebuffer / primary surface (bar 0, first part) */
+    if (qxl->vgamem_size_mb < 8) {
+        qxl->vgamem_size_mb = 8;
+    }
+    qxl->vgamem_size = qxl->vgamem_size_mb * 1024 * 1024;
+
+    /* vga ram (bar 0, total) */
     if (qxl->ram_size_mb != -1) {
         qxl->vga.vram_size = qxl->ram_size_mb * 1024 * 1024;
     }
-    if (qxl->vga.vram_size < ram_min_mb * 1024 * 1024) {
-        qxl->vga.vram_size = ram_min_mb * 1024 * 1024;
+    if (qxl->vga.vram_size < qxl->vgamem_size * 2) {
+        qxl->vga.vram_size = qxl->vgamem_size * 2;
     }
 
     /* vram32 (surfaces, 32bit, bar 1) */
@@ -1724,6 +1754,7 @@ static void qxl_init_ramsize(PCIQXLDevice *qxl, uint32_t ram_min_mb)
         qxl->vram32_size = 4096;
         qxl->vram_size = 4096;
     }
+    qxl->vgamem_size = msb_mask(qxl->vgamem_size * 2 - 1);
     qxl->vga.vram_size = msb_mask(qxl->vga.vram_size * 2 - 1);
     qxl->vram32_size = msb_mask(qxl->vram32_size * 2 - 1);
     qxl->vram_size = msb_mask(qxl->vram_size * 2 - 1);
@@ -1742,6 +1773,7 @@ static int qxl_init_common(PCIQXLDevice *qxl)
     qemu_mutex_init(&qxl->track_lock);
     qemu_mutex_init(&qxl->async_lock);
     qxl->current_async = QXL_UNDEFINED_IO;
+    qxl->guest_bug = 0;
 
     switch (qxl->revision) {
     case 1: /* spice 0.4 -- qxl-1 */
@@ -1834,8 +1866,9 @@ static int qxl_init_primary(PCIDevice *dev)
     PortioList *qxl_vga_port_list = g_new(PortioList, 1);
 
     qxl->id = 0;
-    qxl_init_ramsize(qxl, 32);
-    vga_common_init(vga, qxl->vga.vram_size);
+    qxl_init_ramsize(qxl);
+    vga->vram_size_mb = qxl->vga.vram_size >> 20;
+    vga_common_init(vga);
     vga_init(vga, pci_address_space(dev), pci_address_space_io(dev), false);
     portio_list_init(qxl_vga_port_list, qxl_vga_portio_list, vga, "vga");
     portio_list_add(qxl_vga_port_list, pci_address_space_io(dev), 0x3b0);
@@ -1856,7 +1889,7 @@ static int qxl_init_secondary(PCIDevice *dev)
     PCIQXLDevice *qxl = DO_UPCAST(PCIQXLDevice, pci, dev);
 
     qxl->id = device_id++;
-    qxl_init_ramsize(qxl, 16);
+    qxl_init_ramsize(qxl);
     memory_region_init_ram(&qxl->vga.vram, "qxl.vgavram", qxl->vga.vram_size);
     vmstate_register_ram(&qxl->vga.vram, &qxl->pci.qdev);
     qxl->vga.vram_ptr = memory_region_get_ram_ptr(&qxl->vga.vram);
@@ -2034,6 +2067,7 @@ static Property qxl_properties[] = {
         DEFINE_PROP_UINT32("ram_size_mb",  PCIQXLDevice, ram_size_mb, -1),
         DEFINE_PROP_UINT32("vram_size_mb", PCIQXLDevice, vram32_size_mb, -1),
         DEFINE_PROP_UINT32("vram64_size_mb", PCIQXLDevice, vram_size_mb, -1),
+        DEFINE_PROP_UINT32("vgamem_mb", PCIQXLDevice, vgamem_size_mb, 16),
         DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/qxl.h b/hw/qxl.h
index 31029503fe..172baf6cc2 100644
--- a/hw/qxl.h
+++ b/hw/qxl.h
@@ -31,6 +31,9 @@ typedef struct PCIQXLDevice {
     uint32_t           debug;
     uint32_t           guestdebug;
     uint32_t           cmdlog;
+
+    uint32_t           guest_bug;
+
     enum qxl_mode      mode;
     uint32_t           cmdflags;
     int                generation;
@@ -81,6 +84,7 @@ typedef struct PCIQXLDevice {
     QXLReleaseInfo     *last_release;
     uint32_t           last_release_offset;
     uint32_t           oom_running;
+    uint32_t           vgamem_size;
 
     /* rom pci bar */
     QXLRom             shadow_rom;
@@ -102,6 +106,7 @@ typedef struct PCIQXLDevice {
     uint32_t          ram_size_mb;
     uint32_t          vram_size_mb;
     uint32_t          vram32_size_mb;
+    uint32_t          vgamem_size_mb;
 
     /* qxl_render_update state */
     int                render_update_cookie_num;
@@ -127,7 +132,8 @@ typedef struct PCIQXLDevice {
 
 /* qxl.c */
 void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL phys, int group_id);
-void qxl_guest_bug(PCIQXLDevice *qxl, const char *msg, ...) GCC_FMT_ATTR(2, 3);
+void qxl_set_guest_bug(PCIQXLDevice *qxl, const char *msg, ...)
+    GCC_FMT_ATTR(2, 3);
 
 void qxl_spice_update_area(PCIQXLDevice *qxl, uint32_t surface_id,
                            struct QXLRect *area, struct QXLRect *dirty_rects,
diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index f6f144b525..844f1b8c3f 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -781,7 +781,14 @@ static inline dma_addr_t rtl8139_addr64(uint32_t low, uint32_t high)
 #endif
 }
 
-static int rtl8139_can_receive(VLANClientState *nc)
+/* Workaround for buggy guest driver such as linux who allocates rx
+ * rings after the receiver were enabled. */
+static bool rtl8139_cp_rx_valid(RTL8139State *s)
+{
+    return !(s->RxRingAddrLO == 0 && s->RxRingAddrHI == 0);
+}
+
+static int rtl8139_can_receive(NetClientState *nc)
 {
     RTL8139State *s = DO_UPCAST(NICState, nc, nc)->opaque;
     int avail;
@@ -791,11 +798,8 @@ static int rtl8139_can_receive(VLANClientState *nc)
       return 1;
     if (!rtl8139_receiver_enabled(s))
       return 1;
-    /* network/host communication happens only in normal mode */
-    if ((s->Cfg9346 & Chip9346_op_mask) != Cfg9346_Normal)
-	return 0;
 
-    if (rtl8139_cp_receiver_enabled(s)) {
+    if (rtl8139_cp_receiver_enabled(s) && rtl8139_cp_rx_valid(s)) {
         /* ??? Flow control not implemented in c+ mode.
            This is a hack to work around slirp deficiencies anyway.  */
         return 1;
@@ -806,7 +810,7 @@ static int rtl8139_can_receive(VLANClientState *nc)
     }
 }
 
-static ssize_t rtl8139_do_receive(VLANClientState *nc, const uint8_t *buf, size_t size_, int do_interrupt)
+static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t size_, int do_interrupt)
 {
     RTL8139State *s = DO_UPCAST(NICState, nc, nc)->opaque;
     /* size is the length of the buffer passed to the driver */
@@ -836,12 +840,6 @@ static ssize_t rtl8139_do_receive(VLANClientState *nc, const uint8_t *buf, size_
         return -1;
     }
 
-    /* check whether we are in normal mode */
-    if ((s->Cfg9346 & Chip9346_op_mask) != Cfg9346_Normal) {
-        DPRINTF("not in normal op mode\n");
-        return -1;
-    }
-
     /* XXX: check this */
     if (s->RxConfig & AcceptAllPhys) {
         /* promiscuous: receive all */
@@ -946,6 +944,10 @@ static ssize_t rtl8139_do_receive(VLANClientState *nc, const uint8_t *buf, size_
 
     if (rtl8139_cp_receiver_enabled(s))
     {
+        if (!rtl8139_cp_rx_valid(s)) {
+            return size;
+        }
+
         DPRINTF("in C+ Rx mode ================\n");
 
         /* begin C+ receiver mode */
@@ -1185,7 +1187,7 @@ static ssize_t rtl8139_do_receive(VLANClientState *nc, const uint8_t *buf, size_
     return size_;
 }
 
-static ssize_t rtl8139_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t rtl8139_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     return rtl8139_do_receive(nc, buf, size, 1);
 }
@@ -1783,7 +1785,7 @@ static void rtl8139_transfer_frame(RTL8139State *s, uint8_t *buf, int size,
         if (iov) {
             buf2_size = iov_size(iov, 3);
             buf2 = g_malloc(buf2_size);
-            iov_to_buf(iov, 3, buf2, 0, buf2_size);
+            iov_to_buf(iov, 3, 0, buf2, buf2_size);
             buf = buf2;
         }
 
@@ -3429,14 +3431,14 @@ static void rtl8139_timer(void *opaque)
     rtl8139_set_next_tctr_time(s, qemu_get_clock_ns(vm_clock));
 }
 
-static void rtl8139_cleanup(VLANClientState *nc)
+static void rtl8139_cleanup(NetClientState *nc)
 {
     RTL8139State *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
     s->nic = NULL;
 }
 
-static int pci_rtl8139_uninit(PCIDevice *dev)
+static void pci_rtl8139_uninit(PCIDevice *dev)
 {
     RTL8139State *s = DO_UPCAST(RTL8139State, dev, dev);
 
@@ -3448,12 +3450,11 @@ static int pci_rtl8139_uninit(PCIDevice *dev)
     }
     qemu_del_timer(s->timer);
     qemu_free_timer(s->timer);
-    qemu_del_vlan_client(&s->nic->nc);
-    return 0;
+    qemu_del_net_client(&s->nic->nc);
 }
 
 static NetClientInfo net_rtl8139_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = rtl8139_can_receive,
     .receive = rtl8139_receive,
diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
index 4d49b96f94..a245684692 100644
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390-virtio-bus.c
@@ -402,6 +402,7 @@ static TypeInfo s390_virtio_net = {
 
 static Property s390_virtio_blk_properties[] = {
     DEFINE_BLOCK_PROPERTIES(VirtIOS390Device, blk.conf),
+    DEFINE_BLOCK_CHS_PROPERTIES(VirtIOS390Device, blk.conf),
     DEFINE_PROP_STRING("serial", VirtIOS390Device, blk.serial),
 #ifdef __linux__
     DEFINE_PROP_BIT("scsi", VirtIOS390Device, blk.scsi, 0, true),
diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 14e2f730b8..b8a857d145 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -186,6 +186,10 @@ static int scsi_qdev_init(DeviceState *qdev)
                                                          dev);
     }
 
+    if (bus->info->hotplug) {
+        bus->info->hotplug(bus, dev);
+    }
+
 err:
     return rc;
 }
@@ -729,25 +733,87 @@ static int scsi_get_performance_length(int num_desc, int type, int data_type)
     }
 }
 
+static int ata_passthrough_xfer_unit(SCSIDevice *dev, uint8_t *buf)
+{
+    int byte_block = (buf[2] >> 2) & 0x1;
+    int type = (buf[2] >> 4) & 0x1;
+    int xfer_unit;
+
+    if (byte_block) {
+        if (type) {
+            xfer_unit = dev->blocksize;
+        } else {
+            xfer_unit = 512;
+        }
+    } else {
+        xfer_unit = 1;
+    }
+
+    return xfer_unit;
+}
+
+static int ata_passthrough_12_xfer_size(SCSIDevice *dev, uint8_t *buf)
+{
+    int length = buf[2] & 0x3;
+    int xfer;
+    int unit = ata_passthrough_xfer_unit(dev, buf);
+
+    switch (length) {
+    case 0:
+    case 3: /* USB-specific.  */
+        xfer = 0;
+        break;
+    case 1:
+        xfer = buf[3];
+        break;
+    case 2:
+        xfer = buf[4];
+        break;
+    }
+
+    return xfer * unit;
+}
+
+static int ata_passthrough_16_xfer_size(SCSIDevice *dev, uint8_t *buf)
+{
+    int extend = buf[1] & 0x1;
+    int length = buf[2] & 0x3;
+    int xfer;
+    int unit = ata_passthrough_xfer_unit(dev, buf);
+
+    switch (length) {
+    case 0:
+    case 3: /* USB-specific.  */
+        xfer = 0;
+        break;
+    case 1:
+        xfer = buf[4];
+        xfer |= (extend ? buf[3] << 8 : 0);
+        break;
+    case 2:
+        xfer = buf[6];
+        xfer |= (extend ? buf[5] << 8 : 0);
+        break;
+    }
+
+    return xfer * unit;
+}
+
 static int scsi_req_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf)
 {
     switch (buf[0] >> 5) {
     case 0:
         cmd->xfer = buf[4];
-        cmd->len = 6;
         break;
     case 1:
     case 2:
         cmd->xfer = lduw_be_p(&buf[7]);
-        cmd->len = 10;
         break;
     case 4:
         cmd->xfer = ldl_be_p(&buf[10]) & 0xffffffffULL;
-        cmd->len = 16;
         break;
     case 5:
         cmd->xfer = ldl_be_p(&buf[6]) & 0xffffffffULL;
-        cmd->len = 12;
         break;
     default:
         return -1;
@@ -771,11 +837,9 @@ static int scsi_req_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf)
     case SYNCHRONIZE_CACHE_16:
     case LOCATE_16:
     case LOCK_UNLOCK_CACHE:
-    case LOAD_UNLOAD:
     case SET_CD_SPEED:
     case SET_LIMITS:
     case WRITE_LONG_10:
-    case MOVE_MEDIUM:
     case UPDATE_BLOCK:
     case RESERVE_TRACK:
     case SET_READ_AHEAD:
@@ -869,6 +933,17 @@ static int scsi_req_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf)
             cmd->xfer = buf[9] | (buf[8] << 8);
         }
         break;
+    case ATA_PASSTHROUGH_12:
+        if (dev->type == TYPE_ROM) {
+            /* BLANK command of MMC */
+            cmd->xfer = 0;
+        } else {
+            cmd->xfer = ata_passthrough_12_xfer_size(dev, buf);
+        }
+        break;
+    case ATA_PASSTHROUGH_16:
+        cmd->xfer = ata_passthrough_16_xfer_size(dev, buf);
+        break;
     }
     return 0;
 }
@@ -885,7 +960,6 @@ static int scsi_req_stream_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *bu
     case READ_REVERSE:
     case RECOVER_BUFFERED_DATA:
     case WRITE_6:
-        cmd->len = 6;
         cmd->xfer = buf[4] | (buf[3] << 8) | (buf[2] << 16);
         if (buf[1] & 0x01) { /* fixed */
             cmd->xfer *= dev->blocksize;
@@ -895,22 +969,34 @@ static int scsi_req_stream_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *bu
     case READ_REVERSE_16:
     case VERIFY_16:
     case WRITE_16:
-        cmd->len = 16;
         cmd->xfer = buf[14] | (buf[13] << 8) | (buf[12] << 16);
         if (buf[1] & 0x01) { /* fixed */
             cmd->xfer *= dev->blocksize;
         }
         break;
     case REWIND:
-    case START_STOP:
-        cmd->len = 6;
+    case LOAD_UNLOAD:
         cmd->xfer = 0;
         break;
     case SPACE_16:
         cmd->xfer = buf[13] | (buf[12] << 8);
         break;
     case READ_POSITION:
-        cmd->xfer = buf[8] | (buf[7] << 8);
+        switch (buf[1] & 0x1f) /* operation code */ {
+        case SHORT_FORM_BLOCK_ID:
+        case SHORT_FORM_VENDOR_SPECIFIC:
+            cmd->xfer = 20;
+            break;
+        case LONG_FORM:
+            cmd->xfer = 32;
+            break;
+        case EXTENDED_FORM:
+            cmd->xfer = buf[8] | (buf[7] << 8);
+            break;
+        default:
+            return -1;
+        }
+
         break;
     case FORMAT_UNIT:
         cmd->xfer = buf[4] | (buf[3] << 8);
@@ -922,6 +1008,29 @@ static int scsi_req_stream_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *bu
     return 0;
 }
 
+static int scsi_req_medium_changer_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf)
+{
+    switch (buf[0]) {
+    /* medium changer commands */
+    case EXCHANGE_MEDIUM:
+    case INITIALIZE_ELEMENT_STATUS:
+    case INITIALIZE_ELEMENT_STATUS_WITH_RANGE:
+    case MOVE_MEDIUM:
+    case POSITION_TO_ELEMENT:
+        cmd->xfer = 0;
+        break;
+    case READ_ELEMENT_STATUS:
+        cmd->xfer = buf[9] | (buf[8] << 8) | (buf[7] << 16);
+        break;
+
+    /* generic commands */
+    default:
+        return scsi_req_length(cmd, dev, buf);
+    }
+    return 0;
+}
+
+
 static void scsi_cmd_xfer_mode(SCSICommand *cmd)
 {
     if (!cmd->xfer) {
@@ -964,9 +1073,14 @@ static void scsi_cmd_xfer_mode(SCSICommand *cmd)
     case SEND_DVD_STRUCTURE:
     case PERSISTENT_RESERVE_OUT:
     case MAINTENANCE_OUT:
-    case ATA_PASSTHROUGH:
         cmd->mode = SCSI_XFER_TO_DEV;
         break;
+    case ATA_PASSTHROUGH_12:
+    case ATA_PASSTHROUGH_16:
+        /* T_DIR */
+        cmd->mode = (cmd->buf[2] & 0x8) ?
+                   SCSI_XFER_FROM_DEV : SCSI_XFER_TO_DEV;
+        break;
     default:
         cmd->mode = SCSI_XFER_FROM_DEV;
         break;
@@ -1001,11 +1115,36 @@ int scsi_req_parse(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf)
 {
     int rc;
 
-    if (dev->type == TYPE_TAPE) {
+    switch (buf[0] >> 5) {
+    case 0:
+        cmd->len = 6;
+        break;
+    case 1:
+    case 2:
+        cmd->len = 10;
+        break;
+    case 4:
+        cmd->len = 16;
+        break;
+    case 5:
+        cmd->len = 12;
+        break;
+    default:
+        return -1;
+    }
+
+    switch (dev->type) {
+    case TYPE_TAPE:
         rc = scsi_req_stream_length(cmd, dev, buf);
-    } else {
+        break;
+    case TYPE_MEDIUM_CHANGER:
+        rc = scsi_req_medium_changer_length(cmd, dev, buf);
+        break;
+    default:
         rc = scsi_req_length(cmd, dev, buf);
+        break;
     }
+
     if (rc != 0)
         return rc;
 
@@ -1015,6 +1154,16 @@ int scsi_req_parse(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf)
     return 0;
 }
 
+void scsi_device_report_change(SCSIDevice *dev, SCSISense sense)
+{
+    SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, dev->qdev.parent_bus);
+
+    scsi_device_set_ua(dev, sense);
+    if (bus->info->change) {
+        bus->info->change(bus, dev, sense);
+    }
+}
+
 /*
  * Predefined sense codes
  */
@@ -1036,7 +1185,7 @@ const struct SCSISense sense_code_NO_MEDIUM = {
 
 /* LUN not ready, medium removal prevented */
 const struct SCSISense sense_code_NOT_READY_REMOVAL_PREVENTED = {
-    .key = NOT_READY, .asc = 0x53, .ascq = 0x00
+    .key = NOT_READY, .asc = 0x53, .ascq = 0x02
 };
 
 /* Hardware error, internal target failure */
@@ -1059,6 +1208,16 @@ const struct SCSISense sense_code_INVALID_FIELD = {
     .key = ILLEGAL_REQUEST, .asc = 0x24, .ascq = 0x00
 };
 
+/* Illegal request, Invalid field in parameter list */
+const struct SCSISense sense_code_INVALID_PARAM = {
+    .key = ILLEGAL_REQUEST, .asc = 0x26, .ascq = 0x00
+};
+
+/* Illegal request, Parameter list length error */
+const struct SCSISense sense_code_INVALID_PARAM_LEN = {
+    .key = ILLEGAL_REQUEST, .asc = 0x1a, .ascq = 0x00
+};
+
 /* Illegal request, LUN not supported */
 const struct SCSISense sense_code_LUN_NOT_SUPPORTED = {
     .key = ILLEGAL_REQUEST, .asc = 0x25, .ascq = 0x00
@@ -1076,7 +1235,7 @@ const struct SCSISense sense_code_INCOMPATIBLE_FORMAT = {
 
 /* Illegal request, medium removal prevented */
 const struct SCSISense sense_code_ILLEGAL_REQ_REMOVAL_PREVENTED = {
-    .key = ILLEGAL_REQUEST, .asc = 0x53, .ascq = 0x00
+    .key = ILLEGAL_REQUEST, .asc = 0x53, .ascq = 0x02
 };
 
 /* Command aborted, I/O process terminated */
@@ -1094,6 +1253,11 @@ const struct SCSISense sense_code_LUN_FAILURE = {
     .key = ABORTED_COMMAND, .asc = 0x3e, .ascq = 0x01
 };
 
+/* Unit attention, Capacity data has changed */
+const struct SCSISense sense_code_CAPACITY_CHANGED = {
+    .key = UNIT_ATTENTION, .asc = 0x2a, .ascq = 0x09
+};
+
 /* Unit attention, Power on, reset or bus device reset occurred */
 const struct SCSISense sense_code_RESET = {
     .key = UNIT_ATTENTION, .asc = 0x29, .ascq = 0x00
@@ -1119,6 +1283,11 @@ const struct SCSISense sense_code_DEVICE_INTERNAL_RESET = {
     .key = UNIT_ATTENTION, .asc = 0x29, .ascq = 0x04
 };
 
+/* Data Protection, Write Protected */
+const struct SCSISense sense_code_WRITE_PROTECTED = {
+    .key = DATA_PROTECT, .asc = 0x27, .ascq = 0x00
+};
+
 /*
  * scsi_build_sense
  *
@@ -1183,7 +1352,8 @@ static const char *scsi_command_name(uint8_t cmd)
         [ REQUEST_SENSE            ] = "REQUEST_SENSE",
         [ FORMAT_UNIT              ] = "FORMAT_UNIT",
         [ READ_BLOCK_LIMITS        ] = "READ_BLOCK_LIMITS",
-        [ REASSIGN_BLOCKS          ] = "REASSIGN_BLOCKS",
+        [ REASSIGN_BLOCKS          ] = "REASSIGN_BLOCKS/INITIALIZE ELEMENT STATUS",
+        /* LOAD_UNLOAD and INITIALIZE_ELEMENT_STATUS use the same operation code */
         [ READ_6                   ] = "READ_6",
         [ WRITE_6                  ] = "WRITE_6",
         [ SET_CAPACITY             ] = "SET_CAPACITY",
@@ -1200,14 +1370,16 @@ static const char *scsi_command_name(uint8_t cmd)
         [ COPY                     ] = "COPY",
         [ ERASE                    ] = "ERASE",
         [ MODE_SENSE               ] = "MODE_SENSE",
-        [ START_STOP               ] = "START_STOP",
+        [ START_STOP               ] = "START_STOP/LOAD_UNLOAD",
+        /* LOAD_UNLOAD and START_STOP use the same operation code */
         [ RECEIVE_DIAGNOSTIC       ] = "RECEIVE_DIAGNOSTIC",
         [ SEND_DIAGNOSTIC          ] = "SEND_DIAGNOSTIC",
         [ ALLOW_MEDIUM_REMOVAL     ] = "ALLOW_MEDIUM_REMOVAL",
         [ READ_CAPACITY_10         ] = "READ_CAPACITY_10",
         [ READ_10                  ] = "READ_10",
         [ WRITE_10                 ] = "WRITE_10",
-        [ SEEK_10                  ] = "SEEK_10",
+        [ SEEK_10                  ] = "SEEK_10/POSITION_TO_ELEMENT",
+        /* SEEK_10 and POSITION_TO_ELEMENT use the same operation code */
         [ WRITE_VERIFY_10          ] = "WRITE_VERIFY_10",
         [ VERIFY_10                ] = "VERIFY_10",
         [ SEARCH_HIGH              ] = "SEARCH_HIGH",
@@ -1218,7 +1390,8 @@ static const char *scsi_command_name(uint8_t cmd)
         /* READ_POSITION and PRE_FETCH use the same operation code */
         [ SYNCHRONIZE_CACHE        ] = "SYNCHRONIZE_CACHE",
         [ LOCK_UNLOCK_CACHE        ] = "LOCK_UNLOCK_CACHE",
-        [ READ_DEFECT_DATA         ] = "READ_DEFECT_DATA",
+        [ READ_DEFECT_DATA         ] = "READ_DEFECT_DATA/INITIALIZE_ELEMENT_STATUS_WITH_RANGE",
+        /* READ_DEFECT_DATA and INITIALIZE_ELEMENT_STATUS_WITH_RANGE use the same operation code */
         [ MEDIUM_SCAN              ] = "MEDIUM_SCAN",
         [ COMPARE                  ] = "COMPARE",
         [ COPY_VERIFY              ] = "COPY_VERIFY",
@@ -1244,7 +1417,7 @@ static const char *scsi_command_name(uint8_t cmd)
         [ PERSISTENT_RESERVE_OUT   ] = "PERSISTENT_RESERVE_OUT",
         [ WRITE_FILEMARKS_16       ] = "WRITE_FILEMARKS_16",
         [ EXTENDED_COPY            ] = "EXTENDED_COPY",
-        [ ATA_PASSTHROUGH          ] = "ATA_PASSTHROUGH",
+        [ ATA_PASSTHROUGH_16       ] = "ATA_PASSTHROUGH_16",
         [ ACCESS_CONTROL_IN        ] = "ACCESS_CONTROL_IN",
         [ ACCESS_CONTROL_OUT       ] = "ACCESS_CONTROL_OUT",
         [ READ_16                  ] = "READ_16",
@@ -1261,9 +1434,9 @@ static const char *scsi_command_name(uint8_t cmd)
         [ SERVICE_ACTION_IN_16     ] = "SERVICE_ACTION_IN_16",
         [ WRITE_LONG_16            ] = "WRITE_LONG_16",
         [ REPORT_LUNS              ] = "REPORT_LUNS",
-        [ BLANK                    ] = "BLANK",
+        [ ATA_PASSTHROUGH_12       ] = "BLANK/ATA_PASSTHROUGH_12",
         [ MOVE_MEDIUM              ] = "MOVE_MEDIUM",
-        [ LOAD_UNLOAD              ] = "LOAD_UNLOAD",
+        [ EXCHANGE_MEDIUM          ] = "EXCHANGE MEDIUM",
         [ READ_12                  ] = "READ_12",
         [ WRITE_12                 ] = "WRITE_12",
         [ ERASE_12                 ] = "ERASE_12/GET_PERFORMANCE",
@@ -1296,6 +1469,7 @@ static const char *scsi_command_name(uint8_t cmd)
 
 SCSIRequest *scsi_req_ref(SCSIRequest *req)
 {
+    assert(req->refcount > 0);
     req->refcount++;
     return req;
 }
@@ -1304,6 +1478,10 @@ void scsi_req_unref(SCSIRequest *req)
 {
     assert(req->refcount > 0);
     if (--req->refcount == 0) {
+        SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, req->dev->qdev.parent_bus);
+        if (bus->info->free_request && req->hba_private) {
+            bus->info->free_request(bus, req->hba_private);
+        }
         if (req->ops->free_req) {
             req->ops->free_req(req);
         }
@@ -1389,7 +1567,7 @@ void scsi_req_complete(SCSIRequest *req, int status)
     assert(req->status == -1);
     req->status = status;
 
-    assert(req->sense_len < sizeof(req->sense));
+    assert(req->sense_len <= sizeof(req->sense));
     if (status == GOOD) {
         req->sense_len = 0;
     }
@@ -1418,6 +1596,7 @@ void scsi_req_complete(SCSIRequest *req, int status)
 
 void scsi_req_cancel(SCSIRequest *req)
 {
+    trace_scsi_req_cancel(req->dev->id, req->lun, req->tag);
     if (!req->enqueued) {
         return;
     }
@@ -1448,6 +1627,55 @@ void scsi_req_abort(SCSIRequest *req, int status)
     scsi_req_unref(req);
 }
 
+static int scsi_ua_precedence(SCSISense sense)
+{
+    if (sense.key != UNIT_ATTENTION) {
+        return INT_MAX;
+    }
+    if (sense.asc == 0x29 && sense.ascq == 0x04) {
+        /* DEVICE INTERNAL RESET goes with POWER ON OCCURRED */
+        return 1;
+    } else if (sense.asc == 0x3F && sense.ascq == 0x01) {
+        /* MICROCODE HAS BEEN CHANGED goes with SCSI BUS RESET OCCURRED */
+        return 2;
+    } else if (sense.asc == 0x29 && (sense.ascq == 0x05 || sense.ascq == 0x06)) {
+        /* These two go with "all others". */
+        ;
+    } else if (sense.asc == 0x29 && sense.ascq <= 0x07) {
+        /* POWER ON, RESET OR BUS DEVICE RESET OCCURRED = 0
+         * POWER ON OCCURRED = 1
+         * SCSI BUS RESET OCCURRED = 2
+         * BUS DEVICE RESET FUNCTION OCCURRED = 3
+         * I_T NEXUS LOSS OCCURRED = 7
+         */
+        return sense.ascq;
+    } else if (sense.asc == 0x2F && sense.ascq == 0x01) {
+        /* COMMANDS CLEARED BY POWER LOSS NOTIFICATION  */
+        return 8;
+    }
+    return (sense.asc << 8) | sense.ascq;
+}
+
+void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense)
+{
+    int prec1, prec2;
+    if (sense.key != UNIT_ATTENTION) {
+        return;
+    }
+    trace_scsi_device_set_ua(sdev->id, sdev->lun, sense.key,
+                             sense.asc, sense.ascq);
+
+    /*
+     * Override a pre-existing unit attention condition, except for a more
+     * important reset condition.
+    */
+    prec1 = scsi_ua_precedence(sdev->unit_attention);
+    prec2 = scsi_ua_precedence(sense);
+    if (prec2 < prec1) {
+        sdev->unit_attention = sense;
+    }
+}
+
 void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
 {
     SCSIRequest *req;
@@ -1456,7 +1684,8 @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
         req = QTAILQ_FIRST(&sdev->requests);
         scsi_req_cancel(req);
     }
-    sdev->unit_attention = sense;
+
+    scsi_device_set_ua(sdev, sense);
 }
 
 static char *scsibus_get_dev_path(DeviceState *dev)
@@ -1571,6 +1800,17 @@ static int get_scsi_requests(QEMUFile *f, void *pv, size_t size)
     return 0;
 }
 
+static int scsi_qdev_unplug(DeviceState *qdev)
+{
+    SCSIDevice *dev = SCSI_DEVICE(qdev);
+    SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, dev->qdev.parent_bus);
+
+    if (bus->info->hot_unplug) {
+        bus->info->hot_unplug(bus, dev);
+    }
+    return qdev_simple_unplug_cb(qdev);
+}
+
 static const VMStateInfo vmstate_info_scsi_requests = {
     .name = "scsi-requests",
     .get  = get_scsi_requests,
@@ -1607,7 +1847,7 @@ static void scsi_device_class_init(ObjectClass *klass, void *data)
     DeviceClass *k = DEVICE_CLASS(klass);
     k->bus_type = TYPE_SCSI_BUS;
     k->init     = scsi_qdev_init;
-    k->unplug   = qdev_simple_unplug_cb;
+    k->unplug   = scsi_qdev_unplug;
     k->exit     = scsi_qdev_exit;
     k->props    = scsi_props;
 }
diff --git a/hw/scsi-defs.h b/hw/scsi-defs.h
index 219c84dfb1..d7a401912b 100644
--- a/hw/scsi-defs.h
+++ b/hw/scsi-defs.h
@@ -29,6 +29,7 @@
 #define REQUEST_SENSE         0x03
 #define FORMAT_UNIT           0x04
 #define READ_BLOCK_LIMITS     0x05
+#define INITIALIZE_ELEMENT_STATUS 0x07
 #define REASSIGN_BLOCKS       0x07
 #define READ_6                0x08
 #define WRITE_6               0x0a
@@ -44,6 +45,7 @@
 #define COPY                  0x18
 #define ERASE                 0x19
 #define MODE_SENSE            0x1a
+#define LOAD_UNLOAD           0x1b
 #define START_STOP            0x1b
 #define RECEIVE_DIAGNOSTIC    0x1c
 #define SEND_DIAGNOSTIC       0x1d
@@ -53,6 +55,7 @@
 #define WRITE_10              0x2a
 #define SEEK_10               0x2b
 #define LOCATE_10             0x2b
+#define POSITION_TO_ELEMENT   0x2b
 #define WRITE_VERIFY_10       0x2e
 #define VERIFY_10             0x2f
 #define SEARCH_HIGH           0x30
@@ -63,6 +66,7 @@
 #define READ_POSITION         0x34
 #define SYNCHRONIZE_CACHE     0x35
 #define LOCK_UNLOCK_CACHE     0x36
+#define INITIALIZE_ELEMENT_STATUS_WITH_RANGE 0x37
 #define READ_DEFECT_DATA      0x37
 #define MEDIUM_SCAN           0x38
 #define COMPARE               0x39
@@ -82,6 +86,7 @@
 #define GET_EVENT_STATUS_NOTIFICATION 0x4a
 #define LOG_SELECT            0x4c
 #define LOG_SENSE             0x4d
+#define READ_DISC_INFORMATION 0x51
 #define RESERVE_TRACK         0x53
 #define MODE_SELECT_10        0x55
 #define RESERVE_10            0x56
@@ -95,7 +100,7 @@
 #define READ_REVERSE_16       0x81
 #define ALLOW_OVERWRITE       0x82
 #define EXTENDED_COPY         0x83
-#define ATA_PASSTHROUGH       0x85
+#define ATA_PASSTHROUGH_16    0x85
 #define ACCESS_CONTROL_IN     0x86
 #define ACCESS_CONTROL_OUT    0x87
 #define READ_16               0x88
@@ -112,11 +117,11 @@
 #define SERVICE_ACTION_IN_16  0x9e
 #define WRITE_LONG_16         0x9f
 #define REPORT_LUNS           0xa0
-#define BLANK                 0xa1
+#define ATA_PASSTHROUGH_12    0xa1
 #define MAINTENANCE_IN        0xa3
 #define MAINTENANCE_OUT       0xa4
 #define MOVE_MEDIUM           0xa5
-#define LOAD_UNLOAD           0xa6
+#define EXCHANGE_MEDIUM       0xa6
 #define SET_READ_AHEAD        0xa7
 #define READ_12               0xa8
 #define WRITE_12              0xaa
@@ -142,6 +147,14 @@
 #define SAI_READ_CAPACITY_16  0x10
 
 /*
+ * READ POSITION service action codes
+ */
+#define SHORT_FORM_BLOCK_ID  0x00
+#define SHORT_FORM_VENDOR_SPECIFIC 0x01
+#define LONG_FORM            0x06
+#define EXTENDED_FORM        0x08
+
+/*
  *  SAM Status codes
  */
 
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index ae2519458c..c8d5edd86e 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -34,6 +34,7 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
 #include "scsi-defs.h"
 #include "sysemu.h"
 #include "blockdev.h"
+#include "hw/block-common.h"
 #include "dma.h"
 
 #ifdef __linux
@@ -42,6 +43,7 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
 
 #define SCSI_DMA_BUF_SIZE    131072
 #define SCSI_MAX_INQUIRY_LEN 256
+#define SCSI_MAX_MODE_LEN    256
 
 typedef struct SCSIDiskState SCSIDiskState;
 
@@ -67,9 +69,12 @@ struct SCSIDiskState
     bool media_changed;
     bool media_event;
     bool eject_request;
+    uint64_t wwn;
     QEMUBH *bh;
     char *version;
     char *serial;
+    char *vendor;
+    char *product;
     bool tray_open;
     bool tray_locked;
 };
@@ -165,7 +170,7 @@ static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
 }
 
-static void scsi_flush_complete(void * opaque, int ret)
+static void scsi_aio_complete(void *opaque, int ret)
 {
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
@@ -218,7 +223,7 @@ static void scsi_write_do_fua(SCSIDiskReq *r)
 
     if (scsi_is_cmd_fua(&r->req.cmd)) {
         bdrv_acct_start(s->qdev.conf.bs, &r->acct, 0, BDRV_ACCT_FLUSH);
-        r->req.aiocb = bdrv_aio_flush(s->qdev.conf.bs, scsi_flush_complete, r);
+        r->req.aiocb = bdrv_aio_flush(s->qdev.conf.bs, scsi_aio_complete, r);
         return;
     }
 
@@ -339,13 +344,6 @@ static void scsi_read_data(SCSIRequest *req)
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     bool first;
 
-    if (r->sector_count == (uint32_t)-1) {
-        DPRINTF("Read buf_len=%zd\n", r->iov.iov_len);
-        r->sector_count = 0;
-        r->started = true;
-        scsi_req_data(&r->req, r->iov.iov_len);
-        return;
-    }
     DPRINTF("Read sector_count=%d\n", r->sector_count);
     if (r->sector_count == 0) {
         /* This also clears the sense buffer for REQUEST SENSE.  */
@@ -449,7 +447,7 @@ static void scsi_write_complete(void * opaque, int ret)
         return;
     } else {
         scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
-        DPRINTF("Write complete tag=0x%x more=%d\n", r->req.tag, r->qiov.size);
+        DPRINTF("Write complete tag=0x%x more=%zd\n", r->req.tag, r->qiov.size);
         scsi_req_data(&r->req, r->qiov.size);
     }
 
@@ -522,6 +520,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
     int buflen = 0;
+    int start;
 
     if (req->cmd.buf[1] & 0x1) {
         /* Vital product data */
@@ -530,14 +529,14 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
         outbuf[buflen++] = s->qdev.type & 0x1f;
         outbuf[buflen++] = page_code ; // this page
         outbuf[buflen++] = 0x00;
+        outbuf[buflen++] = 0x00;
+        start = buflen;
 
         switch (page_code) {
         case 0x00: /* Supported page codes, mandatory */
         {
-            int pages;
             DPRINTF("Inquiry EVPD[Supported pages] "
                     "buffer size %zd\n", req->cmd.xfer);
-            pages = buflen++;
             outbuf[buflen++] = 0x00; // list of supported pages (this page)
             if (s->serial) {
                 outbuf[buflen++] = 0x80; // unit serial number
@@ -547,7 +546,6 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
                 outbuf[buflen++] = 0xb0; // block limits
                 outbuf[buflen++] = 0xb2; // thin provisioning
             }
-            outbuf[pages] = buflen - pages - 1; // number of pages
             break;
         }
         case 0x80: /* Device serial number, optional */
@@ -566,7 +564,6 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
 
             DPRINTF("Inquiry EVPD[Serial number] "
                     "buffer size %zd\n", req->cmd.xfer);
-            outbuf[buflen++] = l;
             memcpy(outbuf+buflen, s->serial, l);
             buflen += l;
             break;
@@ -584,14 +581,21 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             DPRINTF("Inquiry EVPD[Device identification] "
                     "buffer size %zd\n", req->cmd.xfer);
 
-            outbuf[buflen++] = 4 + id_len;
             outbuf[buflen++] = 0x2; // ASCII
             outbuf[buflen++] = 0;   // not officially assigned
             outbuf[buflen++] = 0;   // reserved
             outbuf[buflen++] = id_len; // length of data following
-
             memcpy(outbuf+buflen, str, id_len);
             buflen += id_len;
+
+            if (s->wwn) {
+                outbuf[buflen++] = 0x1; // Binary
+                outbuf[buflen++] = 0x3; // NAA
+                outbuf[buflen++] = 0;   // reserved
+                outbuf[buflen++] = 8;
+                stq_be_p(&outbuf[buflen], s->wwn);
+                buflen += 8;
+            }
             break;
         }
         case 0xb0: /* block limits */
@@ -609,8 +613,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
                 return -1;
             }
             /* required VPD size with unmap support */
-            outbuf[3] = buflen = 0x3c;
-
+            buflen = 0x40;
             memset(outbuf + 4, 0, buflen - 4);
 
             /* optimal transfer length granularity */
@@ -632,7 +635,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
         }
         case 0xb2: /* thin provisioning */
         {
-            outbuf[3] = buflen = 8;
+            buflen = 8;
             outbuf[4] = 0;
             outbuf[5] = 0x60; /* write_same 10/16 supported */
             outbuf[6] = s->qdev.conf.discard_granularity ? 2 : 1;
@@ -643,6 +646,8 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             return -1;
         }
         /* done with EVPD */
+        assert(buflen - start <= 255);
+        outbuf[start - 1] = buflen - start;
         return buflen;
     }
 
@@ -660,12 +665,10 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
 
     outbuf[0] = s->qdev.type & 0x1f;
     outbuf[1] = (s->features & (1 << SCSI_DISK_F_REMOVABLE)) ? 0x80 : 0;
-    if (s->qdev.type == TYPE_ROM) {
-        memcpy(&outbuf[16], "QEMU CD-ROM     ", 16);
-    } else {
-        memcpy(&outbuf[16], "QEMU HARDDISK   ", 16);
-    }
-    memcpy(&outbuf[8], "QEMU    ", 8);
+
+    strpadcpy((char *) &outbuf[16], 16, s->product, ' ');
+    strpadcpy((char *) &outbuf[8], 8, s->vendor, ' ');
+
     memset(&outbuf[32], 0, 4);
     memcpy(&outbuf[32], s->version, MIN(4, strlen(s->version)));
     /*
@@ -716,6 +719,39 @@ static inline bool media_is_cd(SCSIDiskState *s)
     return nb_sectors <= CD_MAX_SECTORS;
 }
 
+static int scsi_read_disc_information(SCSIDiskState *s, SCSIDiskReq *r,
+                                      uint8_t *outbuf)
+{
+    uint8_t type = r->req.cmd.buf[1] & 7;
+
+    if (s->qdev.type != TYPE_ROM) {
+        return -1;
+    }
+
+    /* Types 1/2 are only defined for Blu-Ray.  */
+    if (type != 0) {
+        scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
+        return -1;
+    }
+
+    memset(outbuf, 0, 34);
+    outbuf[1] = 32;
+    outbuf[2] = 0xe; /* last session complete, disc finalized */
+    outbuf[3] = 1;   /* first track on disc */
+    outbuf[4] = 1;   /* # of sessions */
+    outbuf[5] = 1;   /* first track of last session */
+    outbuf[6] = 1;   /* last track of last session */
+    outbuf[7] = 0x20; /* unrestricted use */
+    outbuf[8] = 0x00; /* CD-ROM or DVD-ROM */
+    /* 9-10-11: most significant byte corresponding bytes 4-5-6 */
+    /* 12-23: not meaningful for CD-ROM or DVD-ROM */
+    /* 24-31: disc bar code */
+    /* 32: disc application code */
+    /* 33: number of OPC tables */
+
+    return 34;
+}
+
 static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r,
                                    uint8_t *outbuf)
 {
@@ -925,152 +961,156 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
         [MODE_PAGE_CAPABILITIES]           = (1 << TYPE_ROM),
     };
 
-    BlockDriverState *bdrv = s->qdev.conf.bs;
-    int cylinders, heads, secs;
-    uint8_t *p = *p_outbuf;
+    uint8_t *p = *p_outbuf + 2;
+    int length;
 
     if ((mode_sense_valid[page] & (1 << s->qdev.type)) == 0) {
         return -1;
     }
 
-    p[0] = page;
-
     /*
      * If Changeable Values are requested, a mask denoting those mode parameters
      * that are changeable shall be returned. As we currently don't support
      * parameter changes via MODE_SELECT all bits are returned set to zero.
      * The buffer was already menset to zero by the caller of this function.
+     *
+     * The offsets here are off by two compared to the descriptions in the
+     * SCSI specs, because those include a 2-byte header.  This is unfortunate,
+     * but it is done so that offsets are consistent within our implementation
+     * of MODE SENSE and MODE SELECT.  MODE SELECT has to deal with both
+     * 2-byte and 4-byte headers.
      */
     switch (page) {
     case MODE_PAGE_HD_GEOMETRY:
-        p[1] = 0x16;
+        length = 0x16;
         if (page_control == 1) { /* Changeable Values */
             break;
         }
         /* if a geometry hint is available, use it */
-        bdrv_guess_geometry(bdrv, &cylinders, &heads, &secs);
-        p[2] = (cylinders >> 16) & 0xff;
-        p[3] = (cylinders >> 8) & 0xff;
-        p[4] = cylinders & 0xff;
-        p[5] = heads & 0xff;
+        p[0] = (s->qdev.conf.cyls >> 16) & 0xff;
+        p[1] = (s->qdev.conf.cyls >> 8) & 0xff;
+        p[2] = s->qdev.conf.cyls & 0xff;
+        p[3] = s->qdev.conf.heads & 0xff;
         /* Write precomp start cylinder, disabled */
-        p[6] = (cylinders >> 16) & 0xff;
-        p[7] = (cylinders >> 8) & 0xff;
-        p[8] = cylinders & 0xff;
+        p[4] = (s->qdev.conf.cyls >> 16) & 0xff;
+        p[5] = (s->qdev.conf.cyls >> 8) & 0xff;
+        p[6] = s->qdev.conf.cyls & 0xff;
         /* Reduced current start cylinder, disabled */
-        p[9] = (cylinders >> 16) & 0xff;
-        p[10] = (cylinders >> 8) & 0xff;
-        p[11] = cylinders & 0xff;
+        p[7] = (s->qdev.conf.cyls >> 16) & 0xff;
+        p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
+        p[9] = s->qdev.conf.cyls & 0xff;
         /* Device step rate [ns], 200ns */
-        p[12] = 0;
-        p[13] = 200;
+        p[10] = 0;
+        p[11] = 200;
         /* Landing zone cylinder */
+        p[12] = 0xff;
+        p[13] =  0xff;
         p[14] = 0xff;
-        p[15] =  0xff;
-        p[16] = 0xff;
         /* Medium rotation rate [rpm], 5400 rpm */
-        p[20] = (5400 >> 8) & 0xff;
-        p[21] = 5400 & 0xff;
+        p[18] = (5400 >> 8) & 0xff;
+        p[19] = 5400 & 0xff;
         break;
 
     case MODE_PAGE_FLEXIBLE_DISK_GEOMETRY:
-        p[1] = 0x1e;
+        length = 0x1e;
         if (page_control == 1) { /* Changeable Values */
             break;
         }
         /* Transfer rate [kbit/s], 5Mbit/s */
-        p[2] = 5000 >> 8;
-        p[3] = 5000 & 0xff;
+        p[0] = 5000 >> 8;
+        p[1] = 5000 & 0xff;
         /* if a geometry hint is available, use it */
-        bdrv_guess_geometry(bdrv, &cylinders, &heads, &secs);
-        p[4] = heads & 0xff;
-        p[5] = secs & 0xff;
-        p[6] = s->qdev.blocksize >> 8;
-        p[8] = (cylinders >> 8) & 0xff;
-        p[9] = cylinders & 0xff;
+        p[2] = s->qdev.conf.heads & 0xff;
+        p[3] = s->qdev.conf.secs & 0xff;
+        p[4] = s->qdev.blocksize >> 8;
+        p[6] = (s->qdev.conf.cyls >> 8) & 0xff;
+        p[7] = s->qdev.conf.cyls & 0xff;
         /* Write precomp start cylinder, disabled */
-        p[10] = (cylinders >> 8) & 0xff;
-        p[11] = cylinders & 0xff;
+        p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
+        p[9] = s->qdev.conf.cyls & 0xff;
         /* Reduced current start cylinder, disabled */
-        p[12] = (cylinders >> 8) & 0xff;
-        p[13] = cylinders & 0xff;
+        p[10] = (s->qdev.conf.cyls >> 8) & 0xff;
+        p[11] = s->qdev.conf.cyls & 0xff;
         /* Device step rate [100us], 100us */
-        p[14] = 0;
-        p[15] = 1;
+        p[12] = 0;
+        p[13] = 1;
         /* Device step pulse width [us], 1us */
-        p[16] = 1;
+        p[14] = 1;
         /* Device head settle delay [100us], 100us */
-        p[17] = 0;
-        p[18] = 1;
+        p[15] = 0;
+        p[16] = 1;
         /* Motor on delay [0.1s], 0.1s */
-        p[19] = 1;
+        p[17] = 1;
         /* Motor off delay [0.1s], 0.1s */
-        p[20] = 1;
+        p[18] = 1;
         /* Medium rotation rate [rpm], 5400 rpm */
-        p[28] = (5400 >> 8) & 0xff;
-        p[29] = 5400 & 0xff;
+        p[26] = (5400 >> 8) & 0xff;
+        p[27] = 5400 & 0xff;
         break;
 
     case MODE_PAGE_CACHING:
-        p[0] = 8;
-        p[1] = 0x12;
-        if (page_control == 1) { /* Changeable Values */
-            break;
-        }
-        if (bdrv_enable_write_cache(s->qdev.conf.bs)) {
-            p[2] = 4; /* WCE */
+        length = 0x12;
+        if (page_control == 1 || /* Changeable Values */
+            bdrv_enable_write_cache(s->qdev.conf.bs)) {
+            p[0] = 4; /* WCE */
         }
         break;
 
     case MODE_PAGE_R_W_ERROR:
-        p[1] = 10;
-        p[2] = 0x80; /* Automatic Write Reallocation Enabled */
+        length = 10;
+        if (page_control == 1) { /* Changeable Values */
+            break;
+        }
+        p[0] = 0x80; /* Automatic Write Reallocation Enabled */
         if (s->qdev.type == TYPE_ROM) {
-            p[3] = 0x20; /* Read Retry Count */
+            p[1] = 0x20; /* Read Retry Count */
         }
         break;
 
     case MODE_PAGE_AUDIO_CTL:
-        p[1] = 14;
+        length = 14;
         break;
 
     case MODE_PAGE_CAPABILITIES:
-        p[1] = 0x14;
+        length = 0x14;
         if (page_control == 1) { /* Changeable Values */
             break;
         }
 
-        p[2] = 0x3b; /* CD-R & CD-RW read */
-        p[3] = 0; /* Writing not supported */
-        p[4] = 0x7f; /* Audio, composite, digital out,
+        p[0] = 0x3b; /* CD-R & CD-RW read */
+        p[1] = 0; /* Writing not supported */
+        p[2] = 0x7f; /* Audio, composite, digital out,
                         mode 2 form 1&2, multi session */
-        p[5] = 0xff; /* CD DA, DA accurate, RW supported,
+        p[3] = 0xff; /* CD DA, DA accurate, RW supported,
                         RW corrected, C2 errors, ISRC,
                         UPC, Bar code */
-        p[6] = 0x2d | (s->tray_locked ? 2 : 0);
+        p[4] = 0x2d | (s->tray_locked ? 2 : 0);
         /* Locking supported, jumper present, eject, tray */
-        p[7] = 0; /* no volume & mute control, no
+        p[5] = 0; /* no volume & mute control, no
                      changer */
-        p[8] = (50 * 176) >> 8; /* 50x read speed */
-        p[9] = (50 * 176) & 0xff;
-        p[10] = 2 >> 8; /* Two volume levels */
-        p[11] = 2 & 0xff;
-        p[12] = 2048 >> 8; /* 2M buffer */
-        p[13] = 2048 & 0xff;
-        p[14] = (16 * 176) >> 8; /* 16x read speed current */
-        p[15] = (16 * 176) & 0xff;
-        p[18] = (16 * 176) >> 8; /* 16x write speed */
+        p[6] = (50 * 176) >> 8; /* 50x read speed */
+        p[7] = (50 * 176) & 0xff;
+        p[8] = 2 >> 8; /* Two volume levels */
+        p[9] = 2 & 0xff;
+        p[10] = 2048 >> 8; /* 2M buffer */
+        p[11] = 2048 & 0xff;
+        p[12] = (16 * 176) >> 8; /* 16x read speed current */
+        p[13] = (16 * 176) & 0xff;
+        p[16] = (16 * 176) >> 8; /* 16x write speed */
+        p[17] = (16 * 176) & 0xff;
+        p[18] = (16 * 176) >> 8; /* 16x write speed current */
         p[19] = (16 * 176) & 0xff;
-        p[20] = (16 * 176) >> 8; /* 16x write speed current */
-        p[21] = (16 * 176) & 0xff;
         break;
 
     default:
         return -1;
     }
 
-    *p_outbuf += p[1] + 2;
-    return p[1] + 2;
+    assert(length < 256);
+    (*p_outbuf)[0] = page;
+    (*p_outbuf)[1] = length;
+    *p_outbuf += length + 2;
+    return length + 2;
 }
 
 static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
@@ -1207,8 +1247,14 @@ static int scsi_disk_emulate_start_stop(SCSIDiskReq *r)
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
     bool start = req->cmd.buf[4] & 1;
     bool loej = req->cmd.buf[4] & 2; /* load on start, eject on !start */
+    int pwrcnd = req->cmd.buf[4] & 0xf0;
+
+    if (pwrcnd) {
+        /* eject/load only happens for power condition == 0 */
+        return 0;
+    }
 
-    if (s->qdev.type == TYPE_ROM && loej) {
+    if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) && loej) {
         if (!start && !s->tray_open && s->tray_locked) {
             scsi_check_condition(r,
                                  bdrv_is_inserted(s->qdev.conf.bs)
@@ -1225,13 +1271,239 @@ static int scsi_disk_emulate_start_stop(SCSIDiskReq *r)
     return 0;
 }
 
-static int scsi_disk_emulate_command(SCSIDiskReq *r)
+static void scsi_disk_emulate_read_data(SCSIRequest *req)
 {
-    SCSIRequest *req = &r->req;
+    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
+    int buflen = r->iov.iov_len;
+
+    if (buflen) {
+        DPRINTF("Read buf_len=%d\n", buflen);
+        r->iov.iov_len = 0;
+        r->started = true;
+        scsi_req_data(&r->req, buflen);
+        return;
+    }
+
+    /* This also clears the sense buffer for REQUEST SENSE.  */
+    scsi_req_complete(&r->req, GOOD);
+}
+
+static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
+                                       uint8_t *inbuf, int inlen)
+{
+    uint8_t mode_current[SCSI_MAX_MODE_LEN];
+    uint8_t mode_changeable[SCSI_MAX_MODE_LEN];
+    uint8_t *p;
+    int len, expected_len, changeable_len, i;
+
+    /* The input buffer does not include the page header, so it is
+     * off by 2 bytes.
+     */
+    expected_len = inlen + 2;
+    if (expected_len > SCSI_MAX_MODE_LEN) {
+        return -1;
+    }
+
+    p = mode_current;
+    memset(mode_current, 0, inlen + 2);
+    len = mode_sense_page(s, page, &p, 0);
+    if (len < 0 || len != expected_len) {
+        return -1;
+    }
+
+    p = mode_changeable;
+    memset(mode_changeable, 0, inlen + 2);
+    changeable_len = mode_sense_page(s, page, &p, 1);
+    assert(changeable_len == len);
+
+    /* Check that unchangeable bits are the same as what MODE SENSE
+     * would return.
+     */
+    for (i = 2; i < len; i++) {
+        if (((mode_current[i] ^ inbuf[i - 2]) & ~mode_changeable[i]) != 0) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
+static void scsi_disk_apply_mode_select(SCSIDiskState *s, int page, uint8_t *p)
+{
+    switch (page) {
+    case MODE_PAGE_CACHING:
+        bdrv_set_enable_write_cache(s->qdev.conf.bs, (p[0] & 4) != 0);
+        break;
+
+    default:
+        break;
+    }
+}
+
+static int mode_select_pages(SCSIDiskReq *r, uint8_t *p, int len, bool change)
+{
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+    while (len > 0) {
+        int page, subpage, page_len;
+
+        /* Parse both possible formats for the mode page headers.  */
+        page = p[0] & 0x3f;
+        if (p[0] & 0x40) {
+            if (len < 4) {
+                goto invalid_param_len;
+            }
+            subpage = p[1];
+            page_len = lduw_be_p(&p[2]);
+            p += 4;
+            len -= 4;
+        } else {
+            if (len < 2) {
+                goto invalid_param_len;
+            }
+            subpage = 0;
+            page_len = p[1];
+            p += 2;
+            len -= 2;
+        }
+
+        if (subpage) {
+            goto invalid_param;
+        }
+        if (page_len > len) {
+            goto invalid_param_len;
+        }
+
+        if (!change) {
+            if (scsi_disk_check_mode_select(s, page, p, page_len) < 0) {
+                goto invalid_param;
+            }
+        } else {
+            scsi_disk_apply_mode_select(s, page, p);
+        }
+
+        p += page_len;
+        len -= page_len;
+    }
+    return 0;
+
+invalid_param:
+    scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
+    return -1;
+
+invalid_param_len:
+    scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
+    return -1;
+}
+
+static void scsi_disk_emulate_mode_select(SCSIDiskReq *r, uint8_t *inbuf)
+{
+    uint8_t *p = inbuf;
+    int cmd = r->req.cmd.buf[0];
+    int len = r->req.cmd.xfer;
+    int hdr_len = (cmd == MODE_SELECT ? 4 : 8);
+    int bd_len;
+    int pass;
+
+    /* We only support PF=1, SP=0.  */
+    if ((r->req.cmd.buf[1] & 0x11) != 0x10) {
+        goto invalid_field;
+    }
+
+    if (len < hdr_len) {
+        goto invalid_param_len;
+    }
+
+    bd_len = (cmd == MODE_SELECT ? p[3] : lduw_be_p(&p[6]));
+    len -= hdr_len;
+    p += hdr_len;
+    if (len < bd_len) {
+        goto invalid_param_len;
+    }
+    if (bd_len != 0 && bd_len != 8) {
+        goto invalid_param;
+    }
+
+    len -= bd_len;
+    p += bd_len;
+
+    /* Ensure no change is made if there is an error!  */
+    for (pass = 0; pass < 2; pass++) {
+        if (mode_select_pages(r, p, len, pass == 1) < 0) {
+            assert(pass == 0);
+            return;
+        }
+    }
+    scsi_req_complete(&r->req, GOOD);
+    return;
+
+invalid_param:
+    scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
+    return;
+
+invalid_param_len:
+    scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
+    return;
+
+invalid_field:
+    scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
+    return;
+}
+
+static void scsi_disk_emulate_write_data(SCSIRequest *req)
+{
+    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
+
+    if (r->iov.iov_len) {
+        int buflen = r->iov.iov_len;
+        DPRINTF("Write buf_len=%d\n", buflen);
+        r->iov.iov_len = 0;
+        scsi_req_data(&r->req, buflen);
+        return;
+    }
+
+    switch (req->cmd.buf[0]) {
+    case MODE_SELECT:
+    case MODE_SELECT_10:
+        /* This also clears the sense buffer for REQUEST SENSE.  */
+        scsi_disk_emulate_mode_select(r, r->iov.iov_base);
+        break;
+
+    default:
+        abort();
+    }
+}
+
+static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
+{
+    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
     uint64_t nb_sectors;
     uint8_t *outbuf;
-    int buflen = 0;
+    int buflen;
+
+    switch (req->cmd.buf[0]) {
+    case INQUIRY:
+    case MODE_SENSE:
+    case MODE_SENSE_10:
+    case RESERVE:
+    case RESERVE_10:
+    case RELEASE:
+    case RELEASE_10:
+    case START_STOP:
+    case ALLOW_MEDIUM_REMOVAL:
+    case GET_CONFIGURATION:
+    case GET_EVENT_STATUS_NOTIFICATION:
+    case MECHANISM_STATUS:
+    case REQUEST_SENSE:
+        break;
+
+    default:
+        if (s->tray_open || !bdrv_is_inserted(s->qdev.conf.bs)) {
+            scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
+            return 0;
+        }
+        break;
+    }
 
     if (!r->iov.iov_base) {
         /*
@@ -1249,6 +1521,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
         r->iov.iov_base = qemu_blockalign(s->qdev.conf.bs, r->buflen);
     }
 
+    buflen = req->cmd.xfer;
     outbuf = r->iov.iov_base;
     switch (req->cmd.buf[0]) {
     case TEST_UNIT_READY:
@@ -1295,7 +1568,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
         break;
     case START_STOP:
         if (scsi_disk_emulate_start_stop(r) < 0) {
-            return -1;
+            return 0;
         }
         break;
     case ALLOW_MEDIUM_REMOVAL:
@@ -1355,6 +1628,12 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
             goto illegal_request;
         }
         break;
+    case READ_DISC_INFORMATION:
+        buflen = scsi_read_disc_information(s, r, outbuf);
+        if (buflen < 0) {
+            goto illegal_request;
+        }
+        break;
     case READ_DVD_STRUCTURE:
         buflen = scsi_read_dvd_structure(s, r, outbuf);
         if (buflen < 0) {
@@ -1405,18 +1684,78 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
         }
         DPRINTF("Unsupported Service Action In\n");
         goto illegal_request;
+    case SYNCHRONIZE_CACHE:
+        /* The request is used as the AIO opaque value, so add a ref.  */
+        scsi_req_ref(&r->req);
+        bdrv_acct_start(s->qdev.conf.bs, &r->acct, 0, BDRV_ACCT_FLUSH);
+        r->req.aiocb = bdrv_aio_flush(s->qdev.conf.bs, scsi_aio_complete, r);
+        return 0;
+    case SEEK_10:
+        DPRINTF("Seek(10) (sector %" PRId64 ")\n", r->req.cmd.lba);
+        if (r->req.cmd.lba > s->qdev.max_lba) {
+            goto illegal_lba;
+        }
+        break;
+    case MODE_SELECT:
+        DPRINTF("Mode Select(6) (len %lu)\n", (long)r->req.cmd.xfer);
+        break;
+    case MODE_SELECT_10:
+        DPRINTF("Mode Select(10) (len %lu)\n", (long)r->req.cmd.xfer);
+        break;
+    case WRITE_SAME_10:
+        nb_sectors = lduw_be_p(&req->cmd.buf[7]);
+        goto write_same;
+    case WRITE_SAME_16:
+        nb_sectors = ldl_be_p(&req->cmd.buf[10]) & 0xffffffffULL;
+    write_same:
+        if (bdrv_is_read_only(s->qdev.conf.bs)) {
+            scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
+            return 0;
+        }
+        if (r->req.cmd.lba > s->qdev.max_lba) {
+            goto illegal_lba;
+        }
+
+        /*
+         * We only support WRITE SAME with the unmap bit set for now.
+         */
+        if (!(req->cmd.buf[1] & 0x8)) {
+            goto illegal_request;
+        }
+
+        /* The request is used as the AIO opaque value, so add a ref.  */
+        scsi_req_ref(&r->req);
+        r->req.aiocb = bdrv_aio_discard(s->qdev.conf.bs,
+                                        r->req.cmd.lba * (s->qdev.blocksize / 512),
+                                        nb_sectors * (s->qdev.blocksize / 512),
+                                        scsi_aio_complete, r);
+        return 0;
     default:
+        DPRINTF("Unknown SCSI command (%2.2x)\n", buf[0]);
         scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
-        return -1;
+        return 0;
+    }
+    assert(!r->req.aiocb);
+    r->iov.iov_len = MIN(buflen, req->cmd.xfer);
+    if (r->iov.iov_len == 0) {
+        scsi_req_complete(&r->req, GOOD);
+    }
+    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
+        assert(r->iov.iov_len == req->cmd.xfer);
+        return -r->iov.iov_len;
+    } else {
+        return r->iov.iov_len;
     }
-    buflen = MIN(buflen, req->cmd.xfer);
-    return buflen;
 
 illegal_request:
     if (r->req.status == -1) {
         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
     }
-    return -1;
+    return 0;
+
+illegal_lba:
+    scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
+    return 0;
 }
 
 /* Execute a scsi command.  Returns the length of the data expected by the
@@ -1424,98 +1763,37 @@ illegal_request:
    (eg. disk reads), negative for transfers to the device (eg. disk writes),
    and zero if the command does not transfer any data.  */
 
-static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
+static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
 {
     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
     int32_t len;
     uint8_t command;
-    int rc;
 
     command = buf[0];
-    DPRINTF("Command: lun=%d tag=0x%x data=0x%02x", req->lun, req->tag, buf[0]);
-
-#ifdef DEBUG_SCSI
-    {
-        int i;
-        for (i = 1; i < r->req.cmd.len; i++) {
-            printf(" 0x%02x", buf[i]);
-        }
-        printf("\n");
-    }
-#endif
-
-    switch (command) {
-    case INQUIRY:
-    case MODE_SENSE:
-    case MODE_SENSE_10:
-    case RESERVE:
-    case RESERVE_10:
-    case RELEASE:
-    case RELEASE_10:
-    case START_STOP:
-    case ALLOW_MEDIUM_REMOVAL:
-    case GET_CONFIGURATION:
-    case GET_EVENT_STATUS_NOTIFICATION:
-    case MECHANISM_STATUS:
-    case REQUEST_SENSE:
-        break;
 
-    default:
-        if (s->tray_open || !bdrv_is_inserted(s->qdev.conf.bs)) {
-            scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
-            return 0;
-        }
-        break;
+    if (s->tray_open || !bdrv_is_inserted(s->qdev.conf.bs)) {
+        scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
+        return 0;
     }
 
     switch (command) {
-    case TEST_UNIT_READY:
-    case INQUIRY:
-    case MODE_SENSE:
-    case MODE_SENSE_10:
-    case RESERVE:
-    case RESERVE_10:
-    case RELEASE:
-    case RELEASE_10:
-    case START_STOP:
-    case ALLOW_MEDIUM_REMOVAL:
-    case READ_CAPACITY_10:
-    case READ_TOC:
-    case READ_DVD_STRUCTURE:
-    case GET_CONFIGURATION:
-    case GET_EVENT_STATUS_NOTIFICATION:
-    case MECHANISM_STATUS:
-    case SERVICE_ACTION_IN_16:
-    case REQUEST_SENSE:
-        rc = scsi_disk_emulate_command(r);
-        if (rc < 0) {
-            return 0;
-        }
-
-        r->iov.iov_len = rc;
-        break;
-    case SYNCHRONIZE_CACHE:
-        /* The request is used as the AIO opaque value, so add a ref.  */
-        scsi_req_ref(&r->req);
-        bdrv_acct_start(s->qdev.conf.bs, &r->acct, 0, BDRV_ACCT_FLUSH);
-        r->req.aiocb = bdrv_aio_flush(s->qdev.conf.bs, scsi_flush_complete, r);
-        return 0;
     case READ_6:
     case READ_10:
     case READ_12:
     case READ_16:
         len = r->req.cmd.xfer / s->qdev.blocksize;
         DPRINTF("Read (sector %" PRId64 ", count %d)\n", r->req.cmd.lba, len);
-        if (r->req.cmd.lba > s->qdev.max_lba) {
+        if (r->req.cmd.buf[1] & 0xe0) {
+            goto illegal_request;
+        }
+        if (r->req.cmd.lba > r->req.cmd.lba + len ||
+            r->req.cmd.lba + len - 1 > s->qdev.max_lba) {
             goto illegal_lba;
         }
         r->sector = r->req.cmd.lba * (s->qdev.blocksize / 512);
         r->sector_count = len * (s->qdev.blocksize / 512);
         break;
-    case VERIFY_10:
-    case VERIFY_12:
-    case VERIFY_16:
     case WRITE_6:
     case WRITE_10:
     case WRITE_12:
@@ -1523,90 +1801,45 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
     case WRITE_VERIFY_10:
     case WRITE_VERIFY_12:
     case WRITE_VERIFY_16:
+        if (bdrv_is_read_only(s->qdev.conf.bs)) {
+            scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
+            return 0;
+        }
+        /* fallthrough */
+    case VERIFY_10:
+    case VERIFY_12:
+    case VERIFY_16:
         len = r->req.cmd.xfer / s->qdev.blocksize;
         DPRINTF("Write %s(sector %" PRId64 ", count %d)\n",
                 (command & 0xe) == 0xe ? "And Verify " : "",
                 r->req.cmd.lba, len);
-        if (r->req.cmd.lba > s->qdev.max_lba) {
+        if (r->req.cmd.buf[1] & 0xe0) {
+            goto illegal_request;
+        }
+        if (r->req.cmd.lba > r->req.cmd.lba + len ||
+            r->req.cmd.lba + len - 1 > s->qdev.max_lba) {
             goto illegal_lba;
         }
         r->sector = r->req.cmd.lba * (s->qdev.blocksize / 512);
         r->sector_count = len * (s->qdev.blocksize / 512);
         break;
-    case MODE_SELECT:
-        DPRINTF("Mode Select(6) (len %lu)\n", (long)r->req.cmd.xfer);
-        /* We don't support mode parameter changes.
-           Allow the mode parameter header + block descriptors only. */
-        if (r->req.cmd.xfer > 12) {
-            goto fail;
-        }
-        break;
-    case MODE_SELECT_10:
-        DPRINTF("Mode Select(10) (len %lu)\n", (long)r->req.cmd.xfer);
-        /* We don't support mode parameter changes.
-           Allow the mode parameter header + block descriptors only. */
-        if (r->req.cmd.xfer > 16) {
-            goto fail;
-        }
-        break;
-    case SEEK_10:
-        DPRINTF("Seek(10) (sector %" PRId64 ")\n", r->req.cmd.lba);
-        if (r->req.cmd.lba > s->qdev.max_lba) {
-            goto illegal_lba;
-        }
-        break;
-    case WRITE_SAME_10:
-        len = lduw_be_p(&buf[7]);
-        goto write_same;
-    case WRITE_SAME_16:
-        len = ldl_be_p(&buf[10]) & 0xffffffffULL;
-    write_same:
-
-        DPRINTF("WRITE SAME() (sector %" PRId64 ", count %d)\n",
-                r->req.cmd.lba, len);
-
-        if (r->req.cmd.lba > s->qdev.max_lba) {
-            goto illegal_lba;
-        }
-
-        /*
-         * We only support WRITE SAME with the unmap bit set for now.
-         */
-        if (!(buf[1] & 0x8)) {
-            goto fail;
-        }
-
-        rc = bdrv_discard(s->qdev.conf.bs,
-                          r->req.cmd.lba * (s->qdev.blocksize / 512),
-                          len * (s->qdev.blocksize / 512));
-        if (rc < 0) {
-            /* XXX: better error code ?*/
-            goto fail;
-        }
-
-        break;
     default:
-        DPRINTF("Unknown SCSI command (%2.2x)\n", buf[0]);
-        scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
-        return 0;
-    fail:
+        abort();
+    illegal_request:
         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
         return 0;
     illegal_lba:
         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
         return 0;
     }
-    if (r->sector_count == 0 && r->iov.iov_len == 0) {
+    if (r->sector_count == 0) {
         scsi_req_complete(&r->req, GOOD);
     }
-    len = r->sector_count * 512 + r->iov.iov_len;
+    assert(r->iov.iov_len == 0);
     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
-        return -len;
+        return -r->sector_count * 512;
     } else {
-        if (!r->sector_count) {
-            r->sector_count = -1;
-        }
-        return len;
+        return r->sector_count * 512;
     }
 }
 
@@ -1633,6 +1866,19 @@ static void scsi_destroy(SCSIDevice *dev)
     blockdev_mark_auto_del(s->qdev.conf.bs);
 }
 
+static void scsi_disk_resize_cb(void *opaque)
+{
+    SCSIDiskState *s = opaque;
+
+    /* SPC lists this sense code as available only for
+     * direct-access devices.
+     */
+    if (s->qdev.type == TYPE_DISK) {
+        scsi_device_set_ua(&s->qdev, SENSE_CODE(CAPACITY_CHANGED));
+        scsi_device_report_change(&s->qdev, SENSE_CODE(CAPACITY_CHANGED));
+    }
+}
+
 static void scsi_cd_change_media_cb(void *opaque, bool load)
 {
     SCSIDiskState *s = opaque;
@@ -1649,7 +1895,7 @@ static void scsi_cd_change_media_cb(void *opaque, bool load)
      */
     s->media_changed = load;
     s->tray_open = !load;
-    s->qdev.unit_attention = SENSE_CODE(UNIT_ATTENTION_NO_MEDIUM);
+    scsi_device_set_ua(&s->qdev, SENSE_CODE(UNIT_ATTENTION_NO_MEDIUM));
     s->media_event = true;
     s->eject_request = false;
 }
@@ -1674,11 +1920,17 @@ static bool scsi_cd_is_medium_locked(void *opaque)
     return ((SCSIDiskState *)opaque)->tray_locked;
 }
 
-static const BlockDevOps scsi_cd_block_ops = {
+static const BlockDevOps scsi_disk_removable_block_ops = {
     .change_media_cb = scsi_cd_change_media_cb,
     .eject_request_cb = scsi_cd_eject_request_cb,
     .is_tray_open = scsi_cd_is_tray_open,
     .is_medium_locked = scsi_cd_is_medium_locked,
+
+    .resize_cb = scsi_disk_resize_cb,
+};
+
+static const BlockDevOps scsi_disk_block_ops = {
+    .resize_cb = scsi_disk_resize_cb,
 };
 
 static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
@@ -1686,14 +1938,13 @@ static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
     if (s->media_changed) {
         s->media_changed = false;
-        s->qdev.unit_attention = SENSE_CODE(MEDIUM_CHANGED);
+        scsi_device_set_ua(&s->qdev, SENSE_CODE(MEDIUM_CHANGED));
     }
 }
 
 static int scsi_initfn(SCSIDevice *dev)
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
-    DriveInfo *dinfo;
 
     if (!s->qdev.conf.bs) {
         error_report("drive property not set");
@@ -1706,17 +1957,18 @@ static int scsi_initfn(SCSIDevice *dev)
         return -1;
     }
 
-    if (!s->serial) {
-        /* try to fall back to value set with legacy -drive serial=... */
-        dinfo = drive_get_by_blockdev(s->qdev.conf.bs);
-        if (*dinfo->serial) {
-            s->serial = g_strdup(dinfo->serial);
-        }
+    blkconf_serial(&s->qdev.conf, &s->serial);
+    if (dev->type == TYPE_DISK
+        && blkconf_geometry(&dev->conf, NULL, 65535, 255, 255) < 0) {
+        return -1;
     }
 
     if (!s->version) {
         s->version = g_strdup(qemu_get_version());
     }
+    if (!s->vendor) {
+        s->vendor = g_strdup("QEMU");
+    }
 
     if (bdrv_is_sg(s->qdev.conf.bs)) {
         error_report("unwanted /dev/sg*");
@@ -1724,7 +1976,9 @@ static int scsi_initfn(SCSIDevice *dev)
     }
 
     if (s->features & (1 << SCSI_DISK_F_REMOVABLE)) {
-        bdrv_set_dev_ops(s->qdev.conf.bs, &scsi_cd_block_ops, s);
+        bdrv_set_dev_ops(s->qdev.conf.bs, &scsi_disk_removable_block_ops, s);
+    } else {
+        bdrv_set_dev_ops(s->qdev.conf.bs, &scsi_disk_block_ops, s);
     }
     bdrv_set_buffer_alignment(s->qdev.conf.bs, s->qdev.blocksize);
 
@@ -1738,6 +1992,9 @@ static int scsi_hd_initfn(SCSIDevice *dev)
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
     s->qdev.blocksize = s->qdev.conf.logical_block_size;
     s->qdev.type = TYPE_DISK;
+    if (!s->product) {
+        s->product = g_strdup("QEMU HARDDISK");
+    }
     return scsi_initfn(&s->qdev);
 }
 
@@ -1747,6 +2004,9 @@ static int scsi_cd_initfn(SCSIDevice *dev)
     s->qdev.blocksize = 2048;
     s->qdev.type = TYPE_ROM;
     s->features |= 1 << SCSI_DISK_F_REMOVABLE;
+    if (!s->product) {
+        s->product = g_strdup("QEMU CD-ROM");
+    }
     return scsi_initfn(&s->qdev);
 }
 
@@ -1766,10 +2026,19 @@ static int scsi_disk_initfn(SCSIDevice *dev)
     }
 }
 
-static const SCSIReqOps scsi_disk_reqops = {
+static const SCSIReqOps scsi_disk_emulate_reqops = {
     .size         = sizeof(SCSIDiskReq),
     .free_req     = scsi_free_request,
-    .send_command = scsi_send_command,
+    .send_command = scsi_disk_emulate_command,
+    .read_data    = scsi_disk_emulate_read_data,
+    .write_data   = scsi_disk_emulate_write_data,
+    .get_buf      = scsi_get_buf,
+};
+
+static const SCSIReqOps scsi_disk_dma_reqops = {
+    .size         = sizeof(SCSIDiskReq),
+    .free_req     = scsi_free_request,
+    .send_command = scsi_disk_dma_command,
     .read_data    = scsi_read_data,
     .write_data   = scsi_write_data,
     .cancel_io    = scsi_cancel_io,
@@ -1778,13 +2047,71 @@ static const SCSIReqOps scsi_disk_reqops = {
     .save_request = scsi_disk_save_request,
 };
 
+static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = {
+    [TEST_UNIT_READY]                 = &scsi_disk_emulate_reqops,
+    [INQUIRY]                         = &scsi_disk_emulate_reqops,
+    [MODE_SENSE]                      = &scsi_disk_emulate_reqops,
+    [MODE_SENSE_10]                   = &scsi_disk_emulate_reqops,
+    [START_STOP]                      = &scsi_disk_emulate_reqops,
+    [ALLOW_MEDIUM_REMOVAL]            = &scsi_disk_emulate_reqops,
+    [READ_CAPACITY_10]                = &scsi_disk_emulate_reqops,
+    [READ_TOC]                        = &scsi_disk_emulate_reqops,
+    [READ_DVD_STRUCTURE]              = &scsi_disk_emulate_reqops,
+    [READ_DISC_INFORMATION]           = &scsi_disk_emulate_reqops,
+    [GET_CONFIGURATION]               = &scsi_disk_emulate_reqops,
+    [GET_EVENT_STATUS_NOTIFICATION]   = &scsi_disk_emulate_reqops,
+    [MECHANISM_STATUS]                = &scsi_disk_emulate_reqops,
+    [SERVICE_ACTION_IN_16]            = &scsi_disk_emulate_reqops,
+    [REQUEST_SENSE]                   = &scsi_disk_emulate_reqops,
+    [SYNCHRONIZE_CACHE]               = &scsi_disk_emulate_reqops,
+    [SEEK_10]                         = &scsi_disk_emulate_reqops,
+    [MODE_SELECT]                     = &scsi_disk_emulate_reqops,
+    [MODE_SELECT_10]                  = &scsi_disk_emulate_reqops,
+    [WRITE_SAME_10]                   = &scsi_disk_emulate_reqops,
+    [WRITE_SAME_16]                   = &scsi_disk_emulate_reqops,
+
+    [READ_6]                          = &scsi_disk_dma_reqops,
+    [READ_10]                         = &scsi_disk_dma_reqops,
+    [READ_12]                         = &scsi_disk_dma_reqops,
+    [READ_16]                         = &scsi_disk_dma_reqops,
+    [VERIFY_10]                       = &scsi_disk_dma_reqops,
+    [VERIFY_12]                       = &scsi_disk_dma_reqops,
+    [VERIFY_16]                       = &scsi_disk_dma_reqops,
+    [WRITE_6]                         = &scsi_disk_dma_reqops,
+    [WRITE_10]                        = &scsi_disk_dma_reqops,
+    [WRITE_12]                        = &scsi_disk_dma_reqops,
+    [WRITE_16]                        = &scsi_disk_dma_reqops,
+    [WRITE_VERIFY_10]                 = &scsi_disk_dma_reqops,
+    [WRITE_VERIFY_12]                 = &scsi_disk_dma_reqops,
+    [WRITE_VERIFY_16]                 = &scsi_disk_dma_reqops,
+};
+
 static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun,
                                      uint8_t *buf, void *hba_private)
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
     SCSIRequest *req;
+    const SCSIReqOps *ops;
+    uint8_t command;
+
+    command = buf[0];
+    ops = scsi_disk_reqops_dispatch[command];
+    if (!ops) {
+        ops = &scsi_disk_emulate_reqops;
+    }
+    req = scsi_req_alloc(ops, &s->qdev, tag, lun, hba_private);
+
+#ifdef DEBUG_SCSI
+    DPRINTF("Command: lun=%d tag=0x%x data=0x%02x", lun, tag, buf[0]);
+    {
+        int i;
+        for (i = 1; i < req->cmd.len; i++) {
+            printf(" 0x%02x", buf[i]);
+        }
+        printf("\n");
+    }
+#endif
 
-    req = scsi_req_alloc(&scsi_disk_reqops, &s->qdev, tag, lun, hba_private);
     return req;
 }
 
@@ -1898,15 +2225,14 @@ static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag,
          * unreliable, too.  It is even possible that reads deliver random data
          * from the host page cache (this is probably a Linux bug).
          *
-         * We might use scsi_disk_reqops as long as no writing commands are
+         * We might use scsi_disk_dma_reqops as long as no writing commands are
          * seen, but performance usually isn't paramount on optical media.  So,
          * just make scsi-block operate the same as scsi-generic for them.
          */
-        if (s->qdev.type == TYPE_ROM) {
-            break;
-	}
-        return scsi_req_alloc(&scsi_disk_reqops, &s->qdev, tag, lun,
-                              hba_private);
+        if (s->qdev.type != TYPE_ROM) {
+            return scsi_req_alloc(&scsi_disk_dma_reqops, &s->qdev, tag, lun,
+                                  hba_private);
+        }
     }
 
     return scsi_req_alloc(&scsi_generic_req_ops, &s->qdev, tag, lun,
@@ -1914,10 +2240,12 @@ static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag,
 }
 #endif
 
-#define DEFINE_SCSI_DISK_PROPERTIES()                           \
-    DEFINE_BLOCK_PROPERTIES(SCSIDiskState, qdev.conf),          \
-    DEFINE_PROP_STRING("ver",  SCSIDiskState, version),         \
-    DEFINE_PROP_STRING("serial",  SCSIDiskState, serial)
+#define DEFINE_SCSI_DISK_PROPERTIES()                                \
+    DEFINE_BLOCK_PROPERTIES(SCSIDiskState, qdev.conf),               \
+    DEFINE_PROP_STRING("ver", SCSIDiskState, version),               \
+    DEFINE_PROP_STRING("serial", SCSIDiskState, serial),             \
+    DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor),             \
+    DEFINE_PROP_STRING("product", SCSIDiskState, product)
 
 static Property scsi_hd_properties[] = {
     DEFINE_SCSI_DISK_PROPERTIES(),
@@ -1925,6 +2253,8 @@ static Property scsi_hd_properties[] = {
                     SCSI_DISK_F_REMOVABLE, false),
     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
                     SCSI_DISK_F_DPOFUA, false),
+    DEFINE_PROP_HEX64("wwn", SCSIDiskState, wwn, 0),
+    DEFINE_BLOCK_CHS_PROPERTIES(SCSIDiskState, qdev.conf),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1969,6 +2299,7 @@ static TypeInfo scsi_hd_info = {
 
 static Property scsi_cd_properties[] = {
     DEFINE_SCSI_DISK_PROPERTIES(),
+    DEFINE_PROP_HEX64("wwn", SCSIDiskState, wwn, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1997,7 +2328,7 @@ static TypeInfo scsi_cd_info = {
 
 #ifdef __linux__
 static Property scsi_block_properties[] = {
-    DEFINE_SCSI_DISK_PROPERTIES(),
+    DEFINE_PROP_DRIVE("drive", SCSIDiskState, qdev.conf.bs),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -2030,6 +2361,7 @@ static Property scsi_disk_properties[] = {
                     SCSI_DISK_F_REMOVABLE, false),
     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
                     SCSI_DISK_F_DPOFUA, false),
+    DEFINE_PROP_HEX64("wwn", SCSIDiskState, wwn, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index d856d23b3b..8d5106061e 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -400,12 +400,6 @@ static int scsi_generic_initfn(SCSIDevice *s)
         return -1;
     }
 
-    /* check we are really using a /dev/sg* file */
-    if (!bdrv_is_sg(s->conf.bs)) {
-        error_report("not /dev/sg*");
-        return -1;
-    }
-
     if (bdrv_get_on_error(s->conf.bs, 0) != BLOCK_ERR_STOP_ENOSPC) {
         error_report("Device doesn't support drive option werror");
         return -1;
@@ -416,8 +410,11 @@ static int scsi_generic_initfn(SCSIDevice *s)
     }
 
     /* check we are using a driver managing SG_IO (version 3 and after */
-    if (bdrv_ioctl(s->conf.bs, SG_GET_VERSION_NUM, &sg_version) < 0 ||
-        sg_version < 30000) {
+    if (bdrv_ioctl(s->conf.bs, SG_GET_VERSION_NUM, &sg_version) < 0) {
+        error_report("scsi generic interface not supported");
+        return -1;
+    }
+    if (sg_version < 30000) {
         error_report("scsi generic interface too old");
         return -1;
     }
diff --git a/hw/scsi.h b/hw/scsi.h
index 76f06d41de..1aeee4659c 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -3,6 +3,7 @@
 
 #include "qdev.h"
 #include "block.h"
+#include "hw/block-common.h"
 #include "sysemu.h"
 
 #define MAX_SCSI_DEVS	255
@@ -130,10 +131,14 @@ struct SCSIBusInfo {
     void (*transfer_data)(SCSIRequest *req, uint32_t arg);
     void (*complete)(SCSIRequest *req, uint32_t arg, size_t resid);
     void (*cancel)(SCSIRequest *req);
+    void (*hotplug)(SCSIBus *bus, SCSIDevice *dev);
+    void (*hot_unplug)(SCSIBus *bus, SCSIDevice *dev);
+    void (*change)(SCSIBus *bus, SCSIDevice *dev, SCSISense sense);
     QEMUSGList *(*get_sg_list)(SCSIRequest *req);
 
     void (*save_request)(QEMUFile *f, SCSIRequest *req);
     void *(*load_request)(QEMUFile *f, SCSIRequest *req);
+    void (*free_request)(SCSIBus *bus, void *priv);
 };
 
 #define TYPE_SCSI_BUS "SCSI"
@@ -178,6 +183,10 @@ extern const struct SCSISense sense_code_INVALID_OPCODE;
 extern const struct SCSISense sense_code_LBA_OUT_OF_RANGE;
 /* Illegal request, Invalid field in CDB */
 extern const struct SCSISense sense_code_INVALID_FIELD;
+/* Illegal request, Invalid field in parameter list */
+extern const struct SCSISense sense_code_INVALID_PARAM;
+/* Illegal request, Parameter list length error */
+extern const struct SCSISense sense_code_INVALID_PARAM_LEN;
 /* Illegal request, LUN not supported */
 extern const struct SCSISense sense_code_LUN_NOT_SUPPORTED;
 /* Illegal request, Saving parameters not supported */
@@ -192,6 +201,8 @@ extern const struct SCSISense sense_code_IO_ERROR;
 extern const struct SCSISense sense_code_I_T_NEXUS_LOSS;
 /* Command aborted, Logical Unit failure */
 extern const struct SCSISense sense_code_LUN_FAILURE;
+/* LUN not ready, Capacity data has changed */
+extern const struct SCSISense sense_code_CAPACITY_CHANGED;
 /* LUN not ready, Medium not present */
 extern const struct SCSISense sense_code_UNIT_ATTENTION_NO_MEDIUM;
 /* Unit attention, Power on, reset or bus device reset occurred */
@@ -202,6 +213,8 @@ extern const struct SCSISense sense_code_MEDIUM_CHANGED;
 extern const struct SCSISense sense_code_REPORTED_LUNS_CHANGED;
 /* Unit attention, Device internal reset */
 extern const struct SCSISense sense_code_DEVICE_INTERNAL_RESET;
+/* Data Protection, Write Protected */
+extern const struct SCSISense sense_code_WRITE_PROTECTED;
 
 #define SENSE_CODE(x) sense_code_ ## x
 
@@ -229,6 +242,8 @@ void scsi_req_abort(SCSIRequest *req, int status);
 void scsi_req_cancel(SCSIRequest *req);
 void scsi_req_retry(SCSIRequest *req);
 void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense);
+void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense);
+void scsi_device_report_change(SCSIDevice *dev, SCSISense sense);
 int scsi_device_get_sense(SCSIDevice *dev, uint8_t *buf, int len, bool fixed);
 SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int target, int lun);
 
diff --git a/hw/sh_serial.c b/hw/sh_serial.c
index 43b0eb1c1d..1d1883dd20 100644
--- a/hw/sh_serial.c
+++ b/hw/sh_serial.c
@@ -186,7 +186,8 @@ static void sh_serial_write(void *opaque, target_phys_addr_t offs,
         }
     }
 
-    fprintf(stderr, "sh_serial: unsupported write to 0x%02x\n", offs);
+    fprintf(stderr, "sh_serial: unsupported write to 0x%02"
+            TARGET_PRIxPHYS "\n", offs);
     abort();
 }
 
@@ -287,7 +288,8 @@ static uint64_t sh_serial_read(void *opaque, target_phys_addr_t offs,
 #endif
 
     if (ret & ~((1 << 16) - 1)) {
-        fprintf(stderr, "sh_serial: unsupported read from 0x%02x\n", offs);
+        fprintf(stderr, "sh_serial: unsupported read from 0x%02"
+                TARGET_PRIxPHYS "\n", offs);
         abort();
     }
 
diff --git a/hw/smc91c111.c b/hw/smc91c111.c
index 1a5213fa56..d6ef302c6d 100644
--- a/hw/smc91c111.c
+++ b/hw/smc91c111.c
@@ -628,7 +628,7 @@ static uint32_t smc91c111_readl(void *opaque, target_phys_addr_t offset)
     return val;
 }
 
-static int smc91c111_can_receive(VLANClientState *nc)
+static int smc91c111_can_receive(NetClientState *nc)
 {
     smc91c111_state *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -639,7 +639,7 @@ static int smc91c111_can_receive(VLANClientState *nc)
     return 1;
 }
 
-static ssize_t smc91c111_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t smc91c111_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     smc91c111_state *s = DO_UPCAST(NICState, nc, nc)->opaque;
     int status;
@@ -728,7 +728,7 @@ static const MemoryRegionOps smc91c111_mem_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void smc91c111_cleanup(VLANClientState *nc)
+static void smc91c111_cleanup(NetClientState *nc)
 {
     smc91c111_state *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -736,7 +736,7 @@ static void smc91c111_cleanup(VLANClientState *nc)
 }
 
 static NetClientInfo net_smc91c111_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = smc91c111_can_receive,
     .receive = smc91c111_receive,
diff --git a/hw/spapr.c b/hw/spapr.c
index 09a23ff092..81c9343ca5 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -674,6 +674,9 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     spapr->icp = xics_system_init(XICS_IRQS);
     spapr->next_irq = 16;
 
+    /* Set up IOMMU */
+    spapr_iommu_init();
+
     /* Set up VIO bus */
     spapr->vio_bus = spapr_vio_bus_init();
 
diff --git a/hw/spapr.h b/hw/spapr.h
index c75172e0c0..9153f29a60 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -1,6 +1,7 @@
 #if !defined(__HW_SPAPR_H__)
 #define __HW_SPAPR_H__
 
+#include "dma.h"
 #include "hw/xics.h"
 
 struct VIOsPAPRBus;
@@ -320,4 +321,21 @@ target_ulong spapr_rtas_call(sPAPREnvironment *spapr,
 int spapr_rtas_device_tree_setup(void *fdt, target_phys_addr_t rtas_addr,
                                  target_phys_addr_t rtas_size);
 
+#define SPAPR_TCE_PAGE_SHIFT   12
+#define SPAPR_TCE_PAGE_SIZE    (1ULL << SPAPR_TCE_PAGE_SHIFT)
+#define SPAPR_TCE_PAGE_MASK    (SPAPR_TCE_PAGE_SIZE - 1)
+
+typedef struct sPAPRTCE {
+    uint64_t tce;
+} sPAPRTCE;
+
+#define SPAPR_VIO_BASE_LIOBN    0x00000000
+#define SPAPR_PCI_BASE_LIOBN    0x80000000
+
+void spapr_iommu_init(void);
+DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
+void spapr_tce_free(DMAContext *dma);
+int spapr_dma_dt(void *fdt, int node_off, const char *propname,
+                 DMAContext *dma);
+
 #endif /* !defined (__HW_SPAPR_H__) */
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
new file mode 100644
index 0000000000..388ffa4b22
--- /dev/null
+++ b/hw/spapr_iommu.c
@@ -0,0 +1,246 @@
+/*
+ * QEMU sPAPR IOMMU (TCE) code
+ *
+ * Copyright (c) 2010 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "hw.h"
+#include "kvm.h"
+#include "qdev.h"
+#include "kvm_ppc.h"
+#include "dma.h"
+
+#include "hw/spapr.h"
+
+#include <libfdt.h>
+
+/* #define DEBUG_TCE */
+
+enum sPAPRTCEAccess {
+    SPAPR_TCE_FAULT = 0,
+    SPAPR_TCE_RO = 1,
+    SPAPR_TCE_WO = 2,
+    SPAPR_TCE_RW = 3,
+};
+
+typedef struct sPAPRTCETable sPAPRTCETable;
+
+struct sPAPRTCETable {
+    DMAContext dma;
+    uint32_t liobn;
+    uint32_t window_size;
+    sPAPRTCE *table;
+    int fd;
+    QLIST_ENTRY(sPAPRTCETable) list;
+};
+
+
+QLIST_HEAD(spapr_tce_tables, sPAPRTCETable) spapr_tce_tables;
+
+static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
+{
+    sPAPRTCETable *tcet;
+
+    QLIST_FOREACH(tcet, &spapr_tce_tables, list) {
+        if (tcet->liobn == liobn) {
+            return tcet;
+        }
+    }
+
+    return NULL;
+}
+
+static int spapr_tce_translate(DMAContext *dma,
+                               dma_addr_t addr,
+                               target_phys_addr_t *paddr,
+                               target_phys_addr_t *len,
+                               DMADirection dir)
+{
+    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+    enum sPAPRTCEAccess access = (dir == DMA_DIRECTION_FROM_DEVICE)
+        ? SPAPR_TCE_WO : SPAPR_TCE_RO;
+    uint64_t tce;
+
+#ifdef DEBUG_TCE
+    fprintf(stderr, "spapr_tce_translate liobn=0x%" PRIx32 " addr=0x"
+            DMA_ADDR_FMT "\n", tcet->liobn, addr);
+#endif
+
+    /* Check if we are in bound */
+    if (addr >= tcet->window_size) {
+#ifdef DEBUG_TCE
+        fprintf(stderr, "spapr_tce_translate out of bounds\n");
+#endif
+        return -EFAULT;
+    }
+
+    tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT].tce;
+
+    /* Check TCE */
+    if (!(tce & access)) {
+        return -EPERM;
+    }
+
+    /* How much til end of page ? */
+    *len = ((~addr) & SPAPR_TCE_PAGE_MASK) + 1;
+
+    /* Translate */
+    *paddr = (tce & ~SPAPR_TCE_PAGE_MASK) |
+        (addr & SPAPR_TCE_PAGE_MASK);
+
+#ifdef DEBUG_TCE
+    fprintf(stderr, " ->  *paddr=0x" TARGET_FMT_plx ", *len=0x"
+            TARGET_FMT_plx "\n", *paddr, *len);
+#endif
+
+    return 0;
+}
+
+DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size)
+{
+    sPAPRTCETable *tcet;
+
+    if (!window_size) {
+        return NULL;
+    }
+
+    tcet = g_malloc0(sizeof(*tcet));
+    dma_context_init(&tcet->dma, spapr_tce_translate, NULL, NULL);
+
+    tcet->liobn = liobn;
+    tcet->window_size = window_size;
+
+    if (kvm_enabled()) {
+        tcet->table = kvmppc_create_spapr_tce(liobn,
+                                              window_size,
+                                              &tcet->fd);
+    }
+
+    if (!tcet->table) {
+        size_t table_size = (window_size >> SPAPR_TCE_PAGE_SHIFT)
+            * sizeof(sPAPRTCE);
+        tcet->table = g_malloc0(table_size);
+    }
+
+#ifdef DEBUG_TCE
+    fprintf(stderr, "spapr_iommu: New TCE table, liobn=0x%x, context @ %p, "
+            "table @ %p, fd=%d\n", liobn, &tcet->dma, tcet->table, tcet->fd);
+#endif
+
+    QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
+
+    return &tcet->dma;
+}
+
+void spapr_tce_free(DMAContext *dma)
+{
+
+    if (dma) {
+        sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+
+        QLIST_REMOVE(tcet, list);
+
+        if (!kvm_enabled() ||
+            (kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
+                                     tcet->window_size) != 0)) {
+            g_free(tcet->table);
+        }
+
+        g_free(tcet);
+    }
+}
+
+static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
+                                target_ulong tce)
+{
+    sPAPRTCE *tcep;
+
+    if (ioba >= tcet->window_size) {
+        hcall_dprintf("spapr_vio_put_tce on out-of-boards IOBA 0x"
+                      TARGET_FMT_lx "\n", ioba);
+        return H_PARAMETER;
+    }
+
+    tcep = tcet->table + (ioba >> SPAPR_TCE_PAGE_SHIFT);
+    tcep->tce = tce;
+
+    return H_SUCCESS;
+}
+
+static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
+                              target_ulong opcode, target_ulong *args)
+{
+    target_ulong liobn = args[0];
+    target_ulong ioba = args[1];
+    target_ulong tce = args[2];
+    sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
+
+    if (liobn & 0xFFFFFFFF00000000ULL) {
+        hcall_dprintf("spapr_vio_put_tce on out-of-boundsw LIOBN "
+                      TARGET_FMT_lx "\n", liobn);
+        return H_PARAMETER;
+    }
+
+    ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
+
+    if (tcet) {
+        return put_tce_emu(tcet, ioba, tce);
+    }
+#ifdef DEBUG_TCE
+    fprintf(stderr, "%s on liobn=" TARGET_FMT_lx /*%s*/
+            "  ioba 0x" TARGET_FMT_lx "  TCE 0x" TARGET_FMT_lx "\n",
+            __func__, liobn, /*dev->qdev.id, */ioba, tce);
+#endif
+
+    return H_PARAMETER;
+}
+
+void spapr_iommu_init(void)
+{
+    QLIST_INIT(&spapr_tce_tables);
+
+    /* hcall-tce */
+    spapr_register_hypercall(H_PUT_TCE, h_put_tce);
+}
+
+int spapr_dma_dt(void *fdt, int node_off, const char *propname,
+                 DMAContext *dma)
+{
+    if (dma) {
+        sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+        uint32_t dma_prop[] = {cpu_to_be32(tcet->liobn),
+                               0, 0,
+                               0, cpu_to_be32(tcet->window_size)};
+        int ret;
+
+        ret = fdt_setprop_cell(fdt, node_off, "ibm,#dma-address-cells", 2);
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = fdt_setprop_cell(fdt, node_off, "ibm,#dma-size-cells", 2);
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = fdt_setprop(fdt, node_off, propname, dma_prop,
+                          sizeof(dma_prop));
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
diff --git a/hw/spapr_llan.c b/hw/spapr_llan.c
index 8313043652..01e54f3675 100644
--- a/hw/spapr_llan.c
+++ b/hw/spapr_llan.c
@@ -71,7 +71,7 @@ typedef uint64_t vlan_bd_t;
 #define VLAN_RXQ_BD_OFF      0
 #define VLAN_FILTER_BD_OFF   8
 #define VLAN_RX_BDS_OFF      16
-#define VLAN_MAX_BUFS        ((SPAPR_VIO_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF) / 8)
+#define VLAN_MAX_BUFS        ((SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF) / 8)
 
 typedef struct VIOsPAPRVLANDevice {
     VIOsPAPRDevice sdev;
@@ -83,19 +83,19 @@ typedef struct VIOsPAPRVLANDevice {
     target_ulong rxq_ptr;
 } VIOsPAPRVLANDevice;
 
-static int spapr_vlan_can_receive(VLANClientState *nc)
+static int spapr_vlan_can_receive(NetClientState *nc)
 {
     VIOsPAPRVLANDevice *dev = DO_UPCAST(NICState, nc, nc)->opaque;
 
     return (dev->isopen && dev->rx_bufs > 0);
 }
 
-static ssize_t spapr_vlan_receive(VLANClientState *nc, const uint8_t *buf,
+static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf,
                                   size_t size)
 {
     VIOsPAPRDevice *sdev = DO_UPCAST(NICState, nc, nc)->opaque;
     VIOsPAPRVLANDevice *dev = (VIOsPAPRVLANDevice *)sdev;
-    vlan_bd_t rxq_bd = ldq_tce(sdev, dev->buf_list + VLAN_RXQ_BD_OFF);
+    vlan_bd_t rxq_bd = vio_ldq(sdev, dev->buf_list + VLAN_RXQ_BD_OFF);
     vlan_bd_t bd;
     int buf_ptr = dev->use_buf_ptr;
     uint64_t handle;
@@ -114,11 +114,11 @@ static ssize_t spapr_vlan_receive(VLANClientState *nc, const uint8_t *buf,
 
     do {
         buf_ptr += 8;
-        if (buf_ptr >= SPAPR_VIO_TCE_PAGE_SIZE) {
+        if (buf_ptr >= SPAPR_TCE_PAGE_SIZE) {
             buf_ptr = VLAN_RX_BDS_OFF;
         }
 
-        bd = ldq_tce(sdev, dev->buf_list + buf_ptr);
+        bd = vio_ldq(sdev, dev->buf_list + buf_ptr);
         dprintf("use_buf_ptr=%d bd=0x%016llx\n",
                 buf_ptr, (unsigned long long)bd);
     } while ((!(bd & VLAN_BD_VALID) || (VLAN_BD_LEN(bd) < (size + 8)))
@@ -132,12 +132,12 @@ static ssize_t spapr_vlan_receive(VLANClientState *nc, const uint8_t *buf,
     /* Remove the buffer from the pool */
     dev->rx_bufs--;
     dev->use_buf_ptr = buf_ptr;
-    stq_tce(sdev, dev->buf_list + dev->use_buf_ptr, 0);
+    vio_stq(sdev, dev->buf_list + dev->use_buf_ptr, 0);
 
     dprintf("Found buffer: ptr=%d num=%d\n", dev->use_buf_ptr, dev->rx_bufs);
 
     /* Transfer the packet data */
-    if (spapr_tce_dma_write(sdev, VLAN_BD_ADDR(bd) + 8, buf, size) < 0) {
+    if (spapr_vio_dma_write(sdev, VLAN_BD_ADDR(bd) + 8, buf, size) < 0) {
         return -1;
     }
 
@@ -149,23 +149,23 @@ static ssize_t spapr_vlan_receive(VLANClientState *nc, const uint8_t *buf,
         control ^= VLAN_RXQC_TOGGLE;
     }
 
-    handle = ldq_tce(sdev, VLAN_BD_ADDR(bd));
-    stq_tce(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 8, handle);
-    stw_tce(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 4, size);
-    sth_tce(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 2, 8);
-    stb_tce(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr, control);
+    handle = vio_ldq(sdev, VLAN_BD_ADDR(bd));
+    vio_stq(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 8, handle);
+    vio_stl(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 4, size);
+    vio_sth(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 2, 8);
+    vio_stb(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr, control);
 
     dprintf("wrote rxq entry (ptr=0x%llx): 0x%016llx 0x%016llx\n",
             (unsigned long long)dev->rxq_ptr,
-            (unsigned long long)ldq_tce(sdev, VLAN_BD_ADDR(rxq_bd) +
+            (unsigned long long)vio_ldq(sdev, VLAN_BD_ADDR(rxq_bd) +
                                         dev->rxq_ptr),
-            (unsigned long long)ldq_tce(sdev, VLAN_BD_ADDR(rxq_bd) +
+            (unsigned long long)vio_ldq(sdev, VLAN_BD_ADDR(rxq_bd) +
                                         dev->rxq_ptr + 8));
 
     dev->rxq_ptr += 16;
     if (dev->rxq_ptr >= VLAN_BD_LEN(rxq_bd)) {
         dev->rxq_ptr = 0;
-        stq_tce(sdev, dev->buf_list + VLAN_RXQ_BD_OFF, rxq_bd ^ VLAN_BD_TOGGLE);
+        vio_stq(sdev, dev->buf_list + VLAN_RXQ_BD_OFF, rxq_bd ^ VLAN_BD_TOGGLE);
     }
 
     if (sdev->signal_state & 1) {
@@ -176,7 +176,7 @@ static ssize_t spapr_vlan_receive(VLANClientState *nc, const uint8_t *buf,
 }
 
 static NetClientInfo net_spapr_vlan_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = spapr_vlan_can_receive,
     .receive = spapr_vlan_receive,
@@ -254,8 +254,10 @@ static int check_bd(VIOsPAPRVLANDevice *dev, vlan_bd_t bd,
         return -1;
     }
 
-    if (spapr_vio_check_tces(&dev->sdev, VLAN_BD_ADDR(bd),
-                             VLAN_BD_LEN(bd), SPAPR_TCE_RW) != 0) {
+    if (!spapr_vio_dma_valid(&dev->sdev, VLAN_BD_ADDR(bd),
+                             VLAN_BD_LEN(bd), DMA_DIRECTION_FROM_DEVICE)
+        || !spapr_vio_dma_valid(&dev->sdev, VLAN_BD_ADDR(bd),
+                                VLAN_BD_LEN(bd), DMA_DIRECTION_TO_DEVICE)) {
         return -1;
     }
 
@@ -285,14 +287,14 @@ static target_ulong h_register_logical_lan(CPUPPCState *env,
         return H_RESOURCE;
     }
 
-    if (check_bd(dev, VLAN_VALID_BD(buf_list, SPAPR_VIO_TCE_PAGE_SIZE),
-                 SPAPR_VIO_TCE_PAGE_SIZE) < 0) {
+    if (check_bd(dev, VLAN_VALID_BD(buf_list, SPAPR_TCE_PAGE_SIZE),
+                 SPAPR_TCE_PAGE_SIZE) < 0) {
         hcall_dprintf("Bad buf_list 0x" TARGET_FMT_lx "\n", buf_list);
         return H_PARAMETER;
     }
 
-    filter_list_bd = VLAN_VALID_BD(filter_list, SPAPR_VIO_TCE_PAGE_SIZE);
-    if (check_bd(dev, filter_list_bd, SPAPR_VIO_TCE_PAGE_SIZE) < 0) {
+    filter_list_bd = VLAN_VALID_BD(filter_list, SPAPR_TCE_PAGE_SIZE);
+    if (check_bd(dev, filter_list_bd, SPAPR_TCE_PAGE_SIZE) < 0) {
         hcall_dprintf("Bad filter_list 0x" TARGET_FMT_lx "\n", filter_list);
         return H_PARAMETER;
     }
@@ -309,17 +311,17 @@ static target_ulong h_register_logical_lan(CPUPPCState *env,
     rec_queue &= ~VLAN_BD_TOGGLE;
 
     /* Initialize the buffer list */
-    stq_tce(sdev, buf_list, rec_queue);
-    stq_tce(sdev, buf_list + 8, filter_list_bd);
-    spapr_tce_dma_zero(sdev, buf_list + VLAN_RX_BDS_OFF,
-                       SPAPR_VIO_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF);
+    vio_stq(sdev, buf_list, rec_queue);
+    vio_stq(sdev, buf_list + 8, filter_list_bd);
+    spapr_vio_dma_set(sdev, buf_list + VLAN_RX_BDS_OFF, 0,
+                      SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF);
     dev->add_buf_ptr = VLAN_RX_BDS_OFF - 8;
     dev->use_buf_ptr = VLAN_RX_BDS_OFF - 8;
     dev->rx_bufs = 0;
     dev->rxq_ptr = 0;
 
     /* Initialize the receive queue */
-    spapr_tce_dma_zero(sdev, VLAN_BD_ADDR(rec_queue), VLAN_BD_LEN(rec_queue));
+    spapr_vio_dma_set(sdev, VLAN_BD_ADDR(rec_queue), 0, VLAN_BD_LEN(rec_queue));
 
     dev->isopen = 1;
     return H_SUCCESS;
@@ -378,14 +380,14 @@ static target_ulong h_add_logical_lan_buffer(CPUPPCState *env,
 
     do {
         dev->add_buf_ptr += 8;
-        if (dev->add_buf_ptr >= SPAPR_VIO_TCE_PAGE_SIZE) {
+        if (dev->add_buf_ptr >= SPAPR_TCE_PAGE_SIZE) {
             dev->add_buf_ptr = VLAN_RX_BDS_OFF;
         }
 
-        bd = ldq_tce(sdev, dev->buf_list + dev->add_buf_ptr);
+        bd = vio_ldq(sdev, dev->buf_list + dev->add_buf_ptr);
     } while (bd & VLAN_BD_VALID);
 
-    stq_tce(sdev, dev->buf_list + dev->add_buf_ptr, buf);
+    vio_stq(sdev, dev->buf_list + dev->add_buf_ptr, buf);
 
     dev->rx_bufs++;
 
@@ -451,7 +453,7 @@ static target_ulong h_send_logical_lan(CPUPPCState *env, sPAPREnvironment *spapr
     lbuf = alloca(total_len);
     p = lbuf;
     for (i = 0; i < nbufs; i++) {
-        ret = spapr_tce_dma_read(sdev, VLAN_BD_ADDR(bufs[i]),
+        ret = spapr_vio_dma_read(sdev, VLAN_BD_ADDR(bufs[i]),
                                  p, VLAN_BD_LEN(bufs[i]));
         if (ret < 0) {
             return ret;
@@ -479,7 +481,7 @@ static target_ulong h_multicast_ctrl(CPUPPCState *env, sPAPREnvironment *spapr,
 }
 
 static Property spapr_vlan_properties[] = {
-    DEFINE_SPAPR_PROPERTIES(VIOsPAPRVLANDevice, sdev, 0x10000000),
+    DEFINE_SPAPR_PROPERTIES(VIOsPAPRVLANDevice, sdev),
     DEFINE_NIC_PROPERTIES(VIOsPAPRVLANDevice, nicconf),
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -497,6 +499,7 @@ static void spapr_vlan_class_init(ObjectClass *klass, void *data)
     k->dt_compatible = "IBM,l-lan";
     k->signal_mask = 0x1;
     dc->props = spapr_vlan_properties;
+    k->rtce_window_size = 0x10000000;
 }
 
 static TypeInfo spapr_vlan_info = {
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index 97d417a997..b2e4f785ea 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -266,12 +266,21 @@ static const MemoryRegionOps spapr_io_ops = {
 /*
  * PHB PCI device
  */
+static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque,
+                                            int devfn)
+{
+    sPAPRPHBState *phb = opaque;
+
+    return phb->dma;
+}
+
 static int spapr_phb_init(SysBusDevice *s)
 {
     sPAPRPHBState *phb = FROM_SYSBUS(sPAPRPHBState, s);
     char *namebuf;
     int i;
     PCIBus *bus;
+    uint32_t liobn;
 
     phb->dtbusname = g_strdup_printf("pci@%" PRIx64, phb->buid);
     namebuf = alloca(strlen(phb->dtbusname) + 32);
@@ -312,6 +321,10 @@ static int spapr_phb_init(SysBusDevice *s)
                            PCI_DEVFN(0, 0), PCI_NUM_PINS);
     phb->host_state.bus = bus;
 
+    liobn = SPAPR_PCI_BASE_LIOBN | (pci_find_domain(bus) << 16);
+    phb->dma = spapr_tce_new_dma_context(liobn, 0x40000000);
+    pci_setup_iommu(bus, spapr_pci_dma_context_fn, phb);
+
     QLIST_INSERT_HEAD(&spapr->phbs, phb, list);
 
     /* Initialize the LSI table */
@@ -405,7 +418,7 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb,
         uint64_t child;
         uint64_t parent;
         uint64_t size;
-    } __attribute__((packed)) ranges[] = {
+    } QEMU_PACKED ranges[] = {
         {
             cpu_to_be32(b_ss(1)), cpu_to_be64(0),
             cpu_to_be64(phb->io_win_addr),
@@ -472,6 +485,8 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb,
     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
                      sizeof(interrupt_map)));
 
+    spapr_dma_dt(fdt, bus_off, "ibm,dma-window", phb->dma);
+
     return 0;
 }
 
diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
index f54c2e8108..d9e46e22e3 100644
--- a/hw/spapr_pci.h
+++ b/hw/spapr_pci.h
@@ -38,6 +38,7 @@ typedef struct sPAPRPHBState {
     MemoryRegion memspace, iospace;
     target_phys_addr_t mem_win_addr, mem_win_size, io_win_addr, io_win_size;
     MemoryRegion memwindow, iowindow;
+    DMAContext *dma;
 
     struct {
         uint32_t dt_irq;
diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c
index c8271c626c..05b55032a9 100644
--- a/hw/spapr_vio.c
+++ b/hw/spapr_vio.c
@@ -39,7 +39,6 @@
 #endif /* CONFIG_FDT */
 
 /* #define DEBUG_SPAPR */
-/* #define DEBUG_TCE */
 
 #ifdef DEBUG_SPAPR
 #define dprintf(fmt, ...) \
@@ -143,26 +142,9 @@ static int vio_make_devnode(VIOsPAPRDevice *dev,
         }
     }
 
-    if (dev->rtce_window_size) {
-        uint32_t dma_prop[] = {cpu_to_be32(dev->reg),
-                               0, 0,
-                               0, cpu_to_be32(dev->rtce_window_size)};
-
-        ret = fdt_setprop_cell(fdt, node_off, "ibm,#dma-address-cells", 2);
-        if (ret < 0) {
-            return ret;
-        }
-
-        ret = fdt_setprop_cell(fdt, node_off, "ibm,#dma-size-cells", 2);
-        if (ret < 0) {
-            return ret;
-        }
-
-        ret = fdt_setprop(fdt, node_off, "ibm,my-dma-window", dma_prop,
-                          sizeof(dma_prop));
-        if (ret < 0) {
-            return ret;
-        }
+    ret = spapr_dma_dt(fdt, node_off, "ibm,my-dma-window", dev->dma);
+    if (ret < 0) {
+        return ret;
     }
 
     if (pc->devnode) {
@@ -177,232 +159,6 @@ static int vio_make_devnode(VIOsPAPRDevice *dev,
 #endif /* CONFIG_FDT */
 
 /*
- * RTCE handling
- */
-
-static void rtce_init(VIOsPAPRDevice *dev)
-{
-    size_t size = (dev->rtce_window_size >> SPAPR_VIO_TCE_PAGE_SHIFT)
-        * sizeof(VIOsPAPR_RTCE);
-
-    if (size) {
-        dev->rtce_table = kvmppc_create_spapr_tce(dev->reg,
-                                                  dev->rtce_window_size,
-                                                  &dev->kvmtce_fd);
-
-        if (!dev->rtce_table) {
-            dev->rtce_table = g_malloc0(size);
-        }
-    }
-}
-
-static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
-                              target_ulong opcode, target_ulong *args)
-{
-    target_ulong liobn = args[0];
-    target_ulong ioba = args[1];
-    target_ulong tce = args[2];
-    VIOsPAPRDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, liobn);
-    VIOsPAPR_RTCE *rtce;
-
-    if (!dev) {
-        hcall_dprintf("LIOBN 0x" TARGET_FMT_lx " does not exist\n", liobn);
-        return H_PARAMETER;
-    }
-
-    ioba &= ~(SPAPR_VIO_TCE_PAGE_SIZE - 1);
-
-#ifdef DEBUG_TCE
-    fprintf(stderr, "spapr_vio_put_tce on %s  ioba 0x" TARGET_FMT_lx
-            "  TCE 0x" TARGET_FMT_lx "\n", dev->qdev.id, ioba, tce);
-#endif
-
-    if (ioba >= dev->rtce_window_size) {
-        hcall_dprintf("Out-of-bounds IOBA 0x" TARGET_FMT_lx "\n", ioba);
-        return H_PARAMETER;
-    }
-
-    rtce = dev->rtce_table + (ioba >> SPAPR_VIO_TCE_PAGE_SHIFT);
-    rtce->tce = tce;
-
-    return H_SUCCESS;
-}
-
-int spapr_vio_check_tces(VIOsPAPRDevice *dev, target_ulong ioba,
-                         target_ulong len, enum VIOsPAPR_TCEAccess access)
-{
-    int start, end, i;
-
-    start = ioba >> SPAPR_VIO_TCE_PAGE_SHIFT;
-    end = (ioba + len - 1) >> SPAPR_VIO_TCE_PAGE_SHIFT;
-
-    for (i = start; i <= end; i++) {
-        if ((dev->rtce_table[i].tce & access) != access) {
-#ifdef DEBUG_TCE
-            fprintf(stderr, "FAIL on %d\n", i);
-#endif
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-int spapr_tce_dma_write(VIOsPAPRDevice *dev, uint64_t taddr, const void *buf,
-                        uint32_t size)
-{
-#ifdef DEBUG_TCE
-    fprintf(stderr, "spapr_tce_dma_write taddr=0x%llx size=0x%x\n",
-            (unsigned long long)taddr, size);
-#endif
-
-    /* Check for bypass */
-    if (dev->flags & VIO_PAPR_FLAG_DMA_BYPASS) {
-        cpu_physical_memory_write(taddr, buf, size);
-        return 0;
-    }
-
-    while (size) {
-        uint64_t tce;
-        uint32_t lsize;
-        uint64_t txaddr;
-
-        /* Check if we are in bound */
-        if (taddr >= dev->rtce_window_size) {
-#ifdef DEBUG_TCE
-            fprintf(stderr, "spapr_tce_dma_write out of bounds\n");
-#endif
-            return H_DEST_PARM;
-        }
-        tce = dev->rtce_table[taddr >> SPAPR_VIO_TCE_PAGE_SHIFT].tce;
-
-        /* How much til end of page ? */
-        lsize = MIN(size, ((~taddr) & SPAPR_VIO_TCE_PAGE_MASK) + 1);
-
-        /* Check TCE */
-        if (!(tce & 2)) {
-            return H_DEST_PARM;
-        }
-
-        /* Translate */
-        txaddr = (tce & ~SPAPR_VIO_TCE_PAGE_MASK) |
-            (taddr & SPAPR_VIO_TCE_PAGE_MASK);
-
-#ifdef DEBUG_TCE
-        fprintf(stderr, " -> write to txaddr=0x%llx, size=0x%x\n",
-                (unsigned long long)txaddr, lsize);
-#endif
-
-        /* Do it */
-        cpu_physical_memory_write(txaddr, buf, lsize);
-        buf += lsize;
-        taddr += lsize;
-        size -= lsize;
-    }
-    return 0;
-}
-
-int spapr_tce_dma_zero(VIOsPAPRDevice *dev, uint64_t taddr, uint32_t size)
-{
-    /* FIXME: allocating a temp buffer is nasty, but just stepping
-     * through writing zeroes is awkward.  This will do for now. */
-    uint8_t zeroes[size];
-
-#ifdef DEBUG_TCE
-    fprintf(stderr, "spapr_tce_dma_zero taddr=0x%llx size=0x%x\n",
-            (unsigned long long)taddr, size);
-#endif
-
-    memset(zeroes, 0, size);
-    return spapr_tce_dma_write(dev, taddr, zeroes, size);
-}
-
-void stb_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint8_t val)
-{
-    spapr_tce_dma_write(dev, taddr, &val, sizeof(val));
-}
-
-void sth_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint16_t val)
-{
-    val = tswap16(val);
-    spapr_tce_dma_write(dev, taddr, &val, sizeof(val));
-}
-
-
-void stw_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint32_t val)
-{
-    val = tswap32(val);
-    spapr_tce_dma_write(dev, taddr, &val, sizeof(val));
-}
-
-void stq_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint64_t val)
-{
-    val = tswap64(val);
-    spapr_tce_dma_write(dev, taddr, &val, sizeof(val));
-}
-
-int spapr_tce_dma_read(VIOsPAPRDevice *dev, uint64_t taddr, void *buf,
-                       uint32_t size)
-{
-#ifdef DEBUG_TCE
-    fprintf(stderr, "spapr_tce_dma_write taddr=0x%llx size=0x%x\n",
-            (unsigned long long)taddr, size);
-#endif
-
-    /* Check for bypass */
-    if (dev->flags & VIO_PAPR_FLAG_DMA_BYPASS) {
-        cpu_physical_memory_read(taddr, buf, size);
-        return 0;
-    }
-
-    while (size) {
-        uint64_t tce;
-        uint32_t lsize;
-        uint64_t txaddr;
-
-        /* Check if we are in bound */
-        if (taddr >= dev->rtce_window_size) {
-#ifdef DEBUG_TCE
-            fprintf(stderr, "spapr_tce_dma_read out of bounds\n");
-#endif
-            return H_DEST_PARM;
-        }
-        tce = dev->rtce_table[taddr >> SPAPR_VIO_TCE_PAGE_SHIFT].tce;
-
-        /* How much til end of page ? */
-        lsize = MIN(size, ((~taddr) & SPAPR_VIO_TCE_PAGE_MASK) + 1);
-
-        /* Check TCE */
-        if (!(tce & 1)) {
-            return H_DEST_PARM;
-        }
-
-        /* Translate */
-        txaddr = (tce & ~SPAPR_VIO_TCE_PAGE_MASK) |
-            (taddr & SPAPR_VIO_TCE_PAGE_MASK);
-
-#ifdef DEBUG_TCE
-        fprintf(stderr, " -> write to txaddr=0x%llx, size=0x%x\n",
-                (unsigned long long)txaddr, lsize);
-#endif
-        /* Do it */
-        cpu_physical_memory_read(txaddr, buf, lsize);
-        buf += lsize;
-        taddr += lsize;
-        size -= lsize;
-    }
-    return H_SUCCESS;
-}
-
-uint64_t ldq_tce(VIOsPAPRDevice *dev, uint64_t taddr)
-{
-    uint64_t val;
-
-    spapr_tce_dma_read(dev, taddr, &val, sizeof(val));
-    return tswap64(val);
-}
-
-/*
  * CRQ handling
  */
 static target_ulong h_reg_crq(CPUPPCState *env, sPAPREnvironment *spapr,
@@ -526,7 +282,7 @@ int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq)
     }
 
     /* Maybe do a fast path for KVM just writing to the pages */
-    rc = spapr_tce_dma_read(dev, dev->crq.qladdr + dev->crq.qnext, &byte, 1);
+    rc = spapr_vio_dma_read(dev, dev->crq.qladdr + dev->crq.qnext, &byte, 1);
     if (rc) {
         return rc;
     }
@@ -534,7 +290,7 @@ int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq)
         return 1;
     }
 
-    rc = spapr_tce_dma_write(dev, dev->crq.qladdr + dev->crq.qnext + 8,
+    rc = spapr_vio_dma_write(dev, dev->crq.qladdr + dev->crq.qnext + 8,
                              &crq[8], 8);
     if (rc) {
         return rc;
@@ -542,7 +298,7 @@ int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq)
 
     kvmppc_eieio();
 
-    rc = spapr_tce_dma_write(dev, dev->crq.qladdr + dev->crq.qnext, crq, 8);
+    rc = spapr_vio_dma_write(dev, dev->crq.qladdr + dev->crq.qnext, crq, 8);
     if (rc) {
         return rc;
     }
@@ -560,13 +316,13 @@ int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq)
 
 static void spapr_vio_quiesce_one(VIOsPAPRDevice *dev)
 {
-    dev->flags &= ~VIO_PAPR_FLAG_DMA_BYPASS;
+    VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
+    uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
 
-    if (dev->rtce_table) {
-        size_t size = (dev->rtce_window_size >> SPAPR_VIO_TCE_PAGE_SHIFT)
-            * sizeof(VIOsPAPR_RTCE);
-        memset(dev->rtce_table, 0, size);
+    if (dev->dma) {
+        spapr_tce_free(dev->dma);
     }
+    dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
 
     dev->crq.qladdr = 0;
     dev->crq.qsize = 0;
@@ -593,9 +349,13 @@ static void rtas_set_tce_bypass(sPAPREnvironment *spapr, uint32_t token,
         return;
     }
     if (enable) {
-        dev->flags |= VIO_PAPR_FLAG_DMA_BYPASS;
+        spapr_tce_free(dev->dma);
+        dev->dma = NULL;
     } else {
-        dev->flags &= ~VIO_PAPR_FLAG_DMA_BYPASS;
+        VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
+        uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
+
+        dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
     }
 
     rtas_st(rets, 0, 0);
@@ -662,6 +422,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
 {
     VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev;
     VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
+    uint32_t liobn;
     char *id;
 
     if (dev->reg != -1) {
@@ -703,7 +464,8 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
         return -1;
     }
 
-    rtce_init(dev);
+    liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
+    dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
 
     return pc->init(dev);
 }
@@ -751,9 +513,6 @@ VIOsPAPRBus *spapr_vio_bus_init(void)
     /* hcall-vio */
     spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal);
 
-    /* hcall-tce */
-    spapr_register_hypercall(H_PUT_TCE, h_put_tce);
-
     /* hcall-crq */
     spapr_register_hypercall(H_REG_CRQ, h_reg_crq);
     spapr_register_hypercall(H_FREE_CRQ, h_free_crq);
diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h
index 2adad77d02..6f9a498ccd 100644
--- a/hw/spapr_vio.h
+++ b/hw/spapr_vio.h
@@ -21,16 +21,7 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#define SPAPR_VIO_TCE_PAGE_SHIFT   12
-#define SPAPR_VIO_TCE_PAGE_SIZE    (1ULL << SPAPR_VIO_TCE_PAGE_SHIFT)
-#define SPAPR_VIO_TCE_PAGE_MASK    (SPAPR_VIO_TCE_PAGE_SIZE - 1)
-
-enum VIOsPAPR_TCEAccess {
-    SPAPR_TCE_FAULT = 0,
-    SPAPR_TCE_RO = 1,
-    SPAPR_TCE_WO = 2,
-    SPAPR_TCE_RW = 3,
-};
+#include "dma.h"
 
 #define TYPE_VIO_SPAPR_DEVICE "vio-spapr-device"
 #define VIO_SPAPR_DEVICE(obj) \
@@ -45,10 +36,6 @@ enum VIOsPAPR_TCEAccess {
 
 struct VIOsPAPRDevice;
 
-typedef struct VIOsPAPR_RTCE {
-    uint64_t tce;
-} VIOsPAPR_RTCE;
-
 typedef struct VIOsPAPR_CRQ {
     uint64_t qladdr;
     uint32_t qsize;
@@ -64,6 +51,7 @@ typedef struct VIOsPAPRDeviceClass {
 
     const char *dt_name, *dt_type, *dt_compatible;
     target_ulong signal_mask;
+    uint32_t rtce_window_size;
     int (*init)(VIOsPAPRDevice *dev);
     void (*reset)(VIOsPAPRDevice *dev);
     int (*devnode)(VIOsPAPRDevice *dev, void *fdt, int node_off);
@@ -73,20 +61,15 @@ struct VIOsPAPRDevice {
     DeviceState qdev;
     uint32_t reg;
     uint32_t flags;
-#define VIO_PAPR_FLAG_DMA_BYPASS        0x1
     qemu_irq qirq;
     uint32_t vio_irq_num;
     target_ulong signal_state;
-    uint32_t rtce_window_size;
-    VIOsPAPR_RTCE *rtce_table;
-    int kvmtce_fd;
     VIOsPAPR_CRQ crq;
+    DMAContext *dma;
 };
 
-#define DEFINE_SPAPR_PROPERTIES(type, field, default_dma_window)       \
-        DEFINE_PROP_UINT32("reg", type, field.reg, -1),                \
-        DEFINE_PROP_UINT32("dma-window", type, field.rtce_window_size, \
-                           default_dma_window)
+#define DEFINE_SPAPR_PROPERTIES(type, field)           \
+        DEFINE_PROP_UINT32("reg", type, field.reg, -1)
 
 struct VIOsPAPRBus {
     BusState bus;
@@ -102,20 +85,38 @@ extern int spapr_populate_chosen_stdout(void *fdt, VIOsPAPRBus *bus);
 
 extern int spapr_vio_signal(VIOsPAPRDevice *dev, target_ulong mode);
 
-int spapr_vio_check_tces(VIOsPAPRDevice *dev, target_ulong ioba,
-                         target_ulong len,
-                         enum VIOsPAPR_TCEAccess access);
-
-int spapr_tce_dma_read(VIOsPAPRDevice *dev, uint64_t taddr,
-                       void *buf, uint32_t size);
-int spapr_tce_dma_write(VIOsPAPRDevice *dev, uint64_t taddr,
-                        const void *buf, uint32_t size);
-int spapr_tce_dma_zero(VIOsPAPRDevice *dev, uint64_t taddr, uint32_t size);
-void stb_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint8_t val);
-void sth_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint16_t val);
-void stw_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint32_t val);
-void stq_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint64_t val);
-uint64_t ldq_tce(VIOsPAPRDevice *dev, uint64_t taddr);
+static inline bool spapr_vio_dma_valid(VIOsPAPRDevice *dev, uint64_t taddr,
+                                       uint32_t size, DMADirection dir)
+{
+    return dma_memory_valid(dev->dma, taddr, size, dir);
+}
+
+static inline int spapr_vio_dma_read(VIOsPAPRDevice *dev, uint64_t taddr,
+                                     void *buf, uint32_t size)
+{
+    return (dma_memory_read(dev->dma, taddr, buf, size) != 0) ?
+        H_DEST_PARM : H_SUCCESS;
+}
+
+static inline int spapr_vio_dma_write(VIOsPAPRDevice *dev, uint64_t taddr,
+                                      const void *buf, uint32_t size)
+{
+    return (dma_memory_write(dev->dma, taddr, buf, size) != 0) ?
+        H_DEST_PARM : H_SUCCESS;
+}
+
+static inline int spapr_vio_dma_set(VIOsPAPRDevice *dev, uint64_t taddr,
+                                    uint8_t c, uint32_t size)
+{
+    return (dma_memory_set(dev->dma, taddr, c, size) != 0) ?
+        H_DEST_PARM : H_SUCCESS;
+}
+
+#define vio_stb(_dev, _addr, _val) (stb_dma((_dev)->dma, (_addr), (_val)))
+#define vio_sth(_dev, _addr, _val) (stw_be_dma((_dev)->dma, (_addr), (_val)))
+#define vio_stl(_dev, _addr, _val) (stl_be_dma((_dev)->dma, (_addr), (_val)))
+#define vio_stq(_dev, _addr, _val) (stq_be_dma((_dev)->dma, (_addr), (_val)))
+#define vio_ldq(_dev, _addr) (ldq_be_dma((_dev)->dma, (_addr)))
 
 int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq);
 
diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c
index 2f09616dd5..3cf5844e0f 100644
--- a/hw/spapr_vscsi.c
+++ b/hw/spapr_vscsi.c
@@ -165,7 +165,7 @@ static int vscsi_send_iu(VSCSIState *s, vscsi_req *req,
     long rc, rc1;
 
     /* First copy the SRP */
-    rc = spapr_tce_dma_write(&s->vdev, req->crq.s.IU_data_ptr,
+    rc = spapr_vio_dma_write(&s->vdev, req->crq.s.IU_data_ptr,
                              &req->iu, length);
     if (rc) {
         fprintf(stderr, "vscsi_send_iu: DMA write failure !\n");
@@ -281,9 +281,9 @@ static int vscsi_srp_direct_data(VSCSIState *s, vscsi_req *req,
     llen = MIN(len, md->len);
     if (llen) {
         if (req->writing) { /* writing = to device = reading from memory */
-            rc = spapr_tce_dma_read(&s->vdev, md->va, buf, llen);
+            rc = spapr_vio_dma_read(&s->vdev, md->va, buf, llen);
         } else {
-            rc = spapr_tce_dma_write(&s->vdev, md->va, buf, llen);
+            rc = spapr_vio_dma_write(&s->vdev, md->va, buf, llen);
         }
     }
     md->len -= llen;
@@ -329,10 +329,11 @@ static int vscsi_srp_indirect_data(VSCSIState *s, vscsi_req *req,
             md = req->cur_desc = &req->ext_desc;
             dprintf("VSCSI:   Reading desc from 0x%llx\n",
                     (unsigned long long)td->va);
-            rc = spapr_tce_dma_read(&s->vdev, td->va, md,
+            rc = spapr_vio_dma_read(&s->vdev, td->va, md,
                                     sizeof(struct srp_direct_buf));
             if (rc) {
-                dprintf("VSCSI: tce_dma_read -> %d reading ext_desc\n", rc);
+                dprintf("VSCSI: spapr_vio_dma_read -> %d reading ext_desc\n",
+                        rc);
                 break;
             }
             vscsi_swap_desc(md);
@@ -345,12 +346,12 @@ static int vscsi_srp_indirect_data(VSCSIState *s, vscsi_req *req,
         /* Perform transfer */
         llen = MIN(len, md->len);
         if (req->writing) { /* writing = to device = reading from memory */
-            rc = spapr_tce_dma_read(&s->vdev, md->va, buf, llen);
+            rc = spapr_vio_dma_read(&s->vdev, md->va, buf, llen);
         } else {
-            rc = spapr_tce_dma_write(&s->vdev, md->va, buf, llen);
+            rc = spapr_vio_dma_write(&s->vdev, md->va, buf, llen);
         }
         if (rc) {
-            dprintf("VSCSI: tce_dma_r/w(%d) -> %d\n", req->writing, rc);
+            dprintf("VSCSI: spapr_vio_dma_r/w(%d) -> %d\n", req->writing, rc);
             break;
         }
         dprintf("VSCSI:     data: %02x %02x %02x %02x...\n",
@@ -728,7 +729,7 @@ static int vscsi_send_adapter_info(VSCSIState *s, vscsi_req *req)
     sinfo = &req->iu.mad.adapter_info;
 
 #if 0 /* What for ? */
-    rc = spapr_tce_dma_read(&s->vdev, be64_to_cpu(sinfo->buffer),
+    rc = spapr_vio_dma_read(&s->vdev, be64_to_cpu(sinfo->buffer),
                             &info, be16_to_cpu(sinfo->common.length));
     if (rc) {
         fprintf(stderr, "vscsi_send_adapter_info: DMA read failure !\n");
@@ -742,7 +743,7 @@ static int vscsi_send_adapter_info(VSCSIState *s, vscsi_req *req)
     info.os_type = cpu_to_be32(2);
     info.port_max_txu[0] = cpu_to_be32(VSCSI_MAX_SECTORS << 9);
 
-    rc = spapr_tce_dma_write(&s->vdev, be64_to_cpu(sinfo->buffer),
+    rc = spapr_vio_dma_write(&s->vdev, be64_to_cpu(sinfo->buffer),
                              &info, be16_to_cpu(sinfo->common.length));
     if (rc)  {
         fprintf(stderr, "vscsi_send_adapter_info: DMA write failure !\n");
@@ -805,7 +806,7 @@ static void vscsi_got_payload(VSCSIState *s, vscsi_crq *crq)
     }
 
     /* XXX Handle failure differently ? */
-    if (spapr_tce_dma_read(&s->vdev, crq->s.IU_data_ptr, &req->iu,
+    if (spapr_vio_dma_read(&s->vdev, crq->s.IU_data_ptr, &req->iu,
                            crq->s.IU_length)) {
         fprintf(stderr, "vscsi_got_payload: DMA read failure !\n");
         vscsi_put_req(req);
@@ -947,7 +948,7 @@ static int spapr_vscsi_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off)
 }
 
 static Property spapr_vscsi_properties[] = {
-    DEFINE_SPAPR_PROPERTIES(VSCSIState, vdev, 0x10000000),
+    DEFINE_SPAPR_PROPERTIES(VSCSIState, vdev),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -964,6 +965,7 @@ static void spapr_vscsi_class_init(ObjectClass *klass, void *data)
     k->dt_compatible = "IBM,v-scsi";
     k->signal_mask = 0x00000001;
     dc->props = spapr_vscsi_properties;
+    k->rtce_window_size = 0x10000000;
 }
 
 static TypeInfo spapr_vscsi_info = {
diff --git a/hw/spapr_vty.c b/hw/spapr_vty.c
index f340b83237..99e52cc6b7 100644
--- a/hw/spapr_vty.c
+++ b/hw/spapr_vty.c
@@ -133,7 +133,7 @@ void spapr_vty_create(VIOsPAPRBus *bus, CharDriverState *chardev)
 }
 
 static Property spapr_vty_properties[] = {
-    DEFINE_SPAPR_PROPERTIES(VIOsPAPRVTYDevice, sdev, 0),
+    DEFINE_SPAPR_PROPERTIES(VIOsPAPRVTYDevice, sdev),
     DEFINE_PROP_CHR("chardev", VIOsPAPRVTYDevice, chardev),
     DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/hw/stellaris_enet.c b/hw/stellaris_enet.c
index fbe99cb4a9..bc97280cca 100644
--- a/hw/stellaris_enet.c
+++ b/hw/stellaris_enet.c
@@ -78,7 +78,7 @@ static void stellaris_enet_update(stellaris_enet_state *s)
 }
 
 /* TODO: Implement MAC address filtering.  */
-static ssize_t stellaris_enet_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t stellaris_enet_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     stellaris_enet_state *s = DO_UPCAST(NICState, nc, nc)->opaque;
     int n;
@@ -120,7 +120,7 @@ static ssize_t stellaris_enet_receive(VLANClientState *nc, const uint8_t *buf, s
     return size;
 }
 
-static int stellaris_enet_can_receive(VLANClientState *nc)
+static int stellaris_enet_can_receive(NetClientState *nc)
 {
     stellaris_enet_state *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -381,7 +381,7 @@ static int stellaris_enet_load(QEMUFile *f, void *opaque, int version_id)
     return 0;
 }
 
-static void stellaris_enet_cleanup(VLANClientState *nc)
+static void stellaris_enet_cleanup(NetClientState *nc)
 {
     stellaris_enet_state *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -393,7 +393,7 @@ static void stellaris_enet_cleanup(VLANClientState *nc)
 }
 
 static NetClientInfo net_stellaris_enet_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = stellaris_enet_can_receive,
     .receive = stellaris_enet_receive,
diff --git a/hw/sun4m.c b/hw/sun4m.c
index a959261209..0f909b5f86 100644
--- a/hw/sun4m.c
+++ b/hw/sun4m.c
@@ -832,6 +832,10 @@ static void cpu_devinit(const char *cpu_model, unsigned int id,
     env->prom_addr = prom_addr;
 }
 
+static void dummy_fdc_tc(void *opaque, int irq, int level)
+{
+}
+
 static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size,
                           const char *boot_device,
                           const char *kernel_filename,
@@ -942,9 +946,6 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size,
               serial_hds[0], serial_hds[1], ESCC_CLOCK, 1);
 
     cpu_halt = qemu_allocate_irqs(cpu_halt_signal, NULL, 1);
-    slavio_misc_init(hwdef->slavio_base, hwdef->aux1_base, hwdef->aux2_base,
-                     slavio_irq[30], fdc_tc);
-
     if (hwdef->apc_base) {
         apc_init(hwdef->apc_base, cpu_halt[0]);
     }
@@ -955,8 +956,13 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size,
         fd[0] = drive_get(IF_FLOPPY, 0, 0);
         sun4m_fdctrl_init(slavio_irq[22], hwdef->fd_base, fd,
                           &fdc_tc);
+    } else {
+        fdc_tc = *qemu_allocate_irqs(dummy_fdc_tc, NULL, 1);
     }
 
+    slavio_misc_init(hwdef->slavio_base, hwdef->aux1_base, hwdef->aux2_base,
+                     slavio_irq[30], fdc_tc);
+
     if (drive_get_max_bus(IF_SCSI) > 0) {
         fprintf(stderr, "qemu: too many SCSI bus\n");
         exit(1);
@@ -1772,16 +1778,18 @@ static void sun4c_hw_init(const struct sun4c_hwdef *hwdef, ram_addr_t RAM_size,
               slavio_irq[1], serial_hds[0], serial_hds[1],
               ESCC_CLOCK, 1);
 
-    slavio_misc_init(0, hwdef->aux1_base, 0, slavio_irq[1], fdc_tc);
-
     if (hwdef->fd_base != (target_phys_addr_t)-1) {
         /* there is zero or one floppy drive */
         memset(fd, 0, sizeof(fd));
         fd[0] = drive_get(IF_FLOPPY, 0, 0);
         sun4m_fdctrl_init(slavio_irq[1], hwdef->fd_base, fd,
                           &fdc_tc);
+    } else {
+        fdc_tc = *qemu_allocate_irqs(dummy_fdc_tc, NULL, 1);
     }
 
+    slavio_misc_init(0, hwdef->aux1_base, 0, slavio_irq[1], fdc_tc);
+
     if (drive_get_max_bus(IF_SCSI) > 0) {
         fprintf(stderr, "qemu: too many SCSI bus\n");
         exit(1);
diff --git a/hw/usb.h b/hw/usb.h
index 2a56fe554f..432ccae57f 100644
--- a/hw/usb.h
+++ b/hw/usb.h
@@ -25,7 +25,6 @@
  * THE SOFTWARE.
  */
 
-#include "block.h"
 #include "qdev.h"
 #include "qemu-queue.h"
 
@@ -145,6 +144,8 @@
 #define USB_ENDPOINT_XFER_INT		3
 #define USB_ENDPOINT_XFER_INVALID     255
 
+#define USB_INTERFACE_INVALID         255
+
 typedef struct USBBus USBBus;
 typedef struct USBBusOps USBBusOps;
 typedef struct USBPort USBPort;
@@ -345,7 +346,7 @@ void usb_packet_check_state(USBPacket *p, USBPacketState expected);
 void usb_packet_setup(USBPacket *p, int pid, USBEndpoint *ep);
 void usb_packet_addbuf(USBPacket *p, void *ptr, size_t len);
 int usb_packet_map(USBPacket *p, QEMUSGList *sgl);
-void usb_packet_unmap(USBPacket *p);
+void usb_packet_unmap(USBPacket *p, QEMUSGList *sgl);
 void usb_packet_copy(USBPacket *p, void *ptr, size_t bytes);
 void usb_packet_skip(USBPacket *p, size_t bytes);
 void usb_packet_cleanup(USBPacket *p);
@@ -363,6 +364,7 @@ void usb_packet_complete(USBDevice *dev, USBPacket *p);
 void usb_cancel_packet(USBPacket * p);
 
 void usb_ep_init(USBDevice *dev);
+void usb_ep_reset(USBDevice *dev);
 void usb_ep_dump(USBDevice *dev);
 struct USBEndpoint *usb_ep_get(USBDevice *dev, int pid, int ep);
 uint8_t usb_ep_get_type(USBDevice *dev, int pid, int ep);
diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs
index 9c7ddf5cb2..4225136d0f 100644
--- a/hw/usb/Makefile.objs
+++ b/hw/usb/Makefile.objs
@@ -11,3 +11,4 @@ common-obj-y += core.o bus.o desc.o dev-hub.o
 common-obj-y += host-$(HOST_USB).o dev-bluetooth.o
 common-obj-y += dev-hid.o dev-storage.o dev-wacom.o
 common-obj-y += dev-serial.o dev-network.o dev-audio.o
+common-obj-y += dev-uas.o
diff --git a/hw/usb/bus.c b/hw/usb/bus.c
index f87cc5f443..b649360dd3 100644
--- a/hw/usb/bus.c
+++ b/hw/usb/bus.c
@@ -37,10 +37,23 @@ static const TypeInfo usb_bus_info = {
 static int next_usb_bus = 0;
 static QTAILQ_HEAD(, USBBus) busses = QTAILQ_HEAD_INITIALIZER(busses);
 
+static int usb_device_post_load(void *opaque, int version_id)
+{
+    USBDevice *dev = opaque;
+
+    if (dev->state == USB_STATE_NOTATTACHED) {
+        dev->attached = 0;
+    } else {
+        dev->attached = 1;
+    }
+    return 0;
+}
+
 const VMStateDescription vmstate_usb_device = {
     .name = "USBDevice",
     .version_id = 1,
     .minimum_version_id = 1,
+    .post_load = usb_device_post_load,
     .fields = (VMStateField []) {
         VMSTATE_UINT8(addr, USBDevice),
         VMSTATE_INT32(state, USBDevice),
diff --git a/hw/usb/core.c b/hw/usb/core.c
index 0e02da7601..01a7622837 100644
--- a/hw/usb/core.c
+++ b/hw/usb/core.c
@@ -522,10 +522,10 @@ void usb_packet_copy(USBPacket *p, void *ptr, size_t bytes)
     switch (p->pid) {
     case USB_TOKEN_SETUP:
     case USB_TOKEN_OUT:
-        iov_to_buf(p->iov.iov, p->iov.niov, ptr, p->result, bytes);
+        iov_to_buf(p->iov.iov, p->iov.niov, p->result, ptr, bytes);
         break;
     case USB_TOKEN_IN:
-        iov_from_buf(p->iov.iov, p->iov.niov, ptr, p->result, bytes);
+        iov_from_buf(p->iov.iov, p->iov.niov, p->result, ptr, bytes);
         break;
     default:
         fprintf(stderr, "%s: invalid pid: %x\n", __func__, p->pid);
@@ -539,7 +539,7 @@ void usb_packet_skip(USBPacket *p, size_t bytes)
     assert(p->result >= 0);
     assert(p->result + bytes <= p->iov.size);
     if (p->pid == USB_TOKEN_IN) {
-        iov_clear(p->iov.iov, p->iov.niov, p->result, bytes);
+        iov_memset(p->iov.iov, p->iov.niov, p->result, 0, bytes);
     }
     p->result += bytes;
 }
@@ -550,7 +550,7 @@ void usb_packet_cleanup(USBPacket *p)
     qemu_iovec_destroy(&p->iov);
 }
 
-void usb_ep_init(USBDevice *dev)
+void usb_ep_reset(USBDevice *dev)
 {
     int ep;
 
@@ -559,7 +559,6 @@ void usb_ep_init(USBDevice *dev)
     dev->ep_ctl.ifnum = 0;
     dev->ep_ctl.dev = dev;
     dev->ep_ctl.pipeline = false;
-    QTAILQ_INIT(&dev->ep_ctl.queue);
     for (ep = 0; ep < USB_MAX_ENDPOINTS; ep++) {
         dev->ep_in[ep].nr = ep + 1;
         dev->ep_out[ep].nr = ep + 1;
@@ -567,12 +566,22 @@ void usb_ep_init(USBDevice *dev)
         dev->ep_out[ep].pid = USB_TOKEN_OUT;
         dev->ep_in[ep].type = USB_ENDPOINT_XFER_INVALID;
         dev->ep_out[ep].type = USB_ENDPOINT_XFER_INVALID;
-        dev->ep_in[ep].ifnum = 0;
-        dev->ep_out[ep].ifnum = 0;
+        dev->ep_in[ep].ifnum = USB_INTERFACE_INVALID;
+        dev->ep_out[ep].ifnum = USB_INTERFACE_INVALID;
         dev->ep_in[ep].dev = dev;
         dev->ep_out[ep].dev = dev;
         dev->ep_in[ep].pipeline = false;
         dev->ep_out[ep].pipeline = false;
+    }
+}
+
+void usb_ep_init(USBDevice *dev)
+{
+    int ep;
+
+    usb_ep_reset(dev);
+    QTAILQ_INIT(&dev->ep_ctl.queue);
+    for (ep = 0; ep < USB_MAX_ENDPOINTS; ep++) {
         QTAILQ_INIT(&dev->ep_in[ep].queue);
         QTAILQ_INIT(&dev->ep_out[ep].queue);
     }
diff --git a/hw/usb/dev-network.c b/hw/usb/dev-network.c
index 5d2f0982c9..c84892c98d 100644
--- a/hw/usb/dev-network.c
+++ b/hw/usb/dev-network.c
@@ -1247,7 +1247,7 @@ static int usb_net_handle_data(USBDevice *dev, USBPacket *p)
     return ret;
 }
 
-static ssize_t usbnet_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t usbnet_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     USBNetState *s = DO_UPCAST(NICState, nc, nc)->opaque;
     struct rndis_packet_msg_type *msg;
@@ -1285,7 +1285,7 @@ static ssize_t usbnet_receive(VLANClientState *nc, const uint8_t *buf, size_t si
     return size;
 }
 
-static int usbnet_can_receive(VLANClientState *nc)
+static int usbnet_can_receive(NetClientState *nc)
 {
     USBNetState *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -1296,7 +1296,7 @@ static int usbnet_can_receive(VLANClientState *nc)
     return !s->in_len;
 }
 
-static void usbnet_cleanup(VLANClientState *nc)
+static void usbnet_cleanup(NetClientState *nc)
 {
     USBNetState *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -1309,11 +1309,11 @@ static void usb_net_handle_destroy(USBDevice *dev)
 
     /* TODO: remove the nd_table[] entry */
     rndis_clear_responsequeue(s);
-    qemu_del_vlan_client(&s->nic->nc);
+    qemu_del_net_client(&s->nic->nc);
 }
 
 static NetClientInfo net_usbnet_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = usbnet_can_receive,
     .receive = usbnet_receive,
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index 251e7de1cd..ff48d91049 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -247,6 +247,9 @@ static void usb_msd_command_complete(SCSIRequest *req, uint32_t status, size_t r
                the status read packet.  */
             usb_msd_send_status(s, p);
             s->mode = USB_MSDM_CBW;
+        } else if (s->mode == USB_MSDM_CSW) {
+            usb_msd_send_status(s, p);
+            s->mode = USB_MSDM_CBW;
         } else {
             if (s->data_len) {
                 int len = (p->iov.size - p->result);
@@ -383,6 +386,9 @@ static int usb_msd_handle_data(USBDevice *dev, USBPacket *p)
             assert(le32_to_cpu(s->csw.residue) == 0);
             s->scsi_len = 0;
             s->req = scsi_req_new(s->scsi_dev, tag, 0, cbw.cmd, NULL);
+#ifdef DEBUG_MSD
+            scsi_req_print(s->req);
+#endif
             scsi_req_enqueue(s->req);
             if (s->req && s->req->cmd.xfer != SCSI_XFER_NONE) {
                 scsi_req_continue(s->req);
@@ -410,7 +416,7 @@ static int usb_msd_handle_data(USBDevice *dev, USBPacket *p)
                 }
             }
             if (p->result < p->iov.size) {
-                DPRINTF("Deferring packet %p\n", p);
+                DPRINTF("Deferring packet %p [wait data-out]\n", p);
                 s->packet = p;
                 ret = USB_RET_ASYNC;
             } else {
@@ -445,6 +451,7 @@ static int usb_msd_handle_data(USBDevice *dev, USBPacket *p)
 
             if (s->req) {
                 /* still in flight */
+                DPRINTF("Deferring packet %p [wait status]\n", p);
                 s->packet = p;
                 ret = USB_RET_ASYNC;
             } else {
@@ -471,7 +478,7 @@ static int usb_msd_handle_data(USBDevice *dev, USBPacket *p)
                 }
             }
             if (p->result < p->iov.size) {
-                DPRINTF("Deferring packet %p\n", p);
+                DPRINTF("Deferring packet %p [wait data-in]\n", p);
                 s->packet = p;
                 ret = USB_RET_ASYNC;
             } else {
@@ -532,13 +539,14 @@ static int usb_msd_initfn(USBDevice *dev)
 {
     MSDState *s = DO_UPCAST(MSDState, dev, dev);
     BlockDriverState *bs = s->conf.bs;
-    DriveInfo *dinfo;
 
     if (!bs) {
         error_report("drive property not set");
         return -1;
     }
 
+    blkconf_serial(&s->conf, &s->serial);
+
     /*
      * Hack alert: this pretends to be a block device, but it's really
      * a SCSI bus that can serve only a single device, which it
@@ -551,13 +559,6 @@ static int usb_msd_initfn(USBDevice *dev)
     bdrv_detach_dev(bs, &s->dev.qdev);
     s->conf.bs = NULL;
 
-    if (!s->serial) {
-        /* try to fall back to value set with legacy -drive serial=... */
-        dinfo = drive_get_by_blockdev(bs);
-        if (*dinfo->serial) {
-            s->serial = strdup(dinfo->serial);
-        }
-    }
     if (s->serial) {
         usb_desc_set_string(dev, STR_SERIALNUMBER, s->serial);
     } else {
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
new file mode 100644
index 0000000000..9b02ff48fa
--- /dev/null
+++ b/hw/usb/dev-uas.c
@@ -0,0 +1,779 @@
+/*
+ * UAS (USB Attached SCSI) emulation
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Author: Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qemu-option.h"
+#include "qemu-config.h"
+#include "trace.h"
+
+#include "hw/usb.h"
+#include "hw/usb/desc.h"
+#include "hw/scsi.h"
+#include "hw/scsi-defs.h"
+
+/* --------------------------------------------------------------------- */
+
+#define UAS_UI_COMMAND              0x01
+#define UAS_UI_SENSE                0x03
+#define UAS_UI_RESPONSE             0x04
+#define UAS_UI_TASK_MGMT            0x05
+#define UAS_UI_READ_READY           0x06
+#define UAS_UI_WRITE_READY          0x07
+
+#define UAS_RC_TMF_COMPLETE         0x00
+#define UAS_RC_INVALID_INFO_UNIT    0x02
+#define UAS_RC_TMF_NOT_SUPPORTED    0x04
+#define UAS_RC_TMF_FAILED           0x05
+#define UAS_RC_TMF_SUCCEEDED        0x08
+#define UAS_RC_INCORRECT_LUN        0x09
+#define UAS_RC_OVERLAPPED_TAG       0x0a
+
+#define UAS_TMF_ABORT_TASK          0x01
+#define UAS_TMF_ABORT_TASK_SET      0x02
+#define UAS_TMF_CLEAR_TASK_SET      0x04
+#define UAS_TMF_LOGICAL_UNIT_RESET  0x08
+#define UAS_TMF_I_T_NEXUS_RESET     0x10
+#define UAS_TMF_CLEAR_ACA           0x40
+#define UAS_TMF_QUERY_TASK          0x80
+#define UAS_TMF_QUERY_TASK_SET      0x81
+#define UAS_TMF_QUERY_ASYNC_EVENT   0x82
+
+#define UAS_PIPE_ID_COMMAND         0x01
+#define UAS_PIPE_ID_STATUS          0x02
+#define UAS_PIPE_ID_DATA_IN         0x03
+#define UAS_PIPE_ID_DATA_OUT        0x04
+
+typedef struct {
+    uint8_t    id;
+    uint8_t    reserved;
+    uint16_t   tag;
+} QEMU_PACKED  uas_ui_header;
+
+typedef struct {
+    uint8_t    prio_taskattr;   /* 6:3 priority, 2:0 task attribute   */
+    uint8_t    reserved_1;
+    uint8_t    add_cdb_length;  /* 7:2 additional adb length (dwords) */
+    uint8_t    reserved_2;
+    uint64_t   lun;
+    uint8_t    cdb[16];
+    uint8_t    add_cdb[];
+} QEMU_PACKED  uas_ui_command;
+
+typedef struct {
+    uint16_t   status_qualifier;
+    uint8_t    status;
+    uint8_t    reserved[7];
+    uint16_t   sense_length;
+    uint8_t    sense_data[18];
+} QEMU_PACKED  uas_ui_sense;
+
+typedef struct {
+    uint16_t   add_response_info;
+    uint8_t    response_code;
+} QEMU_PACKED  uas_ui_response;
+
+typedef struct {
+    uint8_t    function;
+    uint8_t    reserved;
+    uint16_t   task_tag;
+    uint64_t   lun;
+} QEMU_PACKED  uas_ui_task_mgmt;
+
+typedef struct {
+    uas_ui_header  hdr;
+    union {
+        uas_ui_command   command;
+        uas_ui_sense     sense;
+        uas_ui_task_mgmt task;
+        uas_ui_response  response;
+    };
+} QEMU_PACKED  uas_ui;
+
+/* --------------------------------------------------------------------- */
+
+typedef struct UASDevice UASDevice;
+typedef struct UASRequest UASRequest;
+typedef struct UASStatus UASStatus;
+
+struct UASDevice {
+    USBDevice                 dev;
+    SCSIBus                   bus;
+    UASRequest                *datain;
+    UASRequest                *dataout;
+    USBPacket                 *status;
+    QEMUBH                    *status_bh;
+    QTAILQ_HEAD(, UASStatus)  results;
+    QTAILQ_HEAD(, UASRequest) requests;
+};
+
+struct UASRequest {
+    uint16_t     tag;
+    uint64_t     lun;
+    UASDevice    *uas;
+    SCSIDevice   *dev;
+    SCSIRequest  *req;
+    USBPacket    *data;
+    bool         data_async;
+    bool         active;
+    bool         complete;
+    uint32_t     buf_off;
+    uint32_t     buf_size;
+    uint32_t     data_off;
+    uint32_t     data_size;
+    QTAILQ_ENTRY(UASRequest)  next;
+};
+
+struct UASStatus {
+    uas_ui                    status;
+    uint32_t                  length;
+    QTAILQ_ENTRY(UASStatus)   next;
+};
+
+/* --------------------------------------------------------------------- */
+
+enum {
+    STR_MANUFACTURER = 1,
+    STR_PRODUCT,
+    STR_SERIALNUMBER,
+    STR_CONFIG_HIGH,
+};
+
+static const USBDescStrings desc_strings = {
+    [STR_MANUFACTURER] = "QEMU",
+    [STR_PRODUCT]      = "USB Attached SCSI HBA",
+    [STR_SERIALNUMBER] = "27842",
+    [STR_CONFIG_HIGH]  = "High speed config (usb 2.0)",
+};
+
+static const USBDescIface desc_iface_high = {
+    .bInterfaceNumber              = 0,
+    .bNumEndpoints                 = 4,
+    .bInterfaceClass               = USB_CLASS_MASS_STORAGE,
+    .bInterfaceSubClass            = 0x06, /* SCSI */
+    .bInterfaceProtocol            = 0x62, /* UAS  */
+    .eps = (USBDescEndpoint[]) {
+        {
+            .bEndpointAddress      = USB_DIR_OUT | UAS_PIPE_ID_COMMAND,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 512,
+            .extra = (uint8_t[]) {
+                0x04,  /*  u8  bLength */
+                0x24,  /*  u8  bDescriptorType */
+                UAS_PIPE_ID_COMMAND,
+                0x00,  /*  u8  bReserved */
+            },
+        },{
+            .bEndpointAddress      = USB_DIR_IN | UAS_PIPE_ID_STATUS,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 512,
+            .extra = (uint8_t[]) {
+                0x04,  /*  u8  bLength */
+                0x24,  /*  u8  bDescriptorType */
+                UAS_PIPE_ID_STATUS,
+                0x00,  /*  u8  bReserved */
+            },
+        },{
+            .bEndpointAddress      = USB_DIR_IN | UAS_PIPE_ID_DATA_IN,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 512,
+            .extra = (uint8_t[]) {
+                0x04,  /*  u8  bLength */
+                0x24,  /*  u8  bDescriptorType */
+                UAS_PIPE_ID_DATA_IN,
+                0x00,  /*  u8  bReserved */
+            },
+        },{
+            .bEndpointAddress      = USB_DIR_OUT | UAS_PIPE_ID_DATA_OUT,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 512,
+            .extra = (uint8_t[]) {
+                0x04,  /*  u8  bLength */
+                0x24,  /*  u8  bDescriptorType */
+                UAS_PIPE_ID_DATA_OUT,
+                0x00,  /*  u8  bReserved */
+            },
+        },
+    }
+};
+
+static const USBDescDevice desc_device_high = {
+    .bcdUSB                        = 0x0200,
+    .bMaxPacketSize0               = 64,
+    .bNumConfigurations            = 1,
+    .confs = (USBDescConfig[]) {
+        {
+            .bNumInterfaces        = 1,
+            .bConfigurationValue   = 1,
+            .iConfiguration        = STR_CONFIG_HIGH,
+            .bmAttributes          = 0xc0,
+            .nif = 1,
+            .ifs = &desc_iface_high,
+        },
+    },
+};
+
+static const USBDesc desc = {
+    .id = {
+        .idVendor          = 0x46f4, /* CRC16() of "QEMU" */
+        .idProduct         = 0x0002,
+        .bcdDevice         = 0,
+        .iManufacturer     = STR_MANUFACTURER,
+        .iProduct          = STR_PRODUCT,
+        .iSerialNumber     = STR_SERIALNUMBER,
+    },
+    .high = &desc_device_high,
+    .str  = desc_strings,
+};
+
+/* --------------------------------------------------------------------- */
+
+static UASStatus *usb_uas_alloc_status(uint8_t id, uint16_t tag)
+{
+    UASStatus *st = g_new0(UASStatus, 1);
+
+    st->status.hdr.id = id;
+    st->status.hdr.tag = cpu_to_be16(tag);
+    st->length = sizeof(uas_ui_header);
+    return st;
+}
+
+static void usb_uas_send_status_bh(void *opaque)
+{
+    UASDevice *uas = opaque;
+    UASStatus *st = QTAILQ_FIRST(&uas->results);
+    USBPacket *p = uas->status;
+
+    assert(p != NULL);
+    assert(st != NULL);
+
+    uas->status = NULL;
+    usb_packet_copy(p, &st->status, st->length);
+    p->result = st->length;
+    QTAILQ_REMOVE(&uas->results, st, next);
+    g_free(st);
+
+    usb_packet_complete(&uas->dev, p);
+}
+
+static void usb_uas_queue_status(UASDevice *uas, UASStatus *st, int length)
+{
+    st->length += length;
+    QTAILQ_INSERT_TAIL(&uas->results, st, next);
+    if (uas->status) {
+        /*
+         * Just schedule bh make sure any in-flight data transaction
+         * is finished before completing (sending) the status packet.
+         */
+        qemu_bh_schedule(uas->status_bh);
+    } else {
+        USBEndpoint *ep = usb_ep_get(&uas->dev, USB_TOKEN_IN,
+                                     UAS_PIPE_ID_STATUS);
+        usb_wakeup(ep);
+    }
+}
+
+static void usb_uas_queue_response(UASDevice *uas, uint16_t tag,
+                                   uint8_t code, uint16_t add_info)
+{
+    UASStatus *st = usb_uas_alloc_status(UAS_UI_RESPONSE, tag);
+
+    trace_usb_uas_response(uas->dev.addr, tag, code);
+    st->status.response.response_code = code;
+    st->status.response.add_response_info = cpu_to_be16(add_info);
+    usb_uas_queue_status(uas, st, sizeof(uas_ui_response));
+}
+
+static void usb_uas_queue_sense(UASRequest *req, uint8_t status)
+{
+    UASStatus *st = usb_uas_alloc_status(UAS_UI_SENSE, req->tag);
+    int len, slen = 0;
+
+    trace_usb_uas_sense(req->uas->dev.addr, req->tag, status);
+    st->status.sense.status = status;
+    st->status.sense.status_qualifier = cpu_to_be16(0);
+    if (status != GOOD) {
+        slen = scsi_req_get_sense(req->req, st->status.sense.sense_data,
+                                  sizeof(st->status.sense.sense_data));
+        st->status.sense.sense_length = cpu_to_be16(slen);
+    }
+    len = sizeof(uas_ui_sense) - sizeof(st->status.sense.sense_data) + slen;
+    usb_uas_queue_status(req->uas, st, len);
+}
+
+static void usb_uas_queue_read_ready(UASRequest *req)
+{
+    UASStatus *st = usb_uas_alloc_status(UAS_UI_READ_READY, req->tag);
+
+    trace_usb_uas_read_ready(req->uas->dev.addr, req->tag);
+    usb_uas_queue_status(req->uas, st, 0);
+}
+
+static void usb_uas_queue_write_ready(UASRequest *req)
+{
+    UASStatus *st = usb_uas_alloc_status(UAS_UI_WRITE_READY, req->tag);
+
+    trace_usb_uas_write_ready(req->uas->dev.addr, req->tag);
+    usb_uas_queue_status(req->uas, st, 0);
+}
+
+/* --------------------------------------------------------------------- */
+
+static int usb_uas_get_lun(uint64_t lun64)
+{
+    return (lun64 >> 48) & 0xff;
+}
+
+static SCSIDevice *usb_uas_get_dev(UASDevice *uas, uint64_t lun64)
+{
+    if ((lun64 >> 56) != 0x00) {
+        return NULL;
+    }
+    return scsi_device_find(&uas->bus, 0, 0, usb_uas_get_lun(lun64));
+}
+
+static void usb_uas_complete_data_packet(UASRequest *req)
+{
+    USBPacket *p;
+
+    if (!req->data_async) {
+        return;
+    }
+    p = req->data;
+    req->data = NULL;
+    req->data_async = false;
+    usb_packet_complete(&req->uas->dev, p);
+}
+
+static void usb_uas_copy_data(UASRequest *req)
+{
+    uint32_t length;
+
+    length = MIN(req->buf_size - req->buf_off,
+                 req->data->iov.size - req->data->result);
+    trace_usb_uas_xfer_data(req->uas->dev.addr, req->tag, length,
+                            req->data->result, req->data->iov.size,
+                            req->buf_off, req->buf_size);
+    usb_packet_copy(req->data, scsi_req_get_buf(req->req) + req->buf_off,
+                    length);
+    req->buf_off += length;
+    req->data_off += length;
+
+    if (req->data->result == req->data->iov.size) {
+        usb_uas_complete_data_packet(req);
+    }
+    if (req->buf_size && req->buf_off == req->buf_size) {
+        req->buf_off = 0;
+        req->buf_size = 0;
+        scsi_req_continue(req->req);
+    }
+}
+
+static void usb_uas_start_next_transfer(UASDevice *uas)
+{
+    UASRequest *req;
+
+    QTAILQ_FOREACH(req, &uas->requests, next) {
+        if (req->active || req->complete) {
+            continue;
+        }
+        if (req->req->cmd.mode == SCSI_XFER_FROM_DEV && uas->datain == NULL) {
+            uas->datain = req;
+            usb_uas_queue_read_ready(req);
+            req->active = true;
+            return;
+        }
+        if (req->req->cmd.mode == SCSI_XFER_TO_DEV && uas->dataout == NULL) {
+            uas->dataout = req;
+            usb_uas_queue_write_ready(req);
+            req->active = true;
+            return;
+        }
+    }
+}
+
+static UASRequest *usb_uas_alloc_request(UASDevice *uas, uas_ui *ui)
+{
+    UASRequest *req;
+
+    req = g_new0(UASRequest, 1);
+    req->uas = uas;
+    req->tag = be16_to_cpu(ui->hdr.tag);
+    req->lun = be64_to_cpu(ui->command.lun);
+    req->dev = usb_uas_get_dev(req->uas, req->lun);
+    return req;
+}
+
+static void usb_uas_scsi_free_request(SCSIBus *bus, void *priv)
+{
+    UASRequest *req = priv;
+    UASDevice *uas = req->uas;
+
+    if (req == uas->datain) {
+        uas->datain = NULL;
+    }
+    if (req == uas->dataout) {
+        uas->dataout = NULL;
+    }
+    QTAILQ_REMOVE(&uas->requests, req, next);
+    g_free(req);
+}
+
+static UASRequest *usb_uas_find_request(UASDevice *uas, uint16_t tag)
+{
+    UASRequest *req;
+
+    QTAILQ_FOREACH(req, &uas->requests, next) {
+        if (req->tag == tag) {
+            return req;
+        }
+    }
+    return NULL;
+}
+
+static void usb_uas_scsi_transfer_data(SCSIRequest *r, uint32_t len)
+{
+    UASRequest *req = r->hba_private;
+
+    trace_usb_uas_scsi_data(req->uas->dev.addr, req->tag, len);
+    req->buf_off = 0;
+    req->buf_size = len;
+    if (req->data) {
+        usb_uas_copy_data(req);
+    } else {
+        usb_uas_start_next_transfer(req->uas);
+    }
+}
+
+static void usb_uas_scsi_command_complete(SCSIRequest *r,
+                                          uint32_t status, size_t resid)
+{
+    UASRequest *req = r->hba_private;
+    UASDevice *uas = req->uas;
+
+    trace_usb_uas_scsi_complete(req->uas->dev.addr, req->tag, status, resid);
+    req->complete = true;
+    if (req->data) {
+        usb_uas_complete_data_packet(req);
+    }
+    usb_uas_queue_sense(req, status);
+    scsi_req_unref(req->req);
+    usb_uas_start_next_transfer(uas);
+}
+
+static void usb_uas_scsi_request_cancelled(SCSIRequest *r)
+{
+    UASRequest *req = r->hba_private;
+
+    /* FIXME: queue notification to status pipe? */
+    scsi_req_unref(req->req);
+}
+
+static const struct SCSIBusInfo usb_uas_scsi_info = {
+    .tcq = true,
+    .max_target = 0,
+    .max_lun = 255,
+
+    .transfer_data = usb_uas_scsi_transfer_data,
+    .complete = usb_uas_scsi_command_complete,
+    .cancel = usb_uas_scsi_request_cancelled,
+    .free_request = usb_uas_scsi_free_request,
+};
+
+/* --------------------------------------------------------------------- */
+
+static void usb_uas_handle_reset(USBDevice *dev)
+{
+    UASDevice *uas = DO_UPCAST(UASDevice, dev, dev);
+    UASRequest *req, *nreq;
+    UASStatus *st, *nst;
+
+    trace_usb_uas_reset(dev->addr);
+    QTAILQ_FOREACH_SAFE(req, &uas->requests, next, nreq) {
+        scsi_req_cancel(req->req);
+    }
+    QTAILQ_FOREACH_SAFE(st, &uas->results, next, nst) {
+        QTAILQ_REMOVE(&uas->results, st, next);
+        g_free(st);
+    }
+}
+
+static int usb_uas_handle_control(USBDevice *dev, USBPacket *p,
+               int request, int value, int index, int length, uint8_t *data)
+{
+    int ret;
+
+    ret = usb_desc_handle_control(dev, p, request, value, index, length, data);
+    if (ret >= 0) {
+        return ret;
+    }
+    fprintf(stderr, "%s: unhandled control request\n", __func__);
+    return USB_RET_STALL;
+}
+
+static void usb_uas_cancel_io(USBDevice *dev, USBPacket *p)
+{
+    UASDevice *uas = DO_UPCAST(UASDevice, dev, dev);
+    UASRequest *req, *nreq;
+
+    if (uas->status == p) {
+        uas->status = NULL;
+        qemu_bh_cancel(uas->status_bh);
+        return;
+    }
+    QTAILQ_FOREACH_SAFE(req, &uas->requests, next, nreq) {
+        if (req->data == p) {
+            req->data = NULL;
+            return;
+        }
+    }
+    assert(!"canceled usb packet not found");
+}
+
+static void usb_uas_command(UASDevice *uas, uas_ui *ui)
+{
+    UASRequest *req;
+    uint32_t len;
+
+    req = usb_uas_find_request(uas, be16_to_cpu(ui->hdr.tag));
+    if (req) {
+        goto overlapped_tag;
+    }
+    req = usb_uas_alloc_request(uas, ui);
+    if (req->dev == NULL) {
+        goto bad_target;
+    }
+
+    trace_usb_uas_command(uas->dev.addr, req->tag,
+                          usb_uas_get_lun(req->lun),
+                          req->lun >> 32, req->lun & 0xffffffff);
+    QTAILQ_INSERT_TAIL(&uas->requests, req, next);
+    req->req = scsi_req_new(req->dev, req->tag,
+                            usb_uas_get_lun(req->lun),
+                            ui->command.cdb, req);
+    len = scsi_req_enqueue(req->req);
+    if (len) {
+        req->data_size = len;
+        scsi_req_continue(req->req);
+    }
+    return;
+
+overlapped_tag:
+    usb_uas_queue_response(uas, req->tag, UAS_RC_OVERLAPPED_TAG, 0);
+    return;
+
+bad_target:
+    /*
+     * FIXME: Seems to upset linux, is this wrong?
+     * NOTE: Happens only with no scsi devices at the bus, not sure
+     *       this is a valid UAS setup in the first place.
+     */
+    usb_uas_queue_response(uas, req->tag, UAS_RC_INVALID_INFO_UNIT, 0);
+    g_free(req);
+    return;
+}
+
+static void usb_uas_task(UASDevice *uas, uas_ui *ui)
+{
+    uint16_t tag = be16_to_cpu(ui->hdr.tag);
+    uint64_t lun64 = be64_to_cpu(ui->task.lun);
+    SCSIDevice *dev = usb_uas_get_dev(uas, lun64);
+    int lun = usb_uas_get_lun(lun64);
+    UASRequest *req;
+    uint16_t task_tag;
+
+    req = usb_uas_find_request(uas, be16_to_cpu(ui->hdr.tag));
+    if (req) {
+        goto overlapped_tag;
+    }
+
+    switch (ui->task.function) {
+    case UAS_TMF_ABORT_TASK:
+        task_tag = be16_to_cpu(ui->task.task_tag);
+        trace_usb_uas_tmf_abort_task(uas->dev.addr, tag, task_tag);
+        if (dev == NULL) {
+            goto bad_target;
+        }
+        if (dev->lun != lun) {
+            goto incorrect_lun;
+        }
+        req = usb_uas_find_request(uas, task_tag);
+        if (req && req->dev == dev) {
+            scsi_req_cancel(req->req);
+        }
+        usb_uas_queue_response(uas, tag, UAS_RC_TMF_COMPLETE, 0);
+        break;
+
+    case UAS_TMF_LOGICAL_UNIT_RESET:
+        trace_usb_uas_tmf_logical_unit_reset(uas->dev.addr, tag, lun);
+        if (dev == NULL) {
+            goto bad_target;
+        }
+        if (dev->lun != lun) {
+            goto incorrect_lun;
+        }
+        qdev_reset_all(&dev->qdev);
+        usb_uas_queue_response(uas, tag, UAS_RC_TMF_COMPLETE, 0);
+        break;
+
+    default:
+        trace_usb_uas_tmf_unsupported(uas->dev.addr, tag, ui->task.function);
+        usb_uas_queue_response(uas, tag, UAS_RC_TMF_NOT_SUPPORTED, 0);
+        break;
+    }
+    return;
+
+overlapped_tag:
+    usb_uas_queue_response(uas, req->tag, UAS_RC_OVERLAPPED_TAG, 0);
+    return;
+
+bad_target:
+    /* FIXME: correct?  [see long comment in usb_uas_command()] */
+    usb_uas_queue_response(uas, tag, UAS_RC_INVALID_INFO_UNIT, 0);
+    return;
+
+incorrect_lun:
+    usb_uas_queue_response(uas, tag, UAS_RC_INCORRECT_LUN, 0);
+    return;
+}
+
+static int usb_uas_handle_data(USBDevice *dev, USBPacket *p)
+{
+    UASDevice *uas = DO_UPCAST(UASDevice, dev, dev);
+    uas_ui ui;
+    UASStatus *st;
+    UASRequest *req;
+    int length, ret = 0;
+
+    switch (p->ep->nr) {
+    case UAS_PIPE_ID_COMMAND:
+        length = MIN(sizeof(ui), p->iov.size);
+        usb_packet_copy(p, &ui, length);
+        switch (ui.hdr.id) {
+        case UAS_UI_COMMAND:
+            usb_uas_command(uas, &ui);
+            ret = length;
+            break;
+        case UAS_UI_TASK_MGMT:
+            usb_uas_task(uas, &ui);
+            ret = length;
+            break;
+        default:
+            fprintf(stderr, "%s: unknown command ui: id 0x%x\n",
+                    __func__, ui.hdr.id);
+            ret = USB_RET_STALL;
+            break;
+        }
+        break;
+    case UAS_PIPE_ID_STATUS:
+        st = QTAILQ_FIRST(&uas->results);
+        if (st == NULL) {
+            assert(uas->status == NULL);
+            uas->status = p;
+            ret = USB_RET_ASYNC;
+            break;
+        }
+        usb_packet_copy(p, &st->status, st->length);
+        ret = st->length;
+        QTAILQ_REMOVE(&uas->results, st, next);
+        g_free(st);
+        break;
+    case UAS_PIPE_ID_DATA_IN:
+    case UAS_PIPE_ID_DATA_OUT:
+        req = (p->ep->nr == UAS_PIPE_ID_DATA_IN) ? uas->datain : uas->dataout;
+        if (req == NULL) {
+            fprintf(stderr, "%s: no inflight request\n", __func__);
+            ret = USB_RET_STALL;
+            break;
+        }
+        scsi_req_ref(req->req);
+        req->data = p;
+        usb_uas_copy_data(req);
+        if (p->result == p->iov.size || req->complete) {
+            req->data = NULL;
+            ret = p->result;
+        } else {
+            req->data_async = true;
+            ret = USB_RET_ASYNC;
+        }
+        scsi_req_unref(req->req);
+        usb_uas_start_next_transfer(uas);
+        break;
+    default:
+        fprintf(stderr, "%s: invalid endpoint %d\n", __func__, p->ep->nr);
+        ret = USB_RET_STALL;
+        break;
+    }
+    return ret;
+}
+
+static void usb_uas_handle_destroy(USBDevice *dev)
+{
+    UASDevice *uas = DO_UPCAST(UASDevice, dev, dev);
+
+    qemu_bh_delete(uas->status_bh);
+}
+
+static int usb_uas_init(USBDevice *dev)
+{
+    UASDevice *uas = DO_UPCAST(UASDevice, dev, dev);
+
+    usb_desc_create_serial(dev);
+    usb_desc_init(dev);
+
+    QTAILQ_INIT(&uas->results);
+    QTAILQ_INIT(&uas->requests);
+    uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas);
+
+    scsi_bus_new(&uas->bus, &uas->dev.qdev, &usb_uas_scsi_info);
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_usb_uas = {
+    .name = "usb-uas",
+    .unmigratable = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_USB_DEVICE(dev, UASDevice),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void usb_uas_class_initfn(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
+
+    uc->init           = usb_uas_init;
+    uc->product_desc   = desc_strings[STR_PRODUCT];
+    uc->usb_desc       = &desc;
+    uc->cancel_packet  = usb_uas_cancel_io;
+    uc->handle_attach  = usb_desc_attach;
+    uc->handle_reset   = usb_uas_handle_reset;
+    uc->handle_control = usb_uas_handle_control;
+    uc->handle_data    = usb_uas_handle_data;
+    uc->handle_destroy = usb_uas_handle_destroy;
+    dc->fw_name = "storage";
+    dc->vmsd = &vmstate_usb_uas;
+}
+
+static TypeInfo uas_info = {
+    .name          = "usb-uas",
+    .parent        = TYPE_USB_DEVICE,
+    .instance_size = sizeof(UASDevice),
+    .class_init    = usb_uas_class_initfn,
+};
+
+static void usb_uas_register_types(void)
+{
+    type_register_static(&uas_info);
+}
+
+type_init(usb_uas_register_types)
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 5298204d9d..b043e7c23e 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -365,6 +365,7 @@ struct EHCIQueue {
     uint32_t seen;
     uint64_t ts;
     int async;
+    int revalidate;
 
     /* cached data from guest - needs to be flushed
      * when guest removes an entry (doorbell, handshake sequence)
@@ -414,16 +415,18 @@ struct EHCIState {
      */
     QEMUTimer *frame_timer;
     QEMUBH *async_bh;
-    int astate;                        // Current state in asynchronous schedule
-    int pstate;                        // Current state in periodic schedule
+    uint32_t astate;         /* Current state in asynchronous schedule */
+    uint32_t pstate;         /* Current state in periodic schedule     */
     USBPort ports[NB_PORTS];
     USBPort *companion_ports[NB_PORTS];
     uint32_t usbsts_pending;
+    uint32_t usbsts_frindex;
     EHCIQueueHead aqueues;
     EHCIQueueHead pqueues;
 
-    uint32_t a_fetch_addr;   // which address to look at next
-    uint32_t p_fetch_addr;   // which address to look at next
+    /* which address to look at next */
+    uint32_t a_fetch_addr;
+    uint32_t p_fetch_addr;
 
     USBPacket ipacket;
     QEMUSGList isgl;
@@ -556,33 +559,45 @@ static inline void ehci_clear_usbsts(EHCIState *s, int mask)
     s->usbsts &= ~mask;
 }
 
-static inline void ehci_set_interrupt(EHCIState *s, int intr)
+/* update irq line */
+static inline void ehci_update_irq(EHCIState *s)
 {
     int level = 0;
 
-    // TODO honour interrupt threshold requests
-
-    ehci_set_usbsts(s, intr);
-
     if ((s->usbsts & USBINTR_MASK) & s->usbintr) {
         level = 1;
     }
 
+    trace_usb_ehci_irq(level, s->frindex, s->usbsts, s->usbintr);
     qemu_set_irq(s->irq, level);
 }
 
-static inline void ehci_record_interrupt(EHCIState *s, int intr)
+/* flag interrupt condition */
+static inline void ehci_raise_irq(EHCIState *s, int intr)
 {
     s->usbsts_pending |= intr;
 }
 
-static inline void ehci_commit_interrupt(EHCIState *s)
+/*
+ * Commit pending interrupts (added via ehci_raise_irq),
+ * at the rate allowed by "Interrupt Threshold Control".
+ */
+static inline void ehci_commit_irq(EHCIState *s)
 {
+    uint32_t itc;
+
     if (!s->usbsts_pending) {
         return;
     }
-    ehci_set_interrupt(s, s->usbsts_pending);
+    if (s->usbsts_frindex > s->frindex) {
+        return;
+    }
+
+    itc = (s->usbcmd >> 16) & 0xff;
+    s->usbsts |= s->usbsts_pending;
     s->usbsts_pending = 0;
+    s->usbsts_frindex = s->frindex + itc;
+    ehci_update_irq(s);
 }
 
 static void ehci_update_halt(EHCIState *s)
@@ -773,7 +788,18 @@ static EHCIQueue *ehci_find_queue_by_qh(EHCIState *ehci, uint32_t addr,
     return NULL;
 }
 
-static void ehci_queues_rip_unused(EHCIState *ehci, int async, int flush)
+static void ehci_queues_tag_unused_async(EHCIState *ehci)
+{
+    EHCIQueue *q;
+
+    QTAILQ_FOREACH(q, &ehci->aqueues, next) {
+        if (!q->seen) {
+            q->revalidate = 1;
+        }
+    }
+}
+
+static void ehci_queues_rip_unused(EHCIState *ehci, int async)
 {
     EHCIQueueHead *head = async ? &ehci->aqueues : &ehci->pqueues;
     uint64_t maxage = FRAME_TIMER_NS * ehci->maxframes * 4;
@@ -785,7 +811,7 @@ static void ehci_queues_rip_unused(EHCIState *ehci, int async, int flush)
             q->ts = ehci->last_run_ns;
             continue;
         }
-        if (!flush && ehci->last_run_ns < q->ts + maxage) {
+        if (ehci->last_run_ns < q->ts + maxage) {
             continue;
         }
         ehci_free_queue(q);
@@ -821,8 +847,9 @@ static void ehci_attach(USBPort *port)
 {
     EHCIState *s = port->opaque;
     uint32_t *portsc = &s->portsc[port->index];
+    const char *owner = (*portsc & PORTSC_POWNER) ? "comp" : "ehci";
 
-    trace_usb_ehci_port_attach(port->index, port->dev->product_desc);
+    trace_usb_ehci_port_attach(port->index, owner, port->dev->product_desc);
 
     if (*portsc & PORTSC_POWNER) {
         USBPort *companion = s->companion_ports[port->index];
@@ -834,15 +861,17 @@ static void ehci_attach(USBPort *port)
     *portsc |= PORTSC_CONNECT;
     *portsc |= PORTSC_CSC;
 
-    ehci_set_interrupt(s, USBSTS_PCD);
+    ehci_raise_irq(s, USBSTS_PCD);
+    ehci_commit_irq(s);
 }
 
 static void ehci_detach(USBPort *port)
 {
     EHCIState *s = port->opaque;
     uint32_t *portsc = &s->portsc[port->index];
+    const char *owner = (*portsc & PORTSC_POWNER) ? "comp" : "ehci";
 
-    trace_usb_ehci_port_detach(port->index);
+    trace_usb_ehci_port_detach(port->index, owner);
 
     if (*portsc & PORTSC_POWNER) {
         USBPort *companion = s->companion_ports[port->index];
@@ -862,7 +891,8 @@ static void ehci_detach(USBPort *port)
     *portsc &= ~(PORTSC_CONNECT|PORTSC_PED);
     *portsc |= PORTSC_CSC;
 
-    ehci_set_interrupt(s, USBSTS_PCD);
+    ehci_raise_irq(s, USBSTS_PCD);
+    ehci_commit_irq(s);
 }
 
 static void ehci_child_detach(USBPort *port, USBDevice *child)
@@ -889,10 +919,11 @@ static void ehci_wakeup(USBPort *port)
         USBPort *companion = s->companion_ports[port->index];
         if (companion->ops->wakeup) {
             companion->ops->wakeup(companion);
-        } else {
-            qemu_bh_schedule(s->async_bh);
         }
+        return;
     }
+
+    qemu_bh_schedule(s->async_bh);
 }
 
 static int ehci_register_companion(USBBus *bus, USBPort *ports[],
@@ -980,6 +1011,8 @@ static void ehci_reset(void *opaque)
 
     s->usbcmd = NB_MAXINTRATE << USBCMD_ITC_SH;
     s->usbsts = USBSTS_HALT;
+    s->usbsts_pending = 0;
+    s->usbsts_frindex = 0;
 
     s->astate = EST_INACTIVE;
     s->pstate = EST_INACTIVE;
@@ -1171,7 +1204,7 @@ static void ehci_mem_writel(void *ptr, target_phys_addr_t addr, uint32_t val)
         val &= USBSTS_RO_MASK;              // bits 6 through 31 are RO
         ehci_clear_usbsts(s, val);          // bits 0 through 5 are R/WC
         val = s->usbsts;
-        ehci_set_interrupt(s, 0);
+        ehci_update_irq(s);
         break;
 
     case USBINTR:
@@ -1242,6 +1275,23 @@ static inline int put_dwords(EHCIState *ehci, uint32_t addr,
     return 1;
 }
 
+/*
+ *  Write the qh back to guest physical memory.  This step isn't
+ *  in the EHCI spec but we need to do it since we don't share
+ *  physical memory with our guest VM.
+ *
+ *  The first three dwords are read-only for the EHCI, so skip them
+ *  when writing back the qh.
+ */
+static void ehci_flush_qh(EHCIQueue *q)
+{
+    uint32_t *qh = (uint32_t *) &q->qh;
+    uint32_t dwords = sizeof(EHCIqh) >> 2;
+    uint32_t addr = NLPTR_GET(q->qhaddr);
+
+    put_dwords(q->ehci, addr + 3 * sizeof(uint32_t), qh + 3, dwords - 3);
+}
+
 // 4.10.2
 
 static int ehci_qh_do_overlay(EHCIQueue *q)
@@ -1289,8 +1339,7 @@ static int ehci_qh_do_overlay(EHCIQueue *q)
     q->qh.bufptr[1] &= ~BUFPTR_CPROGMASK_MASK;
     q->qh.bufptr[2] &= ~BUFPTR_FRAMETAG_MASK;
 
-    put_dwords(q->ehci, NLPTR_GET(q->qhaddr), (uint32_t *) &q->qh,
-               sizeof(EHCIqh) >> 2);
+    ehci_flush_qh(q);
 
     return 0;
 }
@@ -1386,18 +1435,18 @@ static void ehci_execute_complete(EHCIQueue *q)
         case USB_RET_NODEV:
             q->qh.token |= (QTD_TOKEN_HALT | QTD_TOKEN_XACTERR);
             set_field(&q->qh.token, 0, QTD_TOKEN_CERR);
-            ehci_record_interrupt(q->ehci, USBSTS_ERRINT);
+            ehci_raise_irq(q->ehci, USBSTS_ERRINT);
             break;
         case USB_RET_STALL:
             q->qh.token |= QTD_TOKEN_HALT;
-            ehci_record_interrupt(q->ehci, USBSTS_ERRINT);
+            ehci_raise_irq(q->ehci, USBSTS_ERRINT);
             break;
         case USB_RET_NAK:
             set_field(&q->qh.altnext_qtd, 0, QH_ALTNEXT_NAKCNT);
             return; /* We're not done yet with this transaction */
         case USB_RET_BABBLE:
             q->qh.token |= (QTD_TOKEN_HALT | QTD_TOKEN_BABBLE);
-            ehci_record_interrupt(q->ehci, USBSTS_ERRINT);
+            ehci_raise_irq(q->ehci, USBSTS_ERRINT);
             break;
         default:
             /* should not be triggerable */
@@ -1408,7 +1457,7 @@ static void ehci_execute_complete(EHCIQueue *q)
     } else if ((p->usb_status > p->tbytes) && (p->pid == USB_TOKEN_IN)) {
         p->usb_status = USB_RET_BABBLE;
         q->qh.token |= (QTD_TOKEN_HALT | QTD_TOKEN_BABBLE);
-        ehci_record_interrupt(q->ehci, USBSTS_ERRINT);
+        ehci_raise_irq(q->ehci, USBSTS_ERRINT);
     } else {
         // TODO check 4.12 for splits
 
@@ -1422,14 +1471,14 @@ static void ehci_execute_complete(EHCIQueue *q)
         set_field(&q->qh.token, p->tbytes, QTD_TOKEN_TBYTES);
     }
     ehci_finish_transfer(q, p->usb_status);
+    usb_packet_unmap(&p->packet, &p->sgl);
     qemu_sglist_destroy(&p->sgl);
-    usb_packet_unmap(&p->packet);
 
     q->qh.token ^= QTD_TOKEN_DTOGGLE;
     q->qh.token &= ~QTD_TOKEN_ACTIVE;
 
     if (q->qh.token & QTD_TOKEN_IOC) {
-        ehci_record_interrupt(q->ehci, USBSTS_INT);
+        ehci_raise_irq(q->ehci, USBSTS_INT);
     }
 }
 
@@ -1547,7 +1596,7 @@ static int ehci_process_itd(EHCIState *ehci,
                 usb_packet_map(&ehci->ipacket, &ehci->isgl);
                 ret = usb_handle_packet(dev, &ehci->ipacket);
                 assert(ret != USB_RET_ASYNC);
-                usb_packet_unmap(&ehci->ipacket);
+                usb_packet_unmap(&ehci->ipacket, &ehci->isgl);
             } else {
                 DPRINTF("ISOCH: attempt to addess non-iso endpoint\n");
                 ret = USB_RET_NAK;
@@ -1564,12 +1613,12 @@ static int ehci_process_itd(EHCIState *ehci,
                     /* 3.3.2: XACTERR is only allowed on IN transactions */
                     if (dir) {
                         itd->transact[i] |= ITD_XACT_XACTERR;
-                        ehci_record_interrupt(ehci, USBSTS_ERRINT);
+                        ehci_raise_irq(ehci, USBSTS_ERRINT);
                     }
                     break;
                 case USB_RET_BABBLE:
                     itd->transact[i] |= ITD_XACT_BABBLE;
-                    ehci_record_interrupt(ehci, USBSTS_ERRINT);
+                    ehci_raise_irq(ehci, USBSTS_ERRINT);
                     break;
                 case USB_RET_NAK:
                     /* no data for us, so do a zero-length transfer */
@@ -1587,7 +1636,7 @@ static int ehci_process_itd(EHCIState *ehci,
                 }
             }
             if (itd->transact[i] & ITD_XACT_IOC) {
-                ehci_record_interrupt(ehci, USBSTS_INT);
+                ehci_raise_irq(ehci, USBSTS_INT);
             }
             itd->transact[i] &= ~ITD_XACT_ACTIVE;
         }
@@ -1596,23 +1645,6 @@ static int ehci_process_itd(EHCIState *ehci,
 }
 
 
-/*
- *  Write the qh back to guest physical memory.  This step isn't
- *  in the EHCI spec but we need to do it since we don't share
- *  physical memory with our guest VM.
- *
- *  The first three dwords are read-only for the EHCI, so skip them
- *  when writing back the qh.
- */
-static void ehci_flush_qh(EHCIQueue *q)
-{
-    uint32_t *qh = (uint32_t *) &q->qh;
-    uint32_t dwords = sizeof(EHCIqh) >> 2;
-    uint32_t addr = NLPTR_GET(q->qhaddr);
-
-    put_dwords(q->ehci, addr + 3 * sizeof(uint32_t), qh + 3, dwords - 3);
-}
-
 /*  This state is the entry point for asynchronous schedule
  *  processing.  Entry here consitutes a EHCI start event state (4.8.5)
  */
@@ -1628,7 +1660,7 @@ static int ehci_state_waitlisthead(EHCIState *ehci,  int async)
         ehci_set_usbsts(ehci, USBSTS_REC);
     }
 
-    ehci_queues_rip_unused(ehci, async, 0);
+    ehci_queues_rip_unused(ehci, async);
 
     /*  Find the head of the list (4.9.1.1) */
     for(i = 0; i < MAX_QH; i++) {
@@ -1713,6 +1745,7 @@ static EHCIQueue *ehci_state_fetchqh(EHCIState *ehci, int async)
     EHCIPacket *p;
     uint32_t entry, devaddr;
     EHCIQueue *q;
+    EHCIqh qh;
 
     entry = ehci_get_fetch_addr(ehci, async);
     q = ehci_find_queue_by_qh(ehci, entry, async);
@@ -1730,7 +1763,17 @@ static EHCIQueue *ehci_state_fetchqh(EHCIState *ehci, int async)
     }
 
     get_dwords(ehci, NLPTR_GET(q->qhaddr),
-               (uint32_t *) &q->qh, sizeof(EHCIqh) >> 2);
+               (uint32_t *) &qh, sizeof(EHCIqh) >> 2);
+    if (q->revalidate && (q->qh.epchar      != qh.epchar ||
+                          q->qh.epcap       != qh.epcap  ||
+                          q->qh.current_qtd != qh.current_qtd)) {
+        ehci_free_queue(q);
+        q = ehci_alloc_queue(ehci, entry, async);
+        q->seen++;
+        p = NULL;
+    }
+    q->qh = qh;
+    q->revalidate = 0;
     ehci_trace_qh(q, NLPTR_GET(q->qhaddr), &q->qh);
 
     devaddr = get_field(q->qh.epchar, QH_EPCHAR_DEVADDR);
@@ -2067,6 +2110,7 @@ out:
 static int ehci_state_writeback(EHCIQueue *q)
 {
     EHCIPacket *p = QTAILQ_FIRST(&q->packets);
+    uint32_t *qtd, addr;
     int again = 0;
 
     /*  Write back the QTD from the QH area */
@@ -2074,8 +2118,9 @@ static int ehci_state_writeback(EHCIQueue *q)
     assert(p->qtdaddr == q->qtdaddr);
 
     ehci_trace_qtd(q, NLPTR_GET(p->qtdaddr), (EHCIqtd *) &q->qh.next_qtd);
-    put_dwords(q->ehci, NLPTR_GET(p->qtdaddr), (uint32_t *) &q->qh.next_qtd,
-               sizeof(EHCIqtd) >> 2);
+    qtd = (uint32_t *) &q->qh.next_qtd;
+    addr = NLPTR_GET(p->qtdaddr);
+    put_dwords(q->ehci, addr + 2 * sizeof(uint32_t), qtd + 2, 2);
     ehci_free_packet(p);
 
     /*
@@ -2179,8 +2224,6 @@ static void ehci_advance_state(EHCIState *ehci, int async)
         }
     }
     while (again);
-
-    ehci_commit_interrupt(ehci);
 }
 
 static void ehci_advance_async_state(EHCIState *ehci)
@@ -2223,10 +2266,10 @@ static void ehci_advance_async_state(EHCIState *ehci)
          */
         if (ehci->usbcmd & USBCMD_IAAD) {
             /* Remove all unseen qhs from the async qhs queue */
-            ehci_queues_rip_unused(ehci, async, 1);
+            ehci_queues_tag_unused_async(ehci);
             DPRINTF("ASYNC: doorbell request acknowledged\n");
             ehci->usbcmd &= ~USBCMD_IAAD;
-            ehci_set_interrupt(ehci, USBSTS_IAA);
+            ehci_raise_irq(ehci, USBSTS_IAA);
         }
         break;
 
@@ -2276,7 +2319,7 @@ static void ehci_advance_periodic_state(EHCIState *ehci)
         ehci_set_fetch_addr(ehci, async,entry);
         ehci_set_state(ehci, async, EST_FETCHENTRY);
         ehci_advance_state(ehci, async);
-        ehci_queues_rip_unused(ehci, async, 0);
+        ehci_queues_rip_unused(ehci, async);
         break;
 
     default:
@@ -2299,12 +2342,17 @@ static void ehci_update_frindex(EHCIState *ehci, int frames)
         ehci->frindex += 8;
 
         if (ehci->frindex == 0x00002000) {
-            ehci_set_interrupt(ehci, USBSTS_FLR);
+            ehci_raise_irq(ehci, USBSTS_FLR);
         }
 
         if (ehci->frindex == 0x00004000) {
-            ehci_set_interrupt(ehci, USBSTS_FLR);
+            ehci_raise_irq(ehci, USBSTS_FLR);
             ehci->frindex = 0;
+            if (ehci->usbsts_frindex > 0x00004000) {
+                ehci->usbsts_frindex -= 0x00004000;
+            } else {
+                ehci->usbsts_frindex = 0;
+            }
         }
     }
 }
@@ -2312,7 +2360,7 @@ static void ehci_update_frindex(EHCIState *ehci, int frames)
 static void ehci_frame_timer(void *opaque)
 {
     EHCIState *ehci = opaque;
-    int schedules = 0;
+    int need_timer = 0;
     int64_t expire_time, t_now;
     uint64_t ns_elapsed;
     int frames, skipped_frames;
@@ -2323,8 +2371,8 @@ static void ehci_frame_timer(void *opaque)
     frames = ns_elapsed / FRAME_TIMER_NS;
 
     if (ehci_periodic_enabled(ehci) || ehci->pstate != EST_INACTIVE) {
-        schedules++;
-        expire_time = t_now + (get_ticks_per_sec() / FRAME_TIMER_FREQ);
+        need_timer++;
+        ehci->async_stepdown = 0;
 
         if (frames > ehci->maxframes) {
             skipped_frames = frames - ehci->maxframes;
@@ -2343,8 +2391,6 @@ static void ehci_frame_timer(void *opaque)
         if (ehci->async_stepdown < ehci->maxframes / 2) {
             ehci->async_stepdown++;
         }
-        expire_time = t_now + (get_ticks_per_sec()
-                               * ehci->async_stepdown / FRAME_TIMER_FREQ);
         ehci_update_frindex(ehci, frames);
         ehci->last_run_ns += FRAME_TIMER_NS * frames;
     }
@@ -2353,11 +2399,19 @@ static void ehci_frame_timer(void *opaque)
      *  called
      */
     if (ehci_async_enabled(ehci) || ehci->astate != EST_INACTIVE) {
-        schedules++;
-        qemu_bh_schedule(ehci->async_bh);
+        need_timer++;
+        ehci_advance_async_state(ehci);
     }
 
-    if (schedules) {
+    ehci_commit_irq(ehci);
+    if (ehci->usbsts_pending) {
+        need_timer++;
+        ehci->async_stepdown = 0;
+    }
+
+    if (need_timer) {
+        expire_time = t_now + (get_ticks_per_sec()
+                               * (ehci->async_stepdown+1) / FRAME_TIMER_FREQ);
         qemu_mod_timer(ehci->frame_timer, expire_time);
     }
 }
@@ -2390,9 +2444,58 @@ static USBBusOps ehci_bus_ops = {
     .register_companion = ehci_register_companion,
 };
 
+static int usb_ehci_post_load(void *opaque, int version_id)
+{
+    EHCIState *s = opaque;
+    int i;
+
+    for (i = 0; i < NB_PORTS; i++) {
+        USBPort *companion = s->companion_ports[i];
+        if (companion == NULL) {
+            continue;
+        }
+        if (s->portsc[i] & PORTSC_POWNER) {
+            companion->dev = s->ports[i].dev;
+        } else {
+            companion->dev = NULL;
+        }
+    }
+
+    return 0;
+}
+
 static const VMStateDescription vmstate_ehci = {
-    .name = "ehci",
-    .unmigratable = 1,
+    .name        = "ehci",
+    .version_id  = 1,
+    .post_load   = usb_ehci_post_load,
+    .fields      = (VMStateField[]) {
+        VMSTATE_PCI_DEVICE(dev, EHCIState),
+        /* mmio registers */
+        VMSTATE_UINT32(usbcmd, EHCIState),
+        VMSTATE_UINT32(usbsts, EHCIState),
+        VMSTATE_UINT32(usbintr, EHCIState),
+        VMSTATE_UINT32(frindex, EHCIState),
+        VMSTATE_UINT32(ctrldssegment, EHCIState),
+        VMSTATE_UINT32(periodiclistbase, EHCIState),
+        VMSTATE_UINT32(asynclistaddr, EHCIState),
+        VMSTATE_UINT32(configflag, EHCIState),
+        VMSTATE_UINT32(portsc[0], EHCIState),
+        VMSTATE_UINT32(portsc[1], EHCIState),
+        VMSTATE_UINT32(portsc[2], EHCIState),
+        VMSTATE_UINT32(portsc[3], EHCIState),
+        VMSTATE_UINT32(portsc[4], EHCIState),
+        VMSTATE_UINT32(portsc[5], EHCIState),
+        /* frame timer */
+        VMSTATE_TIMER(frame_timer, EHCIState),
+        VMSTATE_UINT64(last_run_ns, EHCIState),
+        VMSTATE_UINT32(async_stepdown, EHCIState),
+        /* schedule state */
+        VMSTATE_UINT32(astate, EHCIState),
+        VMSTATE_UINT32(pstate, EHCIState),
+        VMSTATE_UINT32(a_fetch_addr, EHCIState),
+        VMSTATE_UINT32(p_fetch_addr, EHCIState),
+        VMSTATE_END_OF_LIST()
+    }
 };
 
 static Property ehci_properties[] = {
@@ -2504,6 +2607,7 @@ static int usb_ehci_initfn(PCIDevice *dev)
     s->async_bh = qemu_bh_new(ehci_async_bh, s);
     QTAILQ_INIT(&s->aqueues);
     QTAILQ_INIT(&s->pqueues);
+    usb_packet_init(&s->ipacket);
 
     qemu_register_reset(ehci_reset, s);
 
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 1a1cc88b1f..844e7ed166 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -31,7 +31,7 @@
 #include "hw/usb.h"
 #include "hw/pci.h"
 #include "hw/sysbus.h"
-#include "hw/qdev-addr.h"
+#include "hw/qdev-dma.h"
 
 //#define DEBUG_OHCI
 /* Dump packet contents.  */
@@ -62,6 +62,7 @@ typedef struct {
     USBBus bus;
     qemu_irq irq;
     MemoryRegion mem;
+    DMAContext *dma;
     int num_ports;
     const char *name;
 
@@ -104,7 +105,7 @@ typedef struct {
     uint32_t htest;
 
     /* SM501 local memory offset */
-    target_phys_addr_t localmem_base;
+    dma_addr_t localmem_base;
 
     /* Active packets.  */
     uint32_t old_ctl;
@@ -482,14 +483,14 @@ static void ohci_reset(void *opaque)
 
 /* Get an array of dwords from main memory */
 static inline int get_dwords(OHCIState *ohci,
-                             uint32_t addr, uint32_t *buf, int num)
+                             dma_addr_t addr, uint32_t *buf, int num)
 {
     int i;
 
     addr += ohci->localmem_base;
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        cpu_physical_memory_read(addr, buf, sizeof(*buf));
+        dma_memory_read(ohci->dma, addr, buf, sizeof(*buf));
         *buf = le32_to_cpu(*buf);
     }
 
@@ -498,7 +499,7 @@ static inline int get_dwords(OHCIState *ohci,
 
 /* Put an array of dwords in to main memory */
 static inline int put_dwords(OHCIState *ohci,
-                             uint32_t addr, uint32_t *buf, int num)
+                             dma_addr_t addr, uint32_t *buf, int num)
 {
     int i;
 
@@ -506,7 +507,7 @@ static inline int put_dwords(OHCIState *ohci,
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         uint32_t tmp = cpu_to_le32(*buf);
-        cpu_physical_memory_write(addr, &tmp, sizeof(tmp));
+        dma_memory_write(ohci->dma, addr, &tmp, sizeof(tmp));
     }
 
     return 1;
@@ -514,14 +515,14 @@ static inline int put_dwords(OHCIState *ohci,
 
 /* Get an array of words from main memory */
 static inline int get_words(OHCIState *ohci,
-                            uint32_t addr, uint16_t *buf, int num)
+                            dma_addr_t addr, uint16_t *buf, int num)
 {
     int i;
 
     addr += ohci->localmem_base;
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        cpu_physical_memory_read(addr, buf, sizeof(*buf));
+        dma_memory_read(ohci->dma, addr, buf, sizeof(*buf));
         *buf = le16_to_cpu(*buf);
     }
 
@@ -530,7 +531,7 @@ static inline int get_words(OHCIState *ohci,
 
 /* Put an array of words in to main memory */
 static inline int put_words(OHCIState *ohci,
-                            uint32_t addr, uint16_t *buf, int num)
+                            dma_addr_t addr, uint16_t *buf, int num)
 {
     int i;
 
@@ -538,40 +539,40 @@ static inline int put_words(OHCIState *ohci,
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         uint16_t tmp = cpu_to_le16(*buf);
-        cpu_physical_memory_write(addr, &tmp, sizeof(tmp));
+        dma_memory_write(ohci->dma, addr, &tmp, sizeof(tmp));
     }
 
     return 1;
 }
 
 static inline int ohci_read_ed(OHCIState *ohci,
-                               uint32_t addr, struct ohci_ed *ed)
+                               dma_addr_t addr, struct ohci_ed *ed)
 {
     return get_dwords(ohci, addr, (uint32_t *)ed, sizeof(*ed) >> 2);
 }
 
 static inline int ohci_read_td(OHCIState *ohci,
-                               uint32_t addr, struct ohci_td *td)
+                               dma_addr_t addr, struct ohci_td *td)
 {
     return get_dwords(ohci, addr, (uint32_t *)td, sizeof(*td) >> 2);
 }
 
 static inline int ohci_read_iso_td(OHCIState *ohci,
-                                   uint32_t addr, struct ohci_iso_td *td)
+                                   dma_addr_t addr, struct ohci_iso_td *td)
 {
     return (get_dwords(ohci, addr, (uint32_t *)td, 4) &&
             get_words(ohci, addr + 16, td->offset, 8));
 }
 
 static inline int ohci_read_hcca(OHCIState *ohci,
-                                 uint32_t addr, struct ohci_hcca *hcca)
+                                 dma_addr_t addr, struct ohci_hcca *hcca)
 {
-    cpu_physical_memory_read(addr + ohci->localmem_base, hcca, sizeof(*hcca));
+    dma_memory_read(ohci->dma, addr + ohci->localmem_base, hcca, sizeof(*hcca));
     return 1;
 }
 
 static inline int ohci_put_ed(OHCIState *ohci,
-                              uint32_t addr, struct ohci_ed *ed)
+                              dma_addr_t addr, struct ohci_ed *ed)
 {
     /* ed->tail is under control of the HCD.
      * Since just ed->head is changed by HC, just write back this
@@ -583,64 +584,63 @@ static inline int ohci_put_ed(OHCIState *ohci,
 }
 
 static inline int ohci_put_td(OHCIState *ohci,
-                              uint32_t addr, struct ohci_td *td)
+                              dma_addr_t addr, struct ohci_td *td)
 {
     return put_dwords(ohci, addr, (uint32_t *)td, sizeof(*td) >> 2);
 }
 
 static inline int ohci_put_iso_td(OHCIState *ohci,
-                                  uint32_t addr, struct ohci_iso_td *td)
+                                  dma_addr_t addr, struct ohci_iso_td *td)
 {
     return (put_dwords(ohci, addr, (uint32_t *)td, 4) &&
             put_words(ohci, addr + 16, td->offset, 8));
 }
 
 static inline int ohci_put_hcca(OHCIState *ohci,
-                                uint32_t addr, struct ohci_hcca *hcca)
+                                dma_addr_t addr, struct ohci_hcca *hcca)
 {
-    cpu_physical_memory_write(addr + ohci->localmem_base + HCCA_WRITEBACK_OFFSET,
-                              (char *)hcca + HCCA_WRITEBACK_OFFSET,
-                              HCCA_WRITEBACK_SIZE);
+    dma_memory_write(ohci->dma,
+                     addr + ohci->localmem_base + HCCA_WRITEBACK_OFFSET,
+                     (char *)hcca + HCCA_WRITEBACK_OFFSET,
+                     HCCA_WRITEBACK_SIZE);
     return 1;
 }
 
 /* Read/Write the contents of a TD from/to main memory.  */
 static void ohci_copy_td(OHCIState *ohci, struct ohci_td *td,
-                         uint8_t *buf, int len, int write)
+                         uint8_t *buf, int len, DMADirection dir)
 {
-    uint32_t ptr;
-    uint32_t n;
+    dma_addr_t ptr, n;
 
     ptr = td->cbp;
     n = 0x1000 - (ptr & 0xfff);
     if (n > len)
         n = len;
-    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, n, write);
+    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, n, dir);
     if (n == len)
         return;
     ptr = td->be & ~0xfffu;
     buf += n;
-    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, len - n, write);
+    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, len - n, dir);
 }
 
 /* Read/Write the contents of an ISO TD from/to main memory.  */
 static void ohci_copy_iso_td(OHCIState *ohci,
                              uint32_t start_addr, uint32_t end_addr,
-                             uint8_t *buf, int len, int write)
+                             uint8_t *buf, int len, DMADirection dir)
 {
-    uint32_t ptr;
-    uint32_t n;
+    dma_addr_t ptr, n;
 
     ptr = start_addr;
     n = 0x1000 - (ptr & 0xfff);
     if (n > len)
         n = len;
-    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, n, write);
+    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, n, dir);
     if (n == len)
         return;
     ptr = end_addr & ~0xfffu;
     buf += n;
-    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, len - n, write);
+    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, len - n, dir);
 }
 
 static void ohci_process_lists(OHCIState *ohci, int completion);
@@ -803,7 +803,8 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed,
     }
 
     if (len && dir != OHCI_TD_DIR_IN) {
-        ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, len, 0);
+        ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, len,
+                         DMA_DIRECTION_TO_DEVICE);
     }
 
     if (completion) {
@@ -827,7 +828,8 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed,
     /* Writeback */
     if (dir == OHCI_TD_DIR_IN && ret >= 0 && ret <= len) {
         /* IN transfer succeeded */
-        ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, ret, 1);
+        ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, ret,
+                         DMA_DIRECTION_FROM_DEVICE);
         OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_CC,
                     OHCI_CC_NOERROR);
         OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_SIZE, ret);
@@ -971,7 +973,8 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed)
                 pktlen = len;
             }
             if (!completion) {
-                ohci_copy_td(ohci, &td, ohci->usb_buf, pktlen, 0);
+                ohci_copy_td(ohci, &td, ohci->usb_buf, pktlen,
+                             DMA_DIRECTION_TO_DEVICE);
             }
         }
     }
@@ -1021,7 +1024,8 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed)
     }
     if (ret >= 0) {
         if (dir == OHCI_TD_DIR_IN) {
-            ohci_copy_td(ohci, &td, ohci->usb_buf, ret, 1);
+            ohci_copy_td(ohci, &td, ohci->usb_buf, ret,
+                         DMA_DIRECTION_FROM_DEVICE);
 #ifdef DEBUG_PACKET
             DPRINTF("  data:");
             for (i = 0; i < ret; i++)
@@ -1748,11 +1752,14 @@ static USBBusOps ohci_bus_ops = {
 };
 
 static int usb_ohci_init(OHCIState *ohci, DeviceState *dev,
-                         int num_ports, uint32_t localmem_base,
-                         char *masterbus, uint32_t firstport)
+                         int num_ports, dma_addr_t localmem_base,
+                         char *masterbus, uint32_t firstport,
+                         DMAContext *dma)
 {
     int i;
 
+    ohci->dma = dma;
+
     if (usb_frame_time == 0) {
 #ifdef OHCI_TIME_WARP
         usb_frame_time = get_ticks_per_sec();
@@ -1817,7 +1824,8 @@ static int usb_ohci_initfn_pci(struct PCIDevice *dev)
     ohci->pci_dev.config[PCI_INTERRUPT_PIN] = 0x01; /* interrupt pin A */
 
     if (usb_ohci_init(&ohci->state, &dev->qdev, ohci->num_ports, 0,
-                      ohci->masterbus, ohci->firstport) != 0) {
+                      ohci->masterbus, ohci->firstport,
+                      pci_dma_context(dev)) != 0) {
         return -1;
     }
     ohci->state.irq = ohci->pci_dev.irq[0];
@@ -1831,7 +1839,7 @@ typedef struct {
     SysBusDevice busdev;
     OHCIState ohci;
     uint32_t num_ports;
-    target_phys_addr_t dma_offset;
+    dma_addr_t dma_offset;
 } OHCISysBusState;
 
 static int ohci_init_pxa(SysBusDevice *dev)
@@ -1839,7 +1847,8 @@ static int ohci_init_pxa(SysBusDevice *dev)
     OHCISysBusState *s = FROM_SYSBUS(OHCISysBusState, dev);
 
     /* Cannot fail as we pass NULL for masterbus */
-    usb_ohci_init(&s->ohci, &dev->qdev, s->num_ports, s->dma_offset, NULL, 0);
+    usb_ohci_init(&s->ohci, &dev->qdev, s->num_ports, s->dma_offset, NULL, 0,
+                  NULL);
     sysbus_init_irq(dev, &s->ohci.irq);
     sysbus_init_mmio(dev, &s->ohci.mem);
 
@@ -1875,7 +1884,7 @@ static TypeInfo ohci_pci_info = {
 
 static Property ohci_sysbus_properties[] = {
     DEFINE_PROP_UINT32("num-ports", OHCISysBusState, num_ports, 3),
-    DEFINE_PROP_TADDR("dma-offset", OHCISysBusState, dma_offset, 3),
+    DEFINE_PROP_DMAADDR("dma-offset", OHCISysBusState, dma_offset, 3),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 9871e24f50..1ace2a41da 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -292,10 +292,10 @@ static void uhci_async_cancel_device(UHCIState *s, USBDevice *dev)
 
 static void uhci_async_cancel_all(UHCIState *s)
 {
-    UHCIQueue *queue;
+    UHCIQueue *queue, *nq;
     UHCIAsync *curr, *n;
 
-    QTAILQ_FOREACH(queue, &s->queues, next) {
+    QTAILQ_FOREACH_SAFE(queue, &s->queues, next, nq) {
         QTAILQ_FOREACH_SAFE(curr, &queue->asyncs, next, n) {
             uhci_async_unlink(curr);
             uhci_async_cancel(curr);
@@ -388,11 +388,23 @@ static const VMStateDescription vmstate_uhci_port = {
     }
 };
 
+static int uhci_post_load(void *opaque, int version_id)
+{
+    UHCIState *s = opaque;
+
+    if (version_id < 2) {
+        s->expire_time = qemu_get_clock_ns(vm_clock) +
+            (get_ticks_per_sec() / FRAME_TIMER_FREQ);
+    }
+    return 0;
+}
+
 static const VMStateDescription vmstate_uhci = {
     .name = "uhci",
     .version_id = 2,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
+    .post_load = uhci_post_load,
     .fields      = (VMStateField []) {
         VMSTATE_PCI_DEVICE(dev, UHCIState),
         VMSTATE_UINT8_EQUAL(num_ports_vmstate, UHCIState),
@@ -871,7 +883,7 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td,
 
 done:
     len = uhci_complete_td(s, td, async, int_mask);
-    usb_packet_unmap(&async->packet);
+    usb_packet_unmap(&async->packet, &async->sgl);
     uhci_async_free(async);
     return len;
 }
@@ -1257,12 +1269,11 @@ static int usb_uhci_vt82c686b_initfn(PCIDevice *dev)
     return usb_uhci_common_initfn(dev);
 }
 
-static int usb_uhci_exit(PCIDevice *dev)
+static void usb_uhci_exit(PCIDevice *dev)
 {
     UHCIState *s = DO_UPCAST(UHCIState, dev, dev);
 
     memory_region_destroy(&s->io_bar);
-    return 0;
 }
 
 static Property uhci_properties[] = {
diff --git a/hw/usb/host-linux.c b/hw/usb/host-linux.c
index a95b0eda55..d55be878ad 100644
--- a/hw/usb/host-linux.c
+++ b/hw/usb/host-linux.c
@@ -111,6 +111,7 @@ typedef struct USBHostDevice {
     uint32_t  iso_urb_count;
     uint32_t  options;
     Notifier  exit;
+    QEMUBH    *bh;
 
     struct endp_data ep_in[USB_MAX_ENDPOINTS];
     struct endp_data ep_out[USB_MAX_ENDPOINTS];
@@ -212,7 +213,7 @@ static int is_iso_started(USBHostDevice *s, int pid, int ep)
 
 static void clear_iso_started(USBHostDevice *s, int pid, int ep)
 {
-    trace_usb_host_ep_stop_iso(s->bus_num, s->addr, ep);
+    trace_usb_host_iso_stop(s->bus_num, s->addr, ep);
     get_endp(s, pid, ep)->iso_started = 0;
 }
 
@@ -220,7 +221,7 @@ static void set_iso_started(USBHostDevice *s, int pid, int ep)
 {
     struct endp_data *e = get_endp(s, pid, ep);
 
-    trace_usb_host_ep_start_iso(s->bus_num, s->addr, ep);
+    trace_usb_host_iso_start(s->bus_num, s->addr, ep);
     if (!e->iso_started) {
         e->iso_started = 1;
         e->inflight = 0;
@@ -318,7 +319,8 @@ static void async_complete(void *opaque)
         if (r < 0) {
             if (errno == EAGAIN) {
                 if (urbs > 2) {
-                    fprintf(stderr, "husb: %d iso urbs finished at once\n", urbs);
+                    /* indicates possible latency issues */
+                    trace_usb_host_iso_many_urbs(s->bus_num, s->addr, urbs);
                 }
                 return;
             }
@@ -351,7 +353,8 @@ static void async_complete(void *opaque)
             urbs++;
             inflight = change_iso_inflight(s, pid, ep, -1);
             if (inflight == 0 && is_iso_started(s, pid, ep)) {
-                fprintf(stderr, "husb: out of buffers for iso stream\n");
+                /* can be latency issues, or simply end of stream */
+                trace_usb_host_iso_out_of_bufs(s->bus_num, s->addr, ep);
             }
             continue;
         }
@@ -1135,7 +1138,7 @@ static int usb_linux_update_endp_table(USBHostDevice *s)
     USBDescriptor *d;
     bool active = false;
 
-    usb_ep_init(&s->dev);
+    usb_ep_reset(&s->dev);
 
     for (i = 0;; i += d->bLength) {
         if (i+2 >= s->descr_len) {
@@ -1238,7 +1241,7 @@ static int usb_linux_update_endp_table(USBHostDevice *s)
     return 0;
 
 error:
-    usb_ep_init(&s->dev);
+    usb_ep_reset(&s->dev);
     return 1;
 }
 
@@ -1325,6 +1328,7 @@ static int usb_host_open(USBHostDevice *dev, int bus_num,
         goto fail;
     }
 
+    usb_ep_init(&dev->dev);
     ret = usb_linux_update_endp_table(dev);
     if (ret) {
         goto fail;
@@ -1421,6 +1425,43 @@ static void usb_host_exit_notifier(struct Notifier *n, void *data)
     }
 }
 
+/*
+ * This is *NOT* about restoring state.  We have absolutely no idea
+ * what state the host device is in at the moment and whenever it is
+ * still present in the first place.  Attemping to contine where we
+ * left off is impossible.
+ *
+ * What we are going to to to here is emulate a surprise removal of
+ * the usb device passed through, then kick host scan so the device
+ * will get re-attached (and re-initialized by the guest) in case it
+ * is still present.
+ *
+ * As the device removal will change the state of other devices (usb
+ * host controller, most likely interrupt controller too) we have to
+ * wait with it until *all* vmstate is loaded.  Thus post_load just
+ * kicks a bottom half which then does the actual work.
+ */
+static void usb_host_post_load_bh(void *opaque)
+{
+    USBHostDevice *dev = opaque;
+
+    if (dev->fd != -1) {
+        usb_host_close(dev);
+    }
+    if (dev->dev.attached) {
+        usb_device_detach(&dev->dev);
+    }
+    usb_host_auto_check(NULL);
+}
+
+static int usb_host_post_load(void *opaque, int version_id)
+{
+    USBHostDevice *dev = opaque;
+
+    qemu_bh_schedule(dev->bh);
+    return 0;
+}
+
 static int usb_host_initfn(USBDevice *dev)
 {
     USBHostDevice *s = DO_UPCAST(USBHostDevice, dev, dev);
@@ -1432,6 +1473,7 @@ static int usb_host_initfn(USBDevice *dev)
     QTAILQ_INSERT_TAIL(&hostdevs, s, next);
     s->exit.notify = usb_host_exit_notifier;
     qemu_add_exit_notifier(&s->exit);
+    s->bh = qemu_bh_new(usb_host_post_load_bh, s);
     usb_host_auto_check(NULL);
 
     if (s->match.bus_num != 0 && s->match.port != NULL) {
@@ -1443,7 +1485,13 @@ static int usb_host_initfn(USBDevice *dev)
 
 static const VMStateDescription vmstate_usb_host = {
     .name = "usb-host",
-    .unmigratable = 1,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .post_load = usb_host_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_USB_DEVICE(dev, USBHostDevice),
+        VMSTATE_END_OF_LIST()
+    }
 };
 
 static Property usb_host_dev_properties[] = {
@@ -1737,25 +1785,27 @@ static void usb_host_auto_check(void *unused)
     struct USBHostDevice *s;
     int unconnected = 0;
 
-    usb_host_scan(NULL, usb_host_auto_scan);
+    if (runstate_is_running()) {
+        usb_host_scan(NULL, usb_host_auto_scan);
 
-    QTAILQ_FOREACH(s, &hostdevs, next) {
-        if (s->fd == -1) {
-            unconnected++;
-        }
-        if (s->seen == 0) {
-            s->errcount = 0;
+        QTAILQ_FOREACH(s, &hostdevs, next) {
+            if (s->fd == -1) {
+                unconnected++;
+            }
+            if (s->seen == 0) {
+                s->errcount = 0;
+            }
+            s->seen = 0;
         }
-        s->seen = 0;
-    }
 
-    if (unconnected == 0) {
-        /* nothing to watch */
-        if (usb_auto_timer) {
-            qemu_del_timer(usb_auto_timer);
-            trace_usb_host_auto_scan_disabled();
+        if (unconnected == 0) {
+            /* nothing to watch */
+            if (usb_auto_timer) {
+                qemu_del_timer(usb_auto_timer);
+                trace_usb_host_auto_scan_disabled();
+            }
+            return;
         }
-        return;
     }
 
     if (!usb_auto_timer) {
diff --git a/hw/usb/libhw.c b/hw/usb/libhw.c
index 2462351389..c0de30ea88 100644
--- a/hw/usb/libhw.c
+++ b/hw/usb/libhw.c
@@ -26,15 +26,15 @@
 
 int usb_packet_map(USBPacket *p, QEMUSGList *sgl)
 {
-    int is_write = (p->pid == USB_TOKEN_IN);
-    target_phys_addr_t len;
+    DMADirection dir = (p->pid == USB_TOKEN_IN) ?
+        DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE;
+    dma_addr_t len;
     void *mem;
     int i;
 
     for (i = 0; i < sgl->nsg; i++) {
         len = sgl->sg[i].len;
-        mem = cpu_physical_memory_map(sgl->sg[i].base, &len,
-                                      is_write);
+        mem = dma_memory_map(sgl->dma, sgl->sg[i].base, &len, dir);
         if (!mem) {
             goto err;
         }
@@ -46,18 +46,19 @@ int usb_packet_map(USBPacket *p, QEMUSGList *sgl)
     return 0;
 
 err:
-    usb_packet_unmap(p);
+    usb_packet_unmap(p, sgl);
     return -1;
 }
 
-void usb_packet_unmap(USBPacket *p)
+void usb_packet_unmap(USBPacket *p, QEMUSGList *sgl)
 {
-    int is_write = (p->pid == USB_TOKEN_IN);
+    DMADirection dir = (p->pid == USB_TOKEN_IN) ?
+        DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE;
     int i;
 
     for (i = 0; i < p->iov.niov; i++) {
-        cpu_physical_memory_unmap(p->iov.iov[i].iov_base,
-                                  p->iov.iov[i].iov_len, is_write,
-                                  p->iov.iov[i].iov_len);
+        dma_memory_unmap(sgl->dma, p->iov.iov[i].iov_base,
+                         p->iov.iov[i].iov_len, dir,
+                         p->iov.iov[i].iov_len);
     }
 }
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index d949f040d5..10b4fbb3a7 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -1033,6 +1033,8 @@ static int usbredir_handle_status(USBRedirDevice *dev,
     case usb_redir_inval:
         WARNING("got invalid param error from usb-host?\n");
         return USB_RET_NAK;
+    case usb_redir_babble:
+        return USB_RET_BABBLE;
     case usb_redir_ioerror:
     case usb_redir_timeout:
     default:
diff --git a/hw/vexpress.c b/hw/vexpress.c
index 8072c5ada9..b6158447d7 100644
--- a/hw/vexpress.c
+++ b/hw/vexpress.c
@@ -284,9 +284,16 @@ static void a15_daughterboard_init(const VEDBoardInfo *daughterboard,
         cpu_irq[n] = irqp[ARM_PIC_CPU_IRQ];
     }
 
-    if (ram_size > 0x80000000) {
-        fprintf(stderr, "vexpress-a15: cannot model more than 2GB RAM\n");
-        exit(1);
+    {
+        /* We have to use a separate 64 bit variable here to avoid the gcc
+         * "comparison is always false due to limited range of data type"
+         * warning if we are on a host where ram_addr_t is 32 bits.
+         */
+        uint64_t rsz = ram_size;
+        if (rsz > (30ULL * 1024 * 1024 * 1024)) {
+            fprintf(stderr, "vexpress-a15: cannot model more than 30GB RAM\n");
+            exit(1);
+        }
     }
 
     memory_region_init_ram(ram, "vexpress.highmem", ram_size);
@@ -420,7 +427,7 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard,
     memory_region_add_subregion(sysmem, map[VE_VIDEORAM], vram);
 
     /* 0x4e000000 LAN9118 Ethernet */
-    if (nd_table[0].vlan) {
+    if (nd_table[0].used) {
         lan9118_init(&nd_table[0], map[VE_ETHERNET], pic[15]);
     }
 
diff --git a/hw/vga-isa-mm.c b/hw/vga-isa-mm.c
index f8984c62cb..44ae7d92c8 100644
--- a/hw/vga-isa-mm.c
+++ b/hw/vga-isa-mm.c
@@ -28,6 +28,8 @@
 #include "pixel_ops.h"
 #include "qemu-timer.h"
 
+#define VGA_RAM_SIZE (8192 * 1024)
+
 typedef struct ISAVGAMMState {
     VGACommonState vga;
     int it_shift;
@@ -128,7 +130,8 @@ int isa_vga_mm_init(target_phys_addr_t vram_base,
 
     s = g_malloc0(sizeof(*s));
 
-    vga_common_init(&s->vga, VGA_RAM_SIZE);
+    s->vga.vram_size_mb = VGA_RAM_SIZE >> 20;
+    vga_common_init(&s->vga);
     vga_mm_init(s, vram_base, ctrl_base, it_shift, address_space);
 
     s->vga.ds = graphic_console_init(s->vga.update, s->vga.invalidate,
diff --git a/hw/vga-isa.c b/hw/vga-isa.c
index 4bcc4db62f..d2904737bc 100644
--- a/hw/vga-isa.c
+++ b/hw/vga-isa.c
@@ -49,7 +49,7 @@ static int vga_initfn(ISADevice *dev)
     MemoryRegion *vga_io_memory;
     const MemoryRegionPortio *vga_ports, *vbe_ports;
 
-    vga_common_init(s, VGA_RAM_SIZE);
+    vga_common_init(s);
     s->legacy_address_space = isa_address_space(dev);
     vga_io_memory = vga_init_io(s, &vga_ports, &vbe_ports);
     isa_register_portio_list(dev, 0x3b0, vga_ports, s, "vga");
@@ -69,6 +69,11 @@ static int vga_initfn(ISADevice *dev)
     return 0;
 }
 
+static Property vga_isa_properties[] = {
+    DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void vga_class_initfn(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -76,6 +81,7 @@ static void vga_class_initfn(ObjectClass *klass, void *data)
     ic->init = vga_initfn;
     dc->reset = vga_reset_isa;
     dc->vmsd = &vmstate_vga_common;
+    dc->props = vga_isa_properties;
 }
 
 static TypeInfo vga_info = {
diff --git a/hw/vga-pci.c b/hw/vga-pci.c
index 465b643d21..37dc019a61 100644
--- a/hw/vga-pci.c
+++ b/hw/vga-pci.c
@@ -53,7 +53,7 @@ static int pci_vga_initfn(PCIDevice *dev)
      VGACommonState *s = &d->vga;
 
      // vga + console init
-     vga_common_init(s, VGA_RAM_SIZE);
+     vga_common_init(s);
      vga_init(s, pci_address_space(dev), pci_address_space_io(dev), true);
 
      s->ds = graphic_console_init(s->update, s->invalidate,
@@ -75,6 +75,11 @@ DeviceState *pci_vga_init(PCIBus *bus)
     return &pci_create_simple(bus, -1, "VGA")->qdev;
 }
 
+static Property vga_pci_properties[] = {
+    DEFINE_PROP_UINT32("vgamem_mb", PCIVGAState, vga.vram_size_mb, 16),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void vga_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -87,6 +92,7 @@ static void vga_class_init(ObjectClass *klass, void *data)
     k->device_id = PCI_DEVICE_ID_QEMU_VGA;
     k->class_id = PCI_CLASS_DISPLAY_VGA;
     dc->vmsd = &vmstate_vga_pci;
+    dc->props = vga_pci_properties;
 }
 
 static TypeInfo vga_info = {
diff --git a/hw/vga.c b/hw/vga.c
index d784df7df4..f82ced8e66 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -38,6 +38,9 @@
 
 //#define DEBUG_BOCHS_VBE
 
+/* 16 state changes per vertical frame @60 Hz */
+#define VGA_TEXT_CURSOR_PERIOD_MS       (1000 * 2 * 16 / 60)
+
 /*
  * Video Graphics Array (VGA)
  *
@@ -1300,6 +1303,7 @@ static void vga_draw_text(VGACommonState *s, int full_update)
     uint32_t *ch_attr_ptr;
     vga_draw_glyph8_func *vga_draw_glyph8;
     vga_draw_glyph9_func *vga_draw_glyph9;
+    int64_t now = qemu_get_clock_ms(vm_clock);
 
     /* compute font data address (in plane 2) */
     v = s->sr[VGA_SEQ_CHARACTER_MAP];
@@ -1370,6 +1374,10 @@ static void vga_draw_text(VGACommonState *s, int full_update)
         s->cursor_end = s->cr[VGA_CRTC_CURSOR_END];
     }
     cursor_ptr = s->vram_ptr + (s->start_addr + cursor_offset) * 4;
+    if (now >= s->cursor_blink_time) {
+        s->cursor_blink_time = now + VGA_TEXT_CURSOR_PERIOD_MS / 2;
+        s->cursor_visible_phase = !s->cursor_visible_phase;
+    }
 
     depth_index = get_depth_index(s->ds);
     if (cw == 16)
@@ -1390,7 +1398,7 @@ static void vga_draw_text(VGACommonState *s, int full_update)
         cx_max = -1;
         for(cx = 0; cx < width; cx++) {
             ch_attr = *(uint16_t *)src;
-            if (full_update || ch_attr != *ch_attr_ptr) {
+            if (full_update || ch_attr != *ch_attr_ptr || src == cursor_ptr) {
                 if (cx < cx_min)
                     cx_min = cx;
                 if (cx > cx_max)
@@ -1420,7 +1428,8 @@ static void vga_draw_text(VGACommonState *s, int full_update)
                                     font_ptr, cheight, fgcol, bgcol, dup9);
                 }
                 if (src == cursor_ptr &&
-                    !(s->cr[VGA_CRTC_CURSOR_START] & 0x20)) {
+                    !(s->cr[VGA_CRTC_CURSOR_START] & 0x20) &&
+                    s->cursor_visible_phase) {
                     int line_start, line_last, h;
                     /* draw the cursor */
                     line_start = s->cr[VGA_CRTC_CURSOR_START] & 0x1f;
@@ -1884,6 +1893,7 @@ static void vga_update_display(void *opaque)
         }
         if (graphic_mode != s->graphic_mode) {
             s->graphic_mode = graphic_mode;
+            s->cursor_blink_time = qemu_get_clock_ms(vm_clock);
             full_update = 1;
         }
         switch(graphic_mode) {
@@ -2225,7 +2235,7 @@ const VMStateDescription vmstate_vga_common = {
     }
 };
 
-void vga_common_init(VGACommonState *s, int vga_ram_size)
+void vga_common_init(VGACommonState *s)
 {
     int i, j, v, b;
 
@@ -2252,16 +2262,23 @@ void vga_common_init(VGACommonState *s, int vga_ram_size)
         expand4to8[i] = v;
     }
 
+    /* valid range: 1 MB -> 256 MB */
+    s->vram_size = 1024 * 1024;
+    while (s->vram_size < (s->vram_size_mb << 20) &&
+           s->vram_size < (256 << 20)) {
+        s->vram_size <<= 1;
+    }
+    s->vram_size_mb = s->vram_size >> 20;
+
 #ifdef CONFIG_BOCHS_VBE
     s->is_vbe_vmstate = 1;
 #else
     s->is_vbe_vmstate = 0;
 #endif
-    memory_region_init_ram(&s->vram, "vga.vram", vga_ram_size);
+    memory_region_init_ram(&s->vram, "vga.vram", s->vram_size);
     vmstate_register_ram_global(&s->vram);
     xen_register_framebuffer(&s->vram);
     s->vram_ptr = memory_region_get_ram_ptr(&s->vram);
-    s->vram_size = vga_ram_size;
     s->get_bpp = vga_get_bpp;
     s->get_offsets = vga_get_offsets;
     s->get_resolution = vga_get_resolution;
diff --git a/hw/vga_int.h b/hw/vga_int.h
index d244d8ff99..8938093682 100644
--- a/hw/vga_int.h
+++ b/hw/vga_int.h
@@ -31,8 +31,8 @@
 /* bochs VBE support */
 #define CONFIG_BOCHS_VBE
 
-#define VBE_DISPI_MAX_XRES              1600
-#define VBE_DISPI_MAX_YRES              1200
+#define VBE_DISPI_MAX_XRES              16000
+#define VBE_DISPI_MAX_YRES              12000
 #define VBE_DISPI_MAX_BPP               32
 
 #define VBE_DISPI_INDEX_ID              0x0
@@ -107,6 +107,7 @@ typedef struct VGACommonState {
     MemoryRegion vram;
     MemoryRegion vram_vbe;
     uint32_t vram_size;
+    uint32_t vram_size_mb; /* property */
     uint32_t latch;
     MemoryRegion *chain4_alias;
     uint8_t sr_index;
@@ -155,6 +156,8 @@ typedef struct VGACommonState {
     uint32_t last_scr_width, last_scr_height; /* in pixels */
     uint32_t last_depth; /* in bits */
     uint8_t cursor_start, cursor_end;
+    bool cursor_visible_phase;
+    int64_t cursor_blink_time;
     uint32_t cursor_offset;
     unsigned int (*rgb_to_pixel)(unsigned int r,
                                  unsigned int g, unsigned b);
@@ -184,7 +187,7 @@ static inline int c6_to_8(int v)
     return (v << 2) | (b << 1) | b;
 }
 
-void vga_common_init(VGACommonState *s, int vga_ram_size);
+void vga_common_init(VGACommonState *s);
 void vga_init(VGACommonState *s, MemoryRegion *address_space,
               MemoryRegion *address_space_io, bool init_vga_ports);
 MemoryRegion *vga_init_io(VGACommonState *s,
@@ -209,7 +212,6 @@ void vga_init_vbe(VGACommonState *s, MemoryRegion *address_space);
 extern const uint8_t sr_mask[8];
 extern const uint8_t gr_mask[16];
 
-#define VGA_RAM_SIZE (8192 * 1024)
 #define VGABIOS_FILENAME "vgabios.bin"
 #define VGABIOS_CIRRUS_FILENAME "vgabios-cirrus.bin"
 
diff --git a/hw/vhost.c b/hw/vhost.c
index 43664e7f4d..0fd8da84e2 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -737,13 +737,13 @@ static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
 
 static void vhost_eventfd_add(MemoryListener *listener,
                               MemoryRegionSection *section,
-                              bool match_data, uint64_t data, int fd)
+                              bool match_data, uint64_t data, EventNotifier *e)
 {
 }
 
 static void vhost_eventfd_del(MemoryListener *listener,
                               MemoryRegionSection *section,
-                              bool match_data, uint64_t data, int fd)
+                              bool match_data, uint64_t data, EventNotifier *e)
 {
 }
 
diff --git a/hw/vhost_net.c b/hw/vhost_net.c
index f672e9dafd..ecaa22dfb4 100644
--- a/hw/vhost_net.c
+++ b/hw/vhost_net.c
@@ -42,7 +42,7 @@ struct vhost_net {
     struct vhost_dev dev;
     struct vhost_virtqueue vqs[2];
     int backend;
-    VLANClientState *vc;
+    NetClientState *nc;
 };
 
 unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
@@ -80,10 +80,10 @@ void vhost_net_ack_features(struct vhost_net *net, unsigned features)
     }
 }
 
-static int vhost_net_get_fd(VLANClientState *backend)
+static int vhost_net_get_fd(NetClientState *backend)
 {
     switch (backend->info->type) {
-    case NET_CLIENT_TYPE_TAP:
+    case NET_CLIENT_OPTIONS_KIND_TAP:
         return tap_get_fd(backend);
     default:
         fprintf(stderr, "vhost-net requires tap backend\n");
@@ -91,7 +91,7 @@ static int vhost_net_get_fd(VLANClientState *backend)
     }
 }
 
-struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd,
+struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
                                  bool force)
 {
     int r;
@@ -104,7 +104,7 @@ struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd,
     if (r < 0) {
         goto fail;
     }
-    net->vc = backend;
+    net->nc = backend;
     net->dev.backend_features = tap_has_vnet_hdr(backend) ? 0 :
         (1 << VHOST_NET_F_VIRTIO_NET_HDR);
     net->backend = r;
@@ -151,7 +151,7 @@ int vhost_net_start(struct vhost_net *net,
         goto fail_notifiers;
     }
     if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
-        tap_set_vnet_hdr_len(net->vc,
+        tap_set_vnet_hdr_len(net->nc,
                              sizeof(struct virtio_net_hdr_mrg_rxbuf));
     }
 
@@ -160,7 +160,7 @@ int vhost_net_start(struct vhost_net *net,
         goto fail_start;
     }
 
-    net->vc->info->poll(net->vc, false);
+    net->nc->info->poll(net->nc, false);
     qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
     file.fd = net->backend;
     for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
@@ -177,10 +177,10 @@ fail:
         int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
         assert(r >= 0);
     }
-    net->vc->info->poll(net->vc, true);
+    net->nc->info->poll(net->nc, true);
     vhost_dev_stop(&net->dev, dev);
     if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
-        tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr));
+        tap_set_vnet_hdr_len(net->nc, sizeof(struct virtio_net_hdr));
     }
 fail_start:
     vhost_dev_disable_notifiers(&net->dev, dev);
@@ -197,10 +197,10 @@ void vhost_net_stop(struct vhost_net *net,
         int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
         assert(r >= 0);
     }
-    net->vc->info->poll(net->vc, true);
+    net->nc->info->poll(net->nc, true);
     vhost_dev_stop(&net->dev, dev);
     if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
-        tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr));
+        tap_set_vnet_hdr_len(net->nc, sizeof(struct virtio_net_hdr));
     }
     vhost_dev_disable_notifiers(&net->dev, dev);
 }
@@ -209,12 +209,12 @@ void vhost_net_cleanup(struct vhost_net *net)
 {
     vhost_dev_cleanup(&net->dev);
     if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
-        tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr));
+        tap_set_vnet_hdr_len(net->nc, sizeof(struct virtio_net_hdr));
     }
     g_free(net);
 }
 #else
-struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd,
+struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
                                  bool force)
 {
     error_report("vhost-net support is not compiled in");
diff --git a/hw/vhost_net.h b/hw/vhost_net.h
index 91e40b195e..a9db23423c 100644
--- a/hw/vhost_net.h
+++ b/hw/vhost_net.h
@@ -6,7 +6,7 @@
 struct vhost_net;
 typedef struct vhost_net VHostNetState;
 
-VHostNetState *vhost_net_init(VLANClientState *backend, int devfd, bool force);
+VHostNetState *vhost_net_init(NetClientState *backend, int devfd, bool force);
 
 bool vhost_net_query(VHostNetState *net, VirtIODevice *dev);
 int vhost_net_start(VHostNetState *net, VirtIODevice *dev);
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index d048cef50f..dd1a6506cf 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -77,7 +77,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
         size_t offset = 0;
         uint32_t pfn;
 
-        while (iov_to_buf(elem.out_sg, elem.out_num, &pfn, offset, 4) == 4) {
+        while (iov_to_buf(elem.out_sg, elem.out_num, offset, &pfn, 4) == 4) {
             ram_addr_t pa;
             ram_addr_t addr;
 
@@ -118,7 +118,7 @@ static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
      */
     reset_stats(s);
 
-    while (iov_to_buf(elem->out_sg, elem->out_num, &stat, offset, sizeof(stat))
+    while (iov_to_buf(elem->out_sg, elem->out_num, offset, &stat, sizeof(stat))
            == sizeof(stat)) {
         uint16_t tag = tswap16(stat.tag);
         uint64_t val = tswap64(stat.val);
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index fe0774617b..f21757ed55 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -14,6 +14,7 @@
 #include "qemu-common.h"
 #include "qemu-error.h"
 #include "trace.h"
+#include "hw/block-common.h"
 #include "blockdev.h"
 #include "virtio-blk.h"
 #include "scsi-defs.h"
@@ -478,19 +479,17 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
     VirtIOBlock *s = to_virtio_blk(vdev);
     struct virtio_blk_config blkcfg;
     uint64_t capacity;
-    int cylinders, heads, secs;
     int blk_size = s->conf->logical_block_size;
 
     bdrv_get_geometry(s->bs, &capacity);
-    bdrv_get_geometry_hint(s->bs, &cylinders, &heads, &secs);
     memset(&blkcfg, 0, sizeof(blkcfg));
     stq_raw(&blkcfg.capacity, capacity);
     stl_raw(&blkcfg.seg_max, 128 - 2);
-    stw_raw(&blkcfg.cylinders, cylinders);
+    stw_raw(&blkcfg.cylinders, s->conf->cyls);
     stl_raw(&blkcfg.blk_size, blk_size);
     stw_raw(&blkcfg.min_io_size, s->conf->min_io_size / blk_size);
     stw_raw(&blkcfg.opt_io_size, s->conf->opt_io_size / blk_size);
-    blkcfg.heads = heads;
+    blkcfg.heads = s->conf->heads;
     /*
      * We must ensure that the block device capacity is a multiple of
      * the logical block size. If that is not the case, lets use
@@ -502,10 +501,10 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
      * divided by 512 - instead it is the amount of blk_size blocks
      * per track (cylinder).
      */
-    if (bdrv_getlength(s->bs) /  heads / secs % blk_size) {
-        blkcfg.sectors = secs & ~s->sector_mask;
+    if (bdrv_getlength(s->bs) /  s->conf->heads / s->conf->secs % blk_size) {
+        blkcfg.sectors = s->conf->secs & ~s->sector_mask;
     } else {
-        blkcfg.sectors = secs;
+        blkcfg.sectors = s->conf->secs;
     }
     blkcfg.size_max = 0;
     blkcfg.physical_block_exp = get_physical_block_exp(s->conf);
@@ -589,9 +588,7 @@ static const BlockDevOps virtio_block_ops = {
 VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
 {
     VirtIOBlock *s;
-    int cylinders, heads, secs;
     static int virtio_blk_id;
-    DriveInfo *dinfo;
 
     if (!blk->conf.bs) {
         error_report("drive property not set");
@@ -602,12 +599,9 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
         return NULL;
     }
 
-    if (!blk->serial) {
-        /* try to fall back to value set with legacy -drive serial=... */
-        dinfo = drive_get_by_blockdev(blk->conf.bs);
-        if (*dinfo->serial) {
-            blk->serial = strdup(dinfo->serial);
-        }
+    blkconf_serial(&blk->conf, &blk->serial);
+    if (blkconf_geometry(&blk->conf, NULL, 65535, 255, 255) < 0) {
+        return NULL;
     }
 
     s = (VirtIOBlock *)virtio_common_init("virtio-blk", VIRTIO_ID_BLOCK,
@@ -622,7 +616,6 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
     s->blk = blk;
     s->rq = NULL;
     s->sector_mask = (s->conf->logical_block_size / BDRV_SECTOR_SIZE) - 1;
-    bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs);
 
     s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);
 
diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h
index d7850012bd..79ebccc95b 100644
--- a/hw/virtio-blk.h
+++ b/hw/virtio-blk.h
@@ -15,7 +15,7 @@
 #define _QEMU_VIRTIO_BLK_H
 
 #include "virtio.h"
-#include "block.h"
+#include "hw/block-common.h"
 
 /* from Linux's linux/virtio_blk.h */
 
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 3f190d417e..b1998b27d3 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -108,7 +108,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
     if (!n->nic->nc.peer) {
         return;
     }
-    if (n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
+    if (n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
         return;
     }
 
@@ -163,7 +163,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
     }
 }
 
-static void virtio_net_set_link_status(VLANClientState *nc)
+static void virtio_net_set_link_status(NetClientState *nc)
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
     uint16_t old_status = n->status;
@@ -205,7 +205,7 @@ static int peer_has_vnet_hdr(VirtIONet *n)
     if (!n->nic->nc.peer)
         return 0;
 
-    if (n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP)
+    if (n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP)
         return 0;
 
     n->has_vnet_hdr = tap_has_vnet_hdr(n->nic->nc.peer);
@@ -249,7 +249,7 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
     }
 
     if (!n->nic->nc.peer ||
-        n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
+        n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
         return features;
     }
     if (!tap_get_vhost_net(n->nic->nc.peer)) {
@@ -288,7 +288,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
                         (features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
     }
     if (!n->nic->nc.peer ||
-        n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
+        n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
         return;
     }
     if (!tap_get_vhost_net(n->nic->nc.peer)) {
@@ -453,7 +453,7 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
     qemu_notify_event();
 }
 
-static int virtio_net_can_receive(VLANClientState *nc)
+static int virtio_net_can_receive(NetClientState *nc)
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
     if (!n->vdev.vm_running) {
@@ -593,7 +593,7 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
     return 0;
 }
 
-static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
     struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
@@ -656,8 +656,8 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_
         }
 
         /* copy in packet.  ugh */
-        len = iov_from_buf(sg, elem.in_num,
-                           buf + offset, 0, size - offset);
+        len = iov_from_buf(sg, elem.in_num, 0,
+                           buf + offset, size - offset);
         total += len;
         offset += len;
         /* If buffers can't be merged, at this point we
@@ -690,7 +690,7 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_
 
 static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
 
-static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
+static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -980,7 +980,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
     return 0;
 }
 
-static void virtio_net_cleanup(VLANClientState *nc)
+static void virtio_net_cleanup(NetClientState *nc)
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -988,7 +988,7 @@ static void virtio_net_cleanup(VLANClientState *nc)
 }
 
 static NetClientInfo net_virtio_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = virtio_net_can_receive,
     .receive = virtio_net_receive,
@@ -1077,6 +1077,6 @@ void virtio_net_exit(VirtIODevice *vdev)
         qemu_bh_delete(n->tx_bh);
     }
 
-    qemu_del_vlan_client(&n->nic->nc);
+    qemu_del_net_client(&n->nic->nc);
     virtio_cleanup(&n->vdev);
 }
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 9342eed070..125eded9ca 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -160,7 +160,7 @@ static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
 }
 
 static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy,
-                                                 int n, bool assign)
+                                                 int n, bool assign, bool set_handler)
 {
     VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
     EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
@@ -173,46 +173,18 @@ static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy,
                          __func__, r);
             return r;
         }
+        virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler);
         memory_region_add_eventfd(&proxy->bar, VIRTIO_PCI_QUEUE_NOTIFY, 2,
-                                  true, n, event_notifier_get_fd(notifier));
+                                  true, n, notifier);
     } else {
         memory_region_del_eventfd(&proxy->bar, VIRTIO_PCI_QUEUE_NOTIFY, 2,
-                                  true, n, event_notifier_get_fd(notifier));
-        /* Handle the race condition where the guest kicked and we deassigned
-         * before we got around to handling the kick.
-         */
-        if (event_notifier_test_and_clear(notifier)) {
-            virtio_queue_notify_vq(vq);
-        }
-
+                                  true, n, notifier);
+        virtio_queue_set_host_notifier_fd_handler(vq, false, false);
         event_notifier_cleanup(notifier);
     }
     return r;
 }
 
-static void virtio_pci_host_notifier_read(void *opaque)
-{
-    VirtQueue *vq = opaque;
-    EventNotifier *n = virtio_queue_get_host_notifier(vq);
-    if (event_notifier_test_and_clear(n)) {
-        virtio_queue_notify_vq(vq);
-    }
-}
-
-static void virtio_pci_set_host_notifier_fd_handler(VirtIOPCIProxy *proxy,
-                                                    int n, bool assign)
-{
-    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
-    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
-    if (assign) {
-        qemu_set_fd_handler(event_notifier_get_fd(notifier),
-                            virtio_pci_host_notifier_read, NULL, vq);
-    } else {
-        qemu_set_fd_handler(event_notifier_get_fd(notifier),
-                            NULL, NULL, NULL);
-    }
-}
-
 static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
 {
     int n, r;
@@ -228,12 +200,10 @@ static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
             continue;
         }
 
-        r = virtio_pci_set_host_notifier_internal(proxy, n, true);
+        r = virtio_pci_set_host_notifier_internal(proxy, n, true, true);
         if (r < 0) {
             goto assign_error;
         }
-
-        virtio_pci_set_host_notifier_fd_handler(proxy, n, true);
     }
     proxy->ioeventfd_started = true;
     return;
@@ -244,8 +214,7 @@ assign_error:
             continue;
         }
 
-        virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
-        r = virtio_pci_set_host_notifier_internal(proxy, n, false);
+        r = virtio_pci_set_host_notifier_internal(proxy, n, false, false);
         assert(r >= 0);
     }
     proxy->ioeventfd_started = false;
@@ -266,8 +235,7 @@ static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
             continue;
         }
 
-        virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
-        r = virtio_pci_set_host_notifier_internal(proxy, n, false);
+        r = virtio_pci_set_host_notifier_internal(proxy, n, false, false);
         assert(r >= 0);
     }
     proxy->ioeventfd_started = false;
@@ -528,25 +496,15 @@ static unsigned virtio_pci_get_features(void *opaque)
     return proxy->host_features;
 }
 
-static void virtio_pci_guest_notifier_read(void *opaque)
-{
-    VirtQueue *vq = opaque;
-    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
-    if (event_notifier_test_and_clear(n)) {
-        virtio_irq(vq);
-    }
-}
-
 static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
                                         unsigned int queue_no,
                                         unsigned int vector,
                                         MSIMessage msg)
 {
     VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
     VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
-    int fd, ret;
-
-    fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq));
+    int ret;
 
     if (irqfd->users == 0) {
         ret = kvm_irqchip_add_msi_route(kvm_state, msg);
@@ -557,7 +515,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
     }
     irqfd->users++;
 
-    ret = kvm_irqchip_add_irqfd(kvm_state, fd, irqfd->virq);
+    ret = kvm_irqchip_add_irq_notifier(kvm_state, n, irqfd->virq);
     if (ret < 0) {
         if (--irqfd->users == 0) {
             kvm_irqchip_release_virq(kvm_state, irqfd->virq);
@@ -565,8 +523,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
         return ret;
     }
 
-    qemu_set_fd_handler(fd, NULL, NULL, NULL);
-
+    virtio_queue_set_guest_notifier_fd_handler(vq, true, true);
     return 0;
 }
 
@@ -575,19 +532,18 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
                                              unsigned int vector)
 {
     VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
     VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
-    int fd, ret;
-
-    fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq));
+    int ret;
 
-    ret = kvm_irqchip_remove_irqfd(kvm_state, fd, irqfd->virq);
+    ret = kvm_irqchip_remove_irq_notifier(kvm_state, n, irqfd->virq);
     assert(ret == 0);
 
     if (--irqfd->users == 0) {
         kvm_irqchip_release_virq(kvm_state, irqfd->virq);
     }
 
-    qemu_set_fd_handler(fd, virtio_pci_guest_notifier_read, NULL, vq);
+    virtio_queue_set_guest_notifier_fd_handler(vq, true, false);
 }
 
 static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
@@ -649,14 +605,9 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
         if (r < 0) {
             return r;
         }
-        qemu_set_fd_handler(event_notifier_get_fd(notifier),
-                            virtio_pci_guest_notifier_read, NULL, vq);
+        virtio_queue_set_guest_notifier_fd_handler(vq, true, false);
     } else {
-        qemu_set_fd_handler(event_notifier_get_fd(notifier),
-                            NULL, NULL, NULL);
-        /* Test and clear notifier before closing it,
-         * in case poll callback didn't have time to run. */
-        virtio_pci_guest_notifier_read(vq);
+        virtio_queue_set_guest_notifier_fd_handler(vq, false, false);
         event_notifier_cleanup(notifier);
     }
 
@@ -732,7 +683,7 @@ static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign)
      * currently only stops on status change away from ok,
      * reset, vmstop and such. If we do add code to start here,
      * need to check vmstate, device state etc. */
-    return virtio_pci_set_host_notifier_internal(proxy, n, assign);
+    return virtio_pci_set_host_notifier_internal(proxy, n, assign, false);
 }
 
 static void virtio_pci_vmstate_change(void *opaque, bool running)
@@ -782,13 +733,10 @@ void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev)
     pci_set_word(config + PCI_SUBSYSTEM_ID, vdev->device_id);
     config[PCI_INTERRUPT_PIN] = 1;
 
-    memory_region_init(&proxy->msix_bar, "virtio-msix", 4096);
-    if (vdev->nvectors && !msix_init(&proxy->pci_dev, vdev->nvectors,
-                                     &proxy->msix_bar, 1, 0)) {
-        pci_register_bar(&proxy->pci_dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY,
-                         &proxy->msix_bar);
-    } else
+    if (vdev->nvectors &&
+        msix_init_exclusive_bar(&proxy->pci_dev, vdev->nvectors, 1)) {
         vdev->nvectors = 0;
+    }
 
     proxy->pci_dev.config_write = virtio_write_config;
 
@@ -831,24 +779,21 @@ static int virtio_blk_init_pci(PCIDevice *pci_dev)
     return 0;
 }
 
-static int virtio_exit_pci(PCIDevice *pci_dev)
+static void virtio_exit_pci(PCIDevice *pci_dev)
 {
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
-    int r;
 
     memory_region_destroy(&proxy->bar);
-    r = msix_uninit(pci_dev, &proxy->msix_bar);
-    memory_region_destroy(&proxy->msix_bar);
-    return r;
+    msix_uninit_exclusive_bar(pci_dev);
 }
 
-static int virtio_blk_exit_pci(PCIDevice *pci_dev)
+static void virtio_blk_exit_pci(PCIDevice *pci_dev)
 {
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
 
     virtio_pci_stop_ioeventfd(proxy);
     virtio_blk_exit(proxy->vdev);
-    return virtio_exit_pci(pci_dev);
+    virtio_exit_pci(pci_dev);
 }
 
 static int virtio_serial_init_pci(PCIDevice *pci_dev)
@@ -873,13 +818,13 @@ static int virtio_serial_init_pci(PCIDevice *pci_dev)
     return 0;
 }
 
-static int virtio_serial_exit_pci(PCIDevice *pci_dev)
+static void virtio_serial_exit_pci(PCIDevice *pci_dev)
 {
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
 
     virtio_pci_stop_ioeventfd(proxy);
     virtio_serial_exit(proxy->vdev);
-    return virtio_exit_pci(pci_dev);
+    virtio_exit_pci(pci_dev);
 }
 
 static int virtio_net_init_pci(PCIDevice *pci_dev)
@@ -897,13 +842,13 @@ static int virtio_net_init_pci(PCIDevice *pci_dev)
     return 0;
 }
 
-static int virtio_net_exit_pci(PCIDevice *pci_dev)
+static void virtio_net_exit_pci(PCIDevice *pci_dev)
 {
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
 
     virtio_pci_stop_ioeventfd(proxy);
     virtio_net_exit(proxy->vdev);
-    return virtio_exit_pci(pci_dev);
+    virtio_exit_pci(pci_dev);
 }
 
 static int virtio_balloon_init_pci(PCIDevice *pci_dev)
@@ -924,18 +869,19 @@ static int virtio_balloon_init_pci(PCIDevice *pci_dev)
     return 0;
 }
 
-static int virtio_balloon_exit_pci(PCIDevice *pci_dev)
+static void virtio_balloon_exit_pci(PCIDevice *pci_dev)
 {
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
 
     virtio_pci_stop_ioeventfd(proxy);
     virtio_balloon_exit(proxy->vdev);
-    return virtio_exit_pci(pci_dev);
+    virtio_exit_pci(pci_dev);
 }
 
 static Property virtio_blk_properties[] = {
     DEFINE_PROP_HEX32("class", VirtIOPCIProxy, class_code, 0),
     DEFINE_BLOCK_PROPERTIES(VirtIOPCIProxy, blk.conf),
+    DEFINE_BLOCK_CHS_PROPERTIES(VirtIOPCIProxy, blk.conf),
     DEFINE_PROP_STRING("serial", VirtIOPCIProxy, blk.serial),
 #ifdef __linux__
     DEFINE_PROP_BIT("scsi", VirtIOPCIProxy, blk.scsi, 0, true),
@@ -1071,7 +1017,9 @@ static int virtio_scsi_init_pci(PCIDevice *pci_dev)
         return -EINVAL;
     }
 
-    vdev->nvectors = proxy->nvectors;
+    vdev->nvectors = proxy->nvectors == DEV_NVECTORS_UNSPECIFIED
+                                        ? proxy->scsi.num_queues + 3
+                                        : proxy->nvectors;
     virtio_init_pci(proxy, vdev);
 
     /* make the actual value visible */
@@ -1079,16 +1027,17 @@ static int virtio_scsi_init_pci(PCIDevice *pci_dev)
     return 0;
 }
 
-static int virtio_scsi_exit_pci(PCIDevice *pci_dev)
+static void virtio_scsi_exit_pci(PCIDevice *pci_dev)
 {
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
 
     virtio_scsi_exit(proxy->vdev);
-    return virtio_exit_pci(pci_dev);
+    virtio_exit_pci(pci_dev);
 }
 
 static Property virtio_scsi_properties[] = {
-    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
+    DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, DEV_NVECTORS_UNSPECIFIED),
     DEFINE_VIRTIO_SCSI_PROPERTIES(VirtIOPCIProxy, host_features, scsi),
     DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/hw/virtio-pci.h b/hw/virtio-pci.h
index 91b791ba9d..ac9d522f37 100644
--- a/hw/virtio-pci.h
+++ b/hw/virtio-pci.h
@@ -34,7 +34,6 @@ typedef struct {
     PCIDevice pci_dev;
     VirtIODevice *vdev;
     MemoryRegion bar;
-    MemoryRegion msix_bar;
     uint32_t flags;
     uint32_t class_code;
     uint32_t nvectors;
diff --git a/hw/virtio-scsi.c b/hw/virtio-scsi.c
index e1a767ea78..c4a5b22f94 100644
--- a/hw/virtio-scsi.c
+++ b/hw/virtio-scsi.c
@@ -24,6 +24,11 @@
 #define VIRTIO_SCSI_MAX_TARGET  255
 #define VIRTIO_SCSI_MAX_LUN     16383
 
+/* Feature Bits */
+#define VIRTIO_SCSI_F_INOUT                    0
+#define VIRTIO_SCSI_F_HOTPLUG                  1
+#define VIRTIO_SCSI_F_CHANGE                   2
+
 /* Response codes */
 #define VIRTIO_SCSI_S_OK                       0
 #define VIRTIO_SCSI_S_OVERRUN                  1
@@ -59,6 +64,12 @@
 #define VIRTIO_SCSI_T_NO_EVENT                 0
 #define VIRTIO_SCSI_T_TRANSPORT_RESET          1
 #define VIRTIO_SCSI_T_ASYNC_NOTIFY             2
+#define VIRTIO_SCSI_T_PARAM_CHANGE             3
+
+/* Reasons for transport reset event */
+#define VIRTIO_SCSI_EVT_RESET_HARD             0
+#define VIRTIO_SCSI_EVT_RESET_RESCAN           1
+#define VIRTIO_SCSI_EVT_RESET_REMOVED          2
 
 /* SCSI command request, followed by data-out */
 typedef struct {
@@ -132,6 +143,7 @@ typedef struct {
     uint32_t sense_size;
     uint32_t cdb_size;
     int resetting;
+    bool events_dropped;
     VirtQueue *ctrl_vq;
     VirtQueue *event_vq;
     VirtQueue *cmd_vqs[0];
@@ -206,11 +218,13 @@ static void qemu_sgl_init_external(QEMUSGList *qsgl, struct iovec *sg,
 static void virtio_scsi_parse_req(VirtIOSCSI *s, VirtQueue *vq,
                                   VirtIOSCSIReq *req)
 {
-    assert(req->elem.out_num && req->elem.in_num);
+    assert(req->elem.in_num);
     req->vq = vq;
     req->dev = s;
     req->sreq = NULL;
-    req->req.buf = req->elem.out_sg[0].iov_base;
+    if (req->elem.out_num) {
+        req->req.buf = req->elem.out_sg[0].iov_base;
+    }
     req->resp.buf = req->elem.in_sg[0].iov_base;
 
     if (req->elem.out_num > 1) {
@@ -541,6 +555,8 @@ static void virtio_scsi_set_config(VirtIODevice *vdev,
 static uint32_t virtio_scsi_get_features(VirtIODevice *vdev,
                                          uint32_t requested_features)
 {
+    requested_features |= (1UL << VIRTIO_SCSI_F_HOTPLUG);
+    requested_features |= (1UL << VIRTIO_SCSI_F_CHANGE);
     return requested_features;
 }
 
@@ -550,6 +566,7 @@ static void virtio_scsi_reset(VirtIODevice *vdev)
 
     s->sense_size = VIRTIO_SCSI_SENSE_SIZE;
     s->cdb_size = VIRTIO_SCSI_CDB_SIZE;
+    s->events_dropped = false;
 }
 
 /* The device does not have anything to save beyond the virtio data.
@@ -573,6 +590,93 @@ static int virtio_scsi_load(QEMUFile *f, void *opaque, int version_id)
     return 0;
 }
 
+static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
+                                   uint32_t event, uint32_t reason)
+{
+    VirtIOSCSIReq *req = virtio_scsi_pop_req(s, s->event_vq);
+    VirtIOSCSIEvent *evt;
+    int in_size;
+
+    if (!req) {
+        s->events_dropped = true;
+        return;
+    }
+
+    if (req->elem.out_num || req->elem.in_num != 1) {
+        virtio_scsi_bad_req();
+    }
+
+    if (s->events_dropped) {
+        event |= VIRTIO_SCSI_T_EVENTS_MISSED;
+        s->events_dropped = false;
+    }
+
+    in_size = req->elem.in_sg[0].iov_len;
+    if (in_size < sizeof(VirtIOSCSIEvent)) {
+        virtio_scsi_bad_req();
+    }
+
+    evt = req->resp.event;
+    memset(evt, 0, sizeof(VirtIOSCSIEvent));
+    evt->event = event;
+    evt->reason = reason;
+    if (!dev) {
+        assert(event == VIRTIO_SCSI_T_NO_EVENT);
+    } else {
+        evt->lun[0] = 1;
+        evt->lun[1] = dev->id;
+
+        /* Linux wants us to keep the same encoding we use for REPORT LUNS.  */
+        if (dev->lun >= 256) {
+            evt->lun[2] = (dev->lun >> 8) | 0x40;
+        }
+        evt->lun[3] = dev->lun & 0xFF;
+    }
+    virtio_scsi_complete_req(req);
+}
+
+static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOSCSI *s = (VirtIOSCSI *)vdev;
+
+    if (s->events_dropped) {
+        virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
+    }
+}
+
+static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense)
+{
+    VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
+
+    if (((s->vdev.guest_features >> VIRTIO_SCSI_F_CHANGE) & 1) &&
+        (s->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK) &&
+        dev->type != TYPE_ROM) {
+        virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE,
+                               sense.asc | (sense.ascq << 8));
+    }
+}
+
+static void virtio_scsi_hotplug(SCSIBus *bus, SCSIDevice *dev)
+{
+    VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
+
+    if (((s->vdev.guest_features >> VIRTIO_SCSI_F_HOTPLUG) & 1) &&
+        (s->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_TRANSPORT_RESET,
+                               VIRTIO_SCSI_EVT_RESET_RESCAN);
+    }
+}
+
+static void virtio_scsi_hot_unplug(SCSIBus *bus, SCSIDevice *dev)
+{
+    VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
+
+    if ((s->vdev.guest_features >> VIRTIO_SCSI_F_HOTPLUG) & 1) {
+        virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_TRANSPORT_RESET,
+                               VIRTIO_SCSI_EVT_RESET_REMOVED);
+    }
+}
+
 static struct SCSIBusInfo virtio_scsi_scsi_info = {
     .tcq = true,
     .max_channel = VIRTIO_SCSI_MAX_CHANNEL,
@@ -581,6 +685,9 @@ static struct SCSIBusInfo virtio_scsi_scsi_info = {
 
     .complete = virtio_scsi_command_complete,
     .cancel = virtio_scsi_request_cancelled,
+    .change = virtio_scsi_change,
+    .hotplug = virtio_scsi_hotplug,
+    .hot_unplug = virtio_scsi_hot_unplug,
     .get_sg_list = virtio_scsi_get_sg_list,
     .save_request = virtio_scsi_save_request,
     .load_request = virtio_scsi_load_request,
@@ -609,7 +716,7 @@ VirtIODevice *virtio_scsi_init(DeviceState *dev, VirtIOSCSIConf *proxyconf)
     s->ctrl_vq = virtio_add_queue(&s->vdev, VIRTIO_SCSI_VQ_SIZE,
                                    virtio_scsi_handle_ctrl);
     s->event_vq = virtio_add_queue(&s->vdev, VIRTIO_SCSI_VQ_SIZE,
-                                   NULL);
+                                   virtio_scsi_handle_event);
     for (i = 0; i < s->conf->num_queues; i++) {
         s->cmd_vqs[i] = virtio_add_queue(&s->vdev, VIRTIO_SCSI_VQ_SIZE,
                                          virtio_scsi_handle_cmd);
diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c
index 96382a4ea1..82073f5dc2 100644
--- a/hw/virtio-serial-bus.c
+++ b/hw/virtio-serial-bus.c
@@ -106,8 +106,8 @@ static size_t write_to_port(VirtIOSerialPort *port,
             break;
         }
 
-        len = iov_from_buf(elem.in_sg, elem.in_num,
-                           buf + offset, 0, size - offset);
+        len = iov_from_buf(elem.in_sg, elem.in_num, 0,
+                           buf + offset, size - offset);
         offset += len;
 
         virtqueue_push(vq, &elem, len);
@@ -454,7 +454,7 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq)
     len = 0;
     buf = NULL;
     while (virtqueue_pop(vq, &elem)) {
-        size_t cur_len, copied;
+        size_t cur_len;
 
         cur_len = iov_size(elem.out_sg, elem.out_num);
         /*
@@ -467,9 +467,9 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq)
             buf = g_malloc(cur_len);
             len = cur_len;
         }
-        copied = iov_to_buf(elem.out_sg, elem.out_num, buf, 0, len);
+        iov_to_buf(elem.out_sg, elem.out_num, 0, buf, cur_len);
 
-        handle_control_message(vser, buf, copied);
+        handle_control_message(vser, buf, cur_len);
         virtqueue_push(vq, &elem, 0);
     }
     g_free(buf);
diff --git a/hw/virtio.c b/hw/virtio.c
index 168abe4864..209c763751 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -984,10 +984,59 @@ VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
     return vdev->vq + n;
 }
 
+static void virtio_queue_guest_notifier_read(EventNotifier *n)
+{
+    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
+    if (event_notifier_test_and_clear(n)) {
+        virtio_irq(vq);
+    }
+}
+
+void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
+                                                bool with_irqfd)
+{
+    if (assign && !with_irqfd) {
+        event_notifier_set_handler(&vq->guest_notifier,
+                                   virtio_queue_guest_notifier_read);
+    } else {
+        event_notifier_set_handler(&vq->guest_notifier, NULL);
+    }
+    if (!assign) {
+        /* Test and clear notifier before closing it,
+         * in case poll callback didn't have time to run. */
+        virtio_queue_guest_notifier_read(&vq->guest_notifier);
+    }
+}
+
 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
 {
     return &vq->guest_notifier;
 }
+
+static void virtio_queue_host_notifier_read(EventNotifier *n)
+{
+    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
+    if (event_notifier_test_and_clear(n)) {
+        virtio_queue_notify_vq(vq);
+    }
+}
+
+void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
+                                               bool set_handler)
+{
+    if (assign && set_handler) {
+        event_notifier_set_handler(&vq->host_notifier,
+                                   virtio_queue_host_notifier_read);
+    } else {
+        event_notifier_set_handler(&vq->host_notifier, NULL);
+    }
+    if (!assign) {
+        /* Test and clear notifier before after disabling event,
+         * in case poll callback didn't have time to run. */
+        virtio_queue_host_notifier_read(&vq->host_notifier);
+    }
+}
+
 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
 {
     return &vq->host_notifier;
diff --git a/hw/virtio.h b/hw/virtio.h
index 85aabe53d8..7a4f564529 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -18,7 +18,6 @@
 #include "net.h"
 #include "qdev.h"
 #include "sysemu.h"
-#include "block.h"
 #include "event_notifier.h"
 #ifdef CONFIG_LINUX
 #include "9p.h"
@@ -231,7 +230,11 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
 int virtio_queue_get_id(VirtQueue *vq);
 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
+void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
+                                                bool with_irqfd);
 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
+void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
+                                               bool set_handler);
 void virtio_queue_notify_vq(VirtQueue *vq);
 void virtio_irq(VirtQueue *vq);
 #endif
diff --git a/hw/vmware_vga.c b/hw/vmware_vga.c
index 142d9f4ea0..f5e4f440d5 100644
--- a/hw/vmware_vga.c
+++ b/hw/vmware_vga.c
@@ -1078,7 +1078,7 @@ static const VMStateDescription vmstate_vmware_vga = {
     }
 };
 
-static void vmsvga_init(struct vmsvga_state_s *s, int vga_ram_size,
+static void vmsvga_init(struct vmsvga_state_s *s,
                         MemoryRegion *address_space, MemoryRegion *io)
 {
     s->scratch_size = SVGA_SCRATCH_SIZE;
@@ -1095,7 +1095,7 @@ static void vmsvga_init(struct vmsvga_state_s *s, int vga_ram_size,
     vmstate_register_ram_global(&s->fifo_ram);
     s->fifo_ptr = memory_region_get_ram_ptr(&s->fifo_ram);
 
-    vga_common_init(&s->vga, vga_ram_size);
+    vga_common_init(&s->vga);
     vga_init(&s->vga, address_space, io, true);
     vmstate_register(NULL, 0, &vmstate_vga_common, &s->vga);
 
@@ -1150,11 +1150,14 @@ static void vmsvga_io_write(void *opaque, target_phys_addr_t addr,
 
     switch (addr) {
     case SVGA_IO_MUL * SVGA_INDEX_PORT:
-        return vmsvga_index_write(s, addr, data);
+        vmsvga_index_write(s, addr, data);
+        break;
     case SVGA_IO_MUL * SVGA_VALUE_PORT:
-        return vmsvga_value_write(s, addr, data);
+        vmsvga_value_write(s, addr, data);
+        break;
     case SVGA_IO_MUL * SVGA_BIOS_PORT:
-        return vmsvga_bios_write(s, addr, data);
+        vmsvga_bios_write(s, addr, data);
+        break;
     }
 }
 
@@ -1184,7 +1187,7 @@ static int pci_vmsvga_initfn(PCIDevice *dev)
                           "vmsvga-io", 0x10);
     pci_register_bar(&s->card, 0, PCI_BASE_ADDRESS_SPACE_IO, &s->io_bar);
 
-    vmsvga_init(&s->chip, VGA_RAM_SIZE, pci_address_space(dev),
+    vmsvga_init(&s->chip, pci_address_space(dev),
                 pci_address_space_io(dev));
 
     pci_register_bar(&s->card, 1, PCI_BASE_ADDRESS_MEM_PREFETCH, iomem);
@@ -1199,6 +1202,12 @@ static int pci_vmsvga_initfn(PCIDevice *dev)
     return 0;
 }
 
+static Property vga_vmware_properties[] = {
+    DEFINE_PROP_UINT32("vgamem_mb", struct pci_vmsvga_state_s,
+                       chip.vga.vram_size_mb, 16),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void vmsvga_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -1214,6 +1223,7 @@ static void vmsvga_class_init(ObjectClass *klass, void *data)
     k->subsystem_id = SVGA_PCI_DEVICE_ID;
     dc->reset = vmsvga_reset;
     dc->vmsd = &vmstate_vmware_vga;
+    dc->props = vga_vmware_properties;
 }
 
 static TypeInfo vmsvga_info = {
diff --git a/hw/watchdog.c b/hw/watchdog.c
index a42124d520..b52acedd98 100644
--- a/hw/watchdog.c
+++ b/hw/watchdog.c
@@ -55,7 +55,7 @@ int select_watchdog(const char *p)
     QemuOpts *opts;
 
     /* -watchdog ? lists available devices and exits cleanly. */
-    if (strcmp(p, "?") == 0) {
+    if (is_help_option(p)) {
         QLIST_FOREACH(model, &watchdog_list, entry) {
             fprintf(stderr, "\t%s\t%s\n",
                      model->wdt_name, model->wdt_description);
diff --git a/hw/wdt_i6300esb.c b/hw/wdt_i6300esb.c
index 15c69db932..4a83474906 100644
--- a/hw/wdt_i6300esb.c
+++ b/hw/wdt_i6300esb.c
@@ -411,13 +411,11 @@ static int i6300esb_init(PCIDevice *dev)
     return 0;
 }
 
-static int i6300esb_exit(PCIDevice *dev)
+static void i6300esb_exit(PCIDevice *dev)
 {
     I6300State *d = DO_UPCAST(I6300State, dev, dev);
 
     memory_region_destroy(&d->io_mem);
-
-    return 0;
 }
 
 static WatchdogTimerModel model = {
diff --git a/hw/xen-host-pci-device.c b/hw/xen-host-pci-device.c
new file mode 100644
index 0000000000..e7ff680ef2
--- /dev/null
+++ b/hw/xen-host-pci-device.c
@@ -0,0 +1,396 @@
+/*
+ * Copyright (C) 2011       Citrix Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "xen-host-pci-device.h"
+
+#define XEN_HOST_PCI_MAX_EXT_CAP \
+    ((PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE) / (PCI_CAP_SIZEOF + 4))
+
+#ifdef XEN_HOST_PCI_DEVICE_DEBUG
+#  define XEN_HOST_PCI_LOG(f, a...) fprintf(stderr, "%s: " f, __func__, ##a)
+#else
+#  define XEN_HOST_PCI_LOG(f, a...) (void)0
+#endif
+
+/*
+ * from linux/ioport.h
+ * IO resources have these defined flags.
+ */
+#define IORESOURCE_BITS         0x000000ff      /* Bus-specific bits */
+
+#define IORESOURCE_TYPE_BITS    0x00000f00      /* Resource type */
+#define IORESOURCE_IO           0x00000100
+#define IORESOURCE_MEM          0x00000200
+
+#define IORESOURCE_PREFETCH     0x00001000      /* No side effects */
+#define IORESOURCE_MEM_64       0x00100000
+
+static int xen_host_pci_sysfs_path(const XenHostPCIDevice *d,
+                                   const char *name, char *buf, ssize_t size)
+{
+    int rc;
+
+    rc = snprintf(buf, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%d/%s",
+                  d->domain, d->bus, d->dev, d->func, name);
+
+    if (rc >= size || rc < 0) {
+        /* The ouput is truncated or an other error is encountered */
+        return -ENODEV;
+    }
+    return 0;
+}
+
+
+/* This size should be enough to read the first 7 lines of a ressource file */
+#define XEN_HOST_PCI_RESSOURCE_BUFFER_SIZE 400
+static int xen_host_pci_get_resource(XenHostPCIDevice *d)
+{
+    int i, rc, fd;
+    char path[PATH_MAX];
+    char buf[XEN_HOST_PCI_RESSOURCE_BUFFER_SIZE];
+    unsigned long long start, end, flags, size;
+    char *endptr, *s;
+    uint8_t type;
+
+    rc = xen_host_pci_sysfs_path(d, "resource", path, sizeof (path));
+    if (rc) {
+        return rc;
+    }
+    fd = open(path, O_RDONLY);
+    if (fd == -1) {
+        XEN_HOST_PCI_LOG("Error: Can't open %s: %s\n", path, strerror(errno));
+        return -errno;
+    }
+
+    do {
+        rc = read(fd, &buf, sizeof (buf) - 1);
+        if (rc < 0 && errno != EINTR) {
+            rc = -errno;
+            goto out;
+        }
+    } while (rc < 0);
+    buf[rc] = 0;
+    rc = 0;
+
+    s = buf;
+    for (i = 0; i < PCI_NUM_REGIONS; i++) {
+        type = 0;
+
+        start = strtoll(s, &endptr, 16);
+        if (*endptr != ' ' || s == endptr) {
+            break;
+        }
+        s = endptr + 1;
+        end = strtoll(s, &endptr, 16);
+        if (*endptr != ' ' || s == endptr) {
+            break;
+        }
+        s = endptr + 1;
+        flags = strtoll(s, &endptr, 16);
+        if (*endptr != '\n' || s == endptr) {
+            break;
+        }
+        s = endptr + 1;
+
+        if (start) {
+            size = end - start + 1;
+        } else {
+            size = 0;
+        }
+
+        if (flags & IORESOURCE_IO) {
+            type |= XEN_HOST_PCI_REGION_TYPE_IO;
+        }
+        if (flags & IORESOURCE_MEM) {
+            type |= XEN_HOST_PCI_REGION_TYPE_MEM;
+        }
+        if (flags & IORESOURCE_PREFETCH) {
+            type |= XEN_HOST_PCI_REGION_TYPE_PREFETCH;
+        }
+        if (flags & IORESOURCE_MEM_64) {
+            type |= XEN_HOST_PCI_REGION_TYPE_MEM_64;
+        }
+
+        if (i < PCI_ROM_SLOT) {
+            d->io_regions[i].base_addr = start;
+            d->io_regions[i].size = size;
+            d->io_regions[i].type = type;
+            d->io_regions[i].bus_flags = flags & IORESOURCE_BITS;
+        } else {
+            d->rom.base_addr = start;
+            d->rom.size = size;
+            d->rom.type = type;
+            d->rom.bus_flags = flags & IORESOURCE_BITS;
+        }
+    }
+    if (i != PCI_NUM_REGIONS) {
+        /* Invalid format or input to short */
+        rc = -ENODEV;
+    }
+
+out:
+    close(fd);
+    return rc;
+}
+
+/* This size should be enough to read a long from a file */
+#define XEN_HOST_PCI_GET_VALUE_BUFFER_SIZE 22
+static int xen_host_pci_get_value(XenHostPCIDevice *d, const char *name,
+                                  unsigned int *pvalue, int base)
+{
+    char path[PATH_MAX];
+    char buf[XEN_HOST_PCI_GET_VALUE_BUFFER_SIZE];
+    int fd, rc;
+    unsigned long value;
+    char *endptr;
+
+    rc = xen_host_pci_sysfs_path(d, name, path, sizeof (path));
+    if (rc) {
+        return rc;
+    }
+    fd = open(path, O_RDONLY);
+    if (fd == -1) {
+        XEN_HOST_PCI_LOG("Error: Can't open %s: %s\n", path, strerror(errno));
+        return -errno;
+    }
+    do {
+        rc = read(fd, &buf, sizeof (buf) - 1);
+        if (rc < 0 && errno != EINTR) {
+            rc = -errno;
+            goto out;
+        }
+    } while (rc < 0);
+    buf[rc] = 0;
+    value = strtol(buf, &endptr, base);
+    if (endptr == buf || *endptr != '\n') {
+        rc = -1;
+    } else if ((value == LONG_MIN || value == LONG_MAX) && errno == ERANGE) {
+        rc = -errno;
+    } else {
+        rc = 0;
+        *pvalue = value;
+    }
+out:
+    close(fd);
+    return rc;
+}
+
+static inline int xen_host_pci_get_hex_value(XenHostPCIDevice *d,
+                                             const char *name,
+                                             unsigned int *pvalue)
+{
+    return xen_host_pci_get_value(d, name, pvalue, 16);
+}
+
+static inline int xen_host_pci_get_dec_value(XenHostPCIDevice *d,
+                                             const char *name,
+                                             unsigned int *pvalue)
+{
+    return xen_host_pci_get_value(d, name, pvalue, 10);
+}
+
+static bool xen_host_pci_dev_is_virtfn(XenHostPCIDevice *d)
+{
+    char path[PATH_MAX];
+    struct stat buf;
+
+    if (xen_host_pci_sysfs_path(d, "physfn", path, sizeof (path))) {
+        return false;
+    }
+    return !stat(path, &buf);
+}
+
+static int xen_host_pci_config_open(XenHostPCIDevice *d)
+{
+    char path[PATH_MAX];
+    int rc;
+
+    rc = xen_host_pci_sysfs_path(d, "config", path, sizeof (path));
+    if (rc) {
+        return rc;
+    }
+    d->config_fd = open(path, O_RDWR);
+    if (d->config_fd < 0) {
+        return -errno;
+    }
+    return 0;
+}
+
+static int xen_host_pci_config_read(XenHostPCIDevice *d,
+                                    int pos, void *buf, int len)
+{
+    int rc;
+
+    do {
+        rc = pread(d->config_fd, buf, len, pos);
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+    if (rc != len) {
+        return -errno;
+    }
+    return 0;
+}
+
+static int xen_host_pci_config_write(XenHostPCIDevice *d,
+                                     int pos, const void *buf, int len)
+{
+    int rc;
+
+    do {
+        rc = pwrite(d->config_fd, buf, len, pos);
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+    if (rc != len) {
+        return -errno;
+    }
+    return 0;
+}
+
+
+int xen_host_pci_get_byte(XenHostPCIDevice *d, int pos, uint8_t *p)
+{
+    uint8_t buf;
+    int rc = xen_host_pci_config_read(d, pos, &buf, 1);
+    if (!rc) {
+        *p = buf;
+    }
+    return rc;
+}
+
+int xen_host_pci_get_word(XenHostPCIDevice *d, int pos, uint16_t *p)
+{
+    uint16_t buf;
+    int rc = xen_host_pci_config_read(d, pos, &buf, 2);
+    if (!rc) {
+        *p = le16_to_cpu(buf);
+    }
+    return rc;
+}
+
+int xen_host_pci_get_long(XenHostPCIDevice *d, int pos, uint32_t *p)
+{
+    uint32_t buf;
+    int rc = xen_host_pci_config_read(d, pos, &buf, 4);
+    if (!rc) {
+        *p = le32_to_cpu(buf);
+    }
+    return rc;
+}
+
+int xen_host_pci_get_block(XenHostPCIDevice *d, int pos, uint8_t *buf, int len)
+{
+    return xen_host_pci_config_read(d, pos, buf, len);
+}
+
+int xen_host_pci_set_byte(XenHostPCIDevice *d, int pos, uint8_t data)
+{
+    return xen_host_pci_config_write(d, pos, &data, 1);
+}
+
+int xen_host_pci_set_word(XenHostPCIDevice *d, int pos, uint16_t data)
+{
+    data = cpu_to_le16(data);
+    return xen_host_pci_config_write(d, pos, &data, 2);
+}
+
+int xen_host_pci_set_long(XenHostPCIDevice *d, int pos, uint32_t data)
+{
+    data = cpu_to_le32(data);
+    return xen_host_pci_config_write(d, pos, &data, 4);
+}
+
+int xen_host_pci_set_block(XenHostPCIDevice *d, int pos, uint8_t *buf, int len)
+{
+    return xen_host_pci_config_write(d, pos, buf, len);
+}
+
+int xen_host_pci_find_ext_cap_offset(XenHostPCIDevice *d, uint32_t cap)
+{
+    uint32_t header = 0;
+    int max_cap = XEN_HOST_PCI_MAX_EXT_CAP;
+    int pos = PCI_CONFIG_SPACE_SIZE;
+
+    do {
+        if (xen_host_pci_get_long(d, pos, &header)) {
+            break;
+        }
+        /*
+         * If we have no capabilities, this is indicated by cap ID,
+         * cap version and next pointer all being 0.
+         */
+        if (header == 0) {
+            break;
+        }
+
+        if (PCI_EXT_CAP_ID(header) == cap) {
+            return pos;
+        }
+
+        pos = PCI_EXT_CAP_NEXT(header);
+        if (pos < PCI_CONFIG_SPACE_SIZE) {
+            break;
+        }
+
+        max_cap--;
+    } while (max_cap > 0);
+
+    return -1;
+}
+
+int xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain,
+                            uint8_t bus, uint8_t dev, uint8_t func)
+{
+    unsigned int v;
+    int rc = 0;
+
+    d->config_fd = -1;
+    d->domain = domain;
+    d->bus = bus;
+    d->dev = dev;
+    d->func = func;
+
+    rc = xen_host_pci_config_open(d);
+    if (rc) {
+        goto error;
+    }
+    rc = xen_host_pci_get_resource(d);
+    if (rc) {
+        goto error;
+    }
+    rc = xen_host_pci_get_hex_value(d, "vendor", &v);
+    if (rc) {
+        goto error;
+    }
+    d->vendor_id = v;
+    rc = xen_host_pci_get_hex_value(d, "device", &v);
+    if (rc) {
+        goto error;
+    }
+    d->device_id = v;
+    rc = xen_host_pci_get_dec_value(d, "irq", &v);
+    if (rc) {
+        goto error;
+    }
+    d->irq = v;
+    d->is_virtfn = xen_host_pci_dev_is_virtfn(d);
+
+    return 0;
+error:
+    if (d->config_fd >= 0) {
+        close(d->config_fd);
+        d->config_fd = -1;
+    }
+    return rc;
+}
+
+void xen_host_pci_device_put(XenHostPCIDevice *d)
+{
+    if (d->config_fd >= 0) {
+        close(d->config_fd);
+        d->config_fd = -1;
+    }
+}
diff --git a/hw/xen-host-pci-device.h b/hw/xen-host-pci-device.h
new file mode 100644
index 0000000000..0079daca51
--- /dev/null
+++ b/hw/xen-host-pci-device.h
@@ -0,0 +1,55 @@
+#ifndef XEN_HOST_PCI_DEVICE_H
+#define XEN_HOST_PCI_DEVICE_H
+
+#include "pci.h"
+
+enum {
+    XEN_HOST_PCI_REGION_TYPE_IO = 1 << 1,
+    XEN_HOST_PCI_REGION_TYPE_MEM = 1 << 2,
+    XEN_HOST_PCI_REGION_TYPE_PREFETCH = 1 << 3,
+    XEN_HOST_PCI_REGION_TYPE_MEM_64 = 1 << 4,
+};
+
+typedef struct XenHostPCIIORegion {
+    pcibus_t base_addr;
+    pcibus_t size;
+    uint8_t type;
+    uint8_t bus_flags; /* Bus-specific bits */
+} XenHostPCIIORegion;
+
+typedef struct XenHostPCIDevice {
+    uint16_t domain;
+    uint8_t bus;
+    uint8_t dev;
+    uint8_t func;
+
+    uint16_t vendor_id;
+    uint16_t device_id;
+    int irq;
+
+    XenHostPCIIORegion io_regions[PCI_NUM_REGIONS - 1];
+    XenHostPCIIORegion rom;
+
+    bool is_virtfn;
+
+    int config_fd;
+} XenHostPCIDevice;
+
+int xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain,
+                            uint8_t bus, uint8_t dev, uint8_t func);
+void xen_host_pci_device_put(XenHostPCIDevice *pci_dev);
+
+int xen_host_pci_get_byte(XenHostPCIDevice *d, int pos, uint8_t *p);
+int xen_host_pci_get_word(XenHostPCIDevice *d, int pos, uint16_t *p);
+int xen_host_pci_get_long(XenHostPCIDevice *d, int pos, uint32_t *p);
+int xen_host_pci_get_block(XenHostPCIDevice *d, int pos, uint8_t *buf,
+                           int len);
+int xen_host_pci_set_byte(XenHostPCIDevice *d, int pos, uint8_t data);
+int xen_host_pci_set_word(XenHostPCIDevice *d, int pos, uint16_t data);
+int xen_host_pci_set_long(XenHostPCIDevice *d, int pos, uint32_t data);
+int xen_host_pci_set_block(XenHostPCIDevice *d, int pos, uint8_t *buf,
+                           int len);
+
+int xen_host_pci_find_ext_cap_offset(XenHostPCIDevice *s, uint32_t cap);
+
+#endif /* !XEN_HOST_PCI_DEVICE_H_ */
diff --git a/hw/xen_backend.c b/hw/xen_backend.c
index 66cb144397..f83a1e1d09 100644
--- a/hw/xen_backend.c
+++ b/hw/xen_backend.c
@@ -34,15 +34,13 @@
 #include <sys/mman.h>
 #include <sys/signal.h>
 
-#include <xs.h>
-#include <xenctrl.h>
-#include <xen/grant_table.h>
-
 #include "hw.h"
 #include "qemu-char.h"
 #include "qemu-log.h"
 #include "xen_backend.h"
 
+#include <xen/grant_table.h>
+
 /* ------------------------------------------------------------- */
 
 /* public */
diff --git a/hw/xen_backend.h b/hw/xen_backend.h
index 3305630903..fea86dd78b 100644
--- a/hw/xen_backend.h
+++ b/hw/xen_backend.h
@@ -4,6 +4,7 @@
 #include "xen_common.h"
 #include "sysemu.h"
 #include "net.h"
+#include "net/hub.h"
 
 /* ------------------------------------------------------------- */
 
diff --git a/hw/xen_common.h b/hw/xen_common.h
index fe7f227f92..727757afb4 100644
--- a/hw/xen_common.h
+++ b/hw/xen_common.h
@@ -7,7 +7,11 @@
 #include <inttypes.h>
 
 #include <xenctrl.h>
-#include <xs.h>
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 420
+#  include <xs.h>
+#else
+#  include <xenstore.h>
+#endif
 #include <xen/io/xenbus.h>
 
 #include "hw.h"
@@ -150,4 +154,7 @@ static inline int xen_xc_hvm_inject_msi(XenXC xen_xc, domid_t dom,
 
 void destroy_hvm_domain(bool reboot);
 
+/* shutdown/destroy current domain because of an error */
+void xen_shutdown_fatal_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+
 #endif /* QEMU_HW_XEN_COMMON_H */
diff --git a/hw/xen_console.c b/hw/xen_console.c
index 3794b1972d..9426d7374f 100644
--- a/hw/xen_console.c
+++ b/hw/xen_console.c
@@ -28,14 +28,13 @@
 #include <termios.h>
 #include <stdarg.h>
 #include <sys/mman.h>
-#include <xs.h>
-#include <xen/io/console.h>
-#include <xenctrl.h>
 
 #include "hw.h"
 #include "qemu-char.h"
 #include "xen_backend.h"
 
+#include <xen/io/console.h>
+
 struct buffer {
     uint8_t *data;
     size_t consumed;
diff --git a/hw/xen_devconfig.c b/hw/xen_devconfig.c
index 0928613b55..d83e8d0f64 100644
--- a/hw/xen_devconfig.c
+++ b/hw/xen_devconfig.c
@@ -123,19 +123,21 @@ int xen_config_dev_nic(NICInfo *nic)
 {
     char fe[256], be[256];
     char mac[20];
+    int vlan_id = -1;
 
+    net_hub_id_for_client(nic->netdev, &vlan_id);
     snprintf(mac, sizeof(mac), "%02x:%02x:%02x:%02x:%02x:%02x",
              nic->macaddr.a[0], nic->macaddr.a[1], nic->macaddr.a[2],
              nic->macaddr.a[3], nic->macaddr.a[4], nic->macaddr.a[5]);
-    xen_be_printf(NULL, 1, "config nic %d: mac=\"%s\"\n", nic->vlan->id, mac);
-    xen_config_dev_dirs("vif", "qnic", nic->vlan->id, fe, be, sizeof(fe));
+    xen_be_printf(NULL, 1, "config nic %d: mac=\"%s\"\n", vlan_id, mac);
+    xen_config_dev_dirs("vif", "qnic", vlan_id, fe, be, sizeof(fe));
 
     /* frontend */
-    xenstore_write_int(fe, "handle",     nic->vlan->id);
+    xenstore_write_int(fe, "handle",     vlan_id);
     xenstore_write_str(fe, "mac",        mac);
 
     /* backend */
-    xenstore_write_int(be, "handle",     nic->vlan->id);
+    xenstore_write_int(be, "handle",     vlan_id);
     xenstore_write_str(be, "mac",        mac);
 
     /* common stuff */
diff --git a/hw/xen_disk.c b/hw/xen_disk.c
index fb68ed9bbf..e6bb2f20b9 100644
--- a/hw/xen_disk.c
+++ b/hw/xen_disk.c
@@ -35,14 +35,10 @@
 #include <sys/mman.h>
 #include <sys/uio.h>
 
-#include <xs.h>
-#include <xenctrl.h>
-#include <xen/io/xenbus.h>
-
 #include "hw.h"
 #include "qemu-char.h"
-#include "xen_blkif.h"
 #include "xen_backend.h"
+#include "xen_blkif.h"
 #include "blockdev.h"
 
 /* ------------------------------------------------------------- */
diff --git a/hw/xen_nic.c b/hw/xen_nic.c
index 9a59bdad6e..8b79bfb73e 100644
--- a/hw/xen_nic.c
+++ b/hw/xen_nic.c
@@ -35,11 +35,6 @@
 #include <sys/mman.h>
 #include <sys/wait.h>
 
-#include <xs.h>
-#include <xenctrl.h>
-#include <xen/io/xenbus.h>
-#include <xen/io/netif.h>
-
 #include "hw.h"
 #include "net.h"
 #include "net/checksum.h"
@@ -47,6 +42,8 @@
 #include "qemu-char.h"
 #include "xen_backend.h"
 
+#include <xen/io/netif.h>
+
 /* ------------------------------------------------------------- */
 
 struct XenNetDev {
@@ -236,7 +233,7 @@ static void net_rx_response(struct XenNetDev *netdev,
 
 #define NET_IP_ALIGN 2
 
-static int net_rx_ok(VLANClientState *nc)
+static int net_rx_ok(NetClientState *nc)
 {
     struct XenNetDev *netdev = DO_UPCAST(NICState, nc, nc)->opaque;
     RING_IDX rc, rp;
@@ -257,7 +254,7 @@ static int net_rx_ok(VLANClientState *nc)
     return 1;
 }
 
-static ssize_t net_rx_packet(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     struct XenNetDev *netdev = DO_UPCAST(NICState, nc, nc)->opaque;
     netif_rx_request_t rxreq;
@@ -304,7 +301,7 @@ static ssize_t net_rx_packet(VLANClientState *nc, const uint8_t *buf, size_t siz
 /* ------------------------------------------------------------- */
 
 static NetClientInfo net_xen_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = net_rx_ok,
     .receive = net_rx_packet,
@@ -328,7 +325,6 @@ static int net_init(struct XenDevice *xendev)
         return -1;
     }
 
-    netdev->conf.vlan = qemu_find_vlan(netdev->xendev.dev, 1);
     netdev->conf.peer = NULL;
 
     netdev->nic = qemu_new_nic(&net_xen_info, &netdev->conf,
@@ -410,7 +406,7 @@ static void net_disconnect(struct XenDevice *xendev)
         netdev->rxs = NULL;
     }
     if (netdev->nic) {
-        qemu_del_vlan_client(&netdev->nic->nc);
+        qemu_del_net_client(&netdev->nic->nc);
         netdev->nic = NULL;
     }
 }
diff --git a/hw/xen_platform.c b/hw/xen_platform.c
index 0214f370b2..c1fe984f07 100644
--- a/hw/xen_platform.c
+++ b/hw/xen_platform.c
@@ -83,7 +83,7 @@ static void log_writeb(PCIXenPlatformState *s, char val)
 #define UNPLUG_ALL_NICS 2
 #define UNPLUG_AUX_IDE_DISKS 4
 
-static void unplug_nic(PCIBus *b, PCIDevice *d)
+static void unplug_nic(PCIBus *b, PCIDevice *d, void *o)
 {
     if (pci_get_word(d->config + PCI_CLASS_DEVICE) ==
             PCI_CLASS_NETWORK_ETHERNET) {
@@ -96,10 +96,10 @@ static void unplug_nic(PCIBus *b, PCIDevice *d)
 
 static void pci_unplug_nics(PCIBus *bus)
 {
-    pci_for_each_device(bus, 0, unplug_nic);
+    pci_for_each_device(bus, 0, unplug_nic, NULL);
 }
 
-static void unplug_disks(PCIBus *b, PCIDevice *d)
+static void unplug_disks(PCIBus *b, PCIDevice *d, void *o)
 {
     if (pci_get_word(d->config + PCI_CLASS_DEVICE) ==
             PCI_CLASS_STORAGE_IDE) {
@@ -109,7 +109,7 @@ static void unplug_disks(PCIBus *b, PCIDevice *d)
 
 static void pci_unplug_disks(PCIBus *bus)
 {
-    pci_for_each_device(bus, 0, unplug_disks);
+    pci_for_each_device(bus, 0, unplug_disks, NULL);
 }
 
 static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t val)
diff --git a/hw/xen_pt.c b/hw/xen_pt.c
new file mode 100644
index 0000000000..307119a12f
--- /dev/null
+++ b/hw/xen_pt.c
@@ -0,0 +1,849 @@
+/*
+ * Copyright (c) 2007, Neocleus Corporation.
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Alex Novik <alex@neocleus.com>
+ * Allen Kay <allen.m.kay@intel.com>
+ * Guy Zana <guy@neocleus.com>
+ *
+ * This file implements direct PCI assignment to a HVM guest
+ */
+
+/*
+ * Interrupt Disable policy:
+ *
+ * INTx interrupt:
+ *   Initialize(register_real_device)
+ *     Map INTx(xc_physdev_map_pirq):
+ *       <fail>
+ *         - Set real Interrupt Disable bit to '1'.
+ *         - Set machine_irq and assigned_device->machine_irq to '0'.
+ *         * Don't bind INTx.
+ *
+ *     Bind INTx(xc_domain_bind_pt_pci_irq):
+ *       <fail>
+ *         - Set real Interrupt Disable bit to '1'.
+ *         - Unmap INTx.
+ *         - Decrement xen_pt_mapped_machine_irq[machine_irq]
+ *         - Set assigned_device->machine_irq to '0'.
+ *
+ *   Write to Interrupt Disable bit by guest software(xen_pt_cmd_reg_write)
+ *     Write '0'
+ *       - Set real bit to '0' if assigned_device->machine_irq isn't '0'.
+ *
+ *     Write '1'
+ *       - Set real bit to '1'.
+ *
+ * MSI interrupt:
+ *   Initialize MSI register(xen_pt_msi_setup, xen_pt_msi_update)
+ *     Bind MSI(xc_domain_update_msi_irq)
+ *       <fail>
+ *         - Unmap MSI.
+ *         - Set dev->msi->pirq to '-1'.
+ *
+ * MSI-X interrupt:
+ *   Initialize MSI-X register(xen_pt_msix_update_one)
+ *     Bind MSI-X(xc_domain_update_msi_irq)
+ *       <fail>
+ *         - Unmap MSI-X.
+ *         - Set entry->pirq to '-1'.
+ */
+
+#include <sys/ioctl.h>
+
+#include "pci.h"
+#include "xen.h"
+#include "xen_backend.h"
+#include "xen_pt.h"
+#include "range.h"
+
+#define XEN_PT_NR_IRQS (256)
+static uint8_t xen_pt_mapped_machine_irq[XEN_PT_NR_IRQS] = {0};
+
+void xen_pt_log(const PCIDevice *d, const char *f, ...)
+{
+    va_list ap;
+
+    va_start(ap, f);
+    if (d) {
+        fprintf(stderr, "[%02x:%02x.%d] ", pci_bus_num(d->bus),
+                PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
+    }
+    vfprintf(stderr, f, ap);
+    va_end(ap);
+}
+
+/* Config Space */
+
+static int xen_pt_pci_config_access_check(PCIDevice *d, uint32_t addr, int len)
+{
+    /* check offset range */
+    if (addr >= 0xFF) {
+        XEN_PT_ERR(d, "Failed to access register with offset exceeding 0xFF. "
+                   "(addr: 0x%02x, len: %d)\n", addr, len);
+        return -1;
+    }
+
+    /* check read size */
+    if ((len != 1) && (len != 2) && (len != 4)) {
+        XEN_PT_ERR(d, "Failed to access register with invalid access length. "
+                   "(addr: 0x%02x, len: %d)\n", addr, len);
+        return -1;
+    }
+
+    /* check offset alignment */
+    if (addr & (len - 1)) {
+        XEN_PT_ERR(d, "Failed to access register with invalid access size "
+                   "alignment. (addr: 0x%02x, len: %d)\n", addr, len);
+        return -1;
+    }
+
+    return 0;
+}
+
+int xen_pt_bar_offset_to_index(uint32_t offset)
+{
+    int index = 0;
+
+    /* check Exp ROM BAR */
+    if (offset == PCI_ROM_ADDRESS) {
+        return PCI_ROM_SLOT;
+    }
+
+    /* calculate BAR index */
+    index = (offset - PCI_BASE_ADDRESS_0) >> 2;
+    if (index >= PCI_NUM_REGIONS) {
+        return -1;
+    }
+
+    return index;
+}
+
+static uint32_t xen_pt_pci_read_config(PCIDevice *d, uint32_t addr, int len)
+{
+    XenPCIPassthroughState *s = DO_UPCAST(XenPCIPassthroughState, dev, d);
+    uint32_t val = 0;
+    XenPTRegGroup *reg_grp_entry = NULL;
+    XenPTReg *reg_entry = NULL;
+    int rc = 0;
+    int emul_len = 0;
+    uint32_t find_addr = addr;
+
+    if (xen_pt_pci_config_access_check(d, addr, len)) {
+        goto exit;
+    }
+
+    /* find register group entry */
+    reg_grp_entry = xen_pt_find_reg_grp(s, addr);
+    if (reg_grp_entry) {
+        /* check 0-Hardwired register group */
+        if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED) {
+            /* no need to emulate, just return 0 */
+            val = 0;
+            goto exit;
+        }
+    }
+
+    /* read I/O device register value */
+    rc = xen_host_pci_get_block(&s->real_device, addr, (uint8_t *)&val, len);
+    if (rc < 0) {
+        XEN_PT_ERR(d, "pci_read_block failed. return value: %d.\n", rc);
+        memset(&val, 0xff, len);
+    }
+
+    /* just return the I/O device register value for
+     * passthrough type register group */
+    if (reg_grp_entry == NULL) {
+        goto exit;
+    }
+
+    /* adjust the read value to appropriate CFC-CFF window */
+    val <<= (addr & 3) << 3;
+    emul_len = len;
+
+    /* loop around the guest requested size */
+    while (emul_len > 0) {
+        /* find register entry to be emulated */
+        reg_entry = xen_pt_find_reg(reg_grp_entry, find_addr);
+        if (reg_entry) {
+            XenPTRegInfo *reg = reg_entry->reg;
+            uint32_t real_offset = reg_grp_entry->base_offset + reg->offset;
+            uint32_t valid_mask = 0xFFFFFFFF >> ((4 - emul_len) << 3);
+            uint8_t *ptr_val = NULL;
+
+            valid_mask <<= (find_addr - real_offset) << 3;
+            ptr_val = (uint8_t *)&val + (real_offset & 3);
+
+            /* do emulation based on register size */
+            switch (reg->size) {
+            case 1:
+                if (reg->u.b.read) {
+                    rc = reg->u.b.read(s, reg_entry, ptr_val, valid_mask);
+                }
+                break;
+            case 2:
+                if (reg->u.w.read) {
+                    rc = reg->u.w.read(s, reg_entry,
+                                       (uint16_t *)ptr_val, valid_mask);
+                }
+                break;
+            case 4:
+                if (reg->u.dw.read) {
+                    rc = reg->u.dw.read(s, reg_entry,
+                                        (uint32_t *)ptr_val, valid_mask);
+                }
+                break;
+            }
+
+            if (rc < 0) {
+                xen_shutdown_fatal_error("Internal error: Invalid read "
+                                         "emulation. (%s, rc: %d)\n",
+                                         __func__, rc);
+                return 0;
+            }
+
+            /* calculate next address to find */
+            emul_len -= reg->size;
+            if (emul_len > 0) {
+                find_addr = real_offset + reg->size;
+            }
+        } else {
+            /* nothing to do with passthrough type register,
+             * continue to find next byte */
+            emul_len--;
+            find_addr++;
+        }
+    }
+
+    /* need to shift back before returning them to pci bus emulator */
+    val >>= ((addr & 3) << 3);
+
+exit:
+    XEN_PT_LOG_CONFIG(d, addr, val, len);
+    return val;
+}
+
+static void xen_pt_pci_write_config(PCIDevice *d, uint32_t addr,
+                                    uint32_t val, int len)
+{
+    XenPCIPassthroughState *s = DO_UPCAST(XenPCIPassthroughState, dev, d);
+    int index = 0;
+    XenPTRegGroup *reg_grp_entry = NULL;
+    int rc = 0;
+    uint32_t read_val = 0;
+    int emul_len = 0;
+    XenPTReg *reg_entry = NULL;
+    uint32_t find_addr = addr;
+    XenPTRegInfo *reg = NULL;
+
+    if (xen_pt_pci_config_access_check(d, addr, len)) {
+        return;
+    }
+
+    XEN_PT_LOG_CONFIG(d, addr, val, len);
+
+    /* check unused BAR register */
+    index = xen_pt_bar_offset_to_index(addr);
+    if ((index >= 0) && (val > 0 && val < XEN_PT_BAR_ALLF) &&
+        (s->bases[index].bar_flag == XEN_PT_BAR_FLAG_UNUSED)) {
+        XEN_PT_WARN(d, "Guest attempt to set address to unused Base Address "
+                    "Register. (addr: 0x%02x, len: %d)\n", addr, len);
+    }
+
+    /* find register group entry */
+    reg_grp_entry = xen_pt_find_reg_grp(s, addr);
+    if (reg_grp_entry) {
+        /* check 0-Hardwired register group */
+        if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED) {
+            /* ignore silently */
+            XEN_PT_WARN(d, "Access to 0-Hardwired register. "
+                        "(addr: 0x%02x, len: %d)\n", addr, len);
+            return;
+        }
+    }
+
+    rc = xen_host_pci_get_block(&s->real_device, addr,
+                                (uint8_t *)&read_val, len);
+    if (rc < 0) {
+        XEN_PT_ERR(d, "pci_read_block failed. return value: %d.\n", rc);
+        memset(&read_val, 0xff, len);
+    }
+
+    /* pass directly to the real device for passthrough type register group */
+    if (reg_grp_entry == NULL) {
+        goto out;
+    }
+
+    memory_region_transaction_begin();
+    pci_default_write_config(d, addr, val, len);
+
+    /* adjust the read and write value to appropriate CFC-CFF window */
+    read_val <<= (addr & 3) << 3;
+    val <<= (addr & 3) << 3;
+    emul_len = len;
+
+    /* loop around the guest requested size */
+    while (emul_len > 0) {
+        /* find register entry to be emulated */
+        reg_entry = xen_pt_find_reg(reg_grp_entry, find_addr);
+        if (reg_entry) {
+            reg = reg_entry->reg;
+            uint32_t real_offset = reg_grp_entry->base_offset + reg->offset;
+            uint32_t valid_mask = 0xFFFFFFFF >> ((4 - emul_len) << 3);
+            uint8_t *ptr_val = NULL;
+
+            valid_mask <<= (find_addr - real_offset) << 3;
+            ptr_val = (uint8_t *)&val + (real_offset & 3);
+
+            /* do emulation based on register size */
+            switch (reg->size) {
+            case 1:
+                if (reg->u.b.write) {
+                    rc = reg->u.b.write(s, reg_entry, ptr_val,
+                                        read_val >> ((real_offset & 3) << 3),
+                                        valid_mask);
+                }
+                break;
+            case 2:
+                if (reg->u.w.write) {
+                    rc = reg->u.w.write(s, reg_entry, (uint16_t *)ptr_val,
+                                        (read_val >> ((real_offset & 3) << 3)),
+                                        valid_mask);
+                }
+                break;
+            case 4:
+                if (reg->u.dw.write) {
+                    rc = reg->u.dw.write(s, reg_entry, (uint32_t *)ptr_val,
+                                         (read_val >> ((real_offset & 3) << 3)),
+                                         valid_mask);
+                }
+                break;
+            }
+
+            if (rc < 0) {
+                xen_shutdown_fatal_error("Internal error: Invalid write"
+                                         " emulation. (%s, rc: %d)\n",
+                                         __func__, rc);
+                return;
+            }
+
+            /* calculate next address to find */
+            emul_len -= reg->size;
+            if (emul_len > 0) {
+                find_addr = real_offset + reg->size;
+            }
+        } else {
+            /* nothing to do with passthrough type register,
+             * continue to find next byte */
+            emul_len--;
+            find_addr++;
+        }
+    }
+
+    /* need to shift back before passing them to xen_host_pci_device */
+    val >>= (addr & 3) << 3;
+
+    memory_region_transaction_commit();
+
+out:
+    if (!(reg && reg->no_wb)) {
+        /* unknown regs are passed through */
+        rc = xen_host_pci_set_block(&s->real_device, addr,
+                                    (uint8_t *)&val, len);
+
+        if (rc < 0) {
+            XEN_PT_ERR(d, "pci_write_block failed. return value: %d.\n", rc);
+        }
+    }
+}
+
+/* register regions */
+
+static uint64_t xen_pt_bar_read(void *o, target_phys_addr_t addr,
+                                unsigned size)
+{
+    PCIDevice *d = o;
+    /* if this function is called, that probably means that there is a
+     * misconfiguration of the IOMMU. */
+    XEN_PT_ERR(d, "Should not read BAR through QEMU. @0x"TARGET_FMT_plx"\n",
+               addr);
+    return 0;
+}
+static void xen_pt_bar_write(void *o, target_phys_addr_t addr, uint64_t val,
+                             unsigned size)
+{
+    PCIDevice *d = o;
+    /* Same comment as xen_pt_bar_read function */
+    XEN_PT_ERR(d, "Should not write BAR through QEMU. @0x"TARGET_FMT_plx"\n",
+               addr);
+}
+
+static const MemoryRegionOps ops = {
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .read = xen_pt_bar_read,
+    .write = xen_pt_bar_write,
+};
+
+static int xen_pt_register_regions(XenPCIPassthroughState *s)
+{
+    int i = 0;
+    XenHostPCIDevice *d = &s->real_device;
+
+    /* Register PIO/MMIO BARs */
+    for (i = 0; i < PCI_ROM_SLOT; i++) {
+        XenHostPCIIORegion *r = &d->io_regions[i];
+        uint8_t type;
+
+        if (r->base_addr == 0 || r->size == 0) {
+            continue;
+        }
+
+        s->bases[i].access.u = r->base_addr;
+
+        if (r->type & XEN_HOST_PCI_REGION_TYPE_IO) {
+            type = PCI_BASE_ADDRESS_SPACE_IO;
+        } else {
+            type = PCI_BASE_ADDRESS_SPACE_MEMORY;
+            if (r->type & XEN_HOST_PCI_REGION_TYPE_PREFETCH) {
+                type |= PCI_BASE_ADDRESS_MEM_PREFETCH;
+            }
+        }
+
+        memory_region_init_io(&s->bar[i], &ops, &s->dev,
+                              "xen-pci-pt-bar", r->size);
+        pci_register_bar(&s->dev, i, type, &s->bar[i]);
+
+        XEN_PT_LOG(&s->dev, "IO region %i registered (size=0x%08"PRIx64
+                   " base_addr=0x%08"PRIx64" type: %#x)\n",
+                   i, r->size, r->base_addr, type);
+    }
+
+    /* Register expansion ROM address */
+    if (d->rom.base_addr && d->rom.size) {
+        uint32_t bar_data = 0;
+
+        /* Re-set BAR reported by OS, otherwise ROM can't be read. */
+        if (xen_host_pci_get_long(d, PCI_ROM_ADDRESS, &bar_data)) {
+            return 0;
+        }
+        if ((bar_data & PCI_ROM_ADDRESS_MASK) == 0) {
+            bar_data |= d->rom.base_addr & PCI_ROM_ADDRESS_MASK;
+            xen_host_pci_set_long(d, PCI_ROM_ADDRESS, bar_data);
+        }
+
+        s->bases[PCI_ROM_SLOT].access.maddr = d->rom.base_addr;
+
+        memory_region_init_rom_device(&s->rom, NULL, NULL,
+                                      "xen-pci-pt-rom", d->rom.size);
+        pci_register_bar(&s->dev, PCI_ROM_SLOT, PCI_BASE_ADDRESS_MEM_PREFETCH,
+                         &s->rom);
+
+        XEN_PT_LOG(&s->dev, "Expansion ROM registered (size=0x%08"PRIx64
+                   " base_addr=0x%08"PRIx64")\n",
+                   d->rom.size, d->rom.base_addr);
+    }
+
+    return 0;
+}
+
+static void xen_pt_unregister_regions(XenPCIPassthroughState *s)
+{
+    XenHostPCIDevice *d = &s->real_device;
+    int i;
+
+    for (i = 0; i < PCI_NUM_REGIONS - 1; i++) {
+        XenHostPCIIORegion *r = &d->io_regions[i];
+
+        if (r->base_addr == 0 || r->size == 0) {
+            continue;
+        }
+
+        memory_region_destroy(&s->bar[i]);
+    }
+    if (d->rom.base_addr && d->rom.size) {
+        memory_region_destroy(&s->rom);
+    }
+}
+
+/* region mapping */
+
+static int xen_pt_bar_from_region(XenPCIPassthroughState *s, MemoryRegion *mr)
+{
+    int i = 0;
+
+    for (i = 0; i < PCI_NUM_REGIONS - 1; i++) {
+        if (mr == &s->bar[i]) {
+            return i;
+        }
+    }
+    if (mr == &s->rom) {
+        return PCI_ROM_SLOT;
+    }
+    return -1;
+}
+
+/*
+ * This function checks if an io_region overlaps an io_region from another
+ * device.  The io_region to check is provided with (addr, size and type)
+ * A callback can be provided and will be called for every region that is
+ * overlapped.
+ * The return value indicates if the region is overlappsed */
+struct CheckBarArgs {
+    XenPCIPassthroughState *s;
+    pcibus_t addr;
+    pcibus_t size;
+    uint8_t type;
+    bool rc;
+};
+static void xen_pt_check_bar_overlap(PCIBus *bus, PCIDevice *d, void *opaque)
+{
+    struct CheckBarArgs *arg = opaque;
+    XenPCIPassthroughState *s = arg->s;
+    uint8_t type = arg->type;
+    int i;
+
+    if (d->devfn == s->dev.devfn) {
+        return;
+    }
+
+    /* xxx: This ignores bridges. */
+    for (i = 0; i < PCI_NUM_REGIONS; i++) {
+        const PCIIORegion *r = &d->io_regions[i];
+
+        if (!r->size) {
+            continue;
+        }
+        if ((type & PCI_BASE_ADDRESS_SPACE_IO)
+            != (r->type & PCI_BASE_ADDRESS_SPACE_IO)) {
+            continue;
+        }
+
+        if (ranges_overlap(arg->addr, arg->size, r->addr, r->size)) {
+            XEN_PT_WARN(&s->dev,
+                        "Overlapped to device [%02x:%02x.%d] Region: %i"
+                        " (addr: %#"FMT_PCIBUS", len: %#"FMT_PCIBUS")\n",
+                        pci_bus_num(bus), PCI_SLOT(d->devfn),
+                        PCI_FUNC(d->devfn), i, r->addr, r->size);
+            arg->rc = true;
+        }
+    }
+}
+
+static void xen_pt_region_update(XenPCIPassthroughState *s,
+                                 MemoryRegionSection *sec, bool adding)
+{
+    PCIDevice *d = &s->dev;
+    MemoryRegion *mr = sec->mr;
+    int bar = -1;
+    int rc;
+    int op = adding ? DPCI_ADD_MAPPING : DPCI_REMOVE_MAPPING;
+    struct CheckBarArgs args = {
+        .s = s,
+        .addr = sec->offset_within_address_space,
+        .size = sec->size,
+        .rc = false,
+    };
+
+    bar = xen_pt_bar_from_region(s, mr);
+    if (bar == -1 && (!s->msix || &s->msix->mmio != mr)) {
+        return;
+    }
+
+    if (s->msix && &s->msix->mmio == mr) {
+        if (adding) {
+            s->msix->mmio_base_addr = sec->offset_within_address_space;
+            rc = xen_pt_msix_update_remap(s, s->msix->bar_index);
+        }
+        return;
+    }
+
+    args.type = d->io_regions[bar].type;
+    pci_for_each_device(d->bus, pci_bus_num(d->bus),
+                        xen_pt_check_bar_overlap, &args);
+    if (args.rc) {
+        XEN_PT_WARN(d, "Region: %d (addr: %#"FMT_PCIBUS
+                    ", len: %#"FMT_PCIBUS") is overlapped.\n",
+                    bar, sec->offset_within_address_space, sec->size);
+    }
+
+    if (d->io_regions[bar].type & PCI_BASE_ADDRESS_SPACE_IO) {
+        uint32_t guest_port = sec->offset_within_address_space;
+        uint32_t machine_port = s->bases[bar].access.pio_base;
+        uint32_t size = sec->size;
+        rc = xc_domain_ioport_mapping(xen_xc, xen_domid,
+                                      guest_port, machine_port, size,
+                                      op);
+        if (rc) {
+            XEN_PT_ERR(d, "%s ioport mapping failed! (rc: %i)\n",
+                       adding ? "create new" : "remove old", rc);
+        }
+    } else {
+        pcibus_t guest_addr = sec->offset_within_address_space;
+        pcibus_t machine_addr = s->bases[bar].access.maddr
+            + sec->offset_within_region;
+        pcibus_t size = sec->size;
+        rc = xc_domain_memory_mapping(xen_xc, xen_domid,
+                                      XEN_PFN(guest_addr + XC_PAGE_SIZE - 1),
+                                      XEN_PFN(machine_addr + XC_PAGE_SIZE - 1),
+                                      XEN_PFN(size + XC_PAGE_SIZE - 1),
+                                      op);
+        if (rc) {
+            XEN_PT_ERR(d, "%s mem mapping failed! (rc: %i)\n",
+                       adding ? "create new" : "remove old", rc);
+        }
+    }
+}
+
+static void xen_pt_begin(MemoryListener *l)
+{
+}
+
+static void xen_pt_commit(MemoryListener *l)
+{
+}
+
+static void xen_pt_region_add(MemoryListener *l, MemoryRegionSection *sec)
+{
+    XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
+                                             memory_listener);
+
+    xen_pt_region_update(s, sec, true);
+}
+
+static void xen_pt_region_del(MemoryListener *l, MemoryRegionSection *sec)
+{
+    XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
+                                             memory_listener);
+
+    xen_pt_region_update(s, sec, false);
+}
+
+static void xen_pt_region_nop(MemoryListener *l, MemoryRegionSection *s)
+{
+}
+
+static void xen_pt_log_fns(MemoryListener *l, MemoryRegionSection *s)
+{
+}
+
+static void xen_pt_log_global_fns(MemoryListener *l)
+{
+}
+
+static void xen_pt_eventfd_fns(MemoryListener *l, MemoryRegionSection *s,
+                               bool match_data, uint64_t data, EventNotifier *n)
+{
+}
+
+static const MemoryListener xen_pt_memory_listener = {
+    .begin = xen_pt_begin,
+    .commit = xen_pt_commit,
+    .region_add = xen_pt_region_add,
+    .region_nop = xen_pt_region_nop,
+    .region_del = xen_pt_region_del,
+    .log_start = xen_pt_log_fns,
+    .log_stop = xen_pt_log_fns,
+    .log_sync = xen_pt_log_fns,
+    .log_global_start = xen_pt_log_global_fns,
+    .log_global_stop = xen_pt_log_global_fns,
+    .eventfd_add = xen_pt_eventfd_fns,
+    .eventfd_del = xen_pt_eventfd_fns,
+    .priority = 10,
+};
+
+/* init */
+
+static int xen_pt_initfn(PCIDevice *d)
+{
+    XenPCIPassthroughState *s = DO_UPCAST(XenPCIPassthroughState, dev, d);
+    int rc = 0;
+    uint8_t machine_irq = 0;
+    int pirq = XEN_PT_UNASSIGNED_PIRQ;
+
+    /* register real device */
+    XEN_PT_LOG(d, "Assigning real physical device %02x:%02x.%d"
+               " to devfn %#x\n",
+               s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function,
+               s->dev.devfn);
+
+    rc = xen_host_pci_device_get(&s->real_device,
+                                 s->hostaddr.domain, s->hostaddr.bus,
+                                 s->hostaddr.slot, s->hostaddr.function);
+    if (rc) {
+        XEN_PT_ERR(d, "Failed to \"open\" the real pci device. rc: %i\n", rc);
+        return -1;
+    }
+
+    s->is_virtfn = s->real_device.is_virtfn;
+    if (s->is_virtfn) {
+        XEN_PT_LOG(d, "%04x:%02x:%02x.%d is a SR-IOV Virtual Function\n",
+                   s->real_device.domain, bus, slot, func);
+    }
+
+    /* Initialize virtualized PCI configuration (Extended 256 Bytes) */
+    if (xen_host_pci_get_block(&s->real_device, 0, d->config,
+                               PCI_CONFIG_SPACE_SIZE) == -1) {
+        xen_host_pci_device_put(&s->real_device);
+        return -1;
+    }
+
+    s->memory_listener = xen_pt_memory_listener;
+
+    /* Handle real device's MMIO/PIO BARs */
+    xen_pt_register_regions(s);
+
+    /* reinitialize each config register to be emulated */
+    if (xen_pt_config_init(s)) {
+        XEN_PT_ERR(d, "PCI Config space initialisation failed.\n");
+        xen_host_pci_device_put(&s->real_device);
+        return -1;
+    }
+
+    /* Bind interrupt */
+    if (!s->dev.config[PCI_INTERRUPT_PIN]) {
+        XEN_PT_LOG(d, "no pin interrupt\n");
+        goto out;
+    }
+
+    machine_irq = s->real_device.irq;
+    rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
+
+    if (rc < 0) {
+        XEN_PT_ERR(d, "Mapping machine irq %u to pirq %i failed, (rc: %d)\n",
+                   machine_irq, pirq, rc);
+
+        /* Disable PCI intx assertion (turn on bit10 of devctl) */
+        xen_host_pci_set_word(&s->real_device,
+                              PCI_COMMAND,
+                              pci_get_word(s->dev.config + PCI_COMMAND)
+                              | PCI_COMMAND_INTX_DISABLE);
+        machine_irq = 0;
+        s->machine_irq = 0;
+    } else {
+        machine_irq = pirq;
+        s->machine_irq = pirq;
+        xen_pt_mapped_machine_irq[machine_irq]++;
+    }
+
+    /* bind machine_irq to device */
+    if (machine_irq != 0) {
+        uint8_t e_intx = xen_pt_pci_intx(s);
+
+        rc = xc_domain_bind_pt_pci_irq(xen_xc, xen_domid, machine_irq,
+                                       pci_bus_num(d->bus),
+                                       PCI_SLOT(d->devfn),
+                                       e_intx);
+        if (rc < 0) {
+            XEN_PT_ERR(d, "Binding of interrupt %i failed! (rc: %d)\n",
+                       e_intx, rc);
+
+            /* Disable PCI intx assertion (turn on bit10 of devctl) */
+            xen_host_pci_set_word(&s->real_device, PCI_COMMAND,
+                                  *(uint16_t *)(&s->dev.config[PCI_COMMAND])
+                                  | PCI_COMMAND_INTX_DISABLE);
+            xen_pt_mapped_machine_irq[machine_irq]--;
+
+            if (xen_pt_mapped_machine_irq[machine_irq] == 0) {
+                if (xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq)) {
+                    XEN_PT_ERR(d, "Unmapping of machine interrupt %i failed!"
+                               " (rc: %d)\n", machine_irq, rc);
+                }
+            }
+            s->machine_irq = 0;
+        }
+    }
+
+out:
+    memory_listener_register(&s->memory_listener, NULL);
+    XEN_PT_LOG(d, "Real physical device %02x:%02x.%d registered successfuly!\n",
+               bus, slot, func);
+
+    return 0;
+}
+
+static void xen_pt_unregister_device(PCIDevice *d)
+{
+    XenPCIPassthroughState *s = DO_UPCAST(XenPCIPassthroughState, dev, d);
+    uint8_t machine_irq = s->machine_irq;
+    uint8_t intx = xen_pt_pci_intx(s);
+    int rc;
+
+    if (machine_irq) {
+        rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq,
+                                     PT_IRQ_TYPE_PCI,
+                                     pci_bus_num(d->bus),
+                                     PCI_SLOT(s->dev.devfn),
+                                     intx,
+                                     0 /* isa_irq */);
+        if (rc < 0) {
+            XEN_PT_ERR(d, "unbinding of interrupt INT%c failed."
+                       " (machine irq: %i, rc: %d)"
+                       " But bravely continuing on..\n",
+                       'a' + intx, machine_irq, rc);
+        }
+    }
+
+    if (s->msi) {
+        xen_pt_msi_disable(s);
+    }
+    if (s->msix) {
+        xen_pt_msix_disable(s);
+    }
+
+    if (machine_irq) {
+        xen_pt_mapped_machine_irq[machine_irq]--;
+
+        if (xen_pt_mapped_machine_irq[machine_irq] == 0) {
+            rc = xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq);
+
+            if (rc < 0) {
+                XEN_PT_ERR(d, "unmapping of interrupt %i failed. (rc: %d)"
+                           " But bravely continuing on..\n",
+                           machine_irq, rc);
+            }
+        }
+    }
+
+    /* delete all emulated config registers */
+    xen_pt_config_delete(s);
+
+    xen_pt_unregister_regions(s);
+    memory_listener_unregister(&s->memory_listener);
+
+    xen_host_pci_device_put(&s->real_device);
+}
+
+static Property xen_pci_passthrough_properties[] = {
+    DEFINE_PROP_PCI_HOST_DEVADDR("hostaddr", XenPCIPassthroughState, hostaddr),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xen_pci_passthrough_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+    k->init = xen_pt_initfn;
+    k->exit = xen_pt_unregister_device;
+    k->config_read = xen_pt_pci_read_config;
+    k->config_write = xen_pt_pci_write_config;
+    dc->desc = "Assign an host PCI device with Xen";
+    dc->props = xen_pci_passthrough_properties;
+};
+
+static TypeInfo xen_pci_passthrough_info = {
+    .name = "xen-pci-passthrough",
+    .parent = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(XenPCIPassthroughState),
+    .class_init = xen_pci_passthrough_class_init,
+};
+
+static void xen_pci_passthrough_register_types(void)
+{
+    type_register_static(&xen_pci_passthrough_info);
+}
+
+type_init(xen_pci_passthrough_register_types)
diff --git a/hw/xen_pt.h b/hw/xen_pt.h
new file mode 100644
index 0000000000..41904ece93
--- /dev/null
+++ b/hw/xen_pt.h
@@ -0,0 +1,301 @@
+#ifndef XEN_PT_H
+#define XEN_PT_H
+
+#include "qemu-common.h"
+#include "xen_common.h"
+#include "pci.h"
+#include "xen-host-pci-device.h"
+
+void xen_pt_log(const PCIDevice *d, const char *f, ...) GCC_FMT_ATTR(2, 3);
+
+#define XEN_PT_ERR(d, _f, _a...) xen_pt_log(d, "%s: Error: "_f, __func__, ##_a)
+
+#ifdef XEN_PT_LOGGING_ENABLED
+#  define XEN_PT_LOG(d, _f, _a...)  xen_pt_log(d, "%s: " _f, __func__, ##_a)
+#  define XEN_PT_WARN(d, _f, _a...) \
+    xen_pt_log(d, "%s: Warning: "_f, __func__, ##_a)
+#else
+#  define XEN_PT_LOG(d, _f, _a...)
+#  define XEN_PT_WARN(d, _f, _a...)
+#endif
+
+#ifdef XEN_PT_DEBUG_PCI_CONFIG_ACCESS
+#  define XEN_PT_LOG_CONFIG(d, addr, val, len) \
+    xen_pt_log(d, "%s: address=0x%04x val=0x%08x len=%d\n", \
+               __func__, addr, val, len)
+#else
+#  define XEN_PT_LOG_CONFIG(d, addr, val, len)
+#endif
+
+
+/* Helper */
+#define XEN_PFN(x) ((x) >> XC_PAGE_SHIFT)
+
+typedef struct XenPTRegInfo XenPTRegInfo;
+typedef struct XenPTReg XenPTReg;
+
+typedef struct XenPCIPassthroughState XenPCIPassthroughState;
+
+/* function type for config reg */
+typedef int (*xen_pt_conf_reg_init)
+    (XenPCIPassthroughState *, XenPTRegInfo *, uint32_t real_offset,
+     uint32_t *data);
+typedef int (*xen_pt_conf_dword_write)
+    (XenPCIPassthroughState *, XenPTReg *cfg_entry,
+     uint32_t *val, uint32_t dev_value, uint32_t valid_mask);
+typedef int (*xen_pt_conf_word_write)
+    (XenPCIPassthroughState *, XenPTReg *cfg_entry,
+     uint16_t *val, uint16_t dev_value, uint16_t valid_mask);
+typedef int (*xen_pt_conf_byte_write)
+    (XenPCIPassthroughState *, XenPTReg *cfg_entry,
+     uint8_t *val, uint8_t dev_value, uint8_t valid_mask);
+typedef int (*xen_pt_conf_dword_read)
+    (XenPCIPassthroughState *, XenPTReg *cfg_entry,
+     uint32_t *val, uint32_t valid_mask);
+typedef int (*xen_pt_conf_word_read)
+    (XenPCIPassthroughState *, XenPTReg *cfg_entry,
+     uint16_t *val, uint16_t valid_mask);
+typedef int (*xen_pt_conf_byte_read)
+    (XenPCIPassthroughState *, XenPTReg *cfg_entry,
+     uint8_t *val, uint8_t valid_mask);
+
+#define XEN_PT_BAR_ALLF 0xFFFFFFFF
+#define XEN_PT_BAR_UNMAPPED (-1)
+
+#define PCI_CAP_MAX 48
+
+
+typedef enum {
+    XEN_PT_GRP_TYPE_HARDWIRED = 0,  /* 0 Hardwired reg group */
+    XEN_PT_GRP_TYPE_EMU,            /* emul reg group */
+} XenPTRegisterGroupType;
+
+typedef enum {
+    XEN_PT_BAR_FLAG_MEM = 0,        /* Memory type BAR */
+    XEN_PT_BAR_FLAG_IO,             /* I/O type BAR */
+    XEN_PT_BAR_FLAG_UPPER,          /* upper 64bit BAR */
+    XEN_PT_BAR_FLAG_UNUSED,         /* unused BAR */
+} XenPTBarFlag;
+
+
+typedef struct XenPTRegion {
+    /* BAR flag */
+    XenPTBarFlag bar_flag;
+    /* Translation of the emulated address */
+    union {
+        uint64_t maddr;
+        uint64_t pio_base;
+        uint64_t u;
+    } access;
+} XenPTRegion;
+
+/* XenPTRegInfo declaration
+ * - only for emulated register (either a part or whole bit).
+ * - for passthrough register that need special behavior (like interacting with
+ *   other component), set emu_mask to all 0 and specify r/w func properly.
+ * - do NOT use ALL F for init_val, otherwise the tbl will not be registered.
+ */
+
+/* emulated register infomation */
+struct XenPTRegInfo {
+    uint32_t offset;
+    uint32_t size;
+    uint32_t init_val;
+    /* reg read only field mask (ON:RO/ROS, OFF:other) */
+    uint32_t ro_mask;
+    /* reg emulate field mask (ON:emu, OFF:passthrough) */
+    uint32_t emu_mask;
+    /* no write back allowed */
+    uint32_t no_wb;
+    xen_pt_conf_reg_init init;
+    /* read/write function pointer
+     * for double_word/word/byte size */
+    union {
+        struct {
+            xen_pt_conf_dword_write write;
+            xen_pt_conf_dword_read read;
+        } dw;
+        struct {
+            xen_pt_conf_word_write write;
+            xen_pt_conf_word_read read;
+        } w;
+        struct {
+            xen_pt_conf_byte_write write;
+            xen_pt_conf_byte_read read;
+        } b;
+    } u;
+};
+
+/* emulated register management */
+struct XenPTReg {
+    QLIST_ENTRY(XenPTReg) entries;
+    XenPTRegInfo *reg;
+    uint32_t data; /* emulated value */
+};
+
+typedef struct XenPTRegGroupInfo XenPTRegGroupInfo;
+
+/* emul reg group size initialize method */
+typedef int (*xen_pt_reg_size_init_fn)
+    (XenPCIPassthroughState *, const XenPTRegGroupInfo *,
+     uint32_t base_offset, uint8_t *size);
+
+/* emulated register group infomation */
+struct XenPTRegGroupInfo {
+    uint8_t grp_id;
+    XenPTRegisterGroupType grp_type;
+    uint8_t grp_size;
+    xen_pt_reg_size_init_fn size_init;
+    XenPTRegInfo *emu_regs;
+};
+
+/* emul register group management table */
+typedef struct XenPTRegGroup {
+    QLIST_ENTRY(XenPTRegGroup) entries;
+    const XenPTRegGroupInfo *reg_grp;
+    uint32_t base_offset;
+    uint8_t size;
+    QLIST_HEAD(, XenPTReg) reg_tbl_list;
+} XenPTRegGroup;
+
+
+#define XEN_PT_UNASSIGNED_PIRQ (-1)
+typedef struct XenPTMSI {
+    uint16_t flags;
+    uint32_t addr_lo;  /* guest message address */
+    uint32_t addr_hi;  /* guest message upper address */
+    uint16_t data;     /* guest message data */
+    uint32_t ctrl_offset; /* saved control offset */
+    int pirq;          /* guest pirq corresponding */
+    bool initialized;  /* when guest MSI is initialized */
+    bool mapped;       /* when pirq is mapped */
+} XenPTMSI;
+
+typedef struct XenPTMSIXEntry {
+    int pirq;
+    uint64_t addr;
+    uint32_t data;
+    uint32_t vector_ctrl;
+    bool updated; /* indicate whether MSI ADDR or DATA is updated */
+} XenPTMSIXEntry;
+typedef struct XenPTMSIX {
+    uint32_t ctrl_offset;
+    bool enabled;
+    int total_entries;
+    int bar_index;
+    uint64_t table_base;
+    uint32_t table_offset_adjust; /* page align mmap */
+    uint64_t mmio_base_addr;
+    MemoryRegion mmio;
+    void *phys_iomem_base;
+    XenPTMSIXEntry msix_entry[0];
+} XenPTMSIX;
+
+struct XenPCIPassthroughState {
+    PCIDevice dev;
+
+    PCIHostDeviceAddress hostaddr;
+    bool is_virtfn;
+    XenHostPCIDevice real_device;
+    XenPTRegion bases[PCI_NUM_REGIONS]; /* Access regions */
+    QLIST_HEAD(, XenPTRegGroup) reg_grps;
+
+    uint32_t machine_irq;
+
+    XenPTMSI *msi;
+    XenPTMSIX *msix;
+
+    MemoryRegion bar[PCI_NUM_REGIONS - 1];
+    MemoryRegion rom;
+
+    MemoryListener memory_listener;
+};
+
+int xen_pt_config_init(XenPCIPassthroughState *s);
+void xen_pt_config_delete(XenPCIPassthroughState *s);
+XenPTRegGroup *xen_pt_find_reg_grp(XenPCIPassthroughState *s, uint32_t address);
+XenPTReg *xen_pt_find_reg(XenPTRegGroup *reg_grp, uint32_t address);
+int xen_pt_bar_offset_to_index(uint32_t offset);
+
+static inline pcibus_t xen_pt_get_emul_size(XenPTBarFlag flag, pcibus_t r_size)
+{
+    /* align resource size (memory type only) */
+    if (flag == XEN_PT_BAR_FLAG_MEM) {
+        return (r_size + XC_PAGE_SIZE - 1) & XC_PAGE_MASK;
+    } else {
+        return r_size;
+    }
+}
+
+/* INTx */
+/* The PCI Local Bus Specification, Rev. 3.0,
+ * Section 6.2.4 Miscellaneous Registers, pp 223
+ * outlines 5 valid values for the interrupt pin (intx).
+ *  0: For devices (or device functions) that don't use an interrupt in
+ *  1: INTA#
+ *  2: INTB#
+ *  3: INTC#
+ *  4: INTD#
+ *
+ * Xen uses the following 4 values for intx
+ *  0: INTA#
+ *  1: INTB#
+ *  2: INTC#
+ *  3: INTD#
+ *
+ * Observing that these list of values are not the same, xen_pt_pci_read_intx()
+ * uses the following mapping from hw to xen values.
+ * This seems to reflect the current usage within Xen.
+ *
+ * PCI hardware    | Xen | Notes
+ * ----------------+-----+----------------------------------------------------
+ * 0               | 0   | No interrupt
+ * 1               | 0   | INTA#
+ * 2               | 1   | INTB#
+ * 3               | 2   | INTC#
+ * 4               | 3   | INTD#
+ * any other value | 0   | This should never happen, log error message
+ */
+
+static inline uint8_t xen_pt_pci_read_intx(XenPCIPassthroughState *s)
+{
+    uint8_t v = 0;
+    xen_host_pci_get_byte(&s->real_device, PCI_INTERRUPT_PIN, &v);
+    return v;
+}
+
+static inline uint8_t xen_pt_pci_intx(XenPCIPassthroughState *s)
+{
+    uint8_t r_val = xen_pt_pci_read_intx(s);
+
+    XEN_PT_LOG(&s->dev, "intx=%i\n", r_val);
+    if (r_val < 1 || r_val > 4) {
+        XEN_PT_LOG(&s->dev, "Interrupt pin read from hardware is out of range:"
+                   " value=%i, acceptable range is 1 - 4\n", r_val);
+        r_val = 0;
+    } else {
+        r_val -= 1;
+    }
+
+    return r_val;
+}
+
+/* MSI/MSI-X */
+int xen_pt_msi_set_enable(XenPCIPassthroughState *s, bool en);
+int xen_pt_msi_setup(XenPCIPassthroughState *s);
+int xen_pt_msi_update(XenPCIPassthroughState *d);
+void xen_pt_msi_disable(XenPCIPassthroughState *s);
+
+int xen_pt_msix_init(XenPCIPassthroughState *s, uint32_t base);
+void xen_pt_msix_delete(XenPCIPassthroughState *s);
+int xen_pt_msix_update(XenPCIPassthroughState *s);
+int xen_pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index);
+void xen_pt_msix_disable(XenPCIPassthroughState *s);
+
+static inline bool xen_pt_has_msix_mapping(XenPCIPassthroughState *s, int bar)
+{
+    return s->msix && s->msix->bar_index == bar;
+}
+
+
+#endif /* !XEN_PT_H */
diff --git a/hw/xen_pt_config_init.c b/hw/xen_pt_config_init.c
new file mode 100644
index 0000000000..00eb3d997d
--- /dev/null
+++ b/hw/xen_pt_config_init.c
@@ -0,0 +1,1869 @@
+/*
+ * Copyright (c) 2007, Neocleus Corporation.
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Alex Novik <alex@neocleus.com>
+ * Allen Kay <allen.m.kay@intel.com>
+ * Guy Zana <guy@neocleus.com>
+ *
+ * This file implements direct PCI assignment to a HVM guest
+ */
+
+#include "qemu-timer.h"
+#include "xen_backend.h"
+#include "xen_pt.h"
+
+#define XEN_PT_MERGE_VALUE(value, data, val_mask) \
+    (((value) & (val_mask)) | ((data) & ~(val_mask)))
+
+#define XEN_PT_INVALID_REG          0xFFFFFFFF      /* invalid register value */
+
+/* prototype */
+
+static int xen_pt_ptr_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg,
+                               uint32_t real_offset, uint32_t *data);
+
+
+/* helper */
+
+/* A return value of 1 means the capability should NOT be exposed to guest. */
+static int xen_pt_hide_dev_cap(const XenHostPCIDevice *d, uint8_t grp_id)
+{
+    switch (grp_id) {
+    case PCI_CAP_ID_EXP:
+        /* The PCI Express Capability Structure of the VF of Intel 82599 10GbE
+         * Controller looks trivial, e.g., the PCI Express Capabilities
+         * Register is 0. We should not try to expose it to guest.
+         *
+         * The datasheet is available at
+         * http://download.intel.com/design/network/datashts/82599_datasheet.pdf
+         *
+         * See 'Table 9.7. VF PCIe Configuration Space' of the datasheet, the
+         * PCI Express Capability Structure of the VF of Intel 82599 10GbE
+         * Controller looks trivial, e.g., the PCI Express Capabilities
+         * Register is 0, so the Capability Version is 0 and
+         * xen_pt_pcie_size_init() would fail.
+         */
+        if (d->vendor_id == PCI_VENDOR_ID_INTEL &&
+            d->device_id == PCI_DEVICE_ID_INTEL_82599_SFP_VF) {
+            return 1;
+        }
+        break;
+    }
+    return 0;
+}
+
+/*   find emulate register group entry */
+XenPTRegGroup *xen_pt_find_reg_grp(XenPCIPassthroughState *s, uint32_t address)
+{
+    XenPTRegGroup *entry = NULL;
+
+    /* find register group entry */
+    QLIST_FOREACH(entry, &s->reg_grps, entries) {
+        /* check address */
+        if ((entry->base_offset <= address)
+            && ((entry->base_offset + entry->size) > address)) {
+            return entry;
+        }
+    }
+
+    /* group entry not found */
+    return NULL;
+}
+
+/* find emulate register entry */
+XenPTReg *xen_pt_find_reg(XenPTRegGroup *reg_grp, uint32_t address)
+{
+    XenPTReg *reg_entry = NULL;
+    XenPTRegInfo *reg = NULL;
+    uint32_t real_offset = 0;
+
+    /* find register entry */
+    QLIST_FOREACH(reg_entry, &reg_grp->reg_tbl_list, entries) {
+        reg = reg_entry->reg;
+        real_offset = reg_grp->base_offset + reg->offset;
+        /* check address */
+        if ((real_offset <= address)
+            && ((real_offset + reg->size) > address)) {
+            return reg_entry;
+        }
+    }
+
+    return NULL;
+}
+
+
+/****************
+ * general register functions
+ */
+
+/* register initialization function */
+
+static int xen_pt_common_reg_init(XenPCIPassthroughState *s,
+                                  XenPTRegInfo *reg, uint32_t real_offset,
+                                  uint32_t *data)
+{
+    *data = reg->init_val;
+    return 0;
+}
+
+/* Read register functions */
+
+static int xen_pt_byte_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                                uint8_t *value, uint8_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint8_t valid_emu_mask = 0;
+
+    /* emulate byte register */
+    valid_emu_mask = reg->emu_mask & valid_mask;
+    *value = XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask);
+
+    return 0;
+}
+static int xen_pt_word_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                                uint16_t *value, uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint16_t valid_emu_mask = 0;
+
+    /* emulate word register */
+    valid_emu_mask = reg->emu_mask & valid_mask;
+    *value = XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask);
+
+    return 0;
+}
+static int xen_pt_long_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                                uint32_t *value, uint32_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint32_t valid_emu_mask = 0;
+
+    /* emulate long register */
+    valid_emu_mask = reg->emu_mask & valid_mask;
+    *value = XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask);
+
+    return 0;
+}
+
+/* Write register functions */
+
+static int xen_pt_byte_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                                 uint8_t *val, uint8_t dev_value,
+                                 uint8_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint8_t writable_mask = 0;
+    uint8_t throughable_mask = 0;
+
+    /* modify emulate register */
+    writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask);
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~reg->emu_mask & valid_mask;
+    *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask);
+
+    return 0;
+}
+static int xen_pt_word_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                                 uint16_t *val, uint16_t dev_value,
+                                 uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint16_t writable_mask = 0;
+    uint16_t throughable_mask = 0;
+
+    /* modify emulate register */
+    writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask);
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~reg->emu_mask & valid_mask;
+    *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask);
+
+    return 0;
+}
+static int xen_pt_long_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                                 uint32_t *val, uint32_t dev_value,
+                                 uint32_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint32_t writable_mask = 0;
+    uint32_t throughable_mask = 0;
+
+    /* modify emulate register */
+    writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask);
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~reg->emu_mask & valid_mask;
+    *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask);
+
+    return 0;
+}
+
+
+/* XenPTRegInfo declaration
+ * - only for emulated register (either a part or whole bit).
+ * - for passthrough register that need special behavior (like interacting with
+ *   other component), set emu_mask to all 0 and specify r/w func properly.
+ * - do NOT use ALL F for init_val, otherwise the tbl will not be registered.
+ */
+
+/********************
+ * Header Type0
+ */
+
+static int xen_pt_vendor_reg_init(XenPCIPassthroughState *s,
+                                  XenPTRegInfo *reg, uint32_t real_offset,
+                                  uint32_t *data)
+{
+    *data = s->real_device.vendor_id;
+    return 0;
+}
+static int xen_pt_device_reg_init(XenPCIPassthroughState *s,
+                                  XenPTRegInfo *reg, uint32_t real_offset,
+                                  uint32_t *data)
+{
+    *data = s->real_device.device_id;
+    return 0;
+}
+static int xen_pt_status_reg_init(XenPCIPassthroughState *s,
+                                  XenPTRegInfo *reg, uint32_t real_offset,
+                                  uint32_t *data)
+{
+    XenPTRegGroup *reg_grp_entry = NULL;
+    XenPTReg *reg_entry = NULL;
+    uint32_t reg_field = 0;
+
+    /* find Header register group */
+    reg_grp_entry = xen_pt_find_reg_grp(s, PCI_CAPABILITY_LIST);
+    if (reg_grp_entry) {
+        /* find Capabilities Pointer register */
+        reg_entry = xen_pt_find_reg(reg_grp_entry, PCI_CAPABILITY_LIST);
+        if (reg_entry) {
+            /* check Capabilities Pointer register */
+            if (reg_entry->data) {
+                reg_field |= PCI_STATUS_CAP_LIST;
+            } else {
+                reg_field &= ~PCI_STATUS_CAP_LIST;
+            }
+        } else {
+            xen_shutdown_fatal_error("Internal error: Couldn't find XenPTReg*"
+                                     " for Capabilities Pointer register."
+                                     " (%s)\n", __func__);
+            return -1;
+        }
+    } else {
+        xen_shutdown_fatal_error("Internal error: Couldn't find XenPTRegGroup"
+                                 " for Header. (%s)\n", __func__);
+        return -1;
+    }
+
+    *data = reg_field;
+    return 0;
+}
+static int xen_pt_header_type_reg_init(XenPCIPassthroughState *s,
+                                       XenPTRegInfo *reg, uint32_t real_offset,
+                                       uint32_t *data)
+{
+    /* read PCI_HEADER_TYPE */
+    *data = reg->init_val | 0x80;
+    return 0;
+}
+
+/* initialize Interrupt Pin register */
+static int xen_pt_irqpin_reg_init(XenPCIPassthroughState *s,
+                                  XenPTRegInfo *reg, uint32_t real_offset,
+                                  uint32_t *data)
+{
+    *data = xen_pt_pci_read_intx(s);
+    return 0;
+}
+
+/* Command register */
+static int xen_pt_cmd_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                               uint16_t *value, uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint16_t valid_emu_mask = 0;
+    uint16_t emu_mask = reg->emu_mask;
+
+    if (s->is_virtfn) {
+        emu_mask |= PCI_COMMAND_MEMORY;
+    }
+
+    /* emulate word register */
+    valid_emu_mask = emu_mask & valid_mask;
+    *value = XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask);
+
+    return 0;
+}
+static int xen_pt_cmd_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                                uint16_t *val, uint16_t dev_value,
+                                uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint16_t writable_mask = 0;
+    uint16_t throughable_mask = 0;
+    uint16_t emu_mask = reg->emu_mask;
+
+    if (s->is_virtfn) {
+        emu_mask |= PCI_COMMAND_MEMORY;
+    }
+
+    /* modify emulate register */
+    writable_mask = ~reg->ro_mask & valid_mask;
+    cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask);
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~emu_mask & valid_mask;
+
+    if (*val & PCI_COMMAND_INTX_DISABLE) {
+        throughable_mask |= PCI_COMMAND_INTX_DISABLE;
+    } else {
+        if (s->machine_irq) {
+            throughable_mask |= PCI_COMMAND_INTX_DISABLE;
+        }
+    }
+
+    *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask);
+
+    return 0;
+}
+
+/* BAR */
+#define XEN_PT_BAR_MEM_RO_MASK    0x0000000F  /* BAR ReadOnly mask(Memory) */
+#define XEN_PT_BAR_MEM_EMU_MASK   0xFFFFFFF0  /* BAR emul mask(Memory) */
+#define XEN_PT_BAR_IO_RO_MASK     0x00000003  /* BAR ReadOnly mask(I/O) */
+#define XEN_PT_BAR_IO_EMU_MASK    0xFFFFFFFC  /* BAR emul mask(I/O) */
+
+static XenPTBarFlag xen_pt_bar_reg_parse(XenPCIPassthroughState *s,
+                                         XenPTRegInfo *reg)
+{
+    PCIDevice *d = &s->dev;
+    XenPTRegion *region = NULL;
+    PCIIORegion *r;
+    int index = 0;
+
+    /* check 64bit BAR */
+    index = xen_pt_bar_offset_to_index(reg->offset);
+    if ((0 < index) && (index < PCI_ROM_SLOT)) {
+        int type = s->real_device.io_regions[index - 1].type;
+
+        if ((type & XEN_HOST_PCI_REGION_TYPE_MEM)
+            && (type & XEN_HOST_PCI_REGION_TYPE_MEM_64)) {
+            region = &s->bases[index - 1];
+            if (region->bar_flag != XEN_PT_BAR_FLAG_UPPER) {
+                return XEN_PT_BAR_FLAG_UPPER;
+            }
+        }
+    }
+
+    /* check unused BAR */
+    r = &d->io_regions[index];
+    if (r->size == 0) {
+        return XEN_PT_BAR_FLAG_UNUSED;
+    }
+
+    /* for ExpROM BAR */
+    if (index == PCI_ROM_SLOT) {
+        return XEN_PT_BAR_FLAG_MEM;
+    }
+
+    /* check BAR I/O indicator */
+    if (s->real_device.io_regions[index].type & XEN_HOST_PCI_REGION_TYPE_IO) {
+        return XEN_PT_BAR_FLAG_IO;
+    } else {
+        return XEN_PT_BAR_FLAG_MEM;
+    }
+}
+
+static inline uint32_t base_address_with_flags(XenHostPCIIORegion *hr)
+{
+    if (hr->type & XEN_HOST_PCI_REGION_TYPE_IO) {
+        return hr->base_addr | (hr->bus_flags & ~PCI_BASE_ADDRESS_IO_MASK);
+    } else {
+        return hr->base_addr | (hr->bus_flags & ~PCI_BASE_ADDRESS_MEM_MASK);
+    }
+}
+
+static int xen_pt_bar_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg,
+                               uint32_t real_offset, uint32_t *data)
+{
+    uint32_t reg_field = 0;
+    int index;
+
+    index = xen_pt_bar_offset_to_index(reg->offset);
+    if (index < 0 || index >= PCI_NUM_REGIONS) {
+        XEN_PT_ERR(&s->dev, "Internal error: Invalid BAR index [%d].\n", index);
+        return -1;
+    }
+
+    /* set BAR flag */
+    s->bases[index].bar_flag = xen_pt_bar_reg_parse(s, reg);
+    if (s->bases[index].bar_flag == XEN_PT_BAR_FLAG_UNUSED) {
+        reg_field = XEN_PT_INVALID_REG;
+    }
+
+    *data = reg_field;
+    return 0;
+}
+static int xen_pt_bar_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                               uint32_t *value, uint32_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint32_t valid_emu_mask = 0;
+    uint32_t bar_emu_mask = 0;
+    int index;
+
+    /* get BAR index */
+    index = xen_pt_bar_offset_to_index(reg->offset);
+    if (index < 0 || index >= PCI_NUM_REGIONS) {
+        XEN_PT_ERR(&s->dev, "Internal error: Invalid BAR index [%d].\n", index);
+        return -1;
+    }
+
+    /* use fixed-up value from kernel sysfs */
+    *value = base_address_with_flags(&s->real_device.io_regions[index]);
+
+    /* set emulate mask depend on BAR flag */
+    switch (s->bases[index].bar_flag) {
+    case XEN_PT_BAR_FLAG_MEM:
+        bar_emu_mask = XEN_PT_BAR_MEM_EMU_MASK;
+        break;
+    case XEN_PT_BAR_FLAG_IO:
+        bar_emu_mask = XEN_PT_BAR_IO_EMU_MASK;
+        break;
+    case XEN_PT_BAR_FLAG_UPPER:
+        bar_emu_mask = XEN_PT_BAR_ALLF;
+        break;
+    default:
+        break;
+    }
+
+    /* emulate BAR */
+    valid_emu_mask = bar_emu_mask & valid_mask;
+    *value = XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask);
+
+    return 0;
+}
+static int xen_pt_bar_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                                uint32_t *val, uint32_t dev_value,
+                                uint32_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    XenPTRegion *base = NULL;
+    PCIDevice *d = &s->dev;
+    const PCIIORegion *r;
+    uint32_t writable_mask = 0;
+    uint32_t throughable_mask = 0;
+    uint32_t bar_emu_mask = 0;
+    uint32_t bar_ro_mask = 0;
+    uint32_t r_size = 0;
+    int index = 0;
+
+    index = xen_pt_bar_offset_to_index(reg->offset);
+    if (index < 0 || index >= PCI_NUM_REGIONS) {
+        XEN_PT_ERR(d, "Internal error: Invalid BAR index [%d].\n", index);
+        return -1;
+    }
+
+    r = &d->io_regions[index];
+    base = &s->bases[index];
+    r_size = xen_pt_get_emul_size(base->bar_flag, r->size);
+
+    /* set emulate mask and read-only mask values depend on the BAR flag */
+    switch (s->bases[index].bar_flag) {
+    case XEN_PT_BAR_FLAG_MEM:
+        bar_emu_mask = XEN_PT_BAR_MEM_EMU_MASK;
+        bar_ro_mask = XEN_PT_BAR_MEM_RO_MASK | (r_size - 1);
+        break;
+    case XEN_PT_BAR_FLAG_IO:
+        bar_emu_mask = XEN_PT_BAR_IO_EMU_MASK;
+        bar_ro_mask = XEN_PT_BAR_IO_RO_MASK | (r_size - 1);
+        break;
+    case XEN_PT_BAR_FLAG_UPPER:
+        bar_emu_mask = XEN_PT_BAR_ALLF;
+        bar_ro_mask = 0;    /* all upper 32bit are R/W */
+        break;
+    default:
+        break;
+    }
+
+    /* modify emulate register */
+    writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask;
+    cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask);
+
+    /* check whether we need to update the virtual region address or not */
+    switch (s->bases[index].bar_flag) {
+    case XEN_PT_BAR_FLAG_MEM:
+        /* nothing to do */
+        break;
+    case XEN_PT_BAR_FLAG_IO:
+        /* nothing to do */
+        break;
+    case XEN_PT_BAR_FLAG_UPPER:
+        if (cfg_entry->data) {
+            if (cfg_entry->data != (XEN_PT_BAR_ALLF & ~bar_ro_mask)) {
+                XEN_PT_WARN(d, "Guest attempt to set high MMIO Base Address. "
+                            "Ignore mapping. "
+                            "(offset: 0x%02x, high address: 0x%08x)\n",
+                            reg->offset, cfg_entry->data);
+            }
+        }
+        break;
+    default:
+        break;
+    }
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~bar_emu_mask & valid_mask;
+    *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask);
+
+    return 0;
+}
+
+/* write Exp ROM BAR */
+static int xen_pt_exp_rom_bar_reg_write(XenPCIPassthroughState *s,
+                                        XenPTReg *cfg_entry, uint32_t *val,
+                                        uint32_t dev_value, uint32_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    XenPTRegion *base = NULL;
+    PCIDevice *d = (PCIDevice *)&s->dev;
+    uint32_t writable_mask = 0;
+    uint32_t throughable_mask = 0;
+    pcibus_t r_size = 0;
+    uint32_t bar_emu_mask = 0;
+    uint32_t bar_ro_mask = 0;
+
+    r_size = d->io_regions[PCI_ROM_SLOT].size;
+    base = &s->bases[PCI_ROM_SLOT];
+    /* align memory type resource size */
+    r_size = xen_pt_get_emul_size(base->bar_flag, r_size);
+
+    /* set emulate mask and read-only mask */
+    bar_emu_mask = reg->emu_mask;
+    bar_ro_mask = (reg->ro_mask | (r_size - 1)) & ~PCI_ROM_ADDRESS_ENABLE;
+
+    /* modify emulate register */
+    writable_mask = ~bar_ro_mask & valid_mask;
+    cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask);
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~bar_emu_mask & valid_mask;
+    *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask);
+
+    return 0;
+}
+
+/* Header Type0 reg static infomation table */
+static XenPTRegInfo xen_pt_emu_reg_header0[] = {
+    /* Vendor ID reg */
+    {
+        .offset     = PCI_VENDOR_ID,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0xFFFF,
+        .emu_mask   = 0xFFFF,
+        .init       = xen_pt_vendor_reg_init,
+        .u.w.read   = xen_pt_word_reg_read,
+        .u.w.write  = xen_pt_word_reg_write,
+    },
+    /* Device ID reg */
+    {
+        .offset     = PCI_DEVICE_ID,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0xFFFF,
+        .emu_mask   = 0xFFFF,
+        .init       = xen_pt_device_reg_init,
+        .u.w.read   = xen_pt_word_reg_read,
+        .u.w.write  = xen_pt_word_reg_write,
+    },
+    /* Command reg */
+    {
+        .offset     = PCI_COMMAND,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0xF880,
+        .emu_mask   = 0x0740,
+        .init       = xen_pt_common_reg_init,
+        .u.w.read   = xen_pt_cmd_reg_read,
+        .u.w.write  = xen_pt_cmd_reg_write,
+    },
+    /* Capabilities Pointer reg */
+    {
+        .offset     = PCI_CAPABILITY_LIST,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0xFF,
+        .emu_mask   = 0xFF,
+        .init       = xen_pt_ptr_reg_init,
+        .u.b.read   = xen_pt_byte_reg_read,
+        .u.b.write  = xen_pt_byte_reg_write,
+    },
+    /* Status reg */
+    /* use emulated Cap Ptr value to initialize,
+     * so need to be declared after Cap Ptr reg
+     */
+    {
+        .offset     = PCI_STATUS,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0x06FF,
+        .emu_mask   = 0x0010,
+        .init       = xen_pt_status_reg_init,
+        .u.w.read   = xen_pt_word_reg_read,
+        .u.w.write  = xen_pt_word_reg_write,
+    },
+    /* Cache Line Size reg */
+    {
+        .offset     = PCI_CACHE_LINE_SIZE,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0x00,
+        .emu_mask   = 0xFF,
+        .init       = xen_pt_common_reg_init,
+        .u.b.read   = xen_pt_byte_reg_read,
+        .u.b.write  = xen_pt_byte_reg_write,
+    },
+    /* Latency Timer reg */
+    {
+        .offset     = PCI_LATENCY_TIMER,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0x00,
+        .emu_mask   = 0xFF,
+        .init       = xen_pt_common_reg_init,
+        .u.b.read   = xen_pt_byte_reg_read,
+        .u.b.write  = xen_pt_byte_reg_write,
+    },
+    /* Header Type reg */
+    {
+        .offset     = PCI_HEADER_TYPE,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0xFF,
+        .emu_mask   = 0x00,
+        .init       = xen_pt_header_type_reg_init,
+        .u.b.read   = xen_pt_byte_reg_read,
+        .u.b.write  = xen_pt_byte_reg_write,
+    },
+    /* Interrupt Line reg */
+    {
+        .offset     = PCI_INTERRUPT_LINE,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0x00,
+        .emu_mask   = 0xFF,
+        .init       = xen_pt_common_reg_init,
+        .u.b.read   = xen_pt_byte_reg_read,
+        .u.b.write  = xen_pt_byte_reg_write,
+    },
+    /* Interrupt Pin reg */
+    {
+        .offset     = PCI_INTERRUPT_PIN,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0xFF,
+        .emu_mask   = 0xFF,
+        .init       = xen_pt_irqpin_reg_init,
+        .u.b.read   = xen_pt_byte_reg_read,
+        .u.b.write  = xen_pt_byte_reg_write,
+    },
+    /* BAR 0 reg */
+    /* mask of BAR need to be decided later, depends on IO/MEM type */
+    {
+        .offset     = PCI_BASE_ADDRESS_0,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = xen_pt_bar_reg_init,
+        .u.dw.read  = xen_pt_bar_reg_read,
+        .u.dw.write = xen_pt_bar_reg_write,
+    },
+    /* BAR 1 reg */
+    {
+        .offset     = PCI_BASE_ADDRESS_1,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = xen_pt_bar_reg_init,
+        .u.dw.read  = xen_pt_bar_reg_read,
+        .u.dw.write = xen_pt_bar_reg_write,
+    },
+    /* BAR 2 reg */
+    {
+        .offset     = PCI_BASE_ADDRESS_2,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = xen_pt_bar_reg_init,
+        .u.dw.read  = xen_pt_bar_reg_read,
+        .u.dw.write = xen_pt_bar_reg_write,
+    },
+    /* BAR 3 reg */
+    {
+        .offset     = PCI_BASE_ADDRESS_3,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = xen_pt_bar_reg_init,
+        .u.dw.read  = xen_pt_bar_reg_read,
+        .u.dw.write = xen_pt_bar_reg_write,
+    },
+    /* BAR 4 reg */
+    {
+        .offset     = PCI_BASE_ADDRESS_4,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = xen_pt_bar_reg_init,
+        .u.dw.read  = xen_pt_bar_reg_read,
+        .u.dw.write = xen_pt_bar_reg_write,
+    },
+    /* BAR 5 reg */
+    {
+        .offset     = PCI_BASE_ADDRESS_5,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = xen_pt_bar_reg_init,
+        .u.dw.read  = xen_pt_bar_reg_read,
+        .u.dw.write = xen_pt_bar_reg_write,
+    },
+    /* Expansion ROM BAR reg */
+    {
+        .offset     = PCI_ROM_ADDRESS,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .ro_mask    = 0x000007FE,
+        .emu_mask   = 0xFFFFF800,
+        .init       = xen_pt_bar_reg_init,
+        .u.dw.read  = xen_pt_long_reg_read,
+        .u.dw.write = xen_pt_exp_rom_bar_reg_write,
+    },
+    {
+        .size = 0,
+    },
+};
+
+
+/*********************************
+ * Vital Product Data Capability
+ */
+
+/* Vital Product Data Capability Structure reg static infomation table */
+static XenPTRegInfo xen_pt_emu_reg_vpd[] = {
+    {
+        .offset     = PCI_CAP_LIST_NEXT,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0xFF,
+        .emu_mask   = 0xFF,
+        .init       = xen_pt_ptr_reg_init,
+        .u.b.read   = xen_pt_byte_reg_read,
+        .u.b.write  = xen_pt_byte_reg_write,
+    },
+    {
+        .size = 0,
+    },
+};
+
+
+/**************************************
+ * Vendor Specific Capability
+ */
+
+/* Vendor Specific Capability Structure reg static infomation table */
+static XenPTRegInfo xen_pt_emu_reg_vendor[] = {
+    {
+        .offset     = PCI_CAP_LIST_NEXT,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0xFF,
+        .emu_mask   = 0xFF,
+        .init       = xen_pt_ptr_reg_init,
+        .u.b.read   = xen_pt_byte_reg_read,
+        .u.b.write  = xen_pt_byte_reg_write,
+    },
+    {
+        .size = 0,
+    },
+};
+
+
+/*****************************
+ * PCI Express Capability
+ */
+
+static inline uint8_t get_capability_version(XenPCIPassthroughState *s,
+                                             uint32_t offset)
+{
+    uint8_t flags = pci_get_byte(s->dev.config + offset + PCI_EXP_FLAGS);
+    return flags & PCI_EXP_FLAGS_VERS;
+}
+
+static inline uint8_t get_device_type(XenPCIPassthroughState *s,
+                                      uint32_t offset)
+{
+    uint8_t flags = pci_get_byte(s->dev.config + offset + PCI_EXP_FLAGS);
+    return (flags & PCI_EXP_FLAGS_TYPE) >> 4;
+}
+
+/* initialize Link Control register */
+static int xen_pt_linkctrl_reg_init(XenPCIPassthroughState *s,
+                                    XenPTRegInfo *reg, uint32_t real_offset,
+                                    uint32_t *data)
+{
+    uint8_t cap_ver = get_capability_version(s, real_offset - reg->offset);
+    uint8_t dev_type = get_device_type(s, real_offset - reg->offset);
+
+    /* no need to initialize in case of Root Complex Integrated Endpoint
+     * with cap_ver 1.x
+     */
+    if ((dev_type == PCI_EXP_TYPE_RC_END) && (cap_ver == 1)) {
+        *data = XEN_PT_INVALID_REG;
+    }
+
+    *data = reg->init_val;
+    return 0;
+}
+/* initialize Device Control 2 register */
+static int xen_pt_devctrl2_reg_init(XenPCIPassthroughState *s,
+                                    XenPTRegInfo *reg, uint32_t real_offset,
+                                    uint32_t *data)
+{
+    uint8_t cap_ver = get_capability_version(s, real_offset - reg->offset);
+
+    /* no need to initialize in case of cap_ver 1.x */
+    if (cap_ver == 1) {
+        *data = XEN_PT_INVALID_REG;
+    }
+
+    *data = reg->init_val;
+    return 0;
+}
+/* initialize Link Control 2 register */
+static int xen_pt_linkctrl2_reg_init(XenPCIPassthroughState *s,
+                                     XenPTRegInfo *reg, uint32_t real_offset,
+                                     uint32_t *data)
+{
+    uint8_t cap_ver = get_capability_version(s, real_offset - reg->offset);
+    uint32_t reg_field = 0;
+
+    /* no need to initialize in case of cap_ver 1.x */
+    if (cap_ver == 1) {
+        reg_field = XEN_PT_INVALID_REG;
+    } else {
+        /* set Supported Link Speed */
+        uint8_t lnkcap = pci_get_byte(s->dev.config + real_offset - reg->offset
+                                      + PCI_EXP_LNKCAP);
+        reg_field |= PCI_EXP_LNKCAP_SLS & lnkcap;
+    }
+
+    *data = reg_field;
+    return 0;
+}
+
+/* PCI Express Capability Structure reg static infomation table */
+static XenPTRegInfo xen_pt_emu_reg_pcie[] = {
+    /* Next Pointer reg */
+    {
+        .offset     = PCI_CAP_LIST_NEXT,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0xFF,
+        .emu_mask   = 0xFF,
+        .init       = xen_pt_ptr_reg_init,
+        .u.b.read   = xen_pt_byte_reg_read,
+        .u.b.write  = xen_pt_byte_reg_write,
+    },
+    /* Device Capabilities reg */
+    {
+        .offset     = PCI_EXP_DEVCAP,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .ro_mask    = 0x1FFCFFFF,
+        .emu_mask   = 0x10000000,
+        .init       = xen_pt_common_reg_init,
+        .u.dw.read  = xen_pt_long_reg_read,
+        .u.dw.write = xen_pt_long_reg_write,
+    },
+    /* Device Control reg */
+    {
+        .offset     = PCI_EXP_DEVCTL,
+        .size       = 2,
+        .init_val   = 0x2810,
+        .ro_mask    = 0x8400,
+        .emu_mask   = 0xFFFF,
+        .init       = xen_pt_common_reg_init,
+        .u.w.read   = xen_pt_word_reg_read,
+        .u.w.write  = xen_pt_word_reg_write,
+    },
+    /* Link Control reg */
+    {
+        .offset     = PCI_EXP_LNKCTL,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0xFC34,
+        .emu_mask   = 0xFFFF,
+        .init       = xen_pt_linkctrl_reg_init,
+        .u.w.read   = xen_pt_word_reg_read,
+        .u.w.write  = xen_pt_word_reg_write,
+    },
+    /* Device Control 2 reg */
+    {
+        .offset     = 0x28,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0xFFE0,
+        .emu_mask   = 0xFFFF,
+        .init       = xen_pt_devctrl2_reg_init,
+        .u.w.read   = xen_pt_word_reg_read,
+        .u.w.write  = xen_pt_word_reg_write,
+    },
+    /* Link Control 2 reg */
+    {
+        .offset     = 0x30,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0xE040,
+        .emu_mask   = 0xFFFF,
+        .init       = xen_pt_linkctrl2_reg_init,
+        .u.w.read   = xen_pt_word_reg_read,
+        .u.w.write  = xen_pt_word_reg_write,
+    },
+    {
+        .size = 0,
+    },
+};
+
+
+/*********************************
+ * Power Management Capability
+ */
+
+/* read Power Management Control/Status register */
+static int xen_pt_pmcsr_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                                 uint16_t *value, uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint16_t valid_emu_mask = reg->emu_mask;
+
+    valid_emu_mask |= PCI_PM_CTRL_STATE_MASK | PCI_PM_CTRL_NO_SOFT_RESET;
+
+    valid_emu_mask = valid_emu_mask & valid_mask;
+    *value = XEN_PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask);
+
+    return 0;
+}
+/* write Power Management Control/Status register */
+static int xen_pt_pmcsr_reg_write(XenPCIPassthroughState *s,
+                                  XenPTReg *cfg_entry, uint16_t *val,
+                                  uint16_t dev_value, uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint16_t emu_mask = reg->emu_mask;
+    uint16_t writable_mask = 0;
+    uint16_t throughable_mask = 0;
+
+    emu_mask |= PCI_PM_CTRL_STATE_MASK | PCI_PM_CTRL_NO_SOFT_RESET;
+
+    /* modify emulate register */
+    writable_mask = emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask);
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~emu_mask & valid_mask;
+    *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask);
+
+    return 0;
+}
+
+/* Power Management Capability reg static infomation table */
+static XenPTRegInfo xen_pt_emu_reg_pm[] = {
+    /* Next Pointer reg */
+    {
+        .offset     = PCI_CAP_LIST_NEXT,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0xFF,
+        .emu_mask   = 0xFF,
+        .init       = xen_pt_ptr_reg_init,
+        .u.b.read   = xen_pt_byte_reg_read,
+        .u.b.write  = xen_pt_byte_reg_write,
+    },
+    /* Power Management Capabilities reg */
+    {
+        .offset     = PCI_CAP_FLAGS,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0xFFFF,
+        .emu_mask   = 0xF9C8,
+        .init       = xen_pt_common_reg_init,
+        .u.w.read   = xen_pt_word_reg_read,
+        .u.w.write  = xen_pt_word_reg_write,
+    },
+    /* PCI Power Management Control/Status reg */
+    {
+        .offset     = PCI_PM_CTRL,
+        .size       = 2,
+        .init_val   = 0x0008,
+        .ro_mask    = 0xE1FC,
+        .emu_mask   = 0x8100,
+        .init       = xen_pt_common_reg_init,
+        .u.w.read   = xen_pt_pmcsr_reg_read,
+        .u.w.write  = xen_pt_pmcsr_reg_write,
+    },
+    {
+        .size = 0,
+    },
+};
+
+
+/********************************
+ * MSI Capability
+ */
+
+/* Helper */
+static bool xen_pt_msgdata_check_type(uint32_t offset, uint16_t flags)
+{
+    /* check the offset whether matches the type or not */
+    bool is_32 = (offset == PCI_MSI_DATA_32) && !(flags & PCI_MSI_FLAGS_64BIT);
+    bool is_64 = (offset == PCI_MSI_DATA_64) &&  (flags & PCI_MSI_FLAGS_64BIT);
+    return is_32 || is_64;
+}
+
+/* Message Control register */
+static int xen_pt_msgctrl_reg_init(XenPCIPassthroughState *s,
+                                   XenPTRegInfo *reg, uint32_t real_offset,
+                                   uint32_t *data)
+{
+    PCIDevice *d = &s->dev;
+    XenPTMSI *msi = s->msi;
+    uint16_t reg_field = 0;
+
+    /* use I/O device register's value as initial value */
+    reg_field = pci_get_word(d->config + real_offset);
+
+    if (reg_field & PCI_MSI_FLAGS_ENABLE) {
+        XEN_PT_LOG(&s->dev, "MSI already enabled, disabling it first\n");
+        xen_host_pci_set_word(&s->real_device, real_offset,
+                              reg_field & ~PCI_MSI_FLAGS_ENABLE);
+    }
+    msi->flags |= reg_field;
+    msi->ctrl_offset = real_offset;
+    msi->initialized = false;
+    msi->mapped = false;
+
+    *data = reg->init_val;
+    return 0;
+}
+static int xen_pt_msgctrl_reg_write(XenPCIPassthroughState *s,
+                                    XenPTReg *cfg_entry, uint16_t *val,
+                                    uint16_t dev_value, uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    XenPTMSI *msi = s->msi;
+    uint16_t writable_mask = 0;
+    uint16_t throughable_mask = 0;
+    uint16_t raw_val;
+
+    /* Currently no support for multi-vector */
+    if (*val & PCI_MSI_FLAGS_QSIZE) {
+        XEN_PT_WARN(&s->dev, "Tries to set more than 1 vector ctrl %x\n", *val);
+    }
+
+    /* modify emulate register */
+    writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask);
+    msi->flags |= cfg_entry->data & ~PCI_MSI_FLAGS_ENABLE;
+
+    /* create value for writing to I/O device register */
+    raw_val = *val;
+    throughable_mask = ~reg->emu_mask & valid_mask;
+    *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask);
+
+    /* update MSI */
+    if (raw_val & PCI_MSI_FLAGS_ENABLE) {
+        /* setup MSI pirq for the first time */
+        if (!msi->initialized) {
+            /* Init physical one */
+            XEN_PT_LOG(&s->dev, "setup MSI\n");
+            if (xen_pt_msi_setup(s)) {
+                /* We do not broadcast the error to the framework code, so
+                 * that MSI errors are contained in MSI emulation code and
+                 * QEMU can go on running.
+                 * Guest MSI would be actually not working.
+                 */
+                *val &= ~PCI_MSI_FLAGS_ENABLE;
+                XEN_PT_WARN(&s->dev, "Can not map MSI.\n");
+                return 0;
+            }
+            if (xen_pt_msi_update(s)) {
+                *val &= ~PCI_MSI_FLAGS_ENABLE;
+                XEN_PT_WARN(&s->dev, "Can not bind MSI\n");
+                return 0;
+            }
+            msi->initialized = true;
+            msi->mapped = true;
+        }
+        msi->flags |= PCI_MSI_FLAGS_ENABLE;
+    } else {
+        msi->flags &= ~PCI_MSI_FLAGS_ENABLE;
+    }
+
+    /* pass through MSI_ENABLE bit */
+    *val &= ~PCI_MSI_FLAGS_ENABLE;
+    *val |= raw_val & PCI_MSI_FLAGS_ENABLE;
+
+    return 0;
+}
+
+/* initialize Message Upper Address register */
+static int xen_pt_msgaddr64_reg_init(XenPCIPassthroughState *s,
+                                     XenPTRegInfo *reg, uint32_t real_offset,
+                                     uint32_t *data)
+{
+    /* no need to initialize in case of 32 bit type */
+    if (!(s->msi->flags & PCI_MSI_FLAGS_64BIT)) {
+        *data = XEN_PT_INVALID_REG;
+    } else {
+        *data = reg->init_val;
+    }
+
+    return 0;
+}
+/* this function will be called twice (for 32 bit and 64 bit type) */
+/* initialize Message Data register */
+static int xen_pt_msgdata_reg_init(XenPCIPassthroughState *s,
+                                   XenPTRegInfo *reg, uint32_t real_offset,
+                                   uint32_t *data)
+{
+    uint32_t flags = s->msi->flags;
+    uint32_t offset = reg->offset;
+
+    /* check the offset whether matches the type or not */
+    if (xen_pt_msgdata_check_type(offset, flags)) {
+        *data = reg->init_val;
+    } else {
+        *data = XEN_PT_INVALID_REG;
+    }
+    return 0;
+}
+
+/* write Message Address register */
+static int xen_pt_msgaddr32_reg_write(XenPCIPassthroughState *s,
+                                      XenPTReg *cfg_entry, uint32_t *val,
+                                      uint32_t dev_value, uint32_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint32_t writable_mask = 0;
+    uint32_t throughable_mask = 0;
+    uint32_t old_addr = cfg_entry->data;
+
+    /* modify emulate register */
+    writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask);
+    s->msi->addr_lo = cfg_entry->data;
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~reg->emu_mask & valid_mask;
+    *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask);
+
+    /* update MSI */
+    if (cfg_entry->data != old_addr) {
+        if (s->msi->mapped) {
+            xen_pt_msi_update(s);
+        }
+    }
+
+    return 0;
+}
+/* write Message Upper Address register */
+static int xen_pt_msgaddr64_reg_write(XenPCIPassthroughState *s,
+                                      XenPTReg *cfg_entry, uint32_t *val,
+                                      uint32_t dev_value, uint32_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint32_t writable_mask = 0;
+    uint32_t throughable_mask = 0;
+    uint32_t old_addr = cfg_entry->data;
+
+    /* check whether the type is 64 bit or not */
+    if (!(s->msi->flags & PCI_MSI_FLAGS_64BIT)) {
+        XEN_PT_ERR(&s->dev,
+                   "Can't write to the upper address without 64 bit support\n");
+        return -1;
+    }
+
+    /* modify emulate register */
+    writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask);
+    /* update the msi_info too */
+    s->msi->addr_hi = cfg_entry->data;
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~reg->emu_mask & valid_mask;
+    *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask);
+
+    /* update MSI */
+    if (cfg_entry->data != old_addr) {
+        if (s->msi->mapped) {
+            xen_pt_msi_update(s);
+        }
+    }
+
+    return 0;
+}
+
+
+/* this function will be called twice (for 32 bit and 64 bit type) */
+/* write Message Data register */
+static int xen_pt_msgdata_reg_write(XenPCIPassthroughState *s,
+                                    XenPTReg *cfg_entry, uint16_t *val,
+                                    uint16_t dev_value, uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    XenPTMSI *msi = s->msi;
+    uint16_t writable_mask = 0;
+    uint16_t throughable_mask = 0;
+    uint16_t old_data = cfg_entry->data;
+    uint32_t offset = reg->offset;
+
+    /* check the offset whether matches the type or not */
+    if (!xen_pt_msgdata_check_type(offset, msi->flags)) {
+        /* exit I/O emulator */
+        XEN_PT_ERR(&s->dev, "the offset does not match the 32/64 bit type!\n");
+        return -1;
+    }
+
+    /* modify emulate register */
+    writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask);
+    /* update the msi_info too */
+    msi->data = cfg_entry->data;
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~reg->emu_mask & valid_mask;
+    *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask);
+
+    /* update MSI */
+    if (cfg_entry->data != old_data) {
+        if (msi->mapped) {
+            xen_pt_msi_update(s);
+        }
+    }
+
+    return 0;
+}
+
+/* MSI Capability Structure reg static infomation table */
+static XenPTRegInfo xen_pt_emu_reg_msi[] = {
+    /* Next Pointer reg */
+    {
+        .offset     = PCI_CAP_LIST_NEXT,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0xFF,
+        .emu_mask   = 0xFF,
+        .init       = xen_pt_ptr_reg_init,
+        .u.b.read   = xen_pt_byte_reg_read,
+        .u.b.write  = xen_pt_byte_reg_write,
+    },
+    /* Message Control reg */
+    {
+        .offset     = PCI_MSI_FLAGS,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0xFF8E,
+        .emu_mask   = 0x007F,
+        .init       = xen_pt_msgctrl_reg_init,
+        .u.w.read   = xen_pt_word_reg_read,
+        .u.w.write  = xen_pt_msgctrl_reg_write,
+    },
+    /* Message Address reg */
+    {
+        .offset     = PCI_MSI_ADDRESS_LO,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .ro_mask    = 0x00000003,
+        .emu_mask   = 0xFFFFFFFF,
+        .no_wb      = 1,
+        .init       = xen_pt_common_reg_init,
+        .u.dw.read  = xen_pt_long_reg_read,
+        .u.dw.write = xen_pt_msgaddr32_reg_write,
+    },
+    /* Message Upper Address reg (if PCI_MSI_FLAGS_64BIT set) */
+    {
+        .offset     = PCI_MSI_ADDRESS_HI,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .ro_mask    = 0x00000000,
+        .emu_mask   = 0xFFFFFFFF,
+        .no_wb      = 1,
+        .init       = xen_pt_msgaddr64_reg_init,
+        .u.dw.read  = xen_pt_long_reg_read,
+        .u.dw.write = xen_pt_msgaddr64_reg_write,
+    },
+    /* Message Data reg (16 bits of data for 32-bit devices) */
+    {
+        .offset     = PCI_MSI_DATA_32,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0x0000,
+        .emu_mask   = 0xFFFF,
+        .no_wb      = 1,
+        .init       = xen_pt_msgdata_reg_init,
+        .u.w.read   = xen_pt_word_reg_read,
+        .u.w.write  = xen_pt_msgdata_reg_write,
+    },
+    /* Message Data reg (16 bits of data for 64-bit devices) */
+    {
+        .offset     = PCI_MSI_DATA_64,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0x0000,
+        .emu_mask   = 0xFFFF,
+        .no_wb      = 1,
+        .init       = xen_pt_msgdata_reg_init,
+        .u.w.read   = xen_pt_word_reg_read,
+        .u.w.write  = xen_pt_msgdata_reg_write,
+    },
+    {
+        .size = 0,
+    },
+};
+
+
+/**************************************
+ * MSI-X Capability
+ */
+
+/* Message Control register for MSI-X */
+static int xen_pt_msixctrl_reg_init(XenPCIPassthroughState *s,
+                                    XenPTRegInfo *reg, uint32_t real_offset,
+                                    uint32_t *data)
+{
+    PCIDevice *d = &s->dev;
+    uint16_t reg_field = 0;
+
+    /* use I/O device register's value as initial value */
+    reg_field = pci_get_word(d->config + real_offset);
+
+    if (reg_field & PCI_MSIX_FLAGS_ENABLE) {
+        XEN_PT_LOG(d, "MSIX already enabled, disabling it first\n");
+        xen_host_pci_set_word(&s->real_device, real_offset,
+                              reg_field & ~PCI_MSIX_FLAGS_ENABLE);
+    }
+
+    s->msix->ctrl_offset = real_offset;
+
+    *data = reg->init_val;
+    return 0;
+}
+static int xen_pt_msixctrl_reg_write(XenPCIPassthroughState *s,
+                                     XenPTReg *cfg_entry, uint16_t *val,
+                                     uint16_t dev_value, uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint16_t writable_mask = 0;
+    uint16_t throughable_mask = 0;
+    int debug_msix_enabled_old;
+
+    /* modify emulate register */
+    writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask);
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~reg->emu_mask & valid_mask;
+    *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask);
+
+    /* update MSI-X */
+    if ((*val & PCI_MSIX_FLAGS_ENABLE)
+        && !(*val & PCI_MSIX_FLAGS_MASKALL)) {
+        xen_pt_msix_update(s);
+    }
+
+    debug_msix_enabled_old = s->msix->enabled;
+    s->msix->enabled = !!(*val & PCI_MSIX_FLAGS_ENABLE);
+    if (s->msix->enabled != debug_msix_enabled_old) {
+        XEN_PT_LOG(&s->dev, "%s MSI-X\n",
+                   s->msix->enabled ? "enable" : "disable");
+    }
+
+    return 0;
+}
+
+/* MSI-X Capability Structure reg static infomation table */
+static XenPTRegInfo xen_pt_emu_reg_msix[] = {
+    /* Next Pointer reg */
+    {
+        .offset     = PCI_CAP_LIST_NEXT,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0xFF,
+        .emu_mask   = 0xFF,
+        .init       = xen_pt_ptr_reg_init,
+        .u.b.read   = xen_pt_byte_reg_read,
+        .u.b.write  = xen_pt_byte_reg_write,
+    },
+    /* Message Control reg */
+    {
+        .offset     = PCI_MSI_FLAGS,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0x3FFF,
+        .emu_mask   = 0x0000,
+        .init       = xen_pt_msixctrl_reg_init,
+        .u.w.read   = xen_pt_word_reg_read,
+        .u.w.write  = xen_pt_msixctrl_reg_write,
+    },
+    {
+        .size = 0,
+    },
+};
+
+
+/****************************
+ * Capabilities
+ */
+
+/* capability structure register group size functions */
+
+static int xen_pt_reg_grp_size_init(XenPCIPassthroughState *s,
+                                    const XenPTRegGroupInfo *grp_reg,
+                                    uint32_t base_offset, uint8_t *size)
+{
+    *size = grp_reg->grp_size;
+    return 0;
+}
+/* get Vendor Specific Capability Structure register group size */
+static int xen_pt_vendor_size_init(XenPCIPassthroughState *s,
+                                   const XenPTRegGroupInfo *grp_reg,
+                                   uint32_t base_offset, uint8_t *size)
+{
+    *size = pci_get_byte(s->dev.config + base_offset + 0x02);
+    return 0;
+}
+/* get PCI Express Capability Structure register group size */
+static int xen_pt_pcie_size_init(XenPCIPassthroughState *s,
+                                 const XenPTRegGroupInfo *grp_reg,
+                                 uint32_t base_offset, uint8_t *size)
+{
+    PCIDevice *d = &s->dev;
+    uint8_t version = get_capability_version(s, base_offset);
+    uint8_t type = get_device_type(s, base_offset);
+    uint8_t pcie_size = 0;
+
+
+    /* calculate size depend on capability version and device/port type */
+    /* in case of PCI Express Base Specification Rev 1.x */
+    if (version == 1) {
+        /* The PCI Express Capabilities, Device Capabilities, and Device
+         * Status/Control registers are required for all PCI Express devices.
+         * The Link Capabilities and Link Status/Control are required for all
+         * Endpoints that are not Root Complex Integrated Endpoints. Endpoints
+         * are not required to implement registers other than those listed
+         * above and terminate the capability structure.
+         */
+        switch (type) {
+        case PCI_EXP_TYPE_ENDPOINT:
+        case PCI_EXP_TYPE_LEG_END:
+            pcie_size = 0x14;
+            break;
+        case PCI_EXP_TYPE_RC_END:
+            /* has no link */
+            pcie_size = 0x0C;
+            break;
+            /* only EndPoint passthrough is supported */
+        case PCI_EXP_TYPE_ROOT_PORT:
+        case PCI_EXP_TYPE_UPSTREAM:
+        case PCI_EXP_TYPE_DOWNSTREAM:
+        case PCI_EXP_TYPE_PCI_BRIDGE:
+        case PCI_EXP_TYPE_PCIE_BRIDGE:
+        case PCI_EXP_TYPE_RC_EC:
+        default:
+            XEN_PT_ERR(d, "Unsupported device/port type %#x.\n", type);
+            return -1;
+        }
+    }
+    /* in case of PCI Express Base Specification Rev 2.0 */
+    else if (version == 2) {
+        switch (type) {
+        case PCI_EXP_TYPE_ENDPOINT:
+        case PCI_EXP_TYPE_LEG_END:
+        case PCI_EXP_TYPE_RC_END:
+            /* For Functions that do not implement the registers,
+             * these spaces must be hardwired to 0b.
+             */
+            pcie_size = 0x3C;
+            break;
+            /* only EndPoint passthrough is supported */
+        case PCI_EXP_TYPE_ROOT_PORT:
+        case PCI_EXP_TYPE_UPSTREAM:
+        case PCI_EXP_TYPE_DOWNSTREAM:
+        case PCI_EXP_TYPE_PCI_BRIDGE:
+        case PCI_EXP_TYPE_PCIE_BRIDGE:
+        case PCI_EXP_TYPE_RC_EC:
+        default:
+            XEN_PT_ERR(d, "Unsupported device/port type %#x.\n", type);
+            return -1;
+        }
+    } else {
+        XEN_PT_ERR(d, "Unsupported capability version %#x.\n", version);
+        return -1;
+    }
+
+    *size = pcie_size;
+    return 0;
+}
+/* get MSI Capability Structure register group size */
+static int xen_pt_msi_size_init(XenPCIPassthroughState *s,
+                                const XenPTRegGroupInfo *grp_reg,
+                                uint32_t base_offset, uint8_t *size)
+{
+    PCIDevice *d = &s->dev;
+    uint16_t msg_ctrl = 0;
+    uint8_t msi_size = 0xa;
+
+    msg_ctrl = pci_get_word(d->config + (base_offset + PCI_MSI_FLAGS));
+
+    /* check if 64-bit address is capable of per-vector masking */
+    if (msg_ctrl & PCI_MSI_FLAGS_64BIT) {
+        msi_size += 4;
+    }
+    if (msg_ctrl & PCI_MSI_FLAGS_MASKBIT) {
+        msi_size += 10;
+    }
+
+    s->msi = g_new0(XenPTMSI, 1);
+    s->msi->pirq = XEN_PT_UNASSIGNED_PIRQ;
+
+    *size = msi_size;
+    return 0;
+}
+/* get MSI-X Capability Structure register group size */
+static int xen_pt_msix_size_init(XenPCIPassthroughState *s,
+                                 const XenPTRegGroupInfo *grp_reg,
+                                 uint32_t base_offset, uint8_t *size)
+{
+    int rc = 0;
+
+    rc = xen_pt_msix_init(s, base_offset);
+
+    if (rc < 0) {
+        XEN_PT_ERR(&s->dev, "Internal error: Invalid xen_pt_msix_init.\n");
+        return rc;
+    }
+
+    *size = grp_reg->grp_size;
+    return 0;
+}
+
+
+static const XenPTRegGroupInfo xen_pt_emu_reg_grps[] = {
+    /* Header Type0 reg group */
+    {
+        .grp_id      = 0xFF,
+        .grp_type    = XEN_PT_GRP_TYPE_EMU,
+        .grp_size    = 0x40,
+        .size_init   = xen_pt_reg_grp_size_init,
+        .emu_regs = xen_pt_emu_reg_header0,
+    },
+    /* PCI PowerManagement Capability reg group */
+    {
+        .grp_id      = PCI_CAP_ID_PM,
+        .grp_type    = XEN_PT_GRP_TYPE_EMU,
+        .grp_size    = PCI_PM_SIZEOF,
+        .size_init   = xen_pt_reg_grp_size_init,
+        .emu_regs = xen_pt_emu_reg_pm,
+    },
+    /* AGP Capability Structure reg group */
+    {
+        .grp_id     = PCI_CAP_ID_AGP,
+        .grp_type   = XEN_PT_GRP_TYPE_HARDWIRED,
+        .grp_size   = 0x30,
+        .size_init  = xen_pt_reg_grp_size_init,
+    },
+    /* Vital Product Data Capability Structure reg group */
+    {
+        .grp_id      = PCI_CAP_ID_VPD,
+        .grp_type    = XEN_PT_GRP_TYPE_EMU,
+        .grp_size    = 0x08,
+        .size_init   = xen_pt_reg_grp_size_init,
+        .emu_regs = xen_pt_emu_reg_vpd,
+    },
+    /* Slot Identification reg group */
+    {
+        .grp_id     = PCI_CAP_ID_SLOTID,
+        .grp_type   = XEN_PT_GRP_TYPE_HARDWIRED,
+        .grp_size   = 0x04,
+        .size_init  = xen_pt_reg_grp_size_init,
+    },
+    /* MSI Capability Structure reg group */
+    {
+        .grp_id      = PCI_CAP_ID_MSI,
+        .grp_type    = XEN_PT_GRP_TYPE_EMU,
+        .grp_size    = 0xFF,
+        .size_init   = xen_pt_msi_size_init,
+        .emu_regs = xen_pt_emu_reg_msi,
+    },
+    /* PCI-X Capabilities List Item reg group */
+    {
+        .grp_id     = PCI_CAP_ID_PCIX,
+        .grp_type   = XEN_PT_GRP_TYPE_HARDWIRED,
+        .grp_size   = 0x18,
+        .size_init  = xen_pt_reg_grp_size_init,
+    },
+    /* Vendor Specific Capability Structure reg group */
+    {
+        .grp_id      = PCI_CAP_ID_VNDR,
+        .grp_type    = XEN_PT_GRP_TYPE_EMU,
+        .grp_size    = 0xFF,
+        .size_init   = xen_pt_vendor_size_init,
+        .emu_regs = xen_pt_emu_reg_vendor,
+    },
+    /* SHPC Capability List Item reg group */
+    {
+        .grp_id     = PCI_CAP_ID_SHPC,
+        .grp_type   = XEN_PT_GRP_TYPE_HARDWIRED,
+        .grp_size   = 0x08,
+        .size_init  = xen_pt_reg_grp_size_init,
+    },
+    /* Subsystem ID and Subsystem Vendor ID Capability List Item reg group */
+    {
+        .grp_id     = PCI_CAP_ID_SSVID,
+        .grp_type   = XEN_PT_GRP_TYPE_HARDWIRED,
+        .grp_size   = 0x08,
+        .size_init  = xen_pt_reg_grp_size_init,
+    },
+    /* AGP 8x Capability Structure reg group */
+    {
+        .grp_id     = PCI_CAP_ID_AGP3,
+        .grp_type   = XEN_PT_GRP_TYPE_HARDWIRED,
+        .grp_size   = 0x30,
+        .size_init  = xen_pt_reg_grp_size_init,
+    },
+    /* PCI Express Capability Structure reg group */
+    {
+        .grp_id      = PCI_CAP_ID_EXP,
+        .grp_type    = XEN_PT_GRP_TYPE_EMU,
+        .grp_size    = 0xFF,
+        .size_init   = xen_pt_pcie_size_init,
+        .emu_regs = xen_pt_emu_reg_pcie,
+    },
+    /* MSI-X Capability Structure reg group */
+    {
+        .grp_id      = PCI_CAP_ID_MSIX,
+        .grp_type    = XEN_PT_GRP_TYPE_EMU,
+        .grp_size    = 0x0C,
+        .size_init   = xen_pt_msix_size_init,
+        .emu_regs = xen_pt_emu_reg_msix,
+    },
+    {
+        .grp_size = 0,
+    },
+};
+
+/* initialize Capabilities Pointer or Next Pointer register */
+static int xen_pt_ptr_reg_init(XenPCIPassthroughState *s,
+                               XenPTRegInfo *reg, uint32_t real_offset,
+                               uint32_t *data)
+{
+    int i;
+    uint8_t *config = s->dev.config;
+    uint32_t reg_field = pci_get_byte(config + real_offset);
+    uint8_t cap_id = 0;
+
+    /* find capability offset */
+    while (reg_field) {
+        for (i = 0; xen_pt_emu_reg_grps[i].grp_size != 0; i++) {
+            if (xen_pt_hide_dev_cap(&s->real_device,
+                                    xen_pt_emu_reg_grps[i].grp_id)) {
+                continue;
+            }
+
+            cap_id = pci_get_byte(config + reg_field + PCI_CAP_LIST_ID);
+            if (xen_pt_emu_reg_grps[i].grp_id == cap_id) {
+                if (xen_pt_emu_reg_grps[i].grp_type == XEN_PT_GRP_TYPE_EMU) {
+                    goto out;
+                }
+                /* ignore the 0 hardwired capability, find next one */
+                break;
+            }
+        }
+
+        /* next capability */
+        reg_field = pci_get_byte(config + reg_field + PCI_CAP_LIST_NEXT);
+    }
+
+out:
+    *data = reg_field;
+    return 0;
+}
+
+
+/*************
+ * Main
+ */
+
+static uint8_t find_cap_offset(XenPCIPassthroughState *s, uint8_t cap)
+{
+    uint8_t id;
+    unsigned max_cap = PCI_CAP_MAX;
+    uint8_t pos = PCI_CAPABILITY_LIST;
+    uint8_t status = 0;
+
+    if (xen_host_pci_get_byte(&s->real_device, PCI_STATUS, &status)) {
+        return 0;
+    }
+    if ((status & PCI_STATUS_CAP_LIST) == 0) {
+        return 0;
+    }
+
+    while (max_cap--) {
+        if (xen_host_pci_get_byte(&s->real_device, pos, &pos)) {
+            break;
+        }
+        if (pos < PCI_CONFIG_HEADER_SIZE) {
+            break;
+        }
+
+        pos &= ~3;
+        if (xen_host_pci_get_byte(&s->real_device,
+                                  pos + PCI_CAP_LIST_ID, &id)) {
+            break;
+        }
+
+        if (id == 0xff) {
+            break;
+        }
+        if (id == cap) {
+            return pos;
+        }
+
+        pos += PCI_CAP_LIST_NEXT;
+    }
+    return 0;
+}
+
+static int xen_pt_config_reg_init(XenPCIPassthroughState *s,
+                                  XenPTRegGroup *reg_grp, XenPTRegInfo *reg)
+{
+    XenPTReg *reg_entry;
+    uint32_t data = 0;
+    int rc = 0;
+
+    reg_entry = g_new0(XenPTReg, 1);
+    reg_entry->reg = reg;
+
+    if (reg->init) {
+        /* initialize emulate register */
+        rc = reg->init(s, reg_entry->reg,
+                       reg_grp->base_offset + reg->offset, &data);
+        if (rc < 0) {
+            free(reg_entry);
+            return rc;
+        }
+        if (data == XEN_PT_INVALID_REG) {
+            /* free unused BAR register entry */
+            free(reg_entry);
+            return 0;
+        }
+        /* set register value */
+        reg_entry->data = data;
+    }
+    /* list add register entry */
+    QLIST_INSERT_HEAD(&reg_grp->reg_tbl_list, reg_entry, entries);
+
+    return 0;
+}
+
+int xen_pt_config_init(XenPCIPassthroughState *s)
+{
+    int i, rc;
+
+    QLIST_INIT(&s->reg_grps);
+
+    for (i = 0; xen_pt_emu_reg_grps[i].grp_size != 0; i++) {
+        uint32_t reg_grp_offset = 0;
+        XenPTRegGroup *reg_grp_entry = NULL;
+
+        if (xen_pt_emu_reg_grps[i].grp_id != 0xFF) {
+            if (xen_pt_hide_dev_cap(&s->real_device,
+                                    xen_pt_emu_reg_grps[i].grp_id)) {
+                continue;
+            }
+
+            reg_grp_offset = find_cap_offset(s, xen_pt_emu_reg_grps[i].grp_id);
+
+            if (!reg_grp_offset) {
+                continue;
+            }
+        }
+
+        reg_grp_entry = g_new0(XenPTRegGroup, 1);
+        QLIST_INIT(&reg_grp_entry->reg_tbl_list);
+        QLIST_INSERT_HEAD(&s->reg_grps, reg_grp_entry, entries);
+
+        reg_grp_entry->base_offset = reg_grp_offset;
+        reg_grp_entry->reg_grp = xen_pt_emu_reg_grps + i;
+        if (xen_pt_emu_reg_grps[i].size_init) {
+            /* get register group size */
+            rc = xen_pt_emu_reg_grps[i].size_init(s, reg_grp_entry->reg_grp,
+                                                  reg_grp_offset,
+                                                  &reg_grp_entry->size);
+            if (rc < 0) {
+                xen_pt_config_delete(s);
+                return rc;
+            }
+        }
+
+        if (xen_pt_emu_reg_grps[i].grp_type == XEN_PT_GRP_TYPE_EMU) {
+            if (xen_pt_emu_reg_grps[i].emu_regs) {
+                int j = 0;
+                XenPTRegInfo *regs = xen_pt_emu_reg_grps[i].emu_regs;
+                /* initialize capability register */
+                for (j = 0; regs->size != 0; j++, regs++) {
+                    /* initialize capability register */
+                    rc = xen_pt_config_reg_init(s, reg_grp_entry, regs);
+                    if (rc < 0) {
+                        xen_pt_config_delete(s);
+                        return rc;
+                    }
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+/* delete all emulate register */
+void xen_pt_config_delete(XenPCIPassthroughState *s)
+{
+    struct XenPTRegGroup *reg_group, *next_grp;
+    struct XenPTReg *reg, *next_reg;
+
+    /* free MSI/MSI-X info table */
+    if (s->msix) {
+        xen_pt_msix_delete(s);
+    }
+    if (s->msi) {
+        g_free(s->msi);
+    }
+
+    /* free all register group entry */
+    QLIST_FOREACH_SAFE(reg_group, &s->reg_grps, entries, next_grp) {
+        /* free all register entry */
+        QLIST_FOREACH_SAFE(reg, &reg_group->reg_tbl_list, entries, next_reg) {
+            QLIST_REMOVE(reg, entries);
+            g_free(reg);
+        }
+
+        QLIST_REMOVE(reg_group, entries);
+        g_free(reg_group);
+    }
+}
diff --git a/hw/xen_pt_msi.c b/hw/xen_pt_msi.c
new file mode 100644
index 0000000000..2299cc7772
--- /dev/null
+++ b/hw/xen_pt_msi.c
@@ -0,0 +1,620 @@
+/*
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Jiang Yunhong <yunhong.jiang@intel.com>
+ *
+ * This file implements direct PCI assignment to a HVM guest
+ */
+
+#include <sys/mman.h>
+
+#include "xen_backend.h"
+#include "xen_pt.h"
+#include "apic-msidef.h"
+
+
+#define XEN_PT_AUTO_ASSIGN -1
+
+/* shift count for gflags */
+#define XEN_PT_GFLAGS_SHIFT_DEST_ID        0
+#define XEN_PT_GFLAGS_SHIFT_RH             8
+#define XEN_PT_GFLAGS_SHIFT_DM             9
+#define XEN_PT_GFLAGSSHIFT_DELIV_MODE     12
+#define XEN_PT_GFLAGSSHIFT_TRG_MODE       15
+
+
+/*
+ * Helpers
+ */
+
+static inline uint8_t msi_vector(uint32_t data)
+{
+    return (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+}
+
+static inline uint8_t msi_dest_id(uint32_t addr)
+{
+    return (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+}
+
+static inline uint32_t msi_ext_dest_id(uint32_t addr_hi)
+{
+    return addr_hi & 0xffffff00;
+}
+
+static uint32_t msi_gflags(uint32_t data, uint64_t addr)
+{
+    uint32_t result = 0;
+    int rh, dm, dest_id, deliv_mode, trig_mode;
+
+    rh = (addr >> MSI_ADDR_REDIRECTION_SHIFT) & 0x1;
+    dm = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+    dest_id = msi_dest_id(addr);
+    deliv_mode = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
+    trig_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+
+    result = dest_id | (rh << XEN_PT_GFLAGS_SHIFT_RH)
+        | (dm << XEN_PT_GFLAGS_SHIFT_DM)
+        | (deliv_mode << XEN_PT_GFLAGSSHIFT_DELIV_MODE)
+        | (trig_mode << XEN_PT_GFLAGSSHIFT_TRG_MODE);
+
+    return result;
+}
+
+static inline uint64_t msi_addr64(XenPTMSI *msi)
+{
+    return (uint64_t)msi->addr_hi << 32 | msi->addr_lo;
+}
+
+static int msi_msix_enable(XenPCIPassthroughState *s,
+                           uint32_t address,
+                           uint16_t flag,
+                           bool enable)
+{
+    uint16_t val = 0;
+
+    if (!address) {
+        return -1;
+    }
+
+    xen_host_pci_get_word(&s->real_device, address, &val);
+    if (enable) {
+        val |= flag;
+    } else {
+        val &= ~flag;
+    }
+    xen_host_pci_set_word(&s->real_device, address, val);
+    return 0;
+}
+
+static int msi_msix_setup(XenPCIPassthroughState *s,
+                          uint64_t addr,
+                          uint32_t data,
+                          int *ppirq,
+                          bool is_msix,
+                          int msix_entry,
+                          bool is_not_mapped)
+{
+    uint8_t gvec = msi_vector(data);
+    int rc = 0;
+
+    assert((!is_msix && msix_entry == 0) || is_msix);
+
+    if (gvec == 0) {
+        /* if gvec is 0, the guest is asking for a particular pirq that
+         * is passed as dest_id */
+        *ppirq = msi_ext_dest_id(addr >> 32) | msi_dest_id(addr);
+        if (!*ppirq) {
+            /* this probably identifies an misconfiguration of the guest,
+             * try the emulated path */
+            *ppirq = XEN_PT_UNASSIGNED_PIRQ;
+        } else {
+            XEN_PT_LOG(&s->dev, "requested pirq %d for MSI%s"
+                       " (vec: %#x, entry: %#x)\n",
+                       *ppirq, is_msix ? "-X" : "", gvec, msix_entry);
+        }
+    }
+
+    if (is_not_mapped) {
+        uint64_t table_base = 0;
+
+        if (is_msix) {
+            table_base = s->msix->table_base;
+        }
+
+        rc = xc_physdev_map_pirq_msi(xen_xc, xen_domid, XEN_PT_AUTO_ASSIGN,
+                                     ppirq, PCI_DEVFN(s->real_device.dev,
+                                                      s->real_device.func),
+                                     s->real_device.bus,
+                                     msix_entry, table_base);
+        if (rc) {
+            XEN_PT_ERR(&s->dev,
+                       "Mapping of MSI%s (rc: %i, vec: %#x, entry %#x)\n",
+                       is_msix ? "-X" : "", rc, gvec, msix_entry);
+            return rc;
+        }
+    }
+
+    return 0;
+}
+static int msi_msix_update(XenPCIPassthroughState *s,
+                           uint64_t addr,
+                           uint32_t data,
+                           int pirq,
+                           bool is_msix,
+                           int msix_entry,
+                           int *old_pirq)
+{
+    PCIDevice *d = &s->dev;
+    uint8_t gvec = msi_vector(data);
+    uint32_t gflags = msi_gflags(data, addr);
+    int rc = 0;
+    uint64_t table_addr = 0;
+
+    XEN_PT_LOG(d, "Updating MSI%s with pirq %d gvec %#x gflags %#x"
+               " (entry: %#x)\n",
+               is_msix ? "-X" : "", pirq, gvec, gflags, msix_entry);
+
+    if (is_msix) {
+        table_addr = s->msix->mmio_base_addr;
+    }
+
+    rc = xc_domain_update_msi_irq(xen_xc, xen_domid, gvec,
+                                  pirq, gflags, table_addr);
+
+    if (rc) {
+        XEN_PT_ERR(d, "Updating of MSI%s failed. (rc: %d)\n",
+                   is_msix ? "-X" : "", rc);
+
+        if (xc_physdev_unmap_pirq(xen_xc, xen_domid, *old_pirq)) {
+            XEN_PT_ERR(d, "Unmapping of MSI%s pirq %d failed.\n",
+                       is_msix ? "-X" : "", *old_pirq);
+        }
+        *old_pirq = XEN_PT_UNASSIGNED_PIRQ;
+    }
+    return rc;
+}
+
+static int msi_msix_disable(XenPCIPassthroughState *s,
+                            uint64_t addr,
+                            uint32_t data,
+                            int pirq,
+                            bool is_msix,
+                            bool is_binded)
+{
+    PCIDevice *d = &s->dev;
+    uint8_t gvec = msi_vector(data);
+    uint32_t gflags = msi_gflags(data, addr);
+    int rc = 0;
+
+    if (pirq == XEN_PT_UNASSIGNED_PIRQ) {
+        return 0;
+    }
+
+    if (is_binded) {
+        XEN_PT_LOG(d, "Unbind MSI%s with pirq %d, gvec %#x\n",
+                   is_msix ? "-X" : "", pirq, gvec);
+        rc = xc_domain_unbind_msi_irq(xen_xc, xen_domid, gvec, pirq, gflags);
+        if (rc) {
+            XEN_PT_ERR(d, "Unbinding of MSI%s failed. (pirq: %d, gvec: %#x)\n",
+                       is_msix ? "-X" : "", pirq, gvec);
+            return rc;
+        }
+    }
+
+    XEN_PT_LOG(d, "Unmap MSI%s pirq %d\n", is_msix ? "-X" : "", pirq);
+    rc = xc_physdev_unmap_pirq(xen_xc, xen_domid, pirq);
+    if (rc) {
+        XEN_PT_ERR(d, "Unmapping of MSI%s pirq %d failed. (rc: %i)\n",
+                   is_msix ? "-X" : "", pirq, rc);
+        return rc;
+    }
+
+    return 0;
+}
+
+/*
+ * MSI virtualization functions
+ */
+
+int xen_pt_msi_set_enable(XenPCIPassthroughState *s, bool enable)
+{
+    XEN_PT_LOG(&s->dev, "%s MSI.\n", enable ? "enabling" : "disabling");
+
+    if (!s->msi) {
+        return -1;
+    }
+
+    return msi_msix_enable(s, s->msi->ctrl_offset, PCI_MSI_FLAGS_ENABLE,
+                           enable);
+}
+
+/* setup physical msi, but don't enable it */
+int xen_pt_msi_setup(XenPCIPassthroughState *s)
+{
+    int pirq = XEN_PT_UNASSIGNED_PIRQ;
+    int rc = 0;
+    XenPTMSI *msi = s->msi;
+
+    if (msi->initialized) {
+        XEN_PT_ERR(&s->dev,
+                   "Setup physical MSI when it has been properly initialized.\n");
+        return -1;
+    }
+
+    rc = msi_msix_setup(s, msi_addr64(msi), msi->data, &pirq, false, 0, true);
+    if (rc) {
+        return rc;
+    }
+
+    if (pirq < 0) {
+        XEN_PT_ERR(&s->dev, "Invalid pirq number: %d.\n", pirq);
+        return -1;
+    }
+
+    msi->pirq = pirq;
+    XEN_PT_LOG(&s->dev, "MSI mapped with pirq %d.\n", pirq);
+
+    return 0;
+}
+
+int xen_pt_msi_update(XenPCIPassthroughState *s)
+{
+    XenPTMSI *msi = s->msi;
+    return msi_msix_update(s, msi_addr64(msi), msi->data, msi->pirq,
+                           false, 0, &msi->pirq);
+}
+
+void xen_pt_msi_disable(XenPCIPassthroughState *s)
+{
+    XenPTMSI *msi = s->msi;
+
+    if (!msi) {
+        return;
+    }
+
+    xen_pt_msi_set_enable(s, false);
+
+    msi_msix_disable(s, msi_addr64(msi), msi->data, msi->pirq, false,
+                     msi->initialized);
+
+    /* clear msi info */
+    msi->flags = 0;
+    msi->mapped = false;
+    msi->pirq = XEN_PT_UNASSIGNED_PIRQ;
+}
+
+/*
+ * MSI-X virtualization functions
+ */
+
+static int msix_set_enable(XenPCIPassthroughState *s, bool enabled)
+{
+    XEN_PT_LOG(&s->dev, "%s MSI-X.\n", enabled ? "enabling" : "disabling");
+
+    if (!s->msix) {
+        return -1;
+    }
+
+    return msi_msix_enable(s, s->msix->ctrl_offset, PCI_MSIX_FLAGS_ENABLE,
+                           enabled);
+}
+
+static int xen_pt_msix_update_one(XenPCIPassthroughState *s, int entry_nr)
+{
+    XenPTMSIXEntry *entry = NULL;
+    int pirq;
+    int rc;
+
+    if (entry_nr < 0 || entry_nr >= s->msix->total_entries) {
+        return -EINVAL;
+    }
+
+    entry = &s->msix->msix_entry[entry_nr];
+
+    if (!entry->updated) {
+        return 0;
+    }
+
+    pirq = entry->pirq;
+
+    rc = msi_msix_setup(s, entry->data, entry->data, &pirq, true, entry_nr,
+                        entry->pirq == XEN_PT_UNASSIGNED_PIRQ);
+    if (rc) {
+        return rc;
+    }
+    if (entry->pirq == XEN_PT_UNASSIGNED_PIRQ) {
+        entry->pirq = pirq;
+    }
+
+    rc = msi_msix_update(s, entry->addr, entry->data, pirq, true,
+                         entry_nr, &entry->pirq);
+
+    if (!rc) {
+        entry->updated = false;
+    }
+
+    return rc;
+}
+
+int xen_pt_msix_update(XenPCIPassthroughState *s)
+{
+    XenPTMSIX *msix = s->msix;
+    int i;
+
+    for (i = 0; i < msix->total_entries; i++) {
+        xen_pt_msix_update_one(s, i);
+    }
+
+    return 0;
+}
+
+void xen_pt_msix_disable(XenPCIPassthroughState *s)
+{
+    int i = 0;
+
+    msix_set_enable(s, false);
+
+    for (i = 0; i < s->msix->total_entries; i++) {
+        XenPTMSIXEntry *entry = &s->msix->msix_entry[i];
+
+        msi_msix_disable(s, entry->addr, entry->data, entry->pirq, true, true);
+
+        /* clear MSI-X info */
+        entry->pirq = XEN_PT_UNASSIGNED_PIRQ;
+        entry->updated = false;
+    }
+}
+
+int xen_pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index)
+{
+    XenPTMSIXEntry *entry;
+    int i, ret;
+
+    if (!(s->msix && s->msix->bar_index == bar_index)) {
+        return 0;
+    }
+
+    for (i = 0; i < s->msix->total_entries; i++) {
+        entry = &s->msix->msix_entry[i];
+        if (entry->pirq != XEN_PT_UNASSIGNED_PIRQ) {
+            ret = xc_domain_unbind_pt_irq(xen_xc, xen_domid, entry->pirq,
+                                          PT_IRQ_TYPE_MSI, 0, 0, 0, 0);
+            if (ret) {
+                XEN_PT_ERR(&s->dev, "unbind MSI-X entry %d failed\n",
+                           entry->pirq);
+            }
+            entry->updated = true;
+        }
+    }
+    return xen_pt_msix_update(s);
+}
+
+static uint32_t get_entry_value(XenPTMSIXEntry *e, int offset)
+{
+    switch (offset) {
+    case PCI_MSIX_ENTRY_LOWER_ADDR:
+        return e->addr & UINT32_MAX;
+    case PCI_MSIX_ENTRY_UPPER_ADDR:
+        return e->addr >> 32;
+    case PCI_MSIX_ENTRY_DATA:
+        return e->data;
+    case PCI_MSIX_ENTRY_VECTOR_CTRL:
+        return e->vector_ctrl;
+    default:
+        return 0;
+    }
+}
+
+static void set_entry_value(XenPTMSIXEntry *e, int offset, uint32_t val)
+{
+    switch (offset) {
+    case PCI_MSIX_ENTRY_LOWER_ADDR:
+        e->addr = (e->addr & ((uint64_t)UINT32_MAX << 32)) | val;
+        break;
+    case PCI_MSIX_ENTRY_UPPER_ADDR:
+        e->addr = (uint64_t)val << 32 | (e->addr & UINT32_MAX);
+        break;
+    case PCI_MSIX_ENTRY_DATA:
+        e->data = val;
+        break;
+    case PCI_MSIX_ENTRY_VECTOR_CTRL:
+        e->vector_ctrl = val;
+        break;
+    }
+}
+
+static void pci_msix_write(void *opaque, target_phys_addr_t addr,
+                           uint64_t val, unsigned size)
+{
+    XenPCIPassthroughState *s = opaque;
+    XenPTMSIX *msix = s->msix;
+    XenPTMSIXEntry *entry;
+    int entry_nr, offset;
+
+    entry_nr = addr / PCI_MSIX_ENTRY_SIZE;
+    if (entry_nr < 0 || entry_nr >= msix->total_entries) {
+        XEN_PT_ERR(&s->dev, "asked MSI-X entry '%i' invalid!\n", entry_nr);
+        return;
+    }
+    entry = &msix->msix_entry[entry_nr];
+    offset = addr % PCI_MSIX_ENTRY_SIZE;
+
+    if (offset != PCI_MSIX_ENTRY_VECTOR_CTRL) {
+        const volatile uint32_t *vec_ctrl;
+
+        if (get_entry_value(entry, offset) == val) {
+            return;
+        }
+
+        /*
+         * If Xen intercepts the mask bit access, entry->vec_ctrl may not be
+         * up-to-date. Read from hardware directly.
+         */
+        vec_ctrl = s->msix->phys_iomem_base + entry_nr * PCI_MSIX_ENTRY_SIZE
+            + PCI_MSIX_ENTRY_VECTOR_CTRL;
+
+        if (msix->enabled && !(*vec_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT)) {
+            XEN_PT_ERR(&s->dev, "Can't update msix entry %d since MSI-X is"
+                       " already enabled.\n", entry_nr);
+            return;
+        }
+
+        entry->updated = true;
+    }
+
+    set_entry_value(entry, offset, val);
+
+    if (offset == PCI_MSIX_ENTRY_VECTOR_CTRL) {
+        if (msix->enabled && !(val & PCI_MSIX_ENTRY_CTRL_MASKBIT)) {
+            xen_pt_msix_update_one(s, entry_nr);
+        }
+    }
+}
+
+static uint64_t pci_msix_read(void *opaque, target_phys_addr_t addr,
+                              unsigned size)
+{
+    XenPCIPassthroughState *s = opaque;
+    XenPTMSIX *msix = s->msix;
+    int entry_nr, offset;
+
+    entry_nr = addr / PCI_MSIX_ENTRY_SIZE;
+    if (entry_nr < 0) {
+        XEN_PT_ERR(&s->dev, "asked MSI-X entry '%i' invalid!\n", entry_nr);
+        return 0;
+    }
+
+    offset = addr % PCI_MSIX_ENTRY_SIZE;
+
+    if (addr < msix->total_entries * PCI_MSIX_ENTRY_SIZE) {
+        return get_entry_value(&msix->msix_entry[entry_nr], offset);
+    } else {
+        /* Pending Bit Array (PBA) */
+        return *(uint32_t *)(msix->phys_iomem_base + addr);
+    }
+}
+
+static const MemoryRegionOps pci_msix_ops = {
+    .read = pci_msix_read,
+    .write = pci_msix_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+        .unaligned = false,
+    },
+};
+
+int xen_pt_msix_init(XenPCIPassthroughState *s, uint32_t base)
+{
+    uint8_t id = 0;
+    uint16_t control = 0;
+    uint32_t table_off = 0;
+    int i, total_entries, bar_index;
+    XenHostPCIDevice *hd = &s->real_device;
+    PCIDevice *d = &s->dev;
+    int fd = -1;
+    XenPTMSIX *msix = NULL;
+    int rc = 0;
+
+    rc = xen_host_pci_get_byte(hd, base + PCI_CAP_LIST_ID, &id);
+    if (rc) {
+        return rc;
+    }
+
+    if (id != PCI_CAP_ID_MSIX) {
+        XEN_PT_ERR(d, "Invalid id %#x base %#x\n", id, base);
+        return -1;
+    }
+
+    xen_host_pci_get_word(hd, base + PCI_MSIX_FLAGS, &control);
+    total_entries = control & PCI_MSIX_FLAGS_QSIZE;
+    total_entries += 1;
+
+    s->msix = g_malloc0(sizeof (XenPTMSIX)
+                        + total_entries * sizeof (XenPTMSIXEntry));
+    msix = s->msix;
+
+    msix->total_entries = total_entries;
+    for (i = 0; i < total_entries; i++) {
+        msix->msix_entry[i].pirq = XEN_PT_UNASSIGNED_PIRQ;
+    }
+
+    memory_region_init_io(&msix->mmio, &pci_msix_ops, s, "xen-pci-pt-msix",
+                          (total_entries * PCI_MSIX_ENTRY_SIZE
+                           + XC_PAGE_SIZE - 1)
+                          & XC_PAGE_MASK);
+
+    xen_host_pci_get_long(hd, base + PCI_MSIX_TABLE, &table_off);
+    bar_index = msix->bar_index = table_off & PCI_MSIX_FLAGS_BIRMASK;
+    table_off = table_off & ~PCI_MSIX_FLAGS_BIRMASK;
+    msix->table_base = s->real_device.io_regions[bar_index].base_addr;
+    XEN_PT_LOG(d, "get MSI-X table BAR base 0x%"PRIx64"\n", msix->table_base);
+
+    fd = open("/dev/mem", O_RDWR);
+    if (fd == -1) {
+        rc = -errno;
+        XEN_PT_ERR(d, "Can't open /dev/mem: %s\n", strerror(errno));
+        goto error_out;
+    }
+    XEN_PT_LOG(d, "table_off = %#x, total_entries = %d\n",
+               table_off, total_entries);
+    msix->table_offset_adjust = table_off & 0x0fff;
+    msix->phys_iomem_base =
+        mmap(NULL,
+             total_entries * PCI_MSIX_ENTRY_SIZE + msix->table_offset_adjust,
+             PROT_READ,
+             MAP_SHARED | MAP_LOCKED,
+             fd,
+             msix->table_base + table_off - msix->table_offset_adjust);
+    close(fd);
+    if (msix->phys_iomem_base == MAP_FAILED) {
+        rc = -errno;
+        XEN_PT_ERR(d, "Can't map physical MSI-X table: %s\n", strerror(errno));
+        goto error_out;
+    }
+    msix->phys_iomem_base = (char *)msix->phys_iomem_base
+        + msix->table_offset_adjust;
+
+    XEN_PT_LOG(d, "mapping physical MSI-X table to %p\n",
+               msix->phys_iomem_base);
+
+    memory_region_add_subregion_overlap(&s->bar[bar_index], table_off,
+                                        &msix->mmio,
+                                        2); /* Priority: pci default + 1 */
+
+    return 0;
+
+error_out:
+    memory_region_destroy(&msix->mmio);
+    g_free(s->msix);
+    s->msix = NULL;
+    return rc;
+}
+
+void xen_pt_msix_delete(XenPCIPassthroughState *s)
+{
+    XenPTMSIX *msix = s->msix;
+
+    if (!msix) {
+        return;
+    }
+
+    /* unmap the MSI-X memory mapped register area */
+    if (msix->phys_iomem_base) {
+        XEN_PT_LOG(&s->dev, "unmapping physical MSI-X table from %p\n",
+                   msix->phys_iomem_base);
+        munmap(msix->phys_iomem_base, msix->total_entries * PCI_MSIX_ENTRY_SIZE
+               + msix->table_offset_adjust);
+    }
+
+    memory_region_del_subregion(&s->bar[msix->bar_index], &msix->mmio);
+    memory_region_destroy(&msix->mmio);
+
+    g_free(s->msix);
+    s->msix = NULL;
+}
diff --git a/hw/xenfb.c b/hw/xenfb.c
index 1bcf171b01..338800a4d9 100644
--- a/hw/xenfb.c
+++ b/hw/xenfb.c
@@ -35,19 +35,16 @@
 #include <string.h>
 #include <time.h>
 
-#include <xs.h>
-#include <xenctrl.h>
-#include <xen/event_channel.h>
-#include <xen/io/xenbus.h>
-#include <xen/io/fbif.h>
-#include <xen/io/kbdif.h>
-#include <xen/io/protocols.h>
-
 #include "hw.h"
 #include "console.h"
 #include "qemu-char.h"
 #include "xen_backend.h"
 
+#include <xen/event_channel.h>
+#include <xen/io/fbif.h>
+#include <xen/io/kbdif.h>
+#include <xen/io/protocols.h>
+
 #ifndef BTN_LEFT
 #define BTN_LEFT 0x110 /* from <linux/input.h> */
 #endif
diff --git a/hw/xgmac.c b/hw/xgmac.c
index dd4bdc46f5..a91ef608f1 100644
--- a/hw/xgmac.c
+++ b/hw/xgmac.c
@@ -308,7 +308,7 @@ static const MemoryRegionOps enet_mem_ops = {
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
-static int eth_can_rx(VLANClientState *nc)
+static int eth_can_rx(NetClientState *nc)
 {
     struct XgmacState *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -316,7 +316,7 @@ static int eth_can_rx(VLANClientState *nc)
     return s->regs[DMA_CONTROL] & DMA_CONTROL_SR;
 }
 
-static ssize_t eth_rx(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     struct XgmacState *s = DO_UPCAST(NICState, nc, nc)->opaque;
     static const unsigned char sa_bcast[6] = {0xff, 0xff, 0xff,
@@ -364,14 +364,14 @@ out:
     return ret;
 }
 
-static void eth_cleanup(VLANClientState *nc)
+static void eth_cleanup(NetClientState *nc)
 {
     struct XgmacState *s = DO_UPCAST(NICState, nc, nc)->opaque;
     s->nic = NULL;
 }
 
 static NetClientInfo net_xgmac_enet_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = eth_can_rx,
     .receive = eth_rx,
diff --git a/hw/xilinx_axienet.c b/hw/xilinx_axienet.c
index 2e8d8a59ba..9b08c62912 100644
--- a/hw/xilinx_axienet.c
+++ b/hw/xilinx_axienet.c
@@ -612,7 +612,7 @@ static const MemoryRegionOps enet_ops = {
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
-static int eth_can_rx(VLANClientState *nc)
+static int eth_can_rx(NetClientState *nc)
 {
     struct XilinxAXIEnet *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -635,7 +635,7 @@ static int enet_match_addr(const uint8_t *buf, uint32_t f0, uint32_t f1)
     return match;
 }
 
-static ssize_t eth_rx(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     struct XilinxAXIEnet *s = DO_UPCAST(NICState, nc, nc)->opaque;
     static const unsigned char sa_bcast[6] = {0xff, 0xff, 0xff,
@@ -648,7 +648,6 @@ static ssize_t eth_rx(VLANClientState *nc, const uint8_t *buf, size_t size)
     uint16_t csum16;
     int i;
 
-    s = s;
     DENET(qemu_log("%s: %zd bytes\n", __func__, size));
 
     unicast = ~buf[0] & 0x1;
@@ -780,7 +779,7 @@ static ssize_t eth_rx(VLANClientState *nc, const uint8_t *buf, size_t size)
     return size;
 }
 
-static void eth_cleanup(VLANClientState *nc)
+static void eth_cleanup(NetClientState *nc)
 {
     /* FIXME.  */
     struct XilinxAXIEnet *s = DO_UPCAST(NICState, nc, nc)->opaque;
@@ -832,7 +831,7 @@ axienet_stream_push(void *opaque, uint8_t *buf, size_t size, uint32_t *hdr)
 }
 
 static NetClientInfo net_xilinx_enet_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = eth_can_rx,
     .receive = eth_rx,
diff --git a/hw/xilinx_ethlite.c b/hw/xilinx_ethlite.c
index affbb8bfff..56ca620dd7 100644
--- a/hw/xilinx_ethlite.c
+++ b/hw/xilinx_ethlite.c
@@ -160,7 +160,7 @@ static const MemoryRegionOps eth_ops = {
     }
 };
 
-static int eth_can_rx(VLANClientState *nc)
+static int eth_can_rx(NetClientState *nc)
 {
     struct xlx_ethlite *s = DO_UPCAST(NICState, nc, nc)->opaque;
     int r;
@@ -168,7 +168,7 @@ static int eth_can_rx(VLANClientState *nc)
     return r;
 }
 
-static ssize_t eth_rx(VLANClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     struct xlx_ethlite *s = DO_UPCAST(NICState, nc, nc)->opaque;
     unsigned int rxbase = s->rxbuf * (0x800 / 4);
@@ -194,7 +194,7 @@ static ssize_t eth_rx(VLANClientState *nc, const uint8_t *buf, size_t size)
     return size;
 }
 
-static void eth_cleanup(VLANClientState *nc)
+static void eth_cleanup(NetClientState *nc)
 {
     struct xlx_ethlite *s = DO_UPCAST(NICState, nc, nc)->opaque;
 
@@ -202,7 +202,7 @@ static void eth_cleanup(VLANClientState *nc)
 }
 
 static NetClientInfo net_xilinx_ethlite_info = {
-    .type = NET_CLIENT_TYPE_NIC,
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = eth_can_rx,
     .receive = eth_rx,
diff --git a/hw/xio3130_downstream.c b/hw/xio3130_downstream.c
index 56d1b353d0..0d8a5e7020 100644
--- a/hw/xio3130_downstream.c
+++ b/hw/xio3130_downstream.c
@@ -60,7 +60,6 @@ static int xio3130_downstream_initfn(PCIDevice *d)
     PCIEPort *p = DO_UPCAST(PCIEPort, br, br);
     PCIESlot *s = DO_UPCAST(PCIESlot, port, p);
     int rc;
-    int tmp;
 
     rc = pci_bridge_initfn(d);
     if (rc < 0) {
@@ -108,12 +107,11 @@ err_pcie_cap:
 err_msi:
     msi_uninit(d);
 err_bridge:
-    tmp = pci_bridge_exitfn(d);
-    assert(!tmp);
+    pci_bridge_exitfn(d);
     return rc;
 }
 
-static int xio3130_downstream_exitfn(PCIDevice *d)
+static void xio3130_downstream_exitfn(PCIDevice *d)
 {
     PCIBridge* br = DO_UPCAST(PCIBridge, dev, d);
     PCIEPort *p = DO_UPCAST(PCIEPort, br, br);
@@ -123,7 +121,7 @@ static int xio3130_downstream_exitfn(PCIDevice *d)
     pcie_chassis_del_slot(s);
     pcie_cap_exit(d);
     msi_uninit(d);
-    return pci_bridge_exitfn(d);
+    pci_bridge_exitfn(d);
 }
 
 PCIESlot *xio3130_downstream_init(PCIBus *bus, int devfn, bool multifunction,
diff --git a/hw/xio3130_upstream.c b/hw/xio3130_upstream.c
index 79725813a2..d46b86c74d 100644
--- a/hw/xio3130_upstream.c
+++ b/hw/xio3130_upstream.c
@@ -56,7 +56,6 @@ static int xio3130_upstream_initfn(PCIDevice *d)
     PCIBridge* br = DO_UPCAST(PCIBridge, dev, d);
     PCIEPort *p = DO_UPCAST(PCIEPort, br, br);
     int rc;
-    int tmp;
 
     rc = pci_bridge_initfn(d);
     if (rc < 0) {
@@ -95,17 +94,16 @@ err:
 err_msi:
     msi_uninit(d);
 err_bridge:
-    tmp =  pci_bridge_exitfn(d);
-    assert(!tmp);
+    pci_bridge_exitfn(d);
     return rc;
 }
 
-static int xio3130_upstream_exitfn(PCIDevice *d)
+static void xio3130_upstream_exitfn(PCIDevice *d)
 {
     pcie_aer_exit(d);
     pcie_cap_exit(d);
     msi_uninit(d);
-    return pci_bridge_exitfn(d);
+    pci_bridge_exitfn(d);
 }
 
 PCIEPort *xio3130_upstream_init(PCIBus *bus, int devfn, bool multifunction,
diff --git a/hw/xtensa_lx60.c b/hw/xtensa_lx60.c
index 152eed95d8..3653f65b1e 100644
--- a/hw/xtensa_lx60.c
+++ b/hw/xtensa_lx60.c
@@ -173,7 +173,7 @@ static void lx_init(const LxBoardDesc *board,
     int n;
 
     if (!cpu_model) {
-        cpu_model = "dc232b";
+        cpu_model = XTENSA_DEFAULT_CPU_MODEL;
     }
 
     for (n = 0; n < smp_cpus; n++) {
@@ -201,7 +201,7 @@ static void lx_init(const LxBoardDesc *board,
     memory_region_init(system_io, "lx60.io", 224 * 1024 * 1024);
     memory_region_add_subregion(system_memory, 0xf0000000, system_io);
     lx60_fpga_init(system_io, 0x0d020000);
-    if (nd_table[0].vlan) {
+    if (nd_table[0].used) {
         lx60_net_init(system_io, 0x0d030000, 0x0d030400, 0x0d800000,
                 xtensa_get_extint(env, 1), nd_table);
     }
@@ -300,14 +300,14 @@ static void xtensa_lx200_init(ram_addr_t ram_size,
 
 static QEMUMachine xtensa_lx60_machine = {
     .name = "lx60",
-    .desc = "lx60 EVB (dc232b)",
+    .desc = "lx60 EVB (" XTENSA_DEFAULT_CPU_MODEL ")",
     .init = xtensa_lx60_init,
     .max_cpus = 4,
 };
 
 static QEMUMachine xtensa_lx200_machine = {
     .name = "lx200",
-    .desc = "lx200 EVB (dc232b)",
+    .desc = "lx200 EVB (" XTENSA_DEFAULT_CPU_MODEL ")",
     .init = xtensa_lx200_init,
     .max_cpus = 4,
 };
diff --git a/hw/xtensa_sim.c b/hw/xtensa_sim.c
index 1ce07fb899..831460b7c4 100644
--- a/hw/xtensa_sim.c
+++ b/hw/xtensa_sim.c
@@ -102,7 +102,7 @@ static void xtensa_sim_init(ram_addr_t ram_size,
                      const char *initrd_filename, const char *cpu_model)
 {
     if (!cpu_model) {
-        cpu_model = "dc232b";
+        cpu_model = XTENSA_DEFAULT_CPU_MODEL;
     }
     sim_init(ram_size, boot_device, kernel_filename, kernel_cmdline,
             initrd_filename, cpu_model);
@@ -110,7 +110,8 @@ static void xtensa_sim_init(ram_addr_t ram_size,
 
 static QEMUMachine xtensa_sim_machine = {
     .name = "sim",
-    .desc = "sim machine (dc232b)",
+    .desc = "sim machine (" XTENSA_DEFAULT_CPU_MODEL ")",
+    .is_default = true,
     .init = xtensa_sim_init,
     .max_cpus = 4,
 };