summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--.gitignore6
-rw-r--r--MAINTAINERS6
-rw-r--r--Makefile26
-rw-r--r--Makefile.objs4
-rw-r--r--arch_init.c111
-rw-r--r--audio/audio.c1
-rw-r--r--bitops.h90
-rw-r--r--block-migration.c8
-rw-r--r--block.c273
-rw-r--r--block.h5
-rw-r--r--block/blkdebug.c107
-rw-r--r--block/curl.c6
-rw-r--r--block/iscsi.c155
-rw-r--r--block/nbd.c18
-rw-r--r--block/qcow.c4
-rw-r--r--block/qcow2-refcount.c7
-rw-r--r--block/qcow2-snapshot.c6
-rw-r--r--block/qcow2.c23
-rw-r--r--block/qed.c12
-rw-r--r--block/raw.c2
-rw-r--r--block/rbd.c4
-rw-r--r--block/sheepdog.c136
-rw-r--r--blockdev.c4
-rwxr-xr-xconfigure39
-rw-r--r--console.c26
-rw-r--r--cpu-all.h1
-rw-r--r--cpu-common.h4
-rw-r--r--cutils.c234
-rw-r--r--default-configs/i386-softmmu.mak1
-rw-r--r--disas.c19
-rw-r--r--docs/usb-storage.txt38
-rw-r--r--exec-obsolete.h50
-rw-r--r--exec.c52
-rw-r--r--hmp-commands.hx6
-rw-r--r--hmp.c20
-rw-r--r--hmp.h2
-rw-r--r--hw/9pfs/virtio-9p.c8
-rw-r--r--hw/Makefile.objs1
-rw-r--r--hw/arm/Makefile.objs3
-rw-r--r--hw/cadence_gem.c4
-rw-r--r--hw/esp.c574
-rw-r--r--hw/exynos4210.c10
-rw-r--r--hw/exynos4210_mct.c4
-rw-r--r--hw/exynos4210_pwm.c2
-rw-r--r--hw/exynos4210_rtc.c595
-rw-r--r--hw/fdc.c116
-rw-r--r--hw/ide/atapi.c31
-rw-r--r--hw/imx.h34
-rw-r--r--hw/imx_avic.c408
-rw-r--r--hw/imx_ccm.c321
-rw-r--r--hw/imx_serial.c467
-rw-r--r--hw/imx_timer.c689
-rw-r--r--hw/kzm.c154
-rw-r--r--hw/megasas.c2198
-rw-r--r--hw/mfi.h1248
-rw-r--r--hw/omap.h8
-rw-r--r--hw/pci-stub.c15
-rw-r--r--hw/pci_ids.h3
-rw-r--r--hw/qdev-properties.c2
-rw-r--r--hw/qdev.h2
-rw-r--r--hw/rtl8139.c2
-rw-r--r--hw/scsi-bus.c99
-rw-r--r--hw/scsi-defs.h15
-rw-r--r--hw/scsi-disk.c69
-rw-r--r--hw/scsi-generic.c13
-rw-r--r--hw/scsi.h1
-rw-r--r--hw/sh_serial.c6
-rw-r--r--hw/usb.h3
-rw-r--r--hw/usb/Makefile.objs1
-rw-r--r--hw/usb/core.c23
-rw-r--r--hw/usb/dev-uas.c779
-rw-r--r--hw/usb/hcd-ehci.c175
-rw-r--r--hw/usb/hcd-uhci.c12
-rw-r--r--hw/usb/host-linux.c15
-rw-r--r--hw/usb/redirect.c2
-rw-r--r--hw/vga.c14
-rw-r--r--hw/vga_int.h2
-rw-r--r--hw/virtio-balloon.c4
-rw-r--r--hw/virtio-net.c4
-rw-r--r--hw/virtio-scsi.c6
-rw-r--r--hw/virtio-serial-bus.c10
-rw-r--r--iov.c193
-rw-r--r--iov.h77
-rw-r--r--kvm-all.c13
-rw-r--r--kvm.h2
-rw-r--r--linux-aio.c4
-rw-r--r--migration.c24
-rw-r--r--migration.h9
-rw-r--r--monitor.c61
-rw-r--r--net.c2
-rw-r--r--net/slirp.c70
-rw-r--r--oslib-posix.c3
-rw-r--r--pc-bios/keymaps/fi2
-rw-r--r--posix-aio-compat.c8
-rw-r--r--qapi-schema.json79
-rw-r--r--qapi/qapi-visit-core.c2
-rw-r--r--qemu-common.h57
-rw-r--r--qemu-coroutine-io.c83
-rw-r--r--qemu-doc.texi2
-rw-r--r--qemu-log.c32
-rw-r--r--qemu-log.h12
-rw-r--r--qemu-options.hx52
-rw-r--r--qemu-tech.texi2
-rw-r--r--qmp-commands.hx14
-rw-r--r--roms/Makefile17
-rw-r--r--roms/config.vga.cirrus3
-rw-r--r--roms/config.vga.isavga3
-rw-r--r--roms/config.vga.qxl6
-rw-r--r--roms/config.vga.stdvga3
-rw-r--r--roms/config.vga.vmware6
-rw-r--r--savevm.c24
-rwxr-xr-xscripts/make-release24
-rw-r--r--sysemu.h3
-rw-r--r--target-arm/cpu.c6
-rw-r--r--target-arm/cpu.h15
-rw-r--r--target-arm/helper.c441
-rw-r--r--target-arm/machine.c10
-rw-r--r--target-arm/translate.c4
-rw-r--r--target-i386/translate.c104
-rw-r--r--target-s390x/kvm.c35
-rw-r--r--targphys.h16
-rw-r--r--tests/Makefile2
-rw-r--r--tests/fdc-test.c50
-rw-r--r--tests/libqtest.c29
-rw-r--r--tests/test-iov.c260
-rw-r--r--trace-events123
-rw-r--r--vl.c10
-rw-r--r--vmstate.h2
128 files changed, 10369 insertions, 1288 deletions
diff --git a/.gitignore b/.gitignore
index 9859c7d746..824c0d24df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,12 +41,14 @@ qemu-io
 qemu-ga
 qemu-bridge-helper
 qemu-monitor.texi
+vscclient
 QMP/qmp-commands.txt
 test-coroutine
 test-qmp-input-visitor
 test-qmp-output-visitor
 test-string-input-visitor
 test-string-output-visitor
+test-visitor-serialization
 fsdev/virtfs-proxy-helper.1
 fsdev/virtfs-proxy-helper.pod
 .gdbinit
@@ -69,6 +71,10 @@ fsdev/virtfs-proxy-helper.pod
 *.vr
 *.d
 *.o
+*.lo
+*.la
+*.pc
+.libs
 *.swp
 *.orig
 .pc
diff --git a/MAINTAINERS b/MAINTAINERS
index 2a514fd167..30ed56dd77 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -207,6 +207,12 @@ M: qemu-devel@nongnu.org
 S: Orphan
 F: hw/gumstix.c
 
+i.MX31
+M: Peter Chubb <peter.chubb@nicta.com.au>
+S: Odd fixes
+F: hw/imx*
+F: hw/kzm.c
+
 Integrator CP
 M: Paul Brook <paul@codesourcery.com>
 M: Peter Maydell <peter.maydell@linaro.org>
diff --git a/Makefile b/Makefile
index bad0e3142a..ab82ef3dcb 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ BUILD_DIR=$(CURDIR)
 # All following code might depend on configuration variables
 ifneq ($(wildcard config-host.mak),)
 # Put the all: rule here so that config-host.mak can contain dependencies.
-all: build-all
+all:
 include config-host.mak
 include $(SRC_PATH)/rules.mak
 config-host.mak: $(SRC_PATH)/configure
@@ -31,7 +31,7 @@ Makefile: ;
 configure: ;
 
 .PHONY: all clean cscope distclean dvi html info install install-doc \
-	pdf recurse-all speed tar tarbin test build-all
+	pdf recurse-all speed test dist
 
 $(call set-vpath, $(SRC_PATH))
 
@@ -82,7 +82,7 @@ defconfig:
 
 -include config-all-devices.mak
 
-build-all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all
+all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all
 
 config-host.h: config-host.h-timestamp
 config-host.h-timestamp: config-host.mak
@@ -156,7 +156,8 @@ vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) qemu-timer-comm
 qemu-img.o: qemu-img-cmds.h
 
 tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \
-	qemu-timer-common.o main-loop.o notify.o iohandler.o cutils.o async.o
+	qemu-timer-common.o main-loop.o notify.o \
+	iohandler.o cutils.o iov.o async.o
 tools-obj-$(CONFIG_POSIX) += compatfd.o
 
 qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y)
@@ -232,6 +233,13 @@ clean:
 	rm -f $$d/qemu-options.def; \
         done
 
+VERSION ?= $(shell cat VERSION)
+
+dist: qemu-$(VERSION).tar.bz2
+
+qemu-%.tar.bz2:
+	$(SRC_PATH)/scripts/make-release "$(SRC_PATH)" "$(patsubst qemu-%.tar.bz2,%,$@)"
+
 distclean: clean
 	rm -f config-host.mak config-host.h* config-host.ld $(DOCS) qemu-options.texi qemu-img-cmds.texi qemu-monitor.texi
 	rm -f config-all-devices.mak
@@ -390,16 +398,6 @@ qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \
 	qemu-img.texi qemu-nbd.texi qemu-options.texi \
 	qemu-monitor.texi qemu-img-cmds.texi
 
-VERSION ?= $(shell cat VERSION)
-FILE = qemu-$(VERSION)
-
-# tar release (use 'make -k tar' on a checkouted tree)
-tar:
-	rm -rf /tmp/$(FILE)
-	cp -r . /tmp/$(FILE)
-	cd /tmp && tar zcvf ~/$(FILE).tar.gz $(FILE) --exclude CVS --exclude .git --exclude .svn
-	rm -rf /tmp/$(FILE)
-
 # Add a dependency on the generated files, so that they are always
 # rebuilt before other object files
 Makefile: $(GENERATED_HEADERS)
diff --git a/Makefile.objs b/Makefile.objs
index 625c4d5da7..5ebbcfa171 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -41,7 +41,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
 #######################################################################
 # block-obj-y is code used by both qemu system emulation and qemu-img
 
-block-obj-y = cutils.o cache-utils.o qemu-option.o module.o async.o
+block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o
 block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
@@ -101,7 +101,7 @@ common-obj-$(CONFIG_SLIRP) += slirp/
 user-obj-y =
 user-obj-y += envlist.o path.o
 user-obj-y += tcg-runtime.o host-utils.o
-user-obj-y += cutils.o cache-utils.o
+user-obj-y += cutils.o iov.o cache-utils.o
 user-obj-y += module.o
 user-obj-y += qemu-user.o
 user-obj-y += $(trace-obj-y)
diff --git a/arch_init.c b/arch_init.c
index a9e8b7442b..5b0f5626a9 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -44,6 +44,14 @@
 #include "exec-memory.h"
 #include "hw/pcspk.h"
 
+#ifdef DEBUG_ARCH_INIT
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
 #ifdef TARGET_SPARC
 int graphic_width = 1024;
 int graphic_height = 768;
@@ -161,6 +169,18 @@ static int is_dup_page(uint8_t *page)
     return 1;
 }
 
+static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
+        int cont, int flag)
+{
+        qemu_put_be64(f, offset | cont | flag);
+        if (!cont) {
+                qemu_put_byte(f, strlen(block->idstr));
+                qemu_put_buffer(f, (uint8_t *)block->idstr,
+                                strlen(block->idstr));
+        }
+
+}
+
 static RAMBlock *last_block;
 static ram_addr_t last_offset;
 
@@ -187,21 +207,11 @@ static int ram_save_block(QEMUFile *f)
             p = memory_region_get_ram_ptr(mr) + offset;
 
             if (is_dup_page(p)) {
-                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_COMPRESS);
-                if (!cont) {
-                    qemu_put_byte(f, strlen(block->idstr));
-                    qemu_put_buffer(f, (uint8_t *)block->idstr,
-                                    strlen(block->idstr));
-                }
+                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, *p);
                 bytes_sent = 1;
             } else {
-                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_PAGE);
-                if (!cont) {
-                    qemu_put_byte(f, strlen(block->idstr));
-                    qemu_put_buffer(f, (uint8_t *)block->idstr,
-                                    strlen(block->idstr));
-                }
+                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                 bytes_sent = TARGET_PAGE_SIZE;
             }
@@ -228,20 +238,7 @@ static uint64_t bytes_transferred;
 
 static ram_addr_t ram_save_remaining(void)
 {
-    RAMBlock *block;
-    ram_addr_t count = 0;
-
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
-        ram_addr_t addr;
-        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
-            if (memory_region_get_dirty(block->mr, addr, TARGET_PAGE_SIZE,
-                                        DIRTY_MEMORY_MIGRATION)) {
-                count++;
-            }
-        }
-    }
-
-    return count;
+    return ram_list.dirty_pages;
 }
 
 uint64_t ram_bytes_remaining(void)
@@ -294,16 +291,23 @@ static void sort_ram_list(void)
     g_free(blocks);
 }
 
+static void migration_end(void)
+{
+    memory_global_dirty_log_stop();
+}
+
+#define MAX_WAIT 50 /* ms, half buffered_file limit */
+
 int ram_save_live(QEMUFile *f, int stage, void *opaque)
 {
     ram_addr_t addr;
     uint64_t bytes_transferred_last;
     double bwidth = 0;
-    uint64_t expected_time = 0;
     int ret;
+    int i;
 
     if (stage < 0) {
-        memory_global_dirty_log_stop();
+        migration_end();
         return 0;
     }
 
@@ -340,6 +344,7 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
     bytes_transferred_last = bytes_transferred;
     bwidth = qemu_get_clock_ns(rt_clock);
 
+    i = 0;
     while ((ret = qemu_file_rate_limit(f)) == 0) {
         int bytes_sent;
 
@@ -348,6 +353,20 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
         if (bytes_sent == 0) { /* no more blocks */
             break;
         }
+        /* we want to check in the 1st loop, just in case it was the 1st time
+           and we had to sync the dirty bitmap.
+           qemu_get_clock_ns() is a bit expensive, so we only check each some
+           iterations
+        */
+        if ((i & 63) == 0) {
+            uint64_t t1 = (qemu_get_clock_ns(rt_clock) - bwidth) / 1000000;
+            if (t1 > MAX_WAIT) {
+                DPRINTF("big wait: " PRIu64 " milliseconds, %d iterations\n",
+                        t1, i);
+                break;
+            }
+        }
+        i++;
     }
 
     if (ret < 0) {
@@ -376,9 +395,16 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
 
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
-    expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
+    if (stage == 2) {
+        uint64_t expected_time;
+        expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
 
-    return (stage == 2) && (expected_time <= migrate_max_downtime());
+        DPRINTF("ram_save_live: expected(" PRIu64 ") <= max(" PRIu64 ")?\n",
+                expected_time, migrate_max_downtime());
+
+        return expected_time <= migrate_max_downtime();
+    }
+    return 0;
 }
 
 static inline void *host_from_stream_offset(QEMUFile *f,
@@ -414,8 +440,11 @@ static inline void *host_from_stream_offset(QEMUFile *f,
 int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
     ram_addr_t addr;
-    int flags;
+    int flags, ret = 0;
     int error;
+    static uint64_t seq_iter;
+
+    seq_iter++;
 
     if (version_id < 4 || version_id > 4) {
         return -EINVAL;
@@ -445,8 +474,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
 
                     QLIST_FOREACH(block, &ram_list.blocks, next) {
                         if (!strncmp(id, block->idstr, sizeof(id))) {
-                            if (block->length != length)
-                                return -EINVAL;
+                            if (block->length != length) {
+                                ret =  -EINVAL;
+                                goto done;
+                            }
                             break;
                         }
                     }
@@ -454,7 +485,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
                     if (!block) {
                         fprintf(stderr, "Unknown ramblock \"%s\", cannot "
                                 "accept migration\n", id);
-                        return -EINVAL;
+                        ret = -EINVAL;
+                        goto done;
                     }
 
                     total_ram_bytes -= length;
@@ -483,16 +515,23 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
             void *host;
 
             host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                return -EINVAL;
+            }
 
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
         }
         error = qemu_file_get_error(f);
         if (error) {
-            return error;
+            ret = error;
+            goto done;
         }
     } while (!(flags & RAM_SAVE_FLAG_EOS));
 
-    return 0;
+done:
+    DPRINTF("Completed load of VM with exit code %d seq iteration " PRIu64 "\n",
+            ret, seq_iter);
+    return ret;
 }
 
 #ifdef HAS_AUDIO
diff --git a/audio/audio.c b/audio/audio.c
index 583ee51eab..1c7738930b 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -818,6 +818,7 @@ static int audio_attach_capture (HWVoiceOut *hw)
         sw->active = hw->enabled;
         sw->conv = noop_conv;
         sw->ratio = ((int64_t) hw_cap->info.freq << 32) / sw->info.freq;
+        sw->vol = nominal_volume;
         sw->rate = st_rate_start (sw->info.freq, hw_cap->info.freq);
         if (!sw->rate) {
             dolog ("Could not start rate conversion for `%s'\n", SW_NAME (sw));
diff --git a/bitops.h b/bitops.h
index 07d1a0638f..c45623245f 100644
--- a/bitops.h
+++ b/bitops.h
@@ -269,4 +269,94 @@ static inline unsigned long hweight_long(unsigned long w)
     return count;
 }
 
+/**
+ * extract32:
+ * @value: the value to extract the bit field from
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ *
+ * Extract from the 32 bit input @value the bit field specified by the
+ * @start and @length parameters, and return it. The bit field must
+ * lie entirely within the 32 bit word. It is valid to request that
+ * all 32 bits are returned (ie @length 32 and @start 0).
+ *
+ * Returns: the value of the bit field extracted from the input value.
+ */
+static inline uint32_t extract32(uint32_t value, int start, int length)
+{
+    assert(start >= 0 && length > 0 && length <= 32 - start);
+    return (value >> start) & (~0U >> (32 - length));
+}
+
+/**
+ * extract64:
+ * @value: the value to extract the bit field from
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ *
+ * Extract from the 64 bit input @value the bit field specified by the
+ * @start and @length parameters, and return it. The bit field must
+ * lie entirely within the 64 bit word. It is valid to request that
+ * all 64 bits are returned (ie @length 64 and @start 0).
+ *
+ * Returns: the value of the bit field extracted from the input value.
+ */
+static inline uint64_t extract64(uint64_t value, int start, int length)
+{
+    assert(start >= 0 && length > 0 && length <= 64 - start);
+    return (value >> start) & (~0ULL >> (64 - length));
+}
+
+/**
+ * deposit32:
+ * @value: initial value to insert bit field into
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ * @fieldval: the value to insert into the bit field
+ *
+ * Deposit @fieldval into the 32 bit @value at the bit field specified
+ * by the @start and @length parameters, and return the modified
+ * @value. Bits of @value outside the bit field are not modified.
+ * Bits of @fieldval above the least significant @length bits are
+ * ignored. The bit field must lie entirely within the 32 bit word.
+ * It is valid to request that all 32 bits are modified (ie @length
+ * 32 and @start 0).
+ *
+ * Returns: the modified @value.
+ */
+static inline uint32_t deposit32(uint32_t value, int start, int length,
+                                 uint32_t fieldval)
+{
+    uint32_t mask;
+    assert(start >= 0 && length > 0 && length <= 32 - start);
+    mask = (~0U >> (32 - length)) << start;
+    return (value & ~mask) | ((fieldval << start) & mask);
+}
+
+/**
+ * deposit64:
+ * @value: initial value to insert bit field into
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ * @fieldval: the value to insert into the bit field
+ *
+ * Deposit @fieldval into the 64 bit @value at the bit field specified
+ * by the @start and @length parameters, and return the modified
+ * @value. Bits of @value outside the bit field are not modified.
+ * Bits of @fieldval above the least significant @length bits are
+ * ignored. The bit field must lie entirely within the 64 bit word.
+ * It is valid to request that all 64 bits are modified (ie @length
+ * 64 and @start 0).
+ *
+ * Returns: the modified @value.
+ */
+static inline uint64_t deposit64(uint64_t value, int start, int length,
+                                 uint64_t fieldval)
+{
+    uint64_t mask;
+    assert(start >= 0 && length > 0 && length <= 64 - start);
+    mask = (~0ULL >> (64 - length)) << start;
+    return (value & ~mask) | ((fieldval << start) & mask);
+}
+
 #endif
diff --git a/block-migration.c b/block-migration.c
index fd2ffff0d5..b95b4e1389 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -700,13 +700,13 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
     return 0;
 }
 
-static void block_set_params(int blk_enable, int shared_base, void *opaque)
+static void block_set_params(const MigrationParams *params, void *opaque)
 {
-    block_mig_state.blk_enable = blk_enable;
-    block_mig_state.shared_base = shared_base;
+    block_mig_state.blk_enable = params->blk;
+    block_mig_state.shared_base = params->shared;
 
     /* shared base means that blk_enable = 1 */
-    block_mig_state.blk_enable |= shared_base;
+    block_mig_state.blk_enable |= params->shared;
 }
 
 void blk_mig_init(void)
diff --git a/block.c b/block.c
index 0acdcac158..0c923f2ae9 100644
--- a/block.c
+++ b/block.c
@@ -971,101 +971,130 @@ static void bdrv_rebind(BlockDriverState *bs)
     }
 }
 
+static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
+                                     BlockDriverState *bs_src)
+{
+    /* move some fields that need to stay attached to the device */
+    bs_dest->open_flags         = bs_src->open_flags;
+
+    /* dev info */
+    bs_dest->dev_ops            = bs_src->dev_ops;
+    bs_dest->dev_opaque         = bs_src->dev_opaque;
+    bs_dest->dev                = bs_src->dev;
+    bs_dest->buffer_alignment   = bs_src->buffer_alignment;
+    bs_dest->copy_on_read       = bs_src->copy_on_read;
+
+    bs_dest->enable_write_cache = bs_src->enable_write_cache;
+
+    /* i/o timing parameters */
+    bs_dest->slice_time         = bs_src->slice_time;
+    bs_dest->slice_start        = bs_src->slice_start;
+    bs_dest->slice_end          = bs_src->slice_end;
+    bs_dest->io_limits          = bs_src->io_limits;
+    bs_dest->io_base            = bs_src->io_base;
+    bs_dest->throttled_reqs     = bs_src->throttled_reqs;
+    bs_dest->block_timer        = bs_src->block_timer;
+    bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
+
+    /* geometry */
+    bs_dest->cyls               = bs_src->cyls;
+    bs_dest->heads              = bs_src->heads;
+    bs_dest->secs               = bs_src->secs;
+    bs_dest->translation        = bs_src->translation;
+
+    /* r/w error */
+    bs_dest->on_read_error      = bs_src->on_read_error;
+    bs_dest->on_write_error     = bs_src->on_write_error;
+
+    /* i/o status */
+    bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
+    bs_dest->iostatus           = bs_src->iostatus;
+
+    /* dirty bitmap */
+    bs_dest->dirty_count        = bs_src->dirty_count;
+    bs_dest->dirty_bitmap       = bs_src->dirty_bitmap;
+
+    /* job */
+    bs_dest->in_use             = bs_src->in_use;
+    bs_dest->job                = bs_src->job;
+
+    /* keep the same entry in bdrv_states */
+    pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
+            bs_src->device_name);
+    bs_dest->list = bs_src->list;
+}
+
 /*
- * Add new bs contents at the top of an image chain while the chain is
- * live, while keeping required fields on the top layer.
+ * Swap bs contents for two image chains while they are live,
+ * while keeping required fields on the BlockDriverState that is
+ * actually attached to a device.
  *
  * This will modify the BlockDriverState fields, and swap contents
- * between bs_new and bs_top. Both bs_new and bs_top are modified.
+ * between bs_new and bs_old. Both bs_new and bs_old are modified.
  *
  * bs_new is required to be anonymous.
  *
  * This function does not create any image files.
  */
-void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
+void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
 {
     BlockDriverState tmp;
 
-    /* bs_new must be anonymous */
+    /* bs_new must be anonymous and shouldn't have anything fancy enabled */
     assert(bs_new->device_name[0] == '\0');
+    assert(bs_new->dirty_bitmap == NULL);
+    assert(bs_new->job == NULL);
+    assert(bs_new->dev == NULL);
+    assert(bs_new->in_use == 0);
+    assert(bs_new->io_limits_enabled == false);
+    assert(bs_new->block_timer == NULL);
 
     tmp = *bs_new;
+    *bs_new = *bs_old;
+    *bs_old = tmp;
 
-    /* there are some fields that need to stay on the top layer: */
-    tmp.open_flags        = bs_top->open_flags;
-
-    /* dev info */
-    tmp.dev_ops           = bs_top->dev_ops;
-    tmp.dev_opaque        = bs_top->dev_opaque;
-    tmp.dev               = bs_top->dev;
-    tmp.buffer_alignment  = bs_top->buffer_alignment;
-    tmp.copy_on_read      = bs_top->copy_on_read;
-
-    tmp.enable_write_cache = bs_top->enable_write_cache;
-
-    /* i/o timing parameters */
-    tmp.slice_time        = bs_top->slice_time;
-    tmp.slice_start       = bs_top->slice_start;
-    tmp.slice_end         = bs_top->slice_end;
-    tmp.io_limits         = bs_top->io_limits;
-    tmp.io_base           = bs_top->io_base;
-    tmp.throttled_reqs    = bs_top->throttled_reqs;
-    tmp.block_timer       = bs_top->block_timer;
-    tmp.io_limits_enabled = bs_top->io_limits_enabled;
+    /* there are some fields that should not be swapped, move them back */
+    bdrv_move_feature_fields(&tmp, bs_old);
+    bdrv_move_feature_fields(bs_old, bs_new);
+    bdrv_move_feature_fields(bs_new, &tmp);
 
-    /* geometry */
-    tmp.cyls              = bs_top->cyls;
-    tmp.heads             = bs_top->heads;
-    tmp.secs              = bs_top->secs;
-    tmp.translation       = bs_top->translation;
+    /* bs_new shouldn't be in bdrv_states even after the swap!  */
+    assert(bs_new->device_name[0] == '\0');
 
-    /* r/w error */
-    tmp.on_read_error     = bs_top->on_read_error;
-    tmp.on_write_error    = bs_top->on_write_error;
+    /* Check a few fields that should remain attached to the device */
+    assert(bs_new->dev == NULL);
+    assert(bs_new->job == NULL);
+    assert(bs_new->in_use == 0);
+    assert(bs_new->io_limits_enabled == false);
+    assert(bs_new->block_timer == NULL);
 
-    /* i/o status */
-    tmp.iostatus_enabled  = bs_top->iostatus_enabled;
-    tmp.iostatus          = bs_top->iostatus;
+    bdrv_rebind(bs_new);
+    bdrv_rebind(bs_old);
+}
 
-    /* keep the same entry in bdrv_states */
-    pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
-    tmp.list = bs_top->list;
+/*
+ * Add new bs contents at the top of an image chain while the chain is
+ * live, while keeping required fields on the top layer.
+ *
+ * This will modify the BlockDriverState fields, and swap contents
+ * between bs_new and bs_top. Both bs_new and bs_top are modified.
+ *
+ * bs_new is required to be anonymous.
+ *
+ * This function does not create any image files.
+ */
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
+{
+    bdrv_swap(bs_new, bs_top);
 
     /* The contents of 'tmp' will become bs_top, as we are
      * swapping bs_new and bs_top contents. */
-    tmp.backing_hd = bs_new;
-    pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
-    pstrcpy(tmp.backing_format, sizeof(tmp.backing_format),
-            bs_top->drv ? bs_top->drv->format_name : "");
-
-    /* swap contents of the fixed new bs and the current top */
-    *bs_new = *bs_top;
-    *bs_top = tmp;
-
-    /* device_name[] was carried over from the old bs_top.  bs_new
-     * shouldn't be in bdrv_states, so we need to make device_name[]
-     * reflect the anonymity of bs_new
-     */
-    bs_new->device_name[0] = '\0';
-
-    /* clear the copied fields in the new backing file */
-    bdrv_detach_dev(bs_new, bs_new->dev);
-
-    qemu_co_queue_init(&bs_new->throttled_reqs);
-    memset(&bs_new->io_base,   0, sizeof(bs_new->io_base));
-    memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
-    bdrv_iostatus_disable(bs_new);
-
-    /* we don't use bdrv_io_limits_disable() for this, because we don't want
-     * to affect or delete the block_timer, as it has been moved to bs_top */
-    bs_new->io_limits_enabled = false;
-    bs_new->block_timer       = NULL;
-    bs_new->slice_time        = 0;
-    bs_new->slice_start       = 0;
-    bs_new->slice_end         = 0;
-
-    bdrv_rebind(bs_new);
-    bdrv_rebind(bs_top);
+    bs_top->backing_hd = bs_new;
+    bs_top->open_flags &= ~BDRV_O_NO_BACKING;
+    pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
+            bs_new->filename);
+    pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
+            bs_new->drv ? bs_new->drv->format_name : "");
 }
 
 void bdrv_delete(BlockDriverState *bs)
@@ -1610,6 +1639,20 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num,
     return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
 }
 
+/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
+int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
+                          uint8_t *buf, int nb_sectors)
+{
+    bool enabled;
+    int ret;
+
+    enabled = bs->io_limits_enabled;
+    bs->io_limits_enabled = false;
+    ret = bdrv_read(bs, 0, buf, 1);
+    bs->io_limits_enabled = enabled;
+    return ret;
+}
+
 #define BITS_PER_LONG  (sizeof(unsigned long) * 8)
 
 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
@@ -1828,8 +1871,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
     }
 
     skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
-    qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
-                           nb_sectors * BDRV_SECTOR_SIZE);
+    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
+                        nb_sectors * BDRV_SECTOR_SIZE);
 
 err:
     qemu_vfree(bounce_buffer);
@@ -2107,11 +2150,10 @@ static int guess_disk_lchs(BlockDriverState *bs,
                            int *pcylinders, int *pheads, int *psectors)
 {
     uint8_t buf[BDRV_SECTOR_SIZE];
-    int ret, i, heads, sectors, cylinders;
+    int i, heads, sectors, cylinders;
     struct partition *p;
     uint32_t nr_sects;
     uint64_t nb_sectors;
-    bool enabled;
 
     bdrv_get_geometry(bs, &nb_sectors);
 
@@ -2120,12 +2162,9 @@ static int guess_disk_lchs(BlockDriverState *bs,
      * but also in async I/O mode. So the I/O throttling function has to
      * be disabled temporarily here, not permanently.
      */
-    enabled = bs->io_limits_enabled;
-    bs->io_limits_enabled = false;
-    ret = bdrv_read(bs, 0, buf, 1);
-    bs->io_limits_enabled = enabled;
-    if (ret < 0)
+    if (bdrv_read_unthrottled(bs, 0, buf, 1) < 0) {
         return -1;
+    }
     /* test msdos magic */
     if (buf[510] != 0x55 || buf[511] != 0xaa)
         return -1;
@@ -2308,46 +2347,40 @@ void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
     uint64_t nb_sectors, size;
     int i, first_match, match;
 
-    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
-    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
-        /* User defined disk */
-        *rate = FDRIVE_RATE_500K;
-    } else {
-        bdrv_get_geometry(bs, &nb_sectors);
-        match = -1;
-        first_match = -1;
-        for (i = 0; ; i++) {
-            parse = &fd_formats[i];
-            if (parse->drive == FDRIVE_DRV_NONE) {
+    bdrv_get_geometry(bs, &nb_sectors);
+    match = -1;
+    first_match = -1;
+    for (i = 0; ; i++) {
+        parse = &fd_formats[i];
+        if (parse->drive == FDRIVE_DRV_NONE) {
+            break;
+        }
+        if (drive_in == parse->drive ||
+            drive_in == FDRIVE_DRV_NONE) {
+            size = (parse->max_head + 1) * parse->max_track *
+                parse->last_sect;
+            if (nb_sectors == size) {
+                match = i;
                 break;
             }
-            if (drive_in == parse->drive ||
-                drive_in == FDRIVE_DRV_NONE) {
-                size = (parse->max_head + 1) * parse->max_track *
-                    parse->last_sect;
-                if (nb_sectors == size) {
-                    match = i;
-                    break;
-                }
-                if (first_match == -1) {
-                    first_match = i;
-                }
-            }
-        }
-        if (match == -1) {
             if (first_match == -1) {
-                match = 1;
-            } else {
-                match = first_match;
+                first_match = i;
             }
-            parse = &fd_formats[match];
         }
-        *nb_heads = parse->max_head + 1;
-        *max_track = parse->max_track;
-        *last_sect = parse->last_sect;
-        *drive = parse->drive;
-        *rate = parse->rate;
     }
+    if (match == -1) {
+        if (first_match == -1) {
+            match = 1;
+        } else {
+            match = first_match;
+        }
+        parse = &fd_formats[match];
+    }
+    *nb_heads = parse->max_head + 1;
+    *max_track = parse->max_track;
+    *last_sect = parse->last_sect;
+    *drive = parse->drive;
+    *rate = parse->rate;
 }
 
 int bdrv_get_translation_hint(BlockDriverState *bs)
@@ -3167,13 +3200,13 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
             // Add the first request to the merged one. If the requests are
             // overlapping, drop the last sectors of the first request.
             size = (reqs[i].sector - reqs[outidx].sector) << 9;
-            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
+            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
 
             // We should need to add any zeros between the two requests
             assert (reqs[i].sector <= oldreq_last);
 
             // Add the second request
-            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
+            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
 
             reqs[outidx].nb_sectors = qiov->size >> 9;
             reqs[outidx].qiov = qiov;
@@ -3448,7 +3481,7 @@ static void bdrv_aio_bh_cb(void *opaque)
     BlockDriverAIOCBSync *acb = opaque;
 
     if (!acb->is_write)
-        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
+        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
     qemu_vfree(acb->bounce);
     acb->common.cb(acb->common.opaque, acb->ret);
     qemu_bh_delete(acb->bh);
@@ -3474,7 +3507,7 @@ static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
     acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
 
     if (is_write) {
-        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
+        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
         acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
     } else {
         acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
diff --git a/block.h b/block.h
index d135652902..e34d94210d 100644
--- a/block.h
+++ b/block.h
@@ -122,6 +122,7 @@ int bdrv_create(BlockDriver *drv, const char* filename,
 int bdrv_create_file(const char* filename, QEMUOptionParameter *options);
 BlockDriverState *bdrv_new(const char *device_name);
 void bdrv_make_anon(BlockDriverState *bs);
+void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old);
 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
 void bdrv_delete(BlockDriverState *bs);
 int bdrv_parse_cache_flags(const char *mode, int *flags);
@@ -141,6 +142,8 @@ bool bdrv_dev_is_tray_open(BlockDriverState *bs);
 bool bdrv_dev_is_medium_locked(BlockDriverState *bs);
 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
               uint8_t *buf, int nb_sectors);
+int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
+                          uint8_t *buf, int nb_sectors);
 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
                const uint8_t *buf, int nb_sectors);
 int bdrv_pread(BlockDriverState *bs, int64_t offset,
@@ -395,9 +398,7 @@ typedef enum {
     BLKDBG_L2_ALLOC_COW_READ,
     BLKDBG_L2_ALLOC_WRITE,
 
-    BLKDBG_READ,
     BLKDBG_READ_AIO,
-    BLKDBG_READ_BACKING,
     BLKDBG_READ_BACKING_AIO,
     BLKDBG_READ_COMPRESSED,
 
diff --git a/block/blkdebug.c b/block/blkdebug.c
index e56e37da51..59dcea0650 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -26,24 +26,10 @@
 #include "block_int.h"
 #include "module.h"
 
-typedef struct BlkdebugVars {
-    int state;
-
-    /* If inject_errno != 0, an error is injected for requests */
-    int inject_errno;
-
-    /* Decides if all future requests fail (false) or only the next one and
-     * after the next request inject_errno is reset to 0 (true) */
-    bool inject_once;
-
-    /* Decides if aio_readv/writev fails right away (true) or returns an error
-     * return value only in the callback (false) */
-    bool inject_immediately;
-} BlkdebugVars;
-
 typedef struct BDRVBlkdebugState {
-    BlkdebugVars vars;
-    QLIST_HEAD(list, BlkdebugRule) rules[BLKDBG_EVENT_MAX];
+    int state;
+    QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX];
+    QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
 } BDRVBlkdebugState;
 
 typedef struct BlkdebugAIOCB {
@@ -73,12 +59,14 @@ typedef struct BlkdebugRule {
             int error;
             int immediately;
             int once;
+            int64_t sector;
         } inject;
         struct {
             int new_state;
         } set_state;
     } options;
     QLIST_ENTRY(BlkdebugRule) next;
+    QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
 } BlkdebugRule;
 
 static QemuOptsList inject_error_opts = {
@@ -98,6 +86,10 @@ static QemuOptsList inject_error_opts = {
             .type = QEMU_OPT_NUMBER,
         },
         {
+            .name = "sector",
+            .type = QEMU_OPT_NUMBER,
+        },
+        {
             .name = "once",
             .type = QEMU_OPT_BOOL,
         },
@@ -147,9 +139,7 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {
     [BLKDBG_L2_ALLOC_COW_READ]              = "l2_alloc.cow_read",
     [BLKDBG_L2_ALLOC_WRITE]                 = "l2_alloc.write",
 
-    [BLKDBG_READ]                           = "read",
     [BLKDBG_READ_AIO]                       = "read_aio",
-    [BLKDBG_READ_BACKING]                   = "read_backing",
     [BLKDBG_READ_BACKING_AIO]               = "read_backing_aio",
     [BLKDBG_READ_COMPRESSED]                = "read_compressed",
 
@@ -228,6 +218,7 @@ static int add_rule(QemuOpts *opts, void *opaque)
         rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
         rule->options.inject.immediately =
             qemu_opt_get_bool(opts, "immediately", 0);
+        rule->options.inject.sector = qemu_opt_get_number(opts, "sector", -1);
         break;
 
     case ACTION_SET_STATE:
@@ -302,7 +293,7 @@ static int blkdebug_open(BlockDriverState *bs, const char *filename, int flags)
     filename = c + 1;
 
     /* Set initial state */
-    s->vars.state = 1;
+    s->state = 1;
 
     /* Open the backing file */
     ret = bdrv_file_open(&bs->file, filename, flags);
@@ -328,18 +319,18 @@ static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb)
 }
 
 static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
-    BlockDriverCompletionFunc *cb, void *opaque)
+    BlockDriverCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
 {
     BDRVBlkdebugState *s = bs->opaque;
-    int error = s->vars.inject_errno;
+    int error = rule->options.inject.error;
     struct BlkdebugAIOCB *acb;
     QEMUBH *bh;
 
-    if (s->vars.inject_once) {
-        s->vars.inject_errno = 0;
+    if (rule->options.inject.once) {
+        QSIMPLEQ_INIT(&s->active_rules);
     }
 
-    if (s->vars.inject_immediately) {
+    if (rule->options.inject.immediately) {
         return NULL;
     }
 
@@ -358,14 +349,21 @@ static BlockDriverAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
     BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugRule *rule = NULL;
 
-    if (s->vars.inject_errno) {
-        return inject_error(bs, cb, opaque);
+    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
+        if (rule->options.inject.sector == -1 ||
+            (rule->options.inject.sector >= sector_num &&
+             rule->options.inject.sector < sector_num + nb_sectors)) {
+            break;
+        }
+    }
+
+    if (rule && rule->options.inject.error) {
+        return inject_error(bs, cb, opaque, rule);
     }
 
-    BlockDriverAIOCB *acb =
-        bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
-    return acb;
+    return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
 }
 
 static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
@@ -373,14 +371,21 @@ static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
     BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugRule *rule = NULL;
+
+    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
+        if (rule->options.inject.sector == -1 ||
+            (rule->options.inject.sector >= sector_num &&
+             rule->options.inject.sector < sector_num + nb_sectors)) {
+            break;
+        }
+    }
 
-    if (s->vars.inject_errno) {
-        return inject_error(bs, cb, opaque);
+    if (rule && rule->options.inject.error) {
+        return inject_error(bs, cb, opaque, rule);
     }
 
-    BlockDriverAIOCB *acb =
-        bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
-    return acb;
+    return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
 }
 
 static void blkdebug_close(BlockDriverState *bs)
@@ -397,44 +402,53 @@ static void blkdebug_close(BlockDriverState *bs)
     }
 }
 
-static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
-    BlkdebugVars *old_vars)
+static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
+    int old_state, bool injected)
 {
     BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugVars *vars = &s->vars;
 
     /* Only process rules for the current state */
-    if (rule->state && rule->state != old_vars->state) {
-        return;
+    if (rule->state && rule->state != old_state) {
+        return injected;
     }
 
     /* Take the action */
     switch (rule->action) {
     case ACTION_INJECT_ERROR:
-        vars->inject_errno       = rule->options.inject.error;
-        vars->inject_once        = rule->options.inject.once;
-        vars->inject_immediately = rule->options.inject.immediately;
+        if (!injected) {
+            QSIMPLEQ_INIT(&s->active_rules);
+            injected = true;
+        }
+        QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next);
         break;
 
     case ACTION_SET_STATE:
-        vars->state              = rule->options.set_state.new_state;
+        s->state = rule->options.set_state.new_state;
         break;
     }
+    return injected;
 }
 
 static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event)
 {
     BDRVBlkdebugState *s = bs->opaque;
     struct BlkdebugRule *rule;
-    BlkdebugVars old_vars = s->vars;
+    int old_state = s->state;
+    bool injected;
 
     assert((int)event >= 0 && event < BLKDBG_EVENT_MAX);
 
+    injected = false;
     QLIST_FOREACH(rule, &s->rules[event], next) {
-        process_rule(bs, rule, &old_vars);
+        injected = process_rule(bs, rule, old_state, injected);
     }
 }
 
+static int64_t blkdebug_getlength(BlockDriverState *bs)
+{
+    return bdrv_getlength(bs->file);
+}
+
 static BlockDriver bdrv_blkdebug = {
     .format_name        = "blkdebug",
     .protocol_name      = "blkdebug",
@@ -443,6 +457,7 @@ static BlockDriver bdrv_blkdebug = {
 
     .bdrv_file_open     = blkdebug_open,
     .bdrv_close         = blkdebug_close,
+    .bdrv_getlength     = blkdebug_getlength,
 
     .bdrv_aio_readv     = blkdebug_aio_readv,
     .bdrv_aio_writev    = blkdebug_aio_writev,
diff --git a/block/curl.c b/block/curl.c
index bf3680ba57..e7c3634d35 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -140,8 +140,8 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
             continue;
 
         if ((s->buf_off >= acb->end)) {
-            qemu_iovec_from_buffer(acb->qiov, s->orig_buf + acb->start,
-                                   acb->end - acb->start);
+            qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start,
+                                acb->end - acb->start);
             acb->common.cb(acb->common.opaque, 0);
             qemu_aio_release(acb);
             s->acb[i] = NULL;
@@ -176,7 +176,7 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
         {
             char *buf = state->orig_buf + (start - state->buf_start);
 
-            qemu_iovec_from_buffer(acb->qiov, buf, len);
+            qemu_iovec_from_buf(acb->qiov, 0, buf, len);
             acb->common.cb(acb->common.opaque, 0);
 
             return FIND_RET_OK;
diff --git a/block/iscsi.c b/block/iscsi.c
index 22888a0845..993a86d829 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -35,6 +35,10 @@
 #include <iscsi/iscsi.h>
 #include <iscsi/scsi-lowlevel.h>
 
+#ifdef __linux__
+#include <scsi/sg.h>
+#include <hw/scsi-defs.h>
+#endif
 
 typedef struct IscsiLun {
     struct iscsi_context *iscsi;
@@ -56,6 +60,9 @@ typedef struct IscsiAIOCB {
     int canceled;
     size_t read_size;
     size_t read_offset;
+#ifdef __linux__
+    sg_io_hdr_t *ioh;
+#endif
 } IscsiAIOCB;
 
 struct IscsiTask {
@@ -240,8 +247,7 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
     /* this will allow us to get rid of 'buf' completely */
     size = nb_sectors * BDRV_SECTOR_SIZE;
     acb->buf = g_malloc(size);
-    qemu_iovec_to_buffer(acb->qiov, acb->buf);
-
+    qemu_iovec_to_buf(acb->qiov, 0, acb->buf, size);
 
     acb->task = malloc(sizeof(struct scsi_task));
     if (acb->task == NULL) {
@@ -515,6 +521,136 @@ iscsi_aio_discard(BlockDriverState *bs,
     return &acb->common;
 }
 
+#ifdef __linux__
+static void
+iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
+                     void *command_data, void *opaque)
+{
+    IscsiAIOCB *acb = opaque;
+
+    if (acb->canceled != 0) {
+        qemu_aio_release(acb);
+        scsi_free_scsi_task(acb->task);
+        acb->task = NULL;
+        return;
+    }
+
+    acb->status = 0;
+    if (status < 0) {
+        error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
+                     iscsi_get_error(iscsi));
+        acb->status = -EIO;
+    }
+
+    acb->ioh->driver_status = 0;
+    acb->ioh->host_status   = 0;
+    acb->ioh->resid         = 0;
+
+#define SG_ERR_DRIVER_SENSE    0x08
+
+    if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
+        int ss;
+
+        acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
+
+        acb->ioh->sb_len_wr = acb->task->datain.size - 2;
+        ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
+             acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
+        memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
+    }
+
+    iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
+    scsi_free_scsi_task(acb->task);
+    acb->task = NULL;
+}
+
+static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
+        unsigned long int req, void *buf,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+    struct iscsi_data data;
+    IscsiAIOCB *acb;
+
+    assert(req == SG_IO);
+
+    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
+
+    acb->iscsilun = iscsilun;
+    acb->canceled    = 0;
+    acb->buf         = NULL;
+    acb->ioh         = buf;
+
+    acb->task = malloc(sizeof(struct scsi_task));
+    if (acb->task == NULL) {
+        error_report("iSCSI: Failed to allocate task for scsi command. %s",
+                     iscsi_get_error(iscsi));
+        qemu_aio_release(acb);
+        return NULL;
+    }
+    memset(acb->task, 0, sizeof(struct scsi_task));
+
+    switch (acb->ioh->dxfer_direction) {
+    case SG_DXFER_TO_DEV:
+        acb->task->xfer_dir = SCSI_XFER_WRITE;
+        break;
+    case SG_DXFER_FROM_DEV:
+        acb->task->xfer_dir = SCSI_XFER_READ;
+        break;
+    default:
+        acb->task->xfer_dir = SCSI_XFER_NONE;
+        break;
+    }
+
+    acb->task->cdb_size = acb->ioh->cmd_len;
+    memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
+    acb->task->expxferlen = acb->ioh->dxfer_len;
+
+    if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
+        data.data = acb->ioh->dxferp;
+        data.size = acb->ioh->dxfer_len;
+    }
+    if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
+                                 iscsi_aio_ioctl_cb,
+                                 (acb->task->xfer_dir == SCSI_XFER_WRITE) ?
+                                     &data : NULL,
+                                 acb) != 0) {
+        scsi_free_scsi_task(acb->task);
+        qemu_aio_release(acb);
+        return NULL;
+    }
+
+    /* tell libiscsi to read straight into the buffer we got from ioctl */
+    if (acb->task->xfer_dir == SCSI_XFER_READ) {
+        scsi_task_add_data_in_buffer(acb->task,
+                                     acb->ioh->dxfer_len,
+                                     acb->ioh->dxferp);
+    }
+
+    iscsi_set_events(iscsilun);
+
+    return &acb->common;
+}
+
+static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+{
+    IscsiLun *iscsilun = bs->opaque;
+
+    switch (req) {
+    case SG_GET_VERSION_NUM:
+        *(int *)buf = 30000;
+        break;
+    case SG_GET_SCSI_ID:
+        ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
+        break;
+    default:
+        return -1;
+    }
+    return 0;
+}
+#endif
+
 static int64_t
 iscsi_getlength(BlockDriverState *bs)
 {
@@ -885,6 +1021,16 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
     if (iscsi_url != NULL) {
         iscsi_destroy_url(iscsi_url);
     }
+
+    /* Medium changer or tape. We dont have any emulation for this so this must
+     * be sg ioctl compatible. We force it to be sg, otherwise qemu will try
+     * to read from the device to guess the image format.
+     */
+    if (iscsilun->type == TYPE_MEDIUM_CHANGER ||
+        iscsilun->type == TYPE_TAPE) {
+        bs->sg = 1;
+    }
+
     return 0;
 
 failed:
@@ -926,6 +1072,11 @@ static BlockDriver bdrv_iscsi = {
     .bdrv_aio_flush  = iscsi_aio_flush,
 
     .bdrv_aio_discard = iscsi_aio_discard,
+
+#ifdef __linux__
+    .bdrv_ioctl       = iscsi_ioctl,
+    .bdrv_aio_ioctl   = iscsi_aio_ioctl,
+#endif
 };
 
 static void iscsi_block_init(void)
diff --git a/block/nbd.c b/block/nbd.c
index 1212614223..2bce47bf7a 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -196,7 +196,7 @@ static void nbd_restart_write(void *opaque)
 }
 
 static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
-                               struct iovec *iov, int offset)
+                               QEMUIOVector *qiov, int offset)
 {
     int rc, ret;
 
@@ -205,8 +205,9 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
     qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write,
                             nbd_have_request, s);
     rc = nbd_send_request(s->sock, request);
-    if (rc >= 0 && iov) {
-        ret = qemu_co_sendv(s->sock, iov, request->len, offset);
+    if (rc >= 0 && qiov) {
+        ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
+                            offset, request->len);
         if (ret != request->len) {
             return -EIO;
         }
@@ -220,7 +221,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
 
 static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request,
                                  struct nbd_reply *reply,
-                                 struct iovec *iov, int offset)
+                                 QEMUIOVector *qiov, int offset)
 {
     int ret;
 
@@ -231,8 +232,9 @@ static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request,
     if (reply->handle != request->handle) {
         reply->error = EIO;
     } else {
-        if (iov && reply->error == 0) {
-            ret = qemu_co_recvv(s->sock, iov, request->len, offset);
+        if (qiov && reply->error == 0) {
+            ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov,
+                                offset, request->len);
             if (ret != request->len) {
                 reply->error = EIO;
             }
@@ -349,7 +351,7 @@ static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
     if (ret < 0) {
         reply.error = -ret;
     } else {
-        nbd_co_receive_reply(s, &request, &reply, qiov->iov, offset);
+        nbd_co_receive_reply(s, &request, &reply, qiov, offset);
     }
     nbd_coroutine_end(s, &request);
     return -reply.error;
@@ -374,7 +376,7 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
     request.len = nb_sectors * 512;
 
     nbd_coroutine_start(s, &request);
-    ret = nbd_co_send_request(s, &request, qiov->iov, offset);
+    ret = nbd_co_send_request(s, &request, qiov, offset);
     if (ret < 0) {
         reply.error = -ret;
     } else {
diff --git a/block/qcow.c b/block/qcow.c
index 35dff497ae..7b5ab87d2d 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -540,7 +540,7 @@ done:
     qemu_co_mutex_unlock(&s->lock);
 
     if (qiov->niov > 1) {
-        qemu_iovec_from_buffer(qiov, orig_buf, qiov->size);
+        qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size);
         qemu_vfree(orig_buf);
     }
 
@@ -569,7 +569,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
 
     if (qiov->niov > 1) {
         buf = orig_buf = qemu_blockalign(bs, qiov->size);
-        qemu_iovec_to_buffer(qiov, buf);
+        qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
     } else {
         orig_buf = NULL;
         buf = (uint8_t *)qiov->iov->iov_base;
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 66f391597c..5e3f9153fb 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -627,10 +627,11 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
     BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
     assert(size > 0 && size <= s->cluster_size);
     if (s->free_byte_offset == 0) {
-        s->free_byte_offset = qcow2_alloc_clusters(bs, s->cluster_size);
-        if (s->free_byte_offset < 0) {
-            return s->free_byte_offset;
+        offset = qcow2_alloc_clusters(bs, s->cluster_size);
+        if (offset < 0) {
+            return offset;
         }
+        s->free_byte_offset = offset;
     }
  redo:
     free_in_cluster = s->cluster_size -
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 4561a2abf9..4e7c93b8b3 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -405,7 +405,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 #ifdef DEBUG_ALLOC
     {
       BdrvCheckResult result = {0};
-      qcow2_check_refcounts(bs, &result);
+      qcow2_check_refcounts(bs, &result, 0);
     }
 #endif
     return 0;
@@ -522,7 +522,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
 #ifdef DEBUG_ALLOC
     {
         BdrvCheckResult result = {0};
-        qcow2_check_refcounts(bs, &result);
+        qcow2_check_refcounts(bs, &result, 0);
     }
 #endif
     return 0;
@@ -582,7 +582,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
 #ifdef DEBUG_ALLOC
     {
         BdrvCheckResult result = {0};
-        qcow2_check_refcounts(bs, &result);
+        qcow2_check_refcounts(bs, &result, 0);
     }
 #endif
     return 0;
diff --git a/block/qcow2.c b/block/qcow2.c
index 2c1cd0a446..870148ddf8 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -415,7 +415,7 @@ static int qcow2_open(BlockDriverState *bs, int flags)
 #ifdef DEBUG_ALLOC
     {
         BdrvCheckResult result = {0};
-        qcow2_check_refcounts(bs, &result);
+        qcow2_check_refcounts(bs, &result, 0);
     }
 #endif
     return ret;
@@ -508,7 +508,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
     else
         n1 = bs->total_sectors - sector_num;
 
-    qemu_iovec_memset_skip(qiov, 0, 512 * (nb_sectors - n1), 512 * n1);
+    qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
 
     return n1;
 }
@@ -547,7 +547,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
         index_in_cluster = sector_num & (s->cluster_sectors - 1);
 
         qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_copy(&hd_qiov, qiov, bytes_done,
+        qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
             cur_nr_sectors * 512);
 
         switch (ret) {
@@ -569,7 +569,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                 }
             } else {
                 /* Note: in this case, no need to wait */
-                qemu_iovec_memset(&hd_qiov, 0, 512 * cur_nr_sectors);
+                qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
             }
             break;
 
@@ -578,7 +578,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                 ret = -EIO;
                 goto fail;
             }
-            qemu_iovec_memset(&hd_qiov, 0, 512 * cur_nr_sectors);
+            qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
             break;
 
         case QCOW2_CLUSTER_COMPRESSED:
@@ -588,7 +588,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                 goto fail;
             }
 
-            qemu_iovec_from_buffer(&hd_qiov,
+            qemu_iovec_from_buf(&hd_qiov, 0,
                 s->cluster_cache + index_in_cluster * 512,
                 512 * cur_nr_sectors);
             break;
@@ -628,11 +628,8 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
             if (s->crypt_method) {
                 qcow2_encrypt_sectors(s, sector_num,  cluster_data,
                     cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key);
-                qemu_iovec_reset(&hd_qiov);
-                qemu_iovec_copy(&hd_qiov, qiov, bytes_done,
-                    cur_nr_sectors * 512);
-                qemu_iovec_from_buffer(&hd_qiov, cluster_data,
-                    512 * cur_nr_sectors);
+                qemu_iovec_from_buf(qiov, bytes_done,
+                    cluster_data, 512 * cur_nr_sectors);
             }
             break;
 
@@ -721,7 +718,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         assert((cluster_offset & 511) == 0);
 
         qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_copy(&hd_qiov, qiov, bytes_done,
+        qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
             cur_nr_sectors * 512);
 
         if (s->crypt_method) {
@@ -732,7 +729,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
 
             assert(hd_qiov.size <=
                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
-            qemu_iovec_to_buffer(&hd_qiov, cluster_data);
+            qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
 
             qcow2_encrypt_sectors(s, sector_num, cluster_data,
                 cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key);
diff --git a/block/qed.c b/block/qed.c
index ab5972466c..5f3eefa3af 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -736,7 +736,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
     /* Zero all sectors if reading beyond the end of the backing file */
     if (pos >= backing_length ||
         pos + qiov->size > backing_length) {
-        qemu_iovec_memset(qiov, 0, qiov->size);
+        qemu_iovec_memset(qiov, 0, 0, qiov->size);
     }
 
     /* Complete now if there are no backing file sectors to read */
@@ -748,7 +748,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
     /* If the read straddles the end of the backing file, shorten it */
     size = MIN((uint64_t)backing_length - pos, qiov->size);
 
-    BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING);
+    BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
     bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
                    qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
 }
@@ -1131,7 +1131,7 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 
     acb->cur_nclusters = qed_bytes_to_clusters(s,
             qed_offset_into_cluster(s, acb->cur_pos) + len);
-    qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+    qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
     if (acb->flags & QED_AIOCB_ZERO) {
         /* Skip ahead if the clusters are already zero */
@@ -1177,7 +1177,7 @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
 
     /* Calculate the I/O vector */
     acb->cur_cluster = offset;
-    qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+    qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
     /* Do the actual write */
     qed_aio_write_main(acb, 0);
@@ -1247,11 +1247,11 @@ static void qed_aio_read_data(void *opaque, int ret,
         goto err;
     }
 
-    qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+    qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
     /* Handle zero cluster and backing file reads */
     if (ret == QED_CLUSTER_ZERO) {
-        qemu_iovec_memset(&acb->cur_qiov, 0, acb->cur_qiov.size);
+        qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
         qed_aio_next_io(acb, 0);
         return;
     } else if (ret != QED_CLUSTER_FOUND) {
diff --git a/block/raw.c b/block/raw.c
index 09d9b4878b..ff34ea41e7 100644
--- a/block/raw.c
+++ b/block/raw.c
@@ -12,12 +12,14 @@ static int raw_open(BlockDriverState *bs, int flags)
 static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *qiov)
 {
+    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
     return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov);
 }
 
 static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
                                       int nb_sectors, QEMUIOVector *qiov)
 {
+    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
     return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
 }
 
diff --git a/block/rbd.c b/block/rbd.c
index eebc334462..5a0f79fc8f 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -639,7 +639,7 @@ static void rbd_aio_bh_cb(void *opaque)
     RBDAIOCB *acb = opaque;
 
     if (acb->cmd == RBD_AIO_READ) {
-        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
+        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
     }
     qemu_vfree(acb->bounce);
     acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
@@ -693,7 +693,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
     acb->bh = NULL;
 
     if (cmd == RBD_AIO_WRITE) {
-        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
+        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
     }
 
     buf = acb->bounce;
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 8877f4528d..809df39d9e 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -259,8 +259,7 @@ typedef struct AIOReq {
     uint8_t flags;
     uint32_t id;
 
-    QLIST_ENTRY(AIOReq) outstanding_aio_siblings;
-    QLIST_ENTRY(AIOReq) aioreq_siblings;
+    QLIST_ENTRY(AIOReq) aio_siblings;
 } AIOReq;
 
 enum AIOCBState {
@@ -283,8 +282,7 @@ struct SheepdogAIOCB {
     void (*aio_done_func)(SheepdogAIOCB *);
 
     int canceled;
-
-    QLIST_HEAD(aioreq_head, AIOReq) aioreq_head;
+    int nr_pending;
 };
 
 typedef struct BDRVSheepdogState {
@@ -307,7 +305,8 @@ typedef struct BDRVSheepdogState {
     Coroutine *co_recv;
 
     uint32_t aioreq_seq_num;
-    QLIST_HEAD(outstanding_aio_head, AIOReq) outstanding_aio_head;
+    QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
+    QLIST_HEAD(pending_aio_head, AIOReq) pending_aio_head;
 } BDRVSheepdogState;
 
 static const char * sd_strerror(int err)
@@ -358,7 +357,7 @@ static const char * sd_strerror(int err)
  * Sheepdog I/O handling:
  *
  * 1. In sd_co_rw_vector, we send the I/O requests to the server and
- *    link the requests to the outstanding_list in the
+ *    link the requests to the inflight_list in the
  *    BDRVSheepdogState.  The function exits without waiting for
  *    receiving the response.
  *
@@ -386,21 +385,18 @@ static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
     aio_req->flags = flags;
     aio_req->id = s->aioreq_seq_num++;
 
-    QLIST_INSERT_HEAD(&s->outstanding_aio_head, aio_req,
-                      outstanding_aio_siblings);
-    QLIST_INSERT_HEAD(&acb->aioreq_head, aio_req, aioreq_siblings);
-
+    acb->nr_pending++;
     return aio_req;
 }
 
-static inline int free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
+static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
 {
     SheepdogAIOCB *acb = aio_req->aiocb;
-    QLIST_REMOVE(aio_req, outstanding_aio_siblings);
-    QLIST_REMOVE(aio_req, aioreq_siblings);
+
+    QLIST_REMOVE(aio_req, aio_siblings);
     g_free(aio_req);
 
-    return !QLIST_EMPTY(&acb->aioreq_head);
+    acb->nr_pending--;
 }
 
 static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
@@ -446,7 +442,7 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
     acb->canceled = 0;
     acb->coroutine = qemu_coroutine_self();
     acb->ret = 0;
-    QLIST_INIT(&acb->aioreq_head);
+    acb->nr_pending = 0;
     return acb;
 }
 
@@ -541,11 +537,18 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
     return ret;
 }
 
+static coroutine_fn int do_co_req(int sockfd, SheepdogReq *hdr, void *data,
+                                  unsigned int *wlen, unsigned int *rlen);
+
 static int do_req(int sockfd, SheepdogReq *hdr, void *data,
                   unsigned int *wlen, unsigned int *rlen)
 {
     int ret;
 
+    if (qemu_in_coroutine()) {
+        return do_co_req(sockfd, hdr, data, wlen, rlen);
+    }
+
     socket_set_block(sockfd);
     ret = send_req(sockfd, hdr, data, wlen);
     if (ret < 0) {
@@ -577,10 +580,21 @@ out:
     return ret;
 }
 
+static void restart_co_req(void *opaque)
+{
+    Coroutine *co = opaque;
+
+    qemu_coroutine_enter(co, NULL);
+}
+
 static coroutine_fn int do_co_req(int sockfd, SheepdogReq *hdr, void *data,
                                   unsigned int *wlen, unsigned int *rlen)
 {
     int ret;
+    Coroutine *co;
+
+    co = qemu_coroutine_self();
+    qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, NULL, co);
 
     socket_set_block(sockfd);
     ret = send_co_req(sockfd, hdr, data, wlen);
@@ -588,6 +602,8 @@ static coroutine_fn int do_co_req(int sockfd, SheepdogReq *hdr, void *data,
         goto out;
     }
 
+    qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, NULL, co);
+
     ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
     if (ret < sizeof(*hdr)) {
         error_report("failed to get a rsp, %s", strerror(errno));
@@ -609,6 +625,7 @@ static coroutine_fn int do_co_req(int sockfd, SheepdogReq *hdr, void *data,
     }
     ret = 0;
 out:
+    qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL, NULL);
     socket_set_nonblock(sockfd);
     return ret;
 }
@@ -617,32 +634,41 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
                            struct iovec *iov, int niov, int create,
                            enum AIOCBState aiocb_type);
 
+
+static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid)
+{
+    AIOReq *aio_req;
+
+    QLIST_FOREACH(aio_req, &s->pending_aio_head, aio_siblings) {
+        if (aio_req->oid == oid) {
+            return aio_req;
+        }
+    }
+
+    return NULL;
+}
+
 /*
  * This function searchs pending requests to the object `oid', and
  * sends them.
  */
-static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid, uint32_t id)
+static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
 {
-    AIOReq *aio_req, *next;
+    AIOReq *aio_req;
     SheepdogAIOCB *acb;
     int ret;
 
-    QLIST_FOREACH_SAFE(aio_req, &s->outstanding_aio_head,
-                       outstanding_aio_siblings, next) {
-        if (id == aio_req->id) {
-            continue;
-        }
-        if (aio_req->oid != oid) {
-            continue;
-        }
-
+    while ((aio_req = find_pending_req(s, oid)) != NULL) {
         acb = aio_req->aiocb;
+        /* move aio_req from pending list to inflight one */
+        QLIST_REMOVE(aio_req, aio_siblings);
+        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
         ret = add_aio_request(s, aio_req, acb->qiov->iov,
                               acb->qiov->niov, 0, acb->aiocb_type);
         if (ret < 0) {
             error_report("add_aio_request is failed");
             free_aio_req(s, aio_req);
-            if (QLIST_EMPTY(&acb->aioreq_head)) {
+            if (!acb->nr_pending) {
                 sd_finish_aiocb(acb);
             }
         }
@@ -663,10 +689,9 @@ static void coroutine_fn aio_read_response(void *opaque)
     int ret;
     AIOReq *aio_req = NULL;
     SheepdogAIOCB *acb;
-    int rest;
     unsigned long idx;
 
-    if (QLIST_EMPTY(&s->outstanding_aio_head)) {
+    if (QLIST_EMPTY(&s->inflight_aio_head)) {
         goto out;
     }
 
@@ -677,8 +702,8 @@ static void coroutine_fn aio_read_response(void *opaque)
         goto out;
     }
 
-    /* find the right aio_req from the outstanding_aio list */
-    QLIST_FOREACH(aio_req, &s->outstanding_aio_head, outstanding_aio_siblings) {
+    /* find the right aio_req from the inflight aio list */
+    QLIST_FOREACH(aio_req, &s->inflight_aio_head, aio_siblings) {
         if (aio_req->id == rsp.id) {
             break;
         }
@@ -716,12 +741,12 @@ static void coroutine_fn aio_read_response(void *opaque)
              * create requests are not allowed, so we search the
              * pending requests here.
              */
-            send_pending_req(s, vid_to_data_oid(s->inode.vdi_id, idx), rsp.id);
+            send_pending_req(s, vid_to_data_oid(s->inode.vdi_id, idx));
         }
         break;
     case AIOCB_READ_UDATA:
-        ret = qemu_co_recvv(fd, acb->qiov->iov, rsp.data_length,
-                            aio_req->iov_offset);
+        ret = qemu_co_recvv(fd, acb->qiov->iov, acb->qiov->niov,
+                            aio_req->iov_offset, rsp.data_length);
         if (ret < 0) {
             error_report("failed to get the data, %s", strerror(errno));
             goto out;
@@ -734,8 +759,8 @@ static void coroutine_fn aio_read_response(void *opaque)
         error_report("%s", sd_strerror(rsp.result));
     }
 
-    rest = free_aio_req(s, aio_req);
-    if (!rest) {
+    free_aio_req(s, aio_req);
+    if (!acb->nr_pending) {
         /*
          * We've finished all requests which belong to the AIOCB, so
          * we can switch back to sd_co_readv/writev now.
@@ -768,7 +793,8 @@ static int aio_flush_request(void *opaque)
 {
     BDRVSheepdogState *s = opaque;
 
-    return !QLIST_EMPTY(&s->outstanding_aio_head);
+    return !QLIST_EMPTY(&s->inflight_aio_head) ||
+        !QLIST_EMPTY(&s->pending_aio_head);
 }
 
 static int set_nodelay(int fd)
@@ -993,7 +1019,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
     }
 
     if (wlen) {
-        ret = qemu_co_sendv(s->fd, iov, wlen, aio_req->iov_offset);
+        ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen);
         if (ret < 0) {
             qemu_co_mutex_unlock(&s->lock);
             error_report("failed to send a data, %s", strerror(errno));
@@ -1085,7 +1111,8 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
 
     strstart(filename, "sheepdog:", (const char **)&filename);
 
-    QLIST_INIT(&s->outstanding_aio_head);
+    QLIST_INIT(&s->inflight_aio_head);
+    QLIST_INIT(&s->pending_aio_head);
     s->fd = -1;
 
     memset(vdi, 0, sizeof(vdi));
@@ -1447,6 +1474,7 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
         iov.iov_len = sizeof(s->inode);
         aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
                                 data_len, offset, 0, 0, offset);
+        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
         ret = add_aio_request(s, aio_req, &iov, 1, 0, AIOCB_WRITE_UDATA);
         if (ret) {
             free_aio_req(s, aio_req);
@@ -1515,7 +1543,7 @@ out:
  * Send I/O requests to the server.
  *
  * This function sends requests to the server, links the requests to
- * the outstanding_list in BDRVSheepdogState, and exits without
+ * the inflight_list in BDRVSheepdogState, and exits without
  * waiting the response.  The responses are received in the
  * `aio_read_response' function which is called from the main loop as
  * a fd handler.
@@ -1547,6 +1575,12 @@ static int coroutine_fn sd_co_rw_vector(void *p)
         }
     }
 
+    /*
+     * Make sure we don't free the aiocb before we are done with all requests.
+     * This additional reference is dropped at the end of this function.
+     */
+    acb->nr_pending++;
+
     while (done != total) {
         uint8_t flags = 0;
         uint64_t old_oid = 0;
@@ -1571,22 +1605,18 @@ static int coroutine_fn sd_co_rw_vector(void *p)
         }
 
         if (create) {
-            dprintf("update ino (%" PRIu32") %" PRIu64 " %" PRIu64
-                    " %" PRIu64 "\n", inode->vdi_id, oid,
+            dprintf("update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld\n",
+                    inode->vdi_id, oid,
                     vid_to_data_oid(inode->data_vdi_id[idx], idx), idx);
             oid = vid_to_data_oid(inode->vdi_id, idx);
-            dprintf("new oid %lx\n", oid);
+            dprintf("new oid %" PRIx64 "\n", oid);
         }
 
         aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done);
 
         if (create) {
             AIOReq *areq;
-            QLIST_FOREACH(areq, &s->outstanding_aio_head,
-                          outstanding_aio_siblings) {
-                if (areq == aio_req) {
-                    continue;
-                }
+            QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) {
                 if (areq->oid == oid) {
                     /*
                      * Sheepdog cannot handle simultaneous create
@@ -1596,11 +1626,14 @@ static int coroutine_fn sd_co_rw_vector(void *p)
                      */
                     aio_req->flags = 0;
                     aio_req->base_oid = 0;
+                    QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req,
+                                      aio_siblings);
                     goto done;
                 }
             }
         }
 
+        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
         ret = add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
                               create, acb->aiocb_type);
         if (ret < 0) {
@@ -1615,7 +1648,7 @@ static int coroutine_fn sd_co_rw_vector(void *p)
         done += len;
     }
 out:
-    if (QLIST_EMPTY(&acb->aioreq_head)) {
+    if (!--acb->nr_pending) {
         return acb->ret;
     }
     return 1;
@@ -1628,7 +1661,6 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
     int ret;
 
     if (bs->growable && sector_num + nb_sectors > bs->total_sectors) {
-        /* TODO: shouldn't block here */
         ret = sd_truncate(bs, (sector_num + nb_sectors) * SECTOR_SIZE);
         if (ret < 0) {
             return ret;
@@ -1696,7 +1728,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
     hdr.opcode = SD_OP_FLUSH_VDI;
     hdr.oid = vid_to_vdi_oid(inode->vdi_id);
 
-    ret = do_co_req(s->flush_fd, (SheepdogReq *)&hdr, NULL, &wlen, &rlen);
+    ret = do_req(s->flush_fd, (SheepdogReq *)&hdr, NULL, &wlen, &rlen);
     if (ret) {
         error_report("failed to send a request to the sheep");
         return ret;
@@ -1726,7 +1758,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     SheepdogInode *inode;
     unsigned int datalen;
 
-    dprintf("sn_info: name %s id_str %s s: name %s vm_state_size %d "
+    dprintf("sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " "
             "is_snapshot %d\n", sn_info->name, sn_info->id_str,
             s->name, sn_info->vm_state_size, s->is_snapshot);
 
diff --git a/blockdev.c b/blockdev.c
index 9e0a72a269..a85a429aef 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -609,6 +609,10 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
 
     bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
 
+    if (ro && copy_on_read) {
+        error_report("warning: disabling copy_on_read on readonly drive");
+    }
+
     ret = bdrv_open(dinfo->bdrv, file, bdrv_flags, drv);
     if (ret < 0) {
         error_report("could not open disk image %s: %s",
diff --git a/configure b/configure
index 9f071b7876..0a3896e757 100755
--- a/configure
+++ b/configure
@@ -1043,8 +1043,6 @@ echo "  --disable-vnc-jpeg       disable JPEG lossy compression for VNC server"
 echo "  --enable-vnc-jpeg        enable JPEG lossy compression for VNC server"
 echo "  --disable-vnc-png        disable PNG compression for VNC server (default)"
 echo "  --enable-vnc-png         enable PNG compression for VNC server"
-echo "  --disable-vnc-thread     disable threaded VNC server"
-echo "  --enable-vnc-thread      enable threaded VNC server"
 echo "  --disable-curses         disable curses output"
 echo "  --enable-curses          enable curses output"
 echo "  --disable-curl           disable curl connectivity"
@@ -1141,10 +1139,26 @@ else
     exit 1
 fi
 
+# Consult white-list to determine whether to enable werror
+# by default.  Only enable by default for git builds
+z_version=`cut -f3 -d. $source_path/VERSION`
+
+if test -z "$werror" ; then
+    if test "$z_version" = "50" -a \
+        "$linux" = "yes" ; then
+        werror="yes"
+    else
+        werror="no"
+    fi
+fi
+
 gcc_flags="-Wold-style-declaration -Wold-style-definition -Wtype-limits"
 gcc_flags="-Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers $gcc_flags"
 gcc_flags="-Wmissing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
 gcc_flags="-fstack-protector-all -Wendif-labels $gcc_flags"
+if test "$werror" = "yes" ; then
+    gcc_flags="-Werror $gcc_flags"
+fi
 cat > $TMPC << EOF
 int main(void) { return 0; }
 EOF
@@ -2575,7 +2589,7 @@ if test "$libiscsi" != "no" ; then
 #include <iscsi/iscsi.h>
 int main(void) { iscsi_unmap_sync(NULL,0,0,0,NULL,0); return 0; }
 EOF
-  if compile_prog "-Werror" "-liscsi" ; then
+  if compile_prog "" "-liscsi" ; then
     libiscsi="yes"
     LIBS="$LIBS -liscsi"
   else
@@ -2879,19 +2893,6 @@ if test "$debug" = "no" ; then
   CFLAGS="-O2 -D_FORTIFY_SOURCE=2 $CFLAGS"
 fi
 
-# Consult white-list to determine whether to enable werror
-# by default.  Only enable by default for git builds
-z_version=`cut -f3 -d. $source_path/VERSION`
-
-if test -z "$werror" ; then
-    if test "$z_version" = "50" -a \
-        "$linux" = "yes" ; then
-        werror="yes"
-    else
-        werror="no"
-    fi
-fi
-
 # Disable zero malloc errors for official releases unless explicitly told to
 # enable/disable
 if test -z "$zero_malloc" ; then
@@ -2902,10 +2903,6 @@ if test -z "$zero_malloc" ; then
     fi
 fi
 
-if test "$werror" = "yes" ; then
-    QEMU_CFLAGS="-Werror $QEMU_CFLAGS"
-fi
-
 if test "$solaris" = "no" ; then
     if $ld --version 2>/dev/null | grep "GNU ld" >/dev/null 2>/dev/null ; then
         LDFLAGS="-Wl,--warn-common $LDFLAGS"
@@ -3573,7 +3570,7 @@ case "$target_arch2" in
     bflt="yes"
     target_nptl="yes"
     gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
-    target_phys_bits=32
+    target_phys_bits=64
     target_llong_alignment=4
     target_libs_softmmu="$fdt_libs"
   ;;
diff --git a/console.c b/console.c
index 6a463f5918..4525cc70b8 100644
--- a/console.c
+++ b/console.c
@@ -28,6 +28,7 @@
 //#define DEBUG_CONSOLE
 #define DEFAULT_BACKSCROLL 512
 #define MAX_CONSOLES 12
+#define CONSOLE_CURSOR_PERIOD 500
 
 #define QEMU_RGBA(r, g, b, a) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
 #define QEMU_RGB(r, g, b) QEMU_RGBA(r, g, b, 0xff)
@@ -139,6 +140,8 @@ struct TextConsole {
     TextCell *cells;
     int text_x[2], text_y[2], cursor_invalidate;
     int echo;
+    bool cursor_visible_phase;
+    QEMUTimer *cursor_timer;
 
     int update_x0;
     int update_y0;
@@ -615,7 +618,7 @@ static void console_show_cursor(TextConsole *s, int show)
             y += s->total_height;
         if (y < s->height) {
             c = &s->cells[y1 * s->width + x];
-            if (show) {
+            if (show && s->cursor_visible_phase) {
                 TextAttributes t_attrib = s->t_attrib_default;
                 t_attrib.invers = !(t_attrib.invers); /* invert fg and bg */
                 vga_putcharxy(s->ds, x, y, c->ch, &t_attrib);
@@ -1083,6 +1086,10 @@ void console_select(unsigned int index)
     s = consoles[index];
     if (s) {
         DisplayState *ds = s->ds;
+
+        if (active_console->cursor_timer) {
+            qemu_del_timer(active_console->cursor_timer);
+        }
         active_console = s;
         if (ds_get_bits_per_pixel(s->ds)) {
             ds->surface = qemu_resize_displaysurface(ds, s->g_width, s->g_height);
@@ -1090,6 +1097,10 @@ void console_select(unsigned int index)
             s->ds->surface->width = s->width;
             s->ds->surface->height = s->height;
         }
+        if (s->cursor_timer) {
+            qemu_mod_timer(s->cursor_timer,
+                   qemu_get_clock_ms(rt_clock) + CONSOLE_CURSOR_PERIOD / 2);
+        }
         dpy_resize(s->ds);
         vga_hw_invalidate();
     }
@@ -1454,6 +1465,16 @@ static void text_console_set_echo(CharDriverState *chr, bool echo)
     s->echo = echo;
 }
 
+static void text_console_update_cursor(void *opaque)
+{
+    TextConsole *s = opaque;
+
+    s->cursor_visible_phase = !s->cursor_visible_phase;
+    vga_hw_invalidate();
+    qemu_mod_timer(s->cursor_timer,
+                   qemu_get_clock_ms(rt_clock) + CONSOLE_CURSOR_PERIOD / 2);
+}
+
 static void text_console_do_init(CharDriverState *chr, DisplayState *ds)
 {
     TextConsole *s;
@@ -1482,6 +1503,9 @@ static void text_console_do_init(CharDriverState *chr, DisplayState *ds)
         s->g_height = ds_get_height(s->ds);
     }
 
+    s->cursor_timer =
+        qemu_new_timer_ms(rt_clock, text_console_update_cursor, s);
+
     s->hw_invalidate = text_console_invalidate;
     s->hw_text_update = text_console_update;
     s->hw = s;
diff --git a/cpu-all.h b/cpu-all.h
index 9dc249a165..82ba1d7cd5 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -486,6 +486,7 @@ typedef struct RAMBlock {
 typedef struct RAMList {
     uint8_t *phys_dirty;
     QLIST_HEAD(, RAMBlock) blocks;
+    uint64_t dirty_pages;
 } RAMList;
 extern RAMList ram_list;
 
diff --git a/cpu-common.h b/cpu-common.h
index 1fe3280701..85548de5ea 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -3,9 +3,7 @@
 
 /* CPU interfaces that are target independent.  */
 
-#ifdef TARGET_PHYS_ADDR_BITS
 #include "targphys.h"
-#endif
 
 #ifndef NEED_CPU_H
 #include "poison.h"
@@ -71,9 +69,7 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque));
 void cpu_unregister_map_client(void *cookie);
 
-#ifndef CONFIG_USER_ONLY
 bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr);
-#endif
 
 /* Coalesced MMIO regions are areas where write operations can be reordered.
  * This usually implies that write operations are side-effect free.  This allows
diff --git a/cutils.c b/cutils.c
index af308cd7b9..e2bc1b89df 100644
--- a/cutils.c
+++ b/cutils.c
@@ -26,6 +26,7 @@
 #include <math.h>
 
 #include "qemu_socket.h"
+#include "iov.h"
 
 void pstrcpy(char *buf, int buf_size, const char *str)
 {
@@ -171,48 +172,34 @@ void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
 }
 
 /*
- * Copies iovecs from src to the end of dst. It starts copying after skipping
- * the given number of bytes in src and copies until src is completely copied
- * or the total size of the copied iovec reaches size.The size of the last
- * copied iovec is changed in order to fit the specified total size if it isn't
- * a perfect fit already.
+ * Concatenates (partial) iovecs from src to the end of dst.
+ * It starts copying after skipping `soffset' bytes at the
+ * beginning of src and adds individual vectors from src to
+ * dst copies up to `sbytes' bytes total, or up to the end
+ * of src if it comes first.  This way, it is okay to specify
+ * very large value for `sbytes' to indicate "up to the end
+ * of src".
+ * Only vector pointers are processed, not the actual data buffers.
  */
-void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip,
-    size_t size)
+void qemu_iovec_concat(QEMUIOVector *dst,
+                       QEMUIOVector *src, size_t soffset, size_t sbytes)
 {
     int i;
     size_t done;
-    void *iov_base;
-    uint64_t iov_len;
-
+    struct iovec *siov = src->iov;
     assert(dst->nalloc != -1);
-
-    done = 0;
-    for (i = 0; (i < src->niov) && (done != size); i++) {
-        if (skip >= src->iov[i].iov_len) {
-            /* Skip the whole iov */
-            skip -= src->iov[i].iov_len;
-            continue;
-        } else {
-            /* Skip only part (or nothing) of the iov */
-            iov_base = (uint8_t*) src->iov[i].iov_base + skip;
-            iov_len = src->iov[i].iov_len - skip;
-            skip = 0;
-        }
-
-        if (done + iov_len > size) {
-            qemu_iovec_add(dst, iov_base, size - done);
-            break;
+    assert(src->size >= soffset);
+    for (i = 0, done = 0; done < sbytes && i < src->niov; i++) {
+        if (soffset < siov[i].iov_len) {
+            size_t len = MIN(siov[i].iov_len - soffset, sbytes - done);
+            qemu_iovec_add(dst, siov[i].iov_base + soffset, len);
+            done += len;
+            soffset = 0;
         } else {
-            qemu_iovec_add(dst, iov_base, iov_len);
+            soffset -= siov[i].iov_len;
         }
-        done += iov_len;
     }
-}
-
-void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size)
-{
-    qemu_iovec_copy(dst, src, 0, size);
+    /* return done; */
 }
 
 void qemu_iovec_destroy(QEMUIOVector *qiov)
@@ -233,74 +220,22 @@ void qemu_iovec_reset(QEMUIOVector *qiov)
     qiov->size = 0;
 }
 
-void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf)
+size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
+                         void *buf, size_t bytes)
 {
-    uint8_t *p = (uint8_t *)buf;
-    int i;
-
-    for (i = 0; i < qiov->niov; ++i) {
-        memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len);
-        p += qiov->iov[i].iov_len;
-    }
+    return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes);
 }
 
-void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count)
+size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
+                           const void *buf, size_t bytes)
 {
-    const uint8_t *p = (const uint8_t *)buf;
-    size_t copy;
-    int i;
-
-    for (i = 0; i < qiov->niov && count; ++i) {
-        copy = count;
-        if (copy > qiov->iov[i].iov_len)
-            copy = qiov->iov[i].iov_len;
-        memcpy(qiov->iov[i].iov_base, p, copy);
-        p     += copy;
-        count -= copy;
-    }
+    return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes);
 }
 
-void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count)
+size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
+                         int fillc, size_t bytes)
 {
-    size_t n;
-    int i;
-
-    for (i = 0; i < qiov->niov && count; ++i) {
-        n = MIN(count, qiov->iov[i].iov_len);
-        memset(qiov->iov[i].iov_base, c, n);
-        count -= n;
-    }
-}
-
-void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count,
-                            size_t skip)
-{
-    int i;
-    size_t done;
-    void *iov_base;
-    uint64_t iov_len;
-
-    done = 0;
-    for (i = 0; (i < qiov->niov) && (done != count); i++) {
-        if (skip >= qiov->iov[i].iov_len) {
-            /* Skip the whole iov */
-            skip -= qiov->iov[i].iov_len;
-            continue;
-        } else {
-            /* Skip only part (or nothing) of the iov */
-            iov_base = (uint8_t*) qiov->iov[i].iov_base + skip;
-            iov_len = qiov->iov[i].iov_len - skip;
-            skip = 0;
-        }
-
-        if (done + iov_len > count) {
-            memset(iov_base, c, count - done);
-            break;
-        } else {
-            memset(iov_base, c, iov_len);
-        }
-        done += iov_len;
-    }
+    return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
 }
 
 /*
@@ -440,112 +375,3 @@ int qemu_parse_fd(const char *param)
     }
     return fd;
 }
-
-/*
- * Send/recv data with iovec buffers
- *
- * This function send/recv data from/to the iovec buffer directly.
- * The first `offset' bytes in the iovec buffer are skipped and next
- * `len' bytes are used.
- *
- * For example,
- *
- *   do_sendv_recvv(sockfd, iov, len, offset, 1);
- *
- * is equal to
- *
- *   char *buf = malloc(size);
- *   iov_to_buf(iov, iovcnt, buf, offset, size);
- *   send(sockfd, buf, size, 0);
- *   free(buf);
- */
-static int do_sendv_recvv(int sockfd, struct iovec *iov, int len, int offset,
-                          int do_sendv)
-{
-    int ret, diff, iovlen;
-    struct iovec *last_iov;
-
-    /* last_iov is inclusive, so count from one.  */
-    iovlen = 1;
-    last_iov = iov;
-    len += offset;
-
-    while (last_iov->iov_len < len) {
-        len -= last_iov->iov_len;
-
-        last_iov++;
-        iovlen++;
-    }
-
-    diff = last_iov->iov_len - len;
-    last_iov->iov_len -= diff;
-
-    while (iov->iov_len <= offset) {
-        offset -= iov->iov_len;
-
-        iov++;
-        iovlen--;
-    }
-
-    iov->iov_base = (char *) iov->iov_base + offset;
-    iov->iov_len -= offset;
-
-    {
-#if defined CONFIG_IOVEC && defined CONFIG_POSIX
-        struct msghdr msg;
-        memset(&msg, 0, sizeof(msg));
-        msg.msg_iov = iov;
-        msg.msg_iovlen = iovlen;
-
-        do {
-            if (do_sendv) {
-                ret = sendmsg(sockfd, &msg, 0);
-            } else {
-                ret = recvmsg(sockfd, &msg, 0);
-            }
-        } while (ret == -1 && errno == EINTR);
-#else
-        struct iovec *p = iov;
-        ret = 0;
-        while (iovlen > 0) {
-            int rc;
-            if (do_sendv) {
-                rc = send(sockfd, p->iov_base, p->iov_len, 0);
-            } else {
-                rc = qemu_recv(sockfd, p->iov_base, p->iov_len, 0);
-            }
-            if (rc == -1) {
-                if (errno == EINTR) {
-                    continue;
-                }
-                if (ret == 0) {
-                    ret = -1;
-                }
-                break;
-            }
-            if (rc == 0) {
-                break;
-            }
-            ret += rc;
-            iovlen--, p++;
-        }
-#endif
-    }
-
-    /* Undo the changes above */
-    iov->iov_base = (char *) iov->iov_base - offset;
-    iov->iov_len += offset;
-    last_iov->iov_len += diff;
-    return ret;
-}
-
-int qemu_recvv(int sockfd, struct iovec *iov, int len, int iov_offset)
-{
-    return do_sendv_recvv(sockfd, iov, len, iov_offset, 0);
-}
-
-int qemu_sendv(int sockfd, struct iovec *iov, int len, int iov_offset)
-{
-    return do_sendv_recvv(sockfd, iov, len, iov_offset, 1);
-}
-
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 2c78175ae7..fee8cde88b 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -25,3 +25,4 @@ CONFIG_HPET=y
 CONFIG_APPLESMC=y
 CONFIG_I8259=y
 CONFIG_PFLASH_CFI01=y
+CONFIG_ESP=y
diff --git a/disas.c b/disas.c
index 93d8d30d1b..7b2acc9943 100644
--- a/disas.c
+++ b/disas.c
@@ -64,6 +64,22 @@ generic_print_address (bfd_vma addr, struct disassemble_info *info)
     (*info->fprintf_func) (info->stream, "0x%" PRIx64, addr);
 }
 
+/* Print address in hex, truncated to the width of a target virtual address. */
+static void
+generic_print_target_address(bfd_vma addr, struct disassemble_info *info)
+{
+    uint64_t mask = ~0ULL >> (64 - TARGET_VIRT_ADDR_SPACE_BITS);
+    generic_print_address(addr & mask, info);
+}
+
+/* Print address in hex, truncated to the width of a host virtual address. */
+static void
+generic_print_host_address(bfd_vma addr, struct disassemble_info *info)
+{
+    uint64_t mask = ~0ULL >> (64 - (sizeof(void *) * 8));
+    generic_print_address(addr & mask, info);
+}
+
 /* Just return the given address.  */
 
 int
@@ -154,6 +170,7 @@ void target_disas(FILE *out, target_ulong code, target_ulong size, int flags)
     disasm_info.read_memory_func = target_read_memory;
     disasm_info.buffer_vma = code;
     disasm_info.buffer_length = size;
+    disasm_info.print_address_func = generic_print_target_address;
 
 #ifdef TARGET_WORDS_BIGENDIAN
     disasm_info.endian = BFD_ENDIAN_BIG;
@@ -274,6 +291,7 @@ void disas(FILE *out, void *code, unsigned long size)
     int (*print_insn)(bfd_vma pc, disassemble_info *info);
 
     INIT_DISASSEMBLE_INFO(disasm_info, out, fprintf);
+    disasm_info.print_address_func = generic_print_host_address;
 
     disasm_info.buffer = code;
     disasm_info.buffer_vma = (uintptr_t)code;
@@ -386,6 +404,7 @@ void monitor_disas(Monitor *mon, CPUArchState *env,
     monitor_disas_env = env;
     monitor_disas_is_physical = is_physical;
     disasm_info.read_memory_func = monitor_read_memory;
+    disasm_info.print_address_func = generic_print_target_address;
 
     disasm_info.buffer_vma = pc;
 
diff --git a/docs/usb-storage.txt b/docs/usb-storage.txt
new file mode 100644
index 0000000000..ff9755920d
--- /dev/null
+++ b/docs/usb-storage.txt
@@ -0,0 +1,38 @@
+
+qemu usb storage emulation
+--------------------------
+
+Qemu has two emulations for usb storage devices.
+
+Number one emulates the classic bulk-only transport protocol which is
+used by 99% of the usb sticks on the marked today and is called
+"usb-storage".  Usage (hooking up to xhci, other host controllers work
+too):
+
+  qemu ${other_vm_args}                                \
+       -drive if=none,id=stick,file=/path/to/file.img  \
+       -device nec-usb-xhci,id=xhci                    \
+       -device usb-storage,bus=xhci.0,drive=stick
+
+
+Number two is the newer usb attached scsi transport.  This one doesn't
+automagically create a scsi disk, so you have to explicitly attach one
+manually.  Multiple logical units are supported.  Here is an example
+with tree logical units:
+
+  qemu ${other_vm_args}                                                \
+       -drive if=none,id=uas-disk1,file=/path/to/file1.img             \
+       -drive if=none,id=uas-disk2,file=/path/to/file2.img             \
+       -drive if=none,id=uas-cdrom,media=cdrom,file=/path/to/image.iso \
+       -device nec-usb-xhci,id=xhci                                    \
+       -device usb-uas,id=uas,bus=xhci.0                               \
+       -device scsi-hd,bus=uas.0,scsi-id=0,lun=0,drive=uas-disk1       \
+       -device scsi-hd,bus=uas.0,scsi-id=0,lun=1,drive=uas-disk2       \
+       -device scsi-cd,bus=uas.0,scsi-id=0,lun=5,drive=uas-cdrom
+
+
+enjoy,
+  Gerd
+
+--
+Gerd Hoffmann <kraxel@redhat.com>
diff --git a/exec-obsolete.h b/exec-obsolete.h
index 792c831718..c09925610d 100644
--- a/exec-obsolete.h
+++ b/exec-obsolete.h
@@ -45,15 +45,15 @@ int cpu_physical_memory_set_dirty_tracking(int enable);
 #define CODE_DIRTY_FLAG      0x02
 #define MIGRATION_DIRTY_FLAG 0x08
 
-/* read dirty bit (return 0 or 1) */
-static inline int cpu_physical_memory_is_dirty(ram_addr_t addr)
+static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
 {
-    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] == 0xff;
+    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS];
 }
 
-static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
+/* read dirty bit (return 0 or 1) */
+static inline int cpu_physical_memory_is_dirty(ram_addr_t addr)
 {
-    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS];
+    return cpu_physical_memory_get_dirty_flags(addr) == 0xff;
 }
 
 static inline int cpu_physical_memory_get_dirty(ram_addr_t start,
@@ -61,41 +61,55 @@ static inline int cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 int dirty_flags)
 {
     int ret = 0;
-    uint8_t *p;
     ram_addr_t addr, end;
 
     end = TARGET_PAGE_ALIGN(start + length);
     start &= TARGET_PAGE_MASK;
-    p = ram_list.phys_dirty + (start >> TARGET_PAGE_BITS);
     for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
-        ret |= *p++ & dirty_flags;
+        ret |= cpu_physical_memory_get_dirty_flags(addr) & dirty_flags;
     }
     return ret;
 }
 
+static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
+                                                      int dirty_flags)
+{
+    if ((dirty_flags & MIGRATION_DIRTY_FLAG) &&
+        !cpu_physical_memory_get_dirty(addr, TARGET_PAGE_SIZE,
+                                       MIGRATION_DIRTY_FLAG)) {
+        ram_list.dirty_pages++;
+    }
+    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] |= dirty_flags;
+}
+
 static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
 {
-    ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] = 0xff;
+    cpu_physical_memory_set_dirty_flags(addr, 0xff);
 }
 
-static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
-                                                      int dirty_flags)
+static inline int cpu_physical_memory_clear_dirty_flags(ram_addr_t addr,
+                                                        int dirty_flags)
 {
-    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] |= dirty_flags;
+    int mask = ~dirty_flags;
+
+    if ((dirty_flags & MIGRATION_DIRTY_FLAG) &&
+        cpu_physical_memory_get_dirty(addr, TARGET_PAGE_SIZE,
+                                      MIGRATION_DIRTY_FLAG)) {
+        ram_list.dirty_pages--;
+    }
+    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] &= mask;
 }
 
 static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                        ram_addr_t length,
                                                        int dirty_flags)
 {
-    uint8_t *p;
     ram_addr_t addr, end;
 
     end = TARGET_PAGE_ALIGN(start + length);
     start &= TARGET_PAGE_MASK;
-    p = ram_list.phys_dirty + (start >> TARGET_PAGE_BITS);
     for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
-        *p++ |= dirty_flags;
+        cpu_physical_memory_set_dirty_flags(addr, dirty_flags);
     }
 }
 
@@ -103,16 +117,12 @@ static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start,
                                                         ram_addr_t length,
                                                         int dirty_flags)
 {
-    int mask;
-    uint8_t *p;
     ram_addr_t addr, end;
 
     end = TARGET_PAGE_ALIGN(start + length);
     start &= TARGET_PAGE_MASK;
-    mask = ~dirty_flags;
-    p = ram_list.phys_dirty + (start >> TARGET_PAGE_BITS);
     for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
-        *p++ &= mask;
+        cpu_physical_memory_clear_dirty_flags(addr, dirty_flags);
     }
 }
 
diff --git a/exec.c b/exec.c
index 29b5078bbd..feb4795525 100644
--- a/exec.c
+++ b/exec.c
@@ -1824,11 +1824,29 @@ void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
             TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
 }
 
+static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
+                                      uintptr_t length)
+{
+    uintptr_t start1;
+
+    /* we modify the TLB cache so that the dirty bit will be set again
+       when accessing the range */
+    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
+    /* Check that we don't span multiple blocks - this breaks the
+       address comparisons below.  */
+    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
+            != (end - 1) - start) {
+        abort();
+    }
+    cpu_tlb_reset_dirty_all(start1, length);
+
+}
+
 /* Note: start and end must be within the same ram block.  */
 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
                                      int dirty_flags)
 {
-    uintptr_t length, start1;
+    uintptr_t length;
 
     start &= TARGET_PAGE_MASK;
     end = TARGET_PAGE_ALIGN(end);
@@ -1838,16 +1856,9 @@ void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
         return;
     cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
 
-    /* we modify the TLB cache so that the dirty bit will be set again
-       when accessing the range */
-    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
-    /* Check that we don't span multiple blocks - this breaks the
-       address comparisons below.  */
-    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
-            != (end - 1) - start) {
-        abort();
+    if (tcg_enabled()) {
+        tlb_reset_dirty_range_all(start, end, length);
     }
-    cpu_tlb_reset_dirty_all(start1, length);
 }
 
 int cpu_physical_memory_set_dirty_tracking(int enable)
@@ -2525,26 +2536,14 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
             exit(1);
 #endif
         } else {
-#if defined(TARGET_S390X) && defined(CONFIG_KVM)
-            /* S390 KVM requires the topmost vma of the RAM to be smaller than
-               an system defined value, which is at least 256GB. Larger systems
-               have larger values. We put the guest between the end of data
-               segment (system break) and this value. We use 32GB as a base to
-               have enough room for the system break to grow. */
-            new_block->host = mmap((void*)0x800000000, size,
-                                   PROT_EXEC|PROT_READ|PROT_WRITE,
-                                   MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
-            if (new_block->host == MAP_FAILED) {
-                fprintf(stderr, "Allocating RAM failed\n");
-                abort();
-            }
-#else
             if (xen_enabled()) {
                 xen_ram_alloc(new_block->offset, size, mr);
+            } else if (kvm_enabled()) {
+                /* some s390/kvm configurations have special constraints */
+                new_block->host = kvm_vmalloc(size);
             } else {
                 new_block->host = qemu_vmalloc(size);
             }
-#endif
             qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
         }
     }
@@ -2554,8 +2553,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
 
     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
                                        last_ram_offset() >> TARGET_PAGE_BITS);
-    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
-           0xff, size >> TARGET_PAGE_BITS);
+    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
 
     if (kvm_enabled())
         kvm_setup_guest_memory(new_block->host, size);
diff --git a/hmp-commands.hx b/hmp-commands.hx
index f5d9d91de8..eea8b32894 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1236,8 +1236,7 @@ ETEXI
         .args_type  = "fdname:s",
         .params     = "getfd name",
         .help       = "receive a file descriptor via SCM rights and assign it a name",
-        .user_print = monitor_user_noop,
-        .mhandler.cmd_new = do_getfd,
+        .mhandler.cmd = hmp_getfd,
     },
 
 STEXI
@@ -1253,8 +1252,7 @@ ETEXI
         .args_type  = "fdname:s",
         .params     = "closefd name",
         .help       = "close a file descriptor previously passed via SCM rights",
-        .user_print = monitor_user_noop,
-        .mhandler.cmd_new = do_closefd,
+        .mhandler.cmd = hmp_closefd,
     },
 
 STEXI
diff --git a/hmp.c b/hmp.c
index b9cec1dafb..6b72a64d99 100644
--- a/hmp.c
+++ b/hmp.c
@@ -145,6 +145,8 @@ void hmp_info_migrate(Monitor *mon)
                        info->ram->remaining >> 10);
         monitor_printf(mon, "total ram: %" PRIu64 " kbytes\n",
                        info->ram->total >> 10);
+        monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
+                       info->ram->total_time);
     }
 
     if (info->has_disk) {
@@ -1000,3 +1002,21 @@ void hmp_netdev_del(Monitor *mon, const QDict *qdict)
     qmp_netdev_del(id, &err);
     hmp_handle_error(mon, &err);
 }
+
+void hmp_getfd(Monitor *mon, const QDict *qdict)
+{
+    const char *fdname = qdict_get_str(qdict, "fdname");
+    Error *errp = NULL;
+
+    qmp_getfd(fdname, &errp);
+    hmp_handle_error(mon, &errp);
+}
+
+void hmp_closefd(Monitor *mon, const QDict *qdict)
+{
+    const char *fdname = qdict_get_str(qdict, "fdname");
+    Error *errp = NULL;
+
+    qmp_closefd(fdname, &errp);
+    hmp_handle_error(mon, &errp);
+}
diff --git a/hmp.h b/hmp.h
index 79d138d3ee..8d2b0d76da 100644
--- a/hmp.h
+++ b/hmp.h
@@ -64,5 +64,7 @@ void hmp_device_del(Monitor *mon, const QDict *qdict);
 void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict);
 void hmp_netdev_add(Monitor *mon, const QDict *qdict);
 void hmp_netdev_del(Monitor *mon, const QDict *qdict);
+void hmp_getfd(Monitor *mon, const QDict *qdict);
+void hmp_closefd(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index c633fb9b7e..f4a7026381 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -1648,7 +1648,7 @@ out:
  * with qemu_iovec_destroy().
  */
 static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
-                                    uint64_t skip, size_t size,
+                                    size_t skip, size_t size,
                                     bool is_write)
 {
     QEMUIOVector elem;
@@ -1665,7 +1665,7 @@ static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
 
     qemu_iovec_init_external(&elem, iov, niov);
     qemu_iovec_init(qiov, niov);
-    qemu_iovec_copy(qiov, &elem, skip, size);
+    qemu_iovec_concat(qiov, &elem, skip, size);
 }
 
 static void v9fs_read(void *opaque)
@@ -1715,7 +1715,7 @@ static void v9fs_read(void *opaque)
         qemu_iovec_init(&qiov, qiov_full.niov);
         do {
             qemu_iovec_reset(&qiov);
-            qemu_iovec_copy(&qiov, &qiov_full, count, qiov_full.size - count);
+            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
             if (0) {
                 print_sg(qiov.iov, qiov.niov);
             }
@@ -1970,7 +1970,7 @@ static void v9fs_write(void *opaque)
     qemu_iovec_init(&qiov, qiov_full.niov);
     do {
         qemu_iovec_reset(&qiov);
-        qemu_iovec_copy(&qiov, &qiov_full, total, qiov_full.size - total);
+        qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
         if (0) {
             print_sg(qiov.iov, qiov.niov);
         }
diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index 3d7725934f..9a350deafb 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -86,6 +86,7 @@ hw-obj-$(CONFIG_OPENCORES_ETH) += opencores_eth.o
 
 # SCSI layer
 hw-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o
+hw-obj-$(CONFIG_MEGASAS_SCSI_PCI) += megasas.o
 hw-obj-$(CONFIG_ESP) += esp.o
 
 hw-obj-y += sysbus.o isa-bus.o
diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
index 88ff47d95e..236786eb5a 100644
--- a/hw/arm/Makefile.objs
+++ b/hw/arm/Makefile.objs
@@ -11,6 +11,7 @@ obj-y += realview_gic.o realview.o arm_sysctl.o arm11mpcore.o a9mpcore.o
 obj-y += exynos4210_gic.o exynos4210_combiner.o exynos4210.o
 obj-y += exynos4_boards.o exynos4210_uart.o exynos4210_pwm.o
 obj-y += exynos4210_pmu.o exynos4210_mct.o exynos4210_fimd.o
+obj-y += exynos4210_rtc.o
 obj-y += arm_l2x0.o
 obj-y += arm_mptimer.o a15mpcore.o
 obj-y += armv7m.o armv7m_nvic.o stellaris.o pl022.o stellaris_enet.o
@@ -34,6 +35,8 @@ obj-y += framebuffer.o
 obj-y += vexpress.o
 obj-y += strongarm.o
 obj-y += collie.o
+obj-y += imx_serial.o imx_ccm.o imx_timer.o imx_avic.o
+obj-y += kzm.o
 obj-y += pl041.o lm4549.o
 obj-$(CONFIG_FDT) += ../device_tree.o
 
diff --git a/hw/cadence_gem.c b/hw/cadence_gem.c
index dbde3920d0..87143caf2d 100644
--- a/hw/cadence_gem.c
+++ b/hw/cadence_gem.c
@@ -339,8 +339,8 @@ typedef struct {
     uint8_t phy_loop; /* Are we in phy loopback? */
 
     /* The current DMA descriptor pointers */
-    target_phys_addr_t rx_desc_addr;
-    target_phys_addr_t tx_desc_addr;
+    uint32_t rx_desc_addr;
+    uint32_t tx_desc_addr;
 
 } GemState;
 
diff --git a/hw/esp.c b/hw/esp.c
index 8d73e56886..c6422ad340 100644
--- a/hw/esp.c
+++ b/hw/esp.c
@@ -2,6 +2,7 @@
  * QEMU ESP/NCR53C9x emulation
  *
  * Copyright (c) 2005-2006 Fabrice Bellard
+ * Copyright (c) 2012 Herve Poussineau
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -23,9 +24,11 @@
  */
 
 #include "sysbus.h"
+#include "pci.h"
 #include "scsi.h"
 #include "esp.h"
 #include "trace.h"
+#include "qemu-log.h"
 
 /*
  * On Sparc32, this is the ESP (NCR53C90) part of chip STP2000 (Master I/O),
@@ -35,21 +38,16 @@
  * http://www.ibiblio.org/pub/historic-linux/early-ports/Sparc/NCR/NCR53C9X.txt
  */
 
-#define ESP_ERROR(fmt, ...)                                             \
-    do { printf("ESP ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0)
-
 #define ESP_REGS 16
 #define TI_BUFSZ 16
 
 typedef struct ESPState ESPState;
 
 struct ESPState {
-    SysBusDevice busdev;
-    MemoryRegion iomem;
     uint8_t rregs[ESP_REGS];
     uint8_t wregs[ESP_REGS];
     qemu_irq irq;
-    uint32_t it_shift;
+    uint8_t chip_id;
     int32_t ti_size;
     uint32_t ti_rptr, ti_wptr;
     uint32_t status;
@@ -113,10 +111,12 @@ struct ESPState {
 #define CMD_MSGACC   0x12
 #define CMD_PAD      0x18
 #define CMD_SATN     0x1a
+#define CMD_RSTATN   0x1b
 #define CMD_SEL      0x41
 #define CMD_SELATN   0x42
 #define CMD_SELATNS  0x43
 #define CMD_ENSEL    0x44
+#define CMD_DISSEL   0x45
 
 #define STAT_DO 0x00
 #define STAT_DI 0x01
@@ -144,6 +144,7 @@ struct ESPState {
 #define CFG1_RESREPT 0x40
 
 #define TCHI_FAS100A 0x4
+#define TCHI_AM53C974 0x12
 
 static void esp_raise_irq(ESPState *s)
 {
@@ -163,11 +164,8 @@ static void esp_lower_irq(ESPState *s)
     }
 }
 
-static void esp_dma_enable(void *opaque, int irq, int level)
+static void esp_dma_enable(ESPState *s, int irq, int level)
 {
-    DeviceState *d = opaque;
-    ESPState *s = container_of(d, ESPState, busdev.qdev);
-
     if (level) {
         s->dma_enabled = 1;
         trace_esp_dma_enable();
@@ -183,7 +181,7 @@ static void esp_dma_enable(void *opaque, int irq, int level)
 
 static void esp_request_cancelled(SCSIRequest *req)
 {
-    ESPState *s = DO_UPCAST(ESPState, busdev.qdev, req->bus->qbus.parent);
+    ESPState *s = req->hba_private;
 
     if (req == s->current_req) {
         scsi_req_unref(s->current_req);
@@ -239,7 +237,7 @@ static void do_busid_cmd(ESPState *s, uint8_t *buf, uint8_t busid)
     trace_esp_do_busid_cmd(busid);
     lun = busid & 7;
     current_lun = scsi_device_find(&s->bus, 0, s->current_dev->id, lun);
-    s->current_req = scsi_req_new(current_lun, 0, lun, buf, NULL);
+    s->current_req = scsi_req_new(current_lun, 0, lun, buf, s);
     datalen = scsi_req_enqueue(s->current_req);
     s->ti_size = datalen;
     if (datalen != 0) {
@@ -270,7 +268,7 @@ static void handle_satn(ESPState *s)
     uint8_t buf[32];
     int len;
 
-    if (!s->dma_enabled) {
+    if (s->dma && !s->dma_enabled) {
         s->dma_cb = handle_satn;
         return;
     }
@@ -284,7 +282,7 @@ static void handle_s_without_atn(ESPState *s)
     uint8_t buf[32];
     int len;
 
-    if (!s->dma_enabled) {
+    if (s->dma && !s->dma_enabled) {
         s->dma_cb = handle_s_without_atn;
         return;
     }
@@ -296,7 +294,7 @@ static void handle_s_without_atn(ESPState *s)
 
 static void handle_satn_stop(ESPState *s)
 {
-    if (!s->dma_enabled) {
+    if (s->dma && !s->dma_enabled) {
         s->dma_cb = handle_satn_stop;
         return;
     }
@@ -393,7 +391,7 @@ static void esp_do_dma(ESPState *s)
 static void esp_command_complete(SCSIRequest *req, uint32_t status,
                                  size_t resid)
 {
-    ESPState *s = DO_UPCAST(ESPState, busdev.qdev, req->bus->qbus.parent);
+    ESPState *s = req->hba_private;
 
     trace_esp_command_complete();
     if (s->ti_size != 0) {
@@ -417,7 +415,7 @@ static void esp_command_complete(SCSIRequest *req, uint32_t status,
 
 static void esp_transfer_data(SCSIRequest *req, uint32_t len)
 {
-    ESPState *s = DO_UPCAST(ESPState, busdev.qdev, req->bus->qbus.parent);
+    ESPState *s = req->hba_private;
 
     trace_esp_transfer_data(s->dma_left, s->ti_size);
     s->async_len = len;
@@ -435,6 +433,11 @@ static void handle_ti(ESPState *s)
 {
     uint32_t dmalen, minlen;
 
+    if (s->dma && !s->dma_enabled) {
+        s->dma_cb = handle_ti;
+        return;
+    }
+
     dmalen = s->rregs[ESP_TCLO] | (s->rregs[ESP_TCMID] << 8);
     if (dmalen==0) {
       dmalen=0x10000;
@@ -462,13 +465,11 @@ static void handle_ti(ESPState *s)
     }
 }
 
-static void esp_hard_reset(DeviceState *d)
+static void esp_hard_reset(ESPState *s)
 {
-    ESPState *s = container_of(d, ESPState, busdev.qdev);
-
     memset(s->rregs, 0, ESP_REGS);
     memset(s->wregs, 0, ESP_REGS);
-    s->rregs[ESP_TCHI] = TCHI_FAS100A; // Indicate fas100a
+    s->rregs[ESP_TCHI] = s->chip_id;
     s->ti_size = 0;
     s->ti_rptr = 0;
     s->ti_wptr = 0;
@@ -479,40 +480,23 @@ static void esp_hard_reset(DeviceState *d)
     s->rregs[ESP_CFG1] = 7;
 }
 
-static void esp_soft_reset(DeviceState *d)
+static void esp_soft_reset(ESPState *s)
 {
-    ESPState *s = container_of(d, ESPState, busdev.qdev);
-
     qemu_irq_lower(s->irq);
-    esp_hard_reset(d);
+    esp_hard_reset(s);
 }
 
-static void parent_esp_reset(void *opaque, int irq, int level)
+static void parent_esp_reset(ESPState *s, int irq, int level)
 {
     if (level) {
-        esp_soft_reset(opaque);
+        esp_soft_reset(s);
     }
 }
 
-static void esp_gpio_demux(void *opaque, int irq, int level)
+static uint64_t esp_reg_read(ESPState *s, uint32_t saddr)
 {
-    switch (irq) {
-    case 0:
-        parent_esp_reset(opaque, irq, level);
-        break;
-    case 1:
-        esp_dma_enable(opaque, irq, level);
-        break;
-    }
-}
-
-static uint64_t esp_mem_read(void *opaque, target_phys_addr_t addr,
-                             unsigned size)
-{
-    ESPState *s = opaque;
-    uint32_t saddr, old_val;
+    uint32_t old_val;
 
-    saddr = addr >> s->it_shift;
     trace_esp_mem_readb(saddr, s->rregs[saddr]);
     switch (saddr) {
     case ESP_FIFO:
@@ -520,7 +504,8 @@ static uint64_t esp_mem_read(void *opaque, target_phys_addr_t addr,
             s->ti_size--;
             if ((s->rregs[ESP_RSTAT] & STAT_PIO_MASK) == 0) {
                 /* Data out.  */
-                ESP_ERROR("PIO data read not implemented\n");
+                qemu_log_mask(LOG_UNIMP,
+                              "esp: PIO data read not implemented\n");
                 s->rregs[ESP_FIFO] = 0;
             } else {
                 s->rregs[ESP_FIFO] = s->ti_buf[s->ti_rptr++];
@@ -548,13 +533,8 @@ static uint64_t esp_mem_read(void *opaque, target_phys_addr_t addr,
     return s->rregs[saddr];
 }
 
-static void esp_mem_write(void *opaque, target_phys_addr_t addr,
-                          uint64_t val, unsigned size)
+static void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val)
 {
-    ESPState *s = opaque;
-    uint32_t saddr;
-
-    saddr = addr >> s->it_shift;
     trace_esp_mem_writeb(saddr, s->wregs[saddr], val);
     switch (saddr) {
     case ESP_TCLO:
@@ -565,7 +545,7 @@ static void esp_mem_write(void *opaque, target_phys_addr_t addr,
         if (s->do_cmd) {
             s->cmdbuf[s->cmdlen++] = val & 0xff;
         } else if (s->ti_size == TI_BUFSZ - 1) {
-            ESP_ERROR("fifo overrun\n");
+            trace_esp_error_fifo_overrun();
         } else {
             s->ti_size++;
             s->ti_buf[s->ti_wptr++] = val & 0xff;
@@ -594,7 +574,7 @@ static void esp_mem_write(void *opaque, target_phys_addr_t addr,
             break;
         case CMD_RESET:
             trace_esp_mem_writeb_cmd_reset(val);
-            esp_soft_reset(&s->busdev.qdev);
+            esp_soft_reset(s);
             break;
         case CMD_BUSRESET:
             trace_esp_mem_writeb_cmd_bus_reset(val);
@@ -628,6 +608,9 @@ static void esp_mem_write(void *opaque, target_phys_addr_t addr,
         case CMD_SATN:
             trace_esp_mem_writeb_cmd_satn(val);
             break;
+        case CMD_RSTATN:
+            trace_esp_mem_writeb_cmd_rstatn(val);
+            break;
         case CMD_SEL:
             trace_esp_mem_writeb_cmd_sel(val);
             handle_s_without_atn(s);
@@ -644,8 +627,13 @@ static void esp_mem_write(void *opaque, target_phys_addr_t addr,
             trace_esp_mem_writeb_cmd_ensel(val);
             s->rregs[ESP_RINTR] = 0;
             break;
+        case CMD_DISSEL:
+            trace_esp_mem_writeb_cmd_dissel(val);
+            s->rregs[ESP_RINTR] = 0;
+            esp_raise_irq(s);
+            break;
         default:
-            ESP_ERROR("Unhandled ESP command (%2.2x)\n", (unsigned)val);
+            trace_esp_error_unhandled_command(val);
             break;
         }
         break;
@@ -660,7 +648,7 @@ static void esp_mem_write(void *opaque, target_phys_addr_t addr,
         s->rregs[saddr] = val;
         break;
     default:
-        ESP_ERROR("invalid write of 0x%02x at [0x%x]\n", (unsigned)val, saddr);
+        trace_esp_error_invalid_write(val, saddr);
         return;
     }
     s->wregs[saddr] = val;
@@ -672,13 +660,6 @@ static bool esp_mem_accepts(void *opaque, target_phys_addr_t addr,
     return (size == 1) || (is_write && size == 4);
 }
 
-static const MemoryRegionOps esp_mem_ops = {
-    .read = esp_mem_read,
-    .write = esp_mem_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
-    .valid.accepts = esp_mem_accepts,
-};
-
 static const VMStateDescription vmstate_esp = {
     .name ="esp",
     .version_id = 3,
@@ -701,6 +682,40 @@ static const VMStateDescription vmstate_esp = {
     }
 };
 
+typedef struct {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+    uint32_t it_shift;
+    ESPState esp;
+} SysBusESPState;
+
+static void sysbus_esp_mem_write(void *opaque, target_phys_addr_t addr,
+                                 uint64_t val, unsigned int size)
+{
+    SysBusESPState *sysbus = opaque;
+    uint32_t saddr;
+
+    saddr = addr >> sysbus->it_shift;
+    esp_reg_write(&sysbus->esp, saddr, val);
+}
+
+static uint64_t sysbus_esp_mem_read(void *opaque, target_phys_addr_t addr,
+                                    unsigned int size)
+{
+    SysBusESPState *sysbus = opaque;
+    uint32_t saddr;
+
+    saddr = addr >> sysbus->it_shift;
+    return esp_reg_read(&sysbus->esp, saddr);
+}
+
+static const MemoryRegionOps sysbus_esp_mem_ops = {
+    .read = sysbus_esp_mem_read,
+    .write = sysbus_esp_mem_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid.accepts = esp_mem_accepts,
+};
+
 void esp_init(target_phys_addr_t espaddr, int it_shift,
               ESPDMAMemoryReadWriteFunc dma_memory_read,
               ESPDMAMemoryReadWriteFunc dma_memory_write,
@@ -709,14 +724,16 @@ void esp_init(target_phys_addr_t espaddr, int it_shift,
 {
     DeviceState *dev;
     SysBusDevice *s;
+    SysBusESPState *sysbus;
     ESPState *esp;
 
     dev = qdev_create(NULL, "esp");
-    esp = DO_UPCAST(ESPState, busdev.qdev, dev);
+    sysbus = DO_UPCAST(SysBusESPState, busdev.qdev, dev);
+    esp = &sysbus->esp;
     esp->dma_memory_read = dma_memory_read;
     esp->dma_memory_write = dma_memory_write;
     esp->dma_opaque = dma_opaque;
-    esp->it_shift = it_shift;
+    sysbus->it_shift = it_shift;
     /* XXX for now until rc4030 has been changed to use DMA enable signal */
     esp->dma_enabled = 1;
     qdev_init_nofail(dev);
@@ -737,48 +754,441 @@ static const struct SCSIBusInfo esp_scsi_info = {
     .cancel = esp_request_cancelled
 };
 
-static int esp_init1(SysBusDevice *dev)
+static void sysbus_esp_gpio_demux(void *opaque, int irq, int level)
 {
-    ESPState *s = FROM_SYSBUS(ESPState, dev);
+    DeviceState *d = opaque;
+    SysBusESPState *sysbus = container_of(d, SysBusESPState, busdev.qdev);
+    ESPState *s = &sysbus->esp;
+
+    switch (irq) {
+    case 0:
+        parent_esp_reset(s, irq, level);
+        break;
+    case 1:
+        esp_dma_enable(opaque, irq, level);
+        break;
+    }
+}
+
+static int sysbus_esp_init(SysBusDevice *dev)
+{
+    SysBusESPState *sysbus = FROM_SYSBUS(SysBusESPState, dev);
+    ESPState *s = &sysbus->esp;
 
     sysbus_init_irq(dev, &s->irq);
-    assert(s->it_shift != -1);
+    assert(sysbus->it_shift != -1);
 
-    memory_region_init_io(&s->iomem, &esp_mem_ops, s,
-                          "esp", ESP_REGS << s->it_shift);
-    sysbus_init_mmio(dev, &s->iomem);
+    s->chip_id = TCHI_FAS100A;
+    memory_region_init_io(&sysbus->iomem, &sysbus_esp_mem_ops, sysbus,
+                          "esp", ESP_REGS << sysbus->it_shift);
+    sysbus_init_mmio(dev, &sysbus->iomem);
 
-    qdev_init_gpio_in(&dev->qdev, esp_gpio_demux, 2);
+    qdev_init_gpio_in(&dev->qdev, sysbus_esp_gpio_demux, 2);
 
     scsi_bus_new(&s->bus, &dev->qdev, &esp_scsi_info);
     return scsi_bus_legacy_handle_cmdline(&s->bus);
 }
 
-static Property esp_properties[] = {
-    {.name = NULL},
+static void sysbus_esp_hard_reset(DeviceState *dev)
+{
+    SysBusESPState *sysbus = DO_UPCAST(SysBusESPState, busdev.qdev, dev);
+    esp_hard_reset(&sysbus->esp);
+}
+
+static const VMStateDescription vmstate_sysbus_esp_scsi = {
+    .name = "sysbusespscsi",
+    .version_id = 0,
+    .minimum_version_id = 0,
+    .minimum_version_id_old = 0,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT(esp, SysBusESPState, 0, vmstate_esp, ESPState),
+        VMSTATE_END_OF_LIST()
+    }
 };
 
-static void esp_class_init(ObjectClass *klass, void *data)
+static void sysbus_esp_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-    k->init = esp_init1;
-    dc->reset = esp_hard_reset;
-    dc->vmsd = &vmstate_esp;
-    dc->props = esp_properties;
+    k->init = sysbus_esp_init;
+    dc->reset = sysbus_esp_hard_reset;
+    dc->vmsd = &vmstate_sysbus_esp_scsi;
 }
 
-static TypeInfo esp_info = {
+static TypeInfo sysbus_esp_info = {
     .name          = "esp",
     .parent        = TYPE_SYS_BUS_DEVICE,
-    .instance_size = sizeof(ESPState),
-    .class_init    = esp_class_init,
+    .instance_size = sizeof(SysBusESPState),
+    .class_init    = sysbus_esp_class_init,
+};
+
+#define DMA_CMD   0x0
+#define DMA_STC   0x1
+#define DMA_SPA   0x2
+#define DMA_WBC   0x3
+#define DMA_WAC   0x4
+#define DMA_STAT  0x5
+#define DMA_SMDLA 0x6
+#define DMA_WMAC  0x7
+
+#define DMA_CMD_MASK   0x03
+#define DMA_CMD_DIAG   0x04
+#define DMA_CMD_MDL    0x10
+#define DMA_CMD_INTE_P 0x20
+#define DMA_CMD_INTE_D 0x40
+#define DMA_CMD_DIR    0x80
+
+#define DMA_STAT_PWDN    0x01
+#define DMA_STAT_ERROR   0x02
+#define DMA_STAT_ABORT   0x04
+#define DMA_STAT_DONE    0x08
+#define DMA_STAT_SCSIINT 0x10
+#define DMA_STAT_BCMBLT  0x20
+
+#define SBAC_STATUS 0x1000
+
+typedef struct PCIESPState {
+    PCIDevice dev;
+    MemoryRegion io;
+    uint32_t dma_regs[8];
+    uint32_t sbac;
+    ESPState esp;
+} PCIESPState;
+
+static void esp_pci_handle_idle(PCIESPState *pci, uint32_t val)
+{
+    trace_esp_pci_dma_idle(val);
+    esp_dma_enable(&pci->esp, 0, 0);
+}
+
+static void esp_pci_handle_blast(PCIESPState *pci, uint32_t val)
+{
+    trace_esp_pci_dma_blast(val);
+    qemu_log_mask(LOG_UNIMP, "am53c974: cmd BLAST not implemented\n");
+}
+
+static void esp_pci_handle_abort(PCIESPState *pci, uint32_t val)
+{
+    trace_esp_pci_dma_abort(val);
+    if (pci->esp.current_req) {
+        scsi_req_cancel(pci->esp.current_req);
+    }
+}
+
+static void esp_pci_handle_start(PCIESPState *pci, uint32_t val)
+{
+    trace_esp_pci_dma_start(val);
+
+    pci->dma_regs[DMA_WBC] = pci->dma_regs[DMA_STC];
+    pci->dma_regs[DMA_WAC] = pci->dma_regs[DMA_SPA];
+    pci->dma_regs[DMA_WMAC] = pci->dma_regs[DMA_SMDLA];
+
+    pci->dma_regs[DMA_STAT] &= ~(DMA_STAT_BCMBLT | DMA_STAT_SCSIINT
+                               | DMA_STAT_DONE | DMA_STAT_ABORT
+                               | DMA_STAT_ERROR | DMA_STAT_PWDN);
+
+    esp_dma_enable(&pci->esp, 0, 1);
+}
+
+static void esp_pci_dma_write(PCIESPState *pci, uint32_t saddr, uint32_t val)
+{
+    trace_esp_pci_dma_write(saddr, pci->dma_regs[saddr], val);
+    switch (saddr) {
+    case DMA_CMD:
+        pci->dma_regs[saddr] = val;
+        switch (val & DMA_CMD_MASK) {
+        case 0x0: /* IDLE */
+            esp_pci_handle_idle(pci, val);
+            break;
+        case 0x1: /* BLAST */
+            esp_pci_handle_blast(pci, val);
+            break;
+        case 0x2: /* ABORT */
+            esp_pci_handle_abort(pci, val);
+            break;
+        case 0x3: /* START */
+            esp_pci_handle_start(pci, val);
+            break;
+        default: /* can't happen */
+            abort();
+        }
+        break;
+    case DMA_STC:
+    case DMA_SPA:
+    case DMA_SMDLA:
+        pci->dma_regs[saddr] = val;
+        break;
+    case DMA_STAT:
+        if (!(pci->sbac & SBAC_STATUS)) {
+            /* clear some bits on write */
+            uint32_t mask = DMA_STAT_ERROR | DMA_STAT_ABORT | DMA_STAT_DONE;
+            pci->dma_regs[DMA_STAT] &= ~(val & mask);
+        }
+        break;
+    default:
+        trace_esp_pci_error_invalid_write_dma(val, saddr);
+        return;
+    }
+}
+
+static uint32_t esp_pci_dma_read(PCIESPState *pci, uint32_t saddr)
+{
+    uint32_t val;
+
+    val = pci->dma_regs[saddr];
+    if (saddr == DMA_STAT) {
+        if (pci->esp.rregs[ESP_RSTAT] & STAT_INT) {
+            val |= DMA_STAT_SCSIINT;
+        }
+        if (pci->sbac & SBAC_STATUS) {
+            pci->dma_regs[DMA_STAT] &= ~(DMA_STAT_ERROR | DMA_STAT_ABORT |
+                                         DMA_STAT_DONE);
+        }
+    }
+
+    trace_esp_pci_dma_read(saddr, val);
+    return val;
+}
+
+static void esp_pci_io_write(void *opaque, target_phys_addr_t addr,
+                             uint64_t val, unsigned int size)
+{
+    PCIESPState *pci = opaque;
+
+    if (size < 4 || addr & 3) {
+        /* need to upgrade request: we only support 4-bytes accesses */
+        uint32_t current = 0, mask;
+        int shift;
+
+        if (addr < 0x40) {
+            current = pci->esp.wregs[addr >> 2];
+        } else if (addr < 0x60) {
+            current = pci->dma_regs[(addr - 0x40) >> 2];
+        } else if (addr < 0x74) {
+            current = pci->sbac;
+        }
+
+        shift = (4 - size) * 8;
+        mask = (~(uint32_t)0 << shift) >> shift;
+
+        shift = ((4 - (addr & 3)) & 3) * 8;
+        val <<= shift;
+        val |= current & ~(mask << shift);
+        addr &= ~3;
+        size = 4;
+    }
+
+    if (addr < 0x40) {
+        /* SCSI core reg */
+        esp_reg_write(&pci->esp, addr >> 2, val);
+    } else if (addr < 0x60) {
+        /* PCI DMA CCB */
+        esp_pci_dma_write(pci, (addr - 0x40) >> 2, val);
+    } else if (addr == 0x70) {
+        /* DMA SCSI Bus and control */
+        trace_esp_pci_sbac_write(pci->sbac, val);
+        pci->sbac = val;
+    } else {
+        trace_esp_pci_error_invalid_write((int)addr);
+    }
+}
+
+static uint64_t esp_pci_io_read(void *opaque, target_phys_addr_t addr,
+                                unsigned int size)
+{
+    PCIESPState *pci = opaque;
+    uint32_t ret;
+
+    if (addr < 0x40) {
+        /* SCSI core reg */
+        ret = esp_reg_read(&pci->esp, addr >> 2);
+    } else if (addr < 0x60) {
+        /* PCI DMA CCB */
+        ret = esp_pci_dma_read(pci, (addr - 0x40) >> 2);
+    } else if (addr == 0x70) {
+        /* DMA SCSI Bus and control */
+        trace_esp_pci_sbac_read(pci->sbac);
+        ret = pci->sbac;
+    } else {
+        /* Invalid region */
+        trace_esp_pci_error_invalid_read((int)addr);
+        ret = 0;
+    }
+
+    /* give only requested data */
+    ret >>= (addr & 3) * 8;
+    ret &= ~(~(uint64_t)0 << (8 * size));
+
+    return ret;
+}
+
+static void esp_pci_dma_memory_rw(PCIESPState *pci, uint8_t *buf, int len,
+                                  DMADirection dir)
+{
+    dma_addr_t addr;
+    DMADirection expected_dir;
+
+    if (pci->dma_regs[DMA_CMD] & DMA_CMD_DIR) {
+        expected_dir = DMA_DIRECTION_FROM_DEVICE;
+    } else {
+        expected_dir = DMA_DIRECTION_TO_DEVICE;
+    }
+
+    if (dir != expected_dir) {
+        trace_esp_pci_error_invalid_dma_direction();
+        return;
+    }
+
+    if (pci->dma_regs[DMA_STAT] & DMA_CMD_MDL) {
+        qemu_log_mask(LOG_UNIMP, "am53c974: MDL transfer not implemented\n");
+    }
+
+    addr = pci->dma_regs[DMA_SPA];
+    if (pci->dma_regs[DMA_WBC] < len) {
+        len = pci->dma_regs[DMA_WBC];
+    }
+
+    pci_dma_rw(&pci->dev, addr, buf, len, dir);
+
+    /* update status registers */
+    pci->dma_regs[DMA_WBC] -= len;
+    pci->dma_regs[DMA_WAC] += len;
+}
+
+static void esp_pci_dma_memory_read(void *opaque, uint8_t *buf, int len)
+{
+    PCIESPState *pci = opaque;
+    esp_pci_dma_memory_rw(pci, buf, len, DMA_DIRECTION_TO_DEVICE);
+}
+
+static void esp_pci_dma_memory_write(void *opaque, uint8_t *buf, int len)
+{
+    PCIESPState *pci = opaque;
+    esp_pci_dma_memory_rw(pci, buf, len, DMA_DIRECTION_FROM_DEVICE);
+}
+
+static const MemoryRegionOps esp_pci_io_ops = {
+    .read = esp_pci_io_read,
+    .write = esp_pci_io_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 4,
+    },
+};
+
+static void esp_pci_hard_reset(DeviceState *dev)
+{
+    PCIESPState *pci = DO_UPCAST(PCIESPState, dev.qdev, dev);
+    esp_hard_reset(&pci->esp);
+    pci->dma_regs[DMA_CMD] &= ~(DMA_CMD_DIR | DMA_CMD_INTE_D | DMA_CMD_INTE_P
+                              | DMA_CMD_MDL | DMA_CMD_DIAG | DMA_CMD_MASK);
+    pci->dma_regs[DMA_WBC] &= ~0xffff;
+    pci->dma_regs[DMA_WAC] = 0xffffffff;
+    pci->dma_regs[DMA_STAT] &= ~(DMA_STAT_BCMBLT | DMA_STAT_SCSIINT
+                               | DMA_STAT_DONE | DMA_STAT_ABORT
+                               | DMA_STAT_ERROR);
+    pci->dma_regs[DMA_WMAC] = 0xfffffffd;
+}
+
+static const VMStateDescription vmstate_esp_pci_scsi = {
+    .name = "pciespscsi",
+    .version_id = 0,
+    .minimum_version_id = 0,
+    .minimum_version_id_old = 0,
+    .fields = (VMStateField[]) {
+        VMSTATE_PCI_DEVICE(dev, PCIESPState),
+        VMSTATE_BUFFER_UNSAFE(dma_regs, PCIESPState, 0, 8 * sizeof(uint32_t)),
+        VMSTATE_STRUCT(esp, PCIESPState, 0, vmstate_esp, ESPState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void esp_pci_command_complete(SCSIRequest *req, uint32_t status,
+                                     size_t resid)
+{
+    ESPState *s = req->hba_private;
+    PCIESPState *pci = container_of(s, PCIESPState, esp);
+
+    esp_command_complete(req, status, resid);
+    pci->dma_regs[DMA_WBC] = 0;
+    pci->dma_regs[DMA_STAT] |= DMA_STAT_DONE;
+}
+
+static const struct SCSIBusInfo esp_pci_scsi_info = {
+    .tcq = false,
+    .max_target = ESP_MAX_DEVS,
+    .max_lun = 7,
+
+    .transfer_data = esp_transfer_data,
+    .complete = esp_pci_command_complete,
+    .cancel = esp_request_cancelled,
+};
+
+static int esp_pci_scsi_init(PCIDevice *dev)
+{
+    PCIESPState *pci = DO_UPCAST(PCIESPState, dev, dev);
+    ESPState *s = &pci->esp;
+    uint8_t *pci_conf;
+
+    pci_conf = pci->dev.config;
+
+    /* Interrupt pin A */
+    pci_conf[PCI_INTERRUPT_PIN] = 0x01;
+
+    s->dma_memory_read = esp_pci_dma_memory_read;
+    s->dma_memory_write = esp_pci_dma_memory_write;
+    s->dma_opaque = pci;
+    s->chip_id = TCHI_AM53C974;
+    memory_region_init_io(&pci->io, &esp_pci_io_ops, pci, "esp-io", 0x80);
+
+    pci_register_bar(&pci->dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &pci->io);
+    s->irq = pci->dev.irq[0];
+
+    scsi_bus_new(&s->bus, &dev->qdev, &esp_pci_scsi_info);
+    if (!dev->qdev.hotplugged) {
+        return scsi_bus_legacy_handle_cmdline(&s->bus);
+    }
+    return 0;
+}
+
+static int esp_pci_scsi_uninit(PCIDevice *d)
+{
+    PCIESPState *pci = DO_UPCAST(PCIESPState, dev, d);
+
+    memory_region_destroy(&pci->io);
+
+    return 0;
+}
+
+static void esp_pci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+    k->init = esp_pci_scsi_init;
+    k->exit = esp_pci_scsi_uninit;
+    k->vendor_id = PCI_VENDOR_ID_AMD;
+    k->device_id = PCI_DEVICE_ID_AMD_SCSI;
+    k->revision = 0x10;
+    k->class_id = PCI_CLASS_STORAGE_SCSI;
+    dc->desc = "AMD Am53c974 PCscsi-PCI SCSI adapter";
+    dc->reset = esp_pci_hard_reset;
+    dc->vmsd = &vmstate_esp_pci_scsi;
+}
+
+static TypeInfo esp_pci_info = {
+    .name = "am53c974",
+    .parent = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(PCIESPState),
+    .class_init = esp_pci_class_init,
 };
 
 static void esp_register_types(void)
 {
-    type_register_static(&esp_info);
+    type_register_static(&sysbus_esp_info);
+    type_register_static(&esp_pci_info);
 }
 
 type_init(esp_register_types)
diff --git a/hw/exynos4210.c b/hw/exynos4210.c
index 9c20b3f22d..7c58c906de 100644
--- a/hw/exynos4210.c
+++ b/hw/exynos4210.c
@@ -33,6 +33,9 @@
 /* PWM */
 #define EXYNOS4210_PWM_BASE_ADDR       0x139D0000
 
+/* RTC */
+#define EXYNOS4210_RTC_BASE_ADDR       0x10070000
+
 /* MCT */
 #define EXYNOS4210_MCT_BASE_ADDR       0x10050000
 
@@ -216,7 +219,7 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
     /* mirror of iROM */
     memory_region_init_alias(&s->irom_alias_mem, "exynos4210.irom_alias",
                              &s->irom_mem,
-                             EXYNOS4210_IROM_BASE_ADDR,
+                             0,
                              EXYNOS4210_IROM_SIZE);
     memory_region_set_readonly(&s->irom_alias_mem, true);
     memory_region_add_subregion(system_mem, EXYNOS4210_IROM_MIRROR_BASE_ADDR,
@@ -258,6 +261,11 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
                           s->irq_table[exynos4210_get_irq(22, 3)],
                           s->irq_table[exynos4210_get_irq(22, 4)],
                           NULL);
+    /* RTC */
+    sysbus_create_varargs("exynos4210.rtc", EXYNOS4210_RTC_BASE_ADDR,
+                          s->irq_table[exynos4210_get_irq(23, 0)],
+                          s->irq_table[exynos4210_get_irq(23, 1)],
+                          NULL);
 
     /* Multi Core Timer */
     dev = qdev_create(NULL, "exynos4210.mct");
diff --git a/hw/exynos4210_mct.c b/hw/exynos4210_mct.c
index 7474fcf802..7a22b1f900 100644
--- a/hw/exynos4210_mct.c
+++ b/hw/exynos4210_mct.c
@@ -376,10 +376,6 @@ static uint64_t exynos4210_gfrc_get_count(Exynos4210MCTGT *s)
 {
     uint64_t count = 0;
     count = ptimer_get_count(s->ptimer_frc);
-    if (!count) {
-        /* Timer event was generated and s->reg.cnt holds adequate value */
-        return s->reg.cnt;
-    }
     count = s->count - count;
     return s->reg.cnt + count;
 }
diff --git a/hw/exynos4210_pwm.c b/hw/exynos4210_pwm.c
index 6243e59c48..0c228280a9 100644
--- a/hw/exynos4210_pwm.c
+++ b/hw/exynos4210_pwm.c
@@ -200,7 +200,7 @@ static void exynos4210_pwm_tick(void *opaque)
         ptimer_run(p->timer[id].ptimer, 1);
     } else {
         /* stop timer, set status to STOP, see Basic Timer Operation */
-        p->reg_tcon = ~TCON_TIMER_START(id);
+        p->reg_tcon &= ~TCON_TIMER_START(id);
         ptimer_stop(p->timer[id].ptimer);
     }
 }
diff --git a/hw/exynos4210_rtc.c b/hw/exynos4210_rtc.c
new file mode 100644
index 0000000000..f78102049b
--- /dev/null
+++ b/hw/exynos4210_rtc.c
@@ -0,0 +1,595 @@
+/*
+ * Samsung exynos4210 Real Time Clock
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *  Ogurtsov Oleg <o.ogurtsov@samsung.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+/* Description:
+ * Register RTCCON:
+ *  CLKSEL Bit[1] not used
+ *  CLKOUTEN Bit[9] not used
+ */
+
+#include "sysbus.h"
+#include "qemu-timer.h"
+#include "qemu-common.h"
+#include "ptimer.h"
+
+#include "hw.h"
+#include "qemu-timer.h"
+#include "sysemu.h"
+
+#include "exynos4210.h"
+
+#define DEBUG_RTC 0
+
+#if DEBUG_RTC
+#define DPRINTF(fmt, ...) \
+        do { fprintf(stdout, "RTC: [%24s:%5d] " fmt, __func__, __LINE__, \
+                ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#define     EXYNOS4210_RTC_REG_MEM_SIZE     0x0100
+
+#define     INTP            0x0030
+#define     RTCCON          0x0040
+#define     TICCNT          0x0044
+#define     RTCALM          0x0050
+#define     ALMSEC          0x0054
+#define     ALMMIN          0x0058
+#define     ALMHOUR         0x005C
+#define     ALMDAY          0x0060
+#define     ALMMON          0x0064
+#define     ALMYEAR         0x0068
+#define     BCDSEC          0x0070
+#define     BCDMIN          0x0074
+#define     BCDHOUR         0x0078
+#define     BCDDAY          0x007C
+#define     BCDDAYWEEK      0x0080
+#define     BCDMON          0x0084
+#define     BCDYEAR         0x0088
+#define     CURTICNT        0x0090
+
+#define     TICK_TIMER_ENABLE   0x0100
+#define     TICNT_THRESHHOLD    2
+
+
+#define     RTC_ENABLE          0x0001
+
+#define     INTP_TICK_ENABLE    0x0001
+#define     INTP_ALM_ENABLE     0x0002
+
+#define     ALARM_INT_ENABLE    0x0040
+
+#define     RTC_BASE_FREQ       32768
+
+typedef struct Exynos4210RTCState {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+
+    /* registers */
+    uint32_t    reg_intp;
+    uint32_t    reg_rtccon;
+    uint32_t    reg_ticcnt;
+    uint32_t    reg_rtcalm;
+    uint32_t    reg_almsec;
+    uint32_t    reg_almmin;
+    uint32_t    reg_almhour;
+    uint32_t    reg_almday;
+    uint32_t    reg_almmon;
+    uint32_t    reg_almyear;
+    uint32_t    reg_curticcnt;
+
+    ptimer_state    *ptimer;        /* tick timer */
+    ptimer_state    *ptimer_1Hz;    /* clock timer */
+    uint32_t        freq;
+
+    qemu_irq        tick_irq;   /* Time Tick Generator irq */
+    qemu_irq        alm_irq;    /* alarm irq */
+
+    struct tm   current_tm;     /* current time */
+} Exynos4210RTCState;
+
+#define TICCKSEL(value) ((value & (0x0F << 4)) >> 4)
+
+/*** VMState ***/
+static const VMStateDescription vmstate_exynos4210_rtc_state = {
+    .name = "exynos4210.rtc",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(reg_intp, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_rtccon, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_ticcnt, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_rtcalm, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_almsec, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_almmin, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_almhour, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_almday, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_almmon, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_almyear, Exynos4210RTCState),
+        VMSTATE_UINT32(reg_curticcnt, Exynos4210RTCState),
+        VMSTATE_PTIMER(ptimer, Exynos4210RTCState),
+        VMSTATE_PTIMER(ptimer_1Hz, Exynos4210RTCState),
+        VMSTATE_UINT32(freq, Exynos4210RTCState),
+        VMSTATE_INT32(current_tm.tm_sec, Exynos4210RTCState),
+        VMSTATE_INT32(current_tm.tm_min, Exynos4210RTCState),
+        VMSTATE_INT32(current_tm.tm_hour, Exynos4210RTCState),
+        VMSTATE_INT32(current_tm.tm_wday, Exynos4210RTCState),
+        VMSTATE_INT32(current_tm.tm_mday, Exynos4210RTCState),
+        VMSTATE_INT32(current_tm.tm_mon, Exynos4210RTCState),
+        VMSTATE_INT32(current_tm.tm_year, Exynos4210RTCState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+#define BCD3DIGITS(x) \
+    ((uint32_t)to_bcd((uint8_t)x) + \
+    ((uint32_t)to_bcd((uint8_t)((x % 1000) / 100)) << 8))
+
+static void check_alarm_raise(Exynos4210RTCState *s)
+{
+    unsigned int alarm_raise = 0;
+    struct tm stm = s->current_tm;
+
+    if ((s->reg_rtcalm & 0x01) &&
+        (to_bcd((uint8_t)stm.tm_sec) == (uint8_t)s->reg_almsec)) {
+        alarm_raise = 1;
+    }
+    if ((s->reg_rtcalm & 0x02) &&
+        (to_bcd((uint8_t)stm.tm_min) == (uint8_t)s->reg_almmin)) {
+        alarm_raise = 1;
+    }
+    if ((s->reg_rtcalm & 0x04) &&
+        (to_bcd((uint8_t)stm.tm_hour) == (uint8_t)s->reg_almhour)) {
+        alarm_raise = 1;
+    }
+    if ((s->reg_rtcalm & 0x08) &&
+        (to_bcd((uint8_t)stm.tm_mday) == (uint8_t)s->reg_almday)) {
+        alarm_raise = 1;
+    }
+    if ((s->reg_rtcalm & 0x10) &&
+         (to_bcd((uint8_t)stm.tm_mon) == (uint8_t)s->reg_almmon)) {
+        alarm_raise = 1;
+    }
+    if ((s->reg_rtcalm & 0x20) &&
+        (BCD3DIGITS(stm.tm_year) == s->reg_almyear)) {
+        alarm_raise = 1;
+    }
+
+    if (alarm_raise) {
+        DPRINTF("ALARM IRQ\n");
+        /* set irq status */
+        s->reg_intp |= INTP_ALM_ENABLE;
+        qemu_irq_raise(s->alm_irq);
+    }
+}
+
+/*
+ * RTC update frequency
+ * Parameters:
+ *     reg_value - current RTCCON register or his new value
+ */
+static void exynos4210_rtc_update_freq(Exynos4210RTCState *s,
+                                       uint32_t reg_value)
+{
+    uint32_t freq;
+
+    freq = s->freq;
+    /* set frequncy for time generator */
+    s->freq = RTC_BASE_FREQ / (1 << TICCKSEL(reg_value));
+
+    if (freq != s->freq) {
+        ptimer_set_freq(s->ptimer, s->freq);
+        DPRINTF("freq=%dHz\n", s->freq);
+    }
+}
+
+/* month is between 0 and 11. */
+static int get_days_in_month(int month, int year)
+{
+    static const int days_tab[12] = {
+        31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+    };
+    int d;
+    if ((unsigned)month >= 12) {
+        return 31;
+    }
+    d = days_tab[month];
+    if (month == 1) {
+        if ((year % 4) == 0 && ((year % 100) != 0 || (year % 400) == 0)) {
+            d++;
+        }
+    }
+    return d;
+}
+
+/* update 'tm' to the next second */
+static void rtc_next_second(struct tm *tm)
+{
+    int days_in_month;
+
+    tm->tm_sec++;
+    if ((unsigned)tm->tm_sec >= 60) {
+        tm->tm_sec = 0;
+        tm->tm_min++;
+        if ((unsigned)tm->tm_min >= 60) {
+            tm->tm_min = 0;
+            tm->tm_hour++;
+            if ((unsigned)tm->tm_hour >= 24) {
+                tm->tm_hour = 0;
+                /* next day */
+                tm->tm_wday++;
+                if ((unsigned)tm->tm_wday >= 7) {
+                    tm->tm_wday = 0;
+                }
+                days_in_month = get_days_in_month(tm->tm_mon,
+                                                  tm->tm_year + 1900);
+                tm->tm_mday++;
+                if (tm->tm_mday < 1) {
+                    tm->tm_mday = 1;
+                } else if (tm->tm_mday > days_in_month) {
+                    tm->tm_mday = 1;
+                    tm->tm_mon++;
+                    if (tm->tm_mon >= 12) {
+                        tm->tm_mon = 0;
+                        tm->tm_year++;
+                    }
+                }
+            }
+        }
+    }
+}
+
+/*
+ * tick handler
+ */
+static void exynos4210_rtc_tick(void *opaque)
+{
+    Exynos4210RTCState *s = (Exynos4210RTCState *)opaque;
+
+    DPRINTF("TICK IRQ\n");
+    /* set irq status */
+    s->reg_intp |= INTP_TICK_ENABLE;
+    /* raise IRQ */
+    qemu_irq_raise(s->tick_irq);
+
+    /* restart timer */
+    ptimer_set_count(s->ptimer, s->reg_ticcnt);
+    ptimer_run(s->ptimer, 1);
+}
+
+/*
+ * 1Hz clock handler
+ */
+static void exynos4210_rtc_1Hz_tick(void *opaque)
+{
+    Exynos4210RTCState *s = (Exynos4210RTCState *)opaque;
+
+    rtc_next_second(&s->current_tm);
+    /* DPRINTF("1Hz tick\n"); */
+
+    /* raise IRQ */
+    if (s->reg_rtcalm & ALARM_INT_ENABLE) {
+        check_alarm_raise(s);
+    }
+
+    ptimer_set_count(s->ptimer_1Hz, RTC_BASE_FREQ);
+    ptimer_run(s->ptimer_1Hz, 1);
+}
+
+/*
+ * RTC Read
+ */
+static uint64_t exynos4210_rtc_read(void *opaque, target_phys_addr_t offset,
+        unsigned size)
+{
+    uint32_t value = 0;
+    Exynos4210RTCState *s = (Exynos4210RTCState *)opaque;
+
+    switch (offset) {
+    case INTP:
+        value = s->reg_intp;
+        break;
+    case RTCCON:
+        value = s->reg_rtccon;
+        break;
+    case TICCNT:
+        value = s->reg_ticcnt;
+        break;
+    case RTCALM:
+        value = s->reg_rtcalm;
+        break;
+    case ALMSEC:
+        value = s->reg_almsec;
+        break;
+    case ALMMIN:
+        value = s->reg_almmin;
+        break;
+    case ALMHOUR:
+        value = s->reg_almhour;
+        break;
+    case ALMDAY:
+        value = s->reg_almday;
+        break;
+    case ALMMON:
+        value = s->reg_almmon;
+        break;
+    case ALMYEAR:
+        value = s->reg_almyear;
+        break;
+
+    case BCDSEC:
+        value = (uint32_t)to_bcd((uint8_t)s->current_tm.tm_sec);
+        break;
+    case BCDMIN:
+        value = (uint32_t)to_bcd((uint8_t)s->current_tm.tm_min);
+        break;
+    case BCDHOUR:
+        value = (uint32_t)to_bcd((uint8_t)s->current_tm.tm_hour);
+        break;
+    case BCDDAYWEEK:
+        value = (uint32_t)to_bcd((uint8_t)s->current_tm.tm_wday);
+        break;
+    case BCDDAY:
+        value = (uint32_t)to_bcd((uint8_t)s->current_tm.tm_mday);
+        break;
+    case BCDMON:
+        value = (uint32_t)to_bcd((uint8_t)s->current_tm.tm_mon + 1);
+        break;
+    case BCDYEAR:
+        value = BCD3DIGITS(s->current_tm.tm_year);
+        break;
+
+    case CURTICNT:
+        s->reg_curticcnt = ptimer_get_count(s->ptimer);
+        value = s->reg_curticcnt;
+        break;
+
+    default:
+        fprintf(stderr,
+                "[exynos4210.rtc: bad read offset " TARGET_FMT_plx "]\n",
+                offset);
+        break;
+    }
+    return value;
+}
+
+/*
+ * RTC Write
+ */
+static void exynos4210_rtc_write(void *opaque, target_phys_addr_t offset,
+        uint64_t value, unsigned size)
+{
+    Exynos4210RTCState *s = (Exynos4210RTCState *)opaque;
+
+    switch (offset) {
+    case INTP:
+        if (value & INTP_ALM_ENABLE) {
+            qemu_irq_lower(s->alm_irq);
+            s->reg_intp &= (~INTP_ALM_ENABLE);
+        }
+        if (value & INTP_TICK_ENABLE) {
+            qemu_irq_lower(s->tick_irq);
+            s->reg_intp &= (~INTP_TICK_ENABLE);
+        }
+        break;
+    case RTCCON:
+        if (value & RTC_ENABLE) {
+            exynos4210_rtc_update_freq(s, value);
+        }
+        if ((value & RTC_ENABLE) > (s->reg_rtccon & RTC_ENABLE)) {
+            /* clock timer */
+            ptimer_set_count(s->ptimer_1Hz, RTC_BASE_FREQ);
+            ptimer_run(s->ptimer_1Hz, 1);
+            DPRINTF("run clock timer\n");
+        }
+        if ((value & RTC_ENABLE) < (s->reg_rtccon & RTC_ENABLE)) {
+            /* tick timer */
+            ptimer_stop(s->ptimer);
+            /* clock timer */
+            ptimer_stop(s->ptimer_1Hz);
+            DPRINTF("stop all timers\n");
+        }
+        if (value & RTC_ENABLE) {
+            if ((value & TICK_TIMER_ENABLE) >
+                (s->reg_rtccon & TICK_TIMER_ENABLE) &&
+                (s->reg_ticcnt)) {
+                ptimer_set_count(s->ptimer, s->reg_ticcnt);
+                ptimer_run(s->ptimer, 1);
+                DPRINTF("run tick timer\n");
+            }
+            if ((value & TICK_TIMER_ENABLE) <
+                (s->reg_rtccon & TICK_TIMER_ENABLE)) {
+                ptimer_stop(s->ptimer);
+            }
+        }
+        s->reg_rtccon = value;
+        break;
+    case TICCNT:
+        if (value > TICNT_THRESHHOLD) {
+            s->reg_ticcnt = value;
+        } else {
+            fprintf(stderr,
+                    "[exynos4210.rtc: bad TICNT value %u ]\n",
+                    (uint32_t)value);
+        }
+        break;
+
+    case RTCALM:
+        s->reg_rtcalm = value;
+        break;
+    case ALMSEC:
+        s->reg_almsec = (value & 0x7f);
+        break;
+    case ALMMIN:
+        s->reg_almmin = (value & 0x7f);
+        break;
+    case ALMHOUR:
+        s->reg_almhour = (value & 0x3f);
+        break;
+    case ALMDAY:
+        s->reg_almday = (value & 0x3f);
+        break;
+    case ALMMON:
+        s->reg_almmon = (value & 0x1f);
+        break;
+    case ALMYEAR:
+        s->reg_almyear = (value & 0x0fff);
+        break;
+
+    case BCDSEC:
+        if (s->reg_rtccon & RTC_ENABLE) {
+            s->current_tm.tm_sec = (int)from_bcd((uint8_t)value);
+        }
+        break;
+    case BCDMIN:
+        if (s->reg_rtccon & RTC_ENABLE) {
+            s->current_tm.tm_min = (int)from_bcd((uint8_t)value);
+        }
+        break;
+    case BCDHOUR:
+        if (s->reg_rtccon & RTC_ENABLE) {
+            s->current_tm.tm_hour = (int)from_bcd((uint8_t)value);
+        }
+        break;
+    case BCDDAYWEEK:
+        if (s->reg_rtccon & RTC_ENABLE) {
+            s->current_tm.tm_wday = (int)from_bcd((uint8_t)value);
+        }
+        break;
+    case BCDDAY:
+        if (s->reg_rtccon & RTC_ENABLE) {
+            s->current_tm.tm_mday = (int)from_bcd((uint8_t)value);
+        }
+        break;
+    case BCDMON:
+        if (s->reg_rtccon & RTC_ENABLE) {
+            s->current_tm.tm_mon = (int)from_bcd((uint8_t)value) - 1;
+        }
+        break;
+    case BCDYEAR:
+        if (s->reg_rtccon & RTC_ENABLE) {
+            /* 3 digits */
+            s->current_tm.tm_year = (int)from_bcd((uint8_t)value) +
+                    (int)from_bcd((uint8_t)((value >> 8) & 0x0f)) * 100;
+        }
+        break;
+
+    default:
+        fprintf(stderr,
+                "[exynos4210.rtc: bad write offset " TARGET_FMT_plx "]\n",
+                offset);
+        break;
+
+    }
+}
+
+/*
+ * Set default values to timer fields and registers
+ */
+static void exynos4210_rtc_reset(DeviceState *d)
+{
+    Exynos4210RTCState *s = (Exynos4210RTCState *)d;
+
+    struct tm tm;
+
+    qemu_get_timedate(&tm, 0);
+    s->current_tm = tm;
+
+    DPRINTF("Get time from host: %d-%d-%d %2d:%02d:%02d\n",
+            s->current_tm.tm_year, s->current_tm.tm_mon, s->current_tm.tm_mday,
+            s->current_tm.tm_hour, s->current_tm.tm_min, s->current_tm.tm_sec);
+
+    s->reg_intp = 0;
+    s->reg_rtccon = 0;
+    s->reg_ticcnt = 0;
+    s->reg_rtcalm = 0;
+    s->reg_almsec = 0;
+    s->reg_almmin = 0;
+    s->reg_almhour = 0;
+    s->reg_almday = 0;
+    s->reg_almmon = 0;
+    s->reg_almyear = 0;
+
+    s->reg_curticcnt = 0;
+
+    exynos4210_rtc_update_freq(s, s->reg_rtccon);
+    ptimer_stop(s->ptimer);
+    ptimer_stop(s->ptimer_1Hz);
+}
+
+static const MemoryRegionOps exynos4210_rtc_ops = {
+    .read = exynos4210_rtc_read,
+    .write = exynos4210_rtc_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+/*
+ * RTC timer initialization
+ */
+static int exynos4210_rtc_init(SysBusDevice *dev)
+{
+    Exynos4210RTCState *s = FROM_SYSBUS(Exynos4210RTCState, dev);
+    QEMUBH *bh;
+
+    bh = qemu_bh_new(exynos4210_rtc_tick, s);
+    s->ptimer = ptimer_init(bh);
+    ptimer_set_freq(s->ptimer, RTC_BASE_FREQ);
+    exynos4210_rtc_update_freq(s, 0);
+
+    bh = qemu_bh_new(exynos4210_rtc_1Hz_tick, s);
+    s->ptimer_1Hz = ptimer_init(bh);
+    ptimer_set_freq(s->ptimer_1Hz, RTC_BASE_FREQ);
+
+    sysbus_init_irq(dev, &s->alm_irq);
+    sysbus_init_irq(dev, &s->tick_irq);
+
+    memory_region_init_io(&s->iomem, &exynos4210_rtc_ops, s, "exynos4210-rtc",
+            EXYNOS4210_RTC_REG_MEM_SIZE);
+    sysbus_init_mmio(dev, &s->iomem);
+
+    return 0;
+}
+
+static void exynos4210_rtc_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+
+    k->init = exynos4210_rtc_init;
+    dc->reset = exynos4210_rtc_reset;
+    dc->vmsd = &vmstate_exynos4210_rtc_state;
+}
+
+static const TypeInfo exynos4210_rtc_info = {
+    .name          = "exynos4210.rtc",
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(Exynos4210RTCState),
+    .class_init    = exynos4210_rtc_class_init,
+};
+
+static void exynos4210_rtc_register_types(void)
+{
+    type_register_static(&exynos4210_rtc_info);
+}
+
+type_init(exynos4210_rtc_register_types)
diff --git a/hw/fdc.c b/hw/fdc.c
index 5b3224b39b..edf07063b2 100644
--- a/hw/fdc.c
+++ b/hw/fdc.c
@@ -153,8 +153,12 @@ static int fd_seek(FDrive *drv, uint8_t head, uint8_t track, uint8_t sect,
         }
 #endif
         drv->head = head;
-        if (drv->track != track)
+        if (drv->track != track) {
+            if (drv->bs != NULL && bdrv_is_inserted(drv->bs)) {
+                drv->media_changed = 0;
+            }
             ret = 1;
+        }
         drv->track = track;
         drv->sect = sect;
     }
@@ -170,9 +174,7 @@ static int fd_seek(FDrive *drv, uint8_t head, uint8_t track, uint8_t sect,
 static void fd_recalibrate(FDrive *drv)
 {
     FLOPPY_DPRINTF("recalibrate\n");
-    drv->head = 0;
-    drv->track = 0;
-    drv->sect = 1;
+    fd_seek(drv, 0, 0, 1, 1);
 }
 
 /* Revalidate a disk drive after a disk change */
@@ -189,9 +191,6 @@ static void fd_revalidate(FDrive *drv)
                                       &last_sect, drv->drive, &drive, &rate);
         if (!bdrv_is_inserted(drv->bs)) {
             FLOPPY_DPRINTF("No disk in drive\n");
-        } else if (nb_heads != 0 && max_track != 0 && last_sect != 0) {
-            FLOPPY_DPRINTF("User defined disk (%d %d %d)\n",
-                           nb_heads - 1, max_track, last_sect);
         } else {
             FLOPPY_DPRINTF("Floppy disk (%d h %d t %d s) %s\n", nb_heads,
                            max_track, last_sect, ro ? "ro" : "rw");
@@ -305,6 +304,9 @@ enum {
 };
 
 enum {
+    FD_SR0_DS0      = 0x01,
+    FD_SR0_DS1      = 0x02,
+    FD_SR0_HEAD     = 0x04,
     FD_SR0_EQPMT    = 0x10,
     FD_SR0_SEEK     = 0x20,
     FD_SR0_ABNTERM  = 0x40,
@@ -711,14 +713,6 @@ static void fdctrl_raise_irq(FDCtrl *fdctrl, uint8_t status0)
         qemu_set_irq(fdctrl->irq, 1);
         fdctrl->sra |= FD_SRA_INTPEND;
     }
-    if (status0 & FD_SR0_SEEK) {
-        FDrive *cur_drv;
-        /* A seek clears the disk change line (if a disk is inserted) */
-        cur_drv = get_cur_drv(fdctrl);
-        if (cur_drv->bs != NULL && bdrv_is_inserted(cur_drv->bs)) {
-            cur_drv->media_changed = 0;
-        }
-    }
 
     fdctrl->reset_sensei = 0;
     fdctrl->status0 = status0;
@@ -978,14 +972,15 @@ static void fdctrl_reset_fifo(FDCtrl *fdctrl)
 }
 
 /* Set FIFO status for the host to read */
-static void fdctrl_set_fifo(FDCtrl *fdctrl, int fifo_len, int do_irq)
+static void fdctrl_set_fifo(FDCtrl *fdctrl, int fifo_len, uint8_t status0)
 {
     fdctrl->data_dir = FD_DIR_READ;
     fdctrl->data_len = fifo_len;
     fdctrl->data_pos = 0;
     fdctrl->msr |= FD_MSR_CMDBUSY | FD_MSR_RQM | FD_MSR_DIO;
-    if (do_irq)
-        fdctrl_raise_irq(fdctrl, 0x00);
+    if (status0) {
+        fdctrl_raise_irq(fdctrl, status0);
+    }
 }
 
 /* Set an error: unimplemented/unknown command */
@@ -997,7 +992,10 @@ static void fdctrl_unimplemented(FDCtrl *fdctrl, int direction)
     fdctrl_set_fifo(fdctrl, 1, 0);
 }
 
-/* Seek to next sector */
+/* Seek to next sector
+ * returns 0 when end of track reached (for DBL_SIDES on head 1)
+ * otherwise returns 1
+ */
 static int fdctrl_seek_to_next_sect(FDCtrl *fdctrl, FDrive *cur_drv)
 {
     FLOPPY_DPRINTF("seek to next sector (%d %02x %02x => %d)\n",
@@ -1005,30 +1003,39 @@ static int fdctrl_seek_to_next_sect(FDCtrl *fdctrl, FDrive *cur_drv)
                    fd_sector(cur_drv));
     /* XXX: cur_drv->sect >= cur_drv->last_sect should be an
        error in fact */
-    if (cur_drv->sect >= cur_drv->last_sect ||
-        cur_drv->sect == fdctrl->eot) {
-        cur_drv->sect = 1;
+    uint8_t new_head = cur_drv->head;
+    uint8_t new_track = cur_drv->track;
+    uint8_t new_sect = cur_drv->sect;
+
+    int ret = 1;
+
+    if (new_sect >= cur_drv->last_sect ||
+        new_sect == fdctrl->eot) {
+        new_sect = 1;
         if (FD_MULTI_TRACK(fdctrl->data_state)) {
-            if (cur_drv->head == 0 &&
+            if (new_head == 0 &&
                 (cur_drv->flags & FDISK_DBL_SIDES) != 0) {
-                cur_drv->head = 1;
+                new_head = 1;
             } else {
-                cur_drv->head = 0;
-                cur_drv->track++;
-                if ((cur_drv->flags & FDISK_DBL_SIDES) == 0)
-                    return 0;
+                new_head = 0;
+                new_track++;
+                if ((cur_drv->flags & FDISK_DBL_SIDES) == 0) {
+                    ret = 0;
+                }
             }
         } else {
-            cur_drv->track++;
-            return 0;
+            new_track++;
+            ret = 0;
+        }
+        if (ret == 1) {
+            FLOPPY_DPRINTF("seek to next track (%d %02x %02x => %d)\n",
+                    new_head, new_track, new_sect, fd_sector(cur_drv));
         }
-        FLOPPY_DPRINTF("seek to next track (%d %02x %02x => %d)\n",
-                       cur_drv->head, cur_drv->track,
-                       cur_drv->sect, fd_sector(cur_drv));
     } else {
-        cur_drv->sect++;
+        new_sect++;
     }
-    return 1;
+    fd_seek(cur_drv, new_head, new_track, new_sect, 1);
+    return ret;
 }
 
 /* Callback for transfer end (stop or abort) */
@@ -1038,10 +1045,12 @@ static void fdctrl_stop_transfer(FDCtrl *fdctrl, uint8_t status0,
     FDrive *cur_drv;
 
     cur_drv = get_cur_drv(fdctrl);
+    fdctrl->status0 = status0 | FD_SR0_SEEK | (cur_drv->head << 2) |
+                      GET_CUR_DRV(fdctrl);
+
     FLOPPY_DPRINTF("transfer status: %02x %02x %02x (%02x)\n",
-                   status0, status1, status2,
-                   status0 | (cur_drv->head << 2) | GET_CUR_DRV(fdctrl));
-    fdctrl->fifo[0] = status0 | (cur_drv->head << 2) | GET_CUR_DRV(fdctrl);
+                   status0, status1, status2, fdctrl->status0);
+    fdctrl->fifo[0] = fdctrl->status0;
     fdctrl->fifo[1] = status1;
     fdctrl->fifo[2] = status2;
     fdctrl->fifo[3] = cur_drv->track;
@@ -1054,7 +1063,7 @@ static void fdctrl_stop_transfer(FDCtrl *fdctrl, uint8_t status0,
     }
     fdctrl->msr |= FD_MSR_RQM | FD_MSR_DIO;
     fdctrl->msr &= ~FD_MSR_NONDMA;
-    fdctrl_set_fifo(fdctrl, 7, 1);
+    fdctrl_set_fifo(fdctrl, 7, fdctrl->status0);
 }
 
 /* Prepare a data transfer (either DMA or FIFO) */
@@ -1169,7 +1178,7 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction)
     if (direction != FD_DIR_WRITE)
         fdctrl->msr |= FD_MSR_DIO;
     /* IO based transfer: calculate len */
-    fdctrl_raise_irq(fdctrl, 0x00);
+    fdctrl_raise_irq(fdctrl, FD_SR0_SEEK);
 
     return;
 }
@@ -1598,16 +1607,18 @@ static void fdctrl_handle_sense_interrupt_status(FDCtrl *fdctrl, int direction)
 {
     FDrive *cur_drv = get_cur_drv(fdctrl);
 
-    if(fdctrl->reset_sensei > 0) {
+    if (fdctrl->reset_sensei > 0) {
         fdctrl->fifo[0] =
             FD_SR0_RDYCHG + FD_RESET_SENSEI_COUNT - fdctrl->reset_sensei;
         fdctrl->reset_sensei--;
+    } else if (!(fdctrl->sra & FD_SRA_INTPEND)) {
+        fdctrl->fifo[0] = FD_SR0_INVCMD;
+        fdctrl_set_fifo(fdctrl, 1, 0);
+        return;
     } else {
-        /* XXX: status0 handling is broken for read/write
-           commands, so we do this hack. It should be suppressed
-           ASAP */
         fdctrl->fifo[0] =
-            FD_SR0_SEEK | (cur_drv->head << 2) | GET_CUR_DRV(fdctrl);
+                (fdctrl->status0 & ~(FD_SR0_HEAD | FD_SR0_DS1 | FD_SR0_DS0))
+                | GET_CUR_DRV(fdctrl);
     }
 
     fdctrl->fifo[1] = cur_drv->track;
@@ -1626,11 +1637,7 @@ static void fdctrl_handle_seek(FDCtrl *fdctrl, int direction)
     /* The seek command just sends step pulses to the drive and doesn't care if
      * there is a medium inserted of if it's banging the head against the drive.
      */
-    if (fdctrl->fifo[2] > cur_drv->max_track) {
-        cur_drv->track = cur_drv->max_track;
-    } else {
-        cur_drv->track = fdctrl->fifo[2];
-    }
+    fd_seek(cur_drv, cur_drv->head, fdctrl->fifo[2], cur_drv->sect, 1);
     /* Raise Interrupt */
     fdctrl_raise_irq(fdctrl, FD_SR0_SEEK);
 }
@@ -1695,9 +1702,10 @@ static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction)
     SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK);
     cur_drv = get_cur_drv(fdctrl);
     if (fdctrl->fifo[2] + cur_drv->track >= cur_drv->max_track) {
-        cur_drv->track = cur_drv->max_track - 1;
+        fd_seek(cur_drv, cur_drv->head, cur_drv->max_track - 1,
+                cur_drv->sect, 1);
     } else {
-        cur_drv->track += fdctrl->fifo[2];
+        fd_seek(cur_drv, cur_drv->head, fdctrl->fifo[2], cur_drv->sect, 1);
     }
     fdctrl_reset_fifo(fdctrl);
     /* Raise Interrupt */
@@ -1711,9 +1719,9 @@ static void fdctrl_handle_relative_seek_in(FDCtrl *fdctrl, int direction)
     SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK);
     cur_drv = get_cur_drv(fdctrl);
     if (fdctrl->fifo[2] > cur_drv->track) {
-        cur_drv->track = 0;
+        fd_seek(cur_drv, cur_drv->head, 0, cur_drv->sect, 1);
     } else {
-        cur_drv->track -= fdctrl->fifo[2];
+        fd_seek(cur_drv, cur_drv->head, fdctrl->fifo[2], cur_drv->sect, 1);
     }
     fdctrl_reset_fifo(fdctrl);
     /* Raise Interrupt */
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 5919cf52d8..f7f714c726 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -956,6 +956,36 @@ static void cmd_read_cdvd_capacity(IDEState *s, uint8_t* buf)
     ide_atapi_cmd_reply(s, 8, 8);
 }
 
+static void cmd_read_disc_information(IDEState *s, uint8_t* buf)
+{
+    uint8_t type = buf[1] & 7;
+    uint32_t max_len = ube16_to_cpu(buf + 7);
+
+    /* Types 1/2 are only defined for Blu-Ray.  */
+    if (type != 0) {
+        ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
+                            ASC_INV_FIELD_IN_CMD_PACKET);
+        return;
+    }
+
+    memset(buf, 0, 34);
+    buf[1] = 32;
+    buf[2] = 0xe; /* last session complete, disc finalized */
+    buf[3] = 1;   /* first track on disc */
+    buf[4] = 1;   /* # of sessions */
+    buf[5] = 1;   /* first track of last session */
+    buf[6] = 1;   /* last track of last session */
+    buf[7] = 0x20; /* unrestricted use */
+    buf[8] = 0x00; /* CD-ROM or DVD-ROM */
+    /* 9-10-11: most significant byte corresponding bytes 4-5-6 */
+    /* 12-23: not meaningful for CD-ROM or DVD-ROM */
+    /* 24-31: disc bar code */
+    /* 32: disc application code */
+    /* 33: number of OPC tables */
+
+    ide_atapi_cmd_reply(s, 34, max_len);
+}
+
 static void cmd_read_dvd_structure(IDEState *s, uint8_t* buf)
 {
     int max_len;
@@ -1045,6 +1075,7 @@ static const struct {
     [ 0x43 ] = { cmd_read_toc_pma_atip,             CHECK_READY },
     [ 0x46 ] = { cmd_get_configuration,             ALLOW_UA },
     [ 0x4a ] = { cmd_get_event_status_notification, ALLOW_UA },
+    [ 0x51 ] = { cmd_read_disc_information,         CHECK_READY },
     [ 0x5a ] = { cmd_mode_sense, /* (10) */         0 },
     [ 0xa8 ] = { cmd_read, /* (12) */               CHECK_READY },
     [ 0xad ] = { cmd_read_dvd_structure,            CHECK_READY },
diff --git a/hw/imx.h b/hw/imx.h
new file mode 100644
index 0000000000..ccf586fefe
--- /dev/null
+++ b/hw/imx.h
@@ -0,0 +1,34 @@
+/*
+ * i.MX31 emulation
+ *
+ * Copyright (C) 2012 Peter Chubb
+ * NICTA
+ *
+ * This code is released under the GPL, version 2.0 or later
+ * See the file `../COPYING' for details.
+ */
+
+#ifndef IMX_H
+#define IMX_H
+
+void imx_serial_create(int uart, const target_phys_addr_t addr, qemu_irq irq);
+
+typedef enum  {
+    NOCLK,
+    MCU,
+    HSP,
+    IPG,
+    CLK_32k
+} IMXClk;
+
+uint32_t imx_clock_frequency(DeviceState *s, IMXClk clock);
+
+void imx_timerp_create(const target_phys_addr_t addr,
+                      qemu_irq irq,
+                      DeviceState *ccm);
+void imx_timerg_create(const target_phys_addr_t addr,
+                      qemu_irq irq,
+                      DeviceState *ccm);
+
+
+#endif /* IMX_H */
diff --git a/hw/imx_avic.c b/hw/imx_avic.c
new file mode 100644
index 0000000000..4f010e8ee2
--- /dev/null
+++ b/hw/imx_avic.c
@@ -0,0 +1,408 @@
+/*
+ * i.MX31 Vectored Interrupt Controller
+ *
+ * Note this is NOT the PL192 provided by ARM, but
+ * a custom implementation by Freescale.
+ *
+ * Copyright (c) 2008 OKL
+ * Copyright (c) 2011 NICTA Pty Ltd
+ * Originally Written by Hans Jiang
+ *
+ * This code is licenced under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ *
+ * TODO: implement vectors.
+ */
+
+#include "hw.h"
+#include "sysbus.h"
+#include "host-utils.h"
+
+#define DEBUG_INT 1
+#undef DEBUG_INT /* comment out for debugging */
+
+#ifdef DEBUG_INT
+#define DPRINTF(fmt, args...) \
+do { printf("imx_avic: " fmt , ##args); } while (0)
+#else
+#define DPRINTF(fmt, args...) do {} while (0)
+#endif
+
+/*
+ * Define to 1 for messages about attempts to
+ * access unimplemented registers or similar.
+ */
+#define DEBUG_IMPLEMENTATION 1
+#if DEBUG_IMPLEMENTATION
+#  define IPRINTF(fmt, args...) \
+    do  { fprintf(stderr, "imx_avic: " fmt, ##args); } while (0)
+#else
+#  define IPRINTF(fmt, args...) do {} while (0)
+#endif
+
+#define IMX_AVIC_NUM_IRQS 64
+
+/* Interrupt Control Bits */
+#define ABFLAG (1<<25)
+#define ABFEN (1<<24)
+#define NIDIS (1<<22) /* Normal Interrupt disable */
+#define FIDIS (1<<21) /* Fast interrupt disable */
+#define NIAD  (1<<20) /* Normal Interrupt Arbiter Rise ARM level */
+#define FIAD  (1<<19) /* Fast Interrupt Arbiter Rise ARM level */
+#define NM    (1<<18) /* Normal interrupt mode */
+
+
+#define PRIO_PER_WORD (sizeof(uint32_t) * 8 / 4)
+#define PRIO_WORDS (IMX_AVIC_NUM_IRQS/PRIO_PER_WORD)
+
+typedef struct {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+    uint64_t pending;
+    uint64_t enabled;
+    uint64_t is_fiq;
+    uint32_t intcntl;
+    uint32_t intmask;
+    qemu_irq irq;
+    qemu_irq fiq;
+    uint32_t prio[PRIO_WORDS]; /* Priorities are 4-bits each */
+} IMXAVICState;
+
+static const VMStateDescription vmstate_imx_avic = {
+    .name = "imx-avic",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(pending, IMXAVICState),
+        VMSTATE_UINT64(enabled, IMXAVICState),
+        VMSTATE_UINT64(is_fiq, IMXAVICState),
+        VMSTATE_UINT32(intcntl, IMXAVICState),
+        VMSTATE_UINT32(intmask, IMXAVICState),
+        VMSTATE_UINT32_ARRAY(prio, IMXAVICState, PRIO_WORDS),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+
+
+static inline int imx_avic_prio(IMXAVICState *s, int irq)
+{
+    uint32_t word = irq / PRIO_PER_WORD;
+    uint32_t part = 4 * (irq % PRIO_PER_WORD);
+    return 0xf & (s->prio[word] >> part);
+}
+
+static inline void imx_avic_set_prio(IMXAVICState *s, int irq, int prio)
+{
+    uint32_t word = irq / PRIO_PER_WORD;
+    uint32_t part = 4 * (irq % PRIO_PER_WORD);
+    uint32_t mask = ~(0xf << part);
+    s->prio[word] &= mask;
+    s->prio[word] |= prio << part;
+}
+
+/* Update interrupts.  */
+static void imx_avic_update(IMXAVICState *s)
+{
+    int i;
+    uint64_t new = s->pending & s->enabled;
+    uint64_t flags;
+
+    flags = new & s->is_fiq;
+    qemu_set_irq(s->fiq, !!flags);
+
+    flags = new & ~s->is_fiq;
+    if (!flags || (s->intmask == 0x1f)) {
+        qemu_set_irq(s->irq, !!flags);
+        return;
+    }
+
+    /*
+     * Take interrupt if there's a pending interrupt with
+     * priority higher than the value of intmask
+     */
+    for (i = 0; i < IMX_AVIC_NUM_IRQS; i++) {
+        if (flags & (1UL << i)) {
+            if (imx_avic_prio(s, i) > s->intmask) {
+                qemu_set_irq(s->irq, 1);
+                return;
+            }
+        }
+    }
+    qemu_set_irq(s->irq, 0);
+}
+
+static void imx_avic_set_irq(void *opaque, int irq, int level)
+{
+    IMXAVICState *s = (IMXAVICState *)opaque;
+
+    if (level) {
+        DPRINTF("Raising IRQ %d, prio %d\n",
+                irq, imx_avic_prio(s, irq));
+        s->pending |= (1ULL << irq);
+    } else {
+        DPRINTF("Clearing IRQ %d, prio %d\n",
+                irq, imx_avic_prio(s, irq));
+        s->pending &= ~(1ULL << irq);
+    }
+
+    imx_avic_update(s);
+}
+
+
+static uint64_t imx_avic_read(void *opaque,
+                             target_phys_addr_t offset, unsigned size)
+{
+    IMXAVICState *s = (IMXAVICState *)opaque;
+
+
+    DPRINTF("read(offset = 0x%x)\n", offset >> 2);
+    switch (offset >> 2) {
+    case 0: /* INTCNTL */
+        return s->intcntl;
+
+    case 1: /* Normal Interrupt Mask Register, NIMASK */
+        return s->intmask;
+
+    case 2: /* Interrupt Enable Number Register, INTENNUM */
+    case 3: /* Interrupt Disable Number Register, INTDISNUM */
+        return 0;
+
+    case 4: /* Interrupt Enabled Number Register High */
+        return s->enabled >> 32;
+
+    case 5: /* Interrupt Enabled Number Register Low */
+        return s->enabled & 0xffffffffULL;
+
+    case 6: /* Interrupt Type Register High */
+        return s->is_fiq >> 32;
+
+    case 7: /* Interrupt Type Register Low */
+        return s->is_fiq & 0xffffffffULL;
+
+    case 8: /* Normal Interrupt Priority Register 7 */
+    case 9: /* Normal Interrupt Priority Register 6 */
+    case 10:/* Normal Interrupt Priority Register 5 */
+    case 11:/* Normal Interrupt Priority Register 4 */
+    case 12:/* Normal Interrupt Priority Register 3 */
+    case 13:/* Normal Interrupt Priority Register 2 */
+    case 14:/* Normal Interrupt Priority Register 1 */
+    case 15:/* Normal Interrupt Priority Register 0 */
+        return s->prio[15-(offset>>2)];
+
+    case 16: /* Normal interrupt vector and status register */
+    {
+        /*
+         * This returns the highest priority
+         * outstanding interrupt.  Where there is more than
+         * one pending IRQ with the same priority,
+         * take the highest numbered one.
+         */
+        uint64_t flags = s->pending & s->enabled & ~s->is_fiq;
+        int i;
+        int prio = -1;
+        int irq = -1;
+        for (i = 63; i >= 0; --i) {
+            if (flags & (1ULL<<i)) {
+                int irq_prio = imx_avic_prio(s, i);
+                if (irq_prio > prio) {
+                    irq = i;
+                    prio = irq_prio;
+                }
+            }
+        }
+        if (irq >= 0) {
+            imx_avic_set_irq(s, irq, 0);
+            return irq << 16 | prio;
+        }
+        return 0xffffffffULL;
+    }
+    case 17:/* Fast Interrupt vector and status register */
+    {
+        uint64_t flags = s->pending & s->enabled & s->is_fiq;
+        int i = ctz64(flags);
+        if (i < 64) {
+            imx_avic_set_irq(opaque, i, 0);
+            return i;
+        }
+        return 0xffffffffULL;
+    }
+    case 18:/* Interrupt source register high */
+        return s->pending >> 32;
+
+    case 19:/* Interrupt source register low */
+        return s->pending & 0xffffffffULL;
+
+    case 20:/* Interrupt Force Register high */
+    case 21:/* Interrupt Force Register low */
+        return 0;
+
+    case 22:/* Normal Interrupt Pending Register High */
+        return (s->pending & s->enabled & ~s->is_fiq) >> 32;
+
+    case 23:/* Normal Interrupt Pending Register Low */
+        return (s->pending & s->enabled & ~s->is_fiq) & 0xffffffffULL;
+
+    case 24: /* Fast Interrupt Pending Register High  */
+        return (s->pending & s->enabled & s->is_fiq) >> 32;
+
+    case 25: /* Fast Interrupt Pending Register Low  */
+        return (s->pending & s->enabled & s->is_fiq) & 0xffffffffULL;
+
+    case 0x40:            /* AVIC vector 0, use for WFI WAR */
+        return 0x4;
+
+    default:
+        IPRINTF("imx_avic_read: Bad offset 0x%x\n", (int)offset);
+        return 0;
+    }
+}
+
+static void imx_avic_write(void *opaque, target_phys_addr_t offset,
+                          uint64_t val, unsigned size)
+{
+    IMXAVICState *s = (IMXAVICState *)opaque;
+
+    /* Vector Registers not yet supported */
+    if (offset >= 0x100 && offset <= 0x2fc) {
+        IPRINTF("imx_avic_write to vector register %d ignored\n",
+                (unsigned int)((offset - 0x100) >> 2));
+        return;
+    }
+
+    DPRINTF("imx_avic_write(0x%x) = %x\n",
+            (unsigned int)offset>>2, (unsigned int)val);
+    switch (offset >> 2) {
+    case 0: /* Interrupt Control Register, INTCNTL */
+        s->intcntl = val & (ABFEN | NIDIS | FIDIS | NIAD | FIAD | NM);
+        if (s->intcntl & ABFEN) {
+            s->intcntl &= ~(val & ABFLAG);
+        }
+        break;
+
+    case 1: /* Normal Interrupt Mask Register, NIMASK */
+        s->intmask = val & 0x1f;
+        break;
+
+    case 2: /* Interrupt Enable Number Register, INTENNUM */
+        DPRINTF("enable(%d)\n", (int)val);
+        val &= 0x3f;
+        s->enabled |= (1ULL << val);
+        break;
+
+    case 3: /* Interrupt Disable Number Register, INTDISNUM */
+        DPRINTF("disable(%d)\n", (int)val);
+        val &= 0x3f;
+        s->enabled &= ~(1ULL << val);
+        break;
+
+    case 4: /* Interrupt Enable Number Register High */
+        s->enabled = (s->enabled & 0xffffffffULL) | (val << 32);
+        break;
+
+    case 5: /* Interrupt Enable Number Register Low */
+        s->enabled = (s->enabled & 0xffffffff00000000ULL) | val;
+        break;
+
+    case 6: /* Interrupt Type Register High */
+        s->is_fiq = (s->is_fiq & 0xffffffffULL) | (val << 32);
+        break;
+
+    case 7: /* Interrupt Type Register Low */
+        s->is_fiq = (s->is_fiq & 0xffffffff00000000ULL) | val;
+        break;
+
+    case 8: /* Normal Interrupt Priority Register 7 */
+    case 9: /* Normal Interrupt Priority Register 6 */
+    case 10:/* Normal Interrupt Priority Register 5 */
+    case 11:/* Normal Interrupt Priority Register 4 */
+    case 12:/* Normal Interrupt Priority Register 3 */
+    case 13:/* Normal Interrupt Priority Register 2 */
+    case 14:/* Normal Interrupt Priority Register 1 */
+    case 15:/* Normal Interrupt Priority Register 0 */
+        s->prio[15-(offset>>2)] = val;
+        break;
+
+        /* Read-only registers, writes ignored */
+    case 16:/* Normal Interrupt Vector and Status register */
+    case 17:/* Fast Interrupt vector and status register */
+    case 18:/* Interrupt source register high */
+    case 19:/* Interrupt source register low */
+        return;
+
+    case 20:/* Interrupt Force Register high */
+        s->pending = (s->pending & 0xffffffffULL) | (val << 32);
+        break;
+
+    case 21:/* Interrupt Force Register low */
+        s->pending = (s->pending & 0xffffffff00000000ULL) | val;
+        break;
+
+    case 22:/* Normal Interrupt Pending Register High */
+    case 23:/* Normal Interrupt Pending Register Low */
+    case 24: /* Fast Interrupt Pending Register High  */
+    case 25: /* Fast Interrupt Pending Register Low  */
+        return;
+
+    default:
+        IPRINTF("imx_avic_write: Bad offset %x\n", (int)offset);
+    }
+    imx_avic_update(s);
+}
+
+static const MemoryRegionOps imx_avic_ops = {
+    .read = imx_avic_read,
+    .write = imx_avic_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static void imx_avic_reset(DeviceState *dev)
+{
+    IMXAVICState *s = container_of(dev, IMXAVICState, busdev.qdev);
+    s->pending = 0;
+    s->enabled = 0;
+    s->is_fiq = 0;
+    s->intmask = 0x1f;
+    s->intcntl = 0;
+    memset(s->prio, 0, sizeof s->prio);
+}
+
+static int imx_avic_init(SysBusDevice *dev)
+{
+    IMXAVICState *s = FROM_SYSBUS(IMXAVICState, dev);;
+
+    memory_region_init_io(&s->iomem, &imx_avic_ops, s, "imx_avic", 0x1000);
+    sysbus_init_mmio(dev, &s->iomem);
+
+    qdev_init_gpio_in(&dev->qdev, imx_avic_set_irq, IMX_AVIC_NUM_IRQS);
+    sysbus_init_irq(dev, &s->irq);
+    sysbus_init_irq(dev, &s->fiq);
+
+    return 0;
+}
+
+
+static void imx_avic_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+    k->init = imx_avic_init;
+    dc->vmsd = &vmstate_imx_avic;
+    dc->reset = imx_avic_reset;
+    dc->desc = "i.MX Advanced Vector Interrupt Controller";
+}
+
+static const TypeInfo imx_avic_info = {
+    .name = "imx_avic",
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(IMXAVICState),
+    .class_init = imx_avic_class_init,
+};
+
+static void imx_avic_register_types(void)
+{
+    type_register_static(&imx_avic_info);
+}
+
+type_init(imx_avic_register_types)
diff --git a/hw/imx_ccm.c b/hw/imx_ccm.c
new file mode 100644
index 0000000000..10952c6ea1
--- /dev/null
+++ b/hw/imx_ccm.c
@@ -0,0 +1,321 @@
+/*
+ * IMX31 Clock Control Module
+ *
+ * Copyright (C) 2012 NICTA
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * To get the timer frequencies right, we need to emulate at least part of
+ * the CCM.
+ */
+
+#include "hw.h"
+#include "sysbus.h"
+#include "sysemu.h"
+#include "imx.h"
+
+#define CKIH_FREQ 26000000 /* 26MHz crystal input */
+#define CKIL_FREQ    32768 /* nominal 32khz clock */
+
+
+//#define DEBUG_CCM 1
+#ifdef DEBUG_CCM
+#define DPRINTF(fmt, args...) \
+do { printf("imx_ccm: " fmt , ##args); } while (0)
+#else
+#define DPRINTF(fmt, args...) do {} while (0)
+#endif
+
+static int imx_ccm_post_load(void *opaque, int version_id);
+
+typedef struct {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+
+    uint32_t ccmr;
+    uint32_t pdr0;
+    uint32_t pdr1;
+    uint32_t mpctl;
+    uint32_t spctl;
+    uint32_t cgr[3];
+    uint32_t pmcr0;
+    uint32_t pmcr1;
+
+    /* Frequencies precalculated on register changes */
+    uint32_t pll_refclk_freq;
+    uint32_t mcu_clk_freq;
+    uint32_t hsp_clk_freq;
+    uint32_t ipg_clk_freq;
+} IMXCCMState;
+
+static const VMStateDescription vmstate_imx_ccm = {
+    .name = "imx-ccm",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(ccmr, IMXCCMState),
+        VMSTATE_UINT32(pdr0, IMXCCMState),
+        VMSTATE_UINT32(pdr1, IMXCCMState),
+        VMSTATE_UINT32(mpctl, IMXCCMState),
+        VMSTATE_UINT32(spctl, IMXCCMState),
+        VMSTATE_UINT32_ARRAY(cgr, IMXCCMState, 3),
+        VMSTATE_UINT32(pmcr0, IMXCCMState),
+        VMSTATE_UINT32(pmcr1, IMXCCMState),
+        VMSTATE_UINT32(pll_refclk_freq, IMXCCMState),
+    },
+    .post_load = imx_ccm_post_load,
+};
+
+/* CCMR */
+#define CCMR_FPME (1<<0)
+#define CCMR_MPE  (1<<3)
+#define CCMR_MDS  (1<<7)
+#define CCMR_FPMF (1<<26)
+#define CCMR_PRCS (3<<1)
+
+/* PDR0 */
+#define PDR0_MCU_PODF_SHIFT (0)
+#define PDR0_MCU_PODF_MASK (0x7)
+#define PDR0_MAX_PODF_SHIFT (3)
+#define PDR0_MAX_PODF_MASK (0x7)
+#define PDR0_IPG_PODF_SHIFT (6)
+#define PDR0_IPG_PODF_MASK (0x3)
+#define PDR0_NFC_PODF_SHIFT (8)
+#define PDR0_NFC_PODF_MASK (0x7)
+#define PDR0_HSP_PODF_SHIFT (11)
+#define PDR0_HSP_PODF_MASK (0x7)
+#define PDR0_PER_PODF_SHIFT (16)
+#define PDR0_PER_PODF_MASK (0x1f)
+#define PDR0_CSI_PODF_SHIFT (23)
+#define PDR0_CSI_PODF_MASK (0x1ff)
+
+#define EXTRACT(value, name) (((value) >> PDR0_##name##_PODF_SHIFT) \
+                              & PDR0_##name##_PODF_MASK)
+#define INSERT(value, name) (((value) & PDR0_##name##_PODF_MASK) << \
+                             PDR0_##name##_PODF_SHIFT)
+/* PLL control registers */
+#define PD(v) (((v) >> 26) & 0xf)
+#define MFD(v) (((v) >> 16) & 0x3ff)
+#define MFI(v) (((v) >> 10) & 0xf);
+#define MFN(v) ((v) & 0x3ff)
+
+#define PLL_PD(x)               (((x) & 0xf) << 26)
+#define PLL_MFD(x)              (((x) & 0x3ff) << 16)
+#define PLL_MFI(x)              (((x) & 0xf) << 10)
+#define PLL_MFN(x)              (((x) & 0x3ff) << 0)
+
+uint32_t imx_clock_frequency(DeviceState *dev, IMXClk clock)
+{
+    IMXCCMState *s = container_of(dev, IMXCCMState, busdev.qdev);
+
+    switch (clock) {
+    case NOCLK:
+        return 0;
+    case MCU:
+        return s->mcu_clk_freq;
+    case HSP:
+        return s->hsp_clk_freq;
+    case IPG:
+        return s->ipg_clk_freq;
+    case CLK_32k:
+        return CKIL_FREQ;
+    }
+    return 0;
+}
+
+/*
+ * Calculate PLL output frequency
+ */
+static uint32_t calc_pll(uint32_t pllreg, uint32_t base_freq)
+{
+    int32_t mfn = MFN(pllreg);  /* Numerator */
+    uint32_t mfi = MFI(pllreg); /* Integer part */
+    uint32_t mfd = 1 + MFD(pllreg); /* Denominator */
+    uint32_t pd = 1 + PD(pllreg);   /* Pre-divider */
+
+    if (mfi < 5) {
+        mfi = 5;
+    }
+    /* mfn is 10-bit signed twos-complement */
+    mfn <<= 32 - 10;
+    mfn >>= 32 - 10;
+
+    return ((2 * (base_freq >> 10) * (mfi * mfd + mfn)) /
+            (mfd * pd)) << 10;
+}
+
+static void update_clocks(IMXCCMState *s)
+{
+    /*
+     * If we ever emulate more clocks, this should switch to a data-driven
+     * approach
+     */
+
+    if ((s->ccmr & CCMR_PRCS) == 1) {
+        s->pll_refclk_freq = CKIL_FREQ * 1024;
+    } else {
+        s->pll_refclk_freq = CKIH_FREQ;
+    }
+
+    /* ipg_clk_arm aka MCU clock */
+    if ((s->ccmr & CCMR_MDS) || !(s->ccmr & CCMR_MPE)) {
+        s->mcu_clk_freq = s->pll_refclk_freq;
+    } else {
+        s->mcu_clk_freq = calc_pll(s->mpctl, s->pll_refclk_freq);
+    }
+
+    /* High-speed clock */
+    s->hsp_clk_freq = s->mcu_clk_freq / (1 + EXTRACT(s->pdr0, HSP));
+    s->ipg_clk_freq = s->hsp_clk_freq / (1 + EXTRACT(s->pdr0, IPG));
+
+    DPRINTF("Clocks: mcu %uMHz, HSP %uMHz, IPG %uHz\n",
+            s->mcu_clk_freq / 1000000,
+            s->hsp_clk_freq / 1000000,
+            s->ipg_clk_freq);
+}
+
+static void imx_ccm_reset(DeviceState *dev)
+{
+    IMXCCMState *s = container_of(dev, IMXCCMState, busdev.qdev);
+
+    s->ccmr = 0x074b0b7b;
+    s->pdr0 = 0xff870b48;
+    s->pdr1 = 0x49fcfe7f;
+    s->mpctl = PLL_PD(1) | PLL_MFD(0) | PLL_MFI(6) | PLL_MFN(0);
+    s->cgr[0] = s->cgr[1] = s->cgr[2] = 0xffffffff;
+    s->spctl = PLL_PD(1) | PLL_MFD(4) | PLL_MFI(0xc) | PLL_MFN(1);
+    s->pmcr0 = 0x80209828;
+
+    update_clocks(s);
+}
+
+static uint64_t imx_ccm_read(void *opaque, target_phys_addr_t offset,
+                                unsigned size)
+{
+    IMXCCMState *s = (IMXCCMState *)opaque;
+
+    DPRINTF("read(offset=%x)", offset >> 2);
+    switch (offset >> 2) {
+    case 0: /* CCMR */
+        DPRINTF(" ccmr = 0x%x\n", s->ccmr);
+        return s->ccmr;
+    case 1:
+        DPRINTF(" pdr0 = 0x%x\n", s->pdr0);
+        return s->pdr0;
+    case 2:
+        DPRINTF(" pdr1 = 0x%x\n", s->pdr1);
+        return s->pdr1;
+    case 4:
+        DPRINTF(" mpctl = 0x%x\n", s->mpctl);
+        return s->mpctl;
+    case 6:
+        DPRINTF(" spctl = 0x%x\n", s->spctl);
+        return s->spctl;
+    case 8:
+        DPRINTF(" cgr0 = 0x%x\n", s->cgr[0]);
+        return s->cgr[0];
+    case 9:
+        DPRINTF(" cgr1 = 0x%x\n", s->cgr[1]);
+        return s->cgr[1];
+    case 10:
+        DPRINTF(" cgr2 = 0x%x\n", s->cgr[2]);
+        return s->cgr[2];
+    case 18: /* LTR1 */
+        return 0x00004040;
+    case 23:
+        DPRINTF(" pcmr0 = 0x%x\n", s->pmcr0);
+        return s->pmcr0;
+    }
+    DPRINTF(" return 0\n");
+    return 0;
+}
+
+static void imx_ccm_write(void *opaque, target_phys_addr_t offset,
+                          uint64_t value, unsigned size)
+{
+    IMXCCMState *s = (IMXCCMState *)opaque;
+
+    DPRINTF("write(offset=%x, value = %x)\n",
+            offset >> 2, (unsigned int)value);
+    switch (offset >> 2) {
+    case 0:
+        s->ccmr = CCMR_FPMF | (value & 0x3b6fdfff);
+        break;
+    case 1:
+        s->pdr0 = value & 0xff9f3fff;
+        break;
+    case 2:
+        s->pdr1 = value;
+        break;
+    case 4:
+        s->mpctl = value & 0xbfff3fff;
+        break;
+    case 6:
+        s->spctl = value & 0xbfff3fff;
+        break;
+    case 8:
+        s->cgr[0] = value;
+        return;
+    case 9:
+        s->cgr[1] = value;
+        return;
+    case 10:
+        s->cgr[2] = value;
+        return;
+
+    default:
+        return;
+    }
+    update_clocks(s);
+}
+
+static const struct MemoryRegionOps imx_ccm_ops = {
+    .read = imx_ccm_read,
+    .write = imx_ccm_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static int imx_ccm_init(SysBusDevice *dev)
+{
+    IMXCCMState *s = FROM_SYSBUS(typeof(*s), dev);
+
+    memory_region_init_io(&s->iomem, &imx_ccm_ops, s, "imx_ccm", 0x1000);
+    sysbus_init_mmio(dev, &s->iomem);
+
+    return 0;
+}
+
+static int imx_ccm_post_load(void *opaque, int version_id)
+{
+    IMXCCMState *s = (IMXCCMState *)opaque;
+
+    update_clocks(s);
+    return 0;
+}
+
+static void imx_ccm_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
+
+    sbc->init = imx_ccm_init;
+    dc->reset = imx_ccm_reset;
+    dc->vmsd = &vmstate_imx_ccm;
+    dc->desc = "i.MX Clock Control Module";
+}
+
+static TypeInfo imx_ccm_info = {
+    .name = "imx_ccm",
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(IMXCCMState),
+    .class_init = imx_ccm_class_init,
+};
+
+static void imx_ccm_register_types(void)
+{
+    type_register_static(&imx_ccm_info);
+}
+
+type_init(imx_ccm_register_types)
diff --git a/hw/imx_serial.c b/hw/imx_serial.c
new file mode 100644
index 0000000000..d4eae430f5
--- /dev/null
+++ b/hw/imx_serial.c
@@ -0,0 +1,467 @@
+/*
+ * IMX31 UARTS
+ *
+ * Copyright (c) 2008 OKL
+ * Originally Written by Hans Jiang
+ * Copyright (c) 2011 NICTA Pty Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * This is a `bare-bones' implementation of the IMX series serial ports.
+ * TODO:
+ *  -- implement FIFOs.  The real hardware has 32 word transmit
+ *                       and receive FIFOs; we currently use a 1-char buffer
+ *  -- implement DMA
+ *  -- implement BAUD-rate and modem lines, for when the backend
+ *     is a real serial device.
+ */
+
+#include "hw.h"
+#include "sysbus.h"
+#include "sysemu.h"
+#include "qemu-char.h"
+#include "imx.h"
+
+//#define DEBUG_SERIAL 1
+#ifdef DEBUG_SERIAL
+#define DPRINTF(fmt, args...) \
+do { printf("imx_serial: " fmt , ##args); } while (0)
+#else
+#define DPRINTF(fmt, args...) do {} while (0)
+#endif
+
+/*
+ * Define to 1 for messages about attempts to
+ * access unimplemented registers or similar.
+ */
+//#define DEBUG_IMPLEMENTATION 1
+#ifdef DEBUG_IMPLEMENTATION
+#  define IPRINTF(fmt, args...) \
+    do  { fprintf(stderr, "imx_serial: " fmt, ##args); } while (0)
+#else
+#  define IPRINTF(fmt, args...) do {} while (0)
+#endif
+
+typedef struct {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+    int32_t readbuff;
+
+    uint32_t usr1;
+    uint32_t usr2;
+    uint32_t ucr1;
+    uint32_t ucr2;
+    uint32_t uts1;
+
+    /*
+     * The registers below are implemented just so that the
+     * guest OS sees what it has written
+     */
+    uint32_t onems;
+    uint32_t ufcr;
+    uint32_t ubmr;
+    uint32_t ubrc;
+    uint32_t ucr3;
+
+    qemu_irq irq;
+    CharDriverState *chr;
+} IMXSerialState;
+
+static const VMStateDescription vmstate_imx_serial = {
+    .name = "imx-serial",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT32(readbuff, IMXSerialState),
+        VMSTATE_UINT32(usr1, IMXSerialState),
+        VMSTATE_UINT32(usr2, IMXSerialState),
+        VMSTATE_UINT32(ucr1, IMXSerialState),
+        VMSTATE_UINT32(uts1, IMXSerialState),
+        VMSTATE_UINT32(onems, IMXSerialState),
+        VMSTATE_UINT32(ufcr, IMXSerialState),
+        VMSTATE_UINT32(ubmr, IMXSerialState),
+        VMSTATE_UINT32(ubrc, IMXSerialState),
+        VMSTATE_UINT32(ucr3, IMXSerialState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+
+#define URXD_CHARRDY    (1<<15)   /* character read is valid */
+#define URXD_ERR        (1<<14)   /* Character has error */
+#define URXD_BRK        (1<<11)   /* Break received */
+
+#define USR1_PARTYER    (1<<15)   /* Parity Error */
+#define USR1_RTSS       (1<<14)   /* RTS pin status */
+#define USR1_TRDY       (1<<13)   /* Tx ready */
+#define USR1_RTSD       (1<<12)   /* RTS delta: pin changed state */
+#define USR1_ESCF       (1<<11)   /* Escape sequence interrupt */
+#define USR1_FRAMERR    (1<<10)   /* Framing error  */
+#define USR1_RRDY       (1<<9)    /* receiver ready */
+#define USR1_AGTIM      (1<<8)    /* Aging timer interrupt */
+#define USR1_DTRD       (1<<7)    /* DTR changed */
+#define USR1_RXDS       (1<<6)    /* Receiver is idle */
+#define USR1_AIRINT     (1<<5)    /* Aysnch IR interrupt */
+#define USR1_AWAKE      (1<<4)    /* Falling edge detected on RXd pin */
+
+#define USR2_ADET       (1<<15)   /* Autobaud complete */
+#define USR2_TXFE       (1<<14)   /* Transmit FIFO empty */
+#define USR2_DTRF       (1<<13)   /* DTR/DSR transition */
+#define USR2_IDLE       (1<<12)   /* UART has been idle for too long */
+#define USR2_ACST       (1<<11)   /* Autobaud counter stopped */
+#define USR2_RIDELT     (1<<10)   /* Ring Indicator delta */
+#define USR2_RIIN       (1<<9)    /* Ring Indicator Input */
+#define USR2_IRINT      (1<<8)    /* Serial Infrared Interrupt */
+#define USR2_WAKE       (1<<7)    /* Start bit detected */
+#define USR2_DCDDELT    (1<<6)    /* Data Carrier Detect delta */
+#define USR2_DCDIN      (1<<5)    /* Data Carrier Detect Input */
+#define USR2_RTSF       (1<<4)    /* RTS transition */
+#define USR2_TXDC       (1<<3)    /* Transmission complete */
+#define USR2_BRCD       (1<<2)    /* Break condition detected */
+#define USR2_ORE        (1<<1)    /* Overrun error */
+#define USR2_RDR        (1<<0)    /* Receive data ready */
+
+#define UCR1_TRDYEN     (1<<13)   /* Tx Ready Interrupt Enable */
+#define UCR1_RRDYEN     (1<<9)    /* Rx Ready Interrupt Enable */
+#define UCR1_TXMPTYEN   (1<<6)    /* Tx Empty Interrupt Enable */
+#define UCR1_UARTEN     (1<<0)    /* UART Enable */
+
+#define UCR2_TXEN       (1<<2)    /* Transmitter enable */
+#define UCR2_RXEN       (1<<1)    /* Receiver enable */
+#define UCR2_SRST       (1<<0)    /* Reset complete */
+
+#define UTS1_TXEMPTY    (1<<6)
+#define UTS1_RXEMPTY    (1<<5)
+#define UTS1_TXFULL     (1<<4)
+#define UTS1_RXFULL     (1<<3)
+
+static void imx_update(IMXSerialState *s)
+{
+    uint32_t flags;
+
+    flags = (s->usr1 & s->ucr1) & (USR1_TRDY|USR1_RRDY);
+    if (!(s->ucr1 & UCR1_TXMPTYEN)) {
+        flags &= ~USR1_TRDY;
+    }
+
+    qemu_set_irq(s->irq, !!flags);
+}
+
+static void imx_serial_reset(IMXSerialState *s)
+{
+
+    s->usr1 = USR1_TRDY | USR1_RXDS;
+    /*
+     * Fake attachment of a terminal: assert RTS.
+     */
+    s->usr1 |= USR1_RTSS;
+    s->usr2 = USR2_TXFE | USR2_TXDC | USR2_DCDIN;
+    s->uts1 = UTS1_RXEMPTY | UTS1_TXEMPTY;
+    s->ucr1 = 0;
+    s->ucr2 = UCR2_SRST;
+    s->ucr3 = 0x700;
+    s->ubmr = 0;
+    s->ubrc = 4;
+    s->readbuff = URXD_ERR;
+}
+
+static void imx_serial_reset_at_boot(DeviceState *dev)
+{
+    IMXSerialState *s = container_of(dev, IMXSerialState, busdev.qdev);
+
+    imx_serial_reset(s);
+
+    /*
+     * enable the uart on boot, so messages from the linux decompresser
+     * are visible.  On real hardware this is done by the boot rom
+     * before anything else is loaded.
+     */
+    s->ucr1 = UCR1_UARTEN;
+    s->ucr2 = UCR2_TXEN;
+
+}
+
+static uint64_t imx_serial_read(void *opaque, target_phys_addr_t offset,
+                                unsigned size)
+{
+    IMXSerialState *s = (IMXSerialState *)opaque;
+    uint32_t c;
+
+    DPRINTF("read(offset=%x)\n", offset >> 2);
+    switch (offset >> 2) {
+    case 0x0: /* URXD */
+        c = s->readbuff;
+        if (!(s->uts1 & UTS1_RXEMPTY)) {
+            /* Character is valid */
+            c |= URXD_CHARRDY;
+            s->usr1 &= ~USR1_RRDY;
+            s->usr2 &= ~USR2_RDR;
+            s->uts1 |= UTS1_RXEMPTY;
+            imx_update(s);
+            qemu_chr_accept_input(s->chr);
+        }
+        return c;
+
+    case 0x20: /* UCR1 */
+        return s->ucr1;
+
+    case 0x21: /* UCR2 */
+        return s->ucr2;
+
+    case 0x25: /* USR1 */
+        return s->usr1;
+
+    case 0x26: /* USR2 */
+        return s->usr2;
+
+    case 0x2A: /* BRM Modulator */
+        return s->ubmr;
+
+    case 0x2B: /* Baud Rate Count */
+        return s->ubrc;
+
+    case 0x2d: /* Test register */
+        return s->uts1;
+
+    case 0x24: /* UFCR */
+        return s->ufcr;
+
+    case 0x2c:
+        return s->onems;
+
+    case 0x22: /* UCR3 */
+        return s->ucr3;
+
+    case 0x23: /* UCR4 */
+    case 0x29: /* BRM Incremental */
+        return 0x0; /* TODO */
+
+    default:
+        IPRINTF("imx_serial_read: bad offset: 0x%x\n", (int)offset);
+        return 0;
+    }
+}
+
+static void imx_serial_write(void *opaque, target_phys_addr_t offset,
+                      uint64_t value, unsigned size)
+{
+    IMXSerialState *s = (IMXSerialState *)opaque;
+    unsigned char ch;
+
+    DPRINTF("write(offset=%x, value = %x) to %s\n",
+            offset >> 2,
+            (unsigned int)value, s->chr ? s->chr->label : "NODEV");
+
+    switch (offset >> 2) {
+    case 0x10: /* UTXD */
+        ch = value;
+        if (s->ucr2 & UCR2_TXEN) {
+            if (s->chr) {
+                qemu_chr_fe_write(s->chr, &ch, 1);
+            }
+            s->usr1 &= ~USR1_TRDY;
+            imx_update(s);
+            s->usr1 |= USR1_TRDY;
+            imx_update(s);
+        }
+        break;
+
+    case 0x20: /* UCR1 */
+        s->ucr1 = value & 0xffff;
+        DPRINTF("write(ucr1=%x)\n", (unsigned int)value);
+        imx_update(s);
+        break;
+
+    case 0x21: /* UCR2 */
+        /*
+         * Only a few bits in control register 2 are implemented as yet.
+         * If it's intended to use a real serial device as a back-end, this
+         * register will have to be implemented more fully.
+         */
+        if (!(value & UCR2_SRST)) {
+            imx_serial_reset(s);
+            imx_update(s);
+            value |= UCR2_SRST;
+        }
+        if (value & UCR2_RXEN) {
+            if (!(s->ucr2 & UCR2_RXEN)) {
+                qemu_chr_accept_input(s->chr);
+            }
+        }
+        s->ucr2 = value & 0xffff;
+        break;
+
+    case 0x25: /* USR1 */
+        value &= USR1_AWAKE | USR1_AIRINT | USR1_DTRD | USR1_AGTIM |
+            USR1_FRAMERR | USR1_ESCF | USR1_RTSD | USR1_PARTYER;
+        s->usr1 &= ~value;
+        break;
+
+    case 0x26: /* USR2 */
+       /*
+        * Writing 1 to some bits clears them; all other
+        * values are ignored
+        */
+        value &= USR2_ADET | USR2_DTRF | USR2_IDLE | USR2_ACST |
+            USR2_RIDELT | USR2_IRINT | USR2_WAKE |
+            USR2_DCDDELT | USR2_RTSF | USR2_BRCD | USR2_ORE;
+        s->usr2 &= ~value;
+        break;
+
+        /*
+         * Linux expects to see what it writes to these registers
+         * We don't currently alter the baud rate
+         */
+    case 0x29: /* UBIR */
+        s->ubrc = value & 0xffff;
+        break;
+
+    case 0x2a: /* UBMR */
+        s->ubmr = value & 0xffff;
+        break;
+
+    case 0x2c: /* One ms reg */
+        s->onems = value & 0xffff;
+        break;
+
+    case 0x24: /* FIFO control register */
+        s->ufcr = value & 0xffff;
+        break;
+
+    case 0x22: /* UCR3 */
+        s->ucr3 = value & 0xffff;
+        break;
+
+    case 0x2d: /* UTS1 */
+    case 0x23: /* UCR4 */
+        IPRINTF("Unimplemented Register %x written to\n", offset >> 2);
+        /* TODO */
+        break;
+
+    default:
+        IPRINTF("imx_serial_write: Bad offset 0x%x\n", (int)offset);
+    }
+}
+
+static int imx_can_receive(void *opaque)
+{
+    IMXSerialState *s = (IMXSerialState *)opaque;
+    return !(s->usr1 & USR1_RRDY);
+}
+
+static void imx_put_data(void *opaque, uint32_t value)
+{
+    IMXSerialState *s = (IMXSerialState *)opaque;
+    DPRINTF("received char\n");
+    s->usr1 |= USR1_RRDY;
+    s->usr2 |= USR2_RDR;
+    s->uts1 &= ~UTS1_RXEMPTY;
+    s->readbuff = value;
+    imx_update(s);
+}
+
+static void imx_receive(void *opaque, const uint8_t *buf, int size)
+{
+    imx_put_data(opaque, *buf);
+}
+
+static void imx_event(void *opaque, int event)
+{
+    if (event == CHR_EVENT_BREAK) {
+        imx_put_data(opaque, URXD_BRK);
+    }
+}
+
+
+static const struct MemoryRegionOps imx_serial_ops = {
+    .read = imx_serial_read,
+    .write = imx_serial_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static int imx_serial_init(SysBusDevice *dev)
+{
+    IMXSerialState *s = FROM_SYSBUS(IMXSerialState, dev);
+
+
+    memory_region_init_io(&s->iomem, &imx_serial_ops, s, "imx-serial", 0x1000);
+    sysbus_init_mmio(dev, &s->iomem);
+    sysbus_init_irq(dev, &s->irq);
+
+    if (s->chr) {
+        qemu_chr_add_handlers(s->chr, imx_can_receive, imx_receive,
+                              imx_event, s);
+    } else {
+        DPRINTF("No char dev for uart at 0x%lx\n",
+                (unsigned long)s->iomem.ram_addr);
+    }
+
+    return 0;
+}
+
+void imx_serial_create(int uart, const target_phys_addr_t addr, qemu_irq irq)
+{
+    DeviceState *dev;
+    SysBusDevice *bus;
+    CharDriverState *chr;
+    const char chr_name[] = "serial";
+    char label[ARRAY_SIZE(chr_name) + 1];
+
+    dev = qdev_create(NULL, "imx-serial");
+
+    if (uart >= MAX_SERIAL_PORTS) {
+        hw_error("Cannot assign uart %d: QEMU supports only %d ports\n",
+                 uart, MAX_SERIAL_PORTS);
+    }
+    chr = serial_hds[uart];
+    if (!chr) {
+        snprintf(label, ARRAY_SIZE(label), "%s%d", chr_name, uart);
+        chr = qemu_chr_new(label, "null", NULL);
+        if (!(chr)) {
+            hw_error("Can't assign serial port to imx-uart%d.\n", uart);
+        }
+    }
+
+    qdev_prop_set_chr(dev, "chardev", chr);
+    bus = sysbus_from_qdev(dev);
+    qdev_init_nofail(dev);
+    if (addr != (target_phys_addr_t)-1) {
+        sysbus_mmio_map(bus, 0, addr);
+    }
+    sysbus_connect_irq(bus, 0, irq);
+
+}
+
+
+static Property imx32_serial_properties[] = {
+    DEFINE_PROP_CHR("chardev", IMXSerialState, chr),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void imx_serial_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+
+    k->init = imx_serial_init;
+    dc->vmsd = &vmstate_imx_serial;
+    dc->reset = imx_serial_reset_at_boot;
+    dc->desc = "i.MX series UART";
+    dc->props = imx32_serial_properties;
+}
+
+static TypeInfo imx_serial_info = {
+    .name = "imx-serial",
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(IMXSerialState),
+    .class_init = imx_serial_class_init,
+};
+
+static void imx_serial_register_types(void)
+{
+    type_register_static(&imx_serial_info);
+}
+
+type_init(imx_serial_register_types)
diff --git a/hw/imx_timer.c b/hw/imx_timer.c
new file mode 100644
index 0000000000..16215ccf04
--- /dev/null
+++ b/hw/imx_timer.c
@@ -0,0 +1,689 @@
+/*
+ * IMX31 Timer
+ *
+ * Copyright (c) 2008 OK Labs
+ * Copyright (c) 2011 NICTA Pty Ltd
+ * Originally Written by Hans Jiang
+ * Updated by Peter Chubb
+ *
+ * This code is licenced under GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "hw.h"
+#include "qemu-timer.h"
+#include "ptimer.h"
+#include "sysbus.h"
+#include "imx.h"
+
+//#define DEBUG_TIMER 1
+#ifdef DEBUG_TIMER
+#  define DPRINTF(fmt, args...) \
+      do { printf("imx_timer: " fmt , ##args); } while (0)
+#else
+#  define DPRINTF(fmt, args...) do {} while (0)
+#endif
+
+/*
+ * Define to 1 for messages about attempts to
+ * access unimplemented registers or similar.
+ */
+#define DEBUG_IMPLEMENTATION 1
+#if DEBUG_IMPLEMENTATION
+#  define IPRINTF(fmt, args...)                                         \
+    do  { fprintf(stderr, "imx_timer: " fmt, ##args); } while (0)
+#else
+#  define IPRINTF(fmt, args...) do {} while (0)
+#endif
+
+/*
+ * GPT : General purpose timer
+ *
+ * This timer counts up continuously while it is enabled, resetting itself
+ * to 0 when it reaches TIMER_MAX (in freerun mode) or when it
+ * reaches the value of ocr1 (in periodic mode).  WE simulate this using a
+ * QEMU ptimer counting down from ocr1 and reloading from ocr1 in
+ * periodic mode, or counting from ocr1 to zero, then TIMER_MAX - ocr1.
+ * waiting_rov is set when counting from TIMER_MAX.
+ *
+ * In the real hardware, there are three comparison registers that can
+ * trigger interrupts, and compare channel 1 can be used to
+ * force-reset the timer. However, this is a `bare-bones'
+ * implementation: only what Linux 3.x uses has been implemented
+ * (free-running timer from 0 to OCR1 or TIMER_MAX) .
+ */
+
+
+#define TIMER_MAX  0XFFFFFFFFUL
+
+/* Control register.  Not all of these bits have any effect (yet) */
+#define GPT_CR_EN     (1 << 0)  /* GPT Enable */
+#define GPT_CR_ENMOD  (1 << 1)  /* GPT Enable Mode */
+#define GPT_CR_DBGEN  (1 << 2)  /* GPT Debug mode enable */
+#define GPT_CR_WAITEN (1 << 3)  /* GPT Wait Mode Enable  */
+#define GPT_CR_DOZEN  (1 << 4)  /* GPT Doze mode enable */
+#define GPT_CR_STOPEN (1 << 5)  /* GPT Stop Mode Enable */
+#define GPT_CR_CLKSRC_SHIFT (6)
+#define GPT_CR_CLKSRC_MASK  (0x7)
+
+#define GPT_CR_FRR    (1 << 9)  /* Freerun or Restart */
+#define GPT_CR_SWR    (1 << 15) /* Software Reset */
+#define GPT_CR_IM1    (3 << 16) /* Input capture channel 1 mode (2 bits) */
+#define GPT_CR_IM2    (3 << 18) /* Input capture channel 2 mode (2 bits) */
+#define GPT_CR_OM1    (7 << 20) /* Output Compare Channel 1 Mode (3 bits) */
+#define GPT_CR_OM2    (7 << 23) /* Output Compare Channel 2 Mode (3 bits) */
+#define GPT_CR_OM3    (7 << 26) /* Output Compare Channel 3 Mode (3 bits) */
+#define GPT_CR_FO1    (1 << 29) /* Force Output Compare Channel 1 */
+#define GPT_CR_FO2    (1 << 30) /* Force Output Compare Channel 2 */
+#define GPT_CR_FO3    (1 << 31) /* Force Output Compare Channel 3 */
+
+#define GPT_SR_OF1  (1 << 0)
+#define GPT_SR_ROV  (1 << 5)
+
+#define GPT_IR_OF1IE  (1 << 0)
+#define GPT_IR_ROVIE  (1 << 5)
+
+typedef struct {
+    SysBusDevice busdev;
+    ptimer_state *timer;
+    MemoryRegion iomem;
+    DeviceState *ccm;
+
+    uint32_t cr;
+    uint32_t pr;
+    uint32_t sr;
+    uint32_t ir;
+    uint32_t ocr1;
+    uint32_t cnt;
+
+    uint32_t waiting_rov;
+    qemu_irq irq;
+} IMXTimerGState;
+
+static const VMStateDescription vmstate_imx_timerg = {
+    .name = "imx-timerg",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField[]) {
+        VMSTATE_UINT32(cr, IMXTimerGState),
+        VMSTATE_UINT32(pr, IMXTimerGState),
+        VMSTATE_UINT32(sr, IMXTimerGState),
+        VMSTATE_UINT32(ir, IMXTimerGState),
+        VMSTATE_UINT32(ocr1, IMXTimerGState),
+        VMSTATE_UINT32(cnt, IMXTimerGState),
+        VMSTATE_UINT32(waiting_rov, IMXTimerGState),
+        VMSTATE_PTIMER(timer, IMXTimerGState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const IMXClk imx_timerg_clocks[] = {
+    NOCLK,    /* 000 No clock source */
+    IPG,      /* 001 ipg_clk, 532MHz*/
+    IPG,      /* 010 ipg_clk_highfreq */
+    NOCLK,    /* 011 not defined */
+    CLK_32k,  /* 100 ipg_clk_32k */
+    NOCLK,    /* 101 not defined */
+    NOCLK,    /* 110 not defined */
+    NOCLK,    /* 111 not defined */
+};
+
+
+static void imx_timerg_set_freq(IMXTimerGState *s)
+{
+    int clksrc;
+    uint32_t freq;
+
+    clksrc = (s->cr >> GPT_CR_CLKSRC_SHIFT) & GPT_CR_CLKSRC_MASK;
+    freq = imx_clock_frequency(s->ccm, imx_timerg_clocks[clksrc]) / (1 + s->pr);
+
+    DPRINTF("Setting gtimer clksrc %d to frequency %d\n", clksrc, freq);
+    if (freq) {
+        ptimer_set_freq(s->timer, freq);
+    }
+}
+
+static void imx_timerg_update(IMXTimerGState *s)
+{
+    uint32_t flags = s->sr & s->ir & (GPT_SR_OF1 | GPT_SR_ROV);
+
+    DPRINTF("g-timer SR: %s %s IR=%s %s, %s\n",
+            s->sr & GPT_SR_OF1 ? "OF1" : "",
+            s->sr & GPT_SR_ROV ? "ROV" : "",
+            s->ir & GPT_SR_OF1 ? "OF1" : "",
+            s->ir & GPT_SR_ROV ? "ROV" : "",
+            s->cr & GPT_CR_EN ? "CR_EN" : "Not Enabled");
+
+
+    qemu_set_irq(s->irq, (s->cr & GPT_CR_EN) && flags);
+}
+
+static uint32_t imx_timerg_update_counts(IMXTimerGState *s)
+{
+    uint64_t target = s->waiting_rov ? TIMER_MAX : s->ocr1;
+    uint64_t cnt = ptimer_get_count(s->timer);
+    s->cnt = target - cnt;
+    return s->cnt;
+}
+
+static void imx_timerg_reload(IMXTimerGState *s, uint32_t timeout)
+{
+    uint64_t diff_cnt;
+
+    if (!(s->cr & GPT_CR_FRR)) {
+        IPRINTF("IMX_timerg_reload --- called in reset-mode\n");
+        return;
+    }
+
+    /*
+     * For small timeouts, qemu sometimes runs too slow.
+     * Better deliver a late interrupt than none.
+     *
+     * In Reset mode (FRR bit clear)
+     * the ptimer reloads itself from OCR1;
+     * in free-running mode we need to fake
+     * running from 0 to ocr1 to TIMER_MAX
+     */
+    if (timeout > s->cnt) {
+        diff_cnt = timeout - s->cnt;
+    } else {
+        diff_cnt = 0;
+    }
+    ptimer_set_count(s->timer, diff_cnt);
+}
+
+static uint64_t imx_timerg_read(void *opaque, target_phys_addr_t offset,
+                                unsigned size)
+{
+    IMXTimerGState *s = (IMXTimerGState *)opaque;
+
+    DPRINTF("g-read(offset=%x)", offset >> 2);
+    switch (offset >> 2) {
+    case 0: /* Control Register */
+        DPRINTF(" cr = %x\n", s->cr);
+        return s->cr;
+
+    case 1: /* prescaler */
+        DPRINTF(" pr = %x\n", s->pr);
+        return s->pr;
+
+    case 2: /* Status Register */
+        DPRINTF(" sr = %x\n", s->sr);
+        return s->sr;
+
+    case 3: /* Interrupt Register */
+        DPRINTF(" ir = %x\n", s->ir);
+        return s->ir;
+
+    case 4: /* Output Compare Register 1 */
+        DPRINTF(" ocr1 = %x\n", s->ocr1);
+        return s->ocr1;
+
+
+    case 9: /* cnt */
+        imx_timerg_update_counts(s);
+        DPRINTF(" cnt = %x\n", s->cnt);
+        return s->cnt;
+    }
+
+    IPRINTF("imx_timerg_read: Bad offset %x\n",
+            (int)offset >> 2);
+    return 0;
+}
+
+static void imx_timerg_reset(DeviceState *dev)
+{
+    IMXTimerGState *s = container_of(dev, IMXTimerGState, busdev.qdev);
+
+    /*
+     * Soft reset doesn't touch some bits; hard reset clears them
+     */
+    s->cr &= ~(GPT_CR_EN|GPT_CR_DOZEN|GPT_CR_WAITEN|GPT_CR_DBGEN);
+    s->sr = 0;
+    s->pr = 0;
+    s->ir = 0;
+    s->cnt = 0;
+    s->ocr1 = TIMER_MAX;
+    ptimer_stop(s->timer);
+    ptimer_set_limit(s->timer, TIMER_MAX, 1);
+    imx_timerg_set_freq(s);
+}
+
+static void imx_timerg_write(void *opaque, target_phys_addr_t offset,
+                             uint64_t value, unsigned size)
+{
+    IMXTimerGState *s = (IMXTimerGState *)opaque;
+    DPRINTF("g-write(offset=%x, value = 0x%x)\n", (unsigned int)offset >> 2,
+            (unsigned int)value);
+
+    switch (offset >> 2) {
+    case 0: {
+        uint32_t oldcr = s->cr;
+        /* CR */
+        if (value & GPT_CR_SWR) { /* force reset */
+            value &= ~GPT_CR_SWR;
+            imx_timerg_reset(&s->busdev.qdev);
+            imx_timerg_update(s);
+        }
+
+        s->cr = value & ~0x7c00;
+        imx_timerg_set_freq(s);
+        if ((oldcr ^ value) & GPT_CR_EN) {
+            if (value & GPT_CR_EN) {
+                if (value & GPT_CR_ENMOD) {
+                    ptimer_set_count(s->timer, s->ocr1);
+                    s->cnt = 0;
+                }
+                ptimer_run(s->timer,
+                           (value & GPT_CR_FRR) && (s->ocr1 != TIMER_MAX));
+            } else {
+                ptimer_stop(s->timer);
+            };
+        }
+        return;
+    }
+
+    case 1: /* Prescaler */
+        s->pr = value & 0xfff;
+        imx_timerg_set_freq(s);
+        return;
+
+    case 2: /* SR */
+        /*
+         * No point in implementing the status register bits to do with
+         * external interrupt sources.
+         */
+        value &= GPT_SR_OF1 | GPT_SR_ROV;
+        s->sr &= ~value;
+        imx_timerg_update(s);
+        return;
+
+    case 3: /* IR -- interrupt register */
+        s->ir = value & 0x3f;
+        imx_timerg_update(s);
+        return;
+
+    case 4: /* OCR1 -- output compare register */
+        /* In non-freerun mode, reset count when this register is written */
+        if (!(s->cr & GPT_CR_FRR)) {
+            s->waiting_rov = 0;
+            ptimer_set_limit(s->timer, value, 1);
+        } else {
+            imx_timerg_update_counts(s);
+            if (value > s->cnt) {
+                s->waiting_rov = 0;
+                imx_timerg_reload(s, value);
+            } else {
+                s->waiting_rov = 1;
+                imx_timerg_reload(s, TIMER_MAX - s->cnt);
+            }
+        }
+        s->ocr1 = value;
+        return;
+
+    default:
+        IPRINTF("imx_timerg_write: Bad offset %x\n",
+                (int)offset >> 2);
+    }
+}
+
+static void imx_timerg_timeout(void *opaque)
+{
+    IMXTimerGState *s = (IMXTimerGState *)opaque;
+
+    DPRINTF("imx_timerg_timeout, waiting rov=%d\n", s->waiting_rov);
+    if (s->cr & GPT_CR_FRR) {
+        /*
+         * Free running timer from 0 -> TIMERMAX
+         * Generates interrupt at TIMER_MAX and at cnt==ocr1
+         * If ocr1 == TIMER_MAX, then no need to reload timer.
+         */
+        if (s->ocr1 == TIMER_MAX) {
+            DPRINTF("s->ocr1 == TIMER_MAX, FRR\n");
+            s->sr |= GPT_SR_OF1 | GPT_SR_ROV;
+            imx_timerg_update(s);
+            return;
+        }
+
+        if (s->waiting_rov) {
+            /*
+             * We were waiting for cnt==TIMER_MAX
+             */
+            s->sr |= GPT_SR_ROV;
+            s->waiting_rov = 0;
+            s->cnt = 0;
+            imx_timerg_reload(s, s->ocr1);
+        } else {
+            /* Must have got a cnt==ocr1 timeout. */
+            s->sr |= GPT_SR_OF1;
+            s->cnt = s->ocr1;
+            s->waiting_rov = 1;
+            imx_timerg_reload(s, TIMER_MAX);
+        }
+        imx_timerg_update(s);
+        return;
+    }
+
+    s->sr |= GPT_SR_OF1;
+    imx_timerg_update(s);
+}
+
+static const MemoryRegionOps imx_timerg_ops = {
+    .read = imx_timerg_read,
+    .write = imx_timerg_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+
+static int imx_timerg_init(SysBusDevice *dev)
+{
+    IMXTimerGState *s = FROM_SYSBUS(IMXTimerGState, dev);
+    QEMUBH *bh;
+
+    sysbus_init_irq(dev, &s->irq);
+    memory_region_init_io(&s->iomem, &imx_timerg_ops,
+                          s, "imxg-timer",
+                          0x00001000);
+    sysbus_init_mmio(dev, &s->iomem);
+
+    bh = qemu_bh_new(imx_timerg_timeout, s);
+    s->timer = ptimer_init(bh);
+
+    /* Hard reset resets extra bits in CR */
+    s->cr = 0;
+    return 0;
+}
+
+
+
+/*
+ * EPIT: Enhanced periodic interrupt timer
+ */
+
+#define CR_EN       (1 << 0)
+#define CR_ENMOD    (1 << 1)
+#define CR_OCIEN    (1 << 2)
+#define CR_RLD      (1 << 3)
+#define CR_PRESCALE_SHIFT (4)
+#define CR_PRESCALE_MASK  (0xfff)
+#define CR_SWR      (1 << 16)
+#define CR_IOVW     (1 << 17)
+#define CR_DBGEN    (1 << 18)
+#define CR_EPIT     (1 << 19)
+#define CR_DOZEN    (1 << 20)
+#define CR_STOPEN   (1 << 21)
+#define CR_CLKSRC_SHIFT (24)
+#define CR_CLKSRC_MASK  (0x3 << CR_CLKSRC_SHIFT)
+
+
+/*
+ * Exact clock frequencies vary from board to board.
+ * These are typical.
+ */
+static const IMXClk imx_timerp_clocks[] =  {
+    0,        /* disabled */
+    IPG, /* ipg_clk, ~532MHz */
+    IPG, /* ipg_clk_highfreq */
+    CLK_32k,    /* ipg_clk_32k -- ~32kHz */
+};
+
+typedef struct {
+    SysBusDevice busdev;
+    ptimer_state *timer;
+    MemoryRegion iomem;
+    DeviceState *ccm;
+
+    uint32_t cr;
+    uint32_t lr;
+    uint32_t cmp;
+
+    uint32_t freq;
+    int int_level;
+    qemu_irq irq;
+} IMXTimerPState;
+
+/*
+ * Update interrupt status
+ */
+static void imx_timerp_update(IMXTimerPState *s)
+{
+    if (s->int_level && (s->cr & CR_OCIEN)) {
+        qemu_irq_raise(s->irq);
+    } else {
+        qemu_irq_lower(s->irq);
+    }
+}
+
+static void imx_timerp_reset(DeviceState *dev)
+{
+    IMXTimerPState *s = container_of(dev, IMXTimerPState, busdev.qdev);
+
+    s->cr = 0;
+    s->lr = TIMER_MAX;
+    s->int_level = 0;
+    s->cmp = 0;
+    ptimer_stop(s->timer);
+    ptimer_set_count(s->timer, TIMER_MAX);
+}
+
+static uint64_t imx_timerp_read(void *opaque, target_phys_addr_t offset,
+                                unsigned size)
+{
+    IMXTimerPState *s = (IMXTimerPState *)opaque;
+
+    DPRINTF("p-read(offset=%x)", offset >> 2);
+    switch (offset >> 2) {
+    case 0: /* Control Register */
+        DPRINTF("cr %x\n", s->cr);
+        return s->cr;
+
+    case 1: /* Status Register */
+        DPRINTF("int_level %x\n", s->int_level);
+        return s->int_level;
+
+    case 2: /* LR - ticks*/
+        DPRINTF("lr %x\n", s->lr);
+        return s->lr;
+
+    case 3: /* CMP */
+        DPRINTF("cmp %x\n", s->cmp);
+        return s->cmp;
+
+    case 4: /* CNT */
+        return ptimer_get_count(s->timer);
+    }
+    IPRINTF("imx_timerp_read: Bad offset %x\n",
+            (int)offset >> 2);
+    return 0;
+}
+
+static void set_timerp_freq(IMXTimerPState *s)
+{
+    int clksrc;
+    unsigned prescaler;
+    uint32_t freq;
+
+    clksrc = (s->cr & CR_CLKSRC_MASK) >> CR_CLKSRC_SHIFT;
+    prescaler = 1 + ((s->cr >> CR_PRESCALE_SHIFT) & CR_PRESCALE_MASK);
+    freq = imx_clock_frequency(s->ccm, imx_timerp_clocks[clksrc]) / prescaler;
+
+    s->freq = freq;
+    DPRINTF("Setting ptimer frequency to %u\n", freq);
+
+    if (freq) {
+        ptimer_set_freq(s->timer, freq);
+    }
+}
+
+static void imx_timerp_write(void *opaque, target_phys_addr_t offset,
+                             uint64_t value, unsigned size)
+{
+    IMXTimerPState *s = (IMXTimerPState *)opaque;
+    DPRINTF("p-write(offset=%x, value = %x)\n", (unsigned int)offset >> 2,
+            (unsigned int)value);
+
+    switch (offset >> 2) {
+    case 0: /* CR */
+        if (value & CR_SWR) {
+            imx_timerp_reset(&s->busdev.qdev);
+            value &= ~CR_SWR;
+        }
+        s->cr = value & 0x03ffffff;
+        set_timerp_freq(s);
+
+        if (s->freq && (s->cr & CR_EN)) {
+            if (!(s->cr & CR_ENMOD)) {
+                ptimer_set_count(s->timer, s->lr);
+            }
+            ptimer_run(s->timer, 0);
+        } else {
+            ptimer_stop(s->timer);
+        }
+        break;
+
+    case 1: /* SR - ACK*/
+        s->int_level = 0;
+        imx_timerp_update(s);
+        break;
+
+    case 2: /* LR - set ticks */
+        s->lr = value;
+        ptimer_set_limit(s->timer, value, !!(s->cr & CR_IOVW));
+        break;
+
+    case 3: /* CMP */
+        s->cmp = value;
+        if (value) {
+            IPRINTF(
+                "Values for EPIT comparison other than zero not supported\n"
+            );
+        }
+        break;
+
+    default:
+        IPRINTF("imx_timerp_write: Bad offset %x\n",
+                   (int)offset >> 2);
+    }
+}
+
+static void imx_timerp_tick(void *opaque)
+{
+    IMXTimerPState *s = (IMXTimerPState *)opaque;
+
+   DPRINTF("imxp tick\n");
+    if (!(s->cr & CR_RLD)) {
+        ptimer_set_count(s->timer, TIMER_MAX);
+    }
+    s->int_level = 1;
+    imx_timerp_update(s);
+}
+
+void imx_timerp_create(const target_phys_addr_t addr,
+                              qemu_irq irq,
+                              DeviceState *ccm)
+{
+    IMXTimerPState *pp;
+    DeviceState *dev;
+
+    dev = sysbus_create_simple("imx_timerp", addr, irq);
+    pp = container_of(dev, IMXTimerPState, busdev.qdev);
+    pp->ccm = ccm;
+}
+
+static const MemoryRegionOps imx_timerp_ops = {
+  .read = imx_timerp_read,
+  .write = imx_timerp_write,
+  .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription vmstate_imx_timerp = {
+    .name = "imx-timerp",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField[]) {
+        VMSTATE_UINT32(cr, IMXTimerPState),
+        VMSTATE_UINT32(lr, IMXTimerPState),
+        VMSTATE_UINT32(cmp, IMXTimerPState),
+        VMSTATE_UINT32(freq, IMXTimerPState),
+        VMSTATE_INT32(int_level, IMXTimerPState),
+        VMSTATE_PTIMER(timer, IMXTimerPState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static int imx_timerp_init(SysBusDevice *dev)
+{
+    IMXTimerPState *s = FROM_SYSBUS(IMXTimerPState, dev);
+    QEMUBH *bh;
+
+    DPRINTF("imx_timerp_init\n");
+
+    sysbus_init_irq(dev, &s->irq);
+    memory_region_init_io(&s->iomem, &imx_timerp_ops,
+                          s, "imxp-timer",
+                          0x00001000);
+    sysbus_init_mmio(dev, &s->iomem);
+
+    bh = qemu_bh_new(imx_timerp_tick, s);
+    s->timer = ptimer_init(bh);
+
+    return 0;
+}
+
+
+void imx_timerg_create(const target_phys_addr_t addr,
+                              qemu_irq irq,
+                              DeviceState *ccm)
+{
+    IMXTimerGState *pp;
+    DeviceState *dev;
+
+    dev = sysbus_create_simple("imx_timerg", addr, irq);
+    pp = container_of(dev, IMXTimerGState, busdev.qdev);
+    pp->ccm = ccm;
+}
+
+static void imx_timerg_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc  = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+    k->init = imx_timerg_init;
+    dc->vmsd = &vmstate_imx_timerg;
+    dc->reset = imx_timerg_reset;
+    dc->desc = "i.MX general timer";
+}
+
+static void imx_timerp_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc  = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+    k->init = imx_timerp_init;
+    dc->vmsd = &vmstate_imx_timerp;
+    dc->reset = imx_timerp_reset;
+    dc->desc = "i.MX periodic timer";
+}
+
+static const TypeInfo imx_timerp_info = {
+    .name = "imx_timerp",
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(IMXTimerPState),
+    .class_init = imx_timerp_class_init,
+};
+
+static const TypeInfo imx_timerg_info = {
+    .name = "imx_timerg",
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(IMXTimerGState),
+    .class_init = imx_timerg_class_init,
+};
+
+static void imx_timer_register_types(void)
+{
+    type_register_static(&imx_timerp_info);
+    type_register_static(&imx_timerg_info);
+}
+
+type_init(imx_timer_register_types)
diff --git a/hw/kzm.c b/hw/kzm.c
new file mode 100644
index 0000000000..08aaf43231
--- /dev/null
+++ b/hw/kzm.c
@@ -0,0 +1,154 @@
+/*
+ * KZM Board System emulation.
+ *
+ * Copyright (c) 2008 OKL and 2011 NICTA
+ * Written by Hans at OK-Labs
+ * Updated by Peter Chubb.
+ *
+ * This code is licenced under the GPL, version 2 or later.
+ * See the file `COPYING' in the top level directory.
+ *
+ * It (partially) emulates a Kyoto Microcomputer
+ * KZM-ARM11-01 evaluation board, with a Freescale
+ * i.MX31 SoC
+ */
+
+#include "sysbus.h"
+#include "exec-memory.h"
+#include "hw.h"
+#include "arm-misc.h"
+#include "devices.h"
+#include "net.h"
+#include "sysemu.h"
+#include "boards.h"
+#include "pc.h" /* for the FPGA UART that emulates a 16550 */
+#include "imx.h"
+
+    /* Memory map for Kzm Emulation Baseboard:
+     * 0x00000000-0x00003fff 16k secure ROM       IGNORED
+     * 0x00004000-0x00407fff Reserved             IGNORED
+     * 0x00404000-0x00407fff ROM                  IGNORED
+     * 0x00408000-0x0fffffff Reserved             IGNORED
+     * 0x10000000-0x1fffbfff RAM aliasing         IGNORED
+     * 0x1fffc000-0x1fffffff RAM                  EMULATED
+     * 0x20000000-0x2fffffff Reserved             IGNORED
+     * 0x30000000-0x7fffffff I.MX31 Internal Register Space
+     *   0x43f00000 IO_AREA0
+     *   0x43f90000 UART1                         EMULATED
+     *   0x43f94000 UART2                         EMULATED
+     *   0x68000000 AVIC                          EMULATED
+     *   0x53f80000 CCM                           EMULATED
+     *   0x53f94000 PIT 1                         EMULATED
+     *   0x53f98000 PIT 2                         EMULATED
+     *   0x53f90000 GPT                           EMULATED
+     * 0x80000000-0x87ffffff RAM                  EMULATED
+     * 0x88000000-0x8fffffff RAM Aliasing         EMULATED
+     * 0xa0000000-0xafffffff NAND Flash           IGNORED
+     * 0xb0000000-0xb3ffffff Unavailable          IGNORED
+     * 0xb4000000-0xb4000fff 8-bit free space     IGNORED
+     * 0xb4001000-0xb400100f Board control        IGNORED
+     *  0xb4001003           DIP switch
+     * 0xb4001010-0xb400101f 7-segment LED        IGNORED
+     * 0xb4001020-0xb400102f LED                  IGNORED
+     * 0xb4001030-0xb400103f LED                  IGNORED
+     * 0xb4001040-0xb400104f FPGA, UART           EMULATED
+     * 0xb4001050-0xb400105f FPGA, UART           EMULATED
+     * 0xb4001060-0xb40fffff FPGA                 IGNORED
+     * 0xb6000000-0xb61fffff LAN controller       EMULATED
+     * 0xb6200000-0xb62fffff FPGA NAND Controller IGNORED
+     * 0xb6300000-0xb7ffffff Free                 IGNORED
+     * 0xb8000000-0xb8004fff Memory control registers IGNORED
+     * 0xc0000000-0xc3ffffff PCMCIA/CF            IGNORED
+     * 0xc4000000-0xffffffff Reserved             IGNORED
+     */
+
+#define KZM_RAMADDRESS (0x80000000)
+#define KZM_FPGA       (0xb4001040)
+
+static struct arm_boot_info kzm_binfo = {
+    .loader_start = KZM_RAMADDRESS,
+    .board_id = 1722,
+};
+
+static void kzm_init(ram_addr_t ram_size,
+                     const char *boot_device,
+                     const char *kernel_filename, const char *kernel_cmdline,
+                     const char *initrd_filename, const char *cpu_model)
+{
+    ARMCPU *cpu;
+    MemoryRegion *address_space_mem = get_system_memory();
+    MemoryRegion *ram = g_new(MemoryRegion, 1);
+    MemoryRegion *sram = g_new(MemoryRegion, 1);
+    MemoryRegion *ram_alias = g_new(MemoryRegion, 1);
+    qemu_irq *cpu_pic;
+    DeviceState *dev;
+    DeviceState *ccm;
+
+    if (!cpu_model) {
+        cpu_model = "arm1136";
+    }
+
+    cpu = cpu_arm_init(cpu_model);
+    if (!cpu) {
+        fprintf(stderr, "Unable to find CPU definition\n");
+        exit(1);
+    }
+
+    /* On a real system, the first 16k is a `secure boot rom' */
+
+    memory_region_init_ram(ram, "kzm.ram", ram_size);
+    vmstate_register_ram_global(ram);
+    memory_region_add_subregion(address_space_mem, KZM_RAMADDRESS, ram);
+
+    memory_region_init_alias(ram_alias, "ram.alias", ram, 0, ram_size);
+    memory_region_add_subregion(address_space_mem, 0x88000000, ram_alias);
+
+    memory_region_init_ram(sram, "kzm.sram", 0x4000);
+    memory_region_add_subregion(address_space_mem, 0x1FFFC000, sram);
+
+    cpu_pic = arm_pic_init_cpu(cpu);
+    dev = sysbus_create_varargs("imx_avic", 0x68000000,
+                                cpu_pic[ARM_PIC_CPU_IRQ],
+                                cpu_pic[ARM_PIC_CPU_FIQ], NULL);
+
+
+    imx_serial_create(0, 0x43f90000, qdev_get_gpio_in(dev, 45));
+    imx_serial_create(1, 0x43f94000, qdev_get_gpio_in(dev, 32));
+
+    ccm = sysbus_create_simple("imx_ccm", 0x53f80000, NULL);
+
+    imx_timerp_create(0x53f94000, qdev_get_gpio_in(dev, 28), ccm);
+    imx_timerp_create(0x53f98000, qdev_get_gpio_in(dev, 27), ccm);
+    imx_timerg_create(0x53f90000, qdev_get_gpio_in(dev, 29), ccm);
+
+    if (nd_table[0].vlan) {
+        lan9118_init(&nd_table[0], 0xb6000000, qdev_get_gpio_in(dev, 52));
+    }
+
+    if (serial_hds[2]) { /* touchscreen */
+        serial_mm_init(address_space_mem, KZM_FPGA+0x10, 0,
+                       qdev_get_gpio_in(dev, 52),
+                       14745600, serial_hds[2],
+                       DEVICE_NATIVE_ENDIAN);
+    }
+
+    kzm_binfo.ram_size = ram_size;
+    kzm_binfo.kernel_filename = kernel_filename;
+    kzm_binfo.kernel_cmdline = kernel_cmdline;
+    kzm_binfo.initrd_filename = initrd_filename;
+    kzm_binfo.nb_cpus = 1;
+    arm_load_kernel(cpu, &kzm_binfo);
+}
+
+static QEMUMachine kzm_machine = {
+    .name = "kzm",
+    .desc = "ARM KZM Emulation Baseboard (ARM1136)",
+    .init = kzm_init,
+};
+
+static void kzm_machine_init(void)
+{
+    qemu_register_machine(&kzm_machine);
+}
+
+machine_init(kzm_machine_init)
diff --git a/hw/megasas.c b/hw/megasas.c
new file mode 100644
index 0000000000..b99fa9792e
--- /dev/null
+++ b/hw/megasas.c
@@ -0,0 +1,2198 @@
+/*
+ * QEMU MegaRAID SAS 8708EM2 Host Bus Adapter emulation
+ * Based on the linux driver code at drivers/scsi/megaraid
+ *
+ * Copyright (c) 2009-2012 Hannes Reinecke, SUSE Labs
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw.h"
+#include "pci.h"
+#include "dma.h"
+#include "msix.h"
+#include "iov.h"
+#include "scsi.h"
+#include "scsi-defs.h"
+#include "block_int.h"
+#include "trace.h"
+
+#include "mfi.h"
+
+#define MEGASAS_VERSION "1.70"
+#define MEGASAS_MAX_FRAMES 2048         /* Firmware limit at 65535 */
+#define MEGASAS_DEFAULT_FRAMES 1000     /* Windows requires this */
+#define MEGASAS_MAX_SGE 128             /* Firmware limit */
+#define MEGASAS_DEFAULT_SGE 80
+#define MEGASAS_MAX_SECTORS 0xFFFF      /* No real limit */
+#define MEGASAS_MAX_ARRAYS 128
+
+#define MEGASAS_FLAG_USE_JBOD      0
+#define MEGASAS_MASK_USE_JBOD      (1 << MEGASAS_FLAG_USE_JBOD)
+#define MEGASAS_FLAG_USE_MSIX      1
+#define MEGASAS_MASK_USE_MSIX      (1 << MEGASAS_FLAG_USE_MSIX)
+#define MEGASAS_FLAG_USE_QUEUE64   2
+#define MEGASAS_MASK_USE_QUEUE64   (1 << MEGASAS_FLAG_USE_QUEUE64)
+
+static const char *mfi_frame_desc[] = {
+    "MFI init", "LD Read", "LD Write", "LD SCSI", "PD SCSI",
+    "MFI Doorbell", "MFI Abort", "MFI SMP", "MFI Stop"};
+
+typedef struct MegasasCmd {
+    uint32_t index;
+    uint16_t flags;
+    uint16_t count;
+    uint64_t context;
+
+    target_phys_addr_t pa;
+    target_phys_addr_t pa_size;
+    union mfi_frame *frame;
+    SCSIRequest *req;
+    QEMUSGList qsg;
+    void *iov_buf;
+    size_t iov_size;
+    size_t iov_offset;
+    struct MegasasState *state;
+} MegasasCmd;
+
+typedef struct MegasasState {
+    PCIDevice dev;
+    MemoryRegion mmio_io;
+    MemoryRegion port_io;
+    MemoryRegion queue_io;
+    uint32_t frame_hi;
+
+    int fw_state;
+    uint32_t fw_sge;
+    uint32_t fw_cmds;
+    uint32_t flags;
+    int fw_luns;
+    int intr_mask;
+    int doorbell;
+    int busy;
+
+    MegasasCmd *event_cmd;
+    int event_locale;
+    int event_class;
+    int event_count;
+    int shutdown_event;
+    int boot_event;
+
+    uint64_t reply_queue_pa;
+    void *reply_queue;
+    int reply_queue_len;
+    int reply_queue_head;
+    int reply_queue_tail;
+    uint64_t consumer_pa;
+    uint64_t producer_pa;
+
+    MegasasCmd frames[MEGASAS_MAX_FRAMES];
+
+    SCSIBus bus;
+} MegasasState;
+
+#define MEGASAS_INTR_DISABLED_MASK 0xFFFFFFFF
+
+static bool megasas_intr_enabled(MegasasState *s)
+{
+    if ((s->intr_mask & MEGASAS_INTR_DISABLED_MASK) !=
+        MEGASAS_INTR_DISABLED_MASK) {
+        return true;
+    }
+    return false;
+}
+
+static bool megasas_use_queue64(MegasasState *s)
+{
+    return s->flags & MEGASAS_MASK_USE_QUEUE64;
+}
+
+static bool megasas_use_msix(MegasasState *s)
+{
+    return s->flags & MEGASAS_MASK_USE_MSIX;
+}
+
+static bool megasas_is_jbod(MegasasState *s)
+{
+    return s->flags & MEGASAS_MASK_USE_JBOD;
+}
+
+static void megasas_frame_set_cmd_status(unsigned long frame, uint8_t v)
+{
+    stb_phys(frame + offsetof(struct mfi_frame_header, cmd_status), v);
+}
+
+static void megasas_frame_set_scsi_status(unsigned long frame, uint8_t v)
+{
+    stb_phys(frame + offsetof(struct mfi_frame_header, scsi_status), v);
+}
+
+/*
+ * Context is considered opaque, but the HBA firmware is running
+ * in little endian mode. So convert it to little endian, too.
+ */
+static uint64_t megasas_frame_get_context(unsigned long frame)
+{
+    return ldq_le_phys(frame + offsetof(struct mfi_frame_header, context));
+}
+
+static bool megasas_frame_is_ieee_sgl(MegasasCmd *cmd)
+{
+    return cmd->flags & MFI_FRAME_IEEE_SGL;
+}
+
+static bool megasas_frame_is_sgl64(MegasasCmd *cmd)
+{
+    return cmd->flags & MFI_FRAME_SGL64;
+}
+
+static bool megasas_frame_is_sense64(MegasasCmd *cmd)
+{
+    return cmd->flags & MFI_FRAME_SENSE64;
+}
+
+static uint64_t megasas_sgl_get_addr(MegasasCmd *cmd,
+                                     union mfi_sgl *sgl)
+{
+    uint64_t addr;
+
+    if (megasas_frame_is_ieee_sgl(cmd)) {
+        addr = le64_to_cpu(sgl->sg_skinny->addr);
+    } else if (megasas_frame_is_sgl64(cmd)) {
+        addr = le64_to_cpu(sgl->sg64->addr);
+    } else {
+        addr = le32_to_cpu(sgl->sg32->addr);
+    }
+    return addr;
+}
+
+static uint32_t megasas_sgl_get_len(MegasasCmd *cmd,
+                                    union mfi_sgl *sgl)
+{
+    uint32_t len;
+
+    if (megasas_frame_is_ieee_sgl(cmd)) {
+        len = le32_to_cpu(sgl->sg_skinny->len);
+    } else if (megasas_frame_is_sgl64(cmd)) {
+        len = le32_to_cpu(sgl->sg64->len);
+    } else {
+        len = le32_to_cpu(sgl->sg32->len);
+    }
+    return len;
+}
+
+static union mfi_sgl *megasas_sgl_next(MegasasCmd *cmd,
+                                       union mfi_sgl *sgl)
+{
+    uint8_t *next = (uint8_t *)sgl;
+
+    if (megasas_frame_is_ieee_sgl(cmd)) {
+        next += sizeof(struct mfi_sg_skinny);
+    } else if (megasas_frame_is_sgl64(cmd)) {
+        next += sizeof(struct mfi_sg64);
+    } else {
+        next += sizeof(struct mfi_sg32);
+    }
+
+    if (next >= (uint8_t *)cmd->frame + cmd->pa_size) {
+        return NULL;
+    }
+    return (union mfi_sgl *)next;
+}
+
+static void megasas_soft_reset(MegasasState *s);
+
+static int megasas_map_sgl(MegasasState *s, MegasasCmd *cmd, union mfi_sgl *sgl)
+{
+    int i;
+    int iov_count = 0;
+    size_t iov_size = 0;
+
+    cmd->flags = le16_to_cpu(cmd->frame->header.flags);
+    iov_count = cmd->frame->header.sge_count;
+    if (iov_count > MEGASAS_MAX_SGE) {
+        trace_megasas_iovec_sgl_overflow(cmd->index, iov_count,
+                                         MEGASAS_MAX_SGE);
+        return iov_count;
+    }
+    qemu_sglist_init(&cmd->qsg, iov_count, pci_dma_context(&s->dev));
+    for (i = 0; i < iov_count; i++) {
+        dma_addr_t iov_pa, iov_size_p;
+
+        if (!sgl) {
+            trace_megasas_iovec_sgl_underflow(cmd->index, i);
+            goto unmap;
+        }
+        iov_pa = megasas_sgl_get_addr(cmd, sgl);
+        iov_size_p = megasas_sgl_get_len(cmd, sgl);
+        if (!iov_pa || !iov_size_p) {
+            trace_megasas_iovec_sgl_invalid(cmd->index, i,
+                                            iov_pa, iov_size_p);
+            goto unmap;
+        }
+        qemu_sglist_add(&cmd->qsg, iov_pa, iov_size_p);
+        sgl = megasas_sgl_next(cmd, sgl);
+        iov_size += (size_t)iov_size_p;
+    }
+    if (cmd->iov_size > iov_size) {
+        trace_megasas_iovec_overflow(cmd->index, iov_size, cmd->iov_size);
+    } else if (cmd->iov_size < iov_size) {
+        trace_megasas_iovec_underflow(cmd->iov_size, iov_size, cmd->iov_size);
+    }
+    cmd->iov_offset = 0;
+    return 0;
+unmap:
+    qemu_sglist_destroy(&cmd->qsg);
+    return iov_count - i;
+}
+
+static void megasas_unmap_sgl(MegasasCmd *cmd)
+{
+    qemu_sglist_destroy(&cmd->qsg);
+    cmd->iov_offset = 0;
+}
+
+/*
+ * passthrough sense and io sense are at the same offset
+ */
+static int megasas_build_sense(MegasasCmd *cmd, uint8_t *sense_ptr,
+    uint8_t sense_len)
+{
+    uint32_t pa_hi = 0, pa_lo;
+    target_phys_addr_t pa;
+
+    if (sense_len > cmd->frame->header.sense_len) {
+        sense_len = cmd->frame->header.sense_len;
+    }
+    if (sense_len) {
+        pa_lo = le32_to_cpu(cmd->frame->pass.sense_addr_lo);
+        if (megasas_frame_is_sense64(cmd)) {
+            pa_hi = le32_to_cpu(cmd->frame->pass.sense_addr_hi);
+        }
+        pa = ((uint64_t) pa_hi << 32) | pa_lo;
+        cpu_physical_memory_write(pa, sense_ptr, sense_len);
+        cmd->frame->header.sense_len = sense_len;
+    }
+    return sense_len;
+}
+
+static void megasas_write_sense(MegasasCmd *cmd, SCSISense sense)
+{
+    uint8_t sense_buf[SCSI_SENSE_BUF_SIZE];
+    uint8_t sense_len = 18;
+
+    memset(sense_buf, 0, sense_len);
+    sense_buf[0] = 0xf0;
+    sense_buf[2] = sense.key;
+    sense_buf[7] = 10;
+    sense_buf[12] = sense.asc;
+    sense_buf[13] = sense.ascq;
+    megasas_build_sense(cmd, sense_buf, sense_len);
+}
+
+static void megasas_copy_sense(MegasasCmd *cmd)
+{
+    uint8_t sense_buf[SCSI_SENSE_BUF_SIZE];
+    uint8_t sense_len;
+
+    sense_len = scsi_req_get_sense(cmd->req, sense_buf,
+                                   SCSI_SENSE_BUF_SIZE);
+    megasas_build_sense(cmd, sense_buf, sense_len);
+}
+
+/*
+ * Format an INQUIRY CDB
+ */
+static int megasas_setup_inquiry(uint8_t *cdb, int pg, int len)
+{
+    memset(cdb, 0, 6);
+    cdb[0] = INQUIRY;
+    if (pg > 0) {
+        cdb[1] = 0x1;
+        cdb[2] = pg;
+    }
+    cdb[3] = (len >> 8) & 0xff;
+    cdb[4] = (len & 0xff);
+    return len;
+}
+
+/*
+ * Encode lba and len into a READ_16/WRITE_16 CDB
+ */
+static void megasas_encode_lba(uint8_t *cdb, uint64_t lba,
+                               uint32_t len, bool is_write)
+{
+    memset(cdb, 0x0, 16);
+    if (is_write) {
+        cdb[0] = WRITE_16;
+    } else {
+        cdb[0] = READ_16;
+    }
+    cdb[2] = (lba >> 56) & 0xff;
+    cdb[3] = (lba >> 48) & 0xff;
+    cdb[4] = (lba >> 40) & 0xff;
+    cdb[5] = (lba >> 32) & 0xff;
+    cdb[6] = (lba >> 24) & 0xff;
+    cdb[7] = (lba >> 16) & 0xff;
+    cdb[8] = (lba >> 8) & 0xff;
+    cdb[9] = (lba) & 0xff;
+    cdb[10] = (len >> 24) & 0xff;
+    cdb[11] = (len >> 16) & 0xff;
+    cdb[12] = (len >> 8) & 0xff;
+    cdb[13] = (len) & 0xff;
+}
+
+/*
+ * Utility functions
+ */
+static uint64_t megasas_fw_time(void)
+{
+    struct tm curtime;
+    uint64_t bcd_time;
+
+    qemu_get_timedate(&curtime, 0);
+    bcd_time = ((uint64_t)curtime.tm_sec & 0xff) << 48 |
+        ((uint64_t)curtime.tm_min & 0xff)  << 40 |
+        ((uint64_t)curtime.tm_hour & 0xff) << 32 |
+        ((uint64_t)curtime.tm_mday & 0xff) << 24 |
+        ((uint64_t)curtime.tm_mon & 0xff)  << 16 |
+        ((uint64_t)(curtime.tm_year + 1900) & 0xffff);
+
+    return bcd_time;
+}
+
+static uint64_t megasas_gen_sas_addr(uint64_t id)
+{
+    uint64_t addr;
+
+    addr = 0x5001a4aULL << 36;
+    addr |= id & 0xfffffffff;
+
+    return addr;
+}
+
+/*
+ * Frame handling
+ */
+static int megasas_next_index(MegasasState *s, int index, int limit)
+{
+    index++;
+    if (index == limit) {
+        index = 0;
+    }
+    return index;
+}
+
+static MegasasCmd *megasas_lookup_frame(MegasasState *s,
+    target_phys_addr_t frame)
+{
+    MegasasCmd *cmd = NULL;
+    int num = 0, index;
+
+    index = s->reply_queue_head;
+
+    while (num < s->fw_cmds) {
+        if (s->frames[index].pa && s->frames[index].pa == frame) {
+            cmd = &s->frames[index];
+            break;
+        }
+        index = megasas_next_index(s, index, s->fw_cmds);
+        num++;
+    }
+
+    return cmd;
+}
+
+static MegasasCmd *megasas_next_frame(MegasasState *s,
+    target_phys_addr_t frame)
+{
+    MegasasCmd *cmd = NULL;
+    int num = 0, index;
+
+    cmd = megasas_lookup_frame(s, frame);
+    if (cmd) {
+        trace_megasas_qf_found(cmd->index, cmd->pa);
+        return cmd;
+    }
+    index = s->reply_queue_head;
+    num = 0;
+    while (num < s->fw_cmds) {
+        if (!s->frames[index].pa) {
+            cmd = &s->frames[index];
+            break;
+        }
+        index = megasas_next_index(s, index, s->fw_cmds);
+        num++;
+    }
+    if (!cmd) {
+        trace_megasas_qf_failed(frame);
+    }
+    trace_megasas_qf_new(index, cmd);
+    return cmd;
+}
+
+static MegasasCmd *megasas_enqueue_frame(MegasasState *s,
+    target_phys_addr_t frame, uint64_t context, int count)
+{
+    MegasasCmd *cmd = NULL;
+    int frame_size = MFI_FRAME_SIZE * 16;
+    target_phys_addr_t frame_size_p = frame_size;
+
+    cmd = megasas_next_frame(s, frame);
+    /* All frames busy */
+    if (!cmd) {
+        return NULL;
+    }
+    if (!cmd->pa) {
+        cmd->pa = frame;
+        /* Map all possible frames */
+        cmd->frame = cpu_physical_memory_map(frame, &frame_size_p, 0);
+        if (frame_size_p != frame_size) {
+            trace_megasas_qf_map_failed(cmd->index, (unsigned long)frame);
+            if (cmd->frame) {
+                cpu_physical_memory_unmap(cmd->frame, frame_size_p, 0, 0);
+                cmd->frame = NULL;
+                cmd->pa = 0;
+            }
+            s->event_count++;
+            return NULL;
+        }
+        cmd->pa_size = frame_size_p;
+        cmd->context = context;
+        if (!megasas_use_queue64(s)) {
+            cmd->context &= (uint64_t)0xFFFFFFFF;
+        }
+    }
+    cmd->count = count;
+    s->busy++;
+
+    trace_megasas_qf_enqueue(cmd->index, cmd->count, cmd->context,
+                             s->reply_queue_head, s->busy);
+
+    return cmd;
+}
+
+static void megasas_complete_frame(MegasasState *s, uint64_t context)
+{
+    int tail, queue_offset;
+
+    /* Decrement busy count */
+    s->busy--;
+
+    if (s->reply_queue_pa) {
+        /*
+         * Put command on the reply queue.
+         * Context is opaque, but emulation is running in
+         * little endian. So convert it.
+         */
+        tail = s->reply_queue_head;
+        if (megasas_use_queue64(s)) {
+            queue_offset = tail * sizeof(uint64_t);
+            stq_le_phys(s->reply_queue_pa + queue_offset, context);
+        } else {
+            queue_offset = tail * sizeof(uint32_t);
+            stl_le_phys(s->reply_queue_pa + queue_offset, context);
+        }
+        s->reply_queue_head = megasas_next_index(s, tail, s->fw_cmds);
+        trace_megasas_qf_complete(context, tail, queue_offset,
+                                  s->busy, s->doorbell);
+    }
+
+    if (megasas_intr_enabled(s)) {
+        /* Notify HBA */
+        s->doorbell++;
+        if (s->doorbell == 1) {
+            if (msix_enabled(&s->dev)) {
+                trace_megasas_msix_raise(0);
+                msix_notify(&s->dev, 0);
+            } else {
+                trace_megasas_irq_raise();
+                qemu_irq_raise(s->dev.irq[0]);
+            }
+        }
+    } else {
+        trace_megasas_qf_complete_noirq(context);
+    }
+}
+
+static void megasas_reset_frames(MegasasState *s)
+{
+    int i;
+    MegasasCmd *cmd;
+
+    for (i = 0; i < s->fw_cmds; i++) {
+        cmd = &s->frames[i];
+        if (cmd->pa) {
+            cpu_physical_memory_unmap(cmd->frame, cmd->pa_size, 0, 0);
+            cmd->frame = NULL;
+            cmd->pa = 0;
+        }
+    }
+}
+
+static void megasas_abort_command(MegasasCmd *cmd)
+{
+    if (cmd->req) {
+        scsi_req_abort(cmd->req, ABORTED_COMMAND);
+        cmd->req = NULL;
+    }
+}
+
+static int megasas_init_firmware(MegasasState *s, MegasasCmd *cmd)
+{
+    uint32_t pa_hi, pa_lo;
+    target_phys_addr_t iq_pa, initq_size;
+    struct mfi_init_qinfo *initq;
+    uint32_t flags;
+    int ret = MFI_STAT_OK;
+
+    pa_lo = le32_to_cpu(cmd->frame->init.qinfo_new_addr_lo);
+    pa_hi = le32_to_cpu(cmd->frame->init.qinfo_new_addr_hi);
+    iq_pa = (((uint64_t) pa_hi << 32) | pa_lo);
+    trace_megasas_init_firmware((uint64_t)iq_pa);
+    initq_size = sizeof(*initq);
+    initq = cpu_physical_memory_map(iq_pa, &initq_size, 0);
+    if (!initq || initq_size != sizeof(*initq)) {
+        trace_megasas_initq_map_failed(cmd->index);
+        s->event_count++;
+        ret = MFI_STAT_MEMORY_NOT_AVAILABLE;
+        goto out;
+    }
+    s->reply_queue_len = le32_to_cpu(initq->rq_entries) & 0xFFFF;
+    if (s->reply_queue_len > s->fw_cmds) {
+        trace_megasas_initq_mismatch(s->reply_queue_len, s->fw_cmds);
+        s->event_count++;
+        ret = MFI_STAT_INVALID_PARAMETER;
+        goto out;
+    }
+    pa_lo = le32_to_cpu(initq->rq_addr_lo);
+    pa_hi = le32_to_cpu(initq->rq_addr_hi);
+    s->reply_queue_pa = ((uint64_t) pa_hi << 32) | pa_lo;
+    pa_lo = le32_to_cpu(initq->ci_addr_lo);
+    pa_hi = le32_to_cpu(initq->ci_addr_hi);
+    s->consumer_pa = ((uint64_t) pa_hi << 32) | pa_lo;
+    pa_lo = le32_to_cpu(initq->pi_addr_lo);
+    pa_hi = le32_to_cpu(initq->pi_addr_hi);
+    s->producer_pa = ((uint64_t) pa_hi << 32) | pa_lo;
+    s->reply_queue_head = ldl_le_phys(s->producer_pa);
+    s->reply_queue_tail = ldl_le_phys(s->consumer_pa);
+    flags = le32_to_cpu(initq->flags);
+    if (flags & MFI_QUEUE_FLAG_CONTEXT64) {
+        s->flags |= MEGASAS_MASK_USE_QUEUE64;
+    }
+    trace_megasas_init_queue((unsigned long)s->reply_queue_pa,
+                             s->reply_queue_len, s->reply_queue_head,
+                             s->reply_queue_tail, flags);
+    megasas_reset_frames(s);
+    s->fw_state = MFI_FWSTATE_OPERATIONAL;
+out:
+    if (initq) {
+        cpu_physical_memory_unmap(initq, initq_size, 0, 0);
+    }
+    return ret;
+}
+
+static int megasas_map_dcmd(MegasasState *s, MegasasCmd *cmd)
+{
+    dma_addr_t iov_pa, iov_size;
+
+    cmd->flags = le16_to_cpu(cmd->frame->header.flags);
+    if (!cmd->frame->header.sge_count) {
+        trace_megasas_dcmd_zero_sge(cmd->index);
+        cmd->iov_size = 0;
+        return 0;
+    } else if (cmd->frame->header.sge_count > 1) {
+        trace_megasas_dcmd_invalid_sge(cmd->index,
+                                       cmd->frame->header.sge_count);
+        cmd->iov_size = 0;
+        return -1;
+    }
+    iov_pa = megasas_sgl_get_addr(cmd, &cmd->frame->dcmd.sgl);
+    iov_size = megasas_sgl_get_len(cmd, &cmd->frame->dcmd.sgl);
+    qemu_sglist_init(&cmd->qsg, 1, pci_dma_context(&s->dev));
+    qemu_sglist_add(&cmd->qsg, iov_pa, iov_size);
+    cmd->iov_size = iov_size;
+    return cmd->iov_size;
+}
+
+static void megasas_finish_dcmd(MegasasCmd *cmd, uint32_t iov_size)
+{
+    trace_megasas_finish_dcmd(cmd->index, iov_size);
+
+    if (cmd->frame->header.sge_count) {
+        qemu_sglist_destroy(&cmd->qsg);
+    }
+    if (iov_size > cmd->iov_size) {
+        if (megasas_frame_is_ieee_sgl(cmd)) {
+            cmd->frame->dcmd.sgl.sg_skinny->len = cpu_to_le32(iov_size);
+        } else if (megasas_frame_is_sgl64(cmd)) {
+            cmd->frame->dcmd.sgl.sg64->len = cpu_to_le32(iov_size);
+        } else {
+            cmd->frame->dcmd.sgl.sg32->len = cpu_to_le32(iov_size);
+        }
+    }
+    cmd->iov_size = 0;
+    return;
+}
+
+static int megasas_ctrl_get_info(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_ctrl_info info;
+    size_t dcmd_size = sizeof(info);
+    BusChild *kid;
+    int num_ld_disks = 0;
+
+    QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {
+        num_ld_disks++;
+    }
+
+    memset(&info, 0x0, cmd->iov_size);
+    if (cmd->iov_size < dcmd_size) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            dcmd_size);
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    info.pci.vendor = cpu_to_le16(PCI_VENDOR_ID_LSI_LOGIC);
+    info.pci.device = cpu_to_le16(PCI_DEVICE_ID_LSI_SAS1078);
+    info.pci.subvendor = cpu_to_le16(PCI_VENDOR_ID_LSI_LOGIC);
+    info.pci.subdevice = cpu_to_le16(0x1013);
+
+    info.host.type = MFI_INFO_HOST_PCIX;
+    info.device.type = MFI_INFO_DEV_SAS3G;
+    info.device.port_count = 2;
+    info.device.port_addr[0] = cpu_to_le64(megasas_gen_sas_addr((uint64_t)s));
+
+    memcpy(info.product_name, "MegaRAID SAS 8708EM2", 20);
+    snprintf(info.serial_number, 32, "QEMU%08lx",
+             (unsigned long)s & 0xFFFFFFFF);
+    snprintf(info.package_version, 0x60, "%s-QEMU", QEMU_VERSION);
+    memcpy(info.image_component[0].name, "APP", 3);
+    memcpy(info.image_component[0].version, MEGASAS_VERSION "-QEMU", 9);
+    memcpy(info.image_component[0].build_date, __DATE__, 11);
+    memcpy(info.image_component[0].build_time, __TIME__, 8);
+    info.image_component_count = 1;
+    if (s->dev.has_rom) {
+        uint8_t biosver[32];
+        uint8_t *ptr;
+
+        ptr = memory_region_get_ram_ptr(&s->dev.rom);
+        memcpy(biosver, ptr + 0x41, 31);
+        qemu_put_ram_ptr(ptr);
+        memcpy(info.image_component[1].name, "BIOS", 4);
+        memcpy(info.image_component[1].version, biosver,
+               strlen((const char *)biosver));
+        info.image_component_count++;
+    }
+    info.current_fw_time = cpu_to_le32(megasas_fw_time());
+    info.max_arms = 32;
+    info.max_spans = 8;
+    info.max_arrays = MEGASAS_MAX_ARRAYS;
+    info.max_lds = s->fw_luns;
+    info.max_cmds = cpu_to_le16(s->fw_cmds);
+    info.max_sg_elements = cpu_to_le16(s->fw_sge);
+    info.max_request_size = cpu_to_le32(MEGASAS_MAX_SECTORS);
+    info.lds_present = cpu_to_le16(num_ld_disks);
+    info.pd_present = cpu_to_le16(num_ld_disks);
+    info.pd_disks_present = cpu_to_le16(num_ld_disks);
+    info.hw_present = cpu_to_le32(MFI_INFO_HW_NVRAM |
+                                   MFI_INFO_HW_MEM |
+                                   MFI_INFO_HW_FLASH);
+    info.memory_size = cpu_to_le16(512);
+    info.nvram_size = cpu_to_le16(32);
+    info.flash_size = cpu_to_le16(16);
+    info.raid_levels = cpu_to_le32(MFI_INFO_RAID_0);
+    info.adapter_ops = cpu_to_le32(MFI_INFO_AOPS_RBLD_RATE |
+                                    MFI_INFO_AOPS_SELF_DIAGNOSTIC |
+                                    MFI_INFO_AOPS_MIXED_ARRAY);
+    info.ld_ops = cpu_to_le32(MFI_INFO_LDOPS_DISK_CACHE_POLICY |
+                               MFI_INFO_LDOPS_ACCESS_POLICY |
+                               MFI_INFO_LDOPS_IO_POLICY |
+                               MFI_INFO_LDOPS_WRITE_POLICY |
+                               MFI_INFO_LDOPS_READ_POLICY);
+    info.max_strips_per_io = cpu_to_le16(s->fw_sge);
+    info.stripe_sz_ops.min = 3;
+    info.stripe_sz_ops.max = ffs(MEGASAS_MAX_SECTORS + 1) - 1;
+    info.properties.pred_fail_poll_interval = cpu_to_le16(300);
+    info.properties.intr_throttle_cnt = cpu_to_le16(16);
+    info.properties.intr_throttle_timeout = cpu_to_le16(50);
+    info.properties.rebuild_rate = 30;
+    info.properties.patrol_read_rate = 30;
+    info.properties.bgi_rate = 30;
+    info.properties.cc_rate = 30;
+    info.properties.recon_rate = 30;
+    info.properties.cache_flush_interval = 4;
+    info.properties.spinup_drv_cnt = 2;
+    info.properties.spinup_delay = 6;
+    info.properties.ecc_bucket_size = 15;
+    info.properties.ecc_bucket_leak_rate = cpu_to_le16(1440);
+    info.properties.expose_encl_devices = 1;
+    info.properties.OnOffProperties = cpu_to_le32(MFI_CTRL_PROP_EnableJBOD);
+    info.pd_ops = cpu_to_le32(MFI_INFO_PDOPS_FORCE_ONLINE |
+                               MFI_INFO_PDOPS_FORCE_OFFLINE);
+    info.pd_mix_support = cpu_to_le32(MFI_INFO_PDMIX_SAS |
+                                       MFI_INFO_PDMIX_SATA |
+                                       MFI_INFO_PDMIX_LD);
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_mfc_get_defaults(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_defaults info;
+    size_t dcmd_size = sizeof(struct mfi_defaults);
+
+    memset(&info, 0x0, dcmd_size);
+    if (cmd->iov_size < dcmd_size) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            dcmd_size);
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    info.sas_addr = cpu_to_le64(megasas_gen_sas_addr((uint64_t)s));
+    info.stripe_size = 3;
+    info.flush_time = 4;
+    info.background_rate = 30;
+    info.allow_mix_in_enclosure = 1;
+    info.allow_mix_in_ld = 1;
+    info.direct_pd_mapping = 1;
+    /* Enable for BIOS support */
+    info.bios_enumerate_lds = 1;
+    info.disable_ctrl_r = 1;
+    info.expose_enclosure_devices = 1;
+    info.disable_preboot_cli = 1;
+    info.cluster_disable = 1;
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_get_bios_info(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_bios_data info;
+    size_t dcmd_size = sizeof(info);
+
+    memset(&info, 0x0, dcmd_size);
+    if (cmd->iov_size < dcmd_size) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            dcmd_size);
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+    info.continue_on_error = 1;
+    info.verbose = 1;
+    if (megasas_is_jbod(s)) {
+        info.expose_all_drives = 1;
+    }
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_get_fw_time(MegasasState *s, MegasasCmd *cmd)
+{
+    uint64_t fw_time;
+    size_t dcmd_size = sizeof(fw_time);
+
+    fw_time = cpu_to_le64(megasas_fw_time());
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)&fw_time, dcmd_size, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_set_fw_time(MegasasState *s, MegasasCmd *cmd)
+{
+    uint64_t fw_time;
+
+    /* This is a dummy; setting of firmware time is not allowed */
+    memcpy(&fw_time, cmd->frame->dcmd.mbox, sizeof(fw_time));
+
+    trace_megasas_dcmd_set_fw_time(cmd->index, fw_time);
+    fw_time = cpu_to_le64(megasas_fw_time());
+    return MFI_STAT_OK;
+}
+
+static int megasas_event_info(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_evt_log_state info;
+    size_t dcmd_size = sizeof(info);
+
+    memset(&info, 0, dcmd_size);
+
+    info.newest_seq_num = cpu_to_le32(s->event_count);
+    info.shutdown_seq_num = cpu_to_le32(s->shutdown_event);
+    info.boot_seq_num = cpu_to_le32(s->boot_event);
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_event_wait(MegasasState *s, MegasasCmd *cmd)
+{
+    union mfi_evt event;
+
+    if (cmd->iov_size < sizeof(struct mfi_evt_detail)) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            sizeof(struct mfi_evt_detail));
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+    s->event_count = cpu_to_le32(cmd->frame->dcmd.mbox[0]);
+    event.word = cpu_to_le32(cmd->frame->dcmd.mbox[4]);
+    s->event_locale = event.members.locale;
+    s->event_class = event.members.class;
+    s->event_cmd = cmd;
+    /* Decrease busy count; event frame doesn't count here */
+    s->busy--;
+    cmd->iov_size = sizeof(struct mfi_evt_detail);
+    return MFI_STAT_INVALID_STATUS;
+}
+
+static int megasas_dcmd_pd_get_list(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_pd_list info;
+    size_t dcmd_size = sizeof(info);
+    BusChild *kid;
+    uint32_t offset, dcmd_limit, num_pd_disks = 0, max_pd_disks;
+    uint16_t sdev_id;
+
+    memset(&info, 0, dcmd_size);
+    offset = 8;
+    dcmd_limit = offset + sizeof(struct mfi_pd_address);
+    if (cmd->iov_size < dcmd_limit) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            dcmd_limit);
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    max_pd_disks = (cmd->iov_size - offset) / sizeof(struct mfi_pd_address);
+    if (max_pd_disks > s->fw_luns) {
+        max_pd_disks = s->fw_luns;
+    }
+
+    QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {
+        SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child);
+
+        sdev_id = ((sdev->id & 0xFF) >> 8) | (sdev->lun & 0xFF);
+        info.addr[num_pd_disks].device_id = cpu_to_le16(sdev_id);
+        info.addr[num_pd_disks].encl_device_id = 0xFFFF;
+        info.addr[num_pd_disks].encl_index = 0;
+        info.addr[num_pd_disks].slot_number = (sdev->id & 0xFF);
+        info.addr[num_pd_disks].scsi_dev_type = sdev->type;
+        info.addr[num_pd_disks].connect_port_bitmap = 0x1;
+        info.addr[num_pd_disks].sas_addr[0] =
+            cpu_to_le64(megasas_gen_sas_addr((uint64_t)sdev));
+        num_pd_disks++;
+        offset += sizeof(struct mfi_pd_address);
+    }
+    trace_megasas_dcmd_pd_get_list(cmd->index, num_pd_disks,
+                                   max_pd_disks, offset);
+
+    info.size = cpu_to_le32(offset);
+    info.count = cpu_to_le32(num_pd_disks);
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)&info, offset, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_pd_list_query(MegasasState *s, MegasasCmd *cmd)
+{
+    uint16_t flags;
+
+    /* mbox0 contains flags */
+    flags = le16_to_cpu(cmd->frame->dcmd.mbox[0]);
+    trace_megasas_dcmd_pd_list_query(cmd->index, flags);
+    if (flags == MR_PD_QUERY_TYPE_ALL ||
+        megasas_is_jbod(s)) {
+        return megasas_dcmd_pd_get_list(s, cmd);
+    }
+
+    return MFI_STAT_OK;
+}
+
+static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun,
+                                      MegasasCmd *cmd)
+{
+    struct mfi_pd_info *info = cmd->iov_buf;
+    size_t dcmd_size = sizeof(struct mfi_pd_info);
+    BlockConf *conf = &sdev->conf;
+    uint64_t pd_size;
+    uint16_t sdev_id = ((sdev->id & 0xFF) >> 8) | (lun & 0xFF);
+    uint8_t cmdbuf[6];
+    SCSIRequest *req;
+    size_t len, resid;
+
+    if (!cmd->iov_buf) {
+        cmd->iov_buf = g_malloc(dcmd_size);
+        memset(cmd->iov_buf, 0, dcmd_size);
+        info = cmd->iov_buf;
+        info->inquiry_data[0] = 0x7f; /* Force PQual 0x3, PType 0x1f */
+        info->vpd_page83[0] = 0x7f;
+        megasas_setup_inquiry(cmdbuf, 0, sizeof(info->inquiry_data));
+        req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd);
+        if (!req) {
+            trace_megasas_dcmd_req_alloc_failed(cmd->index,
+                                                "PD get info std inquiry");
+            g_free(cmd->iov_buf);
+            cmd->iov_buf = NULL;
+            return MFI_STAT_FLASH_ALLOC_FAIL;
+        }
+        trace_megasas_dcmd_internal_submit(cmd->index,
+                                           "PD get info std inquiry", lun);
+        len = scsi_req_enqueue(req);
+        if (len > 0) {
+            cmd->iov_size = len;
+            scsi_req_continue(req);
+        }
+        return MFI_STAT_INVALID_STATUS;
+    } else if (info->inquiry_data[0] != 0x7f && info->vpd_page83[0] == 0x7f) {
+        megasas_setup_inquiry(cmdbuf, 0x83, sizeof(info->vpd_page83));
+        req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd);
+        if (!req) {
+            trace_megasas_dcmd_req_alloc_failed(cmd->index,
+                                                "PD get info vpd inquiry");
+            return MFI_STAT_FLASH_ALLOC_FAIL;
+        }
+        trace_megasas_dcmd_internal_submit(cmd->index,
+                                           "PD get info vpd inquiry", lun);
+        len = scsi_req_enqueue(req);
+        if (len > 0) {
+            cmd->iov_size = len;
+            scsi_req_continue(req);
+        }
+        return MFI_STAT_INVALID_STATUS;
+    }
+    /* Finished, set FW state */
+    if ((info->inquiry_data[0] >> 5) == 0) {
+        if (megasas_is_jbod(cmd->state)) {
+            info->fw_state = cpu_to_le16(MFI_PD_STATE_SYSTEM);
+        } else {
+            info->fw_state = cpu_to_le16(MFI_PD_STATE_ONLINE);
+        }
+    } else {
+        info->fw_state = cpu_to_le16(MFI_PD_STATE_OFFLINE);
+    }
+
+    info->ref.v.device_id = cpu_to_le16(sdev_id);
+    info->state.ddf.pd_type = cpu_to_le16(MFI_PD_DDF_TYPE_IN_VD|
+                                          MFI_PD_DDF_TYPE_INTF_SAS);
+    bdrv_get_geometry(conf->bs, &pd_size);
+    info->raw_size = cpu_to_le64(pd_size);
+    info->non_coerced_size = cpu_to_le64(pd_size);
+    info->coerced_size = cpu_to_le64(pd_size);
+    info->encl_device_id = 0xFFFF;
+    info->slot_number = (sdev->id & 0xFF);
+    info->path_info.count = 1;
+    info->path_info.sas_addr[0] =
+        cpu_to_le64(megasas_gen_sas_addr((uint64_t)sdev));
+    info->connected_port_bitmap = 0x1;
+    info->device_speed = 1;
+    info->link_speed = 1;
+    resid = dma_buf_read(cmd->iov_buf, dcmd_size, &cmd->qsg);
+    g_free(cmd->iov_buf);
+    cmd->iov_size = dcmd_size - resid;
+    cmd->iov_buf = NULL;
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_pd_get_info(MegasasState *s, MegasasCmd *cmd)
+{
+    size_t dcmd_size = sizeof(struct mfi_pd_info);
+    uint16_t pd_id;
+    SCSIDevice *sdev = NULL;
+    int retval = MFI_STAT_DEVICE_NOT_FOUND;
+
+    if (cmd->iov_size < dcmd_size) {
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    /* mbox0 has the ID */
+    pd_id = le16_to_cpu(cmd->frame->dcmd.mbox[0]);
+    sdev = scsi_device_find(&s->bus, 0, pd_id, 0);
+    trace_megasas_dcmd_pd_get_info(cmd->index, pd_id);
+
+    if (sdev) {
+        /* Submit inquiry */
+        retval = megasas_pd_get_info_submit(sdev, pd_id, cmd);
+    }
+
+    return retval;
+}
+
+static int megasas_dcmd_ld_get_list(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_ld_list info;
+    size_t dcmd_size = sizeof(info), resid;
+    uint32_t num_ld_disks = 0, max_ld_disks = s->fw_luns;
+    uint64_t ld_size;
+    BusChild *kid;
+
+    memset(&info, 0, dcmd_size);
+    if (cmd->iov_size < dcmd_size) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            dcmd_size);
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    if (megasas_is_jbod(s)) {
+        max_ld_disks = 0;
+    }
+    QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {
+        SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child);
+        BlockConf *conf = &sdev->conf;
+
+        if (num_ld_disks >= max_ld_disks) {
+            break;
+        }
+        /* Logical device size is in blocks */
+        bdrv_get_geometry(conf->bs, &ld_size);
+        info.ld_list[num_ld_disks].ld.v.target_id = sdev->id;
+        info.ld_list[num_ld_disks].state = MFI_LD_STATE_OPTIMAL;
+        info.ld_list[num_ld_disks].size = cpu_to_le64(ld_size);
+        num_ld_disks++;
+    }
+    info.ld_count = cpu_to_le32(num_ld_disks);
+    trace_megasas_dcmd_ld_get_list(cmd->index, num_ld_disks, max_ld_disks);
+
+    resid = dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg);
+    cmd->iov_size = dcmd_size - resid;
+    return MFI_STAT_OK;
+}
+
+static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun,
+                                      MegasasCmd *cmd)
+{
+    struct mfi_ld_info *info = cmd->iov_buf;
+    size_t dcmd_size = sizeof(struct mfi_ld_info);
+    uint8_t cdb[6];
+    SCSIRequest *req;
+    ssize_t len, resid;
+    BlockConf *conf = &sdev->conf;
+    uint16_t sdev_id = ((sdev->id & 0xFF) >> 8) | (lun & 0xFF);
+    uint64_t ld_size;
+
+    if (!cmd->iov_buf) {
+        cmd->iov_buf = g_malloc(dcmd_size);
+        memset(cmd->iov_buf, 0x0, dcmd_size);
+        info = cmd->iov_buf;
+        megasas_setup_inquiry(cdb, 0x83, sizeof(info->vpd_page83));
+        req = scsi_req_new(sdev, cmd->index, lun, cdb, cmd);
+        if (!req) {
+            trace_megasas_dcmd_req_alloc_failed(cmd->index,
+                                                "LD get info vpd inquiry");
+            g_free(cmd->iov_buf);
+            cmd->iov_buf = NULL;
+            return MFI_STAT_FLASH_ALLOC_FAIL;
+        }
+        trace_megasas_dcmd_internal_submit(cmd->index,
+                                           "LD get info vpd inquiry", lun);
+        len = scsi_req_enqueue(req);
+        if (len > 0) {
+            cmd->iov_size = len;
+            scsi_req_continue(req);
+        }
+        return MFI_STAT_INVALID_STATUS;
+    }
+
+    info->ld_config.params.state = MFI_LD_STATE_OPTIMAL;
+    info->ld_config.properties.ld.v.target_id = lun;
+    info->ld_config.params.stripe_size = 3;
+    info->ld_config.params.num_drives = 1;
+    info->ld_config.params.is_consistent = 1;
+    /* Logical device size is in blocks */
+    bdrv_get_geometry(conf->bs, &ld_size);
+    info->size = cpu_to_le64(ld_size);
+    memset(info->ld_config.span, 0, sizeof(info->ld_config.span));
+    info->ld_config.span[0].start_block = 0;
+    info->ld_config.span[0].num_blocks = info->size;
+    info->ld_config.span[0].array_ref = cpu_to_le16(sdev_id);
+
+    resid = dma_buf_read(cmd->iov_buf, dcmd_size, &cmd->qsg);
+    g_free(cmd->iov_buf);
+    cmd->iov_size = dcmd_size - resid;
+    cmd->iov_buf = NULL;
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_ld_get_info(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_ld_info info;
+    size_t dcmd_size = sizeof(info);
+    uint16_t ld_id;
+    uint32_t max_ld_disks = s->fw_luns;
+    SCSIDevice *sdev = NULL;
+    int retval = MFI_STAT_DEVICE_NOT_FOUND;
+
+    if (cmd->iov_size < dcmd_size) {
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    /* mbox0 has the ID */
+    ld_id = le16_to_cpu(cmd->frame->dcmd.mbox[0]);
+    trace_megasas_dcmd_ld_get_info(cmd->index, ld_id);
+
+    if (megasas_is_jbod(s)) {
+        return MFI_STAT_DEVICE_NOT_FOUND;
+    }
+
+    if (ld_id < max_ld_disks) {
+        sdev = scsi_device_find(&s->bus, 0, ld_id, 0);
+    }
+
+    if (sdev) {
+        retval = megasas_ld_get_info_submit(sdev, ld_id, cmd);
+    }
+
+    return retval;
+}
+
+static int megasas_dcmd_cfg_read(MegasasState *s, MegasasCmd *cmd)
+{
+    uint8_t data[4096];
+    struct mfi_config_data *info;
+    int num_pd_disks = 0, array_offset, ld_offset;
+    BusChild *kid;
+
+    if (cmd->iov_size > 4096) {
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {
+        num_pd_disks++;
+    }
+    info = (struct mfi_config_data *)&data;
+    /*
+     * Array mapping:
+     * - One array per SCSI device
+     * - One logical drive per SCSI device
+     *   spanning the entire device
+     */
+    info->array_count = num_pd_disks;
+    info->array_size = sizeof(struct mfi_array) * num_pd_disks;
+    info->log_drv_count = num_pd_disks;
+    info->log_drv_size = sizeof(struct mfi_ld_config) * num_pd_disks;
+    info->spares_count = 0;
+    info->spares_size = sizeof(struct mfi_spare);
+    info->size = sizeof(struct mfi_config_data) + info->array_size +
+        info->log_drv_size;
+    if (info->size > 4096) {
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+
+    array_offset = sizeof(struct mfi_config_data);
+    ld_offset = array_offset + sizeof(struct mfi_array) * num_pd_disks;
+
+    QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {
+        SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child);
+        BlockConf *conf = &sdev->conf;
+        uint16_t sdev_id = ((sdev->id & 0xFF) >> 8) | (sdev->lun & 0xFF);
+        struct mfi_array *array;
+        struct mfi_ld_config *ld;
+        uint64_t pd_size;
+        int i;
+
+        array = (struct mfi_array *)(data + array_offset);
+        bdrv_get_geometry(conf->bs, &pd_size);
+        array->size = cpu_to_le64(pd_size);
+        array->num_drives = 1;
+        array->array_ref = cpu_to_le16(sdev_id);
+        array->pd[0].ref.v.device_id = cpu_to_le16(sdev_id);
+        array->pd[0].ref.v.seq_num = 0;
+        array->pd[0].fw_state = MFI_PD_STATE_ONLINE;
+        array->pd[0].encl.pd = 0xFF;
+        array->pd[0].encl.slot = (sdev->id & 0xFF);
+        for (i = 1; i < MFI_MAX_ROW_SIZE; i++) {
+            array->pd[i].ref.v.device_id = 0xFFFF;
+            array->pd[i].ref.v.seq_num = 0;
+            array->pd[i].fw_state = MFI_PD_STATE_UNCONFIGURED_GOOD;
+            array->pd[i].encl.pd = 0xFF;
+            array->pd[i].encl.slot = 0xFF;
+        }
+        array_offset += sizeof(struct mfi_array);
+        ld = (struct mfi_ld_config *)(data + ld_offset);
+        memset(ld, 0, sizeof(struct mfi_ld_config));
+        ld->properties.ld.v.target_id = (sdev->id & 0xFF);
+        ld->properties.default_cache_policy = MR_LD_CACHE_READ_AHEAD |
+            MR_LD_CACHE_READ_ADAPTIVE;
+        ld->properties.current_cache_policy = MR_LD_CACHE_READ_AHEAD |
+            MR_LD_CACHE_READ_ADAPTIVE;
+        ld->params.state = MFI_LD_STATE_OPTIMAL;
+        ld->params.stripe_size = 3;
+        ld->params.num_drives = 1;
+        ld->params.span_depth = 1;
+        ld->params.is_consistent = 1;
+        ld->span[0].start_block = 0;
+        ld->span[0].num_blocks = cpu_to_le64(pd_size);
+        ld->span[0].array_ref = cpu_to_le16(sdev_id);
+        ld_offset += sizeof(struct mfi_ld_config);
+    }
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)data, info->size, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_get_properties(MegasasState *s, MegasasCmd *cmd)
+{
+    struct mfi_ctrl_props info;
+    size_t dcmd_size = sizeof(info);
+
+    memset(&info, 0x0, dcmd_size);
+    if (cmd->iov_size < dcmd_size) {
+        trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
+                                            dcmd_size);
+        return MFI_STAT_INVALID_PARAMETER;
+    }
+    info.pred_fail_poll_interval = cpu_to_le16(300);
+    info.intr_throttle_cnt = cpu_to_le16(16);
+    info.intr_throttle_timeout = cpu_to_le16(50);
+    info.rebuild_rate = 30;
+    info.patrol_read_rate = 30;
+    info.bgi_rate = 30;
+    info.cc_rate = 30;
+    info.recon_rate = 30;
+    info.cache_flush_interval = 4;
+    info.spinup_drv_cnt = 2;
+    info.spinup_delay = 6;
+    info.ecc_bucket_size = 15;
+    info.ecc_bucket_leak_rate = cpu_to_le16(1440);
+    info.expose_encl_devices = 1;
+
+    cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg);
+    return MFI_STAT_OK;
+}
+
+static int megasas_cache_flush(MegasasState *s, MegasasCmd *cmd)
+{
+    qemu_aio_flush();
+    return MFI_STAT_OK;
+}
+
+static int megasas_ctrl_shutdown(MegasasState *s, MegasasCmd *cmd)
+{
+    s->fw_state = MFI_FWSTATE_READY;
+    return MFI_STAT_OK;
+}
+
+static int megasas_cluster_reset_ld(MegasasState *s, MegasasCmd *cmd)
+{
+    return MFI_STAT_INVALID_DCMD;
+}
+
+static int megasas_dcmd_set_properties(MegasasState *s, MegasasCmd *cmd)
+{
+    uint8_t *dummy = g_malloc(cmd->iov_size);
+
+    dma_buf_write(dummy, cmd->iov_size, &cmd->qsg);
+
+    trace_megasas_dcmd_dump_frame(0,
+            dummy[0x00], dummy[0x01], dummy[0x02], dummy[0x03],
+            dummy[0x04], dummy[0x05], dummy[0x06], dummy[0x07]);
+    trace_megasas_dcmd_dump_frame(1,
+            dummy[0x08], dummy[0x09], dummy[0x0a], dummy[0x0b],
+            dummy[0x0c], dummy[0x0d], dummy[0x0e], dummy[0x0f]);
+    trace_megasas_dcmd_dump_frame(2,
+            dummy[0x10], dummy[0x11], dummy[0x12], dummy[0x13],
+            dummy[0x14], dummy[0x15], dummy[0x16], dummy[0x17]);
+    trace_megasas_dcmd_dump_frame(3,
+            dummy[0x18], dummy[0x19], dummy[0x1a], dummy[0x1b],
+            dummy[0x1c], dummy[0x1d], dummy[0x1e], dummy[0x1f]);
+    trace_megasas_dcmd_dump_frame(4,
+            dummy[0x20], dummy[0x21], dummy[0x22], dummy[0x23],
+            dummy[0x24], dummy[0x25], dummy[0x26], dummy[0x27]);
+    trace_megasas_dcmd_dump_frame(5,
+            dummy[0x28], dummy[0x29], dummy[0x2a], dummy[0x2b],
+            dummy[0x2c], dummy[0x2d], dummy[0x2e], dummy[0x2f]);
+    trace_megasas_dcmd_dump_frame(6,
+            dummy[0x30], dummy[0x31], dummy[0x32], dummy[0x33],
+            dummy[0x34], dummy[0x35], dummy[0x36], dummy[0x37]);
+    trace_megasas_dcmd_dump_frame(7,
+            dummy[0x38], dummy[0x39], dummy[0x3a], dummy[0x3b],
+            dummy[0x3c], dummy[0x3d], dummy[0x3e], dummy[0x3f]);
+    g_free(dummy);
+    return MFI_STAT_OK;
+}
+
+static int megasas_dcmd_dummy(MegasasState *s, MegasasCmd *cmd)
+{
+    trace_megasas_dcmd_dummy(cmd->index, cmd->iov_size);
+    return MFI_STAT_OK;
+}
+
+static const struct dcmd_cmd_tbl_t {
+    int opcode;
+    const char *desc;
+    int (*func)(MegasasState *s, MegasasCmd *cmd);
+} dcmd_cmd_tbl[] = {
+    { MFI_DCMD_CTRL_MFI_HOST_MEM_ALLOC, "CTRL_HOST_MEM_ALLOC",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_GET_INFO, "CTRL_GET_INFO",
+      megasas_ctrl_get_info },
+    { MFI_DCMD_CTRL_GET_PROPERTIES, "CTRL_GET_PROPERTIES",
+      megasas_dcmd_get_properties },
+    { MFI_DCMD_CTRL_SET_PROPERTIES, "CTRL_SET_PROPERTIES",
+      megasas_dcmd_set_properties },
+    { MFI_DCMD_CTRL_ALARM_GET, "CTRL_ALARM_GET",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_ALARM_ENABLE, "CTRL_ALARM_ENABLE",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_ALARM_DISABLE, "CTRL_ALARM_DISABLE",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_ALARM_SILENCE, "CTRL_ALARM_SILENCE",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_ALARM_TEST, "CTRL_ALARM_TEST",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_EVENT_GETINFO, "CTRL_EVENT_GETINFO",
+      megasas_event_info },
+    { MFI_DCMD_CTRL_EVENT_GET, "CTRL_EVENT_GET",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_EVENT_WAIT, "CTRL_EVENT_WAIT",
+      megasas_event_wait },
+    { MFI_DCMD_CTRL_SHUTDOWN, "CTRL_SHUTDOWN",
+      megasas_ctrl_shutdown },
+    { MFI_DCMD_HIBERNATE_STANDBY, "CTRL_STANDBY",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_GET_TIME, "CTRL_GET_TIME",
+      megasas_dcmd_get_fw_time },
+    { MFI_DCMD_CTRL_SET_TIME, "CTRL_SET_TIME",
+      megasas_dcmd_set_fw_time },
+    { MFI_DCMD_CTRL_BIOS_DATA_GET, "CTRL_BIOS_DATA_GET",
+      megasas_dcmd_get_bios_info },
+    { MFI_DCMD_CTRL_FACTORY_DEFAULTS, "CTRL_FACTORY_DEFAULTS",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_MFC_DEFAULTS_GET, "CTRL_MFC_DEFAULTS_GET",
+      megasas_mfc_get_defaults },
+    { MFI_DCMD_CTRL_MFC_DEFAULTS_SET, "CTRL_MFC_DEFAULTS_SET",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CTRL_CACHE_FLUSH, "CTRL_CACHE_FLUSH",
+      megasas_cache_flush },
+    { MFI_DCMD_PD_GET_LIST, "PD_GET_LIST",
+      megasas_dcmd_pd_get_list },
+    { MFI_DCMD_PD_LIST_QUERY, "PD_LIST_QUERY",
+      megasas_dcmd_pd_list_query },
+    { MFI_DCMD_PD_GET_INFO, "PD_GET_INFO",
+      megasas_dcmd_pd_get_info },
+    { MFI_DCMD_PD_STATE_SET, "PD_STATE_SET",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_PD_REBUILD, "PD_REBUILD",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_PD_BLINK, "PD_BLINK",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_PD_UNBLINK, "PD_UNBLINK",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_LD_GET_LIST, "LD_GET_LIST",
+      megasas_dcmd_ld_get_list},
+    { MFI_DCMD_LD_GET_INFO, "LD_GET_INFO",
+      megasas_dcmd_ld_get_info },
+    { MFI_DCMD_LD_GET_PROP, "LD_GET_PROP",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_LD_SET_PROP, "LD_SET_PROP",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_LD_DELETE, "LD_DELETE",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CFG_READ, "CFG_READ",
+      megasas_dcmd_cfg_read },
+    { MFI_DCMD_CFG_ADD, "CFG_ADD",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CFG_CLEAR, "CFG_CLEAR",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CFG_FOREIGN_READ, "CFG_FOREIGN_READ",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CFG_FOREIGN_IMPORT, "CFG_FOREIGN_IMPORT",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_BBU_STATUS, "BBU_STATUS",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_BBU_CAPACITY_INFO, "BBU_CAPACITY_INFO",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_BBU_DESIGN_INFO, "BBU_DESIGN_INFO",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_BBU_PROP_GET, "BBU_PROP_GET",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CLUSTER, "CLUSTER",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CLUSTER_RESET_ALL, "CLUSTER_RESET_ALL",
+      megasas_dcmd_dummy },
+    { MFI_DCMD_CLUSTER_RESET_LD, "CLUSTER_RESET_LD",
+      megasas_cluster_reset_ld },
+    { -1, NULL, NULL }
+};
+
+static int megasas_handle_dcmd(MegasasState *s, MegasasCmd *cmd)
+{
+    int opcode, len;
+    int retval = 0;
+    const struct dcmd_cmd_tbl_t *cmdptr = dcmd_cmd_tbl;
+
+    opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
+    trace_megasas_handle_dcmd(cmd->index, opcode);
+    len = megasas_map_dcmd(s, cmd);
+    if (len < 0) {
+        return MFI_STAT_MEMORY_NOT_AVAILABLE;
+    }
+    while (cmdptr->opcode != -1 && cmdptr->opcode != opcode) {
+        cmdptr++;
+    }
+    if (cmdptr->opcode == -1) {
+        trace_megasas_dcmd_unhandled(cmd->index, opcode, len);
+        retval = megasas_dcmd_dummy(s, cmd);
+    } else {
+        trace_megasas_dcmd_enter(cmd->index, cmdptr->desc, len);
+        retval = cmdptr->func(s, cmd);
+    }
+    if (retval != MFI_STAT_INVALID_STATUS) {
+        megasas_finish_dcmd(cmd, len);
+    }
+    return retval;
+}
+
+static int megasas_finish_internal_dcmd(MegasasCmd *cmd,
+                                        SCSIRequest *req)
+{
+    int opcode;
+    int retval = MFI_STAT_OK;
+    int lun = req->lun;
+
+    opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
+    scsi_req_unref(req);
+    trace_megasas_dcmd_internal_finish(cmd->index, opcode, lun);
+    switch (opcode) {
+    case MFI_DCMD_PD_GET_INFO:
+        retval = megasas_pd_get_info_submit(req->dev, lun, cmd);
+        break;
+    case MFI_DCMD_LD_GET_INFO:
+        retval = megasas_ld_get_info_submit(req->dev, lun, cmd);
+        break;
+    default:
+        trace_megasas_dcmd_internal_invalid(cmd->index, opcode);
+        retval = MFI_STAT_INVALID_DCMD;
+        break;
+    }
+    if (retval != MFI_STAT_INVALID_STATUS) {
+        megasas_finish_dcmd(cmd, cmd->iov_size);
+    }
+    return retval;
+}
+
+static int megasas_enqueue_req(MegasasCmd *cmd, bool is_write)
+{
+    int len;
+
+    len = scsi_req_enqueue(cmd->req);
+    if (len < 0) {
+        len = -len;
+    }
+    if (len > 0) {
+        if (len > cmd->iov_size) {
+            if (is_write) {
+                trace_megasas_iov_write_overflow(cmd->index, len,
+                                                 cmd->iov_size);
+            } else {
+                trace_megasas_iov_read_overflow(cmd->index, len,
+                                                cmd->iov_size);
+            }
+        }
+        if (len < cmd->iov_size) {
+            if (is_write) {
+                trace_megasas_iov_write_underflow(cmd->index, len,
+                                                  cmd->iov_size);
+            } else {
+                trace_megasas_iov_read_underflow(cmd->index, len,
+                                                 cmd->iov_size);
+            }
+            cmd->iov_size = len;
+        }
+        scsi_req_continue(cmd->req);
+    }
+    return len;
+}
+
+static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd,
+                               bool is_logical)
+{
+    uint8_t *cdb;
+    int len;
+    bool is_write;
+    struct SCSIDevice *sdev = NULL;
+
+    cdb = cmd->frame->pass.cdb;
+
+    if (cmd->frame->header.target_id < s->fw_luns) {
+        sdev = scsi_device_find(&s->bus, 0, cmd->frame->header.target_id,
+                                cmd->frame->header.lun_id);
+    }
+    cmd->iov_size = le32_to_cpu(cmd->frame->header.data_len);
+    trace_megasas_handle_scsi(mfi_frame_desc[cmd->frame->header.frame_cmd],
+                              is_logical, cmd->frame->header.target_id,
+                              cmd->frame->header.lun_id, sdev, cmd->iov_size);
+
+    if (!sdev || (megasas_is_jbod(s) && is_logical)) {
+        trace_megasas_scsi_target_not_present(
+            mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical,
+            cmd->frame->header.target_id, cmd->frame->header.lun_id);
+        return MFI_STAT_DEVICE_NOT_FOUND;
+    }
+
+    if (cmd->frame->header.cdb_len > 16) {
+        trace_megasas_scsi_invalid_cdb_len(
+                mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical,
+                cmd->frame->header.target_id, cmd->frame->header.lun_id,
+                cmd->frame->header.cdb_len);
+        megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE));
+        cmd->frame->header.scsi_status = CHECK_CONDITION;
+        s->event_count++;
+        return MFI_STAT_SCSI_DONE_WITH_ERROR;
+    }
+
+    if (megasas_map_sgl(s, cmd, &cmd->frame->pass.sgl)) {
+        megasas_write_sense(cmd, SENSE_CODE(TARGET_FAILURE));
+        cmd->frame->header.scsi_status = CHECK_CONDITION;
+        s->event_count++;
+        return MFI_STAT_SCSI_DONE_WITH_ERROR;
+    }
+
+    cmd->req = scsi_req_new(sdev, cmd->index,
+                            cmd->frame->header.lun_id, cdb, cmd);
+    if (!cmd->req) {
+        trace_megasas_scsi_req_alloc_failed(
+                mfi_frame_desc[cmd->frame->header.frame_cmd],
+                cmd->frame->header.target_id, cmd->frame->header.lun_id);
+        megasas_write_sense(cmd, SENSE_CODE(NO_SENSE));
+        cmd->frame->header.scsi_status = BUSY;
+        s->event_count++;
+        return MFI_STAT_SCSI_DONE_WITH_ERROR;
+    }
+
+    is_write = (cmd->req->cmd.mode == SCSI_XFER_TO_DEV);
+    len = megasas_enqueue_req(cmd, is_write);
+    if (len > 0) {
+        if (is_write) {
+            trace_megasas_scsi_write_start(cmd->index, len);
+        } else {
+            trace_megasas_scsi_read_start(cmd->index, len);
+        }
+    } else {
+        trace_megasas_scsi_nodata(cmd->index);
+    }
+    return MFI_STAT_INVALID_STATUS;
+}
+
+static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd)
+{
+    uint32_t lba_count, lba_start_hi, lba_start_lo;
+    uint64_t lba_start;
+    bool is_write = (cmd->frame->header.frame_cmd == MFI_CMD_LD_WRITE);
+    uint8_t cdb[16];
+    int len;
+    struct SCSIDevice *sdev = NULL;
+
+    lba_count = le32_to_cpu(cmd->frame->io.header.data_len);
+    lba_start_lo = le32_to_cpu(cmd->frame->io.lba_lo);
+    lba_start_hi = le32_to_cpu(cmd->frame->io.lba_hi);
+    lba_start = ((uint64_t)lba_start_hi << 32) | lba_start_lo;
+
+    if (cmd->frame->header.target_id < s->fw_luns) {
+        sdev = scsi_device_find(&s->bus, 0, cmd->frame->header.target_id,
+                                cmd->frame->header.lun_id);
+    }
+
+    trace_megasas_handle_io(cmd->index,
+                            mfi_frame_desc[cmd->frame->header.frame_cmd],
+                            cmd->frame->header.target_id,
+                            cmd->frame->header.lun_id,
+                            (unsigned long)lba_start, (unsigned long)lba_count);
+    if (!sdev) {
+        trace_megasas_io_target_not_present(cmd->index,
+            mfi_frame_desc[cmd->frame->header.frame_cmd],
+            cmd->frame->header.target_id, cmd->frame->header.lun_id);
+        return MFI_STAT_DEVICE_NOT_FOUND;
+    }
+
+    if (cmd->frame->header.cdb_len > 16) {
+        trace_megasas_scsi_invalid_cdb_len(
+            mfi_frame_desc[cmd->frame->header.frame_cmd], 1,
+            cmd->frame->header.target_id, cmd->frame->header.lun_id,
+            cmd->frame->header.cdb_len);
+        megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE));
+        cmd->frame->header.scsi_status = CHECK_CONDITION;
+        s->event_count++;
+        return MFI_STAT_SCSI_DONE_WITH_ERROR;
+    }
+
+    cmd->iov_size = lba_count * sdev->blocksize;
+    if (megasas_map_sgl(s, cmd, &cmd->frame->io.sgl)) {
+        megasas_write_sense(cmd, SENSE_CODE(TARGET_FAILURE));
+        cmd->frame->header.scsi_status = CHECK_CONDITION;
+        s->event_count++;
+        return MFI_STAT_SCSI_DONE_WITH_ERROR;
+    }
+
+    megasas_encode_lba(cdb, lba_start, lba_count, is_write);
+    cmd->req = scsi_req_new(sdev, cmd->index,
+                            cmd->frame->header.lun_id, cdb, cmd);
+    if (!cmd->req) {
+        trace_megasas_scsi_req_alloc_failed(
+            mfi_frame_desc[cmd->frame->header.frame_cmd],
+            cmd->frame->header.target_id, cmd->frame->header.lun_id);
+        megasas_write_sense(cmd, SENSE_CODE(NO_SENSE));
+        cmd->frame->header.scsi_status = BUSY;
+        s->event_count++;
+        return MFI_STAT_SCSI_DONE_WITH_ERROR;
+    }
+    len = megasas_enqueue_req(cmd, is_write);
+    if (len > 0) {
+        if (is_write) {
+            trace_megasas_io_write_start(cmd->index, lba_start, lba_count, len);
+        } else {
+            trace_megasas_io_read_start(cmd->index, lba_start, lba_count, len);
+        }
+    }
+    return MFI_STAT_INVALID_STATUS;
+}
+
+static int megasas_finish_internal_command(MegasasCmd *cmd,
+                                           SCSIRequest *req, size_t resid)
+{
+    int retval = MFI_STAT_INVALID_CMD;
+
+    if (cmd->frame->header.frame_cmd == MFI_CMD_DCMD) {
+        cmd->iov_size -= resid;
+        retval = megasas_finish_internal_dcmd(cmd, req);
+    }
+    return retval;
+}
+
+static QEMUSGList *megasas_get_sg_list(SCSIRequest *req)
+{
+    MegasasCmd *cmd = req->hba_private;
+
+    if (cmd->frame->header.frame_cmd == MFI_CMD_DCMD) {
+        return NULL;
+    } else {
+        return &cmd->qsg;
+    }
+}
+
+static void megasas_xfer_complete(SCSIRequest *req, uint32_t len)
+{
+    MegasasCmd *cmd = req->hba_private;
+    uint8_t *buf;
+    uint32_t opcode;
+
+    trace_megasas_io_complete(cmd->index, len);
+
+    if (cmd->frame->header.frame_cmd != MFI_CMD_DCMD) {
+        scsi_req_continue(req);
+        return;
+    }
+
+    buf = scsi_req_get_buf(req);
+    opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
+    if (opcode == MFI_DCMD_PD_GET_INFO && cmd->iov_buf) {
+        struct mfi_pd_info *info = cmd->iov_buf;
+
+        if (info->inquiry_data[0] == 0x7f) {
+            memset(info->inquiry_data, 0, sizeof(info->inquiry_data));
+            memcpy(info->inquiry_data, buf, len);
+        } else if (info->vpd_page83[0] == 0x7f) {
+            memset(info->vpd_page83, 0, sizeof(info->vpd_page83));
+            memcpy(info->vpd_page83, buf, len);
+        }
+        scsi_req_continue(req);
+    } else if (opcode == MFI_DCMD_LD_GET_INFO) {
+        struct mfi_ld_info *info = cmd->iov_buf;
+
+        if (cmd->iov_buf) {
+            memcpy(info->vpd_page83, buf, sizeof(info->vpd_page83));
+            scsi_req_continue(req);
+        }
+    }
+}
+
+static void megasas_command_complete(SCSIRequest *req, uint32_t status,
+                                     size_t resid)
+{
+    MegasasCmd *cmd = req->hba_private;
+    uint8_t cmd_status = MFI_STAT_OK;
+
+    trace_megasas_command_complete(cmd->index, status, resid);
+
+    if (cmd->req != req) {
+        /*
+         * Internal command complete
+         */
+        cmd_status = megasas_finish_internal_command(cmd, req, resid);
+        if (cmd_status == MFI_STAT_INVALID_STATUS) {
+            return;
+        }
+    } else {
+        req->status = status;
+        trace_megasas_scsi_complete(cmd->index, req->status,
+                                    cmd->iov_size, req->cmd.xfer);
+        if (req->status != GOOD) {
+            cmd_status = MFI_STAT_SCSI_DONE_WITH_ERROR;
+        }
+        if (req->status == CHECK_CONDITION) {
+            megasas_copy_sense(cmd);
+        }
+
+        megasas_unmap_sgl(cmd);
+        cmd->frame->header.scsi_status = req->status;
+        scsi_req_unref(cmd->req);
+        cmd->req = NULL;
+    }
+    cmd->frame->header.cmd_status = cmd_status;
+    megasas_complete_frame(cmd->state, cmd->context);
+}
+
+static void megasas_command_cancel(SCSIRequest *req)
+{
+    MegasasCmd *cmd = req->hba_private;
+
+    if (cmd) {
+        megasas_abort_command(cmd);
+    } else {
+        scsi_req_unref(req);
+    }
+}
+
+static int megasas_handle_abort(MegasasState *s, MegasasCmd *cmd)
+{
+    uint64_t abort_ctx = le64_to_cpu(cmd->frame->abort.abort_context);
+    target_phys_addr_t abort_addr, addr_hi, addr_lo;
+    MegasasCmd *abort_cmd;
+
+    addr_hi = le32_to_cpu(cmd->frame->abort.abort_mfi_addr_hi);
+    addr_lo = le32_to_cpu(cmd->frame->abort.abort_mfi_addr_lo);
+    abort_addr = ((uint64_t)addr_hi << 32) | addr_lo;
+
+    abort_cmd = megasas_lookup_frame(s, abort_addr);
+    if (!abort_cmd) {
+        trace_megasas_abort_no_cmd(cmd->index, abort_ctx);
+        s->event_count++;
+        return MFI_STAT_OK;
+    }
+    if (!megasas_use_queue64(s)) {
+        abort_ctx &= (uint64_t)0xFFFFFFFF;
+    }
+    if (abort_cmd->context != abort_ctx) {
+        trace_megasas_abort_invalid_context(cmd->index, abort_cmd->index,
+                                            abort_cmd->context);
+        s->event_count++;
+        return MFI_STAT_ABORT_NOT_POSSIBLE;
+    }
+    trace_megasas_abort_frame(cmd->index, abort_cmd->index);
+    megasas_abort_command(abort_cmd);
+    if (!s->event_cmd || abort_cmd != s->event_cmd) {
+        s->event_cmd = NULL;
+    }
+    s->event_count++;
+    return MFI_STAT_OK;
+}
+
+static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr,
+                                 uint32_t frame_count)
+{
+    uint8_t frame_status = MFI_STAT_INVALID_CMD;
+    uint64_t frame_context;
+    MegasasCmd *cmd;
+
+    /*
+     * Always read 64bit context, top bits will be
+     * masked out if required in megasas_enqueue_frame()
+     */
+    frame_context = megasas_frame_get_context(frame_addr);
+
+    cmd = megasas_enqueue_frame(s, frame_addr, frame_context, frame_count);
+    if (!cmd) {
+        /* reply queue full */
+        trace_megasas_frame_busy(frame_addr);
+        megasas_frame_set_scsi_status(frame_addr, BUSY);
+        megasas_frame_set_cmd_status(frame_addr, MFI_STAT_SCSI_DONE_WITH_ERROR);
+        megasas_complete_frame(s, frame_context);
+        s->event_count++;
+        return;
+    }
+    switch (cmd->frame->header.frame_cmd) {
+    case MFI_CMD_INIT:
+        frame_status = megasas_init_firmware(s, cmd);
+        break;
+    case MFI_CMD_DCMD:
+        frame_status = megasas_handle_dcmd(s, cmd);
+        break;
+    case MFI_CMD_ABORT:
+        frame_status = megasas_handle_abort(s, cmd);
+        break;
+    case MFI_CMD_PD_SCSI_IO:
+        frame_status = megasas_handle_scsi(s, cmd, 0);
+        break;
+    case MFI_CMD_LD_SCSI_IO:
+        frame_status = megasas_handle_scsi(s, cmd, 1);
+        break;
+    case MFI_CMD_LD_READ:
+    case MFI_CMD_LD_WRITE:
+        frame_status = megasas_handle_io(s, cmd);
+        break;
+    default:
+        trace_megasas_unhandled_frame_cmd(cmd->index,
+                                          cmd->frame->header.frame_cmd);
+        s->event_count++;
+        break;
+    }
+    if (frame_status != MFI_STAT_INVALID_STATUS) {
+        if (cmd->frame) {
+            cmd->frame->header.cmd_status = frame_status;
+        } else {
+            megasas_frame_set_cmd_status(frame_addr, frame_status);
+        }
+        megasas_complete_frame(s, cmd->context);
+    }
+}
+
+static uint64_t megasas_mmio_read(void *opaque, target_phys_addr_t addr,
+                                  unsigned size)
+{
+    MegasasState *s = opaque;
+    uint32_t retval = 0;
+
+    switch (addr) {
+    case MFI_IDB:
+        retval = 0;
+        break;
+    case MFI_OMSG0:
+    case MFI_OSP0:
+        retval = (megasas_use_msix(s) ? MFI_FWSTATE_MSIX_SUPPORTED : 0) |
+            (s->fw_state & MFI_FWSTATE_MASK) |
+            ((s->fw_sge & 0xff) << 16) |
+            (s->fw_cmds & 0xFFFF);
+        break;
+    case MFI_OSTS:
+        if (megasas_intr_enabled(s) && s->doorbell) {
+            retval = MFI_1078_RM | 1;
+        }
+        break;
+    case MFI_OMSK:
+        retval = s->intr_mask;
+        break;
+    case MFI_ODCR0:
+        retval = s->doorbell;
+        break;
+    default:
+        trace_megasas_mmio_invalid_readl(addr);
+        break;
+    }
+    trace_megasas_mmio_readl(addr, retval);
+    return retval;
+}
+
+static void megasas_mmio_write(void *opaque, target_phys_addr_t addr,
+                               uint64_t val, unsigned size)
+{
+    MegasasState *s = opaque;
+    uint64_t frame_addr;
+    uint32_t frame_count;
+    int i;
+
+    trace_megasas_mmio_writel(addr, val);
+    switch (addr) {
+    case MFI_IDB:
+        if (val & MFI_FWINIT_ABORT) {
+            /* Abort all pending cmds */
+            for (i = 0; i < s->fw_cmds; i++) {
+                megasas_abort_command(&s->frames[i]);
+            }
+        }
+        if (val & MFI_FWINIT_READY) {
+            /* move to FW READY */
+            megasas_soft_reset(s);
+        }
+        if (val & MFI_FWINIT_MFIMODE) {
+            /* discard MFIs */
+        }
+        break;
+    case MFI_OMSK:
+        s->intr_mask = val;
+        if (!megasas_intr_enabled(s) && !msix_enabled(&s->dev)) {
+            trace_megasas_irq_lower();
+            qemu_irq_lower(s->dev.irq[0]);
+        }
+        if (megasas_intr_enabled(s)) {
+            trace_megasas_intr_enabled();
+        } else {
+            trace_megasas_intr_disabled();
+        }
+        break;
+    case MFI_ODCR0:
+        s->doorbell = 0;
+        if (s->producer_pa && megasas_intr_enabled(s)) {
+            /* Update reply queue pointer */
+            trace_megasas_qf_update(s->reply_queue_head, s->busy);
+            stl_le_phys(s->producer_pa, s->reply_queue_head);
+            if (!msix_enabled(&s->dev)) {
+                trace_megasas_irq_lower();
+                qemu_irq_lower(s->dev.irq[0]);
+            }
+        }
+        break;
+    case MFI_IQPH:
+        /* Received high 32 bits of a 64 bit MFI frame address */
+        s->frame_hi = val;
+        break;
+    case MFI_IQPL:
+        /* Received low 32 bits of a 64 bit MFI frame address */
+    case MFI_IQP:
+        /* Received 32 bit MFI frame address */
+        frame_addr = (val & ~0x1F);
+        /* Add possible 64 bit offset */
+        frame_addr |= ((uint64_t)s->frame_hi << 32);
+        s->frame_hi = 0;
+        frame_count = (val >> 1) & 0xF;
+        megasas_handle_frame(s, frame_addr, frame_count);
+        break;
+    default:
+        trace_megasas_mmio_invalid_writel(addr, val);
+        break;
+    }
+}
+
+static const MemoryRegionOps megasas_mmio_ops = {
+    .read = megasas_mmio_read,
+    .write = megasas_mmio_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    }
+};
+
+static uint64_t megasas_port_read(void *opaque, target_phys_addr_t addr,
+                                  unsigned size)
+{
+    return megasas_mmio_read(opaque, addr & 0xff, size);
+}
+
+static void megasas_port_write(void *opaque, target_phys_addr_t addr,
+                               uint64_t val, unsigned size)
+{
+    megasas_mmio_write(opaque, addr & 0xff, val, size);
+}
+
+static const MemoryRegionOps megasas_port_ops = {
+    .read = megasas_port_read,
+    .write = megasas_port_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    }
+};
+
+static uint64_t megasas_queue_read(void *opaque, target_phys_addr_t addr,
+                                   unsigned size)
+{
+    return 0;
+}
+
+static const MemoryRegionOps megasas_queue_ops = {
+    .read = megasas_queue_read,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    }
+};
+
+static void megasas_soft_reset(MegasasState *s)
+{
+    int i;
+    MegasasCmd *cmd;
+
+    trace_megasas_reset();
+    for (i = 0; i < s->fw_cmds; i++) {
+        cmd = &s->frames[i];
+        megasas_abort_command(cmd);
+    }
+    megasas_reset_frames(s);
+    s->reply_queue_len = s->fw_cmds;
+    s->reply_queue_pa = 0;
+    s->consumer_pa = 0;
+    s->producer_pa = 0;
+    s->fw_state = MFI_FWSTATE_READY;
+    s->doorbell = 0;
+    s->intr_mask = MEGASAS_INTR_DISABLED_MASK;
+    s->frame_hi = 0;
+    s->flags &= ~MEGASAS_MASK_USE_QUEUE64;
+    s->event_count++;
+    s->boot_event = s->event_count;
+}
+
+static void megasas_scsi_reset(DeviceState *dev)
+{
+    MegasasState *s = DO_UPCAST(MegasasState, dev.qdev, dev);
+
+    megasas_soft_reset(s);
+}
+
+static const VMStateDescription vmstate_megasas = {
+    .name = "megasas",
+    .version_id = 0,
+    .minimum_version_id = 0,
+    .minimum_version_id_old = 0,
+    .fields      = (VMStateField[]) {
+        VMSTATE_PCI_DEVICE(dev, MegasasState),
+
+        VMSTATE_INT32(fw_state, MegasasState),
+        VMSTATE_INT32(intr_mask, MegasasState),
+        VMSTATE_INT32(doorbell, MegasasState),
+        VMSTATE_UINT64(reply_queue_pa, MegasasState),
+        VMSTATE_UINT64(consumer_pa, MegasasState),
+        VMSTATE_UINT64(producer_pa, MegasasState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static int megasas_scsi_uninit(PCIDevice *d)
+{
+    MegasasState *s = DO_UPCAST(MegasasState, dev, d);
+
+#ifdef USE_MSIX
+    msix_uninit(&s->dev, &s->mmio_io);
+#endif
+    memory_region_destroy(&s->mmio_io);
+    memory_region_destroy(&s->port_io);
+    memory_region_destroy(&s->queue_io);
+    return 0;
+}
+
+static const struct SCSIBusInfo megasas_scsi_info = {
+    .tcq = true,
+    .max_target = MFI_MAX_LD,
+    .max_lun = 255,
+
+    .transfer_data = megasas_xfer_complete,
+    .get_sg_list = megasas_get_sg_list,
+    .complete = megasas_command_complete,
+    .cancel = megasas_command_cancel,
+};
+
+static int megasas_scsi_init(PCIDevice *dev)
+{
+    MegasasState *s = DO_UPCAST(MegasasState, dev, dev);
+    uint8_t *pci_conf;
+    int i, bar_type;
+
+    pci_conf = s->dev.config;
+
+    /* PCI latency timer = 0 */
+    pci_conf[PCI_LATENCY_TIMER] = 0;
+    /* Interrupt pin 1 */
+    pci_conf[PCI_INTERRUPT_PIN] = 0x01;
+
+    memory_region_init_io(&s->mmio_io, &megasas_mmio_ops, s,
+                          "megasas-mmio", 0x4000);
+    memory_region_init_io(&s->port_io, &megasas_port_ops, s,
+                          "megasas-io", 256);
+    memory_region_init_io(&s->queue_io, &megasas_queue_ops, s,
+                          "megasas-queue", 0x40000);
+
+#ifdef USE_MSIX
+    /* MSI-X support is currently broken */
+    if (megasas_use_msix(s) &&
+        msix_init(&s->dev, 15, &s->mmio_io, 0, 0x2000)) {
+        s->flags &= ~MEGASAS_MASK_USE_MSIX;
+    }
+#else
+    s->flags &= ~MEGASAS_MASK_USE_MSIX;
+#endif
+
+    bar_type = PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64;
+    pci_register_bar(&s->dev, 0, bar_type, &s->mmio_io);
+    pci_register_bar(&s->dev, 2, PCI_BASE_ADDRESS_SPACE_IO, &s->port_io);
+    pci_register_bar(&s->dev, 3, bar_type, &s->queue_io);
+
+    if (megasas_use_msix(s)) {
+        msix_vector_use(&s->dev, 0);
+    }
+
+    if (s->fw_sge >= MEGASAS_MAX_SGE - MFI_PASS_FRAME_SIZE) {
+        s->fw_sge = MEGASAS_MAX_SGE - MFI_PASS_FRAME_SIZE;
+    } else if (s->fw_sge >= 128 - MFI_PASS_FRAME_SIZE) {
+        s->fw_sge = 128 - MFI_PASS_FRAME_SIZE;
+    } else {
+        s->fw_sge = 64 - MFI_PASS_FRAME_SIZE;
+    }
+    if (s->fw_cmds > MEGASAS_MAX_FRAMES) {
+        s->fw_cmds = MEGASAS_MAX_FRAMES;
+    }
+    trace_megasas_init(s->fw_sge, s->fw_cmds,
+                       megasas_use_msix(s) ? "MSI-X" : "INTx",
+                       megasas_is_jbod(s) ? "jbod" : "raid");
+    s->fw_luns = (MFI_MAX_LD > MAX_SCSI_DEVS) ?
+        MAX_SCSI_DEVS : MFI_MAX_LD;
+    s->producer_pa = 0;
+    s->consumer_pa = 0;
+    for (i = 0; i < s->fw_cmds; i++) {
+        s->frames[i].index = i;
+        s->frames[i].context = -1;
+        s->frames[i].pa = 0;
+        s->frames[i].state = s;
+    }
+
+    scsi_bus_new(&s->bus, &dev->qdev, &megasas_scsi_info);
+    scsi_bus_legacy_handle_cmdline(&s->bus);
+    return 0;
+}
+
+static Property megasas_properties[] = {
+    DEFINE_PROP_UINT32("max_sge", MegasasState, fw_sge,
+                       MEGASAS_DEFAULT_SGE),
+    DEFINE_PROP_UINT32("max_cmds", MegasasState, fw_cmds,
+                       MEGASAS_DEFAULT_FRAMES),
+#ifdef USE_MSIX
+    DEFINE_PROP_BIT("use_msix", MegasasState, flags,
+                    MEGASAS_FLAG_USE_MSIX, false),
+#endif
+    DEFINE_PROP_BIT("use_jbod", MegasasState, flags,
+                    MEGASAS_FLAG_USE_JBOD, false),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void megasas_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
+
+    pc->init = megasas_scsi_init;
+    pc->exit = megasas_scsi_uninit;
+    pc->vendor_id = PCI_VENDOR_ID_LSI_LOGIC;
+    pc->device_id = PCI_DEVICE_ID_LSI_SAS1078;
+    pc->subsystem_vendor_id = PCI_VENDOR_ID_LSI_LOGIC;
+    pc->subsystem_id = 0x1013;
+    pc->class_id = PCI_CLASS_STORAGE_RAID;
+    dc->props = megasas_properties;
+    dc->reset = megasas_scsi_reset;
+    dc->vmsd = &vmstate_megasas;
+    dc->desc = "LSI MegaRAID SAS 1078";
+}
+
+static const TypeInfo megasas_info = {
+    .name  = "megasas",
+    .parent = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(MegasasState),
+    .class_init = megasas_class_init,
+};
+
+static void megasas_register_types(void)
+{
+    type_register_static(&megasas_info);
+}
+
+type_init(megasas_register_types)
diff --git a/hw/mfi.h b/hw/mfi.h
new file mode 100644
index 0000000000..8a821623e0
--- /dev/null
+++ b/hw/mfi.h
@@ -0,0 +1,1248 @@
+/*
+ * NetBSD header file, copied from
+ * http://gitorious.org/freebsd/freebsd/blobs/HEAD/sys/dev/mfi/mfireg.h
+ */
+/*-
+ * Copyright (c) 2006 IronPort Systems
+ * Copyright (c) 2007 LSI Corp.
+ * Copyright (c) 2007 Rajesh Prabhakaran.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef MFI_REG_H
+#define MFI_REG_H
+
+/*
+ * MegaRAID SAS MFI firmware definitions
+ */
+
+/*
+ * Start with the register set.  All registers are 32 bits wide.
+ * The usual Intel IOP style setup.
+ */
+#define MFI_IMSG0 0x10    /* Inbound message 0 */
+#define MFI_IMSG1 0x14    /* Inbound message 1 */
+#define MFI_OMSG0 0x18    /* Outbound message 0 */
+#define MFI_OMSG1 0x1c    /* Outbound message 1 */
+#define MFI_IDB   0x20    /* Inbound doorbell */
+#define MFI_ISTS  0x24    /* Inbound interrupt status */
+#define MFI_IMSK  0x28    /* Inbound interrupt mask */
+#define MFI_ODB   0x2c    /* Outbound doorbell */
+#define MFI_OSTS  0x30    /* Outbound interrupt status */
+#define MFI_OMSK  0x34    /* Outbound interrupt mask */
+#define MFI_IQP   0x40    /* Inbound queue port */
+#define MFI_OQP   0x44    /* Outbound queue port */
+
+/*
+ * 1078 specific related register
+ */
+#define MFI_ODR0        0x9c            /* outbound doorbell register0 */
+#define MFI_ODCR0       0xa0            /* outbound doorbell clear register0  */
+#define MFI_OSP0        0xb0            /* outbound scratch pad0  */
+#define MFI_IQPL        0xc0            /* Inbound queue port (low bytes)  */
+#define MFI_IQPH        0xc4            /* Inbound queue port (high bytes)  */
+#define MFI_DIAG        0xf8            /* Host diag */
+#define MFI_SEQ         0xfc            /* Sequencer offset */
+#define MFI_1078_EIM    0x80000004      /* 1078 enable intrrupt mask  */
+#define MFI_RMI         0x2             /* reply message interrupt  */
+#define MFI_1078_RM     0x80000000      /* reply 1078 message interrupt  */
+#define MFI_ODC         0x4             /* outbound doorbell change interrupt */
+
+/*
+ * gen2 specific changes
+ */
+#define MFI_GEN2_EIM    0x00000005      /* gen2 enable interrupt mask */
+#define MFI_GEN2_RM     0x00000001      /* reply gen2 message interrupt */
+
+/*
+ * skinny specific changes
+ */
+#define MFI_SKINNY_IDB  0x00    /* Inbound doorbell is at 0x00 for skinny */
+#define MFI_SKINNY_RM   0x00000001      /* reply skinny message interrupt */
+
+/* Bits for MFI_OSTS */
+#define MFI_OSTS_INTR_VALID     0x00000002
+
+/*
+ * Firmware state values.  Found in OMSG0 during initialization.
+ */
+#define MFI_FWSTATE_MASK                0xf0000000
+#define MFI_FWSTATE_UNDEFINED           0x00000000
+#define MFI_FWSTATE_BB_INIT             0x10000000
+#define MFI_FWSTATE_FW_INIT             0x40000000
+#define MFI_FWSTATE_WAIT_HANDSHAKE      0x60000000
+#define MFI_FWSTATE_FW_INIT_2           0x70000000
+#define MFI_FWSTATE_DEVICE_SCAN         0x80000000
+#define MFI_FWSTATE_BOOT_MSG_PENDING    0x90000000
+#define MFI_FWSTATE_FLUSH_CACHE         0xa0000000
+#define MFI_FWSTATE_READY               0xb0000000
+#define MFI_FWSTATE_OPERATIONAL         0xc0000000
+#define MFI_FWSTATE_FAULT               0xf0000000
+#define MFI_FWSTATE_MAXSGL_MASK         0x00ff0000
+#define MFI_FWSTATE_MAXCMD_MASK         0x0000ffff
+#define MFI_FWSTATE_MSIX_SUPPORTED      0x04000000
+#define MFI_FWSTATE_HOSTMEMREQD_MASK    0x08000000
+
+/*
+ * Control bits to drive the card to ready state.  These go into the IDB
+ * register.
+ */
+#define MFI_FWINIT_ABORT        0x00000001 /* Abort all pending commands */
+#define MFI_FWINIT_READY        0x00000002 /* Move from operational to ready */
+#define MFI_FWINIT_MFIMODE      0x00000004 /* unknown */
+#define MFI_FWINIT_CLEAR_HANDSHAKE 0x00000008 /* Respond to WAIT_HANDSHAKE */
+#define MFI_FWINIT_HOTPLUG      0x00000010
+#define MFI_FWINIT_STOP_ADP     0x00000020 /* Move to operational, stop */
+#define MFI_FWINIT_ADP_RESET    0x00000040 /* Reset ADP */
+
+/* MFI Commands */
+typedef enum {
+    MFI_CMD_INIT = 0x00,
+    MFI_CMD_LD_READ,
+    MFI_CMD_LD_WRITE,
+    MFI_CMD_LD_SCSI_IO,
+    MFI_CMD_PD_SCSI_IO,
+    MFI_CMD_DCMD,
+    MFI_CMD_ABORT,
+    MFI_CMD_SMP,
+    MFI_CMD_STP
+} mfi_cmd_t;
+
+/* Direct commands */
+typedef enum {
+    MFI_DCMD_CTRL_MFI_HOST_MEM_ALLOC =  0x0100e100,
+    MFI_DCMD_CTRL_GET_INFO =            0x01010000,
+    MFI_DCMD_CTRL_GET_PROPERTIES =      0x01020100,
+    MFI_DCMD_CTRL_SET_PROPERTIES =      0x01020200,
+    MFI_DCMD_CTRL_ALARM =               0x01030000,
+    MFI_DCMD_CTRL_ALARM_GET =           0x01030100,
+    MFI_DCMD_CTRL_ALARM_ENABLE =        0x01030200,
+    MFI_DCMD_CTRL_ALARM_DISABLE =       0x01030300,
+    MFI_DCMD_CTRL_ALARM_SILENCE =       0x01030400,
+    MFI_DCMD_CTRL_ALARM_TEST =          0x01030500,
+    MFI_DCMD_CTRL_EVENT_GETINFO =       0x01040100,
+    MFI_DCMD_CTRL_EVENT_CLEAR =         0x01040200,
+    MFI_DCMD_CTRL_EVENT_GET =           0x01040300,
+    MFI_DCMD_CTRL_EVENT_COUNT =         0x01040400,
+    MFI_DCMD_CTRL_EVENT_WAIT =          0x01040500,
+    MFI_DCMD_CTRL_SHUTDOWN =            0x01050000,
+    MFI_DCMD_HIBERNATE_STANDBY =        0x01060000,
+    MFI_DCMD_CTRL_GET_TIME =            0x01080101,
+    MFI_DCMD_CTRL_SET_TIME =            0x01080102,
+    MFI_DCMD_CTRL_BIOS_DATA_GET =       0x010c0100,
+    MFI_DCMD_CTRL_BIOS_DATA_SET =       0x010c0200,
+    MFI_DCMD_CTRL_FACTORY_DEFAULTS =    0x010d0000,
+    MFI_DCMD_CTRL_MFC_DEFAULTS_GET =    0x010e0201,
+    MFI_DCMD_CTRL_MFC_DEFAULTS_SET =    0x010e0202,
+    MFI_DCMD_CTRL_CACHE_FLUSH =         0x01101000,
+    MFI_DCMD_PD_GET_LIST =              0x02010000,
+    MFI_DCMD_PD_LIST_QUERY =            0x02010100,
+    MFI_DCMD_PD_GET_INFO =              0x02020000,
+    MFI_DCMD_PD_STATE_SET =             0x02030100,
+    MFI_DCMD_PD_REBUILD =               0x02040100,
+    MFI_DCMD_PD_BLINK =                 0x02070100,
+    MFI_DCMD_PD_UNBLINK =               0x02070200,
+    MFI_DCMD_LD_GET_LIST =              0x03010000,
+    MFI_DCMD_LD_GET_INFO =              0x03020000,
+    MFI_DCMD_LD_GET_PROP =              0x03030000,
+    MFI_DCMD_LD_SET_PROP =              0x03040000,
+    MFI_DCMD_LD_DELETE =                0x03090000,
+    MFI_DCMD_CFG_READ =                 0x04010000,
+    MFI_DCMD_CFG_ADD =                  0x04020000,
+    MFI_DCMD_CFG_CLEAR =                0x04030000,
+    MFI_DCMD_CFG_FOREIGN_READ =         0x04060100,
+    MFI_DCMD_CFG_FOREIGN_IMPORT =       0x04060400,
+    MFI_DCMD_BBU_STATUS =               0x05010000,
+    MFI_DCMD_BBU_CAPACITY_INFO =        0x05020000,
+    MFI_DCMD_BBU_DESIGN_INFO =          0x05030000,
+    MFI_DCMD_BBU_PROP_GET =             0x05050100,
+    MFI_DCMD_CLUSTER =                  0x08000000,
+    MFI_DCMD_CLUSTER_RESET_ALL =        0x08010100,
+    MFI_DCMD_CLUSTER_RESET_LD =         0x08010200
+} mfi_dcmd_t;
+
+/* Modifiers for MFI_DCMD_CTRL_FLUSHCACHE */
+#define MFI_FLUSHCACHE_CTRL     0x01
+#define MFI_FLUSHCACHE_DISK     0x02
+
+/* Modifiers for MFI_DCMD_CTRL_SHUTDOWN */
+#define MFI_SHUTDOWN_SPINDOWN   0x01
+
+/*
+ * MFI Frame flags
+ */
+typedef enum {
+    MFI_FRAME_DONT_POST_IN_REPLY_QUEUE =        0x0001,
+    MFI_FRAME_SGL64 =                           0x0002,
+    MFI_FRAME_SENSE64 =                         0x0004,
+    MFI_FRAME_DIR_WRITE =                       0x0008,
+    MFI_FRAME_DIR_READ =                        0x0010,
+    MFI_FRAME_IEEE_SGL =                        0x0020,
+} mfi_frame_flags;
+
+/* MFI Status codes */
+typedef enum {
+    MFI_STAT_OK =                       0x00,
+    MFI_STAT_INVALID_CMD,
+    MFI_STAT_INVALID_DCMD,
+    MFI_STAT_INVALID_PARAMETER,
+    MFI_STAT_INVALID_SEQUENCE_NUMBER,
+    MFI_STAT_ABORT_NOT_POSSIBLE,
+    MFI_STAT_APP_HOST_CODE_NOT_FOUND,
+    MFI_STAT_APP_IN_USE,
+    MFI_STAT_APP_NOT_INITIALIZED,
+    MFI_STAT_ARRAY_INDEX_INVALID,
+    MFI_STAT_ARRAY_ROW_NOT_EMPTY,
+    MFI_STAT_CONFIG_RESOURCE_CONFLICT,
+    MFI_STAT_DEVICE_NOT_FOUND,
+    MFI_STAT_DRIVE_TOO_SMALL,
+    MFI_STAT_FLASH_ALLOC_FAIL,
+    MFI_STAT_FLASH_BUSY,
+    MFI_STAT_FLASH_ERROR =              0x10,
+    MFI_STAT_FLASH_IMAGE_BAD,
+    MFI_STAT_FLASH_IMAGE_INCOMPLETE,
+    MFI_STAT_FLASH_NOT_OPEN,
+    MFI_STAT_FLASH_NOT_STARTED,
+    MFI_STAT_FLUSH_FAILED,
+    MFI_STAT_HOST_CODE_NOT_FOUNT,
+    MFI_STAT_LD_CC_IN_PROGRESS,
+    MFI_STAT_LD_INIT_IN_PROGRESS,
+    MFI_STAT_LD_LBA_OUT_OF_RANGE,
+    MFI_STAT_LD_MAX_CONFIGURED,
+    MFI_STAT_LD_NOT_OPTIMAL,
+    MFI_STAT_LD_RBLD_IN_PROGRESS,
+    MFI_STAT_LD_RECON_IN_PROGRESS,
+    MFI_STAT_LD_WRONG_RAID_LEVEL,
+    MFI_STAT_MAX_SPARES_EXCEEDED,
+    MFI_STAT_MEMORY_NOT_AVAILABLE =     0x20,
+    MFI_STAT_MFC_HW_ERROR,
+    MFI_STAT_NO_HW_PRESENT,
+    MFI_STAT_NOT_FOUND,
+    MFI_STAT_NOT_IN_ENCL,
+    MFI_STAT_PD_CLEAR_IN_PROGRESS,
+    MFI_STAT_PD_TYPE_WRONG,
+    MFI_STAT_PR_DISABLED,
+    MFI_STAT_ROW_INDEX_INVALID,
+    MFI_STAT_SAS_CONFIG_INVALID_ACTION,
+    MFI_STAT_SAS_CONFIG_INVALID_DATA,
+    MFI_STAT_SAS_CONFIG_INVALID_PAGE,
+    MFI_STAT_SAS_CONFIG_INVALID_TYPE,
+    MFI_STAT_SCSI_DONE_WITH_ERROR,
+    MFI_STAT_SCSI_IO_FAILED,
+    MFI_STAT_SCSI_RESERVATION_CONFLICT,
+    MFI_STAT_SHUTDOWN_FAILED =          0x30,
+    MFI_STAT_TIME_NOT_SET,
+    MFI_STAT_WRONG_STATE,
+    MFI_STAT_LD_OFFLINE,
+    MFI_STAT_PEER_NOTIFICATION_REJECTED,
+    MFI_STAT_PEER_NOTIFICATION_FAILED,
+    MFI_STAT_RESERVATION_IN_PROGRESS,
+    MFI_STAT_I2C_ERRORS_DETECTED,
+    MFI_STAT_PCI_ERRORS_DETECTED,
+    MFI_STAT_DIAG_FAILED,
+    MFI_STAT_BOOT_MSG_PENDING,
+    MFI_STAT_FOREIGN_CONFIG_INCOMPLETE,
+    MFI_STAT_INVALID_SGL,
+    MFI_STAT_UNSUPPORTED_HW,
+    MFI_STAT_CC_SCHEDULE_DISABLED,
+    MFI_STAT_PD_COPYBACK_IN_PROGRESS,
+    MFI_STAT_MULTIPLE_PDS_IN_ARRAY =    0x40,
+    MFI_STAT_FW_DOWNLOAD_ERROR,
+    MFI_STAT_FEATURE_SECURITY_NOT_ENABLED,
+    MFI_STAT_LOCK_KEY_ALREADY_EXISTS,
+    MFI_STAT_LOCK_KEY_BACKUP_NOT_ALLOWED,
+    MFI_STAT_LOCK_KEY_VERIFY_NOT_ALLOWED,
+    MFI_STAT_LOCK_KEY_VERIFY_FAILED,
+    MFI_STAT_LOCK_KEY_REKEY_NOT_ALLOWED,
+    MFI_STAT_LOCK_KEY_INVALID,
+    MFI_STAT_LOCK_KEY_ESCROW_INVALID,
+    MFI_STAT_LOCK_KEY_BACKUP_REQUIRED,
+    MFI_STAT_SECURE_LD_EXISTS,
+    MFI_STAT_LD_SECURE_NOT_ALLOWED,
+    MFI_STAT_REPROVISION_NOT_ALLOWED,
+    MFI_STAT_PD_SECURITY_TYPE_WRONG,
+    MFI_STAT_LD_ENCRYPTION_TYPE_INVALID,
+    MFI_STAT_CONFIG_FDE_NON_FDE_MIX_NOT_ALLOWED = 0x50,
+    MFI_STAT_CONFIG_LD_ENCRYPTION_TYPE_MIX_NOT_ALLOWED,
+    MFI_STAT_SECRET_KEY_NOT_ALLOWED,
+    MFI_STAT_PD_HW_ERRORS_DETECTED,
+    MFI_STAT_LD_CACHE_PINNED,
+    MFI_STAT_POWER_STATE_SET_IN_PROGRESS,
+    MFI_STAT_POWER_STATE_SET_BUSY,
+    MFI_STAT_POWER_STATE_WRONG,
+    MFI_STAT_PR_NO_AVAILABLE_PD_FOUND,
+    MFI_STAT_CTRL_RESET_REQUIRED,
+    MFI_STAT_LOCK_KEY_EKM_NO_BOOT_AGENT,
+    MFI_STAT_SNAP_NO_SPACE,
+    MFI_STAT_SNAP_PARTIAL_FAILURE,
+    MFI_STAT_UPGRADE_KEY_INCOMPATIBLE,
+    MFI_STAT_PFK_INCOMPATIBLE,
+    MFI_STAT_PD_MAX_UNCONFIGURED,
+    MFI_STAT_IO_METRICS_DISABLED =      0x60,
+    MFI_STAT_AEC_NOT_STOPPED,
+    MFI_STAT_PI_TYPE_WRONG,
+    MFI_STAT_LD_PD_PI_INCOMPATIBLE,
+    MFI_STAT_PI_NOT_ENABLED,
+    MFI_STAT_LD_BLOCK_SIZE_MISMATCH,
+    MFI_STAT_INVALID_STATUS =           0xFF
+} mfi_status_t;
+
+/* Event classes */
+typedef enum {
+    MFI_EVT_CLASS_DEBUG =      -2,
+    MFI_EVT_CLASS_PROGRESS =   -1,
+    MFI_EVT_CLASS_INFO =        0,
+    MFI_EVT_CLASS_WARNING =     1,
+    MFI_EVT_CLASS_CRITICAL =    2,
+    MFI_EVT_CLASS_FATAL =       3,
+    MFI_EVT_CLASS_DEAD =        4
+} mfi_evt_class_t;
+
+/* Event locales */
+typedef enum {
+    MFI_EVT_LOCALE_LD =         0x0001,
+    MFI_EVT_LOCALE_PD =         0x0002,
+    MFI_EVT_LOCALE_ENCL =       0x0004,
+    MFI_EVT_LOCALE_BBU =        0x0008,
+    MFI_EVT_LOCALE_SAS =        0x0010,
+    MFI_EVT_LOCALE_CTRL =       0x0020,
+    MFI_EVT_LOCALE_CONFIG =     0x0040,
+    MFI_EVT_LOCALE_CLUSTER =    0x0080,
+    MFI_EVT_LOCALE_ALL =        0xffff
+} mfi_evt_locale_t;
+
+/* Event args */
+typedef enum {
+    MR_EVT_ARGS_NONE =          0x00,
+    MR_EVT_ARGS_CDB_SENSE,
+    MR_EVT_ARGS_LD,
+    MR_EVT_ARGS_LD_COUNT,
+    MR_EVT_ARGS_LD_LBA,
+    MR_EVT_ARGS_LD_OWNER,
+    MR_EVT_ARGS_LD_LBA_PD_LBA,
+    MR_EVT_ARGS_LD_PROG,
+    MR_EVT_ARGS_LD_STATE,
+    MR_EVT_ARGS_LD_STRIP,
+    MR_EVT_ARGS_PD,
+    MR_EVT_ARGS_PD_ERR,
+    MR_EVT_ARGS_PD_LBA,
+    MR_EVT_ARGS_PD_LBA_LD,
+    MR_EVT_ARGS_PD_PROG,
+    MR_EVT_ARGS_PD_STATE,
+    MR_EVT_ARGS_PCI,
+    MR_EVT_ARGS_RATE,
+    MR_EVT_ARGS_STR,
+    MR_EVT_ARGS_TIME,
+    MR_EVT_ARGS_ECC,
+    MR_EVT_ARGS_LD_PROP,
+    MR_EVT_ARGS_PD_SPARE,
+    MR_EVT_ARGS_PD_INDEX,
+    MR_EVT_ARGS_DIAG_PASS,
+    MR_EVT_ARGS_DIAG_FAIL,
+    MR_EVT_ARGS_PD_LBA_LBA,
+    MR_EVT_ARGS_PORT_PHY,
+    MR_EVT_ARGS_PD_MISSING,
+    MR_EVT_ARGS_PD_ADDRESS,
+    MR_EVT_ARGS_BITMAP,
+    MR_EVT_ARGS_CONNECTOR,
+    MR_EVT_ARGS_PD_PD,
+    MR_EVT_ARGS_PD_FRU,
+    MR_EVT_ARGS_PD_PATHINFO,
+    MR_EVT_ARGS_PD_POWER_STATE,
+    MR_EVT_ARGS_GENERIC,
+} mfi_evt_args;
+
+/* Event codes */
+#define MR_EVT_CFG_CLEARED                          0x0004
+#define MR_EVT_CTRL_SHUTDOWN                        0x002a
+#define MR_EVT_LD_STATE_CHANGE                      0x0051
+#define MR_EVT_PD_INSERTED                          0x005b
+#define MR_EVT_PD_REMOVED                           0x0070
+#define MR_EVT_PD_STATE_CHANGED                     0x0072
+#define MR_EVT_LD_CREATED                           0x008a
+#define MR_EVT_LD_DELETED                           0x008b
+#define MR_EVT_FOREIGN_CFG_IMPORTED                 0x00db
+#define MR_EVT_LD_OFFLINE                           0x00fc
+#define MR_EVT_CTRL_HOST_BUS_SCAN_REQUESTED         0x0152
+
+typedef enum {
+    MR_LD_CACHE_WRITE_BACK =            0x01,
+    MR_LD_CACHE_WRITE_ADAPTIVE =        0x02,
+    MR_LD_CACHE_READ_AHEAD =            0x04,
+    MR_LD_CACHE_READ_ADAPTIVE =         0x08,
+    MR_LD_CACHE_WRITE_CACHE_BAD_BBU =   0x10,
+    MR_LD_CACHE_ALLOW_WRITE_CACHE =     0x20,
+    MR_LD_CACHE_ALLOW_READ_CACHE =      0x40
+} mfi_ld_cache;
+
+typedef enum {
+    MR_PD_CACHE_UNCHANGED  =    0,
+    MR_PD_CACHE_ENABLE =        1,
+    MR_PD_CACHE_DISABLE =       2
+} mfi_pd_cache;
+
+typedef enum {
+    MR_PD_QUERY_TYPE_ALL =              0,
+    MR_PD_QUERY_TYPE_STATE =            1,
+    MR_PD_QUERY_TYPE_POWER_STATE =      2,
+    MR_PD_QUERY_TYPE_MEDIA_TYPE =       3,
+    MR_PD_QUERY_TYPE_SPEED =            4,
+    MR_PD_QUERY_TYPE_EXPOSED_TO_HOST =  5, /*query for system drives */
+} mfi_pd_query_type;
+
+/*
+ * Other propertities and definitions
+ */
+#define MFI_MAX_PD_CHANNELS     2
+#define MFI_MAX_LD_CHANNELS     2
+#define MFI_MAX_CHANNELS        (MFI_MAX_PD_CHANNELS + MFI_MAX_LD_CHANNELS)
+#define MFI_MAX_CHANNEL_DEVS  128
+#define MFI_DEFAULT_ID         -1
+#define MFI_MAX_LUN             8
+#define MFI_MAX_LD             64
+
+#define MFI_FRAME_SIZE         64
+#define MFI_MBOX_SIZE          12
+
+/* Firmware flashing can take 40s */
+#define MFI_POLL_TIMEOUT_SECS  50
+
+/* Allow for speedier math calculations */
+#define MFI_SECTOR_LEN        512
+
+/* Scatter Gather elements */
+struct mfi_sg32 {
+    uint32_t addr;
+    uint32_t len;
+} __attribute__ ((packed));
+
+struct mfi_sg64 {
+    uint64_t addr;
+    uint32_t len;
+} __attribute__ ((packed));
+
+struct mfi_sg_skinny {
+    uint64_t addr;
+    uint32_t len;
+    uint32_t flag;
+} __attribute__ ((packed));
+
+union mfi_sgl {
+    struct mfi_sg32 sg32[1];
+    struct mfi_sg64 sg64[1];
+    struct mfi_sg_skinny sg_skinny[1];
+} __attribute__ ((packed));
+
+/* Message frames.  All messages have a common header */
+struct mfi_frame_header {
+    uint8_t frame_cmd;
+    uint8_t sense_len;
+    uint8_t cmd_status;
+    uint8_t scsi_status;
+    uint8_t target_id;
+    uint8_t lun_id;
+    uint8_t cdb_len;
+    uint8_t sge_count;
+    uint64_t context;
+    uint16_t flags;
+    uint16_t timeout;
+    uint32_t data_len;
+} __attribute__ ((packed));
+
+struct mfi_init_frame {
+    struct mfi_frame_header header;
+    uint32_t qinfo_new_addr_lo;
+    uint32_t qinfo_new_addr_hi;
+    uint32_t qinfo_old_addr_lo;
+    uint32_t qinfo_old_addr_hi;
+    uint32_t reserved[6];
+};
+
+#define MFI_IO_FRAME_SIZE 40
+struct mfi_io_frame {
+    struct mfi_frame_header header;
+    uint32_t sense_addr_lo;
+    uint32_t sense_addr_hi;
+    uint32_t lba_lo;
+    uint32_t lba_hi;
+    union mfi_sgl sgl;
+} __attribute__ ((packed));
+
+#define MFI_PASS_FRAME_SIZE 48
+struct mfi_pass_frame {
+    struct mfi_frame_header header;
+    uint32_t sense_addr_lo;
+    uint32_t sense_addr_hi;
+    uint8_t cdb[16];
+    union mfi_sgl sgl;
+} __attribute__ ((packed));
+
+#define MFI_DCMD_FRAME_SIZE 40
+struct mfi_dcmd_frame {
+    struct mfi_frame_header header;
+    uint32_t opcode;
+    uint8_t mbox[MFI_MBOX_SIZE];
+    union mfi_sgl sgl;
+} __attribute__ ((packed));
+
+struct mfi_abort_frame {
+    struct mfi_frame_header header;
+    uint64_t abort_context;
+    uint32_t abort_mfi_addr_lo;
+    uint32_t abort_mfi_addr_hi;
+    uint32_t reserved1[6];
+} __attribute__ ((packed));
+
+struct mfi_smp_frame {
+    struct mfi_frame_header header;
+    uint64_t sas_addr;
+    union {
+        struct mfi_sg32 sg32[2];
+        struct mfi_sg64 sg64[2];
+    } sgl;
+} __attribute__ ((packed));
+
+struct mfi_stp_frame {
+    struct mfi_frame_header header;
+    uint16_t fis[10];
+    uint32_t stp_flags;
+    union {
+        struct mfi_sg32 sg32[2];
+        struct mfi_sg64 sg64[2];
+    } sgl;
+} __attribute__ ((packed));
+
+union mfi_frame {
+    struct mfi_frame_header header;
+    struct mfi_init_frame init;
+    struct mfi_io_frame io;
+    struct mfi_pass_frame pass;
+    struct mfi_dcmd_frame dcmd;
+    struct mfi_abort_frame abort;
+    struct mfi_smp_frame smp;
+    struct mfi_stp_frame stp;
+    uint64_t raw[8];
+    uint8_t bytes[MFI_FRAME_SIZE];
+};
+
+#define MFI_SENSE_LEN 128
+struct mfi_sense {
+    uint8_t     data[MFI_SENSE_LEN];
+};
+
+#define MFI_QUEUE_FLAG_CONTEXT64 0x00000002
+
+/* The queue init structure that is passed with the init message */
+struct mfi_init_qinfo {
+    uint32_t flags;
+    uint32_t rq_entries;
+    uint32_t rq_addr_lo;
+    uint32_t rq_addr_hi;
+    uint32_t pi_addr_lo;
+    uint32_t pi_addr_hi;
+    uint32_t ci_addr_lo;
+    uint32_t ci_addr_hi;
+} __attribute__ ((packed));
+
+/* Controller properties */
+struct mfi_ctrl_props {
+    uint16_t seq_num;
+    uint16_t pred_fail_poll_interval;
+    uint16_t intr_throttle_cnt;
+    uint16_t intr_throttle_timeout;
+    uint8_t rebuild_rate;
+    uint8_t patrol_read_rate;
+    uint8_t bgi_rate;
+    uint8_t cc_rate;
+    uint8_t recon_rate;
+    uint8_t cache_flush_interval;
+    uint8_t spinup_drv_cnt;
+    uint8_t spinup_delay;
+    uint8_t cluster_enable;
+    uint8_t coercion_mode;
+    uint8_t alarm_enable;
+    uint8_t disable_auto_rebuild;
+    uint8_t disable_battery_warn;
+    uint8_t ecc_bucket_size;
+    uint16_t ecc_bucket_leak_rate;
+    uint8_t restore_hotspare_on_insertion;
+    uint8_t expose_encl_devices;
+    uint8_t maintainPdFailHistory;
+    uint8_t disallowHostRequestReordering;
+    uint8_t abortCCOnError;
+    uint8_t loadBalanceMode;
+    uint8_t disableAutoDetectBackplane;
+    uint8_t snapVDSpace;
+    uint32_t OnOffProperties;
+/* set TRUE to disable copyBack (0=copyback enabled) */
+#define MFI_CTRL_PROP_CopyBackDisabled           (1 << 0)
+#define MFI_CTRL_PROP_SMARTerEnabled             (1 << 1)
+#define MFI_CTRL_PROP_PRCorrectUnconfiguredAreas (1 << 2)
+#define MFI_CTRL_PROP_UseFdeOnly                 (1 << 3)
+#define MFI_CTRL_PROP_DisableNCQ                 (1 << 4)
+#define MFI_CTRL_PROP_SSDSMARTerEnabled          (1 << 5)
+#define MFI_CTRL_PROP_SSDPatrolReadEnabled       (1 << 6)
+#define MFI_CTRL_PROP_EnableSpinDownUnconfigured (1 << 7)
+#define MFI_CTRL_PROP_AutoEnhancedImport         (1 << 8)
+#define MFI_CTRL_PROP_EnableSecretKeyControl     (1 << 9)
+#define MFI_CTRL_PROP_DisableOnlineCtrlReset     (1 << 10)
+#define MFI_CTRL_PROP_AllowBootWithPinnedCache   (1 << 11)
+#define MFI_CTRL_PROP_DisableSpinDownHS          (1 << 12)
+#define MFI_CTRL_PROP_EnableJBOD                 (1 << 13)
+
+    uint8_t autoSnapVDSpace; /* % of source LD to be
+                              * reserved for auto snapshot
+                              * in snapshot repository, for
+                              * metadata and user data
+                              * 1=5%, 2=10%, 3=15% and so on
+                              */
+    uint8_t viewSpace;       /* snapshot writeable VIEWs
+                              * capacity as a % of source LD
+                              * capacity. 0=READ only
+                              * 1=5%, 2=10%, 3=15% and so on
+                              */
+    uint16_t spinDownTime;    /* # of idle minutes before device
+                               * is spun down (0=use FW defaults)
+                               */
+    uint8_t reserved[24];
+} __attribute__ ((packed));
+
+/* PCI information about the card. */
+struct mfi_info_pci {
+    uint16_t vendor;
+    uint16_t device;
+    uint16_t subvendor;
+    uint16_t subdevice;
+    uint8_t reserved[24];
+} __attribute__ ((packed));
+
+/* Host (front end) interface information */
+struct mfi_info_host {
+    uint8_t type;
+#define MFI_INFO_HOST_PCIX      0x01
+#define MFI_INFO_HOST_PCIE      0x02
+#define MFI_INFO_HOST_ISCSI     0x04
+#define MFI_INFO_HOST_SAS3G     0x08
+    uint8_t reserved[6];
+    uint8_t port_count;
+    uint64_t port_addr[8];
+} __attribute__ ((packed));
+
+/* Device (back end) interface information */
+struct mfi_info_device {
+    uint8_t type;
+#define MFI_INFO_DEV_SPI        0x01
+#define MFI_INFO_DEV_SAS3G      0x02
+#define MFI_INFO_DEV_SATA1      0x04
+#define MFI_INFO_DEV_SATA3G     0x08
+    uint8_t reserved[6];
+    uint8_t port_count;
+    uint64_t port_addr[8];
+} __attribute__ ((packed));
+
+/* Firmware component information */
+struct mfi_info_component {
+    char name[8];
+    char version[32];
+    char build_date[16];
+    char build_time[16];
+} __attribute__ ((packed));
+
+/* Controller default settings */
+struct mfi_defaults {
+    uint64_t sas_addr;
+    uint8_t phy_polarity;
+    uint8_t background_rate;
+    uint8_t stripe_size;
+    uint8_t flush_time;
+    uint8_t write_back;
+    uint8_t read_ahead;
+    uint8_t cache_when_bbu_bad;
+    uint8_t cached_io;
+    uint8_t smart_mode;
+    uint8_t alarm_disable;
+    uint8_t coercion;
+    uint8_t zrc_config;
+    uint8_t dirty_led_shows_drive_activity;
+    uint8_t bios_continue_on_error;
+    uint8_t spindown_mode;
+    uint8_t allowed_device_types;
+    uint8_t allow_mix_in_enclosure;
+    uint8_t allow_mix_in_ld;
+    uint8_t allow_sata_in_cluster;
+    uint8_t max_chained_enclosures;
+    uint8_t disable_ctrl_r;
+    uint8_t enable_web_bios;
+    uint8_t phy_polarity_split;
+    uint8_t direct_pd_mapping;
+    uint8_t bios_enumerate_lds;
+    uint8_t restored_hot_spare_on_insertion;
+    uint8_t expose_enclosure_devices;
+    uint8_t maintain_pd_fail_history;
+    uint8_t disable_puncture;
+    uint8_t zero_based_enumeration;
+    uint8_t disable_preboot_cli;
+    uint8_t show_drive_led_on_activity;
+    uint8_t cluster_disable;
+    uint8_t sas_disable;
+    uint8_t auto_detect_backplane;
+    uint8_t fde_only;
+    uint8_t delay_during_post;
+    uint8_t resv[19];
+} __attribute__ ((packed));
+
+/* Controller default settings */
+struct mfi_bios_data {
+    uint16_t boot_target_id;
+    uint8_t do_not_int_13;
+    uint8_t continue_on_error;
+    uint8_t verbose;
+    uint8_t geometry;
+    uint8_t expose_all_drives;
+    uint8_t reserved[56];
+    uint8_t check_sum;
+} __attribute__ ((packed));
+
+/* SAS (?) controller info, returned from MFI_DCMD_CTRL_GETINFO. */
+struct mfi_ctrl_info {
+    struct mfi_info_pci pci;
+    struct mfi_info_host host;
+    struct mfi_info_device device;
+
+    /* Firmware components that are present and active. */
+    uint32_t image_check_word;
+    uint32_t image_component_count;
+    struct mfi_info_component image_component[8];
+
+    /* Firmware components that have been flashed but are inactive */
+    uint32_t pending_image_component_count;
+    struct mfi_info_component pending_image_component[8];
+
+    uint8_t max_arms;
+    uint8_t max_spans;
+    uint8_t max_arrays;
+    uint8_t max_lds;
+    char product_name[80];
+    char serial_number[32];
+    uint32_t hw_present;
+#define MFI_INFO_HW_BBU         0x01
+#define MFI_INFO_HW_ALARM       0x02
+#define MFI_INFO_HW_NVRAM       0x04
+#define MFI_INFO_HW_UART        0x08
+#define MFI_INFO_HW_MEM         0x10
+#define MFI_INFO_HW_FLASH       0x20
+    uint32_t current_fw_time;
+    uint16_t max_cmds;
+    uint16_t max_sg_elements;
+    uint32_t max_request_size;
+    uint16_t lds_present;
+    uint16_t lds_degraded;
+    uint16_t lds_offline;
+    uint16_t pd_present;
+    uint16_t pd_disks_present;
+    uint16_t pd_disks_pred_failure;
+    uint16_t pd_disks_failed;
+    uint16_t nvram_size;
+    uint16_t memory_size;
+    uint16_t flash_size;
+    uint16_t ram_correctable_errors;
+    uint16_t ram_uncorrectable_errors;
+    uint8_t cluster_allowed;
+    uint8_t cluster_active;
+    uint16_t max_strips_per_io;
+
+    uint32_t raid_levels;
+#define MFI_INFO_RAID_0         0x01
+#define MFI_INFO_RAID_1         0x02
+#define MFI_INFO_RAID_5         0x04
+#define MFI_INFO_RAID_1E        0x08
+#define MFI_INFO_RAID_6         0x10
+
+    uint32_t adapter_ops;
+#define MFI_INFO_AOPS_RBLD_RATE         0x0001
+#define MFI_INFO_AOPS_CC_RATE           0x0002
+#define MFI_INFO_AOPS_BGI_RATE          0x0004
+#define MFI_INFO_AOPS_RECON_RATE        0x0008
+#define MFI_INFO_AOPS_PATROL_RATE       0x0010
+#define MFI_INFO_AOPS_ALARM_CONTROL     0x0020
+#define MFI_INFO_AOPS_CLUSTER_SUPPORTED 0x0040
+#define MFI_INFO_AOPS_BBU               0x0080
+#define MFI_INFO_AOPS_SPANNING_ALLOWED  0x0100
+#define MFI_INFO_AOPS_DEDICATED_SPARES  0x0200
+#define MFI_INFO_AOPS_REVERTIBLE_SPARES 0x0400
+#define MFI_INFO_AOPS_FOREIGN_IMPORT    0x0800
+#define MFI_INFO_AOPS_SELF_DIAGNOSTIC   0x1000
+#define MFI_INFO_AOPS_MIXED_ARRAY       0x2000
+#define MFI_INFO_AOPS_GLOBAL_SPARES     0x4000
+
+    uint32_t ld_ops;
+#define MFI_INFO_LDOPS_READ_POLICY      0x01
+#define MFI_INFO_LDOPS_WRITE_POLICY     0x02
+#define MFI_INFO_LDOPS_IO_POLICY        0x04
+#define MFI_INFO_LDOPS_ACCESS_POLICY    0x08
+#define MFI_INFO_LDOPS_DISK_CACHE_POLICY 0x10
+
+    struct {
+        uint8_t min;
+        uint8_t max;
+        uint8_t reserved[2];
+    } __attribute__ ((packed)) stripe_sz_ops;
+
+    uint32_t pd_ops;
+#define MFI_INFO_PDOPS_FORCE_ONLINE     0x01
+#define MFI_INFO_PDOPS_FORCE_OFFLINE    0x02
+#define MFI_INFO_PDOPS_FORCE_REBUILD    0x04
+
+    uint32_t pd_mix_support;
+#define MFI_INFO_PDMIX_SAS              0x01
+#define MFI_INFO_PDMIX_SATA             0x02
+#define MFI_INFO_PDMIX_ENCL             0x04
+#define MFI_INFO_PDMIX_LD               0x08
+#define MFI_INFO_PDMIX_SATA_CLUSTER     0x10
+
+    uint8_t ecc_bucket_count;
+    uint8_t reserved2[11];
+    struct mfi_ctrl_props properties;
+    char package_version[0x60];
+    uint8_t pad[0x800 - 0x6a0];
+} __attribute__ ((packed));
+
+/* keep track of an event. */
+union mfi_evt {
+    struct {
+        uint16_t locale;
+        uint8_t reserved;
+        int8_t class;
+    } members;
+    uint32_t word;
+} __attribute__ ((packed));
+
+/* event log state. */
+struct mfi_evt_log_state {
+    uint32_t newest_seq_num;
+    uint32_t oldest_seq_num;
+    uint32_t clear_seq_num;
+    uint32_t shutdown_seq_num;
+    uint32_t boot_seq_num;
+} __attribute__ ((packed));
+
+struct mfi_progress {
+    uint16_t progress;
+    uint16_t elapsed_seconds;
+} __attribute__ ((packed));
+
+struct mfi_evt_ld {
+    uint16_t target_id;
+    uint8_t ld_index;
+    uint8_t reserved;
+} __attribute__ ((packed));
+
+struct mfi_evt_pd {
+    uint16_t device_id;
+    uint8_t enclosure_index;
+    uint8_t slot_number;
+} __attribute__ ((packed));
+
+/* event detail, returned from MFI_DCMD_CTRL_EVENT_WAIT. */
+struct mfi_evt_detail {
+    uint32_t seq;
+    uint32_t time;
+    uint32_t code;
+    union mfi_evt class;
+    uint8_t arg_type;
+    uint8_t reserved1[15];
+
+    union {
+        struct {
+            struct mfi_evt_pd pd;
+            uint8_t cdb_len;
+            uint8_t sense_len;
+            uint8_t reserved[2];
+            uint8_t cdb[16];
+            uint8_t sense[64];
+        } cdb_sense;
+
+        struct mfi_evt_ld ld;
+
+        struct {
+            struct mfi_evt_ld ld;
+            uint64_t count;
+        } ld_count;
+
+        struct {
+            uint64_t lba;
+            struct mfi_evt_ld ld;
+        } ld_lba;
+
+        struct {
+            struct mfi_evt_ld ld;
+            uint32_t pre_owner;
+            uint32_t new_owner;
+        } ld_owner;
+
+        struct {
+            uint64_t ld_lba;
+            uint64_t pd_lba;
+            struct mfi_evt_ld ld;
+            struct mfi_evt_pd pd;
+        } ld_lba_pd_lba;
+
+        struct {
+            struct mfi_evt_ld ld;
+            struct mfi_progress prog;
+        } ld_prog;
+
+        struct {
+            struct mfi_evt_ld ld;
+            uint32_t prev_state;
+            uint32_t new_state;
+        } ld_state;
+
+        struct {
+            uint64_t strip;
+            struct mfi_evt_ld ld;
+        } ld_strip;
+
+        struct mfi_evt_pd pd;
+
+        struct {
+            struct mfi_evt_pd pd;
+            uint32_t err;
+        } pd_err;
+
+        struct {
+            uint64_t lba;
+            struct mfi_evt_pd pd;
+        } pd_lba;
+
+        struct {
+            uint64_t lba;
+            struct mfi_evt_pd pd;
+            struct mfi_evt_ld ld;
+        } pd_lba_ld;
+
+        struct {
+            struct mfi_evt_pd pd;
+            struct mfi_progress prog;
+        } pd_prog;
+
+        struct {
+            struct mfi_evt_pd ld;
+            uint32_t prev_state;
+            uint32_t new_state;
+        } pd_state;
+
+        struct {
+            uint16_t venderId;
+            uint16_t deviceId;
+            uint16_t subVenderId;
+            uint16_t subDeviceId;
+        } pci;
+
+        uint32_t rate;
+
+        char str[96];
+
+        struct {
+            uint32_t rtc;
+            uint16_t elapsedSeconds;
+        } time;
+
+        struct {
+            uint32_t ecar;
+            uint32_t elog;
+            char str[64];
+        } ecc;
+
+        uint8_t b[96];
+        uint16_t s[48];
+        uint32_t w[24];
+        uint64_t d[12];
+    } args;
+
+    char description[128];
+} __attribute__ ((packed));
+
+struct mfi_evt_list {
+    uint32_t count;
+    uint32_t reserved;
+    struct mfi_evt_detail event[1];
+} __attribute__ ((packed));
+
+union mfi_pd_ref {
+    struct {
+        uint16_t device_id;
+        uint16_t seq_num;
+    } v;
+    uint32_t ref;
+} __attribute__ ((packed));
+
+union mfi_pd_ddf_type {
+    struct {
+        uint16_t pd_type;
+#define MFI_PD_DDF_TYPE_FORCED_PD_GUID (1 << 0)
+#define MFI_PD_DDF_TYPE_IN_VD          (1 << 1)
+#define MFI_PD_DDF_TYPE_IS_GLOBAL_SPARE (1 << 2)
+#define MFI_PD_DDF_TYPE_IS_SPARE        (1 << 3)
+#define MFI_PD_DDF_TYPE_IS_FOREIGN      (1 << 4)
+#define MFI_PD_DDF_TYPE_INTF_SPI        (1 << 12)
+#define MFI_PD_DDF_TYPE_INTF_SAS        (1 << 13)
+#define MFI_PD_DDF_TYPE_INTF_SATA1      (1 << 14)
+#define MFI_PD_DDF_TYPE_INTF_SATA3G     (1 << 15)
+        uint16_t reserved;
+    } ddf;
+    struct {
+        uint32_t reserved;
+    } non_disk;
+    uint32_t type;
+} __attribute__ ((packed));
+
+struct mfi_pd_progress {
+    uint32_t active;
+#define PD_PROGRESS_ACTIVE_REBUILD (1 << 0)
+#define PD_PROGRESS_ACTIVE_PATROL  (1 << 1)
+#define PD_PROGRESS_ACTIVE_CLEAR   (1 << 2)
+    struct mfi_progress rbld;
+    struct mfi_progress patrol;
+    struct mfi_progress clear;
+    struct mfi_progress reserved[4];
+} __attribute__ ((packed));
+
+struct mfi_pd_info {
+    union mfi_pd_ref ref;
+    uint8_t inquiry_data[96];
+    uint8_t vpd_page83[64];
+    uint8_t not_supported;
+    uint8_t scsi_dev_type;
+    uint8_t connected_port_bitmap;
+    uint8_t device_speed;
+    uint32_t media_err_count;
+    uint32_t other_err_count;
+    uint32_t pred_fail_count;
+    uint32_t last_pred_fail_event_seq_num;
+    uint16_t fw_state;
+    uint8_t disable_for_removal;
+    uint8_t link_speed;
+    union mfi_pd_ddf_type state;
+    struct {
+        uint8_t count;
+        uint8_t is_path_broken;
+        uint8_t reserved[6];
+        uint64_t sas_addr[4];
+    } path_info;
+    uint64_t raw_size;
+    uint64_t non_coerced_size;
+    uint64_t coerced_size;
+    uint16_t encl_device_id;
+    uint8_t encl_index;
+    uint8_t slot_number;
+    struct mfi_pd_progress prog_info;
+    uint8_t bad_block_table_full;
+    uint8_t unusable_in_current_config;
+    uint8_t vpd_page83_ext[64];
+    uint8_t reserved[512-358];
+} __attribute__ ((packed));
+
+struct mfi_pd_address {
+    uint16_t device_id;
+    uint16_t encl_device_id;
+    uint8_t encl_index;
+    uint8_t slot_number;
+    uint8_t scsi_dev_type;
+    uint8_t connect_port_bitmap;
+    uint64_t sas_addr[2];
+} __attribute__ ((packed));
+
+#define MFI_MAX_SYS_PDS 240
+struct mfi_pd_list {
+    uint32_t size;
+    uint32_t count;
+    struct mfi_pd_address addr[MFI_MAX_SYS_PDS];
+} __attribute__ ((packed));
+
+union mfi_ld_ref {
+    struct {
+        uint8_t target_id;
+        uint8_t reserved;
+        uint16_t seq;
+    } v;
+    uint32_t ref;
+} __attribute__ ((packed));
+
+struct mfi_ld_list {
+    uint32_t ld_count;
+    uint32_t reserved1;
+    struct {
+        union mfi_ld_ref ld;
+        uint8_t state;
+        uint8_t reserved2[3];
+        uint64_t size;
+    } ld_list[MFI_MAX_LD];
+} __attribute__ ((packed));
+
+enum mfi_ld_access {
+    MFI_LD_ACCESS_RW =          0,
+    MFI_LD_ACCSSS_RO =          2,
+    MFI_LD_ACCESS_BLOCKED =     3,
+};
+#define MFI_LD_ACCESS_MASK      3
+
+enum mfi_ld_state {
+    MFI_LD_STATE_OFFLINE =              0,
+    MFI_LD_STATE_PARTIALLY_DEGRADED =   1,
+    MFI_LD_STATE_DEGRADED =             2,
+    MFI_LD_STATE_OPTIMAL =              3
+};
+
+enum mfi_syspd_state {
+    MFI_PD_STATE_UNCONFIGURED_GOOD =    0x00,
+    MFI_PD_STATE_UNCONFIGURED_BAD =     0x01,
+    MFI_PD_STATE_HOT_SPARE =            0x02,
+    MFI_PD_STATE_OFFLINE =              0x10,
+    MFI_PD_STATE_FAILED =               0x11,
+    MFI_PD_STATE_REBUILD =              0x14,
+    MFI_PD_STATE_ONLINE =               0x18,
+    MFI_PD_STATE_COPYBACK =             0x20,
+    MFI_PD_STATE_SYSTEM =               0x40
+};
+
+struct mfi_ld_props {
+    union mfi_ld_ref ld;
+    char name[16];
+    uint8_t default_cache_policy;
+    uint8_t access_policy;
+    uint8_t disk_cache_policy;
+    uint8_t current_cache_policy;
+    uint8_t no_bgi;
+    uint8_t reserved[7];
+} __attribute__ ((packed));
+
+struct mfi_ld_params {
+    uint8_t primary_raid_level;
+    uint8_t raid_level_qualifier;
+    uint8_t secondary_raid_level;
+    uint8_t stripe_size;
+    uint8_t num_drives;
+    uint8_t span_depth;
+    uint8_t state;
+    uint8_t init_state;
+    uint8_t is_consistent;
+    uint8_t reserved[23];
+} __attribute__ ((packed));
+
+struct mfi_ld_progress {
+    uint32_t            active;
+#define MFI_LD_PROGRESS_CC      (1<<0)
+#define MFI_LD_PROGRESS_BGI     (1<<1)
+#define MFI_LD_PROGRESS_FGI     (1<<2)
+#define MFI_LD_PORGRESS_RECON   (1<<3)
+    struct mfi_progress cc;
+    struct mfi_progress bgi;
+    struct mfi_progress fgi;
+    struct mfi_progress recon;
+    struct mfi_progress reserved[4];
+} __attribute__ ((packed));
+
+struct mfi_span {
+    uint64_t start_block;
+    uint64_t num_blocks;
+    uint16_t array_ref;
+    uint8_t reserved[6];
+} __attribute__ ((packed));
+
+#define MFI_MAX_SPAN_DEPTH      8
+struct mfi_ld_config {
+    struct mfi_ld_props properties;
+    struct mfi_ld_params params;
+    struct mfi_span span[MFI_MAX_SPAN_DEPTH];
+} __attribute__ ((packed));
+
+struct mfi_ld_info {
+    struct mfi_ld_config ld_config;
+    uint64_t size;
+    struct mfi_ld_progress progress;
+    uint16_t cluster_owner;
+    uint8_t reconstruct_active;
+    uint8_t reserved1[1];
+    uint8_t vpd_page83[64];
+    uint8_t reserved2[16];
+} __attribute__ ((packed));
+
+union mfi_spare_type {
+    uint8_t flags;
+#define MFI_SPARE_IS_DEDICATED (1 << 0)
+#define MFI_SPARE_IS_REVERTABLE (1 << 1)
+#define MFI_SPARE_IS_ENCL_AFFINITY (1 << 2)
+    uint8_t type;
+} __attribute__ ((packed));
+
+#define MFI_MAX_ARRAYS 16
+struct mfi_spare {
+    union mfi_pd_ref ref;
+    union mfi_spare_type spare_type;
+    uint8_t reserved[2];
+    uint8_t array_count;
+    uint16_t array_refd[MFI_MAX_ARRAYS];
+} __attribute__ ((packed));
+
+#define MFI_MAX_ROW_SIZE 32
+struct mfi_array {
+    uint64_t size;
+    uint8_t num_drives;
+    uint8_t reserved;
+    uint16_t array_ref;
+    uint8_t pad[20];
+    struct {
+        union mfi_pd_ref ref;
+        uint16_t fw_state; /* enum mfi_syspd_state */
+        struct {
+            uint8_t pd;
+            uint8_t slot;
+        } encl;
+    } pd[MFI_MAX_ROW_SIZE];
+} __attribute__ ((packed));
+
+struct mfi_config_data {
+    uint32_t size;
+    uint16_t array_count;
+    uint16_t array_size;
+    uint16_t log_drv_count;
+    uint16_t log_drv_size;
+    uint16_t spares_count;
+    uint16_t spares_size;
+    uint8_t reserved[16];
+    /*
+      struct mfi_array  array[];
+      struct mfi_ld_config ld[];
+      struct mfi_spare  spare[];
+    */
+} __attribute__ ((packed));
+
+#define MFI_SCSI_MAX_TARGETS  128
+#define MFI_SCSI_MAX_LUNS       8
+#define MFI_SCSI_INITIATOR_ID 255
+#define MFI_SCSI_MAX_CMDS       8
+#define MFI_SCSI_MAX_CDB_LEN   16
+
+#endif /* MFI_REG_H */
diff --git a/hw/omap.h b/hw/omap.h
index 3d98941b72..413851bc34 100644
--- a/hw/omap.h
+++ b/hw/omap.h
@@ -942,13 +942,7 @@ struct omap_mpu_state_s *omap2420_mpu_init(MemoryRegion *sysmem,
                 unsigned long sdram_size,
                 const char *core);
 
-# if TARGET_PHYS_ADDR_BITS == 32
-#  define OMAP_FMT_plx "%#08x"
-# elif TARGET_PHYS_ADDR_BITS == 64
-#  define OMAP_FMT_plx "%#08" PRIx64
-# else
-#  error TARGET_PHYS_ADDR_BITS undefined
-# endif
+#define OMAP_FMT_plx "%#08" TARGET_PRIxPHYS
 
 uint32_t omap_badwidth_read8(void *opaque, target_phys_addr_t addr);
 void omap_badwidth_write8(void *opaque, target_phys_addr_t addr,
diff --git a/hw/pci-stub.c b/hw/pci-stub.c
index 134c4484b6..e083191529 100644
--- a/hw/pci-stub.c
+++ b/hw/pci-stub.c
@@ -34,6 +34,21 @@ static void pci_error_message(Monitor *mon)
     monitor_printf(mon, "PCI devices not supported\n");
 }
 
+void pci_register_bar(PCIDevice *pci_dev, int region_num,
+                      uint8_t type, MemoryRegion *memory)
+{
+}
+
+const VMStateDescription vmstate_pci_device = {
+    .name = "PCIDeviceStub",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField[]) {
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 int do_pcie_aer_inject_error(Monitor *mon,
                              const QDict *qdict, QObject **ret_data)
 {
diff --git a/hw/pci_ids.h b/hw/pci_ids.h
index 649e6b379d..301bf1cd86 100644
--- a/hw/pci_ids.h
+++ b/hw/pci_ids.h
@@ -15,6 +15,7 @@
 
 #define PCI_CLASS_STORAGE_SCSI           0x0100
 #define PCI_CLASS_STORAGE_IDE            0x0101
+#define PCI_CLASS_STORAGE_RAID           0x0104
 #define PCI_CLASS_STORAGE_SATA           0x0106
 #define PCI_CLASS_STORAGE_OTHER          0x0180
 
@@ -47,6 +48,7 @@
 
 #define PCI_VENDOR_ID_LSI_LOGIC          0x1000
 #define PCI_DEVICE_ID_LSI_53C895A        0x0012
+#define PCI_DEVICE_ID_LSI_SAS1078        0x0060
 
 #define PCI_VENDOR_ID_DEC                0x1011
 #define PCI_DEVICE_ID_DEC_21154          0x0026
@@ -57,6 +59,7 @@
 
 #define PCI_VENDOR_ID_AMD                0x1022
 #define PCI_DEVICE_ID_AMD_LANCE          0x2000
+#define PCI_DEVICE_ID_AMD_SCSI           0x2020
 
 #define PCI_VENDOR_ID_TI                 0x104c
 
diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c
index 0b894620c9..3571cf3017 100644
--- a/hw/qdev-properties.c
+++ b/hw/qdev-properties.c
@@ -1123,7 +1123,7 @@ void qdev_prop_set_uint64(DeviceState *dev, const char *name, uint64_t value)
     assert_no_error(errp);
 }
 
-void qdev_prop_set_string(DeviceState *dev, const char *name, char *value)
+void qdev_prop_set_string(DeviceState *dev, const char *name, const char *value)
 {
     Error *errp = NULL;
     object_property_set_str(OBJECT(dev), value, name, &errp);
diff --git a/hw/qdev.h b/hw/qdev.h
index f4683dc771..a0770b085a 100644
--- a/hw/qdev.h
+++ b/hw/qdev.h
@@ -316,7 +316,7 @@ void qdev_prop_set_uint16(DeviceState *dev, const char *name, uint16_t value);
 void qdev_prop_set_uint32(DeviceState *dev, const char *name, uint32_t value);
 void qdev_prop_set_int32(DeviceState *dev, const char *name, int32_t value);
 void qdev_prop_set_uint64(DeviceState *dev, const char *name, uint64_t value);
-void qdev_prop_set_string(DeviceState *dev, const char *name, char *value);
+void qdev_prop_set_string(DeviceState *dev, const char *name, const char *value);
 void qdev_prop_set_chr(DeviceState *dev, const char *name, CharDriverState *value);
 void qdev_prop_set_netdev(DeviceState *dev, const char *name, VLANClientState *value);
 void qdev_prop_set_vlan(DeviceState *dev, const char *name, VLANState *value);
diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index 7b150475f4..436b015c64 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -1785,7 +1785,7 @@ static void rtl8139_transfer_frame(RTL8139State *s, uint8_t *buf, int size,
         if (iov) {
             buf2_size = iov_size(iov, 3);
             buf2 = g_malloc(buf2_size);
-            iov_to_buf(iov, 3, buf2, 0, buf2_size);
+            iov_to_buf(iov, 3, 0, buf2, buf2_size);
             buf = buf2;
         }
 
diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 14e2f730b8..dc7406389d 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -734,20 +734,16 @@ static int scsi_req_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf)
     switch (buf[0] >> 5) {
     case 0:
         cmd->xfer = buf[4];
-        cmd->len = 6;
         break;
     case 1:
     case 2:
         cmd->xfer = lduw_be_p(&buf[7]);
-        cmd->len = 10;
         break;
     case 4:
         cmd->xfer = ldl_be_p(&buf[10]) & 0xffffffffULL;
-        cmd->len = 16;
         break;
     case 5:
         cmd->xfer = ldl_be_p(&buf[6]) & 0xffffffffULL;
-        cmd->len = 12;
         break;
     default:
         return -1;
@@ -771,11 +767,9 @@ static int scsi_req_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf)
     case SYNCHRONIZE_CACHE_16:
     case LOCATE_16:
     case LOCK_UNLOCK_CACHE:
-    case LOAD_UNLOAD:
     case SET_CD_SPEED:
     case SET_LIMITS:
     case WRITE_LONG_10:
-    case MOVE_MEDIUM:
     case UPDATE_BLOCK:
     case RESERVE_TRACK:
     case SET_READ_AHEAD:
@@ -885,7 +879,6 @@ static int scsi_req_stream_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *bu
     case READ_REVERSE:
     case RECOVER_BUFFERED_DATA:
     case WRITE_6:
-        cmd->len = 6;
         cmd->xfer = buf[4] | (buf[3] << 8) | (buf[2] << 16);
         if (buf[1] & 0x01) { /* fixed */
             cmd->xfer *= dev->blocksize;
@@ -895,22 +888,34 @@ static int scsi_req_stream_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *bu
     case READ_REVERSE_16:
     case VERIFY_16:
     case WRITE_16:
-        cmd->len = 16;
         cmd->xfer = buf[14] | (buf[13] << 8) | (buf[12] << 16);
         if (buf[1] & 0x01) { /* fixed */
             cmd->xfer *= dev->blocksize;
         }
         break;
     case REWIND:
-    case START_STOP:
-        cmd->len = 6;
+    case LOAD_UNLOAD:
         cmd->xfer = 0;
         break;
     case SPACE_16:
         cmd->xfer = buf[13] | (buf[12] << 8);
         break;
     case READ_POSITION:
-        cmd->xfer = buf[8] | (buf[7] << 8);
+        switch (buf[1] & 0x1f) /* operation code */ {
+        case SHORT_FORM_BLOCK_ID:
+        case SHORT_FORM_VENDOR_SPECIFIC:
+            cmd->xfer = 20;
+            break;
+        case LONG_FORM:
+            cmd->xfer = 32;
+            break;
+        case EXTENDED_FORM:
+            cmd->xfer = buf[8] | (buf[7] << 8);
+            break;
+        default:
+            return -1;
+        }
+
         break;
     case FORMAT_UNIT:
         cmd->xfer = buf[4] | (buf[3] << 8);
@@ -922,6 +927,29 @@ static int scsi_req_stream_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *bu
     return 0;
 }
 
+static int scsi_req_medium_changer_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf)
+{
+    switch (buf[0]) {
+    /* medium changer commands */
+    case EXCHANGE_MEDIUM:
+    case INITIALIZE_ELEMENT_STATUS:
+    case INITIALIZE_ELEMENT_STATUS_WITH_RANGE:
+    case MOVE_MEDIUM:
+    case POSITION_TO_ELEMENT:
+        cmd->xfer = 0;
+        break;
+    case READ_ELEMENT_STATUS:
+        cmd->xfer = buf[9] | (buf[8] << 8) | (buf[7] << 16);
+        break;
+
+    /* generic commands */
+    default:
+        return scsi_req_length(cmd, dev, buf);
+    }
+    return 0;
+}
+
+
 static void scsi_cmd_xfer_mode(SCSICommand *cmd)
 {
     if (!cmd->xfer) {
@@ -1001,11 +1029,36 @@ int scsi_req_parse(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf)
 {
     int rc;
 
-    if (dev->type == TYPE_TAPE) {
+    switch (buf[0] >> 5) {
+    case 0:
+        cmd->len = 6;
+        break;
+    case 1:
+    case 2:
+        cmd->len = 10;
+        break;
+    case 4:
+        cmd->len = 16;
+        break;
+    case 5:
+        cmd->len = 12;
+        break;
+    default:
+        return -1;
+    }
+
+    switch (dev->type) {
+    case TYPE_TAPE:
         rc = scsi_req_stream_length(cmd, dev, buf);
-    } else {
+        break;
+    case TYPE_MEDIUM_CHANGER:
+        rc = scsi_req_medium_changer_length(cmd, dev, buf);
+        break;
+    default:
         rc = scsi_req_length(cmd, dev, buf);
+        break;
     }
+
     if (rc != 0)
         return rc;
 
@@ -1183,7 +1236,8 @@ static const char *scsi_command_name(uint8_t cmd)
         [ REQUEST_SENSE            ] = "REQUEST_SENSE",
         [ FORMAT_UNIT              ] = "FORMAT_UNIT",
         [ READ_BLOCK_LIMITS        ] = "READ_BLOCK_LIMITS",
-        [ REASSIGN_BLOCKS          ] = "REASSIGN_BLOCKS",
+        [ REASSIGN_BLOCKS          ] = "REASSIGN_BLOCKS/INITIALIZE ELEMENT STATUS",
+        /* LOAD_UNLOAD and INITIALIZE_ELEMENT_STATUS use the same operation code */
         [ READ_6                   ] = "READ_6",
         [ WRITE_6                  ] = "WRITE_6",
         [ SET_CAPACITY             ] = "SET_CAPACITY",
@@ -1200,14 +1254,16 @@ static const char *scsi_command_name(uint8_t cmd)
         [ COPY                     ] = "COPY",
         [ ERASE                    ] = "ERASE",
         [ MODE_SENSE               ] = "MODE_SENSE",
-        [ START_STOP               ] = "START_STOP",
+        [ START_STOP               ] = "START_STOP/LOAD_UNLOAD",
+        /* LOAD_UNLOAD and START_STOP use the same operation code */
         [ RECEIVE_DIAGNOSTIC       ] = "RECEIVE_DIAGNOSTIC",
         [ SEND_DIAGNOSTIC          ] = "SEND_DIAGNOSTIC",
         [ ALLOW_MEDIUM_REMOVAL     ] = "ALLOW_MEDIUM_REMOVAL",
         [ READ_CAPACITY_10         ] = "READ_CAPACITY_10",
         [ READ_10                  ] = "READ_10",
         [ WRITE_10                 ] = "WRITE_10",
-        [ SEEK_10                  ] = "SEEK_10",
+        [ SEEK_10                  ] = "SEEK_10/POSITION_TO_ELEMENT",
+        /* SEEK_10 and POSITION_TO_ELEMENT use the same operation code */
         [ WRITE_VERIFY_10          ] = "WRITE_VERIFY_10",
         [ VERIFY_10                ] = "VERIFY_10",
         [ SEARCH_HIGH              ] = "SEARCH_HIGH",
@@ -1218,7 +1274,8 @@ static const char *scsi_command_name(uint8_t cmd)
         /* READ_POSITION and PRE_FETCH use the same operation code */
         [ SYNCHRONIZE_CACHE        ] = "SYNCHRONIZE_CACHE",
         [ LOCK_UNLOCK_CACHE        ] = "LOCK_UNLOCK_CACHE",
-        [ READ_DEFECT_DATA         ] = "READ_DEFECT_DATA",
+        [ READ_DEFECT_DATA         ] = "READ_DEFECT_DATA/INITIALIZE_ELEMENT_STATUS_WITH_RANGE",
+        /* READ_DEFECT_DATA and INITIALIZE_ELEMENT_STATUS_WITH_RANGE use the same operation code */
         [ MEDIUM_SCAN              ] = "MEDIUM_SCAN",
         [ COMPARE                  ] = "COMPARE",
         [ COPY_VERIFY              ] = "COPY_VERIFY",
@@ -1263,6 +1320,7 @@ static const char *scsi_command_name(uint8_t cmd)
         [ REPORT_LUNS              ] = "REPORT_LUNS",
         [ BLANK                    ] = "BLANK",
         [ MOVE_MEDIUM              ] = "MOVE_MEDIUM",
+        [ EXCHANGE_MEDIUM          ] = "EXCHANGE MEDIUM",
         [ LOAD_UNLOAD              ] = "LOAD_UNLOAD",
         [ READ_12                  ] = "READ_12",
         [ WRITE_12                 ] = "WRITE_12",
@@ -1296,6 +1354,7 @@ static const char *scsi_command_name(uint8_t cmd)
 
 SCSIRequest *scsi_req_ref(SCSIRequest *req)
 {
+    assert(req->refcount > 0);
     req->refcount++;
     return req;
 }
@@ -1304,6 +1363,10 @@ void scsi_req_unref(SCSIRequest *req)
 {
     assert(req->refcount > 0);
     if (--req->refcount == 0) {
+        SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, req->dev->qdev.parent_bus);
+        if (bus->info->free_request && req->hba_private) {
+            bus->info->free_request(bus, req->hba_private);
+        }
         if (req->ops->free_req) {
             req->ops->free_req(req);
         }
@@ -1389,7 +1452,7 @@ void scsi_req_complete(SCSIRequest *req, int status)
     assert(req->status == -1);
     req->status = status;
 
-    assert(req->sense_len < sizeof(req->sense));
+    assert(req->sense_len <= sizeof(req->sense));
     if (status == GOOD) {
         req->sense_len = 0;
     }
diff --git a/hw/scsi-defs.h b/hw/scsi-defs.h
index 219c84dfb1..8a73f745ba 100644
--- a/hw/scsi-defs.h
+++ b/hw/scsi-defs.h
@@ -29,6 +29,7 @@
 #define REQUEST_SENSE         0x03
 #define FORMAT_UNIT           0x04
 #define READ_BLOCK_LIMITS     0x05
+#define INITIALIZE_ELEMENT_STATUS 0x07
 #define REASSIGN_BLOCKS       0x07
 #define READ_6                0x08
 #define WRITE_6               0x0a
@@ -44,6 +45,7 @@
 #define COPY                  0x18
 #define ERASE                 0x19
 #define MODE_SENSE            0x1a
+#define LOAD_UNLOAD           0x1b
 #define START_STOP            0x1b
 #define RECEIVE_DIAGNOSTIC    0x1c
 #define SEND_DIAGNOSTIC       0x1d
@@ -53,6 +55,7 @@
 #define WRITE_10              0x2a
 #define SEEK_10               0x2b
 #define LOCATE_10             0x2b
+#define POSITION_TO_ELEMENT   0x2b
 #define WRITE_VERIFY_10       0x2e
 #define VERIFY_10             0x2f
 #define SEARCH_HIGH           0x30
@@ -63,6 +66,7 @@
 #define READ_POSITION         0x34
 #define SYNCHRONIZE_CACHE     0x35
 #define LOCK_UNLOCK_CACHE     0x36
+#define INITIALIZE_ELEMENT_STATUS_WITH_RANGE 0x37
 #define READ_DEFECT_DATA      0x37
 #define MEDIUM_SCAN           0x38
 #define COMPARE               0x39
@@ -82,6 +86,7 @@
 #define GET_EVENT_STATUS_NOTIFICATION 0x4a
 #define LOG_SELECT            0x4c
 #define LOG_SENSE             0x4d
+#define READ_DISC_INFORMATION 0x51
 #define RESERVE_TRACK         0x53
 #define MODE_SELECT_10        0x55
 #define RESERVE_10            0x56
@@ -116,7 +121,7 @@
 #define MAINTENANCE_IN        0xa3
 #define MAINTENANCE_OUT       0xa4
 #define MOVE_MEDIUM           0xa5
-#define LOAD_UNLOAD           0xa6
+#define EXCHANGE_MEDIUM       0xa6
 #define SET_READ_AHEAD        0xa7
 #define READ_12               0xa8
 #define WRITE_12              0xaa
@@ -142,6 +147,14 @@
 #define SAI_READ_CAPACITY_16  0x10
 
 /*
+ * READ POSITION service action codes
+ */
+#define SHORT_FORM_BLOCK_ID  0x00
+#define SHORT_FORM_VENDOR_SPECIFIC 0x01
+#define LONG_FORM            0x06
+#define EXTENDED_FORM        0x08
+
+/*
  *  SAM Status codes
  */
 
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index ae2519458c..34336b1b58 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -67,6 +67,7 @@ struct SCSIDiskState
     bool media_changed;
     bool media_event;
     bool eject_request;
+    uint64_t wwn;
     QEMUBH *bh;
     char *version;
     char *serial;
@@ -522,6 +523,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
     int buflen = 0;
+    int start;
 
     if (req->cmd.buf[1] & 0x1) {
         /* Vital product data */
@@ -530,14 +532,14 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
         outbuf[buflen++] = s->qdev.type & 0x1f;
         outbuf[buflen++] = page_code ; // this page
         outbuf[buflen++] = 0x00;
+        outbuf[buflen++] = 0x00;
+        start = buflen;
 
         switch (page_code) {
         case 0x00: /* Supported page codes, mandatory */
         {
-            int pages;
             DPRINTF("Inquiry EVPD[Supported pages] "
                     "buffer size %zd\n", req->cmd.xfer);
-            pages = buflen++;
             outbuf[buflen++] = 0x00; // list of supported pages (this page)
             if (s->serial) {
                 outbuf[buflen++] = 0x80; // unit serial number
@@ -547,7 +549,6 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
                 outbuf[buflen++] = 0xb0; // block limits
                 outbuf[buflen++] = 0xb2; // thin provisioning
             }
-            outbuf[pages] = buflen - pages - 1; // number of pages
             break;
         }
         case 0x80: /* Device serial number, optional */
@@ -566,7 +567,6 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
 
             DPRINTF("Inquiry EVPD[Serial number] "
                     "buffer size %zd\n", req->cmd.xfer);
-            outbuf[buflen++] = l;
             memcpy(outbuf+buflen, s->serial, l);
             buflen += l;
             break;
@@ -584,14 +584,21 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             DPRINTF("Inquiry EVPD[Device identification] "
                     "buffer size %zd\n", req->cmd.xfer);
 
-            outbuf[buflen++] = 4 + id_len;
             outbuf[buflen++] = 0x2; // ASCII
             outbuf[buflen++] = 0;   // not officially assigned
             outbuf[buflen++] = 0;   // reserved
             outbuf[buflen++] = id_len; // length of data following
-
             memcpy(outbuf+buflen, str, id_len);
             buflen += id_len;
+
+            if (s->wwn) {
+                outbuf[buflen++] = 0x1; // Binary
+                outbuf[buflen++] = 0x3; // NAA
+                outbuf[buflen++] = 0;   // reserved
+                outbuf[buflen++] = 8;
+                stq_be_p(&outbuf[buflen], s->wwn);
+                buflen += 8;
+            }
             break;
         }
         case 0xb0: /* block limits */
@@ -609,8 +616,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
                 return -1;
             }
             /* required VPD size with unmap support */
-            outbuf[3] = buflen = 0x3c;
-
+            buflen = 0x40;
             memset(outbuf + 4, 0, buflen - 4);
 
             /* optimal transfer length granularity */
@@ -632,7 +638,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
         }
         case 0xb2: /* thin provisioning */
         {
-            outbuf[3] = buflen = 8;
+            buflen = 8;
             outbuf[4] = 0;
             outbuf[5] = 0x60; /* write_same 10/16 supported */
             outbuf[6] = s->qdev.conf.discard_granularity ? 2 : 1;
@@ -643,6 +649,8 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             return -1;
         }
         /* done with EVPD */
+        assert(buflen - start <= 255);
+        outbuf[start - 1] = buflen - start;
         return buflen;
     }
 
@@ -716,6 +724,39 @@ static inline bool media_is_cd(SCSIDiskState *s)
     return nb_sectors <= CD_MAX_SECTORS;
 }
 
+static int scsi_read_disc_information(SCSIDiskState *s, SCSIDiskReq *r,
+                                      uint8_t *outbuf)
+{
+    uint8_t type = r->req.cmd.buf[1] & 7;
+
+    if (s->qdev.type != TYPE_ROM) {
+        return -1;
+    }
+
+    /* Types 1/2 are only defined for Blu-Ray.  */
+    if (type != 0) {
+        scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
+        return -1;
+    }
+
+    memset(outbuf, 0, 34);
+    outbuf[1] = 32;
+    outbuf[2] = 0xe; /* last session complete, disc finalized */
+    outbuf[3] = 1;   /* first track on disc */
+    outbuf[4] = 1;   /* # of sessions */
+    outbuf[5] = 1;   /* first track of last session */
+    outbuf[6] = 1;   /* last track of last session */
+    outbuf[7] = 0x20; /* unrestricted use */
+    outbuf[8] = 0x00; /* CD-ROM or DVD-ROM */
+    /* 9-10-11: most significant byte corresponding bytes 4-5-6 */
+    /* 12-23: not meaningful for CD-ROM or DVD-ROM */
+    /* 24-31: disc bar code */
+    /* 32: disc application code */
+    /* 33: number of OPC tables */
+
+    return 34;
+}
+
 static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r,
                                    uint8_t *outbuf)
 {
@@ -1355,6 +1396,12 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
             goto illegal_request;
         }
         break;
+    case READ_DISC_INFORMATION:
+        buflen = scsi_read_disc_information(s, r, outbuf);
+        if (buflen < 0) {
+            goto illegal_request;
+        }
+        break;
     case READ_DVD_STRUCTURE:
         buflen = scsi_read_dvd_structure(s, r, outbuf);
         if (buflen < 0) {
@@ -1482,6 +1529,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
     case ALLOW_MEDIUM_REMOVAL:
     case READ_CAPACITY_10:
     case READ_TOC:
+    case READ_DISC_INFORMATION:
     case READ_DVD_STRUCTURE:
     case GET_CONFIGURATION:
     case GET_EVENT_STATUS_NOTIFICATION:
@@ -1925,6 +1973,7 @@ static Property scsi_hd_properties[] = {
                     SCSI_DISK_F_REMOVABLE, false),
     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
                     SCSI_DISK_F_DPOFUA, false),
+    DEFINE_PROP_HEX64("wwn", SCSIDiskState, wwn, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1969,6 +2018,7 @@ static TypeInfo scsi_hd_info = {
 
 static Property scsi_cd_properties[] = {
     DEFINE_SCSI_DISK_PROPERTIES(),
+    DEFINE_PROP_HEX64("wwn", SCSIDiskState, wwn, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -2030,6 +2080,7 @@ static Property scsi_disk_properties[] = {
                     SCSI_DISK_F_REMOVABLE, false),
     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
                     SCSI_DISK_F_DPOFUA, false),
+    DEFINE_PROP_HEX64("wwn", SCSIDiskState, wwn, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index d856d23b3b..8d5106061e 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -400,12 +400,6 @@ static int scsi_generic_initfn(SCSIDevice *s)
         return -1;
     }
 
-    /* check we are really using a /dev/sg* file */
-    if (!bdrv_is_sg(s->conf.bs)) {
-        error_report("not /dev/sg*");
-        return -1;
-    }
-
     if (bdrv_get_on_error(s->conf.bs, 0) != BLOCK_ERR_STOP_ENOSPC) {
         error_report("Device doesn't support drive option werror");
         return -1;
@@ -416,8 +410,11 @@ static int scsi_generic_initfn(SCSIDevice *s)
     }
 
     /* check we are using a driver managing SG_IO (version 3 and after */
-    if (bdrv_ioctl(s->conf.bs, SG_GET_VERSION_NUM, &sg_version) < 0 ||
-        sg_version < 30000) {
+    if (bdrv_ioctl(s->conf.bs, SG_GET_VERSION_NUM, &sg_version) < 0) {
+        error_report("scsi generic interface not supported");
+        return -1;
+    }
+    if (sg_version < 30000) {
         error_report("scsi generic interface too old");
         return -1;
     }
diff --git a/hw/scsi.h b/hw/scsi.h
index 76f06d41de..367a346020 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -134,6 +134,7 @@ struct SCSIBusInfo {
 
     void (*save_request)(QEMUFile *f, SCSIRequest *req);
     void *(*load_request)(QEMUFile *f, SCSIRequest *req);
+    void (*free_request)(SCSIBus *bus, void *priv);
 };
 
 #define TYPE_SCSI_BUS "SCSI"
diff --git a/hw/sh_serial.c b/hw/sh_serial.c
index 43b0eb1c1d..1d1883dd20 100644
--- a/hw/sh_serial.c
+++ b/hw/sh_serial.c
@@ -186,7 +186,8 @@ static void sh_serial_write(void *opaque, target_phys_addr_t offs,
         }
     }
 
-    fprintf(stderr, "sh_serial: unsupported write to 0x%02x\n", offs);
+    fprintf(stderr, "sh_serial: unsupported write to 0x%02"
+            TARGET_PRIxPHYS "\n", offs);
     abort();
 }
 
@@ -287,7 +288,8 @@ static uint64_t sh_serial_read(void *opaque, target_phys_addr_t offs,
 #endif
 
     if (ret & ~((1 << 16) - 1)) {
-        fprintf(stderr, "sh_serial: unsupported read from 0x%02x\n", offs);
+        fprintf(stderr, "sh_serial: unsupported read from 0x%02"
+                TARGET_PRIxPHYS "\n", offs);
         abort();
     }
 
diff --git a/hw/usb.h b/hw/usb.h
index a5623d393f..7ed8fb8fcf 100644
--- a/hw/usb.h
+++ b/hw/usb.h
@@ -145,6 +145,8 @@
 #define USB_ENDPOINT_XFER_INT		3
 #define USB_ENDPOINT_XFER_INVALID     255
 
+#define USB_INTERFACE_INVALID         255
+
 typedef struct USBBus USBBus;
 typedef struct USBBusOps USBBusOps;
 typedef struct USBPort USBPort;
@@ -363,6 +365,7 @@ void usb_packet_complete(USBDevice *dev, USBPacket *p);
 void usb_cancel_packet(USBPacket * p);
 
 void usb_ep_init(USBDevice *dev);
+void usb_ep_reset(USBDevice *dev);
 void usb_ep_dump(USBDevice *dev);
 struct USBEndpoint *usb_ep_get(USBDevice *dev, int pid, int ep);
 uint8_t usb_ep_get_type(USBDevice *dev, int pid, int ep);
diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs
index 9c7ddf5cb2..4225136d0f 100644
--- a/hw/usb/Makefile.objs
+++ b/hw/usb/Makefile.objs
@@ -11,3 +11,4 @@ common-obj-y += core.o bus.o desc.o dev-hub.o
 common-obj-y += host-$(HOST_USB).o dev-bluetooth.o
 common-obj-y += dev-hid.o dev-storage.o dev-wacom.o
 common-obj-y += dev-serial.o dev-network.o dev-audio.o
+common-obj-y += dev-uas.o
diff --git a/hw/usb/core.c b/hw/usb/core.c
index 0e02da7601..01a7622837 100644
--- a/hw/usb/core.c
+++ b/hw/usb/core.c
@@ -522,10 +522,10 @@ void usb_packet_copy(USBPacket *p, void *ptr, size_t bytes)
     switch (p->pid) {
     case USB_TOKEN_SETUP:
     case USB_TOKEN_OUT:
-        iov_to_buf(p->iov.iov, p->iov.niov, ptr, p->result, bytes);
+        iov_to_buf(p->iov.iov, p->iov.niov, p->result, ptr, bytes);
         break;
     case USB_TOKEN_IN:
-        iov_from_buf(p->iov.iov, p->iov.niov, ptr, p->result, bytes);
+        iov_from_buf(p->iov.iov, p->iov.niov, p->result, ptr, bytes);
         break;
     default:
         fprintf(stderr, "%s: invalid pid: %x\n", __func__, p->pid);
@@ -539,7 +539,7 @@ void usb_packet_skip(USBPacket *p, size_t bytes)
     assert(p->result >= 0);
     assert(p->result + bytes <= p->iov.size);
     if (p->pid == USB_TOKEN_IN) {
-        iov_clear(p->iov.iov, p->iov.niov, p->result, bytes);
+        iov_memset(p->iov.iov, p->iov.niov, p->result, 0, bytes);
     }
     p->result += bytes;
 }
@@ -550,7 +550,7 @@ void usb_packet_cleanup(USBPacket *p)
     qemu_iovec_destroy(&p->iov);
 }
 
-void usb_ep_init(USBDevice *dev)
+void usb_ep_reset(USBDevice *dev)
 {
     int ep;
 
@@ -559,7 +559,6 @@ void usb_ep_init(USBDevice *dev)
     dev->ep_ctl.ifnum = 0;
     dev->ep_ctl.dev = dev;
     dev->ep_ctl.pipeline = false;
-    QTAILQ_INIT(&dev->ep_ctl.queue);
     for (ep = 0; ep < USB_MAX_ENDPOINTS; ep++) {
         dev->ep_in[ep].nr = ep + 1;
         dev->ep_out[ep].nr = ep + 1;
@@ -567,12 +566,22 @@ void usb_ep_init(USBDevice *dev)
         dev->ep_out[ep].pid = USB_TOKEN_OUT;
         dev->ep_in[ep].type = USB_ENDPOINT_XFER_INVALID;
         dev->ep_out[ep].type = USB_ENDPOINT_XFER_INVALID;
-        dev->ep_in[ep].ifnum = 0;
-        dev->ep_out[ep].ifnum = 0;
+        dev->ep_in[ep].ifnum = USB_INTERFACE_INVALID;
+        dev->ep_out[ep].ifnum = USB_INTERFACE_INVALID;
         dev->ep_in[ep].dev = dev;
         dev->ep_out[ep].dev = dev;
         dev->ep_in[ep].pipeline = false;
         dev->ep_out[ep].pipeline = false;
+    }
+}
+
+void usb_ep_init(USBDevice *dev)
+{
+    int ep;
+
+    usb_ep_reset(dev);
+    QTAILQ_INIT(&dev->ep_ctl.queue);
+    for (ep = 0; ep < USB_MAX_ENDPOINTS; ep++) {
         QTAILQ_INIT(&dev->ep_in[ep].queue);
         QTAILQ_INIT(&dev->ep_out[ep].queue);
     }
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
new file mode 100644
index 0000000000..9b02ff48fa
--- /dev/null
+++ b/hw/usb/dev-uas.c
@@ -0,0 +1,779 @@
+/*
+ * UAS (USB Attached SCSI) emulation
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Author: Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qemu-option.h"
+#include "qemu-config.h"
+#include "trace.h"
+
+#include "hw/usb.h"
+#include "hw/usb/desc.h"
+#include "hw/scsi.h"
+#include "hw/scsi-defs.h"
+
+/* --------------------------------------------------------------------- */
+
+#define UAS_UI_COMMAND              0x01
+#define UAS_UI_SENSE                0x03
+#define UAS_UI_RESPONSE             0x04
+#define UAS_UI_TASK_MGMT            0x05
+#define UAS_UI_READ_READY           0x06
+#define UAS_UI_WRITE_READY          0x07
+
+#define UAS_RC_TMF_COMPLETE         0x00
+#define UAS_RC_INVALID_INFO_UNIT    0x02
+#define UAS_RC_TMF_NOT_SUPPORTED    0x04
+#define UAS_RC_TMF_FAILED           0x05
+#define UAS_RC_TMF_SUCCEEDED        0x08
+#define UAS_RC_INCORRECT_LUN        0x09
+#define UAS_RC_OVERLAPPED_TAG       0x0a
+
+#define UAS_TMF_ABORT_TASK          0x01
+#define UAS_TMF_ABORT_TASK_SET      0x02
+#define UAS_TMF_CLEAR_TASK_SET      0x04
+#define UAS_TMF_LOGICAL_UNIT_RESET  0x08
+#define UAS_TMF_I_T_NEXUS_RESET     0x10
+#define UAS_TMF_CLEAR_ACA           0x40
+#define UAS_TMF_QUERY_TASK          0x80
+#define UAS_TMF_QUERY_TASK_SET      0x81
+#define UAS_TMF_QUERY_ASYNC_EVENT   0x82
+
+#define UAS_PIPE_ID_COMMAND         0x01
+#define UAS_PIPE_ID_STATUS          0x02
+#define UAS_PIPE_ID_DATA_IN         0x03
+#define UAS_PIPE_ID_DATA_OUT        0x04
+
+typedef struct {
+    uint8_t    id;
+    uint8_t    reserved;
+    uint16_t   tag;
+} QEMU_PACKED  uas_ui_header;
+
+typedef struct {
+    uint8_t    prio_taskattr;   /* 6:3 priority, 2:0 task attribute   */
+    uint8_t    reserved_1;
+    uint8_t    add_cdb_length;  /* 7:2 additional adb length (dwords) */
+    uint8_t    reserved_2;
+    uint64_t   lun;
+    uint8_t    cdb[16];
+    uint8_t    add_cdb[];
+} QEMU_PACKED  uas_ui_command;
+
+typedef struct {
+    uint16_t   status_qualifier;
+    uint8_t    status;
+    uint8_t    reserved[7];
+    uint16_t   sense_length;
+    uint8_t    sense_data[18];
+} QEMU_PACKED  uas_ui_sense;
+
+typedef struct {
+    uint16_t   add_response_info;
+    uint8_t    response_code;
+} QEMU_PACKED  uas_ui_response;
+
+typedef struct {
+    uint8_t    function;
+    uint8_t    reserved;
+    uint16_t   task_tag;
+    uint64_t   lun;
+} QEMU_PACKED  uas_ui_task_mgmt;
+
+typedef struct {
+    uas_ui_header  hdr;
+    union {
+        uas_ui_command   command;
+        uas_ui_sense     sense;
+        uas_ui_task_mgmt task;
+        uas_ui_response  response;
+    };
+} QEMU_PACKED  uas_ui;
+
+/* --------------------------------------------------------------------- */
+
+typedef struct UASDevice UASDevice;
+typedef struct UASRequest UASRequest;
+typedef struct UASStatus UASStatus;
+
+struct UASDevice {
+    USBDevice                 dev;
+    SCSIBus                   bus;
+    UASRequest                *datain;
+    UASRequest                *dataout;
+    USBPacket                 *status;
+    QEMUBH                    *status_bh;
+    QTAILQ_HEAD(, UASStatus)  results;
+    QTAILQ_HEAD(, UASRequest) requests;
+};
+
+struct UASRequest {
+    uint16_t     tag;
+    uint64_t     lun;
+    UASDevice    *uas;
+    SCSIDevice   *dev;
+    SCSIRequest  *req;
+    USBPacket    *data;
+    bool         data_async;
+    bool         active;
+    bool         complete;
+    uint32_t     buf_off;
+    uint32_t     buf_size;
+    uint32_t     data_off;
+    uint32_t     data_size;
+    QTAILQ_ENTRY(UASRequest)  next;
+};
+
+struct UASStatus {
+    uas_ui                    status;
+    uint32_t                  length;
+    QTAILQ_ENTRY(UASStatus)   next;
+};
+
+/* --------------------------------------------------------------------- */
+
+enum {
+    STR_MANUFACTURER = 1,
+    STR_PRODUCT,
+    STR_SERIALNUMBER,
+    STR_CONFIG_HIGH,
+};
+
+static const USBDescStrings desc_strings = {
+    [STR_MANUFACTURER] = "QEMU",
+    [STR_PRODUCT]      = "USB Attached SCSI HBA",
+    [STR_SERIALNUMBER] = "27842",
+    [STR_CONFIG_HIGH]  = "High speed config (usb 2.0)",
+};
+
+static const USBDescIface desc_iface_high = {
+    .bInterfaceNumber              = 0,
+    .bNumEndpoints                 = 4,
+    .bInterfaceClass               = USB_CLASS_MASS_STORAGE,
+    .bInterfaceSubClass            = 0x06, /* SCSI */
+    .bInterfaceProtocol            = 0x62, /* UAS  */
+    .eps = (USBDescEndpoint[]) {
+        {
+            .bEndpointAddress      = USB_DIR_OUT | UAS_PIPE_ID_COMMAND,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 512,
+            .extra = (uint8_t[]) {
+                0x04,  /*  u8  bLength */
+                0x24,  /*  u8  bDescriptorType */
+                UAS_PIPE_ID_COMMAND,
+                0x00,  /*  u8  bReserved */
+            },
+        },{
+            .bEndpointAddress      = USB_DIR_IN | UAS_PIPE_ID_STATUS,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 512,
+            .extra = (uint8_t[]) {
+                0x04,  /*  u8  bLength */
+                0x24,  /*  u8  bDescriptorType */
+                UAS_PIPE_ID_STATUS,
+                0x00,  /*  u8  bReserved */
+            },
+        },{
+            .bEndpointAddress      = USB_DIR_IN | UAS_PIPE_ID_DATA_IN,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 512,
+            .extra = (uint8_t[]) {
+                0x04,  /*  u8  bLength */
+                0x24,  /*  u8  bDescriptorType */
+                UAS_PIPE_ID_DATA_IN,
+                0x00,  /*  u8  bReserved */
+            },
+        },{
+            .bEndpointAddress      = USB_DIR_OUT | UAS_PIPE_ID_DATA_OUT,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 512,
+            .extra = (uint8_t[]) {
+                0x04,  /*  u8  bLength */
+                0x24,  /*  u8  bDescriptorType */
+                UAS_PIPE_ID_DATA_OUT,
+                0x00,  /*  u8  bReserved */
+            },
+        },
+    }
+};
+
+static const USBDescDevice desc_device_high = {
+    .bcdUSB                        = 0x0200,
+    .bMaxPacketSize0               = 64,
+    .bNumConfigurations            = 1,
+    .confs = (USBDescConfig[]) {
+        {
+            .bNumInterfaces        = 1,
+            .bConfigurationValue   = 1,
+            .iConfiguration        = STR_CONFIG_HIGH,
+            .bmAttributes          = 0xc0,
+            .nif = 1,
+            .ifs = &desc_iface_high,
+        },
+    },
+};
+
+static const USBDesc desc = {
+    .id = {
+        .idVendor          = 0x46f4, /* CRC16() of "QEMU" */
+        .idProduct         = 0x0002,
+        .bcdDevice         = 0,
+        .iManufacturer     = STR_MANUFACTURER,
+        .iProduct          = STR_PRODUCT,
+        .iSerialNumber     = STR_SERIALNUMBER,
+    },
+    .high = &desc_device_high,
+    .str  = desc_strings,
+};
+
+/* --------------------------------------------------------------------- */
+
+static UASStatus *usb_uas_alloc_status(uint8_t id, uint16_t tag)
+{
+    UASStatus *st = g_new0(UASStatus, 1);
+
+    st->status.hdr.id = id;
+    st->status.hdr.tag = cpu_to_be16(tag);
+    st->length = sizeof(uas_ui_header);
+    return st;
+}
+
+static void usb_uas_send_status_bh(void *opaque)
+{
+    UASDevice *uas = opaque;
+    UASStatus *st = QTAILQ_FIRST(&uas->results);
+    USBPacket *p = uas->status;
+
+    assert(p != NULL);
+    assert(st != NULL);
+
+    uas->status = NULL;
+    usb_packet_copy(p, &st->status, st->length);
+    p->result = st->length;
+    QTAILQ_REMOVE(&uas->results, st, next);
+    g_free(st);
+
+    usb_packet_complete(&uas->dev, p);
+}
+
+static void usb_uas_queue_status(UASDevice *uas, UASStatus *st, int length)
+{
+    st->length += length;
+    QTAILQ_INSERT_TAIL(&uas->results, st, next);
+    if (uas->status) {
+        /*
+         * Just schedule bh make sure any in-flight data transaction
+         * is finished before completing (sending) the status packet.
+         */
+        qemu_bh_schedule(uas->status_bh);
+    } else {
+        USBEndpoint *ep = usb_ep_get(&uas->dev, USB_TOKEN_IN,
+                                     UAS_PIPE_ID_STATUS);
+        usb_wakeup(ep);
+    }
+}
+
+static void usb_uas_queue_response(UASDevice *uas, uint16_t tag,
+                                   uint8_t code, uint16_t add_info)
+{
+    UASStatus *st = usb_uas_alloc_status(UAS_UI_RESPONSE, tag);
+
+    trace_usb_uas_response(uas->dev.addr, tag, code);
+    st->status.response.response_code = code;
+    st->status.response.add_response_info = cpu_to_be16(add_info);
+    usb_uas_queue_status(uas, st, sizeof(uas_ui_response));
+}
+
+static void usb_uas_queue_sense(UASRequest *req, uint8_t status)
+{
+    UASStatus *st = usb_uas_alloc_status(UAS_UI_SENSE, req->tag);
+    int len, slen = 0;
+
+    trace_usb_uas_sense(req->uas->dev.addr, req->tag, status);
+    st->status.sense.status = status;
+    st->status.sense.status_qualifier = cpu_to_be16(0);
+    if (status != GOOD) {
+        slen = scsi_req_get_sense(req->req, st->status.sense.sense_data,
+                                  sizeof(st->status.sense.sense_data));
+        st->status.sense.sense_length = cpu_to_be16(slen);
+    }
+    len = sizeof(uas_ui_sense) - sizeof(st->status.sense.sense_data) + slen;
+    usb_uas_queue_status(req->uas, st, len);
+}
+
+static void usb_uas_queue_read_ready(UASRequest *req)
+{
+    UASStatus *st = usb_uas_alloc_status(UAS_UI_READ_READY, req->tag);
+
+    trace_usb_uas_read_ready(req->uas->dev.addr, req->tag);
+    usb_uas_queue_status(req->uas, st, 0);
+}
+
+static void usb_uas_queue_write_ready(UASRequest *req)
+{
+    UASStatus *st = usb_uas_alloc_status(UAS_UI_WRITE_READY, req->tag);
+
+    trace_usb_uas_write_ready(req->uas->dev.addr, req->tag);
+    usb_uas_queue_status(req->uas, st, 0);
+}
+
+/* --------------------------------------------------------------------- */
+
+static int usb_uas_get_lun(uint64_t lun64)
+{
+    return (lun64 >> 48) & 0xff;
+}
+
+static SCSIDevice *usb_uas_get_dev(UASDevice *uas, uint64_t lun64)
+{
+    if ((lun64 >> 56) != 0x00) {
+        return NULL;
+    }
+    return scsi_device_find(&uas->bus, 0, 0, usb_uas_get_lun(lun64));
+}
+
+static void usb_uas_complete_data_packet(UASRequest *req)
+{
+    USBPacket *p;
+
+    if (!req->data_async) {
+        return;
+    }
+    p = req->data;
+    req->data = NULL;
+    req->data_async = false;
+    usb_packet_complete(&req->uas->dev, p);
+}
+
+static void usb_uas_copy_data(UASRequest *req)
+{
+    uint32_t length;
+
+    length = MIN(req->buf_size - req->buf_off,
+                 req->data->iov.size - req->data->result);
+    trace_usb_uas_xfer_data(req->uas->dev.addr, req->tag, length,
+                            req->data->result, req->data->iov.size,
+                            req->buf_off, req->buf_size);
+    usb_packet_copy(req->data, scsi_req_get_buf(req->req) + req->buf_off,
+                    length);
+    req->buf_off += length;
+    req->data_off += length;
+
+    if (req->data->result == req->data->iov.size) {
+        usb_uas_complete_data_packet(req);
+    }
+    if (req->buf_size && req->buf_off == req->buf_size) {
+        req->buf_off = 0;
+        req->buf_size = 0;
+        scsi_req_continue(req->req);
+    }
+}
+
+static void usb_uas_start_next_transfer(UASDevice *uas)
+{
+    UASRequest *req;
+
+    QTAILQ_FOREACH(req, &uas->requests, next) {
+        if (req->active || req->complete) {
+            continue;
+        }
+        if (req->req->cmd.mode == SCSI_XFER_FROM_DEV && uas->datain == NULL) {
+            uas->datain = req;
+            usb_uas_queue_read_ready(req);
+            req->active = true;
+            return;
+        }
+        if (req->req->cmd.mode == SCSI_XFER_TO_DEV && uas->dataout == NULL) {
+            uas->dataout = req;
+            usb_uas_queue_write_ready(req);
+            req->active = true;
+            return;
+        }
+    }
+}
+
+static UASRequest *usb_uas_alloc_request(UASDevice *uas, uas_ui *ui)
+{
+    UASRequest *req;
+
+    req = g_new0(UASRequest, 1);
+    req->uas = uas;
+    req->tag = be16_to_cpu(ui->hdr.tag);
+    req->lun = be64_to_cpu(ui->command.lun);
+    req->dev = usb_uas_get_dev(req->uas, req->lun);
+    return req;
+}
+
+static void usb_uas_scsi_free_request(SCSIBus *bus, void *priv)
+{
+    UASRequest *req = priv;
+    UASDevice *uas = req->uas;
+
+    if (req == uas->datain) {
+        uas->datain = NULL;
+    }
+    if (req == uas->dataout) {
+        uas->dataout = NULL;
+    }
+    QTAILQ_REMOVE(&uas->requests, req, next);
+    g_free(req);
+}
+
+static UASRequest *usb_uas_find_request(UASDevice *uas, uint16_t tag)
+{
+    UASRequest *req;
+
+    QTAILQ_FOREACH(req, &uas->requests, next) {
+        if (req->tag == tag) {
+            return req;
+        }
+    }
+    return NULL;
+}
+
+static void usb_uas_scsi_transfer_data(SCSIRequest *r, uint32_t len)
+{
+    UASRequest *req = r->hba_private;
+
+    trace_usb_uas_scsi_data(req->uas->dev.addr, req->tag, len);
+    req->buf_off = 0;
+    req->buf_size = len;
+    if (req->data) {
+        usb_uas_copy_data(req);
+    } else {
+        usb_uas_start_next_transfer(req->uas);
+    }
+}
+
+static void usb_uas_scsi_command_complete(SCSIRequest *r,
+                                          uint32_t status, size_t resid)
+{
+    UASRequest *req = r->hba_private;
+    UASDevice *uas = req->uas;
+
+    trace_usb_uas_scsi_complete(req->uas->dev.addr, req->tag, status, resid);
+    req->complete = true;
+    if (req->data) {
+        usb_uas_complete_data_packet(req);
+    }
+    usb_uas_queue_sense(req, status);
+    scsi_req_unref(req->req);
+    usb_uas_start_next_transfer(uas);
+}
+
+static void usb_uas_scsi_request_cancelled(SCSIRequest *r)
+{
+    UASRequest *req = r->hba_private;
+
+    /* FIXME: queue notification to status pipe? */
+    scsi_req_unref(req->req);
+}
+
+static const struct SCSIBusInfo usb_uas_scsi_info = {
+    .tcq = true,
+    .max_target = 0,
+    .max_lun = 255,
+
+    .transfer_data = usb_uas_scsi_transfer_data,
+    .complete = usb_uas_scsi_command_complete,
+    .cancel = usb_uas_scsi_request_cancelled,
+    .free_request = usb_uas_scsi_free_request,
+};
+
+/* --------------------------------------------------------------------- */
+
+static void usb_uas_handle_reset(USBDevice *dev)
+{
+    UASDevice *uas = DO_UPCAST(UASDevice, dev, dev);
+    UASRequest *req, *nreq;
+    UASStatus *st, *nst;
+
+    trace_usb_uas_reset(dev->addr);
+    QTAILQ_FOREACH_SAFE(req, &uas->requests, next, nreq) {
+        scsi_req_cancel(req->req);
+    }
+    QTAILQ_FOREACH_SAFE(st, &uas->results, next, nst) {
+        QTAILQ_REMOVE(&uas->results, st, next);
+        g_free(st);
+    }
+}
+
+static int usb_uas_handle_control(USBDevice *dev, USBPacket *p,
+               int request, int value, int index, int length, uint8_t *data)
+{
+    int ret;
+
+    ret = usb_desc_handle_control(dev, p, request, value, index, length, data);
+    if (ret >= 0) {
+        return ret;
+    }
+    fprintf(stderr, "%s: unhandled control request\n", __func__);
+    return USB_RET_STALL;
+}
+
+static void usb_uas_cancel_io(USBDevice *dev, USBPacket *p)
+{
+    UASDevice *uas = DO_UPCAST(UASDevice, dev, dev);
+    UASRequest *req, *nreq;
+
+    if (uas->status == p) {
+        uas->status = NULL;
+        qemu_bh_cancel(uas->status_bh);
+        return;
+    }
+    QTAILQ_FOREACH_SAFE(req, &uas->requests, next, nreq) {
+        if (req->data == p) {
+            req->data = NULL;
+            return;
+        }
+    }
+    assert(!"canceled usb packet not found");
+}
+
+static void usb_uas_command(UASDevice *uas, uas_ui *ui)
+{
+    UASRequest *req;
+    uint32_t len;
+
+    req = usb_uas_find_request(uas, be16_to_cpu(ui->hdr.tag));
+    if (req) {
+        goto overlapped_tag;
+    }
+    req = usb_uas_alloc_request(uas, ui);
+    if (req->dev == NULL) {
+        goto bad_target;
+    }
+
+    trace_usb_uas_command(uas->dev.addr, req->tag,
+                          usb_uas_get_lun(req->lun),
+                          req->lun >> 32, req->lun & 0xffffffff);
+    QTAILQ_INSERT_TAIL(&uas->requests, req, next);
+    req->req = scsi_req_new(req->dev, req->tag,
+                            usb_uas_get_lun(req->lun),
+                            ui->command.cdb, req);
+    len = scsi_req_enqueue(req->req);
+    if (len) {
+        req->data_size = len;
+        scsi_req_continue(req->req);
+    }
+    return;
+
+overlapped_tag:
+    usb_uas_queue_response(uas, req->tag, UAS_RC_OVERLAPPED_TAG, 0);
+    return;
+
+bad_target:
+    /*
+     * FIXME: Seems to upset linux, is this wrong?
+     * NOTE: Happens only with no scsi devices at the bus, not sure
+     *       this is a valid UAS setup in the first place.
+     */
+    usb_uas_queue_response(uas, req->tag, UAS_RC_INVALID_INFO_UNIT, 0);
+    g_free(req);
+    return;
+}
+
+static void usb_uas_task(UASDevice *uas, uas_ui *ui)
+{
+    uint16_t tag = be16_to_cpu(ui->hdr.tag);
+    uint64_t lun64 = be64_to_cpu(ui->task.lun);
+    SCSIDevice *dev = usb_uas_get_dev(uas, lun64);
+    int lun = usb_uas_get_lun(lun64);
+    UASRequest *req;
+    uint16_t task_tag;
+
+    req = usb_uas_find_request(uas, be16_to_cpu(ui->hdr.tag));
+    if (req) {
+        goto overlapped_tag;
+    }
+
+    switch (ui->task.function) {
+    case UAS_TMF_ABORT_TASK:
+        task_tag = be16_to_cpu(ui->task.task_tag);
+        trace_usb_uas_tmf_abort_task(uas->dev.addr, tag, task_tag);
+        if (dev == NULL) {
+            goto bad_target;
+        }
+        if (dev->lun != lun) {
+            goto incorrect_lun;
+        }
+        req = usb_uas_find_request(uas, task_tag);
+        if (req && req->dev == dev) {
+            scsi_req_cancel(req->req);
+        }
+        usb_uas_queue_response(uas, tag, UAS_RC_TMF_COMPLETE, 0);
+        break;
+
+    case UAS_TMF_LOGICAL_UNIT_RESET:
+        trace_usb_uas_tmf_logical_unit_reset(uas->dev.addr, tag, lun);
+        if (dev == NULL) {
+            goto bad_target;
+        }
+        if (dev->lun != lun) {
+            goto incorrect_lun;
+        }
+        qdev_reset_all(&dev->qdev);
+        usb_uas_queue_response(uas, tag, UAS_RC_TMF_COMPLETE, 0);
+        break;
+
+    default:
+        trace_usb_uas_tmf_unsupported(uas->dev.addr, tag, ui->task.function);
+        usb_uas_queue_response(uas, tag, UAS_RC_TMF_NOT_SUPPORTED, 0);
+        break;
+    }
+    return;
+
+overlapped_tag:
+    usb_uas_queue_response(uas, req->tag, UAS_RC_OVERLAPPED_TAG, 0);
+    return;
+
+bad_target:
+    /* FIXME: correct?  [see long comment in usb_uas_command()] */
+    usb_uas_queue_response(uas, tag, UAS_RC_INVALID_INFO_UNIT, 0);
+    return;
+
+incorrect_lun:
+    usb_uas_queue_response(uas, tag, UAS_RC_INCORRECT_LUN, 0);
+    return;
+}
+
+static int usb_uas_handle_data(USBDevice *dev, USBPacket *p)
+{
+    UASDevice *uas = DO_UPCAST(UASDevice, dev, dev);
+    uas_ui ui;
+    UASStatus *st;
+    UASRequest *req;
+    int length, ret = 0;
+
+    switch (p->ep->nr) {
+    case UAS_PIPE_ID_COMMAND:
+        length = MIN(sizeof(ui), p->iov.size);
+        usb_packet_copy(p, &ui, length);
+        switch (ui.hdr.id) {
+        case UAS_UI_COMMAND:
+            usb_uas_command(uas, &ui);
+            ret = length;
+            break;
+        case UAS_UI_TASK_MGMT:
+            usb_uas_task(uas, &ui);
+            ret = length;
+            break;
+        default:
+            fprintf(stderr, "%s: unknown command ui: id 0x%x\n",
+                    __func__, ui.hdr.id);
+            ret = USB_RET_STALL;
+            break;
+        }
+        break;
+    case UAS_PIPE_ID_STATUS:
+        st = QTAILQ_FIRST(&uas->results);
+        if (st == NULL) {
+            assert(uas->status == NULL);
+            uas->status = p;
+            ret = USB_RET_ASYNC;
+            break;
+        }
+        usb_packet_copy(p, &st->status, st->length);
+        ret = st->length;
+        QTAILQ_REMOVE(&uas->results, st, next);
+        g_free(st);
+        break;
+    case UAS_PIPE_ID_DATA_IN:
+    case UAS_PIPE_ID_DATA_OUT:
+        req = (p->ep->nr == UAS_PIPE_ID_DATA_IN) ? uas->datain : uas->dataout;
+        if (req == NULL) {
+            fprintf(stderr, "%s: no inflight request\n", __func__);
+            ret = USB_RET_STALL;
+            break;
+        }
+        scsi_req_ref(req->req);
+        req->data = p;
+        usb_uas_copy_data(req);
+        if (p->result == p->iov.size || req->complete) {
+            req->data = NULL;
+            ret = p->result;
+        } else {
+            req->data_async = true;
+            ret = USB_RET_ASYNC;
+        }
+        scsi_req_unref(req->req);
+        usb_uas_start_next_transfer(uas);
+        break;
+    default:
+        fprintf(stderr, "%s: invalid endpoint %d\n", __func__, p->ep->nr);
+        ret = USB_RET_STALL;
+        break;
+    }
+    return ret;
+}
+
+static void usb_uas_handle_destroy(USBDevice *dev)
+{
+    UASDevice *uas = DO_UPCAST(UASDevice, dev, dev);
+
+    qemu_bh_delete(uas->status_bh);
+}
+
+static int usb_uas_init(USBDevice *dev)
+{
+    UASDevice *uas = DO_UPCAST(UASDevice, dev, dev);
+
+    usb_desc_create_serial(dev);
+    usb_desc_init(dev);
+
+    QTAILQ_INIT(&uas->results);
+    QTAILQ_INIT(&uas->requests);
+    uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas);
+
+    scsi_bus_new(&uas->bus, &uas->dev.qdev, &usb_uas_scsi_info);
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_usb_uas = {
+    .name = "usb-uas",
+    .unmigratable = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_USB_DEVICE(dev, UASDevice),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void usb_uas_class_initfn(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
+
+    uc->init           = usb_uas_init;
+    uc->product_desc   = desc_strings[STR_PRODUCT];
+    uc->usb_desc       = &desc;
+    uc->cancel_packet  = usb_uas_cancel_io;
+    uc->handle_attach  = usb_desc_attach;
+    uc->handle_reset   = usb_uas_handle_reset;
+    uc->handle_control = usb_uas_handle_control;
+    uc->handle_data    = usb_uas_handle_data;
+    uc->handle_destroy = usb_uas_handle_destroy;
+    dc->fw_name = "storage";
+    dc->vmsd = &vmstate_usb_uas;
+}
+
+static TypeInfo uas_info = {
+    .name          = "usb-uas",
+    .parent        = TYPE_USB_DEVICE,
+    .instance_size = sizeof(UASDevice),
+    .class_init    = usb_uas_class_initfn,
+};
+
+static void usb_uas_register_types(void)
+{
+    type_register_static(&uas_info);
+}
+
+type_init(usb_uas_register_types)
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 1582c2c69e..b043e7c23e 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -365,6 +365,7 @@ struct EHCIQueue {
     uint32_t seen;
     uint64_t ts;
     int async;
+    int revalidate;
 
     /* cached data from guest - needs to be flushed
      * when guest removes an entry (doorbell, handshake sequence)
@@ -419,6 +420,7 @@ struct EHCIState {
     USBPort ports[NB_PORTS];
     USBPort *companion_ports[NB_PORTS];
     uint32_t usbsts_pending;
+    uint32_t usbsts_frindex;
     EHCIQueueHead aqueues;
     EHCIQueueHead pqueues;
 
@@ -557,34 +559,45 @@ static inline void ehci_clear_usbsts(EHCIState *s, int mask)
     s->usbsts &= ~mask;
 }
 
-static inline void ehci_set_interrupt(EHCIState *s, int intr)
+/* update irq line */
+static inline void ehci_update_irq(EHCIState *s)
 {
     int level = 0;
 
-    // TODO honour interrupt threshold requests
-
-    ehci_set_usbsts(s, intr);
-
     if ((s->usbsts & USBINTR_MASK) & s->usbintr) {
         level = 1;
     }
 
-    trace_usb_ehci_interrupt(level, s->usbsts, s->usbintr);
+    trace_usb_ehci_irq(level, s->frindex, s->usbsts, s->usbintr);
     qemu_set_irq(s->irq, level);
 }
 
-static inline void ehci_record_interrupt(EHCIState *s, int intr)
+/* flag interrupt condition */
+static inline void ehci_raise_irq(EHCIState *s, int intr)
 {
     s->usbsts_pending |= intr;
 }
 
-static inline void ehci_commit_interrupt(EHCIState *s)
+/*
+ * Commit pending interrupts (added via ehci_raise_irq),
+ * at the rate allowed by "Interrupt Threshold Control".
+ */
+static inline void ehci_commit_irq(EHCIState *s)
 {
+    uint32_t itc;
+
     if (!s->usbsts_pending) {
         return;
     }
-    ehci_set_interrupt(s, s->usbsts_pending);
+    if (s->usbsts_frindex > s->frindex) {
+        return;
+    }
+
+    itc = (s->usbcmd >> 16) & 0xff;
+    s->usbsts |= s->usbsts_pending;
     s->usbsts_pending = 0;
+    s->usbsts_frindex = s->frindex + itc;
+    ehci_update_irq(s);
 }
 
 static void ehci_update_halt(EHCIState *s)
@@ -775,7 +788,18 @@ static EHCIQueue *ehci_find_queue_by_qh(EHCIState *ehci, uint32_t addr,
     return NULL;
 }
 
-static void ehci_queues_rip_unused(EHCIState *ehci, int async, int flush)
+static void ehci_queues_tag_unused_async(EHCIState *ehci)
+{
+    EHCIQueue *q;
+
+    QTAILQ_FOREACH(q, &ehci->aqueues, next) {
+        if (!q->seen) {
+            q->revalidate = 1;
+        }
+    }
+}
+
+static void ehci_queues_rip_unused(EHCIState *ehci, int async)
 {
     EHCIQueueHead *head = async ? &ehci->aqueues : &ehci->pqueues;
     uint64_t maxage = FRAME_TIMER_NS * ehci->maxframes * 4;
@@ -787,7 +811,7 @@ static void ehci_queues_rip_unused(EHCIState *ehci, int async, int flush)
             q->ts = ehci->last_run_ns;
             continue;
         }
-        if (!flush && ehci->last_run_ns < q->ts + maxage) {
+        if (ehci->last_run_ns < q->ts + maxage) {
             continue;
         }
         ehci_free_queue(q);
@@ -837,7 +861,8 @@ static void ehci_attach(USBPort *port)
     *portsc |= PORTSC_CONNECT;
     *portsc |= PORTSC_CSC;
 
-    ehci_set_interrupt(s, USBSTS_PCD);
+    ehci_raise_irq(s, USBSTS_PCD);
+    ehci_commit_irq(s);
 }
 
 static void ehci_detach(USBPort *port)
@@ -866,7 +891,8 @@ static void ehci_detach(USBPort *port)
     *portsc &= ~(PORTSC_CONNECT|PORTSC_PED);
     *portsc |= PORTSC_CSC;
 
-    ehci_set_interrupt(s, USBSTS_PCD);
+    ehci_raise_irq(s, USBSTS_PCD);
+    ehci_commit_irq(s);
 }
 
 static void ehci_child_detach(USBPort *port, USBDevice *child)
@@ -893,10 +919,11 @@ static void ehci_wakeup(USBPort *port)
         USBPort *companion = s->companion_ports[port->index];
         if (companion->ops->wakeup) {
             companion->ops->wakeup(companion);
-        } else {
-            qemu_bh_schedule(s->async_bh);
         }
+        return;
     }
+
+    qemu_bh_schedule(s->async_bh);
 }
 
 static int ehci_register_companion(USBBus *bus, USBPort *ports[],
@@ -984,6 +1011,8 @@ static void ehci_reset(void *opaque)
 
     s->usbcmd = NB_MAXINTRATE << USBCMD_ITC_SH;
     s->usbsts = USBSTS_HALT;
+    s->usbsts_pending = 0;
+    s->usbsts_frindex = 0;
 
     s->astate = EST_INACTIVE;
     s->pstate = EST_INACTIVE;
@@ -1175,7 +1204,7 @@ static void ehci_mem_writel(void *ptr, target_phys_addr_t addr, uint32_t val)
         val &= USBSTS_RO_MASK;              // bits 6 through 31 are RO
         ehci_clear_usbsts(s, val);          // bits 0 through 5 are R/WC
         val = s->usbsts;
-        ehci_set_interrupt(s, 0);
+        ehci_update_irq(s);
         break;
 
     case USBINTR:
@@ -1246,6 +1275,23 @@ static inline int put_dwords(EHCIState *ehci, uint32_t addr,
     return 1;
 }
 
+/*
+ *  Write the qh back to guest physical memory.  This step isn't
+ *  in the EHCI spec but we need to do it since we don't share
+ *  physical memory with our guest VM.
+ *
+ *  The first three dwords are read-only for the EHCI, so skip them
+ *  when writing back the qh.
+ */
+static void ehci_flush_qh(EHCIQueue *q)
+{
+    uint32_t *qh = (uint32_t *) &q->qh;
+    uint32_t dwords = sizeof(EHCIqh) >> 2;
+    uint32_t addr = NLPTR_GET(q->qhaddr);
+
+    put_dwords(q->ehci, addr + 3 * sizeof(uint32_t), qh + 3, dwords - 3);
+}
+
 // 4.10.2
 
 static int ehci_qh_do_overlay(EHCIQueue *q)
@@ -1293,8 +1339,7 @@ static int ehci_qh_do_overlay(EHCIQueue *q)
     q->qh.bufptr[1] &= ~BUFPTR_CPROGMASK_MASK;
     q->qh.bufptr[2] &= ~BUFPTR_FRAMETAG_MASK;
 
-    put_dwords(q->ehci, NLPTR_GET(q->qhaddr), (uint32_t *) &q->qh,
-               sizeof(EHCIqh) >> 2);
+    ehci_flush_qh(q);
 
     return 0;
 }
@@ -1390,18 +1435,18 @@ static void ehci_execute_complete(EHCIQueue *q)
         case USB_RET_NODEV:
             q->qh.token |= (QTD_TOKEN_HALT | QTD_TOKEN_XACTERR);
             set_field(&q->qh.token, 0, QTD_TOKEN_CERR);
-            ehci_record_interrupt(q->ehci, USBSTS_ERRINT);
+            ehci_raise_irq(q->ehci, USBSTS_ERRINT);
             break;
         case USB_RET_STALL:
             q->qh.token |= QTD_TOKEN_HALT;
-            ehci_record_interrupt(q->ehci, USBSTS_ERRINT);
+            ehci_raise_irq(q->ehci, USBSTS_ERRINT);
             break;
         case USB_RET_NAK:
             set_field(&q->qh.altnext_qtd, 0, QH_ALTNEXT_NAKCNT);
             return; /* We're not done yet with this transaction */
         case USB_RET_BABBLE:
             q->qh.token |= (QTD_TOKEN_HALT | QTD_TOKEN_BABBLE);
-            ehci_record_interrupt(q->ehci, USBSTS_ERRINT);
+            ehci_raise_irq(q->ehci, USBSTS_ERRINT);
             break;
         default:
             /* should not be triggerable */
@@ -1412,7 +1457,7 @@ static void ehci_execute_complete(EHCIQueue *q)
     } else if ((p->usb_status > p->tbytes) && (p->pid == USB_TOKEN_IN)) {
         p->usb_status = USB_RET_BABBLE;
         q->qh.token |= (QTD_TOKEN_HALT | QTD_TOKEN_BABBLE);
-        ehci_record_interrupt(q->ehci, USBSTS_ERRINT);
+        ehci_raise_irq(q->ehci, USBSTS_ERRINT);
     } else {
         // TODO check 4.12 for splits
 
@@ -1433,7 +1478,7 @@ static void ehci_execute_complete(EHCIQueue *q)
     q->qh.token &= ~QTD_TOKEN_ACTIVE;
 
     if (q->qh.token & QTD_TOKEN_IOC) {
-        ehci_record_interrupt(q->ehci, USBSTS_INT);
+        ehci_raise_irq(q->ehci, USBSTS_INT);
     }
 }
 
@@ -1568,12 +1613,12 @@ static int ehci_process_itd(EHCIState *ehci,
                     /* 3.3.2: XACTERR is only allowed on IN transactions */
                     if (dir) {
                         itd->transact[i] |= ITD_XACT_XACTERR;
-                        ehci_record_interrupt(ehci, USBSTS_ERRINT);
+                        ehci_raise_irq(ehci, USBSTS_ERRINT);
                     }
                     break;
                 case USB_RET_BABBLE:
                     itd->transact[i] |= ITD_XACT_BABBLE;
-                    ehci_record_interrupt(ehci, USBSTS_ERRINT);
+                    ehci_raise_irq(ehci, USBSTS_ERRINT);
                     break;
                 case USB_RET_NAK:
                     /* no data for us, so do a zero-length transfer */
@@ -1591,7 +1636,7 @@ static int ehci_process_itd(EHCIState *ehci,
                 }
             }
             if (itd->transact[i] & ITD_XACT_IOC) {
-                ehci_record_interrupt(ehci, USBSTS_INT);
+                ehci_raise_irq(ehci, USBSTS_INT);
             }
             itd->transact[i] &= ~ITD_XACT_ACTIVE;
         }
@@ -1600,23 +1645,6 @@ static int ehci_process_itd(EHCIState *ehci,
 }
 
 
-/*
- *  Write the qh back to guest physical memory.  This step isn't
- *  in the EHCI spec but we need to do it since we don't share
- *  physical memory with our guest VM.
- *
- *  The first three dwords are read-only for the EHCI, so skip them
- *  when writing back the qh.
- */
-static void ehci_flush_qh(EHCIQueue *q)
-{
-    uint32_t *qh = (uint32_t *) &q->qh;
-    uint32_t dwords = sizeof(EHCIqh) >> 2;
-    uint32_t addr = NLPTR_GET(q->qhaddr);
-
-    put_dwords(q->ehci, addr + 3 * sizeof(uint32_t), qh + 3, dwords - 3);
-}
-
 /*  This state is the entry point for asynchronous schedule
  *  processing.  Entry here consitutes a EHCI start event state (4.8.5)
  */
@@ -1632,7 +1660,7 @@ static int ehci_state_waitlisthead(EHCIState *ehci,  int async)
         ehci_set_usbsts(ehci, USBSTS_REC);
     }
 
-    ehci_queues_rip_unused(ehci, async, 0);
+    ehci_queues_rip_unused(ehci, async);
 
     /*  Find the head of the list (4.9.1.1) */
     for(i = 0; i < MAX_QH; i++) {
@@ -1717,6 +1745,7 @@ static EHCIQueue *ehci_state_fetchqh(EHCIState *ehci, int async)
     EHCIPacket *p;
     uint32_t entry, devaddr;
     EHCIQueue *q;
+    EHCIqh qh;
 
     entry = ehci_get_fetch_addr(ehci, async);
     q = ehci_find_queue_by_qh(ehci, entry, async);
@@ -1734,7 +1763,17 @@ static EHCIQueue *ehci_state_fetchqh(EHCIState *ehci, int async)
     }
 
     get_dwords(ehci, NLPTR_GET(q->qhaddr),
-               (uint32_t *) &q->qh, sizeof(EHCIqh) >> 2);
+               (uint32_t *) &qh, sizeof(EHCIqh) >> 2);
+    if (q->revalidate && (q->qh.epchar      != qh.epchar ||
+                          q->qh.epcap       != qh.epcap  ||
+                          q->qh.current_qtd != qh.current_qtd)) {
+        ehci_free_queue(q);
+        q = ehci_alloc_queue(ehci, entry, async);
+        q->seen++;
+        p = NULL;
+    }
+    q->qh = qh;
+    q->revalidate = 0;
     ehci_trace_qh(q, NLPTR_GET(q->qhaddr), &q->qh);
 
     devaddr = get_field(q->qh.epchar, QH_EPCHAR_DEVADDR);
@@ -2071,6 +2110,7 @@ out:
 static int ehci_state_writeback(EHCIQueue *q)
 {
     EHCIPacket *p = QTAILQ_FIRST(&q->packets);
+    uint32_t *qtd, addr;
     int again = 0;
 
     /*  Write back the QTD from the QH area */
@@ -2078,8 +2118,9 @@ static int ehci_state_writeback(EHCIQueue *q)
     assert(p->qtdaddr == q->qtdaddr);
 
     ehci_trace_qtd(q, NLPTR_GET(p->qtdaddr), (EHCIqtd *) &q->qh.next_qtd);
-    put_dwords(q->ehci, NLPTR_GET(p->qtdaddr), (uint32_t *) &q->qh.next_qtd,
-               sizeof(EHCIqtd) >> 2);
+    qtd = (uint32_t *) &q->qh.next_qtd;
+    addr = NLPTR_GET(p->qtdaddr);
+    put_dwords(q->ehci, addr + 2 * sizeof(uint32_t), qtd + 2, 2);
     ehci_free_packet(p);
 
     /*
@@ -2183,8 +2224,6 @@ static void ehci_advance_state(EHCIState *ehci, int async)
         }
     }
     while (again);
-
-    ehci_commit_interrupt(ehci);
 }
 
 static void ehci_advance_async_state(EHCIState *ehci)
@@ -2227,10 +2266,10 @@ static void ehci_advance_async_state(EHCIState *ehci)
          */
         if (ehci->usbcmd & USBCMD_IAAD) {
             /* Remove all unseen qhs from the async qhs queue */
-            ehci_queues_rip_unused(ehci, async, 1);
+            ehci_queues_tag_unused_async(ehci);
             DPRINTF("ASYNC: doorbell request acknowledged\n");
             ehci->usbcmd &= ~USBCMD_IAAD;
-            ehci_set_interrupt(ehci, USBSTS_IAA);
+            ehci_raise_irq(ehci, USBSTS_IAA);
         }
         break;
 
@@ -2280,7 +2319,7 @@ static void ehci_advance_periodic_state(EHCIState *ehci)
         ehci_set_fetch_addr(ehci, async,entry);
         ehci_set_state(ehci, async, EST_FETCHENTRY);
         ehci_advance_state(ehci, async);
-        ehci_queues_rip_unused(ehci, async, 0);
+        ehci_queues_rip_unused(ehci, async);
         break;
 
     default:
@@ -2303,12 +2342,17 @@ static void ehci_update_frindex(EHCIState *ehci, int frames)
         ehci->frindex += 8;
 
         if (ehci->frindex == 0x00002000) {
-            ehci_set_interrupt(ehci, USBSTS_FLR);
+            ehci_raise_irq(ehci, USBSTS_FLR);
         }
 
         if (ehci->frindex == 0x00004000) {
-            ehci_set_interrupt(ehci, USBSTS_FLR);
+            ehci_raise_irq(ehci, USBSTS_FLR);
             ehci->frindex = 0;
+            if (ehci->usbsts_frindex > 0x00004000) {
+                ehci->usbsts_frindex -= 0x00004000;
+            } else {
+                ehci->usbsts_frindex = 0;
+            }
         }
     }
 }
@@ -2316,7 +2360,7 @@ static void ehci_update_frindex(EHCIState *ehci, int frames)
 static void ehci_frame_timer(void *opaque)
 {
     EHCIState *ehci = opaque;
-    int schedules = 0;
+    int need_timer = 0;
     int64_t expire_time, t_now;
     uint64_t ns_elapsed;
     int frames, skipped_frames;
@@ -2327,8 +2371,8 @@ static void ehci_frame_timer(void *opaque)
     frames = ns_elapsed / FRAME_TIMER_NS;
 
     if (ehci_periodic_enabled(ehci) || ehci->pstate != EST_INACTIVE) {
-        schedules++;
-        expire_time = t_now + (get_ticks_per_sec() / FRAME_TIMER_FREQ);
+        need_timer++;
+        ehci->async_stepdown = 0;
 
         if (frames > ehci->maxframes) {
             skipped_frames = frames - ehci->maxframes;
@@ -2347,8 +2391,6 @@ static void ehci_frame_timer(void *opaque)
         if (ehci->async_stepdown < ehci->maxframes / 2) {
             ehci->async_stepdown++;
         }
-        expire_time = t_now + (get_ticks_per_sec()
-                               * ehci->async_stepdown / FRAME_TIMER_FREQ);
         ehci_update_frindex(ehci, frames);
         ehci->last_run_ns += FRAME_TIMER_NS * frames;
     }
@@ -2357,11 +2399,19 @@ static void ehci_frame_timer(void *opaque)
      *  called
      */
     if (ehci_async_enabled(ehci) || ehci->astate != EST_INACTIVE) {
-        schedules++;
-        qemu_bh_schedule(ehci->async_bh);
+        need_timer++;
+        ehci_advance_async_state(ehci);
+    }
+
+    ehci_commit_irq(ehci);
+    if (ehci->usbsts_pending) {
+        need_timer++;
+        ehci->async_stepdown = 0;
     }
 
-    if (schedules) {
+    if (need_timer) {
+        expire_time = t_now + (get_ticks_per_sec()
+                               * (ehci->async_stepdown+1) / FRAME_TIMER_FREQ);
         qemu_mod_timer(ehci->frame_timer, expire_time);
     }
 }
@@ -2557,6 +2607,7 @@ static int usb_ehci_initfn(PCIDevice *dev)
     s->async_bh = qemu_bh_new(ehci_async_bh, s);
     QTAILQ_INIT(&s->aqueues);
     QTAILQ_INIT(&s->pqueues);
+    usb_packet_init(&s->ipacket);
 
     qemu_register_reset(ehci_reset, s);
 
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 8f652d2f4a..2aac8a2505 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -388,11 +388,23 @@ static const VMStateDescription vmstate_uhci_port = {
     }
 };
 
+static int uhci_post_load(void *opaque, int version_id)
+{
+    UHCIState *s = opaque;
+
+    if (version_id < 2) {
+        s->expire_time = qemu_get_clock_ns(vm_clock) +
+            (get_ticks_per_sec() / FRAME_TIMER_FREQ);
+    }
+    return 0;
+}
+
 static const VMStateDescription vmstate_uhci = {
     .name = "uhci",
     .version_id = 2,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
+    .post_load = uhci_post_load,
     .fields      = (VMStateField []) {
         VMSTATE_PCI_DEVICE(dev, UHCIState),
         VMSTATE_UINT8_EQUAL(num_ports_vmstate, UHCIState),
diff --git a/hw/usb/host-linux.c b/hw/usb/host-linux.c
index 5479fb5987..d55be878ad 100644
--- a/hw/usb/host-linux.c
+++ b/hw/usb/host-linux.c
@@ -213,7 +213,7 @@ static int is_iso_started(USBHostDevice *s, int pid, int ep)
 
 static void clear_iso_started(USBHostDevice *s, int pid, int ep)
 {
-    trace_usb_host_ep_stop_iso(s->bus_num, s->addr, ep);
+    trace_usb_host_iso_stop(s->bus_num, s->addr, ep);
     get_endp(s, pid, ep)->iso_started = 0;
 }
 
@@ -221,7 +221,7 @@ static void set_iso_started(USBHostDevice *s, int pid, int ep)
 {
     struct endp_data *e = get_endp(s, pid, ep);
 
-    trace_usb_host_ep_start_iso(s->bus_num, s->addr, ep);
+    trace_usb_host_iso_start(s->bus_num, s->addr, ep);
     if (!e->iso_started) {
         e->iso_started = 1;
         e->inflight = 0;
@@ -319,7 +319,8 @@ static void async_complete(void *opaque)
         if (r < 0) {
             if (errno == EAGAIN) {
                 if (urbs > 2) {
-                    fprintf(stderr, "husb: %d iso urbs finished at once\n", urbs);
+                    /* indicates possible latency issues */
+                    trace_usb_host_iso_many_urbs(s->bus_num, s->addr, urbs);
                 }
                 return;
             }
@@ -352,7 +353,8 @@ static void async_complete(void *opaque)
             urbs++;
             inflight = change_iso_inflight(s, pid, ep, -1);
             if (inflight == 0 && is_iso_started(s, pid, ep)) {
-                fprintf(stderr, "husb: out of buffers for iso stream\n");
+                /* can be latency issues, or simply end of stream */
+                trace_usb_host_iso_out_of_bufs(s->bus_num, s->addr, ep);
             }
             continue;
         }
@@ -1136,7 +1138,7 @@ static int usb_linux_update_endp_table(USBHostDevice *s)
     USBDescriptor *d;
     bool active = false;
 
-    usb_ep_init(&s->dev);
+    usb_ep_reset(&s->dev);
 
     for (i = 0;; i += d->bLength) {
         if (i+2 >= s->descr_len) {
@@ -1239,7 +1241,7 @@ static int usb_linux_update_endp_table(USBHostDevice *s)
     return 0;
 
 error:
-    usb_ep_init(&s->dev);
+    usb_ep_reset(&s->dev);
     return 1;
 }
 
@@ -1326,6 +1328,7 @@ static int usb_host_open(USBHostDevice *dev, int bus_num,
         goto fail;
     }
 
+    usb_ep_init(&dev->dev);
     ret = usb_linux_update_endp_table(dev);
     if (ret) {
         goto fail;
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index d949f040d5..10b4fbb3a7 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -1033,6 +1033,8 @@ static int usbredir_handle_status(USBRedirDevice *dev,
     case usb_redir_inval:
         WARNING("got invalid param error from usb-host?\n");
         return USB_RET_NAK;
+    case usb_redir_babble:
+        return USB_RET_BABBLE;
     case usb_redir_ioerror:
     case usb_redir_timeout:
     default:
diff --git a/hw/vga.c b/hw/vga.c
index acb3f7d924..f82ced8e66 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -38,6 +38,9 @@
 
 //#define DEBUG_BOCHS_VBE
 
+/* 16 state changes per vertical frame @60 Hz */
+#define VGA_TEXT_CURSOR_PERIOD_MS       (1000 * 2 * 16 / 60)
+
 /*
  * Video Graphics Array (VGA)
  *
@@ -1300,6 +1303,7 @@ static void vga_draw_text(VGACommonState *s, int full_update)
     uint32_t *ch_attr_ptr;
     vga_draw_glyph8_func *vga_draw_glyph8;
     vga_draw_glyph9_func *vga_draw_glyph9;
+    int64_t now = qemu_get_clock_ms(vm_clock);
 
     /* compute font data address (in plane 2) */
     v = s->sr[VGA_SEQ_CHARACTER_MAP];
@@ -1370,6 +1374,10 @@ static void vga_draw_text(VGACommonState *s, int full_update)
         s->cursor_end = s->cr[VGA_CRTC_CURSOR_END];
     }
     cursor_ptr = s->vram_ptr + (s->start_addr + cursor_offset) * 4;
+    if (now >= s->cursor_blink_time) {
+        s->cursor_blink_time = now + VGA_TEXT_CURSOR_PERIOD_MS / 2;
+        s->cursor_visible_phase = !s->cursor_visible_phase;
+    }
 
     depth_index = get_depth_index(s->ds);
     if (cw == 16)
@@ -1390,7 +1398,7 @@ static void vga_draw_text(VGACommonState *s, int full_update)
         cx_max = -1;
         for(cx = 0; cx < width; cx++) {
             ch_attr = *(uint16_t *)src;
-            if (full_update || ch_attr != *ch_attr_ptr) {
+            if (full_update || ch_attr != *ch_attr_ptr || src == cursor_ptr) {
                 if (cx < cx_min)
                     cx_min = cx;
                 if (cx > cx_max)
@@ -1420,7 +1428,8 @@ static void vga_draw_text(VGACommonState *s, int full_update)
                                     font_ptr, cheight, fgcol, bgcol, dup9);
                 }
                 if (src == cursor_ptr &&
-                    !(s->cr[VGA_CRTC_CURSOR_START] & 0x20)) {
+                    !(s->cr[VGA_CRTC_CURSOR_START] & 0x20) &&
+                    s->cursor_visible_phase) {
                     int line_start, line_last, h;
                     /* draw the cursor */
                     line_start = s->cr[VGA_CRTC_CURSOR_START] & 0x1f;
@@ -1884,6 +1893,7 @@ static void vga_update_display(void *opaque)
         }
         if (graphic_mode != s->graphic_mode) {
             s->graphic_mode = graphic_mode;
+            s->cursor_blink_time = qemu_get_clock_ms(vm_clock);
             full_update = 1;
         }
         switch(graphic_mode) {
diff --git a/hw/vga_int.h b/hw/vga_int.h
index 3b38764a38..8938093682 100644
--- a/hw/vga_int.h
+++ b/hw/vga_int.h
@@ -156,6 +156,8 @@ typedef struct VGACommonState {
     uint32_t last_scr_width, last_scr_height; /* in pixels */
     uint32_t last_depth; /* in bits */
     uint8_t cursor_start, cursor_end;
+    bool cursor_visible_phase;
+    int64_t cursor_blink_time;
     uint32_t cursor_offset;
     unsigned int (*rgb_to_pixel)(unsigned int r,
                                  unsigned int g, unsigned b);
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index d048cef50f..dd1a6506cf 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -77,7 +77,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
         size_t offset = 0;
         uint32_t pfn;
 
-        while (iov_to_buf(elem.out_sg, elem.out_num, &pfn, offset, 4) == 4) {
+        while (iov_to_buf(elem.out_sg, elem.out_num, offset, &pfn, 4) == 4) {
             ram_addr_t pa;
             ram_addr_t addr;
 
@@ -118,7 +118,7 @@ static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
      */
     reset_stats(s);
 
-    while (iov_to_buf(elem->out_sg, elem->out_num, &stat, offset, sizeof(stat))
+    while (iov_to_buf(elem->out_sg, elem->out_num, offset, &stat, sizeof(stat))
            == sizeof(stat)) {
         uint16_t tag = tswap16(stat.tag);
         uint64_t val = tswap64(stat.val);
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 3f190d417e..533aa3d0f3 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -656,8 +656,8 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_
         }
 
         /* copy in packet.  ugh */
-        len = iov_from_buf(sg, elem.in_num,
-                           buf + offset, 0, size - offset);
+        len = iov_from_buf(sg, elem.in_num, 0,
+                           buf + offset, size - offset);
         total += len;
         offset += len;
         /* If buffers can't be merged, at this point we
diff --git a/hw/virtio-scsi.c b/hw/virtio-scsi.c
index e1a767ea78..0a5ac40e2f 100644
--- a/hw/virtio-scsi.c
+++ b/hw/virtio-scsi.c
@@ -405,6 +405,10 @@ static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
     }
 }
 
+static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq)
+{
+}
+
 static void virtio_scsi_command_complete(SCSIRequest *r, uint32_t status,
                                          size_t resid)
 {
@@ -609,7 +613,7 @@ VirtIODevice *virtio_scsi_init(DeviceState *dev, VirtIOSCSIConf *proxyconf)
     s->ctrl_vq = virtio_add_queue(&s->vdev, VIRTIO_SCSI_VQ_SIZE,
                                    virtio_scsi_handle_ctrl);
     s->event_vq = virtio_add_queue(&s->vdev, VIRTIO_SCSI_VQ_SIZE,
-                                   NULL);
+                                   virtio_scsi_handle_event);
     for (i = 0; i < s->conf->num_queues; i++) {
         s->cmd_vqs[i] = virtio_add_queue(&s->vdev, VIRTIO_SCSI_VQ_SIZE,
                                          virtio_scsi_handle_cmd);
diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c
index 96382a4ea1..82073f5dc2 100644
--- a/hw/virtio-serial-bus.c
+++ b/hw/virtio-serial-bus.c
@@ -106,8 +106,8 @@ static size_t write_to_port(VirtIOSerialPort *port,
             break;
         }
 
-        len = iov_from_buf(elem.in_sg, elem.in_num,
-                           buf + offset, 0, size - offset);
+        len = iov_from_buf(elem.in_sg, elem.in_num, 0,
+                           buf + offset, size - offset);
         offset += len;
 
         virtqueue_push(vq, &elem, len);
@@ -454,7 +454,7 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq)
     len = 0;
     buf = NULL;
     while (virtqueue_pop(vq, &elem)) {
-        size_t cur_len, copied;
+        size_t cur_len;
 
         cur_len = iov_size(elem.out_sg, elem.out_num);
         /*
@@ -467,9 +467,9 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq)
             buf = g_malloc(cur_len);
             len = cur_len;
         }
-        copied = iov_to_buf(elem.out_sg, elem.out_num, buf, 0, len);
+        iov_to_buf(elem.out_sg, elem.out_num, 0, buf, cur_len);
 
-        handle_control_message(vser, buf, copied);
+        handle_control_message(vser, buf, cur_len);
         virtqueue_push(vq, &elem, 0);
     }
     g_free(buf);
diff --git a/iov.c b/iov.c
index 0f964939d0..b3330610bb 100644
--- a/iov.c
+++ b/iov.c
@@ -7,6 +7,7 @@
  * Author(s):
  *  Anthony Liguori <aliguori@us.ibm.com>
  *  Amit Shah <amit.shah@redhat.com>
+ *  Michael Tokarev <mjt@tls.msk.ru>
  *
  * This work is licensed under the terms of the GNU GPL, version 2.  See
  * the COPYING file in the top-level directory.
@@ -17,75 +18,69 @@
 
 #include "iov.h"
 
+#ifdef _WIN32
+# include <windows.h>
+# include <winsock2.h>
+#else
+# include <sys/types.h>
+# include <sys/socket.h>
+#endif
+
 size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt,
-                    const void *buf, size_t iov_off, size_t size)
+                    size_t offset, const void *buf, size_t bytes)
 {
-    size_t iovec_off, buf_off;
+    size_t done;
     unsigned int i;
-
-    iovec_off = 0;
-    buf_off = 0;
-    for (i = 0; i < iov_cnt && size; i++) {
-        if (iov_off < (iovec_off + iov[i].iov_len)) {
-            size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off, size);
-
-            memcpy(iov[i].iov_base + (iov_off - iovec_off), buf + buf_off, len);
-
-            buf_off += len;
-            iov_off += len;
-            size -= len;
+    for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+        if (offset < iov[i].iov_len) {
+            size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+            memcpy(iov[i].iov_base + offset, buf + done, len);
+            done += len;
+            offset = 0;
+        } else {
+            offset -= iov[i].iov_len;
         }
-        iovec_off += iov[i].iov_len;
     }
-    return buf_off;
+    assert(offset == 0);
+    return done;
 }
 
 size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
-                  void *buf, size_t iov_off, size_t size)
+                  size_t offset, void *buf, size_t bytes)
 {
-    uint8_t *ptr;
-    size_t iovec_off, buf_off;
+    size_t done;
     unsigned int i;
-
-    ptr = buf;
-    iovec_off = 0;
-    buf_off = 0;
-    for (i = 0; i < iov_cnt && size; i++) {
-        if (iov_off < (iovec_off + iov[i].iov_len)) {
-            size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size);
-
-            memcpy(ptr + buf_off, iov[i].iov_base + (iov_off - iovec_off), len);
-
-            buf_off += len;
-            iov_off += len;
-            size -= len;
+    for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+        if (offset < iov[i].iov_len) {
+            size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+            memcpy(buf + done, iov[i].iov_base + offset, len);
+            done += len;
+            offset = 0;
+        } else {
+            offset -= iov[i].iov_len;
         }
-        iovec_off += iov[i].iov_len;
     }
-    return buf_off;
+    assert(offset == 0);
+    return done;
 }
 
-size_t iov_clear(const struct iovec *iov, const unsigned int iov_cnt,
-                 size_t iov_off, size_t size)
+size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
+                  size_t offset, int fillc, size_t bytes)
 {
-    size_t iovec_off, buf_off;
+    size_t done;
     unsigned int i;
-
-    iovec_off = 0;
-    buf_off = 0;
-    for (i = 0; i < iov_cnt && size; i++) {
-        if (iov_off < (iovec_off + iov[i].iov_len)) {
-            size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size);
-
-            memset(iov[i].iov_base + (iov_off - iovec_off), 0, len);
-
-            buf_off += len;
-            iov_off += len;
-            size -= len;
+    for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+        if (offset < iov[i].iov_len) {
+            size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+            memset(iov[i].iov_base + offset, fillc, len);
+            done += len;
+            offset = 0;
+        } else {
+            offset -= iov[i].iov_len;
         }
-        iovec_off += iov[i].iov_len;
     }
-    return buf_off;
+    assert(offset == 0);
+    return done;
 }
 
 size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt)
@@ -100,6 +95,102 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt)
     return len;
 }
 
+/* helper function for iov_send_recv() */
+static ssize_t
+do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
+{
+#if defined CONFIG_IOVEC && defined CONFIG_POSIX
+    ssize_t ret;
+    struct msghdr msg;
+    memset(&msg, 0, sizeof(msg));
+    msg.msg_iov = iov;
+    msg.msg_iovlen = iov_cnt;
+    do {
+        ret = do_send
+            ? sendmsg(sockfd, &msg, 0)
+            : recvmsg(sockfd, &msg, 0);
+    } while (ret < 0 && errno == EINTR);
+    return ret;
+#else
+    /* else send piece-by-piece */
+    /*XXX Note: windows has WSASend() and WSARecv() */
+    unsigned i = 0;
+    ssize_t ret = 0;
+    while (i < iov_cnt) {
+        ssize_t r = do_send
+            ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0)
+            : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0);
+        if (r > 0) {
+            ret += r;
+        } else if (!r) {
+            break;
+        } else if (errno == EINTR) {
+            continue;
+        } else {
+            /* else it is some "other" error,
+             * only return if there was no data processed. */
+            if (ret == 0) {
+                ret = -1;
+            }
+            break;
+        }
+        i++;
+    }
+    return ret;
+#endif
+}
+
+ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+                      size_t offset, size_t bytes,
+                      bool do_send)
+{
+    ssize_t ret;
+    unsigned si, ei;            /* start and end indexes */
+
+    /* Find the start position, skipping `offset' bytes:
+     * first, skip all full-sized vector elements, */
+    for (si = 0; si < iov_cnt && offset >= iov[si].iov_len; ++si) {
+        offset -= iov[si].iov_len;
+    }
+    if (offset) {
+        assert(si < iov_cnt);
+        /* second, skip `offset' bytes from the (now) first element,
+         * undo it on exit */
+        iov[si].iov_base += offset;
+        iov[si].iov_len -= offset;
+    }
+    /* Find the end position skipping `bytes' bytes: */
+    /* first, skip all full-sized elements */
+    for (ei = si; ei < iov_cnt && iov[ei].iov_len <= bytes; ++ei) {
+        bytes -= iov[ei].iov_len;
+    }
+    if (bytes) {
+        /* second, fixup the last element, and remember
+         * the length we've cut from the end of it in `bytes' */
+        size_t tail;
+        assert(ei < iov_cnt);
+        assert(iov[ei].iov_len > bytes);
+        tail = iov[ei].iov_len - bytes;
+        iov[ei].iov_len = bytes;
+        bytes = tail;  /* bytes is now equal to the tail size */
+        ++ei;
+    }
+
+    ret = do_send_recv(sockfd, iov + si, ei - si, do_send);
+
+    /* Undo the changes above */
+    if (offset) {
+        iov[si].iov_base -= offset;
+        iov[si].iov_len += offset;
+    }
+    if (bytes) {
+        iov[ei-1].iov_len += bytes;
+    }
+
+    return ret;
+}
+
+
 void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
                  FILE *fp, const char *prefix, size_t limit)
 {
diff --git a/iov.h b/iov.h
index 94d2f78284..381f37a546 100644
--- a/iov.h
+++ b/iov.h
@@ -1,10 +1,11 @@
 /*
- * Helpers for getting linearized buffers from iov / filling buffers into iovs
+ * Helpers for using (partial) iovecs.
  *
  * Copyright (C) 2010 Red Hat, Inc.
  *
  * Author(s):
  *  Amit Shah <amit.shah@redhat.com>
+ *  Michael Tokarev <mjt@tls.msk.ru>
  *
  * This work is licensed under the terms of the GNU GPL, version 2.  See
  * the COPYING file in the top-level directory.
@@ -12,12 +13,76 @@
 
 #include "qemu-common.h"
 
+/**
+ * count and return data size, in bytes, of an iovec
+ * starting at `iov' of `iov_cnt' number of elements.
+ */
+size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt);
+
+/**
+ * Copy from single continuous buffer to scatter-gather vector of buffers
+ * (iovec) and back like memcpy() between two continuous memory regions.
+ * Data in single continuous buffer starting at address `buf' and
+ * `bytes' bytes long will be copied to/from an iovec `iov' with
+ * `iov_cnt' number of elements, starting at byte position `offset'
+ * within the iovec.  If the iovec does not contain enough space,
+ * only part of data will be copied, up to the end of the iovec.
+ * Number of bytes actually copied will be returned, which is
+ *  min(bytes, iov_size(iov)-offset)
+ * `Offset' must point to the inside of iovec.
+ * It is okay to use very large value for `bytes' since we're
+ * limited by the size of the iovec anyway, provided that the
+ * buffer pointed to by buf has enough space.  One possible
+ * such "large" value is -1 (sinice size_t is unsigned),
+ * so specifying `-1' as `bytes' means 'up to the end of iovec'.
+ */
 size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt,
-                    const void *buf, size_t iov_off, size_t size);
+                    size_t offset, const void *buf, size_t bytes);
 size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
-                  void *buf, size_t iov_off, size_t size);
-size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt);
-size_t iov_clear(const struct iovec *iov, const unsigned int iov_cnt,
-                 size_t iov_off, size_t size);
+                  size_t offset, void *buf, size_t bytes);
+
+/**
+ * Set data bytes pointed out by iovec `iov' of size `iov_cnt' elements,
+ * starting at byte offset `start', to value `fillc', repeating it
+ * `bytes' number of times.  `Offset' must point to the inside of iovec.
+ * If `bytes' is large enough, only last bytes portion of iovec,
+ * up to the end of it, will be filled with the specified value.
+ * Function return actual number of bytes processed, which is
+ * min(size, iov_size(iov) - offset).
+ * Again, it is okay to use large value for `bytes' to mean "up to the end".
+ */
+size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
+                  size_t offset, int fillc, size_t bytes);
+
+/*
+ * Send/recv data from/to iovec buffers directly
+ *
+ * `offset' bytes in the beginning of iovec buffer are skipped and
+ * next `bytes' bytes are used, which must be within data of iovec.
+ *
+ *   r = iov_send_recv(sockfd, iov, iovcnt, offset, bytes, true);
+ *
+ * is logically equivalent to
+ *
+ *   char *buf = malloc(bytes);
+ *   iov_to_buf(iov, iovcnt, offset, buf, bytes);
+ *   r = send(sockfd, buf, bytes, 0);
+ *   free(buf);
+ *
+ * For iov_send_recv() _whole_ area being sent or received
+ * should be within the iovec, not only beginning of it.
+ */
+ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+                      size_t offset, size_t bytes, bool do_send);
+#define iov_recv(sockfd, iov, iov_cnt, offset, bytes) \
+  iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, false)
+#define iov_send(sockfd, iov, iov_cnt, offset, bytes) \
+  iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, true)
+
+/**
+ * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements
+ * in file `fp', prefixing each line with `prefix' and processing not more
+ * than `limit' data bytes.
+ */
 void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
                  FILE *fp, const char *prefix, size_t limit);
diff --git a/kvm-all.c b/kvm-all.c
index add24a14a9..2148b20bdb 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1672,6 +1672,19 @@ int kvm_allows_irq0_override(void)
     return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
 }
 
+void *kvm_vmalloc(ram_addr_t size)
+{
+#ifdef TARGET_S390X
+    void *mem;
+
+    mem = kvm_arch_vmalloc(size);
+    if (mem) {
+        return mem;
+    }
+#endif
+    return qemu_vmalloc(size);
+}
+
 void kvm_setup_guest_memory(void *start, size_t size)
 {
     if (!kvm_has_sync_mmu()) {
diff --git a/kvm.h b/kvm.h
index 99003f459a..2617dd5acd 100644
--- a/kvm.h
+++ b/kvm.h
@@ -70,6 +70,8 @@ int kvm_init_vcpu(CPUArchState *env);
 int kvm_cpu_exec(CPUArchState *env);
 
 #if !defined(CONFIG_USER_ONLY)
+void *kvm_vmalloc(ram_addr_t size);
+void *kvm_arch_vmalloc(ram_addr_t size);
 void kvm_setup_guest_memory(void *start, size_t size);
 
 int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
diff --git a/linux-aio.c b/linux-aio.c
index fa0fbf34aa..ce9b5d4be8 100644
--- a/linux-aio.c
+++ b/linux-aio.c
@@ -63,8 +63,8 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
         } else if (ret >= 0) {
             /* Short reads mean EOF, pad with zeros. */
             if (laiocb->is_read) {
-                qemu_iovec_memset_skip(laiocb->qiov, 0,
-                    laiocb->qiov->size - ret, ret);
+                qemu_iovec_memset(laiocb->qiov, ret, 0,
+                    laiocb->qiov->size - ret);
             } else {
                 ret = -EINVAL;
             }
diff --git a/migration.c b/migration.c
index 3f485d33a5..8db1b433f0 100644
--- a/migration.c
+++ b/migration.c
@@ -131,6 +131,8 @@ MigrationInfo *qmp_query_migrate(Error **errp)
         info->ram->transferred = ram_bytes_transferred();
         info->ram->remaining = ram_bytes_remaining();
         info->ram->total = ram_bytes_total();
+        info->ram->total_time = qemu_get_clock_ms(rt_clock)
+            - s->total_time;
 
         if (blk_mig_active()) {
             info->has_disk = true;
@@ -143,6 +145,13 @@ MigrationInfo *qmp_query_migrate(Error **errp)
     case MIG_STATE_COMPLETED:
         info->has_status = true;
         info->status = g_strdup("completed");
+
+        info->has_ram = true;
+        info->ram = g_malloc0(sizeof(*info->ram));
+        info->ram->transferred = ram_bytes_transferred();
+        info->ram->remaining = 0;
+        info->ram->total = ram_bytes_total();
+        info->ram->total_time = s->total_time;
         break;
     case MIG_STATE_ERROR:
         info->has_status = true;
@@ -260,6 +269,7 @@ static void migrate_fd_put_ready(void *opaque)
         } else {
             migrate_fd_completed(s);
         }
+        s->total_time = qemu_get_clock_ms(rt_clock) - s->total_time;
         if (s->state != MIG_STATE_COMPLETED) {
             if (old_vm_running) {
                 vm_start();
@@ -352,7 +362,7 @@ void migrate_fd_connect(MigrationState *s)
                                       migrate_fd_close);
 
     DPRINTF("beginning savevm\n");
-    ret = qemu_savevm_state_begin(s->file, s->blk, s->shared);
+    ret = qemu_savevm_state_begin(s->file, &s->params);
     if (ret < 0) {
         DPRINTF("failed, %d\n", ret);
         migrate_fd_error(s);
@@ -361,18 +371,18 @@ void migrate_fd_connect(MigrationState *s)
     migrate_fd_put_ready(s);
 }
 
-static MigrationState *migrate_init(int blk, int inc)
+static MigrationState *migrate_init(const MigrationParams *params)
 {
     MigrationState *s = migrate_get_current();
     int64_t bandwidth_limit = s->bandwidth_limit;
 
     memset(s, 0, sizeof(*s));
     s->bandwidth_limit = bandwidth_limit;
-    s->blk = blk;
-    s->shared = inc;
+    s->params = *params;
 
     s->bandwidth_limit = bandwidth_limit;
     s->state = MIG_STATE_SETUP;
+    s->total_time = qemu_get_clock_ms(rt_clock);
 
     return s;
 }
@@ -394,9 +404,13 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
                  Error **errp)
 {
     MigrationState *s = migrate_get_current();
+    MigrationParams params;
     const char *p;
     int ret;
 
+    params.blk = blk;
+    params.shared = inc;
+
     if (s->state == MIG_STATE_ACTIVE) {
         error_set(errp, QERR_MIGRATION_ACTIVE);
         return;
@@ -411,7 +425,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
         return;
     }
 
-    s = migrate_init(blk, inc);
+    s = migrate_init(&params);
 
     if (strstart(uri, "tcp:", &p)) {
         ret = tcp_start_outgoing_migration(s, p, errp);
diff --git a/migration.h b/migration.h
index 2e9ca2edf2..de13004573 100644
--- a/migration.h
+++ b/migration.h
@@ -19,6 +19,11 @@
 #include "notify.h"
 #include "error.h"
 
+struct MigrationParams {
+    bool blk;
+    bool shared;
+};
+
 typedef struct MigrationState MigrationState;
 
 struct MigrationState
@@ -31,8 +36,8 @@ struct MigrationState
     int (*close)(MigrationState *s);
     int (*write)(MigrationState *s, const void *buff, size_t size);
     void *opaque;
-    int blk;
-    int shared;
+    MigrationParams params;
+    int64_t total_time;
 };
 
 void process_incoming_migration(QEMUFile *f);
diff --git a/monitor.c b/monitor.c
index f6107badb6..09aa3cdf52 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1262,45 +1262,24 @@ static void do_print(Monitor *mon, const QDict *qdict)
     int format = qdict_get_int(qdict, "format");
     target_phys_addr_t val = qdict_get_int(qdict, "val");
 
-#if TARGET_PHYS_ADDR_BITS == 32
     switch(format) {
     case 'o':
-        monitor_printf(mon, "%#o", val);
+        monitor_printf(mon, "%#" TARGET_PRIoPHYS, val);
         break;
     case 'x':
-        monitor_printf(mon, "%#x", val);
+        monitor_printf(mon, "%#" TARGET_PRIxPHYS, val);
         break;
     case 'u':
-        monitor_printf(mon, "%u", val);
+        monitor_printf(mon, "%" TARGET_PRIuPHYS, val);
         break;
     default:
     case 'd':
-        monitor_printf(mon, "%d", val);
+        monitor_printf(mon, "%" TARGET_PRIdPHYS, val);
         break;
     case 'c':
         monitor_printc(mon, val);
         break;
     }
-#else
-    switch(format) {
-    case 'o':
-        monitor_printf(mon, "%#" PRIo64, val);
-        break;
-    case 'x':
-        monitor_printf(mon, "%#" PRIx64, val);
-        break;
-    case 'u':
-        monitor_printf(mon, "%" PRIu64, val);
-        break;
-    default:
-    case 'd':
-        monitor_printf(mon, "%" PRId64, val);
-        break;
-    case 'c':
-        monitor_printc(mon, val);
-        break;
-    }
-#endif
     monitor_printf(mon, "\n");
 }
 
@@ -2328,48 +2307,45 @@ static void do_inject_mce(Monitor *mon, const QDict *qdict)
 }
 #endif
 
-static int do_getfd(Monitor *mon, const QDict *qdict, QObject **ret_data)
+void qmp_getfd(const char *fdname, Error **errp)
 {
-    const char *fdname = qdict_get_str(qdict, "fdname");
     mon_fd_t *monfd;
     int fd;
 
-    fd = qemu_chr_fe_get_msgfd(mon->chr);
+    fd = qemu_chr_fe_get_msgfd(cur_mon->chr);
     if (fd == -1) {
-        qerror_report(QERR_FD_NOT_SUPPLIED);
-        return -1;
+        error_set(errp, QERR_FD_NOT_SUPPLIED);
+        return;
     }
 
     if (qemu_isdigit(fdname[0])) {
-        qerror_report(QERR_INVALID_PARAMETER_VALUE, "fdname",
-                      "a name not starting with a digit");
-        return -1;
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "fdname",
+                  "a name not starting with a digit");
+        return;
     }
 
-    QLIST_FOREACH(monfd, &mon->fds, next) {
+    QLIST_FOREACH(monfd, &cur_mon->fds, next) {
         if (strcmp(monfd->name, fdname) != 0) {
             continue;
         }
 
         close(monfd->fd);
         monfd->fd = fd;
-        return 0;
+        return;
     }
 
     monfd = g_malloc0(sizeof(mon_fd_t));
     monfd->name = g_strdup(fdname);
     monfd->fd = fd;
 
-    QLIST_INSERT_HEAD(&mon->fds, monfd, next);
-    return 0;
+    QLIST_INSERT_HEAD(&cur_mon->fds, monfd, next);
 }
 
-static int do_closefd(Monitor *mon, const QDict *qdict, QObject **ret_data)
+void qmp_closefd(const char *fdname, Error **errp)
 {
-    const char *fdname = qdict_get_str(qdict, "fdname");
     mon_fd_t *monfd;
 
-    QLIST_FOREACH(monfd, &mon->fds, next) {
+    QLIST_FOREACH(monfd, &cur_mon->fds, next) {
         if (strcmp(monfd->name, fdname) != 0) {
             continue;
         }
@@ -2378,11 +2354,10 @@ static int do_closefd(Monitor *mon, const QDict *qdict, QObject **ret_data)
         close(monfd->fd);
         g_free(monfd->name);
         g_free(monfd);
-        return 0;
+        return;
     }
 
-    qerror_report(QERR_FD_NOT_FOUND, fdname);
-    return -1;
+    error_set(errp, QERR_FD_NOT_FOUND, fdname);
 }
 
 static void do_loadvm(Monitor *mon, const QDict *qdict)
diff --git a/net.c b/net.c
index 4aa416cffb..abf0fd0a0d 100644
--- a/net.c
+++ b/net.c
@@ -544,7 +544,7 @@ static ssize_t vc_sendv_compat(VLANClientState *vc, const struct iovec *iov,
     uint8_t buffer[4096];
     size_t offset;
 
-    offset = iov_to_buf(iov, iovcnt, buffer, 0, sizeof(buffer));
+    offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer));
 
     return vc->info->receive(vc, buffer, offset);
 }
diff --git a/net/slirp.c b/net/slirp.c
index 37b6ccfde9..b82eab0a07 100644
--- a/net/slirp.c
+++ b/net/slirp.c
@@ -26,6 +26,7 @@
 #include "config-host.h"
 
 #ifndef _WIN32
+#include <pwd.h>
 #include <sys/wait.h>
 #endif
 #include "net.h"
@@ -487,8 +488,27 @@ static int slirp_smb(SlirpState* s, const char *exported_dir,
     static int instance;
     char smb_conf[128];
     char smb_cmdline[128];
+    struct passwd *passwd;
     FILE *f;
 
+    passwd = getpwuid(geteuid());
+    if (!passwd) {
+        error_report("failed to retrieve user name");
+        return -1;
+    }
+
+    if (access(CONFIG_SMBD_COMMAND, F_OK)) {
+        error_report("could not find '%s', please install it",
+                     CONFIG_SMBD_COMMAND);
+        return -1;
+    }
+
+    if (access(exported_dir, R_OK | X_OK)) {
+        error_report("error accessing shared directory '%s': %s",
+                     exported_dir, strerror(errno));
+        return -1;
+    }
+
     snprintf(s->smb_dir, sizeof(s->smb_dir), "/tmp/qemu-smb.%ld-%d",
              (long)getpid(), instance++);
     if (mkdir(s->smb_dir, 0700) < 0) {
@@ -517,14 +537,16 @@ static int slirp_smb(SlirpState* s, const char *exported_dir,
             "[qemu]\n"
             "path=%s\n"
             "read only=no\n"
-            "guest ok=yes\n",
+            "guest ok=yes\n"
+            "force user=%s\n",
             s->smb_dir,
             s->smb_dir,
             s->smb_dir,
             s->smb_dir,
             s->smb_dir,
             s->smb_dir,
-            exported_dir
+            exported_dir,
+            passwd->pw_name
             );
     fclose(f);
 
@@ -616,25 +638,35 @@ static int slirp_guestfwd(SlirpState *s, const char *config_str,
 
     fwd = g_malloc(sizeof(struct GuestFwd));
     snprintf(buf, sizeof(buf), "guestfwd.tcp.%d", port);
-    fwd->hd = qemu_chr_new(buf, p, NULL);
-    if (!fwd->hd) {
-        error_report("could not open guest forwarding device '%s'", buf);
-        g_free(fwd);
-        return -1;
-    }
 
-    if (slirp_add_exec(s->slirp, 3, fwd->hd, &server, port) < 0) {
-        error_report("conflicting/invalid host:port in guest forwarding "
-                     "rule '%s'", config_str);
-        g_free(fwd);
-        return -1;
-    }
-    fwd->server = server;
-    fwd->port = port;
-    fwd->slirp = s->slirp;
+    if ((strlen(p) > 4) && !strncmp(p, "cmd:", 4)) {
+        if (slirp_add_exec(s->slirp, 0, &p[4], &server, port) < 0) {
+            error_report("conflicting/invalid host:port in guest forwarding "
+                         "rule '%s'", config_str);
+            g_free(fwd);
+            return -1;
+        }
+    } else {
+        fwd->hd = qemu_chr_new(buf, p, NULL);
+        if (!fwd->hd) {
+            error_report("could not open guest forwarding device '%s'", buf);
+            g_free(fwd);
+            return -1;
+        }
 
-    qemu_chr_add_handlers(fwd->hd, guestfwd_can_read, guestfwd_read,
-                          NULL, fwd);
+        if (slirp_add_exec(s->slirp, 3, fwd->hd, &server, port) < 0) {
+            error_report("conflicting/invalid host:port in guest forwarding "
+                         "rule '%s'", config_str);
+            g_free(fwd);
+            return -1;
+        }
+        fwd->server = server;
+        fwd->port = port;
+        fwd->slirp = s->slirp;
+
+        qemu_chr_add_handlers(fwd->hd, guestfwd_can_read, guestfwd_read,
+                              NULL, fwd);
+    }
     return 0;
 
  fail_syntax:
diff --git a/oslib-posix.c b/oslib-posix.c
index 6b7ba646c7..dbeb6272b8 100644
--- a/oslib-posix.c
+++ b/oslib-posix.c
@@ -41,6 +41,9 @@ extern int daemon(int, int);
       therefore we need special code which handles running on Valgrind. */
 #  define QEMU_VMALLOC_ALIGN (512 * 4096)
 #  define CONFIG_VALGRIND
+#elif defined(__linux__) && defined(__s390x__)
+   /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
+#  define QEMU_VMALLOC_ALIGN (256 * 4096)
 #else
 #  define QEMU_VMALLOC_ALIGN getpagesize()
 #endif
diff --git a/pc-bios/keymaps/fi b/pc-bios/keymaps/fi
index 2a4e0f0454..4be75865a9 100644
--- a/pc-bios/keymaps/fi
+++ b/pc-bios/keymaps/fi
@@ -99,9 +99,7 @@ asterisk 0x2b shift
 acute 0x2b altgr
 multiply 0x2b shift altgr
 guillemotleft 0x2c altgr
-less 0x2c shift altgr
 guillemotright 0x2d altgr
-greater 0x2d shift altgr
 copyright 0x2e altgr
 leftdoublequotemark 0x2f altgr
 grave 0x2f shift altgr
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index 68361f555a..96e4daf505 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -29,6 +29,7 @@
 #include "qemu-common.h"
 #include "trace.h"
 #include "block_int.h"
+#include "iov.h"
 
 #include "block/raw-posix-aio.h"
 
@@ -351,11 +352,8 @@ static void *aio_thread(void *unused)
             if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) {
                 /* A short read means that we have reached EOF. Pad the buffer
                  * with zeros for bytes after EOF. */
-                QEMUIOVector qiov;
-
-                qemu_iovec_init_external(&qiov, aiocb->aio_iov,
-                                         aiocb->aio_niov);
-                qemu_iovec_memset_skip(&qiov, 0, aiocb->aio_nbytes - ret, ret);
+                iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
+                           0, aiocb->aio_nbytes - ret);
 
                 ret = aiocb->aio_nbytes;
             }
diff --git a/qapi-schema.json b/qapi-schema.json
index 3b6e3468b4..a92adb1d5b 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -260,10 +260,15 @@
 #
 # @total: total amount of bytes involved in the migration process
 #
+# @total_time: tota0l amount of ms since migration started.  If
+#        migration has ended, it returns the total migration
+#        time. (since 1.2)
+#
 # Since: 0.14.0.
 ##
 { 'type': 'MigrationStats',
-  'data': {'transferred': 'int', 'remaining': 'int', 'total': 'int' } }
+  'data': {'transferred': 'int', 'remaining': 'int', 'total': 'int' ,
+           'total_time': 'int' } }
 
 ##
 # @MigrationInfo
@@ -275,8 +280,9 @@
 #          'cancelled'. If this field is not returned, no migration process
 #          has been initiated
 #
-# @ram: #optional @MigrationStats containing detailed migration status,
-#       only returned if status is 'active'
+# @ram: #optional @MigrationStats containing detailed migration
+#       status, only returned if status is 'active' or
+#       'completed'. 'comppleted' (since 1.2)
 #
 # @disk: #optional @MigrationStats containing detailed disk migration
 #        status, only returned if status is 'active' and it is a block
@@ -1783,34 +1789,36 @@
 #
 # Dump guest's memory to vmcore. It is a synchronous operation that can take
 # very long depending on the amount of guest memory. This command is only
-# supported only on i386 and x86_64
-#
-# @paging: if true, do paging to get guest's memory mapping. The @paging's
-# default value of @paging is false, If you want to use gdb to process the
-# core, please set @paging to true. The reason why the @paging's value is
-# false:
-#   1. guest machine in a catastrophic state can have corrupted memory,
-#      which we cannot trust.
-#   2. The guest machine can be in read-mode even if paging is enabled.
-#      For example: the guest machine uses ACPI to sleep, and ACPI sleep
-#      state goes in real-mode
+# supported on i386 and x86_64.
+#
+# @paging: if true, do paging to get guest's memory mapping. This allows
+# using gdb to process the core file. However, setting @paging to false
+# may be desirable because of two reasons:
+#
+#   1. The guest may be in a catastrophic state or can have corrupted
+#      memory, which cannot be trusted
+#   2. The guest can be in real-mode even if paging is enabled. For example,
+#      the guest uses ACPI to sleep, and ACPI sleep state goes in real-mode
+#
 # @protocol: the filename or file descriptor of the vmcore. The supported
-# protocol can be file or fd:
+# protocols are:
+#
 #   1. file: the protocol starts with "file:", and the following string is
 #      the file's path.
 #   2. fd: the protocol starts with "fd:", and the following string is the
 #      fd's name.
+#
 # @begin: #optional if specified, the starting physical address.
+#
 # @length: #optional if specified, the memory size, in bytes. If you don't
-# want to dump all guest's memory, please specify the start @begin and
-# @length
+# want to dump all guest's memory, please specify the start @begin and @length
 #
 # Returns: nothing on success
 #          If @begin contains an invalid address, InvalidParameter
 #          If only one of @begin and @length is specified, MissingParameter
 #          If @protocol stats with "fd:", and the fd cannot be found, FdNotFound
 #          If @protocol starts with "file:", and the file cannot be
-#          opened, OpenFileFailed
+#             opened, OpenFileFailed
 #          If @protocol does not start with "fd:" or "file:", InvalidParameter
 #          If an I/O error occurs while writing the file, IOError
 #          If the target does not support this command, Unsupported
@@ -1862,3 +1870,38 @@
 # Since: 0.14.0
 ##
 { 'command': 'netdev_del', 'data': {'id': 'str'} }
+
+##
+# @getfd:
+#
+# Receive a file descriptor via SCM rights and assign it a name
+#
+# @fdname: file descriptor name
+#
+# Returns: Nothing on success
+#          If file descriptor was not received, FdNotSupplied
+#          If @fdname is not valid, InvalidParameterType
+#
+# Since: 0.14.0
+#
+# Notes: If @fdname already exists, the file descriptor assigned to
+#        it will be closed and replaced by the received file
+#        descriptor.
+#        The 'closefd' command can be used to explicitly close the
+#        file descriptor when it is no longer needed.
+##
+{ 'command': 'getfd', 'data': {'fdname': 'str'} }
+
+##
+# @closefd:
+#
+# Close a file descriptor previously passed via SCM rights
+#
+# @fdname: file descriptor name
+#
+# Returns: Nothing on success
+#          If @fdname is not found, FdNotFound
+#
+# Since: 0.14.0
+##
+{ 'command': 'closefd', 'data': {'fdname': 'str'} }
diff --git a/qapi/qapi-visit-core.c b/qapi/qapi-visit-core.c
index ffffbf79aa..705eca90aa 100644
--- a/qapi/qapi-visit-core.c
+++ b/qapi/qapi-visit-core.c
@@ -298,7 +298,7 @@ void input_type_enum(Visitor *v, int *obj, const char *strings[],
     }
 
     if (strings[value] == NULL) {
-        error_set(errp, QERR_INVALID_PARAMETER, name ? name : "null");
+        error_set(errp, QERR_INVALID_PARAMETER, enum_str);
         g_free(enum_str);
         return;
     }
diff --git a/qemu-common.h b/qemu-common.h
index 9d9e603c6e..09676f529f 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -17,6 +17,7 @@ typedef struct DeviceState DeviceState;
 
 struct Monitor;
 typedef struct Monitor Monitor;
+typedef struct MigrationParams MigrationParams;
 
 /* we put basic includes here to avoid repeating them in device drivers */
 #include <stdlib.h>
@@ -205,9 +206,6 @@ int qemu_pipe(int pipefd[2]);
 #define qemu_recv(sockfd, buf, len, flags) recv(sockfd, buf, len, flags)
 #endif
 
-int qemu_recvv(int sockfd, struct iovec *iov, int len, int iov_offset);
-int qemu_sendv(int sockfd, struct iovec *iov, int len, int iov_offset);
-
 /* Error handling.  */
 
 void QEMU_NORETURN hw_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
@@ -311,32 +309,29 @@ struct qemu_work_item {
 void qemu_init_vcpu(void *env);
 #endif
 
-/**
- * Sends an iovec (or optionally a part of it) down a socket, yielding
- * when the socket is full.
- */
-int qemu_co_sendv(int sockfd, struct iovec *iov,
-                  int len, int iov_offset);
-
-/**
- * Receives data into an iovec (or optionally into a part of it) from
- * a socket, yielding when there is no data in the socket.
- */
-int qemu_co_recvv(int sockfd, struct iovec *iov,
-                  int len, int iov_offset);
-
 
 /**
- * Sends a buffer down a socket, yielding when the socket is full.
+ * Sends a (part of) iovec down a socket, yielding when the socket is full, or
+ * Receives data into a (part of) iovec from a socket,
+ * yielding when there is no data in the socket.
+ * The same interface as qemu_sendv_recvv(), with added yielding.
+ * XXX should mark these as coroutine_fn
  */
-int qemu_co_send(int sockfd, void *buf, int len);
+ssize_t qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+                            size_t offset, size_t bytes, bool do_send);
+#define qemu_co_recvv(sockfd, iov, iov_cnt, offset, bytes) \
+  qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, false)
+#define qemu_co_sendv(sockfd, iov, iov_cnt, offset, bytes) \
+  qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, true)
 
 /**
- * Receives data into a buffer from a socket, yielding when there
- * is no data in the socket.
+ * The same as above, but with just a single buffer
  */
-int qemu_co_recv(int sockfd, void *buf, int len);
-
+ssize_t qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send);
+#define qemu_co_recv(sockfd, buf, bytes) \
+  qemu_co_send_recv(sockfd, buf, bytes, false)
+#define qemu_co_send(sockfd, buf, bytes) \
+  qemu_co_send_recv(sockfd, buf, bytes, true)
 
 typedef struct QEMUIOVector {
     struct iovec *iov;
@@ -348,16 +343,16 @@ typedef struct QEMUIOVector {
 void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
 void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
 void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
-void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip,
-    size_t size);
-void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size);
+void qemu_iovec_concat(QEMUIOVector *dst,
+                       QEMUIOVector *src, size_t soffset, size_t sbytes);
 void qemu_iovec_destroy(QEMUIOVector *qiov);
 void qemu_iovec_reset(QEMUIOVector *qiov);
-void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf);
-void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count);
-void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count);
-void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count,
-                            size_t skip);
+size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
+                         void *buf, size_t bytes);
+size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
+                           const void *buf, size_t bytes);
+size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
+                         int fillc, size_t bytes);
 
 bool buffer_is_zero(const void *buf, size_t len);
 
diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c
index 40fd514395..5734965003 100644
--- a/qemu-coroutine-io.c
+++ b/qemu-coroutine-io.c
@@ -25,72 +25,41 @@
 #include "qemu-common.h"
 #include "qemu_socket.h"
 #include "qemu-coroutine.h"
+#include "iov.h"
 
-int coroutine_fn qemu_co_recvv(int sockfd, struct iovec *iov,
-                               int len, int iov_offset)
+ssize_t coroutine_fn
+qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+                    size_t offset, size_t bytes, bool do_send)
 {
-    int total = 0;
-    int ret;
-    while (len) {
-        ret = qemu_recvv(sockfd, iov, len, iov_offset + total);
-        if (ret < 0) {
+    size_t done = 0;
+    ssize_t ret;
+    while (done < bytes) {
+        ret = iov_send_recv(sockfd, iov, iov_cnt,
+                            offset + done, bytes - done, do_send);
+        if (ret > 0) {
+            done += ret;
+        } else if (ret < 0) {
             if (errno == EAGAIN) {
                 qemu_coroutine_yield();
-                continue;
-            }
-            if (total == 0) {
-                total = -1;
-            }
-            break;
-        }
-        if (ret == 0) {
-            break;
-        }
-        total += ret, len -= ret;
-    }
-
-    return total;
-}
-
-int coroutine_fn qemu_co_sendv(int sockfd, struct iovec *iov,
-                               int len, int iov_offset)
-{
-    int total = 0;
-    int ret;
-    while (len) {
-        ret = qemu_sendv(sockfd, iov, len, iov_offset + total);
-        if (ret < 0) {
-            if (errno == EAGAIN) {
-                qemu_coroutine_yield();
-                continue;
-            }
-            if (total == 0) {
-                total = -1;
+            } else if (done == 0) {
+                return -1;
+            } else {
+                break;
             }
+        } else if (ret == 0 && !do_send) {
+            /* write (send) should never return 0.
+             * read (recv) returns 0 for end-of-file (-data).
+             * In both cases there's little point retrying,
+             * but we do for write anyway, just in case */
             break;
         }
-        total += ret, len -= ret;
     }
-
-    return total;
+    return done;
 }
 
-int coroutine_fn qemu_co_recv(int sockfd, void *buf, int len)
+ssize_t coroutine_fn
+qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send)
 {
-    struct iovec iov;
-
-    iov.iov_base = buf;
-    iov.iov_len = len;
-
-    return qemu_co_recvv(sockfd, &iov, len, 0);
-}
-
-int coroutine_fn qemu_co_send(int sockfd, void *buf, int len)
-{
-    struct iovec iov;
-
-    iov.iov_base = buf;
-    iov.iov_len = len;
-
-    return qemu_co_sendv(sockfd, &iov, len, 0);
+    struct iovec iov = { .iov_base = buf, .iov_len = bytes };
+    return qemu_co_sendv_recvv(sockfd, &iov, 1, 0, bytes, do_send);
 }
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 0af0ff45c2..84dad19579 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -78,7 +78,7 @@ to ease cross-compilation and cross-debugging.
 
 @end itemize
 
-QEMU can run without an host kernel driver and yet gives acceptable
+QEMU can run without a host kernel driver and yet gives acceptable
 performance.
 
 For system emulation, the following hardware targets are supported:
diff --git a/qemu-log.c b/qemu-log.c
index 1ec70e7e83..396aafdf62 100644
--- a/qemu-log.c
+++ b/qemu-log.c
@@ -52,7 +52,7 @@ void qemu_log_mask(int mask, const char *fmt, ...)
 }
 
 /* enable or disable low levels log */
-void cpu_set_log(int log_flags)
+void qemu_set_log(int log_flags, bool use_own_buffers)
 {
     qemu_loglevel = log_flags;
     if (qemu_loglevel && !qemu_logfile) {
@@ -61,19 +61,20 @@ void cpu_set_log(int log_flags)
             perror(logfilename);
             _exit(1);
         }
-#if !defined(CONFIG_SOFTMMU)
         /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
-        {
+        if (use_own_buffers) {
             static char logfile_buf[4096];
+
             setvbuf(qemu_logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
-        }
-#elif defined(_WIN32)
-        /* Win32 doesn't support line-buffering, so use unbuffered output. */
-        setvbuf(qemu_logfile, NULL, _IONBF, 0);
+        } else {
+#if defined(_WIN32)
+            /* Win32 doesn't support line-buffering, so use unbuffered output. */
+            setvbuf(qemu_logfile, NULL, _IONBF, 0);
 #else
-        setvbuf(qemu_logfile, NULL, _IOLBF, 0);
+            setvbuf(qemu_logfile, NULL, _IOLBF, 0);
 #endif
-        log_append = 1;
+            log_append = 1;
+        }
     }
     if (!qemu_loglevel && qemu_logfile) {
         fclose(qemu_logfile);
@@ -99,10 +100,7 @@ const CPULogItem cpu_log_items[] = {
     { CPU_LOG_TB_OP, "op",
       "show micro ops for each compiled TB" },
     { CPU_LOG_TB_OP_OPT, "op_opt",
-      "show micro ops "
-#ifdef TARGET_I386
-      "before eflags optimization and "
-#endif
+      "show micro ops (x86 only: before eflags optimization) and\n"
       "after liveness analysis" },
     { CPU_LOG_INT, "int",
       "show interrupts/exceptions in short format" },
@@ -110,16 +108,12 @@ const CPULogItem cpu_log_items[] = {
       "show trace before each executed TB (lots of logs)" },
     { CPU_LOG_TB_CPU, "cpu",
       "show CPU state before block translation" },
-#ifdef TARGET_I386
     { CPU_LOG_PCALL, "pcall",
-      "show protected mode far calls/returns/exceptions" },
+      "x86 only: show protected mode far calls/returns/exceptions" },
     { CPU_LOG_RESET, "cpu_reset",
-      "show CPU state before CPU resets" },
-#endif
-#ifdef DEBUG_IOPORT
+      "x86 only: show CPU state before CPU resets" },
     { CPU_LOG_IOPORT, "ioport",
       "show all i/o ports accesses" },
-#endif
     { LOG_UNIMP, "unimp",
       "log unimplemented functionality" },
     { 0, NULL, NULL },
diff --git a/qemu-log.h b/qemu-log.h
index 4cdc7c7a47..5ccecf30af 100644
--- a/qemu-log.h
+++ b/qemu-log.h
@@ -142,7 +142,17 @@ typedef struct CPULogItem {
 
 extern const CPULogItem cpu_log_items[];
 
-void cpu_set_log(int log_flags);
+void qemu_set_log(int log_flags, bool use_own_buffers);
+
+static inline void cpu_set_log(int log_flags)
+{
+#ifdef CONFIG_USER_ONLY
+    qemu_set_log(log_flags, true);
+#else
+    qemu_set_log(log_flags, false);
+#endif
+}
+
 void cpu_set_log_filename(const char *filename);
 int cpu_str_to_log_mask(const char *str);
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 8b662648ae..97245a335c 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1030,8 +1030,21 @@ is a TCP port number, not a display number.
 @item password
 
 Require that password based authentication is used for client connections.
-The password must be set separately using the @code{change} command in the
-@ref{pcsys_monitor}
+
+The password must be set separately using the @code{set_password} command in
+the @ref{pcsys_monitor}. The syntax to change your password is:
+@code{set_password <protocol> <password>} where <protocol> could be either
+"vnc" or "spice".
+
+If you would like to change <protocol> password expiration, you should use
+@code{expire_password <protocol> <expiration-time>} where expiration time could
+be one of the following options: now, never, +seconds or UNIX time of
+expiration, e.g. +60 to make password expire in 60 seconds, or 1335196800
+to make password expire on "Mon Apr 23 12:00:00 EDT 2012" (UNIX time for this
+date and time).
+
+You can also use keywords "now" or "never" for the expiration time to
+allow <protocol> password to expire immediately or never expire.
 
 @item tls
 
@@ -1421,8 +1434,28 @@ Then when you use on the host @code{telnet localhost 5555}, you
 connect to the guest telnet server.
 
 @item guestfwd=[tcp]:@var{server}:@var{port}-@var{dev}
+@item guestfwd=[tcp]:@var{server}:@var{port}-@var{cmd:command}
 Forward guest TCP connections to the IP address @var{server} on port @var{port}
-to the character device @var{dev}. This option can be given multiple times.
+to the character device @var{dev} or to a program executed by @var{cmd:command}
+which gets spawned for each connection. This option can be given multiple times.
+
+You can either use a chardev directly and have that one used throughout Qemu's
+lifetime, like in the following example:
+
+@example
+# open 10.10.1.1:4321 on bootup, connect 10.0.2.100:1234 to it whenever
+# the guest accesses it
+qemu -net user,guestfwd=tcp:10.0.2.100:1234-tcp:10.10.1.1:4321 [...]
+@end example
+
+Or you can execute a command on every TCP connection established by the guest,
+so that Qemu behaves similar to an inetd process for that virtual server:
+
+@example
+# call "netcat 10.10.1.1 4321" on every TCP connection to 10.0.2.100:1234
+# and connect the TCP stream to its stdin/stdout
+qemu -net 'user,guestfwd=tcp:10.0.2.100:1234-cmd:netcat 10.10.1.1 4321'
+@end example
 
 @end table
 
@@ -2621,7 +2654,10 @@ DEF("nodefaults", 0, QEMU_OPTION_nodefaults, \
 STEXI
 @item -nodefaults
 @findex -nodefaults
-Don't create default devices.
+Don't create default devices. Normally, QEMU sets the default devices like serial
+port, parallel port, virtual console, monitor device, VGA adapter, floppy and
+CD-ROM drive and others. The @code{-nodefaults} option will disable all those
+default devices.
 ETEXI
 
 #ifndef _WIN32
@@ -2677,7 +2713,9 @@ DEF("readconfig", HAS_ARG, QEMU_OPTION_readconfig,
 STEXI
 @item -readconfig @var{file}
 @findex -readconfig
-Read device configuration from @var{file}.
+Read device configuration from @var{file}. This approach is useful when you want to spawn
+QEMU process with many command line options but you don't want to exceed the command line
+character limit.
 ETEXI
 DEF("writeconfig", HAS_ARG, QEMU_OPTION_writeconfig,
     "-writeconfig <file>\n"
@@ -2685,7 +2723,9 @@ DEF("writeconfig", HAS_ARG, QEMU_OPTION_writeconfig,
 STEXI
 @item -writeconfig @var{file}
 @findex -writeconfig
-Write device configuration to @var{file}.
+Write device configuration to @var{file}. The @var{file} can be either filename to save
+command line and device configuration into file or dash @code{-}) character to print the
+output to stdout. This can be later used as input file for @code{-readconfig} option.
 ETEXI
 DEF("nodefconfig", 0, QEMU_OPTION_nodefconfig,
     "-nodefconfig\n"
diff --git a/qemu-tech.texi b/qemu-tech.texi
index b51a58abba..d73dda8e35 100644
--- a/qemu-tech.texi
+++ b/qemu-tech.texi
@@ -536,7 +536,7 @@ timers, especially together with the use of bottom halves (BHs).
 @node Hardware interrupts
 @section Hardware interrupts
 
-In order to be faster, QEMU does not check at every basic block if an
+In order to be faster, QEMU does not check at every basic block if a
 hardware interrupt is pending. Instead, the user must asynchronously
 call a specific function to tell that an interrupt is pending. This
 function resets the chaining of the currently executing basic
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 2e1a38e695..e3cf3c5a1a 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -873,8 +873,7 @@ EQMP
         .args_type  = "fdname:s",
         .params     = "getfd name",
         .help       = "receive a file descriptor via SCM rights and assign it a name",
-        .user_print = monitor_user_noop,
-        .mhandler.cmd_new = do_getfd,
+        .mhandler.cmd_new = qmp_marshal_input_getfd,
     },
 
 SQMP
@@ -892,6 +891,14 @@ Example:
 -> { "execute": "getfd", "arguments": { "fdname": "fd1" } }
 <- { "return": {} }
 
+Notes:
+
+(1) If the name specified by the "fdname" argument already exists,
+    the file descriptor assigned to it will be closed and replaced
+    by the received file descriptor.
+(2) The 'closefd' command can be used to explicitly close the file
+    descriptor when it is no longer needed.
+
 EQMP
 
     {
@@ -899,8 +906,7 @@ EQMP
         .args_type  = "fdname:s",
         .params     = "closefd name",
         .help       = "close a file descriptor previously passed via SCM rights",
-        .user_print = monitor_user_noop,
-        .mhandler.cmd_new = do_closefd,
+        .mhandler.cmd_new = qmp_marshal_input_closefd,
     },
 
 SQMP
diff --git a/roms/Makefile b/roms/Makefile
index 0114e6f33f..feb9c2b145 100644
--- a/roms/Makefile
+++ b/roms/Makefile
@@ -1,10 +1,27 @@
 
+vgabios_variants := stdvga cirrus vmware qxl
+
 default:
 	@echo "nothing is build by default"
 	@echo "available build targets:"
 	@echo "  bios           -- update bios.bin (seabios)"
+	@echo "  seavgabios     -- update vgabios binaries (seabios)"
+	@echo "  lgplvgabios    -- update vgabios binaries (lgpl)"
 
 bios: config.seabios
 	sh configure-seabios.sh $<
 	make -C seabios out/bios.bin
 	cp seabios/out/bios.bin ../pc-bios/bios.bin
+
+seavgabios: $(patsubst %,seavgabios-%,$(vgabios_variants))
+
+seavgabios-%: config.vga.%
+	sh configure-seabios.sh $<
+	make -C seabios out/vgabios.bin
+	cp seabios/out/vgabios.bin ../pc-bios/vgabios-$*.bin
+
+lgplvgabios: $(patsubst %,lgplvgabios-%,$(vgabios_variants))
+
+lgplvgabios-%:
+	make -C vgabios vgabios-$*.bin
+	cp vgabios/VGABIOS-lgpl-latest.$*.bin ../pc-bios/vgabios-$*.bin
diff --git a/roms/config.vga.cirrus b/roms/config.vga.cirrus
new file mode 100644
index 0000000000..c8fe58239f
--- /dev/null
+++ b/roms/config.vga.cirrus
@@ -0,0 +1,3 @@
+CONFIG_BUILD_VGABIOS=y
+CONFIG_VGA_CIRRUS=y
+CONFIG_VGA_PCI=y
diff --git a/roms/config.vga.isavga b/roms/config.vga.isavga
new file mode 100644
index 0000000000..e55e294a0c
--- /dev/null
+++ b/roms/config.vga.isavga
@@ -0,0 +1,3 @@
+CONFIG_BUILD_VGABIOS=y
+CONFIG_VGA_BOCHS=y
+CONFIG_VGA_PCI=n
diff --git a/roms/config.vga.qxl b/roms/config.vga.qxl
new file mode 100644
index 0000000000..d393f0c34f
--- /dev/null
+++ b/roms/config.vga.qxl
@@ -0,0 +1,6 @@
+CONFIG_BUILD_VGABIOS=y
+CONFIG_VGA_BOCHS=y
+CONFIG_VGA_PCI=y
+CONFIG_OVERRIDE_PCI_ID=y
+CONFIG_VGA_VID=0x1b36
+CONFIG_VGA_DID=0x0100
diff --git a/roms/config.vga.stdvga b/roms/config.vga.stdvga
new file mode 100644
index 0000000000..7d063b787c
--- /dev/null
+++ b/roms/config.vga.stdvga
@@ -0,0 +1,3 @@
+CONFIG_BUILD_VGABIOS=y
+CONFIG_VGA_BOCHS=y
+CONFIG_VGA_PCI=y
diff --git a/roms/config.vga.vmware b/roms/config.vga.vmware
new file mode 100644
index 0000000000..eb10427afd
--- /dev/null
+++ b/roms/config.vga.vmware
@@ -0,0 +1,6 @@
+CONFIG_BUILD_VGABIOS=y
+CONFIG_VGA_BOCHS=y
+CONFIG_VGA_PCI=y
+CONFIG_OVERRIDE_PCI_ID=y
+CONFIG_VGA_VID=0x15ad
+CONFIG_VGA_DID=0x0405
diff --git a/savevm.c b/savevm.c
index faa81457d5..a15c163b6e 100644
--- a/savevm.c
+++ b/savevm.c
@@ -85,6 +85,7 @@
 #include "cpus.h"
 #include "memory.h"
 #include "qmp-commands.h"
+#include "trace.h"
 
 #define SELF_ANNOUNCE_ROUNDS 5
 
@@ -1561,7 +1562,8 @@ bool qemu_savevm_state_blocked(Error **errp)
     return false;
 }
 
-int qemu_savevm_state_begin(QEMUFile *f, int blk_enable, int shared)
+int qemu_savevm_state_begin(QEMUFile *f,
+                            const MigrationParams *params)
 {
     SaveStateEntry *se;
     int ret;
@@ -1569,8 +1571,8 @@ int qemu_savevm_state_begin(QEMUFile *f, int blk_enable, int shared)
     QTAILQ_FOREACH(se, &savevm_handlers, entry) {
         if(se->set_params == NULL) {
             continue;
-	}
-	se->set_params(blk_enable, shared, se->opaque);
+        }
+        se->set_params(params, se->opaque);
     }
     
     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
@@ -1624,11 +1626,17 @@ int qemu_savevm_state_iterate(QEMUFile *f)
         if (se->save_live_state == NULL)
             continue;
 
+        if (qemu_file_rate_limit(f)) {
+            return 0;
+        }
+        trace_savevm_section_start();
         /* Section type */
         qemu_put_byte(f, QEMU_VM_SECTION_PART);
         qemu_put_be32(f, se->section_id);
 
         ret = se->save_live_state(f, QEMU_VM_SECTION_PART, se->opaque);
+        trace_savevm_section_end(se->section_id);
+
         if (ret <= 0) {
             /* Do not proceed to the next vmstate before this one reported
                completion of the current stage. This serializes the migration
@@ -1658,11 +1666,13 @@ int qemu_savevm_state_complete(QEMUFile *f)
         if (se->save_live_state == NULL)
             continue;
 
+        trace_savevm_section_start();
         /* Section type */
         qemu_put_byte(f, QEMU_VM_SECTION_END);
         qemu_put_be32(f, se->section_id);
 
         ret = se->save_live_state(f, QEMU_VM_SECTION_END, se->opaque);
+        trace_savevm_section_end(se->section_id);
         if (ret < 0) {
             return ret;
         }
@@ -1674,6 +1684,7 @@ int qemu_savevm_state_complete(QEMUFile *f)
 	if (se->save_state == NULL && se->vmsd == NULL)
 	    continue;
 
+        trace_savevm_section_start();
         /* Section type */
         qemu_put_byte(f, QEMU_VM_SECTION_FULL);
         qemu_put_be32(f, se->section_id);
@@ -1687,6 +1698,7 @@ int qemu_savevm_state_complete(QEMUFile *f)
         qemu_put_be32(f, se->version_id);
 
         vmstate_save(f, se);
+        trace_savevm_section_end(se->section_id);
     }
 
     qemu_put_byte(f, QEMU_VM_EOF);
@@ -1708,13 +1720,17 @@ void qemu_savevm_state_cancel(QEMUFile *f)
 static int qemu_savevm_state(QEMUFile *f)
 {
     int ret;
+    MigrationParams params = {
+        .blk = 0,
+        .shared = 0
+    };
 
     if (qemu_savevm_state_blocked(NULL)) {
         ret = -EINVAL;
         goto out;
     }
 
-    ret = qemu_savevm_state_begin(f, 0, 0);
+    ret = qemu_savevm_state_begin(f, &params);
     if (ret < 0)
         goto out;
 
diff --git a/scripts/make-release b/scripts/make-release
new file mode 100755
index 0000000000..196c755f57
--- /dev/null
+++ b/scripts/make-release
@@ -0,0 +1,24 @@
+#!/bin/bash -e
+#
+# QEMU Release Script
+#
+# Copyright IBM, Corp. 2012
+#
+# Authors:
+#  Anthony Liguori <aliguori@us.ibm.com>
+#
+# This work is licensed under the terms of the GNU GPLv2 or later.
+# See the COPYING file in the top-level directory.
+
+src="$1"
+version="$2"
+destination=qemu-${version}
+
+git clone "${src}" ${destination}
+pushd ${destination}
+git checkout "v${version}"
+git submodule update --init
+rm -rf .git roms/*/.git
+popd
+tar cfj ${destination}.tar.bz2 ${destination}
+rm -rf ${destination}
diff --git a/sysemu.h b/sysemu.h
index bc2c788921..6540c7912f 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -77,7 +77,8 @@ void do_info_snapshots(Monitor *mon);
 void qemu_announce_self(void);
 
 bool qemu_savevm_state_blocked(Error **errp);
-int qemu_savevm_state_begin(QEMUFile *f, int blk_enable, int shared);
+int qemu_savevm_state_begin(QEMUFile *f,
+                            const MigrationParams *params);
 int qemu_savevm_state_iterate(QEMUFile *f);
 int qemu_savevm_state_complete(QEMUFile *f);
 void qemu_savevm_state_cancel(QEMUFile *f);
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index ae5795337f..b00f5fa547 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -129,7 +129,7 @@ static void arm_cpu_reset(CPUState *s)
 
 static inline void set_feature(CPUARMState *env, int feature)
 {
-    env->features |= 1u << feature;
+    env->features |= 1ULL << feature;
 }
 
 static void arm_cpu_initfn(Object *obj)
@@ -192,6 +192,9 @@ void arm_cpu_realize(ARMCPU *cpu)
     if (arm_feature(env, ARM_FEATURE_VFP3)) {
         set_feature(env, ARM_FEATURE_VFP);
     }
+    if (arm_feature(env, ARM_FEATURE_LPAE)) {
+        set_feature(env, ARM_FEATURE_PXN);
+    }
 
     register_cp_regs_for_features(cpu);
 }
@@ -532,6 +535,7 @@ static void cortex_a15_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_V7MP);
     set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
     set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+    set_feature(&cpu->env, ARM_FEATURE_LPAE);
     cpu->midr = 0x412fc0f1;
     cpu->reset_fpsid = 0x410430f0;
     cpu->mvfr0 = 0x10110222;
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 33afa185e9..191895cca8 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -113,7 +113,9 @@ typedef struct CPUARMState {
         uint32_t c1_xscaleauxcr; /* XScale auxiliary control register.  */
         uint32_t c1_scr; /* secure config register.  */
         uint32_t c2_base0; /* MMU translation table base 0.  */
-        uint32_t c2_base1; /* MMU translation table base 1.  */
+        uint32_t c2_base0_hi; /* MMU translation table base 0, high 32 bits */
+        uint32_t c2_base1; /* MMU translation table base 0.  */
+        uint32_t c2_base1_hi; /* MMU translation table base 1, high 32 bits */
         uint32_t c2_control; /* MMU translation table base control.  */
         uint32_t c2_mask; /* MMU translation table base selection mask.  */
         uint32_t c2_base_mask; /* MMU translation table base 0 mask. */
@@ -127,6 +129,7 @@ typedef struct CPUARMState {
         uint32_t c6_insn; /* Fault address registers.  */
         uint32_t c6_data;
         uint32_t c7_par;  /* Translation result. */
+        uint32_t c7_par_hi;  /* Translation result, high 32 bits */
         uint32_t c9_insn; /* Cache lockdown registers.  */
         uint32_t c9_data;
         uint32_t c9_pmcr; /* performance monitor control register */
@@ -221,7 +224,7 @@ typedef struct CPUARMState {
     /* These fields after the common ones so they are preserved on reset.  */
 
     /* Internal CPU feature flags.  */
-    uint32_t features;
+    uint64_t features;
 
     void *nvic;
     const struct arm_boot_info *boot_info;
@@ -386,11 +389,13 @@ enum arm_features {
     ARM_FEATURE_CACHE_DIRTY_REG, /* 1136/1176 cache dirty status register */
     ARM_FEATURE_CACHE_BLOCK_OPS, /* v6 optional cache block operations */
     ARM_FEATURE_MPIDR, /* has cp15 MPIDR */
+    ARM_FEATURE_PXN, /* has Privileged Execute Never bit */
+    ARM_FEATURE_LPAE, /* has Large Physical Address Extension */
 };
 
 static inline int arm_feature(CPUARMState *env, int feature)
 {
-    return (env->features & (1u << feature)) != 0;
+    return (env->features & (1ULL << feature)) != 0;
 }
 
 void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf);
@@ -619,7 +624,7 @@ static inline bool cp_access_ok(CPUARMState *env,
 #define TARGET_PAGE_BITS 10
 #endif
 
-#define TARGET_PHYS_ADDR_SPACE_BITS 32
+#define TARGET_PHYS_ADDR_SPACE_BITS 40
 #define TARGET_VIRT_ADDR_SPACE_BITS 32
 
 static inline CPUARMState *cpu_init(const char *cpu_model)
@@ -636,7 +641,7 @@ static inline CPUARMState *cpu_init(const char *cpu_model)
 #define cpu_signal_handler cpu_arm_signal_handler
 #define cpu_list arm_cpu_list
 
-#define CPU_SAVE_VERSION 7
+#define CPU_SAVE_VERSION 9
 
 /* MMU modes definitions */
 #define MMU_MODE0_SUFFIX _kernel
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 23099236ad..5727da296c 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -3,11 +3,12 @@
 #include "helper.h"
 #include "host-utils.h"
 #include "sysemu.h"
+#include "bitops.h"
 
 #ifndef CONFIG_USER_ONLY
 static inline int get_phys_addr(CPUARMState *env, uint32_t address,
                                 int access_type, int is_user,
-                                uint32_t *phys_ptr, int *prot,
+                                target_phys_addr_t *phys_ptr, int *prot,
                                 target_ulong *page_size);
 #endif
 
@@ -216,9 +217,9 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
       .access = PL1_W, .type = ARM_CP_NOP },
     { .name = "ISB", .cp = 15, .crn = 7, .crm = 5, .opc1 = 0, .opc2 = 4,
       .access = PL0_W, .type = ARM_CP_NOP },
-    { .name = "ISB", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 4,
+    { .name = "DSB", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 4,
       .access = PL0_W, .type = ARM_CP_NOP },
-    { .name = "ISB", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 5,
+    { .name = "DMB", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 5,
       .access = PL0_W, .type = ARM_CP_NOP },
     { .name = "IFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 2,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c6_insn),
@@ -346,7 +347,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
      */
     { .name = "DBGDRAR", .cp = 14, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 },
-    { .name = "DBGDRAR", .cp = 14, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
+    { .name = "DBGDSAR", .cp = 14, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 },
     /* the old v6 WFI, UNPREDICTABLE in v7 but we choose to NOP */
     { .name = "NOP", .cp = 15, .crn = 7, .crm = 0, .opc1 = 0, .opc2 = 4,
@@ -491,7 +492,9 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
 
 static int par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
-    if (arm_feature(env, ARM_FEATURE_V7)) {
+    if (arm_feature(env, ARM_FEATURE_LPAE)) {
+        env->cp15.c7_par = value;
+    } else if (arm_feature(env, ARM_FEATURE_V7)) {
         env->cp15.c7_par = value & 0xfffff6ff;
     } else {
         env->cp15.c7_par = value & 0xfffff1ff;
@@ -501,9 +504,20 @@ static int par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 
 #ifndef CONFIG_USER_ONLY
 /* get_phys_addr() isn't present for user-mode-only targets */
+
+/* Return true if extended addresses are enabled, ie this is an
+ * LPAE implementation and we are using the long-descriptor translation
+ * table format because the TTBCR EAE bit is set.
+ */
+static inline bool extended_addresses_enabled(CPUARMState *env)
+{
+    return arm_feature(env, ARM_FEATURE_LPAE)
+        && (env->cp15.c2_control & (1 << 31));
+}
+
 static int ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
-    uint32_t phys_addr;
+    target_phys_addr_t phys_addr;
     target_ulong page_size;
     int prot;
     int ret, is_user = ri->opc2 & 2;
@@ -515,18 +529,44 @@ static int ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
     }
     ret = get_phys_addr(env, value, access_type, is_user,
                         &phys_addr, &prot, &page_size);
-    if (ret == 0) {
-        /* We do not set any attribute bits in the PAR */
-        if (page_size == (1 << 24)
-            && arm_feature(env, ARM_FEATURE_V7)) {
-            env->cp15.c7_par = (phys_addr & 0xff000000) | 1 << 1;
+    if (extended_addresses_enabled(env)) {
+        /* ret is a DFSR/IFSR value for the long descriptor
+         * translation table format, but with WnR always clear.
+         * Convert it to a 64-bit PAR.
+         */
+        uint64_t par64 = (1 << 11); /* LPAE bit always set */
+        if (ret == 0) {
+            par64 |= phys_addr & ~0xfffULL;
+            /* We don't set the ATTR or SH fields in the PAR. */
         } else {
-            env->cp15.c7_par = phys_addr & 0xfffff000;
+            par64 |= 1; /* F */
+            par64 |= (ret & 0x3f) << 1; /* FS */
+            /* Note that S2WLK and FSTAGE are always zero, because we don't
+             * implement virtualization and therefore there can't be a stage 2
+             * fault.
+             */
         }
+        env->cp15.c7_par = par64;
+        env->cp15.c7_par_hi = par64 >> 32;
     } else {
-        env->cp15.c7_par = ((ret & (10 << 1)) >> 5) |
-            ((ret & (12 << 1)) >> 6) |
-            ((ret & 0xf) << 1) | 1;
+        /* ret is a DFSR/IFSR value for the short descriptor
+         * translation table format (with WnR always clear).
+         * Convert it to a 32-bit PAR.
+         */
+        if (ret == 0) {
+            /* We do not set any attribute bits in the PAR */
+            if (page_size == (1 << 24)
+                && arm_feature(env, ARM_FEATURE_V7)) {
+                env->cp15.c7_par = (phys_addr & 0xff000000) | 1 << 1;
+            } else {
+                env->cp15.c7_par = phys_addr & 0xfffff000;
+            }
+        } else {
+            env->cp15.c7_par = ((ret & (10 << 1)) >> 5) |
+                ((ret & (12 << 1)) >> 6) |
+                ((ret & 0xf) << 1) | 1;
+        }
+        env->cp15.c7_par_hi = 0;
     }
     return 0;
 }
@@ -653,7 +693,20 @@ static const ARMCPRegInfo pmsav5_cp_reginfo[] = {
 static int vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
 {
-    value &= 7;
+    if (arm_feature(env, ARM_FEATURE_LPAE)) {
+        value &= ~((7 << 19) | (3 << 14) | (0xf << 3));
+        /* With LPAE the TTBCR could result in a change of ASID
+         * via the TTBCR.A1 bit, so do a TLB flush.
+         */
+        tlb_flush(env, 1);
+    } else {
+        value &= 7;
+    }
+    /* Note that we always calculate c2_mask and c2_base_mask, but
+     * they are only used for short-descriptor tables (ie if EAE is 0);
+     * for long-descriptor tables the TTBCR fields are used differently
+     * and the c2_mask and c2_base_mask values are meaningless.
+     */
     env->cp15.c2_control = value;
     env->cp15.c2_mask = ~(((uint32_t)0xffffffffu) >> value);
     env->cp15.c2_base_mask = ~((uint32_t)0x3fffu >> value);
@@ -679,7 +732,7 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = {
       .fieldoffset = offsetof(CPUARMState, cp15.c2_base0), .resetvalue = 0, },
     { .name = "TTBR1", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 1,
       .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c2_base0), .resetvalue = 0, },
+      .fieldoffset = offsetof(CPUARMState, cp15.c2_base1), .resetvalue = 0, },
     { .name = "TTBCR", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
       .access = PL1_RW, .writefn = vmsa_ttbcr_write,
       .resetfn = vmsa_ttbcr_reset,
@@ -871,6 +924,96 @@ static const ARMCPRegInfo mpidr_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
+static int par64_read(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t *value)
+{
+    *value = ((uint64_t)env->cp15.c7_par_hi << 32) | env->cp15.c7_par;
+    return 0;
+}
+
+static int par64_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+{
+    env->cp15.c7_par_hi = value >> 32;
+    env->cp15.c7_par = value;
+    return 0;
+}
+
+static void par64_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    env->cp15.c7_par_hi = 0;
+    env->cp15.c7_par = 0;
+}
+
+static int ttbr064_read(CPUARMState *env, const ARMCPRegInfo *ri,
+                        uint64_t *value)
+{
+    *value = ((uint64_t)env->cp15.c2_base0_hi << 32) | env->cp15.c2_base0;
+    return 0;
+}
+
+static int ttbr064_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                         uint64_t value)
+{
+    env->cp15.c2_base0_hi = value >> 32;
+    env->cp15.c2_base0 = value;
+    /* Writes to the 64 bit format TTBRs may change the ASID */
+    tlb_flush(env, 1);
+    return 0;
+}
+
+static void ttbr064_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    env->cp15.c2_base0_hi = 0;
+    env->cp15.c2_base0 = 0;
+}
+
+static int ttbr164_read(CPUARMState *env, const ARMCPRegInfo *ri,
+                        uint64_t *value)
+{
+    *value = ((uint64_t)env->cp15.c2_base1_hi << 32) | env->cp15.c2_base1;
+    return 0;
+}
+
+static int ttbr164_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                         uint64_t value)
+{
+    env->cp15.c2_base1_hi = value >> 32;
+    env->cp15.c2_base1 = value;
+    return 0;
+}
+
+static void ttbr164_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    env->cp15.c2_base1_hi = 0;
+    env->cp15.c2_base1 = 0;
+}
+
+static const ARMCPRegInfo lpae_cp_reginfo[] = {
+    /* NOP AMAIR0/1: the override is because these clash with tha rather
+     * broadly specified TLB_LOCKDOWN entry in the generic cp_reginfo.
+     */
+    { .name = "AMAIR0", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_OVERRIDE,
+      .resetvalue = 0 },
+    { .name = "AMAIR1", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 1,
+      .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_OVERRIDE,
+      .resetvalue = 0 },
+    /* 64 bit access versions of the (dummy) debug registers */
+    { .name = "DBGDRAR", .cp = 14, .crm = 1, .opc1 = 0,
+      .access = PL0_R, .type = ARM_CP_CONST|ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "DBGDSAR", .cp = 14, .crm = 2, .opc1 = 0,
+      .access = PL0_R, .type = ARM_CP_CONST|ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "PAR", .cp = 15, .crm = 7, .opc1 = 0,
+      .access = PL1_RW, .type = ARM_CP_64BIT,
+      .readfn = par64_read, .writefn = par64_write, .resetfn = par64_reset },
+    { .name = "TTBR0", .cp = 15, .crm = 2, .opc1 = 0,
+      .access = PL1_RW, .type = ARM_CP_64BIT, .readfn = ttbr064_read,
+      .writefn = ttbr064_write, .resetfn = ttbr064_reset },
+    { .name = "TTBR1", .cp = 15, .crm = 2, .opc1 = 1,
+      .access = PL1_RW, .type = ARM_CP_64BIT, .readfn = ttbr164_read,
+      .writefn = ttbr164_write, .resetfn = ttbr164_reset },
+    REGINFO_SENTINEL
+};
+
 static int sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     env->cp15.c1_sys = value;
@@ -1016,6 +1159,9 @@ void register_cp_regs_for_features(ARMCPU *cpu)
     if (arm_feature(env, ARM_FEATURE_MPIDR)) {
         define_arm_cp_regs(cpu, mpidr_cp_reginfo);
     }
+    if (arm_feature(env, ARM_FEATURE_LPAE)) {
+        define_arm_cp_regs(cpu, lpae_cp_reginfo);
+    }
     /* Slightly awkwardly, the OMAP and StrongARM cores need all of
      * cp15 crn=0 to be writes-ignored, whereas for other cores they should
      * be read-only (ie write causes UNDEF exception).
@@ -1833,8 +1979,8 @@ static uint32_t get_level1_table_address(CPUARMState *env, uint32_t address)
 }
 
 static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type,
-			    int is_user, uint32_t *phys_ptr, int *prot,
-                            target_ulong *page_size)
+                            int is_user, target_phys_addr_t *phys_ptr,
+                            int *prot, target_ulong *page_size)
 {
     int code;
     uint32_t table;
@@ -1843,7 +1989,7 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type,
     int ap;
     int domain;
     int domain_prot;
-    uint32_t phys_addr;
+    target_phys_addr_t phys_addr;
 
     /* Pagetable walk.  */
     /* Lookup l1 descriptor.  */
@@ -1928,45 +2074,46 @@ do_fault:
 }
 
 static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type,
-			    int is_user, uint32_t *phys_ptr, int *prot,
-                            target_ulong *page_size)
+                            int is_user, target_phys_addr_t *phys_ptr,
+                            int *prot, target_ulong *page_size)
 {
     int code;
     uint32_t table;
     uint32_t desc;
     uint32_t xn;
+    uint32_t pxn = 0;
     int type;
     int ap;
-    int domain;
+    int domain = 0;
     int domain_prot;
-    uint32_t phys_addr;
+    target_phys_addr_t phys_addr;
 
     /* Pagetable walk.  */
     /* Lookup l1 descriptor.  */
     table = get_level1_table_address(env, address);
     desc = ldl_phys(table);
     type = (desc & 3);
-    if (type == 0) {
-        /* Section translation fault.  */
+    if (type == 0 || (type == 3 && !arm_feature(env, ARM_FEATURE_PXN))) {
+        /* Section translation fault, or attempt to use the encoding
+         * which is Reserved on implementations without PXN.
+         */
         code = 5;
-        domain = 0;
         goto do_fault;
-    } else if (type == 2 && (desc & (1 << 18))) {
-        /* Supersection.  */
-        domain = 0;
-    } else {
-        /* Section or page.  */
+    }
+    if ((type == 1) || !(desc & (1 << 18))) {
+        /* Page or Section.  */
         domain = (desc >> 5) & 0x0f;
     }
     domain_prot = (env->cp15.c3 >> (domain * 2)) & 3;
     if (domain_prot == 0 || domain_prot == 2) {
-        if (type == 2)
+        if (type != 1) {
             code = 9; /* Section domain fault.  */
-        else
+        } else {
             code = 11; /* Page domain fault.  */
+        }
         goto do_fault;
     }
-    if (type == 2) {
+    if (type != 1) {
         if (desc & (1 << 18)) {
             /* Supersection.  */
             phys_addr = (desc & 0xff000000) | (address & 0x00ffffff);
@@ -1978,8 +2125,12 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type,
         }
         ap = ((desc >> 10) & 3) | ((desc >> 13) & 4);
         xn = desc & (1 << 4);
+        pxn = desc & 1;
         code = 13;
     } else {
+        if (arm_feature(env, ARM_FEATURE_PXN)) {
+            pxn = (desc >> 2) & 1;
+        }
         /* Lookup l2 entry.  */
         table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
         desc = ldl_phys(table);
@@ -2007,6 +2158,9 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type,
     if (domain_prot == 3) {
         *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
     } else {
+        if (pxn && !is_user) {
+            xn = 1;
+        }
         if (xn && access_type == 2)
             goto do_fault;
 
@@ -2031,8 +2185,187 @@ do_fault:
     return code | (domain << 4);
 }
 
-static int get_phys_addr_mpu(CPUARMState *env, uint32_t address, int access_type,
-			     int is_user, uint32_t *phys_ptr, int *prot)
+/* Fault type for long-descriptor MMU fault reporting; this corresponds
+ * to bits [5..2] in the STATUS field in long-format DFSR/IFSR.
+ */
+typedef enum {
+    translation_fault = 1,
+    access_fault = 2,
+    permission_fault = 3,
+} MMUFaultType;
+
+static int get_phys_addr_lpae(CPUARMState *env, uint32_t address,
+                              int access_type, int is_user,
+                              target_phys_addr_t *phys_ptr, int *prot,
+                              target_ulong *page_size_ptr)
+{
+    /* Read an LPAE long-descriptor translation table. */
+    MMUFaultType fault_type = translation_fault;
+    uint32_t level = 1;
+    uint32_t epd;
+    uint32_t tsz;
+    uint64_t ttbr;
+    int ttbr_select;
+    int n;
+    target_phys_addr_t descaddr;
+    uint32_t tableattrs;
+    target_ulong page_size;
+    uint32_t attrs;
+
+    /* Determine whether this address is in the region controlled by
+     * TTBR0 or TTBR1 (or if it is in neither region and should fault).
+     * This is a Non-secure PL0/1 stage 1 translation, so controlled by
+     * TTBCR/TTBR0/TTBR1 in accordance with ARM ARM DDI0406C table B-32:
+     */
+    uint32_t t0sz = extract32(env->cp15.c2_control, 0, 3);
+    uint32_t t1sz = extract32(env->cp15.c2_control, 16, 3);
+    if (t0sz && !extract32(address, 32 - t0sz, t0sz)) {
+        /* there is a ttbr0 region and we are in it (high bits all zero) */
+        ttbr_select = 0;
+    } else if (t1sz && !extract32(~address, 32 - t1sz, t1sz)) {
+        /* there is a ttbr1 region and we are in it (high bits all one) */
+        ttbr_select = 1;
+    } else if (!t0sz) {
+        /* ttbr0 region is "everything not in the ttbr1 region" */
+        ttbr_select = 0;
+    } else if (!t1sz) {
+        /* ttbr1 region is "everything not in the ttbr0 region" */
+        ttbr_select = 1;
+    } else {
+        /* in the gap between the two regions, this is a Translation fault */
+        fault_type = translation_fault;
+        goto do_fault;
+    }
+
+    /* Note that QEMU ignores shareability and cacheability attributes,
+     * so we don't need to do anything with the SH, ORGN, IRGN fields
+     * in the TTBCR.  Similarly, TTBCR:A1 selects whether we get the
+     * ASID from TTBR0 or TTBR1, but QEMU's TLB doesn't currently
+     * implement any ASID-like capability so we can ignore it (instead
+     * we will always flush the TLB any time the ASID is changed).
+     */
+    if (ttbr_select == 0) {
+        ttbr = ((uint64_t)env->cp15.c2_base0_hi << 32) | env->cp15.c2_base0;
+        epd = extract32(env->cp15.c2_control, 7, 1);
+        tsz = t0sz;
+    } else {
+        ttbr = ((uint64_t)env->cp15.c2_base1_hi << 32) | env->cp15.c2_base1;
+        epd = extract32(env->cp15.c2_control, 23, 1);
+        tsz = t1sz;
+    }
+
+    if (epd) {
+        /* Translation table walk disabled => Translation fault on TLB miss */
+        goto do_fault;
+    }
+
+    /* If the region is small enough we will skip straight to a 2nd level
+     * lookup. This affects the number of bits of the address used in
+     * combination with the TTBR to find the first descriptor. ('n' here
+     * matches the usage in the ARM ARM sB3.6.6, where bits [39..n] are
+     * from the TTBR, [n-1..3] from the vaddr, and [2..0] always zero).
+     */
+    if (tsz > 1) {
+        level = 2;
+        n = 14 - tsz;
+    } else {
+        n = 5 - tsz;
+    }
+
+    /* Clear the vaddr bits which aren't part of the within-region address,
+     * so that we don't have to special case things when calculating the
+     * first descriptor address.
+     */
+    address &= (0xffffffffU >> tsz);
+
+    /* Now we can extract the actual base address from the TTBR */
+    descaddr = extract64(ttbr, 0, 40);
+    descaddr &= ~((1ULL << n) - 1);
+
+    tableattrs = 0;
+    for (;;) {
+        uint64_t descriptor;
+
+        descaddr |= ((address >> (9 * (4 - level))) & 0xff8);
+        descriptor = ldq_phys(descaddr);
+        if (!(descriptor & 1) ||
+            (!(descriptor & 2) && (level == 3))) {
+            /* Invalid, or the Reserved level 3 encoding */
+            goto do_fault;
+        }
+        descaddr = descriptor & 0xfffffff000ULL;
+
+        if ((descriptor & 2) && (level < 3)) {
+            /* Table entry. The top five bits are attributes which  may
+             * propagate down through lower levels of the table (and
+             * which are all arranged so that 0 means "no effect", so
+             * we can gather them up by ORing in the bits at each level).
+             */
+            tableattrs |= extract64(descriptor, 59, 5);
+            level++;
+            continue;
+        }
+        /* Block entry at level 1 or 2, or page entry at level 3.
+         * These are basically the same thing, although the number
+         * of bits we pull in from the vaddr varies.
+         */
+        page_size = (1 << (39 - (9 * level)));
+        descaddr |= (address & (page_size - 1));
+        /* Extract attributes from the descriptor and merge with table attrs */
+        attrs = extract64(descriptor, 2, 10)
+            | (extract64(descriptor, 52, 12) << 10);
+        attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */
+        attrs |= extract32(tableattrs, 3, 1) << 5; /* APTable[1] => AP[2] */
+        /* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
+         * means "force PL1 access only", which means forcing AP[1] to 0.
+         */
+        if (extract32(tableattrs, 2, 1)) {
+            attrs &= ~(1 << 4);
+        }
+        /* Since we're always in the Non-secure state, NSTable is ignored. */
+        break;
+    }
+    /* Here descaddr is the final physical address, and attributes
+     * are all in attrs.
+     */
+    fault_type = access_fault;
+    if ((attrs & (1 << 8)) == 0) {
+        /* Access flag */
+        goto do_fault;
+    }
+    fault_type = permission_fault;
+    if (is_user && !(attrs & (1 << 4))) {
+        /* Unprivileged access not enabled */
+        goto do_fault;
+    }
+    *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+    if (attrs & (1 << 12) || (!is_user && (attrs & (1 << 11)))) {
+        /* XN or PXN */
+        if (access_type == 2) {
+            goto do_fault;
+        }
+        *prot &= ~PAGE_EXEC;
+    }
+    if (attrs & (1 << 5)) {
+        /* Write access forbidden */
+        if (access_type == 1) {
+            goto do_fault;
+        }
+        *prot &= ~PAGE_WRITE;
+    }
+
+    *phys_ptr = descaddr;
+    *page_size_ptr = page_size;
+    return 0;
+
+do_fault:
+    /* Long-descriptor format IFSR/DFSR value */
+    return (1 << 9) | (fault_type << 2) | level;
+}
+
+static int get_phys_addr_mpu(CPUARMState *env, uint32_t address,
+                             int access_type, int is_user,
+                             target_phys_addr_t *phys_ptr, int *prot)
 {
     int n;
     uint32_t mask;
@@ -2091,9 +2424,32 @@ static int get_phys_addr_mpu(CPUARMState *env, uint32_t address, int access_type
     return 0;
 }
 
+/* get_phys_addr - get the physical address for this virtual address
+ *
+ * Find the physical address corresponding to the given virtual address,
+ * by doing a translation table walk on MMU based systems or using the
+ * MPU state on MPU based systems.
+ *
+ * Returns 0 if the translation was successful. Otherwise, phys_ptr,
+ * prot and page_size are not filled in, and the return value provides
+ * information on why the translation aborted, in the format of a
+ * DFSR/IFSR fault register, with the following caveats:
+ *  * we honour the short vs long DFSR format differences.
+ *  * the WnR bit is never set (the caller must do this).
+ *  * for MPU based systems we don't bother to return a full FSR format
+ *    value.
+ *
+ * @env: CPUARMState
+ * @address: virtual address to get physical address for
+ * @access_type: 0 for read, 1 for write, 2 for execute
+ * @is_user: 0 for privileged access, 1 for user
+ * @phys_ptr: set to the physical address corresponding to the virtual address
+ * @prot: set to the permissions for the page containing phys_ptr
+ * @page_size: set to the size of the page containing phys_ptr
+ */
 static inline int get_phys_addr(CPUARMState *env, uint32_t address,
                                 int access_type, int is_user,
-                                uint32_t *phys_ptr, int *prot,
+                                target_phys_addr_t *phys_ptr, int *prot,
                                 target_ulong *page_size)
 {
     /* Fast Context Switch Extension.  */
@@ -2110,6 +2466,9 @@ static inline int get_phys_addr(CPUARMState *env, uint32_t address,
         *page_size = TARGET_PAGE_SIZE;
 	return get_phys_addr_mpu(env, address, access_type, is_user, phys_ptr,
 				 prot);
+    } else if (extended_addresses_enabled(env)) {
+        return get_phys_addr_lpae(env, address, access_type, is_user, phys_ptr,
+                                  prot, page_size);
     } else if (env->cp15.c1_sys & (1 << 23)) {
         return get_phys_addr_v6(env, address, access_type, is_user, phys_ptr,
                                 prot, page_size);
@@ -2122,7 +2481,7 @@ static inline int get_phys_addr(CPUARMState *env, uint32_t address,
 int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address,
                               int access_type, int mmu_idx)
 {
-    uint32_t phys_addr;
+    target_phys_addr_t phys_addr;
     target_ulong page_size;
     int prot;
     int ret, is_user;
@@ -2132,7 +2491,7 @@ int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address,
                         &page_size);
     if (ret == 0) {
         /* Map a single [sub]page.  */
-        phys_addr &= ~(uint32_t)0x3ff;
+        phys_addr &= ~(target_phys_addr_t)0x3ff;
         address &= ~(uint32_t)0x3ff;
         tlb_set_page (env, address, phys_addr, prot, mmu_idx, page_size);
         return 0;
@@ -2154,7 +2513,7 @@ int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address,
 
 target_phys_addr_t cpu_get_phys_page_debug(CPUARMState *env, target_ulong addr)
 {
-    uint32_t phys_addr;
+    target_phys_addr_t phys_addr;
     target_ulong page_size;
     int prot;
     int ret;
diff --git a/target-arm/machine.c b/target-arm/machine.c
index a2a75fbd19..68dca7ffb2 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -27,7 +27,9 @@ void cpu_save(QEMUFile *f, void *opaque)
     qemu_put_be32(f, env->cp15.c1_xscaleauxcr);
     qemu_put_be32(f, env->cp15.c1_scr);
     qemu_put_be32(f, env->cp15.c2_base0);
+    qemu_put_be32(f, env->cp15.c2_base0_hi);
     qemu_put_be32(f, env->cp15.c2_base1);
+    qemu_put_be32(f, env->cp15.c2_base1_hi);
     qemu_put_be32(f, env->cp15.c2_control);
     qemu_put_be32(f, env->cp15.c2_mask);
     qemu_put_be32(f, env->cp15.c2_base_mask);
@@ -42,6 +44,7 @@ void cpu_save(QEMUFile *f, void *opaque)
     qemu_put_be32(f, env->cp15.c6_insn);
     qemu_put_be32(f, env->cp15.c6_data);
     qemu_put_be32(f, env->cp15.c7_par);
+    qemu_put_be32(f, env->cp15.c7_par_hi);
     qemu_put_be32(f, env->cp15.c9_insn);
     qemu_put_be32(f, env->cp15.c9_data);
     qemu_put_be32(f, env->cp15.c9_pmcr);
@@ -60,7 +63,7 @@ void cpu_save(QEMUFile *f, void *opaque)
     qemu_put_be32(f, env->cp15.c15_diagnostic);
     qemu_put_be32(f, env->cp15.c15_power_diagnostic);
 
-    qemu_put_be32(f, env->features);
+    qemu_put_be64(f, env->features);
 
     if (arm_feature(env, ARM_FEATURE_VFP)) {
         for (i = 0;  i < 16; i++) {
@@ -144,7 +147,9 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
     env->cp15.c1_xscaleauxcr = qemu_get_be32(f);
     env->cp15.c1_scr = qemu_get_be32(f);
     env->cp15.c2_base0 = qemu_get_be32(f);
+    env->cp15.c2_base0_hi = qemu_get_be32(f);
     env->cp15.c2_base1 = qemu_get_be32(f);
+    env->cp15.c2_base1_hi = qemu_get_be32(f);
     env->cp15.c2_control = qemu_get_be32(f);
     env->cp15.c2_mask = qemu_get_be32(f);
     env->cp15.c2_base_mask = qemu_get_be32(f);
@@ -159,6 +164,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
     env->cp15.c6_insn = qemu_get_be32(f);
     env->cp15.c6_data = qemu_get_be32(f);
     env->cp15.c7_par = qemu_get_be32(f);
+    env->cp15.c7_par_hi = qemu_get_be32(f);
     env->cp15.c9_insn = qemu_get_be32(f);
     env->cp15.c9_data = qemu_get_be32(f);
     env->cp15.c9_pmcr = qemu_get_be32(f);
@@ -177,7 +183,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
     env->cp15.c15_diagnostic = qemu_get_be32(f);
     env->cp15.c15_power_diagnostic = qemu_get_be32(f);
 
-    env->features = qemu_get_be32(f);
+    env->features = qemu_get_be64(f);
 
     if (arm_feature(env, ARM_FEATURE_VFP)) {
         for (i = 0;  i < 16; i++) {
diff --git a/target-arm/translate.c b/target-arm/translate.c
index a2a0ecddad..29008a4b34 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -6236,7 +6236,7 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
             }
             gen_set_pc_im(s->pc);
             s->is_jmp = DISAS_WFI;
-            break;
+            return 0;
         default:
             break;
         }
@@ -6263,7 +6263,9 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                 tcg_gen_trunc_i64_i32(tmp, tmp64);
                 store_reg(s, rt, tmp);
                 tcg_gen_shri_i64(tmp64, tmp64, 32);
+                tmp = tcg_temp_new_i32();
                 tcg_gen_trunc_i64_i32(tmp, tmp64);
+                tcg_temp_free_i64(tmp64);
                 store_reg(s, rt2, tmp);
             } else {
                 TCGv tmp;
diff --git a/target-i386/translate.c b/target-i386/translate.c
index a902f4a8bf..1988dae290 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -38,18 +38,10 @@
 #define PREFIX_ADR    0x10
 
 #ifdef TARGET_X86_64
-#define X86_64_ONLY(x) x
-#define X86_64_DEF(...)  __VA_ARGS__
 #define CODE64(s) ((s)->code64)
 #define REX_X(s) ((s)->rex_x)
 #define REX_B(s) ((s)->rex_b)
-/* XXX: gcc generates push/pop in some opcodes, so we cannot use them */
-#if 1
-#define BUGGY_64(x) NULL
-#endif
 #else
-#define X86_64_ONLY(x) NULL
-#define X86_64_DEF(...)
 #define CODE64(s) 0
 #define REX_X(s) 0
 #define REX_B(s) 0
@@ -271,11 +263,30 @@ static inline void gen_op_andl_A0_ffff(void)
 #define REG_LH_OFFSET 4
 #endif
 
+/* In instruction encodings for byte register accesses the
+ * register number usually indicates "low 8 bits of register N";
+ * however there are some special cases where N 4..7 indicates
+ * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
+ * true for this special case, false otherwise.
+ */
+static inline bool byte_reg_is_xH(int reg)
+{
+    if (reg < 4) {
+        return false;
+    }
+#ifdef TARGET_X86_64
+    if (reg >= 8 || x86_64_hregs) {
+        return false;
+    }
+#endif
+    return true;
+}
+
 static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
 {
     switch(ot) {
     case OT_BYTE:
-        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
+        if (!byte_reg_is_xH(reg)) {
             tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
         } else {
             tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
@@ -330,19 +341,11 @@ static inline void gen_op_mov_reg_A0(int size, int reg)
 
 static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
 {
-    switch(ot) {
-    case OT_BYTE:
-        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
-            goto std_case;
-        } else {
-            tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
-            tcg_gen_ext8u_tl(t0, t0);
-        }
-        break;
-    default:
-    std_case:
+    if (ot == OT_BYTE && byte_reg_is_xH(reg)) {
+        tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
+        tcg_gen_ext8u_tl(t0, t0);
+    } else {
         tcg_gen_mov_tl(t0, cpu_regs[reg]);
-        break;
     }
 }
 
@@ -2947,24 +2950,33 @@ static const SSEFunc_0_pp sse_op_table2[3 * 8][2] = {
     [16 + 7] = { NULL, gen_helper_pslldq_xmm },
 };
 
-static const SSEFunc_0_pi sse_op_table3a[4] = {
+static const SSEFunc_0_pi sse_op_table3ai[] = {
     gen_helper_cvtsi2ss,
-    gen_helper_cvtsi2sd,
-    X86_64_ONLY(gen_helper_cvtsq2ss),
-    X86_64_ONLY(gen_helper_cvtsq2sd),
+    gen_helper_cvtsi2sd
 };
 
-static const SSEFunc_i_p sse_op_table3b[4 * 2] = {
+#ifdef TARGET_X86_64
+static const SSEFunc_0_pl sse_op_table3aq[] = {
+    gen_helper_cvtsq2ss,
+    gen_helper_cvtsq2sd
+};
+#endif
+
+static const SSEFunc_i_p sse_op_table3bi[] = {
     gen_helper_cvttss2si,
+    gen_helper_cvtss2si,
     gen_helper_cvttsd2si,
-    X86_64_ONLY(gen_helper_cvttss2sq),
-    X86_64_ONLY(gen_helper_cvttsd2sq),
+    gen_helper_cvtsd2si
+};
 
-    gen_helper_cvtss2si,
-    gen_helper_cvtsd2si,
-    X86_64_ONLY(gen_helper_cvtss2sq),
-    X86_64_ONLY(gen_helper_cvtsd2sq),
+#ifdef TARGET_X86_64
+static const SSEFunc_l_p sse_op_table3bq[] = {
+    gen_helper_cvttss2sq,
+    gen_helper_cvtss2sq,
+    gen_helper_cvttsd2sq,
+    gen_helper_cvtsd2sq
 };
+#endif
 
 static const SSEFunc_0_pp sse_op_table4[8][4] = {
     SSE_FOP(cmpeq),
@@ -3097,10 +3109,6 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
 {
     int b1, op1_offset, op2_offset, is_xmm, val, ot;
     int modrm, mod, rm, reg, reg_addr, offset_addr;
-    SSEFunc_i_p sse_fn_i_p;
-    SSEFunc_l_p sse_fn_l_p;
-    SSEFunc_0_pi sse_fn_pi;
-    SSEFunc_0_pl sse_fn_pl;
     SSEFunc_0_pp sse_fn_pp;
     SSEFunc_0_ppi sse_fn_ppi;
     SSEFunc_0_ppt sse_fn_ppt;
@@ -3563,14 +3571,16 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             if (ot == OT_LONG) {
-                sse_fn_pi = sse_op_table3a[(s->dflag == 2) * 2 +
-                                           ((b >> 8) - 2)];
+                SSEFunc_0_pi sse_fn_pi = sse_op_table3ai[(b >> 8) & 1];
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 sse_fn_pi(cpu_ptr0, cpu_tmp2_i32);
             } else {
-                sse_fn_pl = sse_op_table3a[(s->dflag == 2) * 2 +
-                                           ((b >> 8) - 2)];
+#ifdef TARGET_X86_64
+                SSEFunc_0_pl sse_fn_pl = sse_op_table3aq[(b >> 8) & 1];
                 sse_fn_pl(cpu_ptr0, cpu_T[0]);
+#else
+                goto illegal_op;
+#endif
             }
             break;
         case 0x02c: /* cvttps2pi */
@@ -3624,16 +3634,18 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             }
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
             if (ot == OT_LONG) {
-                sse_fn_i_p = sse_op_table3b[(s->dflag == 2) * 2 +
-                                            ((b >> 8) - 2) +
-                                            (b & 1) * 4];
+                SSEFunc_i_p sse_fn_i_p =
+                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
                 sse_fn_i_p(cpu_tmp2_i32, cpu_ptr0);
                 tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
             } else {
-                sse_fn_l_p = sse_op_table3b[(s->dflag == 2) * 2 +
-                                            ((b >> 8) - 2) +
-                                            (b & 1) * 4];
+#ifdef TARGET_X86_64
+                SSEFunc_l_p sse_fn_l_p =
+                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
                 sse_fn_l_p(cpu_T[0], cpu_ptr0);
+#else
+                goto illegal_op;
+#endif
             }
             gen_op_mov_reg_T0(ot, reg);
             break;
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index ec08dd0474..47008c24f2 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -135,6 +135,41 @@ int kvm_arch_get_registers(CPUS390XState *env)
     return 0;
 }
 
+/*
+ * Legacy layout for s390:
+ * Older S390 KVM requires the topmost vma of the RAM to be
+ * smaller than an system defined value, which is at least 256GB.
+ * Larger systems have larger values. We put the guest between
+ * the end of data segment (system break) and this value. We
+ * use 32GB as a base to have enough room for the system break
+ * to grow. We also have to use MAP parameters that avoid
+ * read-only mapping of guest pages.
+ */
+static void *legacy_s390_alloc(ram_addr_t size)
+{
+    void *mem;
+
+    mem = mmap((void *) 0x800000000ULL, size,
+               PROT_EXEC|PROT_READ|PROT_WRITE,
+               MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+    if (mem == MAP_FAILED) {
+        fprintf(stderr, "Allocating RAM failed\n");
+        abort();
+    }
+    return mem;
+}
+
+void *kvm_arch_vmalloc(ram_addr_t size)
+{
+    /* Can we use the standard allocation ? */
+    if (kvm_check_extension(kvm_state, KVM_CAP_S390_GMAP) &&
+        kvm_check_extension(kvm_state, KVM_CAP_S390_COW)) {
+        return NULL;
+    } else {
+        return legacy_s390_alloc(size);
+    }
+}
+
 int kvm_arch_insert_sw_breakpoint(CPUS390XState *env, struct kvm_sw_breakpoint *bp)
 {
     static const uint8_t diag_501[] = {0x83, 0x24, 0x05, 0x01};
diff --git a/targphys.h b/targphys.h
index 95648d6882..bd4938fc02 100644
--- a/targphys.h
+++ b/targphys.h
@@ -11,10 +11,26 @@
 typedef uint32_t target_phys_addr_t;
 #define TARGET_PHYS_ADDR_MAX UINT32_MAX
 #define TARGET_FMT_plx "%08x"
+/* Format strings for printing target_phys_addr_t types.
+ * These are recommended over the less flexible TARGET_FMT_plx,
+ * which is retained for the benefit of existing code.
+ */
+#define TARGET_PRIdPHYS PRId32
+#define TARGET_PRIiPHYS PRIi32
+#define TARGET_PRIoPHYS PRIo32
+#define TARGET_PRIuPHYS PRIu32
+#define TARGET_PRIxPHYS PRIx32
+#define TARGET_PRIXPHYS PRIX32
 #elif TARGET_PHYS_ADDR_BITS == 64
 typedef uint64_t target_phys_addr_t;
 #define TARGET_PHYS_ADDR_MAX UINT64_MAX
 #define TARGET_FMT_plx "%016" PRIx64
+#define TARGET_PRIdPHYS PRId64
+#define TARGET_PRIiPHYS PRIi64
+#define TARGET_PRIoPHYS PRIo64
+#define TARGET_PRIuPHYS PRIu64
+#define TARGET_PRIxPHYS PRIx64
+#define TARGET_PRIXPHYS PRIX64
 #endif
 #endif
 
diff --git a/tests/Makefile b/tests/Makefile
index d66ab196a7..d687ecce3f 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -14,6 +14,7 @@ check-unit-y += tests/test-string-input-visitor$(EXESUF)
 check-unit-y += tests/test-string-output-visitor$(EXESUF)
 check-unit-y += tests/test-coroutine$(EXESUF)
 check-unit-y += tests/test-visitor-serialization$(EXESUF)
+check-unit-y += tests/test-iov$(EXESUF)
 
 check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
 
@@ -47,6 +48,7 @@ tests/check-qlist$(EXESUF): tests/check-qlist.o qlist.o qint.o $(tools-obj-y)
 tests/check-qfloat$(EXESUF): tests/check-qfloat.o qfloat.o $(tools-obj-y)
 tests/check-qjson$(EXESUF): tests/check-qjson.o $(qobject-obj-y) $(tools-obj-y)
 tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) $(tools-obj-y)
+tests/test-iov$(EXESUF): tests/test-iov.o iov.o
 
 tests/test-qapi-types.c tests/test-qapi-types.h :\
 $(SRC_PATH)/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py
diff --git a/tests/fdc-test.c b/tests/fdc-test.c
index 610e2f1e26..585fb0e343 100644
--- a/tests/fdc-test.c
+++ b/tests/fdc-test.c
@@ -142,7 +142,7 @@ static uint8_t send_read_command(void)
     }
 
     st0 = floppy_recv();
-    if (st0 != 0x40) {
+    if (st0 != 0x60) {
         ret = 1;
     }
 
@@ -156,19 +156,16 @@ static uint8_t send_read_command(void)
     return ret;
 }
 
-static void send_step_pulse(void)
+static void send_step_pulse(int cyl)
 {
     int drive = 0;
     int head = 0;
-    static int cyl = 0;
 
     floppy_send(CMD_SEEK);
     floppy_send(head << 2 | drive);
     g_assert(!get_irq(FLOPPY_IRQ));
     floppy_send(cyl);
     ack_irq();
-
-    cyl = (cyl + 1) % 4;
 }
 
 static uint8_t cmos_read(uint8_t reg)
@@ -195,8 +192,7 @@ static void test_no_media_on_start(void)
     assert_bit_set(dir, DSKCHG);
     dir = inb(FLOPPY_BASE + reg_dir);
     assert_bit_set(dir, DSKCHG);
-    send_step_pulse();
-    send_step_pulse();
+    send_step_pulse(1);
     dir = inb(FLOPPY_BASE + reg_dir);
     assert_bit_set(dir, DSKCHG);
     dir = inb(FLOPPY_BASE + reg_dir);
@@ -227,7 +223,14 @@ static void test_media_change(void)
     dir = inb(FLOPPY_BASE + reg_dir);
     assert_bit_set(dir, DSKCHG);
 
-    send_step_pulse();
+    send_step_pulse(0);
+    dir = inb(FLOPPY_BASE + reg_dir);
+    assert_bit_set(dir, DSKCHG);
+    dir = inb(FLOPPY_BASE + reg_dir);
+    assert_bit_set(dir, DSKCHG);
+
+    /* Step to next track should clear DSKCHG bit. */
+    send_step_pulse(1);
     dir = inb(FLOPPY_BASE + reg_dir);
     assert_bit_clear(dir, DSKCHG);
     dir = inb(FLOPPY_BASE + reg_dir);
@@ -243,11 +246,39 @@ static void test_media_change(void)
     dir = inb(FLOPPY_BASE + reg_dir);
     assert_bit_set(dir, DSKCHG);
 
-    send_step_pulse();
+    send_step_pulse(0);
     dir = inb(FLOPPY_BASE + reg_dir);
     assert_bit_set(dir, DSKCHG);
     dir = inb(FLOPPY_BASE + reg_dir);
     assert_bit_set(dir, DSKCHG);
+
+    send_step_pulse(1);
+    dir = inb(FLOPPY_BASE + reg_dir);
+    assert_bit_set(dir, DSKCHG);
+    dir = inb(FLOPPY_BASE + reg_dir);
+    assert_bit_set(dir, DSKCHG);
+}
+
+static void test_sense_interrupt(void)
+{
+    int drive = 0;
+    int head = 0;
+    int cyl = 0;
+    int ret = 0;
+
+    floppy_send(CMD_SENSE_INT);
+    ret = floppy_recv();
+    g_assert(ret == 0x80);
+
+    floppy_send(CMD_SEEK);
+    floppy_send(head << 2 | drive);
+    g_assert(!get_irq(FLOPPY_IRQ));
+    floppy_send(cyl);
+
+    floppy_send(CMD_SENSE_INT);
+    ret = floppy_recv();
+    g_assert(ret == 0x20);
+    floppy_recv();
 }
 
 /* success if no crash or abort */
@@ -297,6 +328,7 @@ int main(int argc, char **argv)
     qtest_add_func("/fdc/no_media_on_start", test_no_media_on_start);
     qtest_add_func("/fdc/read_without_media", test_read_without_media);
     qtest_add_func("/fdc/media_change", test_media_change);
+    qtest_add_func("/fdc/sense_interrupt", test_sense_interrupt);
     qtest_add_func("/fdc/fuzz-registers", fuzz_registers);
 
     ret = g_test_run();
diff --git a/tests/libqtest.c b/tests/libqtest.c
index 071b6be521..02d039218d 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -40,6 +40,7 @@ struct QTestState
     bool irq_level[MAX_IRQ];
     GString *rx;
     gchar *pid_file;
+    char *socket_path, *qmp_socket_path;
 };
 
 #define g_assert_no_errno(ret) do { \
@@ -88,8 +89,6 @@ QTestState *qtest_init(const char *extra_args)
 {
     QTestState *s;
     int sock, qmpsock, ret, i;
-    gchar *socket_path;
-    gchar *qmp_socket_path;
     gchar *pid_file;
     gchar *command;
     const char *qemu_binary;
@@ -98,14 +97,14 @@ QTestState *qtest_init(const char *extra_args)
     qemu_binary = getenv("QTEST_QEMU_BINARY");
     g_assert(qemu_binary != NULL);
 
-    socket_path = g_strdup_printf("/tmp/qtest-%d.sock", getpid());
-    qmp_socket_path = g_strdup_printf("/tmp/qtest-%d.qmp", getpid());
-    pid_file = g_strdup_printf("/tmp/qtest-%d.pid", getpid());
-
     s = g_malloc(sizeof(*s));
 
-    sock = init_socket(socket_path);
-    qmpsock = init_socket(qmp_socket_path);
+    s->socket_path = g_strdup_printf("/tmp/qtest-%d.sock", getpid());
+    s->qmp_socket_path = g_strdup_printf("/tmp/qtest-%d.qmp", getpid());
+    pid_file = g_strdup_printf("/tmp/qtest-%d.pid", getpid());
+
+    sock = init_socket(s->socket_path);
+    qmpsock = init_socket(s->qmp_socket_path);
 
     pid = fork();
     if (pid == 0) {
@@ -115,8 +114,8 @@ QTestState *qtest_init(const char *extra_args)
                                   "-qmp unix:%s,nowait "
                                   "-pidfile %s "
                                   "-machine accel=qtest "
-                                  "%s", qemu_binary, socket_path,
-                                  qmp_socket_path, pid_file,
+                                  "%s", qemu_binary, s->socket_path,
+                                  s->qmp_socket_path, pid_file,
                                   extra_args ?: "");
 
         ret = system(command);
@@ -133,9 +132,6 @@ QTestState *qtest_init(const char *extra_args)
         s->irq_level[i] = false;
     }
 
-    g_free(socket_path);
-    g_free(qmp_socket_path);
-
     /* Read the QMP greeting and then do the handshake */
     qtest_qmp(s, "");
     qtest_qmp(s, "{ 'execute': 'qmp_capabilities' }");
@@ -160,6 +156,13 @@ void qtest_quit(QTestState *s)
 
         fclose(f);
     }
+
+    unlink(s->pid_file);
+    unlink(s->socket_path);
+    unlink(s->qmp_socket_path);
+    g_free(s->pid_file);
+    g_free(s->socket_path);
+    g_free(s->qmp_socket_path);
 }
 
 static void socket_sendf(int fd, const char *fmt, va_list ap)
diff --git a/tests/test-iov.c b/tests/test-iov.c
new file mode 100644
index 0000000000..cbe7a8955c
--- /dev/null
+++ b/tests/test-iov.c
@@ -0,0 +1,260 @@
+#include <glib.h>
+#include "qemu-common.h"
+#include "iov.h"
+#include "qemu_socket.h"
+
+/* create a randomly-sized iovec with random vectors */
+static void iov_random(struct iovec **iovp, unsigned *iov_cntp)
+{
+     unsigned niov = g_test_rand_int_range(3,8);
+     struct iovec *iov = g_malloc(niov * sizeof(*iov));
+     unsigned i;
+     for (i = 0; i < niov; ++i) {
+         iov[i].iov_len = g_test_rand_int_range(5,20);
+         iov[i].iov_base = g_malloc(iov[i].iov_len);
+     }
+     *iovp = iov;
+     *iov_cntp = niov;
+}
+
+static void iov_free(struct iovec *iov, unsigned niov)
+{
+    unsigned i;
+    for (i = 0; i < niov; ++i) {
+        g_free(iov[i].iov_base);
+    }
+    g_free(iov);
+}
+
+static void test_iov_bytes(struct iovec *iov, unsigned niov,
+                           size_t offset, size_t bytes)
+{
+    unsigned i;
+    size_t j, o;
+    unsigned char *b;
+    o = 0;
+
+    /* we walk over all elements, */
+    for (i = 0; i < niov; ++i) {
+        b = iov[i].iov_base;
+        /* over each char of each element, */
+        for (j = 0; j < iov[i].iov_len; ++j) {
+            /* counting each of them and
+             * verifying that the ones within [offset,offset+bytes)
+             * range are equal to the position number (o) */
+            if (o >= offset && o < offset + bytes) {
+                g_assert(b[j] == (o & 255));
+            } else {
+                g_assert(b[j] == 0xff);
+            }
+            ++o;
+        }
+    }
+}
+
+static void test_to_from_buf_1(void)
+{
+     unsigned niov;
+     struct iovec *iov;
+     size_t sz;
+     unsigned char *ibuf, *obuf;
+     unsigned i, j, n;
+
+     iov_random(&iov, &niov);
+
+     sz = iov_size(iov, niov);
+
+     ibuf = g_malloc(sz + 8) + 4;
+     memcpy(ibuf-4, "aaaa", 4); memcpy(ibuf + sz, "bbbb", 4);
+     obuf = g_malloc(sz + 8) + 4;
+     memcpy(obuf-4, "xxxx", 4); memcpy(obuf + sz, "yyyy", 4);
+
+     /* fill in ibuf with 0123456... */
+     for (i = 0; i < sz; ++i) {
+         ibuf[i] = i & 255;
+     }
+
+     for (i = 0; i <= sz; ++i) {
+
+         /* Test from/to buf for offset(i) in [0..sz] up to the end of buffer.
+          * For last iteration with offset == sz, the procedure should
+          * skip whole vector and process exactly 0 bytes */
+
+         /* first set bytes [i..sz) to some "random" value */
+         n = iov_memset(iov, niov, 0, 0xff, -1);
+         g_assert(n == sz);
+
+         /* next copy bytes [i..sz) from ibuf to iovec */
+         n = iov_from_buf(iov, niov, i, ibuf + i, -1);
+         g_assert(n == sz - i);
+
+         /* clear part of obuf */
+         memset(obuf + i, 0, sz - i);
+         /* and set this part of obuf to values from iovec */
+         n = iov_to_buf(iov, niov, i, obuf + i, -1);
+         g_assert(n == sz - i);
+
+         /* now compare resulting buffers */
+         g_assert(memcmp(ibuf, obuf, sz) == 0);
+
+         /* test just one char */
+         n = iov_to_buf(iov, niov, i, obuf + i, 1);
+         g_assert(n == (i < sz));
+         if (n) {
+             g_assert(obuf[i] == (i & 255));
+         }
+
+         for (j = i; j <= sz; ++j) {
+             /* now test num of bytes cap up to byte no. j,
+              * with j in [i..sz]. */
+
+             /* clear iovec */
+             n = iov_memset(iov, niov, 0, 0xff, -1);
+             g_assert(n == sz);
+
+             /* copy bytes [i..j) from ibuf to iovec */
+             n = iov_from_buf(iov, niov, i, ibuf + i, j - i);
+             g_assert(n == j - i);
+
+             /* clear part of obuf */
+             memset(obuf + i, 0, j - i);
+
+             /* copy bytes [i..j) from iovec to obuf */
+             n = iov_to_buf(iov, niov, i, obuf + i, j - i);
+             g_assert(n == j - i);
+
+             /* verify result */
+             g_assert(memcmp(ibuf, obuf, sz) == 0);
+
+             /* now actually check if the iovec contains the right data */
+             test_iov_bytes(iov, niov, i, j - i);
+         }
+    }
+    g_assert(!memcmp(ibuf-4, "aaaa", 4) && !memcmp(ibuf+sz, "bbbb", 4));
+    g_free(ibuf-4);
+    g_assert(!memcmp(obuf-4, "xxxx", 4) && !memcmp(obuf+sz, "yyyy", 4));
+    g_free(obuf-4);
+    iov_free(iov, niov);
+}
+
+static void test_to_from_buf(void)
+{
+    int x;
+    for (x = 0; x < 4; ++x) {
+        test_to_from_buf_1();
+    }
+}
+
+static void test_io(void)
+{
+#ifndef _WIN32
+/* socketpair(PF_UNIX) which does not exist on windows */
+
+    int sv[2];
+    int r;
+    unsigned i, j, k, s, t;
+    fd_set fds;
+    unsigned niov;
+    struct iovec *iov, *siov;
+    unsigned char *buf;
+    size_t sz;
+
+    iov_random(&iov, &niov);
+    sz = iov_size(iov, niov);
+    buf = g_malloc(sz);
+    for (i = 0; i < sz; ++i) {
+        buf[i] = i & 255;
+    }
+    iov_from_buf(iov, niov, 0, buf, sz);
+
+    siov = g_malloc(sizeof(*iov) * niov);
+    memcpy(siov, iov, sizeof(*iov) * niov);
+
+    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) < 0) {
+       perror("socketpair");
+       exit(1);
+    }
+
+    FD_ZERO(&fds);
+
+    t = 0;
+    if (fork() == 0) {
+       /* writer */
+
+       close(sv[0]);
+       FD_SET(sv[1], &fds);
+       fcntl(sv[1], F_SETFL, O_RDWR|O_NONBLOCK);
+       r = g_test_rand_int_range(sz / 2, sz);
+       setsockopt(sv[1], SOL_SOCKET, SO_SNDBUF, &r, sizeof(r));
+
+       for (i = 0; i <= sz; ++i) {
+           for (j = i; j <= sz; ++j) {
+               k = i;
+               do {
+                   s = g_test_rand_int_range(0, j - k + 1);
+                   r = iov_send(sv[1], iov, niov, k, s);
+                   g_assert(memcmp(iov, siov, sizeof(*iov)*niov) == 0);
+                   if (r >= 0) {
+                       k += r;
+                       t += r;
+                       usleep(g_test_rand_int_range(0, 30));
+                   } else if (errno == EAGAIN) {
+                       select(sv[1]+1, NULL, &fds, NULL, NULL);
+                       continue;
+                   } else {
+                       perror("send");
+                       exit(1);
+                   }
+               } while(k < j);
+           }
+       }
+       exit(0);
+
+    } else {
+       /* reader & verifier */
+
+       close(sv[1]);
+       FD_SET(sv[0], &fds);
+       fcntl(sv[0], F_SETFL, O_RDWR|O_NONBLOCK);
+       r = g_test_rand_int_range(sz / 2, sz);
+       setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &r, sizeof(r));
+       usleep(500000);
+
+       for (i = 0; i <= sz; ++i) {
+           for (j = i; j <= sz; ++j) {
+               k = i;
+               iov_memset(iov, niov, 0, 0xff, -1);
+               do {
+                   s = g_test_rand_int_range(0, j - k + 1);
+                   r = iov_recv(sv[0], iov, niov, k, s);
+                   g_assert(memcmp(iov, siov, sizeof(*iov)*niov) == 0);
+                   if (r > 0) {
+                       k += r;
+                       t += r;
+                   } else if (!r) {
+                       if (s) {
+                           break;
+                       }
+                   } else if (errno == EAGAIN) {
+                       select(sv[0]+1, &fds, NULL, NULL, NULL);
+                       continue;
+                   } else {
+                       perror("recv");
+                       exit(1);
+                   }
+               } while(k < j);
+               test_iov_bytes(iov, niov, i, j - i);
+           }
+        }
+     }
+#endif
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+    g_test_rand_int();
+    g_test_add_func("/basic/iov/from-to-buf", test_to_from_buf);
+    g_test_add_func("/basic/iov/io", test_io);
+    return g_test_run();
+}
diff --git a/trace-events b/trace-events
index c935ba24f4..fa0c883456 100644
--- a/trace-events
+++ b/trace-events
@@ -258,7 +258,7 @@ usb_ehci_port_reset(uint32_t port, int enable) "reset port #%d - %d"
 usb_ehci_data(int rw, uint32_t cpage, uint32_t offset, uint32_t addr, uint32_t len, uint32_t bufpos) "write %d, cpage %d, offset 0x%03x, addr 0x%08x, len %d, bufpos %d"
 usb_ehci_queue_action(void *q, const char *action) "q %p: %s"
 usb_ehci_packet_action(void *q, void *p, const char *action) "q %p p %p: %s"
-usb_ehci_interrupt(uint32_t level, uint32_t sts, uint32_t mask) "level %d, sts 0x%x, mask 0x%x"
+usb_ehci_irq(uint32_t level, uint32_t frindex, uint32_t sts, uint32_t mask) "level %d, frindex 0x%04x, sts 0x%x, mask 0x%x"
 
 # hw/usb/hcd-uhci.c
 usb_uhci_reset(void) "=== RESET ==="
@@ -347,6 +347,20 @@ usb_hub_clear_port_feature(int addr, int nr, const char *f) "dev %d, port %d, fe
 usb_hub_attach(int addr, int nr) "dev %d, port %d"
 usb_hub_detach(int addr, int nr) "dev %d, port %d"
 
+# hw/usb/dev-uas.c
+usb_uas_reset(int addr) "dev %d"
+usb_uas_command(int addr, uint16_t tag, int lun, uint32_t lun64_1, uint32_t lun64_2) "dev %d, tag 0x%x, lun %d, lun64 %08x-%08x"
+usb_uas_response(int addr, uint16_t tag, uint8_t code) "dev %d, tag 0x%x, code 0x%x"
+usb_uas_sense(int addr, uint16_t tag, uint8_t status) "dev %d, tag 0x%x, status 0x%x"
+usb_uas_read_ready(int addr, uint16_t tag) "dev %d, tag 0x%x"
+usb_uas_write_ready(int addr, uint16_t tag) "dev %d, tag 0x%x"
+usb_uas_xfer_data(int addr, uint16_t tag, uint32_t copy, uint32_t uoff, uint32_t usize, uint32_t soff, uint32_t ssize) "dev %d, tag 0x%x, copy %d, usb-pkt %d/%d, scsi-buf %d/%d"
+usb_uas_scsi_data(int addr, uint16_t tag, uint32_t bytes) "dev %d, tag 0x%x, bytes %d"
+usb_uas_scsi_complete(int addr, uint16_t tag, uint32_t status, uint32_t resid) "dev %d, tag 0x%x, status 0x%x, residue %d"
+usb_uas_tmf_abort_task(int addr, uint16_t tag, uint16_t task_tag) "dev %d, tag 0x%x, task-tag 0x%x"
+usb_uas_tmf_logical_unit_reset(int addr, uint16_t tag, int lun) "dev %d, tag 0x%x, lun %d"
+usb_uas_tmf_unsupported(int addr, uint16_t tag, uint32_t function) "dev %d, tag 0x%x, function 0x%x"
+
 # hw/usb/host-linux.c
 usb_host_open_started(int bus, int addr) "dev %d:%d"
 usb_host_open_success(int bus, int addr) "dev %d:%d"
@@ -368,8 +382,10 @@ usb_host_urb_complete(int bus, int addr, void *aurb, int status, int length, int
 usb_host_urb_canceled(int bus, int addr, void *aurb) "dev %d:%d, aurb %p"
 usb_host_ep_set_halt(int bus, int addr, int ep) "dev %d:%d, ep %d"
 usb_host_ep_clear_halt(int bus, int addr, int ep) "dev %d:%d, ep %d"
-usb_host_ep_start_iso(int bus, int addr, int ep) "dev %d:%d, ep %d"
-usb_host_ep_stop_iso(int bus, int addr, int ep) "dev %d:%d, ep %d"
+usb_host_iso_start(int bus, int addr, int ep) "dev %d:%d, ep %d"
+usb_host_iso_stop(int bus, int addr, int ep) "dev %d:%d, ep %d"
+usb_host_iso_out_of_bufs(int bus, int addr, int ep) "dev %d:%d, ep %d"
+usb_host_iso_many_urbs(int bus, int addr, int count) "dev %d:%d, count %d"
 usb_host_reset(int bus, int addr) "dev %d:%d"
 usb_host_auto_scan_enabled(void)
 usb_host_auto_scan_disabled(void)
@@ -512,6 +528,85 @@ lm32_uart_irq_state(int level) "irq state %d"
 # hw/lm32_sys.c
 lm32_sys_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x"
 
+# hw/megasas.c
+megasas_init_firmware(uint64_t pa) "pa %" PRIx64 " "
+megasas_init_queue(uint64_t queue_pa, int queue_len, uint64_t head, uint64_t tail, uint32_t flags) "queue at %" PRIx64 " len %d head %" PRIx64 " tail %" PRIx64 " flags %x"
+megasas_initq_map_failed(int frame) "scmd %d: failed to map queue"
+megasas_initq_mismatch(int queue_len, int fw_cmds) "queue size %d max fw cmds %d"
+megasas_qf_found(unsigned int index, uint64_t pa) "found mapped frame %x pa %" PRIx64 ""
+megasas_qf_new(unsigned int index, void *cmd) "return new frame %x cmd %p"
+megasas_qf_failed(unsigned long pa) "all frames busy for frame %lx"
+megasas_qf_enqueue(unsigned int index, unsigned int count, uint64_t context, unsigned int tail, int busy) "enqueue frame %x count %d context %" PRIx64 " tail %x busy %d"
+megasas_qf_update(unsigned int head, unsigned int busy) "update reply queue head %x busy %d"
+megasas_qf_dequeue(unsigned int index) "dequeue frame %x"
+megasas_qf_map_failed(int cmd, unsigned long frame) "scmd %d: frame %lu"
+megasas_qf_complete_noirq(uint64_t context) "context %" PRIx64 " "
+megasas_qf_complete(uint64_t context, unsigned int tail, unsigned int offset, int busy, unsigned int doorbell) "context %" PRIx64 " tail %x offset %d busy %d doorbell %x"
+megasas_handle_frame(const char *cmd, uint64_t addr, uint64_t context, uint32_t count) "MFI cmd %s addr %" PRIx64 " context %" PRIx64 " count %d"
+megasas_frame_busy(uint64_t addr) "frame %" PRIx64 " busy"
+megasas_unhandled_frame_cmd(int cmd, uint8_t frame_cmd) "scmd %d: Unhandled MFI cmd %x"
+megasas_handle_scsi(const char *frame, int bus, int dev, int lun, void *sdev, unsigned long size) "%s dev %x/%x/%x sdev %p xfer %lu"
+megasas_scsi_target_not_present(const char *frame, int bus, int dev, int lun) "%s dev %x/%x/%x target not present"
+megasas_scsi_invalid_cdb_len(const char *frame, int bus, int dev, int lun, int len) "%s dev %x/%x/%x invalid cdb len %d"
+megasas_iov_read_overflow(int cmd, int bytes, int len) "scmd %d: %d/%d bytes"
+megasas_iov_write_overflow(int cmd, int bytes, int len) "scmd %d: %d/%d bytes"
+megasas_iov_read_underflow(int cmd, int bytes, int len) "scmd %d: %d/%d bytes"
+megasas_iov_write_underflow(int cmd, int bytes, int len) "scmd %d: %d/%d bytes"
+megasas_scsi_req_alloc_failed(const char *frame, int dev, int lun) "%s dev %x/%x req allocation failed"
+megasas_scsi_read_start(int cmd, int len) "scmd %d: transfer %d bytes of data"
+megasas_scsi_write_start(int cmd, int len) "scmd %d: transfer %d bytes of data"
+megasas_scsi_nodata(int cmd) "scmd %d: no data to be transferred"
+megasas_scsi_complete(int cmd, uint32_t status, int len, int xfer) "scmd %d: finished with status %x, len %u/%u"
+megasas_command_complete(int cmd, uint32_t status, uint32_t resid) "scmd %d: command completed, status %x, residual %d"
+megasas_handle_io(int cmd, const char *frame, int dev, int lun, unsigned long lba, unsigned long count) "scmd %d: %s dev %x/%x lba %lx count %lu"
+megasas_io_target_not_present(int cmd, const char *frame, int dev, int lun) "scmd %d: %s dev 1/%x/%x LUN not present"
+megasas_io_read_start(int cmd, unsigned long lba, unsigned long count, unsigned long len) "scmd %d: start LBA %lx %lu blocks (%lu bytes)"
+megasas_io_write_start(int cmd, unsigned long lba, unsigned long count, unsigned long len) "scmd %d: start LBA %lx %lu blocks (%lu bytes)"
+megasas_io_complete(int cmd, uint32_t len) "scmd %d: %d bytes completed"
+megasas_io_read(int cmd, int bytes, int len, unsigned long offset) "scmd %d: %d/%d bytes, iov offset %lu"
+megasas_io_write(int cmd, int bytes, int len, unsigned long offset) "scmd %d: %d/%d bytes, iov offset %lu"
+megasas_io_continue(int cmd, int bytes) "scmd %d: %d bytes left"
+megasas_iovec_map_failed(int cmd, int index, unsigned long iov_size) "scmd %d: iovec %d size %lu"
+megasas_iovec_sgl_overflow(int cmd, int index, int limit) "scmd %d: iovec count %d limit %d"
+megasas_iovec_sgl_underflow(int cmd, int index) "scmd %d: iovec count %d"
+megasas_iovec_sgl_invalid(int cmd, int index, uint64_t pa, uint32_t len) "scmd %d: element %d pa %" PRIx64 " len %u"
+megasas_iovec_overflow(int cmd, int len, int limit) "scmd %d: len %d limit %d"
+megasas_iovec_underflow(int cmd, int len, int limit) "scmd %d: len %d limit %d"
+megasas_handle_dcmd(int cmd, int opcode) "scmd %d: MFI DCMD opcode %x"
+megasas_finish_dcmd(int cmd, int size) "scmd %d: MFI DCMD wrote %d bytes"
+megasas_dcmd_req_alloc_failed(int cmd, const char *desc) "scmd %d: %s alloc failed"
+megasas_dcmd_internal_submit(int cmd, const char *desc, int dev) "scmd %d: %s to dev %d"
+megasas_dcmd_internal_finish(int cmd, int opcode, int lun) "scmd %d: DCMD finish internal cmd %x lun %d"
+megasas_dcmd_internal_invalid(int cmd, int opcode) "scmd %d: Invalid internal DCMD %x"
+megasas_dcmd_unhandled(int cmd, int opcode, int len) "scmd %d: opcode %x, len %d"
+megasas_dcmd_zero_sge(int cmd) "scmd %d: zero DCMD sge count"
+megasas_dcmd_invalid_sge(int cmd, int count) "scmd %d: invalid DCMD sge count %d"
+megasas_dcmd_map_failed(int cmd) "scmd %d: Failed to map DCMD buffer"
+megasas_dcmd_invalid_xfer_len(int cmd, unsigned long size, unsigned long max) "scmd %d: invalid xfer len %ld, max %ld"
+megasas_dcmd_enter(int cmd, const char *dcmd, int len) "scmd %d: DCMD %s len %d"
+megasas_dcmd_dummy(int cmd, unsigned long size) "scmd %d: DCMD dummy xfer len %ld"
+megasas_dcmd_set_fw_time(int cmd, unsigned long time) "scmd %d: Set FW time %lx"
+megasas_dcmd_pd_get_list(int cmd, int num, int max, int offset) "scmd %d: DCMD PD get list: %d / %d PDs, size %d"
+megasas_dcmd_ld_get_list(int cmd, int num, int max) "scmd %d: DCMD LD get list: found %d / %d LDs"
+megasas_dcmd_ld_get_info(int cmd, int ld_id) "scmd %d: DCMD LD get info for dev %d"
+megasas_dcmd_pd_get_info(int cmd, int pd_id) "scmd %d: DCMD PD get info for dev %d"
+megasas_dcmd_pd_list_query(int cmd, int flags) "scmd %d: DCMD PD list query flags %x"
+megasas_dcmd_dump_frame(int offset, char f0, char f1, char f2, char f3, char f4, char f5, char f6, char f7) "0x%x: %02x %02x %02x %02x %02x %02x %02x %02x"
+megasas_abort_frame(int cmd, int abort_cmd) "scmd %d: aborting frame %x"
+megasas_abort_no_cmd(int cmd, uint64_t context) "scmd %d: no active command for frame context %" PRIx64 ""
+megasas_abort_invalid_context(int cmd, uint64_t context, int abort_cmd) "scmd %d: invalid frame context %" PRIx64 " for abort frame %x"
+megasas_reset(void) "Reset"
+megasas_init(int sges, int cmds, const char *intr, const char *mode) "Using %d sges, %d cmds, %s, %s mode"
+megasas_msix_raise(int vector) "vector %d"
+megasas_irq_lower(void) "INTx"
+megasas_irq_raise(void) "INTx"
+megasas_intr_enabled(void) "Interrupts enabled"
+megasas_intr_disabled(void) "Interrupts disabled"
+megasas_mmio_readl(unsigned long addr, uint32_t val) "addr 0x%lx: 0x%x"
+megasas_mmio_invalid_readl(unsigned long addr) "addr 0x%lx"
+megasas_mmio_writel(uint32_t addr, uint32_t val) "addr 0x%x: 0x%x"
+megasas_mmio_invalid_writel(uint32_t addr, uint32_t val) "addr 0x%x: 0x%x"
+
 # hw/milkymist-ac97.c
 milkymist_ac97_memory_read(uint32_t addr, uint32_t value) "addr %08x value %08x"
 milkymist_ac97_memory_write(uint32_t addr, uint32_t value) "addr %08x value %08x"
@@ -645,6 +740,9 @@ iscsi_aio_read16_cb(void *iscsi, int status, void *acb, int canceled) "iscsi %p
 iscsi_aio_readv(void *iscsi, int64_t sector_num, int nb_sectors, void *opaque, void *acb) "iscsi %p sector_num %"PRId64" nb_sectors %d opaque %p acb %p"
 
 # hw/esp.c
+esp_error_fifo_overrun(void) "FIFO overrun"
+esp_error_unhandled_command(uint32_t val) "unhandled command (%2.2x)"
+esp_error_invalid_write(uint32_t val, uint32_t addr) "invalid write of 0x%02x at [0x%x]"
 esp_raise_irq(void) "Raise IRQ"
 esp_lower_irq(void) "Lower IRQ"
 esp_dma_enable(void) "Raise enable"
@@ -670,10 +768,24 @@ esp_mem_writeb_cmd_iccs(uint32_t val) "Initiator Command Complete Sequence (%2.2
 esp_mem_writeb_cmd_msgacc(uint32_t val) "Message Accepted (%2.2x)"
 esp_mem_writeb_cmd_pad(uint32_t val) "Transfer padding (%2.2x)"
 esp_mem_writeb_cmd_satn(uint32_t val) "Set ATN (%2.2x)"
+esp_mem_writeb_cmd_rstatn(uint32_t val) "Reset ATN (%2.2x)"
 esp_mem_writeb_cmd_sel(uint32_t val) "Select without ATN (%2.2x)"
 esp_mem_writeb_cmd_selatn(uint32_t val) "Select with ATN (%2.2x)"
 esp_mem_writeb_cmd_selatns(uint32_t val) "Select with ATN & stop (%2.2x)"
 esp_mem_writeb_cmd_ensel(uint32_t val) "Enable selection (%2.2x)"
+esp_mem_writeb_cmd_dissel(uint32_t val) "Disable selection (%2.2x)"
+esp_pci_error_invalid_dma_direction(void) "invalid DMA transfer direction"
+esp_pci_error_invalid_read(uint32_t reg) "read access outside bounds (reg 0x%x)"
+esp_pci_error_invalid_write(uint32_t reg) "write access outside bounds (reg 0x%x)"
+esp_pci_error_invalid_write_dma(uint32_t val, uint32_t addr) "invalid write of 0x%02x at [0x%x]"
+esp_pci_dma_read(uint32_t saddr, uint32_t reg) "reg[%d]: 0x%8.8x"
+esp_pci_dma_write(uint32_t saddr, uint32_t reg, uint32_t val) "reg[%d]: 0x%8.8x -> 0x%8.8x"
+esp_pci_dma_idle(uint32_t val) "IDLE (%.8x)"
+esp_pci_dma_blast(uint32_t val) "BLAST (%.8x)"
+esp_pci_dma_abort(uint32_t val) "ABORT (%.8x)"
+esp_pci_dma_start(uint32_t val) "START (%.8x)"
+esp_pci_sbac_read(uint32_t reg) "sbac: 0x%8.8x"
+esp_pci_sbac_write(uint32_t reg, uint32_t val) "sbac: 0x%8.8x -> 0x%8.8x"
 
 # monitor.c
 handle_qmp_command(void *mon, const char *cmd_name) "mon %p cmd_name \"%s\""
@@ -783,6 +895,11 @@ displaysurface_resize(void *display_state, void *display_surface, int width, int
 # vga.c
 ppm_save(const char *filename, void *display_surface) "%s surface=%p"
 
+# savevm.c
+
+savevm_section_start(void) ""
+savevm_section_end(unsigned int section_id) "section_id %u"
+
 # hw/qxl.c
 disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"
 disable qxl_io_write_vga(int qid, const char *mode, uint32_t addr, uint32_t val) "%d %s addr=%u val=%u"
diff --git a/vl.c b/vl.c
index 1329c30e6a..46248b9c1c 100644
--- a/vl.c
+++ b/vl.c
@@ -1795,9 +1795,8 @@ char *qemu_find_file(int type, const char *name)
     const char *subdir;
     char *buf;
 
-    /* If name contains path separators then try it as a straight path.  */
-    if ((strchr(name, '/') || strchr(name, '\\'))
-        && access(name, R_OK) == 0) {
+    /* Try the name as a straight path first */
+    if (access(name, R_OK) == 0) {
         return g_strdup(name);
     }
     switch (type) {
@@ -3585,8 +3584,11 @@ int main(int argc, char **argv, char **envp)
     /* init remote displays */
     if (vnc_display) {
         vnc_display_init(ds);
-        if (vnc_display_open(ds, vnc_display) < 0)
+        if (vnc_display_open(ds, vnc_display) < 0) {
+            fprintf(stderr, "Failed to start VNC server on `%s'\n",
+                    vnc_display);
             exit(1);
+        }
 
         if (show_vnc_port) {
             printf("VNC server running on `%s'\n", vnc_display_local_addr(ds));
diff --git a/vmstate.h b/vmstate.h
index 82d97aead4..5af45e0c12 100644
--- a/vmstate.h
+++ b/vmstate.h
@@ -26,7 +26,7 @@
 #ifndef QEMU_VMSTATE_H
 #define QEMU_VMSTATE_H 1
 
-typedef void SaveSetParamsHandler(int blk_enable, int shared, void * opaque);
+typedef void SaveSetParamsHandler(const MigrationParams *params, void * opaque);
 typedef void SaveStateHandler(QEMUFile *f, void *opaque);
 typedef int SaveLiveStateHandler(QEMUFile *f, int stage, void *opaque);
 typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);