summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--aio-win32.c8
-rw-r--r--async.c16
-rw-r--r--block.c72
-rw-r--r--block/Makefile.objs3
-rw-r--r--block/archipelago.c21
-rw-r--r--block/blkdebug.c17
-rw-r--r--block/blkverify.c21
-rw-r--r--block/cow.c433
-rw-r--r--block/curl.c16
-rw-r--r--block/iscsi.c23
-rw-r--r--block/linux-aio.c34
-rw-r--r--block/null.c168
-rw-r--r--block/qcow2-cluster.c43
-rw-r--r--block/qcow2-refcount.c67
-rw-r--r--block/qcow2.c77
-rw-r--r--block/qcow2.h6
-rw-r--r--block/qed.c23
-rw-r--r--block/quorum.c11
-rw-r--r--block/rbd.c25
-rw-r--r--block/sheepdog.c54
-rw-r--r--block/vhdx.c21
-rw-r--r--block/win32-aio.c18
-rwxr-xr-xconfigure27
-rw-r--r--dma-helpers.c20
-rw-r--r--docs/image-fuzzer.txt3
-rw-r--r--docs/rdma.txt6
-rw-r--r--hmp.c2
-rw-r--r--hw/block/Makefile.objs2
-rw-r--r--hw/block/virtio-blk.c20
-rw-r--r--hw/i386/pc.c2
-rw-r--r--hw/ide/ahci.c45
-rw-r--r--hw/ide/atapi.c7
-rw-r--r--hw/ide/core.c26
-rw-r--r--hw/ide/ich.c7
-rw-r--r--hw/net/vhost_net.c2
-rw-r--r--hw/ppc/spapr.c2
-rw-r--r--hw/s390x/css.c40
-rw-r--r--hw/s390x/css.h2
-rw-r--r--hw/virtio/Makefile.objs2
-rw-r--r--hw/virtio/dataplane/vring.c3
-rw-r--r--include/block/aio.h9
-rw-r--r--include/block/block.h1
-rw-r--r--include/hw/s390x/sclp.h2
-rw-r--r--include/hw/virtio/dataplane/vring.h2
-rw-r--r--include/hw/virtio/virtio-blk.h2
-rw-r--r--include/hw/virtio/virtio_ring.h167
-rw-r--r--include/qemu/main-loop.h2
-rw-r--r--include/sysemu/arch_init.h34
-rw-r--r--iothread.c11
-rw-r--r--libcacard/vcard_emul_nss.c4
-rw-r--r--linux-headers/linux/vhost.h2
-rw-r--r--main-loop.c9
-rw-r--r--migration-rdma.c4
-rw-r--r--qapi/block-core.json146
-rw-r--r--qdev-monitor.c2
-rw-r--r--qemu-char.c10
-rw-r--r--qemu-doc.texi9
-rw-r--r--qemu-img.c8
-rw-r--r--qemu-img.texi4
-rw-r--r--qemu-io.c7
-rw-r--r--qemu-nbd.c6
-rw-r--r--qmp-commands.hx2
-rw-r--r--scripts/qapi-types.py2
-rw-r--r--scripts/qapi-visit.py3
-rw-r--r--target-s390x/ioinst.h10
-rw-r--r--tests/Makefile2
-rw-r--r--tests/ahci-test.c1561
-rw-r--r--tests/image-fuzzer/qcow2/fuzz.py26
-rw-r--r--tests/image-fuzzer/qcow2/layout.py138
-rwxr-xr-xtests/image-fuzzer/runner.py54
-rw-r--r--tests/libqos/pci.c6
-rwxr-xr-xtests/qemu-iotests/06056
-rw-r--r--tests/qemu-iotests/060.out61
-rwxr-xr-xtests/qemu-iotests/0692
-rwxr-xr-xtests/qemu-iotests/0722
-rwxr-xr-xtests/qemu-iotests/0992
-rw-r--r--tests/qemu-iotests/common6
-rw-r--r--tests/test-aio.c10
-rw-r--r--tests/test-thread-pool.c44
-rw-r--r--tests/test-throttle.c10
-rw-r--r--thread-pool.c36
-rw-r--r--ui/cocoa.m2
-rw-r--r--vl.c27
-rw-r--r--xen-hvm.c9
84 files changed, 2903 insertions, 1004 deletions
diff --git a/aio-win32.c b/aio-win32.c
index 61e3d2ddfe..d81313b00b 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -283,9 +283,9 @@ bool aio_poll(AioContext *ctx, bool blocking)
     int count;
     int timeout;
 
-    if (aio_prepare(ctx)) {
+    have_select_revents = aio_prepare(ctx);
+    if (have_select_revents) {
         blocking = false;
-        have_select_revents = true;
     }
 
     was_dispatching = ctx->dispatching;
@@ -335,6 +335,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
         event = NULL;
         if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
             event = events[ret - WAIT_OBJECT_0];
+            events[ret - WAIT_OBJECT_0] = events[--count];
         } else if (!have_select_revents) {
             break;
         }
@@ -343,9 +344,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
         blocking = false;
 
         progress |= aio_dispatch_handlers(ctx, event);
-
-        /* Try again, but only call each handler once.  */
-        events[ret - WAIT_OBJECT_0] = events[--count];
     }
 
     progress |= timerlistgroup_run_timers(&ctx->tlg);
diff --git a/async.c b/async.c
index a99e7f639a..6e1b282aa7 100644
--- a/async.c
+++ b/async.c
@@ -289,18 +289,24 @@ static void aio_rfifolock_cb(void *opaque)
     aio_notify(opaque);
 }
 
-AioContext *aio_context_new(void)
+AioContext *aio_context_new(Error **errp)
 {
+    int ret;
     AioContext *ctx;
     ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
+    ret = event_notifier_init(&ctx->notifier, false);
+    if (ret < 0) {
+        g_source_destroy(&ctx->source);
+        error_setg_errno(errp, -ret, "Failed to initialize event notifier");
+        return NULL;
+    }
+    aio_set_event_notifier(ctx, &ctx->notifier,
+                           (EventNotifierHandler *)
+                           event_notifier_test_and_clear);
     ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
     ctx->thread_pool = NULL;
     qemu_mutex_init(&ctx->bh_lock);
     rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
-    event_notifier_init(&ctx->notifier, false);
-    aio_set_event_notifier(ctx, &ctx->notifier, 
-                           (EventNotifierHandler *)
-                           event_notifier_test_and_clear);
     timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
 
     return ctx;
diff --git a/block.c b/block.c
index bcd952a4cb..a857913fc2 100644
--- a/block.c
+++ b/block.c
@@ -4640,7 +4640,28 @@ int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
 
 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
 {
-    acb->aiocb_info->cancel(acb);
+    qemu_aio_ref(acb);
+    bdrv_aio_cancel_async(acb);
+    while (acb->refcnt > 1) {
+        if (acb->aiocb_info->get_aio_context) {
+            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
+        } else if (acb->bs) {
+            aio_poll(bdrv_get_aio_context(acb->bs), true);
+        } else {
+            abort();
+        }
+    }
+    qemu_aio_unref(acb);
+}
+
+/* Async version of aio cancel. The caller is not blocked if the acb implements
+ * cancel_async, otherwise we do nothing and let the request normally complete.
+ * In either case the completion callback must be called. */
+void bdrv_aio_cancel_async(BlockDriverAIOCB *acb)
+{
+    if (acb->aiocb_info->cancel_async) {
+        acb->aiocb_info->cancel_async(acb);
+    }
 }
 
 /**************************************************************/
@@ -4656,18 +4677,8 @@ typedef struct BlockDriverAIOCBSync {
     int is_write;
 } BlockDriverAIOCBSync;
 
-static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
-{
-    BlockDriverAIOCBSync *acb =
-        container_of(blockacb, BlockDriverAIOCBSync, common);
-    qemu_bh_delete(acb->bh);
-    acb->bh = NULL;
-    qemu_aio_release(acb);
-}
-
 static const AIOCBInfo bdrv_em_aiocb_info = {
     .aiocb_size         = sizeof(BlockDriverAIOCBSync),
-    .cancel             = bdrv_aio_cancel_em,
 };
 
 static void bdrv_aio_bh_cb(void *opaque)
@@ -4681,7 +4692,7 @@ static void bdrv_aio_bh_cb(void *opaque)
     acb->common.cb(acb->common.opaque, acb->ret);
     qemu_bh_delete(acb->bh);
     acb->bh = NULL;
-    qemu_aio_release(acb);
+    qemu_aio_unref(acb);
 }
 
 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
@@ -4738,22 +4749,8 @@ typedef struct BlockDriverAIOCBCoroutine {
     QEMUBH* bh;
 } BlockDriverAIOCBCoroutine;
 
-static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
-{
-    AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
-    BlockDriverAIOCBCoroutine *acb =
-        container_of(blockacb, BlockDriverAIOCBCoroutine, common);
-    bool done = false;
-
-    acb->done = &done;
-    while (!done) {
-        aio_poll(aio_context, true);
-    }
-}
-
 static const AIOCBInfo bdrv_em_co_aiocb_info = {
     .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
-    .cancel             = bdrv_aio_co_cancel_em,
 };
 
 static void bdrv_co_em_bh(void *opaque)
@@ -4762,12 +4759,8 @@ static void bdrv_co_em_bh(void *opaque)
 
     acb->common.cb(acb->common.opaque, acb->req.error);
 
-    if (acb->done) {
-        *acb->done = true;
-    }
-
     qemu_bh_delete(acb->bh);
-    qemu_aio_release(acb);
+    qemu_aio_unref(acb);
 }
 
 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
@@ -4806,7 +4799,6 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
     acb->req.qiov = qiov;
     acb->req.flags = flags;
     acb->is_write = is_write;
-    acb->done = NULL;
 
     co = qemu_coroutine_create(bdrv_co_do_rw);
     qemu_coroutine_enter(co, acb);
@@ -4833,7 +4825,6 @@ BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
     BlockDriverAIOCBCoroutine *acb;
 
     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
-    acb->done = NULL;
 
     co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
     qemu_coroutine_enter(co, acb);
@@ -4863,7 +4854,6 @@ BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
     acb->req.sector = sector_num;
     acb->req.nb_sectors = nb_sectors;
-    acb->done = NULL;
     co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
     qemu_coroutine_enter(co, acb);
 
@@ -4891,13 +4881,23 @@ void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
     acb->bs = bs;
     acb->cb = cb;
     acb->opaque = opaque;
+    acb->refcnt = 1;
     return acb;
 }
 
-void qemu_aio_release(void *p)
+void qemu_aio_ref(void *p)
+{
+    BlockDriverAIOCB *acb = p;
+    acb->refcnt++;
+}
+
+void qemu_aio_unref(void *p)
 {
     BlockDriverAIOCB *acb = p;
-    g_slice_free1(acb->aiocb_info->aiocb_size, acb);
+    assert(acb->refcnt > 0);
+    if (--acb->refcnt == 0) {
+        g_slice_free1(acb->aiocb_info->aiocb_size, acb);
+    }
 }
 
 /**************************************************************/
diff --git a/block/Makefile.objs b/block/Makefile.objs
index c9c8bbbcde..a833ed5745 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,4 +1,4 @@
-block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
+block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
@@ -9,6 +9,7 @@ block-obj-y += snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
+block-obj-y += null.o
 
 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
diff --git a/block/archipelago.c b/block/archipelago.c
index 93fb7c0634..73d91a422f 100644
--- a/block/archipelago.c
+++ b/block/archipelago.c
@@ -91,7 +91,6 @@ typedef struct ArchipelagoAIOCB {
     struct BDRVArchipelagoState *s;
     QEMUIOVector *qiov;
     ARCHIPCmd cmd;
-    bool cancelled;
     int status;
     int64_t size;
     int64_t ret;
@@ -318,9 +317,7 @@ static void qemu_archipelago_complete_aio(void *opaque)
     aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
     aio_cb->status = 0;
 
-    if (!aio_cb->cancelled) {
-        qemu_aio_release(aio_cb);
-    }
+    qemu_aio_unref(aio_cb);
     g_free(reqdata);
 }
 
@@ -725,19 +722,8 @@ static int qemu_archipelago_create(const char *filename,
     return ret;
 }
 
-static void qemu_archipelago_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) blockacb;
-    aio_cb->cancelled = true;
-    while (aio_cb->status == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(aio_cb->common.bs), true);
-    }
-    qemu_aio_release(aio_cb);
-}
-
 static const AIOCBInfo archipelago_aiocb_info = {
     .aiocb_size = sizeof(ArchipelagoAIOCB),
-    .cancel = qemu_archipelago_aio_cancel,
 };
 
 static int archipelago_submit_request(BDRVArchipelagoState *s,
@@ -889,7 +875,6 @@ static BlockDriverAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs,
 
     aio_cb->ret = 0;
     aio_cb->s = s;
-    aio_cb->cancelled = false;
     aio_cb->status = -EINPROGRESS;
 
     off = sector_num * BDRV_SECTOR_SIZE;
@@ -905,7 +890,7 @@ static BlockDriverAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs,
 
 err_exit:
     error_report("qemu_archipelago_aio_rw(): I/O Error\n");
-    qemu_aio_release(aio_cb);
+    qemu_aio_unref(aio_cb);
     return NULL;
 }
 
@@ -1008,7 +993,7 @@ static int qemu_archipelago_truncate(BlockDriverState *bs, int64_t offset)
     req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC);
     if (!req) {
         archipelagolog("Cannot get XSEG request\n");
-        return err_exit2;
+        goto err_exit2;
     }
 
     ret = xseg_prep_request(s->xseg, req, targetlen, 0);
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 69b330eced..ced0b600f9 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -52,11 +52,8 @@ typedef struct BlkdebugSuspendedReq {
     QLIST_ENTRY(BlkdebugSuspendedReq) next;
 } BlkdebugSuspendedReq;
 
-static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb);
-
 static const AIOCBInfo blkdebug_aiocb_info = {
-    .aiocb_size = sizeof(BlkdebugAIOCB),
-    .cancel     = blkdebug_aio_cancel,
+    .aiocb_size    = sizeof(BlkdebugAIOCB),
 };
 
 enum {
@@ -447,17 +444,7 @@ static void error_callback_bh(void *opaque)
     struct BlkdebugAIOCB *acb = opaque;
     qemu_bh_delete(acb->bh);
     acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_release(acb);
-}
-
-static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    BlkdebugAIOCB *acb = container_of(blockacb, BlkdebugAIOCB, common);
-    if (acb->bh) {
-        qemu_bh_delete(acb->bh);
-        acb->bh = NULL;
-    }
-    qemu_aio_release(acb);
+    qemu_aio_unref(acb);
 }
 
 static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
diff --git a/block/blkverify.c b/block/blkverify.c
index 163064cf6b..7d64a23a09 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -29,7 +29,6 @@ struct BlkverifyAIOCB {
 
     int ret;                    /* first completed request's result */
     unsigned int done;          /* completion counter */
-    bool *finished;             /* completion signal for cancel */
 
     QEMUIOVector *qiov;         /* user I/O vector */
     QEMUIOVector raw_qiov;      /* cloned I/O vector for raw file */
@@ -38,22 +37,8 @@ struct BlkverifyAIOCB {
     void (*verify)(BlkverifyAIOCB *acb);
 };
 
-static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    BlkverifyAIOCB *acb = (BlkverifyAIOCB *)blockacb;
-    AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
-    bool finished = false;
-
-    /* Wait until request completes, invokes its callback, and frees itself */
-    acb->finished = &finished;
-    while (!finished) {
-        aio_poll(aio_context, true);
-    }
-}
-
 static const AIOCBInfo blkverify_aiocb_info = {
     .aiocb_size         = sizeof(BlkverifyAIOCB),
-    .cancel             = blkverify_aio_cancel,
 };
 
 static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
@@ -194,7 +179,6 @@ static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
     acb->qiov = qiov;
     acb->buf = NULL;
     acb->verify = NULL;
-    acb->finished = NULL;
     return acb;
 }
 
@@ -208,10 +192,7 @@ static void blkverify_aio_bh(void *opaque)
         qemu_vfree(acb->buf);
     }
     acb->common.cb(acb->common.opaque, acb->ret);
-    if (acb->finished) {
-        *acb->finished = true;
-    }
-    qemu_aio_release(acb);
+    qemu_aio_unref(acb);
 }
 
 static void blkverify_aio_cb(void *opaque, int ret)
diff --git a/block/cow.c b/block/cow.c
deleted file mode 100644
index c3769fe03b..0000000000
--- a/block/cow.c
+++ /dev/null
@@ -1,433 +0,0 @@
-/*
- * Block driver for the COW format
- *
- * Copyright (c) 2004 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-
-/**************************************************************/
-/* COW block driver using file system holes */
-
-/* user mode linux compatible COW file */
-#define COW_MAGIC 0x4f4f4f4d  /* MOOO */
-#define COW_VERSION 2
-
-struct cow_header_v2 {
-    uint32_t magic;
-    uint32_t version;
-    char backing_file[1024];
-    int32_t mtime;
-    uint64_t size;
-    uint32_t sectorsize;
-};
-
-typedef struct BDRVCowState {
-    CoMutex lock;
-    int64_t cow_sectors_offset;
-} BDRVCowState;
-
-static int cow_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    const struct cow_header_v2 *cow_header = (const void *)buf;
-
-    if (buf_size >= sizeof(struct cow_header_v2) &&
-        be32_to_cpu(cow_header->magic) == COW_MAGIC &&
-        be32_to_cpu(cow_header->version) == COW_VERSION)
-        return 100;
-    else
-        return 0;
-}
-
-static int cow_open(BlockDriverState *bs, QDict *options, int flags,
-                    Error **errp)
-{
-    BDRVCowState *s = bs->opaque;
-    struct cow_header_v2 cow_header;
-    int bitmap_size;
-    int64_t size;
-    int ret;
-
-    /* see if it is a cow image */
-    ret = bdrv_pread(bs->file, 0, &cow_header, sizeof(cow_header));
-    if (ret < 0) {
-        goto fail;
-    }
-
-    if (be32_to_cpu(cow_header.magic) != COW_MAGIC) {
-        error_setg(errp, "Image not in COW format");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    if (be32_to_cpu(cow_header.version) != COW_VERSION) {
-        char version[64];
-        snprintf(version, sizeof(version),
-               "COW version %" PRIu32, cow_header.version);
-        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-            bs->device_name, "cow", version);
-        ret = -ENOTSUP;
-        goto fail;
-    }
-
-    /* cow image found */
-    size = be64_to_cpu(cow_header.size);
-    bs->total_sectors = size / 512;
-
-    pstrcpy(bs->backing_file, sizeof(bs->backing_file),
-            cow_header.backing_file);
-
-    bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header);
-    s->cow_sectors_offset = (bitmap_size + 511) & ~511;
-    qemu_co_mutex_init(&s->lock);
-    return 0;
- fail:
-    return ret;
-}
-
-static inline void cow_set_bits(uint8_t *bitmap, int start, int64_t nb_sectors)
-{
-    int64_t bitnum = start, last = start + nb_sectors;
-    while (bitnum < last) {
-        if ((bitnum & 7) == 0 && bitnum + 8 <= last) {
-            bitmap[bitnum / 8] = 0xFF;
-            bitnum += 8;
-            continue;
-        }
-        bitmap[bitnum/8] |= (1 << (bitnum % 8));
-        bitnum++;
-    }
-}
-
-#define BITS_PER_BITMAP_SECTOR (512 * 8)
-
-/* Cannot use bitmap.c on big-endian machines.  */
-static int cow_test_bit(int64_t bitnum, const uint8_t *bitmap)
-{
-    return (bitmap[bitnum / 8] & (1 << (bitnum & 7))) != 0;
-}
-
-static int cow_find_streak(const uint8_t *bitmap, int value, int start, int nb_sectors)
-{
-    int streak_value = value ? 0xFF : 0;
-    int last = MIN(start + nb_sectors, BITS_PER_BITMAP_SECTOR);
-    int bitnum = start;
-    while (bitnum < last) {
-        if ((bitnum & 7) == 0 && bitmap[bitnum / 8] == streak_value) {
-            bitnum += 8;
-            continue;
-        }
-        if (cow_test_bit(bitnum, bitmap) == value) {
-            bitnum++;
-            continue;
-        }
-        break;
-    }
-    return MIN(bitnum, last) - start;
-}
-
-/* Return true if first block has been changed (ie. current version is
- * in COW file).  Set the number of continuous blocks for which that
- * is true. */
-static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, int *num_same)
-{
-    int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
-    uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
-    bool first = true;
-    int changed = 0, same = 0;
-
-    do {
-        int ret;
-        uint8_t bitmap[BDRV_SECTOR_SIZE];
-
-        bitnum &= BITS_PER_BITMAP_SECTOR - 1;
-        int sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum);
-
-        ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
-        if (ret < 0) {
-            return ret;
-        }
-
-        if (first) {
-            changed = cow_test_bit(bitnum, bitmap);
-            first = false;
-        }
-
-        same += cow_find_streak(bitmap, changed, bitnum, nb_sectors);
-
-        bitnum += sector_bits;
-        nb_sectors -= sector_bits;
-        offset += BDRV_SECTOR_SIZE;
-    } while (nb_sectors);
-
-    *num_same = same;
-    return changed;
-}
-
-static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, int *num_same)
-{
-    BDRVCowState *s = bs->opaque;
-    int ret = cow_co_is_allocated(bs, sector_num, nb_sectors, num_same);
-    int64_t offset = s->cow_sectors_offset + (sector_num << BDRV_SECTOR_BITS);
-    if (ret < 0) {
-        return ret;
-    }
-    return (ret ? BDRV_BLOCK_DATA : 0) | offset | BDRV_BLOCK_OFFSET_VALID;
-}
-
-static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
-        int nb_sectors)
-{
-    int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
-    uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
-    bool first = true;
-    int sector_bits;
-
-    for ( ; nb_sectors;
-            bitnum += sector_bits,
-            nb_sectors -= sector_bits,
-            offset += BDRV_SECTOR_SIZE) {
-        int ret, set;
-        uint8_t bitmap[BDRV_SECTOR_SIZE];
-
-        bitnum &= BITS_PER_BITMAP_SECTOR - 1;
-        sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum);
-
-        ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
-        if (ret < 0) {
-            return ret;
-        }
-
-        /* Skip over any already set bits */
-        set = cow_find_streak(bitmap, 1, bitnum, sector_bits);
-        bitnum += set;
-        sector_bits -= set;
-        nb_sectors -= set;
-        if (!sector_bits) {
-            continue;
-        }
-
-        if (first) {
-            ret = bdrv_flush(bs->file);
-            if (ret < 0) {
-                return ret;
-            }
-            first = false;
-        }
-
-        cow_set_bits(bitmap, bitnum, sector_bits);
-
-        ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap));
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    return 0;
-}
-
-static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
-                                 uint8_t *buf, int nb_sectors)
-{
-    BDRVCowState *s = bs->opaque;
-    int ret, n;
-
-    while (nb_sectors > 0) {
-        ret = cow_co_is_allocated(bs, sector_num, nb_sectors, &n);
-        if (ret < 0) {
-            return ret;
-        }
-        if (ret) {
-            ret = bdrv_pread(bs->file,
-                        s->cow_sectors_offset + sector_num * 512,
-                        buf, n * 512);
-            if (ret < 0) {
-                return ret;
-            }
-        } else {
-            if (bs->backing_hd) {
-                /* read from the base image */
-                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
-                if (ret < 0) {
-                    return ret;
-                }
-            } else {
-                memset(buf, 0, n * 512);
-            }
-        }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-    }
-    return 0;
-}
-
-static coroutine_fn int cow_co_read(BlockDriverState *bs, int64_t sector_num,
-                                    uint8_t *buf, int nb_sectors)
-{
-    int ret;
-    BDRVCowState *s = bs->opaque;
-    qemu_co_mutex_lock(&s->lock);
-    ret = cow_read(bs, sector_num, buf, nb_sectors);
-    qemu_co_mutex_unlock(&s->lock);
-    return ret;
-}
-
-static int cow_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
-{
-    BDRVCowState *s = bs->opaque;
-    int ret;
-
-    ret = bdrv_pwrite(bs->file, s->cow_sectors_offset + sector_num * 512,
-                      buf, nb_sectors * 512);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return cow_update_bitmap(bs, sector_num, nb_sectors);
-}
-
-static coroutine_fn int cow_co_write(BlockDriverState *bs, int64_t sector_num,
-                                     const uint8_t *buf, int nb_sectors)
-{
-    int ret;
-    BDRVCowState *s = bs->opaque;
-    qemu_co_mutex_lock(&s->lock);
-    ret = cow_write(bs, sector_num, buf, nb_sectors);
-    qemu_co_mutex_unlock(&s->lock);
-    return ret;
-}
-
-static void cow_close(BlockDriverState *bs)
-{
-}
-
-static int cow_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    struct cow_header_v2 cow_header;
-    struct stat st;
-    int64_t image_sectors = 0;
-    char *image_filename = NULL;
-    Error *local_err = NULL;
-    int ret;
-    BlockDriverState *cow_bs = NULL;
-
-    /* Read out options */
-    image_sectors = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                                 BDRV_SECTOR_SIZE);
-    image_filename = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
-
-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        goto exit;
-    }
-
-    ret = bdrv_open(&cow_bs, filename, NULL, NULL,
-                    BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        goto exit;
-    }
-
-    memset(&cow_header, 0, sizeof(cow_header));
-    cow_header.magic = cpu_to_be32(COW_MAGIC);
-    cow_header.version = cpu_to_be32(COW_VERSION);
-    if (image_filename) {
-        /* Note: if no file, we put a dummy mtime */
-        cow_header.mtime = cpu_to_be32(0);
-
-        if (stat(image_filename, &st) != 0) {
-            goto mtime_fail;
-        }
-        cow_header.mtime = cpu_to_be32(st.st_mtime);
-    mtime_fail:
-        pstrcpy(cow_header.backing_file, sizeof(cow_header.backing_file),
-                image_filename);
-    }
-    cow_header.sectorsize = cpu_to_be32(512);
-    cow_header.size = cpu_to_be64(image_sectors * 512);
-    ret = bdrv_pwrite(cow_bs, 0, &cow_header, sizeof(cow_header));
-    if (ret < 0) {
-        goto exit;
-    }
-
-    /* resize to include at least all the bitmap */
-    ret = bdrv_truncate(cow_bs,
-        sizeof(cow_header) + ((image_sectors + 7) >> 3));
-    if (ret < 0) {
-        goto exit;
-    }
-
-exit:
-    g_free(image_filename);
-    if (cow_bs) {
-        bdrv_unref(cow_bs);
-    }
-    return ret;
-}
-
-static QemuOptsList cow_create_opts = {
-    .name = "cow-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(cow_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_BACKING_FILE,
-            .type = QEMU_OPT_STRING,
-            .help = "File name of a base image"
-        },
-        { /* end of list */ }
-    }
-};
-
-static BlockDriver bdrv_cow = {
-    .format_name    = "cow",
-    .instance_size  = sizeof(BDRVCowState),
-
-    .bdrv_probe     = cow_probe,
-    .bdrv_open      = cow_open,
-    .bdrv_close     = cow_close,
-    .bdrv_create    = cow_create,
-    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
-    .supports_backing       = true,
-
-    .bdrv_read              = cow_co_read,
-    .bdrv_write             = cow_co_write,
-    .bdrv_co_get_block_status   = cow_co_get_block_status,
-
-    .create_opts    = &cow_create_opts,
-};
-
-static void bdrv_cow_init(void)
-{
-    bdrv_register(&bdrv_cow);
-}
-
-block_init(bdrv_cow_init);
diff --git a/block/curl.c b/block/curl.c
index 938f9d94e8..225407c643 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -212,7 +212,7 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
             qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start,
                                 acb->end - acb->start);
             acb->common.cb(acb->common.opaque, 0);
-            qemu_aio_release(acb);
+            qemu_aio_unref(acb);
             s->acb[i] = NULL;
         }
     }
@@ -304,7 +304,7 @@ static void curl_multi_check_completion(BDRVCURLState *s)
                     }
 
                     acb->common.cb(acb->common.opaque, -EIO);
-                    qemu_aio_release(acb);
+                    qemu_aio_unref(acb);
                     state->acb[i] = NULL;
                 }
             }
@@ -613,14 +613,8 @@ out_noclean:
     return -EINVAL;
 }
 
-static void curl_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    // Do we have to implement canceling? Seems to work without...
-}
-
 static const AIOCBInfo curl_aiocb_info = {
     .aiocb_size         = sizeof(CURLAIOCB),
-    .cancel             = curl_aio_cancel,
 };
 
 
@@ -642,7 +636,7 @@ static void curl_readv_bh_cb(void *p)
     // we can just call the callback and be done.
     switch (curl_find_buf(s, start, acb->nb_sectors * SECTOR_SIZE, acb)) {
         case FIND_RET_OK:
-            qemu_aio_release(acb);
+            qemu_aio_unref(acb);
             // fall through
         case FIND_RET_WAIT:
             return;
@@ -654,7 +648,7 @@ static void curl_readv_bh_cb(void *p)
     state = curl_init_state(acb->common.bs, s);
     if (!state) {
         acb->common.cb(acb->common.opaque, -EIO);
-        qemu_aio_release(acb);
+        qemu_aio_unref(acb);
         return;
     }
 
@@ -670,7 +664,7 @@ static void curl_readv_bh_cb(void *p)
     if (state->buf_len && state->orig_buf == NULL) {
         curl_clean_state(state);
         acb->common.cb(acb->common.opaque, -ENOMEM);
-        qemu_aio_release(acb);
+        qemu_aio_unref(acb);
         return;
     }
     state->acb[0] = acb;
diff --git a/block/iscsi.c b/block/iscsi.c
index 84bcae89fa..3c1b416704 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -88,7 +88,6 @@ typedef struct IscsiAIOCB {
     struct scsi_task *task;
     uint8_t *buf;
     int status;
-    int canceled;
     int64_t sector_num;
     int nb_sectors;
 #ifdef __linux__
@@ -120,16 +119,14 @@ iscsi_bh_cb(void *p)
     g_free(acb->buf);
     acb->buf = NULL;
 
-    if (acb->canceled == 0) {
-        acb->common.cb(acb->common.opaque, acb->status);
-    }
+    acb->common.cb(acb->common.opaque, acb->status);
 
     if (acb->task != NULL) {
         scsi_free_scsi_task(acb->task);
         acb->task = NULL;
     }
 
-    qemu_aio_release(acb);
+    qemu_aio_unref(acb);
 }
 
 static void
@@ -240,20 +237,15 @@ iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
         return;
     }
 
-    acb->canceled = 1;
-
     /* send a task mgmt call to the target to cancel the task on the target */
     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
                                      iscsi_abort_task_cb, acb);
 
-    while (acb->status == -EINPROGRESS) {
-        aio_poll(iscsilun->aio_context, true);
-    }
 }
 
 static const AIOCBInfo iscsi_aiocb_info = {
     .aiocb_size         = sizeof(IscsiAIOCB),
-    .cancel             = iscsi_aio_cancel,
+    .cancel_async       = iscsi_aio_cancel,
 };
 
 
@@ -638,10 +630,6 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
     g_free(acb->buf);
     acb->buf = NULL;
 
-    if (acb->canceled != 0) {
-        return;
-    }
-
     acb->status = 0;
     if (status < 0) {
         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
@@ -683,7 +671,6 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
 
     acb->iscsilun = iscsilun;
-    acb->canceled    = 0;
     acb->bh          = NULL;
     acb->status      = -EINPROGRESS;
     acb->buf         = NULL;
@@ -693,7 +680,7 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
     if (acb->task == NULL) {
         error_report("iSCSI: Failed to allocate task for scsi command. %s",
                      iscsi_get_error(iscsi));
-        qemu_aio_release(acb);
+        qemu_aio_unref(acb);
         return NULL;
     }
     memset(acb->task, 0, sizeof(struct scsi_task));
@@ -731,7 +718,7 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
                                  (data.size > 0) ? &data : NULL,
                                  acb) != 0) {
         scsi_free_scsi_task(acb->task);
-        qemu_aio_release(acb);
+        qemu_aio_unref(acb);
         return NULL;
     }
 
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 9aca758b10..f4baba2be0 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -85,11 +85,10 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
                 ret = -EINVAL;
             }
         }
-
-        laiocb->common.cb(laiocb->common.opaque, ret);
     }
+    laiocb->common.cb(laiocb->common.opaque, ret);
 
-    qemu_aio_release(laiocb);
+    qemu_aio_unref(laiocb);
 }
 
 /* The completion BH fetches completed I/O requests and invokes their
@@ -153,35 +152,22 @@ static void laio_cancel(BlockDriverAIOCB *blockacb)
     struct io_event event;
     int ret;
 
-    if (laiocb->ret != -EINPROGRESS)
+    if (laiocb->ret != -EINPROGRESS) {
         return;
-
-    /*
-     * Note that as of Linux 2.6.31 neither the block device code nor any
-     * filesystem implements cancellation of AIO request.
-     * Thus the polling loop below is the normal code path.
-     */
+    }
     ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
-    if (ret == 0) {
-        laiocb->ret = -ECANCELED;
+    laiocb->ret = -ECANCELED;
+    if (ret != 0) {
+        /* iocb is not cancelled, cb will be called by the event loop later */
         return;
     }
 
-    /*
-     * We have to wait for the iocb to finish.
-     *
-     * The only way to get the iocb status update is by polling the io context.
-     * We might be able to do this slightly more optimal by removing the
-     * O_NONBLOCK flag.
-     */
-    while (laiocb->ret == -EINPROGRESS) {
-        qemu_laio_completion_cb(&laiocb->ctx->e);
-    }
+    laiocb->common.cb(laiocb->common.opaque, laiocb->ret);
 }
 
 static const AIOCBInfo laio_aiocb_info = {
     .aiocb_size         = sizeof(struct qemu_laiocb),
-    .cancel             = laio_cancel,
+    .cancel_async       = laio_cancel,
 };
 
 static void ioq_init(LaioQueue *io_q)
@@ -300,7 +286,7 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
     return &laiocb->common;
 
 out_free_aiocb:
-    qemu_aio_release(laiocb);
+    qemu_aio_unref(laiocb);
     return NULL;
 }
 
diff --git a/block/null.c b/block/null.c
new file mode 100644
index 0000000000..828441999c
--- /dev/null
+++ b/block/null.c
@@ -0,0 +1,168 @@
+/*
+ * Null block driver
+ *
+ * Authors:
+ *  Fam Zheng <famz@redhat.com>
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "block/block_int.h"
+
+typedef struct {
+    int64_t length;
+} BDRVNullState;
+
+static QemuOptsList runtime_opts = {
+    .name = "null",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "",
+        },
+        {
+            .name = BLOCK_OPT_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "size of the null block",
+        },
+        { /* end of list */ }
+    },
+};
+
+static int null_file_open(BlockDriverState *bs, QDict *options, int flags,
+                          Error **errp)
+{
+    QemuOpts *opts;
+    BDRVNullState *s = bs->opaque;
+
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &error_abort);
+    s->length =
+        qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 1 << 30);
+    qemu_opts_del(opts);
+    return 0;
+}
+
+static void null_close(BlockDriverState *bs)
+{
+}
+
+static int64_t null_getlength(BlockDriverState *bs)
+{
+    BDRVNullState *s = bs->opaque;
+    return s->length;
+}
+
+static coroutine_fn int null_co_readv(BlockDriverState *bs,
+                                      int64_t sector_num, int nb_sectors,
+                                      QEMUIOVector *qiov)
+{
+    return 0;
+}
+
+static coroutine_fn int null_co_writev(BlockDriverState *bs,
+                                       int64_t sector_num, int nb_sectors,
+                                       QEMUIOVector *qiov)
+{
+    return 0;
+}
+
+static coroutine_fn int null_co_flush(BlockDriverState *bs)
+{
+    return 0;
+}
+
+typedef struct {
+    BlockDriverAIOCB common;
+    QEMUBH *bh;
+} NullAIOCB;
+
+static const AIOCBInfo null_aiocb_info = {
+    .aiocb_size = sizeof(NullAIOCB),
+};
+
+static void null_bh_cb(void *opaque)
+{
+    NullAIOCB *acb = opaque;
+    acb->common.cb(acb->common.opaque, 0);
+    qemu_bh_delete(acb->bh);
+    qemu_aio_unref(acb);
+}
+
+static inline BlockDriverAIOCB *null_aio_common(BlockDriverState *bs,
+                                                BlockDriverCompletionFunc *cb,
+                                                void *opaque)
+{
+    NullAIOCB *acb;
+
+    acb = qemu_aio_get(&null_aiocb_info, bs, cb, opaque);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), null_bh_cb, acb);
+    qemu_bh_schedule(acb->bh);
+    return &acb->common;
+}
+
+static BlockDriverAIOCB *null_aio_readv(BlockDriverState *bs,
+                                        int64_t sector_num, QEMUIOVector *qiov,
+                                        int nb_sectors,
+                                        BlockDriverCompletionFunc *cb,
+                                        void *opaque)
+{
+    return null_aio_common(bs, cb, opaque);
+}
+
+static BlockDriverAIOCB *null_aio_writev(BlockDriverState *bs,
+                                         int64_t sector_num, QEMUIOVector *qiov,
+                                         int nb_sectors,
+                                         BlockDriverCompletionFunc *cb,
+                                         void *opaque)
+{
+    return null_aio_common(bs, cb, opaque);
+}
+
+static BlockDriverAIOCB *null_aio_flush(BlockDriverState *bs,
+                                        BlockDriverCompletionFunc *cb,
+                                        void *opaque)
+{
+    return null_aio_common(bs, cb, opaque);
+}
+
+static BlockDriver bdrv_null_co = {
+    .format_name            = "null-co",
+    .protocol_name          = "null-co",
+    .instance_size          = sizeof(BDRVNullState),
+
+    .bdrv_file_open         = null_file_open,
+    .bdrv_close             = null_close,
+    .bdrv_getlength         = null_getlength,
+
+    .bdrv_co_readv          = null_co_readv,
+    .bdrv_co_writev         = null_co_writev,
+    .bdrv_co_flush_to_disk  = null_co_flush,
+};
+
+static BlockDriver bdrv_null_aio = {
+    .format_name            = "null-aio",
+    .protocol_name          = "null-aio",
+    .instance_size          = sizeof(BDRVNullState),
+
+    .bdrv_file_open         = null_file_open,
+    .bdrv_close             = null_close,
+    .bdrv_getlength         = null_getlength,
+
+    .bdrv_aio_readv         = null_aio_readv,
+    .bdrv_aio_writev        = null_aio_writev,
+    .bdrv_aio_flush         = null_aio_flush,
+};
+
+static void bdrv_null_init(void)
+{
+    bdrv_register(&bdrv_null_co);
+    bdrv_register(&bdrv_null_aio);
+}
+
+block_init(bdrv_null_init);
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 735f687b06..f7dd8c0985 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -486,6 +486,13 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
         goto out;
     }
 
+    if (offset_into_cluster(s, l2_offset)) {
+        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
+                                " unaligned (L1 index: %#" PRIx64 ")",
+                                l2_offset, l1_index);
+        return -EIO;
+    }
+
     /* load the l2 table in memory */
 
     ret = l2_load(bs, l2_offset, &l2_table);
@@ -508,8 +515,11 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
         break;
     case QCOW2_CLUSTER_ZERO:
         if (s->qcow_version < 3) {
-            qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-            return -EIO;
+            qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
+                                    " in pre-v3 image (L2 offset: %#" PRIx64
+                                    ", L2 index: %#x)", l2_offset, l2_index);
+            ret = -EIO;
+            goto fail;
         }
         c = count_contiguous_clusters(nb_clusters, s->cluster_size,
                 &l2_table[l2_index], QCOW_OFLAG_ZERO);
@@ -525,6 +535,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
         c = count_contiguous_clusters(nb_clusters, s->cluster_size,
                 &l2_table[l2_index], QCOW_OFLAG_ZERO);
         *cluster_offset &= L2E_OFFSET_MASK;
+        if (offset_into_cluster(s, *cluster_offset)) {
+            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#"
+                                    PRIx64 " unaligned (L2 offset: %#" PRIx64
+                                    ", L2 index: %#x)", *cluster_offset,
+                                    l2_offset, l2_index);
+            ret = -EIO;
+            goto fail;
+        }
         break;
     default:
         abort();
@@ -541,6 +559,10 @@ out:
     *num = nb_available - index_in_cluster;
 
     return ret;
+
+fail:
+    qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
+    return ret;
 }
 
 /*
@@ -576,6 +598,12 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
 
     assert(l1_index < s->l1_size);
     l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
+    if (offset_into_cluster(s, l2_offset)) {
+        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
+                                " unaligned (L1 index: %#" PRIx64 ")",
+                                l2_offset, l1_index);
+        return -EIO;
+    }
 
     /* seek the l2 table of the given l2 offset */
 
@@ -948,6 +976,15 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
         bool offset_matches =
             (cluster_offset & L2E_OFFSET_MASK) == *host_offset;
 
+        if (offset_into_cluster(s, cluster_offset & L2E_OFFSET_MASK)) {
+            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset "
+                                    "%#llx unaligned (guest offset: %#" PRIx64
+                                    ")", cluster_offset & L2E_OFFSET_MASK,
+                                    guest_offset);
+            ret = -EIO;
+            goto out;
+        }
+
         if (*host_offset != 0 && !offset_matches) {
             *bytes = 0;
             ret = 0;
@@ -979,7 +1016,7 @@ out:
 
     /* Only return a host offset if we actually made progress. Otherwise we
      * would make requirements for handle_alloc() that it can't fulfill */
-    if (ret) {
+    if (ret > 0) {
         *host_offset = (cluster_offset & L2E_OFFSET_MASK)
                      + offset_into_cluster(s, guest_offset);
     }
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 43665b86e7..2bcaaf9b98 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -26,8 +26,6 @@
 #include "block/block_int.h"
 #include "block/qcow2.h"
 #include "qemu/range.h"
-#include "qapi/qmp/types.h"
-#include "qapi-event.h"
 
 static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
 static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
@@ -110,6 +108,13 @@ static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
     if (!refcount_block_offset)
         return 0;
 
+    if (offset_into_cluster(s, refcount_block_offset)) {
+        qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" PRIx64
+                                " unaligned (reftable index: %#" PRIx64 ")",
+                                refcount_block_offset, refcount_table_index);
+        return -EIO;
+    }
+
     ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
         (void**) &refcount_block);
     if (ret < 0) {
@@ -183,6 +188,14 @@ static int alloc_refcount_block(BlockDriverState *bs,
 
         /* If it's already there, we're done */
         if (refcount_block_offset) {
+            if (offset_into_cluster(s, refcount_block_offset)) {
+                qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
+                                        PRIx64 " unaligned (reftable index: "
+                                        "%#x)", refcount_block_offset,
+                                        refcount_table_index);
+                return -EIO;
+            }
+
              return load_refcount_block(bs, refcount_block_offset,
                  (void**) refcount_block);
         }
@@ -838,8 +851,14 @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
     case QCOW2_CLUSTER_NORMAL:
     case QCOW2_CLUSTER_ZERO:
         if (l2_entry & L2E_OFFSET_MASK) {
-            qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
-                                nb_clusters << s->cluster_bits, type);
+            if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
+                qcow2_signal_corruption(bs, false, -1, -1,
+                                        "Cannot free unaligned cluster %#llx",
+                                        l2_entry & L2E_OFFSET_MASK);
+            } else {
+                qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
+                                    nb_clusters << s->cluster_bits, type);
+            }
         }
         break;
     case QCOW2_CLUSTER_UNALLOCATED:
@@ -903,6 +922,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
             old_l2_offset = l2_offset;
             l2_offset &= L1E_OFFSET_MASK;
 
+            if (offset_into_cluster(s, l2_offset)) {
+                qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
+                                        PRIx64 " unaligned (L1 index: %#x)",
+                                        l2_offset, i);
+                ret = -EIO;
+                goto fail;
+            }
+
             ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
                 (void**) &l2_table);
             if (ret < 0) {
@@ -935,6 +962,17 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 
                     case QCOW2_CLUSTER_NORMAL:
                     case QCOW2_CLUSTER_ZERO:
+                        if (offset_into_cluster(s, offset & L2E_OFFSET_MASK)) {
+                            qcow2_signal_corruption(bs, true, -1, -1, "Data "
+                                                    "cluster offset %#llx "
+                                                    "unaligned (L2 offset: %#"
+                                                    PRIx64 ", L2 index: %#x)",
+                                                    offset & L2E_OFFSET_MASK,
+                                                    l2_offset, j);
+                            ret = -EIO;
+                            goto fail;
+                        }
+
                         cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
                         if (!cluster_index) {
                             /* unallocated */
@@ -1838,26 +1876,11 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
         return ret;
     } else if (ret > 0) {
         int metadata_ol_bitnr = ffs(ret) - 1;
-        char *message;
-
         assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);
 
-        fprintf(stderr, "qcow2: Preventing invalid write on metadata (overlaps "
-                "with %s); image marked as corrupt.\n",
-                metadata_ol_names[metadata_ol_bitnr]);
-        message = g_strdup_printf("Prevented %s overwrite",
-                metadata_ol_names[metadata_ol_bitnr]);
-        qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
-                                              message,
-                                              true,
-                                              offset,
-                                              true,
-                                              size,
-                                              &error_abort);
-        g_free(message);
-
-        qcow2_mark_corrupt(bs);
-        bs->drv = NULL; /* make BDS unusable */
+        qcow2_signal_corruption(bs, true, offset, size, "Preventing invalid "
+                                "write on metadata (overlaps with %s)",
+                                metadata_ol_names[metadata_ol_bitnr]);
         return -EIO;
     }
 
diff --git a/block/qcow2.c b/block/qcow2.c
index 0daf25cb58..778fc1ec13 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -31,6 +31,8 @@
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qbool.h"
 #include "qapi/util.h"
+#include "qapi/qmp/types.h"
+#include "qapi-event.h"
 #include "trace.h"
 #include "qemu/option_int.h"
 
@@ -404,6 +406,12 @@ static QemuOptsList qcow2_runtime_opts = {
                     "templates (none, constant, cached, all)",
         },
         {
+            .name = QCOW2_OPT_OVERLAP_TEMPLATE,
+            .type = QEMU_OPT_STRING,
+            .help = "Selects which overlap checks to perform from a range of "
+                    "templates (none, constant, cached, all)",
+        },
+        {
             .name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
             .type = QEMU_OPT_BOOL,
             .help = "Check for unintended writes into the main qcow2 header",
@@ -536,11 +544,11 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     unsigned int len, i;
     int ret = 0;
     QCowHeader header;
-    QemuOpts *opts;
+    QemuOpts *opts = NULL;
     Error *local_err = NULL;
     uint64_t ext_end;
     uint64_t l1_vm_state_index;
-    const char *opt_overlap_check;
+    const char *opt_overlap_check, *opt_overlap_check_template;
     int overlap_check_template = 0;
     uint64_t l2_cache_size, refcount_cache_size;
 
@@ -920,7 +928,21 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     s->discard_passthrough[QCOW2_DISCARD_OTHER] =
         qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
 
-    opt_overlap_check = qemu_opt_get(opts, "overlap-check") ?: "cached";
+    opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
+    opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
+    if (opt_overlap_check_template && opt_overlap_check &&
+        strcmp(opt_overlap_check_template, opt_overlap_check))
+    {
+        error_setg(errp, "Conflicting values for qcow2 options '"
+                   QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
+                   "' ('%s')", opt_overlap_check, opt_overlap_check_template);
+        ret = -EINVAL;
+        goto fail;
+    }
+    if (!opt_overlap_check) {
+        opt_overlap_check = opt_overlap_check_template ?: "cached";
+    }
+
     if (!strcmp(opt_overlap_check, "none")) {
         overlap_check_template = 0;
     } else if (!strcmp(opt_overlap_check, "constant")) {
@@ -933,7 +955,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
         error_setg(errp, "Unsupported value '%s' for qcow2 option "
                    "'overlap-check'. Allowed are either of the following: "
                    "none, constant, cached, all", opt_overlap_check);
-        qemu_opts_del(opts);
         ret = -EINVAL;
         goto fail;
     }
@@ -948,6 +969,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     qemu_opts_del(opts);
+    opts = NULL;
 
     if (s->use_lazy_refcounts && s->qcow_version < 3) {
         error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
@@ -965,6 +987,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     return ret;
 
  fail:
+    qemu_opts_del(opts);
     g_free(s->unknown_header_fields);
     cleanup_unknown_header_ext(bs);
     qcow2_free_snapshots(bs);
@@ -2529,6 +2552,52 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts)
     return 0;
 }
 
+/*
+ * If offset or size are negative, respectively, they will not be included in
+ * the BLOCK_IMAGE_CORRUPTED event emitted.
+ * fatal will be ignored for read-only BDS; corruptions found there will always
+ * be considered non-fatal.
+ */
+void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
+                             int64_t size, const char *message_format, ...)
+{
+    BDRVQcowState *s = bs->opaque;
+    char *message;
+    va_list ap;
+
+    fatal = fatal && !bs->read_only;
+
+    if (s->signaled_corruption &&
+        (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))
+    {
+        return;
+    }
+
+    va_start(ap, message_format);
+    message = g_strdup_vprintf(message_format, ap);
+    va_end(ap);
+
+    if (fatal) {
+        fprintf(stderr, "qcow2: Marking image as corrupt: %s; further "
+                "corruption events will be suppressed\n", message);
+    } else {
+        fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal "
+                "corruption events will be suppressed\n", message);
+    }
+
+    qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), message,
+                                          offset >= 0, offset, size >= 0, size,
+                                          fatal, &error_abort);
+    g_free(message);
+
+    if (fatal) {
+        qcow2_mark_corrupt(bs);
+        bs->drv = NULL; /* make BDS unusable */
+    }
+
+    s->signaled_corruption = true;
+}
+
 static QemuOptsList qcow2_create_opts = {
     .name = "qcow2-create-opts",
     .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
diff --git a/block/qcow2.h b/block/qcow2.h
index 6aeb7ea90f..7d61e615ff 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -83,6 +83,7 @@
 #define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
 #define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
 #define QCOW2_OPT_OVERLAP "overlap-check"
+#define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template"
 #define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header"
 #define QCOW2_OPT_OVERLAP_ACTIVE_L1 "overlap-check.active-l1"
 #define QCOW2_OPT_OVERLAP_ACTIVE_L2 "overlap-check.active-l2"
@@ -261,6 +262,7 @@ typedef struct BDRVQcowState {
     bool discard_passthrough[QCOW2_DISCARD_MAX];
 
     int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
+    bool signaled_corruption;
 
     uint64_t incompatible_features;
     uint64_t compatible_features;
@@ -477,6 +479,10 @@ int qcow2_mark_corrupt(BlockDriverState *bs);
 int qcow2_mark_consistent(BlockDriverState *bs);
 int qcow2_update_header(BlockDriverState *bs);
 
+void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
+                             int64_t size, const char *message_format, ...)
+                             GCC_FMT_ATTR(5, 6);
+
 /* qcow2-refcount.c functions */
 int qcow2_refcount_init(BlockDriverState *bs);
 void qcow2_refcount_close(BlockDriverState *bs);
diff --git a/block/qed.c b/block/qed.c
index f8d9e12263..e2316d7ff0 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -18,22 +18,8 @@
 #include "qapi/qmp/qerror.h"
 #include "migration/migration.h"
 
-static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    QEDAIOCB *acb = (QEDAIOCB *)blockacb;
-    AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
-    bool finished = false;
-
-    /* Wait for the request to finish */
-    acb->finished = &finished;
-    while (!finished) {
-        aio_poll(aio_context, true);
-    }
-}
-
 static const AIOCBInfo qed_aiocb_info = {
     .aiocb_size         = sizeof(QEDAIOCB),
-    .cancel             = qed_aio_cancel,
 };
 
 static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
@@ -919,18 +905,12 @@ static void qed_aio_complete_bh(void *opaque)
     BlockDriverCompletionFunc *cb = acb->common.cb;
     void *user_opaque = acb->common.opaque;
     int ret = acb->bh_ret;
-    bool *finished = acb->finished;
 
     qemu_bh_delete(acb->bh);
-    qemu_aio_release(acb);
+    qemu_aio_unref(acb);
 
     /* Invoke callback */
     cb(user_opaque, ret);
-
-    /* Signal cancel completion */
-    if (finished) {
-        *finished = true;
-    }
 }
 
 static void qed_aio_complete(QEDAIOCB *acb, int ret)
@@ -1397,7 +1377,6 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
                         opaque, flags);
 
     acb->flags = flags;
-    acb->finished = NULL;
     acb->qiov = qiov;
     acb->qiov_offset = 0;
     acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
diff --git a/block/quorum.c b/block/quorum.c
index 093382e8f5..7687466733 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -138,16 +138,15 @@ static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
 
     /* cancel all callbacks */
     for (i = 0; i < s->num_children; i++) {
-        bdrv_aio_cancel(acb->qcrs[i].aiocb);
+        if (acb->qcrs[i].aiocb) {
+            bdrv_aio_cancel_async(acb->qcrs[i].aiocb);
+        }
     }
-
-    g_free(acb->qcrs);
-    qemu_aio_release(acb);
 }
 
 static AIOCBInfo quorum_aiocb_info = {
     .aiocb_size         = sizeof(QuorumAIOCB),
-    .cancel             = quorum_aio_cancel,
+    .cancel_async       = quorum_aio_cancel,
 };
 
 static void quorum_aio_finalize(QuorumAIOCB *acb)
@@ -169,7 +168,7 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
     }
 
     g_free(acb->qcrs);
-    qemu_aio_release(acb);
+    qemu_aio_unref(acb);
 }
 
 static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
diff --git a/block/rbd.c b/block/rbd.c
index b7f7d5ff30..96947e328a 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -77,7 +77,6 @@ typedef struct RBDAIOCB {
     int64_t sector_num;
     int error;
     struct BDRVRBDState *s;
-    int cancelled;
     int status;
 } RBDAIOCB;
 
@@ -408,9 +407,7 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
     acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
     acb->status = 0;
 
-    if (!acb->cancelled) {
-        qemu_aio_release(acb);
-    }
+    qemu_aio_unref(acb);
 }
 
 /* TODO Convert to fine grained options */
@@ -539,25 +536,8 @@ static void qemu_rbd_close(BlockDriverState *bs)
     rados_shutdown(s->cluster);
 }
 
-/*
- * Cancel aio. Since we don't reference acb in a non qemu threads,
- * it is safe to access it here.
- */
-static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    RBDAIOCB *acb = (RBDAIOCB *) blockacb;
-    acb->cancelled = 1;
-
-    while (acb->status == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(acb->common.bs), true);
-    }
-
-    qemu_aio_release(acb);
-}
-
 static const AIOCBInfo rbd_aiocb_info = {
     .aiocb_size = sizeof(RBDAIOCB),
-    .cancel = qemu_rbd_aio_cancel,
 };
 
 static void rbd_finish_bh(void *opaque)
@@ -640,7 +620,6 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
     acb->ret = 0;
     acb->error = 0;
     acb->s = s;
-    acb->cancelled = 0;
     acb->bh = NULL;
     acb->status = -EINPROGRESS;
 
@@ -692,7 +671,7 @@ failed_completion:
 failed:
     g_free(rcb);
     qemu_vfree(acb->bounce);
-    qemu_aio_release(acb);
+    qemu_aio_unref(acb);
     return NULL;
 }
 
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 7da36e1f9a..2d78ef91f7 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -315,7 +315,6 @@ struct SheepdogAIOCB {
     void (*aio_done_func)(SheepdogAIOCB *);
 
     bool cancelable;
-    bool *finished;
     int nr_pending;
 };
 
@@ -446,10 +445,7 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
 static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
 {
     qemu_coroutine_enter(acb->coroutine, NULL);
-    if (acb->finished) {
-        *acb->finished = true;
-    }
-    qemu_aio_release(acb);
+    qemu_aio_unref(acb);
 }
 
 /*
@@ -482,36 +478,33 @@ static void sd_aio_cancel(BlockDriverAIOCB *blockacb)
     SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb;
     BDRVSheepdogState *s = acb->common.bs->opaque;
     AIOReq *aioreq, *next;
-    bool finished = false;
-
-    acb->finished = &finished;
-    while (!finished) {
-        if (sd_acb_cancelable(acb)) {
-            /* Remove outstanding requests from pending and failed queues.  */
-            QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings,
-                               next) {
-                if (aioreq->aiocb == acb) {
-                    free_aio_req(s, aioreq);
-                }
+
+    if (sd_acb_cancelable(acb)) {
+        /* Remove outstanding requests from pending and failed queues.  */
+        QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings,
+                           next) {
+            if (aioreq->aiocb == acb) {
+                free_aio_req(s, aioreq);
             }
-            QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
-                               next) {
-                if (aioreq->aiocb == acb) {
-                    free_aio_req(s, aioreq);
-                }
+        }
+        QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
+                           next) {
+            if (aioreq->aiocb == acb) {
+                free_aio_req(s, aioreq);
             }
+        }
 
-            assert(acb->nr_pending == 0);
-            sd_finish_aiocb(acb);
-            return;
+        assert(acb->nr_pending == 0);
+        if (acb->common.cb) {
+            acb->common.cb(acb->common.opaque, -ECANCELED);
         }
-        aio_poll(s->aio_context, true);
+        sd_finish_aiocb(acb);
     }
 }
 
 static const AIOCBInfo sd_aiocb_info = {
-    .aiocb_size = sizeof(SheepdogAIOCB),
-    .cancel = sd_aio_cancel,
+    .aiocb_size     = sizeof(SheepdogAIOCB),
+    .cancel_async   = sd_aio_cancel,
 };
 
 static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
@@ -528,7 +521,6 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
 
     acb->aio_done_func = NULL;
     acb->cancelable = true;
-    acb->finished = NULL;
     acb->coroutine = qemu_coroutine_self();
     acb->ret = 0;
     acb->nr_pending = 0;
@@ -2138,7 +2130,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
 
     ret = sd_co_rw_vector(acb);
     if (ret <= 0) {
-        qemu_aio_release(acb);
+        qemu_aio_unref(acb);
         return ret;
     }
 
@@ -2159,7 +2151,7 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
 
     ret = sd_co_rw_vector(acb);
     if (ret <= 0) {
-        qemu_aio_release(acb);
+        qemu_aio_unref(acb);
         return ret;
     }
 
@@ -2518,7 +2510,7 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
 
     ret = sd_co_rw_vector(acb);
     if (ret <= 0) {
-        qemu_aio_release(acb);
+        qemu_aio_unref(acb);
         return ret;
     }
 
diff --git a/block/vhdx.c b/block/vhdx.c
index 796b7bd884..b0464487a5 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -99,7 +99,8 @@ static const MSGUID logical_sector_guid = { .data1 = 0x8141bf1d,
 /* Each parent type must have a valid GUID; this is for parent images
  * of type 'VHDX'.  If we were to allow e.g. a QCOW2 parent, we would
  * need to make up our own QCOW2 GUID type */
-static const MSGUID parent_vhdx_guid = { .data1 = 0xb04aefb7,
+static const MSGUID parent_vhdx_guid __attribute__((unused))
+                                     = { .data1 = 0xb04aefb7,
                                          .data2 = 0xd19e,
                                          .data3 = 0x4a81,
                                          .data4 = { 0xb7, 0x89, 0x25, 0xb8,
@@ -1407,6 +1408,12 @@ exit:
     return ret;
 }
 
+#define VHDX_METADATA_ENTRY_BUFFER_SIZE \
+                                    (sizeof(VHDXFileParameters)               +\
+                                     sizeof(VHDXVirtualDiskSize)              +\
+                                     sizeof(VHDXPage83Data)                   +\
+                                     sizeof(VHDXVirtualDiskLogicalSectorSize) +\
+                                     sizeof(VHDXVirtualDiskPhysicalSectorSize))
 
 /*
  * Create the Metadata entries.
@@ -1445,11 +1452,7 @@ static int vhdx_create_new_metadata(BlockDriverState *bs,
     VHDXVirtualDiskLogicalSectorSize  *mt_log_sector_size;
     VHDXVirtualDiskPhysicalSectorSize *mt_phys_sector_size;
 
-    entry_buffer = g_malloc0(sizeof(VHDXFileParameters)               +
-                             sizeof(VHDXVirtualDiskSize)              +
-                             sizeof(VHDXPage83Data)                   +
-                             sizeof(VHDXVirtualDiskLogicalSectorSize) +
-                             sizeof(VHDXVirtualDiskPhysicalSectorSize));
+    entry_buffer = g_malloc0(VHDX_METADATA_ENTRY_BUFFER_SIZE);
 
     mt_file_params = entry_buffer;
     offset += sizeof(VHDXFileParameters);
@@ -1530,7 +1533,7 @@ static int vhdx_create_new_metadata(BlockDriverState *bs,
     }
 
     ret = bdrv_pwrite(bs, metadata_offset + (64 * KiB), entry_buffer,
-                      VHDX_HEADER_BLOCK_SIZE);
+                      VHDX_METADATA_ENTRY_BUFFER_SIZE);
     if (ret < 0) {
         goto exit;
     }
@@ -1593,7 +1596,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
                 bdrv_has_zero_init(bs) == 0) {
         /* for a fixed file, the default BAT entry is not zero */
         s->bat = g_try_malloc0(length);
-        if (length && s->bat != NULL) {
+        if (length && s->bat == NULL) {
             ret = -ENOMEM;
             goto exit;
         }
@@ -1725,7 +1728,6 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
         goto exit;
     }
 
-
 exit:
     g_free(s);
     g_free(buffer);
@@ -1876,7 +1878,6 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
     }
 
 
-
 delete_and_exit:
     bdrv_unref(bs);
 exit:
diff --git a/block/win32-aio.c b/block/win32-aio.c
index 5030e3274d..9323c1aed1 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -88,7 +88,7 @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
 
 
     waiocb->common.cb(waiocb->common.opaque, ret);
-    qemu_aio_release(waiocb);
+    qemu_aio_unref(waiocb);
 }
 
 static void win32_aio_completion_cb(EventNotifier *e)
@@ -106,22 +106,8 @@ static void win32_aio_completion_cb(EventNotifier *e)
     }
 }
 
-static void win32_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb;
-
-    /*
-     * CancelIoEx is only supported in Vista and newer.  For now, just
-     * wait for completion.
-     */
-    while (!HasOverlappedIoCompleted(&waiocb->ov)) {
-        aio_poll(bdrv_get_aio_context(blockacb->bs), true);
-    }
-}
-
 static const AIOCBInfo win32_aiocb_info = {
     .aiocb_size         = sizeof(QEMUWin32AIOCB),
-    .cancel             = win32_aio_cancel,
 };
 
 BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
@@ -172,7 +158,7 @@ BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
 out_dec_count:
     aio->count--;
 out:
-    qemu_aio_release(waiocb);
+    qemu_aio_unref(waiocb);
     return NULL;
 }
 
diff --git a/configure b/configure
index 6c3d6cde14..862f6d27e3 100755
--- a/configure
+++ b/configure
@@ -327,7 +327,6 @@ glusterfs=""
 glusterfs_discard="no"
 glusterfs_zerofill="no"
 archipelago=""
-virtio_blk_data_plane=""
 gtk=""
 gtkabi=""
 vte=""
@@ -1093,9 +1092,8 @@ for opt do
   ;;
   --enable-archipelago) archipelago="yes"
   ;;
-  --disable-virtio-blk-data-plane) virtio_blk_data_plane="no"
-  ;;
-  --enable-virtio-blk-data-plane) virtio_blk_data_plane="yes"
+  --disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane)
+      echo "$0: $opt is obsolete, virtio-blk data-plane is always on" >&2
   ;;
   --disable-gtk) gtk="no"
   ;;
@@ -1350,7 +1348,7 @@ Advanced options (experts only):
   --enable-linux-aio       enable Linux AIO support
   --disable-cap-ng         disable libcap-ng support
   --enable-cap-ng          enable libcap-ng support
-  --disable-attr           disables attr and xattr support
+  --disable-attr           disable attr and xattr support
   --enable-attr            enable attr and xattr support
   --disable-blobs          disable installing provided firmware blobs
   --enable-docs            enable documentation build
@@ -1381,7 +1379,7 @@ Advanced options (experts only):
   --with-vss-sdk=SDK-path  enable Windows VSS support in QEMU Guest Agent
   --with-win-sdk=SDK-path  path to Windows Platform SDK (to build VSS .tlb)
   --disable-seccomp        disable seccomp support
-  --enable-seccomp         enables seccomp support
+  --enable-seccomp         enable seccomp support
   --with-coroutine=BACKEND coroutine backend. Supported options:
                            gthread, ucontext, sigaltstack, windows
   --disable-coroutine-pool disable coroutine freelist (worse performance)
@@ -1396,7 +1394,7 @@ Advanced options (experts only):
   --enable-tpm             enable TPM support
   --disable-libssh2        disable ssh block device support
   --enable-libssh2         enable ssh block device support
-  --disable-vhdx           disables support for the Microsoft VHDX image format
+  --disable-vhdx           disable support for the Microsoft VHDX image format
   --enable-vhdx            enable support for the Microsoft VHDX image format
   --disable-quorum         disable quorum block filter support
   --enable-quorum          enable quorum block filter support
@@ -2944,16 +2942,6 @@ else
 fi
 
 ##########################################
-# adjust virtio-blk-data-plane based on linux-aio
-
-if test "$virtio_blk_data_plane" = "yes" -a \
-	"$linux_aio" != "yes" ; then
-  error_exit "virtio-blk-data-plane requires Linux AIO, please try --enable-linux-aio"
-elif test -z "$virtio_blk_data_plane" ; then
-  virtio_blk_data_plane=$linux_aio
-fi
-
-##########################################
 # attr probe
 
 if test "$attr" != "no" ; then
@@ -4327,7 +4315,6 @@ echo "coroutine backend $coroutine"
 echo "coroutine pool    $coroutine_pool"
 echo "GlusterFS support $glusterfs"
 echo "Archipelago support $archipelago"
-echo "virtio-blk-data-plane $virtio_blk_data_plane"
 echo "gcov              $gcov_tool"
 echo "gcov enabled      $gcov"
 echo "TPM support       $tpm"
@@ -4789,10 +4776,6 @@ if test "$quorum" = "yes" ; then
   echo "CONFIG_QUORUM=y" >> $config_host_mak
 fi
 
-if test "$virtio_blk_data_plane" = "yes" ; then
-  echo 'CONFIG_VIRTIO_BLK_DATA_PLANE=$(CONFIG_VIRTIO)' >> $config_host_mak
-fi
-
 if test "$vhdx" = "yes" ; then
   echo "CONFIG_VHDX=y" >> $config_host_mak
 fi
diff --git a/dma-helpers.c b/dma-helpers.c
index f6811fa86b..7f86e18e72 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -73,7 +73,6 @@ typedef struct {
     QEMUSGList *sg;
     uint64_t sector_num;
     DMADirection dir;
-    bool in_cancel;
     int sg_cur_index;
     dma_addr_t sg_cur_byte;
     QEMUIOVector iov;
@@ -125,12 +124,7 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
         qemu_bh_delete(dbs->bh);
         dbs->bh = NULL;
     }
-    if (!dbs->in_cancel) {
-        /* Requests may complete while dma_aio_cancel is in progress.  In
-         * this case, the AIOCB should not be released because it is still
-         * referenced by dma_aio_cancel.  */
-        qemu_aio_release(dbs);
-    }
+    qemu_aio_unref(dbs);
 }
 
 static void dma_bdrv_cb(void *opaque, int ret)
@@ -186,19 +180,14 @@ static void dma_aio_cancel(BlockDriverAIOCB *acb)
     trace_dma_aio_cancel(dbs);
 
     if (dbs->acb) {
-        BlockDriverAIOCB *acb = dbs->acb;
-        dbs->acb = NULL;
-        dbs->in_cancel = true;
-        bdrv_aio_cancel(acb);
-        dbs->in_cancel = false;
+        bdrv_aio_cancel_async(dbs->acb);
     }
-    dbs->common.cb = NULL;
-    dma_complete(dbs, 0);
 }
 
+
 static const AIOCBInfo dma_aiocb_info = {
     .aiocb_size         = sizeof(DMAAIOCB),
-    .cancel             = dma_aio_cancel,
+    .cancel_async       = dma_aio_cancel,
 };
 
 BlockDriverAIOCB *dma_bdrv_io(
@@ -217,7 +206,6 @@ BlockDriverAIOCB *dma_bdrv_io(
     dbs->sg_cur_index = 0;
     dbs->sg_cur_byte = 0;
     dbs->dir = dir;
-    dbs->in_cancel = false;
     dbs->io_func = io_func;
     dbs->bh = NULL;
     qemu_iovec_init(&dbs->iov, sg->nsg);
diff --git a/docs/image-fuzzer.txt b/docs/image-fuzzer.txt
index 0d0005d34a..3e23ebec33 100644
--- a/docs/image-fuzzer.txt
+++ b/docs/image-fuzzer.txt
@@ -125,7 +125,8 @@ If a fuzzer configuration is specified, then it has the next interpretation:
     will be always fuzzed for every test. This case is useful for regression
     testing.
 
-For now only header fields, header extensions and L1/L2 tables are generated.
+The generator can create header fields, header extensions, L1/L2 tables and
+refcount table and blocks.
 
 Module interfaces
 -----------------
diff --git a/docs/rdma.txt b/docs/rdma.txt
index 1f5d9e9fe4..2bdd0a5be0 100644
--- a/docs/rdma.txt
+++ b/docs/rdma.txt
@@ -18,7 +18,7 @@ Contents:
 * RDMA Migration Protocol Description
 * Versioning and Capabilities
 * QEMUFileRDMA Interface
-* Migration of pc.ram
+* Migration of VM's ram
 * Error handling
 * TODO
 
@@ -149,7 +149,7 @@ The only difference between a SEND message and an RDMA
 message is that SEND messages cause notifications
 to be posted to the completion queue (CQ) on the
 infiniband receiver side, whereas RDMA messages (used
-for pc.ram) do not (to behave like an actual DMA).
+for VM's ram) do not (to behave like an actual DMA).
 
 Messages in infiniband require two things:
 
@@ -355,7 +355,7 @@ If the buffer is empty, then we follow the same steps
 listed above and issue another "QEMU File" protocol command,
 asking for a new SEND message to re-fill the buffer.
 
-Migration of pc.ram:
+Migration of VM's ram:
 ====================
 
 At the beginning of the migration, (migration-rdma.c),
diff --git a/hmp.c b/hmp.c
index 40a90dae70..31fb6a15ca 100644
--- a/hmp.c
+++ b/hmp.c
@@ -679,6 +679,8 @@ void hmp_info_block_jobs(Monitor *mon, const QDict *qdict)
         }
         list = list->next;
     }
+
+    qapi_free_BlockJobInfoList(list);
 }
 
 void hmp_info_tpm(Monitor *mon, const QDict *qdict)
diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
index bf46f03b7e..d4c3ab758d 100644
--- a/hw/block/Makefile.objs
+++ b/hw/block/Makefile.objs
@@ -12,4 +12,4 @@ common-obj-$(CONFIG_NVME_PCI) += nvme.o
 obj-$(CONFIG_SH4) += tc58128.o
 
 obj-$(CONFIG_VIRTIO) += virtio-blk.o
-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/
+obj-$(CONFIG_VIRTIO) += dataplane/
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 38ad38f49f..45e0c8f6e9 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -18,10 +18,8 @@
 #include "hw/block/block.h"
 #include "sysemu/blockdev.h"
 #include "hw/virtio/virtio-blk.h"
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
-# include "dataplane/virtio-blk.h"
-# include "migration/migration.h"
-#endif
+#include "dataplane/virtio-blk.h"
+#include "migration/migration.h"
 #include "block/scsi.h"
 #ifdef __linux__
 # include <scsi/sg.h>
@@ -435,7 +433,6 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
         .num_writes = 0,
     };
 
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
     /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
      * dataplane here instead of waiting for .set_status().
      */
@@ -443,7 +440,6 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
         virtio_blk_data_plane_start(s->dataplane);
         return;
     }
-#endif
 
     while ((req = virtio_blk_get_request(s))) {
         virtio_blk_handle_request(req, &mrb);
@@ -500,11 +496,9 @@ static void virtio_blk_reset(VirtIODevice *vdev)
 {
     VirtIOBlock *s = VIRTIO_BLK(vdev);
 
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
     if (s->dataplane) {
         virtio_blk_data_plane_stop(s->dataplane);
     }
-#endif
 
     /*
      * This should cancel pending requests, but can't do nicely until there
@@ -594,12 +588,10 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
     VirtIOBlock *s = VIRTIO_BLK(vdev);
     uint32_t features;
 
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
     if (s->dataplane && !(status & (VIRTIO_CONFIG_S_DRIVER |
                                     VIRTIO_CONFIG_S_DRIVER_OK))) {
         virtio_blk_data_plane_stop(s->dataplane);
     }
-#endif
 
     if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
         return;
@@ -694,7 +686,6 @@ static const BlockDevOps virtio_block_ops = {
     .resize_cb = virtio_blk_resize,
 };
 
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
 /* Disable dataplane thread during live migration since it does not
  * update the dirty memory bitmap yet.
  */
@@ -725,7 +716,6 @@ static void virtio_blk_migration_state_changed(Notifier *notifier, void *data)
         }
     }
 }
-#endif /* CONFIG_VIRTIO_BLK_DATA_PLANE */
 
 static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
 {
@@ -762,7 +752,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
 
     s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output);
     s->complete_request = virtio_blk_complete_request;
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
     virtio_blk_data_plane_create(vdev, blk, &s->dataplane, &err);
     if (err != NULL) {
         error_propagate(errp, err);
@@ -771,7 +760,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
     }
     s->migration_state_notifier.notify = virtio_blk_migration_state_changed;
     add_migration_state_change_notifier(&s->migration_state_notifier);
-#endif
 
     s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
     register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
@@ -789,11 +777,9 @@ static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOBlock *s = VIRTIO_BLK(dev);
 
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
     remove_migration_state_change_notifier(&s->migration_state_notifier);
     virtio_blk_data_plane_destroy(s->dataplane);
     s->dataplane = NULL;
-#endif
     qemu_del_vm_change_state_handler(s->change);
     unregister_savevm(dev, "virtio-blk", s);
     blockdev_mark_auto_del(s->bs);
@@ -818,9 +804,7 @@ static Property virtio_blk_properties[] = {
 #ifdef __linux__
     DEFINE_PROP_BIT("scsi", VirtIOBlock, blk.scsi, 0, true),
 #endif
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
     DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, blk.data_plane, 0, false),
-#endif
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 2c2e9dcbf4..82a7daa188 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -75,7 +75,7 @@
 /* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables
  * (128K) and other BIOS datastructures (less than 4K reported to be used at
  * the moment, 32K should be enough for a while).  */
-unsigned acpi_data_size = 0x20000 + 0x8000;
+static unsigned acpi_data_size = 0x20000 + 0x8000;
 void pc_set_legacy_acpi_data_size(void)
 {
     acpi_data_size = 0x10000;
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index ba69de30e0..8978643fff 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -78,8 +78,7 @@ static uint32_t  ahci_port_read(AHCIState *s, int port, int offset)
         val = pr->cmd;
         break;
     case PORT_TFDATA:
-        val = ((uint16_t)s->dev[port].port.ifs[0].error << 8) |
-              s->dev[port].port.ifs[0].status;
+        val = pr->tfdata;
         break;
     case PORT_SIG:
         val = pr->sig;
@@ -251,14 +250,13 @@ static void  ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
             check_cmd(s, port);
             break;
         case PORT_TFDATA:
-            s->dev[port].port.ifs[0].error = (val >> 8) & 0xff;
-            s->dev[port].port.ifs[0].status = val & 0xff;
+            /* Read Only. */
             break;
         case PORT_SIG:
-            pr->sig = val;
+            /* Read Only */
             break;
         case PORT_SCR_STAT:
-            pr->scr_stat = val;
+            /* Read Only */
             break;
         case PORT_SCR_CTL:
             if (((pr->scr_ctl & AHCI_SCR_SCTL_DET) == 1) &&
@@ -497,6 +495,8 @@ static void ahci_reset_port(AHCIState *s, int port)
     pr->scr_stat = 0;
     pr->scr_err = 0;
     pr->scr_act = 0;
+    pr->tfdata = 0x7F;
+    pr->sig = 0xFFFFFFFF;
     d->busy_slot = -1;
     d->init_d2h_sent = false;
 
@@ -528,16 +528,16 @@ static void ahci_reset_port(AHCIState *s, int port)
 
     s->dev[port].port_state = STATE_RUN;
     if (!ide_state->bs) {
-        s->dev[port].port_regs.sig = 0;
+        pr->sig = 0;
         ide_state->status = SEEK_STAT | WRERR_STAT;
     } else if (ide_state->drive_kind == IDE_CD) {
-        s->dev[port].port_regs.sig = SATA_SIGNATURE_CDROM;
+        pr->sig = SATA_SIGNATURE_CDROM;
         ide_state->lcyl = 0x14;
         ide_state->hcyl = 0xeb;
         DPRINTF(port, "set lcyl = %d\n", ide_state->lcyl);
         ide_state->status = SEEK_STAT | WRERR_STAT | READY_STAT;
     } else {
-        s->dev[port].port_regs.sig = SATA_SIGNATURE_DISK;
+        pr->sig = SATA_SIGNATURE_DISK;
         ide_state->status = SEEK_STAT | WRERR_STAT;
     }
 
@@ -563,7 +563,8 @@ static void debug_print_fis(uint8_t *fis, int cmd_len)
 
 static void ahci_write_fis_sdb(AHCIState *s, int port, uint32_t finished)
 {
-    AHCIPortRegs *pr = &s->dev[port].port_regs;
+    AHCIDevice *ad = &s->dev[port];
+    AHCIPortRegs *pr = &ad->port_regs;
     IDEState *ide_state;
     uint8_t *sdb_fis;
 
@@ -572,8 +573,8 @@ static void ahci_write_fis_sdb(AHCIState *s, int port, uint32_t finished)
         return;
     }
 
-    sdb_fis = &s->dev[port].res_fis[RES_FIS_SDBFIS];
-    ide_state = &s->dev[port].port.ifs[0];
+    sdb_fis = &ad->res_fis[RES_FIS_SDBFIS];
+    ide_state = &ad->port.ifs[0];
 
     /* clear memory */
     *(uint32_t*)sdb_fis = 0;
@@ -582,9 +583,14 @@ static void ahci_write_fis_sdb(AHCIState *s, int port, uint32_t finished)
     sdb_fis[0] = ide_state->error;
     sdb_fis[2] = ide_state->status & 0x77;
     s->dev[port].finished |= finished;
-    *(uint32_t*)(sdb_fis + 4) = cpu_to_le32(s->dev[port].finished);
+    *(uint32_t*)(sdb_fis + 4) = cpu_to_le32(ad->finished);
+
+    /* Update shadow registers (except BSY 0x80 and DRQ 0x08) */
+    pr->tfdata = (ad->port.ifs[0].error << 8) |
+        (ad->port.ifs[0].status & 0x77) |
+        (pr->tfdata & 0x88);
 
-    ahci_trigger_irq(s, &s->dev[port], PORT_IRQ_SDB_FIS);
+    ahci_trigger_irq(s, ad, PORT_IRQ_SDB_FIS);
 }
 
 static void ahci_write_fis_pio(AHCIDevice *ad, uint16_t len)
@@ -642,6 +648,10 @@ static void ahci_write_fis_pio(AHCIDevice *ad, uint16_t len)
     pio_fis[18] = 0;
     pio_fis[19] = 0;
 
+    /* Update shadow registers: */
+    pr->tfdata = (ad->port.ifs[0].error << 8) |
+        ad->port.ifs[0].status;
+
     if (pio_fis[2] & ERR_STAT) {
         ahci_trigger_irq(ad->hba, ad, PORT_IRQ_TF_ERR);
     }
@@ -693,6 +703,10 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis)
         d2h_fis[i] = 0;
     }
 
+    /* Update shadow registers: */
+    pr->tfdata = (ad->port.ifs[0].error << 8) |
+        ad->port.ifs[0].status;
+
     if (d2h_fis[2] & ERR_STAT) {
         ahci_trigger_irq(ad->hba, ad, PORT_IRQ_TF_ERR);
     }
@@ -791,6 +805,9 @@ static void ncq_cb(void *opaque, int ret)
     NCQTransferState *ncq_tfs = (NCQTransferState *)opaque;
     IDEState *ide_state = &ncq_tfs->drive->port.ifs[0];
 
+    if (ret == -ECANCELED) {
+        return;
+    }
     /* Clear bit for this tag in SActive */
     ncq_tfs->drive->port_regs.scr_act &= ~(1 << ncq_tfs->tag);
 
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 6d52cda4cc..10218dfa3a 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -136,6 +136,7 @@ void ide_atapi_cmd_ok(IDEState *s)
     s->error = 0;
     s->status = READY_STAT | SEEK_STAT;
     s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD;
+    ide_transfer_stop(s);
     ide_set_irq(s->bus);
 }
 
@@ -149,6 +150,7 @@ void ide_atapi_cmd_error(IDEState *s, int sense_key, int asc)
     s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD;
     s->sense_key = sense_key;
     s->asc = asc;
+    ide_transfer_stop(s);
     ide_set_irq(s->bus);
 }
 
@@ -176,9 +178,7 @@ void ide_atapi_cmd_reply_end(IDEState *s)
 #endif
     if (s->packet_transfer_size <= 0) {
         /* end of transfer */
-        s->status = READY_STAT | SEEK_STAT;
-        s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD;
-        ide_transfer_stop(s);
+        ide_atapi_cmd_ok(s);
         ide_set_irq(s->bus);
 #ifdef DEBUG_IDE_ATAPI
         printf("status=0x%x\n", s->status);
@@ -188,7 +188,6 @@ void ide_atapi_cmd_reply_end(IDEState *s)
         if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) {
             ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
             if (ret < 0) {
-                ide_transfer_stop(s);
                 ide_atapi_io_error(s, ret);
                 return;
             }
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 6fba056783..190700ac74 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -372,23 +372,21 @@ static void trim_aio_cancel(BlockDriverAIOCB *acb)
 {
     TrimAIOCB *iocb = container_of(acb, TrimAIOCB, common);
 
-    /* Exit the loop in case bdrv_aio_cancel calls ide_issue_trim_cb again.  */
+    /* Exit the loop so ide_issue_trim_cb will not continue  */
     iocb->j = iocb->qiov->niov - 1;
     iocb->i = (iocb->qiov->iov[iocb->j].iov_len / 8) - 1;
 
-    /* Tell ide_issue_trim_cb not to trigger the completion, too.  */
-    qemu_bh_delete(iocb->bh);
-    iocb->bh = NULL;
+    iocb->ret = -ECANCELED;
 
     if (iocb->aiocb) {
-        bdrv_aio_cancel(iocb->aiocb);
+        bdrv_aio_cancel_async(iocb->aiocb);
+        iocb->aiocb = NULL;
     }
-    qemu_aio_release(iocb);
 }
 
 static const AIOCBInfo trim_aiocb_info = {
     .aiocb_size         = sizeof(TrimAIOCB),
-    .cancel             = trim_aio_cancel,
+    .cancel_async       = trim_aio_cancel,
 };
 
 static void ide_trim_bh_cb(void *opaque)
@@ -399,7 +397,7 @@ static void ide_trim_bh_cb(void *opaque)
 
     qemu_bh_delete(iocb->bh);
     iocb->bh = NULL;
-    qemu_aio_release(iocb);
+    qemu_aio_unref(iocb);
 }
 
 static void ide_issue_trim_cb(void *opaque, int ret)
@@ -568,6 +566,9 @@ static void ide_sector_read_cb(void *opaque, int ret)
     s->pio_aiocb = NULL;
     s->status &= ~BUSY_STAT;
 
+    if (ret == -ECANCELED) {
+        return;
+    }
     block_acct_done(bdrv_get_stats(s->bs), &s->acct);
     if (ret != 0) {
         if (ide_handle_rw_error(s, -ret, IDE_RETRY_PIO |
@@ -678,6 +679,9 @@ void ide_dma_cb(void *opaque, int ret)
     int64_t sector_num;
     bool stay_active = false;
 
+    if (ret == -ECANCELED) {
+        return;
+    }
     if (ret < 0) {
         int op = IDE_RETRY_DMA;
 
@@ -803,6 +807,9 @@ static void ide_sector_write_cb(void *opaque, int ret)
     IDEState *s = opaque;
     int n;
 
+    if (ret == -ECANCELED) {
+        return;
+    }
     block_acct_done(bdrv_get_stats(s->bs), &s->acct);
 
     s->pio_aiocb = NULL;
@@ -882,6 +889,9 @@ static void ide_flush_cb(void *opaque, int ret)
 
     s->pio_aiocb = NULL;
 
+    if (ret == -ECANCELED) {
+        return;
+    }
     if (ret < 0) {
         /* XXX: What sector number to set here? */
         if (ide_handle_rw_error(s, -ret, IDE_RETRY_FLUSH)) {
diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index a2f1639310..8eb77a1472 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -71,6 +71,7 @@
 #include <hw/ide/pci.h>
 #include <hw/ide/ahci.h>
 
+#define ICH9_MSI_CAP_OFFSET     0x80
 #define ICH9_SATA_CAP_OFFSET    0xA8
 
 #define ICH9_IDP_BAR            4
@@ -115,7 +116,6 @@ static int pci_ich9_ahci_init(PCIDevice *dev)
     /* XXX Software should program this register */
     dev->config[0x90]   = 1 << 6; /* Address Map Register - AHCI mode */
 
-    msi_init(dev, 0x50, 1, true, false);
     d->ahci.irq = pci_allocate_irq(dev);
 
     pci_register_bar(dev, ICH9_IDP_BAR, PCI_BASE_ADDRESS_SPACE_IO,
@@ -135,6 +135,11 @@ static int pci_ich9_ahci_init(PCIDevice *dev)
                  (ICH9_IDP_BAR + 0x4) | (ICH9_IDP_INDEX_LOG2 << 4));
     d->ahci.idp_offset = ICH9_IDP_INDEX;
 
+    /* Although the AHCI 1.3 specification states that the first capability
+     * should be PMCAP, the Intel ICH9 data sheet specifies that the ICH9
+     * AHCI device puts the MSI capability first, pointing to 0x80. */
+    msi_init(dev, ICH9_MSI_CAP_OFFSET, 1, true, false);
+
     return 0;
 }
 
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 77bb93e4d7..4e3a061622 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -28,7 +28,6 @@
 #include <sys/socket.h>
 #include <linux/kvm.h>
 #include <fcntl.h>
-#include <linux/virtio_ring.h>
 #include <netpacket/packet.h>
 #include <net/ethernet.h>
 #include <net/if.h>
@@ -36,6 +35,7 @@
 
 #include <stdio.h>
 
+#include "hw/virtio/virtio_ring.h"
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/virtio-bus.h"
 
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 2ab4460f04..2becc9ff07 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -541,7 +541,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
     _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
 
     /*
-     * According to PAPR, rtas ibm,os-term, does not gaurantee a return
+     * According to PAPR, rtas ibm,os-term does not guarantee a return
      * back to the guest cpu.
      *
      * While an additional ibm,extended-os-term property indicates that
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 49c2aaff1f..b67c039a70 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -243,17 +243,25 @@ static void copy_sense_id_to_guest(SenseId *dest, SenseId *src)
     }
 }
 
-static CCW1 copy_ccw_from_guest(hwaddr addr)
+static CCW1 copy_ccw_from_guest(hwaddr addr, bool fmt1)
 {
-    CCW1 tmp;
+    CCW0 tmp0;
+    CCW1 tmp1;
     CCW1 ret;
 
-    cpu_physical_memory_read(addr, &tmp, sizeof(tmp));
-    ret.cmd_code = tmp.cmd_code;
-    ret.flags = tmp.flags;
-    ret.count = be16_to_cpu(tmp.count);
-    ret.cda = be32_to_cpu(tmp.cda);
-
+    if (fmt1) {
+        cpu_physical_memory_read(addr, &tmp1, sizeof(tmp1));
+        ret.cmd_code = tmp1.cmd_code;
+        ret.flags = tmp1.flags;
+        ret.count = be16_to_cpu(tmp1.count);
+        ret.cda = be32_to_cpu(tmp1.cda);
+    } else {
+        cpu_physical_memory_read(addr, &tmp0, sizeof(tmp0));
+        ret.cmd_code = tmp0.cmd_code;
+        ret.flags = tmp0.flags;
+        ret.count = be16_to_cpu(tmp0.count);
+        ret.cda = be16_to_cpu(tmp0.cda1) | (tmp0.cda0 << 16);
+    }
     return ret;
 }
 
@@ -268,7 +276,8 @@ static int css_interpret_ccw(SubchDev *sch, hwaddr ccw_addr)
         return -EIO;
     }
 
-    ccw = copy_ccw_from_guest(ccw_addr);
+    /* Translate everything to format-1 ccws - the information is the same. */
+    ccw = copy_ccw_from_guest(ccw_addr, sch->ccw_fmt_1);
 
     /* Check for invalid command codes. */
     if ((ccw.cmd_code & 0x0f) == 0) {
@@ -285,6 +294,13 @@ static int css_interpret_ccw(SubchDev *sch, hwaddr ccw_addr)
 
     check_len = !((ccw.flags & CCW_FLAG_SLI) && !(ccw.flags & CCW_FLAG_DC));
 
+    if (!ccw.cda) {
+        if (sch->ccw_no_data_cnt == 255) {
+            return -EINVAL;
+        }
+        sch->ccw_no_data_cnt++;
+    }
+
     /* Look at the command. */
     switch (ccw.cmd_code) {
     case CCW_CMD_NOOP:
@@ -386,6 +402,8 @@ static void sch_handle_start_func(SubchDev *sch, ORB *orb)
             s->ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND);
             return;
         }
+        sch->ccw_fmt_1 = !!(orb->ctrl0 & ORB_CTRL0_MASK_FMT);
+        sch->ccw_no_data_cnt = 0;
     } else {
         s->ctrl &= ~(SCSW_ACTL_SUSP | SCSW_ACTL_RESUME_PEND);
     }
@@ -1347,6 +1365,8 @@ void subch_device_save(SubchDev *s, QEMUFile *f)
         qemu_put_byte(f, s->id.ciw[i].command);
         qemu_put_be16(f, s->id.ciw[i].count);
     }
+    qemu_put_byte(f, s->ccw_fmt_1);
+    qemu_put_byte(f, s->ccw_no_data_cnt);
     return;
 }
 
@@ -1402,6 +1422,8 @@ int subch_device_load(SubchDev *s, QEMUFile *f)
         s->id.ciw[i].command = qemu_get_byte(f);
         s->id.ciw[i].count = qemu_get_be16(f);
     }
+    s->ccw_fmt_1 = qemu_get_byte(f);
+    s->ccw_no_data_cnt = qemu_get_byte(f);
     return 0;
 }
 
diff --git a/hw/s390x/css.h b/hw/s390x/css.h
index c864ea765b..33104ac58e 100644
--- a/hw/s390x/css.h
+++ b/hw/s390x/css.h
@@ -76,7 +76,9 @@ struct SubchDev {
     hwaddr channel_prog;
     CCW1 last_cmd;
     bool last_cmd_valid;
+    bool ccw_fmt_1;
     bool thinint_active;
+    uint8_t ccw_no_data_cnt;
     /* transport-provided data: */
     int (*ccw_cb) (SubchDev *, CCW1);
     SenseId id;
diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
index ec9e855bc1..d21c397756 100644
--- a/hw/virtio/Makefile.objs
+++ b/hw/virtio/Makefile.objs
@@ -2,7 +2,7 @@ common-obj-y += virtio-rng.o
 common-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
 common-obj-y += virtio-bus.o
 common-obj-y += virtio-mmio.o
-common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/
+common-obj-$(CONFIG_VIRTIO) += dataplane/
 
 obj-y += virtio.o virtio-balloon.o 
 obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o
diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c
index 67cb2b823e..372706a234 100644
--- a/hw/virtio/dataplane/vring.c
+++ b/hw/virtio/dataplane/vring.c
@@ -181,7 +181,8 @@ static int get_desc(Vring *vring, VirtQueueElement *elem,
 
     /* Stop for now if there are not enough iovecs available. */
     if (*num >= VIRTQUEUE_MAX_SIZE) {
-        return -ENOBUFS;
+        error_report("Invalid SG num: %u", *num);
+        return -EFAULT;
     }
 
     /* TODO handle non-contiguous memory across region boundaries */
diff --git a/include/block/aio.h b/include/block/aio.h
index 4603c0f066..156272177f 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -26,7 +26,8 @@ typedef struct BlockDriverAIOCB BlockDriverAIOCB;
 typedef void BlockDriverCompletionFunc(void *opaque, int ret);
 
 typedef struct AIOCBInfo {
-    void (*cancel)(BlockDriverAIOCB *acb);
+    void (*cancel_async)(BlockDriverAIOCB *acb);
+    AioContext *(*get_aio_context)(BlockDriverAIOCB *acb);
     size_t aiocb_size;
 } AIOCBInfo;
 
@@ -35,11 +36,13 @@ struct BlockDriverAIOCB {
     BlockDriverState *bs;
     BlockDriverCompletionFunc *cb;
     void *opaque;
+    int refcnt;
 };
 
 void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
                    BlockDriverCompletionFunc *cb, void *opaque);
-void qemu_aio_release(void *p);
+void qemu_aio_unref(void *p);
+void qemu_aio_ref(void *p);
 
 typedef struct AioHandler AioHandler;
 typedef void QEMUBHFunc(void *opaque);
@@ -99,7 +102,7 @@ void aio_set_dispatching(AioContext *ctx, bool dispatching);
  * They also provide bottom halves, a service to execute a piece of code
  * as soon as possible.
  */
-AioContext *aio_context_new(void);
+AioContext *aio_context_new(Error **errp);
 
 /**
  * aio_context_ref:
diff --git a/include/block/block.h b/include/block/block.h
index 07d6d8e67e..3318f0dfbe 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -337,6 +337,7 @@ BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
                                    int64_t sector_num, int nb_sectors,
                                    BlockDriverCompletionFunc *cb, void *opaque);
 void bdrv_aio_cancel(BlockDriverAIOCB *acb);
+void bdrv_aio_cancel_async(BlockDriverAIOCB *acb);
 
 typedef struct BlockRequest {
     /* Fields to be filled by multiwrite caller */
diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
index 5c435749e1..ec07a118f2 100644
--- a/include/hw/s390x/sclp.h
+++ b/include/hw/s390x/sclp.h
@@ -28,8 +28,6 @@
 #define SCLP_UNASSIGN_STORAGE                   0x000C0001
 #define SCLP_CMD_READ_EVENT_DATA                0x00770005
 #define SCLP_CMD_WRITE_EVENT_DATA               0x00760005
-#define SCLP_CMD_READ_EVENT_DATA                0x00770005
-#define SCLP_CMD_WRITE_EVENT_DATA               0x00760005
 #define SCLP_CMD_WRITE_EVENT_MASK               0x00780005
 
 /* SCLP Memory hotplug codes */
diff --git a/include/hw/virtio/dataplane/vring.h b/include/hw/virtio/dataplane/vring.h
index af73ee2ae3..d3e086aefc 100644
--- a/include/hw/virtio/dataplane/vring.h
+++ b/include/hw/virtio/dataplane/vring.h
@@ -17,8 +17,8 @@
 #ifndef VRING_H
 #define VRING_H
 
-#include <linux/virtio_ring.h>
 #include "qemu-common.h"
+#include "hw/virtio/virtio_ring.h"
 #include "hw/virtio/virtio.h"
 
 typedef struct {
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index cf61154658..f3853f2b75 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -132,10 +132,8 @@ typedef struct VirtIOBlock {
     VMChangeStateEntry *change;
     /* Function to push to vq and notify guest */
     void (*complete_request)(struct VirtIOBlockReq *req, unsigned char status);
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
     Notifier migration_state_notifier;
     struct VirtIOBlockDataPlane *dataplane;
-#endif
 } VirtIOBlock;
 
 typedef struct MultiReqBuffer {
diff --git a/include/hw/virtio/virtio_ring.h b/include/hw/virtio/virtio_ring.h
new file mode 100644
index 0000000000..8f58bc975e
--- /dev/null
+++ b/include/hw/virtio/virtio_ring.h
@@ -0,0 +1,167 @@
+#ifndef _LINUX_VIRTIO_RING_H
+#define _LINUX_VIRTIO_RING_H
+/*
+ * This file is copied from /usr/include/linux while converting __uNN types
+ * to uXX_t, __inline__ to inline, and tab to spaces.
+ * */
+
+/* An interface for efficient virtio implementation, currently for use by KVM
+ * and lguest, but hopefully others soon.  Do NOT change this since it will
+ * break existing servers and clients.
+ *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Copyright Rusty Russell IBM Corporation 2007. */
+
+/* This marks a buffer as continuing via the next field. */
+#define VRING_DESC_F_NEXT   1
+/* This marks a buffer as write-only (otherwise read-only). */
+#define VRING_DESC_F_WRITE  2
+/* This means the buffer contains a list of buffer descriptors. */
+#define VRING_DESC_F_INDIRECT   4
+
+/* The Host uses this in used->flags to advise the Guest: don't kick me when
+ * you add a buffer.  It's unreliable, so it's simply an optimization.  Guest
+ * will still kick if it's out of buffers. */
+#define VRING_USED_F_NO_NOTIFY  1
+/* The Guest uses this in avail->flags to advise the Host: don't interrupt me
+ * when you consume a buffer.  It's unreliable, so it's simply an
+ * optimization.  */
+#define VRING_AVAIL_F_NO_INTERRUPT  1
+
+/* We support indirect buffer descriptors */
+#define VIRTIO_RING_F_INDIRECT_DESC 28
+
+/* The Guest publishes the used index for which it expects an interrupt
+ * at the end of the avail ring. Host should ignore the avail->flags field. */
+/* The Host publishes the avail index for which it expects a kick
+ * at the end of the used ring. Guest should ignore the used->flags field. */
+#define VIRTIO_RING_F_EVENT_IDX     29
+
+/* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
+struct vring_desc {
+    /* Address (guest-physical). */
+    uint64_t addr;
+    /* Length. */
+    uint32_t len;
+    /* The flags as indicated above. */
+    uint16_t flags;
+    /* We chain unused descriptors via this, too */
+    uint16_t next;
+};
+
+struct vring_avail {
+    uint16_t flags;
+    uint16_t idx;
+    uint16_t ring[];
+};
+
+/* u32 is used here for ids for padding reasons. */
+struct vring_used_elem {
+    /* Index of start of used descriptor chain. */
+    uint32_t id;
+    /* Total length of the descriptor chain which was used (written to) */
+    uint32_t len;
+};
+
+struct vring_used {
+    uint16_t flags;
+    uint16_t idx;
+    struct vring_used_elem ring[];
+};
+
+struct vring {
+    unsigned int num;
+
+    struct vring_desc *desc;
+
+    struct vring_avail *avail;
+
+    struct vring_used *used;
+};
+
+/* The standard layout for the ring is a continuous chunk of memory which looks
+ * like this.  We assume num is a power of 2.
+ *
+ * struct vring
+ * {
+ *  // The actual descriptors (16 bytes each)
+ *  struct vring_desc desc[num];
+ *
+ *  // A ring of available descriptor heads with free-running index.
+ *  uint16_t avail_flags;
+ *  uint16_t avail_idx;
+ *  uint16_t available[num];
+ *  uint16_t used_event_idx;
+ *
+ *  // Padding to the next align boundary.
+ *  char pad[];
+ *
+ *  // A ring of used descriptor heads with free-running index.
+ *  uint16_t used_flags;
+ *  uint16_t used_idx;
+ *  struct vring_used_elem used[num];
+ *  uint16_t avail_event_idx;
+ * };
+ */
+/* We publish the used event index at the end of the available ring, and vice
+ * versa. They are at the end for backwards compatibility. */
+#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
+#define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num])
+
+static inline void vring_init(struct vring *vr, unsigned int num, void *p,
+                  unsigned long align)
+{
+    vr->num = num;
+    vr->desc = p;
+    vr->avail = p + num*sizeof(struct vring_desc);
+    vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(uint16_t)
+        + align-1) & ~(align - 1));
+}
+
+static inline unsigned vring_size(unsigned int num, unsigned long align)
+{
+    return ((sizeof(struct vring_desc) * num + sizeof(uint16_t) * (3 + num)
+         + align - 1) & ~(align - 1))
+        + sizeof(uint16_t) * 3 + sizeof(struct vring_used_elem) * num;
+}
+
+/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
+/* Assuming a given event_idx value from the other size, if
+ * we have just incremented index from old to new_idx,
+ * should we trigger an event? */
+static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
+{
+    /* Note: Xen has similar logic for notification hold-off
+     * in include/xen/interface/io/ring.h with req_event and req_prod
+     * corresponding to event_idx + 1 and new_idx respectively.
+     * Note also that req_event and req_prod in Xen start at 1,
+     * event indexes in virtio start at 0. */
+    return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
+}
+
+#endif /* _LINUX_VIRTIO_RING_H */
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 6f0200a7ac..62c68c0f32 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -42,7 +42,7 @@
  *
  * In the case of QEMU tools, this will also start/initialize timers.
  */
-int qemu_init_main_loop(void);
+int qemu_init_main_loop(Error **errp);
 
 /**
  * main_loop_wait: Run one iteration of the main loop.
diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h
index 8939233f37..769ec069b7 100644
--- a/include/sysemu/arch_init.h
+++ b/include/sysemu/arch_init.h
@@ -6,23 +6,23 @@
 
 enum {
     QEMU_ARCH_ALL = -1,
-    QEMU_ARCH_ALPHA = 1,
-    QEMU_ARCH_ARM = 2,
-    QEMU_ARCH_CRIS = 4,
-    QEMU_ARCH_I386 = 8,
-    QEMU_ARCH_M68K = 16,
-    QEMU_ARCH_LM32 = 32,
-    QEMU_ARCH_MICROBLAZE = 64,
-    QEMU_ARCH_MIPS = 128,
-    QEMU_ARCH_PPC = 256,
-    QEMU_ARCH_S390X = 512,
-    QEMU_ARCH_SH4 = 1024,
-    QEMU_ARCH_SPARC = 2048,
-    QEMU_ARCH_XTENSA = 4096,
-    QEMU_ARCH_OPENRISC = 8192,
-    QEMU_ARCH_UNICORE32 = 0x4000,
-    QEMU_ARCH_MOXIE = 0x8000,
-    QEMU_ARCH_TRICORE = 0x10000,
+    QEMU_ARCH_ALPHA = (1 << 0),
+    QEMU_ARCH_ARM = (1 << 1),
+    QEMU_ARCH_CRIS = (1 << 2),
+    QEMU_ARCH_I386 = (1 << 3),
+    QEMU_ARCH_M68K = (1 << 4),
+    QEMU_ARCH_LM32 = (1 << 5),
+    QEMU_ARCH_MICROBLAZE = (1 << 6),
+    QEMU_ARCH_MIPS = (1 << 7),
+    QEMU_ARCH_PPC = (1 << 8),
+    QEMU_ARCH_S390X = (1 << 9),
+    QEMU_ARCH_SH4 = (1 << 10),
+    QEMU_ARCH_SPARC = (1 << 11),
+    QEMU_ARCH_XTENSA = (1 << 12),
+    QEMU_ARCH_OPENRISC = (1 << 13),
+    QEMU_ARCH_UNICORE32 = (1 << 14),
+    QEMU_ARCH_MOXIE = (1 << 15),
+    QEMU_ARCH_TRICORE = (1 << 16),
 };
 
 extern const uint32_t arch_type;
diff --git a/iothread.c b/iothread.c
index d9403cf69e..342a23fcb0 100644
--- a/iothread.c
+++ b/iothread.c
@@ -17,6 +17,7 @@
 #include "block/aio.h"
 #include "sysemu/iothread.h"
 #include "qmp-commands.h"
+#include "qemu/error-report.h"
 
 #define IOTHREADS_PATH "/objects"
 
@@ -53,6 +54,9 @@ static void iothread_instance_finalize(Object *obj)
 {
     IOThread *iothread = IOTHREAD(obj);
 
+    if (!iothread->ctx) {
+        return;
+    }
     iothread->stopping = true;
     aio_notify(iothread->ctx);
     qemu_thread_join(&iothread->thread);
@@ -63,11 +67,16 @@ static void iothread_instance_finalize(Object *obj)
 
 static void iothread_complete(UserCreatable *obj, Error **errp)
 {
+    Error *local_error = NULL;
     IOThread *iothread = IOTHREAD(obj);
 
     iothread->stopping = false;
-    iothread->ctx = aio_context_new();
     iothread->thread_id = -1;
+    iothread->ctx = aio_context_new(&local_error);
+    if (!iothread->ctx) {
+        error_propagate(errp, local_error);
+        return;
+    }
 
     qemu_mutex_init(&iothread->init_done_lock);
     qemu_cond_init(&iothread->init_done_cond);
diff --git a/libcacard/vcard_emul_nss.c b/libcacard/vcard_emul_nss.c
index f1bba57c2f..07b446481e 100644
--- a/libcacard/vcard_emul_nss.c
+++ b/libcacard/vcard_emul_nss.c
@@ -286,10 +286,10 @@ vcard_emul_rsa_op(VCard *card, VCardKey *key,
             }
         }
         if ((i < buffer_size) && (buffer[i] == 0)) {
-            /* yes, we have a properly formated PKCS #1 signature */
+            /* yes, we have a properly formatted PKCS #1 signature */
             /*
              * NOTE: even if we accidentally got an encrypt buffer, which
-             * through shear luck started with 00, 01, ff, 00, it won't matter
+             * through sheer luck started with 00, 01, ff, 00, it won't matter
              * because the resulting Sign operation will effectively decrypt
              * the real buffer.
              */
diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h
index c656f61cfc..bb5df43874 100644
--- a/linux-headers/linux/vhost.h
+++ b/linux-headers/linux/vhost.h
@@ -14,7 +14,7 @@
 
 #include <linux/ioctl.h>
 #include <linux/virtio_config.h>
-#include <linux/virtio_ring.h>
+#include "hw/virtio/virtio_ring.h"
 
 struct vhost_vring_state {
 	unsigned int index;
diff --git a/main-loop.c b/main-loop.c
index 3cc79f82f9..53393a4b18 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -126,10 +126,11 @@ void qemu_notify_event(void)
 
 static GArray *gpollfds;
 
-int qemu_init_main_loop(void)
+int qemu_init_main_loop(Error **errp)
 {
     int ret;
     GSource *src;
+    Error *local_error = NULL;
 
     init_clocks();
 
@@ -138,8 +139,12 @@ int qemu_init_main_loop(void)
         return ret;
     }
 
+    qemu_aio_context = aio_context_new(&local_error);
+    if (!qemu_aio_context) {
+        error_propagate(errp, local_error);
+        return -EMFILE;
+    }
     gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
-    qemu_aio_context = aio_context_new();
     src = aio_get_g_source(qemu_aio_context);
     g_source_attach(src, NULL);
     g_source_unref(src);
diff --git a/migration-rdma.c b/migration-rdma.c
index d99812c451..b32dbdfccd 100644
--- a/migration-rdma.c
+++ b/migration-rdma.c
@@ -2523,7 +2523,7 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp)
 /*
  * QEMUFile interface to the control channel.
  * SEND messages for control only.
- * pc.ram is handled with regular RDMA messages.
+ * VM's ram is handled with regular RDMA messages.
  */
 static int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf,
                                 int64_t pos, int size)
@@ -2539,7 +2539,7 @@ static int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf,
 
     /*
      * Push out any writes that
-     * we're queued up for pc.ram.
+     * we're queued up for VM's ram.
      */
     ret = qemu_rdma_write_flush(f, rdma);
     if (ret < 0) {
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 95dcd81ed4..fa2d1b7528 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -197,7 +197,7 @@
 #       'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
 #       'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow',
 #       'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
-#       2.2: 'archipelago'
+#       2.2: 'archipelago' added, 'cow' dropped
 #
 # @backing_file: #optional the name of the backing file (for copy-on-write)
 #
@@ -1148,10 +1148,11 @@
 # Since: 2.0
 ##
 { 'enum': 'BlockdevDriver',
-  'data': [ 'archipelago', 'file', 'host_device', 'host_cdrom', 'host_floppy',
-            'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug',
-            'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow',
-            'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum' ] }
+  'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
+            'dmg', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
+            'host_floppy', 'http', 'https', 'null-aio', 'null-co', 'parallels',
+            'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp', 'vdi', 'vhdx',
+            'vmdk', 'vpc', 'vvfat' ] }
 
 ##
 # @BlockdevOptionsBase
@@ -1204,6 +1205,18 @@
   'data': { 'filename': 'str' } }
 
 ##
+# @BlockdevOptionsNull
+#
+# Driver specific block device options for the null backend.
+#
+# @size:    #optional size of the device in bytes.
+#
+# Since: 2.2
+##
+{ 'type': 'BlockdevOptionsNull',
+  'data': { '*size': 'int' } }
+
+##
 # @BlockdevOptionsVVFAT
 #
 # Driver specific block device options for the vvfat protocol.
@@ -1251,6 +1264,67 @@
   'data': { '*backing': 'BlockdevRef' } }
 
 ##
+# @Qcow2OverlapCheckMode
+#
+# General overlap check modes.
+#
+# @none:        Do not perform any checks
+#
+# @constant:    Perform only checks which can be done in constant time and
+#               without reading anything from disk
+#
+# @cached:      Perform only checks which can be done without reading anything
+#               from disk
+#
+# @all:         Perform all available overlap checks
+#
+# Since: 2.2
+##
+{ 'enum': 'Qcow2OverlapCheckMode',
+  'data': [ 'none', 'constant', 'cached', 'all' ] }
+
+##
+# @Qcow2OverlapCheckFlags
+#
+# Structure of flags for each metadata structure. Setting a field to 'true'
+# makes qemu guard that structure against unintended overwriting. The default
+# value is chosen according to the template given.
+#
+# @template: Specifies a template mode which can be adjusted using the other
+#            flags, defaults to 'cached'
+#
+# Since: 2.2
+##
+{ 'type': 'Qcow2OverlapCheckFlags',
+  'data': { '*template':       'Qcow2OverlapCheckMode',
+            '*main-header':    'bool',
+            '*active-l1':      'bool',
+            '*active-l2':      'bool',
+            '*refcount-table': 'bool',
+            '*refcount-block': 'bool',
+            '*snapshot-table': 'bool',
+            '*inactive-l1':    'bool',
+            '*inactive-l2':    'bool' } }
+
+##
+# @Qcow2OverlapChecks
+#
+# Specifies which metadata structures should be guarded against unintended
+# overwriting.
+#
+# @flags:   set of flags for separate specification of each metadata structure
+#           type
+#
+# @mode:    named mode which chooses a specific set of flags
+#
+# Since: 2.2
+##
+{ 'union': 'Qcow2OverlapChecks',
+  'discriminator': {},
+  'data': { 'flags': 'Qcow2OverlapCheckFlags',
+            'mode':  'Qcow2OverlapCheckMode' } }
+
+##
 # @BlockdevOptionsQcow2
 #
 # Driver specific block device options for qcow2.
@@ -1269,6 +1343,18 @@
 #                         should be issued on other occasions where a cluster
 #                         gets freed
 #
+# @overlap-check:         #optional which overlap checks to perform for writes
+#                         to the image, defaults to 'cached' (since 2.2)
+#
+# @cache-size:            #optional the maximum total size of the L2 table and
+#                         refcount block caches in bytes (since 2.2)
+#
+# @l2-cache-size:         #optional the maximum size of the L2 table cache in
+#                         bytes (since 2.2)
+#
+# @refcount-cache-size:   #optional the maximum size of the refcount block cache
+#                         in bytes (since 2.2)
+#
 # Since: 1.7
 ##
 { 'type': 'BlockdevOptionsQcow2',
@@ -1276,7 +1362,11 @@
   'data': { '*lazy-refcounts': 'bool',
             '*pass-discard-request': 'bool',
             '*pass-discard-snapshot': 'bool',
-            '*pass-discard-other': 'bool' } }
+            '*pass-discard-other': 'bool',
+            '*overlap-check': 'Qcow2OverlapChecks',
+            '*cache-size': 'int',
+            '*l2-cache-size': 'int',
+            '*refcount-cache-size': 'int' } }
 
 
 ##
@@ -1471,39 +1561,40 @@
   'discriminator': 'driver',
   'data': {
       'archipelago':'BlockdevOptionsArchipelago',
+      'blkdebug':   'BlockdevOptionsBlkdebug',
+      'blkverify':  'BlockdevOptionsBlkverify',
+      'bochs':      'BlockdevOptionsGenericFormat',
+      'cloop':      'BlockdevOptionsGenericFormat',
+      'dmg':        'BlockdevOptionsGenericFormat',
       'file':       'BlockdevOptionsFile',
-      'host_device':'BlockdevOptionsFile',
+      'ftp':        'BlockdevOptionsFile',
+      'ftps':       'BlockdevOptionsFile',
+# TODO gluster: Wait for structured options
       'host_cdrom': 'BlockdevOptionsFile',
+      'host_device':'BlockdevOptionsFile',
       'host_floppy':'BlockdevOptionsFile',
       'http':       'BlockdevOptionsFile',
       'https':      'BlockdevOptionsFile',
-      'ftp':        'BlockdevOptionsFile',
-      'ftps':       'BlockdevOptionsFile',
-      'tftp':       'BlockdevOptionsFile',
-# TODO gluster: Wait for structured options
 # TODO iscsi: Wait for structured options
 # TODO nbd: Should take InetSocketAddress for 'host'?
 # TODO nfs: Wait for structured options
-# TODO rbd: Wait for structured options
-# TODO sheepdog: Wait for structured options
-# TODO ssh: Should take InetSocketAddress for 'host'?
-      'vvfat':      'BlockdevOptionsVVFAT',
-      'blkdebug':   'BlockdevOptionsBlkdebug',
-      'blkverify':  'BlockdevOptionsBlkverify',
-      'bochs':      'BlockdevOptionsGenericFormat',
-      'cloop':      'BlockdevOptionsGenericFormat',
-      'cow':        'BlockdevOptionsGenericCOWFormat',
-      'dmg':        'BlockdevOptionsGenericFormat',
+      'null-aio':   'BlockdevOptionsNull',
+      'null-co':    'BlockdevOptionsNull',
       'parallels':  'BlockdevOptionsGenericFormat',
-      'qcow':       'BlockdevOptionsGenericCOWFormat',
       'qcow2':      'BlockdevOptionsQcow2',
+      'qcow':       'BlockdevOptionsGenericCOWFormat',
       'qed':        'BlockdevOptionsGenericCOWFormat',
+      'quorum':     'BlockdevOptionsQuorum',
       'raw':        'BlockdevOptionsGenericFormat',
+# TODO rbd: Wait for structured options
+# TODO sheepdog: Wait for structured options
+# TODO ssh: Should take InetSocketAddress for 'host'?
+      'tftp':       'BlockdevOptionsFile',
       'vdi':        'BlockdevOptionsGenericFormat',
       'vhdx':       'BlockdevOptionsGenericFormat',
       'vmdk':       'BlockdevOptionsGenericCOWFormat',
       'vpc':        'BlockdevOptionsGenericFormat',
-      'quorum':     'BlockdevOptionsQuorum'
+      'vvfat':      'BlockdevOptionsVVFAT'
   } }
 
 ##
@@ -1555,7 +1646,7 @@
 ##
 # @BLOCK_IMAGE_CORRUPTED
 #
-# Emitted when a disk image is being marked corrupt
+# Emitted when a corruption has been detected in a disk image
 #
 # @device: device name
 #
@@ -1569,13 +1660,18 @@
 # @size: #optional, if the corruption resulted from an image access, this is
 #        the access size
 #
+# fatal: if set, the image is marked corrupt and therefore unusable after this
+#        event and must be repaired (Since 2.2; before, every
+#        BLOCK_IMAGE_CORRUPTED event was fatal)
+#
 # Since: 1.7
 ##
 { 'event': 'BLOCK_IMAGE_CORRUPTED',
   'data': { 'device' : 'str',
             'msg'    : 'str',
             '*offset': 'int',
-            '*size'  : 'int' } }
+            '*size'  : 'int',
+            'fatal'  : 'bool' } }
 
 ##
 # @BLOCK_IO_ERROR
diff --git a/qdev-monitor.c b/qdev-monitor.c
index fb9ee24b3a..5ec66067f5 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -206,7 +206,7 @@ int qdev_device_help(QemuOpts *opts)
     }
 
     prop_list = qmp_device_list_properties(driver, &local_err);
-    if (!prop_list) {
+    if (local_err) {
         error_printf("%s\n", error_get_pretty(local_err));
         error_free(local_err);
         return 1;
diff --git a/qemu-char.c b/qemu-char.c
index 2a3cb9fb05..8623c70964 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -1017,6 +1017,7 @@ static CharDriverState *qemu_chr_open_pipe(ChardevHostdev *opts)
 /* init terminal so that we can grab keys */
 static struct termios oldtty;
 static int old_fd0_flags;
+static bool stdio_in_use;
 static bool stdio_allow_signal;
 
 static void term_exit(void)
@@ -1060,8 +1061,15 @@ static CharDriverState *qemu_chr_open_stdio(ChardevStdio *opts)
         error_report("cannot use stdio with -daemonize");
         return NULL;
     }
+
+    if (stdio_in_use) {
+        error_report("cannot use stdio by multiple character devices");
+        exit(1);
+    }
+
+    stdio_in_use = true;
     old_fd0_flags = fcntl(0, F_GETFL);
-    tcgetattr (0, &oldtty);
+    tcgetattr(0, &oldtty);
     qemu_set_nonblock(0);
     atexit(term_exit);
 
diff --git a/qemu-doc.texi b/qemu-doc.texi
index ef3be72ae2..9973090c6c 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -654,15 +654,6 @@ File name of a base image (see @option{create} subcommand)
 If this option is set to @code{on}, the image is encrypted.
 @end table
 
-@item cow
-User Mode Linux Copy On Write image format. It is supported only for
-compatibility with previous versions.
-Supported options:
-@table @code
-@item backing_file
-File name of a base image (see @option{create} subcommand)
-@end table
-
 @item vdi
 VirtualBox 1.1 compatible image format.
 Supported options:
diff --git a/qemu-img.c b/qemu-img.c
index 91d1ac3d7d..dbf0904dc0 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2879,6 +2879,7 @@ int main(int argc, char **argv)
 {
     const img_cmd_t *cmd;
     const char *cmdname;
+    Error *local_error = NULL;
     int c;
     static const struct option long_options[] = {
         {"help", no_argument, 0, 'h'},
@@ -2893,7 +2894,12 @@ int main(int argc, char **argv)
     error_set_progname(argv[0]);
     qemu_init_exec_dir(argv[0]);
 
-    qemu_init_main_loop();
+    if (qemu_init_main_loop(&local_error)) {
+        error_report("%s", error_get_pretty(local_error));
+        error_free(local_error);
+        exit(EXIT_FAILURE);
+    }
+
     bdrv_init();
     if (argc < 2) {
         error_exit("Not enough arguments");
diff --git a/qemu-img.texi b/qemu-img.texi
index 50d2cca7a0..e3ef4a00e9 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -228,8 +228,8 @@ compression is read-only. It means that if a compressed sector is
 rewritten, then it is rewritten as uncompressed data.
 
 Image conversion is also useful to get smaller image when using a
-growable format such as @code{qcow} or @code{cow}: the empty sectors
-are detected and suppressed from the destination image.
+growable format such as @code{qcow}: the empty sectors are detected and
+suppressed from the destination image.
 
 @var{sparse_size} indicates the consecutive number of bytes (defaults to 4k)
 that must contain only zeros for qemu-img to create a sparse image during
diff --git a/qemu-io.c b/qemu-io.c
index d2ab6946e8..66cf3ef4be 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -379,6 +379,7 @@ int main(int argc, char **argv)
     int c;
     int opt_index = 0;
     int flags = BDRV_O_UNMAP;
+    Error *local_error = NULL;
 
 #ifdef CONFIG_POSIX
     signal(SIGPIPE, SIG_IGN);
@@ -444,7 +445,11 @@ int main(int argc, char **argv)
         exit(1);
     }
 
-    qemu_init_main_loop();
+    if (qemu_init_main_loop(&local_error)) {
+        error_report("%s", error_get_pretty(local_error));
+        error_free(local_error);
+        exit(1);
+    }
     bdrv_init();
 
     /* initialize commands */
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 9bc152e6c3..de9963f8fb 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -674,7 +674,11 @@ int main(int argc, char **argv)
         snprintf(sockpath, 128, SOCKET_PATH, basename(device));
     }
 
-    qemu_init_main_loop();
+    if (qemu_init_main_loop(&local_err)) {
+        error_report("%s", error_get_pretty(local_err));
+        error_free(local_err);
+        exit(EXIT_FAILURE);
+    }
     bdrv_init();
     atexit(bdrv_close_all);
 
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 7658d4bd24..76656cc074 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2081,7 +2081,7 @@ Each json-object contain the following:
          - "file": device file name (json-string)
          - "ro": true if read-only, false otherwise (json-bool)
          - "drv": driver format name (json-string)
-             - Possible values: "blkdebug", "bochs", "cloop", "cow", "dmg",
+             - Possible values: "blkdebug", "bochs", "cloop", "dmg",
                                 "file", "file", "ftp", "ftps", "host_cdrom",
                                 "host_device", "host_floppy", "http", "https",
                                 "nbd", "parallels", "qcow", "qcow2", "raw",
diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py
index b4632324a7..d2f815bca2 100644
--- a/scripts/qapi-types.py
+++ b/scripts/qapi-types.py
@@ -177,6 +177,8 @@ const int %(name)s_qtypes[QTYPE_MAX] = {
             qtype = "QTYPE_QDICT"
         elif find_union(qapi_type):
             qtype = "QTYPE_QDICT"
+        elif find_enum(qapi_type):
+            qtype = "QTYPE_QSTRING"
         else:
             assert False, "Invalid anonymous union member"
 
diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py
index c12969721a..df9f7fb657 100644
--- a/scripts/qapi-visit.py
+++ b/scripts/qapi-visit.py
@@ -263,7 +263,8 @@ void visit_type_%(name)s(Visitor *m, %(name)s **obj, const char *name, Error **e
     for key in members:
         assert (members[key] in builtin_types
             or find_struct(members[key])
-            or find_union(members[key])), "Invalid anonymous union member"
+            or find_union(members[key])
+            or find_enum(members[key])), "Invalid anonymous union member"
 
         enum_full_value = generate_enum_full_value(disc_type, key)
         ret += mcgen('''
diff --git a/target-s390x/ioinst.h b/target-s390x/ioinst.h
index 5bbc67d15e..29f6423df4 100644
--- a/target-s390x/ioinst.h
+++ b/target-s390x/ioinst.h
@@ -156,6 +156,16 @@ typedef struct ORB {
 #define ORB_CTRL1_MASK_ORBX 0x01
 #define ORB_CTRL1_MASK_INVALID 0x3e
 
+/* channel command word (type 0) */
+typedef struct CCW0 {
+        uint8_t cmd_code;
+        uint8_t cda0;
+        uint16_t cda1;
+        uint8_t flags;
+        uint8_t reserved;
+        uint16_t count;
+} QEMU_PACKED CCW0;
+
 /* channel command word (type 1) */
 typedef struct CCW1 {
     uint8_t cmd_code;
diff --git a/tests/Makefile b/tests/Makefile
index a5e3d0c369..f5de29c0b9 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -132,6 +132,7 @@ check-qtest-i386-y = tests/endianness-test$(EXESUF)
 check-qtest-i386-y += tests/fdc-test$(EXESUF)
 gcov-files-i386-y = hw/block/fdc.c
 check-qtest-i386-y += tests/ide-test$(EXESUF)
+check-qtest-i386-y += tests/ahci-test$(EXESUF)
 check-qtest-i386-y += tests/hd-geo-test$(EXESUF)
 gcov-files-i386-y += hw/block/hd-geometry.c
 check-qtest-i386-y += tests/boot-order-test$(EXESUF)
@@ -307,6 +308,7 @@ tests/endianness-test$(EXESUF): tests/endianness-test.o
 tests/spapr-phb-test$(EXESUF): tests/spapr-phb-test.o $(libqos-obj-y)
 tests/fdc-test$(EXESUF): tests/fdc-test.o
 tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y)
+tests/ahci-test$(EXESUF): tests/ahci-test.o $(libqos-pc-obj-y)
 tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o
 tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y)
 tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o $(libqos-obj-y)
diff --git a/tests/ahci-test.c b/tests/ahci-test.c
new file mode 100644
index 0000000000..4c77ebec7a
--- /dev/null
+++ b/tests/ahci-test.c
@@ -0,0 +1,1561 @@
+/*
+ * AHCI test cases
+ *
+ * Copyright (c) 2014 John Snow <jsnow@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <glib.h>
+
+#include "libqtest.h"
+#include "libqos/pci-pc.h"
+#include "libqos/malloc-pc.h"
+
+#include "qemu-common.h"
+#include "qemu/host-utils.h"
+
+#include "hw/pci/pci_ids.h"
+#include "hw/pci/pci_regs.h"
+
+/* Test-specific defines. */
+#define TEST_IMAGE_SIZE    (64 * 1024 * 1024)
+
+/*** Supplementary PCI Config Space IDs & Masks ***/
+#define PCI_DEVICE_ID_INTEL_Q35_AHCI   (0x2922)
+#define PCI_MSI_FLAGS_RESERVED         (0xFF00)
+#define PCI_PM_CTRL_RESERVED             (0xFC)
+#define PCI_BCC(REG32)          ((REG32) >> 24)
+#define PCI_PI(REG32)   (((REG32) >> 8) & 0xFF)
+#define PCI_SCC(REG32) (((REG32) >> 16) & 0xFF)
+
+/*** Recognized AHCI Device Types ***/
+#define AHCI_INTEL_ICH9 (PCI_DEVICE_ID_INTEL_Q35_AHCI << 16 | \
+                         PCI_VENDOR_ID_INTEL)
+
+/*** AHCI/HBA Register Offsets and Bitmasks ***/
+#define AHCI_CAP                          (0)
+#define AHCI_CAP_NP                    (0x1F)
+#define AHCI_CAP_SXS                   (0x20)
+#define AHCI_CAP_EMS                   (0x40)
+#define AHCI_CAP_CCCS                  (0x80)
+#define AHCI_CAP_NCS                 (0x1F00)
+#define AHCI_CAP_PSC                 (0x2000)
+#define AHCI_CAP_SSC                 (0x4000)
+#define AHCI_CAP_PMD                 (0x8000)
+#define AHCI_CAP_FBSS               (0x10000)
+#define AHCI_CAP_SPM                (0x20000)
+#define AHCI_CAP_SAM                (0x40000)
+#define AHCI_CAP_RESERVED           (0x80000)
+#define AHCI_CAP_ISS               (0xF00000)
+#define AHCI_CAP_SCLO             (0x1000000)
+#define AHCI_CAP_SAL              (0x2000000)
+#define AHCI_CAP_SALP             (0x4000000)
+#define AHCI_CAP_SSS              (0x8000000)
+#define AHCI_CAP_SMPS            (0x10000000)
+#define AHCI_CAP_SSNTF           (0x20000000)
+#define AHCI_CAP_SNCQ            (0x40000000)
+#define AHCI_CAP_S64A            (0x80000000)
+
+#define AHCI_GHC                          (1)
+#define AHCI_GHC_HR                    (0x01)
+#define AHCI_GHC_IE                    (0x02)
+#define AHCI_GHC_MRSM                  (0x04)
+#define AHCI_GHC_RESERVED        (0x7FFFFFF8)
+#define AHCI_GHC_AE              (0x80000000)
+
+#define AHCI_IS                           (2)
+#define AHCI_PI                           (3)
+#define AHCI_VS                           (4)
+
+#define AHCI_CCCCTL                       (5)
+#define AHCI_CCCCTL_EN                 (0x01)
+#define AHCI_CCCCTL_RESERVED           (0x06)
+#define AHCI_CCCCTL_CC               (0xFF00)
+#define AHCI_CCCCTL_TV           (0xFFFF0000)
+
+#define AHCI_CCCPORTS                     (6)
+#define AHCI_EMLOC                        (7)
+
+#define AHCI_EMCTL                        (8)
+#define AHCI_EMCTL_STSMR               (0x01)
+#define AHCI_EMCTL_CTLTM              (0x100)
+#define AHCI_EMCTL_CTLRST             (0x200)
+#define AHCI_EMCTL_RESERVED      (0xF0F0FCFE)
+
+#define AHCI_CAP2                         (9)
+#define AHCI_CAP2_BOH                  (0x01)
+#define AHCI_CAP2_NVMP                 (0x02)
+#define AHCI_CAP2_APST                 (0x04)
+#define AHCI_CAP2_RESERVED       (0xFFFFFFF8)
+
+#define AHCI_BOHC                        (10)
+#define AHCI_RESERVED                    (11)
+#define AHCI_NVMHCI                      (24)
+#define AHCI_VENDOR                      (40)
+#define AHCI_PORTS                       (64)
+
+/*** Port Memory Offsets & Bitmasks ***/
+#define AHCI_PX_CLB                       (0)
+#define AHCI_PX_CLB_RESERVED          (0x1FF)
+
+#define AHCI_PX_CLBU                      (1)
+
+#define AHCI_PX_FB                        (2)
+#define AHCI_PX_FB_RESERVED            (0xFF)
+
+#define AHCI_PX_FBU                       (3)
+
+#define AHCI_PX_IS                        (4)
+#define AHCI_PX_IS_DHRS                 (0x1)
+#define AHCI_PX_IS_PSS                  (0x2)
+#define AHCI_PX_IS_DSS                  (0x4)
+#define AHCI_PX_IS_SDBS                 (0x8)
+#define AHCI_PX_IS_UFS                 (0x10)
+#define AHCI_PX_IS_DPS                 (0x20)
+#define AHCI_PX_IS_PCS                 (0x40)
+#define AHCI_PX_IS_DMPS                (0x80)
+#define AHCI_PX_IS_RESERVED       (0x23FFF00)
+#define AHCI_PX_IS_PRCS            (0x400000)
+#define AHCI_PX_IS_IPMS            (0x800000)
+#define AHCI_PX_IS_OFS            (0x1000000)
+#define AHCI_PX_IS_INFS           (0x4000000)
+#define AHCI_PX_IS_IFS            (0x8000000)
+#define AHCI_PX_IS_HBDS          (0x10000000)
+#define AHCI_PX_IS_HBFS          (0x20000000)
+#define AHCI_PX_IS_TFES          (0x40000000)
+#define AHCI_PX_IS_CPDS          (0x80000000)
+
+#define AHCI_PX_IE                        (5)
+#define AHCI_PX_IE_DHRE                 (0x1)
+#define AHCI_PX_IE_PSE                  (0x2)
+#define AHCI_PX_IE_DSE                  (0x4)
+#define AHCI_PX_IE_SDBE                 (0x8)
+#define AHCI_PX_IE_UFE                 (0x10)
+#define AHCI_PX_IE_DPE                 (0x20)
+#define AHCI_PX_IE_PCE                 (0x40)
+#define AHCI_PX_IE_DMPE                (0x80)
+#define AHCI_PX_IE_RESERVED       (0x23FFF00)
+#define AHCI_PX_IE_PRCE            (0x400000)
+#define AHCI_PX_IE_IPME            (0x800000)
+#define AHCI_PX_IE_OFE            (0x1000000)
+#define AHCI_PX_IE_INFE           (0x4000000)
+#define AHCI_PX_IE_IFE            (0x8000000)
+#define AHCI_PX_IE_HBDE          (0x10000000)
+#define AHCI_PX_IE_HBFE          (0x20000000)
+#define AHCI_PX_IE_TFEE          (0x40000000)
+#define AHCI_PX_IE_CPDE          (0x80000000)
+
+#define AHCI_PX_CMD                       (6)
+#define AHCI_PX_CMD_ST                  (0x1)
+#define AHCI_PX_CMD_SUD                 (0x2)
+#define AHCI_PX_CMD_POD                 (0x4)
+#define AHCI_PX_CMD_CLO                 (0x8)
+#define AHCI_PX_CMD_FRE                (0x10)
+#define AHCI_PX_CMD_RESERVED           (0xE0)
+#define AHCI_PX_CMD_CCS              (0x1F00)
+#define AHCI_PX_CMD_MPSS             (0x2000)
+#define AHCI_PX_CMD_FR               (0x4000)
+#define AHCI_PX_CMD_CR               (0x8000)
+#define AHCI_PX_CMD_CPS             (0x10000)
+#define AHCI_PX_CMD_PMA             (0x20000)
+#define AHCI_PX_CMD_HPCP            (0x40000)
+#define AHCI_PX_CMD_MPSP            (0x80000)
+#define AHCI_PX_CMD_CPD            (0x100000)
+#define AHCI_PX_CMD_ESP            (0x200000)
+#define AHCI_PX_CMD_FBSCP          (0x400000)
+#define AHCI_PX_CMD_APSTE          (0x800000)
+#define AHCI_PX_CMD_ATAPI         (0x1000000)
+#define AHCI_PX_CMD_DLAE          (0x2000000)
+#define AHCI_PX_CMD_ALPE          (0x4000000)
+#define AHCI_PX_CMD_ASP           (0x8000000)
+#define AHCI_PX_CMD_ICC          (0xF0000000)
+
+#define AHCI_PX_RES1                      (7)
+
+#define AHCI_PX_TFD                       (8)
+#define AHCI_PX_TFD_STS                (0xFF)
+#define AHCI_PX_TFD_STS_ERR            (0x01)
+#define AHCI_PX_TFD_STS_CS1            (0x06)
+#define AHCI_PX_TFD_STS_DRQ            (0x08)
+#define AHCI_PX_TFD_STS_CS2            (0x70)
+#define AHCI_PX_TFD_STS_BSY            (0x80)
+#define AHCI_PX_TFD_ERR              (0xFF00)
+#define AHCI_PX_TFD_RESERVED     (0xFFFF0000)
+
+#define AHCI_PX_SIG                       (9)
+#define AHCI_PX_SIG_SECTOR_COUNT       (0xFF)
+#define AHCI_PX_SIG_LBA_LOW          (0xFF00)
+#define AHCI_PX_SIG_LBA_MID        (0xFF0000)
+#define AHCI_PX_SIG_LBA_HIGH     (0xFF000000)
+
+#define AHCI_PX_SSTS                     (10)
+#define AHCI_PX_SSTS_DET               (0x0F)
+#define AHCI_PX_SSTS_SPD               (0xF0)
+#define AHCI_PX_SSTS_IPM              (0xF00)
+#define AHCI_PX_SSTS_RESERVED    (0xFFFFF000)
+#define SSTS_DET_NO_DEVICE             (0x00)
+#define SSTS_DET_PRESENT               (0x01)
+#define SSTS_DET_ESTABLISHED           (0x03)
+#define SSTS_DET_OFFLINE               (0x04)
+
+#define AHCI_PX_SCTL                     (11)
+
+#define AHCI_PX_SERR                     (12)
+#define AHCI_PX_SERR_ERR             (0xFFFF)
+#define AHCI_PX_SERR_DIAG        (0xFFFF0000)
+#define AHCI_PX_SERR_DIAG_X      (0x04000000)
+
+#define AHCI_PX_SACT                     (13)
+#define AHCI_PX_CI                       (14)
+#define AHCI_PX_SNTF                     (15)
+
+#define AHCI_PX_FBS                      (16)
+#define AHCI_PX_FBS_EN                  (0x1)
+#define AHCI_PX_FBS_DEC                 (0x2)
+#define AHCI_PX_FBS_SDE                 (0x4)
+#define AHCI_PX_FBS_DEV               (0xF00)
+#define AHCI_PX_FBS_ADO              (0xF000)
+#define AHCI_PX_FBS_DWE             (0xF0000)
+#define AHCI_PX_FBS_RESERVED     (0xFFF000F8)
+
+#define AHCI_PX_RES2                     (17)
+#define AHCI_PX_VS                       (28)
+
+#define HBA_DATA_REGION_SIZE            (256)
+#define HBA_PORT_DATA_SIZE              (128)
+#define HBA_PORT_NUM_REG (HBA_PORT_DATA_SIZE/4)
+
+#define AHCI_VERSION_0_95        (0x00000905)
+#define AHCI_VERSION_1_0         (0x00010000)
+#define AHCI_VERSION_1_1         (0x00010100)
+#define AHCI_VERSION_1_2         (0x00010200)
+#define AHCI_VERSION_1_3         (0x00010300)
+
+/*** Structures ***/
+
+/**
+ * Generic FIS structure.
+ */
+typedef struct FIS {
+    uint8_t fis_type;
+    uint8_t flags;
+    char data[0];
+} __attribute__((__packed__)) FIS;
+
+/**
+ * Register device-to-host FIS structure.
+ */
+typedef struct RegD2HFIS {
+    /* DW0 */
+    uint8_t fis_type;
+    uint8_t flags;
+    uint8_t status;
+    uint8_t error;
+    /* DW1 */
+    uint8_t lba_low;
+    uint8_t lba_mid;
+    uint8_t lba_high;
+    uint8_t device;
+    /* DW2 */
+    uint8_t lba3;
+    uint8_t lba4;
+    uint8_t lba5;
+    uint8_t res1;
+    /* DW3 */
+    uint16_t count;
+    uint8_t res2;
+    uint8_t res3;
+    /* DW4 */
+    uint16_t res4;
+    uint16_t res5;
+} __attribute__((__packed__)) RegD2HFIS;
+
+/**
+ * Register host-to-device FIS structure.
+ */
+typedef struct RegH2DFIS {
+    /* DW0 */
+    uint8_t fis_type;
+    uint8_t flags;
+    uint8_t command;
+    uint8_t feature_low;
+    /* DW1 */
+    uint8_t lba_low;
+    uint8_t lba_mid;
+    uint8_t lba_high;
+    uint8_t device;
+    /* DW2 */
+    uint8_t lba3;
+    uint8_t lba4;
+    uint8_t lba5;
+    uint8_t feature_high;
+    /* DW3 */
+    uint16_t count;
+    uint8_t icc;
+    uint8_t control;
+    /* DW4 */
+    uint32_t aux;
+} __attribute__((__packed__)) RegH2DFIS;
+
+/**
+ * Command List entry structure.
+ * The command list contains between 1-32 of these structures.
+ */
+typedef struct AHCICommand {
+    uint8_t b1;
+    uint8_t b2;
+    uint16_t prdtl; /* Phys Region Desc. Table Length */
+    uint32_t prdbc; /* Phys Region Desc. Byte Count */
+    uint32_t ctba;  /* Command Table Descriptor Base Address */
+    uint32_t ctbau; /*                                    '' Upper */
+    uint32_t res[4];
+} __attribute__((__packed__)) AHCICommand;
+
+/**
+ * Physical Region Descriptor; pointed to by the Command List Header,
+ * struct ahci_command.
+ */
+typedef struct PRD {
+    uint32_t dba;  /* Data Base Address */
+    uint32_t dbau; /* Data Base Address Upper */
+    uint32_t res;  /* Reserved */
+    uint32_t dbc;  /* Data Byte Count (0-indexed) & Interrupt Flag (bit 2^31) */
+} PRD;
+
+typedef struct HBACap {
+    uint32_t cap;
+    uint32_t cap2;
+} HBACap;
+
+/*** Globals ***/
+static QGuestAllocator *guest_malloc;
+static QPCIBus *pcibus;
+static uint64_t barsize;
+static char tmp_path[] = "/tmp/qtest.XXXXXX";
+static bool ahci_pedantic;
+static uint32_t ahci_fingerprint;
+
+/*** Macro Utilities ***/
+#define BITANY(data, mask) (((data) & (mask)) != 0)
+#define BITSET(data, mask) (((data) & (mask)) == (mask))
+#define BITCLR(data, mask) (((data) & (mask)) == 0)
+#define ASSERT_BIT_SET(data, mask) g_assert_cmphex((data) & (mask), ==, (mask))
+#define ASSERT_BIT_CLEAR(data, mask) g_assert_cmphex((data) & (mask), ==, 0)
+
+/*** IO macros for the AHCI memory registers. ***/
+#define AHCI_READ(OFST) qpci_io_readl(ahci, hba_base + (OFST))
+#define AHCI_WRITE(OFST, VAL) qpci_io_writel(ahci, hba_base + (OFST), (VAL))
+#define AHCI_RREG(regno)      AHCI_READ(4 * (regno))
+#define AHCI_WREG(regno, val) AHCI_WRITE(4 * (regno), (val))
+#define AHCI_SET(regno, mask) AHCI_WREG((regno), AHCI_RREG(regno) | (mask))
+#define AHCI_CLR(regno, mask) AHCI_WREG((regno), AHCI_RREG(regno) & ~(mask))
+
+/*** IO macros for port-specific offsets inside of AHCI memory. ***/
+#define PX_OFST(port, regno) (HBA_PORT_NUM_REG * (port) + AHCI_PORTS + (regno))
+#define PX_RREG(port, regno)      AHCI_RREG(PX_OFST((port), (regno)))
+#define PX_WREG(port, regno, val) AHCI_WREG(PX_OFST((port), (regno)), (val))
+#define PX_SET(port, reg, mask)   PX_WREG((port), (reg),                \
+                                          PX_RREG((port), (reg)) | (mask));
+#define PX_CLR(port, reg, mask)   PX_WREG((port), (reg),                \
+                                          PX_RREG((port), (reg)) & ~(mask));
+
+/* For calculating how big the PRD table needs to be: */
+#define CMD_TBL_SIZ(n) ((0x80 + ((n) * sizeof(PRD)) + 0x7F) & ~0x7F)
+
+
+/*** Function Declarations ***/
+static QPCIDevice *get_ahci_device(void);
+static QPCIDevice *start_ahci_device(QPCIDevice *dev, void **hba_base);
+static void free_ahci_device(QPCIDevice *dev);
+static void ahci_test_port_spec(QPCIDevice *ahci, void *hba_base,
+                                HBACap *hcap, uint8_t port);
+static void ahci_test_pci_spec(QPCIDevice *ahci);
+static void ahci_test_pci_caps(QPCIDevice *ahci, uint16_t header,
+                               uint8_t offset);
+static void ahci_test_satacap(QPCIDevice *ahci, uint8_t offset);
+static void ahci_test_msicap(QPCIDevice *ahci, uint8_t offset);
+static void ahci_test_pmcap(QPCIDevice *ahci, uint8_t offset);
+
+/*** Utilities ***/
+
+static void string_bswap16(uint16_t *s, size_t bytes)
+{
+    g_assert_cmphex((bytes & 1), ==, 0);
+    bytes /= 2;
+
+    while (bytes--) {
+        *s = bswap16(*s);
+        s++;
+    }
+}
+
+/**
+ * Locate, verify, and return a handle to the AHCI device.
+ */
+static QPCIDevice *get_ahci_device(void)
+{
+    QPCIDevice *ahci;
+
+    pcibus = qpci_init_pc();
+
+    /* Find the AHCI PCI device and verify it's the right one. */
+    ahci = qpci_device_find(pcibus, QPCI_DEVFN(0x1F, 0x02));
+    g_assert(ahci != NULL);
+
+    ahci_fingerprint = qpci_config_readl(ahci, PCI_VENDOR_ID);
+
+    switch (ahci_fingerprint) {
+    case AHCI_INTEL_ICH9:
+        break;
+    default:
+        /* Unknown device. */
+        g_assert_not_reached();
+    }
+
+    return ahci;
+}
+
+static void free_ahci_device(QPCIDevice *ahci)
+{
+    /* libqos doesn't have a function for this, so free it manually */
+    g_free(ahci);
+
+    if (pcibus) {
+        qpci_free_pc(pcibus);
+        pcibus = NULL;
+    }
+
+    /* Clear our cached barsize information. */
+    barsize = 0;
+}
+
+/*** Test Setup & Teardown ***/
+
+/**
+ * Launch QEMU with the given command line,
+ * and then set up interrupts and our guest malloc interface.
+ */
+static void qtest_boot(const char *cmdline_fmt, ...)
+{
+    va_list ap;
+    char *cmdline;
+
+    va_start(ap, cmdline_fmt);
+    cmdline = g_strdup_vprintf(cmdline_fmt, ap);
+    va_end(ap);
+
+    qtest_start(cmdline);
+    qtest_irq_intercept_in(global_qtest, "ioapic");
+    guest_malloc = pc_alloc_init();
+
+    g_free(cmdline);
+}
+
+/**
+ * Tear down the QEMU instance.
+ */
+static void qtest_shutdown(void)
+{
+    g_free(guest_malloc);
+    guest_malloc = NULL;
+    qtest_end();
+}
+
+/**
+ * Start a Q35 machine and bookmark a handle to the AHCI device.
+ */
+static QPCIDevice *ahci_boot(void)
+{
+    qtest_boot("-drive if=none,id=drive0,file=%s,cache=writeback,serial=%s"
+               " -M q35 "
+               "-device ide-hd,drive=drive0 "
+               "-global ide-hd.ver=%s",
+               tmp_path, "testdisk", "version");
+
+    /* Verify that we have an AHCI device present. */
+    return get_ahci_device();
+}
+
+/**
+ * Clean up the PCI device, then terminate the QEMU instance.
+ */
+static void ahci_shutdown(QPCIDevice *ahci)
+{
+    free_ahci_device(ahci);
+    qtest_shutdown();
+}
+
+/*** Logical Device Initialization ***/
+
+/**
+ * Start the PCI device and sanity-check default operation.
+ */
+static void ahci_pci_enable(QPCIDevice *ahci, void **hba_base)
+{
+    uint8_t reg;
+
+    start_ahci_device(ahci, hba_base);
+
+    switch (ahci_fingerprint) {
+    case AHCI_INTEL_ICH9:
+        /* ICH9 has a register at PCI 0x92 that
+         * acts as a master port enabler mask. */
+        reg = qpci_config_readb(ahci, 0x92);
+        reg |= 0x3F;
+        qpci_config_writeb(ahci, 0x92, reg);
+        /* 0...0111111b -- bit significant, ports 0-5 enabled. */
+        ASSERT_BIT_SET(qpci_config_readb(ahci, 0x92), 0x3F);
+        break;
+    }
+
+}
+
+/**
+ * Map BAR5/ABAR, and engage the PCI device.
+ */
+static QPCIDevice *start_ahci_device(QPCIDevice *ahci, void **hba_base)
+{
+    /* Map AHCI's ABAR (BAR5) */
+    *hba_base = qpci_iomap(ahci, 5, &barsize);
+
+    /* turns on pci.cmd.iose, pci.cmd.mse and pci.cmd.bme */
+    qpci_device_enable(ahci);
+
+    return ahci;
+}
+
+/**
+ * Test and initialize the AHCI's HBA memory areas.
+ * Initialize and start any ports with devices attached.
+ * Bring the HBA into the idle state.
+ */
+static void ahci_hba_enable(QPCIDevice *ahci, void *hba_base)
+{
+    /* Bits of interest in this section:
+     * GHC.AE     Global Host Control / AHCI Enable
+     * PxCMD.ST   Port Command: Start
+     * PxCMD.SUD  "Spin Up Device"
+     * PxCMD.POD  "Power On Device"
+     * PxCMD.FRE  "FIS Receive Enable"
+     * PxCMD.FR   "FIS Receive Running"
+     * PxCMD.CR   "Command List Running"
+     */
+
+    g_assert(ahci != NULL);
+    g_assert(hba_base != NULL);
+
+    uint32_t reg, ports_impl, clb, fb;
+    uint16_t i;
+    uint8_t num_cmd_slots;
+
+    g_assert(hba_base != 0);
+
+    /* Set GHC.AE to 1 */
+    AHCI_SET(AHCI_GHC, AHCI_GHC_AE);
+    reg = AHCI_RREG(AHCI_GHC);
+    ASSERT_BIT_SET(reg, AHCI_GHC_AE);
+
+    /* Read CAP.NCS, how many command slots do we have? */
+    reg = AHCI_RREG(AHCI_CAP);
+    num_cmd_slots = ((reg & AHCI_CAP_NCS) >> ctzl(AHCI_CAP_NCS)) + 1;
+    g_test_message("Number of Command Slots: %u", num_cmd_slots);
+
+    /* Determine which ports are implemented. */
+    ports_impl = AHCI_RREG(AHCI_PI);
+
+    for (i = 0; ports_impl; ports_impl >>= 1, ++i) {
+        if (!(ports_impl & 0x01)) {
+            continue;
+        }
+
+        g_test_message("Initializing port %u", i);
+
+        reg = PX_RREG(i, AHCI_PX_CMD);
+        if (BITCLR(reg, AHCI_PX_CMD_ST | AHCI_PX_CMD_CR |
+                   AHCI_PX_CMD_FRE | AHCI_PX_CMD_FR)) {
+            g_test_message("port is idle");
+        } else {
+            g_test_message("port needs to be idled");
+            PX_CLR(i, AHCI_PX_CMD, (AHCI_PX_CMD_ST | AHCI_PX_CMD_FRE));
+            /* The port has 500ms to disengage. */
+            usleep(500000);
+            reg = PX_RREG(i, AHCI_PX_CMD);
+            ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CR);
+            ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FR);
+            g_test_message("port is now idle");
+            /* The spec does allow for possibly needing a PORT RESET
+             * or HBA reset if we fail to idle the port. */
+        }
+
+        /* Allocate Memory for the Command List Buffer & FIS Buffer */
+        /* PxCLB space ... 0x20 per command, as in 4.2.2 p 36 */
+        clb = guest_alloc(guest_malloc, num_cmd_slots * 0x20);
+        g_test_message("CLB: 0x%08x", clb);
+        PX_WREG(i, AHCI_PX_CLB, clb);
+        g_assert_cmphex(clb, ==, PX_RREG(i, AHCI_PX_CLB));
+
+        /* PxFB space ... 0x100, as in 4.2.1 p 35 */
+        fb = guest_alloc(guest_malloc, 0x100);
+        g_test_message("FB: 0x%08x", fb);
+        PX_WREG(i, AHCI_PX_FB, fb);
+        g_assert_cmphex(fb, ==, PX_RREG(i, AHCI_PX_FB));
+
+        /* Clear PxSERR, PxIS, then IS.IPS[x] by writing '1's. */
+        PX_WREG(i, AHCI_PX_SERR, 0xFFFFFFFF);
+        PX_WREG(i, AHCI_PX_IS, 0xFFFFFFFF);
+        AHCI_WREG(AHCI_IS, (1 << i));
+
+        /* Verify Interrupts Cleared */
+        reg = PX_RREG(i, AHCI_PX_SERR);
+        g_assert_cmphex(reg, ==, 0);
+
+        reg = PX_RREG(i, AHCI_PX_IS);
+        g_assert_cmphex(reg, ==, 0);
+
+        reg = AHCI_RREG(AHCI_IS);
+        ASSERT_BIT_CLEAR(reg, (1 << i));
+
+        /* Enable All Interrupts: */
+        PX_WREG(i, AHCI_PX_IE, 0xFFFFFFFF);
+        reg = PX_RREG(i, AHCI_PX_IE);
+        g_assert_cmphex(reg, ==, ~((uint32_t)AHCI_PX_IE_RESERVED));
+
+        /* Enable the FIS Receive Engine. */
+        PX_SET(i, AHCI_PX_CMD, AHCI_PX_CMD_FRE);
+        reg = PX_RREG(i, AHCI_PX_CMD);
+        ASSERT_BIT_SET(reg, AHCI_PX_CMD_FR);
+
+        /* AHCI 1.3 spec: if !STS.BSY, !STS.DRQ and PxSSTS.DET indicates
+         * physical presence, a device is present and may be started. However,
+         * PxSERR.DIAG.X /may/ need to be cleared a priori. */
+        reg = PX_RREG(i, AHCI_PX_SERR);
+        if (BITSET(reg, AHCI_PX_SERR_DIAG_X)) {
+            PX_SET(i, AHCI_PX_SERR, AHCI_PX_SERR_DIAG_X);
+        }
+
+        reg = PX_RREG(i, AHCI_PX_TFD);
+        if (BITCLR(reg, AHCI_PX_TFD_STS_BSY | AHCI_PX_TFD_STS_DRQ)) {
+            reg = PX_RREG(i, AHCI_PX_SSTS);
+            if ((reg & AHCI_PX_SSTS_DET) == SSTS_DET_ESTABLISHED) {
+                /* Device Found: set PxCMD.ST := 1 */
+                PX_SET(i, AHCI_PX_CMD, AHCI_PX_CMD_ST);
+                ASSERT_BIT_SET(PX_RREG(i, AHCI_PX_CMD), AHCI_PX_CMD_CR);
+                g_test_message("Started Device %u", i);
+            } else if ((reg & AHCI_PX_SSTS_DET)) {
+                /* Device present, but in some unknown state. */
+                g_assert_not_reached();
+            }
+        }
+    }
+
+    /* Enable GHC.IE */
+    AHCI_SET(AHCI_GHC, AHCI_GHC_IE);
+    reg = AHCI_RREG(AHCI_GHC);
+    ASSERT_BIT_SET(reg, AHCI_GHC_IE);
+
+    /* TODO: The device should now be idling and waiting for commands.
+     * In the future, a small test-case to inspect the Register D2H FIS
+     * and clear the initial interrupts might be good. */
+}
+
+/*** Specification Adherence Tests ***/
+
+/**
+ * Implementation for test_pci_spec. Ensures PCI configuration space is sane.
+ */
+static void ahci_test_pci_spec(QPCIDevice *ahci)
+{
+    uint8_t datab;
+    uint16_t data;
+    uint32_t datal;
+
+    /* Most of these bits should start cleared until we turn them on. */
+    data = qpci_config_readw(ahci, PCI_COMMAND);
+    ASSERT_BIT_CLEAR(data, PCI_COMMAND_MEMORY);
+    ASSERT_BIT_CLEAR(data, PCI_COMMAND_MASTER);
+    ASSERT_BIT_CLEAR(data, PCI_COMMAND_SPECIAL);     /* Reserved */
+    ASSERT_BIT_CLEAR(data, PCI_COMMAND_VGA_PALETTE); /* Reserved */
+    ASSERT_BIT_CLEAR(data, PCI_COMMAND_PARITY);
+    ASSERT_BIT_CLEAR(data, PCI_COMMAND_WAIT);        /* Reserved */
+    ASSERT_BIT_CLEAR(data, PCI_COMMAND_SERR);
+    ASSERT_BIT_CLEAR(data, PCI_COMMAND_FAST_BACK);
+    ASSERT_BIT_CLEAR(data, PCI_COMMAND_INTX_DISABLE);
+    ASSERT_BIT_CLEAR(data, 0xF800);                  /* Reserved */
+
+    data = qpci_config_readw(ahci, PCI_STATUS);
+    ASSERT_BIT_CLEAR(data, 0x01 | 0x02 | 0x04);     /* Reserved */
+    ASSERT_BIT_CLEAR(data, PCI_STATUS_INTERRUPT);
+    ASSERT_BIT_SET(data, PCI_STATUS_CAP_LIST);      /* must be set */
+    ASSERT_BIT_CLEAR(data, PCI_STATUS_UDF);         /* Reserved */
+    ASSERT_BIT_CLEAR(data, PCI_STATUS_PARITY);
+    ASSERT_BIT_CLEAR(data, PCI_STATUS_SIG_TARGET_ABORT);
+    ASSERT_BIT_CLEAR(data, PCI_STATUS_REC_TARGET_ABORT);
+    ASSERT_BIT_CLEAR(data, PCI_STATUS_REC_MASTER_ABORT);
+    ASSERT_BIT_CLEAR(data, PCI_STATUS_SIG_SYSTEM_ERROR);
+    ASSERT_BIT_CLEAR(data, PCI_STATUS_DETECTED_PARITY);
+
+    /* RID occupies the low byte, CCs occupy the high three. */
+    datal = qpci_config_readl(ahci, PCI_CLASS_REVISION);
+    if (ahci_pedantic) {
+        /* AHCI 1.3 specifies that at-boot, the RID should reset to 0x00,
+         * Though in practice this is likely seldom true. */
+        ASSERT_BIT_CLEAR(datal, 0xFF);
+    }
+
+    /* BCC *must* equal 0x01. */
+    g_assert_cmphex(PCI_BCC(datal), ==, 0x01);
+    if (PCI_SCC(datal) == 0x01) {
+        /* IDE */
+        ASSERT_BIT_SET(0x80000000, datal);
+        ASSERT_BIT_CLEAR(0x60000000, datal);
+    } else if (PCI_SCC(datal) == 0x04) {
+        /* RAID */
+        g_assert_cmphex(PCI_PI(datal), ==, 0);
+    } else if (PCI_SCC(datal) == 0x06) {
+        /* AHCI */
+        g_assert_cmphex(PCI_PI(datal), ==, 0x01);
+    } else {
+        g_assert_not_reached();
+    }
+
+    datab = qpci_config_readb(ahci, PCI_CACHE_LINE_SIZE);
+    g_assert_cmphex(datab, ==, 0);
+
+    datab = qpci_config_readb(ahci, PCI_LATENCY_TIMER);
+    g_assert_cmphex(datab, ==, 0);
+
+    /* Only the bottom 7 bits must be off. */
+    datab = qpci_config_readb(ahci, PCI_HEADER_TYPE);
+    ASSERT_BIT_CLEAR(datab, 0x7F);
+
+    /* BIST is optional, but the low 7 bits must always start off regardless. */
+    datab = qpci_config_readb(ahci, PCI_BIST);
+    ASSERT_BIT_CLEAR(datab, 0x7F);
+
+    /* BARS 0-4 do not have a boot spec, but ABAR/BAR5 must be clean. */
+    datal = qpci_config_readl(ahci, PCI_BASE_ADDRESS_5);
+    g_assert_cmphex(datal, ==, 0);
+
+    qpci_config_writel(ahci, PCI_BASE_ADDRESS_5, 0xFFFFFFFF);
+    datal = qpci_config_readl(ahci, PCI_BASE_ADDRESS_5);
+    /* ABAR must be 32-bit, memory mapped, non-prefetchable and
+     * must be >= 512 bytes. To that end, bits 0-8 must be off. */
+    ASSERT_BIT_CLEAR(datal, 0xFF);
+
+    /* Capability list MUST be present, */
+    datal = qpci_config_readl(ahci, PCI_CAPABILITY_LIST);
+    /* But these bits are reserved. */
+    ASSERT_BIT_CLEAR(datal, ~0xFF);
+    g_assert_cmphex(datal, !=, 0);
+
+    /* Check specification adherence for capability extenstions. */
+    data = qpci_config_readw(ahci, datal);
+
+    switch (ahci_fingerprint) {
+    case AHCI_INTEL_ICH9:
+        /* Intel ICH9 Family Datasheet 14.1.19 p.550 */
+        g_assert_cmphex((data & 0xFF), ==, PCI_CAP_ID_MSI);
+        break;
+    default:
+        /* AHCI 1.3, Section 2.1.14 -- CAP must point to PMCAP. */
+        g_assert_cmphex((data & 0xFF), ==, PCI_CAP_ID_PM);
+    }
+
+    ahci_test_pci_caps(ahci, data, (uint8_t)datal);
+
+    /* Reserved. */
+    datal = qpci_config_readl(ahci, PCI_CAPABILITY_LIST + 4);
+    g_assert_cmphex(datal, ==, 0);
+
+    /* IPIN might vary, but ILINE must be off. */
+    datab = qpci_config_readb(ahci, PCI_INTERRUPT_LINE);
+    g_assert_cmphex(datab, ==, 0);
+}
+
+/**
+ * Test PCI capabilities for AHCI specification adherence.
+ */
+static void ahci_test_pci_caps(QPCIDevice *ahci, uint16_t header,
+                               uint8_t offset)
+{
+    uint8_t cid = header & 0xFF;
+    uint8_t next = header >> 8;
+
+    g_test_message("CID: %02x; next: %02x", cid, next);
+
+    switch (cid) {
+    case PCI_CAP_ID_PM:
+        ahci_test_pmcap(ahci, offset);
+        break;
+    case PCI_CAP_ID_MSI:
+        ahci_test_msicap(ahci, offset);
+        break;
+    case PCI_CAP_ID_SATA:
+        ahci_test_satacap(ahci, offset);
+        break;
+
+    default:
+        g_test_message("Unknown CAP 0x%02x", cid);
+    }
+
+    if (next) {
+        ahci_test_pci_caps(ahci, qpci_config_readw(ahci, next), next);
+    }
+}
+
+/**
+ * Test SATA PCI capabilitity for AHCI specification adherence.
+ */
+static void ahci_test_satacap(QPCIDevice *ahci, uint8_t offset)
+{
+    uint16_t dataw;
+    uint32_t datal;
+
+    g_test_message("Verifying SATACAP");
+
+    /* Assert that the SATACAP version is 1.0, And reserved bits are empty. */
+    dataw = qpci_config_readw(ahci, offset + 2);
+    g_assert_cmphex(dataw, ==, 0x10);
+
+    /* Grab the SATACR1 register. */
+    datal = qpci_config_readw(ahci, offset + 4);
+
+    switch (datal & 0x0F) {
+    case 0x04: /* BAR0 */
+    case 0x05: /* BAR1 */
+    case 0x06:
+    case 0x07:
+    case 0x08:
+    case 0x09: /* BAR5 */
+    case 0x0F: /* Immediately following SATACR1 in PCI config space. */
+        break;
+    default:
+        /* Invalid BARLOC for the Index Data Pair. */
+        g_assert_not_reached();
+    }
+
+    /* Reserved. */
+    g_assert_cmphex((datal >> 24), ==, 0x00);
+}
+
+/**
+ * Test MSI PCI capability for AHCI specification adherence.
+ */
+static void ahci_test_msicap(QPCIDevice *ahci, uint8_t offset)
+{
+    uint16_t dataw;
+    uint32_t datal;
+
+    g_test_message("Verifying MSICAP");
+
+    dataw = qpci_config_readw(ahci, offset + PCI_MSI_FLAGS);
+    ASSERT_BIT_CLEAR(dataw, PCI_MSI_FLAGS_ENABLE);
+    ASSERT_BIT_CLEAR(dataw, PCI_MSI_FLAGS_QSIZE);
+    ASSERT_BIT_CLEAR(dataw, PCI_MSI_FLAGS_RESERVED);
+
+    datal = qpci_config_readl(ahci, offset + PCI_MSI_ADDRESS_LO);
+    g_assert_cmphex(datal, ==, 0);
+
+    if (dataw & PCI_MSI_FLAGS_64BIT) {
+        g_test_message("MSICAP is 64bit");
+        datal = qpci_config_readl(ahci, offset + PCI_MSI_ADDRESS_HI);
+        g_assert_cmphex(datal, ==, 0);
+        dataw = qpci_config_readw(ahci, offset + PCI_MSI_DATA_64);
+        g_assert_cmphex(dataw, ==, 0);
+    } else {
+        g_test_message("MSICAP is 32bit");
+        dataw = qpci_config_readw(ahci, offset + PCI_MSI_DATA_32);
+        g_assert_cmphex(dataw, ==, 0);
+    }
+}
+
+/**
+ * Test Power Management PCI capability for AHCI specification adherence.
+ */
+static void ahci_test_pmcap(QPCIDevice *ahci, uint8_t offset)
+{
+    uint16_t dataw;
+
+    g_test_message("Verifying PMCAP");
+
+    dataw = qpci_config_readw(ahci, offset + PCI_PM_PMC);
+    ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_PME_CLOCK);
+    ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_RESERVED);
+    ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_D1);
+    ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_D2);
+
+    dataw = qpci_config_readw(ahci, offset + PCI_PM_CTRL);
+    ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_STATE_MASK);
+    ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_RESERVED);
+    ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_DATA_SEL_MASK);
+    ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_DATA_SCALE_MASK);
+}
+
+static void ahci_test_hba_spec(QPCIDevice *ahci, void *hba_base)
+{
+    HBACap hcap;
+    unsigned i;
+    uint32_t cap, cap2, reg;
+    uint32_t ports;
+    uint8_t nports_impl;
+    uint8_t maxports;
+
+    g_assert(ahci != 0);
+    g_assert(hba_base != 0);
+
+    /*
+     * Note that the AHCI spec does expect the BIOS to set up a few things:
+     * CAP.SSS    - Support for staggered spin-up            (t/f)
+     * CAP.SMPS   - Support for mechanical presence switches (t/f)
+     * PI         - Ports Implemented                        (1-32)
+     * PxCMD.HPCP - Hot Plug Capable Port
+     * PxCMD.MPSP - Mechanical Presence Switch Present
+     * PxCMD.CPD  - Cold Presence Detection support
+     *
+     * Additional items are touched if CAP.SSS is on, see AHCI 10.1.1 p.97:
+     * Foreach Port Implemented:
+     * -PxCMD.ST, PxCMD.CR, PxCMD.FRE, PxCMD.FR, PxSCTL.DET are 0
+     * -PxCLB/U and PxFB/U are set to valid regions in memory
+     * -PxSUD is set to 1.
+     * -PxSSTS.DET is polled for presence; if detected, we continue:
+     * -PxSERR is cleared with 1's.
+     * -If PxTFD.STS.BSY, PxTFD.STS.DRQ, and PxTFD.STS.ERR are all zero,
+     *  the device is ready.
+     */
+
+    /* 1 CAP - Capabilities Register */
+    cap = AHCI_RREG(AHCI_CAP);
+    ASSERT_BIT_CLEAR(cap, AHCI_CAP_RESERVED);
+
+    /* 2 GHC - Global Host Control */
+    reg = AHCI_RREG(AHCI_GHC);
+    ASSERT_BIT_CLEAR(reg, AHCI_GHC_HR);
+    ASSERT_BIT_CLEAR(reg, AHCI_GHC_IE);
+    ASSERT_BIT_CLEAR(reg, AHCI_GHC_MRSM);
+    if (BITSET(cap, AHCI_CAP_SAM)) {
+        g_test_message("Supports AHCI-Only Mode: GHC_AE is Read-Only.");
+        ASSERT_BIT_SET(reg, AHCI_GHC_AE);
+    } else {
+        g_test_message("Supports AHCI/Legacy mix.");
+        ASSERT_BIT_CLEAR(reg, AHCI_GHC_AE);
+    }
+
+    /* 3 IS - Interrupt Status */
+    reg = AHCI_RREG(AHCI_IS);
+    g_assert_cmphex(reg, ==, 0);
+
+    /* 4 PI - Ports Implemented */
+    ports = AHCI_RREG(AHCI_PI);
+    /* Ports Implemented must be non-zero. */
+    g_assert_cmphex(ports, !=, 0);
+    /* Ports Implemented must be <= Number of Ports. */
+    nports_impl = ctpopl(ports);
+    g_assert_cmpuint(((AHCI_CAP_NP & cap) + 1), >=, nports_impl);
+
+    g_assert_cmphex(barsize, >, 0);
+    /* Ports must be within the proper range. Given a mapping of SIZE,
+     * 256 bytes are used for global HBA control, and the rest is used
+     * for ports data, at 0x80 bytes each. */
+    maxports = (barsize - HBA_DATA_REGION_SIZE) / HBA_PORT_DATA_SIZE;
+    /* e.g, 30 ports for 4K of memory. (4096 - 256) / 128 = 30 */
+    g_assert_cmphex((reg >> maxports), ==, 0);
+
+    /* 5 AHCI Version */
+    reg = AHCI_RREG(AHCI_VS);
+    switch (reg) {
+    case AHCI_VERSION_0_95:
+    case AHCI_VERSION_1_0:
+    case AHCI_VERSION_1_1:
+    case AHCI_VERSION_1_2:
+    case AHCI_VERSION_1_3:
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    /* 6 Command Completion Coalescing Control: depends on CAP.CCCS. */
+    reg = AHCI_RREG(AHCI_CCCCTL);
+    if (BITSET(cap, AHCI_CAP_CCCS)) {
+        ASSERT_BIT_CLEAR(reg, AHCI_CCCCTL_EN);
+        ASSERT_BIT_CLEAR(reg, AHCI_CCCCTL_RESERVED);
+        ASSERT_BIT_SET(reg, AHCI_CCCCTL_CC);
+        ASSERT_BIT_SET(reg, AHCI_CCCCTL_TV);
+    } else {
+        g_assert_cmphex(reg, ==, 0);
+    }
+
+    /* 7 CCC_PORTS */
+    reg = AHCI_RREG(AHCI_CCCPORTS);
+    /* Must be zeroes initially regardless of CAP.CCCS */
+    g_assert_cmphex(reg, ==, 0);
+
+    /* 8 EM_LOC */
+    reg = AHCI_RREG(AHCI_EMLOC);
+    if (BITCLR(cap, AHCI_CAP_EMS)) {
+        g_assert_cmphex(reg, ==, 0);
+    }
+
+    /* 9 EM_CTL */
+    reg = AHCI_RREG(AHCI_EMCTL);
+    if (BITSET(cap, AHCI_CAP_EMS)) {
+        ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_STSMR);
+        ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_CTLTM);
+        ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_CTLRST);
+        ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_RESERVED);
+    } else {
+        g_assert_cmphex(reg, ==, 0);
+    }
+
+    /* 10 CAP2 -- Capabilities Extended */
+    cap2 = AHCI_RREG(AHCI_CAP2);
+    ASSERT_BIT_CLEAR(cap2, AHCI_CAP2_RESERVED);
+
+    /* 11 BOHC -- Bios/OS Handoff Control */
+    reg = AHCI_RREG(AHCI_BOHC);
+    g_assert_cmphex(reg, ==, 0);
+
+    /* 12 -- 23: Reserved */
+    g_test_message("Verifying HBA reserved area is empty.");
+    for (i = AHCI_RESERVED; i < AHCI_NVMHCI; ++i) {
+        reg = AHCI_RREG(i);
+        g_assert_cmphex(reg, ==, 0);
+    }
+
+    /* 24 -- 39: NVMHCI */
+    if (BITCLR(cap2, AHCI_CAP2_NVMP)) {
+        g_test_message("Verifying HBA/NVMHCI area is empty.");
+        for (i = AHCI_NVMHCI; i < AHCI_VENDOR; ++i) {
+            reg = AHCI_RREG(i);
+            g_assert_cmphex(reg, ==, 0);
+        }
+    }
+
+    /* 40 -- 63: Vendor */
+    g_test_message("Verifying HBA/Vendor area is empty.");
+    for (i = AHCI_VENDOR; i < AHCI_PORTS; ++i) {
+        reg = AHCI_RREG(i);
+        g_assert_cmphex(reg, ==, 0);
+    }
+
+    /* 64 -- XX: Port Space */
+    hcap.cap = cap;
+    hcap.cap2 = cap2;
+    for (i = 0; ports || (i < maxports); ports >>= 1, ++i) {
+        if (BITSET(ports, 0x1)) {
+            g_test_message("Testing port %u for spec", i);
+            ahci_test_port_spec(ahci, hba_base, &hcap, i);
+        } else {
+            uint16_t j;
+            uint16_t low = AHCI_PORTS + (32 * i);
+            uint16_t high = AHCI_PORTS + (32 * (i + 1));
+            g_test_message("Asserting unimplemented port %u "
+                           "(reg [%u-%u]) is empty.",
+                           i, low, high - 1);
+            for (j = low; j < high; ++j) {
+                reg = AHCI_RREG(j);
+                g_assert_cmphex(reg, ==, 0);
+            }
+        }
+    }
+}
+
+/**
+ * Test the memory space for one port for specification adherence.
+ */
+static void ahci_test_port_spec(QPCIDevice *ahci, void *hba_base,
+                                HBACap *hcap, uint8_t port)
+{
+    uint32_t reg;
+    unsigned i;
+
+    /* (0) CLB */
+    reg = PX_RREG(port, AHCI_PX_CLB);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_CLB_RESERVED);
+
+    /* (1) CLBU */
+    if (BITCLR(hcap->cap, AHCI_CAP_S64A)) {
+        reg = PX_RREG(port, AHCI_PX_CLBU);
+        g_assert_cmphex(reg, ==, 0);
+    }
+
+    /* (2) FB */
+    reg = PX_RREG(port, AHCI_PX_FB);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_FB_RESERVED);
+
+    /* (3) FBU */
+    if (BITCLR(hcap->cap, AHCI_CAP_S64A)) {
+        reg = PX_RREG(port, AHCI_PX_FBU);
+        g_assert_cmphex(reg, ==, 0);
+    }
+
+    /* (4) IS */
+    reg = PX_RREG(port, AHCI_PX_IS);
+    g_assert_cmphex(reg, ==, 0);
+
+    /* (5) IE */
+    reg = PX_RREG(port, AHCI_PX_IE);
+    g_assert_cmphex(reg, ==, 0);
+
+    /* (6) CMD */
+    reg = PX_RREG(port, AHCI_PX_CMD);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FRE);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_RESERVED);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CCS);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FR);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CR);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_PMA); /* And RW only if CAP.SPM */
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_APSTE); /* RW only if CAP2.APST */
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_ATAPI);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_DLAE);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_ALPE);  /* RW only if CAP.SALP */
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_ASP);   /* RW only if CAP.SALP */
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_ICC);
+    /* If CPDetect support does not exist, CPState must be off. */
+    if (BITCLR(reg, AHCI_PX_CMD_CPD)) {
+        ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CPS);
+    }
+    /* If MPSPresence is not set, MPSState must be off. */
+    if (BITCLR(reg, AHCI_PX_CMD_MPSP)) {
+        ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_MPSS);
+    }
+    /* If we do not support MPS, MPSS and MPSP must be off. */
+    if (BITCLR(hcap->cap, AHCI_CAP_SMPS)) {
+        ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_MPSS);
+        ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_MPSP);
+    }
+    /* If, via CPD or MPSP we detect a drive, HPCP must be on. */
+    if (BITANY(reg, AHCI_PX_CMD_CPD || AHCI_PX_CMD_MPSP)) {
+        ASSERT_BIT_SET(reg, AHCI_PX_CMD_HPCP);
+    }
+    /* HPCP and ESP cannot both be active. */
+    g_assert(!BITSET(reg, AHCI_PX_CMD_HPCP | AHCI_PX_CMD_ESP));
+    /* If CAP.FBSS is not set, FBSCP must not be set. */
+    if (BITCLR(hcap->cap, AHCI_CAP_FBSS)) {
+        ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FBSCP);
+    }
+
+    /* (7) RESERVED */
+    reg = PX_RREG(port, AHCI_PX_RES1);
+    g_assert_cmphex(reg, ==, 0);
+
+    /* (8) TFD */
+    reg = PX_RREG(port, AHCI_PX_TFD);
+    /* At boot, prior to an FIS being received, the TFD register should be 0x7F,
+     * which breaks down as follows, as seen in AHCI 1.3 sec 3.3.8, p. 27. */
+    ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_ERR);
+    ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_CS1);
+    ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_DRQ);
+    ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_CS2);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_STS_BSY);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_ERR);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_RESERVED);
+
+    /* (9) SIG */
+    /* Though AHCI specifies the boot value should be 0xFFFFFFFF,
+     * Even when GHC.ST is zero, the AHCI HBA may receive the initial
+     * D2H register FIS and update the signature asynchronously,
+     * so we cannot expect a value here. AHCI 1.3, sec 3.3.9, pp 27-28 */
+
+    /* (10) SSTS / SCR0: SStatus */
+    reg = PX_RREG(port, AHCI_PX_SSTS);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_SSTS_RESERVED);
+    /* Even though the register should be 0 at boot, it is asynchronous and
+     * prone to change, so we cannot test any well known value. */
+
+    /* (11) SCTL / SCR2: SControl */
+    reg = PX_RREG(port, AHCI_PX_SCTL);
+    g_assert_cmphex(reg, ==, 0);
+
+    /* (12) SERR / SCR1: SError */
+    reg = PX_RREG(port, AHCI_PX_SERR);
+    g_assert_cmphex(reg, ==, 0);
+
+    /* (13) SACT / SCR3: SActive */
+    reg = PX_RREG(port, AHCI_PX_SACT);
+    g_assert_cmphex(reg, ==, 0);
+
+    /* (14) CI */
+    reg = PX_RREG(port, AHCI_PX_CI);
+    g_assert_cmphex(reg, ==, 0);
+
+    /* (15) SNTF */
+    reg = PX_RREG(port, AHCI_PX_SNTF);
+    g_assert_cmphex(reg, ==, 0);
+
+    /* (16) FBS */
+    reg = PX_RREG(port, AHCI_PX_FBS);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_EN);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_DEC);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_SDE);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_DEV);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_DWE);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_RESERVED);
+    if (BITSET(hcap->cap, AHCI_CAP_FBSS)) {
+        /* if Port-Multiplier FIS-based switching avail, ADO must >= 2 */
+        g_assert((reg & AHCI_PX_FBS_ADO) >> ctzl(AHCI_PX_FBS_ADO) >= 2);
+    }
+
+    /* [17 -- 27] RESERVED */
+    for (i = AHCI_PX_RES2; i < AHCI_PX_VS; ++i) {
+        reg = PX_RREG(port, i);
+        g_assert_cmphex(reg, ==, 0);
+    }
+
+    /* [28 -- 31] Vendor-Specific */
+    for (i = AHCI_PX_VS; i < 32; ++i) {
+        reg = PX_RREG(port, i);
+        if (reg) {
+            g_test_message("INFO: Vendor register %u non-empty", i);
+        }
+    }
+}
+
+/**
+ * Utilizing an initialized AHCI HBA, issue an IDENTIFY command to the first
+ * device we see, then read and check the response.
+ */
+static void ahci_test_identify(QPCIDevice *ahci, void *hba_base)
+{
+    RegD2HFIS *d2h = g_malloc0(0x20);
+    RegD2HFIS *pio = g_malloc0(0x20);
+    RegH2DFIS fis;
+    AHCICommand cmd;
+    PRD prd;
+    uint32_t ports, reg, clb, table, fb, data_ptr;
+    uint16_t buff[256];
+    unsigned i;
+    int rc;
+
+    g_assert(ahci != NULL);
+    g_assert(hba_base != NULL);
+
+    /* We need to:
+     * (1) Create a Command Table Buffer and update the Command List Slot #0
+     *     to point to this buffer.
+     * (2) Construct an FIS host-to-device command structure, and write it to
+     *     the top of the command table buffer.
+     * (3) Create a data buffer for the IDENTIFY response to be sent to
+     * (4) Create a Physical Region Descriptor that points to the data buffer,
+     *     and write it to the bottom (offset 0x80) of the command table.
+     * (5) Now, PxCLB points to the command list, command 0 points to
+     *     our table, and our table contains an FIS instruction and a
+     *     PRD that points to our rx buffer.
+     * (6) We inform the HBA via PxCI that there is a command ready in slot #0.
+     */
+
+    /* Pick the first implemented and running port */
+    ports = AHCI_RREG(AHCI_PI);
+    for (i = 0; i < 32; ports >>= 1, ++i) {
+        if (ports == 0) {
+            i = 32;
+        }
+
+        if (!(ports & 0x01)) {
+            continue;
+        }
+
+        reg = PX_RREG(i, AHCI_PX_CMD);
+        if (BITSET(reg, AHCI_PX_CMD_ST)) {
+            break;
+        }
+    }
+    g_assert_cmphex(i, <, 32);
+    g_test_message("Selected port %u for test", i);
+
+    /* Clear out this port's interrupts (ignore the init register d2h fis) */
+    reg = PX_RREG(i, AHCI_PX_IS);
+    PX_WREG(i, AHCI_PX_IS, reg);
+    g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0);
+
+    /* Wipe the FIS-Recieve Buffer */
+    fb = PX_RREG(i, AHCI_PX_FB);
+    g_assert_cmphex(fb, !=, 0);
+    qmemset(fb, 0x00, 0x100);
+
+    /* Create a Command Table buffer. 0x80 is the smallest with a PRDTL of 0. */
+    /* We need at least one PRD, so round up to the nearest 0x80 multiple.    */
+    table = guest_alloc(guest_malloc, CMD_TBL_SIZ(1));
+    g_assert(table);
+    ASSERT_BIT_CLEAR(table, 0x7F);
+
+    /* Create a data buffer ... where we will dump the IDENTIFY data to. */
+    data_ptr = guest_alloc(guest_malloc, 512);
+    g_assert(data_ptr);
+
+    /* Grab the Command List Buffer pointer */
+    clb = PX_RREG(i, AHCI_PX_CLB);
+    g_assert(clb);
+
+    /* Copy the existing Command #0 structure from the CLB into local memory,
+     * and build a new command #0. */
+    memread(clb, &cmd, sizeof(cmd));
+    cmd.b1 = 5;    /* reg_h2d_fis is 5 double-words long */
+    cmd.b2 = 0x04; /* clear PxTFD.STS.BSY when done */
+    cmd.prdtl = cpu_to_le16(1); /* One PRD table entry. */
+    cmd.prdbc = 0;
+    cmd.ctba = cpu_to_le32(table);
+    cmd.ctbau = 0;
+
+    /* Construct our PRD, noting that DBC is 0-indexed. */
+    prd.dba = cpu_to_le32(data_ptr);
+    prd.dbau = 0;
+    prd.res = 0;
+    /* 511+1 bytes, request DPS interrupt */
+    prd.dbc = cpu_to_le32(511 | 0x80000000);
+
+    /* Construct our Command FIS, Based on http://wiki.osdev.org/AHCI */
+    memset(&fis, 0x00, sizeof(fis));
+    fis.fis_type = 0x27; /* Register Host-to-Device FIS */
+    fis.command = 0xEC;  /* IDENTIFY */
+    fis.device = 0;
+    fis.flags = 0x80;    /* Indicate this is a command FIS */
+
+    /* We've committed nothing yet, no interrupts should be posted yet. */
+    g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0);
+
+    /* Commit the Command FIS to the Command Table */
+    memwrite(table, &fis, sizeof(fis));
+
+    /* Commit the PRD entry to the Command Table */
+    memwrite(table + 0x80, &prd, sizeof(prd));
+
+    /* Commit Command #0, pointing to the Table, to the Command List Buffer. */
+    memwrite(clb, &cmd, sizeof(cmd));
+
+    /* Everything is in place, but we haven't given the go-ahead yet. */
+    g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0);
+
+    /* Issue Command #0 via PxCI */
+    PX_WREG(i, AHCI_PX_CI, (1 << 0));
+    while (BITSET(PX_RREG(i, AHCI_PX_TFD), AHCI_PX_TFD_STS_BSY)) {
+        usleep(50);
+    }
+
+    /* Check for expected interrupts */
+    reg = PX_RREG(i, AHCI_PX_IS);
+    ASSERT_BIT_SET(reg, AHCI_PX_IS_DHRS);
+    ASSERT_BIT_SET(reg, AHCI_PX_IS_PSS);
+    /* BUG: we expect AHCI_PX_IS_DPS to be set. */
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_IS_DPS);
+
+    /* Clear expected interrupts and assert all interrupts now cleared. */
+    PX_WREG(i, AHCI_PX_IS, AHCI_PX_IS_DHRS | AHCI_PX_IS_PSS | AHCI_PX_IS_DPS);
+    g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0);
+
+    /* Check for errors. */
+    reg = PX_RREG(i, AHCI_PX_SERR);
+    g_assert_cmphex(reg, ==, 0);
+    reg = PX_RREG(i, AHCI_PX_TFD);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_STS_ERR);
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_ERR);
+
+    /* Investigate CMD #0, assert that we read 512 bytes */
+    memread(clb, &cmd, sizeof(cmd));
+    g_assert_cmphex(512, ==, le32_to_cpu(cmd.prdbc));
+
+    /* Investigate FIS responses */
+    memread(fb + 0x20, pio, 0x20);
+    memread(fb + 0x40, d2h, 0x20);
+    g_assert_cmphex(pio->fis_type, ==, 0x5f);
+    g_assert_cmphex(d2h->fis_type, ==, 0x34);
+    g_assert_cmphex(pio->flags, ==, d2h->flags);
+    g_assert_cmphex(pio->status, ==, d2h->status);
+    g_assert_cmphex(pio->error, ==, d2h->error);
+
+    reg = PX_RREG(i, AHCI_PX_TFD);
+    g_assert_cmphex((reg & AHCI_PX_TFD_ERR), ==, pio->error);
+    g_assert_cmphex((reg & AHCI_PX_TFD_STS), ==, pio->status);
+    /* The PIO Setup FIS contains a "bytes read" field, which is a
+     * 16-bit value. The Physical Region Descriptor Byte Count is
+     * 32-bit, but for small transfers using one PRD, it should match. */
+    g_assert_cmphex(le16_to_cpu(pio->res4), ==, le32_to_cpu(cmd.prdbc));
+
+    /* Last, but not least: Investigate the IDENTIFY response data. */
+    memread(data_ptr, &buff, 512);
+
+    /* Check serial number/version in the buffer */
+    /* NB: IDENTIFY strings are packed in 16bit little endian chunks.
+     * Since we copy byte-for-byte in ahci-test, on both LE and BE, we need to
+     * unchunk this data. By contrast, ide-test copies 2 bytes at a time, and
+     * as a consequence, only needs to unchunk the data on LE machines. */
+    string_bswap16(&buff[10], 20);
+    rc = memcmp(&buff[10], "testdisk            ", 20);
+    g_assert_cmphex(rc, ==, 0);
+
+    string_bswap16(&buff[23], 8);
+    rc = memcmp(&buff[23], "version ", 8);
+    g_assert_cmphex(rc, ==, 0);
+
+    g_free(d2h);
+    g_free(pio);
+}
+
+/******************************************************************************/
+/* Test Interfaces                                                            */
+/******************************************************************************/
+
+/**
+ * Basic sanity test to boot a machine, find an AHCI device, and shutdown.
+ */
+static void test_sanity(void)
+{
+    QPCIDevice *ahci;
+    ahci = ahci_boot();
+    ahci_shutdown(ahci);
+}
+
+/**
+ * Ensure that the PCI configuration space for the AHCI device is in-line with
+ * the AHCI 1.3 specification for initial values.
+ */
+static void test_pci_spec(void)
+{
+    QPCIDevice *ahci;
+    ahci = ahci_boot();
+    ahci_test_pci_spec(ahci);
+    ahci_shutdown(ahci);
+}
+
+/**
+ * Engage the PCI AHCI device and sanity check the response.
+ * Perform additional PCI config space bringup for the HBA.
+ */
+static void test_pci_enable(void)
+{
+    QPCIDevice *ahci;
+    void *hba_base;
+    ahci = ahci_boot();
+    ahci_pci_enable(ahci, &hba_base);
+    ahci_shutdown(ahci);
+}
+
+/**
+ * Investigate the memory mapped regions of the HBA,
+ * and test them for AHCI specification adherence.
+ */
+static void test_hba_spec(void)
+{
+    QPCIDevice *ahci;
+    void *hba_base;
+
+    ahci = ahci_boot();
+    ahci_pci_enable(ahci, &hba_base);
+    ahci_test_hba_spec(ahci, hba_base);
+    ahci_shutdown(ahci);
+}
+
+/**
+ * Engage the HBA functionality of the AHCI PCI device,
+ * and bring it into a functional idle state.
+ */
+static void test_hba_enable(void)
+{
+    QPCIDevice *ahci;
+    void *hba_base;
+
+    ahci = ahci_boot();
+    ahci_pci_enable(ahci, &hba_base);
+    ahci_hba_enable(ahci, hba_base);
+    ahci_shutdown(ahci);
+}
+
+/**
+ * Bring up the device and issue an IDENTIFY command.
+ * Inspect the state of the HBA device and the data returned.
+ */
+static void test_identify(void)
+{
+    QPCIDevice *ahci;
+    void *hba_base;
+
+    ahci = ahci_boot();
+    ahci_pci_enable(ahci, &hba_base);
+    ahci_hba_enable(ahci, hba_base);
+    ahci_test_identify(ahci, hba_base);
+    ahci_shutdown(ahci);
+}
+
+/******************************************************************************/
+
+int main(int argc, char **argv)
+{
+    const char *arch;
+    int fd;
+    int ret;
+    int c;
+
+    static struct option long_options[] = {
+        {"pedantic", no_argument, 0, 'p' },
+        {0, 0, 0, 0},
+    };
+
+    /* Should be first to utilize g_test functionality, So we can see errors. */
+    g_test_init(&argc, &argv, NULL);
+
+    while (1) {
+        c = getopt_long(argc, argv, "", long_options, NULL);
+        if (c == -1) {
+            break;
+        }
+        switch (c) {
+        case -1:
+            break;
+        case 'p':
+            ahci_pedantic = 1;
+            break;
+        default:
+            fprintf(stderr, "Unrecognized ahci_test option.\n");
+            g_assert_not_reached();
+        }
+    }
+
+    /* Check architecture */
+    arch = qtest_get_arch();
+    if (strcmp(arch, "i386") && strcmp(arch, "x86_64")) {
+        g_test_message("Skipping test for non-x86");
+        return 0;
+    }
+
+    /* Create a temporary raw image */
+    fd = mkstemp(tmp_path);
+    g_assert(fd >= 0);
+    ret = ftruncate(fd, TEST_IMAGE_SIZE);
+    g_assert(ret == 0);
+    close(fd);
+
+    /* Run the tests */
+    qtest_add_func("/ahci/sanity",     test_sanity);
+    qtest_add_func("/ahci/pci_spec",   test_pci_spec);
+    qtest_add_func("/ahci/pci_enable", test_pci_enable);
+    qtest_add_func("/ahci/hba_spec",   test_hba_spec);
+    qtest_add_func("/ahci/hba_enable", test_hba_enable);
+    qtest_add_func("/ahci/identify",   test_identify);
+
+    ret = g_test_run();
+
+    /* Cleanup */
+    unlink(tmp_path);
+
+    return ret;
+}
diff --git a/tests/image-fuzzer/qcow2/fuzz.py b/tests/image-fuzzer/qcow2/fuzz.py
index 57527f9b4a..20eba6bc1b 100644
--- a/tests/image-fuzzer/qcow2/fuzz.py
+++ b/tests/image-fuzzer/qcow2/fuzz.py
@@ -18,8 +18,8 @@
 
 import random
 
-
 UINT8 = 0xff
+UINT16 = 0xffff
 UINT32 = 0xffffffff
 UINT64 = 0xffffffffffffffff
 # Most significant bit orders
@@ -28,6 +28,8 @@ UINT64_M = 63
 # Fuzz vectors
 UINT8_V = [0, 0x10, UINT8/4, UINT8/2 - 1, UINT8/2, UINT8/2 + 1, UINT8 - 1,
            UINT8]
+UINT16_V = [0, 0x100, 0x1000, UINT16/4, UINT16/2 - 1, UINT16/2, UINT16/2 + 1,
+            UINT16 - 1, UINT16]
 UINT32_V = [0, 0x100, 0x1000, 0x10000, 0x100000, UINT32/4, UINT32/2 - 1,
             UINT32/2, UINT32/2 + 1, UINT32 - 1, UINT32]
 UINT64_V = UINT32_V + [0x1000000, 0x10000000, 0x100000000, UINT64/4,
@@ -332,9 +334,8 @@ def l1_entry(current):
     constraints = UINT64_V
     # Reserved bits are ignored
     # Added a possibility when only flags are fuzzed
-    offset = 0x7fffffffffffffff & random.choice([selector(current,
-                                                          constraints),
-                                                 current])
+    offset = 0x7fffffffffffffff & \
+             random.choice([selector(current, constraints), current])
     is_cow = random.randint(0, 1)
     return offset + (is_cow << UINT64_M)
 
@@ -344,12 +345,23 @@ def l2_entry(current):
     constraints = UINT64_V
     # Reserved bits are ignored
     # Add a possibility when only flags are fuzzed
-    offset = 0x3ffffffffffffffe & random.choice([selector(current,
-                                                          constraints),
-                                                 current])
+    offset = 0x3ffffffffffffffe & \
+             random.choice([selector(current, constraints), current])
     is_compressed = random.randint(0, 1)
     is_cow = random.randint(0, 1)
     is_zero = random.randint(0, 1)
     value = offset + (is_cow << UINT64_M) + \
             (is_compressed << UINT64_M - 1) + is_zero
     return value
+
+
+def refcount_table_entry(current):
+    """Fuzz an entry of the refcount table."""
+    constraints = UINT64_V
+    return selector(current, constraints)
+
+
+def refcount_block_entry(current):
+    """Fuzz an entry of a refcount block."""
+    constraints = UINT16_V
+    return selector(current, constraints)
diff --git a/tests/image-fuzzer/qcow2/layout.py b/tests/image-fuzzer/qcow2/layout.py
index 730c771d3c..63e801f4e8 100644
--- a/tests/image-fuzzer/qcow2/layout.py
+++ b/tests/image-fuzzer/qcow2/layout.py
@@ -102,6 +102,8 @@ class Image(object):
         self.end_of_extension_area = FieldsList()
         self.l2_tables = FieldsList()
         self.l1_table = FieldsList()
+        self.refcount_table = FieldsList()
+        self.refcount_blocks = FieldsList()
         self.ext_offset = 0
         self.create_header(cluster_bits, backing_file_name)
         self.set_backing_file_name(backing_file_name)
@@ -113,7 +115,8 @@ class Image(object):
     def __iter__(self):
         return chain(self.header, self.backing_file_format,
                      self.feature_name_table, self.end_of_extension_area,
-                     self.backing_file_name, self.l1_table, self.l2_tables)
+                     self.backing_file_name, self.l1_table, self.l2_tables,
+                     self.refcount_table, self.refcount_blocks)
 
     def create_header(self, cluster_bits, backing_file_name=None):
         """Generate a random valid header."""
@@ -330,6 +333,138 @@ class Image(object):
                                                 float(self.cluster_size**2)))
         self.header['l1_table_offset'][0].value = l1_offset
 
+    def create_refcount_structures(self):
+        """Generate random refcount blocks and refcount table."""
+        def allocate_rfc_blocks(data, size):
+            """Return indices of clusters allocated for refcount blocks."""
+            cluster_ids = set()
+            diff = block_ids = set([x / size for x in data])
+            while len(diff) != 0:
+                # Allocate all yet not allocated clusters
+                new = self._get_available_clusters(data | cluster_ids,
+                                                   len(diff))
+                # Indices of new refcount blocks necessary to cover clusters
+                # in 'new'
+                diff = set([x / size for x in new]) - block_ids
+                cluster_ids |= new
+                block_ids |= diff
+            return cluster_ids, block_ids
+
+        def allocate_rfc_table(data, init_blocks, block_size):
+            """Return indices of clusters allocated for the refcount table
+            and updated indices of clusters allocated for blocks and indices
+            of blocks.
+            """
+            blocks = set(init_blocks)
+            clusters = set()
+            # Number of entries in one cluster of the refcount table
+            size = self.cluster_size / UINT64_S
+            # Number of clusters necessary for the refcount table based on
+            # the current number of refcount blocks
+            table_size = int(ceil((max(blocks) + 1) / float(size)))
+            # Index of the first cluster of the refcount table
+            table_start = self._get_adjacent_clusters(data, table_size + 1)
+            # Clusters allocated for the current length of the refcount table
+            table_clusters = set(range(table_start, table_start + table_size))
+            # Clusters allocated for the refcount table including
+            # last optional one for potential l1 growth
+            table_clusters_allocated = set(range(table_start, table_start +
+                                                 table_size + 1))
+            # New refcount blocks necessary for clusters occupied by the
+            # refcount table
+            diff = set([c / block_size for c in table_clusters]) - blocks
+            blocks |= diff
+            while len(diff) != 0:
+                # Allocate clusters for new refcount blocks
+                new = self._get_available_clusters((data | clusters) |
+                                                   table_clusters_allocated,
+                                                   len(diff))
+                # Indices of new refcount blocks necessary to cover
+                # clusters in 'new'
+                diff = set([x / block_size for x in new]) - blocks
+                clusters |= new
+                blocks |= diff
+                # Check if the refcount table needs one more cluster
+                if int(ceil((max(blocks) + 1) / float(size))) > table_size:
+                    new_block_id = (table_start + table_size) / block_size
+                    # Check if the additional table cluster needs
+                    # one more refcount block
+                    if new_block_id not in blocks:
+                        diff.add(new_block_id)
+                    table_clusters.add(table_start + table_size)
+                    table_size += 1
+            return table_clusters, blocks, clusters
+
+        def create_table_entry(table_offset, block_cluster, block_size,
+                               cluster):
+            """Generate a refcount table entry."""
+            offset = table_offset + UINT64_S * (cluster / block_size)
+            return ['>Q', offset, block_cluster * self.cluster_size,
+                    'refcount_table_entry']
+
+        def create_block_entry(block_cluster, block_size, cluster):
+            """Generate a list of entries for the current block."""
+            entry_size = self.cluster_size / block_size
+            offset = block_cluster * self.cluster_size
+            entry_offset = offset + entry_size * (cluster % block_size)
+            # While snapshots are not supported all refcounts are set to 1
+            return ['>H', entry_offset, 1, 'refcount_block_entry']
+        # Size of a block entry in bits
+        refcount_bits = 1 << self.header['refcount_order'][0].value
+        # Number of refcount entries per refcount block
+        # Convert self.cluster_size from bytes to bits to have the same
+        # base for the numerator and denominator
+        block_size = self.cluster_size * 8 / refcount_bits
+        meta_data = self._get_metadata()
+        if len(self.data_clusters) == 0:
+            # All metadata for an empty guest image needs 4 clusters:
+            # header, rfc table, rfc block, L1 table.
+            # Header takes cluster #0, other clusters ##1-3 can be used
+            block_clusters = set([random.choice(list(set(range(1, 4)) -
+                                                     meta_data))])
+            block_ids = set([0])
+            table_clusters = set([random.choice(list(set(range(1, 4)) -
+                                                     meta_data -
+                                                     block_clusters))])
+        else:
+            block_clusters, block_ids = \
+                                allocate_rfc_blocks(self.data_clusters |
+                                                    meta_data, block_size)
+            table_clusters, block_ids, new_clusters = \
+                                    allocate_rfc_table(self.data_clusters |
+                                                       meta_data |
+                                                       block_clusters,
+                                                       block_ids,
+                                                       block_size)
+            block_clusters |= new_clusters
+
+        meta_data |= block_clusters | table_clusters
+        table_offset = min(table_clusters) * self.cluster_size
+        block_id = None
+        # Clusters allocated for refcount blocks
+        block_clusters = list(block_clusters)
+        # Indices of refcount blocks
+        block_ids = list(block_ids)
+        # Refcount table entries
+        rfc_table = []
+        # Refcount entries
+        rfc_blocks = []
+
+        for cluster in sorted(self.data_clusters | meta_data):
+            if cluster / block_size != block_id:
+                block_id = cluster / block_size
+                block_cluster = block_clusters[block_ids.index(block_id)]
+                rfc_table.append(create_table_entry(table_offset,
+                                                    block_cluster,
+                                                    block_size, cluster))
+            rfc_blocks.append(create_block_entry(block_cluster, block_size,
+                                                 cluster))
+        self.refcount_table = FieldsList(rfc_table)
+        self.refcount_blocks = FieldsList(rfc_blocks)
+
+        self.header['refcount_table_offset'][0].value = table_offset
+        self.header['refcount_table_clusters'][0].value = len(table_clusters)
+
     def fuzz(self, fields_to_fuzz=None):
         """Fuzz an image by corrupting values of a random subset of its fields.
 
@@ -471,6 +606,7 @@ def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None,
     image.create_feature_name_table()
     image.set_end_of_extension_area()
     image.create_l_structures()
+    image.create_refcount_structures()
     image.fuzz(fields_to_fuzz)
     image.write(test_img_path)
     return image.image_size
diff --git a/tests/image-fuzzer/runner.py b/tests/image-fuzzer/runner.py
index c903c8a342..0a8743ef41 100755
--- a/tests/image-fuzzer/runner.py
+++ b/tests/image-fuzzer/runner.py
@@ -70,7 +70,7 @@ def run_app(fd, q_args):
         """Exception for signal.alarm events."""
         pass
 
-    def handler(*arg):
+    def handler(*args):
         """Notify that an alarm event occurred."""
         raise Alarm
 
@@ -134,8 +134,8 @@ class TestEnv(object):
         self.init_path = os.getcwd()
         self.work_dir = work_dir
         self.current_dir = os.path.join(work_dir, 'test-' + test_id)
-        self.qemu_img = os.environ.get('QEMU_IMG', 'qemu-img')\
-                                  .strip().split(' ')
+        self.qemu_img = \
+            os.environ.get('QEMU_IMG', 'qemu-img').strip().split(' ')
         self.qemu_io = os.environ.get('QEMU_IO', 'qemu-io').strip().split(' ')
         self.commands = [['qemu-img', 'check', '-f', 'qcow2', '$test_img'],
                          ['qemu-img', 'info', '-f', 'qcow2', '$test_img'],
@@ -150,8 +150,7 @@ class TestEnv(object):
                           'discard $off $len'],
                          ['qemu-io', '$test_img', '-c',
                           'truncate $off']]
-        for fmt in ['raw', 'vmdk', 'vdi', 'cow', 'qcow2', 'file',
-                    'qed', 'vpc']:
+        for fmt in ['raw', 'vmdk', 'vdi', 'qcow2', 'file', 'qed', 'vpc']:
             self.commands.append(
                 ['qemu-img', 'convert', '-f', 'qcow2', '-O', fmt,
                  '$test_img', 'converted_image.' + fmt])
@@ -178,7 +177,7 @@ class TestEnv(object):
         by 'qemu-img create'.
         """
         # All formats supported by the 'qemu-img create' command.
-        backing_file_fmt = random.choice(['raw', 'vmdk', 'vdi', 'cow', 'qcow2',
+        backing_file_fmt = random.choice(['raw', 'vmdk', 'vdi', 'qcow2',
                                           'file', 'qed', 'vpc'])
         backing_file_name = 'backing_img.' + backing_file_fmt
         backing_file_size = random.randint(MIN_BACKING_FILE_SIZE,
@@ -212,10 +211,8 @@ class TestEnv(object):
 
         os.chdir(self.current_dir)
         backing_file_name, backing_file_fmt = self._create_backing_file()
-        img_size = image_generator.create_image('test.img',
-                                                backing_file_name,
-                                                backing_file_fmt,
-                                                fuzz_config)
+        img_size = image_generator.create_image(
+            'test.img', backing_file_name, backing_file_fmt, fuzz_config)
         for item in commands:
             shutil.copy('test.img', 'copy.img')
             # 'off' and 'len' are multiple of the sector size
@@ -228,7 +225,7 @@ class TestEnv(object):
             elif item[0] == 'qemu-io':
                 current_cmd = list(self.qemu_io)
             else:
-                multilog("Warning: test command '%s' is not defined.\n" \
+                multilog("Warning: test command '%s' is not defined.\n"
                          % item[0], sys.stderr, self.log, self.parent_log)
                 continue
             # Replace all placeholders with their real values
@@ -244,29 +241,28 @@ class TestEnv(object):
                            "Backing file: %s\n" \
                            % (self.seed, " ".join(current_cmd),
                               self.current_dir, backing_file_name)
-
             temp_log = StringIO.StringIO()
             try:
                 retcode = run_app(temp_log, current_cmd)
             except OSError, e:
-                multilog(test_summary + "Error: Start of '%s' failed. " \
-                         "Reason: %s\n\n" % (os.path.basename(
-                             current_cmd[0]), e[1]),
+                multilog("%sError: Start of '%s' failed. Reason: %s\n\n"
+                         % (test_summary, os.path.basename(current_cmd[0]),
+                            e[1]),
                          sys.stderr, self.log, self.parent_log)
                 raise TestException
 
             if retcode < 0:
                 self.log.write(temp_log.getvalue())
-                multilog(test_summary + "FAIL: Test terminated by signal " +
-                         "%s\n\n" % str_signal(-retcode), sys.stderr, self.log,
-                         self.parent_log)
+                multilog("%sFAIL: Test terminated by signal %s\n\n"
+                         % (test_summary, str_signal(-retcode)),
+                         sys.stderr, self.log, self.parent_log)
                 self.failed = True
             else:
                 if self.log_all:
                     self.log.write(temp_log.getvalue())
-                    multilog(test_summary + "PASS: Application exited with" +
-                             " the code '%d'\n\n" % retcode, sys.stdout,
-                             self.log, self.parent_log)
+                    multilog("%sPASS: Application exited with the code " \
+                             "'%d'\n\n" % (test_summary, retcode),
+                             sys.stdout, self.log, self.parent_log)
             temp_log.close()
             os.remove('copy.img')
 
@@ -286,8 +282,9 @@ if __name__ == '__main__':
 
         Set up test environment in TEST_DIR and run a test in it. A module for
         test image generation should be specified via IMG_GENERATOR.
+
         Example:
-        runner.py -c '[["qemu-img", "info", "$test_img"]]' /tmp/test qcow2
+          runner.py -c '[["qemu-img", "info", "$test_img"]]' /tmp/test qcow2
 
         Optional arguments:
           -h, --help                    display this help and exit
@@ -305,20 +302,22 @@ if __name__ == '__main__':
 
         '--command' accepts a JSON array of commands. Each command presents
         an application under test with all its paramaters as a list of strings,
-        e.g.
-          ["qemu-io", "$test_img", "-c", "write $off $len"]
+        e.g. ["qemu-io", "$test_img", "-c", "write $off $len"].
 
         Supported application aliases: 'qemu-img' and 'qemu-io'.
+
         Supported argument aliases: $test_img for the fuzzed image, $off
         for an offset, $len for length.
 
         Values for $off and $len will be generated based on the virtual disk
-        size of the fuzzed image
+        size of the fuzzed image.
+
         Paths to 'qemu-img' and 'qemu-io' are retrevied from 'QEMU_IMG' and
-        'QEMU_IO' environment variables
+        'QEMU_IO' environment variables.
 
         '--config' accepts a JSON array of fields to be fuzzed, e.g.
-          '[["header"], ["header", "version"]]'
+        '[["header"], ["header", "version"]]'.
+
         Each of the list elements can consist of a complex image element only
         as ["header"] or ["feature_name_table"] or an exact field as
         ["header", "version"]. In the first case random portion of the element
@@ -368,7 +367,6 @@ if __name__ == '__main__':
     seed = None
     config = None
     duration = None
-
     for opt, arg in opts:
         if opt in ('-h', '--help'):
             usage()
diff --git a/tests/libqos/pci.c b/tests/libqos/pci.c
index d5ce683d77..4e630c250a 100644
--- a/tests/libqos/pci.c
+++ b/tests/libqos/pci.c
@@ -71,6 +71,12 @@ void qpci_device_enable(QPCIDevice *dev)
     cmd = qpci_config_readw(dev, PCI_COMMAND);
     cmd |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
     qpci_config_writew(dev, PCI_COMMAND, cmd);
+
+    /* Verify the bits are now set. */
+    cmd = qpci_config_readw(dev, PCI_COMMAND);
+    g_assert_cmphex(cmd & PCI_COMMAND_IO, ==, PCI_COMMAND_IO);
+    g_assert_cmphex(cmd & PCI_COMMAND_MEMORY, ==, PCI_COMMAND_MEMORY);
+    g_assert_cmphex(cmd & PCI_COMMAND_MASTER, ==, PCI_COMMAND_MASTER);
 }
 
 uint8_t qpci_find_capability(QPCIDevice *dev, uint8_t id)
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index 830386fdaa..2355567951 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -169,9 +169,61 @@ echo "=== Testing unallocated image header ==="
 echo
 _make_test_img 64M
 # Create L1/L2
-$QEMU_IO -c "$OPEN_RW" -c "write 0 64k" | _filter_qemu_io
+$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
-$QEMU_IO -c "$OPEN_RW" -c "write 64k 64k" | _filter_qemu_io
+$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "=== Testing unaligned L1 entry ==="
+echo
+_make_test_img 64M
+$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
+# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are
+# aligned or not does not matter
+poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
+$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "=== Testing unaligned L2 entry ==="
+echo
+_make_test_img 64M
+$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
+poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
+$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "=== Testing unaligned reftable entry ==="
+echo
+_make_test_img 64M
+poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
+$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "=== Testing non-fatal corruption on freeing ==="
+echo
+_make_test_img 64M
+$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
+poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
+$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "=== Testing read-only corruption report ==="
+echo
+_make_test_img 64M
+$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
+poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
+# Should only emit a single error message
+$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io
+
+echo
+echo "=== Testing non-fatal and then fatal corruption report ==="
+echo
+_make_test_img 64M
+$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
+poke_file "$TEST_IMG" "$l2_offset"        "\x80\x00\x00\x00\x00\x05\x2a\x00"
+poke_file "$TEST_IMG" "$(($l2_offset+8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
+# Should emit two error messages
+$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io
 
 # success, all done
 echo "*** done"
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index c27c952412..4f0c6d0c8e 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -8,7 +8,7 @@ ERROR cluster 3 refcount=1 reference=3
 1 errors were found on the image.
 Data may be corrupted, or further writes to the image may corrupt it.
 incompatible_features     0x0
-qcow2: Preventing invalid write on metadata (overlaps with active L1 table); image marked as corrupt.
+qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with active L1 table); further corruption events will be suppressed
 write failed: Input/output error
 incompatible_features     0x2
 qemu-io: can't open device TEST_DIR/t.IMGFMT: IMGFMT: Image is corrupt; cannot be opened read/write
@@ -24,7 +24,7 @@ ERROR cluster 2 refcount=1 reference=2
 2 errors were found on the image.
 Data may be corrupted, or further writes to the image may corrupt it.
 incompatible_features     0x0
-qcow2: Preventing invalid write on metadata (overlaps with refcount block); image marked as corrupt.
+qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with refcount block); further corruption events will be suppressed
 write failed: Input/output error
 incompatible_features     0x2
 Repairing refcount block 0 refcount=2
@@ -56,7 +56,7 @@ Data may be corrupted, or further writes to the image may corrupt it.
 1 leaked clusters were found on the image.
 This means waste of disk space, but no harm to data.
 incompatible_features     0x0
-qcow2: Preventing invalid write on metadata (overlaps with inactive L2 table); image marked as corrupt.
+qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with inactive L2 table); further corruption events will be suppressed
 write failed: Input/output error
 incompatible_features     0x2
 Repairing cluster 4 refcount=1 reference=2
@@ -88,7 +88,7 @@ wrote 65536/65536 bytes at offset 536870912
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 discard 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-qcow2: Preventing invalid write on metadata (overlaps with active L2 table); image marked as corrupt.
+qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with active L2 table); further corruption events will be suppressed
 blkdebug: Suspended request '0'
 write failed: Input/output error
 blkdebug: Resuming request '0'
@@ -99,6 +99,57 @@ aio_write failed: No medium found
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 
 wrote 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-qcow2: Preventing invalid write on metadata (overlaps with qcow2_header); image marked as corrupt.
+qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with qcow2_header); further corruption events will be suppressed
 write failed: Input/output error
+
+=== Testing unaligned L1 entry ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qcow2: Marking image as corrupt: L2 table offset 0x42a00 unaligned (L1 index: 0); further corruption events will be suppressed
+read failed: Input/output error
+
+=== Testing unaligned L2 entry ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qcow2: Marking image as corrupt: Data cluster offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further corruption events will be suppressed
+read failed: Input/output error
+
+=== Testing unaligned reftable entry ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 
+qcow2: Marking image as corrupt: Refblock offset 0x22a00 unaligned (reftable index: 0); further corruption events will be suppressed
+write failed: Input/output error
+
+=== Testing non-fatal corruption on freeing ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qcow2: Image is corrupt: Cannot free unaligned cluster 0x52a00; further non-fatal corruption events will be suppressed
+discard 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Testing read-only corruption report ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qcow2: Image is corrupt: Data cluster offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further non-fatal corruption events will be suppressed
+read failed: Input/output error
+read failed: Input/output error
+
+=== Testing non-fatal and then fatal corruption report ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qcow2: Image is corrupt: Cannot free unaligned cluster 0x52a00; further non-fatal corruption events will be suppressed
+qcow2: Marking image as corrupt: Data cluster offset 0x62a00 unaligned (L2 offset: 0x40000, L2 index: 0x1); further corruption events will be suppressed
+discard 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read failed: Input/output error
 *** done
diff --git a/tests/qemu-iotests/069 b/tests/qemu-iotests/069
index e661598c4a..ce9e0541b2 100755
--- a/tests/qemu-iotests/069
+++ b/tests/qemu-iotests/069
@@ -38,7 +38,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 . ./common.rc
 . ./common.filter
 
-_supported_fmt cow qed qcow qcow2 vmdk
+_supported_fmt qed qcow qcow2 vmdk
 _supported_proto file
 _supported_os Linux
 _unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat"
diff --git a/tests/qemu-iotests/072 b/tests/qemu-iotests/072
index 58faa8b5a7..e4a723d733 100755
--- a/tests/qemu-iotests/072
+++ b/tests/qemu-iotests/072
@@ -38,7 +38,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 . ./common.rc
 . ./common.filter
 
-_supported_fmt vpc vmdk vhdx vdi qed qcow2 qcow cow
+_supported_fmt vpc vmdk vhdx vdi qed qcow2 qcow
 _supported_proto file
 _supported_os Linux
 
diff --git a/tests/qemu-iotests/099 b/tests/qemu-iotests/099
index a26d3d243e..ffc7ea795a 100755
--- a/tests/qemu-iotests/099
+++ b/tests/qemu-iotests/099
@@ -41,7 +41,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 
 # Basically all formats, but "raw" has issues with _filter_imgfmt regarding the
 # raw comparison image for blkverify; also, all images have to support creation
-_supported_fmt cow qcow qcow2 qed vdi vhdx vmdk vpc
+_supported_fmt qcow qcow2 qed vdi vhdx vmdk vpc
 _supported_proto file
 _supported_os Linux
 
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index 70df659d5b..89c6dde263 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -136,7 +136,6 @@ common options
 check options
     -raw                test raw (default)
     -bochs              test bochs
-    -cow                test cow
     -cloop              test cloop
     -parallels          test parallels
     -qcow               test qcow
@@ -182,11 +181,6 @@ testlist options
             xpand=false
             ;;
 
-        -cow)
-            IMGFMT=cow
-            xpand=false
-            ;;
-
         -cloop)
             IMGFMT=cloop
             IMGFMT_GENERIC=false
diff --git a/tests/test-aio.c b/tests/test-aio.c
index c6a8713e83..a7cb5c9915 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -14,6 +14,7 @@
 #include "block/aio.h"
 #include "qemu/timer.h"
 #include "qemu/sockets.h"
+#include "qemu/error-report.h"
 
 static AioContext *ctx;
 
@@ -810,11 +811,18 @@ static void test_source_timer_schedule(void)
 
 int main(int argc, char **argv)
 {
+    Error *local_error = NULL;
     GSource *src;
 
     init_clocks();
 
-    ctx = aio_context_new();
+    ctx = aio_context_new(&local_error);
+    if (!ctx) {
+        error_report("Failed to create AIO Context: '%s'",
+                     error_get_pretty(local_error));
+        error_free(local_error);
+        exit(1);
+    }
     src = aio_get_g_source(ctx);
     g_source_attach(src, NULL);
     g_source_unref(src);
diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c
index f40b7fc170..8c4d68b345 100644
--- a/tests/test-thread-pool.c
+++ b/tests/test-thread-pool.c
@@ -4,6 +4,7 @@
 #include "block/thread-pool.h"
 #include "block/block.h"
 #include "qemu/timer.h"
+#include "qemu/error-report.h"
 
 static AioContext *ctx;
 static ThreadPool *pool;
@@ -33,7 +34,7 @@ static int long_cb(void *opaque)
 static void done_cb(void *opaque, int ret)
 {
     WorkerTestData *data = opaque;
-    g_assert_cmpint(data->ret, ==, -EINPROGRESS);
+    g_assert(data->ret == -EINPROGRESS || data->ret == -ECANCELED);
     data->ret = ret;
     data->aiocb = NULL;
 
@@ -132,7 +133,7 @@ static void test_submit_many(void)
     }
 }
 
-static void test_cancel(void)
+static void do_test_cancel(bool sync)
 {
     WorkerTestData data[100];
     int num_canceled;
@@ -170,18 +171,25 @@ static void test_cancel(void)
     for (i = 0; i < 100; i++) {
         if (atomic_cmpxchg(&data[i].n, 0, 3) == 0) {
             data[i].ret = -ECANCELED;
-            bdrv_aio_cancel(data[i].aiocb);
-            active--;
+            if (sync) {
+                bdrv_aio_cancel(data[i].aiocb);
+            } else {
+                bdrv_aio_cancel_async(data[i].aiocb);
+            }
             num_canceled++;
         }
     }
     g_assert_cmpint(active, >, 0);
     g_assert_cmpint(num_canceled, <, 100);
 
-    /* Canceling the others will be a blocking operation.  */
     for (i = 0; i < 100; i++) {
         if (data[i].aiocb && data[i].n != 3) {
-            bdrv_aio_cancel(data[i].aiocb);
+            if (sync) {
+                /* Canceling the others will be a blocking operation.  */
+                bdrv_aio_cancel(data[i].aiocb);
+            } else {
+                bdrv_aio_cancel_async(data[i].aiocb);
+            }
         }
     }
 
@@ -193,22 +201,39 @@ static void test_cancel(void)
     for (i = 0; i < 100; i++) {
         if (data[i].n == 3) {
             g_assert_cmpint(data[i].ret, ==, -ECANCELED);
-            g_assert(data[i].aiocb != NULL);
+            g_assert(data[i].aiocb == NULL);
         } else {
             g_assert_cmpint(data[i].n, ==, 2);
-            g_assert_cmpint(data[i].ret, ==, 0);
+            g_assert(data[i].ret == 0 || data[i].ret == -ECANCELED);
             g_assert(data[i].aiocb == NULL);
         }
     }
 }
 
+static void test_cancel(void)
+{
+    do_test_cancel(true);
+}
+
+static void test_cancel_async(void)
+{
+    do_test_cancel(false);
+}
+
 int main(int argc, char **argv)
 {
     int ret;
+    Error *local_error = NULL;
 
     init_clocks();
 
-    ctx = aio_context_new();
+    ctx = aio_context_new(&local_error);
+    if (!ctx) {
+        error_report("Failed to create AIO Context: '%s'",
+                     error_get_pretty(local_error));
+        error_free(local_error);
+        exit(1);
+    }
     pool = aio_get_thread_pool(ctx);
 
     g_test_init(&argc, &argv, NULL);
@@ -217,6 +242,7 @@ int main(int argc, char **argv)
     g_test_add_func("/thread-pool/submit-co", test_submit_co);
     g_test_add_func("/thread-pool/submit-many", test_submit_many);
     g_test_add_func("/thread-pool/cancel", test_cancel);
+    g_test_add_func("/thread-pool/cancel-async", test_cancel_async);
 
     ret = g_test_run();
 
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 000ae31af9..d8ba415e43 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -14,6 +14,7 @@
 #include <math.h>
 #include "block/aio.h"
 #include "qemu/throttle.h"
+#include "qemu/error-report.h"
 
 static AioContext     *ctx;
 static LeakyBucket    bkt;
@@ -492,10 +493,17 @@ static void test_accounting(void)
 int main(int argc, char **argv)
 {
     GSource *src;
+    Error *local_error = NULL;
 
     init_clocks();
 
-    ctx = aio_context_new();
+    ctx = aio_context_new(&local_error);
+    if (!ctx) {
+        error_report("Failed to create AIO Context: '%s'",
+                     error_get_pretty(local_error));
+        error_free(local_error);
+        exit(1);
+    }
     src = aio_get_g_source(ctx);
     g_source_attach(src, NULL);
     g_source_unref(src);
diff --git a/thread-pool.c b/thread-pool.c
index bc07d7a1c9..86fa4a8f89 100644
--- a/thread-pool.c
+++ b/thread-pool.c
@@ -31,7 +31,6 @@ enum ThreadState {
     THREAD_QUEUED,
     THREAD_ACTIVE,
     THREAD_DONE,
-    THREAD_CANCELED,
 };
 
 struct ThreadPoolElement {
@@ -58,7 +57,6 @@ struct ThreadPool {
     AioContext *ctx;
     QEMUBH *completion_bh;
     QemuMutex lock;
-    QemuCond check_cancel;
     QemuCond worker_stopped;
     QemuSemaphore sem;
     int max_threads;
@@ -73,7 +71,6 @@ struct ThreadPool {
     int idle_threads;
     int new_threads;     /* backlog of threads we need to create */
     int pending_threads; /* threads created but not running yet */
-    int pending_cancellations; /* whether we need a cond_broadcast */
     bool stopping;
 };
 
@@ -113,9 +110,6 @@ static void *worker_thread(void *opaque)
         req->state = THREAD_DONE;
 
         qemu_mutex_lock(&pool->lock);
-        if (pool->pending_cancellations) {
-            qemu_cond_broadcast(&pool->check_cancel);
-        }
 
         qemu_bh_schedule(pool->completion_bh);
     }
@@ -173,7 +167,7 @@ static void thread_pool_completion_bh(void *opaque)
 
 restart:
     QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
-        if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
+        if (elem->state != THREAD_DONE) {
             continue;
         }
         if (elem->state == THREAD_DONE) {
@@ -191,12 +185,12 @@ restart:
             qemu_bh_schedule(pool->completion_bh);
 
             elem->common.cb(elem->common.opaque, elem->ret);
-            qemu_aio_release(elem);
+            qemu_aio_unref(elem);
             goto restart;
         } else {
             /* remove the request */
             QLIST_REMOVE(elem, all);
-            qemu_aio_release(elem);
+            qemu_aio_unref(elem);
         }
     }
 }
@@ -217,22 +211,26 @@ static void thread_pool_cancel(BlockDriverAIOCB *acb)
          */
         qemu_sem_timedwait(&pool->sem, 0) == 0) {
         QTAILQ_REMOVE(&pool->request_list, elem, reqs);
-        elem->state = THREAD_CANCELED;
         qemu_bh_schedule(pool->completion_bh);
-    } else {
-        pool->pending_cancellations++;
-        while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
-            qemu_cond_wait(&pool->check_cancel, &pool->lock);
-        }
-        pool->pending_cancellations--;
+
+        elem->state = THREAD_DONE;
+        elem->ret = -ECANCELED;
     }
+
     qemu_mutex_unlock(&pool->lock);
-    thread_pool_completion_bh(pool);
+}
+
+static AioContext *thread_pool_get_aio_context(BlockDriverAIOCB *acb)
+{
+    ThreadPoolElement *elem = (ThreadPoolElement *)acb;
+    ThreadPool *pool = elem->pool;
+    return pool->ctx;
 }
 
 static const AIOCBInfo thread_pool_aiocb_info = {
     .aiocb_size         = sizeof(ThreadPoolElement),
-    .cancel             = thread_pool_cancel,
+    .cancel_async       = thread_pool_cancel,
+    .get_aio_context    = thread_pool_get_aio_context,
 };
 
 BlockDriverAIOCB *thread_pool_submit_aio(ThreadPool *pool,
@@ -299,7 +297,6 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
     pool->ctx = ctx;
     pool->completion_bh = aio_bh_new(ctx, thread_pool_completion_bh, pool);
     qemu_mutex_init(&pool->lock);
-    qemu_cond_init(&pool->check_cancel);
     qemu_cond_init(&pool->worker_stopped);
     qemu_sem_init(&pool->sem, 0);
     pool->max_threads = 64;
@@ -342,7 +339,6 @@ void thread_pool_free(ThreadPool *pool)
 
     qemu_bh_delete(pool->completion_bh);
     qemu_sem_destroy(&pool->sem);
-    qemu_cond_destroy(&pool->check_cancel);
     qemu_cond_destroy(&pool->worker_stopped);
     qemu_mutex_destroy(&pool->lock);
     g_free(pool);
diff --git a/ui/cocoa.m b/ui/cocoa.m
index 3147178118..d37c29b4a4 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -857,7 +857,7 @@ QemuCocoaView *cocoaView;
         [op setPrompt:@"Boot image"];
         [op setMessage:@"Select the disk image you want to boot.\n\nHit the \"Cancel\" button to quit"];
         NSArray *filetypes = [NSArray arrayWithObjects:@"img", @"iso", @"dmg",
-                                 @"qcow", @"qcow2", @"cow", @"cloop", @"vmdk", nil];
+                                 @"qcow", @"qcow2", @"cloop", @"vmdk", nil];
 #if (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6)
         [op setAllowedFileTypes:filetypes];
         [op beginSheetModalForWindow:normalWindow
diff --git a/vl.c b/vl.c
index dc792fe08b..dbdca594b7 100644
--- a/vl.c
+++ b/vl.c
@@ -2968,6 +2968,7 @@ int main(int argc, char **argv, char **envp)
     ram_addr_t maxram_size = default_ram_size;
     uint64_t ram_slots = 0;
     FILE *vmstate_dump_file = NULL;
+    Error *main_loop_err = NULL;
 
     atexit(qemu_run_exit_notifiers);
     error_set_progname(argv[0]);
@@ -3358,34 +3359,34 @@ int main(int argc, char **argv, char **envp)
 
                     sz = qemu_opt_get_size(opts, "maxmem", 0);
                     if (sz < ram_size) {
-                        fprintf(stderr, "qemu: invalid -m option value: maxmem "
-                                "(%" PRIu64 ") <= initial memory ("
-                                RAM_ADDR_FMT ")\n", sz, ram_size);
+                        error_report("invalid -m option value: maxmem "
+                                "(0x%" PRIx64 ") <= initial memory (0x"
+                                RAM_ADDR_FMT ")", sz, ram_size);
                         exit(EXIT_FAILURE);
                     }
 
                     slots = qemu_opt_get_number(opts, "slots", 0);
                     if ((sz > ram_size) && !slots) {
-                        fprintf(stderr, "qemu: invalid -m option value: maxmem "
-                                "(%" PRIu64 ") more than initial memory ("
+                        error_report("invalid -m option value: maxmem "
+                                "(0x%" PRIx64 ") more than initial memory (0x"
                                 RAM_ADDR_FMT ") but no hotplug slots where "
-                                "specified\n", sz, ram_size);
+                                "specified", sz, ram_size);
                         exit(EXIT_FAILURE);
                     }
 
                     if ((sz <= ram_size) && slots) {
-                        fprintf(stderr, "qemu: invalid -m option value:  %"
+                        error_report("invalid -m option value:  %"
                                 PRIu64 " hotplug slots where specified but "
-                                "maxmem (%" PRIu64 ") <= initial memory ("
-                                RAM_ADDR_FMT ")\n", slots, sz, ram_size);
+                                "maxmem (0x%" PRIx64 ") <= initial memory (0x"
+                                RAM_ADDR_FMT ")", slots, sz, ram_size);
                         exit(EXIT_FAILURE);
                     }
                     maxram_size = sz;
                     ram_slots = slots;
                 } else if ((!maxmem_str && slots_str) ||
                            (maxmem_str && !slots_str)) {
-                    fprintf(stderr, "qemu: invalid -m option value: missing "
-                            "'%s' option\n", slots_str ? "maxmem" : "slots");
+                    error_report("invalid -m option value: missing "
+                            "'%s' option", slots_str ? "maxmem" : "slots");
                     exit(EXIT_FAILURE);
                 }
                 break;
@@ -3998,8 +3999,8 @@ int main(int argc, char **argv, char **envp)
 
     os_daemonize();
 
-    if (qemu_init_main_loop()) {
-        fprintf(stderr, "qemu_init_main_loop failed\n");
+    if (qemu_init_main_loop(&main_loop_err)) {
+        error_report("%s", error_get_pretty(main_loop_err));
         exit(1);
     }
 
diff --git a/xen-hvm.c b/xen-hvm.c
index 38059f34ba..05e522cd73 100644
--- a/xen-hvm.c
+++ b/xen-hvm.c
@@ -979,6 +979,7 @@ static void xen_wakeup_notifier(Notifier *notifier, void *data)
     xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0);
 }
 
+/* return 0 means OK, or -1 means critical issue -- will exit(1) */
 int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size,
                  MemoryRegion **ram_memory)
 {
@@ -992,15 +993,13 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size,
     state->xce_handle = xen_xc_evtchn_open(NULL, 0);
     if (state->xce_handle == XC_HANDLER_INITIAL_VALUE) {
         perror("xen: event channel open");
-        g_free(state);
-        return -errno;
+        return -1;
     }
 
     state->xenstore = xs_daemon_open();
     if (state->xenstore == NULL) {
         perror("xen: xenstore open");
-        g_free(state);
-        return -errno;
+        return -1;
     }
 
     state->exit.notify = xen_exit_notifier;
@@ -1070,7 +1069,7 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size,
     /* Initialize backend core & drivers */
     if (xen_be_init() != 0) {
         fprintf(stderr, "%s: xen backend core setup failed\n", __FUNCTION__);
-        exit(1);
+        return -1;
     }
     xen_be_register("console", &xen_console_ops);
     xen_be_register("vkbd", &xen_kbdmouse_ops);