-rw-r--r--  Makefile | 7
-rw-r--r--  Makefile.objs | 5
-rw-r--r--  aio-posix.c | 9
-rw-r--r--  aio-win32.c | 8
-rw-r--r--  async.c | 3
-rw-r--r--  block.c | 180
-rw-r--r--  block/accounting.c | 8
-rw-r--r--  block/backup.c | 17
-rw-r--r--  block/block-backend.c | 360
-rw-r--r--  block/commit.c | 3
-rw-r--r--  block/curl.c | 14
-rw-r--r--  block/io.c | 40
-rw-r--r--  block/iscsi.c | 9
-rw-r--r--  block/linux-aio.c | 5
-rw-r--r--  block/mirror.c | 17
-rw-r--r--  block/nbd-client.c | 10
-rw-r--r--  block/nfs.c | 17
-rw-r--r--  block/qapi.c | 36
-rw-r--r--  block/raw-posix.c | 230
-rw-r--r--  block/raw_bsd.c | 6
-rw-r--r--  block/sheepdog.c | 38
-rw-r--r--  block/ssh.c | 5
-rw-r--r--  block/stream.c | 3
-rw-r--r--  block/throttle-groups.c | 50
-rw-r--r--  block/win32-aio.c | 5
-rw-r--r--  blockdev.c | 640
-rw-r--r--  blockjob.c | 5
-rwxr-xr-x  configure | 112
-rw-r--r--  contrib/ivshmem-client/Makefile.objs | 1
-rw-r--r--  contrib/ivshmem-client/ivshmem-client.c | 446
-rw-r--r--  contrib/ivshmem-client/ivshmem-client.h | 213
-rw-r--r--  contrib/ivshmem-client/main.c | 240
-rw-r--r--  contrib/ivshmem-server/Makefile.objs | 1
-rw-r--r--  contrib/ivshmem-server/ivshmem-server.c | 491
-rw-r--r--  contrib/ivshmem-server/ivshmem-server.h | 167
-rw-r--r--  contrib/ivshmem-server/main.c | 263
-rw-r--r--  cpu-exec.c | 3
-rw-r--r--  crypto/cipher-builtin.c | 29
-rw-r--r--  crypto/cipher-gcrypt.c | 61
-rw-r--r--  crypto/cipher-nettle.c | 28
-rw-r--r--  crypto/cipher.c | 8
-rw-r--r--  crypto/init.c | 26
-rw-r--r--  default-configs/pci.mak | 2
-rw-r--r--  disas/mips.c | 2
-rw-r--r--  docs/qmp-events.txt | 12
-rw-r--r--  docs/qmp-spec.txt | 5
-rw-r--r--  docs/specs/ivshmem_device_spec.txt | 127
-rw-r--r--  hw/acpi/memory_hotplug.c | 8
-rw-r--r--  hw/arm/virt.c | 25
-rw-r--r--  hw/arm/xlnx-zynqmp.c | 28
-rw-r--r--  hw/block/dataplane/virtio-blk.c | 5
-rw-r--r--  hw/block/fdc.c | 20
-rw-r--r--  hw/block/virtio-blk.c | 12
-rw-r--r--  hw/block/xen_disk.c | 4
-rw-r--r--  hw/char/escc.c | 1
-rw-r--r--  hw/char/imx_serial.c | 50
-rw-r--r--  hw/char/virtio-serial-bus.c | 5
-rw-r--r--  hw/gpio/imx_gpio.c | 27
-rw-r--r--  hw/i2c/imx_i2c.c | 43
-rw-r--r--  hw/i386/pc.c | 5
-rw-r--r--  hw/i386/pc_piix.c | 1
-rw-r--r--  hw/i386/pc_q35.c | 1
-rw-r--r--  hw/i386/xen/xen_platform.c | 12
-rw-r--r--  hw/ide/cmd646.c | 1
-rw-r--r--  hw/ide/macio.c | 1
-rw-r--r--  hw/input/adb.c | 2
-rw-r--r--  hw/intc/arm_gic_kvm.c | 22
-rw-r--r--  hw/intc/imx_avic.c | 44
-rw-r--r--  hw/intc/openpic.c | 1
-rw-r--r--  hw/intc/openpic_kvm.c | 1
-rw-r--r--  hw/mem/pc-dimm.c | 15
-rw-r--r--  hw/mips/mips_malta.c | 2
-rw-r--r--  hw/misc/imx_ccm.c | 34
-rw-r--r--  hw/misc/ivshmem.c | 837
-rw-r--r--  hw/misc/macio/cuda.c | 1
-rw-r--r--  hw/misc/macio/macio.c | 1
-rw-r--r--  hw/net/cadence_gem.c | 6
-rw-r--r--  hw/net/imx_fec.c | 64
-rw-r--r--  hw/net/vmxnet3.c | 4
-rw-r--r--  hw/nvram/mac_nvram.c | 1
-rw-r--r--  hw/pci-host/grackle.c | 2
-rw-r--r--  hw/pci-host/uninorth.c | 8
-rw-r--r--  hw/pci/msix.c | 6
-rw-r--r--  hw/pci/pci.c | 40
-rw-r--r--  hw/pci/pci_host.c | 15
-rw-r--r--  hw/pci/pcie.c | 39
-rw-r--r--  hw/ppc/prep.c | 30
-rw-r--r--  hw/ppc/spapr.c | 85
-rw-r--r--  hw/ppc/spapr_iommu.c | 38
-rw-r--r--  hw/ppc/spapr_pci.c | 13
-rw-r--r--  hw/s390x/s390-virtio-ccw.c | 2
-rw-r--r--  hw/scsi/spapr_vscsi.c | 1
-rw-r--r--  hw/scsi/virtio-scsi-dataplane.c | 22
-rw-r--r--  hw/scsi/virtio-scsi.c | 19
-rw-r--r--  hw/sd/milkymist-memcard.c | 2
-rw-r--r--  hw/sd/omap_mmc.c | 2
-rw-r--r--  hw/sd/pl181.c | 2
-rw-r--r--  hw/sd/pxa2xx_mmci.c | 2
-rw-r--r--  hw/sd/sd.c | 2
-rw-r--r--  hw/sd/sdhci-internal.h (renamed from hw/sd/sdhci.h) | 73
-rw-r--r--  hw/sd/sdhci.c | 3
-rw-r--r--  hw/sd/ssi-sd.c | 2
-rw-r--r--  hw/timer/imx_epit.c | 48
-rw-r--r--  hw/timer/imx_gpt.c | 56
-rw-r--r--  hw/usb/dev-storage.c | 30
-rw-r--r--  hw/virtio/vhost-user.c | 54
-rw-r--r--  hw/virtio/virtio.c | 52
-rw-r--r--  hw/xen/xen_pt.c | 8
-rw-r--r--  hw/xen/xen_pt.h | 1
-rw-r--r--  hw/xen/xen_pt_config_init.c | 2
-rw-r--r--  hw/xen/xen_pt_msi.c | 13
-rw-r--r--  include/block/accounting.h | 3
-rw-r--r--  include/block/aio.h | 40
-rw-r--r--  include/block/block.h | 33
-rw-r--r--  include/block/block_int.h | 27
-rw-r--r--  include/block/throttle-groups.h | 6
-rw-r--r--  include/glib-compat.h | 8
-rw-r--r--  include/hw/arm/xlnx-zynqmp.h | 3
-rw-r--r--  include/hw/compat.h | 6
-rw-r--r--  include/hw/i386/pc.h | 6
-rw-r--r--  include/hw/intc/arm_gic_common.h | 1
-rw-r--r--  include/hw/mem/pc-dimm.h | 7
-rw-r--r--  include/hw/misc/ivshmem.h | 25
-rw-r--r--  include/hw/pci-host/spapr.h | 3
-rw-r--r--  include/hw/pci/msix.h | 16
-rw-r--r--  include/hw/pci/pci.h | 1
-rw-r--r--  include/hw/ppc/spapr.h | 6
-rw-r--r--  include/hw/sd/sd.h (renamed from include/hw/sd.h) | 0
-rw-r--r--  include/hw/sd/sdhci.h | 94
-rw-r--r--  include/hw/virtio/virtio.h | 3
-rw-r--r--  include/qapi/qmp/qbool.h | 2
-rw-r--r--  include/qapi/qmp/qdict.h | 2
-rw-r--r--  include/qapi/qmp/qfloat.h | 2
-rw-r--r--  include/qapi/qmp/qint.h | 2
-rw-r--r--  include/qapi/qmp/qlist.h | 2
-rw-r--r--  include/qapi/qmp/qobject.h | 4
-rw-r--r--  include/qapi/qmp/qstring.h | 2
-rw-r--r--  include/qemu/event_notifier.h | 2
-rw-r--r--  include/qemu/log.h | 1
-rw-r--r--  include/qemu/typedefs.h | 1
-rw-r--r--  include/sysemu/block-backend.h | 13
-rw-r--r--  include/sysemu/char.h | 10
-rw-r--r--  iohandler.c | 3
-rw-r--r--  migration/block.c | 5
-rw-r--r--  monitor.c | 196
-rw-r--r--  nbd.c | 4
-rw-r--r--  net/dump.c | 228
-rw-r--r--  net/net.c | 13
-rw-r--r--  pc-bios/openbios-ppc | bin 746588 -> 746588 bytes
-rw-r--r--  pc-bios/openbios-sparc32 | bin 381512 -> 381584 bytes
-rw-r--r--  pc-bios/openbios-sparc64 | bin 1616768 -> 1616864 bytes
-rw-r--r--  qapi/block-core.json | 22
-rw-r--r--  qapi/qmp-input-visitor.c | 36
-rw-r--r--  qemu-char.c | 9
-rw-r--r--  qemu-doc.texi | 23
-rw-r--r--  qemu-log.c | 3
-rw-r--r--  qemu-options.hx | 8
-rw-r--r--  qga/main.c | 11
-rw-r--r--  qmp-commands.hx | 4
-rw-r--r--  qmp.c | 6
-rw-r--r--  qobject/qbool.c | 4
-rw-r--r--  qobject/qdict.c | 39
-rw-r--r--  qobject/qfloat.c | 4
-rw-r--r--  qobject/qint.c | 4
-rw-r--r--  qobject/qlist.c | 3
-rw-r--r--  qobject/qstring.c | 4
m---------  roms/openbios | 0
-rw-r--r--  scripts/qemu-gdb.py | 6
-rw-r--r--  scripts/qemugdb/aio.py | 58
-rw-r--r--  scripts/qemugdb/coroutine.py | 90
-rw-r--r--  target-alpha/translate.c | 5
-rw-r--r--  target-arm/cpu.h | 83
-rw-r--r--  target-arm/helper.c | 388
-rw-r--r--  target-arm/internals.h | 40
-rw-r--r--  target-arm/op_helper.c | 18
-rw-r--r--  target-arm/translate-a64.c | 7
-rw-r--r--  target-arm/translate.c | 52
-rw-r--r--  target-cris/translate.c | 5
-rw-r--r--  target-i386/bpt_helper.c | 224
-rw-r--r--  target-i386/cpu.c | 10
-rw-r--r--  target-i386/cpu.h | 35
-rw-r--r--  target-i386/helper.h | 4
-rw-r--r--  target-i386/machine.c | 8
-rw-r--r--  target-i386/seg_helper.c | 8
-rw-r--r--  target-i386/smm_helper.c | 4
-rw-r--r--  target-i386/translate.c | 37
-rw-r--r--  target-lm32/translate.c | 5
-rw-r--r--  target-m68k/translate.c | 5
-rw-r--r--  target-microblaze/translate.c | 5
-rw-r--r--  target-mips/cpu.c | 9
-rw-r--r--  target-mips/cpu.h | 37
-rw-r--r--  target-mips/helper.c | 10
-rw-r--r--  target-mips/helper.h | 2
-rw-r--r--  target-mips/op_helper.c | 64
-rw-r--r--  target-mips/translate.c | 49
-rw-r--r--  target-mips/translate_init.c | 4
-rw-r--r--  target-moxie/translate.c | 5
-rw-r--r--  target-openrisc/translate.c | 5
-rw-r--r--  target-ppc/cpu.h | 11
-rw-r--r--  target-ppc/kvm.c | 19
-rw-r--r--  target-ppc/kvm_ppc.h | 2
-rw-r--r--  target-ppc/mmu_helper.c | 16
-rw-r--r--  target-ppc/translate.c | 9
-rw-r--r--  target-ppc/translate_init.c | 5
-rw-r--r--  target-s390x/translate.c | 5
-rw-r--r--  target-sh4/translate.c | 5
-rw-r--r--  target-sparc/translate.c | 2
-rw-r--r--  target-unicore32/translate.c | 8
-rw-r--r--  target-xtensa/translate.c | 5
-rw-r--r--  tcg/mips/tcg-target.c | 90
-rw-r--r--  tcg/mips/tcg-target.h | 11
-rw-r--r--  tcg/ppc/tcg-target.c | 79
-rw-r--r--  tcg/tcg-opc.h | 13
-rw-r--r--  tests/Makefile | 14
-rw-r--r--  tests/fdc-test.c | 4
-rw-r--r--  tests/ivshmem-test.c | 491
-rw-r--r--  tests/libqtest.c | 37
-rw-r--r--  tests/libqtest.h | 2
-rwxr-xr-x  tests/qemu-iotests/071 | 54
-rw-r--r--  tests/qemu-iotests/071.out | 12
-rwxr-xr-x  tests/qemu-iotests/081 | 18
-rw-r--r--  tests/qemu-iotests/081.out | 5
-rwxr-xr-x  tests/qemu-iotests/087 | 2
-rw-r--r--  tests/qemu-iotests/087.out | 4
-rw-r--r--  tests/test-aio.c | 82
-rw-r--r--  tests/test-crypto-cipher.c | 80
-rw-r--r--  tests/test-qga.c | 12
-rw-r--r--  tests/vhost-user-bridge.c | 1110
-rw-r--r--  tests/vhost-user-test.c | 18
-rw-r--r--  trace-events | 8
-rw-r--r--  translate-all.c | 2
-rw-r--r--  util/event_notifier-posix.c | 2
-rw-r--r--  util/mmap-alloc.c | 4
-rw-r--r--  vl.c | 11
234 files changed, 8142 insertions, 2275 deletions
diff --git a/Makefile b/Makefile
index cbf252d547..fc1f8bb934 100644
--- a/Makefile
+++ b/Makefile
@@ -151,6 +151,8 @@ dummy := $(call unnest-vars,, \
                 stub-obj-y \
                 util-obj-y \
                 qga-obj-y \
+                ivshmem-client-obj-y \
+                ivshmem-server-obj-y \
                 qga-vss-dll-obj-y \
                 block-obj-y \
                 block-obj-m \
@@ -323,6 +325,11 @@ ifneq ($(EXESUF),)
 qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
 endif
 
+ivshmem-client$(EXESUF): $(ivshmem-client-obj-y)
+	$(call LINK, $^)
+ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a
+	$(call LINK, $^)
+
 clean:
 # avoid old build problems by removing potentially incorrect old files
 	rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h
diff --git a/Makefile.objs b/Makefile.objs
index ecfe03c195..fe02ee2cf4 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -104,3 +104,8 @@ target-obj-y += trace/
 # by libqemuutil.a.  These should be moved to a separate .json schema.
 qga-obj-y = qga/
 qga-vss-dll-obj-y = qga/
+
+######################################################################
+# contrib
+ivshmem-client-obj-y = contrib/ivshmem-client/
+ivshmem-server-obj-y = contrib/ivshmem-server/
diff --git a/aio-posix.c b/aio-posix.c
index d4770336c5..0467f23a63 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -25,6 +25,7 @@ struct AioHandler
     IOHandler *io_write;
     int deleted;
     void *opaque;
+    bool is_external;
     QLIST_ENTRY(AioHandler) node;
 };
 
@@ -43,6 +44,7 @@ static AioHandler *find_aio_handler(AioContext *ctx, int fd)
 
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
+                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         void *opaque)
@@ -82,6 +84,7 @@ void aio_set_fd_handler(AioContext *ctx,
         node->io_read = io_read;
         node->io_write = io_write;
         node->opaque = opaque;
+        node->is_external = is_external;
 
         node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
         node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
@@ -92,10 +95,11 @@ void aio_set_fd_handler(AioContext *ctx,
 
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *notifier,
+                            bool is_external,
                             EventNotifierHandler *io_read)
 {
     aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
-                       (IOHandler *)io_read, NULL, notifier);
+                       is_external, (IOHandler *)io_read, NULL, notifier);
 }
 
 bool aio_prepare(AioContext *ctx)
@@ -257,7 +261,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     /* fill pollfds */
     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (!node->deleted && node->pfd.events) {
+        if (!node->deleted && node->pfd.events
+            && aio_node_check(ctx, node->is_external)) {
             add_pollfd(node);
         }
     }
diff --git a/aio-win32.c b/aio-win32.c
index 50a6867458..43c4c79a75 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -28,11 +28,13 @@ struct AioHandler {
     GPollFD pfd;
     int deleted;
     void *opaque;
+    bool is_external;
     QLIST_ENTRY(AioHandler) node;
 };
 
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
+                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         void *opaque)
@@ -86,6 +88,7 @@ void aio_set_fd_handler(AioContext *ctx,
         node->opaque = opaque;
         node->io_read = io_read;
         node->io_write = io_write;
+        node->is_external = is_external;
 
         event = event_notifier_get_handle(&ctx->notifier);
         WSAEventSelect(node->pfd.fd, event,
@@ -98,6 +101,7 @@ void aio_set_fd_handler(AioContext *ctx,
 
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *e,
+                            bool is_external,
                             EventNotifierHandler *io_notify)
 {
     AioHandler *node;
@@ -133,6 +137,7 @@ void aio_set_event_notifier(AioContext *ctx,
             node->e = e;
             node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
             node->pfd.events = G_IO_IN;
+            node->is_external = is_external;
             QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
 
             g_source_add_poll(&ctx->source, &node->pfd);
@@ -304,7 +309,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
     /* fill fd sets */
     count = 0;
     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (!node->deleted && node->io_notify) {
+        if (!node->deleted && node->io_notify
+            && aio_node_check(ctx, node->is_external)) {
             events[count++] = event_notifier_get_handle(node->e);
         }
     }
diff --git a/async.c b/async.c
index efce14b63a..bdc64a3da9 100644
--- a/async.c
+++ b/async.c
@@ -247,7 +247,7 @@ aio_ctx_finalize(GSource     *source)
     }
     qemu_mutex_unlock(&ctx->bh_lock);
 
-    aio_set_event_notifier(ctx, &ctx->notifier, NULL);
+    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL);
     event_notifier_cleanup(&ctx->notifier);
     rfifolock_destroy(&ctx->lock);
     qemu_mutex_destroy(&ctx->bh_lock);
@@ -329,6 +329,7 @@ AioContext *aio_context_new(Error **errp)
     }
     g_source_set_can_recurse(&ctx->source, true);
     aio_set_event_notifier(ctx, &ctx->notifier,
+                           false,
                            (EventNotifierHandler *)
                            event_notifier_dummy_cb);
     ctx->thread_pool = NULL;
diff --git a/block.c b/block.c
index 6771c3a1a1..e9f40dc768 100644
--- a/block.c
+++ b/block.c
@@ -257,7 +257,6 @@ BlockDriverState *bdrv_new(void)
     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
         QLIST_INIT(&bs->op_blockers[i]);
     }
-    bdrv_iostatus_disable(bs);
     notifier_list_init(&bs->close_notifiers);
     notifier_with_return_list_init(&bs->before_write_notifiers);
     qemu_co_queue_init(&bs->throttled_reqs[0]);
@@ -857,7 +856,6 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
         goto fail_opts;
     }
 
-    bs->guest_block_size = 512;
     bs->request_alignment = 512;
     bs->zero_beyond_eof = true;
     open_flags = bdrv_open_flags(bs, flags);
@@ -1081,6 +1079,10 @@ static int bdrv_fill_options(QDict **options, const char **pfilename,
         }
     }
 
+    if (runstate_check(RUN_STATE_INMIGRATE)) {
+        *flags |= BDRV_O_INCOMING;
+    }
+
     return 0;
 }
 
@@ -1908,6 +1910,10 @@ void bdrv_close(BlockDriverState *bs)
     bdrv_drain(bs); /* in case flush left pending I/O */
     notifier_list_notify(&bs->close_notifiers, bs);
 
+    if (bs->blk) {
+        blk_dev_change_media_cb(bs->blk, false);
+    }
+
     if (bs->drv) {
         BdrvChild *child, *next;
 
@@ -1946,10 +1952,6 @@ void bdrv_close(BlockDriverState *bs)
         bs->full_open_options = NULL;
     }
 
-    if (bs->blk) {
-        blk_dev_change_media_cb(bs->blk, false);
-    }
-
     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
         g_free(ban);
     }
@@ -1998,19 +2000,10 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
     /* move some fields that need to stay attached to the device */
 
     /* dev info */
-    bs_dest->guest_block_size   = bs_src->guest_block_size;
     bs_dest->copy_on_read       = bs_src->copy_on_read;
 
     bs_dest->enable_write_cache = bs_src->enable_write_cache;
 
-    /* r/w error */
-    bs_dest->on_read_error      = bs_src->on_read_error;
-    bs_dest->on_write_error     = bs_src->on_write_error;
-
-    /* i/o status */
-    bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
-    bs_dest->iostatus           = bs_src->iostatus;
-
     /* dirty bitmap */
     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
 }
@@ -2497,82 +2490,6 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
 }
 
-void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
-                       BlockdevOnError on_write_error)
-{
-    bs->on_read_error = on_read_error;
-    bs->on_write_error = on_write_error;
-}
-
-BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
-{
-    return is_read ? bs->on_read_error : bs->on_write_error;
-}
-
-BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
-{
-    BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
-
-    switch (on_err) {
-    case BLOCKDEV_ON_ERROR_ENOSPC:
-        return (error == ENOSPC) ?
-               BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
-    case BLOCKDEV_ON_ERROR_STOP:
-        return BLOCK_ERROR_ACTION_STOP;
-    case BLOCKDEV_ON_ERROR_REPORT:
-        return BLOCK_ERROR_ACTION_REPORT;
-    case BLOCKDEV_ON_ERROR_IGNORE:
-        return BLOCK_ERROR_ACTION_IGNORE;
-    default:
-        abort();
-    }
-}
-
-static void send_qmp_error_event(BlockDriverState *bs,
-                                 BlockErrorAction action,
-                                 bool is_read, int error)
-{
-    IoOperationType optype;
-
-    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
-    qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
-                                   bdrv_iostatus_is_enabled(bs),
-                                   error == ENOSPC, strerror(error),
-                                   &error_abort);
-}
-
-/* This is done by device models because, while the block layer knows
- * about the error, it does not know whether an operation comes from
- * the device or the block layer (from a job, for example).
- */
-void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
-                       bool is_read, int error)
-{
-    assert(error >= 0);
-
-    if (action == BLOCK_ERROR_ACTION_STOP) {
-        /* First set the iostatus, so that "info block" returns an iostatus
-         * that matches the events raised so far (an additional error iostatus
-         * is fine, but not a lost one).
-         */
-        bdrv_iostatus_set_err(bs, error);
-
-        /* Then raise the request to stop the VM and the event.
-         * qemu_system_vmstop_request_prepare has two effects.  First,
-         * it ensures that the STOP event always comes after the
-         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
-         * can observe the STOP event and do a "cont" before the STOP
-         * event is issued, the VM will not stop.  In this case, vm_start()
-         * also ensures that the STOP/RESUME pair of events is emitted.
-         */
-        qemu_system_vmstop_request_prepare();
-        send_qmp_error_event(bs, action, is_read, error);
-        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
-    } else {
-        send_qmp_error_event(bs, action, is_read, error);
-    }
-}
-
 int bdrv_is_read_only(BlockDriverState *bs)
 {
     return bs->read_only;
@@ -2766,6 +2683,11 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
         blk = blk_by_name(device);
 
         if (blk) {
+            if (!blk_bs(blk)) {
+                error_setg(errp, "Device '%s' has no medium", device);
+                return NULL;
+            }
+
             return blk_bs(blk);
         }
     }
@@ -3136,15 +3058,23 @@ void bdrv_invalidate_cache_all(Error **errp)
 /**
  * Return TRUE if the media is present
  */
-int bdrv_is_inserted(BlockDriverState *bs)
+bool bdrv_is_inserted(BlockDriverState *bs)
 {
     BlockDriver *drv = bs->drv;
+    BdrvChild *child;
 
-    if (!drv)
-        return 0;
-    if (!drv->bdrv_is_inserted)
-        return 1;
-    return drv->bdrv_is_inserted(bs);
+    if (!drv) {
+        return false;
+    }
+    if (drv->bdrv_is_inserted) {
+        return drv->bdrv_is_inserted(bs);
+    }
+    QLIST_FOREACH(child, &bs->children, next) {
+        if (!bdrv_is_inserted(child->bs)) {
+            return false;
+        }
+    }
+    return true;
 }
 
 /**
@@ -3195,11 +3125,6 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked)
     }
 }
 
-void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
-{
-    bs->guest_block_size = align;
-}
-
 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
 {
     BdrvDirtyBitmap *bm;
@@ -3597,46 +3522,6 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
     return true;
 }
 
-void bdrv_iostatus_enable(BlockDriverState *bs)
-{
-    bs->iostatus_enabled = true;
-    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
-}
-
-/* The I/O status is only enabled if the drive explicitly
- * enables it _and_ the VM is configured to stop on errors */
-bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
-{
-    return (bs->iostatus_enabled &&
-           (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
-            bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
-            bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
-}
-
-void bdrv_iostatus_disable(BlockDriverState *bs)
-{
-    bs->iostatus_enabled = false;
-}
-
-void bdrv_iostatus_reset(BlockDriverState *bs)
-{
-    if (bdrv_iostatus_is_enabled(bs)) {
-        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
-        if (bs->job) {
-            block_job_iostatus_reset(bs->job);
-        }
-    }
-}
-
-void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
-{
-    assert(bdrv_iostatus_is_enabled(bs));
-    if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
-        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
-                                         BLOCK_DEVICE_IO_STATUS_FAILED;
-    }
-}
-
 void bdrv_img_create(const char *filename, const char *fmt,
                      const char *base_filename, const char *base_fmt,
                      char *options, uint64_t img_size, int flags,
@@ -4148,14 +4033,3 @@ void bdrv_refresh_filename(BlockDriverState *bs)
         QDECREF(json);
     }
 }
-
-/* This accessor function purpose is to allow the device models to access the
- * BlockAcctStats structure embedded inside a BlockDriverState without being
- * aware of the BlockDriverState structure layout.
- * It will go away when the BlockAcctStats structure will be moved inside
- * the device models.
- */
-BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
-{
-    return &bs->stats;
-}
diff --git a/block/accounting.c b/block/accounting.c
index 01d594ffdc..a423560206 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -47,14 +47,6 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
 }
 
 
-void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
-                               unsigned int nb_sectors)
-{
-    if (stats->wr_highest_sector < sector_num + nb_sectors - 1) {
-        stats->wr_highest_sector = sector_num + nb_sectors - 1;
-    }
-}
-
 void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
                       int num_requests)
 {
diff --git a/block/backup.c b/block/backup.c
index 5696431711..ec01db8eff 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -21,6 +21,7 @@
 #include "block/blockjob.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
 
 #define BACKUP_CLUSTER_BITS 16
 #define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
@@ -215,7 +216,9 @@ static void backup_iostatus_reset(BlockJob *job)
 {
     BackupBlockJob *s = container_of(job, BackupBlockJob, common);
 
-    bdrv_iostatus_reset(s->target);
+    if (s->target->blk) {
+        blk_iostatus_reset(s->target->blk);
+    }
 }
 
 static const BlockJobDriver backup_job_driver = {
@@ -360,8 +363,10 @@ static void coroutine_fn backup_run(void *opaque)
     job->bitmap = hbitmap_alloc(end, 0);
 
     bdrv_set_enable_write_cache(target, true);
-    bdrv_set_on_error(target, on_target_error, on_target_error);
-    bdrv_iostatus_enable(target);
+    if (target->blk) {
+        blk_set_on_error(target->blk, on_target_error, on_target_error);
+        blk_iostatus_enable(target->blk);
+    }
 
     bdrv_add_before_write_notifier(bs, &before_write);
 
@@ -451,7 +456,9 @@ static void coroutine_fn backup_run(void *opaque)
     }
     hbitmap_free(job->bitmap);
 
-    bdrv_iostatus_disable(target);
+    if (target->blk) {
+        blk_iostatus_disable(target->blk);
+    }
     bdrv_op_unblock_all(target, job->common.blocker);
 
     data = g_malloc(sizeof(*data));
@@ -480,7 +487,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
 
     if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
          on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        !bdrv_iostatus_is_enabled(bs)) {
+        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
         error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
         return;
     }
diff --git a/block/block-backend.c b/block/block-backend.c
index 225655126e..19fdaaec1a 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -12,12 +12,17 @@
 
 #include "sysemu/block-backend.h"
 #include "block/block_int.h"
+#include "block/blockjob.h"
+#include "block/throttle-groups.h"
 #include "sysemu/blockdev.h"
+#include "sysemu/sysemu.h"
 #include "qapi-event.h"
 
 /* Number of coroutines to reserve per attached device model */
 #define COROUTINE_POOL_RESERVATION 64
 
+static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
+
 struct BlockBackend {
     char *name;
     int refcnt;
@@ -29,15 +34,31 @@ struct BlockBackend {
     /* TODO change to DeviceState when all users are qdevified */
     const BlockDevOps *dev_ops;
     void *dev_opaque;
+
+    /* the block size for which the guest device expects atomicity */
+    int guest_block_size;
+
+    /* If the BDS tree is removed, some of its options are stored here (which
+     * can be used to restore those options in the new BDS on insert) */
+    BlockBackendRootState root_state;
+
+    /* I/O stats (display with "info blockstats"). */
+    BlockAcctStats stats;
+
+    BlockdevOnError on_read_error, on_write_error;
+    bool iostatus_enabled;
+    BlockDeviceIoStatus iostatus;
 };
 
 typedef struct BlockBackendAIOCB {
     BlockAIOCB common;
     QEMUBH *bh;
+    BlockBackend *blk;
     int ret;
 } BlockBackendAIOCB;
 
 static const AIOCBInfo block_backend_aiocb_info = {
+    .get_aio_context = blk_aiocb_get_aio_context,
     .aiocb_size = sizeof(BlockBackendAIOCB),
 };
 
@@ -145,6 +166,10 @@ static void blk_delete(BlockBackend *blk)
         bdrv_unref(blk->bs);
         blk->bs = NULL;
     }
+    if (blk->root_state.throttle_state) {
+        g_free(blk->root_state.throttle_group);
+        throttle_group_unref(blk->root_state.throttle_state);
+    }
     /* Avoid double-remove after blk_hide_on_behalf_of_hmp_drive_del() */
     if (blk->name[0]) {
         QTAILQ_REMOVE(&blk_backends, blk, link);
@@ -309,6 +334,17 @@ void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk)
 }
 
 /*
+ * Associates a new BlockDriverState with @blk.
+ */
+void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
+{
+    assert(!blk->bs && !bs->blk);
+    bdrv_ref(bs);
+    blk->bs = bs;
+    bs->blk = blk;
+}
+
+/*
  * Attach device model @dev to @blk.
  * Return 0 on success, -EBUSY when a device model is attached already.
  */
@@ -320,7 +356,7 @@ int blk_attach_dev(BlockBackend *blk, void *dev)
     }
     blk_ref(blk);
     blk->dev = dev;
-    bdrv_iostatus_reset(blk->bs);
+    blk_iostatus_reset(blk);
     return 0;
 }
 
@@ -347,7 +383,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
     blk->dev = NULL;
     blk->dev_ops = NULL;
     blk->dev_opaque = NULL;
-    bdrv_set_guest_block_size(blk->bs, 512);
+    blk->guest_block_size = 512;
     blk_unref(blk);
 }
 
@@ -452,7 +488,47 @@ void blk_dev_resize_cb(BlockBackend *blk)
 
 void blk_iostatus_enable(BlockBackend *blk)
 {
-    bdrv_iostatus_enable(blk->bs);
+    blk->iostatus_enabled = true;
+    blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+}
+
+/* The I/O status is only enabled if the drive explicitly
+ * enables it _and_ the VM is configured to stop on errors */
+bool blk_iostatus_is_enabled(const BlockBackend *blk)
+{
+    return (blk->iostatus_enabled &&
+           (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
+            blk->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
+            blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
+}
+
+BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
+{
+    return blk->iostatus;
+}
+
+void blk_iostatus_disable(BlockBackend *blk)
+{
+    blk->iostatus_enabled = false;
+}
+
+void blk_iostatus_reset(BlockBackend *blk)
+{
+    if (blk_iostatus_is_enabled(blk)) {
+        blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+        if (blk->bs && blk->bs->job) {
+            block_job_iostatus_reset(blk->bs->job);
+        }
+    }
+}
+
+void blk_iostatus_set_err(BlockBackend *blk, int error)
+{
+    assert(blk_iostatus_is_enabled(blk));
+    if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+        blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
+                                          BLOCK_DEVICE_IO_STATUS_FAILED;
+    }
 }
 
 static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
@@ -464,7 +540,7 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
         return -EIO;
     }
 
-    if (!blk_is_inserted(blk)) {
+    if (!blk_is_available(blk)) {
         return -ENOMEDIUM;
     }
 
@@ -558,6 +634,7 @@ static BlockAIOCB *abort_aio_request(BlockBackend *blk, BlockCompletionFunc *cb,
     QEMUBH *bh;
 
     acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
+    acb->blk = blk;
     acb->ret = ret;
 
     bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb);
@@ -602,16 +679,28 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
 
 int64_t blk_getlength(BlockBackend *blk)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_getlength(blk->bs);
 }
 
 void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
 {
-    bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+    if (!blk->bs) {
+        *nb_sectors_ptr = 0;
+    } else {
+        bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+    }
 }
 
 int64_t blk_nb_sectors(BlockBackend *blk)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_nb_sectors(blk->bs);
 }
 
@@ -642,6 +731,10 @@ BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
 BlockAIOCB *blk_aio_flush(BlockBackend *blk,
                           BlockCompletionFunc *cb, void *opaque)
 {
+    if (!blk_is_available(blk)) {
+        return abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+    }
+
     return bdrv_aio_flush(blk->bs, cb, opaque);
 }
 
@@ -683,12 +776,20 @@ int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
 
 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_ioctl(blk->bs, req, buf);
 }
 
 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                           BlockCompletionFunc *cb, void *opaque)
 {
+    if (!blk_is_available(blk)) {
+        return abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+    }
+
     return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque);
 }
 
@@ -704,11 +805,19 @@ int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
 
 int blk_co_flush(BlockBackend *blk)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_co_flush(blk->bs);
 }
 
 int blk_flush(BlockBackend *blk)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_flush(blk->bs);
 }
 
@@ -719,7 +828,9 @@ int blk_flush_all(void)
 
 void blk_drain(BlockBackend *blk)
 {
-    bdrv_drain(blk->bs);
+    if (blk->bs) {
+        bdrv_drain(blk->bs);
+    }
 }
 
 void blk_drain_all(void)
@@ -727,76 +838,178 @@ void blk_drain_all(void)
     bdrv_drain_all();
 }
 
+void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
+                      BlockdevOnError on_write_error)
+{
+    blk->on_read_error = on_read_error;
+    blk->on_write_error = on_write_error;
+}
+
 BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
 {
-    return bdrv_get_on_error(blk->bs, is_read);
+    return is_read ? blk->on_read_error : blk->on_write_error;
 }
 
 BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
                                       int error)
 {
-    return bdrv_get_error_action(blk->bs, is_read, error);
+    BlockdevOnError on_err = blk_get_on_error(blk, is_read);
+
+    switch (on_err) {
+    case BLOCKDEV_ON_ERROR_ENOSPC:
+        return (error == ENOSPC) ?
+               BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
+    case BLOCKDEV_ON_ERROR_STOP:
+        return BLOCK_ERROR_ACTION_STOP;
+    case BLOCKDEV_ON_ERROR_REPORT:
+        return BLOCK_ERROR_ACTION_REPORT;
+    case BLOCKDEV_ON_ERROR_IGNORE:
+        return BLOCK_ERROR_ACTION_IGNORE;
+    default:
+        abort();
+    }
 }
 
+static void send_qmp_error_event(BlockBackend *blk,
+                                 BlockErrorAction action,
+                                 bool is_read, int error)
+{
+    IoOperationType optype;
+
+    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
+    qapi_event_send_block_io_error(blk_name(blk), optype, action,
+                                   blk_iostatus_is_enabled(blk),
+                                   error == ENOSPC, strerror(error),
+                                   &error_abort);
+}
+
+/* This is done by device models because, while the block layer knows
+ * about the error, it does not know whether an operation comes from
+ * the device or the block layer (from a job, for example).
+ */
 void blk_error_action(BlockBackend *blk, BlockErrorAction action,
                       bool is_read, int error)
 {
-    bdrv_error_action(blk->bs, action, is_read, error);
+    assert(error >= 0);
+
+    if (action == BLOCK_ERROR_ACTION_STOP) {
+        /* First set the iostatus, so that "info block" returns an iostatus
+         * that matches the events raised so far (an additional error iostatus
+         * is fine, but not a lost one).
+         */
+        blk_iostatus_set_err(blk, error);
+
+        /* Then raise the request to stop the VM and the event.
+         * qemu_system_vmstop_request_prepare has two effects.  First,
+         * it ensures that the STOP event always comes after the
+         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
+         * can observe the STOP event and do a "cont" before the STOP
+         * event is issued, the VM will not stop.  In this case, vm_start()
+         * also ensures that the STOP/RESUME pair of events is emitted.
+         */
+        qemu_system_vmstop_request_prepare();
+        send_qmp_error_event(blk, action, is_read, error);
+        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
+    } else {
+        send_qmp_error_event(blk, action, is_read, error);
+    }
 }
 
 int blk_is_read_only(BlockBackend *blk)
 {
-    return bdrv_is_read_only(blk->bs);
+    if (blk->bs) {
+        return bdrv_is_read_only(blk->bs);
+    } else {
+        return blk->root_state.read_only;
+    }
 }
 
 int blk_is_sg(BlockBackend *blk)
 {
+    if (!blk->bs) {
+        return 0;
+    }
+
     return bdrv_is_sg(blk->bs);
 }
 
 int blk_enable_write_cache(BlockBackend *blk)
 {
-    return bdrv_enable_write_cache(blk->bs);
+    if (blk->bs) {
+        return bdrv_enable_write_cache(blk->bs);
+    } else {
+        return !!(blk->root_state.open_flags & BDRV_O_CACHE_WB);
+    }
 }
 
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
 {
-    bdrv_set_enable_write_cache(blk->bs, wce);
+    if (blk->bs) {
+        bdrv_set_enable_write_cache(blk->bs, wce);
+    } else {
+        if (wce) {
+            blk->root_state.open_flags |= BDRV_O_CACHE_WB;
+        } else {
+            blk->root_state.open_flags &= ~BDRV_O_CACHE_WB;
+        }
+    }
 }
 
 void blk_invalidate_cache(BlockBackend *blk, Error **errp)
 {
+    if (!blk->bs) {
+        error_setg(errp, "Device '%s' has no medium", blk->name);
+        return;
+    }
+
     bdrv_invalidate_cache(blk->bs, errp);
 }
 
-int blk_is_inserted(BlockBackend *blk)
+bool blk_is_inserted(BlockBackend *blk)
+{
+    return blk->bs && bdrv_is_inserted(blk->bs);
+}
+
+bool blk_is_available(BlockBackend *blk)
 {
-    return bdrv_is_inserted(blk->bs);
+    return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
 }
 
 void blk_lock_medium(BlockBackend *blk, bool locked)
 {
-    bdrv_lock_medium(blk->bs, locked);
+    if (blk->bs) {
+        bdrv_lock_medium(blk->bs, locked);
+    }
 }
 
 void blk_eject(BlockBackend *blk, bool eject_flag)
 {
-    bdrv_eject(blk->bs, eject_flag);
+    if (blk->bs) {
+        bdrv_eject(blk->bs, eject_flag);
+    }
 }
 
 int blk_get_flags(BlockBackend *blk)
 {
-    return bdrv_get_flags(blk->bs);
+    if (blk->bs) {
+        return bdrv_get_flags(blk->bs);
+    } else {
+        return blk->root_state.open_flags;
+    }
 }
 
 int blk_get_max_transfer_length(BlockBackend *blk)
 {
-    return blk->bs->bl.max_transfer_length;
+    if (blk->bs) {
+        return blk->bs->bl.max_transfer_length;
+    } else {
+        return 0;
+    }
 }
 
 void blk_set_guest_block_size(BlockBackend *blk, int align)
 {
-    bdrv_set_guest_block_size(blk->bs, align);
+    blk->guest_block_size = align;
 }
 
 void *blk_blockalign(BlockBackend *blk, size_t size)
@@ -806,40 +1019,64 @@ void *blk_blockalign(BlockBackend *blk, size_t size)
 
 bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
 {
+    if (!blk->bs) {
+        return false;
+    }
+
     return bdrv_op_is_blocked(blk->bs, op, errp);
 }
 
 void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
 {
-    bdrv_op_unblock(blk->bs, op, reason);
+    if (blk->bs) {
+        bdrv_op_unblock(blk->bs, op, reason);
+    }
 }
 
 void blk_op_block_all(BlockBackend *blk, Error *reason)
 {
-    bdrv_op_block_all(blk->bs, reason);
+    if (blk->bs) {
+        bdrv_op_block_all(blk->bs, reason);
+    }
 }
 
 void blk_op_unblock_all(BlockBackend *blk, Error *reason)
 {
-    bdrv_op_unblock_all(blk->bs, reason);
+    if (blk->bs) {
+        bdrv_op_unblock_all(blk->bs, reason);
+    }
 }
 
 AioContext *blk_get_aio_context(BlockBackend *blk)
 {
-    return bdrv_get_aio_context(blk->bs);
+    if (blk->bs) {
+        return bdrv_get_aio_context(blk->bs);
+    } else {
+        return qemu_get_aio_context();
+    }
+}
+
+static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
+{
+    BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
+    return blk_get_aio_context(blk_acb->blk);
 }
 
 void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
 {
-    bdrv_set_aio_context(blk->bs, new_context);
+    if (blk->bs) {
+        bdrv_set_aio_context(blk->bs, new_context);
+    }
 }
 
 void blk_add_aio_context_notifier(BlockBackend *blk,
         void (*attached_aio_context)(AioContext *new_context, void *opaque),
         void (*detach_aio_context)(void *opaque), void *opaque)
 {
-    bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
-                                  detach_aio_context, opaque);
+    if (blk->bs) {
+        bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
+                                      detach_aio_context, opaque);
+    }
 }
 
 void blk_remove_aio_context_notifier(BlockBackend *blk,
@@ -848,28 +1085,36 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
                                      void (*detach_aio_context)(void *),
                                      void *opaque)
 {
-    bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
-                                     detach_aio_context, opaque);
+    if (blk->bs) {
+        bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
+                                         detach_aio_context, opaque);
+    }
 }
 
 void blk_add_close_notifier(BlockBackend *blk, Notifier *notify)
 {
-    bdrv_add_close_notifier(blk->bs, notify);
+    if (blk->bs) {
+        bdrv_add_close_notifier(blk->bs, notify);
+    }
 }
 
 void blk_io_plug(BlockBackend *blk)
 {
-    bdrv_io_plug(blk->bs);
+    if (blk->bs) {
+        bdrv_io_plug(blk->bs);
+    }
 }
 
 void blk_io_unplug(BlockBackend *blk)
 {
-    bdrv_io_unplug(blk->bs);
+    if (blk->bs) {
+        bdrv_io_unplug(blk->bs);
+    }
 }
 
 BlockAcctStats *blk_get_stats(BlockBackend *blk)
 {
-    return bdrv_get_stats(blk->bs);
+    return &blk->stats;
 }
 
 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
@@ -902,6 +1147,10 @@ int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
 
 int blk_truncate(BlockBackend *blk, int64_t offset)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_truncate(blk->bs, offset);
 }
 
@@ -918,20 +1167,67 @@ int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
                      int64_t pos, int size)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_save_vmstate(blk->bs, buf, pos, size);
 }
 
 int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_load_vmstate(blk->bs, buf, pos, size);
 }
 
 int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_probe_blocksizes(blk->bs, bsz);
 }
 
 int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_probe_geometry(blk->bs, geo);
 }
+
+/*
+ * Updates the BlockBackendRootState object with data from the currently
+ * attached BlockDriverState.
+ */
+void blk_update_root_state(BlockBackend *blk)
+{
+    assert(blk->bs);
+
+    blk->root_state.open_flags    = blk->bs->open_flags;
+    blk->root_state.read_only     = blk->bs->read_only;
+    blk->root_state.detect_zeroes = blk->bs->detect_zeroes;
+
+    if (blk->root_state.throttle_group) {
+        g_free(blk->root_state.throttle_group);
+        throttle_group_unref(blk->root_state.throttle_state);
+    }
+    if (blk->bs->throttle_state) {
+        const char *name = throttle_group_get_name(blk->bs);
+        blk->root_state.throttle_group = g_strdup(name);
+        blk->root_state.throttle_state = throttle_group_incref(name);
+    } else {
+        blk->root_state.throttle_group = NULL;
+        blk->root_state.throttle_state = NULL;
+    }
+}
+
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
+{
+    return &blk->root_state;
+}
diff --git a/block/commit.c b/block/commit.c
index d12e26fab6..fdebe87c16 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -17,6 +17,7 @@
 #include "block/blockjob.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
 
 enum {
     /*
@@ -213,7 +214,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
 
     if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
          on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        !bdrv_iostatus_is_enabled(bs)) {
+        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
         error_setg(errp, "Invalid parameter combination");
         return;
     }
diff --git a/block/curl.c b/block/curl.c
index 032cc8ae22..89941826ed 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -154,18 +154,20 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
     DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
     switch (action) {
         case CURL_POLL_IN:
-            aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
-                               NULL, state);
+            aio_set_fd_handler(s->aio_context, fd, false,
+                               curl_multi_read, NULL, state);
             break;
         case CURL_POLL_OUT:
-            aio_set_fd_handler(s->aio_context, fd, NULL, curl_multi_do, state);
+            aio_set_fd_handler(s->aio_context, fd, false,
+                               NULL, curl_multi_do, state);
             break;
         case CURL_POLL_INOUT:
-            aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
-                               curl_multi_do, state);
+            aio_set_fd_handler(s->aio_context, fd, false,
+                               curl_multi_read, curl_multi_do, state);
             break;
         case CURL_POLL_REMOVE:
-            aio_set_fd_handler(s->aio_context, fd, NULL, NULL, NULL);
+            aio_set_fd_handler(s->aio_context, fd, false,
+                               NULL, NULL, NULL);
             break;
     }
 
diff --git a/block/io.c b/block/io.c
index 5311473a1d..8dcad3b3fe 100644
--- a/block/io.c
+++ b/block/io.c
@@ -23,6 +23,7 @@
  */
 
 #include "trace.h"
+#include "sysemu/block-backend.h"
 #include "block/blockjob.h"
 #include "block/block_int.h"
 #include "block/throttle-groups.h"
@@ -215,6 +216,8 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs)
 /* Check if any requests are in-flight (including throttled requests) */
 bool bdrv_requests_pending(BlockDriverState *bs)
 {
+    BdrvChild *child;
+
     if (!QLIST_EMPTY(&bs->tracked_requests)) {
         return true;
     }
@@ -224,12 +227,13 @@ bool bdrv_requests_pending(BlockDriverState *bs)
     if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
         return true;
     }
-    if (bs->file && bdrv_requests_pending(bs->file->bs)) {
-        return true;
-    }
-    if (bs->backing && bdrv_requests_pending(bs->backing->bs)) {
-        return true;
+
+    QLIST_FOREACH(child, &bs->children, next) {
+        if (bdrv_requests_pending(child->bs)) {
+            return true;
+        }
     }
+
     return false;
 }
 
@@ -1151,7 +1155,9 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
 
     bdrv_set_dirty(bs, sector_num, nb_sectors);
 
-    block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
+    if (bs->wr_highest_offset < offset + bytes) {
+        bs->wr_highest_offset = offset + bytes;
+    }
 
     if (ret >= 0) {
         bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
@@ -1903,7 +1909,10 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
         }
     }
 
-    block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
+    if (bs->blk) {
+        block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE,
+                              num_reqs - outidx - 1);
+    }
 
     return outidx + 1;
 }
@@ -2618,3 +2627,20 @@ void bdrv_flush_io_queue(BlockDriverState *bs)
     }
     bdrv_start_throttled_reqs(bs);
 }
+
+void bdrv_drained_begin(BlockDriverState *bs)
+{
+    if (!bs->quiesce_counter++) {
+        aio_disable_external(bdrv_get_aio_context(bs));
+    }
+    bdrv_drain(bs);
+}
+
+void bdrv_drained_end(BlockDriverState *bs)
+{
+    assert(bs->quiesce_counter > 0);
+    if (--bs->quiesce_counter > 0) {
+        return;
+    }
+    aio_enable_external(bdrv_get_aio_context(bs));
+}
diff --git a/block/iscsi.c b/block/iscsi.c
index 93f1ee4c63..9a628b7c66 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -291,8 +291,8 @@ iscsi_set_events(IscsiLun *iscsilun)
     int ev = iscsi_which_events(iscsi);
 
     if (ev != iscsilun->events) {
-        aio_set_fd_handler(iscsilun->aio_context,
-                           iscsi_get_fd(iscsi),
+        aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
+                           false,
                            (ev & POLLIN) ? iscsi_process_read : NULL,
                            (ev & POLLOUT) ? iscsi_process_write : NULL,
                            iscsilun);
@@ -1280,9 +1280,8 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
 {
     IscsiLun *iscsilun = bs->opaque;
 
-    aio_set_fd_handler(iscsilun->aio_context,
-                       iscsi_get_fd(iscsilun->iscsi),
-                       NULL, NULL, NULL);
+    aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
+                       false, NULL, NULL, NULL);
     iscsilun->events = 0;
 
     if (iscsilun->nop_timer) {
diff --git a/block/linux-aio.c b/block/linux-aio.c
index c991443c5d..88b0520a8b 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -287,7 +287,7 @@ void laio_detach_aio_context(void *s_, AioContext *old_context)
 {
     struct qemu_laio_state *s = s_;
 
-    aio_set_event_notifier(old_context, &s->e, NULL);
+    aio_set_event_notifier(old_context, &s->e, false, NULL);
     qemu_bh_delete(s->completion_bh);
 }
 
@@ -296,7 +296,8 @@ void laio_attach_aio_context(void *s_, AioContext *new_context)
     struct qemu_laio_state *s = s_;
 
     s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
-    aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
+    aio_set_event_notifier(new_context, &s->e, false,
+                           qemu_laio_completion_cb);
 }
 
 void *laio_init(void)
diff --git a/block/mirror.c b/block/mirror.c
index 7e43511832..b1252a1b29 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -14,6 +14,7 @@
 #include "trace.h"
 #include "block/blockjob.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "qemu/bitmap.h"
@@ -599,7 +600,9 @@ immediate_exit:
     g_free(s->cow_bitmap);
     g_free(s->in_flight_bitmap);
     bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
-    bdrv_iostatus_disable(s->target);
+    if (s->target->blk) {
+        blk_iostatus_disable(s->target->blk);
+    }
 
     data = g_malloc(sizeof(*data));
     data->ret = ret;
@@ -621,7 +624,9 @@ static void mirror_iostatus_reset(BlockJob *job)
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
 
-    bdrv_iostatus_reset(s->target);
+    if (s->target->blk) {
+        blk_iostatus_reset(s->target->blk);
+    }
 }
 
 static void mirror_complete(BlockJob *job, Error **errp)
@@ -704,7 +709,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
 
     if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
          on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        !bdrv_iostatus_is_enabled(bs)) {
+        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
         error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
         return;
     }
@@ -740,8 +745,10 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
     bdrv_set_enable_write_cache(s->target, true);
-    bdrv_set_on_error(s->target, on_target_error, on_target_error);
-    bdrv_iostatus_enable(s->target);
+    if (s->target->blk) {
+        blk_set_on_error(s->target->blk, on_target_error, on_target_error);
+        blk_iostatus_enable(s->target->blk);
+    }
     s->common.co = qemu_coroutine_create(mirror_run);
     trace_mirror_start(bs, s, s->common.co, opaque);
     qemu_coroutine_enter(s->common.co, s);
diff --git a/block/nbd-client.c b/block/nbd-client.c
index e1bb9198c5..b7fd17a115 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -124,7 +124,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
     s->send_coroutine = qemu_coroutine_self();
     aio_context = bdrv_get_aio_context(bs);
 
-    aio_set_fd_handler(aio_context, s->sock,
+    aio_set_fd_handler(aio_context, s->sock, false,
                        nbd_reply_ready, nbd_restart_write, bs);
     if (qiov) {
         if (!s->is_unix) {
@@ -144,7 +144,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
     } else {
         rc = nbd_send_request(s->sock, request);
     }
-    aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, bs);
+    aio_set_fd_handler(aio_context, s->sock, false,
+                       nbd_reply_ready, NULL, bs);
     s->send_coroutine = NULL;
     qemu_co_mutex_unlock(&s->send_mutex);
     return rc;
@@ -348,14 +349,15 @@ int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
 void nbd_client_detach_aio_context(BlockDriverState *bs)
 {
     aio_set_fd_handler(bdrv_get_aio_context(bs),
-                       nbd_get_client_session(bs)->sock, NULL, NULL, NULL);
+                       nbd_get_client_session(bs)->sock,
+                       false, NULL, NULL, NULL);
 }
 
 void nbd_client_attach_aio_context(BlockDriverState *bs,
                                    AioContext *new_context)
 {
     aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sock,
-                       nbd_reply_ready, NULL, bs);
+                       false, nbd_reply_ready, NULL, bs);
 }
 
 void nbd_client_close(BlockDriverState *bs)
diff --git a/block/nfs.c b/block/nfs.c
index 887a98e3fc..fd79f89945 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -63,11 +63,10 @@ static void nfs_set_events(NFSClient *client)
 {
     int ev = nfs_which_events(client->context);
     if (ev != client->events) {
-        aio_set_fd_handler(client->aio_context,
-                           nfs_get_fd(client->context),
+        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+                           false,
                            (ev & POLLIN) ? nfs_process_read : NULL,
-                           (ev & POLLOUT) ? nfs_process_write : NULL,
-                           client);
+                           (ev & POLLOUT) ? nfs_process_write : NULL, client);
 
     }
     client->events = ev;
@@ -242,9 +241,8 @@ static void nfs_detach_aio_context(BlockDriverState *bs)
 {
     NFSClient *client = bs->opaque;
 
-    aio_set_fd_handler(client->aio_context,
-                       nfs_get_fd(client->context),
-                       NULL, NULL, NULL);
+    aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+                       false, NULL, NULL, NULL);
     client->events = 0;
 }
 
@@ -263,9 +261,8 @@ static void nfs_client_close(NFSClient *client)
         if (client->fh) {
             nfs_close(client->context, client->fh);
         }
-        aio_set_fd_handler(client->aio_context,
-                           nfs_get_fd(client->context),
-                           NULL, NULL, NULL);
+        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+                           false, NULL, NULL, NULL);
         nfs_destroy_context(client->context);
     }
     memset(client, 0, sizeof(NFSClient));
diff --git a/block/qapi.c b/block/qapi.c
index 355ba324f1..ec0f5139e2 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -301,17 +301,17 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
         info->tray_open = blk_dev_is_tray_open(blk);
     }
 
-    if (bdrv_iostatus_is_enabled(bs)) {
+    if (blk_iostatus_is_enabled(blk)) {
         info->has_io_status = true;
-        info->io_status = bs->iostatus;
+        info->io_status = blk_iostatus(blk);
     }
 
-    if (!QLIST_EMPTY(&bs->dirty_bitmaps)) {
+    if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) {
         info->has_dirty_bitmaps = true;
         info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
     }
 
-    if (bs->drv) {
+    if (bs && bs->drv) {
         info->has_inserted = true;
         info->inserted = bdrv_block_device_info(bs, errp);
         if (info->inserted == NULL) {
@@ -344,18 +344,22 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
     }
 
     s->stats = g_malloc0(sizeof(*s->stats));
-    s->stats->rd_bytes = bs->stats.nr_bytes[BLOCK_ACCT_READ];
-    s->stats->wr_bytes = bs->stats.nr_bytes[BLOCK_ACCT_WRITE];
-    s->stats->rd_operations = bs->stats.nr_ops[BLOCK_ACCT_READ];
-    s->stats->wr_operations = bs->stats.nr_ops[BLOCK_ACCT_WRITE];
-    s->stats->rd_merged = bs->stats.merged[BLOCK_ACCT_READ];
-    s->stats->wr_merged = bs->stats.merged[BLOCK_ACCT_WRITE];
-    s->stats->wr_highest_offset =
-        bs->stats.wr_highest_sector * BDRV_SECTOR_SIZE;
-    s->stats->flush_operations = bs->stats.nr_ops[BLOCK_ACCT_FLUSH];
-    s->stats->wr_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_WRITE];
-    s->stats->rd_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_READ];
-    s->stats->flush_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_FLUSH];
+    if (bs->blk) {
+        BlockAcctStats *stats = blk_get_stats(bs->blk);
+
+        s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
+        s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
+        s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
+        s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
+        s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
+        s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
+        s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
+        s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
+        s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
+        s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
+    }
+
+    s->stats->wr_highest_offset = bs->wr_highest_offset;
 
     if (bs->file) {
         s->has_parent = true;
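
In bdrv_query_stats() the accounting numbers are now read through blk_get_stats() on the attached BlockBackend rather than from the old bs->stats field, and only wr_highest_offset remains on the BlockDriverState. block/accounting.h is not part of this excerpt, so the structure below is reconstructed purely from the fields the hunk reads; treat it as an assumption about its shape, not a quotation of the header:

    /* assumed layout, inferred from the bdrv_query_stats() hunk above */
    typedef struct BlockAcctStats {
        uint64_t nr_bytes[BLOCK_MAX_IOTYPE];      /* indexed by BLOCK_ACCT_READ/WRITE/FLUSH */
        uint64_t nr_ops[BLOCK_MAX_IOTYPE];
        uint64_t total_time_ns[BLOCK_MAX_IOTYPE];
        uint64_t merged[BLOCK_MAX_IOTYPE];
    } BlockAcctStats;

When a BlockDriverState has no BlockBackend attached (bs->blk == NULL), the per-backend counters are simply left at zero instead of dereferencing a missing blk.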
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 3a527f0b6b..918c756c2e 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -127,11 +127,6 @@ do { \
 
 #define FTYPE_FILE   0
 #define FTYPE_CD     1
-#define FTYPE_FD     2
-
-/* if the FD is not accessed during that time (in ns), we try to
-   reopen it to see if the disk has been changed */
-#define FD_OPEN_TIMEOUT (1000000000)
 
 #define MAX_BLOCKSIZE	4096
 
@@ -141,13 +136,6 @@ typedef struct BDRVRawState {
     int open_flags;
     size_t buf_align;
 
-#if defined(__linux__)
-    /* linux floppy specific */
-    int64_t fd_open_time;
-    int64_t fd_error_time;
-    int fd_got_error;
-    int fd_media_changed;
-#endif
 #ifdef CONFIG_LINUX_AIO
     int use_aio;
     void *aio_ctx;
@@ -635,7 +623,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
     }
 #endif
 
-    if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
+    if (s->type == FTYPE_CD) {
         raw_s->open_flags |= O_NONBLOCK;
     }
 
@@ -2187,47 +2175,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
 }
 
 #if defined(__linux__)
-/* Note: we do not have a reliable method to detect if the floppy is
-   present. The current method is to try to open the floppy at every
-   I/O and to keep it opened during a few hundreds of ms. */
-static int fd_open(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int last_media_present;
-
-    if (s->type != FTYPE_FD)
-        return 0;
-    last_media_present = (s->fd >= 0);
-    if (s->fd >= 0 &&
-        (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
-        qemu_close(s->fd);
-        s->fd = -1;
-        DPRINTF("Floppy closed\n");
-    }
-    if (s->fd < 0) {
-        if (s->fd_got_error &&
-            (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
-            DPRINTF("No floppy (open delayed)\n");
-            return -EIO;
-        }
-        s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
-        if (s->fd < 0) {
-            s->fd_error_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-            s->fd_got_error = 1;
-            if (last_media_present)
-                s->fd_media_changed = 1;
-            DPRINTF("No floppy\n");
-            return -EIO;
-        }
-        DPRINTF("Floppy opened\n");
-    }
-    if (!last_media_present)
-        s->fd_media_changed = 1;
-    s->fd_open_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-    s->fd_got_error = 0;
-    return 0;
-}
-
 static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
     BDRVRawState *s = bs->opaque;
@@ -2256,8 +2203,8 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
     pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
     return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
 }
+#endif /* linux */
 
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 static int fd_open(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
@@ -2267,14 +2214,6 @@ static int fd_open(BlockDriverState *bs)
         return 0;
     return -EIO;
 }
-#else /* !linux && !FreeBSD */
-
-static int fd_open(BlockDriverState *bs)
-{
-    return 0;
-}
-
-#endif /* !linux && !FreeBSD */
 
 static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
     int64_t sector_num, int nb_sectors,
@@ -2318,14 +2257,13 @@ static int hdev_create(const char *filename, QemuOpts *opts,
     int64_t total_size = 0;
     bool has_prefix;
 
-    /* This function is used by all three protocol block drivers and therefore
-     * any of these three prefixes may be given.
+    /* This function is used by both protocol block drivers and therefore either
+     * of these prefixes may be given.
      * The return value has to be stored somewhere, otherwise this is an error
      * due to -Werror=unused-value. */
     has_prefix =
         strstart(filename, "host_device:", &filename) ||
-        strstart(filename, "host_cdrom:" , &filename) ||
-        strstart(filename, "host_floppy:", &filename);
+        strstart(filename, "host_cdrom:" , &filename);
 
     (void)has_prefix;
 
@@ -2405,155 +2343,6 @@ static BlockDriver bdrv_host_device = {
 #endif
 };
 
-#ifdef __linux__
-static void floppy_parse_filename(const char *filename, QDict *options,
-                                  Error **errp)
-{
-    /* The prefix is optional, just as for "file". */
-    strstart(filename, "host_floppy:", &filename);
-
-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
-}
-
-static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
-                       Error **errp)
-{
-    BDRVRawState *s = bs->opaque;
-    Error *local_err = NULL;
-    int ret;
-
-    s->type = FTYPE_FD;
-
-    /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
-    ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
-    if (ret) {
-        if (local_err) {
-            error_propagate(errp, local_err);
-        }
-        return ret;
-    }
-
-    /* close fd so that we can reopen it as needed */
-    qemu_close(s->fd);
-    s->fd = -1;
-    s->fd_media_changed = 1;
-
-    error_report("Host floppy pass-through is deprecated");
-    error_printf("Support for it will be removed in a future release.\n");
-    return 0;
-}
-
-static int floppy_probe_device(const char *filename)
-{
-    int fd, ret;
-    int prio = 0;
-    struct floppy_struct fdparam;
-    struct stat st;
-
-    if (strstart(filename, "/dev/fd", NULL) &&
-        !strstart(filename, "/dev/fdset/", NULL) &&
-        !strstart(filename, "/dev/fd/", NULL)) {
-        prio = 50;
-    }
-
-    fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
-    if (fd < 0) {
-        goto out;
-    }
-    ret = fstat(fd, &st);
-    if (ret == -1 || !S_ISBLK(st.st_mode)) {
-        goto outc;
-    }
-
-    /* Attempt to detect via a floppy specific ioctl */
-    ret = ioctl(fd, FDGETPRM, &fdparam);
-    if (ret >= 0)
-        prio = 100;
-
-outc:
-    qemu_close(fd);
-out:
-    return prio;
-}
-
-
-static int floppy_is_inserted(BlockDriverState *bs)
-{
-    return fd_open(bs) >= 0;
-}
-
-static int floppy_media_changed(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int ret;
-
-    /*
-     * XXX: we do not have a true media changed indication.
-     * It does not work if the floppy is changed without trying to read it.
-     */
-    fd_open(bs);
-    ret = s->fd_media_changed;
-    s->fd_media_changed = 0;
-    DPRINTF("Floppy changed=%d\n", ret);
-    return ret;
-}
-
-static void floppy_eject(BlockDriverState *bs, bool eject_flag)
-{
-    BDRVRawState *s = bs->opaque;
-    int fd;
-
-    if (s->fd >= 0) {
-        qemu_close(s->fd);
-        s->fd = -1;
-    }
-    fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
-    if (fd >= 0) {
-        if (ioctl(fd, FDEJECT, 0) < 0)
-            perror("FDEJECT");
-        qemu_close(fd);
-    }
-}
-
-static BlockDriver bdrv_host_floppy = {
-    .format_name        = "host_floppy",
-    .protocol_name      = "host_floppy",
-    .instance_size      = sizeof(BDRVRawState),
-    .bdrv_needs_filename = true,
-    .bdrv_probe_device	= floppy_probe_device,
-    .bdrv_parse_filename = floppy_parse_filename,
-    .bdrv_file_open     = floppy_open,
-    .bdrv_close         = raw_close,
-    .bdrv_reopen_prepare = raw_reopen_prepare,
-    .bdrv_reopen_commit  = raw_reopen_commit,
-    .bdrv_reopen_abort   = raw_reopen_abort,
-    .bdrv_create         = hdev_create,
-    .create_opts         = &raw_create_opts,
-
-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
-    .bdrv_aio_flush	= raw_aio_flush,
-    .bdrv_refresh_limits = raw_refresh_limits,
-    .bdrv_io_plug = raw_aio_plug,
-    .bdrv_io_unplug = raw_aio_unplug,
-    .bdrv_flush_io_queue = raw_aio_flush_io_queue,
-
-    .bdrv_truncate      = raw_truncate,
-    .bdrv_getlength      = raw_getlength,
-    .has_variable_length = true,
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
-
-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
-    /* removable device support */
-    .bdrv_is_inserted   = floppy_is_inserted,
-    .bdrv_media_changed = floppy_media_changed,
-    .bdrv_eject         = floppy_eject,
-};
-#endif
-
 #if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 static void cdrom_parse_filename(const char *filename, QDict *options,
                                  Error **errp)
@@ -2609,15 +2398,13 @@ out:
     return prio;
 }
 
-static int cdrom_is_inserted(BlockDriverState *bs)
+static bool cdrom_is_inserted(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
     int ret;
 
     ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
-    if (ret == CDS_DISC_OK)
-        return 1;
-    return 0;
+    return ret == CDS_DISC_OK;
 }
 
 static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
@@ -2743,7 +2530,7 @@ static int cdrom_reopen(BlockDriverState *bs)
     return 0;
 }
 
-static int cdrom_is_inserted(BlockDriverState *bs)
+static bool cdrom_is_inserted(BlockDriverState *bs)
 {
     return raw_getlength(bs) > 0;
 }
@@ -2831,7 +2618,6 @@ static void bdrv_file_init(void)
     bdrv_register(&bdrv_file);
     bdrv_register(&bdrv_host_device);
 #ifdef __linux__
-    bdrv_register(&bdrv_host_floppy);
     bdrv_register(&bdrv_host_cdrom);
 #endif
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 63ee91164f..0aded31c22 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -154,11 +154,6 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset)
     return bdrv_truncate(bs->file->bs, offset);
 }
 
-static int raw_is_inserted(BlockDriverState *bs)
-{
-    return bdrv_is_inserted(bs->file->bs);
-}
-
 static int raw_media_changed(BlockDriverState *bs)
 {
     return bdrv_media_changed(bs->file->bs);
@@ -264,7 +259,6 @@ BlockDriver bdrv_raw = {
     .bdrv_refresh_limits  = &raw_refresh_limits,
     .bdrv_probe_blocksizes = &raw_probe_blocksizes,
     .bdrv_probe_geometry  = &raw_probe_geometry,
-    .bdrv_is_inserted     = &raw_is_inserted,
     .bdrv_media_changed   = &raw_media_changed,
     .bdrv_eject           = &raw_eject,
     .bdrv_lock_medium     = &raw_lock_medium,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index e7e58b782c..d80e4ed18d 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -651,14 +651,16 @@ static coroutine_fn void do_co_req(void *opaque)
     unsigned int *rlen = srco->rlen;
 
     co = qemu_coroutine_self();
-    aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co);
+    aio_set_fd_handler(srco->aio_context, sockfd, false,
+                       NULL, restart_co_req, co);
 
     ret = send_co_req(sockfd, hdr, data, wlen);
     if (ret < 0) {
         goto out;
     }
 
-    aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co);
+    aio_set_fd_handler(srco->aio_context, sockfd, false,
+                       restart_co_req, NULL, co);
 
     ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
     if (ret != sizeof(*hdr)) {
@@ -683,7 +685,8 @@ static coroutine_fn void do_co_req(void *opaque)
 out:
     /* there is at most one request for this sockfd, so it is safe to
      * set each handler to NULL. */
-    aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL);
+    aio_set_fd_handler(srco->aio_context, sockfd, false,
+                       NULL, NULL, NULL);
 
     srco->ret = ret;
     srco->finished = true;
@@ -735,7 +738,8 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
     BDRVSheepdogState *s = opaque;
     AIOReq *aio_req, *next;
 
-    aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
+                       NULL, NULL);
     close(s->fd);
     s->fd = -1;
 
@@ -938,7 +942,8 @@ static int get_sheep_fd(BDRVSheepdogState *s, Error **errp)
         return fd;
     }
 
-    aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s);
+    aio_set_fd_handler(s->aio_context, fd, false,
+                       co_read_response, NULL, s);
     return fd;
 }
 
@@ -1199,7 +1204,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
 
     qemu_co_mutex_lock(&s->lock);
     s->co_send = qemu_coroutine_self();
-    aio_set_fd_handler(s->aio_context, s->fd,
+    aio_set_fd_handler(s->aio_context, s->fd, false,
                        co_read_response, co_write_request, s);
     socket_set_cork(s->fd, 1);
 
@@ -1218,7 +1223,8 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
     }
 out:
     socket_set_cork(s->fd, 0);
-    aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s);
+    aio_set_fd_handler(s->aio_context, s->fd, false,
+                       co_read_response, NULL, s);
     s->co_send = NULL;
     qemu_co_mutex_unlock(&s->lock);
 }
@@ -1368,7 +1374,8 @@ static void sd_detach_aio_context(BlockDriverState *bs)
 {
     BDRVSheepdogState *s = bs->opaque;
 
-    aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
+                       NULL, NULL);
 }
 
 static void sd_attach_aio_context(BlockDriverState *bs,
@@ -1377,7 +1384,8 @@ static void sd_attach_aio_context(BlockDriverState *bs,
     BDRVSheepdogState *s = bs->opaque;
 
     s->aio_context = new_context;
-    aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s);
+    aio_set_fd_handler(new_context, s->fd, false,
+                       co_read_response, NULL, s);
 }
 
 /* TODO Convert to fine grained options */
@@ -1490,7 +1498,8 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     g_free(buf);
     return 0;
 out:
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
+                       false, NULL, NULL, NULL);
     if (s->fd >= 0) {
         closesocket(s->fd);
     }
@@ -1528,7 +1537,8 @@ static void sd_reopen_commit(BDRVReopenState *state)
     BDRVSheepdogState *s = state->bs->opaque;
 
     if (s->fd) {
-        aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+        aio_set_fd_handler(s->aio_context, s->fd, false,
+                           NULL, NULL, NULL);
         closesocket(s->fd);
     }
 
@@ -1551,7 +1561,8 @@ static void sd_reopen_abort(BDRVReopenState *state)
     }
 
     if (re_s->fd) {
-        aio_set_fd_handler(s->aio_context, re_s->fd, NULL, NULL, NULL);
+        aio_set_fd_handler(s->aio_context, re_s->fd, false,
+                           NULL, NULL, NULL);
         closesocket(re_s->fd);
     }
 
@@ -1935,7 +1946,8 @@ static void sd_close(BlockDriverState *bs)
         error_report("%s, %s", sd_strerror(rsp->result), s->name);
     }
 
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
+                       false, NULL, NULL, NULL);
     closesocket(s->fd);
     g_free(s->host_spec);
 }
diff --git a/block/ssh.c b/block/ssh.c
index d35b51f987..af025c08a0 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -800,14 +800,15 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
             rd_handler, wr_handler);
 
     aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       rd_handler, wr_handler, co);
+                       false, rd_handler, wr_handler, co);
 }
 
 static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
                                           BlockDriverState *bs)
 {
     DPRINTF("s->sock=%d", s->sock);
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
+                       false, NULL, NULL, NULL);
 }
 
 /* A non-blocking call returned EAGAIN, so yield, ensuring the
diff --git a/block/stream.c b/block/stream.c
index 3f64fa2245..25af7eff62 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -16,6 +16,7 @@
 #include "block/blockjob.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
 
 enum {
     /*
@@ -222,7 +223,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
 
     if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
          on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        !bdrv_iostatus_is_enabled(bs)) {
+        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
         error_setg(errp, QERR_INVALID_PARAMETER, "on-error");
         return;
     }
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index 1abc6fcaea..3419af7d96 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -33,8 +33,7 @@
  * its own locking.
  *
  * This locking is however handled internally in this file, so it's
- * mostly transparent to outside users (but see the documentation in
- * throttle_groups_lock()).
+ * transparent to outside users.
  *
  * The whole ThrottleGroup structure is private and invisible to
  * outside users, that only use it through its ThrottleState.
@@ -76,9 +75,9 @@ static QTAILQ_HEAD(, ThrottleGroup) throttle_groups =
  * created.
  *
  * @name: the name of the ThrottleGroup
- * @ret:  the ThrottleGroup
+ * @ret:  the ThrottleState member of the ThrottleGroup
  */
-static ThrottleGroup *throttle_group_incref(const char *name)
+ThrottleState *throttle_group_incref(const char *name)
 {
     ThrottleGroup *tg = NULL;
     ThrottleGroup *iter;
@@ -108,7 +107,7 @@ static ThrottleGroup *throttle_group_incref(const char *name)
 
     qemu_mutex_unlock(&throttle_groups_lock);
 
-    return tg;
+    return &tg->ts;
 }
 
 /* Decrease the reference count of a ThrottleGroup.
@@ -116,10 +115,12 @@ static ThrottleGroup *throttle_group_incref(const char *name)
  * When the reference count reaches zero the ThrottleGroup is
  * destroyed.
  *
- * @tg:  The ThrottleGroup to unref
+ * @ts:  The ThrottleGroup to unref, given by its ThrottleState member
  */
-static void throttle_group_unref(ThrottleGroup *tg)
+void throttle_group_unref(ThrottleState *ts)
 {
+    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
+
     qemu_mutex_lock(&throttle_groups_lock);
     if (--tg->refcount == 0) {
         QTAILQ_REMOVE(&throttle_groups, tg, list);
@@ -401,7 +402,8 @@ static void write_timer_cb(void *opaque)
 void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
 {
     int i;
-    ThrottleGroup *tg = throttle_group_incref(groupname);
+    ThrottleState *ts = throttle_group_incref(groupname);
+    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
     int clock_type = QEMU_CLOCK_REALTIME;
 
     if (qtest_enabled()) {
@@ -409,7 +411,7 @@ void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
         clock_type = QEMU_CLOCK_VIRTUAL;
     }
 
-    bs->throttle_state = &tg->ts;
+    bs->throttle_state = ts;
 
     qemu_mutex_lock(&tg->lock);
     /* If the ThrottleGroup is new set this BlockDriverState as the token */
@@ -461,38 +463,10 @@ void throttle_group_unregister_bs(BlockDriverState *bs)
     throttle_timers_destroy(&bs->throttle_timers);
     qemu_mutex_unlock(&tg->lock);
 
-    throttle_group_unref(tg);
+    throttle_group_unref(&tg->ts);
     bs->throttle_state = NULL;
 }
 
-/* Acquire the lock of this throttling group.
- *
- * You won't normally need to use this. None of the functions from the
- * ThrottleGroup API require you to acquire the lock since all of them
- * deal with it internally.
- *
- * This should only be used in exceptional cases when you want to
- * access the protected fields of a BlockDriverState directly
- * (e.g. bdrv_swap()).
- *
- * @bs: a BlockDriverState that is member of the group
- */
-void throttle_group_lock(BlockDriverState *bs)
-{
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
-    qemu_mutex_lock(&tg->lock);
-}
-
-/* Release the lock of this throttling group.
- *
- * See the comments in throttle_group_lock().
- */
-void throttle_group_unlock(BlockDriverState *bs)
-{
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
-    qemu_mutex_unlock(&tg->lock);
-}
-
 static void throttle_groups_init(void)
 {
     qemu_mutex_init(&throttle_groups_lock);
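
throttle_group_incref() and throttle_group_unref() are now exported and deal in ThrottleState pointers, so callers outside block/throttle-groups.c (such as the BlockBackendRootState code in blockdev.c below) can hold a reference without ever seeing the private ThrottleGroup type; only this file converts back with container_of(). A short usage sketch assuming these two entry points, mirroring the register/unregister hunks above (the group name is illustrative):

    /* external caller: keeps only the ThrottleState */
    ThrottleState *ts = throttle_group_incref("group0");
    /* ... store ts, e.g. in bs->throttle_state or a root state ... */
    throttle_group_unref(ts);   /* group is destroyed when the count reaches zero */

    /* inside block/throttle-groups.c only: */
    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);

This is also why throttle_group_lock()/throttle_group_unlock() could be deleted: with no outside access to the ThrottleGroup, the locking stays entirely internal, as the reworded comment at the top of the file now states.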
diff --git a/block/win32-aio.c b/block/win32-aio.c
index 64e86827bc..bbf2f01c12 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -174,7 +174,7 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
 void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
                                   AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &aio->e, NULL);
+    aio_set_event_notifier(old_context, &aio->e, false, NULL);
     aio->is_aio_context_attached = false;
 }
 
@@ -182,7 +182,8 @@ void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
                                   AioContext *new_context)
 {
     aio->is_aio_context_attached = true;
-    aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb);
+    aio_set_event_notifier(new_context, &aio->e, false,
+                           win32_aio_completion_cb);
 }
 
 QEMUWin32AIOState *win32_aio_init(void)
diff --git a/blockdev.c b/blockdev.c
index 8141b6b3da..18712d25cc 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -124,14 +124,16 @@ void blockdev_mark_auto_del(BlockBackend *blk)
         return;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
+    if (bs) {
+        aio_context = bdrv_get_aio_context(bs);
+        aio_context_acquire(aio_context);
 
-    if (bs->job) {
-        block_job_cancel(bs->job);
-    }
+        if (bs->job) {
+            block_job_cancel(bs->job);
+        }
 
-    aio_context_release(aio_context);
+        aio_context_release(aio_context);
+    }
 
     dinfo->auto_del = 1;
 }
@@ -229,8 +231,8 @@ bool drive_check_orphaned(void)
             dinfo->type != IF_NONE) {
             fprintf(stderr, "Warning: Orphaned drive without device: "
                     "id=%s,file=%s,if=%s,bus=%d,unit=%d\n",
-                    blk_name(blk), blk_bs(blk)->filename, if_name[dinfo->type],
-                    dinfo->bus, dinfo->unit);
+                    blk_name(blk), blk_bs(blk) ? blk_bs(blk)->filename : "",
+                    if_name[dinfo->type], dinfo->bus, dinfo->unit);
             rs = true;
         }
     }
@@ -348,25 +350,134 @@ static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
 
 typedef enum { MEDIA_DISK, MEDIA_CDROM } DriveMediaType;
 
+/* All parameters but @opts are optional and may be set to NULL. */
+static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
+    const char **throttling_group, ThrottleConfig *throttle_cfg,
+    BlockdevDetectZeroesOptions *detect_zeroes, Error **errp)
+{
+    const char *discard;
+    Error *local_error = NULL;
+    const char *aio;
+
+    if (bdrv_flags) {
+        if (!qemu_opt_get_bool(opts, "read-only", false)) {
+            *bdrv_flags |= BDRV_O_RDWR;
+        }
+        if (qemu_opt_get_bool(opts, "copy-on-read", false)) {
+            *bdrv_flags |= BDRV_O_COPY_ON_READ;
+        }
+
+        if ((discard = qemu_opt_get(opts, "discard")) != NULL) {
+            if (bdrv_parse_discard_flags(discard, bdrv_flags) != 0) {
+                error_setg(errp, "Invalid discard option");
+                return;
+            }
+        }
+
+        if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true)) {
+            *bdrv_flags |= BDRV_O_CACHE_WB;
+        }
+        if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
+            *bdrv_flags |= BDRV_O_NOCACHE;
+        }
+        if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
+            *bdrv_flags |= BDRV_O_NO_FLUSH;
+        }
+
+        if ((aio = qemu_opt_get(opts, "aio")) != NULL) {
+            if (!strcmp(aio, "native")) {
+                *bdrv_flags |= BDRV_O_NATIVE_AIO;
+            } else if (!strcmp(aio, "threads")) {
+                /* this is the default */
+            } else {
+               error_setg(errp, "invalid aio option");
+               return;
+            }
+        }
+    }
+
+    /* disk I/O throttling */
+    if (throttling_group) {
+        *throttling_group = qemu_opt_get(opts, "throttling.group");
+    }
+
+    if (throttle_cfg) {
+        memset(throttle_cfg, 0, sizeof(*throttle_cfg));
+        throttle_cfg->buckets[THROTTLE_BPS_TOTAL].avg =
+            qemu_opt_get_number(opts, "throttling.bps-total", 0);
+        throttle_cfg->buckets[THROTTLE_BPS_READ].avg  =
+            qemu_opt_get_number(opts, "throttling.bps-read", 0);
+        throttle_cfg->buckets[THROTTLE_BPS_WRITE].avg =
+            qemu_opt_get_number(opts, "throttling.bps-write", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_TOTAL].avg =
+            qemu_opt_get_number(opts, "throttling.iops-total", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_READ].avg =
+            qemu_opt_get_number(opts, "throttling.iops-read", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_WRITE].avg =
+            qemu_opt_get_number(opts, "throttling.iops-write", 0);
+
+        throttle_cfg->buckets[THROTTLE_BPS_TOTAL].max =
+            qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
+        throttle_cfg->buckets[THROTTLE_BPS_READ].max  =
+            qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
+        throttle_cfg->buckets[THROTTLE_BPS_WRITE].max =
+            qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_TOTAL].max =
+            qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_READ].max =
+            qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_WRITE].max =
+            qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
+
+        throttle_cfg->op_size =
+            qemu_opt_get_number(opts, "throttling.iops-size", 0);
+
+        if (!check_throttle_config(throttle_cfg, errp)) {
+            return;
+        }
+    }
+
+    if (detect_zeroes) {
+        *detect_zeroes =
+            qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
+                            qemu_opt_get(opts, "detect-zeroes"),
+                            BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
+                            BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
+                            &local_error);
+        if (local_error) {
+            error_propagate(errp, local_error);
+            return;
+        }
+
+        if (bdrv_flags &&
+            *detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
+            !(*bdrv_flags & BDRV_O_UNMAP))
+        {
+            error_setg(errp, "setting detect-zeroes to unmap is not allowed "
+                             "without setting discard operation to unmap");
+            return;
+        }
+    }
+}
+
 /* Takes the ownership of bs_opts */
 static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
                                    Error **errp)
 {
     const char *buf;
-    int ro = 0;
     int bdrv_flags = 0;
     int on_read_error, on_write_error;
     BlockBackend *blk;
     BlockDriverState *bs;
     ThrottleConfig cfg;
     int snapshot = 0;
-    bool copy_on_read;
     Error *error = NULL;
     QemuOpts *opts;
     const char *id;
     bool has_driver_specific_opts;
-    BlockdevDetectZeroesOptions detect_zeroes;
-    const char *throttling_group;
+    BlockdevDetectZeroesOptions detect_zeroes =
+        BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
+    const char *throttling_group = NULL;
 
     /* Check common options by copying from bs_opts to opts, all other options
      * stay in bs_opts for processing by bdrv_open(). */
@@ -391,35 +502,12 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
 
     /* extract parameters */
     snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
-    ro = qemu_opt_get_bool(opts, "read-only", 0);
-    copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
-
-    if ((buf = qemu_opt_get(opts, "discard")) != NULL) {
-        if (bdrv_parse_discard_flags(buf, &bdrv_flags) != 0) {
-            error_setg(errp, "invalid discard option");
-            goto early_err;
-        }
-    }
 
-    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true)) {
-        bdrv_flags |= BDRV_O_CACHE_WB;
-    }
-    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
-        bdrv_flags |= BDRV_O_NOCACHE;
-    }
-    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
-        bdrv_flags |= BDRV_O_NO_FLUSH;
-    }
-
-    if ((buf = qemu_opt_get(opts, "aio")) != NULL) {
-        if (!strcmp(buf, "native")) {
-            bdrv_flags |= BDRV_O_NATIVE_AIO;
-        } else if (!strcmp(buf, "threads")) {
-            /* this is the default */
-        } else {
-           error_setg(errp, "invalid aio option");
-           goto early_err;
-        }
+    extract_common_blockdev_options(opts, &bdrv_flags, &throttling_group, &cfg,
+                                    &detect_zeroes, &error);
+    if (error) {
+        error_propagate(errp, error);
+        goto early_err;
     }
 
     if ((buf = qemu_opt_get(opts, "format")) != NULL) {
@@ -437,43 +525,6 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
         qdict_put(bs_opts, "driver", qstring_from_str(buf));
     }
 
-    /* disk I/O throttling */
-    memset(&cfg, 0, sizeof(cfg));
-    cfg.buckets[THROTTLE_BPS_TOTAL].avg =
-        qemu_opt_get_number(opts, "throttling.bps-total", 0);
-    cfg.buckets[THROTTLE_BPS_READ].avg  =
-        qemu_opt_get_number(opts, "throttling.bps-read", 0);
-    cfg.buckets[THROTTLE_BPS_WRITE].avg =
-        qemu_opt_get_number(opts, "throttling.bps-write", 0);
-    cfg.buckets[THROTTLE_OPS_TOTAL].avg =
-        qemu_opt_get_number(opts, "throttling.iops-total", 0);
-    cfg.buckets[THROTTLE_OPS_READ].avg =
-        qemu_opt_get_number(opts, "throttling.iops-read", 0);
-    cfg.buckets[THROTTLE_OPS_WRITE].avg =
-        qemu_opt_get_number(opts, "throttling.iops-write", 0);
-
-    cfg.buckets[THROTTLE_BPS_TOTAL].max =
-        qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
-    cfg.buckets[THROTTLE_BPS_READ].max  =
-        qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
-    cfg.buckets[THROTTLE_BPS_WRITE].max =
-        qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
-    cfg.buckets[THROTTLE_OPS_TOTAL].max =
-        qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
-    cfg.buckets[THROTTLE_OPS_READ].max =
-        qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
-    cfg.buckets[THROTTLE_OPS_WRITE].max =
-        qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
-
-    cfg.op_size = qemu_opt_get_number(opts, "throttling.iops-size", 0);
-
-    throttling_group = qemu_opt_get(opts, "throttling.group");
-
-    if (!check_throttle_config(&cfg, &error)) {
-        error_propagate(errp, error);
-        goto early_err;
-    }
-
     on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
     if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
         on_write_error = parse_block_error_action(buf, 0, &error);
@@ -492,34 +543,34 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
         }
     }
 
-    detect_zeroes =
-        qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
-                        qemu_opt_get(opts, "detect-zeroes"),
-                        BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
-                        BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
-                        &error);
-    if (error) {
-        error_propagate(errp, error);
-        goto early_err;
-    }
-
-    if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
-        !(bdrv_flags & BDRV_O_UNMAP)) {
-        error_setg(errp, "setting detect-zeroes to unmap is not allowed "
-                         "without setting discard operation to unmap");
-        goto early_err;
+    if (snapshot) {
+        /* always use cache=unsafe with snapshot */
+        bdrv_flags &= ~BDRV_O_CACHE_MASK;
+        bdrv_flags |= (BDRV_O_SNAPSHOT|BDRV_O_CACHE_WB|BDRV_O_NO_FLUSH);
     }
 
     /* init */
     if ((!file || !*file) && !has_driver_specific_opts) {
-        blk = blk_new_with_bs(qemu_opts_id(opts), errp);
+        BlockBackendRootState *blk_rs;
+
+        blk = blk_new(qemu_opts_id(opts), errp);
         if (!blk) {
             goto early_err;
         }
 
-        bs = blk_bs(blk);
-        bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
-        bs->read_only = ro;
+        blk_rs = blk_get_root_state(blk);
+        blk_rs->open_flags    = bdrv_flags;
+        blk_rs->read_only     = !(bdrv_flags & BDRV_O_RDWR);
+        blk_rs->detect_zeroes = detect_zeroes;
+
+        if (throttle_enabled(&cfg)) {
+            if (!throttling_group) {
+                throttling_group = blk_name(blk);
+            }
+            blk_rs->throttle_group = g_strdup(throttling_group);
+            blk_rs->throttle_state = throttle_group_incref(throttling_group);
+            blk_rs->throttle_state->cfg = cfg;
+        }
 
         QDECREF(bs_opts);
     } else {
@@ -527,46 +578,30 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
             file = NULL;
         }
 
-        if (snapshot) {
-            /* always use cache=unsafe with snapshot */
-            bdrv_flags &= ~BDRV_O_CACHE_MASK;
-            bdrv_flags |= (BDRV_O_SNAPSHOT|BDRV_O_CACHE_WB|BDRV_O_NO_FLUSH);
-        }
-
-        if (copy_on_read) {
-            bdrv_flags |= BDRV_O_COPY_ON_READ;
-        }
-
-        if (runstate_check(RUN_STATE_INMIGRATE)) {
-            bdrv_flags |= BDRV_O_INCOMING;
-        }
-
-        bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
-
         blk = blk_new_open(qemu_opts_id(opts), file, NULL, bs_opts, bdrv_flags,
                            errp);
         if (!blk) {
             goto err_no_bs_opts;
         }
         bs = blk_bs(blk);
-    }
 
-    bs->detect_zeroes = detect_zeroes;
+        bs->detect_zeroes = detect_zeroes;
 
-    bdrv_set_on_error(bs, on_read_error, on_write_error);
+        /* disk I/O throttling */
+        if (throttle_enabled(&cfg)) {
+            if (!throttling_group) {
+                throttling_group = blk_name(blk);
+            }
+            bdrv_io_limits_enable(bs, throttling_group);
+            bdrv_set_io_limits(bs, &cfg);
+        }
 
-    /* disk I/O throttling */
-    if (throttle_enabled(&cfg)) {
-        if (!throttling_group) {
-            throttling_group = blk_name(blk);
+        if (bdrv_key_required(bs)) {
+            autostart = 0;
         }
-        bdrv_io_limits_enable(bs, throttling_group);
-        bdrv_set_io_limits(bs, &cfg);
     }
 
-    if (bdrv_key_required(bs)) {
-        autostart = 0;
-    }
+    blk_set_on_error(blk, on_read_error, on_write_error);
 
 err_no_bs_opts:
     qemu_opts_del(opts);
@@ -579,6 +614,54 @@ err_no_opts:
     return NULL;
 }
 
+static QemuOptsList qemu_root_bds_opts;
+
+/* Takes the ownership of bs_opts */
+static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
+{
+    BlockDriverState *bs;
+    QemuOpts *opts;
+    Error *local_error = NULL;
+    BlockdevDetectZeroesOptions detect_zeroes;
+    int ret;
+    int bdrv_flags = 0;
+
+    opts = qemu_opts_create(&qemu_root_bds_opts, NULL, 1, errp);
+    if (!opts) {
+        goto fail;
+    }
+
+    qemu_opts_absorb_qdict(opts, bs_opts, &local_error);
+    if (local_error) {
+        error_propagate(errp, local_error);
+        goto fail;
+    }
+
+    extract_common_blockdev_options(opts, &bdrv_flags, NULL, NULL,
+                                    &detect_zeroes, &local_error);
+    if (local_error) {
+        error_propagate(errp, local_error);
+        goto fail;
+    }
+
+    bs = NULL;
+    ret = bdrv_open(&bs, NULL, NULL, bs_opts, bdrv_flags, errp);
+    if (ret < 0) {
+        goto fail_no_bs_opts;
+    }
+
+    bs->detect_zeroes = detect_zeroes;
+
+fail_no_bs_opts:
+    qemu_opts_del(opts);
+    return bs;
+
+fail:
+    qemu_opts_del(opts);
+    QDECREF(bs_opts);
+    return NULL;
+}
+
 static void qemu_opt_rename(QemuOpts *opts, const char *from, const char *to,
                             Error **errp)
 {
@@ -1042,6 +1125,10 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
             monitor_printf(mon, "Device '%s' not found\n", device);
             return;
         }
+        if (!blk_is_available(blk)) {
+            monitor_printf(mon, "Device '%s' has no medium\n", device);
+            return;
+        }
         ret = bdrv_commit(blk_bs(blk));
     }
     if (ret < 0) {
@@ -1121,7 +1208,9 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
                   "Device '%s' not found", device);
         return NULL;
     }
-    bs = blk_bs(blk);
+
+    aio_context = blk_get_aio_context(blk);
+    aio_context_acquire(aio_context);
 
     if (!has_id) {
         id = NULL;
@@ -1133,11 +1222,14 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
 
     if (!id && !name) {
         error_setg(errp, "Name or id must be provided");
-        return NULL;
+        goto out_aio_context;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out_aio_context;
+    }
+    bs = blk_bs(blk);
 
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) {
         goto out_aio_context;
@@ -1278,6 +1370,7 @@ typedef struct InternalSnapshotState {
     BlockDriverState *bs;
     AioContext *aio_context;
     QEMUSnapshotInfo sn;
+    bool created;
 } InternalSnapshotState;
 
 static void internal_snapshot_prepare(BlkTransactionState *common,
@@ -1311,16 +1404,19 @@ static void internal_snapshot_prepare(BlkTransactionState *common,
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
     /* AioContext is released in .clean() */
-    state->aio_context = bdrv_get_aio_context(bs);
+    state->aio_context = blk_get_aio_context(blk);
     aio_context_acquire(state->aio_context);
 
-    if (!bdrv_is_inserted(bs)) {
+    if (!blk_is_available(blk)) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
         return;
     }
+    bs = blk_bs(blk);
+
+    state->bs = bs;
+    bdrv_drained_begin(bs);
 
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) {
         return;
@@ -1373,7 +1469,7 @@ static void internal_snapshot_prepare(BlkTransactionState *common,
     }
 
     /* 4. succeed, mark a snapshot is created */
-    state->bs = bs;
+    state->created = true;
 }
 
 static void internal_snapshot_abort(BlkTransactionState *common)
@@ -1384,7 +1480,7 @@ static void internal_snapshot_abort(BlkTransactionState *common)
     QEMUSnapshotInfo *sn = &state->sn;
     Error *local_error = NULL;
 
-    if (!bs) {
+    if (!state->created) {
         return;
     }
 
@@ -1405,6 +1501,9 @@ static void internal_snapshot_clean(BlkTransactionState *common)
                                              common, common);
 
     if (state->aio_context) {
+        if (state->bs) {
+            bdrv_drained_end(state->bs);
+        }
         aio_context_release(state->aio_context);
     }
 }
@@ -1477,6 +1576,7 @@ static void external_snapshot_prepare(BlkTransactionState *common,
     /* Acquire AioContext now so any threads operating on old_bs stop */
     state->aio_context = bdrv_get_aio_context(state->old_bs);
     aio_context_acquire(state->aio_context);
+    bdrv_drained_begin(state->old_bs);
 
     if (!bdrv_is_inserted(state->old_bs)) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
@@ -1546,8 +1646,6 @@ static void external_snapshot_commit(BlkTransactionState *common)
      * don't want to abort all of them if one of them fails the reopen */
     bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
                 NULL);
-
-    aio_context_release(state->aio_context);
 }
 
 static void external_snapshot_abort(BlkTransactionState *common)
@@ -1557,7 +1655,14 @@ static void external_snapshot_abort(BlkTransactionState *common)
     if (state->new_bs) {
         bdrv_unref(state->new_bs);
     }
+}
+
+static void external_snapshot_clean(BlkTransactionState *common)
+{
+    ExternalSnapshotState *state =
+                             DO_UPCAST(ExternalSnapshotState, common, common);
     if (state->aio_context) {
+        bdrv_drained_end(state->old_bs);
         aio_context_release(state->aio_context);
     }
 }
@@ -1572,7 +1677,6 @@ typedef struct DriveBackupState {
 static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
 {
     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
-    BlockDriverState *bs;
     BlockBackend *blk;
     DriveBackup *backup;
     Error *local_err = NULL;
@@ -1586,11 +1690,17 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
                   "Device '%s' not found", backup->device);
         return;
     }
-    bs = blk_bs(blk);
+
+    if (!blk_is_available(blk)) {
+        error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device);
+        return;
+    }
 
     /* AioContext is released in .clean() */
-    state->aio_context = bdrv_get_aio_context(bs);
+    state->aio_context = blk_get_aio_context(blk);
     aio_context_acquire(state->aio_context);
+    bdrv_drained_begin(blk_bs(blk));
+    state->bs = blk_bs(blk);
 
     qmp_drive_backup(backup->device, backup->target,
                      backup->has_format, backup->format,
@@ -1606,7 +1716,6 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
         return;
     }
 
-    state->bs = bs;
     state->job = state->bs->job;
 }
 
@@ -1626,6 +1735,7 @@ static void drive_backup_clean(BlkTransactionState *common)
     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
 
     if (state->aio_context) {
+        bdrv_drained_end(state->bs);
         aio_context_release(state->aio_context);
     }
 }
@@ -1641,8 +1751,7 @@ static void blockdev_backup_prepare(BlkTransactionState *common, Error **errp)
 {
     BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
     BlockdevBackup *backup;
-    BlockDriverState *bs, *target;
-    BlockBackend *blk;
+    BlockBackend *blk, *target;
     Error *local_err = NULL;
 
     assert(common->action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
@@ -1653,23 +1762,28 @@ static void blockdev_backup_prepare(BlkTransactionState *common, Error **errp)
         error_setg(errp, "Device '%s' not found", backup->device);
         return;
     }
-    bs = blk_bs(blk);
 
-    blk = blk_by_name(backup->target);
-    if (!blk) {
+    if (!blk_is_available(blk)) {
+        error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device);
+        return;
+    }
+
+    target = blk_by_name(backup->target);
+    if (!target) {
         error_setg(errp, "Device '%s' not found", backup->target);
         return;
     }
-    target = blk_bs(blk);
 
     /* AioContext is released in .clean() */
-    state->aio_context = bdrv_get_aio_context(bs);
-    if (state->aio_context != bdrv_get_aio_context(target)) {
+    state->aio_context = blk_get_aio_context(blk);
+    if (state->aio_context != blk_get_aio_context(target)) {
         state->aio_context = NULL;
         error_setg(errp, "Backup between two IO threads is not implemented");
         return;
     }
     aio_context_acquire(state->aio_context);
+    state->bs = blk_bs(blk);
+    bdrv_drained_begin(state->bs);
 
     qmp_blockdev_backup(backup->device, backup->target,
                         backup->sync,
@@ -1682,7 +1796,6 @@ static void blockdev_backup_prepare(BlkTransactionState *common, Error **errp)
         return;
     }
 
-    state->bs = bs;
     state->job = state->bs->job;
 }
 
@@ -1702,6 +1815,7 @@ static void blockdev_backup_clean(BlkTransactionState *common)
     BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
 
     if (state->aio_context) {
+        bdrv_drained_end(state->bs);
         aio_context_release(state->aio_context);
     }
 }
@@ -1722,6 +1836,7 @@ static const BdrvActionOps actions[] = {
         .prepare  = external_snapshot_prepare,
         .commit   = external_snapshot_commit,
         .abort = external_snapshot_abort,
+        .clean = external_snapshot_clean,
     },
     [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
         .instance_size = sizeof(DriveBackupState),
@@ -1820,6 +1935,11 @@ static void eject_device(BlockBackend *blk, int force, Error **errp)
     BlockDriverState *bs = blk_bs(blk);
     AioContext *aio_context;
 
+    if (!bs) {
+        /* No medium inserted, so there is nothing to do */
+        return;
+    }
+
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
@@ -1886,7 +2006,8 @@ void qmp_block_passwd(bool has_device, const char *device,
 }
 
 /* Assumes AioContext is held */
-static void qmp_bdrv_open_encrypted(BlockDriverState *bs, const char *filename,
+static void qmp_bdrv_open_encrypted(BlockDriverState **pbs,
+                                    const char *filename,
                                     int bdrv_flags, const char *format,
                                     const char *password, Error **errp)
 {
@@ -1899,13 +2020,13 @@ static void qmp_bdrv_open_encrypted(BlockDriverState *bs, const char *filename,
         qdict_put(options, "driver", qstring_from_str(format));
     }
 
-    ret = bdrv_open(&bs, filename, NULL, options, bdrv_flags, &local_err);
+    ret = bdrv_open(pbs, filename, NULL, options, bdrv_flags, &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
         return;
     }
 
-    bdrv_add_key(bs, password, errp);
+    bdrv_add_key(*pbs, password, errp);
 }
 
 void qmp_change_blockdev(const char *device, const char *filename,
@@ -1915,6 +2036,7 @@ void qmp_change_blockdev(const char *device, const char *filename,
     BlockDriverState *bs;
     AioContext *aio_context;
     int bdrv_flags;
+    bool new_bs;
     Error *err = NULL;
 
     blk = blk_by_name(device);
@@ -1924,8 +2046,9 @@ void qmp_change_blockdev(const char *device, const char *filename,
         return;
     }
     bs = blk_bs(blk);
+    new_bs = !bs;
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
     eject_device(blk, 0, &err);
@@ -1934,10 +2057,21 @@ void qmp_change_blockdev(const char *device, const char *filename,
         goto out;
     }
 
-    bdrv_flags = bdrv_is_read_only(bs) ? 0 : BDRV_O_RDWR;
-    bdrv_flags |= bdrv_is_snapshot(bs) ? BDRV_O_SNAPSHOT : 0;
+    bdrv_flags = blk_is_read_only(blk) ? 0 : BDRV_O_RDWR;
+    bdrv_flags |= blk_get_root_state(blk)->open_flags & ~BDRV_O_RDWR;
 
-    qmp_bdrv_open_encrypted(bs, filename, bdrv_flags, format, NULL, errp);
+    qmp_bdrv_open_encrypted(&bs, filename, bdrv_flags, format, NULL, &err);
+    if (err) {
+        error_propagate(errp, err);
+        goto out;
+    }
+
+    if (new_bs) {
+        blk_insert_bs(blk, bs);
+        /* Has been sent automatically by bdrv_open() if blk_bs(blk) was not
+         * NULL */
+        blk_dev_change_media_cb(blk, true);
+    }
 
 out:
     aio_context_release(aio_context);
@@ -1977,7 +2111,15 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
                   "Device '%s' not found", device);
         return;
     }
+
+    aio_context = blk_get_aio_context(blk);
+    aio_context_acquire(aio_context);
+
     bs = blk_bs(blk);
+    if (!bs) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
 
     memset(&cfg, 0, sizeof(cfg));
     cfg.buckets[THROTTLE_BPS_TOTAL].avg = bps;
@@ -2012,12 +2154,9 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
     }
 
     if (!check_throttle_config(&cfg, errp)) {
-        return;
+        goto out;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
-
     if (throttle_enabled(&cfg)) {
         /* Enable I/O limits if they're not enabled yet, otherwise
          * just update the throttling group. */
@@ -2033,6 +2172,7 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
         bdrv_io_limits_disable(bs);
     }
 
+out:
     aio_context_release(aio_context);
 }
 
@@ -2145,7 +2285,6 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
         error_report("Device '%s' not found", id);
         return;
     }
-    bs = blk_bs(blk);
 
     if (!blk_legacy_dinfo(blk)) {
         error_report("Deleting device added with blockdev-add"
@@ -2153,16 +2292,19 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
         return;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
-        error_report_err(local_err);
-        aio_context_release(aio_context);
-        return;
-    }
+    bs = blk_bs(blk);
+    if (bs) {
+        if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
+            error_report_err(local_err);
+            aio_context_release(aio_context);
+            return;
+        }
 
-    bdrv_close(bs);
+        bdrv_close(bs);
+    }
 
     /* if we have a device attached to this BlockDriverState
      * then we need to make the drive anonymous until the device
@@ -2172,8 +2314,8 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
     if (blk_get_attached_dev(blk)) {
         blk_hide_on_behalf_of_hmp_drive_del(blk);
         /* Further I/O must not pause the guest */
-        bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
-                          BLOCKDEV_ON_ERROR_REPORT);
+        blk_set_on_error(blk, BLOCKDEV_ON_ERROR_REPORT,
+                         BLOCKDEV_ON_ERROR_REPORT);
     } else {
         blk_unref(blk);
     }
@@ -2295,11 +2437,16 @@ void qmp_block_stream(const char *device,
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
+    bs = blk_bs(blk);
+
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) {
         goto out;
     }
@@ -2370,11 +2517,16 @@ void qmp_block_commit(const char *device,
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
+    bs = blk_bs(blk);
+
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) {
         goto out;
     }
@@ -2480,17 +2632,17 @@ void qmp_drive_backup(const char *device, const char *target,
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
     /* Although backup_run has this check too, we need to use bs->drv below, so
      * do an early check redundantly. */
-    if (!bdrv_is_inserted(bs)) {
+    if (!blk_is_available(blk)) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
         goto out;
     }
+    bs = blk_bs(blk);
 
     if (!has_format) {
         format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name;
@@ -2587,7 +2739,7 @@ void qmp_blockdev_backup(const char *device, const char *target,
                          BlockdevOnError on_target_error,
                          Error **errp)
 {
-    BlockBackend *blk;
+    BlockBackend *blk, *target_blk;
     BlockDriverState *bs;
     BlockDriverState *target_bs;
     Error *local_err = NULL;
@@ -2608,17 +2760,27 @@ void qmp_blockdev_backup(const char *device, const char *target,
         error_setg(errp, "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
-    blk = blk_by_name(target);
-    if (!blk) {
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
+    bs = blk_bs(blk);
+
+    target_blk = blk_by_name(target);
+    if (!target_blk) {
         error_setg(errp, "Device '%s' not found", target);
         goto out;
     }
-    target_bs = blk_bs(blk);
+
+    if (!blk_is_available(target_blk)) {
+        error_setg(errp, "Device '%s' has no medium", target);
+        goto out;
+    }
+    target_bs = blk_bs(target_blk);
 
     bdrv_ref(target_bs);
     bdrv_set_aio_context(target_bs, aio_context);
@@ -2695,15 +2857,15 @@ void qmp_drive_mirror(const char *device, const char *target,
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
-    if (!bdrv_is_inserted(bs)) {
+    if (!blk_is_available(blk)) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
         goto out;
     }
+    bs = blk_bs(blk);
 
     if (!has_format) {
         format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name;
@@ -2833,17 +2995,22 @@ static BlockJob *find_block_job(const char *device, AioContext **aio_context,
     BlockBackend *blk;
     BlockDriverState *bs;
 
+    *aio_context = NULL;
+
     blk = blk_by_name(device);
     if (!blk) {
         goto notfound;
     }
-    bs = blk_bs(blk);
 
-    *aio_context = bdrv_get_aio_context(bs);
+    *aio_context = blk_get_aio_context(blk);
     aio_context_acquire(*aio_context);
 
+    if (!blk_is_available(blk)) {
+        goto notfound;
+    }
+    bs = blk_bs(blk);
+
     if (!bs->job) {
-        aio_context_release(*aio_context);
         goto notfound;
     }
 
@@ -2852,7 +3019,10 @@ static BlockJob *find_block_job(const char *device, AioContext **aio_context,
 notfound:
     error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
               "No active block job on device '%s'", device);
-    *aio_context = NULL;
+    if (*aio_context) {
+        aio_context_release(*aio_context);
+        *aio_context = NULL;
+    }
     return NULL;
 }
 
@@ -2959,11 +3129,16 @@ void qmp_change_backing_file(const char *device,
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
+    bs = blk_bs(blk);
+
     image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -3030,17 +3205,12 @@ out:
 void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
 {
     QmpOutputVisitor *ov = qmp_output_visitor_new();
-    BlockBackend *blk;
+    BlockDriverState *bs;
+    BlockBackend *blk = NULL;
     QObject *obj;
     QDict *qdict;
     Error *local_err = NULL;
 
-    /* Require an ID in the top level */
-    if (!options->has_id) {
-        error_setg(errp, "Block device needs an ID");
-        goto fail;
-    }
-
     /* TODO Sort it out in raw-posix and drive_new(): Reject aio=native with
      * cache.direct=false instead of silently switching to aio=threads, except
      * when called from drive_new().
@@ -3068,14 +3238,33 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
 
     qdict_flatten(qdict);
 
-    blk = blockdev_init(NULL, qdict, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        goto fail;
+    if (options->has_id) {
+        blk = blockdev_init(NULL, qdict, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            goto fail;
+        }
+
+        bs = blk_bs(blk);
+    } else {
+        if (!qdict_get_try_str(qdict, "node-name")) {
+            error_setg(errp, "'id' and/or 'node-name' need to be specified for "
+                       "the root node");
+            goto fail;
+        }
+
+        bs = bds_tree_init(qdict, errp);
+        if (!bs) {
+            goto fail;
+        }
     }
 
-    if (bdrv_key_required(blk_bs(blk))) {
-        blk_unref(blk);
+    if (bs && bdrv_key_required(bs)) {
+        if (blk) {
+            blk_unref(blk);
+        } else {
+            bdrv_unref(bs);
+        }
         error_setg(errp, "blockdev-add doesn't support encrypted devices");
         goto fail;
     }
@@ -3220,6 +3409,47 @@ QemuOptsList qemu_common_drive_opts = {
     },
 };
 
+static QemuOptsList qemu_root_bds_opts = {
+    .name = "root-bds",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_root_bds_opts.head),
+    .desc = {
+        {
+            .name = "discard",
+            .type = QEMU_OPT_STRING,
+            .help = "discard operation (ignore/off, unmap/on)",
+        },{
+            .name = "cache.writeback",
+            .type = QEMU_OPT_BOOL,
+            .help = "enables writeback mode for any caches",
+        },{
+            .name = "cache.direct",
+            .type = QEMU_OPT_BOOL,
+            .help = "enables use of O_DIRECT (bypass the host page cache)",
+        },{
+            .name = "cache.no-flush",
+            .type = QEMU_OPT_BOOL,
+            .help = "ignore any flush requests for the device",
+        },{
+            .name = "aio",
+            .type = QEMU_OPT_STRING,
+            .help = "host AIO implementation (threads, native)",
+        },{
+            .name = "read-only",
+            .type = QEMU_OPT_BOOL,
+            .help = "open drive file as read-only",
+        },{
+            .name = "copy-on-read",
+            .type = QEMU_OPT_BOOL,
+            .help = "copy read data from backing file into image file",
+        },{
+            .name = "detect-zeroes",
+            .type = QEMU_OPT_STRING,
+            .help = "try to optimize zero writes (off, on, unmap)",
+        },
+        { /* end of list */ }
+    },
+};
+
 QemuOptsList qemu_drive_opts = {
     .name = "drive",
     .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),
diff --git a/blockjob.c b/blockjob.c
index 1da5491228..c02fe598b8 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -29,6 +29,7 @@
 #include "block/block.h"
 #include "block/blockjob.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qjson.h"
 #include "qemu/coroutine.h"
@@ -354,8 +355,8 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
         job->user_paused = true;
         block_job_pause(job);
         block_job_iostatus_set_err(job, error);
-        if (bs != job->bs) {
-            bdrv_iostatus_set_err(bs, error);
+        if (bs->blk && bs != job->bs) {
+            blk_iostatus_set_err(bs->blk, error);
         }
     }
     return action;
diff --git a/configure b/configure
index 211bc6ef36..7a1d08dc4a 100755
--- a/configure
+++ b/configure
@@ -331,6 +331,8 @@ gtkabi=""
 gtk_gl="no"
 gnutls=""
 gnutls_hash=""
+nettle=""
+gcrypt=""
 vte=""
 virglrenderer=""
 tpm="yes"
@@ -417,9 +419,6 @@ if test "$debug_info" = "yes"; then
     LDFLAGS="-g $LDFLAGS"
 fi
 
-test_cflags=""
-test_libs=""
-
 # make source path absolute
 source_path=`cd "$source_path"; pwd`
 
@@ -1114,6 +1113,14 @@ for opt do
   ;;
   --enable-gnutls) gnutls="yes"
   ;;
+  --disable-nettle) nettle="no"
+  ;;
+  --enable-nettle) nettle="yes"
+  ;;
+  --disable-gcrypt) gcrypt="no"
+  ;;
+  --enable-gcrypt) gcrypt="yes"
+  ;;
   --enable-rdma) rdma="yes"
   ;;
   --disable-rdma) rdma="no"
@@ -1324,6 +1331,8 @@ disabled with --disable-FEATURE, default is enabled if available:
   sparse          sparse checker
 
   gnutls          GNUTLS cryptography support
+  nettle          nettle cryptography support
+  gcrypt          libgcrypt cryptography support
   sdl             SDL UI
   --with-sdlabi     select preferred SDL ABI 1.2 or 2.0
   gtk             gtk UI
@@ -2254,20 +2263,76 @@ else
     gnutls_hash="no"
 fi
 
-if test "$gnutls_gcrypt" != "no"; then
-    if has "libgcrypt-config"; then
+
+# If user didn't give a --disable/enable-gcrypt flag,
+# then mark as disabled if user requested nettle
+# explicitly, or if gnutls links to nettle
+if test -z "$gcrypt"
+then
+    if test "$nettle" = "yes" || test "$gnutls_nettle" = "yes"
+    then
+        gcrypt="no"
+    fi
+fi
+
+# If user didn't give a --disable/enable-nettle flag,
+# then mark as disabled if user requested gcrypt
+# explicitly, or if gnutls links to gcrypt
+if test -z "$nettle"
+then
+    if test "$gcrypt" = "yes" || test "$gnutls_gcrypt" = "yes"
+    then
+        nettle="no"
+    fi
+fi
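+
+# Example: if neither --enable/--disable flag is given and gnutls links
+# against nettle (gnutls_nettle=yes), the blocks above force gcrypt="no"
+# and leave nettle empty, so only the nettle probe further down can still
+# enable it.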
+
+has_libgcrypt_config() {
+    if ! has "libgcrypt-config"
+    then
+	return 1
+    fi
+
+    if test -n "$cross_prefix"
+    then
+	host=`libgcrypt-config --host`
+	if test "$host-" != $cross_prefix
+	then
+	    return 1
+	fi
+    fi
+
+    return 0
+}
+
+if test "$gcrypt" != "no"; then
+    if has_libgcrypt_config; then
         gcrypt_cflags=`libgcrypt-config --cflags`
         gcrypt_libs=`libgcrypt-config --libs`
+        # Debian has removed -lgpg-error from libgcrypt-config
+        # as it "spreads unnecessary dependencies" which in
+        # turn breaks static builds...
+        if test "$static" = "yes"
+        then
+            gcrypt_libs="$gcrypt_libs -lgpg-error"
+        fi
         libs_softmmu="$gcrypt_libs $libs_softmmu"
         libs_tools="$gcrypt_libs $libs_tools"
         QEMU_CFLAGS="$QEMU_CFLAGS $gcrypt_cflags"
+        gcrypt="yes"
+        if test -z "$nettle"; then
+           nettle="no"
+        fi
     else
-        feature_not_found "gcrypt" "Install gcrypt devel"
+        if test "$gcrypt" = "yes"; then
+            feature_not_found "gcrypt" "Install gcrypt devel"
+        else
+            gcrypt="no"
+        fi
     fi
 fi
 
 
-if test "$gnutls_nettle" != "no"; then
+if test "$nettle" != "no"; then
     if $pkg_config --exists "nettle"; then
         nettle_cflags=`$pkg_config --cflags nettle`
         nettle_libs=`$pkg_config --libs nettle`
@@ -2275,20 +2340,30 @@ if test "$gnutls_nettle" != "no"; then
         libs_softmmu="$nettle_libs $libs_softmmu"
         libs_tools="$nettle_libs $libs_tools"
         QEMU_CFLAGS="$QEMU_CFLAGS $nettle_cflags"
+        nettle="yes"
     else
-        feature_not_found "nettle" "Install nettle devel"
+        if test "$nettle" = "yes"; then
+            feature_not_found "nettle" "Install nettle devel"
+        else
+            nettle="no"
+        fi
     fi
 fi
 
+if test "$gcrypt" = "yes" && test "$nettle" = "yes"
+then
+    error_exit "Only one of gcrypt & nettle can be enabled"
+fi
+
 ##########################################
 # libtasn1 - only for the TLS creds/session test suite
 
 tasn1=yes
+tasn1_cflags=""
+tasn1_libs=""
 if $pkg_config --exists "libtasn1"; then
     tasn1_cflags=`$pkg_config --cflags libtasn1`
     tasn1_libs=`$pkg_config --libs libtasn1`
-    test_cflags="$test_cflags $tasn1_cflags"
-    test_libs="$test_libs $tasn1_libs"
 else
     tasn1=no
 fi
@@ -4433,6 +4508,7 @@ if test "$want_tools" = "yes" ; then
   tools="qemu-img\$(EXESUF) qemu-io\$(EXESUF) $tools"
   if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" ] ; then
     tools="qemu-nbd\$(EXESUF) $tools"
+    tools="ivshmem-client\$(EXESUF) ivshmem-server\$(EXESUF) $tools"
   fi
 fi
 if test "$softmmu" = yes ; then
@@ -4621,8 +4697,8 @@ echo "GTK support       $gtk"
 echo "GTK GL support    $gtk_gl"
 echo "GNUTLS support    $gnutls"
 echo "GNUTLS hash       $gnutls_hash"
-echo "GNUTLS gcrypt     $gnutls_gcrypt"
-echo "GNUTLS nettle     $gnutls_nettle ${gnutls_nettle+($nettle_version)}"
+echo "libgcrypt         $gcrypt"
+echo "nettle            $nettle ${nettle+($nettle_version)}"
 echo "libtasn1          $tasn1"
 echo "VTE support       $vte"
 echo "curses support    $curses"
@@ -4991,11 +5067,11 @@ fi
 if test "$gnutls_hash" = "yes" ; then
   echo "CONFIG_GNUTLS_HASH=y" >> $config_host_mak
 fi
-if test "$gnutls_gcrypt" = "yes" ; then
-  echo "CONFIG_GNUTLS_GCRYPT=y" >> $config_host_mak
+if test "$gcrypt" = "yes" ; then
+  echo "CONFIG_GCRYPT=y" >> $config_host_mak
 fi
-if test "$gnutls_nettle" = "yes" ; then
-  echo "CONFIG_GNUTLS_NETTLE=y" >> $config_host_mak
+if test "$nettle" = "yes" ; then
+  echo "CONFIG_NETTLE=y" >> $config_host_mak
   echo "CONFIG_NETTLE_VERSION_MAJOR=${nettle_version%%.*}" >> $config_host_mak
 fi
 if test "$tasn1" = "yes" ; then
@@ -5330,8 +5406,8 @@ echo "EXESUF=$EXESUF" >> $config_host_mak
 echo "DSOSUF=$DSOSUF" >> $config_host_mak
 echo "LDFLAGS_SHARED=$LDFLAGS_SHARED" >> $config_host_mak
 echo "LIBS_QGA+=$libs_qga" >> $config_host_mak
-echo "TEST_LIBS=$test_libs" >> $config_host_mak
-echo "TEST_CFLAGS=$test_cflags" >> $config_host_mak
+echo "TASN1_LIBS=$tasn1_libs" >> $config_host_mak
+echo "TASN1_CFLAGS=$tasn1_cflags" >> $config_host_mak
 echo "POD2MAN=$POD2MAN" >> $config_host_mak
 echo "TRANSLATE_OPT_CFLAGS=$TRANSLATE_OPT_CFLAGS" >> $config_host_mak
 if test "$gcov" = "yes" ; then
diff --git a/contrib/ivshmem-client/Makefile.objs b/contrib/ivshmem-client/Makefile.objs
new file mode 100644
index 0000000000..bfab2d20dd
--- /dev/null
+++ b/contrib/ivshmem-client/Makefile.objs
@@ -0,0 +1 @@
+ivshmem-client-obj-y = ivshmem-client.o main.o
diff --git a/contrib/ivshmem-client/ivshmem-client.c b/contrib/ivshmem-client/ivshmem-client.c
new file mode 100644
index 0000000000..31619d80e9
--- /dev/null
+++ b/contrib/ivshmem-client/ivshmem-client.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "qemu-common.h"
+#include "qemu/queue.h"
+
+#include "ivshmem-client.h"
+
+/* log a message on stdout if verbose=1 */
+#define IVSHMEM_CLIENT_DEBUG(client, fmt, ...) do { \
+        if ((client)->verbose) {         \
+            printf(fmt, ## __VA_ARGS__); \
+        }                                \
+    } while (0)
+
+/* read message from the unix socket */
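+/* Each message is a little-endian int64 index, optionally carrying exactly
+ * one file descriptor as SCM_RIGHTS ancillary data; *fd is set to -1 when
+ * no descriptor was attached. */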
+static int
+ivshmem_client_read_one_msg(IvshmemClient *client, int64_t *index, int *fd)
+{
+    int ret;
+    struct msghdr msg;
+    struct iovec iov[1];
+    union {
+        struct cmsghdr cmsg;
+        char control[CMSG_SPACE(sizeof(int))];
+    } msg_control;
+    struct cmsghdr *cmsg;
+
+    iov[0].iov_base = index;
+    iov[0].iov_len = sizeof(*index);
+
+    memset(&msg, 0, sizeof(msg));
+    msg.msg_iov = iov;
+    msg.msg_iovlen = 1;
+    msg.msg_control = &msg_control;
+    msg.msg_controllen = sizeof(msg_control);
+
+    ret = recvmsg(client->sock_fd, &msg, 0);
+    if (ret == 0) {
+        IVSHMEM_CLIENT_DEBUG(client, "lost connection to server\n");
+        return -1;
+    }
+    if (ret < 0 || (size_t)ret < sizeof(*index)) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot read message: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+
+    *index = GINT64_FROM_LE(*index);
+    *fd = -1;
+
+    for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+
+        if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)) ||
+            cmsg->cmsg_level != SOL_SOCKET ||
+            cmsg->cmsg_type != SCM_RIGHTS) {
+            continue;
+        }
+
+        memcpy(fd, CMSG_DATA(cmsg), sizeof(*fd));
+    }
+
+    return 0;
+}
+
+/* free a peer when the server advertises a disconnection or when the
+ * client is freed */
+static void
+ivshmem_client_free_peer(IvshmemClient *client, IvshmemClientPeer *peer)
+{
+    unsigned vector;
+
+    QTAILQ_REMOVE(&client->peer_list, peer, next);
+    for (vector = 0; vector < peer->vectors_count; vector++) {
+        close(peer->vectors[vector]);
+    }
+
+    g_free(peer);
+}
+
+/* handle message coming from server (new peer, new vectors) */
+static int
+ivshmem_client_handle_server_msg(IvshmemClient *client)
+{
+    IvshmemClientPeer *peer;
+    int64_t peer_id;
+    int ret, fd;
+
+    ret = ivshmem_client_read_one_msg(client, &peer_id, &fd);
+    if (ret < 0) {
+        return -1;
+    }
+
+    /* can return a peer or the local client */
+    peer = ivshmem_client_search_peer(client, peer_id);
+
+    /* delete peer */
+    if (fd == -1) {
+
+        if (peer == NULL || peer == &client->local) {
+            IVSHMEM_CLIENT_DEBUG(client, "receive delete for invalid "
+                                 "peer %" PRId64 "\n", peer_id);
+            return -1;
+        }
+
+        IVSHMEM_CLIENT_DEBUG(client, "delete peer id = %" PRId64 "\n", peer_id);
+        ivshmem_client_free_peer(client, peer);
+        return 0;
+    }
+
+    /* new peer */
+    if (peer == NULL) {
+        peer = g_malloc0(sizeof(*peer));
+        peer->id = peer_id;
+        peer->vectors_count = 0;
+        QTAILQ_INSERT_TAIL(&client->peer_list, peer, next);
+        IVSHMEM_CLIENT_DEBUG(client, "new peer id = %" PRId64 "\n", peer_id);
+    }
+
+    /* new vector */
+    IVSHMEM_CLIENT_DEBUG(client, "  new vector %d (fd=%d) for peer id %"
+                         PRId64 "\n", peer->vectors_count, fd, peer->id);
+    if (peer->vectors_count >= G_N_ELEMENTS(peer->vectors)) {
+        IVSHMEM_CLIENT_DEBUG(client, "Too many vectors received, failing");
+        return -1;
+    }
+
+    peer->vectors[peer->vectors_count] = fd;
+    peer->vectors_count++;
+
+    return 0;
+}
+
+/* init a new ivshmem client */
+int
+ivshmem_client_init(IvshmemClient *client, const char *unix_sock_path,
+                    IvshmemClientNotifCb notif_cb, void *notif_arg,
+                    bool verbose)
+{
+    int ret;
+    unsigned i;
+
+    memset(client, 0, sizeof(*client));
+
+    ret = snprintf(client->unix_sock_path, sizeof(client->unix_sock_path),
+                   "%s", unix_sock_path);
+
+    if (ret < 0 || ret >= sizeof(client->unix_sock_path)) {
+        IVSHMEM_CLIENT_DEBUG(client, "could not copy unix socket path\n");
+        return -1;
+    }
+
+    for (i = 0; i < IVSHMEM_CLIENT_MAX_VECTORS; i++) {
+        client->local.vectors[i] = -1;
+    }
+
+    QTAILQ_INIT(&client->peer_list);
+    client->local.id = -1;
+
+    client->notif_cb = notif_cb;
+    client->notif_arg = notif_arg;
+    client->verbose = verbose;
+    client->shm_fd = -1;
+    client->sock_fd = -1;
+
+    return 0;
+}
+
+/* create and connect to the unix socket */
+int
+ivshmem_client_connect(IvshmemClient *client)
+{
+    struct sockaddr_un sun;
+    int fd, ret;
+    int64_t tmp;
+
+    IVSHMEM_CLIENT_DEBUG(client, "connect to client %s\n",
+                         client->unix_sock_path);
+
+    client->sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (client->sock_fd < 0) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot create socket: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+
+    sun.sun_family = AF_UNIX;
+    ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s",
+                   client->unix_sock_path);
+    if (ret < 0 || ret >= sizeof(sun.sun_path)) {
+        IVSHMEM_CLIENT_DEBUG(client, "could not copy unix socket path\n");
+        goto err_close;
+    }
+
+    if (connect(client->sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot connect to %s: %s\n", sun.sun_path,
+                             strerror(errno));
+        goto err_close;
+    }
+
+    /* first, we expect a protocol version */
+    if (ivshmem_client_read_one_msg(client, &tmp, &fd) < 0 ||
+        (tmp != IVSHMEM_PROTOCOL_VERSION) || fd != -1) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot read from server\n");
+        goto err_close;
+    }
+
+    /* then, we expect our index + a fd == -1 */
+    if (ivshmem_client_read_one_msg(client, &client->local.id, &fd) < 0 ||
+        client->local.id < 0 || fd != -1) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot read from server (2)\n");
+        goto err_close;
+    }
+    IVSHMEM_CLIENT_DEBUG(client, "our_id=%" PRId64 "\n", client->local.id);
+
+    /* now, we expect shared mem fd + a -1 index, note that shm fd
+     * is not used */
+    if (ivshmem_client_read_one_msg(client, &tmp, &fd) < 0 ||
+        tmp != -1 || fd < 0) {
+        if (fd >= 0) {
+            close(fd);
+        }
+        IVSHMEM_CLIENT_DEBUG(client, "cannot read from server (3)\n");
+        goto err_close;
+    }
+    client->shm_fd = fd;
+    IVSHMEM_CLIENT_DEBUG(client, "shm_fd=%d\n", fd);
+
+    return 0;
+
+err_close:
+    close(client->sock_fd);
+    client->sock_fd = -1;
+    return -1;
+}
+
+/* close connection to the server, and free all peer structures */
+void
+ivshmem_client_close(IvshmemClient *client)
+{
+    IvshmemClientPeer *peer;
+    unsigned i;
+
+    IVSHMEM_CLIENT_DEBUG(client, "close client\n");
+
+    while ((peer = QTAILQ_FIRST(&client->peer_list)) != NULL) {
+        ivshmem_client_free_peer(client, peer);
+    }
+
+    close(client->shm_fd);
+    client->shm_fd = -1;
+    close(client->sock_fd);
+    client->sock_fd = -1;
+    client->local.id = -1;
+    for (i = 0; i < IVSHMEM_CLIENT_MAX_VECTORS; i++) {
+        close(client->local.vectors[i]);
+        client->local.vectors[i] = -1;
+    }
+    client->local.vectors_count = 0;
+}
+
+/* get the fd_set according to the unix socket and peer list */
+void
+ivshmem_client_get_fds(const IvshmemClient *client, fd_set *fds, int *maxfd)
+{
+    int fd;
+    unsigned vector;
+
+    FD_SET(client->sock_fd, fds);
+    if (client->sock_fd >= *maxfd) {
+        *maxfd = client->sock_fd + 1;
+    }
+
+    for (vector = 0; vector < client->local.vectors_count; vector++) {
+        fd = client->local.vectors[vector];
+        FD_SET(fd, fds);
+        if (fd >= *maxfd) {
+            *maxfd = fd + 1;
+        }
+    }
+}
+
+/* handle events from eventfd: just print a message on notification */
+static int
+ivshmem_client_handle_event(IvshmemClient *client, const fd_set *cur, int maxfd)
+{
+    IvshmemClientPeer *peer;
+    uint64_t kick;
+    unsigned i;
+    int ret;
+
+    peer = &client->local;
+
+    for (i = 0; i < peer->vectors_count; i++) {
+        if (peer->vectors[i] >= maxfd || !FD_ISSET(peer->vectors[i], cur)) {
+            continue;
+        }
+
+        ret = read(peer->vectors[i], &kick, sizeof(kick));
+        if (ret < 0) {
+            return ret;
+        }
+        if (ret != sizeof(kick)) {
+            IVSHMEM_CLIENT_DEBUG(client, "invalid read size = %d\n", ret);
+            errno = EINVAL;
+            return -1;
+        }
+        IVSHMEM_CLIENT_DEBUG(client, "received event on fd %d vector %d: %"
+                             PRIu64 "\n", peer->vectors[i], i, kick);
+        if (client->notif_cb != NULL) {
+            client->notif_cb(client, peer, i, client->notif_arg);
+        }
+    }
+
+    return 0;
+}
+
+/* read and handle new messages on the given fd_set */
+int
+ivshmem_client_handle_fds(IvshmemClient *client, fd_set *fds, int maxfd)
+{
+    if (client->sock_fd < maxfd && FD_ISSET(client->sock_fd, fds) &&
+        ivshmem_client_handle_server_msg(client) < 0 && errno != EINTR) {
+        IVSHMEM_CLIENT_DEBUG(client, "ivshmem_client_handle_server_msg() "
+                             "failed\n");
+        return -1;
+    } else if (ivshmem_client_handle_event(client, fds, maxfd) < 0 &&
+               errno != EINTR) {
+        IVSHMEM_CLIENT_DEBUG(client, "ivshmem_client_handle_event() failed\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+/* send a notification on a vector of a peer */
+int
+ivshmem_client_notify(const IvshmemClient *client,
+                      const IvshmemClientPeer *peer, unsigned vector)
+{
+    uint64_t kick;
+    int fd;
+
+    if (vector >= peer->vectors_count) {
+        IVSHMEM_CLIENT_DEBUG(client, "invalid vector %u on peer %" PRId64 "\n",
+                             vector, peer->id);
+        return -1;
+    }
+    fd = peer->vectors[vector];
+    IVSHMEM_CLIENT_DEBUG(client, "notify peer %" PRId64
+                         " on vector %d, fd %d\n", peer->id, vector, fd);
+
+    kick = 1;
+    if (write(fd, &kick, sizeof(kick)) != sizeof(kick)) {
+        fprintf(stderr, "could not write to %d: %s\n", peer->vectors[vector],
+                strerror(errno));
+        return -1;
+    }
+    return 0;
+}
+
+/* send a notification to all vectors of a peer */
+int
+ivshmem_client_notify_all_vects(const IvshmemClient *client,
+                                const IvshmemClientPeer *peer)
+{
+    unsigned vector;
+    int ret = 0;
+
+    for (vector = 0; vector < peer->vectors_count; vector++) {
+        if (ivshmem_client_notify(client, peer, vector) < 0) {
+            ret = -1;
+        }
+    }
+
+    return ret;
+}
+
+/* send a notification to all peers */
+int
+ivshmem_client_notify_broadcast(const IvshmemClient *client)
+{
+    IvshmemClientPeer *peer;
+    int ret = 0;
+
+    QTAILQ_FOREACH(peer, &client->peer_list, next) {
+        if (ivshmem_client_notify_all_vects(client, peer) < 0) {
+            ret = -1;
+        }
+    }
+
+    return ret;
+}
+
+/* lookup peer from its id */
+IvshmemClientPeer *
+ivshmem_client_search_peer(IvshmemClient *client, int64_t peer_id)
+{
+    IvshmemClientPeer *peer;
+
+    if (peer_id == client->local.id) {
+        return &client->local;
+    }
+
+    QTAILQ_FOREACH(peer, &client->peer_list, next) {
+        if (peer->id == peer_id) {
+            return peer;
+        }
+    }
+    return NULL;
+}
+
+/* dump our info and the list of peers and their vectors on stdout */
+void
+ivshmem_client_dump(const IvshmemClient *client)
+{
+    const IvshmemClientPeer *peer;
+    unsigned vector;
+
+    /* dump local infos */
+    peer = &client->local;
+    printf("our_id = %" PRId64 "\n", peer->id);
+    for (vector = 0; vector < peer->vectors_count; vector++) {
+        printf("  vector %d is enabled (fd=%d)\n", vector,
+               peer->vectors[vector]);
+    }
+
+    /* dump peers */
+    QTAILQ_FOREACH(peer, &client->peer_list, next) {
+        printf("peer_id = %" PRId64 "\n", peer->id);
+
+        for (vector = 0; vector < peer->vectors_count; vector++) {
+            printf("  vector %d is enabled (fd=%d)\n", vector,
+                   peer->vectors[vector]);
+        }
+    }
+}
diff --git a/contrib/ivshmem-client/ivshmem-client.h b/contrib/ivshmem-client/ivshmem-client.h
new file mode 100644
index 0000000000..3a4f809682
--- /dev/null
+++ b/contrib/ivshmem-client/ivshmem-client.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef _IVSHMEM_CLIENT_H_
+#define _IVSHMEM_CLIENT_H_
+
+/**
+ * This file provides helpers to implement an ivshmem client. It is used
+ * on the host to ask QEMU to send an interrupt to an ivshmem PCI device in a
+ * guest. QEMU also implements an ivshmem client similar to this one; both
+ * connect to an ivshmem server.
+ *
+ * A standalone ivshmem client based on this file is provided for debug/test
+ * purposes.
+ */
+
+#include <limits.h>
+#include <sys/select.h>
+
+#include "qemu/queue.h"
+#include "hw/misc/ivshmem.h"
+
+/**
+ * Maximum number of notification vectors supported by the client
+ */
+#define IVSHMEM_CLIENT_MAX_VECTORS 64
+
+/**
+ * Structure storing a peer
+ *
+ * Each time a client connects to an ivshmem server, it is advertised to
+ * all connected clients through the unix socket. When our ivshmem
+ * client receives a notification, it creates an IvshmemClientPeer
+ * structure to store the info for this peer.
+ *
+ * This structure is also used to store the information of our own
+ * client in (IvshmemClient)->local.
+ */
+typedef struct IvshmemClientPeer {
+    QTAILQ_ENTRY(IvshmemClientPeer) next;    /**< next in list*/
+    int64_t id;                              /**< the id of the peer */
+    int vectors[IVSHMEM_CLIENT_MAX_VECTORS]; /**< one fd per vector */
+    unsigned vectors_count;                  /**< number of vectors */
+} IvshmemClientPeer;
+QTAILQ_HEAD(IvshmemClientPeerList, IvshmemClientPeer);
+
+typedef struct IvshmemClientPeerList IvshmemClientPeerList;
+typedef struct IvshmemClient IvshmemClient;
+
+/**
+ * Typedef of callback function used when our IvshmemClient receives a
+ * notification from a peer.
+ */
+typedef void (*IvshmemClientNotifCb)(
+    const IvshmemClient *client,
+    const IvshmemClientPeer *peer,
+    unsigned vect, void *arg);
+
+/**
+ * Structure describing an ivshmem client
+ *
+ * This structure stores all information related to our client: the name
+ * of the server unix socket, the list of peers advertised by the
+ * server, our own client information, and a pointer to the notification
+ * callback function used when we receive a notification from a peer.
+ */
+struct IvshmemClient {
+    char unix_sock_path[PATH_MAX];      /**< path to unix sock */
+    int sock_fd;                        /**< unix sock filedesc */
+    int shm_fd;                         /**< shm file descriptor */
+
+    IvshmemClientPeerList peer_list;    /**< list of peers */
+    IvshmemClientPeer local;            /**< our own infos */
+
+    IvshmemClientNotifCb notif_cb;      /**< notification callback */
+    void *notif_arg;                    /**< notification argument */
+
+    bool verbose;                       /**< true to enable debug */
+};
+
+/**
+ * Initialize an ivshmem client
+ *
+ * @client:         A pointer to an uninitialized IvshmemClient structure
+ * @unix_sock_path: The pointer to the unix socket file name
+ * @notif_cb:       If not NULL, the pointer to the function to be called when
+ *                  our IvshmemClient receives a notification from a peer
+ * @notif_arg:      Opaque pointer given as-is to the notification callback
+ *                  function
+ * @verbose:        True to enable debug
+ *
+ * Returns:         0 on success, or a negative value on error
+ */
+int ivshmem_client_init(IvshmemClient *client, const char *unix_sock_path,
+                        IvshmemClientNotifCb notif_cb, void *notif_arg,
+                        bool verbose);
+
+/**
+ * Connect to the server
+ *
+ * Connect to the server unix socket, and read the first initial
+ * messages sent by the server, giving the ID of the client and the file
+ * descriptor of the shared memory.
+ *
+ * @client: The ivshmem client
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_client_connect(IvshmemClient *client);
+
+/**
+ * Close connection to the server and free all peer structures
+ *
+ * @client: The ivshmem client
+ */
+void ivshmem_client_close(IvshmemClient *client);
+
+/**
+ * Fill a fd_set with file descriptors to be monitored
+ *
+ * This function fills an fd_set with all file descriptors
+ * that must be polled (the unix socket to the server and our local
+ * eventfds). The function does not initialize the fd_set; it is up to
+ * the caller to do so.
+ *
+ * @client: The ivshmem client
+ * @fds:    The fd_set to be updated
+ * @maxfd:  Must be set to the max file descriptor + 1 in fd_set. This value is
+ *          updated if this function adds a greater fd in fd_set.
+ */
+void ivshmem_client_get_fds(const IvshmemClient *client, fd_set *fds,
+                            int *maxfd);
+
+/**
+ * Read and handle new messages
+ *
+ * Given a fd_set filled by select(), handle incoming messages from
+ * server or peers.
+ *
+ * @client: The ivshmem client
+ * @fds:    The fd_set containing the file descriptors to be checked. Note
+ *          that file descriptors that are not related to our client are
+ *          ignored.
+ * @maxfd:  The maximum fd in fd_set, plus one.
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_client_handle_fds(IvshmemClient *client, fd_set *fds, int maxfd);
+
+/**
+ * Send a notification to a vector of a peer
+ *
+ * @client: The ivshmem client
+ * @peer:   The peer to be notified
+ * @vector: The number of the vector
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_client_notify(const IvshmemClient *client,
+                          const IvshmemClientPeer *peer, unsigned vector);
+
+/**
+ * Send a notification to all vectors of a peer
+ *
+ * @client: The ivshmem client
+ * @peer:   The peer to be notified
+ *
+ * Returns: 0 on success, or a negative value on error (at least one
+ *          notification failed)
+ */
+int ivshmem_client_notify_all_vects(const IvshmemClient *client,
+                                    const IvshmemClientPeer *peer);
+
+/**
+ * Broadcast a notification to all vectors of all peers
+ *
+ * @client: The ivshmem client
+ *
+ * Returns: 0 on success, or a negative value on error (at least one
+ *          notification failed)
+ */
+int ivshmem_client_notify_broadcast(const IvshmemClient *client);
+
+/**
+ * Search a peer from its identifier
+ *
+ * Return the peer structure from its peer_id. If the given peer_id is
+ * the local id, the function returns the local peer structure.
+ *
+ * @client:  The ivshmem client
+ * @peer_id: The identifier of the peer structure
+ *
+ * Returns:  The peer structure, or NULL if not found
+ */
+IvshmemClientPeer *
+ivshmem_client_search_peer(IvshmemClient *client, int64_t peer_id);
+
+/**
+ * Dump information of this ivshmem client on stdout
+ *
+ * Dump the id and the vectors of the given ivshmem client and the list
+ * of its peers and their vectors on stdout.
+ *
+ * @client: The ivshmem client
+ */
+void ivshmem_client_dump(const IvshmemClient *client);
+
+#endif /* _IVSHMEM_CLIENT_H_ */
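Editor's note: the API above is exercised by contrib/ivshmem-client/main.c,
added just below. A condensed, hypothetical sketch of the same flow follows;
the callback and helper names and the socket path are illustrative and not
part of this patch.

#include <inttypes.h>
#include <stdio.h>
#include <sys/select.h>

#include "ivshmem-client.h"

/* hypothetical callback: just report which peer kicked which vector */
static void example_notify(const IvshmemClient *client,
                           const IvshmemClientPeer *peer,
                           unsigned vect, void *arg)
{
    printf("notification from peer %" PRId64 ", vector %u\n", peer->id, vect);
}

/* hypothetical driver: init, connect, then poll until an error occurs */
static int example_run(const char *sock_path)
{
    IvshmemClient client;
    fd_set fds;
    int maxfd;

    if (ivshmem_client_init(&client, sock_path, example_notify, NULL,
                            true) < 0 ||
        ivshmem_client_connect(&client) < 0) {
        return -1;
    }

    for (;;) {
        FD_ZERO(&fds);
        maxfd = 0;
        ivshmem_client_get_fds(&client, &fds, &maxfd);

        if (select(maxfd, &fds, NULL, NULL, NULL) < 0 ||
            ivshmem_client_handle_fds(&client, &fds, maxfd) < 0) {
            break;  /* select failed or connection to the server was lost */
        }
    }

    ivshmem_client_close(&client);
    return 0;
}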
diff --git a/contrib/ivshmem-client/main.c b/contrib/ivshmem-client/main.c
new file mode 100644
index 0000000000..c004870ae3
--- /dev/null
+++ b/contrib/ivshmem-client/main.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu-common.h"
+
+#include "ivshmem-client.h"
+
+#define IVSHMEM_CLIENT_DEFAULT_VERBOSE        0
+#define IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket"
+
+typedef struct IvshmemClientArgs {
+    bool verbose;
+    const char *unix_sock_path;
+} IvshmemClientArgs;
+
+/* show ivshmem_client_usage and exit with given error code */
+static void
+ivshmem_client_usage(const char *name, int code)
+{
+    fprintf(stderr, "%s [opts]\n", name);
+    fprintf(stderr, "  -h: show this help\n");
+    fprintf(stderr, "  -v: verbose mode\n");
+    fprintf(stderr, "  -S <unix_sock_path>: path to the unix socket\n"
+                    "     to connect to.\n"
+                    "     default=%s\n", IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH);
+    exit(code);
+}
+
+/* parse the program arguments, exit on error */
+static void
+ivshmem_client_parse_args(IvshmemClientArgs *args, int argc, char *argv[])
+{
+    int c;
+
+    while ((c = getopt(argc, argv,
+                       "h"  /* help */
+                       "v"  /* verbose */
+                       "S:" /* unix_sock_path */
+                      )) != -1) {
+
+        switch (c) {
+        case 'h': /* help */
+            ivshmem_client_usage(argv[0], 0);
+            break;
+
+        case 'v': /* verbose */
+            args->verbose = 1;
+            break;
+
+        case 'S': /* unix_sock_path */
+            args->unix_sock_path = optarg;
+            break;
+
+        default:
+            ivshmem_client_usage(argv[0], 1);
+            break;
+        }
+    }
+}
+
+/* show command line help */
+static void
+ivshmem_client_cmdline_help(void)
+{
+    printf("dump: dump peers (including us)\n"
+           "int <peer> <vector>: notify one vector on a peer\n"
+           "int <peer> all: notify all vectors of a peer\n"
+           "int all: notify all vectors of all peers (excepting us)\n");
+}
+
+/* read stdin and handle commands */
+static int
+ivshmem_client_handle_stdin_command(IvshmemClient *client)
+{
+    IvshmemClientPeer *peer;
+    char buf[128];
+    char *s, *token;
+    int ret;
+    int peer_id, vector;
+
+    memset(buf, 0, sizeof(buf));
+    ret = read(0, buf, sizeof(buf) - 1);
+    if (ret < 0) {
+        return -1;
+    }
+
+    s = buf;
+    while ((token = strsep(&s, "\n\r;")) != NULL) {
+        if (!strcmp(token, "")) {
+            continue;
+        }
+        if (!strcmp(token, "?")) {
+            ivshmem_client_cmdline_help();
+        }
+        if (!strcmp(token, "help")) {
+            ivshmem_client_cmdline_help();
+        } else if (!strcmp(token, "dump")) {
+            ivshmem_client_dump(client);
+        } else if (!strcmp(token, "int all")) {
+            ivshmem_client_notify_broadcast(client);
+        } else if (sscanf(token, "int %d %d", &peer_id, &vector) == 2) {
+            peer = ivshmem_client_search_peer(client, peer_id);
+            if (peer == NULL) {
+                printf("cannot find peer_id = %d\n", peer_id);
+                continue;
+            }
+            ivshmem_client_notify(client, peer, vector);
+        } else if (sscanf(token, "int %d all", &peer_id) == 1) {
+            peer = ivshmem_client_search_peer(client, peer_id);
+            if (peer == NULL) {
+                printf("cannot find peer_id = %d\n", peer_id);
+                continue;
+            }
+            ivshmem_client_notify_all_vects(client, peer);
+        } else {
+            printf("invalid command, type help\n");
+        }
+    }
+
+    printf("cmd> ");
+    fflush(stdout);
+    return 0;
+}
+
+/* listen on stdin (command line), on unix socket (notifications of new
+ * and dead peers), and on eventfd (IRQ request) */
+static int
+ivshmem_client_poll_events(IvshmemClient *client)
+{
+    fd_set fds;
+    int ret, maxfd;
+
+    while (1) {
+
+        FD_ZERO(&fds);
+        FD_SET(0, &fds); /* add stdin in fd_set */
+        maxfd = 1;
+
+        ivshmem_client_get_fds(client, &fds, &maxfd);
+
+        ret = select(maxfd, &fds, NULL, NULL, NULL);
+        if (ret < 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+
+            fprintf(stderr, "select error: %s\n", strerror(errno));
+            break;
+        }
+        if (ret == 0) {
+            continue;
+        }
+
+        if (FD_ISSET(0, &fds) &&
+            ivshmem_client_handle_stdin_command(client) < 0 && errno != EINTR) {
+            fprintf(stderr, "ivshmem_client_handle_stdin_command() failed\n");
+            break;
+        }
+
+        if (ivshmem_client_handle_fds(client, &fds, maxfd) < 0) {
+            fprintf(stderr, "ivshmem_client_handle_fds() failed\n");
+            break;
+        }
+    }
+
+    return ret;
+}
+
+/* callback when we receive a notification (just display it) */
+static void
+ivshmem_client_notification_cb(const IvshmemClient *client,
+                               const IvshmemClientPeer *peer,
+                               unsigned vect, void *arg)
+{
+    (void)client;
+    (void)arg;
+    printf("receive notification from peer_id=%" PRId64 " vector=%u\n",
+           peer->id, vect);
+}
+
+int
+main(int argc, char *argv[])
+{
+    struct sigaction sa;
+    IvshmemClient client;
+    IvshmemClientArgs args = {
+        .verbose = IVSHMEM_CLIENT_DEFAULT_VERBOSE,
+        .unix_sock_path = IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH,
+    };
+
+    /* parse arguments, will exit on error */
+    ivshmem_client_parse_args(&args, argc, argv);
+
+    /* Ignore SIGPIPE, see this link for more info:
+     * http://www.mail-archive.com/libevent-users@monkey.org/msg01606.html */
+    sa.sa_handler = SIG_IGN;
+    sa.sa_flags = 0;
+    if (sigemptyset(&sa.sa_mask) == -1 ||
+        sigaction(SIGPIPE, &sa, 0) == -1) {
+        perror("failed to ignore SIGPIPE; sigaction");
+        return 1;
+    }
+
+    ivshmem_client_cmdline_help();
+    printf("cmd> ");
+    fflush(stdout);
+
+    if (ivshmem_client_init(&client, args.unix_sock_path,
+                            ivshmem_client_notification_cb, NULL,
+                            args.verbose) < 0) {
+        fprintf(stderr, "cannot init client\n");
+        return 1;
+    }
+
+    while (1) {
+        if (ivshmem_client_connect(&client) < 0) {
+            fprintf(stderr, "cannot connect to server, retry in 1 second\n");
+            sleep(1);
+            continue;
+        }
+
+        fprintf(stdout, "listen on server socket %d\n", client.sock_fd);
+
+        if (ivshmem_client_poll_events(&client) == 0) {
+            continue;
+        }
+
+        /* disconnected from server, reset all peers */
+        fprintf(stdout, "disconnected from server\n");
+
+        ivshmem_client_close(&client);
+    }
+
+    return 0;
+}
diff --git a/contrib/ivshmem-server/Makefile.objs b/contrib/ivshmem-server/Makefile.objs
new file mode 100644
index 0000000000..c060dd3698
--- /dev/null
+++ b/contrib/ivshmem-server/Makefile.objs
@@ -0,0 +1 @@
+ivshmem-server-obj-y = ivshmem-server.o main.o
diff --git a/contrib/ivshmem-server/ivshmem-server.c b/contrib/ivshmem-server/ivshmem-server.c
new file mode 100644
index 0000000000..5e5239ce45
--- /dev/null
+++ b/contrib/ivshmem-server/ivshmem-server.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+#include "qemu-common.h"
+#include "qemu/sockets.h"
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#ifdef CONFIG_LINUX
+#include <sys/vfs.h>
+#endif
+
+#include "ivshmem-server.h"
+
+/* log a message on stdout if verbose=1 */
+#define IVSHMEM_SERVER_DEBUG(server, fmt, ...) do { \
+        if ((server)->verbose) {         \
+            printf(fmt, ## __VA_ARGS__); \
+        }                                \
+    } while (0)
+
+/** maximum size of a huge page, used by ivshmem_server_ftruncate() */
+#define IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE (1024 * 1024 * 1024)
+
+/** default listen backlog (number of sockets not accepted) */
+#define IVSHMEM_SERVER_LISTEN_BACKLOG 10
+
+/* send message to a client unix socket */
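+/* Mirror of the client-side read: a little-endian int64 peer id is written
+ * and, when fd >= 0, that descriptor is attached as SCM_RIGHTS ancillary
+ * data. */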
+static int
+ivshmem_server_send_one_msg(int sock_fd, int64_t peer_id, int fd)
+{
+    int ret;
+    struct msghdr msg;
+    struct iovec iov[1];
+    union {
+        struct cmsghdr cmsg;
+        char control[CMSG_SPACE(sizeof(int))];
+    } msg_control;
+    struct cmsghdr *cmsg;
+
+    peer_id = GINT64_TO_LE(peer_id);
+    iov[0].iov_base = &peer_id;
+    iov[0].iov_len = sizeof(peer_id);
+
+    memset(&msg, 0, sizeof(msg));
+    msg.msg_iov = iov;
+    msg.msg_iovlen = 1;
+
+    /* if fd is specified, add it in a cmsg */
+    if (fd >= 0) {
+        memset(&msg_control, 0, sizeof(msg_control));
+        msg.msg_control = &msg_control;
+        msg.msg_controllen = sizeof(msg_control);
+        cmsg = CMSG_FIRSTHDR(&msg);
+        cmsg->cmsg_level = SOL_SOCKET;
+        cmsg->cmsg_type = SCM_RIGHTS;
+        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+        memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
+    }
+
+    ret = sendmsg(sock_fd, &msg, 0);
+    if (ret <= 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/* free a peer when the server advertises a disconnection or when the
+ * server is freed */
+static void
+ivshmem_server_free_peer(IvshmemServer *server, IvshmemServerPeer *peer)
+{
+    unsigned vector;
+    IvshmemServerPeer *other_peer;
+
+    IVSHMEM_SERVER_DEBUG(server, "free peer %" PRId64 "\n", peer->id);
+    close(peer->sock_fd);
+    QTAILQ_REMOVE(&server->peer_list, peer, next);
+
+    /* advertise the deletion to other peers */
+    QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
+        ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id, -1);
+    }
+
+    for (vector = 0; vector < peer->vectors_count; vector++) {
+        event_notifier_cleanup(&peer->vectors[vector]);
+    }
+
+    g_free(peer);
+}
+
+/* send the peer id and the shm_fd just after a new client connection */
+static int
+ivshmem_server_send_initial_info(IvshmemServer *server, IvshmemServerPeer *peer)
+{
+    int ret;
+
+    /* send our protocol version first */
+    ret = ivshmem_server_send_one_msg(peer->sock_fd, IVSHMEM_PROTOCOL_VERSION,
+                                      -1);
+    if (ret < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot send version: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+
+    /* send the peer id to the client */
+    ret = ivshmem_server_send_one_msg(peer->sock_fd, peer->id, -1);
+    if (ret < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot send peer id: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+
+    /* send the shm_fd */
+    ret = ivshmem_server_send_one_msg(peer->sock_fd, -1, server->shm_fd);
+    if (ret < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot send shm fd: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+
+    return 0;
+}
+
+/* handle message on listening unix socket (new client connection) */
+static int
+ivshmem_server_handle_new_conn(IvshmemServer *server)
+{
+    IvshmemServerPeer *peer, *other_peer;
+    struct sockaddr_un unaddr;
+    socklen_t unaddr_len;
+    int newfd;
+    unsigned i;
+
+    /* accept the incoming connection */
+    unaddr_len = sizeof(unaddr);
+    newfd = qemu_accept(server->sock_fd,
+                        (struct sockaddr *)&unaddr, &unaddr_len);
+
+    if (newfd < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot accept() %s\n", strerror(errno));
+        return -1;
+    }
+
+    qemu_set_nonblock(newfd);
+    IVSHMEM_SERVER_DEBUG(server, "accept()=%d\n", newfd);
+
+    /* allocate new structure for this peer */
+    peer = g_malloc0(sizeof(*peer));
+    peer->sock_fd = newfd;
+
+    /* get an unused peer id */
+    /* XXX: this could use id allocation such as Linux IDA, or simply
+     * a free-list */
+    for (i = 0; i < G_MAXUINT16; i++) {
+        if (ivshmem_server_search_peer(server, server->cur_id) == NULL) {
+            break;
+        }
+        server->cur_id++;
+    }
+    if (i == G_MAXUINT16) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot allocate new client id\n");
+        goto fail;
+    }
+    peer->id = server->cur_id++;
+
+    /* create eventfd, one per vector */
+    peer->vectors_count = server->n_vectors;
+    for (i = 0; i < peer->vectors_count; i++) {
+        if (event_notifier_init(&peer->vectors[i], FALSE) < 0) {
+            IVSHMEM_SERVER_DEBUG(server, "cannot create eventfd\n");
+            goto fail;
+        }
+    }
+
+    /* send peer id and shm fd */
+    if (ivshmem_server_send_initial_info(server, peer) < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot send initial info\n");
+        goto fail;
+    }
+
+    /* advertise the new peer to others */
+    QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
+        for (i = 0; i < peer->vectors_count; i++) {
+            ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id,
+                                        peer->vectors[i].wfd);
+        }
+    }
+
+    /* advertise the other peers to the new one */
+    QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
+        for (i = 0; i < peer->vectors_count; i++) {
+            ivshmem_server_send_one_msg(peer->sock_fd, other_peer->id,
+                                        other_peer->vectors[i].wfd);
+        }
+    }
+
+    /* advertise the new peer to itself */
+    for (i = 0; i < peer->vectors_count; i++) {
+        ivshmem_server_send_one_msg(peer->sock_fd, peer->id,
+                                    event_notifier_get_fd(&peer->vectors[i]));
+    }
+
+    QTAILQ_INSERT_TAIL(&server->peer_list, peer, next);
+    IVSHMEM_SERVER_DEBUG(server, "new peer id = %" PRId64 "\n",
+                         peer->id);
+    return 0;
+
+fail:
+    while (i--) {
+        event_notifier_cleanup(&peer->vectors[i]);
+    }
+    close(newfd);
+    g_free(peer);
+    return -1;
+}
+
+/* Try to ftruncate a file to the next power of 2 of shmsize.
+ * If that fails, all powers of 2 above shmsize are tested until
+ * we reach the maximum huge page size. This is useful
+ * if the shm file is in a hugetlbfs that cannot be truncated to the
+ * shm_size value. */
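+/* Example (sizes assumed for illustration): a request for 5 MB is first
+ * rounded up to 8 MB; if ftruncate() rejects that (e.g. on a hugetlbfs
+ * mounted with 1 GB pages), 16 MB, 32 MB, ... are tried until one succeeds
+ * or the 1 GB cap is exceeded. */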
+static int
+ivshmem_server_ftruncate(int fd, unsigned shmsize)
+{
+    int ret;
+    struct stat mapstat;
+
+    /* align shmsize to next power of 2 */
+    shmsize = pow2ceil(shmsize);
+
+    if (fstat(fd, &mapstat) != -1 && mapstat.st_size == shmsize) {
+        return 0;
+    }
+
+    while (shmsize <= IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE) {
+        ret = ftruncate(fd, shmsize);
+        if (ret == 0) {
+            return ret;
+        }
+        shmsize *= 2;
+    }
+
+    return -1;
+}
+
+/* Init a new ivshmem server */
+int
+ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
+                    const char *shm_path, size_t shm_size, unsigned n_vectors,
+                    bool verbose)
+{
+    int ret;
+
+    memset(server, 0, sizeof(*server));
+    server->verbose = verbose;
+
+    ret = snprintf(server->unix_sock_path, sizeof(server->unix_sock_path),
+                   "%s", unix_sock_path);
+    if (ret < 0 || ret >= sizeof(server->unix_sock_path)) {
+        IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
+        return -1;
+    }
+    ret = snprintf(server->shm_path, sizeof(server->shm_path),
+                   "%s", shm_path);
+    if (ret < 0 || ret >= sizeof(server->shm_path)) {
+        IVSHMEM_SERVER_DEBUG(server, "could not copy shm path\n");
+        return -1;
+    }
+
+    server->shm_size = shm_size;
+    server->n_vectors = n_vectors;
+
+    QTAILQ_INIT(&server->peer_list);
+
+    return 0;
+}
+
+#ifdef CONFIG_LINUX
+
+#define HUGETLBFS_MAGIC       0x958458f6
+
+static long gethugepagesize(const char *path)
+{
+    struct statfs fs;
+    int ret;
+
+    do {
+        ret = statfs(path, &fs);
+    } while (ret != 0 && errno == EINTR);
+
+    if (ret != 0) {
+        return -1;
+    }
+
+    if (fs.f_type != HUGETLBFS_MAGIC) {
+        return -1;
+    }
+
+    return fs.f_bsize;
+}
+#endif
+
+/* open shm, create and bind to the unix socket */
+int
+ivshmem_server_start(IvshmemServer *server)
+{
+    struct sockaddr_un sun;
+    int shm_fd, sock_fd, ret;
+
+    /* open shm file */
+#ifdef CONFIG_LINUX
+    long hpagesize;
+
+    hpagesize = gethugepagesize(server->shm_path);
+    if (hpagesize < 0 && errno != ENOENT) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot stat shm file %s: %s\n",
+                             server->shm_path, strerror(errno));
+    }
+
+    if (hpagesize > 0) {
+        gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
+        IVSHMEM_SERVER_DEBUG(server, "Using hugepages: %s\n", server->shm_path);
+        shm_fd = mkstemp(filename);
+        unlink(filename);
+        g_free(filename);
+    } else
+#endif
+    {
+        IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n",
+                             server->shm_path);
+        shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
+    }
+
+    if (shm_fd < 0) {
+        fprintf(stderr, "cannot open shm file %s: %s\n", server->shm_path,
+                strerror(errno));
+        return -1;
+    }
+    if (ivshmem_server_ftruncate(shm_fd, server->shm_size) < 0) {
+        fprintf(stderr, "ftruncate(%s) failed: %s\n", server->shm_path,
+                strerror(errno));
+        goto err_close_shm;
+    }
+
+    IVSHMEM_SERVER_DEBUG(server, "create & bind socket %s\n",
+                         server->unix_sock_path);
+
+    /* create the unix listening socket */
+    sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (sock_fd < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot create socket: %s\n",
+                             strerror(errno));
+        goto err_close_shm;
+    }
+
+    sun.sun_family = AF_UNIX;
+    ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s",
+                   server->unix_sock_path);
+    if (ret < 0 || ret >= sizeof(sun.sun_path)) {
+        IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
+        goto err_close_sock;
+    }
+    if (bind(sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot connect to %s: %s\n", sun.sun_path,
+                             strerror(errno));
+        goto err_close_sock;
+    }
+
+    if (listen(sock_fd, IVSHMEM_SERVER_LISTEN_BACKLOG) < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "listen() failed: %s\n", strerror(errno));
+        goto err_close_sock;
+    }
+
+    server->sock_fd = sock_fd;
+    server->shm_fd = shm_fd;
+
+    return 0;
+
+err_close_sock:
+    close(sock_fd);
+err_close_shm:
+    close(shm_fd);
+    return -1;
+}
+
+/* close connections to clients, the unix socket and the shm fd */
+void
+ivshmem_server_close(IvshmemServer *server)
+{
+    IvshmemServerPeer *peer, *npeer;
+
+    IVSHMEM_SERVER_DEBUG(server, "close server\n");
+
+    QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, npeer) {
+        ivshmem_server_free_peer(server, peer);
+    }
+
+    unlink(server->unix_sock_path);
+    close(server->sock_fd);
+    close(server->shm_fd);
+    server->sock_fd = -1;
+    server->shm_fd = -1;
+}
+
+/* get the fd_set according to the unix socket and the peer list */
+void
+ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd)
+{
+    IvshmemServerPeer *peer;
+
+    if (server->sock_fd == -1) {
+        return;
+    }
+
+    FD_SET(server->sock_fd, fds);
+    if (server->sock_fd >= *maxfd) {
+        *maxfd = server->sock_fd + 1;
+    }
+
+    QTAILQ_FOREACH(peer, &server->peer_list, next) {
+        FD_SET(peer->sock_fd, fds);
+        if (peer->sock_fd >= *maxfd) {
+            *maxfd = peer->sock_fd + 1;
+        }
+    }
+}
+
+/* process incoming messages on the sockets in fd_set */
+int
+ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd)
+{
+    IvshmemServerPeer *peer, *peer_next;
+
+    if (server->sock_fd < maxfd && FD_ISSET(server->sock_fd, fds) &&
+        ivshmem_server_handle_new_conn(server) < 0 && errno != EINTR) {
+        IVSHMEM_SERVER_DEBUG(server, "ivshmem_server_handle_new_conn() "
+                             "failed\n");
+        return -1;
+    }
+
+    QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, peer_next) {
+        /* any message from a peer socket results in a close() */
+        IVSHMEM_SERVER_DEBUG(server, "peer->sock_fd=%d\n", peer->sock_fd);
+        if (peer->sock_fd < maxfd && FD_ISSET(peer->sock_fd, fds)) {
+            ivshmem_server_free_peer(server, peer);
+        }
+    }
+
+    return 0;
+}
+
+/* lookup peer from its id */
+IvshmemServerPeer *
+ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id)
+{
+    IvshmemServerPeer *peer;
+
+    QTAILQ_FOREACH(peer, &server->peer_list, next) {
+        if (peer->id == peer_id) {
+            return peer;
+        }
+    }
+    return NULL;
+}
+
+/* dump the list of peers and their vectors on stdout */
+void
+ivshmem_server_dump(const IvshmemServer *server)
+{
+    const IvshmemServerPeer *peer;
+    unsigned vector;
+
+    /* dump peers */
+    QTAILQ_FOREACH(peer, &server->peer_list, next) {
+        printf("peer_id = %" PRId64 "\n", peer->id);
+
+        for (vector = 0; vector < peer->vectors_count; vector++) {
+            printf("  vector %d is enabled (fd=%d)\n", vector,
+                   event_notifier_get_fd(&peer->vectors[vector]));
+        }
+    }
+}
diff --git a/contrib/ivshmem-server/ivshmem-server.h b/contrib/ivshmem-server/ivshmem-server.h
new file mode 100644
index 0000000000..c9359a0a8a
--- /dev/null
+++ b/contrib/ivshmem-server/ivshmem-server.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef _IVSHMEM_SERVER_H_
+#define _IVSHMEM_SERVER_H_
+
+/**
+ * The ivshmem server is a daemon that creates a unix socket in listen
+ * mode. The ivshmem clients (qemu or ivshmem-client) connect to this
+ * unix socket. For each client, the server creates several eventfds
+ * (see EVENTFD(2)), one per vector. These fds are transmitted to all
+ * clients using the SCM_RIGHTS cmsg message. Therefore, each client is
+ * able to send a notification to another client without being
+ * proxied by the server.
+ *
+ * We use this mechanism to send interrupts between guests.
+ * QEMU is able to transform an event on an eventfd into a PCI MSI-X
+ * interrupt in the guest.
+ *
+ * The ivshmem server is also able to share the file descriptor
+ * associated with the ivshmem shared memory.
+ */
+
+#include <limits.h>
+#include <sys/select.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "qemu/event_notifier.h"
+#include "qemu/queue.h"
+#include "hw/misc/ivshmem.h"
+
+/**
+ * Maximum number of notification vectors supported by the server
+ */
+#define IVSHMEM_SERVER_MAX_VECTORS 64
+
+/**
+ * Structure storing a peer
+ *
+ * Each time a client connects to an ivshmem server, a new
+ * IvshmemServerPeer structure is created. This peer and all its
+ * vectors are advertised to all connected clients through the connected
+ * unix sockets.
+ */
+typedef struct IvshmemServerPeer {
+    QTAILQ_ENTRY(IvshmemServerPeer) next;    /**< next in list*/
+    int sock_fd;                             /**< connected unix sock */
+    int64_t id;                              /**< the id of the peer */
+    EventNotifier vectors[IVSHMEM_SERVER_MAX_VECTORS]; /**< one per vector */
+    unsigned vectors_count;                  /**< number of vectors */
+} IvshmemServerPeer;
+QTAILQ_HEAD(IvshmemServerPeerList, IvshmemServerPeer);
+
+typedef struct IvshmemServerPeerList IvshmemServerPeerList;
+
+/**
+ * Structure describing an ivshmem server
+ *
+ * This structure stores all information related to our server: the name
+ * of the server unix socket and the list of connected peers.
+ */
+typedef struct IvshmemServer {
+    char unix_sock_path[PATH_MAX];   /**< path to unix socket */
+    int sock_fd;                     /**< unix sock file descriptor */
+    char shm_path[PATH_MAX];         /**< path to shm */
+    size_t shm_size;                 /**< size of shm */
+    int shm_fd;                      /**< shm file descriptor */
+    unsigned n_vectors;              /**< number of vectors */
+    uint16_t cur_id;                 /**< id to be given to next client */
+    bool verbose;                    /**< true in verbose mode */
+    IvshmemServerPeerList peer_list; /**< list of peers */
+} IvshmemServer;
+
+/**
+ * Initialize an ivshmem server
+ *
+ * @server:         A pointer to an uninitialized IvshmemServer structure
+ * @unix_sock_path: The pointer to the unix socket file name
+ * @shm_path:       Path to the shared memory. The path corresponds to a POSIX
+ *                  shm name or a hugetlbfs mount point.
+ * @shm_size:       Size of shared memory
+ * @n_vectors:      Number of interrupt vectors per client
+ * @verbose:        True to enable verbose mode
+ *
+ * Returns:         0 on success, or a negative value on error
+ */
+int
+ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
+                    const char *shm_path, size_t shm_size, unsigned n_vectors,
+                    bool verbose);
+
+/**
+ * Open the shm, then create and bind to the unix socket
+ *
+ * @server: The pointer to the initialized IvshmemServer structure
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_server_start(IvshmemServer *server);
+
+/**
+ * Close the server
+ *
+ * Close connections to all clients, close the unix socket and the
+ * shared memory file descriptor. The structure remains initialized, so
+ * it is possible to call ivshmem_server_start() again after a call to
+ * ivshmem_server_close().
+ *
+ * @server: The ivshmem server
+ */
+void ivshmem_server_close(IvshmemServer *server);
+
+/**
+ * Fill a fd_set with file descriptors to be monitored
+ *
+ * This function fills an fd_set with all file descriptors that must
+ * be polled (the listening unix socket and the peers' unix sockets). The
+ * function does not initialize the fd_set; it is up to the caller to do so.
+ *
+ * @server: The ivshmem server
+ * @fds:    The fd_set to be updated
+ * @maxfd:  Must be set to the max file descriptor + 1 in fd_set. This value is
+ *          updated if this function adds a greater fd in fd_set.
+ */
+void
+ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd);
+
+/**
+ * Read and handle new messages
+ *
+ * Given a fd_set (for instance filled by a call to select()), handle
+ * incoming messages from peers.
+ *
+ * @server: The ivshmem server
+ * @fds:    The fd_set containing the file descriptors to be checked. Note that
+ *          file descriptors that are not related to our server are ignored.
+ * @maxfd:  The maximum fd in fd_set, plus one.
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd);
+
+/**
+ * Search a peer from its identifier
+ *
+ * @server:  The ivshmem server
+ * @peer_id: The identifier of the peer structure
+ *
+ * Returns:  The peer structure, or NULL if not found
+ */
+IvshmemServerPeer *
+ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id);
+
+/**
+ * Dump information of this ivshmem server and its peers on stdout
+ *
+ * @server: The ivshmem server
+ */
+void ivshmem_server_dump(const IvshmemServer *server);
+
+#endif /* _IVSHMEM_SERVER_H_ */
diff --git a/contrib/ivshmem-server/main.c b/contrib/ivshmem-server/main.c
new file mode 100644
index 0000000000..54ff001c23
--- /dev/null
+++ b/contrib/ivshmem-server/main.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu-common.h"
+
+#include "ivshmem-server.h"
+
+#define IVSHMEM_SERVER_DEFAULT_VERBOSE        0
+#define IVSHMEM_SERVER_DEFAULT_FOREGROUND     0
+#define IVSHMEM_SERVER_DEFAULT_PID_FILE       "/var/run/ivshmem-server.pid"
+#define IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket"
+#define IVSHMEM_SERVER_DEFAULT_SHM_PATH       "ivshmem"
+#define IVSHMEM_SERVER_DEFAULT_SHM_SIZE       (4*1024*1024)
+#define IVSHMEM_SERVER_DEFAULT_N_VECTORS      1
+
+/* used to quit on signal SIGTERM */
+static int ivshmem_server_quit;
+
+/* arguments given by the user */
+typedef struct IvshmemServerArgs {
+    bool verbose;
+    bool foreground;
+    const char *pid_file;
+    const char *unix_socket_path;
+    const char *shm_path;
+    uint64_t shm_size;
+    unsigned n_vectors;
+} IvshmemServerArgs;
+
+/* show ivshmem_server_usage and exit with given error code */
+static void
+ivshmem_server_usage(const char *name, int code)
+{
+    fprintf(stderr, "%s [opts]\n", name);
+    fprintf(stderr, "  -h: show this help\n");
+    fprintf(stderr, "  -v: verbose mode\n");
+    fprintf(stderr, "  -F: foreground mode (default is to daemonize)\n");
+    fprintf(stderr, "  -p <pid_file>: path to the PID file (used in daemon\n"
+                    "     mode only).\n"
+                    "     Default=%s\n", IVSHMEM_SERVER_DEFAULT_PID_FILE);
+    fprintf(stderr, "  -S <unix_socket_path>: path to the unix socket\n"
+                    "     to listen to.\n"
+                    "     Default=%s\n", IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH);
+    fprintf(stderr, "  -m <shm_path>: path to the shared memory.\n"
+                    "     The path corresponds to a POSIX shm name or a\n"
+                    "     hugetlbfs mount point.\n"
+                    "     default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
+    fprintf(stderr, "  -l <size>: size of shared memory in bytes. The suffix\n"
+                    "     K, M and G can be used (ex: 1K means 1024).\n"
+                    "     default=%u\n", IVSHMEM_SERVER_DEFAULT_SHM_SIZE);
+    fprintf(stderr, "  -n <n_vects>: number of vectors.\n"
+                    "     default=%u\n", IVSHMEM_SERVER_DEFAULT_N_VECTORS);
+
+    exit(code);
+}
+
+/* parse the program arguments, exit on error */
+static void
+ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
+{
+    int c;
+    unsigned long long v;
+    Error *errp = NULL;
+
+    while ((c = getopt(argc, argv,
+                       "h"  /* help */
+                       "v"  /* verbose */
+                       "F"  /* foreground */
+                       "p:" /* pid_file */
+                       "S:" /* unix_socket_path */
+                       "m:" /* shm_path */
+                       "l:" /* shm_size */
+                       "n:" /* n_vectors */
+                      )) != -1) {
+
+        switch (c) {
+        case 'h': /* help */
+            ivshmem_server_usage(argv[0], 0);
+            break;
+
+        case 'v': /* verbose */
+            args->verbose = 1;
+            break;
+
+        case 'F': /* foreground */
+            args->foreground = 1;
+            break;
+
+        case 'p': /* pid_file */
+            args->pid_file = optarg;
+            break;
+
+        case 'S': /* unix_socket_path */
+            args->unix_socket_path = optarg;
+            break;
+
+        case 'm': /* shm_path */
+            args->shm_path = optarg;
+            break;
+
+        case 'l': /* shm_size */
+            parse_option_size("shm_size", optarg, &args->shm_size, &errp);
+            if (errp) {
+                fprintf(stderr, "cannot parse shm size: %s\n",
+                        error_get_pretty(errp));
+                error_free(errp);
+                ivshmem_server_usage(argv[0], 1);
+            }
+            break;
+
+        case 'n': /* n_vectors */
+            if (parse_uint_full(optarg, &v, 0) < 0) {
+                fprintf(stderr, "cannot parse n_vectors\n");
+                ivshmem_server_usage(argv[0], 1);
+            }
+            args->n_vectors = v;
+            break;
+
+        default:
+            ivshmem_server_usage(argv[0], 1);
+            break;
+        }
+    }
+
+    if (args->n_vectors > IVSHMEM_SERVER_MAX_VECTORS) {
+        fprintf(stderr, "too many requested vectors (max is %d)\n",
+                IVSHMEM_SERVER_MAX_VECTORS);
+        ivshmem_server_usage(argv[0], 1);
+    }
+
+    if (args->verbose == 1 && args->foreground == 0) {
+        fprintf(stderr, "cannot use verbose in daemon mode\n");
+        ivshmem_server_usage(argv[0], 1);
+    }
+}
+
+/* wait for events on listening server unix socket and connected client
+ * sockets */
+static int
+ivshmem_server_poll_events(IvshmemServer *server)
+{
+    fd_set fds;
+    int ret = 0, maxfd;
+
+    while (!ivshmem_server_quit) {
+
+        FD_ZERO(&fds);
+        maxfd = 0;
+        ivshmem_server_get_fds(server, &fds, &maxfd);
+
+        ret = select(maxfd, &fds, NULL, NULL, NULL);
+
+        if (ret < 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+
+            fprintf(stderr, "select error: %s\n", strerror(errno));
+            break;
+        }
+        if (ret == 0) {
+            continue;
+        }
+
+        if (ivshmem_server_handle_fds(server, &fds, maxfd) < 0) {
+            fprintf(stderr, "ivshmem_server_handle_fds() failed\n");
+            break;
+        }
+    }
+
+    return ret;
+}
+
+static void
+ivshmem_server_quit_cb(int signum)
+{
+    ivshmem_server_quit = 1;
+}
+
+int
+main(int argc, char *argv[])
+{
+    IvshmemServer server;
+    struct sigaction sa, sa_quit;
+    IvshmemServerArgs args = {
+        .verbose = IVSHMEM_SERVER_DEFAULT_VERBOSE,
+        .foreground = IVSHMEM_SERVER_DEFAULT_FOREGROUND,
+        .pid_file = IVSHMEM_SERVER_DEFAULT_PID_FILE,
+        .unix_socket_path = IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH,
+        .shm_path = IVSHMEM_SERVER_DEFAULT_SHM_PATH,
+        .shm_size = IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
+        .n_vectors = IVSHMEM_SERVER_DEFAULT_N_VECTORS,
+    };
+    int ret = 1;
+
+    /* parse arguments, will exit on error */
+    ivshmem_server_parse_args(&args, argc, argv);
+
+    /* Ignore SIGPIPE, see this link for more info:
+     * http://www.mail-archive.com/libevent-users@monkey.org/msg01606.html */
+    sa.sa_handler = SIG_IGN;
+    sa.sa_flags = 0;
+    if (sigemptyset(&sa.sa_mask) == -1 ||
+        sigaction(SIGPIPE, &sa, 0) == -1) {
+        perror("failed to ignore SIGPIPE; sigaction");
+        goto err;
+    }
+
+    sa_quit.sa_handler = ivshmem_server_quit_cb;
+    sa_quit.sa_flags = 0;
+    if (sigemptyset(&sa_quit.sa_mask) == -1 ||
+        sigaction(SIGTERM, &sa_quit, 0) == -1) {
+        perror("failed to add SIGTERM handler; sigaction");
+        goto err;
+    }
+
+    /* init the ivshmem server structure */
+    if (ivshmem_server_init(&server, args.unix_socket_path, args.shm_path,
+                            args.shm_size, args.n_vectors, args.verbose) < 0) {
+        fprintf(stderr, "cannot init server\n");
+        goto err;
+    }
+
+    /* start the ivshmem server (open shm & unix socket) */
+    if (ivshmem_server_start(&server) < 0) {
+        fprintf(stderr, "cannot bind\n");
+        goto err;
+    }
+
+    /* daemonize if asked to */
+    if (!args.foreground) {
+        FILE *fp;
+
+        if (qemu_daemon(1, 1) < 0) {
+            fprintf(stderr, "cannot daemonize: %s\n", strerror(errno));
+            goto err_close;
+        }
+
+        /* write pid file */
+        fp = fopen(args.pid_file, "w");
+        if (fp == NULL) {
+            fprintf(stderr, "cannot write pid file: %s\n", strerror(errno));
+            goto err_close;
+        }
+
+        fprintf(fp, "%d\n", (int) getpid());
+        fclose(fp);
+    }
+
+    ivshmem_server_poll_events(&server);
+    fprintf(stdout, "server disconnected\n");
+    ret = 0;
+
+err_close:
+    ivshmem_server_close(&server);
+err:
+    return ret;
+}
diff --git a/cpu-exec.c b/cpu-exec.c
index 8fd56a69e0..7eef0830fe 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -477,7 +477,8 @@ int cpu_exec(CPUState *cpu)
                 /* see if we can patch the calling TB. When the TB
                    spans two pages, we cannot safely do a direct
                    jump. */
-                if (next_tb != 0 && tb->page_addr[1] == -1) {
+                if (next_tb != 0 && tb->page_addr[1] == -1
+                    && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
                     tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
                                 next_tb & TB_EXIT_MASK, tb);
                 }
diff --git a/crypto/cipher-builtin.c b/crypto/cipher-builtin.c
index 30f4853c86..39e31a7f42 100644
--- a/crypto/cipher-builtin.c
+++ b/crypto/cipher-builtin.c
@@ -25,8 +25,7 @@ typedef struct QCryptoCipherBuiltinAES QCryptoCipherBuiltinAES;
 struct QCryptoCipherBuiltinAES {
     AES_KEY encrypt_key;
     AES_KEY decrypt_key;
-    uint8_t *iv;
-    size_t niv;
+    uint8_t iv[AES_BLOCK_SIZE];
 };
 typedef struct QCryptoCipherBuiltinDESRFB QCryptoCipherBuiltinDESRFB;
 struct QCryptoCipherBuiltinDESRFB {
@@ -40,6 +39,7 @@ struct QCryptoCipherBuiltin {
         QCryptoCipherBuiltinAES aes;
         QCryptoCipherBuiltinDESRFB desrfb;
     } state;
+    size_t blocksize;
     void (*free)(QCryptoCipher *cipher);
     int (*setiv)(QCryptoCipher *cipher,
                  const uint8_t *iv, size_t niv,
@@ -61,7 +61,6 @@ static void qcrypto_cipher_free_aes(QCryptoCipher *cipher)
 {
     QCryptoCipherBuiltin *ctxt = cipher->opaque;
 
-    g_free(ctxt->state.aes.iv);
     g_free(ctxt);
     cipher->opaque = NULL;
 }
@@ -145,15 +144,13 @@ static int qcrypto_cipher_setiv_aes(QCryptoCipher *cipher,
                                      Error **errp)
 {
     QCryptoCipherBuiltin *ctxt = cipher->opaque;
-    if (niv != 16) {
-        error_setg(errp, "IV must be 16 bytes not %zu", niv);
+    if (niv != AES_BLOCK_SIZE) {
+        error_setg(errp, "IV must be %d bytes not %zu",
+                   AES_BLOCK_SIZE, niv);
         return -1;
     }
 
-    g_free(ctxt->state.aes.iv);
-    ctxt->state.aes.iv = g_new0(uint8_t, niv);
-    memcpy(ctxt->state.aes.iv, iv, niv);
-    ctxt->state.aes.niv = niv;
+    memcpy(ctxt->state.aes.iv, iv, AES_BLOCK_SIZE);
 
     return 0;
 }
@@ -185,6 +182,7 @@ static int qcrypto_cipher_init_aes(QCryptoCipher *cipher,
         goto error;
     }
 
+    ctxt->blocksize = AES_BLOCK_SIZE;
     ctxt->free = qcrypto_cipher_free_aes;
     ctxt->setiv = qcrypto_cipher_setiv_aes;
     ctxt->encrypt = qcrypto_cipher_encrypt_aes;
@@ -286,6 +284,7 @@ static int qcrypto_cipher_init_des_rfb(QCryptoCipher *cipher,
     memcpy(ctxt->state.desrfb.key, key, nkey);
     ctxt->state.desrfb.nkey = nkey;
 
+    ctxt->blocksize = 8;
     ctxt->free = qcrypto_cipher_free_des_rfb;
     ctxt->setiv = qcrypto_cipher_setiv_des_rfb;
     ctxt->encrypt = qcrypto_cipher_encrypt_des_rfb;
@@ -374,6 +373,12 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
 {
     QCryptoCipherBuiltin *ctxt = cipher->opaque;
 
+    if (len % ctxt->blocksize) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, ctxt->blocksize);
+        return -1;
+    }
+
     return ctxt->encrypt(cipher, in, out, len, errp);
 }
 
@@ -386,6 +391,12 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
 {
     QCryptoCipherBuiltin *ctxt = cipher->opaque;
 
+    if (len % ctxt->blocksize) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, ctxt->blocksize);
+        return -1;
+    }
+
     return ctxt->decrypt(cipher, in, out, len, errp);
 }
 
diff --git a/crypto/cipher-gcrypt.c b/crypto/cipher-gcrypt.c
index 8cfc562500..c4f811487a 100644
--- a/crypto/cipher-gcrypt.c
+++ b/crypto/cipher-gcrypt.c
@@ -34,6 +34,11 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
     }
 }
 
+typedef struct QCryptoCipherGcrypt QCryptoCipherGcrypt;
+struct QCryptoCipherGcrypt {
+    gcry_cipher_hd_t handle;
+    size_t blocksize;
+};
 
 QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
                                   QCryptoCipherMode mode,
@@ -41,7 +46,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
                                   Error **errp)
 {
     QCryptoCipher *cipher;
-    gcry_cipher_hd_t handle;
+    QCryptoCipherGcrypt *ctx;
     gcry_error_t err;
     int gcryalg, gcrymode;
 
@@ -87,7 +92,9 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
     cipher->alg = alg;
     cipher->mode = mode;
 
-    err = gcry_cipher_open(&handle, gcryalg, gcrymode, 0);
+    ctx = g_new0(QCryptoCipherGcrypt, 1);
+
+    err = gcry_cipher_open(&ctx->handle, gcryalg, gcrymode, 0);
     if (err != 0) {
         error_setg(errp, "Cannot initialize cipher: %s",
                    gcry_strerror(err));
@@ -100,10 +107,12 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
          * bizarre RFB variant of DES :-)
          */
         uint8_t *rfbkey = qcrypto_cipher_munge_des_rfb_key(key, nkey);
-        err = gcry_cipher_setkey(handle, rfbkey, nkey);
+        err = gcry_cipher_setkey(ctx->handle, rfbkey, nkey);
         g_free(rfbkey);
+        ctx->blocksize = 8;
     } else {
-        err = gcry_cipher_setkey(handle, key, nkey);
+        err = gcry_cipher_setkey(ctx->handle, key, nkey);
+        ctx->blocksize = 16;
     }
     if (err != 0) {
         error_setg(errp, "Cannot set key: %s",
@@ -111,11 +120,12 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
         goto error;
     }
 
-    cipher->opaque = handle;
+    cipher->opaque = ctx;
     return cipher;
 
  error:
-    gcry_cipher_close(handle);
+    gcry_cipher_close(ctx->handle);
+    g_free(ctx);
     g_free(cipher);
     return NULL;
 }
@@ -123,12 +133,13 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
 
 void qcrypto_cipher_free(QCryptoCipher *cipher)
 {
-    gcry_cipher_hd_t handle;
+    QCryptoCipherGcrypt *ctx;
     if (!cipher) {
         return;
     }
-    handle = cipher->opaque;
-    gcry_cipher_close(handle);
+    ctx = cipher->opaque;
+    gcry_cipher_close(ctx->handle);
+    g_free(ctx);
     g_free(cipher);
 }
 
@@ -139,10 +150,16 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
                            size_t len,
                            Error **errp)
 {
-    gcry_cipher_hd_t handle = cipher->opaque;
+    QCryptoCipherGcrypt *ctx = cipher->opaque;
     gcry_error_t err;
 
-    err = gcry_cipher_encrypt(handle,
+    if (len % ctx->blocksize) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, ctx->blocksize);
+        return -1;
+    }
+
+    err = gcry_cipher_encrypt(ctx->handle,
                               out, len,
                               in, len);
     if (err != 0) {
@@ -161,10 +178,16 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
                            size_t len,
                            Error **errp)
 {
-    gcry_cipher_hd_t handle = cipher->opaque;
+    QCryptoCipherGcrypt *ctx = cipher->opaque;
     gcry_error_t err;
 
-    err = gcry_cipher_decrypt(handle,
+    if (len % ctx->blocksize) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, ctx->blocksize);
+        return -1;
+    }
+
+    err = gcry_cipher_decrypt(ctx->handle,
                               out, len,
                               in, len);
     if (err != 0) {
@@ -180,11 +203,17 @@ int qcrypto_cipher_setiv(QCryptoCipher *cipher,
                          const uint8_t *iv, size_t niv,
                          Error **errp)
 {
-    gcry_cipher_hd_t handle = cipher->opaque;
+    QCryptoCipherGcrypt *ctx = cipher->opaque;
     gcry_error_t err;
 
-    gcry_cipher_reset(handle);
-    err = gcry_cipher_setiv(handle, iv, niv);
+    if (niv != ctx->blocksize) {
+        error_setg(errp, "Expected IV size %zu not %zu",
+                   ctx->blocksize, niv);
+        return -1;
+    }
+
+    gcry_cipher_reset(ctx->handle);
+    err = gcry_cipher_setiv(ctx->handle, iv, niv);
     if (err != 0) {
         error_setg(errp, "Cannot set IV: %s",
                    gcry_strerror(err));
diff --git a/crypto/cipher-nettle.c b/crypto/cipher-nettle.c
index b01cb1c857..7449338d3b 100644
--- a/crypto/cipher-nettle.c
+++ b/crypto/cipher-nettle.c
@@ -69,7 +69,7 @@ struct QCryptoCipherNettle {
     nettle_cipher_func *alg_encrypt;
     nettle_cipher_func *alg_decrypt;
     uint8_t *iv;
-    size_t niv;
+    size_t blocksize;
 };
 
 bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
@@ -125,7 +125,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
         ctx->alg_encrypt = des_encrypt_wrapper;
         ctx->alg_decrypt = des_decrypt_wrapper;
 
-        ctx->niv = DES_BLOCK_SIZE;
+        ctx->blocksize = DES_BLOCK_SIZE;
         break;
 
     case QCRYPTO_CIPHER_ALG_AES_128:
@@ -140,14 +140,14 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
         ctx->alg_encrypt = aes_encrypt_wrapper;
         ctx->alg_decrypt = aes_decrypt_wrapper;
 
-        ctx->niv = AES_BLOCK_SIZE;
+        ctx->blocksize = AES_BLOCK_SIZE;
         break;
     default:
         error_setg(errp, "Unsupported cipher algorithm %d", alg);
         goto error;
     }
 
-    ctx->iv = g_new0(uint8_t, ctx->niv);
+    ctx->iv = g_new0(uint8_t, ctx->blocksize);
     cipher->opaque = ctx;
 
     return cipher;
@@ -184,6 +184,12 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
 {
     QCryptoCipherNettle *ctx = cipher->opaque;
 
+    if (len % ctx->blocksize) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, ctx->blocksize);
+        return -1;
+    }
+
     switch (cipher->mode) {
     case QCRYPTO_CIPHER_MODE_ECB:
         ctx->alg_encrypt(ctx->ctx_encrypt, len, out, in);
@@ -191,7 +197,7 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
 
     case QCRYPTO_CIPHER_MODE_CBC:
         cbc_encrypt(ctx->ctx_encrypt, ctx->alg_encrypt,
-                    ctx->niv, ctx->iv,
+                    ctx->blocksize, ctx->iv,
                     len, out, in);
         break;
     default:
@@ -211,6 +217,12 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
 {
     QCryptoCipherNettle *ctx = cipher->opaque;
 
+    if (len % ctx->blocksize) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, ctx->blocksize);
+        return -1;
+    }
+
     switch (cipher->mode) {
     case QCRYPTO_CIPHER_MODE_ECB:
         ctx->alg_decrypt(ctx->ctx_decrypt ? ctx->ctx_decrypt : ctx->ctx_encrypt,
@@ -219,7 +231,7 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
 
     case QCRYPTO_CIPHER_MODE_CBC:
         cbc_decrypt(ctx->ctx_decrypt ? ctx->ctx_decrypt : ctx->ctx_encrypt,
-                    ctx->alg_decrypt, ctx->niv, ctx->iv,
+                    ctx->alg_decrypt, ctx->blocksize, ctx->iv,
                     len, out, in);
         break;
     default:
@@ -235,9 +247,9 @@ int qcrypto_cipher_setiv(QCryptoCipher *cipher,
                          Error **errp)
 {
     QCryptoCipherNettle *ctx = cipher->opaque;
-    if (niv != ctx->niv) {
+    if (niv != ctx->blocksize) {
         error_setg(errp, "Expected IV size %zu not %zu",
-                   ctx->niv, niv);
+                   ctx->blocksize, niv);
         return -1;
     }
     memcpy(ctx->iv, iv, niv);
diff --git a/crypto/cipher.c b/crypto/cipher.c
index 024a00cb54..c8bd180532 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -47,7 +47,7 @@ qcrypto_cipher_validate_key_length(QCryptoCipherAlgorithm alg,
     return true;
 }
 
-#if defined(CONFIG_GNUTLS_GCRYPT) || defined(CONFIG_GNUTLS_NETTLE)
+#if defined(CONFIG_GCRYPT) || defined(CONFIG_NETTLE)
 static uint8_t *
 qcrypto_cipher_munge_des_rfb_key(const uint8_t *key,
                                  size_t nkey)
@@ -63,11 +63,11 @@ qcrypto_cipher_munge_des_rfb_key(const uint8_t *key,
     }
     return ret;
 }
-#endif /* CONFIG_GNUTLS_GCRYPT || CONFIG_GNUTLS_NETTLE */
+#endif /* CONFIG_GCRYPT || CONFIG_NETTLE */
 
-#ifdef CONFIG_GNUTLS_GCRYPT
+#ifdef CONFIG_GCRYPT
 #include "crypto/cipher-gcrypt.c"
-#elif defined CONFIG_GNUTLS_NETTLE
+#elif defined CONFIG_NETTLE
 #include "crypto/cipher-nettle.c"
 #else
 #include "crypto/cipher-builtin.c"
diff --git a/crypto/init.c b/crypto/init.c
index 7447882c7b..d94faacdf2 100644
--- a/crypto/init.c
+++ b/crypto/init.c
@@ -24,8 +24,9 @@
 #ifdef CONFIG_GNUTLS
 #include <gnutls/gnutls.h>
 #include <gnutls/crypto.h>
+#endif
 
-#ifdef CONFIG_GNUTLS_GCRYPT
+#ifdef CONFIG_GCRYPT
 #include <gcrypt.h>
 #endif
 
@@ -37,6 +38,7 @@
  *  - When GNUTLS >= 2.12, we must not initialize gcrypt threading
  *    because GNUTLS will do that itself
  *  - When GNUTLS < 2.12 we must always initialize gcrypt threading
+ *  - When GNUTLS is disabled we must always initialize gcrypt threading
  *
  * But....
  *
@@ -47,12 +49,15 @@
  *
  *   - gcrypt < 1.6.0
  * AND
- *   - gnutls < 2.12
+ *      - gnutls < 2.12
+ *   OR
+ *      - gnutls is disabled
  *
  */
 
-#if (defined(CONFIG_GNUTLS_GCRYPT) &&           \
-     (!defined(GNUTLS_VERSION_NUMBER) ||        \
+#if (defined(CONFIG_GCRYPT) &&                  \
+     (!defined(CONFIG_GNUTLS) ||                \
+      !defined(GNUTLS_VERSION_NUMBER) ||       \
       (GNUTLS_VERSION_NUMBER < 0x020c00)) &&    \
      (!defined(GCRYPT_VERSION_NUMBER) ||        \
       (GCRYPT_VERSION_NUMBER < 0x010600)))
@@ -113,6 +118,7 @@ static struct gcry_thread_cbs qcrypto_gcrypt_thread_impl = {
 
 int qcrypto_init(Error **errp)
 {
+#ifdef CONFIG_GNUTLS
     int ret;
     ret = gnutls_global_init();
     if (ret < 0) {
@@ -125,8 +131,9 @@ int qcrypto_init(Error **errp)
     gnutls_global_set_log_level(10);
     gnutls_global_set_log_function(qcrypto_gnutls_log);
 #endif
+#endif
 
-#ifdef CONFIG_GNUTLS_GCRYPT
+#ifdef CONFIG_GCRYPT
     if (!gcry_check_version(GCRYPT_VERSION)) {
         error_setg(errp, "Unable to initialize gcrypt");
         return -1;
@@ -139,12 +146,3 @@ int qcrypto_init(Error **errp)
 
     return 0;
 }
-
-#else /* ! CONFIG_GNUTLS */
-
-int qcrypto_init(Error **errp G_GNUC_UNUSED)
-{
-    return 0;
-}
-
-#endif /* ! CONFIG_GNUTLS */
diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index 7e10903baa..f250119e1b 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -35,5 +35,5 @@ CONFIG_SDHCI=y
 CONFIG_EDU=y
 CONFIG_VGA=y
 CONFIG_VGA_PCI=y
-CONFIG_IVSHMEM=$(CONFIG_KVM)
+CONFIG_IVSHMEM=$(CONFIG_POSIX)
 CONFIG_ROCKER=y
diff --git a/disas/mips.c b/disas/mips.c
index 01336a8385..bf0bbaf86a 100644
--- a/disas/mips.c
+++ b/disas/mips.c
@@ -2420,9 +2420,11 @@ const struct mips_opcode mips_builtin_opcodes[] =
 {"hibernate","",        0x42000023, 0xffffffff,	0, 			0,		V1	},
 {"ins",     "t,r,+A,+B", 0x7c000004, 0xfc00003f, WR_t|RD_s,    		0,		I33	},
 {"jr",      "s",	0x00000008, 0xfc1fffff,	UBD|RD_s,		0,		I1	},
+{"jr",      "s",	0x00000009, 0xfc1fffff,	UBD|RD_s,		0,		I32R6	}, /* jalr */
 /* jr.hb is officially MIPS{32,64}R2, but it works on R1 as jr with
    the same hazard barrier effect.  */
 {"jr.hb",   "s",	0x00000408, 0xfc1fffff,	UBD|RD_s,		0,		I32	},
+{"jr.hb",   "s",	0x00000409, 0xfc1fffff,	UBD|RD_s,		0,		I32R6	}, /* jalr.hb */
 {"j",       "s",	0x00000008, 0xfc1fffff,	UBD|RD_s,		0,		I1	}, /* jr */
 /* SVR4 PIC code requires special handling for j, so it must be a
    macro.  */
diff --git a/docs/qmp-events.txt b/docs/qmp-events.txt
index d92cc4833b..d2f1ce497e 100644
--- a/docs/qmp-events.txt
+++ b/docs/qmp-events.txt
@@ -28,6 +28,8 @@ Example:
     "data": { "actual": 944766976 },
     "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
 
+Note: this event is rate-limited.
+
 BLOCK_IMAGE_CORRUPTED
 ---------------------
 
@@ -296,6 +298,8 @@ Example:
      "data": { "reference": "usr1", "sector-num": 345435, "sectors-count": 5 },
      "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
 
+Note: this event is rate-limited.
+
 QUORUM_REPORT_BAD
 -----------------
 
@@ -318,6 +322,8 @@ Example:
      "data": { "node-name": "1.raw", "sector-num": 345435, "sectors-count": 5 },
      "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
 
+Note: this event is rate-limited.
+
 RESET
 -----
 
@@ -358,6 +364,8 @@ Example:
     "data": { "offset": 78 },
     "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
 
+Note: this event is rate-limited.
+
 SHUTDOWN
 --------
 
@@ -632,6 +640,8 @@ Example:
     "data": { "id": "channel0", "open": true },
     "timestamp": { "seconds": 1401385907, "microseconds": 422329 } }
 
+Note: this event is rate-limited separately for each "id".
+
 WAKEUP
 ------
 
@@ -662,3 +672,5 @@ Example:
 
 Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is
 followed respectively by the RESET, SHUTDOWN, or STOP events.
+
+Note: this event is rate-limited.
diff --git a/docs/qmp-spec.txt b/docs/qmp-spec.txt
index 4c28cd9438..4fb10a5d6b 100644
--- a/docs/qmp-spec.txt
+++ b/docs/qmp-spec.txt
@@ -175,6 +175,11 @@ The format of asynchronous events is:
 For a listing of supported asynchronous events, please, refer to the
 qmp-events.txt file.
 
+Some events are rate-limited to at most one per second.  If additional
+"similar" events arrive within one second, all but the last one are
+dropped, and the last one is delayed.  "Similar" normally means same
+event type.  See qmp-events.txt for details.
+
 2.5 QGA Synchronization
 -----------------------
 
diff --git a/docs/specs/ivshmem_device_spec.txt b/docs/specs/ivshmem_device_spec.txt
index 667a8628f0..d318d65c32 100644
--- a/docs/specs/ivshmem_device_spec.txt
+++ b/docs/specs/ivshmem_device_spec.txt
@@ -2,30 +2,106 @@
 Device Specification for Inter-VM shared memory device
 ------------------------------------------------------
 
-The Inter-VM shared memory device is designed to share a region of memory to
-userspace in multiple virtual guests.  The memory region does not belong to any
-guest, but is a POSIX memory object on the host.  Optionally, the device may
-support sending interrupts to other guests sharing the same memory region.
+The Inter-VM shared memory device is designed to share a memory region (created
+on the host via the POSIX shared memory API) between multiple QEMU processes
+running different guests. In order for all guests to be able to pick up the
+shared memory area, it is modeled by QEMU as a PCI device exposing said memory
+to the guest as a PCI BAR.
+The memory region does not belong to any guest, but is a POSIX memory object on
+the host. The host can access this shared memory if needed.
+
+The device also provides an optional communication mechanism between guests
+sharing the same memory object. More details about this can be found in the
+'Guest to guest communication' section.
 
 
 The Inter-VM PCI device
 -----------------------
 
-*BARs*
+From the VM point of view, the ivshmem PCI device supports three BARs.
+
+- BAR0 is a 1 Kbyte MMIO region to support registers and interrupts when MSI is
+  not used.
+- BAR1 is used for MSI-X when it is enabled in the device.
+- BAR2 is used to access the shared memory object.
+
+It is your choice how to use the device, but you must choose between two
+behaviors:
+
+- basically, if you only need the shared memory part, you will map BAR2.
+  This way, you have access to the shared memory in the guest and can use it
+  as you see fit (memnic, for example, uses it in userland
+  http://dpdk.org/browse/memnic).
+
+- BAR0 and BAR1 are used to implement an optional communication mechanism
+  through interrupts in the guests. If you need an event mechanism between the
+  guests accessing the shared memory, you will most likely want to write a
+  kernel driver that will handle interrupts. See details in the 'Guest to
+  guest communication' section.
+
+The behavior is chosen when starting your QEMU processes:
+- no communication mechanism needed: the first QEMU to start creates the
+  shared memory on the host; subsequent QEMU processes will use it.
+
+- communication mechanism needed: an ivshmem server must be started before any
+  QEMU process; each QEMU process then connects to the server unix socket.
+
+For more details on the QEMU ivshmem parameters, see qemu-doc documentation.
+
+
+Guest to guest communication
+----------------------------
+
+This section details the communication mechanism between the guests accessing
+the ivshmem shared memory.
 
-The device supports three BARs.  BAR0 is a 1 Kbyte MMIO region to support
-registers.  BAR1 is used for MSI-X when it is enabled in the device.  BAR2 is
-used to map the shared memory object from the host.  The size of BAR2 is
-specified when the guest is started and must be a power of 2 in size.
+*ivshmem server*
 
-*Registers*
+This server code is available in qemu.git/contrib/ivshmem-server.
 
-The device currently supports 4 registers of 32-bits each.  Registers
-are used for synchronization between guests sharing the same memory object when
-interrupts are supported (this requires using the shared memory server).
+The server must be started on the host before any guest.
+It creates a shared memory object, then waits for clients to connect on a unix
+socket. All the messages are little-endian int64_t integers.
 
-The server assigns each VM an ID number and sends this ID number to the QEMU
-process when the guest starts.
+For each client (QEMU process) that connects to the server:
+- the server sends a protocol version; if the client does not support it, the
+  client closes the communication,
+- the server assigns an ID for this client and sends this ID to it as the first
+  message,
+- the server sends an fd for the shared memory object to this client,
+- the server creates a new set of host eventfds associated with the new client
+  and sends this set to all already connected clients,
+- finally, the server sends the eventfd sets of all clients to the new
+  client.
+
+The server signals all clients when one of them disconnects.
+
+The client IDs are limited to 16 bits because of the current implementation (see
+Doorbell register in 'PCI device registers' subsection). Hence only 65536
+clients are supported.
+
+All the file descriptors (fd to the shared memory, eventfds for each client)
+are passed to clients using SCM_RIGHTS over the server unix socket.
+
+Apart from the current ivshmem implementation in QEMU, an ivshmem client has
+been provided in qemu.git/contrib/ivshmem-client for debugging.
+
+*QEMU as an ivshmem client*
+
+At initialisation, when creating the ivshmem device, QEMU first receives a
+protocol version and closes communication with the server if it does not match.
+QEMU then gets its ID from the server and makes it available through the BAR0
+IVPosition register for the VM to use (see 'PCI device registers' subsection).
+QEMU then uses the shared memory fd to map the shared memory into BAR2.
+The eventfds received from the server for all other clients are stored to
+implement the BAR0 Doorbell register (see 'PCI device registers' subsection).
+Finally, the eventfds assigned to this QEMU process are used to trigger
+interrupts in this VM.
+
+*PCI device registers*
+
+From the VM point of view, the ivshmem PCI device supports 4 registers of
+32-bits each.
 
 enum ivshmem_registers {
     IntrMask = 0,
@@ -49,8 +125,8 @@ bit to 0 and unmasked by setting the first bit to 1.
 IVPosition Register: The IVPosition register is read-only and reports the
 guest's ID number.  The guest IDs are non-negative integers.  When using the
 server, since the server is a separate process, the VM ID will only be set when
-the device is ready (shared memory is received from the server and accessible via
-the device).  If the device is not ready, the IVPosition will return -1.
+the device is ready (shared memory is received from the server and accessible
+via the device).  If the device is not ready, the IVPosition will return -1.
 Applications should ensure that they have a valid VM ID before accessing the
 shared memory.
 
@@ -59,8 +135,8 @@ Doorbell register.  The doorbell register is 32-bits, logically divided into
 two 16-bit fields.  The high 16-bits are the guest ID to interrupt and the low
 16-bits are the interrupt vector to trigger.  The semantics of the value
 written to the doorbell depends on whether the device is using MSI or a regular
-pin-based interrupt.  In short, MSI uses vectors while regular interrupts set the
-status register.
+pin-based interrupt.  In short, MSI uses vectors while regular interrupts set
+the status register.
 
 Regular Interrupts
 
@@ -71,7 +147,7 @@ interrupt in the destination guest.
 
 Message Signalled Interrupts
 
-A ivshmem device may support multiple MSI vectors.  If so, the lower 16-bits
+An ivshmem device may support multiple MSI vectors.  If so, the lower 16-bits
 written to the Doorbell register must be between 0 and the maximum number of
 vectors the guest supports.  The lower 16 bits written to the doorbell is the
 MSI vector that will be raised in the destination guest.  The number of MSI
@@ -83,14 +159,3 @@ interrupt itself should be communicated via the shared memory region.  Devices
 supporting multiple MSI vectors can use different vectors to indicate different
 events have occurred.  The semantics of interrupt vectors are left to the
 user's discretion.
-
-
-Usage in the Guest
-------------------
-
-The shared memory device is intended to be used with the provided UIO driver.
-Very little configuration is needed.  The guest should map BAR0 to access the
-registers (an array of 32-bit ints allows simple writing) and map BAR2 to
-access the shared memory region itself.  The size of the shared memory region
-is specified when the guest (or shared memory server) is started.  A guest may
-map the whole shared memory region or only part of it.
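
To make the register semantics described above concrete, here is a small
guest-side sketch; it is an illustration, not part of this patch. It assumes
BAR0 has already been mapped into the driver's address space as 'bar0', and
uses the conventional register offsets from the spec (IVPosition at byte
offset 8, Doorbell at byte offset 12).

#include <stdint.h>

#define IVSHMEM_IVPOSITION  8   /* byte offset of the IVPosition register */
#define IVSHMEM_DOORBELL   12   /* byte offset of the Doorbell register */

/* Read this guest's ID; returns -1 while the device is not yet ready. */
static int32_t ivshmem_own_id(volatile uint32_t *bar0)
{
    return (int32_t)bar0[IVSHMEM_IVPOSITION / 4];
}

/* Trigger 'vector' in the guest identified by 'peer_id': the high 16 bits
 * of the Doorbell are the destination ID, the low 16 bits the vector. */
static void ivshmem_notify_peer(volatile uint32_t *bar0,
                                uint16_t peer_id, uint16_t vector)
{
    bar0[IVSHMEM_DOORBELL / 4] = ((uint32_t)peer_id << 16) | vector;
}
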
diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c
index 2ff0d5ce1b..ce428dfc18 100644
--- a/hw/acpi/memory_hotplug.c
+++ b/hw/acpi/memory_hotplug.c
@@ -238,10 +238,12 @@ void acpi_memory_plug_cb(ACPIREGS *ar, qemu_irq irq, MemHotplugState *mem_st,
 
     mdev->dimm = dev;
     mdev->is_enabled = true;
-    mdev->is_inserting = true;
+    if (dev->hotplugged) {
+        mdev->is_inserting = true;
 
-    /* do ACPI magic */
-    acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS);
+        /* do ACPI magic */
+        acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS);
+    }
     return;
 }
 
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 5d38c47444..77d9267599 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -923,7 +923,7 @@ static void machvirt_init(MachineState *machine)
     qemu_irq pic[NUM_IRQS];
     MemoryRegion *sysmem = get_system_memory();
     int gic_version = vms->gic_version;
-    int n;
+    int n, max_cpus;
     MemoryRegion *ram = g_new(MemoryRegion, 1);
     const char *cpu_model = machine->cpu_model;
     VirtBoardInfo *vbi;
@@ -957,6 +957,22 @@ static void machvirt_init(MachineState *machine)
         exit(1);
     }
 
+    /* The maximum number of CPUs depends on the GIC version, or on how
+     * many redistributors we can fit into the memory map.
+     */
+    if (gic_version == 3) {
+        max_cpus = vbi->memmap[VIRT_GIC_REDIST].size / 0x20000;
+    } else {
+        max_cpus = GIC_NCPU;
+    }
+
+    if (smp_cpus > max_cpus) {
+        error_report("Number of SMP CPUs requested (%d) exceeds max CPUs "
+                     "supported by machine 'mach-virt' (%d)",
+                     smp_cpus, max_cpus);
+        exit(1);
+    }
+
     vbi->smp_cpus = smp_cpus;
 
     if (machine->ram_size > vbi->memmap[VIRT_MEM].size) {
@@ -1155,10 +1171,11 @@ static void virt_class_init(ObjectClass *oc, void *data)
 
     mc->desc = "ARM Virtual Machine",
     mc->init = machvirt_init;
-    /* Our maximum number of CPUs depends on how many redistributors
-     * we can fit into memory map
+    /* Start max_cpus at the maximum QEMU supports. We'll further restrict
+     * it later in machvirt_init, where we have more information about the
+     * configuration of the particular instance.
      */
-    mc->max_cpus = a15memmap[VIRT_GIC_REDIST].size / 0x20000;
+    mc->max_cpus = MAX_CPUMASK_BITS;
     mc->has_dynamic_sysbus = true;
     mc->block_default_type = IF_VIRTIO;
     mc->no_cdrom = 1;
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index b36ca3da74..87553bbc60 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -48,6 +48,14 @@ static const int uart_intr[XLNX_ZYNQMP_NUM_UARTS] = {
     21, 22,
 };
 
+static const uint64_t sdhci_addr[XLNX_ZYNQMP_NUM_SDHCI] = {
+    0xFF160000, 0xFF170000,
+};
+
+static const int sdhci_intr[XLNX_ZYNQMP_NUM_SDHCI] = {
+    48, 49,
+};
+
 typedef struct XlnxZynqMPGICRegion {
     int region_index;
     uint32_t address;
@@ -97,6 +105,13 @@ static void xlnx_zynqmp_init(Object *obj)
 
     object_initialize(&s->sata, sizeof(s->sata), TYPE_SYSBUS_AHCI);
     qdev_set_parent_bus(DEVICE(&s->sata), sysbus_get_default());
+
+    for (i = 0; i < XLNX_ZYNQMP_NUM_SDHCI; i++) {
+        object_initialize(&s->sdhci[i], sizeof(s->sdhci[i]),
+                          TYPE_SYSBUS_SDHCI);
+        qdev_set_parent_bus(DEVICE(&s->sdhci[i]),
+                            sysbus_get_default());
+    }
 }
 
 static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
@@ -258,6 +273,19 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
 
     sysbus_mmio_map(SYS_BUS_DEVICE(&s->sata), 0, SATA_ADDR);
     sysbus_connect_irq(SYS_BUS_DEVICE(&s->sata), 0, gic_spi[SATA_INTR]);
+
+    for (i = 0; i < XLNX_ZYNQMP_NUM_SDHCI; i++) {
+        object_property_set_bool(OBJECT(&s->sdhci[i]), true,
+                                 "realized", &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->sdhci[i]), 0,
+                        sdhci_addr[i]);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci[i]), 0,
+                           gic_spi[sdhci_intr[i]]);
+    }
 }
 
 static Property xlnx_zynqmp_props[] = {
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 6106e46158..c42ddeb297 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -283,7 +283,8 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
 
     /* Get this show started by hooking up our callbacks */
     aio_context_acquire(s->ctx);
-    aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify);
+    aio_set_event_notifier(s->ctx, &s->host_notifier, true,
+                           handle_notify);
     aio_context_release(s->ctx);
     return;
 
@@ -319,7 +320,7 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
     aio_context_acquire(s->ctx);
 
     /* Stop notifications for new requests from guest */
-    aio_set_event_notifier(s->ctx, &s->host_notifier, NULL);
+    aio_set_event_notifier(s->ctx, &s->host_notifier, true, NULL);
 
     /* Drain and switch bs back to the QEMU main loop */
     blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context());
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 6686a72803..4292eced32 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -192,6 +192,8 @@ typedef struct FDrive {
     uint8_t ro;               /* Is read-only           */
     uint8_t media_changed;    /* Is media changed       */
     uint8_t media_rate;       /* Data rate of medium    */
+
+    bool media_inserted;      /* Is there a medium in the tray */
 } FDrive;
 
 static void fd_init(FDrive *drv)
@@ -261,7 +263,7 @@ static int fd_seek(FDrive *drv, uint8_t head, uint8_t track, uint8_t sect,
 #endif
         drv->head = head;
         if (drv->track != track) {
-            if (drv->blk != NULL && blk_is_inserted(drv->blk)) {
+            if (drv->media_inserted) {
                 drv->media_changed = 0;
             }
             ret = 1;
@@ -270,7 +272,7 @@ static int fd_seek(FDrive *drv, uint8_t head, uint8_t track, uint8_t sect,
         drv->sect = sect;
     }
 
-    if (drv->blk == NULL || !blk_is_inserted(drv->blk)) {
+    if (!drv->media_inserted) {
         ret = 2;
     }
 
@@ -296,7 +298,7 @@ static void fd_revalidate(FDrive *drv)
         ro = blk_is_read_only(drv->blk);
         pick_geometry(drv->blk, &nb_heads, &max_track,
                       &last_sect, drv->drive, &drive, &rate);
-        if (!blk_is_inserted(drv->blk)) {
+        if (!drv->media_inserted) {
             FLOPPY_DPRINTF("No disk in drive\n");
         } else {
             FLOPPY_DPRINTF("Floppy disk (%d h %d t %d s) %s\n", nb_heads,
@@ -692,7 +694,7 @@ static bool fdrive_media_changed_needed(void *opaque)
 {
     FDrive *drive = opaque;
 
-    return (drive->blk != NULL && drive->media_changed != 1);
+    return (drive->media_inserted && drive->media_changed != 1);
 }
 
 static const VMStateDescription vmstate_fdrive_media_changed = {
@@ -2184,12 +2186,21 @@ static void fdctrl_change_cb(void *opaque, bool load)
 {
     FDrive *drive = opaque;
 
+    drive->media_inserted = load && drive->blk && blk_is_inserted(drive->blk);
+
     drive->media_changed = 1;
     fd_revalidate(drive);
 }
 
+static bool fdctrl_is_tray_open(void *opaque)
+{
+    FDrive *drive = opaque;
+    return !drive->media_inserted;
+}
+
 static const BlockDevOps fdctrl_block_ops = {
     .change_media_cb = fdctrl_change_cb,
+    .is_tray_open = fdctrl_is_tray_open,
 };
 
 /* Init functions */
@@ -2217,6 +2228,7 @@ static void fdctrl_connect_drives(FDCtrl *fdctrl, Error **errp)
         fdctrl_change_cb(drive, 0);
         if (drive->blk) {
             blk_set_dev_ops(drive->blk, &fdctrl_block_ops, drive);
+            drive->media_inserted = blk_is_inserted(drive->blk);
         }
     }
 }
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 8beb26b4db..093e475dc9 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -798,6 +798,11 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
 static void virtio_blk_save(QEMUFile *f, void *opaque)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+    VirtIOBlock *s = VIRTIO_BLK(vdev);
+
+    if (s->dataplane) {
+        virtio_blk_data_plane_stop(s->dataplane);
+    }
 
     virtio_save(vdev, f);
 }
@@ -839,10 +844,7 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
         req->next = s->rq;
         s->rq = req;
 
-        virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
-            req->elem.in_num, 1);
-        virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
-            req->elem.out_num, 0);
+        virtqueue_map(&req->elem);
     }
 
     return 0;
@@ -975,7 +977,7 @@ static Property virtio_blk_properties[] = {
     DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial),
     DEFINE_PROP_BIT("config-wce", VirtIOBlock, conf.config_wce, 0, true),
 #ifdef __linux__
-    DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, true),
+    DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, false),
 #endif
     DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0,
                     true),
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 36d7398f4f..1bbc111939 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -931,9 +931,11 @@ static int blk_connect(struct XenDevice *xendev)
     blk_attach_dev_nofail(blkdev->blk, blkdev);
     blkdev->file_size = blk_getlength(blkdev->blk);
     if (blkdev->file_size < 0) {
+        BlockDriverState *bs = blk_bs(blkdev->blk);
+        const char *drv_name = bs ? bdrv_get_format_name(bs) : NULL;
         xen_be_printf(&blkdev->xendev, 1, "blk_getlength: %d (%s) | drv %s\n",
                       (int)blkdev->file_size, strerror(-blkdev->file_size),
-                      bdrv_get_format_name(blk_bs(blkdev->blk)) ?: "-");
+                      drv_name ?: "-");
         blkdev->file_size = 0;
     }
 
diff --git a/hw/char/escc.c b/hw/char/escc.c
index ba653efd68..9816154206 100644
--- a/hw/char/escc.c
+++ b/hw/char/escc.c
@@ -1035,6 +1035,7 @@ static void escc_class_init(ObjectClass *klass, void *data)
     dc->reset = escc_reset;
     dc->vmsd = &vmstate_escc;
     dc->props = escc_properties;
+    set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
 }
 
 static const TypeInfo escc_info = {
diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c
index f0c4c722d6..f30f9c24be 100644
--- a/hw/char/imx_serial.c
+++ b/hw/char/imx_serial.c
@@ -22,25 +22,17 @@
 #include "sysemu/sysemu.h"
 #include "sysemu/char.h"
 
-//#define DEBUG_SERIAL 1
-#ifdef DEBUG_SERIAL
-#define DPRINTF(fmt, args...) \
-do { printf("%s: " fmt , TYPE_IMX_SERIAL, ##args); } while (0)
-#else
-#define DPRINTF(fmt, args...) do {} while (0)
+#ifndef DEBUG_IMX_UART
+#define DEBUG_IMX_UART 0
 #endif
 
-/*
- * Define to 1 for messages about attempts to
- * access unimplemented registers or similar.
- */
-//#define DEBUG_IMPLEMENTATION 1
-#ifdef DEBUG_IMPLEMENTATION
-#  define IPRINTF(fmt, args...) \
-    do  { fprintf(stderr, "%s: " fmt, TYPE_IMX_SERIAL, ##args); } while (0)
-#else
-#  define IPRINTF(fmt, args...) do {} while (0)
-#endif
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_UART) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_SERIAL, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static const VMStateDescription vmstate_imx_serial = {
     .name = TYPE_IMX_SERIAL,
@@ -115,7 +107,8 @@ static uint64_t imx_serial_read(void *opaque, hwaddr offset,
     IMXSerialState *s = (IMXSerialState *)opaque;
     uint32_t c;
 
-    DPRINTF("read(offset=%x)\n", offset >> 2);
+    DPRINTF("read(offset=0x%" HWADDR_PRIx ")\n", offset);
+
     switch (offset >> 2) {
     case 0x0: /* URXD */
         c = s->readbuff;
@@ -167,7 +160,8 @@ static uint64_t imx_serial_read(void *opaque, hwaddr offset,
         return 0x0; /* TODO */
 
     default:
-        IPRINTF("%s: bad offset: 0x%x\n", __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_SERIAL, __func__, offset);
         return 0;
     }
 }
@@ -178,9 +172,8 @@ static void imx_serial_write(void *opaque, hwaddr offset,
     IMXSerialState *s = (IMXSerialState *)opaque;
     unsigned char ch;
 
-    DPRINTF("write(offset=%x, value = %x) to %s\n",
-            offset >> 2,
-            (unsigned int)value, s->chr ? s->chr->label : "NODEV");
+    DPRINTF("write(offset=0x%" HWADDR_PRIx ", value = 0x%x) to %s\n",
+            offset, (unsigned int)value, s->chr ? s->chr->label : "NODEV");
 
     switch (offset >> 2) {
     case 0x10: /* UTXD */
@@ -198,7 +191,9 @@ static void imx_serial_write(void *opaque, hwaddr offset,
 
     case 0x20: /* UCR1 */
         s->ucr1 = value & 0xffff;
+
         DPRINTF("write(ucr1=%x)\n", (unsigned int)value);
+
         imx_update(s);
         break;
 
@@ -266,12 +261,14 @@ static void imx_serial_write(void *opaque, hwaddr offset,
 
     case 0x2d: /* UTS1 */
     case 0x23: /* UCR4 */
-        IPRINTF("Unimplemented Register %x written to\n", offset >> 2);
+        qemu_log_mask(LOG_UNIMP, "[%s]%s: Unimplemented reg 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_SERIAL, __func__, offset);
         /* TODO */
         break;
 
     default:
-        IPRINTF("%s: Bad offset 0x%x\n", __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_SERIAL, __func__, offset);
     }
 }
 
@@ -284,7 +281,9 @@ static int imx_can_receive(void *opaque)
 static void imx_put_data(void *opaque, uint32_t value)
 {
     IMXSerialState *s = (IMXSerialState *)opaque;
+
     DPRINTF("received char\n");
+
     s->usr1 |= USR1_RRDY;
     s->usr2 |= USR2_RDR;
     s->uts1 &= ~UTS1_RXEMPTY;
@@ -319,8 +318,7 @@ static void imx_serial_realize(DeviceState *dev, Error **errp)
         qemu_chr_add_handlers(s->chr, imx_can_receive, imx_receive,
                               imx_event, s);
     } else {
-        DPRINTF("No char dev for uart at 0x%lx\n",
-                (unsigned long)s->iomem.ram_addr);
+        DPRINTF("No char dev for uart\n");
     }
 }
 
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index be97058712..497b0afd9f 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -705,10 +705,7 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id,
 
                 qemu_get_buffer(f, (unsigned char *)&port->elem,
                                 sizeof(port->elem));
-                virtqueue_map_sg(port->elem.in_sg, port->elem.in_addr,
-                                 port->elem.in_num, 1);
-                virtqueue_map_sg(port->elem.out_sg, port->elem.out_addr,
-                                 port->elem.out_num, 1);
+                virtqueue_map(&port->elem);
 
                 /*
                  *  Port was throttled on source machine.  Let's
diff --git a/hw/gpio/imx_gpio.c b/hw/gpio/imx_gpio.c
index d56ffcd8d7..3170585a27 100644
--- a/hw/gpio/imx_gpio.c
+++ b/hw/gpio/imx_gpio.c
@@ -29,11 +29,12 @@ typedef enum IMXGPIOLevel {
 } IMXGPIOLevel;
 
 #define DPRINTF(fmt, args...) \
-          do { \
-              if (DEBUG_IMX_GPIO) { \
-                  fprintf(stderr, "%s: " fmt , __func__, ##args); \
-              } \
-          } while (0)
+    do { \
+        if (DEBUG_IMX_GPIO) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_GPIO, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static const char *imx_gpio_reg_name(uint32_t reg)
 {
@@ -176,19 +177,19 @@ static uint64_t imx_gpio_read(void *opaque, hwaddr offset, unsigned size)
         if (s->has_edge_sel) {
             reg_value = s->edge_sel;
         } else {
-            qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: EDGE_SEL register not "
+            qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: EDGE_SEL register not "
                           "present on this version of GPIO device\n",
                           TYPE_IMX_GPIO, __func__);
         }
         break;
 
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad register at offset %d\n",
-                      TYPE_IMX_GPIO, __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_GPIO, __func__, offset);
         break;
     }
 
-    DPRINTF("(%s) = 0x%"PRIx32"\n", imx_gpio_reg_name(offset), reg_value);
+    DPRINTF("(%s) = 0x%" PRIx32 "\n", imx_gpio_reg_name(offset), reg_value);
 
     return reg_value;
 }
@@ -198,7 +199,7 @@ static void imx_gpio_write(void *opaque, hwaddr offset, uint64_t value,
 {
     IMXGPIOState *s = IMX_GPIO(opaque);
 
-    DPRINTF("(%s, value = 0x%"PRIx32")\n", imx_gpio_reg_name(offset),
+    DPRINTF("(%s, value = 0x%" PRIx32 ")\n", imx_gpio_reg_name(offset),
             (uint32_t)value);
 
     switch (offset) {
@@ -238,15 +239,15 @@ static void imx_gpio_write(void *opaque, hwaddr offset, uint64_t value,
             s->edge_sel = value;
             imx_gpio_set_all_int_lines(s);
         } else {
-            qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: EDGE_SEL register not "
+            qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: EDGE_SEL register not "
                           "present on this version of GPIO device\n",
                           TYPE_IMX_GPIO, __func__);
         }
         break;
 
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad register at offset %d\n",
-                      TYPE_IMX_GPIO, __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_GPIO, __func__, offset);
         break;
     }
 
diff --git a/hw/i2c/imx_i2c.c b/hw/i2c/imx_i2c.c
index 8474872e07..cb62c7a2c9 100644
--- a/hw/i2c/imx_i2c.c
+++ b/hw/i2c/imx_i2c.c
@@ -21,13 +21,17 @@
 #include "hw/i2c/imx_i2c.h"
 #include "hw/i2c/i2c.h"
 
-#ifndef IMX_I2C_DEBUG
-#define IMX_I2C_DEBUG                 0
+#ifndef DEBUG_IMX_I2C
+#define DEBUG_IMX_I2C 0
 #endif
 
-#if IMX_I2C_DEBUG
-#define DPRINT(fmt, args...)              \
-    do { fprintf(stderr, "%s: "fmt, __func__, ## args); } while (0)
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_I2C) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_I2C, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static const char *imx_i2c_get_regname(unsigned offset)
 {
@@ -46,9 +50,6 @@ static const char *imx_i2c_get_regname(unsigned offset)
         return "[?]";
     }
 }
-#else
-#define DPRINT(fmt, args...)              do { } while (0)
-#endif
 
 static inline bool imx_i2c_is_enabled(IMXI2CState *s)
 {
@@ -121,11 +122,11 @@ static uint64_t imx_i2c_read(void *opaque, hwaddr offset,
 
             if (s->address == ADDR_RESET) {
                 /* something is wrong as the address is not set */
-                qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Trying to read "
+                qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Trying to read "
                               "without specifying the slave address\n",
                               TYPE_IMX_I2C, __func__);
             } else if (s->i2cr & I2CR_MTX) {
-                qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Trying to read "
+                qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Trying to read "
                               "but MTX is set\n", TYPE_IMX_I2C, __func__);
             } else {
                 /* get the next byte */
@@ -134,7 +135,7 @@ static uint64_t imx_i2c_read(void *opaque, hwaddr offset,
                 if (ret >= 0) {
                     imx_i2c_raise_interrupt(s);
                 } else {
-                    qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: read failed "
+                    qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: read failed "
                                   "for device 0x%02x\n", TYPE_IMX_I2C,
                                   __func__, s->address);
                     ret = 0xff;
@@ -143,19 +144,19 @@ static uint64_t imx_i2c_read(void *opaque, hwaddr offset,
 
             s->i2dr_read = ret;
         } else {
-            qemu_log_mask(LOG_UNIMP, "%s[%s]: slave mode not implemented\n",
+            qemu_log_mask(LOG_UNIMP, "[%s]%s: slave mode not implemented\n",
                           TYPE_IMX_I2C, __func__);
         }
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
-                      TYPE_IMX_I2C, __func__, s->address);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_I2C, __func__, offset);
         value = 0;
         break;
     }
 
-    DPRINT("read %s [0x%02x] -> 0x%02x\n", imx_i2c_get_regname(offset),
-           (unsigned int)offset, value);
+    DPRINTF("read %s [0x%" HWADDR_PRIx "] -> 0x%02x\n",
+            imx_i2c_get_regname(offset), offset, value);
 
     return (uint64_t)value;
 }
@@ -165,8 +166,8 @@ static void imx_i2c_write(void *opaque, hwaddr offset,
 {
     IMXI2CState *s = IMX_I2C(opaque);
 
-    DPRINT("write %s [0x%02x] <- 0x%02x\n", imx_i2c_get_regname(offset),
-           (unsigned int)offset, (int)value);
+    DPRINTF("write %s [0x%" HWADDR_PRIx "] <- 0x%02x\n",
+            imx_i2c_get_regname(offset), offset, (int)value);
 
     value &= 0xff;
 
@@ -264,13 +265,13 @@ static void imx_i2c_write(void *opaque, hwaddr offset,
                 }
             }
         } else {
-            qemu_log_mask(LOG_UNIMP, "%s[%s]: slave mode not implemented\n",
+            qemu_log_mask(LOG_UNIMP, "[%s]%s: slave mode not implemented\n",
                           TYPE_IMX_I2C, __func__);
         }
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
-                      TYPE_IMX_I2C, __func__, s->address);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_I2C, __func__, offset);
         break;
     }
 }
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 3d958bae5b..0cb8afd2c2 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1616,7 +1616,6 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
     HotplugHandlerClass *hhc;
     Error *local_err = NULL;
     PCMachineState *pcms = PC_MACHINE(hotplug_dev);
-    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
     PCDIMMDevice *dimm = PC_DIMM(dev);
     PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
     MemoryRegion *mr = ddc->get_memory_region(dimm);
@@ -1632,8 +1631,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
         goto out;
     }
 
-    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align,
-                        pcmc->inter_dimm_gap, &local_err);
+    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err);
     if (local_err) {
         goto out;
     }
@@ -1953,7 +1951,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
     PCMachineClass *pcmc = PC_MACHINE_CLASS(oc);
     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
 
-    pcmc->inter_dimm_gap = true;
     pcmc->get_hotplug_handler = mc->get_hotplug_handler;
     mc->get_hotplug_handler = pc_get_hotpug_handler;
     mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 9d4425a5b9..393dcc4544 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -487,7 +487,6 @@ static void pc_i440fx_2_4_machine_options(MachineClass *m)
     m->alias = NULL;
     m->is_default = 0;
     pcmc->broken_reserved_end = true;
-    pcmc->inter_dimm_gap = false;
     SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
 }
 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 3744abd397..2f8f3963c4 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -385,7 +385,6 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
     pc_q35_2_5_machine_options(m);
     m->alias = NULL;
     pcmc->broken_reserved_end = true;
-    pcmc->inter_dimm_gap = false;
     SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
 }
 
diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c
index 8682c42e46..de83f4e3ee 100644
--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c
@@ -33,6 +33,7 @@
 #include "trace.h"
 #include "exec/address-spaces.h"
 #include "sysemu/block-backend.h"
+#include "qemu/error-report.h"
 
 #include <xenguest.h>
 
@@ -382,13 +383,16 @@ static const VMStateDescription vmstate_xen_platform = {
     }
 };
 
-static int xen_platform_initfn(PCIDevice *dev)
+static void xen_platform_realize(PCIDevice *dev, Error **errp)
 {
     PCIXenPlatformState *d = XEN_PLATFORM(dev);
     uint8_t *pci_conf;
 
     /* Device will crash on reset if xen is not initialized */
-    assert(xen_enabled());
+    if (!xen_enabled()) {
+        error_setg(errp, "xen-platform device requires the Xen accelerator");
+        return;
+    }
 
     pci_conf = dev->config;
 
@@ -407,8 +411,6 @@ static int xen_platform_initfn(PCIDevice *dev)
                      &d->mmio_bar);
 
     platform_fixed_ioport_init(d);
-
-    return 0;
 }
 
 static void platform_reset(DeviceState *dev)
@@ -423,7 +425,7 @@ static void xen_platform_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
-    k->init = xen_platform_initfn;
+    k->realize = xen_platform_realize;
     k->vendor_id = PCI_VENDOR_ID_XEN;
     k->device_id = PCI_DEVICE_ID_XEN_PLATFORM;
     k->class_id = PCI_CLASS_OTHERS << 8 | 0x80;
diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c
index 66fb9d96d5..27f3da21a7 100644
--- a/hw/ide/cmd646.c
+++ b/hw/ide/cmd646.c
@@ -417,6 +417,7 @@ static void cmd646_ide_class_init(ObjectClass *klass, void *data)
     k->config_read = cmd646_pci_config_read;
     k->config_write = cmd646_pci_config_write;
     dc->props = cmd646_ide_properties;
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
 }
 
 static const TypeInfo cmd646_ide_info = {
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 66ac2baa94..893c9b9bae 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -590,6 +590,7 @@ static void macio_ide_class_init(ObjectClass *oc, void *data)
     dc->realize = macio_ide_realizefn;
     dc->reset = macio_ide_reset;
     dc->vmsd = &vmstate_pmac;
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
 }
 
 static const TypeInfo macio_ide_type_info = {
diff --git a/hw/input/adb.c b/hw/input/adb.c
index a18eea2652..09eead96b6 100644
--- a/hw/input/adb.c
+++ b/hw/input/adb.c
@@ -362,6 +362,7 @@ static void adb_kbd_class_init(ObjectClass *oc, void *data)
 
     akc->parent_realize = dc->realize;
     dc->realize = adb_kbd_realizefn;
+    set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
 
     adc->devreq = adb_kbd_request;
     dc->reset = adb_kbd_reset;
@@ -566,6 +567,7 @@ static void adb_mouse_class_init(ObjectClass *oc, void *data)
 
     amc->parent_realize = dc->realize;
     dc->realize = adb_mouse_realizefn;
+    set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
 
     adc->devreq = adb_mouse_request;
     dc->reset = adb_mouse_reset;
diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index e8b2386908..0ceebbf87e 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -20,6 +20,7 @@
  */
 
 #include "hw/sysbus.h"
+#include "migration/migration.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
 #include "gic_internal.h"
@@ -307,11 +308,6 @@ static void kvm_arm_gic_put(GICState *s)
     int num_cpu;
     int num_irq;
 
-    if (!kvm_arm_gic_can_save_restore(s)) {
-            DPRINTF("Cannot put kernel gic state, no kernel interface");
-            return;
-    }
-
     /* Note: We do the restore in a slightly different order than the save
      * (where the order doesn't matter and is simply ordered according to the
      * register offset values */
@@ -411,11 +407,6 @@ static void kvm_arm_gic_get(GICState *s)
     int i;
     int cpu;
 
-    if (!kvm_arm_gic_can_save_restore(s)) {
-            DPRINTF("Cannot get kernel gic state, no kernel interface");
-            return;
-    }
-
     /*****************************************************************
      * Distributor State
      */
@@ -503,7 +494,10 @@ static void kvm_arm_gic_reset(DeviceState *dev)
     KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
 
     kgc->parent_reset(dev);
-    kvm_arm_gic_put(s);
+
+    if (kvm_arm_gic_can_save_restore(s)) {
+        kvm_arm_gic_put(s);
+    }
 }
 
 static void kvm_arm_gic_realize(DeviceState *dev, Error **errp)
@@ -573,6 +567,12 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp)
                             KVM_DEV_ARM_VGIC_GRP_ADDR,
                             KVM_VGIC_V2_ADDR_TYPE_CPU,
                             s->dev_fd);
+
+    if (!kvm_arm_gic_can_save_restore(s)) {
+        error_setg(&s->migration_blocker, "This operating system kernel does "
+                                          "not support vGICv2 migration");
+        migrate_add_blocker(s->migration_blocker);
+    }
 }
 
 static void kvm_arm_gic_class_init(ObjectClass *klass, void *data)
diff --git a/hw/intc/imx_avic.c b/hw/intc/imx_avic.c
index 96c376b6af..e0535ffc57 100644
--- a/hw/intc/imx_avic.c
+++ b/hw/intc/imx_avic.c
@@ -17,27 +17,17 @@
 
 #include "hw/intc/imx_avic.h"
 
-#define DEBUG_INT 1
-#undef DEBUG_INT /* comment out for debugging */
-
-#ifdef DEBUG_INT
-#define DPRINTF(fmt, args...) \
-do { printf("%s: " fmt , TYPE_IMX_AVIC, ##args); } while (0)
-#else
-#define DPRINTF(fmt, args...) do {} while (0)
+#ifndef DEBUG_IMX_AVIC
+#define DEBUG_IMX_AVIC 0
 #endif
 
-/*
- * Define to 1 for messages about attempts to
- * access unimplemented registers or similar.
- */
-#define DEBUG_IMPLEMENTATION 1
-#if DEBUG_IMPLEMENTATION
-#  define IPRINTF(fmt, args...) \
-    do  { fprintf(stderr, "%s: " fmt, TYPE_IMX_AVIC, ##args); } while (0)
-#else
-#  define IPRINTF(fmt, args...) do {} while (0)
-#endif
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_AVIC) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_AVIC, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static const VMStateDescription vmstate_imx_avic = {
     .name = TYPE_IMX_AVIC,
@@ -115,8 +105,8 @@ static uint64_t imx_avic_read(void *opaque,
 {
     IMXAVICState *s = (IMXAVICState *)opaque;
 
+    DPRINTF("read(offset = 0x%" HWADDR_PRIx ")\n", offset);
 
-    DPRINTF("read(offset = 0x%x)\n", offset >> 2);
     switch (offset >> 2) {
     case 0: /* INTCNTL */
         return s->intcntl;
@@ -213,7 +203,8 @@ static uint64_t imx_avic_read(void *opaque,
         return 0x4;
 
     default:
-        IPRINTF("%s: Bad offset 0x%x\n", __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_AVIC, __func__, offset);
         return 0;
     }
 }
@@ -225,13 +216,13 @@ static void imx_avic_write(void *opaque, hwaddr offset,
 
     /* Vector Registers not yet supported */
     if (offset >= 0x100 && offset <= 0x2fc) {
-        IPRINTF("%s to vector register %d ignored\n", __func__,
-                (unsigned int)((offset - 0x100) >> 2));
+        qemu_log_mask(LOG_UNIMP, "[%s]%s: vector %d ignored\n",
+                      TYPE_IMX_AVIC, __func__, (int)((offset - 0x100) >> 2));
         return;
     }
 
-    DPRINTF("%s(0x%x) = %x\n", __func__,
-            (unsigned int)offset>>2, (unsigned int)val);
+    DPRINTF("(0x%" HWADDR_PRIx ") = 0x%x\n", offset, (unsigned int)val);
+
     switch (offset >> 2) {
     case 0: /* Interrupt Control Register, INTCNTL */
         s->intcntl = val & (ABFEN | NIDIS | FIDIS | NIAD | FIAD | NM);
@@ -305,7 +296,8 @@ static void imx_avic_write(void *opaque, hwaddr offset,
         return;
 
     default:
-        IPRINTF("%s: Bad offset %x\n", __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_AVIC, __func__, offset);
     }
     imx_avic_update(s);
 }
diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c
index 14ab0e31b8..bfcf155356 100644
--- a/hw/intc/openpic.c
+++ b/hw/intc/openpic.c
@@ -1643,6 +1643,7 @@ static void openpic_class_init(ObjectClass *oc, void *data)
     dc->props = openpic_properties;
     dc->reset = openpic_reset;
     dc->vmsd = &vmstate_openpic;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 }
 
 static const TypeInfo openpic_info = {
diff --git a/hw/intc/openpic_kvm.c b/hw/intc/openpic_kvm.c
index f7cac585a9..649f476ac8 100644
--- a/hw/intc/openpic_kvm.c
+++ b/hw/intc/openpic_kvm.c
@@ -275,6 +275,7 @@ static void kvm_openpic_class_init(ObjectClass *oc, void *data)
     dc->realize = kvm_openpic_realize;
     dc->props = kvm_openpic_properties;
     dc->reset = kvm_openpic_reset;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 }
 
 static const TypeInfo kvm_openpic_info = {
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 2bae994667..80f424b442 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -33,8 +33,7 @@ typedef struct pc_dimms_capacity {
 } pc_dimms_capacity;
 
 void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
-                         MemoryRegion *mr, uint64_t align, bool gap,
-                         Error **errp)
+                         MemoryRegion *mr, uint64_t align, Error **errp)
 {
     int slot;
     MachineState *machine = MACHINE(qdev_get_machine());
@@ -50,7 +49,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
 
     addr = pc_dimm_get_free_addr(hpms->base,
                                  memory_region_size(&hpms->mr),
-                                 !addr ? NULL : &addr, align, gap,
+                                 !addr ? NULL : &addr, align,
                                  memory_region_size(mr), &local_err);
     if (local_err) {
         goto out;
@@ -295,8 +294,8 @@ static int pc_dimm_built_list(Object *obj, void *opaque)
 
 uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
                                uint64_t address_space_size,
-                               uint64_t *hint, uint64_t align, bool gap,
-                               uint64_t size, Error **errp)
+                               uint64_t *hint, uint64_t align, uint64_t size,
+                               Error **errp)
 {
     GSList *list = NULL, *item;
     uint64_t new_addr, ret = 0;
@@ -341,15 +340,13 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
             goto out;
         }
 
-        if (ranges_overlap(dimm->addr, dimm_size, new_addr,
-                           size + (gap ? 1 : 0))) {
+        if (ranges_overlap(dimm->addr, dimm_size, new_addr, size)) {
             if (hint) {
                 DeviceState *d = DEVICE(dimm);
                 error_setg(errp, "address range conflicts with '%s'", d->id);
                 goto out;
             }
-            new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size + (gap ? 1 : 0),
-                                     align);
+            new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size, align);
         }
     }
     ret = new_addr;
diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index c1f570a79f..91c36baa55 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -901,7 +901,7 @@ static void main_cpu_reset(void *opaque)
 
     if (kvm_enabled()) {
         /* Start running from the bootloader we wrote to end of RAM */
-        env->active_tc.PC = 0x40000000 + loaderparams.ram_size;
+        env->active_tc.PC = 0x40000000 + loaderparams.ram_low_size;
     }
 }
 
diff --git a/hw/misc/imx_ccm.c b/hw/misc/imx_ccm.c
index 2e19dbb1bb..4cc2bbc0ed 100644
--- a/hw/misc/imx_ccm.c
+++ b/hw/misc/imx_ccm.c
@@ -16,14 +16,18 @@
 #define CKIH_FREQ 26000000 /* 26MHz crystal input */
 #define CKIL_FREQ    32768 /* nominal 32khz clock */
 
-//#define DEBUG_CCM 1
-#ifdef DEBUG_CCM
-#define DPRINTF(fmt, args...) \
-do { printf("%s: " fmt , TYPE_IMX_CCM, ##args); } while (0)
-#else
-#define DPRINTF(fmt, args...) do {} while (0)
+#ifndef DEBUG_IMX_CCM
+#define DEBUG_IMX_CCM 0
 #endif
 
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_CCM) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_CCM, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
+
 static int imx_ccm_post_load(void *opaque, int version_id);
 
 static const VMStateDescription vmstate_imx_ccm = {
@@ -109,7 +113,7 @@ static void update_clocks(IMXCCMState *s)
     s->hsp_clk_freq = s->mcu_clk_freq / (1 + EXTRACT(s->pdr0, HSP));
     s->ipg_clk_freq = s->hsp_clk_freq / (1 + EXTRACT(s->pdr0, IPG));
 
-    DPRINTF("%s: mcu %uMHz, HSP %uMHz, IPG %uHz\n", __func__,
+    DPRINTF("mcu %uMHz, HSP %uMHz, IPG %uHz\n",
             s->mcu_clk_freq / 1000000,
             s->hsp_clk_freq / 1000000,
             s->ipg_clk_freq);
@@ -135,7 +139,8 @@ static uint64_t imx_ccm_read(void *opaque, hwaddr offset,
 {
     IMXCCMState *s = (IMXCCMState *)opaque;
 
-    DPRINTF("%s(offset=%x)", __func__, offset >> 2);
+    DPRINTF("(offset=0x%" HWADDR_PRIx ")\n", offset);
+
     switch (offset >> 2) {
     case 0: /* CCMR */
         DPRINTF(" ccmr = 0x%x\n", s->ccmr);
@@ -166,9 +171,11 @@ static uint64_t imx_ccm_read(void *opaque, hwaddr offset,
     case 23:
         DPRINTF(" pcmr0 = 0x%x\n", s->pmcr0);
         return s->pmcr0;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_CCM, __func__, offset);
+        return 0;
     }
-    DPRINTF(" return 0\n");
-    return 0;
 }
 
 static void imx_ccm_write(void *opaque, hwaddr offset,
@@ -176,8 +183,9 @@ static void imx_ccm_write(void *opaque, hwaddr offset,
 {
     IMXCCMState *s = (IMXCCMState *)opaque;
 
-    DPRINTF("%s(offset=%x, value = %x)\n", __func__,
-            offset >> 2, (unsigned int)value);
+    DPRINTF("(offset=0x%" HWADDR_PRIx ", value = 0x%x)\n",
+            offset, (unsigned int)value);
+
     switch (offset >> 2) {
     case 0:
         s->ccmr = CCMR_FPMF | (value & 0x3b6fdfff);
@@ -205,6 +213,8 @@ static void imx_ccm_write(void *opaque, hwaddr offset,
         return;
 
     default:
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_CCM, __func__, offset);
         return;
     }
     update_clocks(s);
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index cc76989a39..83d7bd3e5f 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -19,6 +19,7 @@
 #include "hw/hw.h"
 #include "hw/i386/pc.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/msi.h"
 #include "hw/pci/msix.h"
 #include "sysemu/kvm.h"
 #include "migration/migration.h"
@@ -26,6 +27,10 @@
 #include "qemu/event_notifier.h"
 #include "qemu/fifo8.h"
 #include "sysemu/char.h"
+#include "sysemu/hostmem.h"
+#include "qapi/visitor.h"
+
+#include "hw/misc/ivshmem.h"
 
 #include <sys/mman.h>
 #include <sys/types.h>
@@ -34,6 +39,7 @@
 #define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
 #define PCI_DEVICE_ID_IVSHMEM   0x1110
 
+#define IVSHMEM_MAX_PEERS G_MAXUINT16
 #define IVSHMEM_IOEVENTFD   0
 #define IVSHMEM_MSI     1
 
@@ -54,24 +60,26 @@
 #define IVSHMEM(obj) \
     OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM)
 
+#define IVSHMEM_MEMDEV_PROP "memdev"
+
 typedef struct Peer {
     int nb_eventfds;
     EventNotifier *eventfds;
 } Peer;
 
-typedef struct EventfdEntry {
+typedef struct MSIVector {
     PCIDevice *pdev;
-    int vector;
-} EventfdEntry;
+    int virq;
+} MSIVector;
 
 typedef struct IVShmemState {
     /*< private >*/
     PCIDevice parent_obj;
     /*< public >*/
 
+    HostMemoryBackend *hostmem;
     uint32_t intrmask;
     uint32_t intrstatus;
-    uint32_t doorbell;
 
     CharDriverState **eventfd_chr;
     CharDriverState *server_chr;
@@ -85,18 +93,15 @@ typedef struct IVShmemState {
     MemoryRegion bar;
     MemoryRegion ivshmem;
     uint64_t ivshmem_size; /* size of shared memory region */
-    uint32_t ivshmem_attr;
     uint32_t ivshmem_64bit;
-    int shm_fd; /* shared memory file descriptor */
 
     Peer *peers;
-    int nb_peers; /* how many guests we have space for */
-    int max_peer; /* maximum numbered peer */
+    int nb_peers; /* how many peers we have space for */
 
     int vm_id;
     uint32_t vectors;
     uint32_t features;
-    EventfdEntry *eventfd_table;
+    MSIVector *msi_vectors;
 
     Error *migration_blocker;
 
@@ -119,12 +124,8 @@ static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
     return (ivs->features & (1 << feature));
 }
 
-static inline bool is_power_of_two(uint64_t x) {
-    return (x & (x - 1)) == 0;
-}
-
 /* accessing registers - based on rtl8139 */
-static void ivshmem_update_irq(IVShmemState *s, int val)
+static void ivshmem_update_irq(IVShmemState *s)
 {
     PCIDevice *d = PCI_DEVICE(s);
     int isr;
@@ -145,7 +146,7 @@ static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
 
     s->intrmask = val;
 
-    ivshmem_update_irq(s, val);
+    ivshmem_update_irq(s);
 }
 
 static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
@@ -163,7 +164,7 @@ static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
 
     s->intrstatus = val;
 
-    ivshmem_update_irq(s, val);
+    ivshmem_update_irq(s);
 }
 
 static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
@@ -173,7 +174,7 @@ static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
     /* reading ISR clears all interrupts */
     s->intrstatus = 0;
 
-    ivshmem_update_irq(s, 0);
+    ivshmem_update_irq(s);
 
     return ret;
 }
@@ -201,7 +202,7 @@ static void ivshmem_io_write(void *opaque, hwaddr addr,
 
         case DOORBELL:
             /* check that dest VM ID is reasonable */
-            if (dest > s->max_peer) {
+            if (dest >= s->nb_peers) {
                 IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
                 break;
             }
@@ -210,10 +211,13 @@ static void ivshmem_io_write(void *opaque, hwaddr addr,
             if (vector < s->peers[dest].nb_eventfds) {
                 IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
                 event_notifier_set(&s->peers[dest].eventfds[vector]);
+            } else {
+                IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
+                                vector, dest);
             }
             break;
         default:
-            IVSHMEM_DPRINTF("Invalid VM Doorbell VM %d\n", dest);
+            IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr);
     }
 }
 
@@ -236,7 +240,7 @@ static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
 
         case IVPOSITION:
             /* return my VM ID if the memory is mapped */
-            if (s->shm_fd > 0) {
+            if (memory_region_is_mapped(&s->ivshmem)) {
                 ret = s->vm_id;
             } else {
                 ret = -1;
@@ -265,14 +269,14 @@ static void ivshmem_receive(void *opaque, const uint8_t *buf, int size)
 {
     IVShmemState *s = opaque;
 
-    ivshmem_IntrStatus_write(s, *buf);
+    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x size: %d\n", *buf, size);
 
-    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf);
+    ivshmem_IntrStatus_write(s, *buf);
 }
 
 static int ivshmem_can_receive(void * opaque)
 {
-    return 8;
+    return sizeof(int64_t);
 }
 
 static void ivshmem_event(void *opaque, int event)
@@ -282,11 +286,70 @@ static void ivshmem_event(void *opaque, int event)
 
 static void fake_irqfd(void *opaque, const uint8_t *buf, int size) {
 
-    EventfdEntry *entry = opaque;
+    MSIVector *entry = opaque;
     PCIDevice *pdev = entry->pdev;
+    IVShmemState *s = IVSHMEM(pdev);
+    int vector = entry - s->msi_vectors;
+
+    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
+    msix_notify(pdev, vector);
+}
+
+static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
+                                 MSIMessage msg)
+{
+    IVShmemState *s = IVSHMEM(dev);
+    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
+    MSIVector *v = &s->msi_vectors[vector];
+    int ret;
+
+    IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
+
+    ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
+}
+
+static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
+{
+    IVShmemState *s = IVSHMEM(dev);
+    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
+    int ret;
+
+    IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
+
+    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n,
+                                                s->msi_vectors[vector].virq);
+    if (ret != 0) {
+        error_report("remove_irqfd_notifier_gsi failed");
+    }
+}
+
+static void ivshmem_vector_poll(PCIDevice *dev,
+                                unsigned int vector_start,
+                                unsigned int vector_end)
+{
+    IVShmemState *s = IVSHMEM(dev);
+    unsigned int vector;
+
+    IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);
+
+    vector_end = MIN(vector_end, s->vectors);
+
+    for (vector = vector_start; vector < vector_end; vector++) {
+        EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];
+
+        if (!msix_is_masked(dev, vector)) {
+            continue;
+        }
 
-    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, entry->vector);
-    msix_notify(pdev, entry->vector);
+        if (event_notifier_test_and_clear(notifier)) {
+            msix_set_pending(dev, vector);
+        }
+    }
 }
 
 static CharDriverState* create_eventfd_chr_device(void * opaque, EventNotifier *n,
@@ -294,24 +357,26 @@ static CharDriverState* create_eventfd_chr_device(void * opaque, EventNotifier *
 {
    /* create an event character device based on the passed eventfd */
     IVShmemState *s = opaque;
-    CharDriverState * chr;
+    PCIDevice *pdev = PCI_DEVICE(s);
     int eventfd = event_notifier_get_fd(n);
+    CharDriverState *chr;
+
+    s->msi_vectors[vector].pdev = pdev;
 
     chr = qemu_chr_open_eventfd(eventfd);
 
     if (chr == NULL) {
-        error_report("creating eventfd for eventfd %d failed", eventfd);
-        exit(1);
+        error_report("creating chardriver for eventfd %d failed", eventfd);
+        return NULL;
     }
     qemu_chr_fe_claim_no_fail(chr);
 
     /* if MSI is supported we need multiple interrupts */
     if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
-        s->eventfd_table[vector].pdev = PCI_DEVICE(s);
-        s->eventfd_table[vector].vector = vector;
+        s->msi_vectors[vector].pdev = PCI_DEVICE(s);
 
         qemu_chr_add_handlers(chr, ivshmem_can_receive, fake_irqfd,
-                      ivshmem_event, &s->eventfd_table[vector]);
+                      ivshmem_event, &s->msi_vectors[vector]);
     } else {
         qemu_chr_add_handlers(chr, ivshmem_can_receive, ivshmem_receive,
                       ivshmem_event, s);
@@ -321,22 +386,23 @@ static CharDriverState* create_eventfd_chr_device(void * opaque, EventNotifier *
 
 }
 
-static int check_shm_size(IVShmemState *s, int fd) {
+static int check_shm_size(IVShmemState *s, int fd, Error **errp)
+{
    /* check that the guest isn't going to try and map more memory than
     * the object has allocated; return -1 to indicate error */
 
     struct stat buf;
 
     if (fstat(fd, &buf) < 0) {
-        error_report("exiting: fstat on fd %d failed: %s",
-                     fd, strerror(errno));
+        error_setg(errp, "exiting: fstat on fd %d failed: %s",
+                   fd, strerror(errno));
         return -1;
     }
 
     if (s->ivshmem_size > buf.st_size) {
-        error_report("Requested memory size greater"
-                     " than shared object size (%" PRIu64 " > %" PRIu64")",
-                     s->ivshmem_size, (uint64_t)buf.st_size);
+        error_setg(errp, "Requested memory size greater"
+                   " than shared object size (%" PRIu64 " > %" PRIu64")",
+                   s->ivshmem_size, (uint64_t)buf.st_size);
         return -1;
     } else {
         return 0;
@@ -345,13 +411,16 @@ static int check_shm_size(IVShmemState *s, int fd) {
 
 /* create the shared memory BAR when we are not using the server, so we can
  * create the BAR and map the memory immediately */
-static void create_shared_memory_BAR(IVShmemState *s, int fd) {
-
+static int create_shared_memory_BAR(IVShmemState *s, int fd, uint8_t attr,
+                                    Error **errp)
+{
     void * ptr;
 
-    s->shm_fd = fd;
-
     ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+    if (ptr == MAP_FAILED) {
+        error_setg_errno(errp, errno, "Failed to mmap shared memory");
+        return -1;
+    }
 
     memory_region_init_ram_ptr(&s->ivshmem, OBJECT(s), "ivshmem.bar2",
                                s->ivshmem_size, ptr);
@@ -359,7 +428,9 @@ static void create_shared_memory_BAR(IVShmemState *s, int fd) {
     memory_region_add_subregion(&s->bar, 0, &s->ivshmem);
 
     /* region for shared memory */
-    pci_register_bar(PCI_DEVICE(s), 2, s->ivshmem_attr, &s->bar);
+    pci_register_bar(PCI_DEVICE(s), 2, attr, &s->bar);
+
+    return 0;
 }
 
 static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
@@ -382,25 +453,26 @@ static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
                               &s->peers[posn].eventfds[i]);
 }
 
-static void close_guest_eventfds(IVShmemState *s, int posn)
+static void close_peer_eventfds(IVShmemState *s, int posn)
 {
-    int i, guest_curr_max;
+    int i, n;
 
     if (!ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
         return;
     }
     if (posn < 0 || posn >= s->nb_peers) {
+        error_report("invalid peer %d", posn);
         return;
     }
 
-    guest_curr_max = s->peers[posn].nb_eventfds;
+    n = s->peers[posn].nb_eventfds;
 
     memory_region_transaction_begin();
-    for (i = 0; i < guest_curr_max; i++) {
+    for (i = 0; i < n; i++) {
         ivshmem_del_eventfd(s, posn, i);
     }
     memory_region_transaction_commit();
-    for (i = 0; i < guest_curr_max; i++) {
+    for (i = 0; i < n; i++) {
         event_notifier_cleanup(&s->peers[posn].eventfds[i]);
     }
 
@@ -409,125 +481,207 @@ static void close_guest_eventfds(IVShmemState *s, int posn)
 }
 
 /* this function increases the dynamic storage needed to store data about other
- * guests */
-static int increase_dynamic_storage(IVShmemState *s, int new_min_size)
+ * peers */
+static int resize_peers(IVShmemState *s, int new_min_size)
 {
 
-    int j, old_nb_alloc;
+    int j, old_size;
 
-    /* check for integer overflow */
-    if (new_min_size >= INT_MAX / sizeof(Peer) - 1 || new_min_size <= 0) {
+    /* limit number of max peers */
+    if (new_min_size <= 0 || new_min_size > IVSHMEM_MAX_PEERS) {
         return -1;
     }
-
-    old_nb_alloc = s->nb_peers;
-
-    if (new_min_size >= s->nb_peers) {
-        /* +1 because #new_min_size is used as last array index */
-        s->nb_peers = new_min_size + 1;
-    } else {
+    if (new_min_size <= s->nb_peers) {
         return 0;
     }
 
-    IVSHMEM_DPRINTF("bumping storage to %d guests\n", s->nb_peers);
+    old_size = s->nb_peers;
+    s->nb_peers = new_min_size;
+
+    IVSHMEM_DPRINTF("bumping storage to %d peers\n", s->nb_peers);
+
     s->peers = g_realloc(s->peers, s->nb_peers * sizeof(Peer));
 
-    /* zero out new pointers */
-    for (j = old_nb_alloc; j < s->nb_peers; j++) {
-        s->peers[j].eventfds = NULL;
+    for (j = old_size; j < s->nb_peers; j++) {
+        s->peers[j].eventfds = g_new0(EventNotifier, s->vectors);
         s->peers[j].nb_eventfds = 0;
     }
 
     return 0;
 }
 
-static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
+static bool fifo_update_and_get(IVShmemState *s, const uint8_t *buf, int size,
+                                void *data, size_t len)
 {
-    IVShmemState *s = opaque;
-    int incoming_fd, tmp_fd;
-    int guest_max_eventfd;
-    long incoming_posn;
+    const uint8_t *p;
+    uint32_t num;
 
-    if (fifo8_is_empty(&s->incoming_fifo) && size == sizeof(incoming_posn)) {
-        memcpy(&incoming_posn, buf, size);
-    } else {
-        const uint8_t *p;
-        uint32_t num;
+    assert(len <= sizeof(int64_t)); /* limitation of the fifo */
+    if (fifo8_is_empty(&s->incoming_fifo) && size == len) {
+        memcpy(data, buf, size);
+        return true;
+    }
+
+    IVSHMEM_DPRINTF("short read of %d bytes\n", size);
+
+    num = MIN(size, sizeof(int64_t) - fifo8_num_used(&s->incoming_fifo));
+    fifo8_push_all(&s->incoming_fifo, buf, num);
+
+    if (fifo8_num_used(&s->incoming_fifo) < len) {
+        assert(num == 0);
+        return false;
+    }
+
+    size -= num;
+    buf += num;
+    p = fifo8_pop_buf(&s->incoming_fifo, len, &num);
+    assert(num == len);
+
+    memcpy(data, p, len);
+
+    if (size > 0) {
+        fifo8_push_all(&s->incoming_fifo, buf, size);
+    }
+
+    return true;
+}
+
+static bool fifo_update_and_get_i64(IVShmemState *s,
+                                    const uint8_t *buf, int size, int64_t *i64)
+{
+    if (fifo_update_and_get(s, buf, size, i64, sizeof(*i64))) {
+        *i64 = GINT64_FROM_LE(*i64);
+        return true;
+    }
+
+    return false;
+}
 
-        IVSHMEM_DPRINTF("short read of %d bytes\n", size);
-        num = MAX(size, sizeof(long) - fifo8_num_used(&s->incoming_fifo));
-        fifo8_push_all(&s->incoming_fifo, buf, num);
-        if (fifo8_num_used(&s->incoming_fifo) < sizeof(incoming_posn)) {
+static int ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector)
+{
+    PCIDevice *pdev = PCI_DEVICE(s);
+    MSIMessage msg = msix_get_message(pdev, vector);
+    int ret;
+
+    IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
+
+    if (s->msi_vectors[vector].pdev != NULL) {
+        return 0;
+    }
+
+    ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev);
+    if (ret < 0) {
+        error_report("ivshmem: kvm_irqchip_add_msi_route failed");
+        return -1;
+    }
+
+    s->msi_vectors[vector].virq = ret;
+    s->msi_vectors[vector].pdev = pdev;
+
+    return 0;
+}
+
+static void setup_interrupt(IVShmemState *s, int vector)
+{
+    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
+    bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
+        ivshmem_has_feature(s, IVSHMEM_MSI);
+    PCIDevice *pdev = PCI_DEVICE(s);
+
+    IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);
+
+    if (!with_irqfd) {
+        IVSHMEM_DPRINTF("with eventfd");
+        s->eventfd_chr[vector] = create_eventfd_chr_device(s, n, vector);
+    } else if (msix_enabled(pdev)) {
+        IVSHMEM_DPRINTF("with irqfd");
+        if (ivshmem_add_kvm_msi_virq(s, vector) < 0) {
             return;
         }
-        size -= num;
-        buf += num;
-        p = fifo8_pop_buf(&s->incoming_fifo, sizeof(incoming_posn), &num);
-        g_assert(num == sizeof(incoming_posn));
-        memcpy(&incoming_posn, p, sizeof(incoming_posn));
-        if (size > 0) {
-            fifo8_push_all(&s->incoming_fifo, buf, size);
+
+        if (!msix_is_masked(pdev, vector)) {
+            kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
+                                               s->msi_vectors[vector].virq);
         }
+    } else {
+        /* it will be delayed until msix is enabled, in write_config */
+        IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled");
+    }
+}
+
+static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
+{
+    IVShmemState *s = opaque;
+    int incoming_fd;
+    int new_eventfd;
+    int64_t incoming_posn;
+    Error *err = NULL;
+    Peer *peer;
+
+    if (!fifo_update_and_get_i64(s, buf, size, &incoming_posn)) {
+        return;
     }
 
     if (incoming_posn < -1) {
-        IVSHMEM_DPRINTF("invalid incoming_posn %ld\n", incoming_posn);
+        IVSHMEM_DPRINTF("invalid incoming_posn %" PRId64 "\n", incoming_posn);
         return;
     }
 
     /* pick off s->server_chr->msgfd and store it, posn should accompany msg */
-    tmp_fd = qemu_chr_fe_get_msgfd(s->server_chr);
-    IVSHMEM_DPRINTF("posn is %ld, fd is %d\n", incoming_posn, tmp_fd);
+    incoming_fd = qemu_chr_fe_get_msgfd(s->server_chr);
+    IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n",
+                    incoming_posn, incoming_fd);
 
-    /* make sure we have enough space for this guest */
+    /* make sure we have enough space for this peer */
     if (incoming_posn >= s->nb_peers) {
-        if (increase_dynamic_storage(s, incoming_posn) < 0) {
-            error_report("increase_dynamic_storage() failed");
-            if (tmp_fd != -1) {
-                close(tmp_fd);
+        if (resize_peers(s, incoming_posn + 1) < 0) {
+            error_report("failed to resize peers array");
+            if (incoming_fd != -1) {
+                close(incoming_fd);
             }
             return;
         }
     }
 
-    if (tmp_fd == -1) {
+    peer = &s->peers[incoming_posn];
+
+    if (incoming_fd == -1) {
         /* if posn is positive and unseen before, then this is our posn */
-        if ((incoming_posn >= 0) &&
-                            (s->peers[incoming_posn].eventfds == NULL)) {
+        if (incoming_posn >= 0 && s->vm_id == -1) {
             /* receive our posn */
             s->vm_id = incoming_posn;
-            return;
         } else {
-            /* otherwise an fd == -1 means an existing guest has gone away */
-            IVSHMEM_DPRINTF("posn %ld has gone away\n", incoming_posn);
-            close_guest_eventfds(s, incoming_posn);
-            return;
+            /* otherwise an fd == -1 means an existing peer has gone away */
+            IVSHMEM_DPRINTF("posn %" PRId64 " has gone away\n", incoming_posn);
+            close_peer_eventfds(s, incoming_posn);
         }
-    }
-
-    /* because of the implementation of get_msgfd, we need a dup */
-    incoming_fd = dup(tmp_fd);
-
-    if (incoming_fd == -1) {
-        error_report("could not allocate file descriptor %s", strerror(errno));
-        close(tmp_fd);
         return;
     }
 
     /* if the position is -1, then it's shared memory region fd */
     if (incoming_posn == -1) {
-
         void * map_ptr;
 
-        s->max_peer = 0;
+        if (memory_region_is_mapped(&s->ivshmem)) {
+            error_report("shm already initialized");
+            close(incoming_fd);
+            return;
+        }
 
-        if (check_shm_size(s, incoming_fd) == -1) {
-            exit(1);
+        if (check_shm_size(s, incoming_fd, &err) == -1) {
+            error_report_err(err);
+            close(incoming_fd);
+            return;
         }
 
         /* mmap the region and map into the BAR2 */
         map_ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED,
                                                             incoming_fd, 0);
+        if (map_ptr == MAP_FAILED) {
+            error_report("Failed to mmap shared memory %s", strerror(errno));
+            close(incoming_fd);
+            return;
+        }
         memory_region_init_ram_ptr(&s->ivshmem, OBJECT(s),
                                    "ivshmem.bar2", s->ivshmem_size, map_ptr);
         vmstate_register_ram(&s->ivshmem, DEVICE(s));
@@ -537,44 +691,59 @@ static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
 
         memory_region_add_subregion(&s->bar, 0, &s->ivshmem);
 
-        /* only store the fd if it is successfully mapped */
-        s->shm_fd = incoming_fd;
+        close(incoming_fd);
+        return;
+    }
 
+    /* each peer has an associated array of eventfds, and we keep
+     * track of how many eventfds we have received so far */
+    /* get a new eventfd: */
+    if (peer->nb_eventfds >= s->vectors) {
+        error_report("Too many eventfd received, device has %d vectors",
+                     s->vectors);
+        close(incoming_fd);
         return;
     }
 
-    /* each guest has an array of eventfds, and we keep track of how many
-     * guests for each VM */
-    guest_max_eventfd = s->peers[incoming_posn].nb_eventfds;
+    new_eventfd = peer->nb_eventfds++;
+
+    /* this is an eventfd for a particular peer VM */
+    IVSHMEM_DPRINTF("eventfds[%" PRId64 "][%d] = %d\n", incoming_posn,
+                    new_eventfd, incoming_fd);
+    event_notifier_init_fd(&peer->eventfds[new_eventfd], incoming_fd);
+    fcntl_setfl(incoming_fd, O_NONBLOCK); /* msix/irqfd poll non block */
 
-    if (guest_max_eventfd == 0) {
-        /* one eventfd per MSI vector */
-        s->peers[incoming_posn].eventfds = g_new(EventNotifier, s->vectors);
+    if (incoming_posn == s->vm_id) {
+        setup_interrupt(s, new_eventfd);
     }
 
-    /* this is an eventfd for a particular guest VM */
-    IVSHMEM_DPRINTF("eventfds[%ld][%d] = %d\n", incoming_posn,
-                    guest_max_eventfd, incoming_fd);
-    event_notifier_init_fd(&s->peers[incoming_posn].eventfds[guest_max_eventfd],
-                           incoming_fd);
+    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
+        ivshmem_add_eventfd(s, incoming_posn, new_eventfd);
+    }
+}
 
-    /* increment count for particular guest */
-    s->peers[incoming_posn].nb_eventfds++;
+static void ivshmem_check_version(void *opaque, const uint8_t * buf, int size)
+{
+    IVShmemState *s = opaque;
+    int tmp;
+    int64_t version;
 
-    /* keep track of the maximum VM ID */
-    if (incoming_posn > s->max_peer) {
-        s->max_peer = incoming_posn;
+    if (!fifo_update_and_get_i64(s, buf, size, &version)) {
+        return;
     }
 
-    if (incoming_posn == s->vm_id) {
-        s->eventfd_chr[guest_max_eventfd] = create_eventfd_chr_device(s,
-                   &s->peers[s->vm_id].eventfds[guest_max_eventfd],
-                   guest_max_eventfd);
+    tmp = qemu_chr_fe_get_msgfd(s->server_chr);
+    if (tmp != -1 || version != IVSHMEM_PROTOCOL_VERSION) {
+        fprintf(stderr, "incompatible version, you are connecting to a ivshmem-"
+                "server using a different protocol please check your setup\n");
+        qemu_chr_delete(s->server_chr);
+        s->server_chr = NULL;
+        return;
     }
 
-    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
-        ivshmem_add_eventfd(s, incoming_posn, guest_max_eventfd);
-    }
+    IVSHMEM_DPRINTF("version check ok, switch to real chardev handler\n");
+    qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, ivshmem_read,
+                          ivshmem_event, s);
 }
 
 /* Select the MSI-X vectors used by device.
@@ -585,6 +754,7 @@ static void ivshmem_use_msix(IVShmemState * s)
     PCIDevice *d = PCI_DEVICE(s);
     int i;
 
+    IVSHMEM_DPRINTF("%s, msix present: %d\n", __func__, msix_present(d));
     if (!msix_present(d)) {
         return;
     }
@@ -599,128 +769,127 @@ static void ivshmem_reset(DeviceState *d)
     IVShmemState *s = IVSHMEM(d);
 
     s->intrstatus = 0;
+    s->intrmask = 0;
     ivshmem_use_msix(s);
 }
 
-static uint64_t ivshmem_get_size(IVShmemState * s) {
-
-    uint64_t value;
-    char *ptr;
-
-    value = strtoull(s->sizearg, &ptr, 10);
-    switch (*ptr) {
-        case 0: case 'M': case 'm':
-            value <<= 20;
-            break;
-        case 'G': case 'g':
-            value <<= 30;
-            break;
-        default:
-            error_report("invalid ram size: %s", s->sizearg);
-            exit(1);
-    }
-
-    /* BARs must be a power of 2 */
-    if (!is_power_of_two(value)) {
-        error_report("size must be power of 2");
-        exit(1);
-    }
-
-    return value;
-}
-
-static void ivshmem_setup_msi(IVShmemState * s)
+static int ivshmem_setup_msi(IVShmemState * s)
 {
     if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1)) {
-        IVSHMEM_DPRINTF("msix initialization failed\n");
-        exit(1);
+        return -1;
     }
 
     IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
 
     /* allocate QEMU char devices for receiving interrupts */
-    s->eventfd_table = g_malloc0(s->vectors * sizeof(EventfdEntry));
+    s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));
 
     ivshmem_use_msix(s);
+    return 0;
 }
 
-static void ivshmem_save(QEMUFile* f, void *opaque)
+static void ivshmem_enable_irqfd(IVShmemState *s)
 {
-    IVShmemState *proxy = opaque;
-    PCIDevice *pci_dev = PCI_DEVICE(proxy);
-
-    IVSHMEM_DPRINTF("ivshmem_save\n");
-    pci_device_save(pci_dev, f);
+    PCIDevice *pdev = PCI_DEVICE(s);
+    int i;
 
-    if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
-        msix_save(pci_dev, f);
-    } else {
-        qemu_put_be32(f, proxy->intrstatus);
-        qemu_put_be32(f, proxy->intrmask);
+    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
+        ivshmem_add_kvm_msi_virq(s, i);
     }
 
+    if (msix_set_vector_notifiers(pdev,
+                                  ivshmem_vector_unmask,
+                                  ivshmem_vector_mask,
+                                  ivshmem_vector_poll)) {
+        error_report("ivshmem: msix_set_vector_notifiers failed");
+    }
 }
 
-static int ivshmem_load(QEMUFile* f, void *opaque, int version_id)
+static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
 {
-    IVSHMEM_DPRINTF("ivshmem_load\n");
-
-    IVShmemState *proxy = opaque;
-    PCIDevice *pci_dev = PCI_DEVICE(proxy);
-    int ret;
+    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);
 
-    if (version_id > 0) {
-        return -EINVAL;
+    if (s->msi_vectors[vector].pdev == NULL) {
+        return;
     }
 
-    if (proxy->role_val == IVSHMEM_PEER) {
-        error_report("'peer' devices are not migratable");
-        return -EINVAL;
-    }
+    /* it was cleaned when masked in the frontend. */
+    kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);
 
-    ret = pci_device_load(pci_dev, f);
-    if (ret) {
-        return ret;
-    }
+    s->msi_vectors[vector].pdev = NULL;
+}
 
-    if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
-        msix_load(pci_dev, f);
-	ivshmem_use_msix(proxy);
-    } else {
-        proxy->intrstatus = qemu_get_be32(f);
-        proxy->intrmask = qemu_get_be32(f);
+static void ivshmem_disable_irqfd(IVShmemState *s)
+{
+    PCIDevice *pdev = PCI_DEVICE(s);
+    int i;
+
+    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
+        ivshmem_remove_kvm_msi_virq(s, i);
     }
 
-    return 0;
+    msix_unset_vector_notifiers(pdev);
 }
 
-static void ivshmem_write_config(PCIDevice *pci_dev, uint32_t address,
-				 uint32_t val, int len)
+static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
+                                 uint32_t val, int len)
 {
-    pci_default_write_config(pci_dev, address, val, len);
+    IVShmemState *s = IVSHMEM(pdev);
+    int is_enabled, was_enabled = msix_enabled(pdev);
+
+    pci_default_write_config(pdev, address, val, len);
+    is_enabled = msix_enabled(pdev);
+
+    if (kvm_msi_via_irqfd_enabled() && s->vm_id != -1) {
+        if (!was_enabled && is_enabled) {
+            ivshmem_enable_irqfd(s);
+        } else if (was_enabled && !is_enabled) {
+            ivshmem_disable_irqfd(s);
+        }
+    }
 }
 
-static int pci_ivshmem_init(PCIDevice *dev)
+static void pci_ivshmem_realize(PCIDevice *dev, Error **errp)
 {
     IVShmemState *s = IVSHMEM(dev);
     uint8_t *pci_conf;
+    uint8_t attr = PCI_BASE_ADDRESS_SPACE_MEMORY |
+        PCI_BASE_ADDRESS_MEM_PREFETCH;
 
-    if (s->sizearg == NULL)
-        s->ivshmem_size = 4 << 20; /* 4 MB default */
-    else {
-        s->ivshmem_size = ivshmem_get_size(s);
+    if (!!s->server_chr + !!s->shmobj + !!s->hostmem != 1) {
+        error_setg(errp, "You must specify either a shmobj, a chardev"
+                   " or a hostmem");
+        return;
     }
 
-    fifo8_create(&s->incoming_fifo, sizeof(long));
+    if (s->hostmem) {
+        MemoryRegion *mr;
 
-    register_savevm(DEVICE(dev), "ivshmem", 0, 0, ivshmem_save, ivshmem_load,
-                                                                        dev);
+        if (s->sizearg) {
+            g_warning("size argument ignored with hostmem");
+        }
+
+        mr = host_memory_backend_get_memory(s->hostmem, errp);
+        s->ivshmem_size = memory_region_size(mr);
+    } else if (s->sizearg == NULL) {
+        s->ivshmem_size = 4 << 20; /* 4 MB default */
+    } else {
+        char *end;
+        int64_t size = qemu_strtosz(s->sizearg, &end);
+        if (size < 0 || *end != '\0' || !is_power_of_2(size)) {
+            error_setg(errp, "Invalid size %s", s->sizearg);
+            return;
+        }
+        s->ivshmem_size = size;
+    }
+
+    fifo8_create(&s->incoming_fifo, sizeof(int64_t));
 
     /* IRQFD requires MSI */
     if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
         !ivshmem_has_feature(s, IVSHMEM_MSI)) {
-        error_report("ioeventfd/irqfd requires MSI");
-        exit(1);
+        error_setg(errp, "ioeventfd/irqfd requires MSI");
+        return;
     }
 
     /* check that role is reasonable */
@@ -730,8 +899,8 @@ static int pci_ivshmem_init(PCIDevice *dev)
         } else if (strncmp(s->role, "master", 7) == 0) {
             s->role_val = IVSHMEM_MASTER;
         } else {
-            error_report("'role' must be 'peer' or 'master'");
-            exit(1);
+            error_setg(errp, "'role' must be 'peer' or 'master'");
+            return;
         }
     } else {
         s->role_val = IVSHMEM_MASTER; /* default */
@@ -748,8 +917,6 @@ static int pci_ivshmem_init(PCIDevice *dev)
 
     pci_config_set_interrupt_pin(pci_conf, 1);
 
-    s->shm_fd = 0;
-
     memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
                           "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);
 
@@ -758,51 +925,51 @@ static int pci_ivshmem_init(PCIDevice *dev)
                      &s->ivshmem_mmio);
 
     memory_region_init(&s->bar, OBJECT(s), "ivshmem-bar2-container", s->ivshmem_size);
-    s->ivshmem_attr = PCI_BASE_ADDRESS_SPACE_MEMORY |
-        PCI_BASE_ADDRESS_MEM_PREFETCH;
     if (s->ivshmem_64bit) {
-        s->ivshmem_attr |= PCI_BASE_ADDRESS_MEM_TYPE_64;
+        attr |= PCI_BASE_ADDRESS_MEM_TYPE_64;
     }
 
-    if ((s->server_chr != NULL) &&
-                        (strncmp(s->server_chr->filename, "unix:", 5) == 0)) {
-        /* if we get a UNIX socket as the parameter we will talk
-         * to the ivshmem server to receive the memory region */
+    if (s->hostmem != NULL) {
+        MemoryRegion *mr;
 
-        if (s->shmobj != NULL) {
-            error_report("WARNING: do not specify both 'chardev' "
-                         "and 'shm' with ivshmem");
+        IVSHMEM_DPRINTF("using hostmem\n");
+
+        mr = host_memory_backend_get_memory(MEMORY_BACKEND(s->hostmem), errp);
+        vmstate_register_ram(mr, DEVICE(s));
+        memory_region_add_subregion(&s->bar, 0, mr);
+        pci_register_bar(PCI_DEVICE(s), 2, attr, &s->bar);
+    } else if (s->server_chr != NULL) {
+        if (strncmp(s->server_chr->filename, "unix:", 5)) {
+            error_setg(errp, "chardev is not a unix client socket");
+            return;
         }
 
+        /* if we get a UNIX socket as the parameter we will talk
+         * to the ivshmem server to receive the memory region */
+
         IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
                         s->server_chr->filename);
 
-        if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
-            ivshmem_setup_msi(s);
+        if (ivshmem_has_feature(s, IVSHMEM_MSI) &&
+            ivshmem_setup_msi(s)) {
+            error_setg(errp, "msix initialization failed");
+            return;
         }
 
-        /* we allocate enough space for 16 guests and grow as needed */
-        s->nb_peers = 16;
+        /* we allocate enough space for 16 peers and grow as needed */
+        resize_peers(s, 16);
         s->vm_id = -1;
 
-        /* allocate/initialize space for interrupt handling */
-        s->peers = g_malloc0(s->nb_peers * sizeof(Peer));
-
-        pci_register_bar(dev, 2, s->ivshmem_attr, &s->bar);
+        pci_register_bar(dev, 2, attr, &s->bar);
 
         s->eventfd_chr = g_malloc0(s->vectors * sizeof(CharDriverState *));
 
-        qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, ivshmem_read,
-                     ivshmem_event, s);
+        qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive,
+                              ivshmem_check_version, ivshmem_event, s);
     } else {
         /* just map the file immediately, we're not using a server */
         int fd;
 
-        if (s->shmobj == NULL) {
-            error_report("Must specify 'chardev' or 'shm' to ivshmem");
-            exit(1);
-        }
-
         IVSHMEM_DPRINTF("using shm_open (shm object = %s)\n", s->shmobj);
 
         /* try opening with O_EXCL and if it succeeds zero the memory
@@ -816,39 +983,155 @@ static int pci_ivshmem_init(PCIDevice *dev)
 
         } else if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR,
                         S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
-            error_report("could not open shared file");
-            exit(1);
+            error_setg(errp, "could not open shared file");
+            return;
+        }
 
+        if (check_shm_size(s, fd, errp) == -1) {
+            return;
         }
 
-        if (check_shm_size(s, fd) == -1) {
-            exit(1);
+        create_shared_memory_BAR(s, fd, attr, errp);
+        close(fd);
+    }
+}
+
+static void pci_ivshmem_exit(PCIDevice *dev)
+{
+    IVShmemState *s = IVSHMEM(dev);
+    int i;
+
+    fifo8_destroy(&s->incoming_fifo);
+
+    if (s->migration_blocker) {
+        migrate_del_blocker(s->migration_blocker);
+        error_free(s->migration_blocker);
+    }
+
+    if (memory_region_is_mapped(&s->ivshmem)) {
+        if (!s->hostmem) {
+            void *addr = memory_region_get_ram_ptr(&s->ivshmem);
+
+            if (munmap(addr, s->ivshmem_size) == -1) {
+                error_report("Failed to munmap shared memory %s",
+                             strerror(errno));
+            }
         }
 
-        create_shared_memory_BAR(s, fd);
+        vmstate_unregister_ram(&s->ivshmem, DEVICE(dev));
+        memory_region_del_subregion(&s->bar, &s->ivshmem);
+    }
 
+    if (s->eventfd_chr) {
+        for (i = 0; i < s->vectors; i++) {
+            if (s->eventfd_chr[i]) {
+                qemu_chr_free(s->eventfd_chr[i]);
+            }
+        }
+        g_free(s->eventfd_chr);
     }
 
-    dev->config_write = ivshmem_write_config;
+    if (s->peers) {
+        for (i = 0; i < s->nb_peers; i++) {
+            close_peer_eventfds(s, i);
+        }
+        g_free(s->peers);
+    }
+
+    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
+        msix_uninit_exclusive_bar(dev);
+    }
+
+    g_free(s->msi_vectors);
+}
+
+static bool test_msix(void *opaque, int version_id)
+{
+    IVShmemState *s = opaque;
+
+    return ivshmem_has_feature(s, IVSHMEM_MSI);
+}
+
+static bool test_no_msix(void *opaque, int version_id)
+{
+    return !test_msix(opaque, version_id);
+}
+
+static int ivshmem_pre_load(void *opaque)
+{
+    IVShmemState *s = opaque;
+
+    if (s->role_val == IVSHMEM_PEER) {
+        error_report("'peer' devices are not migratable");
+        return -EINVAL;
+    }
 
     return 0;
 }
 
-static void pci_ivshmem_uninit(PCIDevice *dev)
+static int ivshmem_post_load(void *opaque, int version_id)
 {
-    IVShmemState *s = IVSHMEM(dev);
+    IVShmemState *s = opaque;
 
-    if (s->migration_blocker) {
-        migrate_del_blocker(s->migration_blocker);
-        error_free(s->migration_blocker);
+    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
+        ivshmem_use_msix(s);
     }
 
-    memory_region_del_subregion(&s->bar, &s->ivshmem);
-    vmstate_unregister_ram(&s->ivshmem, DEVICE(dev));
-    unregister_savevm(DEVICE(dev), "ivshmem", s);
-    fifo8_destroy(&s->incoming_fifo);
+    return 0;
 }
 
+static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id)
+{
+    IVShmemState *s = opaque;
+    PCIDevice *pdev = PCI_DEVICE(s);
+    int ret;
+
+    IVSHMEM_DPRINTF("ivshmem_load_old\n");
+
+    if (version_id != 0) {
+        return -EINVAL;
+    }
+
+    if (s->role_val == IVSHMEM_PEER) {
+        error_report("'peer' devices are not migratable");
+        return -EINVAL;
+    }
+
+    ret = pci_device_load(pdev, f);
+    if (ret) {
+        return ret;
+    }
+
+    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
+        msix_load(pdev, f);
+        ivshmem_use_msix(s);
+    } else {
+        s->intrstatus = qemu_get_be32(f);
+        s->intrmask = qemu_get_be32(f);
+    }
+
+    return 0;
+}
+
+static const VMStateDescription ivshmem_vmsd = {
+    .name = "ivshmem",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .pre_load = ivshmem_pre_load,
+    .post_load = ivshmem_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
+
+        VMSTATE_MSIX_TEST(parent_obj, IVShmemState, test_msix),
+        VMSTATE_UINT32_TEST(intrstatus, IVShmemState, test_no_msix),
+        VMSTATE_UINT32_TEST(intrmask, IVShmemState, test_no_msix),
+
+        VMSTATE_END_OF_LIST()
+    },
+    .load_state_old = ivshmem_load_old,
+    .minimum_version_id_old = 0
+};
+
 static Property ivshmem_properties[] = {
     DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
     DEFINE_PROP_STRING("size", IVShmemState, sizearg),
@@ -866,20 +1149,50 @@ static void ivshmem_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
-    k->init = pci_ivshmem_init;
-    k->exit = pci_ivshmem_uninit;
+    k->realize = pci_ivshmem_realize;
+    k->exit = pci_ivshmem_exit;
+    k->config_write = ivshmem_write_config;
     k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
     k->device_id = PCI_DEVICE_ID_IVSHMEM;
     k->class_id = PCI_CLASS_MEMORY_RAM;
     dc->reset = ivshmem_reset;
     dc->props = ivshmem_properties;
+    dc->vmsd = &ivshmem_vmsd;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    dc->desc = "Inter-VM shared memory";
+}
+
+static void ivshmem_check_memdev_is_busy(Object *obj, const char *name,
+                                         Object *val, Error **errp)
+{
+    MemoryRegion *mr;
+
+    mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), errp);
+    if (memory_region_is_mapped(mr)) {
+        char *path = object_get_canonical_path_component(val);
+        error_setg(errp, "can't use already busy memdev: %s", path);
+        g_free(path);
+    } else {
+        qdev_prop_allow_set_link_before_realize(obj, name, val, errp);
+    }
+}
+
+static void ivshmem_init(Object *obj)
+{
+    IVShmemState *s = IVSHMEM(obj);
+
+    object_property_add_link(obj, IVSHMEM_MEMDEV_PROP, TYPE_MEMORY_BACKEND,
+                             (Object **)&s->hostmem,
+                             ivshmem_check_memdev_is_busy,
+                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
+                             &error_abort);
 }
 
 static const TypeInfo ivshmem_info = {
     .name          = TYPE_IVSHMEM,
     .parent        = TYPE_PCI_DEVICE,
     .instance_size = sizeof(IVShmemState),
+    .instance_init = ivshmem_init,
     .class_init    = ivshmem_class_init,
 };
 
diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c
index 5d7043e99c..0fd75b376f 100644
--- a/hw/misc/macio/cuda.c
+++ b/hw/misc/macio/cuda.c
@@ -738,6 +738,7 @@ static void cuda_class_init(ObjectClass *oc, void *data)
     dc->reset = cuda_reset;
     dc->vmsd = &vmstate_cuda;
     dc->props = cuda_properties;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
 static const TypeInfo cuda_type_info = {
diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c
index c661f86c21..adb990e565 100644
--- a/hw/misc/macio/macio.c
+++ b/hw/misc/macio/macio.c
@@ -393,6 +393,7 @@ static void macio_class_init(ObjectClass *klass, void *data)
     k->vendor_id = PCI_VENDOR_ID_APPLE;
     k->class_id = PCI_CLASS_OTHERS << 8;
     dc->props = macio_properties;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
 static const TypeInfo macio_oldworld_type_info = {
diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
index 1127223cfd..3639fc17f0 100644
--- a/hw/net/cadence_gem.c
+++ b/hw/net/cadence_gem.c
@@ -964,6 +964,7 @@ static void gem_reset(DeviceState *d)
 {
     int i;
     CadenceGEMState *s = CADENCE_GEM(d);
+    const uint8_t *a;
 
     DB_PRINT("\n");
 
@@ -982,6 +983,11 @@ static void gem_reset(DeviceState *d)
     s->regs[GEM_DESCONF5] = 0x002f2145;
     s->regs[GEM_DESCONF6] = 0x00000200;
 
+    /* Set MAC address */
+    a = &s->conf.macaddr.a[0];
+    s->regs[GEM_SPADDR1LO] = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24);
+    s->regs[GEM_SPADDR1HI] = a[4] | (a[5] << 8);
+
     for (i = 0; i < 4; i++) {
         s->sar_active[i] = false;
     }
diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c
index 725f3fa335..c50bf7ff34 100644
--- a/hw/net/imx_fec.c
+++ b/hw/net/imx_fec.c
@@ -27,31 +27,29 @@
 /* For crc32 */
 #include <zlib.h>
 
-#ifndef IMX_FEC_DEBUG
-#define IMX_FEC_DEBUG          0
+#ifndef DEBUG_IMX_FEC
+#define DEBUG_IMX_FEC 0
 #endif
 
-#ifndef IMX_PHY_DEBUG
-#define IMX_PHY_DEBUG          0
-#endif
-
-#if IMX_FEC_DEBUG
-#define FEC_PRINTF(fmt, ...) \
-    do { fprintf(stderr, "%s[%s]: " fmt , TYPE_IMX_FEC, __func__, \
-                 ## __VA_ARGS__); \
+#define FEC_PRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_FEC) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_FEC, \
+                                             __func__, ##args); \
+        } \
     } while (0)
-#else
-#define FEC_PRINTF(fmt, ...) do {} while (0)
+
+#ifndef DEBUG_IMX_PHY
+#define DEBUG_IMX_PHY 0
 #endif
 
-#if IMX_PHY_DEBUG
-#define PHY_PRINTF(fmt, ...) \
-    do { fprintf(stderr, "%s.phy[%s]: " fmt , TYPE_IMX_FEC, __func__, \
-                 ## __VA_ARGS__); \
+#define PHY_PRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_PHY) { \
+            fprintf(stderr, "[%s.phy]%s: " fmt , TYPE_IMX_FEC, \
+                                                 __func__, ##args); \
+        } \
     } while (0)
-#else
-#define PHY_PRINTF(fmt, ...) do {} while (0)
-#endif
 
 static const VMStateDescription vmstate_imx_fec = {
     .name = TYPE_IMX_FEC,
@@ -182,12 +180,12 @@ static uint32_t do_phy_read(IMXFECState *s, int reg)
     case 18:
     case 27:
     case 31:
-        qemu_log_mask(LOG_UNIMP, "%s.phy[%s]: reg %d not implemented\n",
+        qemu_log_mask(LOG_UNIMP, "[%s.phy]%s: reg %d not implemented\n",
                       TYPE_IMX_FEC, __func__, reg);
         val = 0;
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad address at offset %d\n",
                       TYPE_IMX_FEC, __func__, reg);
         val = 0;
         break;
@@ -230,11 +228,11 @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
     case 18:
     case 27:
     case 31:
-        qemu_log_mask(LOG_UNIMP, "%s.phy[%s]: reg %d not implemented\n",
+        qemu_log_mask(LOG_UNIMP, "[%s.phy)%s: reg %d not implemented\n",
                       TYPE_IMX_FEC, __func__, reg);
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s.phy[%s]: Bad address at offset %d\n",
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad address at offset %d\n",
                       TYPE_IMX_FEC, __func__, reg);
         break;
     }
@@ -357,7 +355,7 @@ static uint64_t imx_fec_read(void *opaque, hwaddr addr, unsigned size)
 {
     IMXFECState *s = IMX_FEC(opaque);
 
-    FEC_PRINTF("reading from @ 0x%03x\n", (int)addr);
+    FEC_PRINTF("reading from @ 0x%" HWADDR_PRIx "\n", addr);
 
     switch (addr & 0x3ff) {
     case 0x004:
@@ -417,8 +415,8 @@ static uint64_t imx_fec_read(void *opaque, hwaddr addr, unsigned size)
     case 0x308:
         return s->miigsk_enr;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
-                      TYPE_IMX_FEC, __func__, (int)addr);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_FEC, __func__, addr);
         return 0;
     }
 }
@@ -428,7 +426,7 @@ static void imx_fec_write(void *opaque, hwaddr addr,
 {
     IMXFECState *s = IMX_FEC(opaque);
 
-    FEC_PRINTF("writing 0x%08x @ 0x%03x\n", (int)value, (int)addr);
+    FEC_PRINTF("writing 0x%08x @ 0x%" HWADDR_PRIx "\n", (int)value, addr);
 
     switch (addr & 0x3ff) {
     case 0x004: /* EIR */
@@ -530,8 +528,8 @@ static void imx_fec_write(void *opaque, hwaddr addr,
         s->miigsk_enr = (value & 0x2) ? 0x6 : 0;
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
-                      TYPE_IMX_FEC, __func__, (int)addr);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_FEC, __func__, addr);
         break;
     }
 
@@ -561,7 +559,7 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
     FEC_PRINTF("len %d\n", (int)size);
 
     if (!s->rx_enabled) {
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Unexpected packet\n",
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n",
                       TYPE_IMX_FEC, __func__);
         return 0;
     }
@@ -592,14 +590,16 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
              * save the remainder for when more RX buffers are
              * available, or flag an error.
              */
-            qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Lost end of frame\n",
+            qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Lost end of frame\n",
                           TYPE_IMX_FEC, __func__);
             break;
         }
         buf_len = (size <= s->emrbr) ? size : s->emrbr;
         bd.length = buf_len;
         size -= buf_len;
-        FEC_PRINTF("rx_bd %x length %d\n", addr, bd.length);
+
+        FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length);
+
         /* The last 4 bytes are the CRC.  */
         if (size < 4) {
             buf_len += size - 4;
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 3c5e10dd6d..5e3a233237 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1289,6 +1289,10 @@ static uint32_t vmxnet3_get_interrupt_config(VMXNET3State *s)
 static void vmxnet3_fill_stats(VMXNET3State *s)
 {
     int i;
+
+    if (!s->device_active)
+        return;
+
     for (i = 0; i < s->txq_num; i++) {
         cpu_physical_memory_write(s->txq_descr[i].tx_stats_pa,
                                   &s->txq_descr[i].txq_stats,
diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c
index d35f8a3121..9f165664c6 100644
--- a/hw/nvram/mac_nvram.c
+++ b/hw/nvram/mac_nvram.c
@@ -123,6 +123,7 @@ static void macio_nvram_class_init(ObjectClass *oc, void *data)
     dc->reset = macio_nvram_reset;
     dc->vmsd = &vmstate_macio_nvram;
     dc->props = macio_nvram_properties;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 }
 
 static const TypeInfo macio_nvram_type_info = {
diff --git a/hw/pci-host/grackle.c b/hw/pci-host/grackle.c
index bfe707a1a1..ea31b72e7c 100644
--- a/hw/pci-host/grackle.c
+++ b/hw/pci-host/grackle.c
@@ -146,8 +146,10 @@ static const TypeInfo grackle_pci_info = {
 static void pci_grackle_class_init(ObjectClass *klass, void *data)
 {
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->init = pci_grackle_init_device;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
 static const TypeInfo grackle_pci_host_info = {
diff --git a/hw/pci-host/uninorth.c b/hw/pci-host/uninorth.c
index f0144eb7b0..215b64ffed 100644
--- a/hw/pci-host/uninorth.c
+++ b/hw/pci-host/uninorth.c
@@ -446,8 +446,10 @@ static const TypeInfo unin_internal_pci_host_info = {
 static void pci_unin_main_class_init(ObjectClass *klass, void *data)
 {
     SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     sbc->init = pci_unin_main_init_device;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
 static const TypeInfo pci_unin_main_info = {
@@ -460,8 +462,10 @@ static const TypeInfo pci_unin_main_info = {
 static void pci_u3_agp_class_init(ObjectClass *klass, void *data)
 {
     SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     sbc->init = pci_u3_agp_init_device;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
 static const TypeInfo pci_u3_agp_info = {
@@ -474,8 +478,10 @@ static const TypeInfo pci_u3_agp_info = {
 static void pci_unin_agp_class_init(ObjectClass *klass, void *data)
 {
     SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     sbc->init = pci_unin_agp_init_device;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
 static const TypeInfo pci_unin_agp_info = {
@@ -488,8 +494,10 @@ static const TypeInfo pci_unin_agp_info = {
 static void pci_unin_internal_class_init(ObjectClass *klass, void *data)
 {
     SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     sbc->init = pci_unin_internal_init_device;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
 static const TypeInfo pci_unin_internal_info = {
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 2fdada4e8f..64c93d83d6 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -200,8 +200,14 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
     return pci_get_long(dev->msix_pba + addr);
 }
 
+static void msix_pba_mmio_write(void *opaque, hwaddr addr,
+                                uint64_t val, unsigned size)
+{
+}
+
 static const MemoryRegionOps msix_pba_mmio_ops = {
     .read = msix_pba_mmio_read,
+    .write = msix_pba_mmio_write,
     .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index b0bf54061f..168b9cc56b 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -847,6 +847,9 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     PCIConfigWriteFunc *config_write = pc->config_write;
     Error *local_err = NULL;
     AddressSpace *dma_as;
+    DeviceState *dev = DEVICE(pci_dev);
+
+    pci_dev->bus = bus;
 
     if (devfn < 0) {
         for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices);
@@ -864,9 +867,17 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                    PCI_SLOT(devfn), PCI_FUNC(devfn), name,
                    bus->devices[devfn]->name);
         return NULL;
+    } else if (dev->hotplugged &&
+               pci_get_function_0(pci_dev)) {
+        error_setg(errp, "PCI: slot %d function 0 already ocuppied by %s,"
+                   " new func %s cannot be exposed to guest.",
+                   PCI_SLOT(devfn),
+                   bus->devices[PCI_DEVFN(PCI_SLOT(devfn), 0)]->name,
+                   name);
+
+        return NULL;
     }
 
-    pci_dev->bus = bus;
     pci_dev->devfn = devfn;
     dma_as = pci_device_iommu_address_space(pci_dev);
 
@@ -2454,6 +2465,33 @@ void pci_bus_get_w64_range(PCIBus *bus, Range *range)
     pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
 }
 
+static bool pcie_has_upstream_port(PCIDevice *dev)
+{
+    PCIDevice *parent_dev = pci_bridge_get_device(dev->bus);
+
+    /* Device associated with an upstream port.
+     * As there are several types of these, it's easier to check the
+     * parent device: upstream ports are always connected to
+     * root or downstream ports.
+     */
+    return parent_dev &&
+        pci_is_express(parent_dev) &&
+        parent_dev->exp.exp_cap &&
+        (pcie_cap_get_type(parent_dev) == PCI_EXP_TYPE_ROOT_PORT ||
+         pcie_cap_get_type(parent_dev) == PCI_EXP_TYPE_DOWNSTREAM);
+}
+
+PCIDevice *pci_get_function_0(PCIDevice *pci_dev)
+{
+    if (pcie_has_upstream_port(pci_dev)) {
+        /* With an upstream PCIe port, we only support 1 device at slot 0 */
+        return pci_dev->bus->devices[0];
+    } else {
+        /* Other bus types might support multiple devices at slots 0-31 */
+        return pci_dev->bus->devices[PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 0)];
+    }
+}
+
 static const TypeInfo pci_device_type_info = {
     .name = TYPE_PCI_DEVICE,
     .parent = TYPE_DEVICE,
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index 3e26f9256c..49f59a5dbc 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -20,6 +20,7 @@
 
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_host.h"
+#include "hw/pci/pci_bus.h"
 #include "trace.h"
 
 /* debug PCI */
@@ -52,6 +53,13 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
                                   uint32_t limit, uint32_t val, uint32_t len)
 {
     assert(len <= 4);
+    /* non-zero functions are only exposed when function 0 is present,
+     * allowing direct removal of unexposed functions.
+     */
+    if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
+        return;
+    }
+
     trace_pci_cfg_write(pci_dev->name, PCI_SLOT(pci_dev->devfn),
                         PCI_FUNC(pci_dev->devfn), addr, val);
     pci_dev->config_write(pci_dev, addr, val, MIN(len, limit - addr));
@@ -63,6 +71,13 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
     uint32_t ret;
 
     assert(len <= 4);
+    /* non-zero functions are only exposed when function 0 is present,
+     * allowing direct removal of unexposed functions.
+     */
+    if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
+        return ~0x0;
+    }
+
     ret = pci_dev->config_read(pci_dev, addr, MIN(len, limit - addr));
     trace_pci_cfg_read(pci_dev->name, PCI_SLOT(pci_dev->devfn),
                        PCI_FUNC(pci_dev->devfn), addr, ret);
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 6e28985bd1..32c65c27a4 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -249,25 +249,43 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
         return;
     }
 
-    /* TODO: multifunction hot-plug.
-     * Right now, only a device of function = 0 is allowed to be
-     * hot plugged/unplugged.
+    /* To enable multifunction hot-plug, we just ensure that function
+     * 0 is added last. When function 0 is added, we set the sltsta and
+     * inform the OS via event notification.
      */
-    assert(PCI_FUNC(pci_dev->devfn) == 0);
+    if (pci_get_function_0(pci_dev)) {
+        pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
+                                   PCI_EXP_SLTSTA_PDS);
+        pcie_cap_slot_event(PCI_DEVICE(hotplug_dev),
+                            PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP);
+    }
+}
 
-    pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
-                               PCI_EXP_SLTSTA_PDS);
-    pcie_cap_slot_event(PCI_DEVICE(hotplug_dev),
-                        PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP);
+static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
+{
+    object_unparent(OBJECT(dev));
 }
 
 void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
                                          DeviceState *dev, Error **errp)
 {
     uint8_t *exp_cap;
+    PCIDevice *pci_dev = PCI_DEVICE(dev);
+    PCIBus *bus = pci_dev->bus;
 
     pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
 
+    /* In case the user cancels a multi-function hot-add operation,
+     * remove each function that is not yet exposed to the guest
+     * individually, without interacting with the guest.
+     */
+    if (pci_dev->devfn &&
+        !bus->devices[0]) {
+        pcie_unplug_device(bus, pci_dev, NULL);
+
+        return;
+    }
+
     pcie_cap_slot_push_attention_button(PCI_DEVICE(hotplug_dev));
 }
 
@@ -378,11 +396,6 @@ void pcie_cap_slot_reset(PCIDevice *dev)
     hotplug_event_update_event_status(dev);
 }
 
-static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
-{
-    object_unparent(OBJECT(dev));
-}
-
 void pcie_cap_slot_write_config(PCIDevice *dev,
                                 uint32_t addr, uint32_t val, int len)
 {
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index d95222bd7d..5ad28f75cf 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -42,11 +42,9 @@
 #include "sysemu/arch_init.h"
 #include "sysemu/qtest.h"
 #include "exec/address-spaces.h"
+#include "trace.h"
 #include "elf.h"
 
-//#define HARD_DEBUG_PPC_IO
-//#define DEBUG_PPC_IO
-
 /* SMP is not enabled, for now */
 #define MAX_CPUS 1
 
@@ -57,26 +55,6 @@
 #define KERNEL_LOAD_ADDR 0x01000000
 #define INITRD_LOAD_ADDR 0x01800000
 
-#if defined (HARD_DEBUG_PPC_IO) && !defined (DEBUG_PPC_IO)
-#define DEBUG_PPC_IO
-#endif
-
-#if defined (HARD_DEBUG_PPC_IO)
-#define PPC_IO_DPRINTF(fmt, ...)                         \
-do {                                                     \
-    if (qemu_loglevel_mask(CPU_LOG_IOPORT)) {            \
-        qemu_log("%s: " fmt, __func__ , ## __VA_ARGS__); \
-    } else {                                             \
-        printf("%s : " fmt, __func__ , ## __VA_ARGS__);  \
-    }                                                    \
-} while (0)
-#elif defined (DEBUG_PPC_IO)
-#define PPC_IO_DPRINTF(fmt, ...) \
-qemu_log_mask(CPU_LOG_IOPORT, fmt, ## __VA_ARGS__)
-#else
-#define PPC_IO_DPRINTF(fmt, ...) do { } while (0)
-#endif
-
 /* Constants for devices init */
 static const int ide_iobase[2] = { 0x1f0, 0x170 };
 static const int ide_iobase2[2] = { 0x3f6, 0x376 };
@@ -199,8 +177,7 @@ static void PREP_io_800_writeb (void *opaque, uint32_t addr, uint32_t val)
 {
     sysctrl_t *sysctrl = opaque;
 
-    PPC_IO_DPRINTF("0x%08" PRIx32 " => 0x%02" PRIx32 "\n",
-                   addr - PPC_IO_BASE, val);
+    trace_prep_io_800_writeb(addr - PPC_IO_BASE, val);
     switch (addr) {
     case 0x0092:
         /* Special port 92 */
@@ -327,8 +304,7 @@ static uint32_t PREP_io_800_readb (void *opaque, uint32_t addr)
         printf("ERROR: unaffected IO port: %04" PRIx32 " read\n", addr);
         break;
     }
-    PPC_IO_DPRINTF("0x%08" PRIx32 " <= 0x%02" PRIx32 "\n",
-                   addr - PPC_IO_BASE, retval);
+    trace_prep_io_800_readb(addr - PPC_IO_BASE, retval);
 
     return retval;
 }
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 3852ad1967..0ed8527969 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -597,6 +597,24 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
     uint32_t vcpus_per_socket = smp_threads * smp_cores;
     uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
 
+    /* Note: we keep CI large pages off for now because a 64K-capable guest
+     * provisioned with large pages might otherwise try to map a QEMU
+     * framebuffer (or other kind of memory-mapped PCI BAR) using 64K pages
+     * even if that QEMU runs on a 4K host.
+     *
+     * We can later add this bit back when we are confident this is not
+     * an issue (!HV KVM or 64K host).
+     */
+    uint8_t pa_features_206[] = { 6, 0,
+        0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
+    uint8_t pa_features_207[] = { 24, 0,
+        0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0,
+        0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00 };
+    uint8_t *pa_features;
+    size_t pa_size;
+
     _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
     _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
 
@@ -625,6 +643,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
 
     _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
     _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "slb-size", env->slb_nr)));
     _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
     _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
     _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
@@ -662,6 +681,19 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
                           page_sizes_prop, page_sizes_prop_size)));
     }
 
+    /* Do the ibm,pa-features property, adjust it for ci-large-pages */
+    if (env->mmu_model == POWERPC_MMU_2_06) {
+        pa_features = pa_features_206;
+        pa_size = sizeof(pa_features_206);
+    } else /* env->mmu_model == POWERPC_MMU_2_07 */ {
+        pa_features = pa_features_207;
+        pa_size = sizeof(pa_features_207);
+    }
+    if (env->ci_large_pages) {
+        pa_features[3] |= 0x20;
+    }
+    _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
+
     _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
                            cs->cpu_index / vcpus_per_socket)));
 
@@ -979,7 +1011,7 @@ static void emulate_spapr_hypercall(PowerPCCPU *cpu)
 #define CLEAN_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
 #define DIRTY_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY))
 
-static void spapr_reset_htab(sPAPRMachineState *spapr)
+static void spapr_alloc_htab(sPAPRMachineState *spapr)
 {
     long shift;
     int index;
@@ -992,20 +1024,47 @@ static void spapr_reset_htab(sPAPRMachineState *spapr)
 
     if (shift > 0) {
         /* Kernel handles htab, we don't need to allocate one */
+        if (shift != spapr->htab_shift) {
+            error_setg(&error_abort, "Failed to allocate HTAB of requested size, try with smaller maxmem");
+        }
+
         spapr->htab_shift = shift;
         kvmppc_kern_htab = true;
+    } else {
+        /* Allocate htab */
+        spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
+
+        /* And clear it */
+        memset(spapr->htab, 0, HTAB_SIZE(spapr));
+
+        for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) {
+            DIRTY_HPTE(HPTE(spapr->htab, index));
+        }
+    }
+}
+
+/*
+ * Clear HTAB entries during reset.
+ *
+ * If the host kernel has allocated the HTAB, the KVM_PPC_ALLOCATE_HTAB ioctl
+ * clears it. Otherwise the QEMU-allocated HTAB is cleared manually.
+ */
+static void spapr_reset_htab(sPAPRMachineState *spapr)
+{
+    long shift;
+    int index;
+
+    shift = kvmppc_reset_htab(spapr->htab_shift);
+    if (shift > 0) {
+        if (shift != spapr->htab_shift) {
+            error_setg(&error_abort, "Requested HTAB allocation failed during reset");
+        }
 
         /* Tell readers to update their file descriptor */
         if (spapr->htab_fd >= 0) {
             spapr->htab_fd_stale = true;
         }
     } else {
-        if (!spapr->htab) {
-            /* Allocate an htab if we don't yet have one */
-            spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
-        }
-
-        /* And clear it */
         memset(spapr->htab, 0, HTAB_SIZE(spapr));
 
         for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) {
@@ -1710,6 +1769,7 @@ static void ppc_spapr_init(MachineState *machine)
         }
         spapr->htab_shift++;
     }
+    spapr_alloc_htab(spapr);
 
     /* Set up Interrupt Controller before we create the VCPUs */
     spapr->icp = xics_system_init(machine,
@@ -2097,7 +2157,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         goto out;
     }
 
-    pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, false, &local_err);
+    pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err);
     if (local_err) {
         goto out;
     }
@@ -2225,7 +2285,11 @@ static const TypeInfo spapr_machine_info = {
     },
 };
 
+#define SPAPR_COMPAT_2_4 \
+        HW_COMPAT_2_4
+
 #define SPAPR_COMPAT_2_3 \
+        SPAPR_COMPAT_2_4 \
         HW_COMPAT_2_3 \
         {\
             .driver   = "spapr-pci-host-bridge",\
@@ -2339,11 +2403,16 @@ static const TypeInfo spapr_machine_2_3_info = {
 
 static void spapr_machine_2_4_class_init(ObjectClass *oc, void *data)
 {
+    static GlobalProperty compat_props[] = {
+        SPAPR_COMPAT_2_4
+        { /* end of list */ }
+    };
     MachineClass *mc = MACHINE_CLASS(oc);
 
     mc->desc = "pSeries Logical Partition (PAPR compliant) v2.4";
     mc->alias = "pseries";
     mc->is_default = 0;
+    mc->compat_props = compat_props;
 }
 
 static const TypeInfo spapr_machine_2_4_info = {
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index f61504e0c5..ed28565d8c 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -146,7 +146,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
         tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
                                               window_size,
                                               &tcet->fd,
-                                              tcet->vfio_accel);
+                                              tcet->need_vfio);
     }
 
     if (!tcet->table) {
@@ -168,11 +168,43 @@ static int spapr_tce_table_realize(DeviceState *dev)
     return 0;
 }
 
+void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool need_vfio)
+{
+    size_t table_size = tcet->nb_table * sizeof(uint64_t);
+    void *newtable;
+
+    if (need_vfio == tcet->need_vfio) {
+        /* Nothing to do */
+        return;
+    }
+
+    if (!need_vfio) {
+        /* FIXME: We don't support transition back to KVM accelerated
+         * TCEs yet */
+        return;
+    }
+
+    tcet->need_vfio = true;
+
+    if (tcet->fd < 0) {
+        /* Table is already in userspace, nothing to be done */
+        return;
+    }
+
+    newtable = g_malloc(table_size);
+    memcpy(newtable, tcet->table, table_size);
+
+    kvmppc_remove_spapr_tce(tcet->table, tcet->fd, tcet->nb_table);
+
+    tcet->fd = -1;
+    tcet->table = newtable;
+}
+
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
                                    uint64_t bus_offset,
                                    uint32_t page_shift,
                                    uint32_t nb_table,
-                                   bool vfio_accel)
+                                   bool need_vfio)
 {
     sPAPRTCETable *tcet;
     char tmp[64];
@@ -192,7 +224,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
     tcet->bus_offset = bus_offset;
     tcet->page_shift = page_shift;
     tcet->nb_table = nb_table;
-    tcet->vfio_accel = vfio_accel;
+    tcet->need_vfio = need_vfio;
 
     snprintf(tmp, sizeof(tmp), "tce-table-%x", liobn);
     object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 617b7f3fdd..55fa8db9e2 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1083,6 +1083,12 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
     void *fdt = NULL;
     int fdt_start_offset = 0, fdt_size;
 
+    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
+        sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
+
+        spapr_tce_set_need_vfio(tcet, true);
+    }
+
     if (dev->hotplugged) {
         fdt = create_device_tree(&fdt_size);
         fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
@@ -1387,7 +1393,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
     sPAPRTCETable *tcet;
     uint32_t nb_table;
 
-    nb_table = SPAPR_PCI_DMA32_SIZE >> SPAPR_TCE_PAGE_SHIFT;
+    nb_table = sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT;
     tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
                                0, SPAPR_TCE_PAGE_SHIFT, nb_table, false);
     if (!tcet) {
@@ -1397,7 +1403,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
     }
 
     /* Register default 32bit DMA window */
-    memory_region_add_subregion(&sphb->iommu_root, 0,
+    memory_region_add_subregion(&sphb->iommu_root, sphb->dma_win_addr,
                                 spapr_tce_get_iommu(tcet));
 }
 
@@ -1430,6 +1436,9 @@ static Property spapr_phb_properties[] = {
                        SPAPR_PCI_IO_WIN_SIZE),
     DEFINE_PROP_BOOL("dynamic-reconfiguration", sPAPRPHBState, dr_enabled,
                      true),
+    /* Default DMA window is 0..1GB */
+    DEFINE_PROP_UINT64("dma_win_addr", sPAPRPHBState, dma_win_addr, 0),
+    DEFINE_PROP_UINT64("dma_win_size", sPAPRPHBState, dma_win_size, 0x40000000),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index faba773592..84221f4f57 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -20,6 +20,7 @@
 #include "qemu/config-file.h"
 #include "s390-pci-bus.h"
 #include "hw/s390x/storage-keys.h"
+#include "hw/compat.h"
 
 #define TYPE_S390_CCW_MACHINE               "s390-ccw-machine"
 
@@ -236,6 +237,7 @@ static const TypeInfo ccw_machine_info = {
 };
 
 #define CCW_COMPAT_2_4 \
+        HW_COMPAT_2_4 \
         {\
             .driver   = TYPE_S390_SKEYS,\
             .property = "migration-enabled",\
diff --git a/hw/scsi/spapr_vscsi.c b/hw/scsi/spapr_vscsi.c
index 891424fae9..f4f5140a4a 100644
--- a/hw/scsi/spapr_vscsi.c
+++ b/hw/scsi/spapr_vscsi.c
@@ -750,7 +750,6 @@ static void vscsi_report_luns(VSCSIState *s, vscsi_req *req)
     len = n+8;
 
     resp_data = g_malloc0(len);
-    memset(resp_data, 0, len);
     stl_be_p(resp_data, n);
     i = found_lun0 ? 8 : 16;
     QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index 1248fd93ee..0d8d71e245 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -60,7 +60,7 @@ static VirtIOSCSIVring *virtio_scsi_vring_init(VirtIOSCSI *s,
     r = g_new(VirtIOSCSIVring, 1);
     r->host_notifier = *virtio_queue_get_host_notifier(vq);
     r->guest_notifier = *virtio_queue_get_guest_notifier(vq);
-    aio_set_event_notifier(s->ctx, &r->host_notifier, handler);
+    aio_set_event_notifier(s->ctx, &r->host_notifier, true, handler);
 
     r->parent = s;
 
@@ -71,7 +71,7 @@ static VirtIOSCSIVring *virtio_scsi_vring_init(VirtIOSCSI *s,
     return r;
 
 fail_vring:
-    aio_set_event_notifier(s->ctx, &r->host_notifier, NULL);
+    aio_set_event_notifier(s->ctx, &r->host_notifier, true, NULL);
     k->set_host_notifier(qbus->parent, n, false);
     g_free(r);
     return NULL;
@@ -162,14 +162,17 @@ static void virtio_scsi_clear_aio(VirtIOSCSI *s)
     int i;
 
     if (s->ctrl_vring) {
-        aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier, NULL);
+        aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier,
+                               true, NULL);
     }
     if (s->event_vring) {
-        aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier, NULL);
+        aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier,
+                               true, NULL);
     }
     if (s->cmd_vrings) {
         for (i = 0; i < vs->conf.num_queues && s->cmd_vrings[i]; i++) {
-            aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier, NULL);
+            aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier,
+                                   true, NULL);
         }
     }
 }
@@ -290,10 +293,13 @@ void virtio_scsi_dataplane_stop(VirtIOSCSI *s)
 
     aio_context_acquire(s->ctx);
 
-    aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier, NULL);
-    aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier, NULL);
+    aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier,
+                           true, NULL);
+    aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier,
+                           true, NULL);
     for (i = 0; i < vs->conf.num_queues; i++) {
-        aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier, NULL);
+        aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier,
+                               true, NULL);
     }
 
     blk_drain_all(); /* ensure there are no in-flight requests */
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 20885fb7f1..76554011cb 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -205,20 +205,8 @@ static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq)
     assert(n < vs->conf.num_queues);
     req = virtio_scsi_init_req(s, vs->cmd_vqs[n]);
     qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem));
-    /* TODO: add a way for SCSIBusInfo's load_request to fail,
-     * and fail migration instead of asserting here.
-     * When we do, we might be able to re-enable NDEBUG below.
-     */
-#ifdef NDEBUG
-#error building with NDEBUG is not supported
-#endif
-    assert(req->elem.in_num <= ARRAY_SIZE(req->elem.in_sg));
-    assert(req->elem.out_num <= ARRAY_SIZE(req->elem.out_sg));
 
-    virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
-                     req->elem.in_num, 1);
-    virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
-                     req->elem.out_num, 0);
+    virtqueue_map(&req->elem);
 
     if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICmdReq) + vs->cdb_size,
                               sizeof(VirtIOSCSICmdResp) + vs->sense_size) < 0) {
@@ -665,6 +653,11 @@ static void virtio_scsi_reset(VirtIODevice *vdev)
 static void virtio_scsi_save(QEMUFile *f, void *opaque)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+    VirtIOSCSI *s = VIRTIO_SCSI(vdev);
+
+    if (s->dataplane_started) {
+        virtio_scsi_dataplane_stop(s);
+    }
     virtio_save(vdev, f);
 }
 
diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c
index 2209ef1d52..b430d56687 100644
--- a/hw/sd/milkymist-memcard.c
+++ b/hw/sd/milkymist-memcard.c
@@ -28,7 +28,7 @@
 #include "qemu/error-report.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
-#include "hw/sd.h"
+#include "hw/sd/sd.h"
 
 enum {
     ENABLE_CMD_TX   = (1<<0),
diff --git a/hw/sd/omap_mmc.c b/hw/sd/omap_mmc.c
index 35d8033402..5bc47193f9 100644
--- a/hw/sd/omap_mmc.c
+++ b/hw/sd/omap_mmc.c
@@ -18,7 +18,7 @@
  */
 #include "hw/hw.h"
 #include "hw/arm/omap.h"
-#include "hw/sd.h"
+#include "hw/sd/sd.h"
 
 struct omap_mmc_s {
     qemu_irq irq;
diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index 5242176a33..326c53ad92 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c
@@ -10,7 +10,7 @@
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
 #include "hw/sysbus.h"
-#include "hw/sd.h"
+#include "hw/sd/sd.h"
 
 //#define DEBUG_PL181 1
 
diff --git a/hw/sd/pxa2xx_mmci.c b/hw/sd/pxa2xx_mmci.c
index d1fe6d58e8..b217080075 100644
--- a/hw/sd/pxa2xx_mmci.c
+++ b/hw/sd/pxa2xx_mmci.c
@@ -12,7 +12,7 @@
 
 #include "hw/hw.h"
 #include "hw/arm/pxa.h"
-#include "hw/sd.h"
+#include "hw/sd/sd.h"
 #include "hw/qdev.h"
 
 struct PXA2xxMMCIState {
diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 393a75c0bc..ce4d44be91 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -31,7 +31,7 @@
 
 #include "hw/hw.h"
 #include "sysemu/block-backend.h"
-#include "hw/sd.h"
+#include "hw/sd/sd.h"
 #include "qemu/bitmap.h"
 
 //#define DEBUG_SD 1
diff --git a/hw/sd/sdhci.h b/hw/sd/sdhci-internal.h
index e2de92d553..c712daf4ee 100644
--- a/hw/sd/sdhci.h
+++ b/hw/sd/sdhci-internal.h
@@ -21,15 +21,10 @@
  * You should have received a copy of the GNU General Public License along
  * with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#ifndef SDHCI_INTERNAL_H
+#define SDHCI_INTERNAL_H
 
-#ifndef SDHCI_H
-#define SDHCI_H
-
-#include "qemu-common.h"
-#include "hw/block/block.h"
-#include "hw/pci/pci.h"
-#include "hw/sysbus.h"
-#include "hw/sd.h"
+#include "hw/sd/sdhci.h"
 
 /* R/W SDMA System Address register 0x0 */
 #define SDHC_SYSAD                     0x00
@@ -232,66 +227,6 @@ enum {
     sdhc_gap_write  = 2   /* SDHC stopped at block gap during write operation */
 };
 
-/* SD/MMC host controller state */
-typedef struct SDHCIState {
-    union {
-        PCIDevice pcidev;
-        SysBusDevice busdev;
-    };
-    SDState *card;
-    MemoryRegion iomem;
-    BlockConf conf;
-
-    QEMUTimer *insert_timer;       /* timer for 'changing' sd card. */
-    QEMUTimer *transfer_timer;
-    qemu_irq eject_cb;
-    qemu_irq ro_cb;
-    qemu_irq irq;
-
-    uint32_t sdmasysad;    /* SDMA System Address register */
-    uint16_t blksize;      /* Host DMA Buff Boundary and Transfer BlkSize Reg */
-    uint16_t blkcnt;       /* Blocks count for current transfer */
-    uint32_t argument;     /* Command Argument Register */
-    uint16_t trnmod;       /* Transfer Mode Setting Register */
-    uint16_t cmdreg;       /* Command Register */
-    uint32_t rspreg[4];    /* Response Registers 0-3 */
-    uint32_t prnsts;       /* Present State Register */
-    uint8_t  hostctl;      /* Host Control Register */
-    uint8_t  pwrcon;       /* Power control Register */
-    uint8_t  blkgap;       /* Block Gap Control Register */
-    uint8_t  wakcon;       /* WakeUp Control Register */
-    uint16_t clkcon;       /* Clock control Register */
-    uint8_t  timeoutcon;   /* Timeout Control Register */
-    uint8_t  admaerr;      /* ADMA Error Status Register */
-    uint16_t norintsts;    /* Normal Interrupt Status Register */
-    uint16_t errintsts;    /* Error Interrupt Status Register */
-    uint16_t norintstsen;  /* Normal Interrupt Status Enable Register */
-    uint16_t errintstsen;  /* Error Interrupt Status Enable Register */
-    uint16_t norintsigen;  /* Normal Interrupt Signal Enable Register */
-    uint16_t errintsigen;  /* Error Interrupt Signal Enable Register */
-    uint16_t acmd12errsts; /* Auto CMD12 error status register */
-    uint64_t admasysaddr;  /* ADMA System Address Register */
-
-    uint32_t capareg;      /* Capabilities Register */
-    uint32_t maxcurr;      /* Maximum Current Capabilities Register */
-    uint8_t  *fifo_buffer; /* SD host i/o FIFO buffer */
-    uint32_t buf_maxsz;
-    uint16_t data_count;   /* current element in FIFO buffer */
-    uint8_t  stopped_state;/* Current SDHC state */
-    /* Buffer Data Port Register - virtual access point to R and W buffers */
-    /* Software Reset Register - always reads as 0 */
-    /* Force Event Auto CMD12 Error Interrupt Reg - write only */
-    /* Force Event Error Interrupt Register- write only */
-    /* RO Host Controller Version Register always reads as 0x2401 */
-} SDHCIState;
-
 extern const VMStateDescription sdhci_vmstate;
 
-#define TYPE_PCI_SDHCI "sdhci-pci"
-#define PCI_SDHCI(obj) OBJECT_CHECK(SDHCIState, (obj), TYPE_PCI_SDHCI)
-
-#define TYPE_SYSBUS_SDHCI "generic-sdhci"
-#define SYSBUS_SDHCI(obj)                               \
-     OBJECT_CHECK(SDHCIState, (obj), TYPE_SYSBUS_SDHCI)
-
-#endif /* SDHCI_H */
+#endif
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index 7f73527d44..d70d1a6ab9 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -29,8 +29,7 @@
 #include "sysemu/dma.h"
 #include "qemu/timer.h"
 #include "qemu/bitops.h"
-
-#include "sdhci.h"
+#include "sdhci-internal.h"
 
 /* host controller debug messages */
 #ifndef SDHC_DEBUG
diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c
index e4b2d4f83b..c49ff62f56 100644
--- a/hw/sd/ssi-sd.c
+++ b/hw/sd/ssi-sd.c
@@ -13,7 +13,7 @@
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
 #include "hw/ssi.h"
-#include "hw/sd.h"
+#include "hw/sd/sd.h"
 
 //#define DEBUG_SSI_SD 1
 
diff --git a/hw/timer/imx_epit.c b/hw/timer/imx_epit.c
index 9649851526..967be4ad27 100644
--- a/hw/timer/imx_epit.c
+++ b/hw/timer/imx_epit.c
@@ -16,8 +16,17 @@
 #include "hw/misc/imx_ccm.h"
 #include "qemu/main-loop.h"
 
-#define DEBUG_TIMER 0
-#if DEBUG_TIMER
+#ifndef DEBUG_IMX_EPIT
+#define DEBUG_IMX_EPIT 0
+#endif
+
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_EPIT) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_EPIT, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static char const *imx_epit_reg_name(uint32_t reg)
 {
@@ -37,24 +46,6 @@ static char const *imx_epit_reg_name(uint32_t reg)
     }
 }
 
-#  define DPRINTF(fmt, args...) \
-    do { fprintf(stderr, "%s: " fmt , __func__, ##args); } while (0)
-#else
-#  define DPRINTF(fmt, args...) do {} while (0)
-#endif
-
-/*
- * Define to 1 for messages about attempts to
- * access unimplemented registers or similar.
- */
-#define DEBUG_IMPLEMENTATION 1
-#if DEBUG_IMPLEMENTATION
-#  define IPRINTF(fmt, args...) \
-          do { fprintf(stderr, "%s: " fmt, __func__, ##args); } while (0)
-#else
-#  define IPRINTF(fmt, args...) do {} while (0)
-#endif
-
 /*
  * Exact clock frequencies vary from board to board.
  * These are typical.
@@ -136,9 +127,8 @@ static uint64_t imx_epit_read(void *opaque, hwaddr offset, unsigned size)
 {
     IMXEPITState *s = IMX_EPIT(opaque);
     uint32_t reg_value = 0;
-    uint32_t reg = offset >> 2;
 
-    switch (reg) {
+    switch (offset >> 2) {
     case 0: /* Control Register */
         reg_value = s->cr;
         break;
@@ -161,11 +151,12 @@ static uint64_t imx_epit_read(void *opaque, hwaddr offset, unsigned size)
         break;
 
     default:
-        IPRINTF("Bad offset %x\n", reg);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_EPIT, __func__, offset);
         break;
     }
 
-    DPRINTF("(%s) = 0x%08x\n", imx_epit_reg_name(reg), reg_value);
+    DPRINTF("(%s) = 0x%08x\n", imx_epit_reg_name(offset >> 2), reg_value);
 
     return reg_value;
 }
@@ -190,12 +181,12 @@ static void imx_epit_write(void *opaque, hwaddr offset, uint64_t value,
                            unsigned size)
 {
     IMXEPITState *s = IMX_EPIT(opaque);
-    uint32_t reg = offset >> 2;
     uint64_t oldcr;
 
-    DPRINTF("(%s, value = 0x%08x)\n", imx_epit_reg_name(reg), (uint32_t)value);
+    DPRINTF("(%s, value = 0x%08x)\n", imx_epit_reg_name(offset >> 2),
+            (uint32_t)value);
 
-    switch (reg) {
+    switch (offset >> 2) {
     case 0: /* CR */
 
         oldcr = s->cr;
@@ -271,7 +262,8 @@ static void imx_epit_write(void *opaque, hwaddr offset, uint64_t value,
         break;
 
     default:
-        IPRINTF("Bad offset %x\n", reg);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_EPIT, __func__, offset);
 
         break;
     }
diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c
index 4bac67d333..7257f4201a 100644
--- a/hw/timer/imx_gpt.c
+++ b/hw/timer/imx_gpt.c
@@ -16,11 +16,17 @@
 #include "hw/misc/imx_ccm.h"
 #include "qemu/main-loop.h"
 
-/*
- * Define to 1 for debug messages
- */
-#define DEBUG_TIMER 0
-#if DEBUG_TIMER
+#ifndef DEBUG_IMX_GPT
+#define DEBUG_IMX_GPT 0
+#endif
+
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_GPT) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_GPT, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static char const *imx_gpt_reg_name(uint32_t reg)
 {
@@ -50,24 +56,6 @@ static char const *imx_gpt_reg_name(uint32_t reg)
     }
 }
 
-#  define DPRINTF(fmt, args...) \
-          do { printf("%s: " fmt , __func__, ##args); } while (0)
-#else
-#  define DPRINTF(fmt, args...) do {} while (0)
-#endif
-
-/*
- * Define to 1 for messages about attempts to
- * access unimplemented registers or similar.
- */
-#define DEBUG_IMPLEMENTATION 1
-#if DEBUG_IMPLEMENTATION
-#  define IPRINTF(fmt, args...) \
-          do { fprintf(stderr, "%s: " fmt, __func__, ##args); } while (0)
-#else
-#  define IPRINTF(fmt, args...) do {} while (0)
-#endif
-
 static const VMStateDescription vmstate_imx_timer_gpt = {
     .name = TYPE_IMX_GPT,
     .version_id = 3,
@@ -224,9 +212,8 @@ static uint64_t imx_gpt_read(void *opaque, hwaddr offset, unsigned size)
 {
     IMXGPTState *s = IMX_GPT(opaque);
     uint32_t reg_value = 0;
-    uint32_t reg = offset >> 2;
 
-    switch (reg) {
+    switch (offset >> 2) {
     case 0: /* Control Register */
         reg_value = s->cr;
         break;
@@ -256,12 +243,14 @@ static uint64_t imx_gpt_read(void *opaque, hwaddr offset, unsigned size)
         break;
 
     case 7: /* input Capture Register 1 */
-        qemu_log_mask(LOG_UNIMP, "icr1 feature is not implemented\n");
+        qemu_log_mask(LOG_UNIMP, "[%s]%s: icr1 feature is not implemented\n",
+                      TYPE_IMX_GPT, __func__);
         reg_value = s->icr1;
         break;
 
     case 8: /* input Capture Register 2 */
-        qemu_log_mask(LOG_UNIMP, "icr2 feature is not implemented\n");
+        qemu_log_mask(LOG_UNIMP, "[%s]%s: icr2 feature is not implemented\n",
+                      TYPE_IMX_GPT, __func__);
         reg_value = s->icr2;
         break;
 
@@ -271,11 +260,12 @@ static uint64_t imx_gpt_read(void *opaque, hwaddr offset, unsigned size)
         break;
 
     default:
-        IPRINTF("Bad offset %x\n", reg);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_GPT, __func__, offset);
         break;
     }
 
-    DPRINTF("(%s) = 0x%08x\n", imx_gpt_reg_name(reg), reg_value);
+    DPRINTF("(%s) = 0x%08x\n", imx_gpt_reg_name(offset >> 2), reg_value);
 
     return reg_value;
 }
@@ -322,12 +312,11 @@ static void imx_gpt_write(void *opaque, hwaddr offset, uint64_t value,
 {
     IMXGPTState *s = IMX_GPT(opaque);
     uint32_t oldreg;
-    uint32_t reg = offset >> 2;
 
-    DPRINTF("(%s, value = 0x%08x)\n", imx_gpt_reg_name(reg),
+    DPRINTF("(%s, value = 0x%08x)\n", imx_gpt_reg_name(offset >> 2),
             (uint32_t)value);
 
-    switch (reg) {
+    switch (offset >> 2) {
     case 0:
         oldreg = s->cr;
         s->cr = value & ~0x7c14;
@@ -403,7 +392,8 @@ static void imx_gpt_write(void *opaque, hwaddr offset, uint64_t value,
         break;
 
     default:
-        IPRINTF("Bad offset %x\n", reg);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_GPT, __func__, offset);
         break;
     }
 }
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index 9a4e7dc0cb..597d8fd184 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -613,20 +613,22 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
         return;
     }
 
-    bdrv_add_key(blk_bs(blk), NULL, &err);
-    if (err) {
-        if (monitor_cur_is_qmp()) {
-            error_propagate(errp, err);
-            return;
-        }
-        error_free(err);
-        err = NULL;
-        if (cur_mon) {
-            monitor_read_bdrv_key_start(cur_mon, blk_bs(blk),
-                                        usb_msd_password_cb, s);
-            s->dev.auto_attach = 0;
-        } else {
-            autostart = 0;
+    if (blk_bs(blk)) {
+        bdrv_add_key(blk_bs(blk), NULL, &err);
+        if (err) {
+            if (monitor_cur_is_qmp()) {
+                error_propagate(errp, err);
+                return;
+            }
+            error_free(err);
+            err = NULL;
+            if (cur_mon) {
+                monitor_read_bdrv_key_start(cur_mon, blk_bs(blk),
+                                            usb_msd_password_cb, s);
+                s->dev.auto_attach = 0;
+            } else {
+                autostart = 0;
+            }
         }
     }
 
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 78442ba980..83c84f1cd6 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -89,7 +89,7 @@ typedef struct VhostUserMsg {
         struct vhost_vring_state state;
         struct vhost_vring_addr addr;
         VhostUserMemory memory;
-    };
+    } payload;
 } QEMU_PACKED VhostUserMsg;
 
 static VhostUserMsg m __attribute__ ((unused));
@@ -200,8 +200,8 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
     VhostUserMsg msg = {
         .request = VHOST_USER_SET_LOG_BASE,
         .flags = VHOST_USER_VERSION,
-        .u64 = base,
-        .size = sizeof(m.u64),
+        .payload.u64 = base,
+        .size = sizeof(msg.payload.u64),
     };
 
     if (shmfd && log->fd != -1) {
@@ -247,17 +247,17 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                 &ram_addr);
         fd = qemu_get_ram_fd(ram_addr);
         if (fd > 0) {
-            msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
-            msg.memory.regions[fd_num].memory_size  = reg->memory_size;
-            msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
-            msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
+            msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
+            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
+            msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
+            msg.payload.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
                 (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
             assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
             fds[fd_num++] = fd;
         }
     }
 
-    msg.memory.nregions = fd_num;
+    msg.payload.memory.nregions = fd_num;
 
     if (!fd_num) {
         error_report("Failed initializing vhost-user memory map, "
@@ -265,8 +265,8 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
         return -1;
     }
 
-    msg.size = sizeof(m.memory.nregions);
-    msg.size += sizeof(m.memory.padding);
+    msg.size = sizeof(msg.payload.memory.nregions);
+    msg.size += sizeof(msg.payload.memory.padding);
     msg.size += fd_num * sizeof(VhostUserMemoryRegion);
 
     vhost_user_write(dev, &msg, fds, fd_num);
@@ -280,8 +280,8 @@ static int vhost_user_set_vring_addr(struct vhost_dev *dev,
     VhostUserMsg msg = {
         .request = VHOST_USER_SET_VRING_ADDR,
         .flags = VHOST_USER_VERSION,
-        .addr = *addr,
-        .size = sizeof(*addr),
+        .payload.addr = *addr,
+        .size = sizeof(msg.payload.addr),
     };
 
     vhost_user_write(dev, &msg, NULL, 0);
@@ -303,8 +303,8 @@ static int vhost_set_vring(struct vhost_dev *dev,
     VhostUserMsg msg = {
         .request = request,
         .flags = VHOST_USER_VERSION,
-        .state = *ring,
-        .size = sizeof(*ring),
+        .payload.state = *ring,
+        .size = sizeof(msg.payload.state),
     };
 
     vhost_user_write(dev, &msg, NULL, 0);
@@ -345,8 +345,8 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
     VhostUserMsg msg = {
         .request = VHOST_USER_GET_VRING_BASE,
         .flags = VHOST_USER_VERSION,
-        .state = *ring,
-        .size = sizeof(*ring),
+        .payload.state = *ring,
+        .size = sizeof(msg.payload.state),
     };
 
     vhost_user_write(dev, &msg, NULL, 0);
@@ -361,12 +361,12 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
         return -1;
     }
 
-    if (msg.size != sizeof(m.state)) {
+    if (msg.size != sizeof(msg.payload.state)) {
         error_report("Received bad msg size.");
         return -1;
     }
 
-    *ring = msg.state;
+    *ring = msg.payload.state;
 
     return 0;
 }
@@ -380,14 +380,14 @@ static int vhost_set_vring_file(struct vhost_dev *dev,
     VhostUserMsg msg = {
         .request = request,
         .flags = VHOST_USER_VERSION,
-        .u64 = file->index & VHOST_USER_VRING_IDX_MASK,
-        .size = sizeof(m.u64),
+        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
+        .size = sizeof(msg.payload.u64),
     };
 
     if (ioeventfd_enabled() && file->fd > 0) {
         fds[fd_num++] = file->fd;
     } else {
-        msg.u64 |= VHOST_USER_VRING_NOFD_MASK;
+        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
     }
 
     vhost_user_write(dev, &msg, fds, fd_num);
@@ -412,8 +412,8 @@ static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
     VhostUserMsg msg = {
         .request = request,
         .flags = VHOST_USER_VERSION,
-        .u64 = u64,
-        .size = sizeof(m.u64),
+        .payload.u64 = u64,
+        .size = sizeof(msg.payload.u64),
     };
 
     vhost_user_write(dev, &msg, NULL, 0);
@@ -456,12 +456,12 @@ static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
         return -1;
     }
 
-    if (msg.size != sizeof(m.u64)) {
+    if (msg.size != sizeof(msg.payload.u64)) {
         error_report("Received bad msg size.");
         return -1;
     }
 
-    *u64 = msg.u64;
+    *u64 = msg.payload.u64;
 
     return 0;
 }
@@ -591,8 +591,8 @@ static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
                            VHOST_USER_PROTOCOL_F_RARP)) {
         msg.request = VHOST_USER_SEND_RARP;
         msg.flags = VHOST_USER_VERSION;
-        memcpy((char *)&msg.u64, mac_addr, 6);
-        msg.size = sizeof(m.u64);
+        memcpy((char *)&msg.payload.u64, mac_addr, 6);
+        msg.size = sizeof(msg.payload.u64);
 
         err = vhost_user_write(dev, &msg, NULL, 0);
         return err;
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index d0bc72e0e4..939f802110 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -448,28 +448,59 @@ int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
     return in_bytes <= in_total && out_bytes <= out_total;
 }
 
-void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
-    size_t num_sg, int is_write)
+static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
+                                unsigned int *num_sg, unsigned int max_size,
+                                int is_write)
 {
     unsigned int i;
     hwaddr len;
 
-    if (num_sg > VIRTQUEUE_MAX_SIZE) {
-        error_report("virtio: map attempt out of bounds: %zd > %d",
-                     num_sg, VIRTQUEUE_MAX_SIZE);
-        exit(1);
-    }
+    /* Note: this function MUST validate its input; some callers
+     * pass in num_sg values received over the network.
+     */
+    /* TODO: teach all callers that this can fail, and return failure instead
+     * of asserting here.
+     * When we do, we might be able to re-enable NDEBUG below.
+     */
+#ifdef NDEBUG
+#error building with NDEBUG is not supported
+#endif
+    assert(*num_sg <= max_size);
 
-    for (i = 0; i < num_sg; i++) {
+    for (i = 0; i < *num_sg; i++) {
         len = sg[i].iov_len;
         sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
-        if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
+        if (!sg[i].iov_base) {
             error_report("virtio: error trying to map MMIO memory");
             exit(1);
         }
+        if (len == sg[i].iov_len) {
+            continue;
+        }
+        if (*num_sg >= max_size) {
+            error_report("virtio: memory split makes iovec too large");
+            exit(1);
+        }
+        memmove(sg + i + 1, sg + i, sizeof(*sg) * (*num_sg - i));
+        memmove(addr + i + 1, addr + i, sizeof(*addr) * (*num_sg - i));
+        assert(len < sg[i + 1].iov_len);
+        sg[i].iov_len = len;
+        addr[i + 1] += len;
+        sg[i + 1].iov_len -= len;
+        ++*num_sg;
     }
 }
 
+void virtqueue_map(VirtQueueElement *elem)
+{
+    virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
+                        MIN(ARRAY_SIZE(elem->in_sg), ARRAY_SIZE(elem->in_addr)),
+                        1);
+    virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
+                        MIN(ARRAY_SIZE(elem->out_sg), ARRAY_SIZE(elem->out_addr)),
+                        0);
+}
+
 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
 {
     unsigned int i, head, max;
@@ -531,8 +562,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
     } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);
 
     /* Now map what we have collected */
-    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
-    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
+    virtqueue_map(elem);
 
     elem->index = head;
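
virtqueue_map_sg() is replaced by virtqueue_map(), which takes the whole VirtQueueElement, validates in_num/out_num itself, and may split an entry when cpu_physical_memory_map() returns a shorter mapping than requested, growing *num_sg accordingly. The other intended caller besides virtqueue_pop() is migration post-load code that has restored guest addresses and lengths and needs the host pointers rebuilt; a hedged sketch of that pattern (the qemu_get_* field order is illustrative, not taken from any particular device):

    /* Hedged sketch of a post-load helper: restore the element's guest
     * addresses and lengths from the migration stream, then let
     * virtqueue_map() recreate the iov_base pointers, possibly growing
     * in_num/out_num if a region has to be mapped in pieces. */
    static void sketch_element_post_load(QEMUFile *f, VirtQueueElement *elem)
    {
        unsigned int i;

        elem->index = qemu_get_be32(f);
        elem->in_num = qemu_get_be32(f);
        elem->out_num = qemu_get_be32(f);
        for (i = 0; i < elem->in_num; i++) {
            elem->in_addr[i] = qemu_get_be64(f);
            elem->in_sg[i].iov_len = qemu_get_be64(f);
        }
        for (i = 0; i < elem->out_num; i++) {
            elem->out_addr[i] = qemu_get_be64(f);
            elem->out_sg[i].iov_len = qemu_get_be64(f);
        }
        virtqueue_map(elem);
    }
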
 
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index 2b54f52707..aa96288236 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -938,10 +938,18 @@ static void xen_pci_passthrough_class_init(ObjectClass *klass, void *data)
     dc->props = xen_pci_passthrough_properties;
 };
 
+static void xen_pci_passthrough_finalize(Object *obj)
+{
+    XenPCIPassthroughState *s = XEN_PT_DEVICE(obj);
+
+    xen_pt_msix_delete(s);
+}
+
 static const TypeInfo xen_pci_passthrough_info = {
     .name = TYPE_XEN_PT_DEVICE,
     .parent = TYPE_PCI_DEVICE,
     .instance_size = sizeof(XenPCIPassthroughState),
+    .instance_finalize = xen_pci_passthrough_finalize,
     .class_init = xen_pci_passthrough_class_init,
 };
 
diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
index 3bc22eb1d1..c545280085 100644
--- a/hw/xen/xen_pt.h
+++ b/hw/xen/xen_pt.h
@@ -305,6 +305,7 @@ void xen_pt_msi_disable(XenPCIPassthroughState *s);
 
 int xen_pt_msix_init(XenPCIPassthroughState *s, uint32_t base);
 void xen_pt_msix_delete(XenPCIPassthroughState *s);
+void xen_pt_msix_unmap(XenPCIPassthroughState *s);
 int xen_pt_msix_update(XenPCIPassthroughState *s);
 int xen_pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index);
 void xen_pt_msix_disable(XenPCIPassthroughState *s);
diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 4a5bc11f30..0efee112fd 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -2079,7 +2079,7 @@ void xen_pt_config_delete(XenPCIPassthroughState *s)
 
     /* free MSI/MSI-X info table */
     if (s->msix) {
-        xen_pt_msix_delete(s);
+        xen_pt_msix_unmap(s);
     }
     g_free(s->msi);
 
diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c
index e3d71945cd..82de2bced3 100644
--- a/hw/xen/xen_pt_msi.c
+++ b/hw/xen/xen_pt_msi.c
@@ -610,7 +610,7 @@ error_out:
     return rc;
 }
 
-void xen_pt_msix_delete(XenPCIPassthroughState *s)
+void xen_pt_msix_unmap(XenPCIPassthroughState *s)
 {
     XenPTMSIX *msix = s->msix;
 
@@ -627,6 +627,17 @@ void xen_pt_msix_delete(XenPCIPassthroughState *s)
     }
 
     memory_region_del_subregion(&s->bar[msix->bar_index], &msix->mmio);
+}
+
+void xen_pt_msix_delete(XenPCIPassthroughState *s)
+{
+    XenPTMSIX *msix = s->msix;
+
+    if (!msix) {
+        return;
+    }
+
+    object_unparent(OBJECT(&msix->mmio));
 
     g_free(s->msix);
     s->msix = NULL;
diff --git a/include/block/accounting.h b/include/block/accounting.h
index 4c406cff7a..66637cdfed 100644
--- a/include/block/accounting.h
+++ b/include/block/accounting.h
@@ -40,7 +40,6 @@ typedef struct BlockAcctStats {
     uint64_t nr_ops[BLOCK_MAX_IOTYPE];
     uint64_t total_time_ns[BLOCK_MAX_IOTYPE];
     uint64_t merged[BLOCK_MAX_IOTYPE];
-    uint64_t wr_highest_sector;
 } BlockAcctStats;
 
 typedef struct BlockAcctCookie {
@@ -52,8 +51,6 @@ typedef struct BlockAcctCookie {
 void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
                       int64_t bytes, enum BlockAcctType type);
 void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie);
-void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
-                               unsigned int nb_sectors);
 void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
                            int num_requests);
 
diff --git a/include/block/aio.h b/include/block/aio.h
index 400b1b0020..bcc7d43f6a 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -122,6 +122,8 @@ struct AioContext {
 
     /* TimerLists for calling timers - one per clock type */
     QEMUTimerListGroup tlg;
+
+    int external_disable_cnt;
 };
 
 /**
@@ -299,6 +301,7 @@ bool aio_poll(AioContext *ctx, bool blocking);
  */
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
+                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         void *opaque);
@@ -312,6 +315,7 @@ void aio_set_fd_handler(AioContext *ctx,
  */
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *notifier,
+                            bool is_external,
                             EventNotifierHandler *io_read);
 
 /* Return a GSource that lets the main loop poll the file descriptors attached
@@ -373,4 +377,40 @@ static inline void aio_timer_init(AioContext *ctx,
  */
 int64_t aio_compute_timeout(AioContext *ctx);
 
+/**
+ * aio_disable_external:
+ * @ctx: the aio context
+ *
+ * Disable the further processing of external clients.
+ */
+static inline void aio_disable_external(AioContext *ctx)
+{
+    atomic_inc(&ctx->external_disable_cnt);
+}
+
+/**
+ * aio_enable_external:
+ * @ctx: the aio context
+ *
+ * Enable the processing of external clients.
+ */
+static inline void aio_enable_external(AioContext *ctx)
+{
+    assert(ctx->external_disable_cnt > 0);
+    atomic_dec(&ctx->external_disable_cnt);
+}
+
+/**
+ * aio_node_check:
+ * @ctx: the aio context
+ * @is_external: Whether or not the checked node is an external event source.
+ *
+ * Check if the node's is_external flag is okay to be polled by the ctx at this
+ * moment. True means green light.
+ */
+static inline bool aio_node_check(AioContext *ctx, bool is_external)
+{
+    return !is_external || !atomic_read(&ctx->external_disable_cnt);
+}
+
 #endif
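
The new is_external flag on aio_set_fd_handler()/aio_set_event_notifier() pairs with these helpers: while any caller holds the disable counter, handlers registered as external are simply skipped by the event loop. A hedged sketch of the gating (the real checks live in aio-posix.c and aio-win32.c; the handler struct here is invented for illustration):

    /* Illustrative only: a simplified handler table and dispatch pass showing
     * how aio_node_check() gates handlers registered with is_external=true
     * while aio_disable_external() is in effect. */
    typedef struct SketchHandler {
        bool is_external;   /* from aio_set_fd_handler(..., is_external, ...) */
        IOHandler *io_read;
        void *opaque;
    } SketchHandler;

    static void sketch_dispatch(AioContext *ctx, SketchHandler *h, size_t n)
    {
        size_t i;

        for (i = 0; i < n; i++) {
            if (!aio_node_check(ctx, h[i].is_external)) {
                continue;   /* external client, currently quiesced */
            }
            if (h[i].io_read) {
                h[i].io_read(h[i].opaque);
            }
        }
    }
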
diff --git a/include/block/block.h b/include/block/block.h
index 84f05ad408..610db923d5 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -174,11 +174,6 @@ typedef enum BlockOpType {
     BLOCK_OP_TYPE_MAX,
 } BlockOpType;
 
-void bdrv_iostatus_enable(BlockDriverState *bs);
-void bdrv_iostatus_reset(BlockDriverState *bs);
-void bdrv_iostatus_disable(BlockDriverState *bs);
-bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
-void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
 void bdrv_info_print(Monitor *mon, const QObject *data);
 void bdrv_info(Monitor *mon, QObject **ret_data);
 void bdrv_stats_print(Monitor *mon, const QObject *data);
@@ -389,17 +384,11 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
                             int64_t sector_num, int nb_sectors, int *pnum);
 
-void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
-                       BlockdevOnError on_write_error);
-BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
-BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
-void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
-                       bool is_read, int error);
 int bdrv_is_read_only(BlockDriverState *bs);
 int bdrv_is_sg(BlockDriverState *bs);
 int bdrv_enable_write_cache(BlockDriverState *bs);
 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce);
-int bdrv_is_inserted(BlockDriverState *bs);
+bool bdrv_is_inserted(BlockDriverState *bs);
 int bdrv_media_changed(BlockDriverState *bs);
 void bdrv_lock_medium(BlockDriverState *bs, bool locked);
 void bdrv_eject(BlockDriverState *bs, bool eject_flag);
@@ -466,7 +455,6 @@ void bdrv_img_create(const char *filename, const char *fmt,
 size_t bdrv_min_mem_align(BlockDriverState *bs);
 /* Returns optimal alignment in bytes for bounce buffer */
 size_t bdrv_opt_mem_align(BlockDriverState *bs);
-void bdrv_set_guest_block_size(BlockDriverState *bs, int align);
 void *qemu_blockalign(BlockDriverState *bs, size_t size);
 void *qemu_blockalign0(BlockDriverState *bs, size_t size);
 void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
@@ -622,6 +610,23 @@ void bdrv_io_plug(BlockDriverState *bs);
 void bdrv_io_unplug(BlockDriverState *bs);
 void bdrv_flush_io_queue(BlockDriverState *bs);
 
-BlockAcctStats *bdrv_get_stats(BlockDriverState *bs);
+/**
+ * bdrv_drained_begin:
+ *
+ * Begin a quiesced section for exclusive access to the BDS, by disabling
+ * external request sources including NBD server and device model. Note that
+ * this doesn't block timers or coroutines from submitting more requests, which
+ * means block_job_pause is still necessary.
+ *
+ * This function can be recursive.
+ */
+void bdrv_drained_begin(BlockDriverState *bs);
+
+/**
+ * bdrv_drained_end:
+ *
+ * End a quiescent section started by bdrv_drained_begin().
+ */
+void bdrv_drained_end(BlockDriverState *bs);
 
 #endif
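
A hedged usage sketch of the drained section: device models and the NBD server stop submitting new requests for its duration, so graph manipulations or snapshots see a quiescent BDS. Whether the begin call also drains in-flight requests is up to the implementation in block/io.c; the comment below marks that as an assumption.

    /* Sketch of the intended calling pattern. */
    static void sketch_with_bs_quiesced(BlockDriverState *bs)
    {
        bdrv_drained_begin(bs);   /* assumed to also drain pending requests */

        /* ... operate on bs with no external requests arriving ... */

        bdrv_drained_end(bs);
    }
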
diff --git a/include/block/block_int.h b/include/block/block_int.h
index a480f944cf..3ceeb5a940 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -26,6 +26,7 @@
 
 #include "block/accounting.h"
 #include "block/block.h"
+#include "block/throttle-groups.h"
 #include "qemu/option.h"
 #include "qemu/queue.h"
 #include "qemu/coroutine.h"
@@ -212,7 +213,7 @@ struct BlockDriver {
         const char *backing_file, const char *backing_fmt);
 
     /* removable device specific */
-    int (*bdrv_is_inserted)(BlockDriverState *bs);
+    bool (*bdrv_is_inserted)(BlockDriverState *bs);
     int (*bdrv_media_changed)(BlockDriverState *bs);
     void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
     void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
@@ -399,8 +400,8 @@ struct BlockDriverState {
     unsigned       pending_reqs[2];
     QLIST_ENTRY(BlockDriverState) round_robin;
 
-    /* I/O stats (display with "info blockstats"). */
-    BlockAcctStats stats;
+    /* Offset after the highest byte written to */
+    uint64_t wr_highest_offset;
 
     /* I/O Limits */
     BlockLimits bl;
@@ -411,18 +412,9 @@ struct BlockDriverState {
     /* Alignment requirement for offset/length of I/O requests */
     unsigned int request_alignment;
 
-    /* the block size for which the guest device expects atomicity */
-    int guest_block_size;
-
     /* do we need to tell the guest if we have a volatile write cache? */
     int enable_write_cache;
 
-    /* NOTE: the following infos are only hints for real hardware
-       drivers. They are not used by the block driver */
-    BlockdevOnError on_read_error, on_write_error;
-    bool iostatus_enabled;
-    BlockDeviceIoStatus iostatus;
-
     /* the following member gives a name to every node on the bs graph. */
     char node_name[32];
     /* element of the list of named nodes building the graph */
@@ -456,6 +448,17 @@ struct BlockDriverState {
     /* threshold limit for writes, in bytes. "High water mark". */
     uint64_t write_threshold_offset;
     NotifierWithReturn write_threshold_notifier;
+
+    int quiesce_counter;
+};
+
+struct BlockBackendRootState {
+    int open_flags;
+    bool read_only;
+    BlockdevDetectZeroesOptions detect_zeroes;
+
+    char *throttle_group;
+    ThrottleState *throttle_state;
 };
 
 static inline BlockDriverState *backing_bs(BlockDriverState *bs)
diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h
index fab113f6d1..aba28f30b6 100644
--- a/include/block/throttle-groups.h
+++ b/include/block/throttle-groups.h
@@ -30,6 +30,9 @@
 
 const char *throttle_group_get_name(BlockDriverState *bs);
 
+ThrottleState *throttle_group_incref(const char *name);
+void throttle_group_unref(ThrottleState *ts);
+
 void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg);
 void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg);
 
@@ -40,7 +43,4 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs,
                                                         unsigned int bytes,
                                                         bool is_write);
 
-void throttle_group_lock(BlockDriverState *bs);
-void throttle_group_unlock(BlockDriverState *bs);
-
 #endif
diff --git a/include/glib-compat.h b/include/glib-compat.h
index fb25f437b4..03d8b12675 100644
--- a/include/glib-compat.h
+++ b/include/glib-compat.h
@@ -165,6 +165,14 @@ static inline GThread *g_thread_new(const char *name,
 #define CompatGCond GCond
 #endif /* glib 2.31 */
 
+#if !GLIB_CHECK_VERSION(2, 32, 0)
+/* Beware, function returns gboolean since 2.39.2, see GLib commit 9101915 */
+static inline void g_hash_table_add(GHashTable *hash_table, gpointer key)
+{
+    g_hash_table_replace(hash_table, key, key);
+}
+#endif
+
 #ifndef g_assert_true
 #define g_assert_true(expr)                                                    \
     do {                                                                       \
diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
index 4005a9960b..d1160920cc 100644
--- a/include/hw/arm/xlnx-zynqmp.h
+++ b/include/hw/arm/xlnx-zynqmp.h
@@ -24,6 +24,7 @@
 #include "hw/char/cadence_uart.h"
 #include "hw/ide/pci.h"
 #include "hw/ide/ahci.h"
+#include "hw/sd/sdhci.h"
 
 #define TYPE_XLNX_ZYNQMP "xlnx,zynqmp"
 #define XLNX_ZYNQMP(obj) OBJECT_CHECK(XlnxZynqMPState, (obj), \
@@ -33,6 +34,7 @@
 #define XLNX_ZYNQMP_NUM_RPU_CPUS 2
 #define XLNX_ZYNQMP_NUM_GEMS 4
 #define XLNX_ZYNQMP_NUM_UARTS 2
+#define XLNX_ZYNQMP_NUM_SDHCI 2
 
 #define XLNX_ZYNQMP_NUM_OCM_BANKS 4
 #define XLNX_ZYNQMP_OCM_RAM_0_ADDRESS 0xFFFC0000
@@ -63,6 +65,7 @@ typedef struct XlnxZynqMPState {
     CadenceGEMState gem[XLNX_ZYNQMP_NUM_GEMS];
     CadenceUARTState uart[XLNX_ZYNQMP_NUM_UARTS];
     SysbusAHCIState sata;
+    SDHCIState sdhci[XLNX_ZYNQMP_NUM_SDHCI];
 
     char *boot_cpu;
     ARMCPU *boot_cpu_ptr;
diff --git a/include/hw/compat.h b/include/hw/compat.h
index 095de5d12f..93e71afb4a 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -2,7 +2,11 @@
 #define HW_COMPAT_H
 
 #define HW_COMPAT_2_4 \
-        /* empty */
+        {\
+            .driver   = "virtio-blk-device",\
+            .property = "scsi",\
+            .value    = "true",\
+        },
 
 #define HW_COMPAT_2_3 \
         {\
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index c5961d7c03..606dbc2854 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -60,7 +60,6 @@ struct PCMachineClass {
 
     /*< public >*/
     bool broken_reserved_end;
-    bool inter_dimm_gap;
     HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
                                            DeviceState *dev);
 };
@@ -318,6 +317,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
             .driver   = "Broadwell-noTSX-" TYPE_X86_CPU,\
             .property = "abm",\
             .value    = "off",\
+        },\
+        {\
+            .driver   = "host" "-" TYPE_X86_CPU,\
+            .property = "host-cache-info",\
+            .value    = "on",\
         },
 
 #define PC_COMPAT_2_3 \
diff --git a/include/hw/intc/arm_gic_common.h b/include/hw/intc/arm_gic_common.h
index 564a72b2cf..f4c349a2ef 100644
--- a/include/hw/intc/arm_gic_common.h
+++ b/include/hw/intc/arm_gic_common.h
@@ -111,6 +111,7 @@ typedef struct GICState {
     bool security_extn;
     bool irq_reset_nonsecure; /* configure IRQs as group 1 (NS) on reset? */
     int dev_fd; /* kvm device fd if backed by kvm vgic support */
+    Error *migration_blocker;
 } GICState;
 
 #define TYPE_ARM_GIC_COMMON "arm_gic_common"
diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h
index c1ee7b0408..d83bf30ea9 100644
--- a/include/hw/mem/pc-dimm.h
+++ b/include/hw/mem/pc-dimm.h
@@ -83,16 +83,15 @@ typedef struct MemoryHotplugState {
 
 uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
                                uint64_t address_space_size,
-                               uint64_t *hint, uint64_t align, bool gap,
-                               uint64_t size, Error **errp);
+                               uint64_t *hint, uint64_t align, uint64_t size,
+                               Error **errp);
 
 int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp);
 
 int qmp_pc_dimm_device_list(Object *obj, void *opaque);
 uint64_t pc_existing_dimms_capacity(Error **errp);
 void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
-                         MemoryRegion *mr, uint64_t align, bool gap,
-                         Error **errp);
+                         MemoryRegion *mr, uint64_t align, Error **errp);
 void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
                            MemoryRegion *mr);
 #endif
diff --git a/include/hw/misc/ivshmem.h b/include/hw/misc/ivshmem.h
new file mode 100644
index 0000000000..433ef53d79
--- /dev/null
+++ b/include/hw/misc/ivshmem.h
@@ -0,0 +1,25 @@
+
+/*
+ * Inter-VM Shared Memory PCI device.
+ *
+ * Author:
+ *      Cam Macdonell <cam@cs.ualberta.ca>
+ *
+ * Based On: cirrus_vga.c
+ *          Copyright (c) 2004 Fabrice Bellard
+ *          Copyright (c) 2004 Makoto Suzuki (suzu)
+ *
+ *      and rtl8139.c
+ *          Copyright (c) 2006 Igor Kovalenko
+ *
+ * This code is licensed under the GNU GPL v2.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#ifndef IVSHMEM_H
+#define IVSHMEM_H
+
+#define IVSHMEM_PROTOCOL_VERSION 0
+
+#endif /* IVSHMEM_H */
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 5322b560eb..7de5e029b1 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -78,6 +78,7 @@ struct sPAPRPHBState {
     MemoryRegion memwindow, iowindow, msiwindow;
 
     uint32_t dma_liobn;
+    hwaddr dma_win_addr, dma_win_size;
     AddressSpace iommu_as;
     MemoryRegion iommu_root;
 
@@ -115,8 +116,6 @@ struct sPAPRPHBVFIOState {
 
 #define SPAPR_PCI_MSI_WINDOW         0x40000000000ULL
 
-#define SPAPR_PCI_DMA32_SIZE         0x40000000
-
 static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
diff --git a/include/hw/pci/msix.h b/include/hw/pci/msix.h
index 954d82b350..72e5f931c5 100644
--- a/include/hw/pci/msix.h
+++ b/include/hw/pci/msix.h
@@ -46,12 +46,16 @@ void msix_unset_vector_notifiers(PCIDevice *dev);
 
 extern const VMStateDescription vmstate_msix;
 
-#define VMSTATE_MSIX(_field, _state) {                               \
-    .name       = (stringify(_field)),                               \
-    .size       = sizeof(PCIDevice),                                 \
-    .vmsd       = &vmstate_msix,                                     \
-    .flags      = VMS_STRUCT,                                        \
-    .offset     = vmstate_offset_value(_state, _field, PCIDevice),   \
+#define VMSTATE_MSIX_TEST(_field, _state, _test) {                   \
+    .name         = (stringify(_field)),                             \
+    .size         = sizeof(PCIDevice),                               \
+    .vmsd         = &vmstate_msix,                                   \
+    .flags        = VMS_STRUCT,                                      \
+    .offset       = vmstate_offset_value(_state, _field, PCIDevice), \
+    .field_exists = (_test)                                          \
 }
 
+#define VMSTATE_MSIX(_f, _s)                                         \
+    VMSTATE_MSIX_TEST(_f, _s, NULL)
+
 #endif
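
VMSTATE_MSIX() becomes a thin wrapper around the new VMSTATE_MSIX_TEST(), which lets a device make the MSI-X substate conditional. A hedged sketch of how a device might use it (the device struct and predicate are invented for illustration; field_exists predicates take the usual (opaque, version_id) signature):

    /* Illustrative device: only migrate MSI-X state when it was enabled. */
    typedef struct SketchPCIDev {
        PCIDevice parent_obj;
        bool msix_used;
    } SketchPCIDev;

    static bool sketch_msix_needed(void *opaque, int version_id)
    {
        SketchPCIDev *s = opaque;

        return s->msix_used;
    }

    static const VMStateDescription vmstate_sketch_dev = {
        .name = "sketch-pci-dev",
        .version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_PCI_DEVICE(parent_obj, SketchPCIDev),
            VMSTATE_MSIX_TEST(parent_obj, SketchPCIDev, sketch_msix_needed),
            VMSTATE_END_OF_LIST()
        }
    };
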
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index f5e7fd818a..379b6e1a45 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -397,6 +397,7 @@ void pci_for_each_bus_depth_first(PCIBus *bus,
                                   void *(*begin)(PCIBus *bus, void *parent_state),
                                   void (*end)(PCIBus *bus, void *state),
                                   void *parent_state);
+PCIDevice *pci_get_function_0(PCIDevice *pci_dev);
 
 /* Use this wrapper when specific scan order is not required. */
 static inline
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 56c5b0b2b8..5baa90683b 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -563,7 +563,7 @@ struct sPAPRTCETable {
     uint32_t page_shift;
     uint64_t *table;
     bool bypass;
-    bool vfio_accel;
+    bool need_vfio;
     int fd;
     MemoryRegion iommu;
     struct VIOsPAPRDevice *vdev; /* for @bypass migration compatibility only */
@@ -588,7 +588,9 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
                                    uint64_t bus_offset,
                                    uint32_t page_shift,
                                    uint32_t nb_table,
-                                   bool vfio_accel);
+                                   bool need_vfio);
+void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool need_vfio);
+
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
                  uint32_t liobn, uint64_t window, uint32_t size);
diff --git a/include/hw/sd.h b/include/hw/sd/sd.h
index 79adb5bb48..79adb5bb48 100644
--- a/include/hw/sd.h
+++ b/include/hw/sd/sd.h
diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h
new file mode 100644
index 0000000000..9b3031fb81
--- /dev/null
+++ b/include/hw/sd/sdhci.h
@@ -0,0 +1,94 @@
+/*
+ * SD Association Host Standard Specification v2.0 controller emulation
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Mitsyanko Igor <i.mitsyanko@samsung.com>
+ * Peter A.G. Crosthwaite <peter.crosthwaite@petalogix.com>
+ *
+ * Based on MMC controller for Samsung S5PC1xx-based board emulation
+ * by Alexey Merkulov and Vladimir Monakhov.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU _General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef SDHCI_H
+#define SDHCI_H
+
+#include "qemu-common.h"
+#include "hw/block/block.h"
+#include "hw/pci/pci.h"
+#include "hw/sysbus.h"
+#include "hw/sd/sd.h"
+
+/* SD/MMC host controller state */
+typedef struct SDHCIState {
+    union {
+        PCIDevice pcidev;
+        SysBusDevice busdev;
+    };
+    SDState *card;
+    MemoryRegion iomem;
+    BlockConf conf;
+
+    QEMUTimer *insert_timer;       /* timer for 'changing' sd card. */
+    QEMUTimer *transfer_timer;
+    qemu_irq eject_cb;
+    qemu_irq ro_cb;
+    qemu_irq irq;
+
+    uint32_t sdmasysad;    /* SDMA System Address register */
+    uint16_t blksize;      /* Host DMA Buff Boundary and Transfer BlkSize Reg */
+    uint16_t blkcnt;       /* Blocks count for current transfer */
+    uint32_t argument;     /* Command Argument Register */
+    uint16_t trnmod;       /* Transfer Mode Setting Register */
+    uint16_t cmdreg;       /* Command Register */
+    uint32_t rspreg[4];    /* Response Registers 0-3 */
+    uint32_t prnsts;       /* Present State Register */
+    uint8_t  hostctl;      /* Host Control Register */
+    uint8_t  pwrcon;       /* Power control Register */
+    uint8_t  blkgap;       /* Block Gap Control Register */
+    uint8_t  wakcon;       /* WakeUp Control Register */
+    uint16_t clkcon;       /* Clock control Register */
+    uint8_t  timeoutcon;   /* Timeout Control Register */
+    uint8_t  admaerr;      /* ADMA Error Status Register */
+    uint16_t norintsts;    /* Normal Interrupt Status Register */
+    uint16_t errintsts;    /* Error Interrupt Status Register */
+    uint16_t norintstsen;  /* Normal Interrupt Status Enable Register */
+    uint16_t errintstsen;  /* Error Interrupt Status Enable Register */
+    uint16_t norintsigen;  /* Normal Interrupt Signal Enable Register */
+    uint16_t errintsigen;  /* Error Interrupt Signal Enable Register */
+    uint16_t acmd12errsts; /* Auto CMD12 error status register */
+    uint64_t admasysaddr;  /* ADMA System Address Register */
+
+    uint32_t capareg;      /* Capabilities Register */
+    uint32_t maxcurr;      /* Maximum Current Capabilities Register */
+    uint8_t  *fifo_buffer; /* SD host i/o FIFO buffer */
+    uint32_t buf_maxsz;
+    uint16_t data_count;   /* current element in FIFO buffer */
+    uint8_t  stopped_state;/* Current SDHC state */
+    /* Buffer Data Port Register - virtual access point to R and W buffers */
+    /* Software Reset Register - always reads as 0 */
+    /* Force Event Auto CMD12 Error Interrupt Reg - write only */
+    /* Force Event Error Interrupt Register- write only */
+    /* RO Host Controller Version Register always reads as 0x2401 */
+} SDHCIState;
+
+#define TYPE_PCI_SDHCI "sdhci-pci"
+#define PCI_SDHCI(obj) OBJECT_CHECK(SDHCIState, (obj), TYPE_PCI_SDHCI)
+
+#define TYPE_SYSBUS_SDHCI "generic-sdhci"
+#define SYSBUS_SDHCI(obj)                               \
+     OBJECT_CHECK(SDHCIState, (obj), TYPE_SYSBUS_SDHCI)
+
+#endif /* SDHCI_H */
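
Exporting SDHCIState in a public header lets boards embed the controller directly, which is what the xlnx-zynqmp.h change above relies on. A hedged sketch of the usual sysbus wiring; the helper name, base address and IRQ are placeholders, and hw/arm/xlnx-zynqmp.c should be consulted for the exact sequence:

    /* Illustrative wiring of one TYPE_SYSBUS_SDHCI instance. */
    static void sketch_attach_sdhci(SDHCIState *sd, hwaddr base, qemu_irq irq)
    {
        Error *err = NULL;

        object_initialize(sd, sizeof(*sd), TYPE_SYSBUS_SDHCI);
        qdev_set_parent_bus(DEVICE(sd), sysbus_get_default());
        object_property_set_bool(OBJECT(sd), true, "realized", &err);
        if (err) {
            error_report_err(err);
            return;
        }
        sysbus_mmio_map(SYS_BUS_DEVICE(sd), 0, base);
        sysbus_connect_irq(SYS_BUS_DEVICE(sd), 0, irq);
    }
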
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 9d09115fab..205fadf234 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -151,8 +151,7 @@ void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len, unsigned int idx);
 
-void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
-    size_t num_sg, int is_write);
+void virtqueue_map(VirtQueueElement *elem);
 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                           unsigned int out_bytes);
diff --git a/include/qapi/qmp/qbool.h b/include/qapi/qmp/qbool.h
index 4aa6be3b33..d9256e4268 100644
--- a/include/qapi/qmp/qbool.h
+++ b/include/qapi/qmp/qbool.h
@@ -18,7 +18,7 @@
 #include "qapi/qmp/qobject.h"
 
 typedef struct QBool {
-    QObject_HEAD;
+    QObject base;
     bool value;
 } QBool;
 
diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h
index a37f4c1567..787c658967 100644
--- a/include/qapi/qmp/qdict.h
+++ b/include/qapi/qmp/qdict.h
@@ -28,7 +28,7 @@ typedef struct QDictEntry {
 } QDictEntry;
 
 typedef struct QDict {
-    QObject_HEAD;
+    QObject base;
     size_t size;
     QLIST_HEAD(,QDictEntry) table[QDICT_BUCKET_MAX];
 } QDict;
diff --git a/include/qapi/qmp/qfloat.h b/include/qapi/qmp/qfloat.h
index a8658443dc..46745e50d1 100644
--- a/include/qapi/qmp/qfloat.h
+++ b/include/qapi/qmp/qfloat.h
@@ -18,7 +18,7 @@
 #include "qapi/qmp/qobject.h"
 
 typedef struct QFloat {
-    QObject_HEAD;
+    QObject base;
     double value;
 } QFloat;
 
diff --git a/include/qapi/qmp/qint.h b/include/qapi/qmp/qint.h
index 48a41b0f2a..339a9abb8f 100644
--- a/include/qapi/qmp/qint.h
+++ b/include/qapi/qmp/qint.h
@@ -17,7 +17,7 @@
 #include "qapi/qmp/qobject.h"
 
 typedef struct QInt {
-    QObject_HEAD;
+    QObject base;
     int64_t value;
 } QInt;
 
diff --git a/include/qapi/qmp/qlist.h b/include/qapi/qmp/qlist.h
index 6cc4831df3..b1bf7852c5 100644
--- a/include/qapi/qmp/qlist.h
+++ b/include/qapi/qmp/qlist.h
@@ -22,7 +22,7 @@ typedef struct QListEntry {
 } QListEntry;
 
 typedef struct QList {
-    QObject_HEAD;
+    QObject base;
     QTAILQ_HEAD(,QListEntry) head;
 } QList;
 
diff --git a/include/qapi/qmp/qobject.h b/include/qapi/qmp/qobject.h
index 260d2ed3cc..c856f553b7 100644
--- a/include/qapi/qmp/qobject.h
+++ b/include/qapi/qmp/qobject.h
@@ -59,10 +59,6 @@ typedef struct QObject {
     size_t refcnt;
 } QObject;
 
-/* Objects definitions must include this */
-#define QObject_HEAD  \
-    QObject base
-
 /* Get the 'base' part of an object */
 #define QOBJECT(obj) (&(obj)->base)
 
diff --git a/include/qapi/qmp/qstring.h b/include/qapi/qmp/qstring.h
index 1bc3666107..34675a7fc0 100644
--- a/include/qapi/qmp/qstring.h
+++ b/include/qapi/qmp/qstring.h
@@ -17,7 +17,7 @@
 #include "qapi/qmp/qobject.h"
 
 typedef struct QString {
-    QObject_HEAD;
+    QObject base;
     char *string;
     size_t length;
     size_t capacity;
diff --git a/include/qemu/event_notifier.h b/include/qemu/event_notifier.h
index 88b57af7ce..a8f28540c9 100644
--- a/include/qemu/event_notifier.h
+++ b/include/qemu/event_notifier.h
@@ -38,7 +38,7 @@ int event_notifier_set_handler(EventNotifier *, EventNotifierHandler *);
 
 #ifdef CONFIG_POSIX
 void event_notifier_init_fd(EventNotifier *, int fd);
-int event_notifier_get_fd(EventNotifier *);
+int event_notifier_get_fd(const EventNotifier *);
 #else
 HANDLE event_notifier_get_handle(EventNotifier *);
 #endif
diff --git a/include/qemu/log.h b/include/qemu/log.h
index f880e66dbc..7de45001f2 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -41,6 +41,7 @@ static inline bool qemu_log_enabled(void)
 #define LOG_UNIMP          (1 << 10)
 #define LOG_GUEST_ERROR    (1 << 11)
 #define CPU_LOG_MMU        (1 << 12)
+#define CPU_LOG_TB_NOCHAIN (1 << 13)
 
 /* Returns true if a bit is set in the current loglevel mask
  */
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index d4a8f7a6d5..d961362d36 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -11,6 +11,7 @@ typedef struct AddressSpace AddressSpace;
 typedef struct AioContext AioContext;
 typedef struct AudioState AudioState;
 typedef struct BlockBackend BlockBackend;
+typedef struct BlockBackendRootState BlockBackendRootState;
 typedef struct BlockDriverState BlockDriverState;
 typedef struct BusClass BusClass;
 typedef struct BusState BusState;
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 8fc960fcbf..9306a527bb 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -72,10 +72,16 @@ BlockBackend *blk_by_name(const char *name);
 BlockBackend *blk_next(BlockBackend *blk);
 
 BlockDriverState *blk_bs(BlockBackend *blk);
+void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs);
 
 void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk);
 
 void blk_iostatus_enable(BlockBackend *blk);
+bool blk_iostatus_is_enabled(const BlockBackend *blk);
+BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
+void blk_iostatus_disable(BlockBackend *blk);
+void blk_iostatus_reset(BlockBackend *blk);
+void blk_iostatus_set_err(BlockBackend *blk, int error);
 int blk_attach_dev(BlockBackend *blk, void *dev);
 void blk_attach_dev_nofail(BlockBackend *blk, void *dev);
 void blk_detach_dev(BlockBackend *blk, void *dev);
@@ -120,6 +126,8 @@ int blk_flush(BlockBackend *blk);
 int blk_flush_all(void);
 void blk_drain(BlockBackend *blk);
 void blk_drain_all(void);
+void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
+                      BlockdevOnError on_write_error);
 BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
 BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
                                       int error);
@@ -130,7 +138,8 @@ int blk_is_sg(BlockBackend *blk);
 int blk_enable_write_cache(BlockBackend *blk);
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
 void blk_invalidate_cache(BlockBackend *blk, Error **errp);
-int blk_is_inserted(BlockBackend *blk);
+bool blk_is_inserted(BlockBackend *blk);
+bool blk_is_available(BlockBackend *blk);
 void blk_lock_medium(BlockBackend *blk, bool locked);
 void blk_eject(BlockBackend *blk, bool eject_flag);
 int blk_get_flags(BlockBackend *blk);
@@ -155,6 +164,8 @@ void blk_add_close_notifier(BlockBackend *blk, Notifier *notify);
 void blk_io_plug(BlockBackend *blk);
 void blk_io_unplug(BlockBackend *blk);
 BlockAcctStats *blk_get_stats(BlockBackend *blk);
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
+void blk_update_root_state(BlockBackend *blk);
 
 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
                   BlockCompletionFunc *cb, void *opaque);
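
blk_is_inserted() keeps its backend-level meaning (a medium is present), while the new blk_is_available() is the stricter guest-facing check. A hedged usage sketch; the error codes and the length probe are illustrative:

    /* Sketch: commands acting on the guest-visible medium check availability
     * first; a false result covers both "no medium" and "tray open". */
    static int sketch_query_medium(BlockBackend *blk, int64_t *len)
    {
        if (!blk_is_available(blk)) {
            return -ENOMEDIUM;
        }
        *len = blk_getlength(blk);
        return *len < 0 ? (int)*len : 0;
    }
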
diff --git a/include/sysemu/char.h b/include/sysemu/char.h
index edf76693d9..aff193f080 100644
--- a/include/sysemu/char.h
+++ b/include/sysemu/char.h
@@ -128,11 +128,19 @@ CharDriverState *qemu_chr_new(const char *label, const char *filename,
 /**
  * @qemu_chr_delete:
  *
- * Destroy a character backend.
+ * Destroy a character backend and remove it from the list of
+ * identified character backends.
  */
 void qemu_chr_delete(CharDriverState *chr);
 
 /**
+ * @qemu_chr_free:
+ *
+ * Destroy a character backend.
+ */
+void qemu_chr_free(CharDriverState *chr);
+
+/**
  * @qemu_chr_fe_set_echo:
  *
  * Ask the backend to override its normal echo setting.  This only really
diff --git a/iohandler.c b/iohandler.c
index 55f8501524..eb68083559 100644
--- a/iohandler.c
+++ b/iohandler.c
@@ -55,7 +55,8 @@ void qemu_set_fd_handler(int fd,
                          void *opaque)
 {
     iohandler_init();
-    aio_set_fd_handler(iohandler_ctx, fd, fd_read, fd_write, opaque);
+    aio_set_fd_handler(iohandler_ctx, fd, false,
+                       fd_read, fd_write, opaque);
 }
 
 /* reaping of zombies.  right now we're not passing the status to
diff --git a/migration/block.c b/migration/block.c
index ed865ed23b..f7bb1e0126 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -808,6 +808,11 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
                 return -EINVAL;
             }
             bs = blk_bs(blk);
+            if (!bs) {
+                fprintf(stderr, "Block device %s has no medium\n",
+                        device_name);
+                return -EINVAL;
+            }
 
             if (bs != bs_prev) {
                 bs_prev = bs;
diff --git a/monitor.c b/monitor.c
index 4f1ba2f666..6cd747f4f9 100644
--- a/monitor.c
+++ b/monitor.c
@@ -181,13 +181,16 @@ typedef struct {
  * instance.
  */
 typedef struct MonitorQAPIEventState {
-    QAPIEvent event;    /* Event being tracked */
-    int64_t rate;       /* Minimum time (in ns) between two events */
-    int64_t last;       /* QEMU_CLOCK_REALTIME value at last emission */
+    QAPIEvent event;    /* Throttling state for this event type and... */
+    QDict *data;        /* ... data, see qapi_event_throttle_equal() */
     QEMUTimer *timer;   /* Timer for handling delayed events */
-    QObject *data;      /* Event pending delayed dispatch */
+    QDict *qdict;       /* Delayed event (if any) */
 } MonitorQAPIEventState;
 
+typedef struct {
+    int64_t rate;       /* Minimum time (in ns) between two events */
+} MonitorQAPIEventConf;
+
 struct Monitor {
     CharDriverState *chr;
     int reset_seen;
@@ -438,132 +441,161 @@ static void monitor_protocol_emitter(Monitor *mon, QObject *data,
 }
 
 
-static MonitorQAPIEventState monitor_qapi_event_state[QAPI_EVENT_MAX];
+static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT_MAX] = {
+    /* Limit guest-triggerable events to 1 per second */
+    [QAPI_EVENT_RTC_CHANGE]        = { 1000 * SCALE_MS },
+    [QAPI_EVENT_WATCHDOG]          = { 1000 * SCALE_MS },
+    [QAPI_EVENT_BALLOON_CHANGE]    = { 1000 * SCALE_MS },
+    [QAPI_EVENT_QUORUM_REPORT_BAD] = { 1000 * SCALE_MS },
+    [QAPI_EVENT_QUORUM_FAILURE]    = { 1000 * SCALE_MS },
+    [QAPI_EVENT_VSERPORT_CHANGE]   = { 1000 * SCALE_MS },
+};
+
+GHashTable *monitor_qapi_event_state;
 
 /*
  * Emits the event to every monitor instance, @event is only used for trace
  * Called with monitor_lock held.
  */
-static void monitor_qapi_event_emit(QAPIEvent event, QObject *data)
+static void monitor_qapi_event_emit(QAPIEvent event, QDict *qdict)
 {
     Monitor *mon;
 
-    trace_monitor_protocol_event_emit(event, data);
+    trace_monitor_protocol_event_emit(event, qdict);
     QLIST_FOREACH(mon, &mon_list, entry) {
         if (monitor_is_qmp(mon) && mon->qmp.in_command_mode) {
-            monitor_json_emitter(mon, data);
+            monitor_json_emitter(mon, QOBJECT(qdict));
         }
     }
 }
 
+static void monitor_qapi_event_handler(void *opaque);
+
 /*
  * Queue a new event for emission to Monitor instances,
  * applying any rate limiting if required.
  */
 static void
-monitor_qapi_event_queue(QAPIEvent event, QDict *data, Error **errp)
+monitor_qapi_event_queue(QAPIEvent event, QDict *qdict, Error **errp)
 {
+    MonitorQAPIEventConf *evconf;
     MonitorQAPIEventState *evstate;
-    assert(event < QAPI_EVENT_MAX);
-    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
 
-    evstate = &(monitor_qapi_event_state[event]);
-    trace_monitor_protocol_event_queue(event,
-                                       data,
-                                       evstate->rate,
-                                       evstate->last,
-                                       now);
+    assert(event < QAPI_EVENT_MAX);
+    evconf = &monitor_qapi_event_conf[event];
+    trace_monitor_protocol_event_queue(event, qdict, evconf->rate);
 
-    /* Rate limit of 0 indicates no throttling */
     qemu_mutex_lock(&monitor_lock);
-    if (!evstate->rate) {
-        monitor_qapi_event_emit(event, QOBJECT(data));
-        evstate->last = now;
+
+    if (!evconf->rate) {
+        /* Unthrottled event */
+        monitor_qapi_event_emit(event, qdict);
     } else {
-        int64_t delta = now - evstate->last;
-        if (evstate->data ||
-            delta < evstate->rate) {
-            /* If there's an existing event pending, replace
-             * it with the new event, otherwise schedule a
-             * timer for delayed emission
+        QDict *data = qobject_to_qdict(qdict_get(qdict, "data"));
+        MonitorQAPIEventState key = { .event = event, .data = data };
+
+        evstate = g_hash_table_lookup(monitor_qapi_event_state, &key);
+        assert(!evstate || timer_pending(evstate->timer));
+
+        if (evstate) {
+            /*
+             * Timer is pending for (at least) evconf->rate ns after
+             * last send.  Store event for sending when timer fires,
+             * replacing a prior stored event if any.
              */
-            if (evstate->data) {
-                qobject_decref(evstate->data);
-            } else {
-                int64_t then = evstate->last + evstate->rate;
-                timer_mod_ns(evstate->timer, then);
-            }
-            evstate->data = QOBJECT(data);
-            qobject_incref(evstate->data);
+            QDECREF(evstate->qdict);
+            evstate->qdict = qdict;
+            QINCREF(evstate->qdict);
         } else {
-            monitor_qapi_event_emit(event, QOBJECT(data));
-            evstate->last = now;
+            /*
+             * Last send was (at least) evconf->rate ns ago.
+             * Send immediately, and arm the timer to call
+             * monitor_qapi_event_handler() in evconf->rate ns.  Any
+             * events arriving before then will be delayed until then.
+             */
+            int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+            monitor_qapi_event_emit(event, qdict);
+
+            evstate = g_new(MonitorQAPIEventState, 1);
+            evstate->event = event;
+            evstate->data = data;
+            QINCREF(evstate->data);
+            evstate->qdict = NULL;
+            evstate->timer = timer_new_ns(QEMU_CLOCK_REALTIME,
+                                          monitor_qapi_event_handler,
+                                          evstate);
+            g_hash_table_add(monitor_qapi_event_state, evstate);
+            timer_mod_ns(evstate->timer, now + evconf->rate);
         }
     }
+
     qemu_mutex_unlock(&monitor_lock);
 }
 
 /*
- * The callback invoked by QemuTimer when a delayed
- * event is ready to be emitted
+ * This function runs evconf->rate ns after sending a throttled
+ * event.
+ * If another event has since been stored, send it.
  */
 static void monitor_qapi_event_handler(void *opaque)
 {
     MonitorQAPIEventState *evstate = opaque;
-    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    MonitorQAPIEventConf *evconf = &monitor_qapi_event_conf[evstate->event];
 
-    trace_monitor_protocol_event_handler(evstate->event,
-                                         evstate->data,
-                                         evstate->last,
-                                         now);
+    trace_monitor_protocol_event_handler(evstate->event, evstate->qdict);
     qemu_mutex_lock(&monitor_lock);
-    if (evstate->data) {
-        monitor_qapi_event_emit(evstate->event, evstate->data);
-        qobject_decref(evstate->data);
-        evstate->data = NULL;
+
+    if (evstate->qdict) {
+        int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+        monitor_qapi_event_emit(evstate->event, evstate->qdict);
+        QDECREF(evstate->qdict);
+        evstate->qdict = NULL;
+        timer_mod_ns(evstate->timer, now + evconf->rate);
+    } else {
+        g_hash_table_remove(monitor_qapi_event_state, evstate);
+        QDECREF(evstate->data);
+        timer_free(evstate->timer);
+        g_free(evstate);
     }
-    evstate->last = now;
+
     qemu_mutex_unlock(&monitor_lock);
 }
 
-/*
- * @event: the event ID to be limited
- * @rate: the rate limit in milliseconds
- *
- * Sets a rate limit on a particular event, so no
- * more than 1 event will be emitted within @rate
- * milliseconds
- */
-static void
-monitor_qapi_event_throttle(QAPIEvent event, int64_t rate)
+static unsigned int qapi_event_throttle_hash(const void *key)
 {
-    MonitorQAPIEventState *evstate;
-    assert(event < QAPI_EVENT_MAX);
+    const MonitorQAPIEventState *evstate = key;
+    unsigned int hash = evstate->event * 255;
 
-    evstate = &(monitor_qapi_event_state[event]);
+    if (evstate->event == QAPI_EVENT_VSERPORT_CHANGE) {
+        hash += g_str_hash(qdict_get_str(evstate->data, "id"));
+    }
 
-    trace_monitor_protocol_event_throttle(event, rate);
-    evstate->event = event;
-    assert(rate * SCALE_MS <= INT64_MAX);
-    evstate->rate = rate * SCALE_MS;
-    evstate->last = 0;
-    evstate->data = NULL;
-    evstate->timer = timer_new(QEMU_CLOCK_REALTIME,
-                               SCALE_MS,
-                               monitor_qapi_event_handler,
-                               evstate);
+    return hash;
 }
 
-static void monitor_qapi_event_init(void)
+static gboolean qapi_event_throttle_equal(const void *a, const void *b)
 {
-    /* Limit guest-triggerable events to 1 per second */
-    monitor_qapi_event_throttle(QAPI_EVENT_RTC_CHANGE, 1000);
-    monitor_qapi_event_throttle(QAPI_EVENT_WATCHDOG, 1000);
-    monitor_qapi_event_throttle(QAPI_EVENT_BALLOON_CHANGE, 1000);
-    monitor_qapi_event_throttle(QAPI_EVENT_QUORUM_REPORT_BAD, 1000);
-    monitor_qapi_event_throttle(QAPI_EVENT_QUORUM_FAILURE, 1000);
-    monitor_qapi_event_throttle(QAPI_EVENT_VSERPORT_CHANGE, 1000);
+    const MonitorQAPIEventState *eva = a;
+    const MonitorQAPIEventState *evb = b;
 
+    if (eva->event != evb->event) {
+        return FALSE;
+    }
+
+    if (eva->event == QAPI_EVENT_VSERPORT_CHANGE) {
+        return !strcmp(qdict_get_str(eva->data, "id"),
+                       qdict_get_str(evb->data, "id"));
+    }
+
+    return TRUE;
+}
+
+static void monitor_qapi_event_init(void)
+{
+    monitor_qapi_event_state = g_hash_table_new(qapi_event_throttle_hash,
+                                                qapi_event_throttle_equal);
     qmp_event_set_func_emit(monitor_qapi_event_queue);
 }
 
@@ -4145,6 +4177,10 @@ int monitor_read_block_device_key(Monitor *mon, const char *device,
         monitor_printf(mon, "Device not found %s\n", device);
         return -1;
     }
+    if (!blk_bs(blk)) {
+        monitor_printf(mon, "Device '%s' has no medium\n", device);
+        return -1;
+    }
 
     bdrv_add_key(blk_bs(blk), NULL, &err);
     if (err) {
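
The monitor rework above replaces the fixed per-event throttle array with a hash table keyed on the event type plus, for VSERPORT_CHANGE, the "id" member of the event data, so change events for different virtio-serial ports no longer throttle each other. Since qapi_event_throttle_equal() is static, the sketch below would only live inside monitor.c; it is purely illustrative:

    /* Hedged illustration: two VSERPORT_CHANGE payloads fall into the same
     * throttle bucket only when their "id" strings match. */
    static bool sketch_same_bucket(QDict *data_a, QDict *data_b)
    {
        MonitorQAPIEventState a = { .event = QAPI_EVENT_VSERPORT_CHANGE,
                                    .data  = data_a };
        MonitorQAPIEventState b = { .event = QAPI_EVENT_VSERPORT_CHANGE,
                                    .data  = data_b };

        return qapi_event_throttle_equal(&a, &b);
    }
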
diff --git a/nbd.c b/nbd.c
index fc34c449c8..b3d9654499 100644
--- a/nbd.c
+++ b/nbd.c
@@ -1446,6 +1446,7 @@ static void nbd_set_handlers(NBDClient *client)
 {
     if (client->exp && client->exp->ctx) {
         aio_set_fd_handler(client->exp->ctx, client->sock,
+                           true,
                            client->can_read ? nbd_read : NULL,
                            client->send_coroutine ? nbd_restart_write : NULL,
                            client);
@@ -1455,7 +1456,8 @@ static void nbd_set_handlers(NBDClient *client)
 static void nbd_unset_handlers(NBDClient *client)
 {
     if (client->exp && client->exp->ctx) {
-        aio_set_fd_handler(client->exp->ctx, client->sock, NULL, NULL, NULL);
+        aio_set_fd_handler(client->exp->ctx, client->sock,
+                           true, NULL, NULL, NULL);
     }
 }
 
diff --git a/net/dump.c b/net/dump.c
index 08259afcb6..dd0555f8bd 100644
--- a/net/dump.c
+++ b/net/dump.c
@@ -25,12 +25,13 @@
 #include "clients.h"
 #include "qemu-common.h"
 #include "qemu/error-report.h"
+#include "qemu/iov.h"
 #include "qemu/log.h"
 #include "qemu/timer.h"
-#include "hub.h"
+#include "qapi/visitor.h"
+#include "net/filter.h"
 
 typedef struct DumpState {
-    NetClientState nc;
     int64_t start_ts;
     int fd;
     int pcap_caplen;
@@ -57,12 +58,13 @@ struct pcap_sf_pkthdr {
     uint32_t len;
 };
 
-static ssize_t dump_receive(NetClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt)
 {
-    DumpState *s = DO_UPCAST(DumpState, nc, nc);
     struct pcap_sf_pkthdr hdr;
     int64_t ts;
     int caplen;
+    size_t size = iov_size(iov, cnt);
+    struct iovec dumpiov[cnt + 1];
 
     /* Early return in case of previous error. */
     if (s->fd < 0) {
@@ -76,8 +78,12 @@ static ssize_t dump_receive(NetClientState *nc, const uint8_t *buf, size_t size)
     hdr.ts.tv_usec = ts % 1000000;
     hdr.caplen = caplen;
     hdr.len = size;
-    if (write(s->fd, &hdr, sizeof(hdr)) != sizeof(hdr) ||
-        write(s->fd, buf, caplen) != caplen) {
+
+    dumpiov[0].iov_base = &hdr;
+    dumpiov[0].iov_len = sizeof(hdr);
+    cnt = iov_copy(&dumpiov[1], cnt, iov, cnt, 0, caplen);
+
+    if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) {
         qemu_log("-net dump write error - stop dump\n");
         close(s->fd);
         s->fd = -1;
@@ -86,27 +92,16 @@ static ssize_t dump_receive(NetClientState *nc, const uint8_t *buf, size_t size)
     return size;
 }
 
-static void dump_cleanup(NetClientState *nc)
+static void dump_cleanup(DumpState *s)
 {
-    DumpState *s = DO_UPCAST(DumpState, nc, nc);
-
     close(s->fd);
+    s->fd = -1;
 }
 
-static NetClientInfo net_dump_info = {
-    .type = NET_CLIENT_OPTIONS_KIND_DUMP,
-    .size = sizeof(DumpState),
-    .receive = dump_receive,
-    .cleanup = dump_cleanup,
-};
-
-static int net_dump_init(NetClientState *peer, const char *device,
-                         const char *name, const char *filename, int len,
-                         Error **errp)
+static int net_dump_state_init(DumpState *s, const char *filename,
+                               int len, Error **errp)
 {
     struct pcap_file_hdr hdr;
-    NetClientState *nc;
-    DumpState *s;
     struct tm tm;
     int fd;
 
@@ -130,13 +125,6 @@ static int net_dump_init(NetClientState *peer, const char *device,
         return -1;
     }
 
-    nc = qemu_new_net_client(&net_dump_info, peer, device, name);
-
-    snprintf(nc->info_str, sizeof(nc->info_str),
-             "dump to %s (len=%d)", filename, len);
-
-    s = DO_UPCAST(DumpState, nc, nc);
-
     s->fd = fd;
     s->pcap_caplen = len;
 
@@ -146,13 +134,58 @@ static int net_dump_init(NetClientState *peer, const char *device,
     return 0;
 }
 
+/* Dumping via VLAN netclient */
+
+struct DumpNetClient {
+    NetClientState nc;
+    DumpState ds;
+};
+typedef struct DumpNetClient DumpNetClient;
+
+static ssize_t dumpclient_receive(NetClientState *nc, const uint8_t *buf,
+                                  size_t size)
+{
+    DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc);
+    struct iovec iov = {
+        .iov_base = (void *)buf,
+        .iov_len = size
+    };
+
+    return dump_receive_iov(&dc->ds, &iov, 1);
+}
+
+static ssize_t dumpclient_receive_iov(NetClientState *nc,
+                                      const struct iovec *iov, int cnt)
+{
+    DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc);
+
+    return dump_receive_iov(&dc->ds, iov, cnt);
+}
+
+static void dumpclient_cleanup(NetClientState *nc)
+{
+    DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc);
+
+    dump_cleanup(&dc->ds);
+}
+
+static NetClientInfo net_dump_info = {
+    .type = NET_CLIENT_OPTIONS_KIND_DUMP,
+    .size = sizeof(DumpNetClient),
+    .receive = dumpclient_receive,
+    .receive_iov = dumpclient_receive_iov,
+    .cleanup = dumpclient_cleanup,
+};
+
 int net_init_dump(const NetClientOptions *opts, const char *name,
                   NetClientState *peer, Error **errp)
 {
-    int len;
+    int len, rc;
     const char *file;
     char def_file[128];
     const NetdevDumpOptions *dump;
+    NetClientState *nc;
+    DumpNetClient *dnc;
 
     assert(opts->kind == NET_CLIENT_OPTIONS_KIND_DUMP);
     dump = opts->dump;
@@ -182,5 +215,140 @@ int net_init_dump(const NetClientOptions *opts, const char *name,
         len = 65536;
     }
 
-    return net_dump_init(peer, "dump", name, file, len, errp);
+    nc = qemu_new_net_client(&net_dump_info, peer, "dump", name);
+    snprintf(nc->info_str, sizeof(nc->info_str),
+             "dump to %s (len=%d)", file, len);
+
+    dnc = DO_UPCAST(DumpNetClient, nc, nc);
+    rc = net_dump_state_init(&dnc->ds, file, len, errp);
+    if (rc) {
+        qemu_del_net_client(nc);
+    }
+    return rc;
 }
+
+/* Dumping via filter */
+
+#define TYPE_FILTER_DUMP "filter-dump"
+
+#define FILTER_DUMP(obj) \
+    OBJECT_CHECK(NetFilterDumpState, (obj), TYPE_FILTER_DUMP)
+
+struct NetFilterDumpState {
+    NetFilterState nfs;
+    DumpState ds;
+    char *filename;
+    uint32_t maxlen;
+};
+typedef struct NetFilterDumpState NetFilterDumpState;
+
+static ssize_t filter_dump_receive_iov(NetFilterState *nf, NetClientState *sndr,
+                                       unsigned flags, const struct iovec *iov,
+                                       int iovcnt, NetPacketSent *sent_cb)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(nf);
+
+    dump_receive_iov(&nfds->ds, iov, iovcnt);
+    return 0;
+}
+
+static void filter_dump_cleanup(NetFilterState *nf)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(nf);
+
+    dump_cleanup(&nfds->ds);
+}
+
+static void filter_dump_setup(NetFilterState *nf, Error **errp)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(nf);
+
+    if (!nfds->filename) {
+        error_setg(errp, "dump filter needs 'file' property set!");
+        return;
+    }
+
+    net_dump_state_init(&nfds->ds, nfds->filename, nfds->maxlen, errp);
+}
+
+static void filter_dump_get_maxlen(Object *obj, Visitor *v, void *opaque,
+                                   const char *name, Error **errp)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(obj);
+    uint32_t value = nfds->maxlen;
+
+    visit_type_uint32(v, &value, name, errp);
+}
+
+static void filter_dump_set_maxlen(Object *obj, Visitor *v, void *opaque,
+                                   const char *name, Error **errp)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(obj);
+    Error *local_err = NULL;
+    uint32_t value;
+
+    visit_type_uint32(v, &value, name, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    if (value == 0) {
+        error_setg(&local_err, "Property '%s.%s' doesn't take value '%u'",
+                   object_get_typename(obj), name, value);
+        goto out;
+    }
+    nfds->maxlen = value;
+
+out:
+    error_propagate(errp, local_err);
+}
+
+static char *file_dump_get_filename(Object *obj, Error **errp)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(obj);
+
+    return g_strdup(nfds->filename);
+}
+
+static void file_dump_set_filename(Object *obj, const char *value, Error **errp)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(obj);
+
+    g_free(nfds->filename);
+    nfds->filename = g_strdup(value);
+}
+
+static void filter_dump_instance_init(Object *obj)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(obj);
+
+    nfds->maxlen = 65536;
+
+    object_property_add(obj, "maxlen", "int", filter_dump_get_maxlen,
+                        filter_dump_set_maxlen, NULL, NULL, NULL);
+    object_property_add_str(obj, "file", file_dump_get_filename,
+                            file_dump_set_filename, NULL);
+}
+
+static void filter_dump_class_init(ObjectClass *oc, void *data)
+{
+    NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+    nfc->setup = filter_dump_setup;
+    nfc->cleanup = filter_dump_cleanup;
+    nfc->receive_iov = filter_dump_receive_iov;
+}
+
+static const TypeInfo filter_dump_info = {
+    .name = TYPE_FILTER_DUMP,
+    .parent = TYPE_NETFILTER,
+    .class_init = filter_dump_class_init,
+    .instance_init = filter_dump_instance_init,
+    .instance_size = sizeof(NetFilterDumpState),
+};
+
+static void filter_dump_register_types(void)
+{
+    type_register_static(&filter_dump_info);
+}
+
+type_init(filter_dump_register_types);
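
The same DumpState now backs two front ends: the legacy "-net dump" VLAN client above and the new filter-dump QOM object, which attaches to an existing netdev instead of occupying a hub port. Usage would be roughly the following; the "file" and "maxlen" property names come from the instance_init above, while "netdev" is inherited from the netfilter base class, so treat the exact spelling as an assumption:

    -object filter-dump,id=dump0,netdev=net0,file=/tmp/net0.pcap,maxlen=1500
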
diff --git a/net/net.c b/net/net.c
index 3c68f3faa8..a3e9d1a9b3 100644
--- a/net/net.c
+++ b/net/net.c
@@ -708,7 +708,7 @@ static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov,
         offset = iov[0].iov_len;
     } else {
         buffer = buf;
-        offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer));
+        offset = iov_to_buf(iov, iovcnt, 0, buf, sizeof(buf));
     }
 
     if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
@@ -1197,10 +1197,11 @@ void print_net_client(Monitor *mon, NetClientState *nc)
         monitor_printf(mon, "filters:\n");
     }
     QTAILQ_FOREACH(nf, &nc->filters, next) {
-        monitor_printf(mon, "  - %s: type=%s%s\n",
-                       object_get_canonical_path_component(OBJECT(nf)),
+        char *path = object_get_canonical_path_component(OBJECT(nf));
+        monitor_printf(mon, "  - %s: type=%s%s\n", path,
                        object_get_typename(OBJECT(nf)),
                        nf->info_str);
+        g_free(path);
     }
 }
 
@@ -1227,6 +1228,12 @@ RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name,
             continue;
         }
 
+        /* only query information on queue 0 since the info is per nic,
+         * not per queue
+         */
+        if (nc->queue_index != 0)
+            continue;
+
         if (nc->info->query_rx_filter) {
             info = nc->info->query_rx_filter(nc);
             entry = g_malloc0(sizeof(*entry));
diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc
index 540e45a74c..a419bfd031 100644
--- a/pc-bios/openbios-ppc
+++ b/pc-bios/openbios-ppc
Binary files differ
diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32
index 0da11883bb..cf52787034 100644
--- a/pc-bios/openbios-sparc32
+++ b/pc-bios/openbios-sparc32
Binary files differ
diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64
index 9bf3ce5450..08eecdf225 100644
--- a/pc-bios/openbios-sparc64
+++ b/pc-bios/openbios-sparc64
Binary files differ
diff --git a/qapi/block-core.json b/qapi/block-core.json
index bb2189ef3a..425fdab706 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -215,10 +215,11 @@
 # @drv: the name of the block format used to open the backing device. As of
 #       0.14.0 this can be: 'blkdebug', 'bochs', 'cloop', 'cow', 'dmg',
 #       'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
-#       'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow',
+#       'http', 'https', 'nbd', 'parallels', 'qcow',
 #       'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
 #       2.2: 'archipelago' added, 'cow' dropped
 #       2.3: 'host_floppy' deprecated
+#       2.5: 'host_floppy' dropped
 #
 # @backing_file: #optional the name of the backing file (for copy-on-write)
 #
@@ -1373,15 +1374,14 @@
 #
 # Drivers that are supported in block device operations.
 #
-# @host_device, @host_cdrom, @host_floppy: Since 2.1
-# @host_floppy: deprecated since 2.3
+# @host_device, @host_cdrom: Since 2.1
 #
 # Since: 2.0
 ##
 { 'enum': 'BlockdevDriver',
   'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
             'dmg', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
-            'host_floppy', 'http', 'https', 'null-aio', 'null-co', 'parallels',
+            'http', 'https', 'null-aio', 'null-co', 'parallels',
             'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp', 'vdi', 'vhdx',
             'vmdk', 'vpc', 'vvfat' ] }
 
@@ -1393,9 +1393,12 @@
 #
 # @driver:        block driver name
 # @id:            #optional id by which the new block device can be referred to.
-#                 This is a required option on the top level of blockdev-add, and
-#                 currently not allowed on any other level.
-# @node-name:     #optional the name of a block driver state node (Since 2.0)
+#                 This option is only allowed on the top level of blockdev-add.
+#                 A BlockBackend will be created by blockdev-add if and only if
+#                 this option is given.
+# @node-name:     #optional the name of a block driver state node (Since 2.0).
+#                 This option is required on the top level of blockdev-add if
+#                 the @id option is not given there.
 # @discard:       #optional discard-related options (default: ignore)
 # @cache:         #optional cache-related options
 # @aio:           #optional AIO backend (default: threads)
@@ -1816,7 +1819,6 @@
 # TODO gluster: Wait for structured options
       'host_cdrom': 'BlockdevOptionsFile',
       'host_device':'BlockdevOptionsFile',
-      'host_floppy':'BlockdevOptionsFile',
       'http':       'BlockdevOptionsFile',
       'https':      'BlockdevOptionsFile',
 # TODO iscsi: Wait for structured options
@@ -1860,7 +1862,9 @@
 ##
 # @blockdev-add:
 #
-# Creates a new block device.
+# Creates a new block device. If the @id option is given at the top level, a
+# BlockBackend will be created; otherwise, @node-name is mandatory at the top
+# level and no BlockBackend will be created.
 #
 # This command is still a work in progress.  It doesn't support all
 # block drivers, it lacks a matching blockdev-del, and more.  Stay
diff --git a/qapi/qmp-input-visitor.c b/qapi/qmp-input-visitor.c
index 5dd9ed5ce5..eb6e110300 100644
--- a/qapi/qmp-input-visitor.c
+++ b/qapi/qmp-input-visitor.c
@@ -225,45 +225,45 @@ static void qmp_input_type_int(Visitor *v, int64_t *obj, const char *name,
                                Error **errp)
 {
     QmpInputVisitor *qiv = to_qiv(v);
-    QObject *qobj = qmp_input_get_object(qiv, name, true);
+    QInt *qint = qobject_to_qint(qmp_input_get_object(qiv, name, true));
 
-    if (!qobj || qobject_type(qobj) != QTYPE_QINT) {
+    if (!qint) {
         error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
                    "integer");
         return;
     }
 
-    *obj = qint_get_int(qobject_to_qint(qobj));
+    *obj = qint_get_int(qint);
 }
 
 static void qmp_input_type_bool(Visitor *v, bool *obj, const char *name,
                                 Error **errp)
 {
     QmpInputVisitor *qiv = to_qiv(v);
-    QObject *qobj = qmp_input_get_object(qiv, name, true);
+    QBool *qbool = qobject_to_qbool(qmp_input_get_object(qiv, name, true));
 
-    if (!qobj || qobject_type(qobj) != QTYPE_QBOOL) {
+    if (!qbool) {
         error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
                    "boolean");
         return;
     }
 
-    *obj = qbool_get_bool(qobject_to_qbool(qobj));
+    *obj = qbool_get_bool(qbool);
 }
 
 static void qmp_input_type_str(Visitor *v, char **obj, const char *name,
                                Error **errp)
 {
     QmpInputVisitor *qiv = to_qiv(v);
-    QObject *qobj = qmp_input_get_object(qiv, name, true);
+    QString *qstr = qobject_to_qstring(qmp_input_get_object(qiv, name, true));
 
-    if (!qobj || qobject_type(qobj) != QTYPE_QSTRING) {
+    if (!qstr) {
         error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
                    "string");
         return;
     }
 
-    *obj = g_strdup(qstring_get_str(qobject_to_qstring(qobj)));
+    *obj = g_strdup(qstring_get_str(qstr));
 }
 
 static void qmp_input_type_number(Visitor *v, double *obj, const char *name,
@@ -271,19 +271,23 @@ static void qmp_input_type_number(Visitor *v, double *obj, const char *name,
 {
     QmpInputVisitor *qiv = to_qiv(v);
     QObject *qobj = qmp_input_get_object(qiv, name, true);
+    QInt *qint;
+    QFloat *qfloat;
 
-    if (!qobj || (qobject_type(qobj) != QTYPE_QFLOAT &&
-        qobject_type(qobj) != QTYPE_QINT)) {
-        error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
-                   "number");
+    qint = qobject_to_qint(qobj);
+    if (qint) {
+        *obj = qint_get_int(qobject_to_qint(qobj));
         return;
     }
 
-    if (qobject_type(qobj) == QTYPE_QINT) {
-        *obj = qint_get_int(qobject_to_qint(qobj));
-    } else {
+    qfloat = qobject_to_qfloat(qobj);
+    if (qfloat) {
         *obj = qfloat_get_double(qobject_to_qfloat(qobj));
+        return;
     }
+
+    error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
+               "number");
 }
 
 static void qmp_input_type_any(Visitor *v, QObject **obj, const char *name,
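
The pattern used throughout this hunk (and in the qobject/*.c hunks below) is to make the qobject_to_*() converters tolerate NULL and reject wrong types themselves, so callers collapse "fetch, type-check, cast" into a single call plus one NULL test. A self-contained sketch of the idea, with made-up type names rather than QEMU's QObject definitions:

    #include <stddef.h>

    typedef enum { T_NONE, T_INT } ObjType;
    typedef struct Obj { ObjType type; } Obj;
    typedef struct IntObj { Obj base; long value; } IntObj;

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    /* The downcast absorbs both the NULL check and the type check. */
    static IntObj *obj_to_int(Obj *obj)
    {
        if (!obj || obj->type != T_INT) {
            return NULL;
        }
        return container_of(obj, IntObj, base);
    }

    /* Caller pattern after the change: one conversion, one test. */
    static long get_int_or_default(Obj *obj, long def)
    {
        IntObj *i = obj_to_int(obj);
        return i ? i->value : def;
    }
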
diff --git a/qemu-char.c b/qemu-char.c
index 908e7124eb..c4eb4eea31 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -3876,9 +3876,8 @@ void qemu_chr_fe_release(CharDriverState *s)
     s->avail_connections++;
 }
 
-void qemu_chr_delete(CharDriverState *chr)
+void qemu_chr_free(CharDriverState *chr)
 {
-    QTAILQ_REMOVE(&chardevs, chr, next);
     if (chr->chr_close) {
         chr->chr_close(chr);
     }
@@ -3888,6 +3887,12 @@ void qemu_chr_delete(CharDriverState *chr)
     g_free(chr);
 }
 
+void qemu_chr_delete(CharDriverState *chr)
+{
+    QTAILQ_REMOVE(&chardevs, chr, next);
+    qemu_chr_free(chr);
+}
+
 ChardevInfoList *qmp_query_chardev(Error **errp)
 {
     ChardevInfoList *chr_list = NULL;
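
Splitting qemu_chr_free() out of qemu_chr_delete() lets code that never added a chardev to the global chardevs list release it without touching that list. The shape of the split, as a generic sketch with invented names:

    #include <stdlib.h>

    typedef struct Dev {
        struct Dev *next;
        void (*close)(struct Dev *d);
    } Dev;

    static Dev *dev_list;

    static void dev_free(Dev *d)       /* safe for never-registered objects */
    {
        if (d->close) {
            d->close(d);
        }
        free(d);
    }

    static void dev_delete(Dev *d)     /* registered objects: unlink, then free */
    {
        Dev **p;
        for (p = &dev_list; *p; p = &(*p)->next) {
            if (*p == d) {
                *p = d->next;
                break;
            }
        }
        dev_free(d);
    }
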
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 5b81aa052d..3126abdcd3 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -1266,9 +1266,13 @@ is qemu.git/contrib/ivshmem-server.  An example syntax when using the shared
 memory server is:
 
 @example
-qemu-system-i386 -device ivshmem,size=<size in format accepted by -m>[,chardev=<id>]
-                 [,msi=on][,ioeventfd=on][,vectors=n][,role=peer|master]
-qemu-system-i386 -chardev socket,path=<path>,id=<id>
+# First start the ivshmem server once and for all
+ivshmem-server -p <pidfile> -S <path> -m <shm name> -l <shm size> -n <vectors n>
+
+# Then start your qemu instances with matching arguments
+qemu-system-i386 -device ivshmem,size=<shm size>,vectors=<vectors n>,chardev=<id>
+                 [,msi=on][,ioeventfd=on][,role=peer|master]
+                 -chardev socket,path=<path>,id=<id>
 @end example
 
 When using the server, the guest will be assigned a VM ID (>=0) that allows guests
@@ -1289,6 +1293,19 @@ copy the shared memory on migration to the destination host.  With
 With the @option{peer} case, the device should be detached and then reattached
 after migration using the PCI hotplug support.
 
+@subsubsection ivshmem and hugepages
+
+Instead of specifying the <shm size> using POSIX shm, you may specify
+a memory backend that has hugepage support:
+
+@example
+qemu-system-i386 -object memory-backend-file,size=1G,mem-path=/mnt/hugepages,id=mb1
+                 -device ivshmem,memdev=mb1
+@end example
+
+ivshmem-server also supports hugepage mount points with the
+@option{-m} memory path argument.
+
 @node direct_linux_boot
 @section Direct Linux Boot
 
diff --git a/qemu-log.c b/qemu-log.c
index 13f3813f68..efd07c81ea 100644
--- a/qemu-log.c
+++ b/qemu-log.c
@@ -119,6 +119,9 @@ const QEMULogItem qemu_log_items[] = {
     { LOG_GUEST_ERROR, "guest_errors",
       "log when the guest OS does something invalid (eg accessing a\n"
       "non-existent register)" },
+    { CPU_LOG_TB_NOCHAIN, "nochain",
+      "do not chain compiled TBs so that \"exec\" and \"cpu\" show\n"
+      "complete traces" },
     { 0, NULL, NULL },
 };
 
diff --git a/qemu-options.hx b/qemu-options.hx
index edee5f4844..949db7f2ea 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2012,6 +2012,7 @@ qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,sha
 Dump network traffic on VLAN @var{n} to file @var{file} (@file{qemu-vlan0.pcap} by default).
 At most @var{len} bytes (64k by default) per packet are stored. The file format is
 libpcap, so it can be analyzed with tools such as tcpdump or Wireshark.
+Note: For devices created with '-netdev', use '-object filter-dump,...' instead.
 
 @item -net none
 Indicate that no network devices should be configured. It is used to
@@ -3665,6 +3666,13 @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
 @option{tx}: the filter is attached to the transmit queue of the netdev,
              where it will receive packets sent by the netdev.
 
+@item -object filter-dump,id=@var{id},netdev=@var{dev},file=@var{filename}[,maxlen=@var{len}]
+
+Dump the network traffic on netdev @var{dev} to the file specified by
+@var{filename}. At most @var{len} bytes (64k by default) per packet are stored.
+The file format is libpcap, so it can be analyzed with tools such as tcpdump
+or Wireshark.
+
 @end table
 
 ETEXI
diff --git a/qga/main.c b/qga/main.c
index 068169fcbc..d2a0ffc807 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -573,7 +573,6 @@ static void process_command(GAState *s, QDict *req)
 static void process_event(JSONMessageParser *parser, QList *tokens)
 {
     GAState *s = container_of(parser, GAState, parser);
-    QObject *obj;
     QDict *qdict;
     Error *err = NULL;
     int ret;
@@ -581,9 +580,9 @@ static void process_event(JSONMessageParser *parser, QList *tokens)
     g_assert(s && parser);
 
     g_debug("process_event: called");
-    obj = json_parser_parse_err(tokens, NULL, &err);
-    if (err || !obj || qobject_type(obj) != QTYPE_QDICT) {
-        qobject_decref(obj);
+    qdict = qobject_to_qdict(json_parser_parse_err(tokens, NULL, &err));
+    if (err || !qdict) {
+        QDECREF(qdict);
         qdict = qdict_new();
         if (!err) {
             g_warning("failed to parse event: unknown error");
@@ -593,12 +592,8 @@ static void process_event(JSONMessageParser *parser, QList *tokens)
         }
         qdict_put_obj(qdict, "error", qmp_build_error_object(err));
         error_free(err);
-    } else {
-        qdict = qobject_to_qdict(obj);
     }
 
-    g_assert(qdict);
-
     /* handle host->guest commands */
     if (qdict_haskey(qdict, "execute")) {
         process_command(s, qdict);
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 2b52980cfc..d7cf0ff264 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2520,8 +2520,8 @@ Each json-object contain the following:
     - "wr_total_time_ns": total time spend on writes in nano-seconds (json-int)
     - "rd_total_time_ns": total time spend on reads in nano-seconds (json-int)
     - "flush_total_time_ns": total time spend on cache flushes in nano-seconds (json-int)
-    - "wr_highest_offset": Highest offset of a sector written since the
-                           BlockDriverState has been opened (json-int)
+    - "wr_highest_offset": The offset after the greatest byte written to the
+                           BlockDriverState since it has been opened (json-int)
     - "rd_merged": number of read requests that have been merged into
                    another request (json-int)
     - "wr_merged": number of write requests that have been merged into
diff --git a/qmp.c b/qmp.c
index d9ecedef93..ff54e5a765 100644
--- a/qmp.c
+++ b/qmp.c
@@ -24,6 +24,7 @@
 #include "sysemu/arch_init.h"
 #include "hw/qdev.h"
 #include "sysemu/blockdev.h"
+#include "sysemu/block-backend.h"
 #include "qom/qom-qobject.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qobject.h"
@@ -170,6 +171,7 @@ SpiceInfo *qmp_query_spice(Error **errp)
 void qmp_cont(Error **errp)
 {
     Error *local_err = NULL;
+    BlockBackend *blk;
     BlockDriverState *bs;
 
     if (runstate_needs_reset()) {
@@ -179,8 +181,8 @@ void qmp_cont(Error **errp)
         return;
     }
 
-    for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
-        bdrv_iostatus_reset(bs);
+    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+        blk_iostatus_reset(blk);
     }
     for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
         bdrv_add_key(bs, NULL, &local_err);
diff --git a/qobject/qbool.c b/qobject/qbool.c
index 5ff69f0b2d..bc6535fa49 100644
--- a/qobject/qbool.c
+++ b/qobject/qbool.c
@@ -51,9 +51,9 @@ bool qbool_get_bool(const QBool *qb)
  */
 QBool *qobject_to_qbool(const QObject *obj)
 {
-    if (qobject_type(obj) != QTYPE_QBOOL)
+    if (!obj || qobject_type(obj) != QTYPE_QBOOL) {
         return NULL;
-
+    }
     return container_of(obj, QBool, base);
 }
 
diff --git a/qobject/qdict.c b/qobject/qdict.c
index 67b1a58abf..2d67bf1579 100644
--- a/qobject/qdict.c
+++ b/qobject/qdict.c
@@ -46,9 +46,9 @@ QDict *qdict_new(void)
  */
 QDict *qobject_to_qdict(const QObject *obj)
 {
-    if (qobject_type(obj) != QTYPE_QDICT)
+    if (!obj || qobject_type(obj) != QTYPE_QDICT) {
         return NULL;
-
+    }
     return container_of(obj, QDict, base);
 }
 
@@ -229,8 +229,7 @@ double qdict_get_double(const QDict *qdict, const char *key)
  */
 int64_t qdict_get_int(const QDict *qdict, const char *key)
 {
-    QObject *obj = qdict_get_obj(qdict, key, QTYPE_QINT);
-    return qint_get_int(qobject_to_qint(obj));
+    return qint_get_int(qobject_to_qint(qdict_get(qdict, key)));
 }
 
 /**
@@ -243,8 +242,7 @@ int64_t qdict_get_int(const QDict *qdict, const char *key)
  */
 bool qdict_get_bool(const QDict *qdict, const char *key)
 {
-    QObject *obj = qdict_get_obj(qdict, key, QTYPE_QBOOL);
-    return qbool_get_bool(qobject_to_qbool(obj));
+    return qbool_get_bool(qobject_to_qbool(qdict_get(qdict, key)));
 }
 
 /**
@@ -270,7 +268,7 @@ QList *qdict_get_qlist(const QDict *qdict, const char *key)
  */
 QDict *qdict_get_qdict(const QDict *qdict, const char *key)
 {
-    return qobject_to_qdict(qdict_get_obj(qdict, key, QTYPE_QDICT));
+    return qobject_to_qdict(qdict_get(qdict, key));
 }
 
 /**
@@ -284,8 +282,7 @@ QDict *qdict_get_qdict(const QDict *qdict, const char *key)
  */
 const char *qdict_get_str(const QDict *qdict, const char *key)
 {
-    QObject *obj = qdict_get_obj(qdict, key, QTYPE_QSTRING);
-    return qstring_get_str(qobject_to_qstring(obj));
+    return qstring_get_str(qobject_to_qstring(qdict_get(qdict, key)));
 }
 
 /**
@@ -298,13 +295,9 @@ const char *qdict_get_str(const QDict *qdict, const char *key)
 int64_t qdict_get_try_int(const QDict *qdict, const char *key,
                           int64_t def_value)
 {
-    QObject *obj;
+    QInt *qint = qobject_to_qint(qdict_get(qdict, key));
 
-    obj = qdict_get(qdict, key);
-    if (!obj || qobject_type(obj) != QTYPE_QINT)
-        return def_value;
-
-    return qint_get_int(qobject_to_qint(obj));
+    return qint ? qint_get_int(qint) : def_value;
 }
 
 /**
@@ -316,13 +309,9 @@ int64_t qdict_get_try_int(const QDict *qdict, const char *key,
  */
 bool qdict_get_try_bool(const QDict *qdict, const char *key, bool def_value)
 {
-    QObject *obj;
+    QBool *qbool = qobject_to_qbool(qdict_get(qdict, key));
 
-    obj = qdict_get(qdict, key);
-    if (!obj || qobject_type(obj) != QTYPE_QBOOL)
-        return def_value;
-
-    return qbool_get_bool(qobject_to_qbool(obj));
+    return qbool ? qbool_get_bool(qbool) : def_value;
 }
 
 /**
@@ -335,13 +324,9 @@ bool qdict_get_try_bool(const QDict *qdict, const char *key, bool def_value)
  */
 const char *qdict_get_try_str(const QDict *qdict, const char *key)
 {
-    QObject *obj;
-
-    obj = qdict_get(qdict, key);
-    if (!obj || qobject_type(obj) != QTYPE_QSTRING)
-        return NULL;
+    QString *qstr = qobject_to_qstring(qdict_get(qdict, key));
 
-    return qstring_get_str(qobject_to_qstring(obj));
+    return qstr ? qstring_get_str(qstr) : NULL;
 }
 
 /**
diff --git a/qobject/qfloat.c b/qobject/qfloat.c
index 7de0992dba..c86516327f 100644
--- a/qobject/qfloat.c
+++ b/qobject/qfloat.c
@@ -51,9 +51,9 @@ double qfloat_get_double(const QFloat *qf)
  */
 QFloat *qobject_to_qfloat(const QObject *obj)
 {
-    if (qobject_type(obj) != QTYPE_QFLOAT)
+    if (!obj || qobject_type(obj) != QTYPE_QFLOAT) {
         return NULL;
-
+    }
     return container_of(obj, QFloat, base);
 }
 
diff --git a/qobject/qint.c b/qobject/qint.c
index 86b9b04f0b..999688e9ce 100644
--- a/qobject/qint.c
+++ b/qobject/qint.c
@@ -50,9 +50,9 @@ int64_t qint_get_int(const QInt *qi)
  */
 QInt *qobject_to_qint(const QObject *obj)
 {
-    if (qobject_type(obj) != QTYPE_QINT)
+    if (!obj || qobject_type(obj) != QTYPE_QINT) {
         return NULL;
-
+    }
     return container_of(obj, QInt, base);
 }
 
diff --git a/qobject/qlist.c b/qobject/qlist.c
index 1ced0de58e..298003aaf7 100644
--- a/qobject/qlist.c
+++ b/qobject/qlist.c
@@ -142,10 +142,9 @@ size_t qlist_size(const QList *qlist)
  */
 QList *qobject_to_qlist(const QObject *obj)
 {
-    if (qobject_type(obj) != QTYPE_QLIST) {
+    if (!obj || qobject_type(obj) != QTYPE_QLIST) {
         return NULL;
     }
-
     return container_of(obj, QList, base);
 }
 
diff --git a/qobject/qstring.c b/qobject/qstring.c
index 607b7a142c..cb72dfbfc8 100644
--- a/qobject/qstring.c
+++ b/qobject/qstring.c
@@ -117,9 +117,9 @@ void qstring_append_chr(QString *qstring, int c)
  */
 QString *qobject_to_qstring(const QObject *obj)
 {
-    if (qobject_type(obj) != QTYPE_QSTRING)
+    if (!obj || qobject_type(obj) != QTYPE_QSTRING) {
         return NULL;
-
+    }
     return container_of(obj, QString, base);
 }
 
diff --git a/roms/openbios b/roms/openbios
-Subproject 18f02b14de795c1aab4fe23c1810bfd0944da6a
+Subproject 3caee1794ac3f742315823d8447d21f33ce019e
diff --git a/scripts/qemu-gdb.py b/scripts/qemu-gdb.py
index d6f2e5a903..b3f8e04f77 100644
--- a/scripts/qemu-gdb.py
+++ b/scripts/qemu-gdb.py
@@ -26,7 +26,7 @@ import os, sys
 
 sys.path.append(os.path.dirname(__file__))
 
-from qemugdb import mtree, coroutine
+from qemugdb import aio, mtree, coroutine
 
 class QemuCommand(gdb.Command):
     '''Prefix for QEMU debug support commands'''
@@ -37,6 +37,10 @@ class QemuCommand(gdb.Command):
 QemuCommand()
 coroutine.CoroutineCommand()
 mtree.MtreeCommand()
+aio.HandlersCommand()
+
+coroutine.CoroutineSPFunction()
+coroutine.CoroutinePCFunction()
 
 # Default to silently passing through SIGUSR1, because QEMU sends it
 # to itself a lot.
diff --git a/scripts/qemugdb/aio.py b/scripts/qemugdb/aio.py
new file mode 100644
index 0000000000..2ba00c4444
--- /dev/null
+++ b/scripts/qemugdb/aio.py
@@ -0,0 +1,58 @@
+#!/usr/bin/python
+
+# GDB debugging support: aio/iohandler debug
+#
+# Copyright (c) 2015 Red Hat, Inc.
+#
+# Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+#
+
+import gdb
+from qemugdb import coroutine
+
+def isnull(ptr):
+    return ptr == gdb.Value(0).cast(ptr.type)
+
+def dump_aiocontext(context, verbose):
+    '''Display a dump and backtrace for an aiocontext'''
+    cur = context['aio_handlers']['lh_first']
+    # Get pointers to functions we're going to process specially
+    sym_fd_coroutine_enter = gdb.parse_and_eval('fd_coroutine_enter')
+
+    while not isnull(cur):
+        entry = cur.dereference()
+        gdb.write('----\n%s\n' % entry)
+        if verbose and cur['io_read'] == sym_fd_coroutine_enter:
+            coptr = (cur['opaque'].cast(gdb.lookup_type('FDYieldUntilData').pointer()))['co']
+            coptr = coptr.cast(gdb.lookup_type('CoroutineUContext').pointer())
+            coroutine.bt_jmpbuf(coptr['env']['__jmpbuf'])
+        cur = cur['node']['le_next']
+
+    gdb.write('----\n')
+
+class HandlersCommand(gdb.Command):
+    '''Display aio handlers'''
+    def __init__(self):
+        gdb.Command.__init__(self, 'qemu handlers', gdb.COMMAND_DATA,
+                             gdb.COMPLETE_NONE)
+
+    def invoke(self, arg, from_tty):
+        verbose = False
+        argv = gdb.string_to_argv(arg)
+
+        if len(argv) > 0 and argv[0] == '--verbose':
+            verbose = True
+            argv.pop(0)
+
+        if len(argv) > 1:
+            gdb.write('usage: qemu handlers [--verbose] [handler]\n')
+            return
+
+        if len(argv) == 1:
+            handlers_name = argv[0]
+        else:
+            handlers_name = 'qemu_aio_context'
+        dump_aiocontext(gdb.parse_and_eval(handlers_name), verbose)
diff --git a/scripts/qemugdb/coroutine.py b/scripts/qemugdb/coroutine.py
index 3c54918b5d..ab699794ab 100644
--- a/scripts/qemugdb/coroutine.py
+++ b/scripts/qemugdb/coroutine.py
@@ -15,8 +15,11 @@
 
 import gdb
 
+VOID_PTR = gdb.lookup_type('void').pointer()
+
 def get_fs_base():
-    '''Fetch %fs base value using arch_prctl(ARCH_GET_FS)'''
+    '''Fetch %fs base value using arch_prctl(ARCH_GET_FS).  This is
+       pthread_self().'''
     # %rsp - 120 is scratch space according to the SystemV ABI
     old = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)')
     gdb.execute('call arch_prctl(0x1003, $rsp - 120)', False, True)
@@ -24,17 +27,29 @@ def get_fs_base():
     gdb.execute('set *(uint64_t*)($rsp - 120) = %s' % old, False, True)
     return fs_base
 
+def pthread_self():
+    '''Fetch pthread_self() from the glibc start_thread function.'''
+    f = gdb.newest_frame()
+    while f.name() != 'start_thread':
+        f = f.older()
+        if f is None:
+            return get_fs_base()
+
+    try:
+        return f.read_var("arg")
+    except ValueError:
+        return get_fs_base()
+
 def get_glibc_pointer_guard():
     '''Fetch glibc pointer guard value'''
-    fs_base = get_fs_base()
+    fs_base = pthread_self()
     return gdb.parse_and_eval('*(uint64_t*)((uint64_t)%s + 0x30)' % fs_base)
 
 def glibc_ptr_demangle(val, pointer_guard):
     '''Undo effect of glibc's PTR_MANGLE()'''
     return gdb.parse_and_eval('(((uint64_t)%s >> 0x11) | ((uint64_t)%s << (64 - 0x11))) ^ (uint64_t)%s' % (val, val, pointer_guard))
 
-def bt_jmpbuf(jmpbuf):
-    '''Backtrace a jmpbuf'''
+def get_jmpbuf_regs(jmpbuf):
     JB_RBX  = 0
     JB_RBP  = 1
     JB_R12  = 2
@@ -44,35 +59,35 @@ def bt_jmpbuf(jmpbuf):
     JB_RSP  = 6
     JB_PC   = 7
 
-    old_rbx = gdb.parse_and_eval('(uint64_t)$rbx')
-    old_rbp = gdb.parse_and_eval('(uint64_t)$rbp')
-    old_rsp = gdb.parse_and_eval('(uint64_t)$rsp')
-    old_r12 = gdb.parse_and_eval('(uint64_t)$r12')
-    old_r13 = gdb.parse_and_eval('(uint64_t)$r13')
-    old_r14 = gdb.parse_and_eval('(uint64_t)$r14')
-    old_r15 = gdb.parse_and_eval('(uint64_t)$r15')
-    old_rip = gdb.parse_and_eval('(uint64_t)$rip')
-
     pointer_guard = get_glibc_pointer_guard()
-    gdb.execute('set $rbx = %s' % jmpbuf[JB_RBX])
-    gdb.execute('set $rbp = %s' % glibc_ptr_demangle(jmpbuf[JB_RBP], pointer_guard))
-    gdb.execute('set $rsp = %s' % glibc_ptr_demangle(jmpbuf[JB_RSP], pointer_guard))
-    gdb.execute('set $r12 = %s' % jmpbuf[JB_R12])
-    gdb.execute('set $r13 = %s' % jmpbuf[JB_R13])
-    gdb.execute('set $r14 = %s' % jmpbuf[JB_R14])
-    gdb.execute('set $r15 = %s' % jmpbuf[JB_R15])
-    gdb.execute('set $rip = %s' % glibc_ptr_demangle(jmpbuf[JB_PC], pointer_guard))
+    return {'rbx': jmpbuf[JB_RBX],
+        'rbp': glibc_ptr_demangle(jmpbuf[JB_RBP], pointer_guard),
+        'rsp': glibc_ptr_demangle(jmpbuf[JB_RSP], pointer_guard),
+        'r12': jmpbuf[JB_R12],
+        'r13': jmpbuf[JB_R13],
+        'r14': jmpbuf[JB_R14],
+        'r15': jmpbuf[JB_R15],
+        'rip': glibc_ptr_demangle(jmpbuf[JB_PC], pointer_guard) }
+
+def bt_jmpbuf(jmpbuf):
+    '''Backtrace a jmpbuf'''
+    regs = get_jmpbuf_regs(jmpbuf)
+    old = dict()
+
+    for i in regs:
+        old[i] = gdb.parse_and_eval('(uint64_t)$%s' % i)
+
+    for i in regs:
+        gdb.execute('set $%s = %s' % (i, regs[i]))
 
     gdb.execute('bt')
 
-    gdb.execute('set $rbx = %s' % old_rbx)
-    gdb.execute('set $rbp = %s' % old_rbp)
-    gdb.execute('set $rsp = %s' % old_rsp)
-    gdb.execute('set $r12 = %s' % old_r12)
-    gdb.execute('set $r13 = %s' % old_r13)
-    gdb.execute('set $r14 = %s' % old_r14)
-    gdb.execute('set $r15 = %s' % old_r15)
-    gdb.execute('set $rip = %s' % old_rip)
+    for i in regs:
+        gdb.execute('set $%s = %s' % (i, old[i]))
+
+def coroutine_to_jmpbuf(co):
+    coroutine_pointer = co.cast(gdb.lookup_type('CoroutineUContext').pointer())
+    return coroutine_pointer['env']['__jmpbuf']
 
 
 class CoroutineCommand(gdb.Command):
@@ -87,5 +102,18 @@ class CoroutineCommand(gdb.Command):
             gdb.write('usage: qemu coroutine <coroutine-pointer>\n')
             return
 
-        coroutine_pointer = gdb.parse_and_eval(argv[0]).cast(gdb.lookup_type('CoroutineUContext').pointer())
-        bt_jmpbuf(coroutine_pointer['env']['__jmpbuf'])
+        bt_jmpbuf(coroutine_to_jmpbuf(gdb.parse_and_eval(argv[0])))
+
+class CoroutineSPFunction(gdb.Function):
+    def __init__(self):
+        gdb.Function.__init__(self, 'qemu_coroutine_sp')
+
+    def invoke(self, addr):
+        return get_jmpbuf_regs(coroutine_to_jmpbuf(addr))['rsp'].cast(VOID_PTR)
+
+class CoroutinePCFunction(gdb.Function):
+    def __init__(self):
+        gdb.Function.__init__(self, 'qemu_coroutine_pc')
+
+    def invoke(self, addr):
+        return get_jmpbuf_regs(coroutine_to_jmpbuf(addr))['rip'].cast(VOID_PTR)
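
glibc_ptr_demangle() above undoes glibc's PTR_MANGLE() protection for the saved RSP/PC slots of a jmp_buf on x86-64: the stored value is the real pointer XORed with a per-process guard and rotated by 17 bits. The same arithmetic as a standalone C sketch (the guard itself lives in the thread control block, which is what pthread_self()/get_fs_base() locate):

    #include <stdint.h>

    /* Undo glibc PTR_MANGLE() on x86-64: rotate right by 17, XOR the guard. */
    static uint64_t ptr_demangle(uint64_t val, uint64_t pointer_guard)
    {
        return ((val >> 0x11) | (val << (64 - 0x11))) ^ pointer_guard;
    }

    /* The forward direction, as applied when setjmp() stores RSP and PC. */
    static uint64_t ptr_mangle(uint64_t val, uint64_t pointer_guard)
    {
        uint64_t x = val ^ pointer_guard;
        return (x << 0x11) | (x >> (64 - 0x11));
    }
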
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index f936d1b5b9..87950c63ec 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -2917,6 +2917,11 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb)
 
         if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
             gen_excp(&ctx, EXCP_DEBUG, 0);
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            ctx.pc += 4;
             break;
         }
         if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 3daa7f58f9..815fef8a30 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -279,6 +279,7 @@ typedef struct CPUARMState {
             };
             uint64_t far_el[4];
         };
+        uint64_t hpfar_el2;
         union { /* Translation result. */
             struct {
                 uint64_t _unused_par_0;
@@ -1525,8 +1526,6 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
     CPUARMState *env = cs->env_ptr;
     unsigned int cur_el = arm_current_el(env);
     bool secure = arm_is_secure(env);
-    bool scr;
-    bool hcr;
     bool pstate_unmasked;
     int8_t unmasked = 0;
 
@@ -1540,31 +1539,10 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
 
     switch (excp_idx) {
     case EXCP_FIQ:
-        /* If FIQs are routed to EL3 or EL2 then there are cases where we
-         * override the CPSR.F in determining if the exception is masked or
-         * not.  If neither of these are set then we fall back to the CPSR.F
-         * setting otherwise we further assess the state below.
-         */
-        hcr = (env->cp15.hcr_el2 & HCR_FMO);
-        scr = (env->cp15.scr_el3 & SCR_FIQ);
-
-        /* When EL3 is 32-bit, the SCR.FW bit controls whether the CPSR.F bit
-         * masks FIQ interrupts when taken in non-secure state.  If SCR.FW is
-         * set then FIQs can be masked by CPSR.F when non-secure but only
-         * when FIQs are only routed to EL3.
-         */
-        scr = scr && !((env->cp15.scr_el3 & SCR_FW) && !hcr);
         pstate_unmasked = !(env->daif & PSTATE_F);
         break;
 
     case EXCP_IRQ:
-        /* When EL3 execution state is 32-bit, if HCR.IMO is set then we may
-         * override the CPSR.I masking when in non-secure state.  The SCR.IRQ
-         * setting has already been taken into consideration when setting the
-         * target EL, so it does not have a further affect here.
-         */
-        hcr = (env->cp15.hcr_el2 & HCR_IMO);
-        scr = false;
         pstate_unmasked = !(env->daif & PSTATE_I);
         break;
 
@@ -1589,13 +1567,58 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
      * interrupt.
      */
     if ((target_el > cur_el) && (target_el != 1)) {
-        /* ARM_FEATURE_AARCH64 enabled means the highest EL is AArch64.
-         * This code currently assumes that EL2 is not implemented
-         * (and so that highest EL will be 3 and the target_el also 3).
-         */
-        if (arm_feature(env, ARM_FEATURE_AARCH64) ||
-            ((scr || hcr) && (!secure))) {
-            unmasked = 1;
+        /* Exceptions targeting a higher EL may not be maskable */
+        if (arm_feature(env, ARM_FEATURE_AARCH64)) {
+            /* 64-bit masking rules are simple: exceptions to EL3
+             * can't be masked, and exceptions to EL2 can only be
+             * masked from Secure state. The HCR and SCR settings
+             * don't affect the masking logic, only the interrupt routing.
+             */
+            if (target_el == 3 || !secure) {
+                unmasked = 1;
+            }
+        } else {
+            /* The old 32-bit-only environment has a more complicated
+             * masking setup. HCR and SCR bits not only affect interrupt
+             * routing but also change the behaviour of masking.
+             */
+            bool hcr, scr;
+
+            switch (excp_idx) {
+            case EXCP_FIQ:
+                /* If FIQs are routed to EL3 or EL2 then there are cases where
+                 * we override the CPSR.F in determining if the exception is
+                 * masked or not. If neither of these are set then we fall back
+                 * to the CPSR.F setting otherwise we further assess the state
+                 * below.
+                 */
+                hcr = (env->cp15.hcr_el2 & HCR_FMO);
+                scr = (env->cp15.scr_el3 & SCR_FIQ);
+
+                /* When EL3 is 32-bit, the SCR.FW bit controls whether the
+                 * CPSR.F bit masks FIQ interrupts when taken in non-secure
+                 * state. If SCR.FW is set then FIQs can be masked by CPSR.F
+                 * when non-secure but only when FIQs are only routed to EL3.
+                 */
+                scr = scr && !((env->cp15.scr_el3 & SCR_FW) && !hcr);
+                break;
+            case EXCP_IRQ:
+                /* When EL3 execution state is 32-bit, if HCR.IMO is set then
+                 * we may override the CPSR.I masking when in non-secure state.
+                 * The SCR.IRQ setting has already been taken into consideration
+                 * when setting the target EL, so it does not have a further
+                 * effect here.
+                 */
+                hcr = (env->cp15.hcr_el2 & HCR_IMO);
+                scr = false;
+                break;
+            default:
+                g_assert_not_reached();
+            }
+
+            if ((scr || hcr) && !secure) {
+                unmasked = 1;
+            }
         }
     }
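
For AArch64 the rewritten branch reduces to a simple rule; as a condensed restatement (a sketch only, the full function above also folds in the PSTATE.F/PSTATE.I bits and the cases where the exception targets the current or a lower EL):

    #include <stdbool.h>

    /* Does a higher-EL exception override PSTATE masking on an AArch64 CPU? */
    static bool aa64_mask_overridden(int cur_el, int target_el, bool secure)
    {
        if (target_el <= cur_el || target_el == 1) {
            return false;   /* ordinary PSTATE.F / PSTATE.I masking applies */
        }
        /* Exceptions to EL3 can't be masked; exceptions to EL2 can only be
         * masked when taken from Secure state. */
        return target_el == 3 || !secure;
    }
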
 
diff --git a/target-arm/helper.c b/target-arm/helper.c
index e7fda37466..1966f9c045 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -15,10 +15,17 @@
 #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */
 
 #ifndef CONFIG_USER_ONLY
-static inline bool get_phys_addr(CPUARMState *env, target_ulong address,
-                                 int access_type, ARMMMUIdx mmu_idx,
-                                 hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
-                                 target_ulong *page_size, uint32_t *fsr);
+static bool get_phys_addr(CPUARMState *env, target_ulong address,
+                          int access_type, ARMMMUIdx mmu_idx,
+                          hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
+                          target_ulong *page_size, uint32_t *fsr,
+                          ARMMMUFaultInfo *fi);
+
+static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
+                               int access_type, ARMMMUIdx mmu_idx,
+                               hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
+                               target_ulong *page_size_ptr, uint32_t *fsr,
+                               ARMMMUFaultInfo *fi);
 
 /* Definitions for the PMCCNTR and PMCR registers */
 #define PMCRD   0x8
@@ -1778,9 +1785,10 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
     bool ret;
     uint64_t par64;
     MemTxAttrs attrs = {};
+    ARMMMUFaultInfo fi = {};
 
     ret = get_phys_addr(env, value, access_type, mmu_idx,
-                        &phys_addr, &attrs, &prot, &page_size, &fsr);
+                        &phys_addr, &attrs, &prot, &page_size, &fsr, &fi);
     if (extended_addresses_enabled(env)) {
         /* fsr is a DFSR/IFSR value for the long descriptor
          * translation table format, but with WnR always clear.
@@ -3230,6 +3238,10 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
     { .name = "MDCR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 1,
       .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "HPFAR_EL2", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
     REGINFO_SENTINEL
 };
 
@@ -3288,6 +3300,22 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
       .type = ARM_CP_ALIAS,
       .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[6]) },
+    { .name = "SPSR_IRQ", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_ALIAS,
+      .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 0,
+      .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[4]) },
+    { .name = "SPSR_ABT", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_ALIAS,
+      .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 1,
+      .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[2]) },
+    { .name = "SPSR_UND", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_ALIAS,
+      .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 2,
+      .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[3]) },
+    { .name = "SPSR_FIQ", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_ALIAS,
+      .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 3,
+      .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[5]) },
     { .name = "VBAR_EL2", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .writefn = vbar_write,
@@ -3460,6 +3488,14 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 1,
       .access = PL2_RW, .resetvalue = 0,
       .fieldoffset = offsetof(CPUARMState, cp15.mdcr_el2), },
+    { .name = "HPFAR", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns,
+      .fieldoffset = offsetof(CPUARMState, cp15.hpfar_el2) },
+    { .name = "HPFAR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4,
+      .access = PL2_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.hpfar_el2) },
     REGINFO_SENTINEL
 };
 
@@ -6051,6 +6087,28 @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
     return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
 }
 
+/* Translate S2 section/page access permissions to protection flags
+ *
+ * @env:     CPUARMState
+ * @s2ap:    The 2-bit stage2 access permissions (S2AP)
+ * @xn:      XN (execute-never) bit
+ */
+static int get_S2prot(CPUARMState *env, int s2ap, int xn)
+{
+    int prot = 0;
+
+    if (s2ap & 1) {
+        prot |= PAGE_READ;
+    }
+    if (s2ap & 2) {
+        prot |= PAGE_WRITE;
+    }
+    if (!xn) {
+        prot |= PAGE_EXEC;
+    }
+    return prot;
+}
+
 /* Translate section/page access permissions to protection flags
  *
  * @env:     CPUARMState
@@ -6155,6 +6213,32 @@ static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
     return true;
 }
 
+/* Translate a S1 pagetable walk through S2 if needed.  */
+static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx,
+                               hwaddr addr, MemTxAttrs txattrs,
+                               uint32_t *fsr,
+                               ARMMMUFaultInfo *fi)
+{
+    if ((mmu_idx == ARMMMUIdx_S1NSE0 || mmu_idx == ARMMMUIdx_S1NSE1) &&
+        !regime_translation_disabled(env, ARMMMUIdx_S2NS)) {
+        target_ulong s2size;
+        hwaddr s2pa;
+        int s2prot;
+        int ret;
+
+        ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_S2NS, &s2pa,
+                                 &txattrs, &s2prot, &s2size, fsr, fi);
+        if (ret) {
+            fi->s2addr = addr;
+            fi->stage2 = true;
+            fi->s1ptw = true;
+            return ~0;
+        }
+        addr = s2pa;
+    }
+    return addr;
+}
+
 /* All loads done in the course of a page table walk go through here.
  * TODO: rather than ignoring errors from physical memory reads (which
  * are external aborts in ARM terminology) we should propagate this
@@ -6162,26 +6246,43 @@ static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
  * was being done for a CPU load/store or an address translation instruction
  * (but not if it was for a debug access).
  */
-static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure)
+static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
+                            ARMMMUIdx mmu_idx, uint32_t *fsr,
+                            ARMMMUFaultInfo *fi)
 {
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
     MemTxAttrs attrs = {};
 
     attrs.secure = is_secure;
+    addr = S1_ptw_translate(env, mmu_idx, addr, attrs, fsr, fi);
+    if (fi->s1ptw) {
+        return 0;
+    }
     return address_space_ldl(cs->as, addr, attrs, NULL);
 }
 
-static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure)
+static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
+                            ARMMMUIdx mmu_idx, uint32_t *fsr,
+                            ARMMMUFaultInfo *fi)
 {
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
     MemTxAttrs attrs = {};
 
     attrs.secure = is_secure;
+    addr = S1_ptw_translate(env, mmu_idx, addr, attrs, fsr, fi);
+    if (fi->s1ptw) {
+        return 0;
+    }
     return address_space_ldq(cs->as, addr, attrs, NULL);
 }
 
 static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
                              int access_type, ARMMMUIdx mmu_idx,
                              hwaddr *phys_ptr, int *prot,
-                             target_ulong *page_size, uint32_t *fsr)
+                             target_ulong *page_size, uint32_t *fsr,
+                             ARMMMUFaultInfo *fi)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
     int code;
@@ -6201,7 +6302,8 @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
         code = 5;
         goto do_fault;
     }
-    desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
+    desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
+                       mmu_idx, fsr, fi);
     type = (desc & 3);
     domain = (desc >> 5) & 0x0f;
     if (regime_el(env, mmu_idx) == 1) {
@@ -6237,7 +6339,8 @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
             /* Fine pagetable.  */
             table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
         }
-        desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
+        desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
+                           mmu_idx, fsr, fi);
         switch (desc & 3) {
         case 0: /* Page translation fault.  */
             code = 7;
@@ -6294,7 +6397,8 @@ do_fault:
 static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
                              int access_type, ARMMMUIdx mmu_idx,
                              hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
-                             target_ulong *page_size, uint32_t *fsr)
+                             target_ulong *page_size, uint32_t *fsr,
+                             ARMMMUFaultInfo *fi)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
     int code;
@@ -6317,7 +6421,8 @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
         code = 5;
         goto do_fault;
     }
-    desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
+    desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
+                       mmu_idx, fsr, fi);
     type = (desc & 3);
     if (type == 0 || (type == 3 && !arm_feature(env, ARM_FEATURE_PXN))) {
         /* Section translation fault, or attempt to use the encoding
@@ -6368,7 +6473,8 @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
         ns = extract32(desc, 3, 1);
         /* Lookup l2 entry.  */
         table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
-        desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
+        desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
+                           mmu_idx, fsr, fi);
         ap = ((desc >> 4) & 3) | ((desc >> 7) & 4);
         switch (desc & 3) {
         case 0: /* Page translation fault.  */
@@ -6442,17 +6548,78 @@ typedef enum {
     permission_fault = 3,
 } MMUFaultType;
 
+/*
+ * check_s2_startlevel
+ * @cpu:        ARMCPU
+ * @is_aa64:    True if the translation regime is in AArch64 state
+ * @level:      Suggested starting level
+ * @inputsize:  Bitsize of IPAs
+ * @stride:     Page-table stride (See the ARM ARM)
+ *
+ * Returns true if the suggested starting level is OK and false otherwise.
+ */
+static bool check_s2_startlevel(ARMCPU *cpu, bool is_aa64, int level,
+                                int inputsize, int stride)
+{
+    /* Negative levels are never allowed.  */
+    if (level < 0) {
+        return false;
+    }
+
+    if (is_aa64) {
+        unsigned int pamax = arm_pamax(cpu);
+
+        switch (stride) {
+        case 13: /* 64KB Pages.  */
+            if (level == 0 || (level == 1 && pamax <= 42)) {
+                return false;
+            }
+            break;
+        case 11: /* 16KB Pages.  */
+            if (level == 0 || (level == 1 && pamax <= 40)) {
+                return false;
+            }
+            break;
+        case 9: /* 4KB Pages.  */
+            if (level == 0 && pamax <= 42) {
+                return false;
+            }
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    } else {
+        const int grainsize = stride + 3;
+        int startsizecheck;
+
+        /* AArch32 only supports 4KB pages. Assert on that.  */
+        assert(stride == 9);
+
+        if (level == 0) {
+            return false;
+        }
+
+        startsizecheck = inputsize - ((3 - level) * stride + grainsize);
+        if (startsizecheck < 1 || startsizecheck > stride + 4) {
+            return false;
+        }
+    }
+    return true;
+}
+
 static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
                                int access_type, ARMMMUIdx mmu_idx,
                                hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
-                               target_ulong *page_size_ptr, uint32_t *fsr)
+                               target_ulong *page_size_ptr, uint32_t *fsr,
+                               ARMMMUFaultInfo *fi)
 {
-    CPUState *cs = CPU(arm_env_get_cpu(env));
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    CPUState *cs = CPU(cpu);
     /* Read an LPAE long-descriptor translation table. */
     MMUFaultType fault_type = translation_fault;
     uint32_t level = 1;
     uint32_t epd = 0;
-    int32_t tsz;
+    int32_t t0sz, t1sz;
     uint32_t tg;
     uint64_t ttbr;
     int ttbr_select;
@@ -6460,8 +6627,9 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
     uint32_t tableattrs;
     target_ulong page_size;
     uint32_t attrs;
-    int32_t granule_sz = 9;
+    int32_t stride = 9;
     int32_t va_size = 32;
+    int inputsize;
     int32_t tbi = 0;
     TCR *tcr = regime_tcr(env, mmu_idx);
     int ap, ns, xn, pxn;
@@ -6507,12 +6675,28 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
      * This is a Non-secure PL0/1 stage 1 translation, so controlled by
      * TTBCR/TTBR0/TTBR1 in accordance with ARM ARM DDI0406C table B-32:
      */
-    uint32_t t0sz = extract32(tcr->raw_tcr, 0, 6);
     if (va_size == 64) {
+        /* AArch64 translation.  */
+        t0sz = extract32(tcr->raw_tcr, 0, 6);
         t0sz = MIN(t0sz, 39);
         t0sz = MAX(t0sz, 16);
+    } else if (mmu_idx != ARMMMUIdx_S2NS) {
+        /* AArch32 stage 1 translation.  */
+        t0sz = extract32(tcr->raw_tcr, 0, 3);
+    } else {
+        /* AArch32 stage 2 translation.  */
+        bool sext = extract32(tcr->raw_tcr, 4, 1);
+        bool sign = extract32(tcr->raw_tcr, 3, 1);
+        t0sz = sextract32(tcr->raw_tcr, 0, 4);
+
+        /* If the sign-extend bit is not the same as t0sz[3], the result
+         * is unpredictable. Flag this as a guest error.  */
+        if (sign != sext) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "AArch32: VTCR.S / VTCR.T0SZ[3] mismatch\n");
+        }
     }
-    uint32_t t1sz = extract32(tcr->raw_tcr, 16, 6);
+    t1sz = extract32(tcr->raw_tcr, 16, 6);
     if (va_size == 64) {
         t1sz = MIN(t1sz, 39);
         t1sz = MAX(t1sz, 16);
@@ -6548,14 +6732,14 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         if (el < 2) {
             epd = extract32(tcr->raw_tcr, 7, 1);
         }
-        tsz = t0sz;
+        inputsize = va_size - t0sz;
 
         tg = extract32(tcr->raw_tcr, 14, 2);
         if (tg == 1) { /* 64KB pages */
-            granule_sz = 13;
+            stride = 13;
         }
         if (tg == 2) { /* 16KB pages */
-            granule_sz = 11;
+            stride = 11;
         }
     } else {
         /* We should only be here if TTBR1 is valid */
@@ -6563,19 +6747,19 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
 
         ttbr = regime_ttbr(env, mmu_idx, 1);
         epd = extract32(tcr->raw_tcr, 23, 1);
-        tsz = t1sz;
+        inputsize = va_size - t1sz;
 
         tg = extract32(tcr->raw_tcr, 30, 2);
         if (tg == 3)  { /* 64KB pages */
-            granule_sz = 13;
+            stride = 13;
         }
         if (tg == 1) { /* 16KB pages */
-            granule_sz = 11;
+            stride = 11;
         }
     }
 
     /* Here we should have set up all the parameters for the translation:
-     * va_size, ttbr, epd, tsz, granule_sz, tbi
+     * va_size, inputsize, ttbr, epd, stride, tbi
      */
 
     if (epd) {
@@ -6585,32 +6769,60 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         goto do_fault;
     }
 
-    /* The starting level depends on the virtual address size (which can be
-     * up to 48 bits) and the translation granule size. It indicates the number
-     * of strides (granule_sz bits at a time) needed to consume the bits
-     * of the input address. In the pseudocode this is:
-     *  level = 4 - RoundUp((inputsize - grainsize) / stride)
-     * where their 'inputsize' is our 'va_size - tsz', 'grainsize' is
-     * our 'granule_sz + 3' and 'stride' is our 'granule_sz'.
-     * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying:
-     *     = 4 - (va_size - tsz - granule_sz - 3 + granule_sz - 1) / granule_sz
-     *     = 4 - (va_size - tsz - 4) / granule_sz;
-     */
-    level = 4 - (va_size - tsz - 4) / granule_sz;
+    if (mmu_idx != ARMMMUIdx_S2NS) {
+        /* The starting level depends on the virtual address size (which can
+         * be up to 48 bits) and the translation granule size. It indicates
+         * the number of strides (stride bits at a time) needed to
+         * consume the bits of the input address. In the pseudocode this is:
+         *  level = 4 - RoundUp((inputsize - grainsize) / stride)
+         * where their 'inputsize' is our 'inputsize', 'grainsize' is
+         * our 'stride + 3' and 'stride' is our 'stride'.
+         * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying:
+         * = 4 - (inputsize - stride - 3 + stride - 1) / stride
+         * = 4 - (inputsize - 4) / stride;
+         */
+        level = 4 - (inputsize - 4) / stride;
+    } else {
+        /* For stage 2 translations the starting level is specified by the
+         * VTCR_EL2.SL0 field (whose interpretation depends on the page size)
+         */
+        int startlevel = extract32(tcr->raw_tcr, 6, 2);
+        bool ok;
+
+        if (va_size == 32 || stride == 9) {
+            /* AArch32 or 4KB pages */
+            level = 2 - startlevel;
+        } else {
+            /* 16KB or 64KB pages */
+            level = 3 - startlevel;
+        }
+
+        /* Check that the starting level is valid. */
+        ok = check_s2_startlevel(cpu, va_size == 64, level,
+                                 inputsize, stride);
+        if (!ok) {
+            /* AArch64 reports these as level 0 faults.
+             * AArch32 reports these as level 1 faults.
+             */
+            level = va_size == 64 ? 0 : 1;
+            fault_type = translation_fault;
+            goto do_fault;
+        }
+    }
 
     /* Clear the vaddr bits which aren't part of the within-region address,
      * so that we don't have to special case things when calculating the
      * first descriptor address.
      */
-    if (tsz) {
-        address &= (1ULL << (va_size - tsz)) - 1;
+    if (va_size != inputsize) {
+        address &= (1ULL << inputsize) - 1;
     }
 
-    descmask = (1ULL << (granule_sz + 3)) - 1;
+    descmask = (1ULL << (stride + 3)) - 1;
 
     /* Now we can extract the actual base address from the TTBR */
     descaddr = extract64(ttbr, 0, 48);
-    descaddr &= ~((1ULL << (va_size - tsz - (granule_sz * (4 - level)))) - 1);
+    descaddr &= ~((1ULL << (inputsize - (stride * (4 - level)))) - 1);
 
     /* Secure accesses start with the page table in secure memory and
      * can be downgraded to non-secure at any step. Non-secure accesses
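
The starting-level formula in the stage 1 branch above, level = 4 - (inputsize - 4) / stride, can be spot-checked for the 4KB granule (stride = 9); the T0SZ values below are illustrative, not taken from the patch:

    #include <stdio.h>

    int main(void)
    {
        int stride = 9;               /* 4KB granule */
        int inputsize = 48;           /* AArch64, T0SZ = 16 */
        printf("%d\n", 4 - (inputsize - 4) / stride);   /* 4 - 44/9 = 0 */

        inputsize = 32;               /* AArch32 LPAE stage 1, full 32-bit VA */
        printf("%d\n", 4 - (inputsize - 4) / stride);   /* 4 - 28/9 = 1 */
        return 0;
    }
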
@@ -6622,10 +6834,14 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         uint64_t descriptor;
         bool nstable;
 
-        descaddr |= (address >> (granule_sz * (4 - level))) & descmask;
+        descaddr |= (address >> (stride * (4 - level))) & descmask;
         descaddr &= ~7ULL;
         nstable = extract32(tableattrs, 4, 1);
-        descriptor = arm_ldq_ptw(cs, descaddr, !nstable);
+        descriptor = arm_ldq_ptw(cs, descaddr, !nstable, mmu_idx, fsr, fi);
+        if (fi->s1ptw) {
+            goto do_fault;
+        }
+
         if (!(descriptor & 1) ||
             (!(descriptor & 2) && (level == 3))) {
             /* Invalid, or the Reserved level 3 encoding */
@@ -6647,11 +6863,17 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
          * These are basically the same thing, although the number
          * of bits we pull in from the vaddr varies.
          */
-        page_size = (1ULL << ((granule_sz * (4 - level)) + 3));
+        page_size = (1ULL << ((stride * (4 - level)) + 3));
         descaddr |= (address & (page_size - 1));
-        /* Extract attributes from the descriptor and merge with table attrs */
+        /* Extract attributes from the descriptor */
         attrs = extract64(descriptor, 2, 10)
             | (extract64(descriptor, 52, 12) << 10);
+
+        if (mmu_idx == ARMMMUIdx_S2NS) {
+            /* Stage 2 table descriptors do not include any attribute fields */
+            break;
+        }
+        /* Merge in attributes from table descriptors */
         attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */
         attrs |= extract32(tableattrs, 3, 1) << 5; /* APTable[1] => AP[2] */
         /* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
@@ -6673,11 +6895,16 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
     }
 
     ap = extract32(attrs, 4, 2);
-    ns = extract32(attrs, 3, 1);
     xn = extract32(attrs, 12, 1);
-    pxn = extract32(attrs, 11, 1);
 
-    *prot = get_S1prot(env, mmu_idx, va_size == 64, ap, ns, xn, pxn);
+    if (mmu_idx == ARMMMUIdx_S2NS) {
+        ns = true;
+        *prot = get_S2prot(env, ap, xn);
+    } else {
+        ns = extract32(attrs, 3, 1);
+        pxn = extract32(attrs, 11, 1);
+        *prot = get_S1prot(env, mmu_idx, va_size == 64, ap, ns, xn, pxn);
+    }
 
     fault_type = permission_fault;
     if (!(*prot & (1 << access_type))) {
@@ -6698,6 +6925,8 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
 do_fault:
     /* Long-descriptor format IFSR/DFSR value */
     *fsr = (1 << 9) | (fault_type << 2) | level;
+    /* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2.  */
+    fi->stage2 = fi->s1ptw || (mmu_idx == ARMMMUIdx_S2NS);
     return true;
 }
 
@@ -6960,20 +7189,45 @@ static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
  * @page_size: set to the size of the page containing phys_ptr
  * @fsr: set to the DFSR/IFSR value on failure
  */
-static inline bool get_phys_addr(CPUARMState *env, target_ulong address,
-                                 int access_type, ARMMMUIdx mmu_idx,
-                                 hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
-                                 target_ulong *page_size, uint32_t *fsr)
+static bool get_phys_addr(CPUARMState *env, target_ulong address,
+                          int access_type, ARMMMUIdx mmu_idx,
+                          hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
+                          target_ulong *page_size, uint32_t *fsr,
+                          ARMMMUFaultInfo *fi)
 {
     if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) {
-        /* TODO: when we support EL2 we should here call ourselves recursively
-         * to do the stage 1 and then stage 2 translations. The arm_ld*_ptw
-         * functions will also need changing to perform ARMMMUIdx_S2NS loads
-         * rather than direct physical memory loads when appropriate.
-         * For non-EL2 CPUs a stage1+stage2 translation is just stage 1.
+        /* Call ourselves recursively to do the stage 1 and then stage 2
+         * translations.
          */
-        assert(!arm_feature(env, ARM_FEATURE_EL2));
-        mmu_idx += ARMMMUIdx_S1NSE0;
+        if (arm_feature(env, ARM_FEATURE_EL2)) {
+            hwaddr ipa;
+            int s2_prot;
+            int ret;
+
+            ret = get_phys_addr(env, address, access_type,
+                                mmu_idx + ARMMMUIdx_S1NSE0, &ipa, attrs,
+                                prot, page_size, fsr, fi);
+
+            /* If S1 fails or S2 is disabled, return early.  */
+            if (ret || regime_translation_disabled(env, ARMMMUIdx_S2NS)) {
+                *phys_ptr = ipa;
+                return ret;
+            }
+
+            /* S1 is done. Now do S2 translation.  */
+            ret = get_phys_addr_lpae(env, ipa, access_type, ARMMMUIdx_S2NS,
+                                     phys_ptr, attrs, &s2_prot,
+                                     page_size, fsr, fi);
+            fi->s2addr = ipa;
+            /* Combine the S1 and S2 perms.  */
+            *prot &= s2_prot;
+            return ret;
+        } else {
+            /*
+             * For non-EL2 CPUs a stage1+stage2 translation is just stage 1.
+             */
+            mmu_idx += ARMMMUIdx_S1NSE0;
+        }
     }
 
     /* The page table entries may downgrade secure to non-secure, but
@@ -7022,13 +7276,13 @@ static inline bool get_phys_addr(CPUARMState *env, target_ulong address,
 
     if (regime_using_lpae_format(env, mmu_idx)) {
         return get_phys_addr_lpae(env, address, access_type, mmu_idx, phys_ptr,
-                                  attrs, prot, page_size, fsr);
+                                  attrs, prot, page_size, fsr, fi);
     } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) {
         return get_phys_addr_v6(env, address, access_type, mmu_idx, phys_ptr,
-                                attrs, prot, page_size, fsr);
+                                attrs, prot, page_size, fsr, fi);
     } else {
         return get_phys_addr_v5(env, address, access_type, mmu_idx, phys_ptr,
-                                prot, page_size, fsr);
+                                prot, page_size, fsr, fi);
     }
 }
 
@@ -7037,7 +7291,8 @@ static inline bool get_phys_addr(CPUARMState *env, target_ulong address,
  * fsr with ARM DFSR/IFSR fault register format value on failure.
  */
 bool arm_tlb_fill(CPUState *cs, vaddr address,
-                  int access_type, int mmu_idx, uint32_t *fsr)
+                  int access_type, int mmu_idx, uint32_t *fsr,
+                  ARMMMUFaultInfo *fi)
 {
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
@@ -7048,7 +7303,7 @@ bool arm_tlb_fill(CPUState *cs, vaddr address,
     MemTxAttrs attrs = {};
 
     ret = get_phys_addr(env, address, access_type, mmu_idx, &phys_addr,
-                        &attrs, &prot, &page_size, fsr);
+                        &attrs, &prot, &page_size, fsr, fi);
     if (!ret) {
         /* Map a single [sub]page.  */
         phys_addr &= TARGET_PAGE_MASK;
@@ -7071,9 +7326,10 @@ hwaddr arm_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
     bool ret;
     uint32_t fsr;
     MemTxAttrs attrs = {};
+    ARMMMUFaultInfo fi = {};
 
     ret = get_phys_addr(env, addr, 0, cpu_mmu_index(env, false), &phys_addr,
-                        &attrs, &prot, &page_size, &fsr);
+                        &attrs, &prot, &page_size, &fsr, &fi);
 
     if (ret) {
         return -1;
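
Aside: the hunk above makes get_phys_addr() call itself for the stage 1 walk and then runs a stage 2 walk on the resulting IPA, ANDing the two permission sets. A stand-alone sketch of that shape (all types, tables and names here are invented for illustration, not QEMU code):

    #include <stdbool.h>
    #include <stdint.h>
    #include <inttypes.h>
    #include <stdio.h>

    typedef struct {
        uint64_t addr;   /* output address of this stage */
        int prot;        /* rwx permission bits */
        bool fault;
    } Xlat;

    /* Toy single-entry "page tables" standing in for the real walkers. */
    static Xlat stage1(uint64_t va)  { return (Xlat){ va + 0x10000, 0x7, false }; }
    static Xlat stage2(uint64_t ipa) { return (Xlat){ ipa + 0x40000000ULL, 0x5, false }; }

    static Xlat translate(uint64_t va, bool stage2_enabled)
    {
        Xlat s1 = stage1(va);
        if (s1.fault || !stage2_enabled) {
            return s1;                 /* S1 fault, or S2 disabled: done */
        }
        Xlat s2 = stage2(s1.addr);     /* walk S2 with the IPA from S1 */
        s2.prot &= s1.prot;            /* combine S1 and S2 permissions */
        return s2;
    }

    int main(void)
    {
        Xlat r = translate(0x1000, true);
        printf("PA=0x%" PRIx64 " prot=%d\n", r.addr, r.prot);
        return 0;
    }
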
diff --git a/target-arm/internals.h b/target-arm/internals.h
index 36a56aadb0..412827bcbf 100644
--- a/target-arm/internals.h
+++ b/target-arm/internals.h
@@ -152,6 +152,31 @@ static inline void update_spsel(CPUARMState *env, uint32_t imm)
     aarch64_restore_sp(env, cur_el);
 }
 
+/*
+ * arm_pamax
+ * @cpu: ARMCPU
+ *
+ * Returns the implementation defined bit-width of physical addresses.
+ * The ARMv8 reference manuals refer to this as PAMax().
+ */
+static inline unsigned int arm_pamax(ARMCPU *cpu)
+{
+    static const unsigned int pamax_map[] = {
+        [0] = 32,
+        [1] = 36,
+        [2] = 40,
+        [3] = 42,
+        [4] = 44,
+        [5] = 48,
+    };
+    unsigned int parange = extract32(cpu->id_aa64mmfr0, 0, 4);
+
+    /* id_aa64mmfr0 is a read-only register so values outside of the
+     * supported mappings can be considered an implementation error.  */
+    assert(parange < ARRAY_SIZE(pamax_map));
+    return pamax_map[parange];
+}
+
 /* Return true if extended addresses are enabled.
  * This is always the case if our translation regime is 64 bit,
  * but depends on TTBCR.EAE for 32 bit.
@@ -389,8 +414,21 @@ bool arm_is_psci_call(ARMCPU *cpu, int excp_type);
 void arm_handle_psci_call(ARMCPU *cpu);
 #endif
 
+/**
+ * ARMMMUFaultInfo: Information describing an ARM MMU Fault
+ * @s2addr: Address that caused a fault at stage 2
+ * @stage2: True if we faulted at stage 2
+ * @s1ptw: True if we faulted at stage 2 while doing a stage 1 page-table walk
+ */
+typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
+struct ARMMMUFaultInfo {
+    target_ulong s2addr;
+    bool stage2;
+    bool s1ptw;
+};
+
 /* Do a page table walk and add page to TLB if possible */
 bool arm_tlb_fill(CPUState *cpu, vaddr address, int rw, int mmu_idx,
-                  uint32_t *fsr);
+                  uint32_t *fsr, ARMMMUFaultInfo *fi);
 
 #endif
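
Aside: a quick illustration of what a PAMax()-style lookup like arm_pamax() above can be used for -- PARange 0b0101 read from ID_AA64MMFR0 means 48 implemented physical address bits, from which a mask of valid PA bits follows (illustrative snippet, not QEMU code):

    #include <assert.h>
    #include <stdint.h>

    static const unsigned int pamax_map[] = { 32, 36, 40, 42, 44, 48 };

    int main(void)
    {
        unsigned int parange = 5;                    /* ID_AA64MMFR0.PARange */
        uint64_t pa_mask = (1ULL << pamax_map[parange]) - 1;
        assert(pa_mask == 0xFFFFFFFFFFFFULL);        /* 48 usable PA bits */
        return 0;
    }
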
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 7929c71b43..a4c4ebf9cd 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -83,19 +83,27 @@ void tlb_fill(CPUState *cs, target_ulong addr, int is_write, int mmu_idx,
 {
     bool ret;
     uint32_t fsr = 0;
+    ARMMMUFaultInfo fi = {};
 
-    ret = arm_tlb_fill(cs, addr, is_write, mmu_idx, &fsr);
+    ret = arm_tlb_fill(cs, addr, is_write, mmu_idx, &fsr, &fi);
     if (unlikely(ret)) {
         ARMCPU *cpu = ARM_CPU(cs);
         CPUARMState *env = &cpu->env;
         uint32_t syn, exc;
-        bool same_el = (arm_current_el(env) != 0);
+        unsigned int target_el;
+        bool same_el;
 
         if (retaddr) {
             /* now we have a real cpu fault */
             cpu_restore_state(cs, retaddr);
         }
 
+        target_el = exception_target_el(env);
+        if (fi.stage2) {
+            target_el = 2;
+            env->cp15.hpfar_el2 = extract64(fi.s2addr, 12, 47) << 4;
+        }
+        same_el = arm_current_el(env) == target_el;
         /* AArch64 syndrome does not have an LPAE bit */
         syn = fsr & ~(1 << 9);
 
@@ -103,10 +111,10 @@ void tlb_fill(CPUState *cs, target_ulong addr, int is_write, int mmu_idx,
          * information; this is always true for exceptions reported to EL1.
          */
         if (is_write == 2) {
-            syn = syn_insn_abort(same_el, 0, 0, syn);
+            syn = syn_insn_abort(same_el, 0, fi.s1ptw, syn);
             exc = EXCP_PREFETCH_ABORT;
         } else {
-            syn = syn_data_abort(same_el, 0, 0, 0, is_write == 1, syn);
+            syn = syn_data_abort(same_el, 0, 0, fi.s1ptw, is_write == 1, syn);
             if (is_write == 1 && arm_feature(env, ARM_FEATURE_V6)) {
                 fsr |= (1 << 11);
             }
@@ -115,7 +123,7 @@ void tlb_fill(CPUState *cs, target_ulong addr, int is_write, int mmu_idx,
 
         env->exception.vaddress = addr;
         env->exception.fsr = fsr;
-        raise_exception(env, exc, syn, exception_target_el(env));
+        raise_exception(env, exc, syn, target_el);
     }
 }
 #endif
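
Aside: the new stage-2 path in tlb_fill() above reports the faulting IPA in HPFAR_EL2 by placing the IPA's page number (the address shifted right by 12) at bit 4, mirroring extract64(fi.s2addr, 12, 47) << 4. A stand-alone check of that packing (illustrative only, not QEMU code):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t hpfar_from_ipa(uint64_t ipa)
    {
        uint64_t page = (ipa >> 12) & ((1ULL << 47) - 1);  /* extract64(ipa, 12, 47) */
        return page << 4;
    }

    int main(void)
    {
        /* A fault at IPA 0x80012345: HPFAR holds the page number at bit 4. */
        assert(hpfar_from_ipa(0x80012345ULL) == 0x800120ULL);
        return 0;
    }
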
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 19f9d8d2c8..83b8376210 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -11096,8 +11096,11 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
                         dc->is_jmp = DISAS_UPDATE;
                     } else {
                         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
-                        /* Advance PC so that clearing the breakpoint will
-                           invalidate this TB.  */
+                        /* The address covered by the breakpoint must be
+                           included in [tb->pc, tb->pc + tb->size) in order
+                           for it to be properly cleared -- thus we
+                           increment the PC here so that the logic setting
+                           tb->size below does the right thing.  */
                         dc->pc += 4;
                         goto done_generating;
                     }
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 9f1d740b4e..b10a455e70 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -11179,6 +11179,35 @@ undef:
                        default_exception_el(s));
 }
 
+static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
+{
+    /* Return true if the insn at dc->pc might cross a page boundary.
+     * (False positives are OK, false negatives are not.)
+     */
+    uint16_t insn;
+
+    if ((s->pc & 3) == 0) {
+        /* At a 4-aligned address we can't be crossing a page */
+        return false;
+    }
+
+    /* This must be a Thumb insn */
+    insn = arm_lduw_code(env, s->pc, s->bswap_code);
+
+    if ((insn >> 11) >= 0x1d) {
+        /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
+     * first half of a 32-bit Thumb insn. Thumb-1 cores might
+         * end up actually treating this as two 16-bit insns (see the
+         * code at the start of disas_thumb2_insn()) but we don't bother
+         * to check for that as it is unlikely, and false positives here
+         * are harmless.
+         */
+        return true;
+    }
+    /* Definitely a 16-bit insn, can't be crossing a page. */
+    return false;
+}
+
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
    basic block 'tb'.  */
 void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
@@ -11190,6 +11219,7 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
     target_ulong next_page_start;
     int num_insns;
     int max_insns;
+    bool end_of_page;
 
     /* generate intermediate code */
 
@@ -11348,8 +11378,11 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
                         dc->is_jmp = DISAS_UPDATE;
                     } else {
                         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
-                        /* Advance PC so that clearing the breakpoint will
-                           invalidate this TB.  */
+                        /* The address covered by the breakpoint must be
+                           included in [tb->pc, tb->pc + tb->size) in order
+                           for it to be properly cleared -- thus we
+                           increment the PC here so that the logic setting
+                           tb->size below does the right thing.  */
                         /* TODO: Advance PC by correct instruction length to
                          * avoid disassembler error messages */
                         dc->pc += 2;
@@ -11411,11 +11444,24 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
          * Otherwise the subsequent code could get translated several times.
          * Also stop translation when a page boundary is reached.  This
          * ensures prefetch aborts occur at the right place.  */
+
+        /* We want to stop the TB if the next insn starts in a new page,
+         * or if it spans between this page and the next. This means that
+         * if we're looking at the last halfword in the page we need to
+         * see if it's a 16-bit Thumb insn (which will fit in this TB)
+         * or a 32-bit Thumb insn (which won't).
+         * This is to avoid generating a silly TB with a single 16-bit insn
+         * in it at the end of this page (which would execute correctly
+         * but isn't very efficient).
+         */
+        end_of_page = (dc->pc >= next_page_start) ||
+            ((dc->pc >= next_page_start - 3) && insn_crosses_page(env, dc));
+
     } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
              !dc->ss_active &&
-             dc->pc < next_page_start &&
+             !end_of_page &&
              num_insns < max_insns);
 
     if (tb->cflags & CF_LAST_IO) {
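
Aside: insn_crosses_page() above decides whether the last halfword of a page starts a 32-bit Thumb insn by checking the top five bits for 0b11101/0b11110/0b11111, i.e. (insn >> 11) >= 0x1d. A minimal stand-alone version of that test (illustrative only):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool thumb_first_half_of_32bit(uint16_t hw)
    {
        return (hw >> 11) >= 0x1d;     /* 0b11101, 0b11110, 0b11111 */
    }

    int main(void)
    {
        assert(!thumb_first_half_of_32bit(0x4770));  /* BX lr: 16-bit insn */
        assert(thumb_first_half_of_32bit(0xf000));   /* first half of a BL */
        return 0;
    }
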
diff --git a/target-cris/translate.c b/target-cris/translate.c
index 964845c461..2d710cc108 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -3166,6 +3166,11 @@ void gen_intermediate_code(CPUCRISState *env, struct TranslationBlock *tb)
             tcg_gen_movi_tl(env_pc, dc->pc);
             t_gen_raise_exception(EXCP_DEBUG);
             dc->is_jmp = DISAS_UPDATE;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc->pc += 2;
             break;
         }
 
diff --git a/target-i386/bpt_helper.c b/target-i386/bpt_helper.c
index c071c24782..dac1b1a360 100644
--- a/target-i386/bpt_helper.c
+++ b/target-i386/bpt_helper.c
@@ -21,64 +21,147 @@
 #include "exec/helper-proto.h"
 
 
-void hw_breakpoint_insert(CPUX86State *env, int index)
+#ifndef CONFIG_USER_ONLY
+static inline bool hw_local_breakpoint_enabled(unsigned long dr7, int index)
+{
+    return (dr7 >> (index * 2)) & 1;
+}
+
+static inline bool hw_global_breakpoint_enabled(unsigned long dr7, int index)
+{
+    return (dr7 >> (index * 2)) & 2;
+
+}
+static inline bool hw_breakpoint_enabled(unsigned long dr7, int index)
+{
+    return hw_global_breakpoint_enabled(dr7, index) ||
+           hw_local_breakpoint_enabled(dr7, index);
+}
+
+static inline int hw_breakpoint_type(unsigned long dr7, int index)
+{
+    return (dr7 >> (DR7_TYPE_SHIFT + (index * 4))) & 3;
+}
+
+static inline int hw_breakpoint_len(unsigned long dr7, int index)
+{
+    int len = ((dr7 >> (DR7_LEN_SHIFT + (index * 4))) & 3);
+    return (len == 2) ? 8 : len + 1;
+}
+
+static int hw_breakpoint_insert(CPUX86State *env, int index)
 {
     CPUState *cs = CPU(x86_env_get_cpu(env));
-    int type = 0, err = 0;
+    target_ulong dr7 = env->dr[7];
+    target_ulong drN = env->dr[index];
+    int err = 0;
 
-    switch (hw_breakpoint_type(env->dr[7], index)) {
+    switch (hw_breakpoint_type(dr7, index)) {
     case DR7_TYPE_BP_INST:
-        if (hw_breakpoint_enabled(env->dr[7], index)) {
-            err = cpu_breakpoint_insert(cs, env->dr[index], BP_CPU,
+        if (hw_breakpoint_enabled(dr7, index)) {
+            err = cpu_breakpoint_insert(cs, drN, BP_CPU,
                                         &env->cpu_breakpoint[index]);
         }
         break;
-    case DR7_TYPE_DATA_WR:
-        type = BP_CPU | BP_MEM_WRITE;
-        break;
+
     case DR7_TYPE_IO_RW:
-        /* No support for I/O watchpoints yet */
+        /* Notice when we should enable calls to bpt_io.  */
+        return hw_breakpoint_enabled(env->dr[7], index)
+               ? HF_IOBPT_MASK : 0;
+
+    case DR7_TYPE_DATA_WR:
+        if (hw_breakpoint_enabled(dr7, index)) {
+            err = cpu_watchpoint_insert(cs, drN,
+                                        hw_breakpoint_len(dr7, index),
+                                        BP_CPU | BP_MEM_WRITE,
+                                        &env->cpu_watchpoint[index]);
+        }
         break;
+
     case DR7_TYPE_DATA_RW:
-        type = BP_CPU | BP_MEM_ACCESS;
+        if (hw_breakpoint_enabled(dr7, index)) {
+            err = cpu_watchpoint_insert(cs, drN,
+                                        hw_breakpoint_len(dr7, index),
+                                        BP_CPU | BP_MEM_ACCESS,
+                                        &env->cpu_watchpoint[index]);
+        }
         break;
     }
-
-    if (type != 0) {
-        err = cpu_watchpoint_insert(cs, env->dr[index],
-                                    hw_breakpoint_len(env->dr[7], index),
-                                    type, &env->cpu_watchpoint[index]);
-    }
-
     if (err) {
         env->cpu_breakpoint[index] = NULL;
     }
+    return 0;
 }
 
-void hw_breakpoint_remove(CPUX86State *env, int index)
+static void hw_breakpoint_remove(CPUX86State *env, int index)
 {
-    CPUState *cs;
+    CPUState *cs = CPU(x86_env_get_cpu(env));
 
-    if (!env->cpu_breakpoint[index]) {
-        return;
-    }
-    cs = CPU(x86_env_get_cpu(env));
     switch (hw_breakpoint_type(env->dr[7], index)) {
     case DR7_TYPE_BP_INST:
-        if (hw_breakpoint_enabled(env->dr[7], index)) {
+        if (env->cpu_breakpoint[index]) {
             cpu_breakpoint_remove_by_ref(cs, env->cpu_breakpoint[index]);
+            env->cpu_breakpoint[index] = NULL;
         }
         break;
+
     case DR7_TYPE_DATA_WR:
     case DR7_TYPE_DATA_RW:
-        cpu_watchpoint_remove_by_ref(cs, env->cpu_watchpoint[index]);
+        if (env->cpu_breakpoint[index]) {
+            cpu_watchpoint_remove_by_ref(cs, env->cpu_watchpoint[index]);
+            env->cpu_breakpoint[index] = NULL;
+        }
         break;
+
     case DR7_TYPE_IO_RW:
-        /* No support for I/O watchpoints yet */
+        /* HF_IOBPT_MASK cleared elsewhere.  */
         break;
     }
 }
 
+void cpu_x86_update_dr7(CPUX86State *env, uint32_t new_dr7)
+{
+    target_ulong old_dr7 = env->dr[7];
+    int iobpt = 0;
+    int i;
+
+    new_dr7 |= DR7_FIXED_1;
+
+    /* If nothing is changing except the global/local enable bits,
+       then we can make the change more efficient.  */
+    if (((old_dr7 ^ new_dr7) & ~0xff) == 0) {
+        /* Fold the global and local enable bits together into the
+           global fields, then xor to show which registers have
+           changed collective enable state.  */
+        int mod = ((old_dr7 | old_dr7 * 2) ^ (new_dr7 | new_dr7 * 2)) & 0xff;
+
+        for (i = 0; i < DR7_MAX_BP; i++) {
+            if ((mod & (2 << i * 2)) && !hw_breakpoint_enabled(new_dr7, i)) {
+                hw_breakpoint_remove(env, i);
+            }
+        }
+        env->dr[7] = new_dr7;
+        for (i = 0; i < DR7_MAX_BP; i++) {
+            if (mod & (2 << i * 2) && hw_breakpoint_enabled(new_dr7, i)) {
+                iobpt |= hw_breakpoint_insert(env, i);
+            } else if (hw_breakpoint_type(new_dr7, i) == DR7_TYPE_IO_RW
+                       && hw_breakpoint_enabled(new_dr7, i)) {
+                iobpt |= HF_IOBPT_MASK;
+            }
+        }
+    } else {
+        for (i = 0; i < DR7_MAX_BP; i++) {
+            hw_breakpoint_remove(env, i);
+        }
+        env->dr[7] = new_dr7;
+        for (i = 0; i < DR7_MAX_BP; i++) {
+            iobpt |= hw_breakpoint_insert(env, i);
+        }
+    }
+
+    env->hflags = (env->hflags & ~HF_IOBPT_MASK) | iobpt;
+}
+
 static bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update)
 {
     target_ulong dr6;
@@ -148,6 +231,7 @@ void breakpoint_handler(CPUState *cs)
         }
     }
 }
+#endif
 
 void helper_single_step(CPUX86State *env)
 {
@@ -158,25 +242,85 @@ void helper_single_step(CPUX86State *env)
     raise_exception(env, EXCP01_DB);
 }
 
-void helper_movl_drN_T0(CPUX86State *env, int reg, target_ulong t0)
+void helper_set_dr(CPUX86State *env, int reg, target_ulong t0)
 {
 #ifndef CONFIG_USER_ONLY
-    int i;
+    switch (reg) {
+    case 0: case 1: case 2: case 3:
+        if (hw_breakpoint_enabled(env->dr[7], reg)
+            && hw_breakpoint_type(env->dr[7], reg) != DR7_TYPE_IO_RW) {
+            hw_breakpoint_remove(env, reg);
+            env->dr[reg] = t0;
+            hw_breakpoint_insert(env, reg);
+        } else {
+            env->dr[reg] = t0;
+        }
+        return;
+    case 4:
+        if (env->cr[4] & CR4_DE_MASK) {
+            break;
+        }
+        /* fallthru */
+    case 6:
+        env->dr[6] = t0 | DR6_FIXED_1;
+        return;
+    case 5:
+        if (env->cr[4] & CR4_DE_MASK) {
+            break;
+        }
+        /* fallthru */
+    case 7:
+        cpu_x86_update_dr7(env, t0);
+        return;
+    }
+    raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC());
+#endif
+}
 
-    if (reg < 4) {
-        hw_breakpoint_remove(env, reg);
-        env->dr[reg] = t0;
-        hw_breakpoint_insert(env, reg);
-    } else if (reg == 7) {
-        for (i = 0; i < DR7_MAX_BP; i++) {
-            hw_breakpoint_remove(env, i);
+target_ulong helper_get_dr(CPUX86State *env, int reg)
+{
+    switch (reg) {
+    case 0: case 1: case 2: case 3: case 6: case 7:
+        return env->dr[reg];
+    case 4:
+        if (env->cr[4] & CR4_DE_MASK) {
+            break;
+        } else {
+            return env->dr[6];
         }
-        env->dr[7] = t0;
-        for (i = 0; i < DR7_MAX_BP; i++) {
-            hw_breakpoint_insert(env, i);
+    case 5:
+        if (env->cr[4] & CR4_DE_MASK) {
+            break;
+        } else {
+            return env->dr[7];
         }
-    } else {
-        env->dr[reg] = t0;
+    }
+    raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC());
+}
+
+/* Check if Port I/O is trapped by a breakpoint.  */
+void helper_bpt_io(CPUX86State *env, uint32_t port,
+                   uint32_t size, target_ulong next_eip)
+{
+#ifndef CONFIG_USER_ONLY
+    target_ulong dr7 = env->dr[7];
+    int i, hit = 0;
+
+    for (i = 0; i < DR7_MAX_BP; ++i) {
+        if (hw_breakpoint_type(dr7, i) == DR7_TYPE_IO_RW
+            && hw_breakpoint_enabled(dr7, i)) {
+            int bpt_len = hw_breakpoint_len(dr7, i);
+            if (port + size - 1 >= env->dr[i]
+                && port <= env->dr[i] + bpt_len - 1) {
+                hit |= 1 << i;
+            }
+        }
+    }
+
+    if (hit) {
+        env->dr[6] = (env->dr[6] & ~0xf) | hit;
+        env->eip = next_eip;
+        raise_exception(env, EXCP01_DB);
     }
 #endif
 }
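
Aside: hw_breakpoint_len() above decodes the 2-bit DR7 LEN field, where encodings 00/01/11 mean 1/2/4 bytes and 10 means 8 bytes (long mode only). A stand-alone check of that mapping (illustrative only):

    #include <assert.h>

    static int dr7_len_decode(int len_bits)
    {
        return (len_bits == 2) ? 8 : len_bits + 1;
    }

    int main(void)
    {
        assert(dr7_len_decode(0) == 1);
        assert(dr7_len_decode(1) == 2);
        assert(dr7_len_decode(2) == 8);
        assert(dr7_len_decode(3) == 4);
        return 0;
    }
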
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 5f53af248f..9280bfc7d8 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -312,7 +312,7 @@ static const char *cpuid_6_feature_name[] = {
           CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | \
           CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \
           CPUID_PSE36 | CPUID_CLFLUSH | CPUID_ACPI | CPUID_MMX | \
-          CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS)
+          CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE)
           /* partly implemented:
           CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */
           /* missing:
@@ -656,7 +656,6 @@ struct X86CPUDefinition {
     int stepping;
     FeatureWordArray features;
     char model_id[48];
-    bool cache_info_passthrough;
 };
 
 static X86CPUDefinition builtin_x86_defs[] = {
@@ -1420,6 +1419,7 @@ static X86CPUDefinition host_cpudef;
 
 static Property host_x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("migratable", X86CPU, migratable, true),
+    DEFINE_PROP_BOOL("host-cache-info", X86CPU, cache_info_passthrough, false),
     DEFINE_PROP_END_OF_LIST()
 };
 
@@ -1446,7 +1446,6 @@ static void host_x86_cpu_class_init(ObjectClass *oc, void *data)
     cpu_x86_fill_model_id(host_cpudef.model_id);
 
     xcc->cpu_def = &host_cpudef;
-    host_cpudef.cache_info_passthrough = true;
 
     /* level, xlevel, xlevel2, and the feature words are initialized on
      * instance_init, because they require KVM to be initialized.
@@ -1492,7 +1491,7 @@ static void report_unavailable_features(FeatureWord w, uint32_t mask)
     int i;
 
     for (i = 0; i < 32; ++i) {
-        if (1 << i & mask) {
+        if ((1UL << i) & mask) {
             const char *reg = get_register_name_32(f->cpuid_reg);
             assert(reg);
             fprintf(stderr, "warning: %s doesn't support requested feature: "
@@ -2094,7 +2093,6 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
     object_property_set_int(OBJECT(cpu), def->stepping, "stepping", errp);
     object_property_set_int(OBJECT(cpu), def->xlevel, "xlevel", errp);
     object_property_set_int(OBJECT(cpu), def->xlevel2, "xlevel2", errp);
-    cpu->cache_info_passthrough = def->cache_info_passthrough;
     object_property_set_str(OBJECT(cpu), def->model_id, "model-id", errp);
     for (w = 0; w < FEATURE_WORDS; w++) {
         env->features[w] = def->features[w];
@@ -3143,7 +3141,7 @@ static Property x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("hv-reset", X86CPU, hyperv_reset, false),
     DEFINE_PROP_BOOL("hv-vpindex", X86CPU, hyperv_vpindex, false),
     DEFINE_PROP_BOOL("hv-runtime", X86CPU, hyperv_runtime, false),
-    DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, false),
+    DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
     DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
     DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
     DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, 0),
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index a395b4b07a..62f78798b6 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -155,6 +155,7 @@
 #define HF_SVMI_SHIFT       21 /* SVM intercepts are active */
 #define HF_OSFXSR_SHIFT     22 /* CR4.OSFXSR */
 #define HF_SMAP_SHIFT       23 /* CR4.SMAP */
+#define HF_IOBPT_SHIFT      24 /* an io breakpoint enabled */
 
 #define HF_CPL_MASK          (3 << HF_CPL_SHIFT)
 #define HF_SOFTMMU_MASK      (1 << HF_SOFTMMU_SHIFT)
@@ -178,6 +179,7 @@
 #define HF_SVMI_MASK         (1 << HF_SVMI_SHIFT)
 #define HF_OSFXSR_MASK       (1 << HF_OSFXSR_SHIFT)
 #define HF_SMAP_MASK         (1 << HF_SMAP_SHIFT)
+#define HF_IOBPT_MASK        (1 << HF_IOBPT_SHIFT)
 
 /* hflags2 */
 
@@ -235,6 +237,7 @@
 #define DR7_TYPE_SHIFT  16
 #define DR7_LEN_SHIFT   18
 #define DR7_FIXED_1     0x00000400
+#define DR7_GLOBAL_BP_MASK   0xaa
 #define DR7_LOCAL_BP_MASK    0x55
 #define DR7_MAX_BP           4
 #define DR7_TYPE_BP_INST     0x0
@@ -917,7 +920,7 @@ typedef struct CPUX86State {
     int error_code;
     int exception_is_int;
     target_ulong exception_next_eip;
-    target_ulong dr[8]; /* debug registers */
+    target_ulong dr[8]; /* debug registers; note dr4 and dr5 are unused */
     union {
         struct CPUBreakpoint *cpu_breakpoint[4];
         struct CPUWatchpoint *cpu_watchpoint[4];
@@ -1127,41 +1130,13 @@ void x86_stl_phys(CPUState *cs, hwaddr addr, uint32_t val);
 void x86_stq_phys(CPUState *cs, hwaddr addr, uint64_t val);
 #endif
 
-static inline bool hw_local_breakpoint_enabled(unsigned long dr7, int index)
-{
-    return (dr7 >> (index * 2)) & 1;
-}
-
-static inline bool hw_global_breakpoint_enabled(unsigned long dr7, int index)
-{
-    return (dr7 >> (index * 2)) & 2;
-
-}
-static inline bool hw_breakpoint_enabled(unsigned long dr7, int index)
-{
-    return hw_global_breakpoint_enabled(dr7, index) ||
-           hw_local_breakpoint_enabled(dr7, index);
-}
-
-static inline int hw_breakpoint_type(unsigned long dr7, int index)
-{
-    return (dr7 >> (DR7_TYPE_SHIFT + (index * 4))) & 3;
-}
-
-static inline int hw_breakpoint_len(unsigned long dr7, int index)
-{
-    int len = ((dr7 >> (DR7_LEN_SHIFT + (index * 4))) & 3);
-    return (len == 2) ? 8 : len + 1;
-}
-
-void hw_breakpoint_insert(CPUX86State *env, int index);
-void hw_breakpoint_remove(CPUX86State *env, int index);
 void breakpoint_handler(CPUState *cs);
 
 /* will be suppressed */
 void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0);
 void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3);
 void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4);
+void cpu_x86_update_dr7(CPUX86State *env, uint32_t new_dr7);
 
 /* hw/pc.c */
 uint64_t cpu_get_tsc(CPUX86State *env);
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 8454a048d2..ecfcfd1a97 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -40,7 +40,8 @@ DEF_HELPER_2(read_crN, tl, env, int)
 DEF_HELPER_3(write_crN, void, env, int, tl)
 DEF_HELPER_2(lmsw, void, env, tl)
 DEF_HELPER_1(clts, void, env)
-DEF_HELPER_3(movl_drN_T0, void, env, int, tl)
+DEF_HELPER_FLAGS_3(set_dr, TCG_CALL_NO_WG, void, env, int, tl)
+DEF_HELPER_FLAGS_2(get_dr, TCG_CALL_NO_WG, tl, env, int)
 DEF_HELPER_2(invlpg, void, env, tl)
 
 DEF_HELPER_4(enter_level, void, env, int, int, tl)
@@ -92,6 +93,7 @@ DEF_HELPER_3(outw, void, env, i32, i32)
 DEF_HELPER_2(inw, tl, env, i32)
 DEF_HELPER_3(outl, void, env, i32, i32)
 DEF_HELPER_2(inl, tl, env, i32)
+DEF_HELPER_FLAGS_4(bpt_io, TCG_CALL_NO_WG, void, env, i32, i32, tl)
 
 DEF_HELPER_3(svm_check_intercept_param, void, env, i32, i64)
 DEF_HELPER_3(vmexit, void, env, i32, i64)
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 67373663d0..a18e16e0de 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -367,8 +367,12 @@ static int cpu_post_load(void *opaque, int version_id)
 
     cpu_breakpoint_remove_all(cs, BP_CPU);
     cpu_watchpoint_remove_all(cs, BP_CPU);
-    for (i = 0; i < DR7_MAX_BP; i++) {
-        hw_breakpoint_insert(env, i);
+    {
+        /* Indicate all breakpoints disabled, as they are, then
+           let the helper re-enable them.  */
+        target_ulong dr7 = env->dr[7];
+        env->dr[7] = dr7 & ~(DR7_GLOBAL_BP_MASK | DR7_LOCAL_BP_MASK);
+        cpu_x86_update_dr7(env, dr7);
     }
     tlb_flush(cs, 1);
 
diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index 1cbe559366..20ee892224 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -501,13 +501,7 @@ static void switch_tss_ra(CPUX86State *env, int tss_selector,
 #ifndef CONFIG_USER_ONLY
     /* reset local breakpoints */
     if (env->dr[7] & DR7_LOCAL_BP_MASK) {
-        for (i = 0; i < DR7_MAX_BP; i++) {
-            if (hw_local_breakpoint_enabled(env->dr[7], i) &&
-                !hw_global_breakpoint_enabled(env->dr[7], i)) {
-                hw_breakpoint_remove(env, i);
-            }
-        }
-        env->dr[7] &= ~DR7_LOCAL_BP_MASK;
+        cpu_x86_update_dr7(env, env->dr[7] & ~DR7_LOCAL_BP_MASK);
     }
 #endif
 }
diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c
index 02e24b9236..c272a98407 100644
--- a/target-i386/smm_helper.c
+++ b/target-i386/smm_helper.c
@@ -266,7 +266,7 @@ void helper_rsm(CPUX86State *env)
 
     val = x86_ldl_phys(cs, sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = x86_ldl_phys(cs, sm_state + 0x7f00) & ~0x7fff;
+        env->smbase = x86_ldl_phys(cs, sm_state + 0x7f00);
     }
 #else
     cpu_x86_update_cr0(env, x86_ldl_phys(cs, sm_state + 0x7ffc));
@@ -319,7 +319,7 @@ void helper_rsm(CPUX86State *env)
 
     val = x86_ldl_phys(cs, sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = x86_ldl_phys(cs, sm_state + 0x7ef8) & ~0x7fff;
+        env->smbase = x86_ldl_phys(cs, sm_state + 0x7ef8);
     }
 #endif
     if ((env->hflags2 & HF2_SMM_INSIDE_NMI_MASK) == 0) {
diff --git a/target-i386/translate.c b/target-i386/translate.c
index ef10e685cc..b400d2470a 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1154,6 +1154,19 @@ static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
     gen_op_add_reg_T0(s->aflag, R_EDI);
 }
 
+static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
+{
+    if (s->flags & HF_IOBPT_MASK) {
+        TCGv_i32 t_size = tcg_const_i32(1 << ot);
+        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);
+
+        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
+        tcg_temp_free_i32(t_size);
+        tcg_temp_free(t_next);
+    }
+}
+
+
 static inline void gen_ins(DisasContext *s, TCGMemOp ot)
 {
     if (s->tb->cflags & CF_USE_ICOUNT) {
@@ -1170,6 +1183,7 @@ static inline void gen_ins(DisasContext *s, TCGMemOp ot)
     gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_bpt_io(s, cpu_tmp2_i32, ot);
     if (s->tb->cflags & CF_USE_ICOUNT) {
         gen_io_end();
     }
@@ -1187,9 +1201,9 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot)
     tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
     tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]);
     gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
-
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
+    gen_bpt_io(s, cpu_tmp2_i32, ot);
     if (s->tb->cflags & CF_USE_ICOUNT) {
         gen_io_end();
     }
@@ -2418,7 +2432,7 @@ static void gen_pusha(DisasContext *s)
 {
     int i;
     gen_op_movl_A0_reg(R_ESP);
-    gen_op_addl_A0_im(-8 << s->dflag);
+    gen_op_addl_A0_im(-(8 << s->dflag));
     if (!s->ss32)
         tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
     tcg_gen_mov_tl(cpu_T[1], cpu_A0);
@@ -6269,6 +6283,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         tcg_gen_movi_i32(cpu_tmp2_i32, val);
         gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
         gen_op_mov_reg_v(ot, R_EAX, cpu_T[1]);
+        gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6289,6 +6304,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         tcg_gen_movi_i32(cpu_tmp2_i32, val);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+        gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6306,6 +6322,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
         gen_op_mov_reg_v(ot, R_EAX, cpu_T[1]);
+        gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6325,6 +6342,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+        gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -7609,18 +7627,20 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 ot = MO_64;
             else
                 ot = MO_32;
-            /* XXX: do it dynamically with CR4.DE bit */
-            if (reg == 4 || reg == 5 || reg >= 8)
+            if (reg >= 8) {
                 goto illegal_op;
+            }
             if (b & 2) {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
                 gen_op_mov_v_reg(ot, cpu_T[0], rm);
-                gen_helper_movl_drN_T0(cpu_env, tcg_const_i32(reg), cpu_T[0]);
+                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
+                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
-                tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,dr[reg]));
+                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
+                gen_helper_get_dr(cpu_T[0], cpu_env, cpu_tmp2_i32);
                 gen_op_mov_reg_v(ot, rm, cpu_T[0]);
             }
         }
@@ -7942,6 +7962,11 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
                                          tb->flags & HF_RF_MASK
                                          ? BP_GDB : BP_ANY))) {
             gen_debug(dc, pc_ptr - dc->cs_base);
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            pc_ptr += 1;
             goto done_generating;
         }
         if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
diff --git a/target-lm32/translate.c b/target-lm32/translate.c
index c61ad0f9ab..fa5b0b93a3 100644
--- a/target-lm32/translate.c
+++ b/target-lm32/translate.c
@@ -1078,6 +1078,11 @@ void gen_intermediate_code(CPULM32State *env, struct TranslationBlock *tb)
             tcg_gen_movi_tl(cpu_pc, dc->pc);
             t_gen_raise_exception(dc, EXCP_DEBUG);
             dc->is_jmp = DISAS_UPDATE;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc->pc += 4;
             break;
         }
 
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 5995ccea92..41ae2c6059 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -3004,6 +3004,11 @@ void gen_intermediate_code(CPUM68KState *env, TranslationBlock *tb)
         if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
             gen_exception(dc, dc->pc, EXCP_DEBUG);
             dc->is_jmp = DISAS_JUMP;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc->pc += 2;
             break;
         }
 
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index a9c501099c..154b9d6836 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -1693,6 +1693,11 @@ void gen_intermediate_code(CPUMBState *env, struct TranslationBlock *tb)
         if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
             t_gen_raise_exception(dc, EXCP_DEBUG);
             dc->is_jmp = DISAS_UPDATE;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc->pc += 4;
             break;
         }
 
diff --git a/target-mips/cpu.c b/target-mips/cpu.c
index 37880d20e0..639a24b362 100644
--- a/target-mips/cpu.c
+++ b/target-mips/cpu.c
@@ -53,12 +53,15 @@ static bool mips_cpu_has_work(CPUState *cs)
     CPUMIPSState *env = &cpu->env;
     bool has_work = false;
 
-    /* It is implementation dependent if non-enabled interrupts
-       wake-up the CPU, however most of the implementations only
+    /* Prior to MIPS Release 6 it is implementation dependent if non-enabled
+       interrupts wake-up the CPU, however most of the implementations only
        check for interrupts that can be taken. */
     if ((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
         cpu_mips_hw_interrupts_pending(env)) {
-        has_work = true;
+        if (cpu_mips_hw_interrupts_enabled(env) ||
+            (env->insn_flags & ISA_MIPS32R6)) {
+            has_work = true;
+        }
     }
 
     /* MIPS-MT has the ability to halt the CPU.  */
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index f32a0fd737..fa919c1a13 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -469,6 +469,7 @@ struct CPUMIPSState {
 #define CP0C5_CV         29
 #define CP0C5_EVA        28
 #define CP0C5_MSAEn      27
+#define CP0C5_XNP        13
 #define CP0C5_UFE        9
 #define CP0C5_FRE        8
 #define CP0C5_SBRI       6
@@ -637,23 +638,24 @@ static inline int cpu_mmu_index (CPUMIPSState *env, bool ifetch)
     return env->hflags & MIPS_HFLAG_KSU;
 }
 
-static inline int cpu_mips_hw_interrupts_pending(CPUMIPSState *env)
+static inline bool cpu_mips_hw_interrupts_enabled(CPUMIPSState *env)
 {
-    int32_t pending;
-    int32_t status;
-    int r;
-
-    if (!(env->CP0_Status & (1 << CP0St_IE)) ||
-        (env->CP0_Status & (1 << CP0St_EXL)) ||
-        (env->CP0_Status & (1 << CP0St_ERL)) ||
+    return (env->CP0_Status & (1 << CP0St_IE)) &&
+        !(env->CP0_Status & (1 << CP0St_EXL)) &&
+        !(env->CP0_Status & (1 << CP0St_ERL)) &&
+        !(env->hflags & MIPS_HFLAG_DM) &&
         /* Note that the TCStatus IXMT field is initialized to zero,
            and only MT capable cores can set it to one. So we don't
            need to check for MT capabilities here.  */
-        (env->active_tc.CP0_TCStatus & (1 << CP0TCSt_IXMT)) ||
-        (env->hflags & MIPS_HFLAG_DM)) {
-        /* Interrupts are disabled */
-        return 0;
-    }
+        !(env->active_tc.CP0_TCStatus & (1 << CP0TCSt_IXMT));
+}
+
+/* Check if there is a pending and not masked out interrupt */
+static inline bool cpu_mips_hw_interrupts_pending(CPUMIPSState *env)
+{
+    int32_t pending;
+    int32_t status;
+    bool r;
 
     pending = env->CP0_Cause & CP0Ca_IP_mask;
     status = env->CP0_Status & CP0Ca_IP_mask;
@@ -667,7 +669,7 @@ static inline int cpu_mips_hw_interrupts_pending(CPUMIPSState *env)
         /* A MIPS configured with compatibility or VInt (Vectored Interrupts)
            treats the pending lines as individual interrupt lines, the status
            lines are individual masks.  */
-        r = pending & status;
+        r = (pending & status) != 0;
     }
     return r;
 }
@@ -1000,7 +1002,12 @@ static inline void cpu_mips_store_status(CPUMIPSState *env, target_ulong val)
 
     if (env->insn_flags & ISA_MIPS32R6) {
         bool has_supervisor = extract32(mask, CP0St_KSU, 2) == 0x3;
-
+#if defined(TARGET_MIPS64)
+        uint32_t ksux = (1 << CP0St_KX) & val;
+        ksux |= (ksux >> 1) & val; /* KX = 0 forces SX to be 0 */
+        ksux |= (ksux >> 1) & val; /* SX = 0 forces UX to be 0 */
+        val = (val & ~(7 << CP0St_UX)) | ksux;
+#endif
         if (has_supervisor && extract32(val, CP0St_KSU, 2) == 0x3) {
             mask &= ~(3 << CP0St_KSU);
         }
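
Aside: the R6 Status handling added above keeps KX/SX/UX consistent with a small bit cascade -- a lower bit survives only if the bit above it is also set. A stand-alone version of the trick, using the usual MIPS Status bit positions UX=5, SX=6, KX=7 (illustrative only):

    #include <assert.h>
    #include <stdint.h>

    #define UX_SHIFT 5   /* CP0St_UX */
    #define KX_SHIFT 7   /* CP0St_KX */

    static uint32_t force_ksux(uint32_t val)
    {
        uint32_t ksux = (1u << KX_SHIFT) & val;
        ksux |= (ksux >> 1) & val;   /* KX = 0 forces SX to be 0 */
        ksux |= (ksux >> 1) & val;   /* SX = 0 forces UX to be 0 */
        return (val & ~(7u << UX_SHIFT)) | ksux;
    }

    int main(void)
    {
        assert(force_ksux(0xE0) == 0xE0);   /* KX,SX,UX all set: unchanged */
        assert(force_ksux(0x60) == 0x00);   /* SX,UX without KX: cleared */
        assert(force_ksux(0xA0) == 0x80);   /* KX,UX without SX: UX cleared */
        return 0;
    }
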
diff --git a/target-mips/helper.c b/target-mips/helper.c
index 01c4461573..b3fe816fec 100644
--- a/target-mips/helper.c
+++ b/target-mips/helper.c
@@ -293,9 +293,10 @@ static void raise_mmu_exception(CPUMIPSState *env, target_ulong address,
         (env->CP0_EntryHi & 0xFF) | (address & (TARGET_PAGE_MASK << 1));
 #if defined(TARGET_MIPS64)
     env->CP0_EntryHi &= env->SEGMask;
-    env->CP0_XContext = (env->CP0_XContext & ((~0ULL) << (env->SEGBITS - 7))) |
-                        ((address & 0xC00000000000ULL) >> (55 - env->SEGBITS)) |
-                        ((address & ((1ULL << env->SEGBITS) - 1) & 0xFFFFFFFFFFFFE000ULL) >> 9);
+    env->CP0_XContext =
+        /* PTEBase */   (env->CP0_XContext & ((~0ULL) << (env->SEGBITS - 7))) |
+        /* R */         (extract64(address, 62, 2) << (env->SEGBITS - 9)) |
+        /* BadVPN2 */   (extract64(address, 13, env->SEGBITS - 13) << 4);
 #endif
     cs->exception_index = exception;
     env->error_code = error_code;
@@ -759,7 +760,8 @@ bool mips_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
         MIPSCPU *cpu = MIPS_CPU(cs);
         CPUMIPSState *env = &cpu->env;
 
-        if (cpu_mips_hw_interrupts_pending(env)) {
+        if (cpu_mips_hw_interrupts_enabled(env) &&
+            cpu_mips_hw_interrupts_pending(env)) {
             /* Raise it */
             cs->exception_index = EXCP_EXT_INTERRUPT;
             env->error_code = 0;
diff --git a/target-mips/helper.h b/target-mips/helper.h
index d8cc766bdf..95b9149d89 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -358,6 +358,8 @@ DEF_HELPER_1(rdhwr_cpunum, tl, env)
 DEF_HELPER_1(rdhwr_synci_step, tl, env)
 DEF_HELPER_1(rdhwr_cc, tl, env)
 DEF_HELPER_1(rdhwr_ccres, tl, env)
+DEF_HELPER_1(rdhwr_performance, tl, env)
+DEF_HELPER_1(rdhwr_xnp, tl, env)
 DEF_HELPER_2(pmon, void, env, int)
 DEF_HELPER_1(wait, void, env)
 
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 6739fff216..056d53b9ef 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -1357,6 +1357,13 @@ void helper_mtc0_hwrena(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = 0x0000000F;
 
+    if ((env->CP0_Config1 & (1 << CP0C1_PC)) &&
+        (env->insn_flags & ISA_MIPS32R6)) {
+        mask |= (1 << 4);
+    }
+    if (env->insn_flags & ISA_MIPS32R6) {
+        mask |= (1 << 5);
+    }
     if (env->CP0_Config3 & (1 << CP0C3_ULRI)) {
         mask |= (1 << 29);
 
@@ -2185,53 +2192,52 @@ void helper_deret(CPUMIPSState *env)
 }
 #endif /* !CONFIG_USER_ONLY */
 
-target_ulong helper_rdhwr_cpunum(CPUMIPSState *env)
+static inline void check_hwrena(CPUMIPSState *env, int reg)
 {
-    if ((env->hflags & MIPS_HFLAG_CP0) ||
-        (env->CP0_HWREna & (1 << 0)))
-        return env->CP0_EBase & 0x3ff;
-    else
-        do_raise_exception(env, EXCP_RI, GETPC());
+    if ((env->hflags & MIPS_HFLAG_CP0) || (env->CP0_HWREna & (1 << reg))) {
+        return;
+    }
+    do_raise_exception(env, EXCP_RI, GETPC());
+}
 
-    return 0;
+target_ulong helper_rdhwr_cpunum(CPUMIPSState *env)
+{
+    check_hwrena(env, 0);
+    return env->CP0_EBase & 0x3ff;
 }
 
 target_ulong helper_rdhwr_synci_step(CPUMIPSState *env)
 {
-    if ((env->hflags & MIPS_HFLAG_CP0) ||
-        (env->CP0_HWREna & (1 << 1)))
-        return env->SYNCI_Step;
-    else
-        do_raise_exception(env, EXCP_RI, GETPC());
-
-    return 0;
+    check_hwrena(env, 1);
+    return env->SYNCI_Step;
 }
 
 target_ulong helper_rdhwr_cc(CPUMIPSState *env)
 {
-    if ((env->hflags & MIPS_HFLAG_CP0) ||
-        (env->CP0_HWREna & (1 << 2))) {
+    check_hwrena(env, 2);
 #ifdef CONFIG_USER_ONLY
-        return env->CP0_Count;
+    return env->CP0_Count;
 #else
-        return (int32_t)cpu_mips_get_count(env);
+    return (int32_t)cpu_mips_get_count(env);
 #endif
-    } else {
-        do_raise_exception(env, EXCP_RI, GETPC());
-    }
-
-    return 0;
 }
 
 target_ulong helper_rdhwr_ccres(CPUMIPSState *env)
 {
-    if ((env->hflags & MIPS_HFLAG_CP0) ||
-        (env->CP0_HWREna & (1 << 3)))
-        return env->CCRes;
-    else
-        do_raise_exception(env, EXCP_RI, GETPC());
+    check_hwrena(env, 3);
+    return env->CCRes;
+}
 
-    return 0;
+target_ulong helper_rdhwr_performance(CPUMIPSState *env)
+{
+    check_hwrena(env, 4);
+    return env->CP0_Performance0;
+}
+
+target_ulong helper_rdhwr_xnp(CPUMIPSState *env)
+{
+    check_hwrena(env, 5);
+    return (env->CP0_Config5 >> CP0C5_XNP) & 1;
 }
 
 void helper_pmon(CPUMIPSState *env, int function)
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 897839ced9..56266471c1 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -323,6 +323,7 @@ enum {
     OPC_TLTIU    = (0x0B << 16) | OPC_REGIMM,
     OPC_TEQI     = (0x0C << 16) | OPC_REGIMM,
     OPC_TNEI     = (0x0E << 16) | OPC_REGIMM,
+    OPC_SIGRIE   = (0x17 << 16) | OPC_REGIMM,
     OPC_SYNCI    = (0x1F << 16) | OPC_REGIMM,
 
     OPC_DAHI     = (0x06 << 16) | OPC_REGIMM,
@@ -10333,7 +10334,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
     }
 }
 
-static void gen_rdhwr(DisasContext *ctx, int rt, int rd)
+static void gen_rdhwr(DisasContext *ctx, int rt, int rd, int sel)
 {
     TCGv t0;
 
@@ -10361,6 +10362,22 @@ static void gen_rdhwr(DisasContext *ctx, int rt, int rd)
         gen_helper_rdhwr_ccres(t0, cpu_env);
         gen_store_gpr(t0, rt);
         break;
+    case 4:
+        check_insn(ctx, ISA_MIPS32R6);
+        if (sel != 0) {
+            /* Performance counter registers are not implemented other than
+             * control register 0.
+             */
+            generate_exception(ctx, EXCP_RI);
+        }
+        gen_helper_rdhwr_performance(t0, cpu_env);
+        gen_store_gpr(t0, rt);
+        break;
+    case 5:
+        check_insn(ctx, ISA_MIPS32R6);
+        gen_helper_rdhwr_xnp(t0, cpu_env);
+        gen_store_gpr(t0, rt);
+        break;
     case 29:
 #if defined(CONFIG_USER_ONLY)
         tcg_gen_ld_tl(t0, cpu_env,
@@ -11979,6 +11996,7 @@ enum {
     ROTR = 0x3,
     SELEQZ = 0x5,
     SELNEZ = 0x6,
+    R6_RDHWR = 0x7,
 
     SLLV = 0x0,
     SRLV = 0x1,
@@ -12009,11 +12027,13 @@ enum {
     MODU = 0x7,
 
     /* The following can be distinguished by their lower 6 bits. */
+    BREAK32 = 0x07,
     INS = 0x0c,
     LSA = 0x0f,
     ALIGN = 0x1f,
     EXT = 0x2c,
-    POOL32AXF = 0x3c
+    POOL32AXF = 0x3c,
+    SIGRIE = 0x3f
 };
 
 /* POOL32AXF encoding of minor opcode field extension */
@@ -12931,7 +12951,8 @@ static void gen_pool32axf (CPUMIPSState *env, DisasContext *ctx, int rt, int rs)
             gen_cl(ctx, mips32_op, rt, rs);
             break;
         case RDHWR:
-            gen_rdhwr(ctx, rt, rs);
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
+            gen_rdhwr(ctx, rt, rs, 0);
             break;
         case WSBH:
             gen_bshfl(ctx, OPC_WSBH, rs, rt);
@@ -13486,6 +13507,10 @@ static void decode_micromips32_opc(CPUMIPSState *env, DisasContext *ctx)
                 check_insn(ctx, ISA_MIPS32R6);
                 gen_cond_move(ctx, OPC_SELNEZ, rd, rs, rt);
                 break;
+            case R6_RDHWR:
+                check_insn(ctx, ISA_MIPS32R6);
+                gen_rdhwr(ctx, rt, rs, extract32(ctx->opcode, 11, 3));
+                break;
             default:
                 goto pool32a_invalid;
             }
@@ -13629,9 +13654,13 @@ static void decode_micromips32_opc(CPUMIPSState *env, DisasContext *ctx)
         case POOL32AXF:
             gen_pool32axf(env, ctx, rt, rs);
             break;
-        case 0x07:
+        case BREAK32:
             generate_exception_end(ctx, EXCP_BREAK);
             break;
+        case SIGRIE:
+            check_insn(ctx, ISA_MIPS32R6);
+            generate_exception_end(ctx, EXCP_RI);
+            break;
         default:
         pool32a_invalid:
                 MIPS_INVAL("pool32a");
@@ -17732,7 +17761,7 @@ static void decode_opc_special3(CPUMIPSState *env, DisasContext *ctx)
         break;
 #endif
     case OPC_RDHWR:
-        gen_rdhwr(ctx, rt, rd);
+        gen_rdhwr(ctx, rt, rd, extract32(ctx->opcode, 6, 3));
         break;
     case OPC_FORK:
         check_insn(ctx, ASE_MT);
@@ -18950,6 +18979,10 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
             check_insn_opc_removed(ctx, ISA_MIPS32R6);
             gen_trap(ctx, op1, rs, -1, imm);
             break;
+        case OPC_SIGRIE:
+            check_insn(ctx, ISA_MIPS32R6);
+            generate_exception_end(ctx, EXCP_RI);
+            break;
         case OPC_SYNCI:
             check_insn(ctx, ISA_MIPS32R2);
             /* Break the TB to be able to sync copied instructions
@@ -19594,8 +19627,10 @@ void gen_intermediate_code(CPUMIPSState *env, struct TranslationBlock *tb)
             save_cpu_state(&ctx, 1);
             ctx.bstate = BS_BRANCH;
             gen_helper_raise_exception_debug(cpu_env);
-            /* Include the breakpoint location or the tb won't
-             * be flushed when it must be.  */
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
             ctx.pc += 4;
             goto done_generating;
         }
diff --git a/target-mips/translate_init.c b/target-mips/translate_init.c
index 1b45884e9b..bb33c7cfeb 100644
--- a/target-mips/translate_init.c
+++ b/target-mips/translate_init.c
@@ -447,7 +447,7 @@ static const mips_def_t mips_defs[] =
                        (1 << CP0C3_RXI) | (1U << CP0C3_M),
         .CP0_Config4 = MIPS_CONFIG4 | (0xfc << CP0C4_KScrExist) |
                        (3 << CP0C4_IE) | (1U << CP0C4_M),
-        .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_LLB),
+        .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_LLB),
         .CP0_Config5_rw_bitmask = (1 << CP0C5_SBRI) | (1 << CP0C5_FRE) |
                                   (1 << CP0C5_UFE),
         .CP0_LLAddr_rw_bitmask = 0,
@@ -665,7 +665,7 @@ static const mips_def_t mips_defs[] =
                        (1 << CP0C3_RXI) | (1 << CP0C3_LPA),
         .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) |
                        (0xfc << CP0C4_KScrExist),
-        .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_LLB),
+        .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_LLB),
         .CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) |
                                   (1 << CP0C5_FRE) | (1 << CP0C5_UFE),
         .CP0_LLAddr_rw_bitmask = 0,
diff --git a/target-moxie/translate.c b/target-moxie/translate.c
index f84841efe2..6dedcb7a21 100644
--- a/target-moxie/translate.c
+++ b/target-moxie/translate.c
@@ -848,6 +848,11 @@ void gen_intermediate_code(CPUMoxieState *env, struct TranslationBlock *tb)
             tcg_gen_movi_i32(cpu_pc, ctx.pc);
             gen_helper_debug(cpu_env);
             ctx.bstate = BS_EXCP;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            ctx.pc += 2;
             goto done_generating;
         }
 
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index b66fde18fe..606490a47b 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -1665,6 +1665,11 @@ void gen_intermediate_code(CPUOpenRISCState *env, struct TranslationBlock *tb)
             tcg_gen_movi_tl(cpu_pc, dc->pc);
             gen_exception(dc, EXCP_DEBUG);
             dc->is_jmp = DISAS_UPDATE;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc->pc += 4;
             break;
         }
 
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 98ce5a7ab0..b34aed6a19 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -117,14 +117,14 @@ enum powerpc_mmu_t {
 #define POWERPC_MMU_AMR      0x00040000
     /* 64 bits PowerPC MMU                                     */
     POWERPC_MMU_64B        = POWERPC_MMU_64 | 0x00000001,
+    /* Architecture 2.03 and later (has LPCR) */
+    POWERPC_MMU_2_03       = POWERPC_MMU_64 | 0x00000002,
     /* Architecture 2.06 variant                               */
     POWERPC_MMU_2_06       = POWERPC_MMU_64 | POWERPC_MMU_1TSEG
                              | POWERPC_MMU_AMR | 0x00000003,
-    /* Architecture 2.06 "degraded" (no 1T segments)           */
-    POWERPC_MMU_2_06a      = POWERPC_MMU_64 | POWERPC_MMU_AMR
-                             | 0x00000003,
-    /* Architecture 2.06 "degraded" (no 1T segments or AMR)    */
-    POWERPC_MMU_2_06d      = POWERPC_MMU_64 | 0x00000003,
+    /* Architecture 2.07 variant                               */
+    POWERPC_MMU_2_07       = POWERPC_MMU_64 | POWERPC_MMU_1TSEG
+                             | POWERPC_MMU_AMR | 0x00000004,
 #endif /* defined(TARGET_PPC64) */
 };
 
@@ -1073,6 +1073,7 @@ struct CPUPPCState {
     uint64_t insns_flags2;
 #if defined(TARGET_PPC64)
     struct ppc_segment_page_sizes sps;
+    bool ci_large_pages;
 #endif
 
 #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 38aa927eb0..ac70f0897b 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -259,7 +259,8 @@ static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
             info->flags |= KVM_PPC_1T_SEGMENTS;
         }
 
-        if (env->mmu_model == POWERPC_MMU_2_06) {
+        if (env->mmu_model == POWERPC_MMU_2_06 ||
+            env->mmu_model == POWERPC_MMU_2_07) {
             info->slb_size = 32;
         } else {
             info->slb_size = 64;
@@ -272,8 +273,9 @@ static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
         info->sps[i].enc[0].pte_enc = 0;
         i++;
 
-        /* 64K on MMU 2.06 */
-        if (env->mmu_model == POWERPC_MMU_2_06) {
+        /* 64K on MMU 2.06 and later */
+        if (env->mmu_model == POWERPC_MMU_2_06 ||
+            env->mmu_model == POWERPC_MMU_2_07) {
             info->sps[i].page_shift = 16;
             info->sps[i].slb_enc = 0x110;
             info->sps[i].enc[0].page_shift = 16;
@@ -412,6 +414,13 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
     /* Convert to QEMU form */
     memset(&env->sps, 0, sizeof(env->sps));
 
+    /* If we have HV KVM, we need to forbid CI large pages if our
+     * host page size is smaller than 64K.
+     */
+    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
+        env->ci_large_pages = getpagesize() >= 0x10000;
+    }
+
     /*
      * XXX This loop should be an entry wide AND of the capabilities that
      *     the selected CPU has with the capabilities that KVM supports.
@@ -2070,7 +2079,7 @@ bool kvmppc_spapr_use_multitce(void)
 }
 
 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
-                              bool vfio_accel)
+                              bool need_vfio)
 {
     struct kvm_create_spapr_tce args = {
         .liobn = liobn,
@@ -2084,7 +2093,7 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
      * destroying the table, which the upper layers -will- do
      */
     *pfd = -1;
-    if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
+    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
         return NULL;
     }
 
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 470f6d62f7..309cbe0df1 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -36,7 +36,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
 off_t kvmppc_alloc_rma(void **rma);
 bool kvmppc_spapr_use_multitce(void);
 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
-                              bool vfio_accel);
+                              bool need_vfio);
 int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
 int kvmppc_reset_htab(int shift_hint);
 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index 527c6adca3..e52d0e56c2 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -1293,9 +1293,9 @@ void dump_mmu(FILE *f, fprintf_function cpu_fprintf, CPUPPCState *env)
         break;
 #if defined(TARGET_PPC64)
     case POWERPC_MMU_64B:
+    case POWERPC_MMU_2_03:
     case POWERPC_MMU_2_06:
-    case POWERPC_MMU_2_06a:
-    case POWERPC_MMU_2_06d:
+    case POWERPC_MMU_2_07:
         dump_slb(f, cpu_fprintf, env);
         break;
 #endif
@@ -1433,9 +1433,9 @@ hwaddr ppc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
     switch (env->mmu_model) {
 #if defined(TARGET_PPC64)
     case POWERPC_MMU_64B:
+    case POWERPC_MMU_2_03:
     case POWERPC_MMU_2_06:
-    case POWERPC_MMU_2_06a:
-    case POWERPC_MMU_2_06d:
+    case POWERPC_MMU_2_07:
         return ppc_hash64_get_phys_page_debug(env, addr);
 #endif
 
@@ -1937,9 +1937,9 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
     case POWERPC_MMU_601:
 #if defined(TARGET_PPC64)
     case POWERPC_MMU_64B:
+    case POWERPC_MMU_2_03:
     case POWERPC_MMU_2_06:
-    case POWERPC_MMU_2_06a:
-    case POWERPC_MMU_2_06d:
+    case POWERPC_MMU_2_07:
 #endif /* defined(TARGET_PPC64) */
         tlb_flush(CPU(cpu), 1);
         break;
@@ -2011,9 +2011,9 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
         break;
 #if defined(TARGET_PPC64)
     case POWERPC_MMU_64B:
+    case POWERPC_MMU_2_03:
     case POWERPC_MMU_2_06:
-    case POWERPC_MMU_2_06a:
-    case POWERPC_MMU_2_06d:
+    case POWERPC_MMU_2_07:
         /* tlbie invalidate TLBs for all segments */
         /* XXX: given the fact that there are too many segments to invalidate,
          *      and we still don't have a tlb_flush_mask(env, n, mask) in QEMU,
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index c2bc1a7ec6..308ad68880 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -11327,9 +11327,9 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
     case POWERPC_MMU_SOFT_74xx:
 #if defined(TARGET_PPC64)
     case POWERPC_MMU_64B:
+    case POWERPC_MMU_2_03:
     case POWERPC_MMU_2_06:
-    case POWERPC_MMU_2_06a:
-    case POWERPC_MMU_2_06d:
+    case POWERPC_MMU_2_07:
 #endif
         cpu_fprintf(f, " SDR1 " TARGET_FMT_lx "   DAR " TARGET_FMT_lx
                        "  DSISR " TARGET_FMT_lx "\n", env->spr[SPR_SDR1],
@@ -11488,6 +11488,11 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
 
         if (unlikely(cpu_breakpoint_test(cs, ctx.nip, BP_ANY))) {
             gen_debug_exception(ctxp);
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            ctx.nip += 4;
             break;
         }
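
    This fix recurs for each front end touched by the series.  The reasoning,
    spelled out: tb->size is derived from the final PC once the translation
    loop ends, and removing a breakpoint only invalidates translation blocks
    whose [tb->pc, tb->pc + tb->size) range covers the breakpoint address.
    A minimal sketch of that end-of-loop bookkeeping (variable names differ
    per target; illustrative only, not part of the patch):

        /* after the translation loop, typically: */
        tb->size = ctx.nip - pc_start;  /* 0 if we stopped without advancing
                                           past the breakpointed insn */
        /* A zero-length TB can never satisfy
         *     tb->pc <= bp_addr && bp_addr < tb->pc + tb->size
         * so it would not be found and flushed when the breakpoint is
         * removed; bumping the PC by one insn keeps the address in range. */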
 
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index b54147350d..4934c80b8f 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7864,6 +7864,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
         gen_spr_book3s_ids(env);
         gen_spr_amr(env);
         gen_spr_book3s_purr(env);
+        env->ci_large_pages = true;
         break;
     default:
         g_assert_not_reached();
@@ -8019,7 +8020,7 @@ POWERPC_FAMILY(POWER5P)(ObjectClass *oc, void *data)
                     (1ull << MSR_DR) |
                     (1ull << MSR_PMM) |
                     (1ull << MSR_RI);
-    pcc->mmu_model = POWERPC_MMU_64B;
+    pcc->mmu_model = POWERPC_MMU_2_03;
 #if defined(CONFIG_SOFTMMU)
     pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
 #endif
@@ -8243,7 +8244,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
                     (1ull << MSR_PMM) |
                     (1ull << MSR_RI) |
                     (1ull << MSR_LE);
-    pcc->mmu_model = POWERPC_MMU_2_06;
+    pcc->mmu_model = POWERPC_MMU_2_07;
 #if defined(CONFIG_SOFTMMU)
     pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
 #endif
diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index 05d51fe84a..c79a2cb57a 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -5360,6 +5360,11 @@ void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb)
         if (unlikely(cpu_breakpoint_test(cs, dc.pc, BP_ANY))) {
             status = EXIT_PC_STALE;
             do_debug = true;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc.pc += 2;
             break;
         }
 
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index f764bc2539..7bc621649a 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -1855,6 +1855,11 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
             tcg_gen_movi_i32(cpu_pc, ctx.pc);
             gen_helper_debug(cpu_env);
             ctx.bstate = BS_BRANCH;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            ctx.pc += 2;
             break;
         }
 
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index b59742ad2e..41a33193d8 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -5247,6 +5247,7 @@ void gen_intermediate_code(CPUSPARCState * env, TranslationBlock * tb)
             tcg_gen_insn_start(dc->pc, dc->npc);
         }
         num_insns++;
+        last_pc = dc->pc;
 
         if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
             if (dc->pc != pc_start) {
@@ -5262,7 +5263,6 @@ void gen_intermediate_code(CPUSPARCState * env, TranslationBlock * tb)
             gen_io_start();
         }
 
-        last_pc = dc->pc;
         insn = cpu_ldl_code(env, dc->pc);
 
         disas_sparc_insn(dc, insn);
diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c
index 48f89fb1c5..d2f92f02fc 100644
--- a/target-unicore32/translate.c
+++ b/target-unicore32/translate.c
@@ -1917,9 +1917,11 @@ void gen_intermediate_code(CPUUniCore32State *env, TranslationBlock *tb)
             gen_set_pc_im(dc->pc);
             gen_exception(EXCP_DEBUG);
             dc->is_jmp = DISAS_JUMP;
-            /* Advance PC so that clearing the breakpoint will
-               invalidate this TB.  */
-            dc->pc += 2; /* FIXME */
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc->pc += 4;
             goto done_generating;
         }
 
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index aa0c527dc4..06b0163412 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -3088,6 +3088,11 @@ void gen_intermediate_code(CPUXtensaState *env, TranslationBlock *tb)
             tcg_gen_movi_i32(cpu_pc, dc.pc);
             gen_exception(&dc, EXCP_DEBUG);
             dc.is_jmp = DISAS_UPDATE;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc.pc += 2;
             break;
         }
 
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 4305af9673..79e052ff7a 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -288,16 +288,24 @@ typedef enum {
     OPC_SRLV     = OPC_SPECIAL | 0x06,
     OPC_ROTRV    = OPC_SPECIAL | (0x01 <<  6) | 0x06,
     OPC_SRAV     = OPC_SPECIAL | 0x07,
-    OPC_JR       = OPC_SPECIAL | 0x08,
+    OPC_JR_R5    = OPC_SPECIAL | 0x08,
     OPC_JALR     = OPC_SPECIAL | 0x09,
     OPC_MOVZ     = OPC_SPECIAL | 0x0A,
     OPC_MOVN     = OPC_SPECIAL | 0x0B,
     OPC_MFHI     = OPC_SPECIAL | 0x10,
     OPC_MFLO     = OPC_SPECIAL | 0x12,
     OPC_MULT     = OPC_SPECIAL | 0x18,
+    OPC_MUL_R6   = OPC_SPECIAL | (0x02 <<  6) | 0x18,
+    OPC_MUH      = OPC_SPECIAL | (0x03 <<  6) | 0x18,
     OPC_MULTU    = OPC_SPECIAL | 0x19,
+    OPC_MULU     = OPC_SPECIAL | (0x02 <<  6) | 0x19,
+    OPC_MUHU     = OPC_SPECIAL | (0x03 <<  6) | 0x19,
     OPC_DIV      = OPC_SPECIAL | 0x1A,
+    OPC_DIV_R6   = OPC_SPECIAL | (0x02 <<  6) | 0x1A,
+    OPC_MOD      = OPC_SPECIAL | (0x03 <<  6) | 0x1A,
     OPC_DIVU     = OPC_SPECIAL | 0x1B,
+    OPC_DIVU_R6  = OPC_SPECIAL | (0x02 <<  6) | 0x1B,
+    OPC_MODU     = OPC_SPECIAL | (0x03 <<  6) | 0x1B,
     OPC_ADDU     = OPC_SPECIAL | 0x21,
     OPC_SUBU     = OPC_SPECIAL | 0x23,
     OPC_AND      = OPC_SPECIAL | 0x24,
@@ -306,13 +314,15 @@ typedef enum {
     OPC_NOR      = OPC_SPECIAL | 0x27,
     OPC_SLT      = OPC_SPECIAL | 0x2A,
     OPC_SLTU     = OPC_SPECIAL | 0x2B,
+    OPC_SELEQZ   = OPC_SPECIAL | 0x35,
+    OPC_SELNEZ   = OPC_SPECIAL | 0x37,
 
     OPC_REGIMM   = 0x01 << 26,
     OPC_BLTZ     = OPC_REGIMM | (0x00 << 16),
     OPC_BGEZ     = OPC_REGIMM | (0x01 << 16),
 
     OPC_SPECIAL2 = 0x1c << 26,
-    OPC_MUL      = OPC_SPECIAL2 | 0x002,
+    OPC_MUL_R5   = OPC_SPECIAL2 | 0x002,
 
     OPC_SPECIAL3 = 0x1f << 26,
     OPC_EXT      = OPC_SPECIAL3 | 0x000,
@@ -320,6 +330,15 @@ typedef enum {
     OPC_WSBH     = OPC_SPECIAL3 | 0x0a0,
     OPC_SEB      = OPC_SPECIAL3 | 0x420,
     OPC_SEH      = OPC_SPECIAL3 | 0x620,
+
+    /* MIPS r6 doesn't have JR; JALR should be used instead */
+    OPC_JR       = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5,
+
+    /*
+     * MIPS r6 replaces MUL with an alternative encoding which is
+     * backwards-compatible at the assembly level.
+     */
+    OPC_MUL      = use_mips32r6_instructions ? OPC_MUL_R6 : OPC_MUL_R5,
 } MIPSInsn;
 
 /*
@@ -841,13 +860,20 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
 }
 
 static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
-                            TCGReg c1, TCGReg c2, TCGReg v)
+                            TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2)
 {
-    MIPSInsn m_opc = OPC_MOVN;
+    bool eqz = false;
+
+    /* If one of the values is zero, put it last to match SEL*Z instructions */
+    if (use_mips32r6_instructions && v1 == 0) {
+        v1 = v2;
+        v2 = 0;
+        cond = tcg_invert_cond(cond);
+    }
 
     switch (cond) {
     case TCG_COND_EQ:
-        m_opc = OPC_MOVZ;
+        eqz = true;
         /* FALLTHRU */
     case TCG_COND_NE:
         if (c2 != 0) {
@@ -860,14 +886,32 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
         /* Minimize code size by preferring a compare not requiring INV.  */
         if (mips_cmp_map[cond] & MIPS_CMP_INV) {
             cond = tcg_invert_cond(cond);
-            m_opc = OPC_MOVZ;
+            eqz = true;
         }
         tcg_out_setcond(s, cond, TCG_TMP0, c1, c2);
         c1 = TCG_TMP0;
         break;
     }
 
-    tcg_out_opc_reg(s, m_opc, ret, v, c1);
+    if (use_mips32r6_instructions) {
+        MIPSInsn m_opc_t = eqz ? OPC_SELEQZ : OPC_SELNEZ;
+        MIPSInsn m_opc_f = eqz ? OPC_SELNEZ : OPC_SELEQZ;
+
+        if (v2 != 0) {
+            tcg_out_opc_reg(s, m_opc_f, TCG_TMP1, v2, c1);
+        }
+        tcg_out_opc_reg(s, m_opc_t, ret, v1, c1);
+        if (v2 != 0) {
+            tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1);
+        }
+    } else {
+        MIPSInsn m_opc = eqz ? OPC_MOVZ : OPC_MOVN;
+
+        tcg_out_opc_reg(s, m_opc, ret, v1, c1);
+
+        /* This should be guaranteed via constraints */
+        tcg_debug_assert(v2 == ret);
+    }
 }
 
 static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail)
@@ -1445,21 +1489,45 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         i1 = OPC_MULT, i2 = OPC_MFLO;
         goto do_hilo1;
     case INDEX_op_mulsh_i32:
+        if (use_mips32r6_instructions) {
+            tcg_out_opc_reg(s, OPC_MUH, a0, a1, a2);
+            break;
+        }
         i1 = OPC_MULT, i2 = OPC_MFHI;
         goto do_hilo1;
     case INDEX_op_muluh_i32:
+        if (use_mips32r6_instructions) {
+            tcg_out_opc_reg(s, OPC_MUHU, a0, a1, a2);
+            break;
+        }
         i1 = OPC_MULTU, i2 = OPC_MFHI;
         goto do_hilo1;
     case INDEX_op_div_i32:
+        if (use_mips32r6_instructions) {
+            tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2);
+            break;
+        }
         i1 = OPC_DIV, i2 = OPC_MFLO;
         goto do_hilo1;
     case INDEX_op_divu_i32:
+        if (use_mips32r6_instructions) {
+            tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2);
+            break;
+        }
         i1 = OPC_DIVU, i2 = OPC_MFLO;
         goto do_hilo1;
     case INDEX_op_rem_i32:
+        if (use_mips32r6_instructions) {
+            tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2);
+            break;
+        }
         i1 = OPC_DIV, i2 = OPC_MFHI;
         goto do_hilo1;
     case INDEX_op_remu_i32:
+        if (use_mips32r6_instructions) {
+            tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2);
+            break;
+        }
         i1 = OPC_DIVU, i2 = OPC_MFHI;
     do_hilo1:
         tcg_out_opc_reg(s, i1, 0, a1, a2);
@@ -1536,7 +1604,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_movcond_i32:
-        tcg_out_movcond(s, args[5], a0, a1, a2, args[3]);
+        tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]);
         break;
 
     case INDEX_op_setcond_i32:
@@ -1592,8 +1660,10 @@ static const TCGTargetOpDef mips_op_defs[] = {
 
     { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
     { INDEX_op_mul_i32, { "r", "rZ", "rZ" } },
+#if !use_mips32r6_instructions
     { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } },
     { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } },
+#endif
     { INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_muluh_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_div_i32, { "r", "rZ", "rZ" } },
@@ -1623,7 +1693,11 @@ static const TCGTargetOpDef mips_op_defs[] = {
     { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
 
     { INDEX_op_brcond_i32, { "rZ", "rZ" } },
+#if use_mips32r6_instructions
+    { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
+#else
     { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } },
+#endif
     { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
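
    For readers new to the r6 bits in tcg_out_movcond() above: MIPS release 6
    removes MOVN/MOVZ, so a two-source conditional move is assembled from the
    new SELEQZ/SELNEZ instructions instead.  The sequence the r6 branch emits
    for ret = (c1 != 0) ? v1 : v2, sketched for illustration only:

        /*  seleqz  tmp1, v2, c1     # tmp1 = (c1 == 0) ? v2 : 0
         *  selnez  ret,  v1, c1     # ret  = (c1 != 0) ? v1 : 0
         *  or      ret,  ret, tmp1  # exactly one operand is non-zero
         *
         * Pre-r6 cores keep the single movn/movz form, which is why the
         * constraint table above only ties v2 to the output ("0") in the
         * non-r6 case.
         */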
 
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index f5ba52cacf..b1cda37b66 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -96,6 +96,13 @@ extern bool use_mips32_instructions;
 extern bool use_mips32r2_instructions;
 #endif
 
+/* MIPS32R6 instruction set detection */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 6)
+#define use_mips32r6_instructions  1
+#else
+#define use_mips32r6_instructions  0
+#endif
+
 /* optional instructions */
 #define TCG_TARGET_HAS_div_i32          1
 #define TCG_TARGET_HAS_rem_i32          1
@@ -105,8 +112,8 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_orc_i32          0
 #define TCG_TARGET_HAS_eqv_i32          0
 #define TCG_TARGET_HAS_nand_i32         0
-#define TCG_TARGET_HAS_mulu2_i32        1
-#define TCG_TARGET_HAS_muls2_i32        1
+#define TCG_TARGET_HAS_mulu2_i32        (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muls2_i32        (!use_mips32r6_instructions)
 #define TCG_TARGET_HAS_muluh_i32        1
 #define TCG_TARGET_HAS_mulsh_i32        1
 
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 92ef719e40..2c72565fb9 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -700,14 +700,14 @@ static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
 {
     int mb, me;
 
-    if ((c & 0xffff) == c) {
+    if (mask_operand(c, &mb, &me)) {
+        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
+    } else if ((c & 0xffff) == c) {
         tcg_out32(s, ANDI | SAI(src, dst, c));
         return;
     } else if ((c & 0xffff0000) == c) {
         tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
         return;
-    } else if (mask_operand(c, &mb, &me)) {
-        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
         tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
@@ -719,18 +719,18 @@ static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
     int mb, me;
 
     assert(TCG_TARGET_REG_BITS == 64);
-    if ((c & 0xffff) == c) {
-        tcg_out32(s, ANDI | SAI(src, dst, c));
-        return;
-    } else if ((c & 0xffff0000) == c) {
-        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
-        return;
-    } else if (mask64_operand(c, &mb, &me)) {
+    if (mask64_operand(c, &mb, &me)) {
         if (mb == 0) {
             tcg_out_rld(s, RLDICR, dst, src, 0, me);
         } else {
             tcg_out_rld(s, RLDICL, dst, src, 0, mb);
         }
+    } else if ((c & 0xffff) == c) {
+        tcg_out32(s, ANDI | SAI(src, dst, c));
+        return;
+    } else if ((c & 0xffff0000) == c) {
+        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
+        return;
     } else {
         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
         tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
@@ -1239,11 +1239,36 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
 
 void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
 {
-    TCGContext s;
+    tcg_insn_unit i1, i2;
+    uint64_t pair;
+    intptr_t diff = addr - jmp_addr;
+
+    if (in_range_b(diff)) {
+        i1 = B | (diff & 0x3fffffc);
+        i2 = NOP;
+    } else if (USE_REG_RA) {
+        intptr_t lo, hi;
+        diff = addr - (uintptr_t)tb_ret_addr;
+        lo = (int16_t)diff;
+        hi = (int32_t)(diff - lo);
+        assert(diff == hi + lo);
+        i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16);
+        i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo);
+    } else {
+        assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr);
+        i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16);
+        i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr);
+    }
+#ifdef HOST_WORDS_BIGENDIAN
+    pair = (uint64_t)i1 << 32 | i2;
+#else
+    pair = (uint64_t)i2 << 32 | i1;
+#endif
 
-    s.code_buf = s.code_ptr = (tcg_insn_unit *)jmp_addr;
-    tcg_out_b(&s, 0, (tcg_insn_unit *)addr);
-    flush_icache_range(jmp_addr, jmp_addr + tcg_current_code_size(&s));
+    /* ??? __atomic_store_8, presuming there's some way to do that
+       for 32-bit, otherwise this is good enough for 64-bit.  */
+    *(uint64_t *)jmp_addr = pair;
+    flush_icache_range(jmp_addr, jmp_addr + 8);
 }
 
 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
@@ -1855,12 +1880,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         if (USE_REG_RA) {
             ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr);
 
-            /* If we can use a direct branch, otherwise use the value in RA.
-               Note that the direct branch is always forward.  If it's in
-               range now, it'll still be in range after the movi.  Don't
-               bother about the 20 bytes where the test here fails but it
-               would succeed below.  */
-            if (!in_range_b(disp)) {
+            /* Use a direct branch if we can, otherwise use the value in RA.
+               Note that the direct branch is always backward, thus we need
+               to account for the possibility of 5 insns from the movi.  */
+            if (!in_range_b(disp - 20)) {
                 tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR);
                 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
                 tcg_out32(s, BCCTR | BO_ALWAYS);
@@ -1871,14 +1894,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_b(s, 0, tb_ret_addr);
         break;
     case INDEX_op_goto_tb:
-        if (s->tb_jmp_offset) {
-            /* Direct jump method.  */
-            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
-            s->code_ptr += 7;
-        } else {
-            /* Indirect jump method.  */
-            tcg_abort();
+        tcg_debug_assert(s->tb_jmp_offset);
+        /* Direct jump.  Ensure the next insns are 8-byte aligned. */
+        if ((uintptr_t)s->code_ptr & 7) {
+            tcg_out32(s, NOP);
         }
+        s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
+        /* To be replaced by either a branch+nop or a load into TMP1.  */
+        s->code_ptr += 2;
+        tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
+        tcg_out32(s, BCCTR | BO_ALWAYS);
         s->tb_next_offset[args[0]] = tcg_current_code_size(s);
         break;
     case INDEX_op_br:
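
    A note on the reworked goto_tb emission and ppc_tb_set_jmp_target() above:
    the jump slot is kept 8-byte aligned so that retargeting a chained TB is a
    single aligned 64-bit store, which concurrently executing code observes
    either fully written or not at all.  The resulting slot layout, sketched
    from the emitters shown (illustrative, not part of the patch):

        /*  [nop]                    ; pad, only if needed for 8-byte alignment
         *  insn0                    ; these two are rewritten together as one
         *  insn1                    ; uint64_t, holding either
         *                           ;   b target ; nop         (branch in range)
         *                           ;   addis+addi tmp1, ...   (RA-relative)
         *                           ;   addis+ori  tmp1, ...   (absolute)
         *  mtctr   tmp1
         *  bctr
         */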
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index c6f95703eb..6d0410c4b9 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -173,18 +173,15 @@ DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
 DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64))
 DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64))
 
+#define TLADDR_ARGS  (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2)
+#define DATA64_ARGS  (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
+
 /* QEMU specific */
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-DEF(insn_start, 0, 0, 2 * TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT)
-#else
-DEF(insn_start, 0, 0, TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT)
-#endif
+DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS,
+    TCG_OPF_NOT_PRESENT)
 DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
 DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
 
-#define TLADDR_ARGS    (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2)
-#define DATA64_ARGS  (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
-
 DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1,
     TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1,
diff --git a/tests/Makefile b/tests/Makefile
index 0531b30929..0739bfe1bf 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -150,6 +150,8 @@ gcov-files-pci-y += hw/display/virtio-gpu-pci.c
 gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c
 check-qtest-pci-y += tests/intel-hda-test$(EXESUF)
 gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c
+check-qtest-pci-$(CONFIG_POSIX) += tests/ivshmem-test$(EXESUF)
+gcov-files-pci-y += hw/misc/ivshmem.c
 
 check-qtest-i386-y = tests/endianness-test$(EXESUF)
 check-qtest-i386-y += tests/fdc-test$(EXESUF)
@@ -444,8 +446,16 @@ tests/test-mul64$(EXESUF): tests/test-mul64.o $(test-util-obj-y)
 tests/test-bitops$(EXESUF): tests/test-bitops.o $(test-util-obj-y)
 tests/test-crypto-hash$(EXESUF): tests/test-crypto-hash.o $(test-crypto-obj-y)
 tests/test-crypto-cipher$(EXESUF): tests/test-crypto-cipher.o $(test-crypto-obj-y)
+
+tests/crypto-tls-x509-helpers.o-cflags := $(TASN1_CFLAGS)
+tests/crypto-tls-x509-helpers.o-libs := $(TASN1_LIBS)
+tests/pkix_asn1_tab.o-cflags := $(TASN1_CFLAGS)
+
+tests/test-crypto-tlscredsx509.o-cflags := $(TASN1_CFLAGS)
 tests/test-crypto-tlscredsx509$(EXESUF): tests/test-crypto-tlscredsx509.o \
 	tests/crypto-tls-x509-helpers.o tests/pkix_asn1_tab.o $(test-crypto-obj-y)
+
+tests/test-crypto-tlssession.o-cflags := $(TASN1_CFLAGS)
 tests/test-crypto-tlssession$(EXESUF): tests/test-crypto-tlssession.o \
 	tests/crypto-tls-x509-helpers.o tests/pkix_asn1_tab.o $(test-crypto-obj-y)
 
@@ -514,12 +524,12 @@ tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_hel
 tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y)
 tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y)
 tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y)
+tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y)
+tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o
 
 ifeq ($(CONFIG_POSIX),y)
 LIBS += -lutil
 endif
-LIBS += $(TEST_LIBS)
-CFLAGS += $(TEST_CFLAGS)
 
 # QTest rules
 
diff --git a/tests/fdc-test.c b/tests/fdc-test.c
index 416394fc77..b5a4696d86 100644
--- a/tests/fdc-test.c
+++ b/tests/fdc-test.c
@@ -304,9 +304,7 @@ static void test_media_insert(void)
     qmp_discard_response("{'execute':'change', 'arguments':{"
                          " 'device':'floppy0', 'target': %s, 'arg': 'raw' }}",
                          test_image);
-    qmp_discard_response(""); /* ignore event
-                                 (FIXME open -> open transition?!) */
-    qmp_discard_response(""); /* ignore event */
+    qmp_discard_response(""); /* ignore event (open -> close) */
 
     dir = inb(FLOPPY_BASE + reg_dir);
     assert_bit_set(dir, DSKCHG);
diff --git a/tests/ivshmem-test.c b/tests/ivshmem-test.c
new file mode 100644
index 0000000000..c8f0cf0f70
--- /dev/null
+++ b/tests/ivshmem-test.c
@@ -0,0 +1,491 @@
+/*
+ * QTest testcase for ivshmem
+ *
+ * Copyright (c) 2014 SUSE LINUX Products GmbH
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <glib.h>
+#include <glib/gstdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include "contrib/ivshmem-server/ivshmem-server.h"
+#include "libqos/pci-pc.h"
+#include "libqtest.h"
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#define TMPSHMSIZE (1 << 20)
+static char *tmpshm;
+static void *tmpshmem;
+static char *tmpdir;
+static char *tmpserver;
+
+static void save_fn(QPCIDevice *dev, int devfn, void *data)
+{
+    QPCIDevice **pdev = (QPCIDevice **) data;
+
+    *pdev = dev;
+}
+
+static QPCIDevice *get_device(void)
+{
+    QPCIDevice *dev;
+    QPCIBus *pcibus;
+
+    pcibus = qpci_init_pc();
+    qpci_device_foreach(pcibus, 0x1af4, 0x1110, save_fn, &dev);
+    g_assert(dev != NULL);
+
+    return dev;
+}
+
+typedef struct _IVState {
+    QTestState *qtest;
+    void *reg_base, *mem_base;
+    QPCIDevice *dev;
+} IVState;
+
+enum Reg {
+    INTRMASK = 0,
+    INTRSTATUS = 4,
+    IVPOSITION = 8,
+    DOORBELL = 12,
+};
+
+static const char* reg2str(enum Reg reg) {
+    switch (reg) {
+    case INTRMASK:
+        return "IntrMask";
+    case INTRSTATUS:
+        return "IntrStatus";
+    case IVPOSITION:
+        return "IVPosition";
+    case DOORBELL:
+        return "DoorBell";
+    default:
+        return NULL;
+    }
+}
+
+static inline unsigned in_reg(IVState *s, enum Reg reg)
+{
+    const char *name = reg2str(reg);
+    QTestState *qtest = global_qtest;
+    unsigned res;
+
+    global_qtest = s->qtest;
+    res = qpci_io_readl(s->dev, s->reg_base + reg);
+    g_test_message("*%s -> %x\n", name, res);
+    global_qtest = qtest;
+
+    return res;
+}
+
+static inline void out_reg(IVState *s, enum Reg reg, unsigned v)
+{
+    const char *name = reg2str(reg);
+    QTestState *qtest = global_qtest;
+
+    global_qtest = s->qtest;
+    g_test_message("%x -> *%s\n", v, name);
+    qpci_io_writel(s->dev, s->reg_base + reg, v);
+    global_qtest = qtest;
+}
+
+static void setup_vm_cmd(IVState *s, const char *cmd, bool msix)
+{
+    uint64_t barsize;
+
+    s->qtest = qtest_start(cmd);
+
+    s->dev = get_device();
+
+    /* FIXME: other BAR order fails; the mappings change */
+    s->mem_base = qpci_iomap(s->dev, 2, &barsize);
+    g_assert_nonnull(s->mem_base);
+    g_assert_cmpuint(barsize, ==, TMPSHMSIZE);
+
+    if (msix) {
+        qpci_msix_enable(s->dev);
+    }
+
+    s->reg_base = qpci_iomap(s->dev, 0, &barsize);
+    g_assert_nonnull(s->reg_base);
+    g_assert_cmpuint(barsize, ==, 256);
+
+    qpci_device_enable(s->dev);
+}
+
+static void setup_vm(IVState *s)
+{
+    char *cmd = g_strdup_printf("-device ivshmem,shm=%s,size=1M", tmpshm);
+
+    setup_vm_cmd(s, cmd, false);
+
+    g_free(cmd);
+}
+
+static void test_ivshmem_single(void)
+{
+    IVState state, *s;
+    uint32_t data[1024];
+    int i;
+
+    setup_vm(&state);
+    s = &state;
+
+    /* valid io */
+    out_reg(s, INTRMASK, 0);
+    in_reg(s, INTRSTATUS);
+    in_reg(s, IVPOSITION);
+
+    out_reg(s, INTRMASK, 0xffffffff);
+    g_assert_cmpuint(in_reg(s, INTRMASK), ==, 0xffffffff);
+    out_reg(s, INTRSTATUS, 1);
+    /* XXX: intercept IRQ, not seen in resp */
+    g_assert_cmpuint(in_reg(s, INTRSTATUS), ==, 1);
+
+    /* invalid io */
+    out_reg(s, IVPOSITION, 1);
+    out_reg(s, DOORBELL, 8 << 16);
+
+    for (i = 0; i < G_N_ELEMENTS(data); i++) {
+        data[i] = i;
+    }
+    qtest_memwrite(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data));
+
+    for (i = 0; i < G_N_ELEMENTS(data); i++) {
+        g_assert_cmpuint(((uint32_t *)tmpshmem)[i], ==, i);
+    }
+
+    memset(data, 0, sizeof(data));
+
+    qtest_memread(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data));
+    for (i = 0; i < G_N_ELEMENTS(data); i++) {
+        g_assert_cmpuint(data[i], ==, i);
+    }
+
+    qtest_quit(s->qtest);
+}
+
+static void test_ivshmem_pair(void)
+{
+    IVState state1, state2, *s1, *s2;
+    char *data;
+    int i;
+
+    setup_vm(&state1);
+    s1 = &state1;
+    setup_vm(&state2);
+    s2 = &state2;
+
+    data = g_malloc0(TMPSHMSIZE);
+
+    /* host write, guest 1 & 2 read */
+    memset(tmpshmem, 0x42, TMPSHMSIZE);
+    qtest_memread(s1->qtest, (uintptr_t)s1->mem_base, data, TMPSHMSIZE);
+    for (i = 0; i < TMPSHMSIZE; i++) {
+        g_assert_cmpuint(data[i], ==, 0x42);
+    }
+    qtest_memread(s2->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE);
+    for (i = 0; i < TMPSHMSIZE; i++) {
+        g_assert_cmpuint(data[i], ==, 0x42);
+    }
+
+    /* guest 1 write, guest 2 read */
+    memset(data, 0x43, TMPSHMSIZE);
+    qtest_memwrite(s1->qtest, (uintptr_t)s1->mem_base, data, TMPSHMSIZE);
+    memset(data, 0, TMPSHMSIZE);
+    qtest_memread(s2->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE);
+    for (i = 0; i < TMPSHMSIZE; i++) {
+        g_assert_cmpuint(data[i], ==, 0x43);
+    }
+
+    /* guest 2 write, guest 1 read */
+    memset(data, 0x44, TMPSHMSIZE);
+    qtest_memwrite(s2->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE);
+    memset(data, 0, TMPSHMSIZE);
+    qtest_memread(s1->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE);
+    for (i = 0; i < TMPSHMSIZE; i++) {
+        g_assert_cmpuint(data[i], ==, 0x44);
+    }
+
+    qtest_quit(s1->qtest);
+    qtest_quit(s2->qtest);
+    g_free(data);
+}
+
+typedef struct ServerThread {
+    GThread *thread;
+    IvshmemServer *server;
+    int pipe[2]; /* to handle quit */
+} ServerThread;
+
+static void *server_thread(void *data)
+{
+    ServerThread *t = data;
+    IvshmemServer *server = t->server;
+
+    while (true) {
+        fd_set fds;
+        int maxfd, ret;
+
+        FD_ZERO(&fds);
+        FD_SET(t->pipe[0], &fds);
+        maxfd = t->pipe[0] + 1;
+
+        ivshmem_server_get_fds(server, &fds, &maxfd);
+
+        ret = select(maxfd, &fds, NULL, NULL, NULL);
+
+        if (ret < 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+
+            g_critical("select error: %s\n", strerror(errno));
+            break;
+        }
+        if (ret == 0) {
+            continue;
+        }
+
+        if (FD_ISSET(t->pipe[0], &fds)) {
+            break;
+        }
+
+        if (ivshmem_server_handle_fds(server, &fds, maxfd) < 0) {
+            g_critical("ivshmem_server_handle_fds() failed\n");
+            break;
+        }
+    }
+
+    return NULL;
+}
+
+static void setup_vm_with_server(IVState *s, int nvectors)
+{
+    char *cmd = g_strdup_printf("-chardev socket,id=chr0,path=%s,nowait "
+                                "-device ivshmem,size=1M,chardev=chr0,vectors=%d",
+                                tmpserver, nvectors);
+
+    setup_vm_cmd(s, cmd, true);
+
+    g_free(cmd);
+}
+
+static void test_ivshmem_server(void)
+{
+    IVState state1, state2, *s1, *s2;
+    ServerThread thread;
+    IvshmemServer server;
+    int ret, vm1, vm2;
+    int nvectors = 2;
+    guint64 end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
+
+    memset(tmpshmem, 0x42, TMPSHMSIZE);
+    ret = ivshmem_server_init(&server, tmpserver, tmpshm,
+                              TMPSHMSIZE, nvectors,
+                              g_test_verbose());
+    g_assert_cmpint(ret, ==, 0);
+
+    ret = ivshmem_server_start(&server);
+    g_assert_cmpint(ret, ==, 0);
+
+    setup_vm_with_server(&state1, nvectors);
+    s1 = &state1;
+    setup_vm_with_server(&state2, nvectors);
+    s2 = &state2;
+
+    g_assert_cmpuint(in_reg(s1, IVPOSITION), ==, 0xffffffff);
+    g_assert_cmpuint(in_reg(s2, IVPOSITION), ==, 0xffffffff);
+
+    g_assert_cmpuint(qtest_readb(s1->qtest, (uintptr_t)s1->mem_base), ==, 0x00);
+
+    thread.server = &server;
+    ret = pipe(thread.pipe);
+    g_assert_cmpint(ret, ==, 0);
+    thread.thread = g_thread_new("ivshmem-server", server_thread, &thread);
+    g_assert(thread.thread != NULL);
+
+    /* waiting until mapping is done */
+    while (g_get_monotonic_time() < end_time) {
+        g_usleep(1000);
+
+        if (qtest_readb(s1->qtest, (uintptr_t)s1->mem_base) == 0x42 &&
+            qtest_readb(s2->qtest, (uintptr_t)s2->mem_base) == 0x42) {
+            break;
+        }
+    }
+
+    /* check that the two guests got different VM IDs */
+    vm1 = in_reg(s1, IVPOSITION);
+    vm2 = in_reg(s2, IVPOSITION);
+    g_assert_cmpuint(vm1, !=, vm2);
+
+    global_qtest = s1->qtest;
+    ret = qpci_msix_table_size(s1->dev);
+    g_assert_cmpuint(ret, ==, nvectors);
+
+    /* ping vm2 -> vm1 */
+    ret = qpci_msix_pending(s1->dev, 0);
+    g_assert_cmpuint(ret, ==, 0);
+    out_reg(s2, DOORBELL, vm1 << 16);
+    do {
+        g_usleep(10000);
+        ret = qpci_msix_pending(s1->dev, 0);
+    } while (ret == 0 && g_get_monotonic_time() < end_time);
+    g_assert_cmpuint(ret, !=, 0);
+
+    /* ping vm1 -> vm2 */
+    global_qtest = s2->qtest;
+    ret = qpci_msix_pending(s2->dev, 0);
+    g_assert_cmpuint(ret, ==, 0);
+    out_reg(s1, DOORBELL, vm2 << 16);
+    do {
+        g_usleep(10000);
+        ret = qpci_msix_pending(s2->dev, 0);
+    } while (ret == 0 && g_get_monotonic_time() < end_time);
+    g_assert_cmpuint(ret, !=, 0);
+
+    qtest_quit(s2->qtest);
+    qtest_quit(s1->qtest);
+
+    if (qemu_write_full(thread.pipe[1], "q", 1) != 1) {
+        g_error("qemu_write_full: %s", g_strerror(errno));
+    }
+
+    g_thread_join(thread.thread);
+
+    ivshmem_server_close(&server);
+    close(thread.pipe[1]);
+    close(thread.pipe[0]);
+}
+
+#define PCI_SLOT_HP             0x06
+
+static void test_ivshmem_hotplug(void)
+{
+    gchar *opts;
+
+    qtest_start("");
+
+    opts = g_strdup_printf("'shm': '%s', 'size': '1M'", tmpshm);
+
+    qpci_plug_device_test("ivshmem", "iv1", PCI_SLOT_HP, opts);
+    qpci_unplug_acpi_device_test("iv1", PCI_SLOT_HP);
+
+    qtest_end();
+    g_free(opts);
+}
+
+static void test_ivshmem_memdev(void)
+{
+    IVState state;
+
+    /* just for the sake of checking the memory-backend property */
+    setup_vm_cmd(&state, "-object memory-backend-ram,size=1M,id=mb1"
+                 " -device ivshmem,memdev=mb1", false);
+
+    qtest_quit(state.qtest);
+}
+
+static void cleanup(void)
+{
+    if (tmpshmem) {
+        munmap(tmpshmem, TMPSHMSIZE);
+        tmpshmem = NULL;
+    }
+
+    if (tmpshm) {
+        shm_unlink(tmpshm);
+        g_free(tmpshm);
+        tmpshm = NULL;
+    }
+
+    if (tmpserver) {
+        g_unlink(tmpserver);
+        g_free(tmpserver);
+        tmpserver = NULL;
+    }
+
+    if (tmpdir) {
+        g_rmdir(tmpdir);
+        tmpdir = NULL;
+    }
+}
+
+static void abrt_handler(void *data)
+{
+    cleanup();
+}
+
+static gchar *mktempshm(int size, int *fd)
+{
+    while (true) {
+        gchar *name;
+
+        name = g_strdup_printf("/qtest-%u-%u", getpid(), g_random_int());
+        *fd = shm_open(name, O_CREAT|O_RDWR|O_EXCL,
+                       S_IRWXU|S_IRWXG|S_IRWXO);
+        if (*fd > 0) {
+            g_assert(ftruncate(*fd, size) == 0);
+            return name;
+        }
+
+        g_free(name);
+
+        if (errno != EEXIST) {
+            perror("shm_open");
+            return NULL;
+        }
+    }
+}
+
+int main(int argc, char **argv)
+{
+    int ret, fd;
+    gchar dir[] = "/tmp/ivshmem-test.XXXXXX";
+
+#if !GLIB_CHECK_VERSION(2, 31, 0)
+    if (!g_thread_supported()) {
+        g_thread_init(NULL);
+    }
+#endif
+
+    g_test_init(&argc, &argv, NULL);
+
+    qtest_add_abrt_handler(abrt_handler, NULL);
+    /* shm */
+    tmpshm = mktempshm(TMPSHMSIZE, &fd);
+    if (!tmpshm) {
+        return 0;
+    }
+    tmpshmem = mmap(0, TMPSHMSIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+    g_assert(tmpshmem != MAP_FAILED);
+    /* server */
+    if (mkdtemp(dir) == NULL) {
+        g_error("mkdtemp: %s", g_strerror(errno));
+    }
+    tmpdir = dir;
+    tmpserver = g_strconcat(tmpdir, "/server", NULL);
+
+    qtest_add_func("/ivshmem/single", test_ivshmem_single);
+    qtest_add_func("/ivshmem/pair", test_ivshmem_pair);
+    qtest_add_func("/ivshmem/server", test_ivshmem_server);
+    qtest_add_func("/ivshmem/hotplug", test_ivshmem_hotplug);
+    qtest_add_func("/ivshmem/memdev", test_ivshmem_memdev);
+
+    ret = g_test_run();
+
+    cleanup();
+
+    return ret;
+}
diff --git a/tests/libqtest.c b/tests/libqtest.c
index b6d700c606..f6f3d7a950 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -48,6 +48,7 @@ struct QTestState
     pid_t qemu_pid;  /* our child QEMU process */
 };
 
+static GHookList abrt_hooks;
 static GList *qtest_instances;
 static struct sigaction sigact_old;
 
@@ -111,10 +112,7 @@ static void kill_qemu(QTestState *s)
 
 static void sigabrt_handler(int signo)
 {
-    GList *elem;
-    for (elem = qtest_instances; elem; elem = elem->next) {
-        kill_qemu(elem->data);
-    }
+    g_hook_list_invoke(&abrt_hooks, FALSE);
 }
 
 static void setup_sigabrt_handler(void)
@@ -135,6 +133,23 @@ static void cleanup_sigabrt_handler(void)
     sigaction(SIGABRT, &sigact_old, NULL);
 }
 
+void qtest_add_abrt_handler(void (*fn), const void *data)
+{
+    GHook *hook;
+
+    /* Only install SIGABRT handler once */
+    if (!abrt_hooks.is_setup) {
+        g_hook_list_init(&abrt_hooks, sizeof(GHook));
+        setup_sigabrt_handler();
+    }
+
+    hook = g_hook_alloc(&abrt_hooks);
+    hook->func = fn;
+    hook->data = (void *)data;
+
+    g_hook_prepend(&abrt_hooks, hook);
+}
+
 QTestState *qtest_init(const char *extra_args)
 {
     QTestState *s;
@@ -155,12 +170,7 @@ QTestState *qtest_init(const char *extra_args)
     sock = init_socket(socket_path);
     qmpsock = init_socket(qmp_socket_path);
 
-    /* Only install SIGABRT handler once */
-    if (!qtest_instances) {
-        setup_sigabrt_handler();
-    }
-
-    qtest_instances = g_list_prepend(qtest_instances, s);
+    qtest_add_abrt_handler(kill_qemu, s);
 
     s->qemu_pid = fork();
     if (s->qemu_pid == 0) {
@@ -208,13 +218,14 @@ QTestState *qtest_init(const char *extra_args)
 
 void qtest_quit(QTestState *s)
 {
+    qtest_instances = g_list_remove(qtest_instances, s);
+    g_hook_destroy_link(&abrt_hooks, g_hook_find_data(&abrt_hooks, TRUE, s));
+
     /* Uninstall SIGABRT handler on last instance */
-    if (qtest_instances && !qtest_instances->next) {
+    if (!qtest_instances) {
         cleanup_sigabrt_handler();
     }
 
-    qtest_instances = g_list_remove(qtest_instances, s);
-
     kill_qemu(s);
     close(s->fd);
     close(s->qmp_fd);
diff --git a/tests/libqtest.h b/tests/libqtest.h
index 9818ef712d..df087452ee 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h
@@ -450,6 +450,8 @@ void qtest_add_data_func(const char *str, const void *data, void (*fn));
         g_free(path); \
     } while (0)
 
+void qtest_add_abrt_handler(void (*fn), const void *data);
+
 /**
  * qtest_start:
  * @args: other arguments to pass to QEMU
diff --git a/tests/qemu-iotests/071 b/tests/qemu-iotests/071
index 9eaa49b419..92ab991456 100755
--- a/tests/qemu-iotests/071
+++ b/tests/qemu-iotests/071
@@ -104,11 +104,20 @@ echo
 echo "=== Testing blkdebug on existing block device ==="
 echo
 
-run_qemu -drive "file=$TEST_IMG,format=raw,if=none,id=drive0" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive0",
+            "driver": "file",
+            "filename": "$TEST_IMG"
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "$IMGFMT",
             "id": "drive0-debug",
             "file": {
@@ -133,20 +142,29 @@ echo
 echo "=== Testing blkverify on existing block device ==="
 echo
 
-run_qemu -drive "file=$TEST_IMG,format=$IMGFMT,if=none,id=drive0" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive0",
+            "driver": "$IMGFMT",
+            "file": {
+                "driver": "file",
+                "filename": "$TEST_IMG"
+            }
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "blkverify",
             "id": "drive0-verify",
             "test": "drive0",
             "raw": {
-                "driver": "raw",
-                "file": {
-                    "driver": "file",
-                    "filename": "$TEST_IMG.base"
-                }
+                "driver": "file",
+                "filename": "$TEST_IMG.base"
             }
         }
     }
@@ -163,11 +181,20 @@ echo
 echo "=== Testing blkverify on existing raw block device ==="
 echo
 
-run_qemu -drive "file=$TEST_IMG.base,format=raw,if=none,id=drive0" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive0",
+            "driver": "file",
+            "filename": "$TEST_IMG.base"
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "blkverify",
             "id": "drive0-verify",
             "test": {
@@ -193,11 +220,20 @@ echo
 echo "=== Testing blkdebug's set-state through QMP ==="
 echo
 
-run_qemu -drive "file=$TEST_IMG,format=raw,if=none,id=drive0" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive0",
+            "driver": "file",
+            "filename": "$TEST_IMG"
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "$IMGFMT",
             "id": "drive0-debug",
             "file": {
diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out
index 8d2095aa7b..2b40eadae3 100644
--- a/tests/qemu-iotests/071.out
+++ b/tests/qemu-iotests/071.out
@@ -42,10 +42,11 @@ read failed: Input/output error
 
 === Testing blkdebug on existing block device ===
 
-Testing: -drive file=TEST_DIR/t.IMGFMT,format=raw,if=none,id=drive0
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
+{"return": {}}
 read failed: Input/output error
 {"return": ""}
 {"return": {}}
@@ -56,28 +57,31 @@ QEMU_PROG: Failed to flush the refcount block cache: Input/output error
 
 === Testing blkverify on existing block device ===
 
-Testing: -drive file=TEST_DIR/t.IMGFMT,format=IMGFMT,if=none,id=drive0
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
+{"return": {}}
 blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
 
 
 === Testing blkverify on existing raw block device ===
 
-Testing: -drive file=TEST_DIR/t.IMGFMT.base,format=raw,if=none,id=drive0
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
+{"return": {}}
 blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
 
 
 === Testing blkdebug's set-state through QMP ===
 
-Testing: -drive file=TEST_DIR/t.IMGFMT,format=raw,if=none,id=drive0
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
+{"return": {}}
 read 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 {"return": ""}
diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
index 51873ff7db..e4b4c6cbf3 100755
--- a/tests/qemu-iotests/081
+++ b/tests/qemu-iotests/081
@@ -102,17 +102,29 @@ $QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
 echo
 echo "== checking mixed reference/option specification =="
 
-run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive2",
+            "driver": "$IMGFMT",
+            "file": {
+                "driver": "file",
+                "filename": "$TEST_DIR/2.raw"
+            }
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "quorum",
             "id": "drive0-quorum",
             "vote-threshold": 2,
             "children": [
                 {
-                    "driver": "raw",
+                    "driver": "$IMGFMT",
                     "file": {
                         "driver": "file",
                         "filename": "$TEST_DIR/1.raw"
@@ -120,7 +132,7 @@ run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" <<EOF
                 },
                 "drive2",
                 {
-                    "driver": "raw",
+                    "driver": "$IMGFMT",
                     "file": {
                         "driver": "file",
                         "filename": "$TEST_DIR/3.raw"
diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
index 044793dcc5..70632314c8 100644
--- a/tests/qemu-iotests/081.out
+++ b/tests/qemu-iotests/081.out
@@ -26,11 +26,12 @@ read 10485760/10485760 bytes at offset 0
 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
 == checking mixed reference/option specification ==
-Testing: -drive file=TEST_DIR/2.IMGFMT,format=IMGFMT,if=none,id=drive2
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "NODE_NAME", "sectors-count": 20480, "sector-num": 0}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "drive2", "sectors-count": 20480, "sector-num": 0}}
 read 10485760/10485760 bytes at offset 0
 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 {"return": ""}
diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087
index 8694749947..af44299e07 100755
--- a/tests/qemu-iotests/087
+++ b/tests/qemu-iotests/087
@@ -54,7 +54,7 @@ size=128M
 _make_test_img $size
 
 echo
-echo === Missing ID ===
+echo === Missing ID and node-name ===
 echo
 
 run_qemu <<EOF
diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out
index c509a408e2..7d62cd5840 100644
--- a/tests/qemu-iotests/087.out
+++ b/tests/qemu-iotests/087.out
@@ -1,12 +1,12 @@
 QA output created by 087
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
 
-=== Missing ID ===
+=== Missing ID and node-name ===
 
 Testing:
 QMP_VERSION
 {"return": {}}
-{"error": {"class": "GenericError", "desc": "Block device needs an ID"}}
+{"error": {"class": "GenericError", "desc": "'id' and/or 'node-name' need to be specified for the root node"}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
 
diff --git a/tests/test-aio.c b/tests/test-aio.c
index 217e33772e..1623803e8c 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -118,6 +118,12 @@ static void *test_acquire_thread(void *opaque)
     return NULL;
 }
 
+static void set_event_notifier(AioContext *ctx, EventNotifier *notifier,
+                               EventNotifierHandler *handler)
+{
+    aio_set_event_notifier(ctx, notifier, false, handler);
+}
+
 static void dummy_notifier_read(EventNotifier *unused)
 {
     g_assert(false); /* should never be invoked */
@@ -131,7 +137,7 @@ static void test_acquire(void)
 
     /* Dummy event notifier ensures aio_poll() will block */
     event_notifier_init(&notifier, false);
-    aio_set_event_notifier(ctx, &notifier, dummy_notifier_read);
+    set_event_notifier(ctx, &notifier, dummy_notifier_read);
     g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */
 
     qemu_mutex_init(&data.start_lock);
@@ -149,7 +155,7 @@ static void test_acquire(void)
     aio_context_release(ctx);
 
     qemu_thread_join(&thread);
-    aio_set_event_notifier(ctx, &notifier, NULL);
+    set_event_notifier(ctx, &notifier, NULL);
     event_notifier_cleanup(&notifier);
 
     g_assert(data.thread_acquired);
@@ -308,11 +314,11 @@ static void test_set_event_notifier(void)
 {
     EventNotifierTestData data = { .n = 0, .active = 0 };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
     event_notifier_cleanup(&data.e);
@@ -322,7 +328,7 @@ static void test_wait_event_notifier(void)
 {
     EventNotifierTestData data = { .n = 0, .active = 1 };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
     g_assert_cmpint(data.active, ==, 1);
@@ -336,7 +342,7 @@ static void test_wait_event_notifier(void)
     g_assert_cmpint(data.n, ==, 1);
     g_assert_cmpint(data.active, ==, 0);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 1);
 
@@ -347,7 +353,7 @@ static void test_flush_event_notifier(void)
 {
     EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
     g_assert_cmpint(data.active, ==, 10);
@@ -363,18 +369,41 @@ static void test_flush_event_notifier(void)
     g_assert_cmpint(data.active, ==, 0);
     g_assert(!aio_poll(ctx, false));
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     g_assert(!aio_poll(ctx, false));
     event_notifier_cleanup(&data.e);
 }
 
+static void test_aio_external_client(void)
+{
+    int i, j;
+
+    for (i = 1; i < 3; i++) {
+        EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true };
+        event_notifier_init(&data.e, false);
+        aio_set_event_notifier(ctx, &data.e, true, event_ready_cb);
+        event_notifier_set(&data.e);
+        for (j = 0; j < i; j++) {
+            aio_disable_external(ctx);
+        }
+        for (j = 0; j < i; j++) {
+            assert(!aio_poll(ctx, false));
+            assert(event_notifier_test_and_clear(&data.e));
+            event_notifier_set(&data.e);
+            aio_enable_external(ctx);
+        }
+        assert(aio_poll(ctx, false));
+        event_notifier_cleanup(&data.e);
+    }
+}
+
 static void test_wait_event_notifier_noflush(void)
 {
     EventNotifierTestData data = { .n = 0 };
     EventNotifierTestData dummy = { .n = 0, .active = 1 };
 
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
 
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
@@ -387,7 +416,7 @@ static void test_wait_event_notifier_noflush(void)
 
     /* An active event notifier forces aio_poll to look at EventNotifiers.  */
     event_notifier_init(&dummy.e, false);
-    aio_set_event_notifier(ctx, &dummy.e, event_ready_cb);
+    set_event_notifier(ctx, &dummy.e, event_ready_cb);
 
     event_notifier_set(&data.e);
     g_assert(aio_poll(ctx, false));
@@ -407,10 +436,10 @@ static void test_wait_event_notifier_noflush(void)
     g_assert_cmpint(dummy.n, ==, 1);
     g_assert_cmpint(dummy.active, ==, 0);
 
-    aio_set_event_notifier(ctx, &dummy.e, NULL);
+    set_event_notifier(ctx, &dummy.e, NULL);
     event_notifier_cleanup(&dummy.e);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 2);
 
@@ -428,7 +457,7 @@ static void test_timer_schedule(void)
      * an fd to wait on. Fixing this breaks other tests. So create a dummy one.
      */
     event_notifier_init(&e, false);
-    aio_set_event_notifier(ctx, &e, dummy_io_handler_read);
+    set_event_notifier(ctx, &e, dummy_io_handler_read);
     aio_poll(ctx, false);
 
     aio_timer_init(ctx, &data.timer, data.clock_type,
@@ -467,7 +496,7 @@ static void test_timer_schedule(void)
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 2);
 
-    aio_set_event_notifier(ctx, &e, NULL);
+    set_event_notifier(ctx, &e, NULL);
     event_notifier_cleanup(&e);
 
     timer_del(&data.timer);
@@ -638,11 +667,11 @@ static void test_source_set_event_notifier(void)
 {
     EventNotifierTestData data = { .n = 0, .active = 0 };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
     event_notifier_cleanup(&data.e);
@@ -652,7 +681,7 @@ static void test_source_wait_event_notifier(void)
 {
     EventNotifierTestData data = { .n = 0, .active = 1 };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
     g_assert_cmpint(data.active, ==, 1);
@@ -666,7 +695,7 @@ static void test_source_wait_event_notifier(void)
     g_assert_cmpint(data.n, ==, 1);
     g_assert_cmpint(data.active, ==, 0);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 1);
 
@@ -677,7 +706,7 @@ static void test_source_flush_event_notifier(void)
 {
     EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
     g_assert_cmpint(data.active, ==, 10);
@@ -693,7 +722,7 @@ static void test_source_flush_event_notifier(void)
     g_assert_cmpint(data.active, ==, 0);
     g_assert(!g_main_context_iteration(NULL, false));
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     while (g_main_context_iteration(NULL, false));
     event_notifier_cleanup(&data.e);
 }
@@ -704,7 +733,7 @@ static void test_source_wait_event_notifier_noflush(void)
     EventNotifierTestData dummy = { .n = 0, .active = 1 };
 
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
 
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
@@ -717,7 +746,7 @@ static void test_source_wait_event_notifier_noflush(void)
 
     /* An active event notifier forces aio_poll to look at EventNotifiers.  */
     event_notifier_init(&dummy.e, false);
-    aio_set_event_notifier(ctx, &dummy.e, event_ready_cb);
+    set_event_notifier(ctx, &dummy.e, event_ready_cb);
 
     event_notifier_set(&data.e);
     g_assert(g_main_context_iteration(NULL, false));
@@ -737,10 +766,10 @@ static void test_source_wait_event_notifier_noflush(void)
     g_assert_cmpint(dummy.n, ==, 1);
     g_assert_cmpint(dummy.active, ==, 0);
 
-    aio_set_event_notifier(ctx, &dummy.e, NULL);
+    set_event_notifier(ctx, &dummy.e, NULL);
     event_notifier_cleanup(&dummy.e);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 2);
 
@@ -759,7 +788,7 @@ static void test_source_timer_schedule(void)
      * an fd to wait on. Fixing this breaks other tests. So create a dummy one.
      */
     event_notifier_init(&e, false);
-    aio_set_event_notifier(ctx, &e, dummy_io_handler_read);
+    set_event_notifier(ctx, &e, dummy_io_handler_read);
     do {} while (g_main_context_iteration(NULL, false));
 
     aio_timer_init(ctx, &data.timer, data.clock_type,
@@ -784,7 +813,7 @@ static void test_source_timer_schedule(void)
     g_assert_cmpint(data.n, ==, 2);
     g_assert(qemu_clock_get_ns(data.clock_type) > expiry);
 
-    aio_set_event_notifier(ctx, &e, NULL);
+    set_event_notifier(ctx, &e, NULL);
     event_notifier_cleanup(&e);
 
     timer_del(&data.timer);
@@ -826,6 +855,7 @@ int main(int argc, char **argv)
     g_test_add_func("/aio/event/wait",              test_wait_event_notifier);
     g_test_add_func("/aio/event/wait/no-flush-cb",  test_wait_event_notifier_noflush);
     g_test_add_func("/aio/event/flush",             test_flush_event_notifier);
+    g_test_add_func("/aio/external-client",         test_aio_external_client);
     g_test_add_func("/aio/timer/schedule",          test_timer_schedule);
 
     g_test_add_func("/aio-gsource/flush",                   test_source_flush);
diff --git a/tests/test-crypto-cipher.c b/tests/test-crypto-cipher.c
index 9d38d2640a..f4946a0af0 100644
--- a/tests/test-crypto-cipher.c
+++ b/tests/test-crypto-cipher.c
@@ -287,6 +287,79 @@ static void test_cipher(const void *opaque)
     qcrypto_cipher_free(cipher);
 }
 
+
+static void test_cipher_null_iv(void)
+{
+    QCryptoCipher *cipher;
+    uint8_t key[32] = { 0 };
+    uint8_t plaintext[32] = { 0 };
+    uint8_t ciphertext[32] = { 0 };
+
+    cipher = qcrypto_cipher_new(
+        QCRYPTO_CIPHER_ALG_AES_256,
+        QCRYPTO_CIPHER_MODE_CBC,
+        key, sizeof(key),
+        &error_abort);
+    g_assert(cipher != NULL);
+
+    /* Don't call qcrypto_cipher_setiv */
+
+    qcrypto_cipher_encrypt(cipher,
+                           plaintext,
+                           ciphertext,
+                           sizeof(plaintext),
+                           &error_abort);
+
+    qcrypto_cipher_free(cipher);
+}
+
+static void test_cipher_short_plaintext(void)
+{
+    Error *err = NULL;
+    QCryptoCipher *cipher;
+    uint8_t key[32] = { 0 };
+    uint8_t plaintext1[20] = { 0 };
+    uint8_t ciphertext1[20] = { 0 };
+    uint8_t plaintext2[40] = { 0 };
+    uint8_t ciphertext2[40] = { 0 };
+    int ret;
+
+    cipher = qcrypto_cipher_new(
+        QCRYPTO_CIPHER_ALG_AES_256,
+        QCRYPTO_CIPHER_MODE_CBC,
+        key, sizeof(key),
+        &error_abort);
+    g_assert(cipher != NULL);
+
+    /* Should report an error as plaintext is shorter
+     * than block size
+     */
+    ret = qcrypto_cipher_encrypt(cipher,
+                                 plaintext1,
+                                 ciphertext1,
+                                 sizeof(plaintext1),
+                                 &err);
+    g_assert(ret == -1);
+    g_assert(err != NULL);
+
+    error_free(err);
+    err = NULL;
+
+    /* Should report an error as plaintext is larger than
+     * block size, but not a multiple of block size
+     */
+    ret = qcrypto_cipher_encrypt(cipher,
+                                 plaintext2,
+                                 ciphertext2,
+                                 sizeof(plaintext2),
+                                 &err);
+    g_assert(ret == -1);
+    g_assert(err != NULL);
+
+    error_free(err);
+    qcrypto_cipher_free(cipher);
+}
+
 int main(int argc, char **argv)
 {
     size_t i;
@@ -298,5 +371,12 @@ int main(int argc, char **argv)
     for (i = 0; i < G_N_ELEMENTS(test_data); i++) {
         g_test_add_data_func(test_data[i].path, &test_data[i], test_cipher);
     }
+
+    g_test_add_func("/crypto/cipher/null-iv",
+                    test_cipher_null_iv);
+
+    g_test_add_func("/crypto/cipher/short-plaintext",
+                    test_cipher_short_plaintext);
+
     return g_test_run();
 }
diff --git a/tests/test-qga.c b/tests/test-qga.c
index 0531c9f3bc..64738465c5 100644
--- a/tests/test-qga.c
+++ b/tests/test-qga.c
@@ -273,13 +273,15 @@ static void test_qga_get_fsinfo(gconstpointer fix)
     g_assert_nonnull(ret);
     qmp_assert_no_error(ret);
 
-    /* check there is at least a fs */
+    /* sanity-check the response if there are any filesystems */
     list = qdict_get_qlist(ret, "return");
     entry = qlist_first(list);
-    g_assert(qdict_haskey(qobject_to_qdict(entry->value), "name"));
-    g_assert(qdict_haskey(qobject_to_qdict(entry->value), "mountpoint"));
-    g_assert(qdict_haskey(qobject_to_qdict(entry->value), "type"));
-    g_assert(qdict_haskey(qobject_to_qdict(entry->value), "disk"));
+    if (entry) {
+        g_assert(qdict_haskey(qobject_to_qdict(entry->value), "name"));
+        g_assert(qdict_haskey(qobject_to_qdict(entry->value), "mountpoint"));
+        g_assert(qdict_haskey(qobject_to_qdict(entry->value), "type"));
+        g_assert(qdict_haskey(qobject_to_qdict(entry->value), "disk"));
+    }
 
     QDECREF(ret);
 }
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
new file mode 100644
index 0000000000..fa18ad55fb
--- /dev/null
+++ b/tests/vhost-user-bridge.c
@@ -0,0 +1,1110 @@
+/*
+ * Vhost User Bridge
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Victor Kaplansky <victork@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+/*
+ * TODO:
+ *     - main should get parameters from the command line.
+ *     - implement all request handlers.
+ *     - test for broken requests and virtqueue.
+ *     - implement features defined by Virtio 1.0 spec.
+ *     - support mergeable buffers and indirect descriptors.
+ *     - implement RESET_DEVICE request.
+ *     - implement clean shutdown.
+ *     - implement non-blocking writes to UDP backend.
+ *     - implement polling strategy.
+ */
+
+#include <stddef.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/unistd.h>
+#include <sys/mman.h>
+#include <sys/eventfd.h>
+#include <arpa/inet.h>
+
+#include <linux/vhost.h>
+
+#include "qemu/atomic.h"
+#include "standard-headers/linux/virtio_net.h"
+#include "standard-headers/linux/virtio_ring.h"
+
+#define VHOST_USER_BRIDGE_DEBUG 1
+
+#define DPRINT(...) \
+    do { \
+        if (VHOST_USER_BRIDGE_DEBUG) { \
+            printf(__VA_ARGS__); \
+        } \
+    } while (0)
+
+typedef void (*CallbackFunc)(int sock, void *ctx);
+
+typedef struct Event {
+    void *ctx;
+    CallbackFunc callback;
+} Event;
+
+typedef struct Dispatcher {
+    int max_sock;
+    fd_set fdset;
+    Event events[FD_SETSIZE];
+} Dispatcher;
+
+static void
+vubr_die(const char *s)
+{
+    perror(s);
+    exit(1);
+}
+
+static int
+dispatcher_init(Dispatcher *dispr)
+{
+    FD_ZERO(&dispr->fdset);
+    dispr->max_sock = -1;
+    return 0;
+}
+
+static int
+dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
+{
+    if (sock >= FD_SETSIZE) {
+        fprintf(stderr,
+                "Error: Failed to add new event. sock %d should be less than %d\n",
+                sock, FD_SETSIZE);
+        return -1;
+    }
+
+    dispr->events[sock].ctx = ctx;
+    dispr->events[sock].callback = cb;
+
+    FD_SET(sock, &dispr->fdset);
+    if (sock > dispr->max_sock) {
+        dispr->max_sock = sock;
+    }
+    DPRINT("Added sock %d for watching. max_sock: %d\n",
+           sock, dispr->max_sock);
+    return 0;
+}
+
+#if 0
+/* dispatcher_remove() is not currently in use but may be useful
+ * in the future. */
+static int
+dispatcher_remove(Dispatcher *dispr, int sock)
+{
+    if (sock >= FD_SETSIZE) {
+        fprintf(stderr,
+                "Error: Failed to remove event. sock %d should be less than %d\n",
+                sock, FD_SETSIZE);
+        return -1;
+    }
+
+    FD_CLR(sock, &dispr->fdset);
+    return 0;
+}
+#endif
+
+/* timeout in us */
+static int
+dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
+{
+    struct timeval tv;
+    tv.tv_sec = timeout / 1000000;
+    tv.tv_usec = timeout % 1000000;
+
+    fd_set fdset = dispr->fdset;
+
+    /* wait until some of the sockets become readable. */
+    int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);
+
+    if (rc == -1) {
+        vubr_die("select");
+    }
+
+    /* Timeout */
+    if (rc == 0) {
+        return 0;
+    }
+
+    /* Now call callback for every ready socket. */
+
+    int sock;
+    for (sock = 0; sock < dispr->max_sock + 1; sock++)
+        if (FD_ISSET(sock, &fdset)) {
+            Event *e = &dispr->events[sock];
+            e->callback(sock, e->ctx);
+        }
+
+    return 0;
+}
+
+typedef struct VubrVirtq {
+    int call_fd;
+    int kick_fd;
+    uint32_t size;
+    uint16_t last_avail_index;
+    uint16_t last_used_index;
+    struct vring_desc *desc;
+    struct vring_avail *avail;
+    struct vring_used *used;
+} VubrVirtq;
+
+/* Based on qemu/hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS    8
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+
+enum VhostUserProtocolFeature {
+    VHOST_USER_PROTOCOL_F_MQ = 0,
+    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
+    VHOST_USER_PROTOCOL_F_RARP = 2,
+
+    VHOST_USER_PROTOCOL_F_MAX
+};
+
+#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
+
+typedef enum VhostUserRequest {
+    VHOST_USER_NONE = 0,
+    VHOST_USER_GET_FEATURES = 1,
+    VHOST_USER_SET_FEATURES = 2,
+    VHOST_USER_SET_OWNER = 3,
+    VHOST_USER_RESET_DEVICE = 4,
+    VHOST_USER_SET_MEM_TABLE = 5,
+    VHOST_USER_SET_LOG_BASE = 6,
+    VHOST_USER_SET_LOG_FD = 7,
+    VHOST_USER_SET_VRING_NUM = 8,
+    VHOST_USER_SET_VRING_ADDR = 9,
+    VHOST_USER_SET_VRING_BASE = 10,
+    VHOST_USER_GET_VRING_BASE = 11,
+    VHOST_USER_SET_VRING_KICK = 12,
+    VHOST_USER_SET_VRING_CALL = 13,
+    VHOST_USER_SET_VRING_ERR = 14,
+    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+    VHOST_USER_GET_QUEUE_NUM = 17,
+    VHOST_USER_SET_VRING_ENABLE = 18,
+    VHOST_USER_SEND_RARP = 19,
+    VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+    uint64_t guest_phys_addr;
+    uint64_t memory_size;
+    uint64_t userspace_addr;
+    uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+    uint32_t nregions;
+    uint32_t padding;
+    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserMsg {
+    VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK     (0x3)
+#define VHOST_USER_REPLY_MASK       (0x1<<2)
+    uint32_t flags;
+    uint32_t size; /* the following payload size */
+    union {
+#define VHOST_USER_VRING_IDX_MASK   (0xff)
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+        uint64_t u64;
+        struct vhost_vring_state state;
+        struct vhost_vring_addr addr;
+        VhostUserMemory memory;
+    } payload;
+    int fds[VHOST_MEMORY_MAX_NREGIONS];
+    int fd_num;
+} QEMU_PACKED VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    (0x1)
+
+#define MAX_NR_VIRTQUEUE (8)
+
+typedef struct VubrDevRegion {
+    /* Guest Physical address. */
+    uint64_t gpa;
+    /* Memory region size. */
+    uint64_t size;
+    /* QEMU virtual address (userspace). */
+    uint64_t qva;
+    /* Starting offset in our mmapped space. */
+    uint64_t mmap_offset;
+    /* Start address of mmapped space. */
+    uint64_t mmap_addr;
+} VubrDevRegion;
+
+typedef struct VubrDev {
+    int sock;
+    Dispatcher dispatcher;
+    uint32_t nregions;
+    VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+    VubrVirtq vq[MAX_NR_VIRTQUEUE];
+    int backend_udp_sock;
+    struct sockaddr_in backend_udp_dest;
+} VubrDev;
+
+static const char *vubr_request_str[] = {
+    [VHOST_USER_NONE]                   =  "VHOST_USER_NONE",
+    [VHOST_USER_GET_FEATURES]           =  "VHOST_USER_GET_FEATURES",
+    [VHOST_USER_SET_FEATURES]           =  "VHOST_USER_SET_FEATURES",
+    [VHOST_USER_SET_OWNER]              =  "VHOST_USER_SET_OWNER",
+    [VHOST_USER_RESET_DEVICE]           =  "VHOST_USER_RESET_DEVICE",
+    [VHOST_USER_SET_MEM_TABLE]          =  "VHOST_USER_SET_MEM_TABLE",
+    [VHOST_USER_SET_LOG_BASE]           =  "VHOST_USER_SET_LOG_BASE",
+    [VHOST_USER_SET_LOG_FD]             =  "VHOST_USER_SET_LOG_FD",
+    [VHOST_USER_SET_VRING_NUM]          =  "VHOST_USER_SET_VRING_NUM",
+    [VHOST_USER_SET_VRING_ADDR]         =  "VHOST_USER_SET_VRING_ADDR",
+    [VHOST_USER_SET_VRING_BASE]         =  "VHOST_USER_SET_VRING_BASE",
+    [VHOST_USER_GET_VRING_BASE]         =  "VHOST_USER_GET_VRING_BASE",
+    [VHOST_USER_SET_VRING_KICK]         =  "VHOST_USER_SET_VRING_KICK",
+    [VHOST_USER_SET_VRING_CALL]         =  "VHOST_USER_SET_VRING_CALL",
+    [VHOST_USER_SET_VRING_ERR]          =  "VHOST_USER_SET_VRING_ERR",
+    [VHOST_USER_GET_PROTOCOL_FEATURES]  =  "VHOST_USER_GET_PROTOCOL_FEATURES",
+    [VHOST_USER_SET_PROTOCOL_FEATURES]  =  "VHOST_USER_SET_PROTOCOL_FEATURES",
+    [VHOST_USER_GET_QUEUE_NUM]          =  "VHOST_USER_GET_QUEUE_NUM",
+    [VHOST_USER_SET_VRING_ENABLE]       =  "VHOST_USER_SET_VRING_ENABLE",
+    [VHOST_USER_SEND_RARP]              =  "VHOST_USER_SEND_RARP",
+    [VHOST_USER_MAX]                    =  "VHOST_USER_MAX",
+};
+
+static void
+print_buffer(uint8_t *buf, size_t len)
+{
+    int i;
+    printf("Raw buffer:\n");
+    for (i = 0; i < len; i++) {
+        if (i % 16 == 0) {
+            printf("\n");
+        }
+        if (i % 4 == 0) {
+            printf("   ");
+        }
+        printf("%02x ", buf[i]);
+    }
+    printf("\n............................................................\n");
+}
+
+/* Translate guest physical address to our virtual address.  */
+static uint64_t
+gpa_to_va(VubrDev *dev, uint64_t guest_addr)
+{
+    int i;
+
+    /* Find matching memory region.  */
+    for (i = 0; i < dev->nregions; i++) {
+        VubrDevRegion *r = &dev->regions[i];
+
+        if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
+            return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
+        }
+    }
+
+    assert(!"address not found in regions");
+    return 0;
+}
+
+/* Translate qemu virtual address to our virtual address.  */
+static uint64_t
+qva_to_va(VubrDev *dev, uint64_t qemu_addr)
+{
+    int i;
+
+    /* Find matching memory region.  */
+    for (i = 0; i < dev->nregions; i++) {
+        VubrDevRegion *r = &dev->regions[i];
+
+        if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
+            return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
+        }
+    }
+
+    assert(!"address not found in regions");
+    return 0;
+}
+
+static void
+vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
+{
+    char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
+    struct iovec iov = {
+        .iov_base = (char *)vmsg,
+        .iov_len = VHOST_USER_HDR_SIZE,
+    };
+    struct msghdr msg = {
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+        .msg_control = control,
+        .msg_controllen = sizeof(control),
+    };
+    size_t fd_size;
+    struct cmsghdr *cmsg;
+    int rc;
+
+    rc = recvmsg(conn_fd, &msg, 0);
+
+    if (rc <= 0) {
+        vubr_die("recvmsg");
+    }
+
+    vmsg->fd_num = 0;
+    for (cmsg = CMSG_FIRSTHDR(&msg);
+         cmsg != NULL;
+         cmsg = CMSG_NXTHDR(&msg, cmsg))
+    {
+        if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+            fd_size = cmsg->cmsg_len - CMSG_LEN(0);
+            vmsg->fd_num = fd_size / sizeof(int);
+            memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
+            break;
+        }
+    }
+
+    if (vmsg->size > sizeof(vmsg->payload)) {
+        fprintf(stderr,
+                "Error: too big message request: %d, size: vmsg->size: %u, "
+                "while sizeof(vmsg->payload) = %lu\n",
+                vmsg->request, vmsg->size, sizeof(vmsg->payload));
+        exit(1);
+    }
+
+    if (vmsg->size) {
+        rc = read(conn_fd, &vmsg->payload, vmsg->size);
+        if (rc <= 0) {
+            vubr_die("recvmsg");
+        }
+
+        assert(rc == vmsg->size);
+    }
+}
+
+static void
+vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
+{
+    int rc;
+
+    do {
+        rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
+    } while (rc < 0 && errno == EINTR);
+
+    if (rc < 0) {
+        vubr_die("write");
+    }
+}
+
+static void
+vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
+{
+    int slen = sizeof(struct sockaddr_in);
+
+    if (sendto(dev->backend_udp_sock, buf, len, 0,
+               (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
+        vubr_die("sendto()");
+    }
+}
+
+static int
+vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen)
+{
+    int slen = sizeof(struct sockaddr_in);
+    int rc;
+
+    rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0,
+                  (struct sockaddr *) &dev->backend_udp_dest,
+                  (socklen_t *)&slen);
+    if (rc == -1) {
+        vubr_die("recvfrom()");
+    }
+
+    return rc;
+}
+
+static void
+vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
+{
+    int hdrlen = sizeof(struct virtio_net_hdr_v1);
+
+    if (VHOST_USER_BRIDGE_DEBUG) {
+        print_buffer(buf, len);
+    }
+    vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
+}
+
+/* Kick the guest if necessary. */
+static void
+vubr_virtqueue_kick(VubrVirtq *vq)
+{
+    if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
+        DPRINT("Kicking the guest...\n");
+        eventfd_write(vq->call_fd, 1);
+    }
+}
+
+static void
+vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
+{
+    struct vring_desc *desc   = vq->desc;
+    struct vring_avail *avail = vq->avail;
+    struct vring_used *used   = vq->used;
+
+    unsigned int size = vq->size;
+
+    uint16_t avail_index = atomic_mb_read(&avail->idx);
+
+    /* We check the available descriptors before posting the
+     * buffer, so here we assume that there are enough
+     * available descriptors. */
+    assert(vq->last_avail_index != avail_index);
+    uint16_t a_index = vq->last_avail_index % size;
+    uint16_t u_index = vq->last_used_index % size;
+    uint16_t d_index = avail->ring[a_index];
+
+    int i = d_index;
+
+    DPRINT("Post packet to guest on vq:\n");
+    DPRINT("    size             = %d\n", vq->size);
+    DPRINT("    last_avail_index = %d\n", vq->last_avail_index);
+    DPRINT("    last_used_index  = %d\n", vq->last_used_index);
+    DPRINT("    a_index = %d\n", a_index);
+    DPRINT("    u_index = %d\n", u_index);
+    DPRINT("    d_index = %d\n", d_index);
+    DPRINT("    desc[%d].addr    = 0x%016"PRIx64"\n", i, desc[i].addr);
+    DPRINT("    desc[%d].len     = %d\n", i, desc[i].len);
+    DPRINT("    desc[%d].flags   = %d\n", i, desc[i].flags);
+    DPRINT("    avail->idx = %d\n", avail_index);
+    DPRINT("    used->idx  = %d\n", used->idx);
+
+    if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
+        /* FIXME: we should find a writable descriptor. */
+        fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
+        exit(1);
+    }
+
+    void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
+    uint32_t chunk_len = desc[i].len;
+
+    if (len <= chunk_len) {
+        memcpy(chunk_start, buf, len);
+    } else {
+        fprintf(stderr,
+                "Received too long packet from the backend. Dropping...\n");
+        return;
+    }
+
+    /* Add descriptor to the used ring. */
+    used->ring[u_index].id = d_index;
+    used->ring[u_index].len = len;
+
+    vq->last_avail_index++;
+    vq->last_used_index++;
+
+    atomic_mb_set(&used->idx, vq->last_used_index);
+
+    /* Kick the guest if necessary. */
+    vubr_virtqueue_kick(vq);
+}
+
+static int
+vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
+{
+    struct vring_desc *desc   = vq->desc;
+    struct vring_avail *avail = vq->avail;
+    struct vring_used *used   = vq->used;
+
+    unsigned int size = vq->size;
+
+    uint16_t a_index = vq->last_avail_index % size;
+    uint16_t u_index = vq->last_used_index % size;
+    uint16_t d_index = avail->ring[a_index];
+
+    uint32_t i, len = 0;
+    size_t buf_size = 4096;
+    uint8_t buf[4096];
+
+    DPRINT("Chunks: ");
+    i = d_index;
+    do {
+        void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
+        uint32_t chunk_len = desc[i].len;
+
+        if (len + chunk_len < buf_size) {
+            memcpy(buf + len, chunk_start, chunk_len);
+            DPRINT("%d ", chunk_len);
+        } else {
+            fprintf(stderr, "Error: too long packet. Dropping...\n");
+            break;
+        }
+
+        len += chunk_len;
+
+        if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
+            break;
+        }
+
+        i = desc[i].next;
+    } while (1);
+    DPRINT("\n");
+
+    if (!len) {
+        return -1;
+    }
+
+    /* Add descriptor to the used ring. */
+    used->ring[u_index].id = d_index;
+    used->ring[u_index].len = len;
+
+    vubr_consume_raw_packet(dev, buf, len);
+
+    return 0;
+}
+
+static void
+vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
+{
+    struct vring_avail *avail = vq->avail;
+    struct vring_used *used = vq->used;
+
+    while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
+        vubr_process_desc(dev, vq);
+        vq->last_avail_index++;
+        vq->last_used_index++;
+    }
+
+    atomic_mb_set(&used->idx, vq->last_used_index);
+}
+
+static void
+vubr_backend_recv_cb(int sock, void *ctx)
+{
+    VubrDev *dev = (VubrDev *) ctx;
+    VubrVirtq *rx_vq = &dev->vq[0];
+    uint8_t buf[4096];
+    struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
+    int hdrlen = sizeof(struct virtio_net_hdr_v1);
+    int buflen = sizeof(buf);
+    int len;
+
+    DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
+
+    uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
+
+    /* If there are no available descriptors, just do nothing.
+     * The buffer will be handled by the next arriving UDP packet,
+     * or by the next kick on the receive virtq. */
+    if (rx_vq->last_avail_index == avail_index) {
+        DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
+        return;
+    }
+
+    len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);
+
+    *hdr = (struct virtio_net_hdr_v1) { };
+    hdr->num_buffers = 1;
+    vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
+}
+
+static void
+vubr_kick_cb(int sock, void *ctx)
+{
+    VubrDev *dev = (VubrDev *) ctx;
+    eventfd_t kick_data;
+    ssize_t rc;
+
+    rc = eventfd_read(sock, &kick_data);
+    if (rc == -1) {
+        vubr_die("eventfd_read()");
+    } else {
+        DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
+        vubr_process_avail(dev, &dev->vq[1]);
+    }
+}
+
+static int
+vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    vmsg->payload.u64 =
+            ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
+             (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+             (1ULL << VIRTIO_NET_F_CTRL_RX) |
+             (1ULL << VHOST_F_LOG_ALL));
+    vmsg->size = sizeof(vmsg->payload.u64);
+
+    DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+    /* reply */
+    return 1;
+}
+
+static int
+vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    return 0;
+}
+
+static int
+vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    return 0;
+}
+
+static int
+vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    int i;
+    VhostUserMemory *memory = &vmsg->payload.memory;
+    dev->nregions = memory->nregions;
+
+    DPRINT("Nregions: %d\n", memory->nregions);
+    for (i = 0; i < dev->nregions; i++) {
+        void *mmap_addr;
+        VhostUserMemoryRegion *msg_region = &memory->regions[i];
+        VubrDevRegion *dev_region = &dev->regions[i];
+
+        DPRINT("Region %d\n", i);
+        DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
+               msg_region->guest_phys_addr);
+        DPRINT("    memory_size:     0x%016"PRIx64"\n",
+               msg_region->memory_size);
+        DPRINT("    userspace_addr   0x%016"PRIx64"\n",
+               msg_region->userspace_addr);
+        DPRINT("    mmap_offset      0x%016"PRIx64"\n",
+               msg_region->mmap_offset);
+
+        dev_region->gpa         = msg_region->guest_phys_addr;
+        dev_region->size        = msg_region->memory_size;
+        dev_region->qva         = msg_region->userspace_addr;
+        dev_region->mmap_offset = msg_region->mmap_offset;
+
+        /* We don't use the offset argument of mmap() since the
+         * mapped address has to be page aligned, and we use huge
+         * pages.  */
+        mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
+                         PROT_READ | PROT_WRITE, MAP_SHARED,
+                         vmsg->fds[i], 0);
+
+        if (mmap_addr == MAP_FAILED) {
+            vubr_die("mmap");
+        }
+
+        dev_region->mmap_addr = (uint64_t) mmap_addr;
+        DPRINT("    mmap_addr:       0x%016"PRIx64"\n", dev_region->mmap_addr);
+    }
+
+    return 0;
+}
+
+static int
+vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    unsigned int index = vmsg->payload.state.index;
+    unsigned int num = vmsg->payload.state.num;
+
+    DPRINT("State.index: %d\n", index);
+    DPRINT("State.num:   %d\n", num);
+    dev->vq[index].size = num;
+    return 0;
+}
+
+static int
+vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    struct vhost_vring_addr *vra = &vmsg->payload.addr;
+    unsigned int index = vra->index;
+    VubrVirtq *vq = &dev->vq[index];
+
+    DPRINT("vhost_vring_addr:\n");
+    DPRINT("    index:  %d\n", vra->index);
+    DPRINT("    flags:  %d\n", vra->flags);
+    DPRINT("    desc_user_addr:   0x%016llx\n", vra->desc_user_addr);
+    DPRINT("    used_user_addr:   0x%016llx\n", vra->used_user_addr);
+    DPRINT("    avail_user_addr:  0x%016llx\n", vra->avail_user_addr);
+    DPRINT("    log_guest_addr:   0x%016llx\n", vra->log_guest_addr);
+
+    vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
+    vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
+    vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
+
+    DPRINT("Setting virtq addresses:\n");
+    DPRINT("    vring_desc  at %p\n", vq->desc);
+    DPRINT("    vring_used  at %p\n", vq->used);
+    DPRINT("    vring_avail at %p\n", vq->avail);
+
+    vq->last_used_index = vq->used->idx;
+    return 0;
+}
+
+static int
+vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    unsigned int index = vmsg->payload.state.index;
+    unsigned int num = vmsg->payload.state.num;
+
+    DPRINT("State.index: %d\n", index);
+    DPRINT("State.num:   %d\n", num);
+    dev->vq[index].last_avail_index = num;
+
+    return 0;
+}
+
+static int
+vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    uint64_t u64_arg = vmsg->payload.u64;
+    int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
+
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+    assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
+    assert(vmsg->fd_num == 1);
+
+    dev->vq[index].kick_fd = vmsg->fds[0];
+    DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
+
+    if (index % 2 == 1) {
+        /* TX queue. */
+        dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
+                       dev, vubr_kick_cb);
+
+        DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
+               dev->vq[index].kick_fd, index);
+    }
+    return 0;
+}
+
+static int
+vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    uint64_t u64_arg = vmsg->payload.u64;
+    int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
+
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
+    assert(vmsg->fd_num == 1);
+
+    dev->vq[index].call_fd = vmsg->fds[0];
+    DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
+
+    return 0;
+}
+
+static int
+vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    return 0;
+}
+
+static int
+vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    /* FIXME: unimplemented */
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    return 0;
+}
+
+static int
+vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    /* FIXME: unimplemented */
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    return 0;
+}
+
+static int
+vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    /* Print out generic part of the request. */
+    DPRINT(
+           "==================   Vhost user message from QEMU   ==================\n");
+    DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request],
+           vmsg->request);
+    DPRINT("Flags:   0x%x\n", vmsg->flags);
+    DPRINT("Size:    %d\n", vmsg->size);
+
+    if (vmsg->fd_num) {
+        int i;
+        DPRINT("Fds:");
+        for (i = 0; i < vmsg->fd_num; i++) {
+            DPRINT(" %d", vmsg->fds[i]);
+        }
+        DPRINT("\n");
+    }
+
+    switch (vmsg->request) {
+    case VHOST_USER_NONE:
+        return vubr_none_exec(dev, vmsg);
+    case VHOST_USER_GET_FEATURES:
+        return vubr_get_features_exec(dev, vmsg);
+    case VHOST_USER_SET_FEATURES:
+        return vubr_set_features_exec(dev, vmsg);
+    case VHOST_USER_SET_OWNER:
+        return vubr_set_owner_exec(dev, vmsg);
+    case VHOST_USER_RESET_DEVICE:
+        return vubr_reset_device_exec(dev, vmsg);
+    case VHOST_USER_SET_MEM_TABLE:
+        return vubr_set_mem_table_exec(dev, vmsg);
+    case VHOST_USER_SET_LOG_BASE:
+        return vubr_set_log_base_exec(dev, vmsg);
+    case VHOST_USER_SET_LOG_FD:
+        return vubr_set_log_fd_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_NUM:
+        return vubr_set_vring_num_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_ADDR:
+        return vubr_set_vring_addr_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_BASE:
+        return vubr_set_vring_base_exec(dev, vmsg);
+    case VHOST_USER_GET_VRING_BASE:
+        return vubr_get_vring_base_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_KICK:
+        return vubr_set_vring_kick_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_CALL:
+        return vubr_set_vring_call_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_ERR:
+        return vubr_set_vring_err_exec(dev, vmsg);
+    case VHOST_USER_GET_PROTOCOL_FEATURES:
+        return vubr_get_protocol_features_exec(dev, vmsg);
+    case VHOST_USER_SET_PROTOCOL_FEATURES:
+        return vubr_set_protocol_features_exec(dev, vmsg);
+    case VHOST_USER_GET_QUEUE_NUM:
+        return vubr_get_queue_num_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_ENABLE:
+        return vubr_set_vring_enable_exec(dev, vmsg);
+    case VHOST_USER_SEND_RARP:
+        return vubr_send_rarp_exec(dev, vmsg);
+
+    case VHOST_USER_MAX:
+        assert(vmsg->request != VHOST_USER_MAX);
+    }
+    return 0;
+}
+
+static void
+vubr_receive_cb(int sock, void *ctx)
+{
+    VubrDev *dev = (VubrDev *) ctx;
+    VhostUserMsg vmsg;
+    int reply_requested;
+
+    vubr_message_read(sock, &vmsg);
+    reply_requested = vubr_execute_request(dev, &vmsg);
+    if (reply_requested) {
+        /* Set the version in the flags when sending the reply */
+        vmsg.flags &= ~VHOST_USER_VERSION_MASK;
+        vmsg.flags |= VHOST_USER_VERSION;
+        vmsg.flags |= VHOST_USER_REPLY_MASK;
+        vubr_message_write(sock, &vmsg);
+    }
+}
+
+static void
+vubr_accept_cb(int sock, void *ctx)
+{
+    VubrDev *dev = (VubrDev *)ctx;
+    int conn_fd;
+    struct sockaddr_un un;
+    socklen_t len = sizeof(un);
+
+    conn_fd = accept(sock, (struct sockaddr *) &un, &len);
+    if (conn_fd  == -1) {
+        vubr_die("accept()");
+    }
+    DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
+    dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
+}
+
+static VubrDev *
+vubr_new(const char *path)
+{
+    VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
+    dev->nregions = 0;
+    int i;
+    struct sockaddr_un un;
+    size_t len;
+
+    for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
+        dev->vq[i] = (VubrVirtq) {
+            .call_fd = -1, .kick_fd = -1,
+            .size = 0,
+            .last_avail_index = 0, .last_used_index = 0,
+            .desc = 0, .avail = 0, .used = 0,
+        };
+    }
+
+    /* Get a UNIX socket. */
+    dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (dev->sock == -1) {
+        vubr_die("socket");
+    }
+
+    un.sun_family = AF_UNIX;
+    strcpy(un.sun_path, path);
+    len = sizeof(un.sun_family) + strlen(path);
+    unlink(path);
+
+    if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
+        vubr_die("bind");
+    }
+
+    if (listen(dev->sock, 1) == -1) {
+        vubr_die("listen");
+    }
+
+    dispatcher_init(&dev->dispatcher);
+    dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev,
+                   vubr_accept_cb);
+
+    DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
+    return dev;
+}
+
+static void
+vubr_backend_udp_setup(VubrDev *dev,
+                       const char *local_host,
+                       uint16_t local_port,
+                       const char *dest_host,
+                       uint16_t dest_port)
+{
+    int sock;
+    struct sockaddr_in si_local = {
+        .sin_family = AF_INET,
+        .sin_port = htons(local_port),
+    };
+
+    if (inet_aton(local_host, &si_local.sin_addr) == 0) {
+        fprintf(stderr, "inet_aton() failed.\n");
+        exit(1);
+    }
+
+    /* setup destination for sends */
+    dev->backend_udp_dest = (struct sockaddr_in) {
+        .sin_family = AF_INET,
+        .sin_port = htons(dest_port),
+    };
+    if (inet_aton(dest_host, &dev->backend_udp_dest.sin_addr) == 0) {
+        fprintf(stderr, "inet_aton() failed.\n");
+        exit(1);
+    }
+
+    sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+    if (sock == -1) {
+        vubr_die("socket");
+    }
+
+    if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
+        vubr_die("bind");
+    }
+
+    dev->backend_udp_sock = sock;
+    dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
+    DPRINT("Waiting for data from udp backend on %s:%d...\n",
+           local_host, local_port);
+}
+
+static void
+vubr_run(VubrDev *dev)
+{
+    while (1) {
+        /* timeout 200ms */
+        dispatcher_wait(&dev->dispatcher, 200000);
+        /* Here one can try a polling strategy. */
+    }
+}
+
+int
+main(int argc, char *argv[])
+{
+    VubrDev *dev;
+
+    dev = vubr_new("/tmp/vubr.sock");
+    if (!dev) {
+        return 1;
+    }
+
+    vubr_backend_udp_setup(dev,
+                           "127.0.0.1", 4444,
+                           "127.0.0.1", 5555);
+    vubr_run(dev);
+    return 0;
+}
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index a74c934cc0..b6dde753f8 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -98,7 +98,7 @@ typedef struct VhostUserMsg {
         struct vhost_vring_state state;
         struct vhost_vring_addr addr;
         VhostUserMemory memory;
-    };
+    } payload;
 } QEMU_PACKED VhostUserMsg;
 
 static VhostUserMsg m __attribute__ ((unused));
@@ -242,23 +242,23 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
     case VHOST_USER_GET_FEATURES:
         /* send back features to qemu */
         msg.flags |= VHOST_USER_REPLY_MASK;
-        msg.size = sizeof(m.u64);
-        msg.u64 = 0x1ULL << VHOST_F_LOG_ALL |
+        msg.size = sizeof(m.payload.u64);
+        msg.payload.u64 = 0x1ULL << VHOST_F_LOG_ALL |
             0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
         p = (uint8_t *) &msg;
         qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
         break;
 
     case VHOST_USER_SET_FEATURES:
-	g_assert_cmpint(msg.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES),
+	g_assert_cmpint(msg.payload.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES),
 			!=, 0ULL);
         break;
 
     case VHOST_USER_GET_PROTOCOL_FEATURES:
         /* send back features to qemu */
         msg.flags |= VHOST_USER_REPLY_MASK;
-        msg.size = sizeof(m.u64);
-        msg.u64 = 1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
+        msg.size = sizeof(m.payload.u64);
+        msg.payload.u64 = 1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
         p = (uint8_t *) &msg;
         qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
         break;
@@ -266,15 +266,15 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
     case VHOST_USER_GET_VRING_BASE:
         /* send back vring base to qemu */
         msg.flags |= VHOST_USER_REPLY_MASK;
-        msg.size = sizeof(m.state);
-        msg.state.num = 0;
+        msg.size = sizeof(m.payload.state);
+        msg.payload.state.num = 0;
         p = (uint8_t *) &msg;
         qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
         break;
 
     case VHOST_USER_SET_MEM_TABLE:
         /* received the mem table */
-        memcpy(&s->memory, &msg.memory, sizeof(msg.memory));
+        memcpy(&s->memory, &msg.payload.memory, sizeof(msg.payload.memory));
         s->fds_num = qemu_chr_fe_get_msgfds(chr, s->fds, G_N_ELEMENTS(s->fds));
 
         /* signal the test that it can continue */
diff --git a/trace-events b/trace-events
index f237c7fd4f..72136b9846 100644
--- a/trace-events
+++ b/trace-events
@@ -1031,9 +1031,9 @@ esp_pci_sbac_write(uint32_t reg, uint32_t val) "sbac: 0x%8.8x -> 0x%8.8x"
 # monitor.c
 handle_qmp_command(void *mon, const char *cmd_name) "mon %p cmd_name \"%s\""
 monitor_protocol_emitter(void *mon) "mon %p"
-monitor_protocol_event_handler(uint32_t event, void *data, uint64_t last, uint64_t now) "event=%d data=%p last=%" PRId64 " now=%" PRId64
+monitor_protocol_event_handler(uint32_t event, void *qdict) "event=%d data=%p"
 monitor_protocol_event_emit(uint32_t event, void *data) "event=%d data=%p"
-monitor_protocol_event_queue(uint32_t event, void *data, uint64_t rate, uint64_t last, uint64_t now) "event=%d data=%p rate=%" PRId64 " last=%" PRId64 " now=%" PRId64
+monitor_protocol_event_queue(uint32_t event, void *qdict, uint64_t rate) "event=%d data=%p rate=%" PRId64
 monitor_protocol_event_throttle(uint32_t event, uint64_t rate) "event=%d rate=%" PRId64
 
 # hw/net/opencores_eth.c
@@ -1383,6 +1383,10 @@ spapr_iommu_new_table(uint64_t liobn, void *tcet, void *table, int fd) "liobn=%"
 # hw/ppc/ppc.c
 ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)"
 
+# hw/ppc/prep.c
+prep_io_800_writeb(uint32_t addr, uint32_t val) "0x%08" PRIx32 " => 0x%02" PRIx32
+prep_io_800_readb(uint32_t addr, uint32_t retval) "0x%08" PRIx32 " <= 0x%02" PRIx32
+
 # util/hbitmap.c
 hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx"
 hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64
diff --git a/translate-all.c b/translate-all.c
index 333eba4f5d..20ce40ec28 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -468,6 +468,8 @@ static inline PageDesc *page_find(tb_page_addr_t index)
 # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 #elif defined(__sparc__)
 # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__powerpc64__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 #elif defined(__aarch64__)
 # define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
 #elif defined(__arm__)
diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c
index ed4ca2b01e..d4a0c63e12 100644
--- a/util/event_notifier-posix.c
+++ b/util/event_notifier-posix.c
@@ -77,7 +77,7 @@ void event_notifier_cleanup(EventNotifier *e)
     close(e->wfd);
 }
 
-int event_notifier_get_fd(EventNotifier *e)
+int event_notifier_get_fd(const EventNotifier *e)
 {
     return e->rfd;
 }
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 13942694cc..c37acbe58e 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -26,7 +26,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
     void *ptr1;
 
     if (ptr == MAP_FAILED) {
-        return NULL;
+        return MAP_FAILED;
     }
 
     /* Make sure align is a power of 2 */
@@ -41,7 +41,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
                 fd, 0);
     if (ptr1 == MAP_FAILED) {
         munmap(ptr, total);
-        return NULL;
+        return MAP_FAILED;
     }
 
     ptr += offset;
diff --git a/vl.c b/vl.c
index 332d8287d8..f5f7c3f7c4 100644
--- a/vl.c
+++ b/vl.c
@@ -2769,7 +2769,12 @@ static bool object_create_initial(const char *type)
         return false;
     }
 
-    if (g_str_equal(type, "filter-buffer")) {
+    /*
+     * return false for concrete netfilters since
+     * they depend on netdevs already existing
+     */
+    if (g_str_equal(type, "filter-buffer") ||
+        g_str_equal(type, "filter-dump")) {
         return false;
     }
 
@@ -4101,8 +4106,8 @@ int main(int argc, char **argv, char **envp)
 
     machine_class->max_cpus = machine_class->max_cpus ?: 1; /* Default to UP */
     if (max_cpus > machine_class->max_cpus) {
-        fprintf(stderr, "Number of SMP cpus requested (%d), exceeds max cpus "
-                "supported by machine `%s' (%d)\n", max_cpus,
+        fprintf(stderr, "Number of SMP CPUs requested (%d) exceeds max CPUs "
+                "supported by machine '%s' (%d)\n", max_cpus,
                 machine_class->name, machine_class->max_cpus);
         exit(1);
     }