summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--MAINTAINERS5
-rw-r--r--Makefile3
-rw-r--r--Makefile.objs15
-rw-r--r--async.c98
-rw-r--r--balloon.c61
-rw-r--r--balloon.h12
-rw-r--r--block.c310
-rw-r--r--block.h7
-rw-r--r--block/qcow.c180
-rw-r--r--block/qcow2-cluster.c26
-rw-r--r--block/qcow2.c240
-rw-r--r--block/qcow2.h5
-rw-r--r--block/qed-table.c14
-rw-r--r--block/qed.c4
-rw-r--r--block/raw-posix.c39
-rw-r--r--block/raw-win32.c35
-rw-r--r--block/raw.c7
-rw-r--r--block/vpc.c8
-rw-r--r--block_int.h10
-rw-r--r--blockdev.c17
-rwxr-xr-xconfigure73
-rw-r--r--coroutine-gthread.c131
-rw-r--r--coroutine-ucontext.c230
-rw-r--r--coroutine-win32.c92
-rw-r--r--dma.h4
-rw-r--r--hw/bt-hid.c62
-rw-r--r--hw/hid.c403
-rw-r--r--hw/hid.h58
-rw-r--r--hw/milkymist-softusb.c14
-rw-r--r--hw/scsi-bus.c74
-rw-r--r--hw/scsi-defs.h62
-rw-r--r--hw/scsi-disk.c79
-rw-r--r--hw/scsi-generic.c2
-rw-r--r--hw/usb-bt.c31
-rw-r--r--hw/usb-ccid.c46
-rw-r--r--hw/usb-ehci.c162
-rw-r--r--hw/usb-hid.c519
-rw-r--r--hw/usb-hub.c8
-rw-r--r--hw/usb-libhw.c63
-rw-r--r--hw/usb-msd.c109
-rw-r--r--hw/usb-musb.c22
-rw-r--r--hw/usb-net.c65
-rw-r--r--hw/usb-ohci.c23
-rw-r--r--hw/usb-serial.c26
-rw-r--r--hw/usb-uhci.c51
-rw-r--r--hw/usb-wacom.c6
-rw-r--r--hw/usb.c86
-rw-r--r--hw/usb.h13
-rw-r--r--hw/virtio-balloon.c76
-rw-r--r--hw/virtio-pci.c14
-rw-r--r--hw/virtio.h1
-rw-r--r--iov.c54
-rw-r--r--iov.h4
-rw-r--r--linux-aio.c43
-rw-r--r--posix-aio-compat.c30
-rw-r--r--qemu-common.h5
-rw-r--r--qemu-coroutine-int.h49
-rw-r--r--qemu-coroutine-lock.c117
-rw-r--r--qemu-coroutine.c75
-rw-r--r--qemu-coroutine.h159
-rw-r--r--slirp/arp_table.c95
-rw-r--r--slirp/bootp.c21
-rw-r--r--slirp/if.c28
-rw-r--r--slirp/main.h2
-rw-r--r--slirp/mbuf.c2
-rw-r--r--slirp/mbuf.h2
-rw-r--r--slirp/slirp.c135
-rw-r--r--slirp/slirp.h47
-rw-r--r--target-sparc/op_helper.c25
-rw-r--r--test-coroutine.c192
-rw-r--r--trace-events16
-rw-r--r--usb-bsd.c14
-rw-r--r--usb-linux.c48
-rw-r--r--usb-redir.c59
75 files changed, 3208 insertions, 1716 deletions
diff --git a/.gitignore b/.gitignore
index 54835bcb97..59c343c414 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@ qemu-io
 qemu-ga
 qemu-monitor.texi
 QMP/qmp-commands.txt
+test-coroutine
 .gdbinit
 *.a
 *.aux
diff --git a/MAINTAINERS b/MAINTAINERS
index 6115e4ec08..7cbcd7e60d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -431,9 +431,10 @@ S: Maintained
 F: net/
 
 SLIRP
-M: qemu-devel@nongnu.org
-S: Orphan
+M: Jan Kiszka <jan.kiszka@siemens.com>
+S: Maintained
 F: slirp/
+T: git://git.kiszka.org/qemu.git queues/slirp
 
 Usermode Emulation
 ------------------
diff --git a/Makefile b/Makefile
index 48552512d6..2becedcf88 100644
--- a/Makefile
+++ b/Makefile
@@ -151,7 +151,7 @@ qemu-io$(EXESUF): qemu-io.o cmd.o qemu-tool.o qemu-error.o $(oslib-obj-y) $(trac
 qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"  GEN   $@")
 
-check-qint.o check-qstring.o check-qdict.o check-qlist.o check-qfloat.o check-qjson.o: $(GENERATED_HEADERS)
+check-qint.o check-qstring.o check-qdict.o check-qlist.o check-qfloat.o check-qjson.o test-coroutine.o: $(GENERATED_HEADERS)
 
 CHECK_PROG_DEPS = qemu-malloc.o $(oslib-obj-y) $(trace-obj-y) qemu-tool.o
 
@@ -161,6 +161,7 @@ check-qdict: check-qdict.o qdict.o qfloat.o qint.o qstring.o qbool.o qlist.o $(C
 check-qlist: check-qlist.o qlist.o qint.o $(CHECK_PROG_DEPS)
 check-qfloat: check-qfloat.o qfloat.o $(CHECK_PROG_DEPS)
 check-qjson: check-qjson.o qfloat.o qint.o qdict.o qstring.o qlist.o qbool.o qjson.o json-streamer.o json-lexer.o json-parser.o error.o qerror.o qemu-error.o $(CHECK_PROG_DEPS)
+test-coroutine: test-coroutine.o qemu-timer-common.o async.o $(coroutine-obj-y) $(CHECK_PROG_DEPS)
 
 $(qapi-obj-y): $(GENERATED_HEADERS)
 qapi-dir := qapi-generated
diff --git a/Makefile.objs b/Makefile.objs
index 6991a9f52a..432b6198e9 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -11,10 +11,21 @@ oslib-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o
 oslib-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o
 
 #######################################################################
+# coroutines
+coroutine-obj-y = qemu-coroutine.o qemu-coroutine-lock.o
+ifeq ($(CONFIG_UCONTEXT_COROUTINE),y)
+coroutine-obj-$(CONFIG_POSIX) += coroutine-ucontext.o
+else
+coroutine-obj-$(CONFIG_POSIX) += coroutine-gthread.o
+endif
+coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
+
+#######################################################################
 # block-obj-y is code used by both qemu system emulation and qemu-img
 
 block-obj-y = cutils.o cache-utils.o qemu-malloc.o qemu-option.o module.o async.o
 block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o
+block-obj-y += $(coroutine-obj-y)
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 
@@ -89,6 +100,7 @@ common-obj-y += i2c.o smbus.o smbus_eeprom.o
 common-obj-y += eeprom93xx.o
 common-obj-y += scsi-disk.o cdrom.o
 common-obj-y += scsi-generic.o scsi-bus.o
+common-obj-y += hid.o
 common-obj-y += usb.o usb-hub.o usb-$(HOST_USB).o usb-hid.o usb-msd.o usb-wacom.o
 common-obj-y += usb-serial.o usb-net.o usb-bus.o usb-desc.o
 common-obj-$(CONFIG_SSI) += ssi.o
@@ -151,7 +163,7 @@ common-obj-y += qemu-timer.o qemu-timer-common.o
 
 slirp-obj-y = cksum.o if.o ip_icmp.o ip_input.o ip_output.o
 slirp-obj-y += slirp.o mbuf.o misc.o sbuf.o socket.o tcp_input.o tcp_output.o
-slirp-obj-y += tcp_subr.o tcp_timer.o udp.o bootp.o tftp.o
+slirp-obj-y += tcp_subr.o tcp_timer.o udp.o bootp.o tftp.o arp_table.o
 common-obj-$(CONFIG_SLIRP) += $(addprefix slirp/, $(slirp-obj-y))
 
 # xen backend driver support
@@ -172,6 +184,7 @@ user-obj-y += cutils.o cache-utils.o
 hw-obj-y =
 hw-obj-y += vl.o loader.o
 hw-obj-$(CONFIG_VIRTIO) += virtio-console.o
+hw-obj-y += usb-libhw.o
 hw-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
 hw-obj-y += fw_cfg.o
 hw-obj-$(CONFIG_PCI) += pci.o pci_bridge.o
diff --git a/async.c b/async.c
index fd313dffb7..3fe70b9deb 100644
--- a/async.c
+++ b/async.c
@@ -25,92 +25,8 @@
 #include "qemu-common.h"
 #include "qemu-aio.h"
 
-/*
- * An AsyncContext protects the callbacks of AIO requests and Bottom Halves
- * against interfering with each other. A typical example is qcow2 that accepts
- * asynchronous requests, but relies for manipulation of its metadata on
- * synchronous bdrv_read/write that doesn't trigger any callbacks.
- *
- * However, these functions are often emulated using AIO which means that AIO
- * callbacks must be run - but at the same time we must not run callbacks of
- * other requests as they might start to modify metadata and corrupt the
- * internal state of the caller of bdrv_read/write.
- *
- * To achieve the desired semantics we switch into a new AsyncContext.
- * Callbacks must only be run if they belong to the current AsyncContext.
- * Otherwise they need to be queued until their own context is active again.
- * This is how you can make qemu_aio_wait() wait only for your own callbacks.
- *
- * The AsyncContexts form a stack. When you leave a AsyncContexts, you always
- * return to the old ("parent") context.
- */
-struct AsyncContext {
-    /* Consecutive number of the AsyncContext (position in the stack) */
-    int id;
-
-    /* Anchor of the list of Bottom Halves belonging to the context */
-    struct QEMUBH *first_bh;
-
-    /* Link to parent context */
-    struct AsyncContext *parent;
-};
-
-/* The currently active AsyncContext */
-static struct AsyncContext *async_context = &(struct AsyncContext) { 0 };
-
-/*
- * Enter a new AsyncContext. Already scheduled Bottom Halves and AIO callbacks
- * won't be called until this context is left again.
- */
-void async_context_push(void)
-{
-    struct AsyncContext *new = qemu_mallocz(sizeof(*new));
-    new->parent = async_context;
-    new->id = async_context->id + 1;
-    async_context = new;
-}
-
-/* Run queued AIO completions and destroy Bottom Half */
-static void bh_run_aio_completions(void *opaque)
-{
-    QEMUBH **bh = opaque;
-    qemu_bh_delete(*bh);
-    qemu_free(bh);
-    qemu_aio_process_queue();
-}
-/*
- * Leave the currently active AsyncContext. All Bottom Halves belonging to the
- * old context are executed before changing the context.
- */
-void async_context_pop(void)
-{
-    struct AsyncContext *old = async_context;
-    QEMUBH **bh;
-
-    /* Flush the bottom halves, we don't want to lose them */
-    while (qemu_bh_poll());
-
-    /* Switch back to the parent context */
-    async_context = async_context->parent;
-    qemu_free(old);
-
-    if (async_context == NULL) {
-        abort();
-    }
-
-    /* Schedule BH to run any queued AIO completions as soon as possible */
-    bh = qemu_malloc(sizeof(*bh));
-    *bh = qemu_bh_new(bh_run_aio_completions, bh);
-    qemu_bh_schedule(*bh);
-}
-
-/*
- * Returns the ID of the currently active AsyncContext
- */
-int get_async_context_id(void)
-{
-    return async_context->id;
-}
+/* Anchor of the list of Bottom Halves belonging to the context */
+static struct QEMUBH *first_bh;
 
 /***********************************************************/
 /* bottom halves (can be seen as timers which expire ASAP) */
@@ -130,8 +46,8 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
     bh = qemu_mallocz(sizeof(QEMUBH));
     bh->cb = cb;
     bh->opaque = opaque;
-    bh->next = async_context->first_bh;
-    async_context->first_bh = bh;
+    bh->next = first_bh;
+    first_bh = bh;
     return bh;
 }
 
@@ -141,7 +57,7 @@ int qemu_bh_poll(void)
     int ret;
 
     ret = 0;
-    for (bh = async_context->first_bh; bh; bh = next) {
+    for (bh = first_bh; bh; bh = next) {
         next = bh->next;
         if (!bh->deleted && bh->scheduled) {
             bh->scheduled = 0;
@@ -153,7 +69,7 @@ int qemu_bh_poll(void)
     }
 
     /* remove deleted bhs */
-    bhp = &async_context->first_bh;
+    bhp = &first_bh;
     while (*bhp) {
         bh = *bhp;
         if (bh->deleted) {
@@ -199,7 +115,7 @@ void qemu_bh_update_timeout(int *timeout)
 {
     QEMUBH *bh;
 
-    for (bh = async_context->first_bh; bh; bh = bh->next) {
+    for (bh = first_bh; bh; bh = bh->next) {
         if (!bh->deleted && bh->scheduled) {
             if (bh->idle) {
                 /* idle bottom halves will be polled at least
diff --git a/balloon.c b/balloon.c
index 248c1b50a9..f56fdc1c4b 100644
--- a/balloon.c
+++ b/balloon.c
@@ -1,7 +1,9 @@
 /*
- * QEMU System Emulator
+ * Generic Balloon handlers and management
  *
  * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (C) 2011 Red Hat, Inc.
+ * Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -30,44 +32,53 @@
 #include "balloon.h"
 #include "trace.h"
 
+static QEMUBalloonEvent *balloon_event_fn;
+static QEMUBalloonStatus *balloon_stat_fn;
+static void *balloon_opaque;
 
-static QEMUBalloonEvent *qemu_balloon_event;
-void *qemu_balloon_event_opaque;
-
-void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque)
+int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
+                             QEMUBalloonStatus *stat_func, void *opaque)
 {
-    qemu_balloon_event = func;
-    qemu_balloon_event_opaque = opaque;
+    if (balloon_event_fn || balloon_stat_fn || balloon_opaque) {
+        /* We're already registered one balloon handler.  How many can
+         * a guest really have?
+         */
+        error_report("Another balloon device already registered");
+        return -1;
+    }
+    balloon_event_fn = event_func;
+    balloon_stat_fn = stat_func;
+    balloon_opaque = opaque;
+    return 0;
 }
 
-int qemu_balloon(ram_addr_t target, MonitorCompletion cb, void *opaque)
+static int qemu_balloon(ram_addr_t target)
 {
-    if (qemu_balloon_event) {
-        trace_balloon_event(qemu_balloon_event_opaque, target);
-        qemu_balloon_event(qemu_balloon_event_opaque, target, cb, opaque);
-        return 1;
-    } else {
+    if (!balloon_event_fn) {
         return 0;
     }
+    trace_balloon_event(balloon_opaque, target);
+    balloon_event_fn(balloon_opaque, target);
+    return 1;
 }
 
-int qemu_balloon_status(MonitorCompletion cb, void *opaque)
+static int qemu_balloon_status(MonitorCompletion cb, void *opaque)
 {
-    if (qemu_balloon_event) {
-        qemu_balloon_event(qemu_balloon_event_opaque, 0, cb, opaque);
-        return 1;
-    } else {
+    if (!balloon_stat_fn) {
         return 0;
     }
+    balloon_stat_fn(balloon_opaque, cb, opaque);
+    return 1;
 }
 
 static void print_balloon_stat(const char *key, QObject *obj, void *opaque)
 {
     Monitor *mon = opaque;
 
-    if (strcmp(key, "actual"))
+    if (strcmp(key, "actual")) {
         monitor_printf(mon, ",%s=%" PRId64, key,
                        qint_get_int(qobject_to_qint(obj)));
+    }
 }
 
 void monitor_print_balloon(Monitor *mon, const QObject *data)
@@ -75,9 +86,9 @@ void monitor_print_balloon(Monitor *mon, const QObject *data)
     QDict *qdict;
 
     qdict = qobject_to_qdict(data);
-    if (!qdict_haskey(qdict, "actual"))
+    if (!qdict_haskey(qdict, "actual")) {
         return;
-
+    }
     monitor_printf(mon, "balloon: actual=%" PRId64,
                    qdict_get_int(qdict, "actual") >> 20);
     qdict_iter(qdict, print_balloon_stat, mon);
@@ -129,6 +140,7 @@ int do_info_balloon(Monitor *mon, MonitorCompletion cb, void *opaque)
 int do_balloon(Monitor *mon, const QDict *params,
 	       MonitorCompletion cb, void *opaque)
 {
+    int64_t target;
     int ret;
 
     if (kvm_enabled() && !kvm_has_sync_mmu()) {
@@ -136,7 +148,12 @@ int do_balloon(Monitor *mon, const QDict *params,
         return -1;
     }
 
-    ret = qemu_balloon(qdict_get_int(params, "value"), cb, opaque);
+    target = qdict_get_int(params, "value");
+    if (target <= 0) {
+        qerror_report(QERR_INVALID_PARAMETER_VALUE, "target", "a size");
+        return -1;
+    }
+    ret = qemu_balloon(target);
     if (ret == 0) {
         qerror_report(QERR_DEVICE_NOT_ACTIVE, "balloon");
         return -1;
diff --git a/balloon.h b/balloon.h
index d478e28475..3df14e645a 100644
--- a/balloon.h
+++ b/balloon.h
@@ -16,14 +16,12 @@
 
 #include "monitor.h"
 
-typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target,
-                                MonitorCompletion cb, void *cb_data);
+typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+typedef void (QEMUBalloonStatus)(void *opaque, MonitorCompletion cb,
+                                 void *cb_data);
 
-void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
-
-int qemu_balloon(ram_addr_t target, MonitorCompletion cb, void *opaque);
-
-int qemu_balloon_status(MonitorCompletion cb, void *opaque);
+int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
+			     QEMUBalloonStatus *stat_func, void *opaque);
 
 void monitor_print_balloon(Monitor *mon, const QObject *data);
 int do_info_balloon(Monitor *mon, MonitorCompletion cb, void *opaque);
diff --git a/block.c b/block.c
index 9549b9eff9..26910ca143 100644
--- a/block.c
+++ b/block.c
@@ -28,6 +28,7 @@
 #include "block_int.h"
 #include "module.h"
 #include "qemu-objects.h"
+#include "qemu-coroutine.h"
 
 #ifdef CONFIG_BSD
 #include <sys/types.h>
@@ -57,6 +58,19 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                         uint8_t *buf, int nb_sectors);
 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors);
+static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque);
+static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque);
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
 
 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
     QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -169,14 +183,25 @@ void path_combine(char *dest, int dest_size,
 
 void bdrv_register(BlockDriver *bdrv)
 {
-    if (!bdrv->bdrv_aio_readv) {
-        /* add AIO emulation layer */
-        bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
-        bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
-    } else if (!bdrv->bdrv_read) {
-        /* add synchronous IO emulation layer */
+    if (bdrv->bdrv_co_readv) {
+        /* Emulate AIO by coroutines, and sync by AIO */
+        bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
+        bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
         bdrv->bdrv_read = bdrv_read_em;
         bdrv->bdrv_write = bdrv_write_em;
+     } else {
+        bdrv->bdrv_co_readv = bdrv_co_readv_em;
+        bdrv->bdrv_co_writev = bdrv_co_writev_em;
+
+        if (!bdrv->bdrv_aio_readv) {
+            /* add AIO emulation layer */
+            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
+            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
+        } else if (!bdrv->bdrv_read) {
+            /* add synchronous IO emulation layer */
+            bdrv->bdrv_read = bdrv_read_em;
+            bdrv->bdrv_write = bdrv_write_em;
+        }
     }
 
     if (!bdrv->bdrv_aio_flush)
@@ -730,6 +755,8 @@ void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
 {
     assert(bs->peer == qdev);
     bs->peer = NULL;
+    bs->change_cb = NULL;
+    bs->change_opaque = NULL;
 }
 
 DeviceState *bdrv_get_attached(BlockDriverState *bs)
@@ -920,6 +947,17 @@ static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                                    nb_sectors * BDRV_SECTOR_SIZE);
 }
 
+static inline bool bdrv_has_async_rw(BlockDriver *drv)
+{
+    return drv->bdrv_co_readv != bdrv_co_readv_em
+        || drv->bdrv_aio_readv != bdrv_aio_readv_em;
+}
+
+static inline bool bdrv_has_async_flush(BlockDriver *drv)
+{
+    return drv->bdrv_aio_flush != bdrv_aio_flush_em;
+}
+
 /* return < 0 if error. See bdrv_write() for the return codes */
 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
               uint8_t *buf, int nb_sectors)
@@ -928,6 +966,18 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num,
 
     if (!drv)
         return -ENOMEDIUM;
+
+    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
+        QEMUIOVector qiov;
+        struct iovec iov = {
+            .iov_base = (void *)buf,
+            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+        };
+
+        qemu_iovec_init_external(&qiov, &iov, 1);
+        return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
+    }
+
     if (bdrv_check_request(bs, sector_num, nb_sectors))
         return -EIO;
 
@@ -972,8 +1022,21 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num,
                const uint8_t *buf, int nb_sectors)
 {
     BlockDriver *drv = bs->drv;
+
     if (!bs->drv)
         return -ENOMEDIUM;
+
+    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
+        QEMUIOVector qiov;
+        struct iovec iov = {
+            .iov_base = (void *)buf,
+            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+        };
+
+        qemu_iovec_init_external(&qiov, &iov, 1);
+        return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
+    }
+
     if (bs->read_only)
         return -EACCES;
     if (bdrv_check_request(bs, sector_num, nb_sectors))
@@ -1108,17 +1171,49 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
     return 0;
 }
 
-/*
- * Writes to the file and ensures that no writes are reordered across this
- * request (acts as a barrier)
- *
- * Returns 0 on success, -errno in error cases.
- */
-int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
-    const uint8_t *buf, int nb_sectors)
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors, QEMUIOVector *qiov)
+{
+    BlockDriver *drv = bs->drv;
+
+    trace_bdrv_co_readv(bs, sector_num, nb_sectors);
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+        return -EIO;
+    }
+
+    return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+}
+
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors, QEMUIOVector *qiov)
 {
-    return bdrv_pwrite_sync(bs, BDRV_SECTOR_SIZE * sector_num,
-        buf, BDRV_SECTOR_SIZE * nb_sectors);
+    BlockDriver *drv = bs->drv;
+
+    trace_bdrv_co_writev(bs, sector_num, nb_sectors);
+
+    if (!bs->drv) {
+        return -ENOMEDIUM;
+    }
+    if (bs->read_only) {
+        return -EACCES;
+    }
+    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+        return -EIO;
+    }
+
+    if (bs->dirty_bitmap) {
+        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
+    }
+
+    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
+        bs->wr_highest_sector = sector_num + nb_sectors - 1;
+    }
+
+    return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
 }
 
 /**
@@ -1591,6 +1686,10 @@ int bdrv_flush(BlockDriverState *bs)
         return 0;
     }
 
+    if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
+        return bdrv_co_flush_em(bs);
+    }
+
     if (bs->drv && bs->drv->bdrv_flush) {
         return bs->drv->bdrv_flush(bs);
     }
@@ -2580,6 +2679,89 @@ static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
 }
 
+
+typedef struct BlockDriverAIOCBCoroutine {
+    BlockDriverAIOCB common;
+    BlockRequest req;
+    bool is_write;
+    QEMUBH* bh;
+} BlockDriverAIOCBCoroutine;
+
+static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
+{
+    qemu_aio_flush();
+}
+
+static AIOPool bdrv_em_co_aio_pool = {
+    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
+    .cancel             = bdrv_aio_co_cancel_em,
+};
+
+static void bdrv_co_rw_bh(void *opaque)
+{
+    BlockDriverAIOCBCoroutine *acb = opaque;
+
+    acb->common.cb(acb->common.opaque, acb->req.error);
+    qemu_bh_delete(acb->bh);
+    qemu_aio_release(acb);
+}
+
+static void coroutine_fn bdrv_co_rw(void *opaque)
+{
+    BlockDriverAIOCBCoroutine *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+
+    if (!acb->is_write) {
+        acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
+            acb->req.nb_sectors, acb->req.qiov);
+    } else {
+        acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
+            acb->req.nb_sectors, acb->req.qiov);
+    }
+
+    acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
+    qemu_bh_schedule(acb->bh);
+}
+
+static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+                                               int64_t sector_num,
+                                               QEMUIOVector *qiov,
+                                               int nb_sectors,
+                                               BlockDriverCompletionFunc *cb,
+                                               void *opaque,
+                                               bool is_write)
+{
+    Coroutine *co;
+    BlockDriverAIOCBCoroutine *acb;
+
+    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
+    acb->req.sector = sector_num;
+    acb->req.nb_sectors = nb_sectors;
+    acb->req.qiov = qiov;
+    acb->is_write = is_write;
+
+    co = qemu_coroutine_create(bdrv_co_rw);
+    qemu_coroutine_enter(co, acb);
+
+    return &acb->common;
+}
+
+static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
+                                 false);
+}
+
+static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
+                                 true);
+}
+
 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
@@ -2636,8 +2818,6 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
     struct iovec iov;
     QEMUIOVector qiov;
 
-    async_context_push();
-
     async_ret = NOT_DONE;
     iov.iov_base = (void *)buf;
     iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
@@ -2655,7 +2835,6 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
 
 
 fail:
-    async_context_pop();
     return async_ret;
 }
 
@@ -2667,8 +2846,6 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
     struct iovec iov;
     QEMUIOVector qiov;
 
-    async_context_push();
-
     async_ret = NOT_DONE;
     iov.iov_base = (void *)buf;
     iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
@@ -2684,7 +2861,6 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
     }
 
 fail:
-    async_context_pop();
     return async_ret;
 }
 
@@ -2726,6 +2902,77 @@ void qemu_aio_release(void *p)
 }
 
 /**************************************************************/
+/* Coroutine block device emulation */
+
+typedef struct CoroutineIOCompletion {
+    Coroutine *coroutine;
+    int ret;
+} CoroutineIOCompletion;
+
+static void bdrv_co_io_em_complete(void *opaque, int ret)
+{
+    CoroutineIOCompletion *co = opaque;
+
+    co->ret = ret;
+    qemu_coroutine_enter(co->coroutine, NULL);
+}
+
+static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
+                                      int nb_sectors, QEMUIOVector *iov,
+                                      bool is_write)
+{
+    CoroutineIOCompletion co = {
+        .coroutine = qemu_coroutine_self(),
+    };
+    BlockDriverAIOCB *acb;
+
+    if (is_write) {
+        acb = bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
+                              bdrv_co_io_em_complete, &co);
+    } else {
+        acb = bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
+                             bdrv_co_io_em_complete, &co);
+    }
+
+    trace_bdrv_co_io(is_write, acb);
+    if (!acb) {
+        return -EIO;
+    }
+    qemu_coroutine_yield();
+
+    return co.ret;
+}
+
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov)
+{
+    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
+}
+
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov)
+{
+    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
+}
+
+static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
+{
+    CoroutineIOCompletion co = {
+        .coroutine = qemu_coroutine_self(),
+    };
+    BlockDriverAIOCB *acb;
+
+    acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
+    if (!acb) {
+        return -EIO;
+    }
+    qemu_coroutine_yield();
+    return co.ret;
+}
+
+/**************************************************************/
 /* removable device support */
 
 /**
@@ -2768,25 +3015,16 @@ int bdrv_media_changed(BlockDriverState *bs)
 int bdrv_eject(BlockDriverState *bs, int eject_flag)
 {
     BlockDriver *drv = bs->drv;
-    int ret;
 
-    if (bs->locked) {
+    if (eject_flag && bs->locked) {
         return -EBUSY;
     }
 
-    if (!drv || !drv->bdrv_eject) {
-        ret = -ENOTSUP;
-    } else {
-        ret = drv->bdrv_eject(bs, eject_flag);
-    }
-    if (ret == -ENOTSUP) {
-        ret = 0;
+    if (drv && drv->bdrv_eject) {
+        drv->bdrv_eject(bs, eject_flag);
     }
-    if (ret >= 0) {
-        bs->tray_open = eject_flag;
-    }
-
-    return ret;
+    bs->tray_open = eject_flag;
+    return 0;
 }
 
 int bdrv_is_locked(BlockDriverState *bs)
diff --git a/block.h b/block.h
index 59cc410e3b..a3bfaafef0 100644
--- a/block.h
+++ b/block.h
@@ -4,6 +4,7 @@
 #include "qemu-aio.h"
 #include "qemu-common.h"
 #include "qemu-option.h"
+#include "qemu-coroutine.h"
 #include "qobject.h"
 
 /* block.c */
@@ -85,8 +86,10 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                 const void *buf, int count);
 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
     const void *buf, int count);
-int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
-    const uint8_t *buf, int nb_sectors);
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors, QEMUIOVector *qiov);
 int bdrv_truncate(BlockDriverState *bs, int64_t offset);
 int64_t bdrv_getlength(BlockDriverState *bs);
 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
diff --git a/block/qcow.c b/block/qcow.c
index 227b104e36..6447c2a1c0 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -73,6 +73,7 @@ typedef struct BDRVQcowState {
     uint32_t crypt_method_header;
     AES_KEY aes_encrypt_key;
     AES_KEY aes_decrypt_key;
+    CoMutex lock;
 } BDRVQcowState;
 
 static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
@@ -517,11 +518,11 @@ static AIOPool qcow_aio_pool = {
 
 static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int is_write)
+        int is_write)
 {
     QCowAIOCB *acb;
 
-    acb = qemu_aio_get(&qcow_aio_pool, bs, cb, opaque);
+    acb = qemu_aio_get(&qcow_aio_pool, bs, NULL, NULL);
     if (!acb)
         return NULL;
     acb->hd_aiocb = NULL;
@@ -542,48 +543,15 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
     return acb;
 }
 
-static void qcow_aio_read_cb(void *opaque, int ret);
-static void qcow_aio_write_cb(void *opaque, int ret);
-
-static void qcow_aio_rw_bh(void *opaque)
-{
-    QCowAIOCB *acb = opaque;
-    qemu_bh_delete(acb->bh);
-    acb->bh = NULL;
-
-    if (acb->is_write) {
-        qcow_aio_write_cb(opaque, 0);
-    } else {
-        qcow_aio_read_cb(opaque, 0);
-    }
-}
-
-static int qcow_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
-{
-    if (acb->bh) {
-        return -EIO;
-    }
-
-    acb->bh = qemu_bh_new(cb, acb);
-    if (!acb->bh) {
-        return -EIO;
-    }
-
-    qemu_bh_schedule(acb->bh);
-
-    return 0;
-}
-
-static void qcow_aio_read_cb(void *opaque, int ret)
+static int qcow_aio_read_cb(void *opaque)
 {
     QCowAIOCB *acb = opaque;
     BlockDriverState *bs = acb->common.bs;
     BDRVQcowState *s = bs->opaque;
     int index_in_cluster;
+    int ret;
 
     acb->hd_aiocb = NULL;
-    if (ret < 0)
-        goto done;
 
  redo:
     /* post process the read buffer */
@@ -605,8 +573,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        ret = 0;
-        goto done;
+        return 0;
     }
 
     /* prepare next AIO request */
@@ -623,11 +590,12 @@ static void qcow_aio_read_cb(void *opaque, int ret)
             acb->hd_iov.iov_base = (void *)acb->buf;
             acb->hd_iov.iov_len = acb->n * 512;
             qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-            acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
-                &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
-            if (acb->hd_aiocb == NULL) {
-                ret = -EIO;
-                goto done;
+            qemu_co_mutex_unlock(&s->lock);
+            ret = bdrv_co_readv(bs->backing_hd, acb->sector_num,
+                                acb->n, &acb->hd_qiov);
+            qemu_co_mutex_lock(&s->lock);
+            if (ret < 0) {
+                return -EIO;
             }
         } else {
             /* Note: in this case, no need to wait */
@@ -637,64 +605,56 @@ static void qcow_aio_read_cb(void *opaque, int ret)
     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
         /* add AIO support for compressed blocks ? */
         if (decompress_cluster(bs, acb->cluster_offset) < 0) {
-            ret = -EIO;
-            goto done;
+            return -EIO;
         }
         memcpy(acb->buf,
                s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
         goto redo;
     } else {
         if ((acb->cluster_offset & 511) != 0) {
-            ret = -EIO;
-            goto done;
+            return -EIO;
         }
         acb->hd_iov.iov_base = (void *)acb->buf;
         acb->hd_iov.iov_len = acb->n * 512;
         qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-        acb->hd_aiocb = bdrv_aio_readv(bs->file,
+        qemu_co_mutex_unlock(&s->lock);
+        ret = bdrv_co_readv(bs->file,
                             (acb->cluster_offset >> 9) + index_in_cluster,
-                            &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
-        if (acb->hd_aiocb == NULL) {
-            ret = -EIO;
-            goto done;
+                            acb->n, &acb->hd_qiov);
+        qemu_co_mutex_lock(&s->lock);
+        if (ret < 0) {
+            return ret;
         }
     }
 
-    return;
-
-done:
-    if (acb->qiov->niov > 1) {
-        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
-        qemu_vfree(acb->orig_buf);
-    }
-    acb->common.cb(acb->common.opaque, ret);
-    qemu_aio_release(acb);
+    return 1;
 }
 
-static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
+                         int nb_sectors, QEMUIOVector *qiov)
 {
+    BDRVQcowState *s = bs->opaque;
     QCowAIOCB *acb;
     int ret;
 
-    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-    if (!acb)
-        return NULL;
+    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 0);
 
-    ret = qcow_schedule_bh(qcow_aio_rw_bh, acb);
-    if (ret < 0) {
-        if (acb->qiov->niov > 1) {
-            qemu_vfree(acb->orig_buf);
-        }
-        qemu_aio_release(acb);
-        return NULL;
+    qemu_co_mutex_lock(&s->lock);
+    do {
+        ret = qcow_aio_read_cb(acb);
+    } while (ret > 0);
+    qemu_co_mutex_unlock(&s->lock);
+
+    if (acb->qiov->niov > 1) {
+        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
+        qemu_vfree(acb->orig_buf);
     }
+    qemu_aio_release(acb);
 
-    return &acb->common;
+    return ret;
 }
 
-static void qcow_aio_write_cb(void *opaque, int ret)
+static int qcow_aio_write_cb(void *opaque)
 {
     QCowAIOCB *acb = opaque;
     BlockDriverState *bs = acb->common.bs;
@@ -702,20 +662,17 @@ static void qcow_aio_write_cb(void *opaque, int ret)
     int index_in_cluster;
     uint64_t cluster_offset;
     const uint8_t *src_buf;
+    int ret;
 
     acb->hd_aiocb = NULL;
 
-    if (ret < 0)
-        goto done;
-
     acb->nb_sectors -= acb->n;
     acb->sector_num += acb->n;
     acb->buf += acb->n * 512;
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        ret = 0;
-        goto done;
+        return 0;
     }
 
     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -726,16 +683,11 @@ static void qcow_aio_write_cb(void *opaque, int ret)
                                         index_in_cluster,
                                         index_in_cluster + acb->n);
     if (!cluster_offset || (cluster_offset & 511) != 0) {
-        ret = -EIO;
-        goto done;
+        return -EIO;
     }
     if (s->crypt_method) {
         if (!acb->cluster_data) {
             acb->cluster_data = qemu_mallocz(s->cluster_size);
-            if (!acb->cluster_data) {
-                ret = -ENOMEM;
-                goto done;
-            }
         }
         encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
                         acb->n, 1, &s->aes_encrypt_key);
@@ -747,26 +699,19 @@ static void qcow_aio_write_cb(void *opaque, int ret)
     acb->hd_iov.iov_base = (void *)src_buf;
     acb->hd_iov.iov_len = acb->n * 512;
     qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-    acb->hd_aiocb = bdrv_aio_writev(bs->file,
-                                    (cluster_offset >> 9) + index_in_cluster,
-                                    &acb->hd_qiov, acb->n,
-                                    qcow_aio_write_cb, acb);
-    if (acb->hd_aiocb == NULL) {
-        ret = -EIO;
-        goto done;
+    qemu_co_mutex_unlock(&s->lock);
+    ret = bdrv_co_writev(bs->file,
+                         (cluster_offset >> 9) + index_in_cluster,
+                         acb->n, &acb->hd_qiov);
+    qemu_co_mutex_lock(&s->lock);
+    if (ret < 0) {
+        return ret;
     }
-    return;
-
-done:
-    if (acb->qiov->niov > 1)
-        qemu_vfree(acb->orig_buf);
-    acb->common.cb(acb->common.opaque, ret);
-    qemu_aio_release(acb);
+    return 1;
 }
 
-static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
+                          int nb_sectors, QEMUIOVector *qiov)
 {
     BDRVQcowState *s = bs->opaque;
     QCowAIOCB *acb;
@@ -774,21 +719,20 @@ static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
 
     s->cluster_cache_offset = -1; /* disable compressed cache */
 
-    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
-    if (!acb)
-        return NULL;
+    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 1);
 
+    qemu_co_mutex_lock(&s->lock);
+    do {
+        ret = qcow_aio_write_cb(acb);
+    } while (ret > 0);
+    qemu_co_mutex_unlock(&s->lock);
 
-    ret = qcow_schedule_bh(qcow_aio_rw_bh, acb);
-    if (ret < 0) {
-        if (acb->qiov->niov > 1) {
-            qemu_vfree(acb->orig_buf);
-        }
-        qemu_aio_release(acb);
-        return NULL;
+    if (acb->qiov->niov > 1) {
+        qemu_vfree(acb->orig_buf);
     }
+    qemu_aio_release(acb);
 
-    return &acb->common;
+    return ret;
 }
 
 static void qcow_close(BlockDriverState *bs)
@@ -1020,8 +964,8 @@ static BlockDriver bdrv_qcow = {
     .bdrv_is_allocated	= qcow_is_allocated,
     .bdrv_set_key	= qcow_set_key,
     .bdrv_make_empty	= qcow_make_empty,
-    .bdrv_aio_readv	= qcow_aio_readv,
-    .bdrv_aio_writev	= qcow_aio_writev,
+    .bdrv_co_readv  = qcow_co_readv,
+    .bdrv_co_writev = qcow_co_writev,
     .bdrv_aio_flush	= qcow_aio_flush,
     .bdrv_write_compressed = qcow_write_compressed,
     .bdrv_get_info	= qcow_get_info,
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 882f50a80b..81cf77d83c 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -697,12 +697,12 @@ err:
  * m->depends_on is set to NULL and the other fields in m are meaningless.
  *
  * If the cluster is newly allocated, m->nb_clusters is set to the number of
- * contiguous clusters that have been allocated. This may be 0 if the request
- * conflict with another write request in flight; in this case, m->depends_on
- * is set and the remaining fields of m are meaningless.
+ * contiguous clusters that have been allocated. In this case, the other
+ * fields of m are valid and contain information about the first allocated
+ * cluster.
  *
- * If m->nb_clusters is non-zero, the other fields of m are valid and contain
- * information about the first allocated cluster.
+ * If the request conflicts with another write request in flight, the coroutine
+ * is queued and will be reentered when the dependency has completed.
  *
  * Return 0 on success and -errno in error cases
  */
@@ -721,6 +721,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
         return ret;
     }
 
+again:
     nb_clusters = size_to_clusters(s, n_end << 9);
 
     nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
@@ -792,12 +793,12 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
             }
 
             if (nb_clusters == 0) {
-                /* Set dependency and wait for a callback */
-                m->depends_on = old_alloc;
-                m->nb_clusters = 0;
-                *num = 0;
-
-                goto out_wait_dependency;
+                /* Wait for the dependency to complete. We need to recheck
+                 * the free/allocated clusters when we continue. */
+                qemu_co_mutex_unlock(&s->lock);
+                qemu_co_queue_wait(&old_alloc->dependent_requests);
+                qemu_co_mutex_lock(&s->lock);
+                goto again;
             }
         }
     }
@@ -834,9 +835,6 @@ out:
 
     return 0;
 
-out_wait_dependency:
-    return qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-
 fail:
     qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
 fail_put:
diff --git a/block/qcow2.c b/block/qcow2.c
index 48e1b95689..f07d550a96 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -276,6 +276,9 @@ static int qcow2_open(BlockDriverState *bs, int flags)
         goto fail;
     }
 
+    /* Initialise locks */
+    qemu_co_mutex_init(&s->lock);
+
 #ifdef DEBUG_ALLOC
     qcow2_check_refcounts(bs);
 #endif
@@ -379,7 +382,6 @@ typedef struct QCowAIOCB {
     uint64_t cluster_offset;
     uint8_t *cluster_data;
     bool is_write;
-    BlockDriverAIOCB *hd_aiocb;
     QEMUIOVector hd_qiov;
     QEMUBH *bh;
     QCowL2Meta l2meta;
@@ -389,8 +391,6 @@ typedef struct QCowAIOCB {
 static void qcow2_aio_cancel(BlockDriverAIOCB *blockacb)
 {
     QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
-    if (acb->hd_aiocb)
-        bdrv_aio_cancel(acb->hd_aiocb);
     qemu_aio_release(acb);
 }
 
@@ -399,46 +399,16 @@ static AIOPool qcow2_aio_pool = {
     .cancel             = qcow2_aio_cancel,
 };
 
-static void qcow2_aio_read_cb(void *opaque, int ret);
-static void qcow2_aio_write_cb(void *opaque, int ret);
-
-static void qcow2_aio_rw_bh(void *opaque)
-{
-    QCowAIOCB *acb = opaque;
-    qemu_bh_delete(acb->bh);
-    acb->bh = NULL;
-
-    if (acb->is_write) {
-        qcow2_aio_write_cb(opaque, 0);
-    } else {
-        qcow2_aio_read_cb(opaque, 0);
-    }
-}
-
-static int qcow2_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
-{
-    if (acb->bh)
-        return -EIO;
-
-    acb->bh = qemu_bh_new(cb, acb);
-    if (!acb->bh)
-        return -EIO;
-
-    qemu_bh_schedule(acb->bh);
-
-    return 0;
-}
-
-static void qcow2_aio_read_cb(void *opaque, int ret)
+/*
+ * Returns 0 when the request is completed successfully, 1 when there is still
+ * a part left to do and -errno in error cases.
+ */
+static int qcow2_aio_read_cb(QCowAIOCB *acb)
 {
-    QCowAIOCB *acb = opaque;
     BlockDriverState *bs = acb->common.bs;
     BDRVQcowState *s = bs->opaque;
     int index_in_cluster, n1;
-
-    acb->hd_aiocb = NULL;
-    if (ret < 0)
-        goto done;
+    int ret;
 
     /* post process the read buffer */
     if (!acb->cluster_offset) {
@@ -463,8 +433,7 @@ static void qcow2_aio_read_cb(void *opaque, int ret)
 
     if (acb->remaining_sectors == 0) {
         /* request completed */
-        ret = 0;
-        goto done;
+        return 0;
     }
 
     /* prepare next AIO request */
@@ -477,7 +446,7 @@ static void qcow2_aio_read_cb(void *opaque, int ret)
     ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9,
         &acb->cur_nr_sectors, &acb->cluster_offset);
     if (ret < 0) {
-        goto done;
+        return ret;
     }
 
     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -494,42 +463,35 @@ static void qcow2_aio_read_cb(void *opaque, int ret)
                 acb->sector_num, acb->cur_nr_sectors);
             if (n1 > 0) {
                 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
-                acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
-                                    &acb->hd_qiov, n1, qcow2_aio_read_cb, acb);
-                if (acb->hd_aiocb == NULL) {
-                    ret = -EIO;
-                    goto done;
+                qemu_co_mutex_unlock(&s->lock);
+                ret = bdrv_co_readv(bs->backing_hd, acb->sector_num,
+                                    n1, &acb->hd_qiov);
+                qemu_co_mutex_lock(&s->lock);
+                if (ret < 0) {
+                    return ret;
                 }
-            } else {
-                ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
-                if (ret < 0)
-                    goto done;
             }
+            return 1;
         } else {
             /* Note: in this case, no need to wait */
             qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors);
-            ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
-            if (ret < 0)
-                goto done;
+            return 1;
         }
     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
         /* add AIO support for compressed blocks ? */
         ret = qcow2_decompress_cluster(bs, acb->cluster_offset);
         if (ret < 0) {
-            goto done;
+            return ret;
         }
 
         qemu_iovec_from_buffer(&acb->hd_qiov,
             s->cluster_cache + index_in_cluster * 512,
             512 * acb->cur_nr_sectors);
 
-        ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
-        if (ret < 0)
-            goto done;
+        return 1;
     } else {
         if ((acb->cluster_offset & 511) != 0) {
-            ret = -EIO;
-            goto done;
+            return -EIO;
         }
 
         if (s->crypt_method) {
@@ -550,21 +512,17 @@ static void qcow2_aio_read_cb(void *opaque, int ret)
         }
 
         BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-        acb->hd_aiocb = bdrv_aio_readv(bs->file,
+        qemu_co_mutex_unlock(&s->lock);
+        ret = bdrv_co_readv(bs->file,
                             (acb->cluster_offset >> 9) + index_in_cluster,
-                            &acb->hd_qiov, acb->cur_nr_sectors,
-                            qcow2_aio_read_cb, acb);
-        if (acb->hd_aiocb == NULL) {
-            ret = -EIO;
-            goto done;
+                            acb->cur_nr_sectors, &acb->hd_qiov);
+        qemu_co_mutex_lock(&s->lock);
+        if (ret < 0) {
+            return ret;
         }
     }
 
-    return;
-done:
-    acb->common.cb(acb->common.opaque, ret);
-    qemu_iovec_destroy(&acb->hd_qiov);
-    qemu_aio_release(acb);
+    return 1;
 }
 
 static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
@@ -577,7 +535,6 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
     acb = qemu_aio_get(&qcow2_aio_pool, bs, cb, opaque);
     if (!acb)
         return NULL;
-    acb->hd_aiocb = NULL;
     acb->sector_num = sector_num;
     acb->qiov = qiov;
     acb->is_write = is_write;
@@ -589,70 +546,65 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
     acb->cur_nr_sectors = 0;
     acb->cluster_offset = 0;
     acb->l2meta.nb_clusters = 0;
-    QLIST_INIT(&acb->l2meta.dependent_requests);
+    qemu_co_queue_init(&acb->l2meta.dependent_requests);
     return acb;
 }
 
-static BlockDriverAIOCB *qcow2_aio_readv(BlockDriverState *bs,
-                                         int64_t sector_num,
-                                         QEMUIOVector *qiov, int nb_sectors,
-                                         BlockDriverCompletionFunc *cb,
-                                         void *opaque)
+static int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
+                          int nb_sectors, QEMUIOVector *qiov)
 {
+    BDRVQcowState *s = bs->opaque;
     QCowAIOCB *acb;
     int ret;
 
-    acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-    if (!acb)
-        return NULL;
+    acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 0);
 
-    ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
-    if (ret < 0) {
-        qemu_iovec_destroy(&acb->hd_qiov);
-        qemu_aio_release(acb);
-        return NULL;
-    }
+    qemu_co_mutex_lock(&s->lock);
+    do {
+        ret = qcow2_aio_read_cb(acb);
+    } while (ret > 0);
+    qemu_co_mutex_unlock(&s->lock);
 
-    return &acb->common;
+    qemu_iovec_destroy(&acb->hd_qiov);
+    qemu_aio_release(acb);
+
+    return ret;
 }
 
-static void run_dependent_requests(QCowL2Meta *m)
+static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m)
 {
-    QCowAIOCB *req;
-    QCowAIOCB *next;
-
     /* Take the request off the list of running requests */
     if (m->nb_clusters != 0) {
         QLIST_REMOVE(m, next_in_flight);
     }
 
     /* Restart all dependent requests */
-    QLIST_FOREACH_SAFE(req, &m->dependent_requests, next_depend, next) {
-        qcow2_aio_write_cb(req, 0);
+    if (!qemu_co_queue_empty(&m->dependent_requests)) {
+        qemu_co_mutex_unlock(&s->lock);
+        while(qemu_co_queue_next(&m->dependent_requests));
+        qemu_co_mutex_lock(&s->lock);
     }
-
-    /* Empty the list for the next part of the request */
-    QLIST_INIT(&m->dependent_requests);
 }
 
-static void qcow2_aio_write_cb(void *opaque, int ret)
+/*
+ * Returns 0 when the request is completed successfully, 1 when there is still
+ * a part left to do and -errno in error cases.
+ */
+static int qcow2_aio_write_cb(QCowAIOCB *acb)
 {
-    QCowAIOCB *acb = opaque;
     BlockDriverState *bs = acb->common.bs;
     BDRVQcowState *s = bs->opaque;
     int index_in_cluster;
     int n_end;
+    int ret;
 
-    acb->hd_aiocb = NULL;
-
-    if (ret >= 0) {
-        ret = qcow2_alloc_cluster_link_l2(bs, &acb->l2meta);
-    }
+    ret = qcow2_alloc_cluster_link_l2(bs, &acb->l2meta);
 
-    run_dependent_requests(&acb->l2meta);
+    run_dependent_requests(s, &acb->l2meta);
 
-    if (ret < 0)
-        goto done;
+    if (ret < 0) {
+        return ret;
+    }
 
     acb->remaining_sectors -= acb->cur_nr_sectors;
     acb->sector_num += acb->cur_nr_sectors;
@@ -660,8 +612,7 @@ static void qcow2_aio_write_cb(void *opaque, int ret)
 
     if (acb->remaining_sectors == 0) {
         /* request completed */
-        ret = 0;
-        goto done;
+        return 0;
     }
 
     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -673,18 +624,10 @@ static void qcow2_aio_write_cb(void *opaque, int ret)
     ret = qcow2_alloc_cluster_offset(bs, acb->sector_num << 9,
         index_in_cluster, n_end, &acb->cur_nr_sectors, &acb->l2meta);
     if (ret < 0) {
-        goto done;
+        return ret;
     }
 
     acb->cluster_offset = acb->l2meta.cluster_offset;
-
-    /* Need to wait for another request? If so, we are done for now. */
-    if (acb->l2meta.nb_clusters == 0 && acb->l2meta.depends_on != NULL) {
-        QLIST_INSERT_HEAD(&acb->l2meta.depends_on->dependent_requests,
-            acb, next_depend);
-        return;
-    }
-
     assert((acb->cluster_offset & 511) == 0);
 
     qemu_iovec_reset(&acb->hd_qiov);
@@ -709,51 +652,40 @@ static void qcow2_aio_write_cb(void *opaque, int ret)
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-    acb->hd_aiocb = bdrv_aio_writev(bs->file,
-                                    (acb->cluster_offset >> 9) + index_in_cluster,
-                                    &acb->hd_qiov, acb->cur_nr_sectors,
-                                    qcow2_aio_write_cb, acb);
-    if (acb->hd_aiocb == NULL) {
-        ret = -EIO;
-        goto fail;
+    qemu_co_mutex_unlock(&s->lock);
+    ret = bdrv_co_writev(bs->file,
+                         (acb->cluster_offset >> 9) + index_in_cluster,
+                         acb->cur_nr_sectors, &acb->hd_qiov);
+    qemu_co_mutex_lock(&s->lock);
+    if (ret < 0) {
+        return ret;
     }
 
-    return;
-
-fail:
-    if (acb->l2meta.nb_clusters != 0) {
-        QLIST_REMOVE(&acb->l2meta, next_in_flight);
-    }
-done:
-    acb->common.cb(acb->common.opaque, ret);
-    qemu_iovec_destroy(&acb->hd_qiov);
-    qemu_aio_release(acb);
+    return 1;
 }
 
-static BlockDriverAIOCB *qcow2_aio_writev(BlockDriverState *bs,
-                                          int64_t sector_num,
-                                          QEMUIOVector *qiov, int nb_sectors,
-                                          BlockDriverCompletionFunc *cb,
-                                          void *opaque)
+static int qcow2_co_writev(BlockDriverState *bs,
+                           int64_t sector_num,
+                           int nb_sectors,
+                           QEMUIOVector *qiov)
 {
     BDRVQcowState *s = bs->opaque;
     QCowAIOCB *acb;
     int ret;
 
+    acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 1);
     s->cluster_cache_offset = -1; /* disable compressed cache */
 
-    acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
-    if (!acb)
-        return NULL;
+    qemu_co_mutex_lock(&s->lock);
+    do {
+        ret = qcow2_aio_write_cb(acb);
+    } while (ret > 0);
+    qemu_co_mutex_unlock(&s->lock);
 
-    ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
-    if (ret < 0) {
-        qemu_iovec_destroy(&acb->hd_qiov);
-        qemu_aio_release(acb);
-        return NULL;
-    }
+    qemu_iovec_destroy(&acb->hd_qiov);
+    qemu_aio_release(acb);
 
-    return &acb->common;
+    return ret;
 }
 
 static void qcow2_close(BlockDriverState *bs)
@@ -881,7 +813,7 @@ static int preallocate(BlockDriverState *bs)
 
     nb_sectors = bdrv_getlength(bs) >> 9;
     offset = 0;
-    QLIST_INIT(&meta.dependent_requests);
+    qemu_co_queue_init(&meta.dependent_requests);
     meta.cluster_offset = 0;
 
     while (nb_sectors) {
@@ -899,7 +831,7 @@ static int preallocate(BlockDriverState *bs)
 
         /* There are no dependent requests, but we need to remove our request
          * from the list of in-flight requests */
-        run_dependent_requests(&meta);
+        run_dependent_requests(bs->opaque, &meta);
 
         /* TODO Preallocate data if requested */
 
@@ -1387,8 +1319,8 @@ static BlockDriver bdrv_qcow2 = {
     .bdrv_set_key       = qcow2_set_key,
     .bdrv_make_empty    = qcow2_make_empty,
 
-    .bdrv_aio_readv     = qcow2_aio_readv,
-    .bdrv_aio_writev    = qcow2_aio_writev,
+    .bdrv_co_readv      = qcow2_co_readv,
+    .bdrv_co_writev     = qcow2_co_writev,
     .bdrv_aio_flush     = qcow2_aio_flush,
 
     .bdrv_discard           = qcow2_discard,
diff --git a/block/qcow2.h b/block/qcow2.h
index 6a0a21b694..de23abe1a4 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -26,6 +26,7 @@
 #define BLOCK_QCOW2_H
 
 #include "aes.h"
+#include "qemu-coroutine.h"
 
 //#define DEBUG_ALLOC
 //#define DEBUG_ALLOC2
@@ -114,6 +115,8 @@ typedef struct BDRVQcowState {
     int64_t free_cluster_index;
     int64_t free_byte_offset;
 
+    CoMutex lock;
+
     uint32_t crypt_method; /* current crypt method, 0 if no key yet */
     uint32_t crypt_method_header;
     AES_KEY aes_encrypt_key;
@@ -146,7 +149,7 @@ typedef struct QCowL2Meta
     int nb_available;
     int nb_clusters;
     struct QCowL2Meta *depends_on;
-    QLIST_HEAD(QCowAioDependencies, QCowAIOCB) dependent_requests;
+    CoQueue dependent_requests;
 
     QLIST_ENTRY(QCowL2Meta) next_in_flight;
 } QCowL2Meta;
diff --git a/block/qed-table.c b/block/qed-table.c
index d38c673547..d96afa81d7 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -179,16 +179,12 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
 {
     int ret = -EINPROGRESS;
 
-    async_context_push();
-
     qed_read_table(s, s->header.l1_table_offset,
                    s->l1_table, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
         qemu_aio_wait();
     }
 
-    async_context_pop();
-
     return ret;
 }
 
@@ -205,15 +201,11 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
 {
     int ret = -EINPROGRESS;
 
-    async_context_push();
-
     qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
         qemu_aio_wait();
     }
 
-    async_context_pop();
-
     return ret;
 }
 
@@ -282,14 +274,11 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
 {
     int ret = -EINPROGRESS;
 
-    async_context_push();
-
     qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
         qemu_aio_wait();
     }
 
-    async_context_pop();
     return ret;
 }
 
@@ -307,13 +296,10 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
 {
     int ret = -EINPROGRESS;
 
-    async_context_push();
-
     qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
         qemu_aio_wait();
     }
 
-    async_context_pop();
     return ret;
 }
diff --git a/block/qed.c b/block/qed.c
index 39703793e9..333f067582 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -680,16 +680,12 @@ static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num,
     };
     QEDRequest request = { .l2_table = NULL };
 
-    async_context_push();
-
     qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
 
     while (cb.is_allocated == -1) {
         qemu_aio_wait();
     }
 
-    async_context_pop();
-
     qed_unref_l2_cache_entry(request.l2_table);
 
     return cb.is_allocated;
diff --git a/block/raw-posix.c b/block/raw-posix.c
index cd89c8312a..c5c99446c0 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -230,13 +230,15 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
         }
     }
 
+    /* We're falling back to POSIX AIO in some cases so init always */
+    if (paio_init() < 0) {
+        goto out_free_buf;
+    }
+
 #ifdef CONFIG_LINUX_AIO
     if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
                       (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
 
-        /* We're falling back to POSIX AIO in some cases */
-        paio_init();
-
         s->aio_ctx = laio_init();
         if (!s->aio_ctx) {
             goto out_free_buf;
@@ -245,9 +247,6 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
     } else
 #endif
     {
-        if (paio_init() < 0) {
-            goto out_free_buf;
-        }
 #ifdef CONFIG_LINUX_AIO
         s->use_aio = 0;
 #endif
@@ -587,7 +586,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
 
     /*
      * If O_DIRECT is used the buffer needs to be aligned on a sector
-     * boundary.  Check if this is the case or telll the low-level
+     * boundary.  Check if this is the case or tell the low-level
      * driver that it needs to copy the buffer.
      */
     if (s->aligned_buf) {
@@ -1254,7 +1253,7 @@ static int floppy_media_changed(BlockDriverState *bs)
     return ret;
 }
 
-static int floppy_eject(BlockDriverState *bs, int eject_flag)
+static void floppy_eject(BlockDriverState *bs, int eject_flag)
 {
     BDRVRawState *s = bs->opaque;
     int fd;
@@ -1269,8 +1268,6 @@ static int floppy_eject(BlockDriverState *bs, int eject_flag)
             perror("FDEJECT");
         close(fd);
     }
-
-    return 0;
 }
 
 static BlockDriver bdrv_host_floppy = {
@@ -1348,7 +1345,7 @@ static int cdrom_is_inserted(BlockDriverState *bs)
     return 0;
 }
 
-static int cdrom_eject(BlockDriverState *bs, int eject_flag)
+static void cdrom_eject(BlockDriverState *bs, int eject_flag)
 {
     BDRVRawState *s = bs->opaque;
 
@@ -1359,11 +1356,9 @@ static int cdrom_eject(BlockDriverState *bs, int eject_flag)
         if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0)
             perror("CDROMEJECT");
     }
-
-    return 0;
 }
 
-static int cdrom_set_locked(BlockDriverState *bs, int locked)
+static void cdrom_set_locked(BlockDriverState *bs, int locked)
 {
     BDRVRawState *s = bs->opaque;
 
@@ -1374,8 +1369,6 @@ static int cdrom_set_locked(BlockDriverState *bs, int locked)
          */
         /* perror("CDROM_LOCKDOOR"); */
     }
-
-    return 0;
 }
 
 static BlockDriver bdrv_host_cdrom = {
@@ -1464,12 +1457,12 @@ static int cdrom_is_inserted(BlockDriverState *bs)
     return raw_getlength(bs) > 0;
 }
 
-static int cdrom_eject(BlockDriverState *bs, int eject_flag)
+static void cdrom_eject(BlockDriverState *bs, int eject_flag)
 {
     BDRVRawState *s = bs->opaque;
 
     if (s->fd < 0)
-        return -ENOTSUP;
+        return;
 
     (void) ioctl(s->fd, CDIOCALLOW);
 
@@ -1481,17 +1474,15 @@ static int cdrom_eject(BlockDriverState *bs, int eject_flag)
             perror("CDIOCCLOSE");
     }
 
-    if (cdrom_reopen(bs) < 0)
-        return -ENOTSUP;
-    return 0;
+    cdrom_reopen(bs);
 }
 
-static int cdrom_set_locked(BlockDriverState *bs, int locked)
+static void cdrom_set_locked(BlockDriverState *bs, int locked)
 {
     BDRVRawState *s = bs->opaque;
 
     if (s->fd < 0)
-        return -ENOTSUP;
+        return;
     if (ioctl(s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) {
         /*
          * Note: an error can happen if the distribution automatically
@@ -1499,8 +1490,6 @@ static int cdrom_set_locked(BlockDriverState *bs, int locked)
          */
         /* perror("CDROM_LOCKDOOR"); */
     }
-
-    return 0;
 }
 
 static BlockDriver bdrv_host_cdrom = {
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 91067e7595..e47cfe0f4a 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -393,41 +393,6 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
     return 0;
 }
 
-#if 0
-/***********************************************/
-/* removable device additional commands */
-
-static int raw_is_inserted(BlockDriverState *bs)
-{
-    return 1;
-}
-
-static int raw_media_changed(BlockDriverState *bs)
-{
-    return -ENOTSUP;
-}
-
-static int raw_eject(BlockDriverState *bs, int eject_flag)
-{
-    DWORD ret_count;
-
-    if (s->type == FTYPE_FILE)
-        return -ENOTSUP;
-    if (eject_flag) {
-        DeviceIoControl(s->hfile, IOCTL_STORAGE_EJECT_MEDIA,
-                        NULL, 0, NULL, 0, &lpBytesReturned, NULL);
-    } else {
-        DeviceIoControl(s->hfile, IOCTL_STORAGE_LOAD_MEDIA,
-                        NULL, 0, NULL, 0, &lpBytesReturned, NULL);
-    }
-}
-
-static int raw_set_locked(BlockDriverState *bs, int locked)
-{
-    return -ENOTSUP;
-}
-#endif
-
 static int hdev_has_zero_init(BlockDriverState *bs)
 {
     return 0;
diff --git a/block/raw.c b/block/raw.c
index b0f72d6a62..cb6203eeca 100644
--- a/block/raw.c
+++ b/block/raw.c
@@ -75,15 +75,14 @@ static int raw_is_inserted(BlockDriverState *bs)
     return bdrv_is_inserted(bs->file);
 }
 
-static int raw_eject(BlockDriverState *bs, int eject_flag)
+static void raw_eject(BlockDriverState *bs, int eject_flag)
 {
-    return bdrv_eject(bs->file, eject_flag);
+    bdrv_eject(bs->file, eject_flag);
 }
 
-static int raw_set_locked(BlockDriverState *bs, int locked)
+static void raw_set_locked(BlockDriverState *bs, int locked)
 {
     bdrv_set_locked(bs->file, locked);
-    return 0;
 }
 
 static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
diff --git a/block/vpc.c b/block/vpc.c
index 56865da5bc..fdd5236892 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -156,6 +156,7 @@ static int vpc_open(BlockDriverState *bs, int flags)
     struct vhd_dyndisk_header* dyndisk_header;
     uint8_t buf[HEADER_SIZE];
     uint32_t checksum;
+    int err = -1;
 
     if (bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE) != HEADER_SIZE)
         goto fail;
@@ -176,6 +177,11 @@ static int vpc_open(BlockDriverState *bs, int flags)
     bs->total_sectors = (int64_t)
         be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
 
+    if (bs->total_sectors >= 65535 * 16 * 255) {
+        err = -EFBIG;
+        goto fail;
+    }
+
     if (bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf, HEADER_SIZE)
             != HEADER_SIZE)
         goto fail;
@@ -222,7 +228,7 @@ static int vpc_open(BlockDriverState *bs, int flags)
 
     return 0;
  fail:
-    return -1;
+    return err;
 }
 
 /*
diff --git a/block_int.h b/block_int.h
index efb68038c4..f6d02b38a7 100644
--- a/block_int.h
+++ b/block_int.h
@@ -27,6 +27,7 @@
 #include "block.h"
 #include "qemu-option.h"
 #include "qemu-queue.h"
+#include "qemu-coroutine.h"
 
 #define BLOCK_FLAG_ENCRYPT	1
 #define BLOCK_FLAG_COMPAT6	4
@@ -77,6 +78,11 @@ struct BlockDriver {
     int (*bdrv_discard)(BlockDriverState *bs, int64_t sector_num,
                         int nb_sectors);
 
+    int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+    int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+
     int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs,
         int num_reqs);
     int (*bdrv_merge_requests)(BlockDriverState *bs, BlockRequest* a,
@@ -112,8 +118,8 @@ struct BlockDriver {
     /* removable device specific */
     int (*bdrv_is_inserted)(BlockDriverState *bs);
     int (*bdrv_media_changed)(BlockDriverState *bs);
-    int (*bdrv_eject)(BlockDriverState *bs, int eject_flag);
-    int (*bdrv_set_locked)(BlockDriverState *bs, int locked);
+    void (*bdrv_eject)(BlockDriverState *bs, int eject_flag);
+    void (*bdrv_set_locked)(BlockDriverState *bs, int locked);
 
     /* to control generic scsi devices */
     int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf);
diff --git a/blockdev.c b/blockdev.c
index 0b8d3a4f83..a25367a9e3 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -646,16 +646,13 @@ out:
 
 static int eject_device(Monitor *mon, BlockDriverState *bs, int force)
 {
-    if (!force) {
-        if (!bdrv_is_removable(bs)) {
-            qerror_report(QERR_DEVICE_NOT_REMOVABLE,
-                           bdrv_get_device_name(bs));
-            return -1;
-        }
-        if (bdrv_is_locked(bs)) {
-            qerror_report(QERR_DEVICE_LOCKED, bdrv_get_device_name(bs));
-            return -1;
-        }
+    if (!bdrv_is_removable(bs)) {
+        qerror_report(QERR_DEVICE_NOT_REMOVABLE, bdrv_get_device_name(bs));
+        return -1;
+    }
+    if (!force && bdrv_is_locked(bs)) {
+        qerror_report(QERR_DEVICE_LOCKED, bdrv_get_device_name(bs));
+        return -1;
     }
     bdrv_close(bs);
     return 0;
diff --git a/configure b/configure
index 77194cf9a7..27f1fa9266 100755
--- a/configure
+++ b/configure
@@ -181,6 +181,7 @@ smartcard_nss=""
 usb_redir=""
 opengl=""
 zlib="yes"
+guest_agent="yes"
 
 # parse CC options first
 for opt do
@@ -757,6 +758,10 @@ for opt do
   ;;
   --disable-zlib-test) zlib="no"
   ;;
+  --enable-guest-agent) guest_agent="yes"
+  ;;
+  --disable-guest-agent) guest_agent="no"
+  ;;
   *) echo "ERROR: unknown option $opt"; show_help="yes"
   ;;
   esac
@@ -846,7 +851,6 @@ if [ "$softmmu" = "yes" ] ; then
     default_target_list="\
 i386-softmmu \
 x86_64-softmmu \
-alpha-softmmu \
 arm-softmmu \
 cris-softmmu \
 lm32-softmmu \
@@ -1035,6 +1039,8 @@ echo "  --disable-smartcard-nss  disable smartcard nss support"
 echo "  --enable-smartcard-nss   enable smartcard nss support"
 echo "  --disable-usb-redir      disable usb network redirection support"
 echo "  --enable-usb-redir       enable usb network redirection support"
+echo "  --disable-guest-agent    disable building of the QEMU Guest Agent"
+echo "  --enable-guest-agent     enable building of the QEMU Guest Agent"
 echo ""
 echo "NOTE: The object files are built at the place where configure is launched"
 exit 1
@@ -1094,11 +1100,13 @@ if test "$solaris" = "yes" ; then
   fi
 fi
 
-if has $python; then
-  :
-else
-  echo "Python not found. Use --python=/path/to/python"
-  exit 1
+if test "$guest_agent" != "no" ; then
+  if has $python; then
+    :
+  else
+    echo "Python not found. Use --python=/path/to/python"
+    exit 1
+  fi
 fi
 
 if test -z "$target_list" ; then
@@ -1513,11 +1521,17 @@ int main(void) {
     return 0;
 }
 EOF
+  if $pkg_config libpng --modversion >/dev/null 2>&1; then
+    vnc_png_cflags=`$pkg_config libpng --cflags 2> /dev/null`
+    vnc_png_libs=`$pkg_config libpng --libs 2> /dev/null`
+  else
     vnc_png_cflags=""
     vnc_png_libs="-lpng"
+  fi
   if compile_prog "$vnc_png_cflags" "$vnc_png_libs" ; then
     vnc_png=yes
     libs_softmmu="$vnc_png_libs $libs_softmmu"
+    QEMU_CFLAGS="$QEMU_CFLAGS $vnc_png_cflags"
   else
     if test "$vnc_png" = "yes" ; then
       feature_not_found "vnc-png"
@@ -1830,14 +1844,16 @@ fi
 
 ##########################################
 # glib support probe
-if $pkg_config --modversion glib-2.0 > /dev/null 2>&1 ; then
-    glib_cflags=`$pkg_config --cflags glib-2.0 2>/dev/null`
-    glib_libs=`$pkg_config --libs glib-2.0 2>/dev/null`
-    libs_softmmu="$glib_libs $libs_softmmu"
-    libs_tools="$glib_libs $libs_tools"
-else
-    echo "glib-2.0 required to compile QEMU"
-    exit 1
+if test "$guest_agent" != "no" ; then
+    if $pkg_config --modversion glib-2.0 > /dev/null 2>&1 ; then
+        glib_cflags=`$pkg_config --cflags glib-2.0 2>/dev/null`
+        glib_libs=`$pkg_config --libs glib-2.0 2>/dev/null`
+        libs_softmmu="$glib_libs $libs_softmmu"
+        libs_tools="$glib_libs $libs_tools"
+    else
+        echo "glib-2.0 required to compile QEMU"
+        exit 1
+    fi
 fi
 
 ##########################################
@@ -2521,7 +2537,7 @@ fi
 # __sync_fetch_and_and requires at least -march=i486. Many toolchains
 # use i686 as default anyway, but for those that don't, an explicit
 # specification is necessary
-if test $vhost_net = "yes" && test $cpu = "i386"; then
+if test "$vhost_net" = "yes" && test "$cpu" = "i386"; then
   cat > $TMPC << EOF
 int sfaa(unsigned *ptr)
 {
@@ -2541,6 +2557,20 @@ EOF
 fi
 
 ##########################################
+# check if we have makecontext
+
+ucontext_coroutine=no
+if test "$darwin" != "yes"; then
+  cat > $TMPC << EOF
+#include <ucontext.h>
+int main(void) { makecontext(0, 0, 0); }
+EOF
+  if compile_prog "" "" ; then
+      ucontext_coroutine=yes
+  fi
+fi
+
+##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
 
@@ -2597,7 +2627,9 @@ if test "$softmmu" = yes ; then
   tools="qemu-img\$(EXESUF) qemu-io\$(EXESUF) $tools"
   if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" ] ; then
       tools="qemu-nbd\$(EXESUF) $tools"
+    if [ "$guest_agent" = "yes" ]; then
       tools="qemu-ga\$(EXESUF) $tools"
+    fi
     if [ "$check_utests" = "yes" ]; then
       tools="check-qint check-qstring check-qdict check-qlist $tools"
       tools="check-qfloat check-qjson $tools"
@@ -2699,8 +2731,9 @@ echo "xfsctl support    $xfs"
 echo "nss used          $smartcard_nss"
 echo "usb net redir     $usb_redir"
 echo "OpenGL support    $opengl"
+echo "build guest agent $guest_agent"
 
-if test $sdl_too_old = "yes"; then
+if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
 fi
 
@@ -2788,7 +2821,7 @@ fi
 if test "$static" = "yes" ; then
   echo "CONFIG_STATIC=y" >> $config_host_mak
 fi
-if test $profiler = "yes" ; then
+if test "$profiler" = "yes" ; then
   echo "CONFIG_PROFILER=y" >> $config_host_mak
 fi
 if test "$slirp" = "yes" ; then
@@ -3015,6 +3048,10 @@ if test "$rbd" = "yes" ; then
   echo "CONFIG_RBD=y" >> $config_host_mak
 fi
 
+if test "$ucontext_coroutine" = "yes" ; then
+  echo "CONFIG_UCONTEXT_COROUTINE=y" >> $config_host_mak
+fi
+
 # USB host support
 case "$usb" in
 linux)
@@ -3342,7 +3379,7 @@ case "$target_arch2" in
       \( "$target_arch2" = "x86_64" -a "$cpu" = "i386"   \) -o \
       \( "$target_arch2" = "i386"   -a "$cpu" = "x86_64" \) \) ; then
       echo "CONFIG_KVM=y" >> $config_target_mak
-      if test $vhost_net = "yes" ; then
+      if test "$vhost_net" = "yes" ; then
         echo "CONFIG_VHOST_NET=y" >> $config_target_mak
       fi
     fi
diff --git a/coroutine-gthread.c b/coroutine-gthread.c
new file mode 100644
index 0000000000..f09877e14f
--- /dev/null
+++ b/coroutine-gthread.c
@@ -0,0 +1,131 @@
+/*
+ * GThread coroutine initialization code
+ *
+ * Copyright (C) 2006  Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011  Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <glib.h>
+#include "qemu-common.h"
+#include "qemu-coroutine-int.h"
+
+typedef struct {
+    Coroutine base;
+    GThread *thread;
+    bool runnable;
+    CoroutineAction action;
+} CoroutineGThread;
+
+static GCond *coroutine_cond;
+static GStaticMutex coroutine_lock = G_STATIC_MUTEX_INIT;
+static GStaticPrivate coroutine_key = G_STATIC_PRIVATE_INIT;
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+    if (!g_thread_supported()) {
+        g_thread_init(NULL);
+    }
+
+    coroutine_cond = g_cond_new();
+}
+
+static void coroutine_wait_runnable_locked(CoroutineGThread *co)
+{
+    while (!co->runnable) {
+        g_cond_wait(coroutine_cond, g_static_mutex_get_mutex(&coroutine_lock));
+    }
+}
+
+static void coroutine_wait_runnable(CoroutineGThread *co)
+{
+    g_static_mutex_lock(&coroutine_lock);
+    coroutine_wait_runnable_locked(co);
+    g_static_mutex_unlock(&coroutine_lock);
+}
+
+static gpointer coroutine_thread(gpointer opaque)
+{
+    CoroutineGThread *co = opaque;
+
+    g_static_private_set(&coroutine_key, co, NULL);
+    coroutine_wait_runnable(co);
+    co->base.entry(co->base.entry_arg);
+    qemu_coroutine_switch(&co->base, co->base.caller, COROUTINE_TERMINATE);
+    return NULL;
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    CoroutineGThread *co;
+
+    co = qemu_mallocz(sizeof(*co));
+    co->thread = g_thread_create_full(coroutine_thread, co, 0, TRUE, TRUE,
+                                      G_THREAD_PRIORITY_NORMAL, NULL);
+    if (!co->thread) {
+        qemu_free(co);
+        return NULL;
+    }
+    return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineGThread *co = DO_UPCAST(CoroutineGThread, base, co_);
+
+    g_thread_join(co->thread);
+    qemu_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_,
+                                      Coroutine *to_,
+                                      CoroutineAction action)
+{
+    CoroutineGThread *from = DO_UPCAST(CoroutineGThread, base, from_);
+    CoroutineGThread *to = DO_UPCAST(CoroutineGThread, base, to_);
+
+    g_static_mutex_lock(&coroutine_lock);
+    from->runnable = false;
+    from->action = action;
+    to->runnable = true;
+    to->action = action;
+    g_cond_broadcast(coroutine_cond);
+
+    if (action != COROUTINE_TERMINATE) {
+        coroutine_wait_runnable_locked(from);
+    }
+    g_static_mutex_unlock(&coroutine_lock);
+    return from->action;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    CoroutineGThread *co = g_static_private_get(&coroutine_key);
+
+    if (!co) {
+        co = qemu_mallocz(sizeof(*co));
+        co->runnable = true;
+        g_static_private_set(&coroutine_key, co, (GDestroyNotify)qemu_free);
+    }
+
+    return &co->base;
+}
+
+bool qemu_in_coroutine(void)
+{
+    CoroutineGThread *co = g_static_private_get(&coroutine_key);
+
+    return co && co->base.caller;
+}
diff --git a/coroutine-ucontext.c b/coroutine-ucontext.c
new file mode 100644
index 0000000000..41c2379a2a
--- /dev/null
+++ b/coroutine-ucontext.c
@@ -0,0 +1,230 @@
+/*
+ * ucontext coroutine initialization code
+ *
+ * Copyright (C) 2006  Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011  Kevin Wolf <kwolf@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <ucontext.h>
+#include "qemu-common.h"
+#include "qemu-coroutine-int.h"
+
+enum {
+    /* Maximum free pool size prevents holding too many freed coroutines */
+    POOL_MAX_SIZE = 64,
+};
+
+typedef struct {
+    Coroutine base;
+    void *stack;
+    jmp_buf env;
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+typedef struct {
+    /** Currently executing coroutine */
+    Coroutine *current;
+
+    /** Free list to speed up creation */
+    QLIST_HEAD(, Coroutine) pool;
+    unsigned int pool_size;
+
+    /** The default coroutine */
+    CoroutineUContext leader;
+} CoroutineThreadState;
+
+static pthread_key_t thread_state_key;
+
+/*
+ * va_args to makecontext() must be type 'int', so passing
+ * the pointer we need may require several int args. This
+ * union is a quick hack to let us do that
+ */
+union cc_arg {
+    void *p;
+    int i[2];
+};
+
+static CoroutineThreadState *coroutine_get_thread_state(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    if (!s) {
+        s = qemu_mallocz(sizeof(*s));
+        s->current = &s->leader.base;
+        QLIST_INIT(&s->pool);
+        pthread_setspecific(thread_state_key, s);
+    }
+    return s;
+}
+
+static void qemu_coroutine_thread_cleanup(void *opaque)
+{
+    CoroutineThreadState *s = opaque;
+    Coroutine *co;
+    Coroutine *tmp;
+
+    QLIST_FOREACH_SAFE(co, &s->pool, pool_next, tmp) {
+        qemu_free(DO_UPCAST(CoroutineUContext, base, co)->stack);
+        qemu_free(co);
+    }
+    qemu_free(s);
+}
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+    int ret;
+
+    ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
+    if (ret != 0) {
+        fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
+        abort();
+    }
+}
+
+static void coroutine_trampoline(int i0, int i1)
+{
+    union cc_arg arg;
+    CoroutineUContext *self;
+    Coroutine *co;
+
+    arg.i[0] = i0;
+    arg.i[1] = i1;
+    self = arg.p;
+    co = &self->base;
+
+    /* Initialize longjmp environment and switch back the caller */
+    if (!setjmp(self->env)) {
+        longjmp(*(jmp_buf *)co->entry_arg, 1);
+    }
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+static Coroutine *coroutine_new(void)
+{
+    const size_t stack_size = 1 << 20;
+    CoroutineUContext *co;
+    ucontext_t old_uc, uc;
+    jmp_buf old_env;
+    union cc_arg arg;
+
+    /* The ucontext functions preserve signal masks which incurs a system call
+     * overhead.  setjmp()/longjmp() does not preserve signal masks but only
+     * works on the current stack.  Since we need a way to create and switch to
+     * a new stack, use the ucontext functions for that but setjmp()/longjmp()
+     * for everything else.
+     */
+
+    if (getcontext(&uc) == -1) {
+        abort();
+    }
+
+    co = qemu_mallocz(sizeof(*co));
+    co->stack = qemu_malloc(stack_size);
+    co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+    uc.uc_link = &old_uc;
+    uc.uc_stack.ss_sp = co->stack;
+    uc.uc_stack.ss_size = stack_size;
+    uc.uc_stack.ss_flags = 0;
+
+    arg.p = co;
+
+    makecontext(&uc, (void (*)(void))coroutine_trampoline,
+                2, arg.i[0], arg.i[1]);
+
+    /* swapcontext() in, longjmp() back out */
+    if (!setjmp(old_env)) {
+        swapcontext(&old_uc, &uc);
+    }
+    return &co->base;
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    CoroutineThreadState *s = coroutine_get_thread_state();
+    Coroutine *co;
+
+    co = QLIST_FIRST(&s->pool);
+    if (co) {
+        QLIST_REMOVE(co, pool_next);
+        s->pool_size--;
+    } else {
+        co = coroutine_new();
+    }
+    return co;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineThreadState *s = coroutine_get_thread_state();
+    CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+    if (s->pool_size < POOL_MAX_SIZE) {
+        QLIST_INSERT_HEAD(&s->pool, &co->base, pool_next);
+        co->base.caller = NULL;
+        s->pool_size++;
+        return;
+    }
+
+    qemu_free(co->stack);
+    qemu_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                                      CoroutineAction action)
+{
+    CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+    CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+    CoroutineThreadState *s = coroutine_get_thread_state();
+    int ret;
+
+    s->current = to_;
+
+    ret = setjmp(from->env);
+    if (ret == 0) {
+        longjmp(to->env, action);
+    }
+    return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    CoroutineThreadState *s = coroutine_get_thread_state();
+
+    return s->current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    return s && s->current->caller;
+}
diff --git a/coroutine-win32.c b/coroutine-win32.c
new file mode 100644
index 0000000000..0e29448473
--- /dev/null
+++ b/coroutine-win32.c
@@ -0,0 +1,92 @@
+/*
+ * Win32 coroutine initialization code
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu-coroutine-int.h"
+
+typedef struct
+{
+    Coroutine base;
+
+    LPVOID fiber;
+    CoroutineAction action;
+} CoroutineWin32;
+
+static __thread CoroutineWin32 leader;
+static __thread Coroutine *current;
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                                      CoroutineAction action)
+{
+    CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
+    CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
+
+    current = to_;
+
+    to->action = action;
+    SwitchToFiber(to->fiber);
+    return from->action;
+}
+
+static void CALLBACK coroutine_trampoline(void *co_)
+{
+    Coroutine *co = co_;
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    const size_t stack_size = 1 << 20;
+    CoroutineWin32 *co;
+
+    co = qemu_mallocz(sizeof(*co));
+    co->fiber = CreateFiber(stack_size, coroutine_trampoline, &co->base);
+    return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineWin32 *co = DO_UPCAST(CoroutineWin32, base, co_);
+
+    DeleteFiber(co->fiber);
+    qemu_free(co);
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    if (!current) {
+        current = &leader.base;
+        leader.fiber = ConvertThreadToFiber(NULL);
+    }
+    return current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    return current && current->caller;
+}
diff --git a/dma.h b/dma.h
index 3d8324bb54..a6db5bacbb 100644
--- a/dma.h
+++ b/dma.h
@@ -20,12 +20,12 @@ typedef struct {
     target_phys_addr_t len;
 } ScatterGatherEntry;
 
-typedef struct {
+struct QEMUSGList {
     ScatterGatherEntry *sg;
     int nsg;
     int nalloc;
     target_phys_addr_t size;
-} QEMUSGList;
+};
 
 void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint);
 void qemu_sglist_add(QEMUSGList *qsg, target_phys_addr_t base,
diff --git a/hw/bt-hid.c b/hw/bt-hid.c
index 09120af074..5f1afe3e89 100644
--- a/hw/bt-hid.c
+++ b/hw/bt-hid.c
@@ -19,7 +19,9 @@
  */
 
 #include "qemu-common.h"
-#include "usb.h"
+#include "qemu-timer.h"
+#include "console.h"
+#include "hid.h"
 #include "bt.h"
 
 enum hid_transaction_req {
@@ -86,7 +88,7 @@ struct bt_hid_device_s {
     struct bt_l2cap_device_s btdev;
     struct bt_l2cap_conn_params_s *control;
     struct bt_l2cap_conn_params_s *interrupt;
-    USBDevice *usbdev;
+    HIDState hid;
 
     int proto;
     int connected;
@@ -111,7 +113,7 @@ static void bt_hid_reset(struct bt_hid_device_s *s)
     bt_l2cap_device_done(&s->btdev);
     bt_l2cap_device_init(&s->btdev, net);
 
-    s->usbdev->info->handle_reset(s->usbdev);
+    hid_reset(&s->hid);
     s->proto = BT_HID_PROTO_REPORT;
     s->state = bt_state_ready;
     s->dataother.len = 0;
@@ -124,23 +126,16 @@ static void bt_hid_reset(struct bt_hid_device_s *s)
 
 static int bt_hid_out(struct bt_hid_device_s *s)
 {
-    USBPacket p;
-
     if (s->data_type == BT_DATA_OUTPUT) {
-        p.pid = USB_TOKEN_OUT;
-        p.devep = 1;
-        p.data = s->dataout.buffer;
-        p.len = s->dataout.len;
-        s->dataout.len = s->usbdev->info->handle_data(s->usbdev, &p);
-
-        return s->dataout.len;
+        /* nothing */
+        ;
     }
 
     if (s->data_type == BT_DATA_FEATURE) {
         /* XXX:
          * does this send a USB_REQ_CLEAR_FEATURE/USB_REQ_SET_FEATURE
          * or a SET_REPORT? */
-        p.devep = 0;
+        ;
     }
 
     return -1;
@@ -148,14 +143,8 @@ static int bt_hid_out(struct bt_hid_device_s *s)
 
 static int bt_hid_in(struct bt_hid_device_s *s)
 {
-    USBPacket p;
-
-    p.pid = USB_TOKEN_IN;
-    p.devep = 1;
-    p.data = s->datain.buffer;
-    p.len = sizeof(s->datain.buffer);
-    s->datain.len = s->usbdev->info->handle_data(s->usbdev, &p);
-
+    s->datain.len = hid_keyboard_poll(&s->hid, s->datain.buffer,
+                                      sizeof(s->datain.buffer));
     return s->datain.len;
 }
 
@@ -323,8 +312,7 @@ static void bt_hid_control_transaction(struct bt_hid_device_s *s,
             break;
         }
         s->proto = parameter;
-        s->usbdev->info->handle_control(s->usbdev, NULL, SET_PROTOCOL, s->proto, 0, 0,
-                                        NULL);
+        s->hid.protocol = parameter;
         ret = BT_HS_SUCCESSFUL;
         break;
 
@@ -333,8 +321,7 @@ static void bt_hid_control_transaction(struct bt_hid_device_s *s,
             ret = BT_HS_ERR_INVALID_PARAMETER;
             break;
         }
-        s->usbdev->info->handle_control(s->usbdev, NULL, GET_IDLE, 0, 0, 1,
-                        s->control->sdu_out(s->control, 1));
+        *s->control->sdu_out(s->control, 1) = s->hid.idle;
         s->control->sdu_submit(s->control);
         break;
 
@@ -344,11 +331,7 @@ static void bt_hid_control_transaction(struct bt_hid_device_s *s,
             break;
         }
 
-        /* We don't need to know about the Idle Rate here really,
-         * so just pass it on to the device.  */
-        ret = s->usbdev->info->handle_control(s->usbdev, NULL,
-                        SET_IDLE, data[1], 0, 0, NULL) ?
-                BT_HS_SUCCESSFUL : BT_HS_ERR_INVALID_PARAMETER;
+        s->hid.idle = data[1];
         /* XXX: Does this generate a handshake? */
         break;
 
@@ -385,9 +368,10 @@ static void bt_hid_control_sdu(void *opaque, const uint8_t *data, int len)
     bt_hid_control_transaction(hid, data, len);
 }
 
-static void bt_hid_datain(void *opaque)
+static void bt_hid_datain(HIDState *hs)
 {
-    struct bt_hid_device_s *hid = opaque;
+    struct bt_hid_device_s *hid =
+        container_of(hs, struct bt_hid_device_s, hid);
 
     /* If suspended, wake-up and send a wake-up event first.  We might
      * want to also inspect the input report and ignore event like
@@ -450,7 +434,7 @@ static void bt_hid_connected_update(struct bt_hid_device_s *hid)
     hid->btdev.device.inquiry_scan = !hid->connected;
 
     if (hid->connected && !prev) {
-        hid->usbdev->info->handle_reset(hid->usbdev);
+        hid_reset(&hid->hid);
         hid->proto = BT_HID_PROTO_REPORT;
     }
 
@@ -518,7 +502,7 @@ static void bt_hid_destroy(struct bt_device_s *dev)
         bt_hid_send_control(hid, BT_HC_VIRTUAL_CABLE_UNPLUG);
     bt_l2cap_device_done(&hid->btdev);
 
-    hid->usbdev->info->handle_destroy(hid->usbdev);
+    hid_free(&hid->hid);
 
     qemu_free(hid);
 }
@@ -531,7 +515,7 @@ enum peripheral_minor_class {
 };
 
 static struct bt_device_s *bt_hid_init(struct bt_scatternet_s *net,
-                USBDevice *dev, enum peripheral_minor_class minor)
+                                       enum peripheral_minor_class minor)
 {
     struct bt_hid_device_s *s = qemu_mallocz(sizeof(*s));
     uint32_t class =
@@ -551,9 +535,8 @@ static struct bt_device_s *bt_hid_init(struct bt_scatternet_s *net,
     bt_l2cap_psm_register(&s->btdev, BT_PSM_HID_INTR,
                     BT_HID_MTU, bt_hid_new_interrupt_ch);
 
-    s->usbdev = dev;
-    s->btdev.device.lmp_name = s->usbdev->product_desc;
-    usb_hid_datain_cb(s->usbdev, s, bt_hid_datain);
+    hid_init(&s->hid, HID_KEYBOARD, bt_hid_datain);
+    s->btdev.device.lmp_name = "BT Keyboard";
 
     s->btdev.device.handle_destroy = bt_hid_destroy;
 
@@ -566,6 +549,5 @@ static struct bt_device_s *bt_hid_init(struct bt_scatternet_s *net,
 
 struct bt_device_s *bt_keyboard_init(struct bt_scatternet_s *net)
 {
-    USBDevice *dev = usb_create_simple(NULL /* FIXME */, "usb-kbd");
-    return bt_hid_init(net, dev, class_keyboard);
+    return bt_hid_init(net, class_keyboard);
 }
diff --git a/hw/hid.c b/hw/hid.c
new file mode 100644
index 0000000000..7b5ef5fc92
--- /dev/null
+++ b/hw/hid.c
@@ -0,0 +1,403 @@
+/*
+ * QEMU HID devices
+ *
+ * Copyright (c) 2005 Fabrice Bellard
+ * Copyright (c) 2007 OpenMoko, Inc.  (andrew@openedhand.com)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "hw.h"
+#include "console.h"
+#include "qemu-timer.h"
+#include "hid.h"
+
+#define HID_USAGE_ERROR_ROLLOVER        0x01
+#define HID_USAGE_POSTFAIL              0x02
+#define HID_USAGE_ERROR_UNDEFINED       0x03
+
+/* Indices are QEMU keycodes, values are from HID Usage Table.  Indices
+ * above 0x80 are for keys that come after 0xe0 or 0xe1+0x1d or 0xe1+0x9d.  */
+static const uint8_t hid_usage_keys[0x100] = {
+    0x00, 0x29, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23,
+    0x24, 0x25, 0x26, 0x27, 0x2d, 0x2e, 0x2a, 0x2b,
+    0x14, 0x1a, 0x08, 0x15, 0x17, 0x1c, 0x18, 0x0c,
+    0x12, 0x13, 0x2f, 0x30, 0x28, 0xe0, 0x04, 0x16,
+    0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x0f, 0x33,
+    0x34, 0x35, 0xe1, 0x31, 0x1d, 0x1b, 0x06, 0x19,
+    0x05, 0x11, 0x10, 0x36, 0x37, 0x38, 0xe5, 0x55,
+    0xe2, 0x2c, 0x32, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e,
+    0x3f, 0x40, 0x41, 0x42, 0x43, 0x53, 0x47, 0x5f,
+    0x60, 0x61, 0x56, 0x5c, 0x5d, 0x5e, 0x57, 0x59,
+    0x5a, 0x5b, 0x62, 0x63, 0x00, 0x00, 0x00, 0x44,
+    0x45, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e,
+    0xe8, 0xe9, 0x71, 0x72, 0x73, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0xe3, 0xe7, 0x65,
+
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x58, 0xe4, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x00, 0x46,
+    0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x4a,
+    0x52, 0x4b, 0x00, 0x50, 0x00, 0x4f, 0x00, 0x4d,
+    0x51, 0x4e, 0x49, 0x4c, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0xe3, 0xe7, 0x65, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+bool hid_has_events(HIDState *hs)
+{
+    return hs->n > 0;
+}
+
+void hid_set_next_idle(HIDState *hs, int64_t curtime)
+{
+    hs->next_idle_clock = curtime + (get_ticks_per_sec() * hs->idle * 4) / 1000;
+}
+
+static void hid_pointer_event_clear(HIDPointerEvent *e, int buttons)
+{
+    e->xdx = e->ydy = e->dz = 0;
+    e->buttons_state = buttons;
+}
+
+static void hid_pointer_event_combine(HIDPointerEvent *e, int xyrel,
+                                      int x1, int y1, int z1) {
+    if (xyrel) {
+        e->xdx += x1;
+        e->ydy += y1;
+    } else {
+        e->xdx = x1;
+        e->ydy = y1;
+        /* Windows drivers do not like the 0/0 position and ignore such
+         * events. */
+        if (!(x1 | y1)) {
+            x1 = 1;
+        }
+    }
+    e->dz += z1;
+}
+
+static void hid_pointer_event(void *opaque,
+                              int x1, int y1, int z1, int buttons_state)
+{
+    HIDState *hs = opaque;
+    unsigned use_slot = (hs->head + hs->n - 1) & QUEUE_MASK;
+    unsigned previous_slot = (use_slot - 1) & QUEUE_MASK;
+
+    /* We combine events where feasible to keep the queue small.  We shouldn't
+     * combine anything with the first event of a particular button state, as
+     * that would change the location of the button state change.  When the
+     * queue is empty, a second event is needed because we don't know if
+     * the first event changed the button state.  */
+    if (hs->n == QUEUE_LENGTH) {
+        /* Queue full.  Discard old button state, combine motion normally.  */
+        hs->ptr.queue[use_slot].buttons_state = buttons_state;
+    } else if (hs->n < 2 ||
+               hs->ptr.queue[use_slot].buttons_state != buttons_state ||
+               hs->ptr.queue[previous_slot].buttons_state !=
+               hs->ptr.queue[use_slot].buttons_state) {
+        /* Cannot or should not combine, so add an empty item to the queue.  */
+        QUEUE_INCR(use_slot);
+        hs->n++;
+        hid_pointer_event_clear(&hs->ptr.queue[use_slot], buttons_state);
+    }
+    hid_pointer_event_combine(&hs->ptr.queue[use_slot],
+                              hs->kind == HID_MOUSE,
+                              x1, y1, z1);
+    hs->event(hs);
+}
+
+static void hid_keyboard_event(void *opaque, int keycode)
+{
+    HIDState *hs = opaque;
+    int slot;
+
+    if (hs->n == QUEUE_LENGTH) {
+        fprintf(stderr, "usb-kbd: warning: key event queue full\n");
+        return;
+    }
+    slot = (hs->head + hs->n) & QUEUE_MASK; hs->n++;
+    hs->kbd.keycodes[slot] = keycode;
+    hs->event(hs);
+}
+
+static void hid_keyboard_process_keycode(HIDState *hs)
+{
+    uint8_t hid_code, key;
+    int i, keycode, slot;
+
+    if (hs->n == 0) {
+        return;
+    }
+    slot = hs->head & QUEUE_MASK; QUEUE_INCR(hs->head); hs->n--;
+    keycode = hs->kbd.keycodes[slot];
+
+    key = keycode & 0x7f;
+    hid_code = hid_usage_keys[key | ((hs->kbd.modifiers >> 1) & (1 << 7))];
+    hs->kbd.modifiers &= ~(1 << 8);
+
+    switch (hid_code) {
+    case 0x00:
+        return;
+
+    case 0xe0:
+        if (hs->kbd.modifiers & (1 << 9)) {
+            hs->kbd.modifiers ^= 3 << 8;
+            return;
+        }
+    case 0xe1 ... 0xe7:
+        if (keycode & (1 << 7)) {
+            hs->kbd.modifiers &= ~(1 << (hid_code & 0x0f));
+            return;
+        }
+    case 0xe8 ... 0xef:
+        hs->kbd.modifiers |= 1 << (hid_code & 0x0f);
+        return;
+    }
+
+    if (keycode & (1 << 7)) {
+        for (i = hs->kbd.keys - 1; i >= 0; i--) {
+            if (hs->kbd.key[i] == hid_code) {
+                hs->kbd.key[i] = hs->kbd.key[-- hs->kbd.keys];
+                hs->kbd.key[hs->kbd.keys] = 0x00;
+                break;
+            }
+        }
+        if (i < 0) {
+            return;
+        }
+    } else {
+        for (i = hs->kbd.keys - 1; i >= 0; i--) {
+            if (hs->kbd.key[i] == hid_code) {
+                break;
+            }
+        }
+        if (i < 0) {
+            if (hs->kbd.keys < sizeof(hs->kbd.key)) {
+                hs->kbd.key[hs->kbd.keys++] = hid_code;
+            }
+        } else {
+            return;
+        }
+    }
+}
+
+static inline int int_clamp(int val, int vmin, int vmax)
+{
+    if (val < vmin) {
+        return vmin;
+    } else if (val > vmax) {
+        return vmax;
+    } else {
+        return val;
+    }
+}
+
+int hid_pointer_poll(HIDState *hs, uint8_t *buf, int len)
+{
+    int dx, dy, dz, b, l;
+    int index;
+    HIDPointerEvent *e;
+
+    if (!hs->ptr.mouse_grabbed) {
+        qemu_activate_mouse_event_handler(hs->ptr.eh_entry);
+        hs->ptr.mouse_grabbed = 1;
+    }
+
+    /* When the buffer is empty, return the last event.  Relative
+       movements will all be zero.  */
+    index = (hs->n ? hs->head : hs->head - 1);
+    e = &hs->ptr.queue[index & QUEUE_MASK];
+
+    if (hs->kind == HID_MOUSE) {
+        dx = int_clamp(e->xdx, -127, 127);
+        dy = int_clamp(e->ydy, -127, 127);
+        e->xdx -= dx;
+        e->ydy -= dy;
+    } else {
+        dx = e->xdx;
+        dy = e->ydy;
+    }
+    dz = int_clamp(e->dz, -127, 127);
+    e->dz -= dz;
+
+    b = 0;
+    if (e->buttons_state & MOUSE_EVENT_LBUTTON) {
+        b |= 0x01;
+    }
+    if (e->buttons_state & MOUSE_EVENT_RBUTTON) {
+        b |= 0x02;
+    }
+    if (e->buttons_state & MOUSE_EVENT_MBUTTON) {
+        b |= 0x04;
+    }
+
+    if (hs->n &&
+        !e->dz &&
+        (hs->kind == HID_TABLET || (!e->xdx && !e->ydy))) {
+        /* that deals with this event */
+        QUEUE_INCR(hs->head);
+        hs->n--;
+    }
+
+    /* Appears we have to invert the wheel direction */
+    dz = 0 - dz;
+    l = 0;
+    switch (hs->kind) {
+    case HID_MOUSE:
+        if (len > l) {
+            buf[l++] = b;
+        }
+        if (len > l) {
+            buf[l++] = dx;
+        }
+        if (len > l) {
+            buf[l++] = dy;
+        }
+        if (len > l) {
+            buf[l++] = dz;
+        }
+        break;
+
+    case HID_TABLET:
+        if (len > l) {
+            buf[l++] = b;
+        }
+        if (len > l) {
+            buf[l++] = dx & 0xff;
+        }
+        if (len > l) {
+            buf[l++] = dx >> 8;
+        }
+        if (len > l) {
+            buf[l++] = dy & 0xff;
+        }
+        if (len > l) {
+            buf[l++] = dy >> 8;
+        }
+        if (len > l) {
+            buf[l++] = dz;
+        }
+        break;
+
+    default:
+        abort();
+    }
+
+    return l;
+}
+
+int hid_keyboard_poll(HIDState *hs, uint8_t *buf, int len)
+{
+    if (len < 2) {
+        return 0;
+    }
+
+    hid_keyboard_process_keycode(hs);
+
+    buf[0] = hs->kbd.modifiers & 0xff;
+    buf[1] = 0;
+    if (hs->kbd.keys > 6) {
+        memset(buf + 2, HID_USAGE_ERROR_ROLLOVER, MIN(8, len) - 2);
+    } else {
+        memcpy(buf + 2, hs->kbd.key, MIN(8, len) - 2);
+    }
+
+    return MIN(8, len);
+}
+
+int hid_keyboard_write(HIDState *hs, uint8_t *buf, int len)
+{
+    if (len > 0) {
+        int ledstate = 0;
+        /* 0x01: Num Lock LED
+         * 0x02: Caps Lock LED
+         * 0x04: Scroll Lock LED
+         * 0x08: Compose LED
+         * 0x10: Kana LED */
+        hs->kbd.leds = buf[0];
+        if (hs->kbd.leds & 0x04) {
+            ledstate |= QEMU_SCROLL_LOCK_LED;
+        }
+        if (hs->kbd.leds & 0x01) {
+            ledstate |= QEMU_NUM_LOCK_LED;
+        }
+        if (hs->kbd.leds & 0x02) {
+            ledstate |= QEMU_CAPS_LOCK_LED;
+        }
+        kbd_put_ledstate(ledstate);
+    }
+    return 0;
+}
+
+void hid_reset(HIDState *hs)
+{
+    switch (hs->kind) {
+    case HID_KEYBOARD:
+        qemu_add_kbd_event_handler(hid_keyboard_event, hs);
+        memset(hs->kbd.keycodes, 0, sizeof(hs->kbd.keycodes));
+        memset(hs->kbd.key, 0, sizeof(hs->kbd.key));
+        hs->kbd.keys = 0;
+        break;
+    case HID_MOUSE:
+    case HID_TABLET:
+        memset(hs->ptr.queue, 0, sizeof(hs->ptr.queue));
+        break;
+    }
+    hs->head = 0;
+    hs->n = 0;
+    hs->protocol = 1;
+    hs->idle = 0;
+}
+
+void hid_free(HIDState *hs)
+{
+    switch (hs->kind) {
+    case HID_KEYBOARD:
+        qemu_remove_kbd_event_handler();
+        break;
+    case HID_MOUSE:
+    case HID_TABLET:
+        qemu_remove_mouse_event_handler(hs->ptr.eh_entry);
+        break;
+    }
+}
+
+void hid_init(HIDState *hs, int kind, HIDEventFunc event)
+{
+    hs->kind = kind;
+    hs->event = event;
+
+    if (hs->kind == HID_MOUSE) {
+        hs->ptr.eh_entry = qemu_add_mouse_event_handler(hid_pointer_event, hs,
+                                                        0, "QEMU HID Mouse");
+    } else if (hs->kind == HID_TABLET) {
+        hs->ptr.eh_entry = qemu_add_mouse_event_handler(hid_pointer_event, hs,
+                                                        1, "QEMU HID Tablet");
+    }
+}
diff --git a/hw/hid.h b/hw/hid.h
new file mode 100644
index 0000000000..4a8fa5b63f
--- /dev/null
+++ b/hw/hid.h
@@ -0,0 +1,58 @@
+#ifndef QEMU_HID_H
+#define QEMU_HID_H
+
+#define HID_MOUSE     1
+#define HID_TABLET    2
+#define HID_KEYBOARD  3
+
+typedef struct HIDPointerEvent {
+    int32_t xdx, ydy; /* relative iff it's a mouse, otherwise absolute */
+    int32_t dz, buttons_state;
+} HIDPointerEvent;
+
+#define QUEUE_LENGTH    16 /* should be enough for a triple-click */
+#define QUEUE_MASK      (QUEUE_LENGTH-1u)
+#define QUEUE_INCR(v)   ((v)++, (v) &= QUEUE_MASK)
+
+typedef struct HIDState HIDState;
+typedef void (*HIDEventFunc)(HIDState *s);
+
+typedef struct HIDMouseState {
+    HIDPointerEvent queue[QUEUE_LENGTH];
+    int mouse_grabbed;
+    QEMUPutMouseEntry *eh_entry;
+} HIDMouseState;
+
+typedef struct HIDKeyboardState {
+    uint32_t keycodes[QUEUE_LENGTH];
+    uint16_t modifiers;
+    uint8_t leds;
+    uint8_t key[16];
+    int32_t keys;
+} HIDKeyboardState;
+
+struct HIDState {
+    union {
+        HIDMouseState ptr;
+        HIDKeyboardState kbd;
+    };
+    uint32_t head; /* index into circular queue */
+    uint32_t n;
+    int kind;
+    int32_t protocol;
+    uint8_t idle;
+    int64_t next_idle_clock;
+    HIDEventFunc event;
+};
+
+void hid_init(HIDState *hs, int kind, HIDEventFunc event);
+void hid_reset(HIDState *hs);
+void hid_free(HIDState *hs);
+
+bool hid_has_events(HIDState *hs);
+void hid_set_next_idle(HIDState *hs, int64_t curtime);
+int hid_pointer_poll(HIDState *hs, uint8_t *buf, int len);
+int hid_keyboard_poll(HIDState *hs, uint8_t *buf, int len);
+int hid_keyboard_write(HIDState *hs, uint8_t *buf, int len);
+
+#endif /* QEMU_HID_H */
diff --git a/hw/milkymist-softusb.c b/hw/milkymist-softusb.c
index ce2bfc60f2..75c85aeb6f 100644
--- a/hw/milkymist-softusb.c
+++ b/hw/milkymist-softusb.c
@@ -234,11 +234,11 @@ static void softusb_usbdev_datain(void *opaque)
 
     USBPacket p;
 
-    p.pid = USB_TOKEN_IN;
-    p.devep = 1;
-    p.data = s->kbd_usb_buffer;
-    p.len = sizeof(s->kbd_usb_buffer);
+    usb_packet_init(&p);
+    usb_packet_setup(&p, USB_TOKEN_IN, 0, 1);
+    usb_packet_addbuf(&p, s->kbd_usb_buffer, sizeof(s->kbd_usb_buffer));
     s->usbdev->info->handle_data(s->usbdev, &p);
+    usb_packet_cleanup(&p);
 
     softusb_kbd_changed(s);
 }
@@ -310,10 +310,12 @@ static int milkymist_softusb_init(SysBusDevice *dev)
     usb_bus_new(&s->usbbus, &softusb_bus_ops, NULL);
 
     /* our two ports */
+    /* FIXME: claim to support full speed devices. qemu mouse and keyboard
+     * report themselves as full speed devices. */
     usb_register_port(&s->usbbus, &s->usbport[0], NULL, 0, &softusb_ops,
-            USB_SPEED_MASK_LOW);
+            USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL);
     usb_register_port(&s->usbbus, &s->usbport[1], NULL, 1, &softusb_ops,
-            USB_SPEED_MASK_LOW);
+            USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL);
 
     /* and finally create an usb keyboard */
     s->usbdev = usb_create_simple(&s->usbbus, "usb-kbd");
diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 8b1a412210..0b0344c1fd 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -223,7 +223,7 @@ static int scsi_req_length(SCSIRequest *req, uint8_t *cmd)
 
     switch(cmd[0]) {
     case TEST_UNIT_READY:
-    case REZERO_UNIT:
+    case REWIND:
     case START_STOP:
     case SEEK_6:
     case WRITE_FILEMARKS:
@@ -232,24 +232,24 @@ static int scsi_req_length(SCSIRequest *req, uint8_t *cmd)
     case RELEASE:
     case ERASE:
     case ALLOW_MEDIUM_REMOVAL:
-    case VERIFY:
+    case VERIFY_10:
     case SEEK_10:
     case SYNCHRONIZE_CACHE:
     case LOCK_UNLOCK_CACHE:
     case LOAD_UNLOAD:
     case SET_CD_SPEED:
     case SET_LIMITS:
-    case WRITE_LONG:
+    case WRITE_LONG_10:
     case MOVE_MEDIUM:
     case UPDATE_BLOCK:
         req->cmd.xfer = 0;
         break;
     case MODE_SENSE:
         break;
-    case WRITE_SAME:
+    case WRITE_SAME_10:
         req->cmd.xfer = 1;
         break;
-    case READ_CAPACITY:
+    case READ_CAPACITY_10:
         req->cmd.xfer = 8;
         break;
     case READ_BLOCK_LIMITS:
@@ -265,7 +265,7 @@ static int scsi_req_length(SCSIRequest *req, uint8_t *cmd)
         req->cmd.xfer *= 8;
         break;
     case WRITE_10:
-    case WRITE_VERIFY:
+    case WRITE_VERIFY_10:
     case WRITE_6:
     case WRITE_12:
     case WRITE_VERIFY_12:
@@ -325,7 +325,7 @@ static void scsi_req_xfer_mode(SCSIRequest *req)
     switch (req->cmd.buf[0]) {
     case WRITE_6:
     case WRITE_10:
-    case WRITE_VERIFY:
+    case WRITE_VERIFY_10:
     case WRITE_12:
     case WRITE_VERIFY_12:
     case WRITE_16:
@@ -345,15 +345,13 @@ static void scsi_req_xfer_mode(SCSIRequest *req)
     case SEARCH_HIGH:
     case SEARCH_LOW:
     case UPDATE_BLOCK:
-    case WRITE_LONG:
-    case WRITE_SAME:
+    case WRITE_LONG_10:
+    case WRITE_SAME_10:
     case SEARCH_HIGH_12:
     case SEARCH_EQUAL_12:
     case SEARCH_LOW_12:
-    case SET_WINDOW:
     case MEDIUM_SCAN:
     case SEND_VOLUME_TAG:
-    case WRITE_LONG_2:
     case PERSISTENT_RESERVE_OUT:
     case MAINTENANCE_OUT:
         req->cmd.mode = SCSI_XFER_TO_DEV;
@@ -517,8 +515,7 @@ static const char *scsi_command_name(uint8_t cmd)
 {
     static const char *names[] = {
         [ TEST_UNIT_READY          ] = "TEST_UNIT_READY",
-        [ REZERO_UNIT              ] = "REZERO_UNIT",
-        /* REWIND and REZERO_UNIT use the same operation code */
+        [ REWIND                   ] = "REWIND",
         [ REQUEST_SENSE            ] = "REQUEST_SENSE",
         [ FORMAT_UNIT              ] = "FORMAT_UNIT",
         [ READ_BLOCK_LIMITS        ] = "READ_BLOCK_LIMITS",
@@ -543,14 +540,12 @@ static const char *scsi_command_name(uint8_t cmd)
         [ RECEIVE_DIAGNOSTIC       ] = "RECEIVE_DIAGNOSTIC",
         [ SEND_DIAGNOSTIC          ] = "SEND_DIAGNOSTIC",
         [ ALLOW_MEDIUM_REMOVAL     ] = "ALLOW_MEDIUM_REMOVAL",
-
-        [ SET_WINDOW               ] = "SET_WINDOW",
-        [ READ_CAPACITY            ] = "READ_CAPACITY",
+        [ READ_CAPACITY_10         ] = "READ_CAPACITY_10",
         [ READ_10                  ] = "READ_10",
         [ WRITE_10                 ] = "WRITE_10",
         [ SEEK_10                  ] = "SEEK_10",
-        [ WRITE_VERIFY             ] = "WRITE_VERIFY",
-        [ VERIFY                   ] = "VERIFY",
+        [ WRITE_VERIFY_10          ] = "WRITE_VERIFY_10",
+        [ VERIFY_10                ] = "VERIFY_10",
         [ SEARCH_HIGH              ] = "SEARCH_HIGH",
         [ SEARCH_EQUAL             ] = "SEARCH_EQUAL",
         [ SEARCH_LOW               ] = "SEARCH_LOW",
@@ -566,11 +561,14 @@ static const char *scsi_command_name(uint8_t cmd)
         [ WRITE_BUFFER             ] = "WRITE_BUFFER",
         [ READ_BUFFER              ] = "READ_BUFFER",
         [ UPDATE_BLOCK             ] = "UPDATE_BLOCK",
-        [ READ_LONG                ] = "READ_LONG",
-        [ WRITE_LONG               ] = "WRITE_LONG",
+        [ READ_LONG_10             ] = "READ_LONG_10",
+        [ WRITE_LONG_10            ] = "WRITE_LONG_10",
         [ CHANGE_DEFINITION        ] = "CHANGE_DEFINITION",
-        [ WRITE_SAME               ] = "WRITE_SAME",
+        [ WRITE_SAME_10            ] = "WRITE_SAME_10",
+        [ UNMAP                    ] = "UNMAP",
         [ READ_TOC                 ] = "READ_TOC",
+        [ REPORT_DENSITY_SUPPORT   ] = "REPORT_DENSITY_SUPPORT",
+        [ GET_CONFIGURATION        ] = "GET_CONFIGURATION",
         [ LOG_SELECT               ] = "LOG_SELECT",
         [ LOG_SENSE                ] = "LOG_SENSE",
         [ MODE_SELECT_10           ] = "MODE_SELECT_10",
@@ -579,27 +577,39 @@ static const char *scsi_command_name(uint8_t cmd)
         [ MODE_SENSE_10            ] = "MODE_SENSE_10",
         [ PERSISTENT_RESERVE_IN    ] = "PERSISTENT_RESERVE_IN",
         [ PERSISTENT_RESERVE_OUT   ] = "PERSISTENT_RESERVE_OUT",
+        [ WRITE_FILEMARKS_16       ] = "WRITE_FILEMARKS_16",
+        [ EXTENDED_COPY            ] = "EXTENDED_COPY",
+        [ ATA_PASSTHROUGH          ] = "ATA_PASSTHROUGH",
+        [ ACCESS_CONTROL_IN        ] = "ACCESS_CONTROL_IN",
+        [ ACCESS_CONTROL_OUT       ] = "ACCESS_CONTROL_OUT",
+        [ READ_16                  ] = "READ_16",
+        [ COMPARE_AND_WRITE        ] = "COMPARE_AND_WRITE",
+        [ WRITE_16                 ] = "WRITE_16",
+        [ WRITE_VERIFY_16          ] = "WRITE_VERIFY_16",
+        [ VERIFY_16                ] = "VERIFY_16",
+        [ SYNCHRONIZE_CACHE_16     ] = "SYNCHRONIZE_CACHE_16",
+        [ LOCATE_16                ] = "LOCATE_16",
+        [ WRITE_SAME_16            ] = "WRITE_SAME_16",
+        [ ERASE_16                 ] = "ERASE_16",
+        [ SERVICE_ACTION_IN        ] = "SERVICE_ACTION_IN",
+        [ WRITE_LONG_16            ] = "WRITE_LONG_16",
+        [ REPORT_LUNS              ] = "REPORT_LUNS",
+        [ BLANK                    ] = "BLANK",
+        [ MAINTENANCE_IN           ] = "MAINTENANCE_IN",
+        [ MAINTENANCE_OUT          ] = "MAINTENANCE_OUT",
         [ MOVE_MEDIUM              ] = "MOVE_MEDIUM",
+        [ LOAD_UNLOAD              ] = "LOAD_UNLOAD",
         [ READ_12                  ] = "READ_12",
         [ WRITE_12                 ] = "WRITE_12",
         [ WRITE_VERIFY_12          ] = "WRITE_VERIFY_12",
+        [ VERIFY_12                ] = "VERIFY_12",
         [ SEARCH_HIGH_12           ] = "SEARCH_HIGH_12",
         [ SEARCH_EQUAL_12          ] = "SEARCH_EQUAL_12",
         [ SEARCH_LOW_12            ] = "SEARCH_LOW_12",
         [ READ_ELEMENT_STATUS      ] = "READ_ELEMENT_STATUS",
         [ SEND_VOLUME_TAG          ] = "SEND_VOLUME_TAG",
-        [ WRITE_LONG_2             ] = "WRITE_LONG_2",
-
-        [ REPORT_DENSITY_SUPPORT   ] = "REPORT_DENSITY_SUPPORT",
-        [ GET_CONFIGURATION        ] = "GET_CONFIGURATION",
-        [ READ_16                  ] = "READ_16",
-        [ WRITE_16                 ] = "WRITE_16",
-        [ WRITE_VERIFY_16          ] = "WRITE_VERIFY_16",
-        [ SERVICE_ACTION_IN        ] = "SERVICE_ACTION_IN",
-        [ REPORT_LUNS              ] = "REPORT_LUNS",
-        [ LOAD_UNLOAD              ] = "LOAD_UNLOAD",
+        [ READ_DEFECT_DATA_12      ] = "READ_DEFECT_DATA_12",
         [ SET_CD_SPEED             ] = "SET_CD_SPEED",
-        [ BLANK                    ] = "BLANK",
     };
 
     if (cmd >= ARRAY_SIZE(names) || names[cmd] == NULL)
diff --git a/hw/scsi-defs.h b/hw/scsi-defs.h
index 413cce07b5..27010b74c0 100644
--- a/hw/scsi-defs.h
+++ b/hw/scsi-defs.h
@@ -25,7 +25,7 @@
  */
 
 #define TEST_UNIT_READY       0x00
-#define REZERO_UNIT           0x01
+#define REWIND                0x01
 #define REQUEST_SENSE         0x03
 #define FORMAT_UNIT           0x04
 #define READ_BLOCK_LIMITS     0x05
@@ -48,14 +48,13 @@
 #define RECEIVE_DIAGNOSTIC    0x1c
 #define SEND_DIAGNOSTIC       0x1d
 #define ALLOW_MEDIUM_REMOVAL  0x1e
-
-#define SET_WINDOW            0x24
-#define READ_CAPACITY         0x25
+#define READ_CAPACITY_10      0x25
 #define READ_10               0x28
 #define WRITE_10              0x2a
 #define SEEK_10               0x2b
-#define WRITE_VERIFY          0x2e
-#define VERIFY                0x2f
+#define LOCATE_10             0x2b
+#define WRITE_VERIFY_10       0x2e
+#define VERIFY_10             0x2f
 #define SEARCH_HIGH           0x30
 #define SEARCH_EQUAL          0x31
 #define SEARCH_LOW            0x32
@@ -71,11 +70,14 @@
 #define WRITE_BUFFER          0x3b
 #define READ_BUFFER           0x3c
 #define UPDATE_BLOCK          0x3d
-#define READ_LONG             0x3e
-#define WRITE_LONG            0x3f
+#define READ_LONG_10          0x3e
+#define WRITE_LONG_10         0x3f
 #define CHANGE_DEFINITION     0x40
-#define WRITE_SAME            0x41
+#define WRITE_SAME_10         0x41
+#define UNMAP                 0x42
 #define READ_TOC              0x43
+#define REPORT_DENSITY_SUPPORT 0x44
+#define GET_CONFIGURATION     0x46
 #define LOG_SELECT            0x4c
 #define LOG_SENSE             0x4d
 #define MODE_SELECT_10        0x55
@@ -84,32 +86,40 @@
 #define MODE_SENSE_10         0x5a
 #define PERSISTENT_RESERVE_IN 0x5e
 #define PERSISTENT_RESERVE_OUT 0x5f
+#define VARLENGTH_CDB         0x7f
+#define WRITE_FILEMARKS_16    0x80
+#define EXTENDED_COPY         0x83
+#define ATA_PASSTHROUGH       0x85
+#define ACCESS_CONTROL_IN     0x86
+#define ACCESS_CONTROL_OUT    0x87
+#define READ_16               0x88
+#define COMPARE_AND_WRITE     0x89
+#define WRITE_16              0x8a
+#define WRITE_VERIFY_16       0x8e
+#define VERIFY_16             0x8f
+#define SYNCHRONIZE_CACHE_16  0x91
+#define LOCATE_16             0x92
 #define WRITE_SAME_16         0x93
+#define ERASE_16              0x93
+#define SERVICE_ACTION_IN     0x9e
+#define WRITE_LONG_16         0x9f
+#define REPORT_LUNS           0xa0
+#define BLANK                 0xa1
 #define MAINTENANCE_IN        0xa3
 #define MAINTENANCE_OUT       0xa4
 #define MOVE_MEDIUM           0xa5
+#define LOAD_UNLOAD           0xa6
 #define READ_12               0xa8
 #define WRITE_12              0xaa
 #define WRITE_VERIFY_12       0xae
+#define VERIFY_12             0xaf
 #define SEARCH_HIGH_12        0xb0
 #define SEARCH_EQUAL_12       0xb1
 #define SEARCH_LOW_12         0xb2
 #define READ_ELEMENT_STATUS   0xb8
 #define SEND_VOLUME_TAG       0xb6
-#define WRITE_LONG_2          0xea
-
-/* from hw/scsi-generic.c */
-#define REWIND 0x01
-#define REPORT_DENSITY_SUPPORT 0x44
-#define GET_CONFIGURATION 0x46
-#define READ_16 0x88
-#define WRITE_16 0x8a
-#define WRITE_VERIFY_16 0x8e
-#define SERVICE_ACTION_IN 0x9e
-#define REPORT_LUNS 0xa0
-#define LOAD_UNLOAD 0xa6
-#define SET_CD_SPEED 0xbb
-#define BLANK 0xa1
+#define READ_DEFECT_DATA_12   0xb7
+#define SET_CD_SPEED          0xbb
 
 /*
  *  SAM Status codes
@@ -154,6 +164,7 @@
 
 #define TYPE_DISK           0x00
 #define TYPE_TAPE           0x01
+#define TYPE_PRINTER        0x02
 #define TYPE_PROCESSOR      0x03    /* HP scanners use this */
 #define TYPE_WORM           0x04    /* Treated as ROM by our system */
 #define TYPE_ROM            0x05
@@ -161,6 +172,9 @@
 #define TYPE_MOD            0x07    /* Magneto-optical disk -
 				     * - treated as TYPE_DISK */
 #define TYPE_MEDIUM_CHANGER 0x08
-#define TYPE_ENCLOSURE	    0x0d    /* Enclosure Services Device */
+#define TYPE_STORAGE_ARRAY  0x0c    /* Storage array device */
+#define TYPE_ENCLOSURE      0x0d    /* Enclosure Services Device */
+#define TYPE_RBC            0x0e    /* Simplified Direct-Access Device */
+#define TYPE_OSD            0x11    /* Object-storage Device */
 #define TYPE_NO_LUN         0x7f
 
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index f42a5d1f85..fa198f928c 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -59,8 +59,6 @@ typedef struct SCSIDiskReq {
     uint32_t status;
 } SCSIDiskReq;
 
-typedef enum { SCSI_HD, SCSI_CD } SCSIDriveKind;
-
 struct SCSIDiskState
 {
     SCSIDevice qdev;
@@ -74,7 +72,6 @@ struct SCSIDiskState
     char *version;
     char *serial;
     SCSISense sense;
-    SCSIDriveKind drive_kind;
 };
 
 static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type);
@@ -382,7 +379,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             return -1;
         }
 
-        if (s->drive_kind == SCSI_CD) {
+        if (s->qdev.type == TYPE_ROM) {
             outbuf[buflen++] = 5;
         } else {
             outbuf[buflen++] = 0;
@@ -401,7 +398,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             if (s->serial)
                 outbuf[buflen++] = 0x80; // unit serial number
             outbuf[buflen++] = 0x83; // device identification
-            if (s->drive_kind == SCSI_HD) {
+            if (s->qdev.type == TYPE_DISK) {
                 outbuf[buflen++] = 0xb0; // block limits
                 outbuf[buflen++] = 0xb2; // thin provisioning
             }
@@ -460,7 +457,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             unsigned int opt_io_size =
                     s->qdev.conf.opt_io_size / s->qdev.blocksize;
 
-            if (s->drive_kind == SCSI_CD) {
+            if (s->qdev.type == TYPE_ROM) {
                 DPRINTF("Inquiry (EVPD[%02X] not supported for CDROM\n",
                         page_code);
                 return -1;
@@ -526,16 +523,15 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
     memset(outbuf, 0, buflen);
 
     if (req->lun) {
-        outbuf[0] = 0x7f;	/* LUN not supported */
+        outbuf[0] = 0x7f;       /* LUN not supported */
         return buflen;
     }
 
-    if (s->drive_kind == SCSI_CD) {
-        outbuf[0] = 5;
+    outbuf[0] = s->qdev.type & 0x1f;
+    if (s->qdev.type == TYPE_ROM) {
         outbuf[1] = 0x80;
         memcpy(&outbuf[16], "QEMU CD-ROM     ", 16);
     } else {
-        outbuf[0] = 0;
         outbuf[1] = s->removable ? 0x80 : 0;
         memcpy(&outbuf[16], "QEMU HARDDISK   ", 16);
     }
@@ -661,7 +657,7 @@ static int mode_sense_page(SCSIRequest *req, int page, uint8_t *p,
         return p[1] + 2;
 
     case 0x2a: /* CD Capabilities and Mechanical Status page. */
-        if (s->drive_kind != SCSI_CD)
+        if (s->qdev.type != TYPE_ROM)
             return 0;
         p[0] = 0x2a;
         p[1] = 0x14;
@@ -836,7 +832,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf)
     case TEST_UNIT_READY:
         if (!bdrv_is_inserted(s->bs))
             goto not_ready;
-	break;
+        break;
     case REQUEST_SENSE:
         if (req->cmd.xfer < 4)
             goto illegal_request;
@@ -848,7 +844,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf)
         buflen = scsi_disk_emulate_inquiry(req, outbuf);
         if (buflen < 0)
             goto illegal_request;
-	break;
+        break;
     case MODE_SENSE:
     case MODE_SENSE_10:
         buflen = scsi_disk_emulate_mode_sense(req, outbuf);
@@ -877,18 +873,18 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf)
             goto illegal_request;
         break;
     case START_STOP:
-        if (s->drive_kind == SCSI_CD && (req->cmd.buf[4] & 2)) {
+        if (s->qdev.type == TYPE_ROM && (req->cmd.buf[4] & 2)) {
             /* load/eject medium */
             bdrv_eject(s->bs, !(req->cmd.buf[4] & 1));
         }
-	break;
+        break;
     case ALLOW_MEDIUM_REMOVAL:
         bdrv_set_locked(s->bs, req->cmd.buf[4] & 1);
-	break;
-    case READ_CAPACITY:
+        break;
+    case READ_CAPACITY_10:
         /* The normal LEN field for this command is zero.  */
-	memset(outbuf, 0, 8);
-	bdrv_get_geometry(s->bs, &nb_sectors);
+        memset(outbuf, 0, 8);
+        bdrv_get_geometry(s->bs, &nb_sectors);
         if (!nb_sectors)
             goto not_ready;
         nb_sectors /= s->cluster_size;
@@ -908,7 +904,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf)
         outbuf[6] = s->cluster_size * 2;
         outbuf[7] = 0;
         buflen = 8;
-	break;
+        break;
     case SYNCHRONIZE_CACHE:
         ret = bdrv_flush(s->bs);
         if (ret < 0) {
@@ -970,13 +966,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf)
         outbuf[3] = 8;
         buflen = 16;
         break;
-    case VERIFY:
-        break;
-    case REZERO_UNIT:
-        DPRINTF("Rezero Unit\n");
-        if (!bdrv_is_inserted(s->bs)) {
-            goto not_ready;
-        }
+    case VERIFY_10:
         break;
     default:
         scsi_command_complete(r, CHECK_CONDITION, SENSE_CODE(INVALID_OPCODE));
@@ -1052,14 +1042,13 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
     case RELEASE_10:
     case START_STOP:
     case ALLOW_MEDIUM_REMOVAL:
-    case READ_CAPACITY:
+    case READ_CAPACITY_10:
     case SYNCHRONIZE_CACHE:
     case READ_TOC:
     case GET_CONFIGURATION:
     case SERVICE_ACTION_IN:
     case REPORT_LUNS:
-    case VERIFY:
-    case REZERO_UNIT:
+    case VERIFY_10:
         rc = scsi_disk_emulate_command(r, outbuf);
         if (rc < 0) {
             return 0;
@@ -1082,7 +1071,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
     case WRITE_10:
     case WRITE_12:
     case WRITE_16:
-    case WRITE_VERIFY:
+    case WRITE_VERIFY_10:
     case WRITE_VERIFY_12:
     case WRITE_VERIFY_16:
         len = r->req.cmd.xfer / s->qdev.blocksize;
@@ -1190,7 +1179,7 @@ static void scsi_destroy(SCSIDevice *dev)
     blockdev_mark_auto_del(s->qdev.conf.bs);
 }
 
-static int scsi_initfn(SCSIDevice *dev, SCSIDriveKind kind)
+static int scsi_initfn(SCSIDevice *dev, uint8_t scsi_type)
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
     DriveInfo *dinfo;
@@ -1200,9 +1189,8 @@ static int scsi_initfn(SCSIDevice *dev, SCSIDriveKind kind)
         return -1;
     }
     s->bs = s->qdev.conf.bs;
-    s->drive_kind = kind;
 
-    if (kind == SCSI_HD && !bdrv_is_inserted(s->bs)) {
+    if (scsi_type == TYPE_DISK && !bdrv_is_inserted(s->bs)) {
         error_report("Device needs media, but drive is empty");
         return -1;
     }
@@ -1224,44 +1212,47 @@ static int scsi_initfn(SCSIDevice *dev, SCSIDriveKind kind)
         return -1;
     }
 
-    if (kind == SCSI_CD) {
+    if (scsi_type == TYPE_ROM) {
         s->qdev.blocksize = 2048;
-    } else {
+    } else if (scsi_type == TYPE_DISK) {
         s->qdev.blocksize = s->qdev.conf.logical_block_size;
+    } else {
+        error_report("scsi-disk: Unhandled SCSI type %02x", scsi_type);
+        return -1;
     }
     s->cluster_size = s->qdev.blocksize / 512;
     s->bs->buffer_alignment = s->qdev.blocksize;
 
-    s->qdev.type = TYPE_DISK;
+    s->qdev.type = scsi_type;
     qemu_add_vm_change_state_handler(scsi_dma_restart_cb, s);
-    bdrv_set_removable(s->bs, kind == SCSI_CD);
+    bdrv_set_removable(s->bs, scsi_type == TYPE_ROM);
     add_boot_device_path(s->qdev.conf.bootindex, &dev->qdev, ",0");
     return 0;
 }
 
 static int scsi_hd_initfn(SCSIDevice *dev)
 {
-    return scsi_initfn(dev, SCSI_HD);
+    return scsi_initfn(dev, TYPE_DISK);
 }
 
 static int scsi_cd_initfn(SCSIDevice *dev)
 {
-    return scsi_initfn(dev, SCSI_CD);
+    return scsi_initfn(dev, TYPE_ROM);
 }
 
 static int scsi_disk_initfn(SCSIDevice *dev)
 {
-    SCSIDriveKind kind;
     DriveInfo *dinfo;
+    uint8_t scsi_type;
 
     if (!dev->conf.bs) {
-        kind = SCSI_HD;         /* will die in scsi_initfn() */
+        scsi_type = TYPE_DISK;  /* will die in scsi_initfn() */
     } else {
         dinfo = drive_get_by_blockdev(dev->conf.bs);
-        kind = dinfo->media_cd ? SCSI_CD : SCSI_HD;
+        scsi_type = dinfo->media_cd ? TYPE_ROM : TYPE_DISK;
     }
 
-    return scsi_initfn(dev, kind);
+    return scsi_initfn(dev, scsi_type);
 }
 
 #define DEFINE_SCSI_DISK_PROPERTIES()                           \
diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index 63361b3542..7b0026eb98 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -406,7 +406,7 @@ static int get_blocksize(BlockDriverState *bdrv)
 
     memset(cmd, 0, sizeof(cmd));
     memset(buf, 0, sizeof(buf));
-    cmd[0] = READ_CAPACITY;
+    cmd[0] = READ_CAPACITY_10;
 
     memset(&io_header, 0, sizeof(io_header));
     io_header.interface_id = 'S';
diff --git a/hw/usb-bt.c b/hw/usb-bt.c
index 4557802bbc..529fa3355d 100644
--- a/hw/usb-bt.c
+++ b/hw/usb-bt.c
@@ -294,9 +294,9 @@ static inline int usb_bt_fifo_dequeue(struct usb_hci_in_fifo_s *fifo,
     if (likely(!fifo->len))
         return USB_RET_STALL;
 
-    len = MIN(p->len, fifo->fifo[fifo->start].len);
-    memcpy(p->data, fifo->fifo[fifo->start].data, len);
-    if (len == p->len) {
+    len = MIN(p->iov.size, fifo->fifo[fifo->start].len);
+    usb_packet_copy(p, fifo->fifo[fifo->start].data, len);
+    if (len == p->iov.size) {
         fifo->fifo[fifo->start].len -= len;
         fifo->fifo[fifo->start].data += len;
     } else {
@@ -319,20 +319,13 @@ static inline void usb_bt_fifo_out_enqueue(struct USBBtState *s,
                 struct usb_hci_out_fifo_s *fifo,
                 void (*send)(struct HCIInfo *, const uint8_t *, int),
                 int (*complete)(const uint8_t *, int),
-                const uint8_t *data, int len)
+                USBPacket *p)
 {
-    if (fifo->len) {
-        memcpy(fifo->data + fifo->len, data, len);
-        fifo->len += len;
-        if (complete(fifo->data, fifo->len)) {
-            send(s->hci, fifo->data, fifo->len);
-            fifo->len = 0;
-        }
-    } else if (complete(data, len))
-        send(s->hci, data, len);
-    else {
-        memcpy(fifo->data, data, len);
-        fifo->len = len;
+    usb_packet_copy(p, fifo->data + fifo->len, p->iov.size);
+    fifo->len += p->iov.size;
+    if (complete(fifo->data, fifo->len)) {
+        send(s->hci, fifo->data, fifo->len);
+        fifo->len = 0;
     }
 
     /* TODO: do we need to loop? */
@@ -432,7 +425,7 @@ static int usb_bt_handle_control(USBDevice *dev, USBPacket *p,
     case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_DEVICE) << 8):
         if (s->config)
             usb_bt_fifo_out_enqueue(s, &s->outcmd, s->hci->cmd_send,
-                            usb_bt_hci_cmd_complete, data, length);
+                            usb_bt_hci_cmd_complete, p);
         break;
     default:
     fail:
@@ -474,12 +467,12 @@ static int usb_bt_handle_data(USBDevice *dev, USBPacket *p)
         switch (p->devep & 0xf) {
         case USB_ACL_EP:
             usb_bt_fifo_out_enqueue(s, &s->outacl, s->hci->acl_send,
-                            usb_bt_hci_acl_complete, p->data, p->len);
+                            usb_bt_hci_acl_complete, p);
             break;
 
         case USB_SCO_EP:
             usb_bt_fifo_out_enqueue(s, &s->outsco, s->hci->sco_send,
-                            usb_bt_hci_sco_complete, p->data, p->len);
+                            usb_bt_hci_sco_complete, p);
             break;
 
         default:
diff --git a/hw/usb-ccid.c b/hw/usb-ccid.c
index 4dda2c4833..66aeb211af 100644
--- a/hw/usb-ccid.c
+++ b/hw/usb-ccid.c
@@ -934,16 +934,16 @@ static int ccid_handle_bulk_out(USBCCIDState *s, USBPacket *p)
 {
     CCID_Header *ccid_header;
 
-    if (p->len + s->bulk_out_pos > BULK_OUT_DATA_SIZE) {
+    if (p->iov.size + s->bulk_out_pos > BULK_OUT_DATA_SIZE) {
         return USB_RET_STALL;
     }
     ccid_header = (CCID_Header *)s->bulk_out_data;
-    memcpy(s->bulk_out_data + s->bulk_out_pos, p->data, p->len);
-    s->bulk_out_pos += p->len;
-    if (p->len == CCID_MAX_PACKET_SIZE) {
+    usb_packet_copy(p, s->bulk_out_data + s->bulk_out_pos, p->iov.size);
+    s->bulk_out_pos += p->iov.size;
+    if (p->iov.size == CCID_MAX_PACKET_SIZE) {
         DPRINTF(s, D_VERBOSE,
-            "usb-ccid: bulk_in: expecting more packets (%d/%d)\n",
-            p->len, ccid_header->dwLength);
+            "usb-ccid: bulk_in: expecting more packets (%zd/%d)\n",
+            p->iov.size, ccid_header->dwLength);
         return 0;
     }
     if (s->bulk_out_pos < 10) {
@@ -1006,15 +1006,17 @@ static int ccid_handle_bulk_out(USBCCIDState *s, USBPacket *p)
     return 0;
 }
 
-static int ccid_bulk_in_copy_to_guest(USBCCIDState *s, uint8_t *data, int len)
+static int ccid_bulk_in_copy_to_guest(USBCCIDState *s, USBPacket *p)
 {
     int ret = 0;
 
-    assert(len > 0);
+    assert(p->iov.size > 0);
     ccid_bulk_in_get(s);
     if (s->current_bulk_in != NULL) {
-        ret = MIN(s->current_bulk_in->len - s->current_bulk_in->pos, len);
-        memcpy(data, s->current_bulk_in->data + s->current_bulk_in->pos, ret);
+        ret = MIN(s->current_bulk_in->len - s->current_bulk_in->pos,
+                  p->iov.size);
+        usb_packet_copy(p, s->current_bulk_in->data +
+                        s->current_bulk_in->pos, ret);
         s->current_bulk_in->pos += ret;
         if (s->current_bulk_in->pos == s->current_bulk_in->len) {
             ccid_bulk_in_release(s);
@@ -1025,11 +1027,13 @@ static int ccid_bulk_in_copy_to_guest(USBCCIDState *s, uint8_t *data, int len)
     }
     if (ret > 0) {
         DPRINTF(s, D_MORE_INFO,
-                "%s: %d/%d req/act to guest (BULK_IN)\n", __func__, len, ret);
+                "%s: %zd/%d req/act to guest (BULK_IN)\n",
+                __func__, p->iov.size, ret);
     }
-    if (ret != USB_RET_NAK && ret < len) {
+    if (ret != USB_RET_NAK && ret < p->iov.size) {
         DPRINTF(s, 1,
-            "%s: returning short (EREMOTEIO) %d < %d\n", __func__, ret, len);
+                "%s: returning short (EREMOTEIO) %d < %zd\n",
+                __func__, ret, p->iov.size);
     }
     return ret;
 }
@@ -1038,8 +1042,7 @@ static int ccid_handle_data(USBDevice *dev, USBPacket *p)
 {
     USBCCIDState *s = DO_UPCAST(USBCCIDState, dev, dev);
     int ret = 0;
-    uint8_t *data = p->data;
-    int len = p->len;
+    uint8_t buf[2];
 
     switch (p->pid) {
     case USB_TOKEN_OUT:
@@ -1049,24 +1052,25 @@ static int ccid_handle_data(USBDevice *dev, USBPacket *p)
     case USB_TOKEN_IN:
         switch (p->devep & 0xf) {
         case CCID_BULK_IN_EP:
-            if (!len) {
+            if (!p->iov.size) {
                 ret = USB_RET_NAK;
             } else {
-                ret = ccid_bulk_in_copy_to_guest(s, data, len);
+                ret = ccid_bulk_in_copy_to_guest(s, p);
             }
             break;
         case CCID_INT_IN_EP:
             if (s->notify_slot_change) {
                 /* page 56, RDR_to_PC_NotifySlotChange */
-                data[0] = CCID_MESSAGE_TYPE_RDR_to_PC_NotifySlotChange;
-                data[1] = s->bmSlotICCState;
+                buf[0] = CCID_MESSAGE_TYPE_RDR_to_PC_NotifySlotChange;
+                buf[1] = s->bmSlotICCState;
+                usb_packet_copy(p, buf, 2);
                 ret = 2;
                 s->notify_slot_change = false;
                 s->bmSlotICCState &= ~SLOT_0_CHANGED_MASK;
                 DPRINTF(s, D_INFO,
                         "handle_data: int_in: notify_slot_change %X, "
-                        "requested len %d\n",
-                        s->bmSlotICCState, len);
+                        "requested len %zd\n",
+                        s->bmSlotICCState, p->iov.size);
             }
             break;
         default:
diff --git a/hw/usb-ehci.c b/hw/usb-ehci.c
index 8b0dcc335d..2b43895315 100644
--- a/hw/usb-ehci.c
+++ b/hw/usb-ehci.c
@@ -28,6 +28,7 @@
 #include "pci.h"
 #include "monitor.h"
 #include "trace.h"
+#include "dma.h"
 
 #define EHCI_DEBUG   0
 
@@ -269,6 +270,7 @@ typedef struct EHCIqtd {
 
     uint32_t bufptr[5];               // Standard buffer pointer
 #define QTD_BUFPTR_MASK               0xfffff000
+#define QTD_BUFPTR_SH                 12
 } EHCIqtd;
 
 /*  EHCI spec version 1.0 Section 3.6
@@ -357,7 +359,7 @@ struct EHCIQueue {
     uint32_t qtdaddr;      // address QTD read from
 
     USBPacket packet;
-    uint8_t buffer[BUFF_SIZE];
+    QEMUSGList sgl;
     int pid;
     uint32_t tbytes;
     enum async_state async;
@@ -414,7 +416,7 @@ struct EHCIState {
     uint32_t p_fetch_addr;   // which address to look at next
 
     USBPacket ipacket;
-    uint8_t ibuffer[BUFF_SIZE];
+    QEMUSGList isgl;
     int isoch_pause;
 
     uint64_t last_run_ns;
@@ -1165,58 +1167,56 @@ static int ehci_qh_do_overlay(EHCIQueue *q)
     return 0;
 }
 
-static int ehci_buffer_rw(EHCIQueue *q, int bytes, int rw)
+static int ehci_init_transfer(EHCIQueue *q)
 {
-    int bufpos = 0;
-    int cpage, offset;
-    uint32_t head;
-    uint32_t tail;
-
-
-    if (!bytes) {
-        return 0;
-    }
-
-    cpage = get_field(q->qh.token, QTD_TOKEN_CPAGE);
-    if (cpage > 4) {
-        fprintf(stderr, "cpage out of range (%d)\n", cpage);
-        return USB_RET_PROCERR;
-    }
+    uint32_t cpage, offset, bytes, plen;
+    target_phys_addr_t page;
 
+    cpage  = get_field(q->qh.token, QTD_TOKEN_CPAGE);
+    bytes  = get_field(q->qh.token, QTD_TOKEN_TBYTES);
     offset = q->qh.bufptr[0] & ~QTD_BUFPTR_MASK;
+    qemu_sglist_init(&q->sgl, 5);
 
-    do {
-        /* start and end of this page */
-        head = q->qh.bufptr[cpage] & QTD_BUFPTR_MASK;
-        tail = head + ~QTD_BUFPTR_MASK + 1;
-        /* add offset into page */
-        head |= offset;
-
-        if (bytes <= (tail - head)) {
-            tail = head + bytes;
+    while (bytes > 0) {
+        if (cpage > 4) {
+            fprintf(stderr, "cpage out of range (%d)\n", cpage);
+            return USB_RET_PROCERR;
         }
 
-        trace_usb_ehci_data(rw, cpage, offset, head, tail-head, bufpos);
-        cpu_physical_memory_rw(head, q->buffer + bufpos, tail - head, rw);
-
-        bufpos += (tail - head);
-        offset += (tail - head);
-        bytes -= (tail - head);
-
-        if (bytes > 0) {
-            cpage++;
+        page  = q->qh.bufptr[cpage] & QTD_BUFPTR_MASK;
+        page += offset;
+        plen  = bytes;
+        if (plen > 4096 - offset) {
+            plen = 4096 - offset;
             offset = 0;
+            cpage++;
         }
-    } while (bytes > 0);
 
-    /* save cpage */
-    set_field(&q->qh.token, cpage, QTD_TOKEN_CPAGE);
+        qemu_sglist_add(&q->sgl, page, plen);
+        bytes -= plen;
+    }
+    return 0;
+}
 
-    /* save offset into cpage */
-    q->qh.bufptr[0] &= QTD_BUFPTR_MASK;
-    q->qh.bufptr[0] |= offset;
+static void ehci_finish_transfer(EHCIQueue *q, int status)
+{
+    uint32_t cpage, offset;
 
-    return 0;
+    qemu_sglist_destroy(&q->sgl);
+
+    if (status > 0) {
+        /* update cpage & offset */
+        cpage  = get_field(q->qh.token, QTD_TOKEN_CPAGE);
+        offset = q->qh.bufptr[0] & ~QTD_BUFPTR_MASK;
+
+        offset += status;
+        cpage  += offset >> QTD_BUFPTR_SH;
+        offset &= ~QTD_BUFPTR_MASK;
+
+        set_field(&q->qh.token, cpage, QTD_TOKEN_CPAGE);
+        q->qh.bufptr[0] &= QTD_BUFPTR_MASK;
+        q->qh.bufptr[0] |= offset;
+    }
 }
 
 static void ehci_async_complete_packet(USBPort *port, USBPacket *packet)
@@ -1235,7 +1235,7 @@ static void ehci_async_complete_packet(USBPort *port, USBPacket *packet)
     trace_usb_ehci_queue_action(q, "wakeup");
     assert(q->async == EHCI_ASYNC_INFLIGHT);
     q->async = EHCI_ASYNC_FINISHED;
-    q->usb_status = packet->len;
+    q->usb_status = packet->result;
 }
 
 static void ehci_execute_complete(EHCIQueue *q)
@@ -1295,10 +1295,6 @@ err:
         }
 
         if (q->tbytes && q->pid == USB_TOKEN_IN) {
-            if (ehci_buffer_rw(q, q->usb_status, 1) != 0) {
-                q->usb_status = USB_RET_PROCERR;
-                return;
-            }
             q->tbytes -= q->usb_status;
         } else {
             q->tbytes = 0;
@@ -1307,6 +1303,8 @@ err:
         DPRINTF("updating tbytes to %d\n", q->tbytes);
         set_field(&q->qh.token, q->tbytes, QTD_TOKEN_TBYTES);
     }
+    ehci_finish_transfer(q, q->usb_status);
+    usb_packet_unmap(&q->packet);
 
     q->qh.token ^= QTD_TOKEN_DTOGGLE;
     q->qh.token &= ~QTD_TOKEN_ACTIVE;
@@ -1346,8 +1344,7 @@ static int ehci_execute(EHCIQueue *q)
         default: fprintf(stderr, "bad token\n"); break;
     }
 
-    if ((q->tbytes && q->pid != USB_TOKEN_IN) &&
-        (ehci_buffer_rw(q, q->tbytes, 0) != 0)) {
+    if (ehci_init_transfer(q) != 0) {
         return USB_RET_PROCERR;
     }
 
@@ -1356,6 +1353,9 @@ static int ehci_execute(EHCIQueue *q)
 
     ret = USB_RET_NODEV;
 
+    usb_packet_setup(&q->packet, q->pid, devadr, endp);
+    usb_packet_map(&q->packet, &q->sgl);
+
     // TO-DO: associating device with ehci port
     for(i = 0; i < NB_PORTS; i++) {
         port = &q->ehci->ports[i];
@@ -1367,17 +1367,12 @@ static int ehci_execute(EHCIQueue *q)
             continue;
         }
 
-        q->packet.pid = q->pid;
-        q->packet.devaddr = devadr;
-        q->packet.devep = endp;
-        q->packet.data = q->buffer;
-        q->packet.len = q->tbytes;
-
         ret = usb_handle_packet(dev, &q->packet);
 
-        DPRINTF("submit: qh %x next %x qtd %x pid %x len %d (total %d) endp %x ret %d\n",
+        DPRINTF("submit: qh %x next %x qtd %x pid %x len %zd "
+                "(total %d) endp %x ret %d\n",
                 q->qhaddr, q->qh.next, q->qtdaddr, q->pid,
-                q->packet.len, q->tbytes, endp, ret);
+                q->packet.iov.size, q->tbytes, endp, ret);
 
         if (ret != USB_RET_NODEV) {
             break;
@@ -1401,7 +1396,7 @@ static int ehci_process_itd(EHCIState *ehci,
     USBPort *port;
     USBDevice *dev;
     int ret;
-    uint32_t i, j, len, len1, len2, pid, dir, devaddr, endp;
+    uint32_t i, j, len, pid, dir, devaddr, endp;
     uint32_t pg, off, ptr1, ptr2, max, mult;
 
     dir =(itd->bufptr[1] & ITD_BUFPTR_DIRECTION);
@@ -1426,29 +1421,23 @@ static int ehci_process_itd(EHCIState *ehci,
                 return USB_RET_PROCERR;
             }
 
+            qemu_sglist_init(&ehci->isgl, 2);
             if (off + len > 4096) {
                 /* transfer crosses page border */
-                len2 = off + len - 4096;
-                len1 = len - len2;
+                uint32_t len2 = off + len - 4096;
+                uint32_t len1 = len - len2;
+                qemu_sglist_add(&ehci->isgl, ptr1 + off, len1);
+                qemu_sglist_add(&ehci->isgl, ptr2, len2);
             } else {
-                len1 = len;
-                len2 = 0;
+                qemu_sglist_add(&ehci->isgl, ptr1 + off, len);
             }
 
-            if (!dir) {
-                pid = USB_TOKEN_OUT;
-                trace_usb_ehci_data(0, pg, off, ptr1 + off, len1, 0);
-                cpu_physical_memory_rw(ptr1 + off, &ehci->ibuffer[0], len1, 0);
-                if (len2) {
-                    trace_usb_ehci_data(0, pg+1, 0, ptr2, len2, len1);
-                    cpu_physical_memory_rw(ptr2, &ehci->ibuffer[len1], len2, 0);
-                }
-            } else {
-                pid = USB_TOKEN_IN;
-            }
+            pid = dir ? USB_TOKEN_IN : USB_TOKEN_OUT;
 
-            ret = USB_RET_NODEV;
+            usb_packet_setup(&ehci->ipacket, pid, devaddr, endp);
+            usb_packet_map(&ehci->ipacket, &ehci->isgl);
 
+            ret = USB_RET_NODEV;
             for (j = 0; j < NB_PORTS; j++) {
                 port = &ehci->ports[j];
                 dev = port->dev;
@@ -1457,12 +1446,6 @@ static int ehci_process_itd(EHCIState *ehci,
                     continue;
                 }
 
-                ehci->ipacket.pid = pid;
-                ehci->ipacket.devaddr = devaddr;
-                ehci->ipacket.devep = endp;
-                ehci->ipacket.data = ehci->ibuffer;
-                ehci->ipacket.len = len;
-
                 ret = usb_handle_packet(dev, &ehci->ipacket);
 
                 if (ret != USB_RET_NODEV) {
@@ -1470,6 +1453,9 @@ static int ehci_process_itd(EHCIState *ehci,
                 }
             }
 
+            usb_packet_unmap(&ehci->ipacket);
+            qemu_sglist_destroy(&ehci->isgl);
+
 #if 0
             /*  In isoch, there is no facility to indicate a NAK so let's
              *  instead just complete a zero-byte transaction.  Setting
@@ -1507,20 +1493,6 @@ static int ehci_process_itd(EHCIState *ehci,
                     set_field(&itd->transact[i], len - ret, ITD_XACT_LENGTH);
                 } else {
                     /* IN */
-                    if (len1 > ret) {
-                        len1 = ret;
-                    }
-                    if (len2 > ret - len1) {
-                        len2 = ret - len1;
-                    }
-                    if (len1) {
-                        trace_usb_ehci_data(1, pg, off, ptr1 + off, len1, 0);
-                        cpu_physical_memory_rw(ptr1 + off, &ehci->ibuffer[0], len1, 1);
-                    }
-                    if (len2) {
-                        trace_usb_ehci_data(1, pg+1, 0, ptr2, len2, len1);
-                        cpu_physical_memory_rw(ptr2, &ehci->ibuffer[len1], len2, 1);
-                    }
                     set_field(&itd->transact[i], ret, ITD_XACT_LENGTH);
                 }
 
diff --git a/hw/usb-hid.c b/hw/usb-hid.c
index 9008320c86..e5d57de888 100644
--- a/hw/usb-hid.c
+++ b/hw/usb-hid.c
@@ -27,6 +27,7 @@
 #include "usb.h"
 #include "usb-desc.h"
 #include "qemu-timer.h"
+#include "hid.h"
 
 /* HID interface requests */
 #define GET_REPORT   0xa101
@@ -41,46 +42,9 @@
 #define USB_DT_REPORT 0x22
 #define USB_DT_PHY    0x23
 
-#define USB_MOUSE     1
-#define USB_TABLET    2
-#define USB_KEYBOARD  3
-
-typedef struct USBPointerEvent {
-    int32_t xdx, ydy; /* relative iff it's a mouse, otherwise absolute */
-    int32_t dz, buttons_state;
-} USBPointerEvent;
-
-#define QUEUE_LENGTH    16 /* should be enough for a triple-click */
-#define QUEUE_MASK      (QUEUE_LENGTH-1u)
-#define QUEUE_INCR(v)   ((v)++, (v) &= QUEUE_MASK)
-
-typedef struct USBMouseState {
-    USBPointerEvent queue[QUEUE_LENGTH];
-    int mouse_grabbed;
-    QEMUPutMouseEntry *eh_entry;
-} USBMouseState;
-
-typedef struct USBKeyboardState {
-    uint32_t keycodes[QUEUE_LENGTH];
-    uint16_t modifiers;
-    uint8_t leds;
-    uint8_t key[16];
-    int32_t keys;
-} USBKeyboardState;
-
 typedef struct USBHIDState {
     USBDevice dev;
-    union {
-        USBMouseState ptr;
-        USBKeyboardState kbd;
-    };
-    uint32_t head; /* index into circular queue */
-    uint32_t n;
-    int kind;
-    int32_t protocol;
-    uint8_t idle;
-    int64_t next_idle_clock;
-    int changed;
+    HIDState hid;
     void *datain_opaque;
     void (*datain)(void *);
 } USBHIDState;
@@ -394,344 +358,29 @@ static const uint8_t qemu_keyboard_hid_report_descriptor[] = {
     0xc0,		/* End Collection */
 };
 
-#define USB_HID_USAGE_ERROR_ROLLOVER	0x01
-#define USB_HID_USAGE_POSTFAIL		0x02
-#define USB_HID_USAGE_ERROR_UNDEFINED	0x03
-
-/* Indices are QEMU keycodes, values are from HID Usage Table.  Indices
- * above 0x80 are for keys that come after 0xe0 or 0xe1+0x1d or 0xe1+0x9d.  */
-static const uint8_t usb_hid_usage_keys[0x100] = {
-    0x00, 0x29, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23,
-    0x24, 0x25, 0x26, 0x27, 0x2d, 0x2e, 0x2a, 0x2b,
-    0x14, 0x1a, 0x08, 0x15, 0x17, 0x1c, 0x18, 0x0c,
-    0x12, 0x13, 0x2f, 0x30, 0x28, 0xe0, 0x04, 0x16,
-    0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x0f, 0x33,
-    0x34, 0x35, 0xe1, 0x31, 0x1d, 0x1b, 0x06, 0x19,
-    0x05, 0x11, 0x10, 0x36, 0x37, 0x38, 0xe5, 0x55,
-    0xe2, 0x2c, 0x32, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e,
-    0x3f, 0x40, 0x41, 0x42, 0x43, 0x53, 0x47, 0x5f,
-    0x60, 0x61, 0x56, 0x5c, 0x5d, 0x5e, 0x57, 0x59,
-    0x5a, 0x5b, 0x62, 0x63, 0x00, 0x00, 0x00, 0x44,
-    0x45, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e,
-    0xe8, 0xe9, 0x71, 0x72, 0x73, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0xe3, 0xe7, 0x65,
-
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x58, 0xe4, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x00, 0x46,
-    0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x4a,
-    0x52, 0x4b, 0x00, 0x50, 0x00, 0x4f, 0x00, 0x4d,
-    0x51, 0x4e, 0x49, 0x4c, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0xe3, 0xe7, 0x65, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-};
-
-static void usb_hid_changed(USBHIDState *hs)
+static void usb_hid_changed(HIDState *hs)
 {
-    hs->changed = 1;
-
-    if (hs->datain)
-        hs->datain(hs->datain_opaque);
-
-    usb_wakeup(&hs->dev);
-}
+    USBHIDState *us = container_of(hs, USBHIDState, hid);
 
-static void usb_pointer_event_clear(USBPointerEvent *e, int buttons) {
-    e->xdx = e->ydy = e->dz = 0;
-    e->buttons_state = buttons;
-}
-
-static void usb_pointer_event_combine(USBPointerEvent *e, int xyrel,
-                                      int x1, int y1, int z1) {
-    if (xyrel) {
-        e->xdx += x1;
-        e->ydy += y1;
-    } else {
-        e->xdx = x1;
-        e->ydy = y1;
-        /* Windows drivers do not like the 0/0 position and ignore such
-         * events. */
-        if (!(x1 | y1)) {
-            x1 = 1;
-        }
+    if (us->datain) {
+        us->datain(us->datain_opaque);
     }
-    e->dz += z1;
-}
 
-static void usb_pointer_event(void *opaque,
-                              int x1, int y1, int z1, int buttons_state)
-{
-    USBHIDState *hs = opaque;
-    USBMouseState *s = &hs->ptr;
-    unsigned use_slot = (hs->head + hs->n - 1) & QUEUE_MASK;
-    unsigned previous_slot = (use_slot - 1) & QUEUE_MASK;
-
-    /* We combine events where feasible to keep the queue small.  We shouldn't
-     * combine anything with the first event of a particular button state, as
-     * that would change the location of the button state change.  When the
-     * queue is empty, a second event is needed because we don't know if
-     * the first event changed the button state.  */
-    if (hs->n == QUEUE_LENGTH) {
-        /* Queue full.  Discard old button state, combine motion normally.  */
-        s->queue[use_slot].buttons_state = buttons_state;
-    } else if (hs->n < 2 ||
-               s->queue[use_slot].buttons_state != buttons_state ||
-               s->queue[previous_slot].buttons_state != s->queue[use_slot].buttons_state) {
-        /* Cannot or should not combine, so add an empty item to the queue.  */
-        QUEUE_INCR(use_slot);
-        hs->n++;
-        usb_pointer_event_clear(&s->queue[use_slot], buttons_state);
-    }
-    usb_pointer_event_combine(&s->queue[use_slot],
-                              hs->kind == USB_MOUSE,
-                              x1, y1, z1);
-    usb_hid_changed(hs);
+    usb_wakeup(&us->dev);
 }
 
-static void usb_keyboard_event(void *opaque, int keycode)
+static void usb_hid_handle_reset(USBDevice *dev)
 {
-    USBHIDState *hs = opaque;
-    USBKeyboardState *s = &hs->kbd;
-    int slot;
+    USBHIDState *us = DO_UPCAST(USBHIDState, dev, dev);
 
-    if (hs->n == QUEUE_LENGTH) {
-        fprintf(stderr, "usb-kbd: warning: key event queue full\n");
-        return;
-    }
-    slot = (hs->head + hs->n) & QUEUE_MASK; hs->n++;
-    s->keycodes[slot] = keycode;
-    usb_hid_changed(hs);
-}
-
-static void usb_keyboard_process_keycode(USBHIDState *hs)
-{
-    USBKeyboardState *s = &hs->kbd;
-    uint8_t hid_code, key;
-    int i, keycode, slot;
-
-    if (hs->n == 0) {
-        return;
-    }
-    slot = hs->head & QUEUE_MASK; QUEUE_INCR(hs->head); hs->n--;
-    keycode = s->keycodes[slot];
-
-    key = keycode & 0x7f;
-    hid_code = usb_hid_usage_keys[key | ((s->modifiers >> 1) & (1 << 7))];
-    s->modifiers &= ~(1 << 8);
-
-    switch (hid_code) {
-    case 0x00:
-        return;
-
-    case 0xe0:
-        if (s->modifiers & (1 << 9)) {
-            s->modifiers ^= 3 << 8;
-            return;
-        }
-    case 0xe1 ... 0xe7:
-        if (keycode & (1 << 7)) {
-            s->modifiers &= ~(1 << (hid_code & 0x0f));
-            return;
-        }
-    case 0xe8 ... 0xef:
-        s->modifiers |= 1 << (hid_code & 0x0f);
-        return;
-    }
-
-    if (keycode & (1 << 7)) {
-        for (i = s->keys - 1; i >= 0; i --)
-            if (s->key[i] == hid_code) {
-                s->key[i] = s->key[-- s->keys];
-                s->key[s->keys] = 0x00;
-                break;
-            }
-        if (i < 0)
-            return;
-    } else {
-        for (i = s->keys - 1; i >= 0; i --)
-            if (s->key[i] == hid_code)
-                break;
-        if (i < 0) {
-            if (s->keys < sizeof(s->key))
-                s->key[s->keys ++] = hid_code;
-        } else
-            return;
-    }
-}
-
-static inline int int_clamp(int val, int vmin, int vmax)
-{
-    if (val < vmin)
-        return vmin;
-    else if (val > vmax)
-        return vmax;
-    else
-        return val;
-}
-
-static int usb_pointer_poll(USBHIDState *hs, uint8_t *buf, int len)
-{
-    int dx, dy, dz, b, l;
-    int index;
-    USBMouseState *s = &hs->ptr;
-    USBPointerEvent *e;
-
-    if (!s->mouse_grabbed) {
-        qemu_activate_mouse_event_handler(s->eh_entry);
-        s->mouse_grabbed = 1;
-    }
-
-    /* When the buffer is empty, return the last event.  Relative
-       movements will all be zero.  */
-    index = (hs->n ? hs->head : hs->head - 1);
-    e = &s->queue[index & QUEUE_MASK];
-
-    if (hs->kind == USB_MOUSE) {
-        dx = int_clamp(e->xdx, -127, 127);
-        dy = int_clamp(e->ydy, -127, 127);
-        e->xdx -= dx;
-        e->ydy -= dy;
-    } else {
-        dx = e->xdx;
-        dy = e->ydy;
-    }
-    dz = int_clamp(e->dz, -127, 127);
-    e->dz -= dz;
-
-    b = 0;
-    if (e->buttons_state & MOUSE_EVENT_LBUTTON)
-        b |= 0x01;
-    if (e->buttons_state & MOUSE_EVENT_RBUTTON)
-        b |= 0x02;
-    if (e->buttons_state & MOUSE_EVENT_MBUTTON)
-        b |= 0x04;
-
-    if (hs->n &&
-        !e->dz &&
-        (hs->kind == USB_TABLET || (!e->xdx && !e->ydy))) {
-        /* that deals with this event */
-        QUEUE_INCR(hs->head);
-        hs->n--;
-    }
-
-    /* Appears we have to invert the wheel direction */
-    dz = 0 - dz;
-    l = 0;
-    switch (hs->kind) {
-    case USB_MOUSE:
-        if (len > l)
-            buf[l++] = b;
-        if (len > l)
-            buf[l++] = dx;
-        if (len > l)
-            buf[l++] = dy;
-        if (len > l)
-            buf[l++] = dz;
-        break;
-
-    case USB_TABLET:
-        if (len > l)
-            buf[l++] = b;
-        if (len > l)
-            buf[l++] = dx & 0xff;
-        if (len > l)
-            buf[l++] = dx >> 8;
-        if (len > l)
-            buf[l++] = dy & 0xff;
-        if (len > l)
-            buf[l++] = dy >> 8;
-        if (len > l)
-            buf[l++] = dz;
-        break;
-
-    default:
-        abort();
-    }
-
-    return l;
-}
-
-static int usb_keyboard_poll(USBHIDState *hs, uint8_t *buf, int len)
-{
-    USBKeyboardState *s = &hs->kbd;
-    if (len < 2)
-        return 0;
-
-    usb_keyboard_process_keycode(hs);
-
-    buf[0] = s->modifiers & 0xff;
-    buf[1] = 0;
-    if (s->keys > 6)
-        memset(buf + 2, USB_HID_USAGE_ERROR_ROLLOVER, MIN(8, len) - 2);
-    else
-        memcpy(buf + 2, s->key, MIN(8, len) - 2);
-
-    return MIN(8, len);
-}
-
-static int usb_keyboard_write(USBKeyboardState *s, uint8_t *buf, int len)
-{
-    if (len > 0) {
-        int ledstate = 0;
-        /* 0x01: Num Lock LED
-         * 0x02: Caps Lock LED
-         * 0x04: Scroll Lock LED
-         * 0x08: Compose LED
-         * 0x10: Kana LED */
-        s->leds = buf[0];
-        if (s->leds & 0x04)
-            ledstate |= QEMU_SCROLL_LOCK_LED;
-        if (s->leds & 0x01)
-            ledstate |= QEMU_NUM_LOCK_LED;
-        if (s->leds & 0x02)
-            ledstate |= QEMU_CAPS_LOCK_LED;
-        kbd_put_ledstate(ledstate);
-    }
-    return 0;
-}
-
-static void usb_mouse_handle_reset(USBDevice *dev)
-{
-    USBHIDState *s = (USBHIDState *)dev;
-
-    memset(s->ptr.queue, 0, sizeof (s->ptr.queue));
-    s->head = 0;
-    s->n = 0;
-    s->protocol = 1;
-}
-
-static void usb_keyboard_handle_reset(USBDevice *dev)
-{
-    USBHIDState *s = (USBHIDState *)dev;
-
-    qemu_add_kbd_event_handler(usb_keyboard_event, s);
-    memset(s->kbd.keycodes, 0, sizeof (s->kbd.keycodes));
-    s->head = 0;
-    s->n = 0;
-    memset(s->kbd.key, 0, sizeof (s->kbd.key));
-    s->kbd.keys = 0;
-    s->protocol = 1;
-}
-
-static void usb_hid_set_next_idle(USBHIDState *s, int64_t curtime)
-{
-    s->next_idle_clock = curtime + (get_ticks_per_sec() * s->idle * 4) / 1000;
+    hid_reset(&us->hid);
 }
 
 static int usb_hid_handle_control(USBDevice *dev, USBPacket *p,
                int request, int value, int index, int length, uint8_t *data)
 {
-    USBHIDState *s = (USBHIDState *)dev;
+    USBHIDState *us = DO_UPCAST(USBHIDState, dev, dev);
+    HIDState *hs = &us->hid;
     int ret;
 
     ret = usb_desc_handle_control(dev, p, request, value, index, length, data);
@@ -740,7 +389,7 @@ static int usb_hid_handle_control(USBDevice *dev, USBPacket *p,
     }
 
     ret = 0;
-    switch(request) {
+    switch (request) {
     case DeviceRequest | USB_REQ_GET_INTERFACE:
         data[0] = 0;
         ret = 1;
@@ -750,17 +399,17 @@ static int usb_hid_handle_control(USBDevice *dev, USBPacket *p,
         break;
         /* hid specific requests */
     case InterfaceRequest | USB_REQ_GET_DESCRIPTOR:
-        switch(value >> 8) {
+        switch (value >> 8) {
         case 0x22:
-	    if (s->kind == USB_MOUSE) {
+            if (hs->kind == HID_MOUSE) {
 		memcpy(data, qemu_mouse_hid_report_descriptor,
 		       sizeof(qemu_mouse_hid_report_descriptor));
 		ret = sizeof(qemu_mouse_hid_report_descriptor);
-	    } else if (s->kind == USB_TABLET) {
-		memcpy(data, qemu_tablet_hid_report_descriptor,
+            } else if (hs->kind == HID_TABLET) {
+                memcpy(data, qemu_tablet_hid_report_descriptor,
 		       sizeof(qemu_tablet_hid_report_descriptor));
 		ret = sizeof(qemu_tablet_hid_report_descriptor);
-            } else if (s->kind == USB_KEYBOARD) {
+            } else if (hs->kind == HID_KEYBOARD) {
                 memcpy(data, qemu_keyboard_hid_report_descriptor,
                        sizeof(qemu_keyboard_hid_report_descriptor));
                 ret = sizeof(qemu_keyboard_hid_report_descriptor);
@@ -771,38 +420,40 @@ static int usb_hid_handle_control(USBDevice *dev, USBPacket *p,
         }
         break;
     case GET_REPORT:
-        if (s->kind == USB_MOUSE || s->kind == USB_TABLET) {
-            ret = usb_pointer_poll(s, data, length);
-        } else if (s->kind == USB_KEYBOARD) {
-            ret = usb_keyboard_poll(s, data, length);
+        if (hs->kind == HID_MOUSE || hs->kind == HID_TABLET) {
+            ret = hid_pointer_poll(hs, data, length);
+        } else if (hs->kind == HID_KEYBOARD) {
+            ret = hid_keyboard_poll(hs, data, length);
         }
-        s->changed = s->n > 0;
         break;
     case SET_REPORT:
-        if (s->kind == USB_KEYBOARD)
-            ret = usb_keyboard_write(&s->kbd, data, length);
-        else
+        if (hs->kind == HID_KEYBOARD) {
+            ret = hid_keyboard_write(hs, data, length);
+        } else {
             goto fail;
+        }
         break;
     case GET_PROTOCOL:
-        if (s->kind != USB_KEYBOARD && s->kind != USB_MOUSE)
+        if (hs->kind != HID_KEYBOARD && hs->kind != HID_MOUSE) {
             goto fail;
+        }
         ret = 1;
-        data[0] = s->protocol;
+        data[0] = hs->protocol;
         break;
     case SET_PROTOCOL:
-        if (s->kind != USB_KEYBOARD && s->kind != USB_MOUSE)
+        if (hs->kind != HID_KEYBOARD && hs->kind != HID_MOUSE) {
             goto fail;
+        }
         ret = 0;
-        s->protocol = value;
+        hs->protocol = value;
         break;
     case GET_IDLE:
         ret = 1;
-        data[0] = s->idle;
+        data[0] = hs->idle;
         break;
     case SET_IDLE:
-        s->idle = (uint8_t) (value >> 8);
-        usb_hid_set_next_idle(s, qemu_get_clock_ns(vm_clock));
+        hs->idle = (uint8_t) (value >> 8);
+        hid_set_next_idle(hs, qemu_get_clock_ns(vm_clock));
         ret = 0;
         break;
     default:
@@ -815,23 +466,26 @@ static int usb_hid_handle_control(USBDevice *dev, USBPacket *p,
 
 static int usb_hid_handle_data(USBDevice *dev, USBPacket *p)
 {
-    USBHIDState *s = (USBHIDState *)dev;
+    USBHIDState *us = DO_UPCAST(USBHIDState, dev, dev);
+    HIDState *hs = &us->hid;
+    uint8_t buf[p->iov.size];
     int ret = 0;
 
-    switch(p->pid) {
+    switch (p->pid) {
     case USB_TOKEN_IN:
         if (p->devep == 1) {
             int64_t curtime = qemu_get_clock_ns(vm_clock);
-            if (!s->changed && (!s->idle || s->next_idle_clock - curtime > 0))
+            if (!hid_has_events(hs) &&
+                (!hs->idle || hs->next_idle_clock - curtime > 0)) {
                 return USB_RET_NAK;
-            usb_hid_set_next_idle(s, curtime);
-            if (s->kind == USB_MOUSE || s->kind == USB_TABLET) {
-                ret = usb_pointer_poll(s, p->data, p->len);
             }
-            else if (s->kind == USB_KEYBOARD) {
-                ret = usb_keyboard_poll(s, p->data, p->len);
+            hid_set_next_idle(hs, curtime);
+            if (hs->kind == HID_MOUSE || hs->kind == HID_TABLET) {
+                ret = hid_pointer_poll(hs, buf, p->iov.size);
+            } else if (hs->kind == HID_KEYBOARD) {
+                ret = hid_keyboard_poll(hs, buf, p->iov.size);
             }
-            s->changed = s->n > 0;
+            usb_packet_copy(p, buf, ret);
         } else {
             goto fail;
         }
@@ -847,50 +501,33 @@ static int usb_hid_handle_data(USBDevice *dev, USBPacket *p)
 
 static void usb_hid_handle_destroy(USBDevice *dev)
 {
-    USBHIDState *s = (USBHIDState *)dev;
+    USBHIDState *us = DO_UPCAST(USBHIDState, dev, dev);
 
-    switch(s->kind) {
-    case USB_KEYBOARD:
-        qemu_remove_kbd_event_handler();
-        break;
-    default:
-        qemu_remove_mouse_event_handler(s->ptr.eh_entry);
-    }
+    hid_free(&us->hid);
 }
 
 static int usb_hid_initfn(USBDevice *dev, int kind)
 {
-    USBHIDState *s = DO_UPCAST(USBHIDState, dev, dev);
+    USBHIDState *us = DO_UPCAST(USBHIDState, dev, dev);
 
     usb_desc_init(dev);
-    s->kind = kind;
-
-    if (s->kind == USB_MOUSE) {
-        s->ptr.eh_entry = qemu_add_mouse_event_handler(usb_pointer_event, s,
-                                                       0, "QEMU USB Mouse");
-    } else if (s->kind == USB_TABLET) {
-        s->ptr.eh_entry = qemu_add_mouse_event_handler(usb_pointer_event, s,
-                                                       1, "QEMU USB Tablet");
-    }
-
-    /* Force poll routine to be run and grab input the first time.  */
-    s->changed = 1;
+    hid_init(&us->hid, kind, usb_hid_changed);
     return 0;
 }
 
 static int usb_tablet_initfn(USBDevice *dev)
 {
-    return usb_hid_initfn(dev, USB_TABLET);
+    return usb_hid_initfn(dev, HID_TABLET);
 }
 
 static int usb_mouse_initfn(USBDevice *dev)
 {
-    return usb_hid_initfn(dev, USB_MOUSE);
+    return usb_hid_initfn(dev, HID_MOUSE);
 }
 
 static int usb_keyboard_initfn(USBDevice *dev)
 {
-    return usb_hid_initfn(dev, USB_KEYBOARD);
+    return usb_hid_initfn(dev, HID_KEYBOARD);
 }
 
 void usb_hid_datain_cb(USBDevice *dev, void *opaque, void (*datain)(void *))
@@ -905,8 +542,8 @@ static int usb_hid_post_load(void *opaque, int version_id)
 {
     USBHIDState *s = opaque;
 
-    if (s->idle) {
-        usb_hid_set_next_idle(s, qemu_get_clock_ns(vm_clock));
+    if (s->hid.idle) {
+        hid_set_next_idle(&s->hid, qemu_get_clock_ns(vm_clock));
     }
     return 0;
 }
@@ -916,10 +553,10 @@ static const VMStateDescription vmstate_usb_ptr_queue = {
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField []) {
-        VMSTATE_INT32(xdx, USBPointerEvent),
-        VMSTATE_INT32(ydy, USBPointerEvent),
-        VMSTATE_INT32(dz, USBPointerEvent),
-        VMSTATE_INT32(buttons_state, USBPointerEvent),
+        VMSTATE_INT32(xdx, HIDPointerEvent),
+        VMSTATE_INT32(ydy, HIDPointerEvent),
+        VMSTATE_INT32(dz, HIDPointerEvent),
+        VMSTATE_INT32(buttons_state, HIDPointerEvent),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -930,12 +567,12 @@ static const VMStateDescription vmstate_usb_ptr = {
     .post_load = usb_hid_post_load,
     .fields = (VMStateField []) {
         VMSTATE_USB_DEVICE(dev, USBHIDState),
-        VMSTATE_STRUCT_ARRAY(ptr.queue, USBHIDState, QUEUE_LENGTH, 0,
-                             vmstate_usb_ptr_queue, USBPointerEvent),
-        VMSTATE_UINT32(head, USBHIDState),
-        VMSTATE_UINT32(n, USBHIDState),
-        VMSTATE_INT32(protocol, USBHIDState),
-        VMSTATE_UINT8(idle, USBHIDState),
+        VMSTATE_STRUCT_ARRAY(hid.ptr.queue, USBHIDState, QUEUE_LENGTH, 0,
+                             vmstate_usb_ptr_queue, HIDPointerEvent),
+        VMSTATE_UINT32(hid.head, USBHIDState),
+        VMSTATE_UINT32(hid.n, USBHIDState),
+        VMSTATE_INT32(hid.protocol, USBHIDState),
+        VMSTATE_UINT8(hid.idle, USBHIDState),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -947,15 +584,15 @@ static const VMStateDescription vmstate_usb_kbd = {
     .post_load = usb_hid_post_load,
     .fields = (VMStateField []) {
         VMSTATE_USB_DEVICE(dev, USBHIDState),
-        VMSTATE_UINT32_ARRAY(kbd.keycodes, USBHIDState, QUEUE_LENGTH),
-        VMSTATE_UINT32(head, USBHIDState),
-        VMSTATE_UINT32(n, USBHIDState),
-        VMSTATE_UINT16(kbd.modifiers, USBHIDState),
-        VMSTATE_UINT8(kbd.leds, USBHIDState),
-        VMSTATE_UINT8_ARRAY(kbd.key, USBHIDState, 16),
-        VMSTATE_INT32(kbd.keys, USBHIDState),
-        VMSTATE_INT32(protocol, USBHIDState),
-        VMSTATE_UINT8(idle, USBHIDState),
+        VMSTATE_UINT32_ARRAY(hid.kbd.keycodes, USBHIDState, QUEUE_LENGTH),
+        VMSTATE_UINT32(hid.head, USBHIDState),
+        VMSTATE_UINT32(hid.n, USBHIDState),
+        VMSTATE_UINT16(hid.kbd.modifiers, USBHIDState),
+        VMSTATE_UINT8(hid.kbd.leds, USBHIDState),
+        VMSTATE_UINT8_ARRAY(hid.kbd.key, USBHIDState, 16),
+        VMSTATE_INT32(hid.kbd.keys, USBHIDState),
+        VMSTATE_INT32(hid.protocol, USBHIDState),
+        VMSTATE_UINT8(hid.idle, USBHIDState),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -970,7 +607,7 @@ static struct USBDeviceInfo hid_info[] = {
         .usb_desc       = &desc_tablet,
         .init           = usb_tablet_initfn,
         .handle_packet  = usb_generic_handle_packet,
-        .handle_reset   = usb_mouse_handle_reset,
+        .handle_reset   = usb_hid_handle_reset,
         .handle_control = usb_hid_handle_control,
         .handle_data    = usb_hid_handle_data,
         .handle_destroy = usb_hid_handle_destroy,
@@ -983,7 +620,7 @@ static struct USBDeviceInfo hid_info[] = {
         .usb_desc       = &desc_mouse,
         .init           = usb_mouse_initfn,
         .handle_packet  = usb_generic_handle_packet,
-        .handle_reset   = usb_mouse_handle_reset,
+        .handle_reset   = usb_hid_handle_reset,
         .handle_control = usb_hid_handle_control,
         .handle_data    = usb_hid_handle_data,
         .handle_destroy = usb_hid_handle_destroy,
@@ -996,7 +633,7 @@ static struct USBDeviceInfo hid_info[] = {
         .usb_desc       = &desc_keyboard,
         .init           = usb_keyboard_initfn,
         .handle_packet  = usb_generic_handle_packet,
-        .handle_reset   = usb_keyboard_handle_reset,
+        .handle_reset   = usb_hid_handle_reset,
         .handle_control = usb_hid_handle_control,
         .handle_data    = usb_hid_handle_data,
         .handle_destroy = usb_hid_handle_destroy,
diff --git a/hw/usb-hub.c b/hw/usb-hub.c
index b49a2fe882..c49c547d0c 100644
--- a/hw/usb-hub.c
+++ b/hw/usb-hub.c
@@ -394,11 +394,12 @@ static int usb_hub_handle_data(USBDevice *dev, USBPacket *p)
         if (p->devep == 1) {
             USBHubPort *port;
             unsigned int status;
+            uint8_t buf[4];
             int i, n;
             n = (NUM_PORTS + 1 + 7) / 8;
-            if (p->len == 1) { /* FreeBSD workaround */
+            if (p->iov.size == 1) { /* FreeBSD workaround */
                 n = 1;
-            } else if (n > p->len) {
+            } else if (n > p->iov.size) {
                 return USB_RET_BABBLE;
             }
             status = 0;
@@ -409,8 +410,9 @@ static int usb_hub_handle_data(USBDevice *dev, USBPacket *p)
             }
             if (status != 0) {
                 for(i = 0; i < n; i++) {
-                    p->data[i] = status >> (8 * i);
+                    buf[i] = status >> (8 * i);
                 }
+                usb_packet_copy(p, buf, n);
                 ret = n;
             } else {
                 ret = USB_RET_NAK; /* usb11 11.13.1 */
diff --git a/hw/usb-libhw.c b/hw/usb-libhw.c
new file mode 100644
index 0000000000..162b42bd5b
--- /dev/null
+++ b/hw/usb-libhw.c
@@ -0,0 +1,63 @@
+/*
+ * QEMU USB emulation, libhw bits.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "cpu-common.h"
+#include "usb.h"
+#include "dma.h"
+
+int usb_packet_map(USBPacket *p, QEMUSGList *sgl)
+{
+    int is_write = (p->pid == USB_TOKEN_IN);
+    target_phys_addr_t len;
+    void *mem;
+    int i;
+
+    for (i = 0; i < sgl->nsg; i++) {
+        len = sgl->sg[i].len;
+        mem = cpu_physical_memory_map(sgl->sg[i].base, &len,
+                                      is_write);
+        if (!mem) {
+            goto err;
+        }
+        qemu_iovec_add(&p->iov, mem, len);
+        if (len != sgl->sg[i].len) {
+            goto err;
+        }
+    }
+    return 0;
+
+err:
+    usb_packet_unmap(p);
+    return -1;
+}
+
+void usb_packet_unmap(USBPacket *p)
+{
+    int is_write = (p->pid == USB_TOKEN_IN);
+    int i;
+
+    for (i = 0; i < p->iov.niov; i++) {
+        cpu_physical_memory_unmap(p->iov.iov[i].iov_base,
+                                  p->iov.iov[i].iov_len, is_write,
+                                  p->iov.iov[i].iov_len);
+    }
+}
diff --git a/hw/usb-msd.c b/hw/usb-msd.c
index cdeac581e3..90e57fbf6b 100644
--- a/hw/usb-msd.c
+++ b/hw/usb-msd.c
@@ -43,8 +43,6 @@ typedef struct {
     enum USBMSDMode mode;
     uint32_t scsi_len;
     uint8_t *scsi_buf;
-    uint32_t usb_len;
-    uint8_t *usb_buf;
     uint32_t data_len;
     uint32_t residue;
     uint32_t tag;
@@ -176,20 +174,14 @@ static const USBDesc desc = {
     .str  = desc_strings,
 };
 
-static void usb_msd_copy_data(MSDState *s)
+static void usb_msd_copy_data(MSDState *s, USBPacket *p)
 {
     uint32_t len;
-    len = s->usb_len;
+    len = p->iov.size - p->result;
     if (len > s->scsi_len)
         len = s->scsi_len;
-    if (s->mode == USB_MSDM_DATAIN) {
-        memcpy(s->usb_buf, s->scsi_buf, len);
-    } else {
-        memcpy(s->scsi_buf, s->usb_buf, len);
-    }
-    s->usb_len -= len;
+    usb_packet_copy(p, s->scsi_buf, len);
     s->scsi_len -= len;
-    s->usb_buf += len;
     s->scsi_buf += len;
     s->data_len -= len;
     if (s->scsi_len == 0 || s->data_len == 0) {
@@ -207,8 +199,9 @@ static void usb_msd_send_status(MSDState *s, USBPacket *p)
     csw.residue = s->residue;
     csw.status = s->result;
 
-    len = MIN(sizeof(csw), p->len);
-    memcpy(p->data, &csw, len);
+    len = MIN(sizeof(csw), p->iov.size);
+    usb_packet_copy(p, &csw, len);
+    p->result = len;
 }
 
 static void usb_msd_transfer_data(SCSIRequest *req, uint32_t len)
@@ -220,8 +213,9 @@ static void usb_msd_transfer_data(SCSIRequest *req, uint32_t len)
     s->scsi_len = len;
     s->scsi_buf = scsi_req_get_buf(req);
     if (p) {
-        usb_msd_copy_data(s);
-        if (s->packet && s->usb_len == 0) {
+        usb_msd_copy_data(s, p);
+        p = s->packet;
+        if (p && p->result == p->iov.size) {
             /* Set s->packet to NULL before calling usb_packet_complete
                because another request may be issued before
                usb_packet_complete returns.  */
@@ -248,11 +242,9 @@ static void usb_msd_command_complete(SCSIRequest *req, uint32_t status)
             s->mode = USB_MSDM_CBW;
         } else {
             if (s->data_len) {
-                s->data_len -= s->usb_len;
-                if (s->mode == USB_MSDM_DATAIN) {
-                    memset(s->usb_buf, 0, s->usb_len);
-                }
-                s->usb_len = 0;
+                int len = (p->iov.size - p->result);
+                usb_packet_skip(p, len);
+                s->data_len -= len;
             }
             if (s->data_len == 0) {
                 s->mode = USB_MSDM_CSW;
@@ -342,8 +334,6 @@ static int usb_msd_handle_data(USBDevice *dev, USBPacket *p)
     int ret = 0;
     struct usb_msd_cbw cbw;
     uint8_t devep = p->devep;
-    uint8_t *data = p->data;
-    int len = p->len;
 
     switch (p->pid) {
     case USB_TOKEN_OUT:
@@ -352,11 +342,11 @@ static int usb_msd_handle_data(USBDevice *dev, USBPacket *p)
 
         switch (s->mode) {
         case USB_MSDM_CBW:
-            if (len != 31) {
+            if (p->iov.size != 31) {
                 fprintf(stderr, "usb-msd: Bad CBW size");
                 goto fail;
             }
-            memcpy(&cbw, data, 31);
+            usb_packet_copy(p, &cbw, 31);
             if (le32_to_cpu(cbw.sig) != 0x43425355) {
                 fprintf(stderr, "usb-msd: Bad signature %08x\n",
                         le32_to_cpu(cbw.sig));
@@ -387,36 +377,39 @@ static int usb_msd_handle_data(USBDevice *dev, USBPacket *p)
             if (s->mode != USB_MSDM_CSW && s->residue == 0) {
                 scsi_req_continue(s->req);
             }
-            ret = len;
+            ret = p->result;
             break;
 
         case USB_MSDM_DATAOUT:
-            DPRINTF("Data out %d/%d\n", len, s->data_len);
-            if (len > s->data_len)
+            DPRINTF("Data out %zd/%d\n", p->iov.size, s->data_len);
+            if (p->iov.size > s->data_len) {
                 goto fail;
+            }
 
-            s->usb_buf = data;
-            s->usb_len = len;
             if (s->scsi_len) {
-                usb_msd_copy_data(s);
+                usb_msd_copy_data(s, p);
             }
-            if (s->residue && s->usb_len) {
-                s->data_len -= s->usb_len;
-                if (s->data_len == 0)
-                    s->mode = USB_MSDM_CSW;
-                s->usb_len = 0;
+            if (s->residue) {
+                int len = p->iov.size - p->result;
+                if (len) {
+                    usb_packet_skip(p, len);
+                    s->data_len -= len;
+                    if (s->data_len == 0) {
+                        s->mode = USB_MSDM_CSW;
+                    }
+                }
             }
-            if (s->usb_len) {
+            if (p->result < p->iov.size) {
                 DPRINTF("Deferring packet %p\n", p);
                 s->packet = p;
                 ret = USB_RET_ASYNC;
             } else {
-                ret = len;
+                ret = p->result;
             }
             break;
 
         default:
-            DPRINTF("Unexpected write (len %d)\n", len);
+            DPRINTF("Unexpected write (len %zd)\n", p->iov.size);
             goto fail;
         }
         break;
@@ -427,18 +420,20 @@ static int usb_msd_handle_data(USBDevice *dev, USBPacket *p)
 
         switch (s->mode) {
         case USB_MSDM_DATAOUT:
-            if (s->data_len != 0 || len < 13)
+            if (s->data_len != 0 || p->iov.size < 13) {
                 goto fail;
+            }
             /* Waiting for SCSI write to complete.  */
             s->packet = p;
             ret = USB_RET_ASYNC;
             break;
 
         case USB_MSDM_CSW:
-            DPRINTF("Command status %d tag 0x%x, len %d\n",
-                    s->result, s->tag, len);
-            if (len < 13)
+            DPRINTF("Command status %d tag 0x%x, len %zd\n",
+                    s->result, s->tag, p->iov.size);
+            if (p->iov.size < 13) {
                 goto fail;
+            }
 
             usb_msd_send_status(s, p);
             s->mode = USB_MSDM_CBW;
@@ -446,32 +441,32 @@ static int usb_msd_handle_data(USBDevice *dev, USBPacket *p)
             break;
 
         case USB_MSDM_DATAIN:
-            DPRINTF("Data in %d/%d, scsi_len %d\n", len, s->data_len, s->scsi_len);
-            if (len > s->data_len)
-                len = s->data_len;
-            s->usb_buf = data;
-            s->usb_len = len;
+            DPRINTF("Data in %zd/%d, scsi_len %d\n",
+                    p->iov.size, s->data_len, s->scsi_len);
             if (s->scsi_len) {
-                usb_msd_copy_data(s);
+                usb_msd_copy_data(s, p);
             }
-            if (s->residue && s->usb_len) {
-                s->data_len -= s->usb_len;
-                memset(s->usb_buf, 0, s->usb_len);
-                if (s->data_len == 0)
-                    s->mode = USB_MSDM_CSW;
-                s->usb_len = 0;
+            if (s->residue) {
+                int len = p->iov.size - p->result;
+                if (len) {
+                    usb_packet_skip(p, len);
+                    s->data_len -= len;
+                    if (s->data_len == 0) {
+                        s->mode = USB_MSDM_CSW;
+                    }
+                }
             }
-            if (s->usb_len) {
+            if (p->result < p->iov.size) {
                 DPRINTF("Deferring packet %p\n", p);
                 s->packet = p;
                 ret = USB_RET_ASYNC;
             } else {
-                ret = len;
+                ret = p->result;
             }
             break;
 
         default:
-            DPRINTF("Unexpected read (len %d)\n", len);
+            DPRINTF("Unexpected read (len %zd)\n", p->iov.size);
             goto fail;
         }
         break;
diff --git a/hw/usb-musb.c b/hw/usb-musb.c
index 035dda8372..d3ccde9199 100644
--- a/hw/usb-musb.c
+++ b/hw/usb-musb.c
@@ -365,6 +365,8 @@ struct MUSBState *musb_init(qemu_irq *irqs)
         s->ep[i].maxp[1] = 0x40;
         s->ep[i].musb = s;
         s->ep[i].epnum = i;
+        usb_packet_init(&s->ep[i].packey[0].p);
+        usb_packet_init(&s->ep[i].packey[1].p);
     }
 
     usb_bus_new(&s->bus, &musb_bus_ops, NULL /* FIXME */);
@@ -605,12 +607,10 @@ static void musb_packet(MUSBState *s, MUSBEndPoint *ep,
     ep->interrupt[dir] = ttype == USB_ENDPOINT_XFER_INT;
     ep->delayed_cb[dir] = cb;
 
-    ep->packey[dir].p.pid = pid;
     /* A wild guess on the FADDR semantics... */
-    ep->packey[dir].p.devaddr = ep->faddr[idx];
-    ep->packey[dir].p.devep = ep->type[idx] & 0xf;
-    ep->packey[dir].p.data = (void *) ep->buf[idx];
-    ep->packey[dir].p.len = len;
+    usb_packet_setup(&ep->packey[dir].p, pid, ep->faddr[idx],
+                     ep->type[idx] & 0xf);
+    usb_packet_addbuf(&ep->packey[dir].p, ep->buf[idx], len);
     ep->packey[dir].ep = ep;
     ep->packey[dir].dir = dir;
 
@@ -738,7 +738,7 @@ static void musb_rx_packet_complete(USBPacket *packey, void *opaque)
 
     if (ep->status[1] == USB_RET_STALL) {
         ep->status[1] = 0;
-        packey->len = 0;
+        packey->result = 0;
 
         ep->csr[1] |= MGC_M_RXCSR_H_RXSTALL;
         if (!epnum)
@@ -752,7 +752,7 @@ static void musb_rx_packet_complete(USBPacket *packey, void *opaque)
          * Data-errors in Isochronous.  */
         if (ep->interrupt[1])
             return musb_packet(s, ep, epnum, USB_TOKEN_IN,
-                            packey->len, musb_rx_packet_complete, 1);
+                            packey->iov.size, musb_rx_packet_complete, 1);
 
         ep->csr[1] |= MGC_M_RXCSR_DATAERROR;
         if (!epnum)
@@ -777,14 +777,14 @@ static void musb_rx_packet_complete(USBPacket *packey, void *opaque)
     /* TODO: check len for over/underruns of an OUT packet?  */
     /* TODO: perhaps make use of e->ext_size[1] here.  */
 
-    packey->len = ep->status[1];
+    packey->result = ep->status[1];
 
     if (!(ep->csr[1] & (MGC_M_RXCSR_H_RXSTALL | MGC_M_RXCSR_DATAERROR))) {
         ep->csr[1] |= MGC_M_RXCSR_FIFOFULL | MGC_M_RXCSR_RXPKTRDY;
         if (!epnum)
             ep->csr[0] |= MGC_M_CSR0_RXPKTRDY;
 
-        ep->rxcount = packey->len; /* XXX: MIN(packey->len, ep->maxp[1]); */
+        ep->rxcount = packey->result; /* XXX: MIN(packey->len, ep->maxp[1]); */
         /* In DMA mode: assert DMA request for this EP */
     }
 
@@ -856,12 +856,12 @@ static void musb_rx_req(MUSBState *s, int epnum)
      * 64 bytes of the FIFO, only move the FIFO start and return. (Obsolete) */
     if (ep->packey[1].p.pid == USB_TOKEN_IN && ep->status[1] >= 0 &&
                     (ep->fifostart[1]) + ep->rxcount <
-                    ep->packey[1].p.len) {
+                    ep->packey[1].p.iov.size) {
         TRACE("0x%08x, %d",  ep->fifostart[1], ep->rxcount );
         ep->fifostart[1] += ep->rxcount;
         ep->fifolen[1] = 0;
 
-        ep->rxcount = MIN(ep->packey[0].p.len - (ep->fifostart[1]),
+        ep->rxcount = MIN(ep->packey[0].p.iov.size - (ep->fifostart[1]),
                         ep->maxp[1]);
 
         ep->csr[1] &= ~MGC_M_RXCSR_H_REQPKT;
diff --git a/hw/usb-net.c b/hw/usb-net.c
index 4212e5b3c5..0cb47d63b3 100644
--- a/hw/usb-net.c
+++ b/hw/usb-net.c
@@ -29,6 +29,7 @@
 #include "net.h"
 #include "qemu-queue.h"
 #include "sysemu.h"
+#include "iov.h"
 
 /*#define TRAFFIC_DEBUG*/
 /* Thanks to NetChip Technologies for donating this product ID.
@@ -1121,28 +1122,23 @@ static int usb_net_handle_control(USBDevice *dev, USBPacket *p,
 
 static int usb_net_handle_statusin(USBNetState *s, USBPacket *p)
 {
+    le32 buf[2];
     int ret = 8;
 
-    if (p->len < 8)
+    if (p->iov.size < 8) {
         return USB_RET_STALL;
+    }
 
-    ((le32 *) p->data)[0] = cpu_to_le32(1);
-    ((le32 *) p->data)[1] = cpu_to_le32(0);
+    buf[0] = cpu_to_le32(1);
+    buf[1] = cpu_to_le32(0);
+    usb_packet_copy(p, buf, 8);
     if (!s->rndis_resp.tqh_first)
         ret = USB_RET_NAK;
 
 #ifdef TRAFFIC_DEBUG
-    fprintf(stderr, "usbnet: interrupt poll len %u return %d", p->len, ret);
-    {
-        int i;
-        fprintf(stderr, ":");
-        for (i = 0; i < ret; i++) {
-            if (!(i & 15))
-                fprintf(stderr, "\n%04x:", i);
-            fprintf(stderr, " %02x", p->data[i]);
-        }
-        fprintf(stderr, "\n\n");
-    }
+    fprintf(stderr, "usbnet: interrupt poll len %zu return %d",
+            p->iov.size, ret);
+    iov_hexdump(p->iov.iov, p->iov.niov, stderr, "usbnet", ret);
 #endif
 
     return ret;
@@ -1162,9 +1158,10 @@ static int usb_net_handle_datain(USBNetState *s, USBPacket *p)
         return ret;
     }
     ret = s->in_len - s->in_ptr;
-    if (ret > p->len)
-        ret = p->len;
-    memcpy(p->data, &s->in_buf[s->in_ptr], ret);
+    if (ret > p->iov.size) {
+        ret = p->iov.size;
+    }
+    usb_packet_copy(p, &s->in_buf[s->in_ptr], ret);
     s->in_ptr += ret;
     if (s->in_ptr >= s->in_len &&
                     (is_rndis(s) || (s->in_len & (64 - 1)) || !ret)) {
@@ -1173,17 +1170,8 @@ static int usb_net_handle_datain(USBNetState *s, USBPacket *p)
     }
 
 #ifdef TRAFFIC_DEBUG
-    fprintf(stderr, "usbnet: data in len %u return %d", p->len, ret);
-    {
-        int i;
-        fprintf(stderr, ":");
-        for (i = 0; i < ret; i++) {
-            if (!(i & 15))
-                fprintf(stderr, "\n%04x:", i);
-            fprintf(stderr, " %02x", p->data[i]);
-        }
-        fprintf(stderr, "\n\n");
-    }
+    fprintf(stderr, "usbnet: data in len %zu return %d", p->iov.size, ret);
+    iov_hexdump(p->iov.iov, p->iov.niov, stderr, "usbnet", ret);
 #endif
 
     return ret;
@@ -1191,29 +1179,20 @@ static int usb_net_handle_datain(USBNetState *s, USBPacket *p)
 
 static int usb_net_handle_dataout(USBNetState *s, USBPacket *p)
 {
-    int ret = p->len;
+    int ret = p->iov.size;
     int sz = sizeof(s->out_buf) - s->out_ptr;
     struct rndis_packet_msg_type *msg =
             (struct rndis_packet_msg_type *) s->out_buf;
     uint32_t len;
 
 #ifdef TRAFFIC_DEBUG
-    fprintf(stderr, "usbnet: data out len %u\n", p->len);
-    {
-        int i;
-        fprintf(stderr, ":");
-        for (i = 0; i < p->len; i++) {
-            if (!(i & 15))
-                fprintf(stderr, "\n%04x:", i);
-            fprintf(stderr, " %02x", p->data[i]);
-        }
-        fprintf(stderr, "\n\n");
-    }
+    fprintf(stderr, "usbnet: data out len %zu\n", p->iov.size);
+    iov_hexdump(p->iov.iov, p->iov.niov, stderr, "usbnet", p->iov.size);
 #endif
 
     if (sz > ret)
         sz = ret;
-    memcpy(&s->out_buf[s->out_ptr], p->data, sz);
+    usb_packet_copy(p, &s->out_buf[s->out_ptr], sz);
     s->out_ptr += sz;
 
     if (!is_rndis(s)) {
@@ -1277,8 +1256,8 @@ static int usb_net_handle_data(USBDevice *dev, USBPacket *p)
     }
     if (ret == USB_RET_STALL)
         fprintf(stderr, "usbnet: failed data transaction: "
-                        "pid 0x%x ep 0x%x len 0x%x\n",
-                        p->pid, p->devep, p->len);
+                        "pid 0x%x ep 0x%x len 0x%zx\n",
+                        p->pid, p->devep, p->iov.size);
     return ret;
 }
 
diff --git a/hw/usb-ohci.c b/hw/usb-ohci.c
index 337b250261..d39bcb0c0d 100644
--- a/hw/usb-ohci.c
+++ b/hw/usb-ohci.c
@@ -777,18 +777,17 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed,
     }
 
     if (completion) {
-        ret = ohci->usb_packet.len;
+        ret = ohci->usb_packet.result;
     } else {
         ret = USB_RET_NODEV;
         for (i = 0; i < ohci->num_ports; i++) {
             dev = ohci->rhport[i].port.dev;
             if ((ohci->rhport[i].ctrl & OHCI_PORT_PES) == 0)
                 continue;
-            ohci->usb_packet.pid = pid;
-            ohci->usb_packet.devaddr = OHCI_BM(ed->flags, ED_FA);
-            ohci->usb_packet.devep = OHCI_BM(ed->flags, ED_EN);
-            ohci->usb_packet.data = ohci->usb_buf;
-            ohci->usb_packet.len = len;
+            usb_packet_setup(&ohci->usb_packet, pid,
+                             OHCI_BM(ed->flags, ED_FA),
+                             OHCI_BM(ed->flags, ED_EN));
+            usb_packet_addbuf(&ohci->usb_packet, ohci->usb_buf, len);
             ret = usb_handle_packet(dev, &ohci->usb_packet);
             if (ret != USB_RET_NODEV)
                 break;
@@ -959,7 +958,7 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed)
     }
 #endif
     if (completion) {
-        ret = ohci->usb_packet.len;
+        ret = ohci->usb_packet.result;
         ohci->async_td = 0;
         ohci->async_complete = 0;
     } else {
@@ -980,11 +979,10 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed)
 #endif
                 return 1;
             }
-            ohci->usb_packet.pid = pid;
-            ohci->usb_packet.devaddr = OHCI_BM(ed->flags, ED_FA);
-            ohci->usb_packet.devep = OHCI_BM(ed->flags, ED_EN);
-            ohci->usb_packet.data = ohci->usb_buf;
-            ohci->usb_packet.len = len;
+            usb_packet_setup(&ohci->usb_packet, pid,
+                             OHCI_BM(ed->flags, ED_FA),
+                             OHCI_BM(ed->flags, ED_EN));
+            usb_packet_addbuf(&ohci->usb_packet, ohci->usb_buf, len);
             ret = usb_handle_packet(dev, &ohci->usb_packet);
             if (ret != USB_RET_NODEV)
                 break;
@@ -1761,6 +1759,7 @@ static int usb_ohci_init(OHCIState *ohci, DeviceState *dev,
     ohci->localmem_base = localmem_base;
 
     ohci->name = dev->info->name;
+    usb_packet_init(&ohci->usb_packet);
 
     ohci->async_td = 0;
     qemu_register_reset(ohci_reset, ohci);
diff --git a/hw/usb-serial.c b/hw/usb-serial.c
index 298c1e9d95..bf2b775e83 100644
--- a/hw/usb-serial.c
+++ b/hw/usb-serial.c
@@ -359,37 +359,42 @@ static int usb_serial_handle_control(USBDevice *dev, USBPacket *p,
 static int usb_serial_handle_data(USBDevice *dev, USBPacket *p)
 {
     USBSerialState *s = (USBSerialState *)dev;
-    int ret = 0;
+    int i, ret = 0;
     uint8_t devep = p->devep;
-    uint8_t *data = p->data;
-    int len = p->len;
-    int first_len;
+    struct iovec *iov;
+    uint8_t header[2];
+    int first_len, len;
 
     switch (p->pid) {
     case USB_TOKEN_OUT:
         if (devep != 2)
             goto fail;
-        qemu_chr_write(s->cs, data, len);
+        for (i = 0; i < p->iov.niov; i++) {
+            iov = p->iov.iov + i;
+            qemu_chr_write(s->cs, iov->iov_base, iov->iov_len);
+        }
         break;
 
     case USB_TOKEN_IN:
         if (devep != 1)
             goto fail;
         first_len = RECV_BUF - s->recv_ptr;
+        len = p->iov.size;
         if (len <= 2) {
             ret = USB_RET_NAK;
             break;
         }
-        *data++ = usb_get_modem_lines(s) | 1;
+        header[0] = usb_get_modem_lines(s) | 1;
         /* We do not have the uart details */
         /* handle serial break */
         if (s->event_trigger && s->event_trigger & FTDI_BI) {
             s->event_trigger &= ~FTDI_BI;
-            *data = FTDI_BI;
+            header[1] = FTDI_BI;
+            usb_packet_copy(p, header, 2);
             ret = 2;
             break;
         } else {
-            *data++ = 0;
+            header[1] = 0;
         }
         len -= 2;
         if (len > s->recv_used)
@@ -400,9 +405,10 @@ static int usb_serial_handle_data(USBDevice *dev, USBPacket *p)
         }
         if (first_len > len)
             first_len = len;
-        memcpy(data, s->recv_buf + s->recv_ptr, first_len);
+        usb_packet_copy(p, header, 2);
+        usb_packet_copy(p, s->recv_buf + s->recv_ptr, first_len);
         if (len > first_len)
-            memcpy(data + first_len, s->recv_buf, len - first_len);
+            usb_packet_copy(p, s->recv_buf, len - first_len);
         s->recv_used -= len;
         s->recv_ptr = (s->recv_ptr + len) % RECV_BUF;
         ret = len + 2;
diff --git a/hw/usb-uhci.c b/hw/usb-uhci.c
index da74c57c62..824e3a5e8b 100644
--- a/hw/usb-uhci.c
+++ b/hw/usb-uhci.c
@@ -30,6 +30,8 @@
 #include "pci.h"
 #include "qemu-timer.h"
 #include "usb-uhci.h"
+#include "iov.h"
+#include "dma.h"
 
 //#define DEBUG
 //#define DEBUG_DUMP_DATA
@@ -93,17 +95,12 @@ static const char *pid2str(int pid)
 #endif
 
 #ifdef DEBUG_DUMP_DATA
-static void dump_data(const uint8_t *data, int len)
+static void dump_data(USBPacket *p, int ret)
 {
-    int i;
-
-    printf("uhci: data: ");
-    for(i = 0; i < len; i++)
-        printf(" %02x", data[i]);
-    printf("\n");
+    iov_hexdump(p->iov.iov, p->iov.niov, stderr, "uhci", ret);
 }
 #else
-static void dump_data(const uint8_t *data, int len) {}
+static void dump_data(USBPacket *p, int ret) {}
 #endif
 
 typedef struct UHCIState UHCIState;
@@ -115,6 +112,7 @@ typedef struct UHCIState UHCIState;
  */
 typedef struct UHCIAsync {
     USBPacket packet;
+    QEMUSGList sgl;
     UHCIState *uhci;
     QTAILQ_ENTRY(UHCIAsync) next;
     uint32_t  td;
@@ -122,7 +120,6 @@ typedef struct UHCIAsync {
     int8_t    valid;
     uint8_t   isoc;
     uint8_t   done;
-    uint8_t   buffer[2048];
 } UHCIAsync;
 
 typedef struct UHCIPort {
@@ -179,12 +176,16 @@ static UHCIAsync *uhci_async_alloc(UHCIState *s)
     async->token = 0;
     async->done  = 0;
     async->isoc  = 0;
+    usb_packet_init(&async->packet);
+    qemu_sglist_init(&async->sgl, 1);
 
     return async;
 }
 
 static void uhci_async_free(UHCIState *s, UHCIAsync *async)
 {
+    usb_packet_cleanup(&async->packet);
+    qemu_sglist_destroy(&async->sgl);
     qemu_free(async);
 }
 
@@ -648,10 +649,10 @@ static int uhci_broadcast_packet(UHCIState *s, USBPacket *p)
 {
     int i, ret;
 
-    DPRINTF("uhci: packet enter. pid %s addr 0x%02x ep %d len %d\n",
-           pid2str(p->pid), p->devaddr, p->devep, p->len);
+    DPRINTF("uhci: packet enter. pid %s addr 0x%02x ep %d len %zd\n",
+           pid2str(p->pid), p->devaddr, p->devep, p->iov.size);
     if (p->pid == USB_TOKEN_OUT || p->pid == USB_TOKEN_SETUP)
-        dump_data(p->data, p->len);
+        dump_data(p, 0);
 
     ret = USB_RET_NODEV;
     for (i = 0; i < NB_PORTS && ret == USB_RET_NODEV; i++) {
@@ -662,9 +663,9 @@ static int uhci_broadcast_packet(UHCIState *s, USBPacket *p)
             ret = usb_handle_packet(dev, p);
     }
 
-    DPRINTF("uhci: packet exit. ret %d len %d\n", ret, p->len);
+    DPRINTF("uhci: packet exit. ret %d len %zd\n", ret, p->iov.size);
     if (p->pid == USB_TOKEN_IN && ret > 0)
-        dump_data(p->data, ret);
+        dump_data(p, ret);
 
     return ret;
 }
@@ -684,7 +685,7 @@ static int uhci_complete_td(UHCIState *s, UHCI_TD *td, UHCIAsync *async, uint32_
     max_len = ((td->token >> 21) + 1) & 0x7ff;
     pid = td->token & 0xff;
 
-    ret = async->packet.len;
+    ret = async->packet.result;
 
     if (td->ctrl & TD_CTRL_IOS)
         td->ctrl &= ~TD_CTRL_ACTIVE;
@@ -692,7 +693,7 @@ static int uhci_complete_td(UHCIState *s, UHCI_TD *td, UHCIAsync *async, uint32_
     if (ret < 0)
         goto out;
 
-    len = async->packet.len;
+    len = async->packet.result;
     td->ctrl = (td->ctrl & ~0x7ff) | ((len - 1) & 0x7ff);
 
     /* The NAK bit may have been set by a previous frame, so clear it
@@ -708,11 +709,6 @@ static int uhci_complete_td(UHCIState *s, UHCI_TD *td, UHCIAsync *async, uint32_
             goto out;
         }
 
-        if (len > 0) {
-            /* write the data back */
-            cpu_physical_memory_write(td->buffer, async->buffer, len);
-        }
-
         if ((td->ctrl & TD_CTRL_SPD) && len < max_len) {
             *int_mask |= 0x02;
             /* short packet: do not update QH */
@@ -827,16 +823,14 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td, uint32_t *in
     max_len = ((td->token >> 21) + 1) & 0x7ff;
     pid = td->token & 0xff;
 
-    async->packet.pid     = pid;
-    async->packet.devaddr = (td->token >> 8) & 0x7f;
-    async->packet.devep   = (td->token >> 15) & 0xf;
-    async->packet.data    = async->buffer;
-    async->packet.len     = max_len;
+    usb_packet_setup(&async->packet, pid, (td->token >> 8) & 0x7f,
+                     (td->token >> 15) & 0xf);
+    qemu_sglist_add(&async->sgl, td->buffer, max_len);
+    usb_packet_map(&async->packet, &async->sgl);
 
     switch(pid) {
     case USB_TOKEN_OUT:
     case USB_TOKEN_SETUP:
-        cpu_physical_memory_read(td->buffer, async->buffer, max_len);
         len = uhci_broadcast_packet(s, &async->packet);
         if (len >= 0)
             len = max_len;
@@ -859,10 +853,11 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td, uint32_t *in
         return 2;
     }
 
-    async->packet.len = len;
+    async->packet.result = len;
 
 done:
     len = uhci_complete_td(s, td, async, int_mask);
+    usb_packet_unmap(&async->packet);
     uhci_async_free(s, async);
     return len;
 }
diff --git a/hw/usb-wacom.c b/hw/usb-wacom.c
index d76ee97e49..25580067f2 100644
--- a/hw/usb-wacom.c
+++ b/hw/usb-wacom.c
@@ -308,6 +308,7 @@ static int usb_wacom_handle_control(USBDevice *dev, USBPacket *p,
 static int usb_wacom_handle_data(USBDevice *dev, USBPacket *p)
 {
     USBWacomState *s = (USBWacomState *) dev;
+    uint8_t buf[p->iov.size];
     int ret = 0;
 
     switch (p->pid) {
@@ -317,9 +318,10 @@ static int usb_wacom_handle_data(USBDevice *dev, USBPacket *p)
                 return USB_RET_NAK;
             s->changed = 0;
             if (s->mode == WACOM_MODE_HID)
-                ret = usb_mouse_poll(s, p->data, p->len);
+                ret = usb_mouse_poll(s, buf, p->iov.size);
             else if (s->mode == WACOM_MODE_WACOM)
-                ret = usb_wacom_poll(s, p->data, p->len);
+                ret = usb_wacom_poll(s, buf, p->iov.size);
+            usb_packet_copy(p, buf, ret);
             break;
         }
         /* Fall through.  */
diff --git a/hw/usb.c b/hw/usb.c
index 27a983ca5c..685e775a00 100644
--- a/hw/usb.c
+++ b/hw/usb.c
@@ -25,6 +25,7 @@
  */
 #include "qemu-common.h"
 #include "usb.h"
+#include "iov.h"
 
 void usb_attach(USBPort *port, USBDevice *dev)
 {
@@ -72,10 +73,11 @@ static int do_token_setup(USBDevice *s, USBPacket *p)
     int request, value, index;
     int ret = 0;
 
-    if (p->len != 8)
+    if (p->iov.size != 8) {
         return USB_RET_STALL;
- 
-    memcpy(s->setup_buf, p->data, 8);
+    }
+
+    usb_packet_copy(p, s->setup_buf, p->iov.size);
     s->setup_len   = (s->setup_buf[7] << 8) | s->setup_buf[6];
     s->setup_index = 0;
 
@@ -144,9 +146,10 @@ static int do_token_in(USBDevice *s, USBPacket *p)
     case SETUP_STATE_DATA:
         if (s->setup_buf[0] & USB_DIR_IN) {
             int len = s->setup_len - s->setup_index;
-            if (len > p->len)
-                len = p->len;
-            memcpy(p->data, s->data_buf + s->setup_index, len);
+            if (len > p->iov.size) {
+                len = p->iov.size;
+            }
+            usb_packet_copy(p, s->data_buf + s->setup_index, len);
             s->setup_index += len;
             if (s->setup_index >= s->setup_len)
                 s->setup_state = SETUP_STATE_ACK;
@@ -179,9 +182,10 @@ static int do_token_out(USBDevice *s, USBPacket *p)
     case SETUP_STATE_DATA:
         if (!(s->setup_buf[0] & USB_DIR_IN)) {
             int len = s->setup_len - s->setup_index;
-            if (len > p->len)
-                len = p->len;
-            memcpy(s->data_buf + s->setup_index, p->data, len);
+            if (len > p->iov.size) {
+                len = p->iov.size;
+            }
+            usb_packet_copy(p, s->data_buf + s->setup_index, len);
             s->setup_index += len;
             if (s->setup_index >= s->setup_len)
                 s->setup_state = SETUP_STATE_ACK;
@@ -251,22 +255,22 @@ int usb_generic_handle_packet(USBDevice *s, USBPacket *p)
    usb_packet_complete to complete their async control packets. */
 void usb_generic_async_ctrl_complete(USBDevice *s, USBPacket *p)
 {
-    if (p->len < 0) {
+    if (p->result < 0) {
         s->setup_state = SETUP_STATE_IDLE;
     }
 
     switch (s->setup_state) {
     case SETUP_STATE_SETUP:
-        if (p->len < s->setup_len) {
-            s->setup_len = p->len;
+        if (p->result < s->setup_len) {
+            s->setup_len = p->result;
         }
         s->setup_state = SETUP_STATE_DATA;
-        p->len = 8;
+        p->result = 8;
         break;
 
     case SETUP_STATE_ACK:
         s->setup_state = SETUP_STATE_IDLE;
-        p->len = 0;
+        p->result = 0;
         break;
 
     default:
@@ -347,3 +351,57 @@ void usb_cancel_packet(USBPacket * p)
     p->owner->info->cancel_packet(p->owner, p);
     p->owner = NULL;
 }
+
+
+void usb_packet_init(USBPacket *p)
+{
+    qemu_iovec_init(&p->iov, 1);
+}
+
+void usb_packet_setup(USBPacket *p, int pid, uint8_t addr, uint8_t ep)
+{
+    p->pid = pid;
+    p->devaddr = addr;
+    p->devep = ep;
+    p->result = 0;
+    qemu_iovec_reset(&p->iov);
+}
+
+void usb_packet_addbuf(USBPacket *p, void *ptr, size_t len)
+{
+    qemu_iovec_add(&p->iov, ptr, len);
+}
+
+void usb_packet_copy(USBPacket *p, void *ptr, size_t bytes)
+{
+    assert(p->result >= 0);
+    assert(p->result + bytes <= p->iov.size);
+    switch (p->pid) {
+    case USB_TOKEN_SETUP:
+    case USB_TOKEN_OUT:
+        iov_to_buf(p->iov.iov, p->iov.niov, ptr, p->result, bytes);
+        break;
+    case USB_TOKEN_IN:
+        iov_from_buf(p->iov.iov, p->iov.niov, ptr, p->result, bytes);
+        break;
+    default:
+        fprintf(stderr, "%s: invalid pid: %x\n", __func__, p->pid);
+        abort();
+    }
+    p->result += bytes;
+}
+
+void usb_packet_skip(USBPacket *p, size_t bytes)
+{
+    assert(p->result >= 0);
+    assert(p->result + bytes <= p->iov.size);
+    if (p->pid == USB_TOKEN_IN) {
+        iov_clear(p->iov.iov, p->iov.niov, p->result, bytes);
+    }
+    p->result += bytes;
+}
+
+void usb_packet_cleanup(USBPacket *p)
+{
+    qemu_iovec_destroy(&p->iov);
+}
diff --git a/hw/usb.h b/hw/usb.h
index ded2de29b9..84d04df2e1 100644
--- a/hw/usb.h
+++ b/hw/usb.h
@@ -285,12 +285,21 @@ struct USBPacket {
     int pid;
     uint8_t devaddr;
     uint8_t devep;
-    uint8_t *data;
-    int len;
+    QEMUIOVector iov;
+    int result; /* transfer length or USB_RET_* status code */
     /* Internal use by the USB layer.  */
     USBDevice *owner;
 };
 
+void usb_packet_init(USBPacket *p);
+void usb_packet_setup(USBPacket *p, int pid, uint8_t addr, uint8_t ep);
+void usb_packet_addbuf(USBPacket *p, void *ptr, size_t len);
+int usb_packet_map(USBPacket *p, QEMUSGList *sgl);
+void usb_packet_unmap(USBPacket *p);
+void usb_packet_copy(USBPacket *p, void *ptr, size_t bytes);
+void usb_packet_skip(USBPacket *p, size_t bytes);
+void usb_packet_cleanup(USBPacket *p);
+
 int usb_handle_packet(USBDevice *dev, USBPacket *p);
 void usb_packet_complete(USBDevice *dev, USBPacket *p);
 void usb_cancel_packet(USBPacket * p);
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index 70a8710343..072a88a382 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -1,7 +1,9 @@
 /*
- * Virtio Block Device
+ * Virtio Balloon Device
  *
  * Copyright IBM, Corp. 2008
+ * Copyright (C) 2011 Red Hat, Inc.
+ * Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
  *
  * Authors:
  *  Anthony Liguori   <aliguori@us.ibm.com>
@@ -43,6 +45,7 @@ typedef struct VirtIOBalloon
     size_t stats_vq_offset;
     MonitorCompletion *stats_callback;
     void *stats_opaque_callback_data;
+    DeviceState *qdev;
 } VirtIOBalloon;
 
 static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -199,36 +202,44 @@ static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f)
     return f;
 }
 
-static void virtio_balloon_to_target(void *opaque, ram_addr_t target,
-                                     MonitorCompletion cb, void *cb_data)
+static void virtio_balloon_stat(void *opaque, MonitorCompletion cb,
+                                void *cb_data)
 {
     VirtIOBalloon *dev = opaque;
 
-    if (target > ram_size)
-        target = ram_size;
+    /* For now, only allow one request at a time.  This restriction can be
+     * removed later by queueing callback and data pairs.
+     */
+    if (dev->stats_callback != NULL) {
+        return;
+    }
+    dev->stats_callback = cb;
+    dev->stats_opaque_callback_data = cb_data;
+
+    if (ENABLE_GUEST_STATS
+        && (dev->vdev.guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ))) {
+        virtqueue_push(dev->svq, &dev->stats_vq_elem, dev->stats_vq_offset);
+        virtio_notify(&dev->vdev, dev->svq);
+        return;
+    }
+
+    /* Stats are not supported.  Clear out any stale values that might
+     * have been set by a more featureful guest kernel.
+     */
+    reset_stats(dev);
+    complete_stats_request(dev);
+}
 
+static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
+{
+    VirtIOBalloon *dev = opaque;
+
+    if (target > ram_size) {
+        target = ram_size;
+    }
     if (target) {
         dev->num_pages = (ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
         virtio_notify_config(&dev->vdev);
-    } else {
-        /* For now, only allow one request at a time.  This restriction can be
-         * removed later by queueing callback and data pairs.
-         */
-        if (dev->stats_callback != NULL) {
-            return;
-        }
-        dev->stats_callback = cb;
-        dev->stats_opaque_callback_data = cb_data; 
-        if (ENABLE_GUEST_STATS && (dev->vdev.guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ))) {
-            virtqueue_push(dev->svq, &dev->stats_vq_elem, dev->stats_vq_offset);
-            virtio_notify(&dev->vdev, dev->svq);
-        } else {
-            /* Stats are not supported.  Clear out any stale values that might
-             * have been set by a more featureful guest kernel.
-             */
-            reset_stats(dev);
-            complete_stats_request(dev);
-        }
     }
 }
 
@@ -259,6 +270,7 @@ static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
 VirtIODevice *virtio_balloon_init(DeviceState *dev)
 {
     VirtIOBalloon *s;
+    int ret;
 
     s = (VirtIOBalloon *)virtio_common_init("virtio-balloon",
                                             VIRTIO_ID_BALLOON,
@@ -268,15 +280,29 @@ VirtIODevice *virtio_balloon_init(DeviceState *dev)
     s->vdev.set_config = virtio_balloon_set_config;
     s->vdev.get_features = virtio_balloon_get_features;
 
+    ret = qemu_add_balloon_handler(virtio_balloon_to_target,
+                                   virtio_balloon_stat, s);
+    if (ret < 0) {
+        virtio_cleanup(&s->vdev);
+        return NULL;
+    }
+
     s->ivq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
     s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
     s->svq = virtio_add_queue(&s->vdev, 128, virtio_balloon_receive_stats);
 
     reset_stats(s);
-    qemu_add_balloon_handler(virtio_balloon_to_target, s);
 
+    s->qdev = dev;
     register_savevm(dev, "virtio-balloon", -1, 1,
                     virtio_balloon_save, virtio_balloon_load, s);
 
     return &s->vdev;
 }
+
+void virtio_balloon_exit(VirtIODevice *vdev)
+{
+    VirtIOBalloon *s = DO_UPCAST(VirtIOBalloon, vdev, vdev);
+    unregister_savevm(s->qdev, "virtio-balloon", s);
+    virtio_cleanup(vdev);
+}
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index d685243728..316bf92db0 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -788,10 +788,22 @@ static int virtio_balloon_init_pci(PCIDevice *pci_dev)
     VirtIODevice *vdev;
 
     vdev = virtio_balloon_init(&pci_dev->qdev);
+    if (!vdev) {
+        return -1;
+    }
     virtio_init_pci(proxy, vdev);
     return 0;
 }
 
+static int virtio_balloon_exit_pci(PCIDevice *pci_dev)
+{
+    VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
+
+    virtio_pci_stop_ioeventfd(proxy);
+    virtio_balloon_exit(proxy->vdev);
+    return virtio_exit_pci(pci_dev);
+}
+
 static PCIDeviceInfo virtio_info[] = {
     {
         .qdev.name = "virtio-blk-pci",
@@ -866,7 +878,7 @@ static PCIDeviceInfo virtio_info[] = {
         .qdev.alias = "virtio-balloon",
         .qdev.size = sizeof(VirtIOPCIProxy),
         .init      = virtio_balloon_init_pci,
-        .exit      = virtio_exit_pci,
+        .exit      = virtio_balloon_exit_pci,
         .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET,
         .device_id = PCI_DEVICE_ID_VIRTIO_BALLOON,
         .revision  = VIRTIO_PCI_ABI_VERSION,
diff --git a/hw/virtio.h b/hw/virtio.h
index 0fd0bb0ac5..c1292647fe 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -213,6 +213,7 @@ VirtIODevice *virtio_9p_init(DeviceState *dev, V9fsConf *conf);
 void virtio_net_exit(VirtIODevice *vdev);
 void virtio_blk_exit(VirtIODevice *vdev);
 void virtio_serial_exit(VirtIODevice *vdev);
+void virtio_balloon_exit(VirtIODevice *vdev);
 
 #define DEFINE_VIRTIO_COMMON_FEATURES(_state, _field) \
 	DEFINE_PROP_BIT("indirect_desc", _state, _field, \
diff --git a/iov.c b/iov.c
index 1e027914d4..e7385c41f4 100644
--- a/iov.c
+++ b/iov.c
@@ -62,6 +62,29 @@ size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
     return buf_off;
 }
 
+size_t iov_clear(const struct iovec *iov, const unsigned int iov_cnt,
+                 size_t iov_off, size_t size)
+{
+    size_t iovec_off, buf_off;
+    unsigned int i;
+
+    iovec_off = 0;
+    buf_off = 0;
+    for (i = 0; i < iov_cnt && size; i++) {
+        if (iov_off < (iovec_off + iov[i].iov_len)) {
+            size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size);
+
+            memset(iov[i].iov_base + (iov_off - iovec_off), 0, len);
+
+            buf_off += len;
+            iov_off += len;
+            size -= len;
+        }
+        iovec_off += iov[i].iov_len;
+    }
+    return buf_off;
+}
+
 size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt)
 {
     size_t len;
@@ -73,3 +96,34 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt)
     }
     return len;
 }
+
+void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
+                 FILE *fp, const char *prefix, size_t limit)
+{
+    unsigned int i, v, b;
+    uint8_t *c;
+
+    c = iov[0].iov_base;
+    for (i = 0, v = 0, b = 0; b < limit; i++, b++) {
+        if (i == iov[v].iov_len) {
+            i = 0; v++;
+            if (v == iov_cnt) {
+                break;
+            }
+            c = iov[v].iov_base;
+        }
+        if ((b % 16) == 0) {
+            fprintf(fp, "%s: %04x:", prefix, b);
+        }
+        if ((b % 4) == 0) {
+            fprintf(fp, " ");
+        }
+        fprintf(fp, " %02x", c[i]);
+        if ((b % 16) == 15) {
+            fprintf(fp, "\n");
+        }
+    }
+    if ((b % 16) != 0) {
+        fprintf(fp, "\n");
+    }
+}
diff --git a/iov.h b/iov.h
index 110f67ab53..94d2f78284 100644
--- a/iov.h
+++ b/iov.h
@@ -17,3 +17,7 @@ size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt,
 size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
                   void *buf, size_t iov_off, size_t size);
 size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt);
+size_t iov_clear(const struct iovec *iov, const unsigned int iov_cnt,
+                 size_t iov_off, size_t size);
+void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
+                 FILE *fp, const char *prefix, size_t limit);
diff --git a/linux-aio.c b/linux-aio.c
index 68f4b3d757..dc3faf2499 100644
--- a/linux-aio.c
+++ b/linux-aio.c
@@ -31,7 +31,6 @@ struct qemu_laiocb {
     struct iocb iocb;
     ssize_t ret;
     size_t nbytes;
-    int async_context_id;
     QLIST_ENTRY(qemu_laiocb) node;
 };
 
@@ -39,7 +38,6 @@ struct qemu_laio_state {
     io_context_t ctx;
     int efd;
     int count;
-    QLIST_HEAD(, qemu_laiocb) completed_reqs;
 };
 
 static inline ssize_t io_event_ret(struct io_event *ev)
@@ -49,7 +47,6 @@ static inline ssize_t io_event_ret(struct io_event *ev)
 
 /*
  * Completes an AIO request (calls the callback and frees the ACB).
- * Be sure to be in the right AsyncContext before calling this function.
  */
 static void qemu_laio_process_completion(struct qemu_laio_state *s,
     struct qemu_laiocb *laiocb)
@@ -72,42 +69,12 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
 }
 
 /*
- * Processes all queued AIO requests, i.e. requests that have return from OS
- * but their callback was not called yet. Requests that cannot have their
- * callback called in the current AsyncContext, remain in the queue.
- *
- * Returns 1 if at least one request could be completed, 0 otherwise.
+ * All requests are directly processed when they complete, so there's nothing
+ * left to do during qemu_aio_wait().
  */
 static int qemu_laio_process_requests(void *opaque)
 {
-    struct qemu_laio_state *s = opaque;
-    struct qemu_laiocb *laiocb, *next;
-    int res = 0;
-
-    QLIST_FOREACH_SAFE (laiocb, &s->completed_reqs, node, next) {
-        if (laiocb->async_context_id == get_async_context_id()) {
-            qemu_laio_process_completion(s, laiocb);
-            QLIST_REMOVE(laiocb, node);
-            res = 1;
-        }
-    }
-
-    return res;
-}
-
-/*
- * Puts a request in the completion queue so that its callback is called the
- * next time when it's possible. If we already are in the right AsyncContext,
- * the request is completed immediately instead.
- */
-static void qemu_laio_enqueue_completed(struct qemu_laio_state *s,
-    struct qemu_laiocb* laiocb)
-{
-    if (laiocb->async_context_id == get_async_context_id()) {
-        qemu_laio_process_completion(s, laiocb);
-    } else {
-        QLIST_INSERT_HEAD(&s->completed_reqs, laiocb, node);
-    }
+    return 0;
 }
 
 static void qemu_laio_completion_cb(void *opaque)
@@ -141,7 +108,7 @@ static void qemu_laio_completion_cb(void *opaque)
                     container_of(iocb, struct qemu_laiocb, iocb);
 
             laiocb->ret = io_event_ret(&events[i]);
-            qemu_laio_enqueue_completed(s, laiocb);
+            qemu_laio_process_completion(s, laiocb);
         }
     }
 }
@@ -204,7 +171,6 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
     laiocb->nbytes = nb_sectors * 512;
     laiocb->ctx = s;
     laiocb->ret = -EINPROGRESS;
-    laiocb->async_context_id = get_async_context_id();
 
     iocbs = &laiocb->iocb;
 
@@ -239,7 +205,6 @@ void *laio_init(void)
     struct qemu_laio_state *s;
 
     s = qemu_mallocz(sizeof(*s));
-    QLIST_INIT(&s->completed_reqs);
     s->efd = eventfd(0, 0);
     if (s->efd == -1)
         goto out_free_state;
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index c4116e30f2..8dc00cbb0f 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -49,8 +49,6 @@ struct qemu_paiocb {
     ssize_t ret;
     int active;
     struct qemu_paiocb *next;
-
-    int async_context_id;
 };
 
 typedef struct PosixAioState {
@@ -200,6 +198,12 @@ static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
     return len;
 }
 
+/*
+ * Read/writes the data to/from a given linear buffer.
+ *
+ * Returns the number of bytes handles or -errno in case of an error. Short
+ * reads are only returned if the end of the file is reached.
+ */
 static ssize_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
 {
     ssize_t offset = 0;
@@ -336,6 +340,19 @@ static void *aio_thread(void *unused)
 
         switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
         case QEMU_AIO_READ:
+            ret = handle_aiocb_rw(aiocb);
+            if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) {
+                /* A short read means that we have reached EOF. Pad the buffer
+                 * with zeros for bytes after EOF. */
+                QEMUIOVector qiov;
+
+                qemu_iovec_init_external(&qiov, aiocb->aio_iov,
+                                         aiocb->aio_niov);
+                qemu_iovec_memset_skip(&qiov, 0, aiocb->aio_nbytes - ret, ret);
+
+                ret = aiocb->aio_nbytes;
+            }
+            break;
         case QEMU_AIO_WRITE:
             ret = handle_aiocb_rw(aiocb);
             break;
@@ -420,7 +437,6 @@ static int posix_aio_process_queue(void *opaque)
     struct qemu_paiocb *acb, **pacb;
     int ret;
     int result = 0;
-    int async_context_id = get_async_context_id();
 
     for(;;) {
         pacb = &s->first_aio;
@@ -429,12 +445,6 @@ static int posix_aio_process_queue(void *opaque)
             if (!acb)
                 return result;
 
-            /* we're only interested in requests in the right context */
-            if (acb->async_context_id != async_context_id) {
-                pacb = &acb->next;
-                continue;
-            }
-
             ret = qemu_paio_error(acb);
             if (ret == ECANCELED) {
                 /* remove the request */
@@ -575,7 +585,6 @@ BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
     acb->aio_type = type;
     acb->aio_fildes = fd;
     acb->ev_signo = SIGUSR2;
-    acb->async_context_id = get_async_context_id();
 
     if (qiov) {
         acb->aio_iov = qiov->iov;
@@ -604,7 +613,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
     acb->aio_type = QEMU_AIO_IOCTL;
     acb->aio_fildes = fd;
     acb->ev_signo = SIGUSR2;
-    acb->async_context_id = get_async_context_id();
     acb->aio_offset = 0;
     acb->aio_ioctl_buf = buf;
     acb->aio_ioctl_cmd = req;
diff --git a/qemu-common.h b/qemu-common.h
index 1e3c66511e..afbd04d321 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -115,10 +115,6 @@ int qemu_main(int argc, char **argv, char **envp);
 /* bottom halves */
 typedef void QEMUBHFunc(void *opaque);
 
-void async_context_push(void);
-void async_context_pop(void);
-int get_async_context_id(void);
-
 QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
 void qemu_bh_schedule(QEMUBH *bh);
 /* Bottom halfs that are scheduled from a bottom half handler are instantly
@@ -270,6 +266,7 @@ typedef struct I2SCodec I2SCodec;
 typedef struct SSIBus SSIBus;
 typedef struct EventNotifier EventNotifier;
 typedef struct VirtIODevice VirtIODevice;
+typedef struct QEMUSGList QEMUSGList;
 
 typedef uint64_t pcibus_t;
 
diff --git a/qemu-coroutine-int.h b/qemu-coroutine-int.h
new file mode 100644
index 0000000000..d495615cf6
--- /dev/null
+++ b/qemu-coroutine-int.h
@@ -0,0 +1,49 @@
+/*
+ * Coroutine internals
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_COROUTINE_INT_H
+#define QEMU_COROUTINE_INT_H
+
+#include "qemu-queue.h"
+#include "qemu-coroutine.h"
+
+typedef enum {
+    COROUTINE_YIELD = 1,
+    COROUTINE_TERMINATE = 2,
+} CoroutineAction;
+
+struct Coroutine {
+    CoroutineEntry *entry;
+    void *entry_arg;
+    Coroutine *caller;
+    QLIST_ENTRY(Coroutine) pool_next;
+    QTAILQ_ENTRY(Coroutine) co_queue_next;
+};
+
+Coroutine *qemu_coroutine_new(void);
+void qemu_coroutine_delete(Coroutine *co);
+CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to,
+                                      CoroutineAction action);
+
+#endif
diff --git a/qemu-coroutine-lock.c b/qemu-coroutine-lock.c
new file mode 100644
index 0000000000..a80f437c59
--- /dev/null
+++ b/qemu-coroutine-lock.c
@@ -0,0 +1,117 @@
+/*
+ * coroutine queues and locks
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu-coroutine.h"
+#include "qemu-coroutine-int.h"
+#include "qemu-queue.h"
+#include "trace.h"
+
+static QTAILQ_HEAD(, Coroutine) unlock_bh_queue =
+    QTAILQ_HEAD_INITIALIZER(unlock_bh_queue);
+static QEMUBH* unlock_bh;
+
+static void qemu_co_queue_next_bh(void *opaque)
+{
+    Coroutine *next;
+
+    trace_qemu_co_queue_next_bh();
+    while ((next = QTAILQ_FIRST(&unlock_bh_queue))) {
+        QTAILQ_REMOVE(&unlock_bh_queue, next, co_queue_next);
+        qemu_coroutine_enter(next, NULL);
+    }
+}
+
+void qemu_co_queue_init(CoQueue *queue)
+{
+    QTAILQ_INIT(&queue->entries);
+
+    if (!unlock_bh) {
+        unlock_bh = qemu_bh_new(qemu_co_queue_next_bh, NULL);
+    }
+}
+
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
+{
+    Coroutine *self = qemu_coroutine_self();
+    QTAILQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+    qemu_coroutine_yield();
+    assert(qemu_in_coroutine());
+}
+
+bool qemu_co_queue_next(CoQueue *queue)
+{
+    Coroutine *next;
+
+    next = QTAILQ_FIRST(&queue->entries);
+    if (next) {
+        QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
+        QTAILQ_INSERT_TAIL(&unlock_bh_queue, next, co_queue_next);
+        trace_qemu_co_queue_next(next);
+        qemu_bh_schedule(unlock_bh);
+    }
+
+    return (next != NULL);
+}
+
+bool qemu_co_queue_empty(CoQueue *queue)
+{
+    return (QTAILQ_FIRST(&queue->entries) == NULL);
+}
+
+void qemu_co_mutex_init(CoMutex *mutex)
+{
+    memset(mutex, 0, sizeof(*mutex));
+    qemu_co_queue_init(&mutex->queue);
+}
+
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    trace_qemu_co_mutex_lock_entry(mutex, self);
+
+    while (mutex->locked) {
+        qemu_co_queue_wait(&mutex->queue);
+    }
+
+    mutex->locked = true;
+
+    trace_qemu_co_mutex_lock_return(mutex, self);
+}
+
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    trace_qemu_co_mutex_unlock_entry(mutex, self);
+
+    assert(mutex->locked == true);
+    assert(qemu_in_coroutine());
+
+    mutex->locked = false;
+    qemu_co_queue_next(&mutex->queue);
+
+    trace_qemu_co_mutex_unlock_return(mutex, self);
+}
diff --git a/qemu-coroutine.c b/qemu-coroutine.c
new file mode 100644
index 0000000000..600be2643c
--- /dev/null
+++ b/qemu-coroutine.c
@@ -0,0 +1,75 @@
+/*
+ * QEMU coroutines
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
+ *  Kevin Wolf         <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "qemu-common.h"
+#include "qemu-coroutine.h"
+#include "qemu-coroutine-int.h"
+
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
+{
+    Coroutine *co = qemu_coroutine_new();
+    co->entry = entry;
+    return co;
+}
+
+static void coroutine_swap(Coroutine *from, Coroutine *to)
+{
+    CoroutineAction ret;
+
+    ret = qemu_coroutine_switch(from, to, COROUTINE_YIELD);
+
+    switch (ret) {
+    case COROUTINE_YIELD:
+        return;
+    case COROUTINE_TERMINATE:
+        trace_qemu_coroutine_terminate(to);
+        qemu_coroutine_delete(to);
+        return;
+    default:
+        abort();
+    }
+}
+
+void qemu_coroutine_enter(Coroutine *co, void *opaque)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    trace_qemu_coroutine_enter(self, co, opaque);
+
+    if (co->caller) {
+        fprintf(stderr, "Co-routine re-entered recursively\n");
+        abort();
+    }
+
+    co->caller = self;
+    co->entry_arg = opaque;
+    coroutine_swap(self, co);
+}
+
+void coroutine_fn qemu_coroutine_yield(void)
+{
+    Coroutine *self = qemu_coroutine_self();
+    Coroutine *to = self->caller;
+
+    trace_qemu_coroutine_yield(self, to);
+
+    if (!to) {
+        fprintf(stderr, "Co-routine is yielding to no one\n");
+        abort();
+    }
+
+    self->caller = NULL;
+    coroutine_swap(self, to);
+}
diff --git a/qemu-coroutine.h b/qemu-coroutine.h
new file mode 100644
index 0000000000..2f2fd95552
--- /dev/null
+++ b/qemu-coroutine.h
@@ -0,0 +1,159 @@
+/*
+ * QEMU coroutine implementation
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
+ *  Kevin Wolf         <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_COROUTINE_H
+#define QEMU_COROUTINE_H
+
+#include <stdbool.h>
+#include "qemu-queue.h"
+
+/**
+ * Coroutines are a mechanism for stack switching and can be used for
+ * cooperative userspace threading.  These functions provide a simple but
+ * useful flavor of coroutines that is suitable for writing sequential code,
+ * rather than callbacks, for operations that need to give up control while
+ * waiting for events to complete.
+ *
+ * These functions are re-entrant and may be used outside the global mutex.
+ */
+
+/**
+ * Mark a function that executes in coroutine context
+ *
+ * Functions that execute in coroutine context cannot be called directly from
+ * normal functions.  In the future it would be nice to enable compiler or
+ * static checker support for catching such errors.  This annotation might make
+ * it possible and in the meantime it serves as documentation.
+ *
+ * For example:
+ *
+ *   static void coroutine_fn foo(void) {
+ *       ....
+ *   }
+ */
+#define coroutine_fn
+
+typedef struct Coroutine Coroutine;
+
+/**
+ * Coroutine entry point
+ *
+ * When the coroutine is entered for the first time, opaque is passed in as an
+ * argument.
+ *
+ * When this function returns, the coroutine is destroyed automatically and
+ * execution continues in the caller who last entered the coroutine.
+ */
+typedef void coroutine_fn CoroutineEntry(void *opaque);
+
+/**
+ * Create a new coroutine
+ *
+ * Use qemu_coroutine_enter() to actually transfer control to the coroutine.
+ */
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry);
+
+/**
+ * Transfer control to a coroutine
+ *
+ * The opaque argument is passed as the argument to the entry point when
+ * entering the coroutine for the first time.  It is subsequently ignored.
+ */
+void qemu_coroutine_enter(Coroutine *coroutine, void *opaque);
+
+/**
+ * Transfer control back to a coroutine's caller
+ *
+ * This function does not return until the coroutine is re-entered using
+ * qemu_coroutine_enter().
+ */
+void coroutine_fn qemu_coroutine_yield(void);
+
+/**
+ * Get the currently executing coroutine
+ */
+Coroutine *coroutine_fn qemu_coroutine_self(void);
+
+/**
+ * Return whether or not currently inside a coroutine
+ *
+ * This can be used to write functions that work both when in coroutine context
+ * and when not in coroutine context.  Note that such functions cannot use the
+ * coroutine_fn annotation since they work outside coroutine context.
+ */
+bool qemu_in_coroutine(void);
+
+
+
+/**
+ * CoQueues are a mechanism to queue coroutines in order to continue executing
+ * them later. They provide the fundamental primitives on which coroutine locks
+ * are built.
+ */
+typedef struct CoQueue {
+    QTAILQ_HEAD(, Coroutine) entries;
+} CoQueue;
+
+/**
+ * Initialise a CoQueue. This must be called before any other operation is used
+ * on the CoQueue.
+ */
+void qemu_co_queue_init(CoQueue *queue);
+
+/**
+ * Adds the current coroutine to the CoQueue and transfers control to the
+ * caller of the coroutine.
+ */
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
+
+/**
+ * Restarts the next coroutine in the CoQueue and removes it from the queue.
+ *
+ * Returns true if a coroutine was restarted, false if the queue is empty.
+ */
+bool qemu_co_queue_next(CoQueue *queue);
+
+/**
+ * Checks if the CoQueue is empty.
+ */
+bool qemu_co_queue_empty(CoQueue *queue);
+
+
+/**
+ * Provides a mutex that can be used to synchronise coroutines
+ */
+typedef struct CoMutex {
+    bool locked;
+    CoQueue queue;
+} CoMutex;
+
+/**
+ * Initialises a CoMutex. This must be called before any other operation is used
+ * on the CoMutex.
+ */
+void qemu_co_mutex_init(CoMutex *mutex);
+
+/**
+ * Locks the mutex. If the lock cannot be taken immediately, control is
+ * transferred to the caller of the current coroutine.
+ */
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
+
+/**
+ * Unlocks the mutex and schedules the next coroutine that was waiting for this
+ * lock to be run.
+ */
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
+
+#endif /* QEMU_COROUTINE_H */
diff --git a/slirp/arp_table.c b/slirp/arp_table.c
new file mode 100644
index 0000000000..820dee22b0
--- /dev/null
+++ b/slirp/arp_table.c
@@ -0,0 +1,95 @@
+/*
+ * ARP table
+ *
+ * Copyright (c) 2011 AdaCore
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "slirp.h"
+
+void arp_table_add(Slirp *slirp, int ip_addr, uint8_t ethaddr[ETH_ALEN])
+{
+    const in_addr_t broadcast_addr =
+        ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr;
+    ArpTable *arptbl = &slirp->arp_table;
+    int i;
+
+    DEBUG_CALL("arp_table_add");
+    DEBUG_ARG("ip = 0x%x", ip_addr);
+    DEBUG_ARGS((dfd, " hw addr = %02x:%02x:%02x:%02x:%02x:%02x\n",
+                ethaddr[0], ethaddr[1], ethaddr[2],
+                ethaddr[3], ethaddr[4], ethaddr[5]));
+
+    /* Check 0.0.0.0/8 invalid source-only addresses */
+    assert((ip_addr & htonl(~(0xf << 28))) != 0);
+
+    if (ip_addr == 0xffffffff || ip_addr == broadcast_addr) {
+        /* Do not register broadcast addresses */
+        return;
+    }
+
+    /* Search for an entry */
+    for (i = 0; i < ARP_TABLE_SIZE; i++) {
+        if (arptbl->table[i].ar_sip == ip_addr) {
+            /* Update the entry */
+            memcpy(arptbl->table[i].ar_sha, ethaddr, ETH_ALEN);
+            return;
+        }
+    }
+
+    /* No entry found, create a new one */
+    arptbl->table[arptbl->next_victim].ar_sip = ip_addr;
+    memcpy(arptbl->table[arptbl->next_victim].ar_sha,  ethaddr, ETH_ALEN);
+    arptbl->next_victim = (arptbl->next_victim + 1) % ARP_TABLE_SIZE;
+}
+
+bool arp_table_search(Slirp *slirp, int in_ip_addr,
+                      uint8_t out_ethaddr[ETH_ALEN])
+{
+    const in_addr_t broadcast_addr =
+        ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr;
+    ArpTable *arptbl = &slirp->arp_table;
+    int i;
+
+    DEBUG_CALL("arp_table_search");
+    DEBUG_ARG("ip = 0x%x", in_ip_addr);
+
+    /* Check 0.0.0.0/8 invalid source-only addresses */
+    assert((in_ip_addr & htonl(~(0xf << 28))) != 0);
+
+    /* If broadcast address */
+    if (in_ip_addr == 0xffffffff || in_ip_addr == broadcast_addr) {
+        /* return Ethernet broadcast address */
+        memset(out_ethaddr, 0xff, ETH_ALEN);
+        return 1;
+    }
+
+    for (i = 0; i < ARP_TABLE_SIZE; i++) {
+        if (arptbl->table[i].ar_sip == in_ip_addr) {
+            memcpy(out_ethaddr, arptbl->table[i].ar_sha,  ETH_ALEN);
+            DEBUG_ARGS((dfd, " found hw addr = %02x:%02x:%02x:%02x:%02x:%02x\n",
+                        out_ethaddr[0], out_ethaddr[1], out_ethaddr[2],
+                        out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]));
+            return 1;
+        }
+    }
+
+    return 0;
+}
diff --git a/slirp/bootp.c b/slirp/bootp.c
index 1eb2ed1143..efd1fe777a 100644
--- a/slirp/bootp.c
+++ b/slirp/bootp.c
@@ -149,6 +149,7 @@ static void bootp_reply(Slirp *slirp, const struct bootp_t *bp)
     struct in_addr preq_addr;
     int dhcp_msg_type, val;
     uint8_t *q;
+    uint8_t client_ethaddr[ETH_ALEN];
 
     /* extract exact DHCP msg type */
     dhcp_decode(bp, &dhcp_msg_type, &preq_addr);
@@ -164,8 +165,9 @@ static void bootp_reply(Slirp *slirp, const struct bootp_t *bp)
     if (dhcp_msg_type != DHCPDISCOVER &&
         dhcp_msg_type != DHCPREQUEST)
         return;
-    /* XXX: this is a hack to get the client mac address */
-    memcpy(slirp->client_ethaddr, bp->bp_hwaddr, 6);
+
+    /* Get client's hardware address from bootp request */
+    memcpy(client_ethaddr, bp->bp_hwaddr, ETH_ALEN);
 
     m = m_get(slirp);
     if (!m) {
@@ -178,25 +180,25 @@ static void bootp_reply(Slirp *slirp, const struct bootp_t *bp)
 
     if (dhcp_msg_type == DHCPDISCOVER) {
         if (preq_addr.s_addr != htonl(0L)) {
-            bc = request_addr(slirp, &preq_addr, slirp->client_ethaddr);
+            bc = request_addr(slirp, &preq_addr, client_ethaddr);
             if (bc) {
                 daddr.sin_addr = preq_addr;
             }
         }
         if (!bc) {
          new_addr:
-            bc = get_new_addr(slirp, &daddr.sin_addr, slirp->client_ethaddr);
+            bc = get_new_addr(slirp, &daddr.sin_addr, client_ethaddr);
             if (!bc) {
                 DPRINTF("no address left\n");
                 return;
             }
         }
-        memcpy(bc->macaddr, slirp->client_ethaddr, 6);
+        memcpy(bc->macaddr, client_ethaddr, ETH_ALEN);
     } else if (preq_addr.s_addr != htonl(0L)) {
-        bc = request_addr(slirp, &preq_addr, slirp->client_ethaddr);
+        bc = request_addr(slirp, &preq_addr, client_ethaddr);
         if (bc) {
             daddr.sin_addr = preq_addr;
-            memcpy(bc->macaddr, slirp->client_ethaddr, 6);
+            memcpy(bc->macaddr, client_ethaddr, ETH_ALEN);
         } else {
             daddr.sin_addr.s_addr = 0;
         }
@@ -209,6 +211,9 @@ static void bootp_reply(Slirp *slirp, const struct bootp_t *bp)
         }
     }
 
+    /* Update ARP table for this IP address */
+    arp_table_add(slirp, daddr.sin_addr.s_addr, client_ethaddr);
+
     saddr.sin_addr = slirp->vhost_addr;
     saddr.sin_port = htons(BOOTP_SERVER);
 
@@ -218,7 +223,7 @@ static void bootp_reply(Slirp *slirp, const struct bootp_t *bp)
     rbp->bp_xid = bp->bp_xid;
     rbp->bp_htype = 1;
     rbp->bp_hlen = 6;
-    memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, 6);
+    memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, ETH_ALEN);
 
     rbp->bp_yiaddr = daddr.sin_addr; /* Client IP address */
     rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */
diff --git a/slirp/if.c b/slirp/if.c
index 0f04e13989..2d79e45bcd 100644
--- a/slirp/if.c
+++ b/slirp/if.c
@@ -6,6 +6,7 @@
  */
 
 #include <slirp.h>
+#include "qemu-timer.h"
 
 #define ifs_init(ifm) ((ifm)->ifs_next = (ifm)->ifs_prev = (ifm))
 
@@ -105,6 +106,9 @@ if_output(struct socket *so, struct mbuf *ifm)
 	ifs_init(ifm);
 	insque(ifm, ifq);
 
+        /* Expiration date = Now + 1 second */
+        ifm->expiration_date = qemu_get_clock_ns(rt_clock) + 1000000000ULL;
+
 diddit:
 	slirp->if_queued++;
 
@@ -153,6 +157,9 @@ diddit:
 void
 if_start(Slirp *slirp)
 {
+    int requeued = 0;
+    uint64_t now;
+
 	struct mbuf *ifm, *ifqt;
 
 	DEBUG_CALL("if_start");
@@ -165,6 +172,8 @@ if_start(Slirp *slirp)
         if (!slirp_can_output(slirp->opaque))
             return;
 
+        now = qemu_get_clock_ns(rt_clock);
+
 	/*
 	 * See which queue to get next packet from
 	 * If there's something in the fastq, select it immediately
@@ -199,11 +208,22 @@ if_start(Slirp *slirp)
 		   ifm->ifq_so->so_nqueued = 0;
 	}
 
-	/* Encapsulate the packet for sending */
-        if_encap(slirp, (uint8_t *)ifm->m_data, ifm->m_len);
-
-        m_free(ifm);
+        if (ifm->expiration_date < now) {
+            /* Expired */
+            m_free(ifm);
+        } else {
+            /* Encapsulate the packet for sending */
+            if (if_encap(slirp, ifm)) {
+                m_free(ifm);
+            } else {
+                /* re-queue */
+                insque(ifm, ifqt);
+                requeued++;
+            }
+        }
 
 	if (slirp->if_queued)
 	   goto again;
+
+        slirp->if_queued = requeued;
 }
diff --git a/slirp/main.h b/slirp/main.h
index 0dd8d81ce4..028df4b361 100644
--- a/slirp/main.h
+++ b/slirp/main.h
@@ -42,5 +42,5 @@ extern int tcp_keepintvl;
 #define PROTO_PPP 0x2
 #endif
 
-void if_encap(Slirp *slirp, const uint8_t *ip_data, int ip_data_len);
+int if_encap(Slirp *slirp, struct mbuf *ifm);
 ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags);
diff --git a/slirp/mbuf.c b/slirp/mbuf.c
index ce2eb843f5..c699c75096 100644
--- a/slirp/mbuf.c
+++ b/slirp/mbuf.c
@@ -70,6 +70,8 @@ m_get(Slirp *slirp)
 	m->m_len = 0;
         m->m_nextpkt = NULL;
         m->m_prevpkt = NULL;
+        m->arp_requested = false;
+        m->expiration_date = (uint64_t)-1;
 end_error:
 	DEBUG_ARG("m = %lx", (long )m);
 	return m;
diff --git a/slirp/mbuf.h b/slirp/mbuf.h
index b74544b42b..55170e517b 100644
--- a/slirp/mbuf.h
+++ b/slirp/mbuf.h
@@ -86,6 +86,8 @@ struct mbuf {
 		char	m_dat_[1]; /* ANSI don't like 0 sized arrays */
 		char	*m_ext_;
 	} M_dat;
+    bool     arp_requested;
+    uint64_t expiration_date;
 };
 
 #define m_next		m_hdr.mh_next
diff --git a/slirp/slirp.c b/slirp/slirp.c
index df787ea1d9..a86cc6eb2d 100644
--- a/slirp/slirp.c
+++ b/slirp/slirp.c
@@ -31,11 +31,11 @@
 struct in_addr loopback_addr;
 
 /* emulated hosts use the MAC addr 52:55:IP:IP:IP:IP */
-static const uint8_t special_ethaddr[6] = {
+static const uint8_t special_ethaddr[ETH_ALEN] = {
     0x52, 0x55, 0x00, 0x00, 0x00, 0x00
 };
 
-static const uint8_t zero_ethaddr[6] = { 0, 0, 0, 0, 0, 0 };
+static const uint8_t zero_ethaddr[ETH_ALEN] = { 0, 0, 0, 0, 0, 0 };
 
 /* XXX: suppress those select globals */
 fd_set *global_readfds, *global_writefds, *global_xfds;
@@ -599,42 +599,8 @@ void slirp_select_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds,
 	 global_xfds = NULL;
 }
 
-#define ETH_ALEN 6
-#define ETH_HLEN 14
-
-#define ETH_P_IP	0x0800		/* Internet Protocol packet	*/
-#define ETH_P_ARP	0x0806		/* Address Resolution packet	*/
-
-#define	ARPOP_REQUEST	1		/* ARP request			*/
-#define	ARPOP_REPLY	2		/* ARP reply			*/
-
-struct ethhdr
-{
-	unsigned char	h_dest[ETH_ALEN];	/* destination eth addr	*/
-	unsigned char	h_source[ETH_ALEN];	/* source ether addr	*/
-	unsigned short	h_proto;		/* packet type ID field	*/
-};
-
-struct arphdr
-{
-	unsigned short	ar_hrd;		/* format of hardware address	*/
-	unsigned short	ar_pro;		/* format of protocol address	*/
-	unsigned char	ar_hln;		/* length of hardware address	*/
-	unsigned char	ar_pln;		/* length of protocol address	*/
-	unsigned short	ar_op;		/* ARP opcode (command)		*/
-
-	 /*
-	  *	 Ethernet looks like this : This bit is variable sized however...
-	  */
-	unsigned char		ar_sha[ETH_ALEN];	/* sender hardware address	*/
-	uint32_t		ar_sip;			/* sender IP address		*/
-	unsigned char		ar_tha[ETH_ALEN];	/* target hardware address	*/
-	uint32_t		ar_tip	;		/* target IP address		*/
-} __attribute__((packed));
-
 static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
 {
-    struct ethhdr *eh = (struct ethhdr *)pkt;
     struct arphdr *ah = (struct arphdr *)(pkt + ETH_HLEN);
     uint8_t arp_reply[max(ETH_HLEN + sizeof(struct arphdr), 64)];
     struct ethhdr *reh = (struct ethhdr *)arp_reply;
@@ -645,6 +611,12 @@ static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
     ar_op = ntohs(ah->ar_op);
     switch(ar_op) {
     case ARPOP_REQUEST:
+        if (ah->ar_tip == ah->ar_sip) {
+            /* Gratuitous ARP */
+            arp_table_add(slirp, ah->ar_sip, ah->ar_sha);
+            return;
+        }
+
         if ((ah->ar_tip & slirp->vnetwork_mask.s_addr) ==
             slirp->vnetwork_addr.s_addr) {
             if (ah->ar_tip == slirp->vnameserver_addr.s_addr ||
@@ -657,8 +629,8 @@ static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
             return;
         arp_ok:
             memset(arp_reply, 0, sizeof(arp_reply));
-            /* XXX: make an ARP request to have the client address */
-            memcpy(slirp->client_ethaddr, eh->h_source, ETH_ALEN);
+
+            arp_table_add(slirp, ah->ar_sip, ah->ar_sha);
 
             /* ARP request for alias/dns mac address */
             memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN);
@@ -679,11 +651,7 @@ static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
         }
         break;
     case ARPOP_REPLY:
-        /* reply to request of client mac address ? */
-        if (!memcmp(slirp->client_ethaddr, zero_ethaddr, ETH_ALEN) &&
-            ah->ar_sip == slirp->client_ipaddr.s_addr) {
-            memcpy(slirp->client_ethaddr, ah->ar_sha, ETH_ALEN);
-        }
+        arp_table_add(slirp, ah->ar_sip, ah->ar_sha);
         break;
     default:
         break;
@@ -724,54 +692,63 @@ void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
     }
 }
 
-/* output the IP packet to the ethernet device */
-void if_encap(Slirp *slirp, const uint8_t *ip_data, int ip_data_len)
+/* Output the IP packet to the ethernet device. Returns 0 if the packet must be
+ * re-queued.
+ */
+int if_encap(Slirp *slirp, struct mbuf *ifm)
 {
     uint8_t buf[1600];
     struct ethhdr *eh = (struct ethhdr *)buf;
+    uint8_t ethaddr[ETH_ALEN];
+    const struct ip *iph = (const struct ip *)ifm->m_data;
 
-    if (ip_data_len + ETH_HLEN > sizeof(buf))
-        return;
-    
-    if (!memcmp(slirp->client_ethaddr, zero_ethaddr, ETH_ALEN)) {
+    if (ifm->m_len + ETH_HLEN > sizeof(buf)) {
+        return 1;
+    }
+
+    if (!arp_table_search(slirp, iph->ip_dst.s_addr, ethaddr)) {
         uint8_t arp_req[ETH_HLEN + sizeof(struct arphdr)];
         struct ethhdr *reh = (struct ethhdr *)arp_req;
         struct arphdr *rah = (struct arphdr *)(arp_req + ETH_HLEN);
-        const struct ip *iph = (const struct ip *)ip_data;
-
-        /* If the client addr is not known, there is no point in
-           sending the packet to it. Normally the sender should have
-           done an ARP request to get its MAC address. Here we do it
-           in place of sending the packet and we hope that the sender
-           will retry sending its packet. */
-        memset(reh->h_dest, 0xff, ETH_ALEN);
-        memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4);
-        memcpy(&reh->h_source[2], &slirp->vhost_addr, 4);
-        reh->h_proto = htons(ETH_P_ARP);
-        rah->ar_hrd = htons(1);
-        rah->ar_pro = htons(ETH_P_IP);
-        rah->ar_hln = ETH_ALEN;
-        rah->ar_pln = 4;
-        rah->ar_op = htons(ARPOP_REQUEST);
-        /* source hw addr */
-        memcpy(rah->ar_sha, special_ethaddr, ETH_ALEN - 4);
-        memcpy(&rah->ar_sha[2], &slirp->vhost_addr, 4);
-        /* source IP */
-        rah->ar_sip = slirp->vhost_addr.s_addr;
-        /* target hw addr (none) */
-        memset(rah->ar_tha, 0, ETH_ALEN);
-        /* target IP */
-        rah->ar_tip = iph->ip_dst.s_addr;
-        slirp->client_ipaddr = iph->ip_dst;
-        slirp_output(slirp->opaque, arp_req, sizeof(arp_req));
+
+        if (!ifm->arp_requested) {
+            /* If the client addr is not known, send an ARP request */
+            memset(reh->h_dest, 0xff, ETH_ALEN);
+            memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4);
+            memcpy(&reh->h_source[2], &slirp->vhost_addr, 4);
+            reh->h_proto = htons(ETH_P_ARP);
+            rah->ar_hrd = htons(1);
+            rah->ar_pro = htons(ETH_P_IP);
+            rah->ar_hln = ETH_ALEN;
+            rah->ar_pln = 4;
+            rah->ar_op = htons(ARPOP_REQUEST);
+
+            /* source hw addr */
+            memcpy(rah->ar_sha, special_ethaddr, ETH_ALEN - 4);
+            memcpy(&rah->ar_sha[2], &slirp->vhost_addr, 4);
+
+            /* source IP */
+            rah->ar_sip = slirp->vhost_addr.s_addr;
+
+            /* target hw addr (none) */
+            memset(rah->ar_tha, 0, ETH_ALEN);
+
+            /* target IP */
+            rah->ar_tip = iph->ip_dst.s_addr;
+            slirp->client_ipaddr = iph->ip_dst;
+            slirp_output(slirp->opaque, arp_req, sizeof(arp_req));
+            ifm->arp_requested = true;
+        }
+        return 0;
     } else {
-        memcpy(eh->h_dest, slirp->client_ethaddr, ETH_ALEN);
+        memcpy(eh->h_dest, ethaddr, ETH_ALEN);
         memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 4);
         /* XXX: not correct */
         memcpy(&eh->h_source[2], &slirp->vhost_addr, 4);
         eh->h_proto = htons(ETH_P_IP);
-        memcpy(buf + sizeof(struct ethhdr), ip_data, ip_data_len);
-        slirp_output(slirp->opaque, buf, ip_data_len + ETH_HLEN);
+        memcpy(buf + sizeof(struct ethhdr), ifm->m_data, ifm->m_len);
+        slirp_output(slirp->opaque, buf, ifm->m_len + ETH_HLEN);
+        return 1;
     }
 }
 
diff --git a/slirp/slirp.h b/slirp/slirp.h
index 16bb6bae45..2a070e6126 100644
--- a/slirp/slirp.h
+++ b/slirp/slirp.h
@@ -170,6 +170,48 @@ int inet_aton(const char *cp, struct in_addr *ia);
 /* osdep.c */
 int qemu_socket(int domain, int type, int protocol);
 
+#define ETH_ALEN 6
+#define ETH_HLEN 14
+
+#define ETH_P_IP  0x0800        /* Internet Protocol packet  */
+#define ETH_P_ARP 0x0806        /* Address Resolution packet */
+
+#define ARPOP_REQUEST 1         /* ARP request */
+#define ARPOP_REPLY   2         /* ARP reply   */
+
+struct ethhdr {
+    unsigned char  h_dest[ETH_ALEN];   /* destination eth addr */
+    unsigned char  h_source[ETH_ALEN]; /* source ether addr    */
+    unsigned short h_proto;            /* packet type ID field */
+};
+
+struct arphdr {
+    unsigned short ar_hrd;      /* format of hardware address */
+    unsigned short ar_pro;      /* format of protocol address */
+    unsigned char  ar_hln;      /* length of hardware address */
+    unsigned char  ar_pln;      /* length of protocol address */
+    unsigned short ar_op;       /* ARP opcode (command)       */
+
+    /*
+     *  Ethernet looks like this : This bit is variable sized however...
+     */
+    unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */
+    uint32_t      ar_sip;           /* sender IP address       */
+    unsigned char ar_tha[ETH_ALEN]; /* target hardware address */
+    uint32_t      ar_tip;           /* target IP address       */
+} __attribute__((packed));
+
+#define ARP_TABLE_SIZE 16
+
+typedef struct ArpTable {
+    struct arphdr table[ARP_TABLE_SIZE];
+    int next_victim;
+} ArpTable;
+
+void arp_table_add(Slirp *slirp, int ip_addr, uint8_t ethaddr[ETH_ALEN]);
+
+bool arp_table_search(Slirp *slirp, int in_ip_addr,
+                      uint8_t out_ethaddr[ETH_ALEN]);
 
 struct Slirp {
     QTAILQ_ENTRY(Slirp) entry;
@@ -181,9 +223,6 @@ struct Slirp {
     struct in_addr vdhcp_startaddr;
     struct in_addr vnameserver_addr;
 
-    /* ARP cache for the guest IP addresses (XXX: allow many entries) */
-    uint8_t client_ethaddr[6];
-
     struct in_addr client_ipaddr;
     char client_hostname[33];
 
@@ -227,6 +266,8 @@ struct Slirp {
     char *tftp_prefix;
     struct tftp_session tftp_sessions[TFTP_SESSIONS_MAX];
 
+    ArpTable arp_table;
+
     void *opaque;
 };
 
diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
index c1c4d4b07e..5aeca2b06d 100644
--- a/target-sparc/op_helper.c
+++ b/target-sparc/op_helper.c
@@ -4252,13 +4252,8 @@ void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr)
 static void do_unassigned_access(target_phys_addr_t addr, int is_write,
                                  int is_exec, int is_asi, int size)
 {
-    CPUState *saved_env;
     int fault_type;
 
-    /* XXX: hack to restore env in all cases, even if not called from
-       generated code */
-    saved_env = env;
-    env = cpu_single_env;
 #ifdef DEBUG_UNASSIGNED
     if (is_asi)
         printf("Unassigned mem %s access of %d byte%s to " TARGET_FMT_plx
@@ -4306,8 +4301,6 @@ static void do_unassigned_access(target_phys_addr_t addr, int is_write,
     if (env->mmuregs[0] & MMU_NF) {
         tlb_flush(env, 1);
     }
-
-    env = saved_env;
 }
 #endif
 #else
@@ -4319,13 +4312,6 @@ static void do_unassigned_access(target_phys_addr_t addr, int is_write,
                                  int is_exec, int is_asi, int size)
 #endif
 {
-    CPUState *saved_env;
-
-    /* XXX: hack to restore env in all cases, even if not called from
-       generated code */
-    saved_env = env;
-    env = cpu_single_env;
-
 #ifdef DEBUG_UNASSIGNED
     printf("Unassigned mem access to " TARGET_FMT_plx " from " TARGET_FMT_lx
            "\n", addr, env->pc);
@@ -4335,8 +4321,6 @@ static void do_unassigned_access(target_phys_addr_t addr, int is_write,
         raise_exception(TT_CODE_ACCESS);
     else
         raise_exception(TT_DATA_ACCESS);
-
-    env = saved_env;
 }
 #endif
 
@@ -4370,7 +4354,14 @@ void helper_tick_set_limit(void *opaque, uint64_t limit)
 void cpu_unassigned_access(CPUState *env1, target_phys_addr_t addr,
                            int is_write, int is_exec, int is_asi, int size)
 {
+    CPUState *saved_env;
+
+    saved_env = env;
     env = env1;
-    do_unassigned_access(addr, is_write, is_exec, is_asi, size);
+    /* Ignore unassigned accesses outside of CPU context */
+    if (env1) {
+        do_unassigned_access(addr, is_write, is_exec, is_asi, size);
+    }
+    env = saved_env;
 }
 #endif
diff --git a/test-coroutine.c b/test-coroutine.c
new file mode 100644
index 0000000000..bf9f3e91b5
--- /dev/null
+++ b/test-coroutine.c
@@ -0,0 +1,192 @@
+/*
+ * Coroutine tests
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include <glib.h>
+#include "qemu-coroutine.h"
+
+/*
+ * Check that qemu_in_coroutine() works
+ */
+
+static void coroutine_fn verify_in_coroutine(void *opaque)
+{
+    g_assert(qemu_in_coroutine());
+}
+
+static void test_in_coroutine(void)
+{
+    Coroutine *coroutine;
+
+    g_assert(!qemu_in_coroutine());
+
+    coroutine = qemu_coroutine_create(verify_in_coroutine);
+    qemu_coroutine_enter(coroutine, NULL);
+}
+
+/*
+ * Check that qemu_coroutine_self() works
+ */
+
+static void coroutine_fn verify_self(void *opaque)
+{
+    g_assert(qemu_coroutine_self() == opaque);
+}
+
+static void test_self(void)
+{
+    Coroutine *coroutine;
+
+    coroutine = qemu_coroutine_create(verify_self);
+    qemu_coroutine_enter(coroutine, coroutine);
+}
+
+/*
+ * Check that coroutines may nest multiple levels
+ */
+
+typedef struct {
+    unsigned int n_enter;   /* num coroutines entered */
+    unsigned int n_return;  /* num coroutines returned */
+    unsigned int max;       /* maximum level of nesting */
+} NestData;
+
+static void coroutine_fn nest(void *opaque)
+{
+    NestData *nd = opaque;
+
+    nd->n_enter++;
+
+    if (nd->n_enter < nd->max) {
+        Coroutine *child;
+
+        child = qemu_coroutine_create(nest);
+        qemu_coroutine_enter(child, nd);
+    }
+
+    nd->n_return++;
+}
+
+static void test_nesting(void)
+{
+    Coroutine *root;
+    NestData nd = {
+        .n_enter  = 0,
+        .n_return = 0,
+        .max      = 128,
+    };
+
+    root = qemu_coroutine_create(nest);
+    qemu_coroutine_enter(root, &nd);
+
+    /* Must enter and return from max nesting level */
+    g_assert_cmpint(nd.n_enter, ==, nd.max);
+    g_assert_cmpint(nd.n_return, ==, nd.max);
+}
+
+/*
+ * Check that yield/enter transfer control correctly
+ */
+
+static void coroutine_fn yield_5_times(void *opaque)
+{
+    bool *done = opaque;
+    int i;
+
+    for (i = 0; i < 5; i++) {
+        qemu_coroutine_yield();
+    }
+    *done = true;
+}
+
+static void test_yield(void)
+{
+    Coroutine *coroutine;
+    bool done = false;
+    int i = -1; /* one extra time to return from coroutine */
+
+    coroutine = qemu_coroutine_create(yield_5_times);
+    while (!done) {
+        qemu_coroutine_enter(coroutine, &done);
+        i++;
+    }
+    g_assert_cmpint(i, ==, 5); /* coroutine must yield 5 times */
+}
+
+/*
+ * Check that creation, enter, and return work
+ */
+
+static void coroutine_fn set_and_exit(void *opaque)
+{
+    bool *done = opaque;
+
+    *done = true;
+}
+
+static void test_lifecycle(void)
+{
+    Coroutine *coroutine;
+    bool done = false;
+
+    /* Create, enter, and return from coroutine */
+    coroutine = qemu_coroutine_create(set_and_exit);
+    qemu_coroutine_enter(coroutine, &done);
+    g_assert(done); /* expect done to be true (first time) */
+
+    /* Repeat to check that no state affects this test */
+    done = false;
+    coroutine = qemu_coroutine_create(set_and_exit);
+    qemu_coroutine_enter(coroutine, &done);
+    g_assert(done); /* expect done to be true (second time) */
+}
+
+/*
+ * Lifecycle benchmark
+ */
+
+static void coroutine_fn empty_coroutine(void *opaque)
+{
+    /* Do nothing */
+}
+
+static void perf_lifecycle(void)
+{
+    Coroutine *coroutine;
+    unsigned int i, max;
+    double duration;
+
+    max = 1000000;
+
+    g_test_timer_start();
+    for (i = 0; i < max; i++) {
+        coroutine = qemu_coroutine_create(empty_coroutine);
+        qemu_coroutine_enter(coroutine, NULL);
+    }
+    duration = g_test_timer_elapsed();
+
+    g_test_message("Lifecycle %u iterations: %f s\n", max, duration);
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+    g_test_add_func("/basic/lifecycle", test_lifecycle);
+    g_test_add_func("/basic/yield", test_yield);
+    g_test_add_func("/basic/nesting", test_nesting);
+    g_test_add_func("/basic/self", test_self);
+    g_test_add_func("/basic/in_coroutine", test_in_coroutine);
+    if (g_test_perf()) {
+        g_test_add_func("/perf/lifecycle", perf_lifecycle);
+    }
+    return g_test_run();
+}
diff --git a/trace-events b/trace-events
index 713f042081..19d31e3541 100644
--- a/trace-events
+++ b/trace-events
@@ -66,6 +66,9 @@ disable bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
 disable bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 disable bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 disable bdrv_set_locked(void *bs, int locked) "bs %p locked %d"
+disable bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
+disable bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
+disable bdrv_co_io(int is_write, void *acb) "is_write %d acb %p"
 
 # hw/virtio-blk.c
 disable virtio_blk_req_complete(void *req, int status) "req %p status %d"
@@ -425,3 +428,16 @@ disable qemu_put_ram_ptr(void* addr) "%p"
 
 # hw/xen_platform.c
 disable xen_platform_log(char *s) "xen platform: %s"
+
+# qemu-coroutine.c
+disable qemu_coroutine_enter(void *from, void *to, void *opaque) "from %p to %p opaque %p"
+disable qemu_coroutine_yield(void *from, void *to) "from %p to %p"
+disable qemu_coroutine_terminate(void *co) "self %p"
+
+# qemu-coroutine-lock.c
+disable qemu_co_queue_next_bh(void) ""
+disable qemu_co_queue_next(void *next) "next %p"
+disable qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p"
+disable qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"
+disable qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p"
+disable qemu_co_mutex_unlock_return(void *mutex, void *self) "mutex %p self %p"
diff --git a/usb-bsd.c b/usb-bsd.c
index 3b97eb491b..ab84d93857 100644
--- a/usb-bsd.c
+++ b/usb-bsd.c
@@ -62,7 +62,6 @@ typedef struct USBHostDevice {
 } USBHostDevice;
 
 
-#if 0
 static int ensure_ep_open(USBHostDevice *dev, int ep, int mode)
 {
     char buf[32];
@@ -110,7 +109,6 @@ static void ensure_eps_closed(USBHostDevice *dev)
         epnum++;
     }
 }
-#endif
 
 static void usb_host_handle_reset(USBDevice *dev)
 {
@@ -119,7 +117,6 @@ static void usb_host_handle_reset(USBDevice *dev)
 #endif
 }
 
-#if 0
 /* XXX:
  * -check device states against transfer requests
  *  and return appropriate response
@@ -256,9 +253,9 @@ static int usb_host_handle_data(USBDevice *dev, USBPacket *p)
     }
 
     if (p->pid == USB_TOKEN_IN)
-        ret = read(fd, p->data, p->len);
+        ret = readv(fd, p->iov.iov, p->iov.niov);
     else
-        ret = write(fd, p->data, p->len);
+        ret = writev(fd, p->iov.iov, p->iov.niov);
 
     sigprocmask(SIG_SETMASK, &old_mask, NULL);
 
@@ -278,7 +275,6 @@ static int usb_host_handle_data(USBDevice *dev, USBPacket *p)
         return ret;
     }
 }
-#endif
 
 static void usb_host_handle_destroy(USBDevice *opaque)
 {
@@ -305,8 +301,8 @@ static int usb_host_initfn(USBDevice *dev)
 USBDevice *usb_host_device_open(const char *devname)
 {
     struct usb_device_info bus_info, dev_info;
-    USBDevice *d = NULL;
-    USBHostDevice *dev, *ret = NULL;
+    USBDevice *d = NULL, *ret = NULL;
+    USBHostDevice *dev;
     char ctlpath[PATH_MAX + 1];
     char buspath[PATH_MAX + 1];
     int bfd, dfd, bus, address, i;
@@ -408,10 +404,8 @@ static struct USBDeviceInfo usb_host_dev_info = {
     .init           = usb_host_initfn,
     .handle_packet  = usb_generic_handle_packet,
     .handle_reset   = usb_host_handle_reset,
-#if 0
     .handle_control = usb_host_handle_control,
     .handle_data    = usb_host_handle_data,
-#endif
     .handle_destroy = usb_host_handle_destroy,
 };
 
diff --git a/usb-linux.c b/usb-linux.c
index 53cc5fc00e..5562187bd5 100644
--- a/usb-linux.c
+++ b/usb-linux.c
@@ -341,16 +341,16 @@ static void async_complete(void *opaque)
         if (p) {
             switch (aurb->urb.status) {
             case 0:
-                p->len += aurb->urb.actual_length;
+                p->result += aurb->urb.actual_length;
                 break;
 
             case -EPIPE:
                 set_halt(s, p->devep);
-                p->len = USB_RET_STALL;
+                p->result = USB_RET_STALL;
                 break;
 
             default:
-                p->len = USB_RET_NAK;
+                p->result = USB_RET_NAK;
                 break;
             }
 
@@ -604,6 +604,7 @@ static int usb_host_handle_iso_data(USBHostDevice *s, USBPacket *p, int in)
 {
     AsyncURB *aurb;
     int i, j, ret, max_packet_size, offset, len = 0;
+    uint8_t *buf;
 
     max_packet_size = get_max_packet_size(s, p->devep);
     if (max_packet_size == 0)
@@ -628,19 +629,19 @@ static int usb_host_handle_iso_data(USBHostDevice *s, USBPacket *p, int in)
                 len = urb_status_to_usb_ret(
                                         aurb[i].urb.iso_frame_desc[j].status);
             /* Check the frame fits */
-            } else if (aurb[i].urb.iso_frame_desc[j].actual_length > p->len) {
+            } else if (aurb[i].urb.iso_frame_desc[j].actual_length
+                       > p->iov.size) {
                 printf("husb: received iso data is larger then packet\n");
                 len = USB_RET_NAK;
             /* All good copy data over */
             } else {
                 len = aurb[i].urb.iso_frame_desc[j].actual_length;
-                memcpy(p->data,
-                       aurb[i].urb.buffer +
-                           j * aurb[i].urb.iso_frame_desc[0].length,
-                       len);
+                buf  = aurb[i].urb.buffer +
+                    j * aurb[i].urb.iso_frame_desc[0].length;
+                usb_packet_copy(p, buf, len);
             }
         } else {
-            len = p->len;
+            len = p->iov.size;
             offset = (j == 0) ? 0 : get_iso_buffer_used(s, p->devep);
 
             /* Check the frame fits */
@@ -650,7 +651,7 @@ static int usb_host_handle_iso_data(USBHostDevice *s, USBPacket *p, int in)
             }
 
             /* All good copy data over */
-            memcpy(aurb[i].urb.buffer + offset, p->data, len);
+            usb_packet_copy(p, aurb[i].urb.buffer + offset, len);
             aurb[i].urb.iso_frame_desc[j].length = len;
             offset += len;
             set_iso_buffer_used(s, p->devep, offset);
@@ -706,7 +707,7 @@ static int usb_host_handle_data(USBDevice *dev, USBPacket *p)
     USBHostDevice *s = DO_UPCAST(USBHostDevice, dev, dev);
     struct usbdevfs_urb *urb;
     AsyncURB *aurb;
-    int ret, rem;
+    int ret, rem, prem, v;
     uint8_t *pbuf;
     uint8_t ep;
 
@@ -734,10 +735,18 @@ static int usb_host_handle_data(USBDevice *dev, USBPacket *p)
         return usb_host_handle_iso_data(s, p, p->pid == USB_TOKEN_IN);
     }
 
-    rem = p->len;
-    pbuf = p->data;
-    p->len = 0;
+    v = 0;
+    prem = p->iov.iov[v].iov_len;
+    pbuf = p->iov.iov[v].iov_base;
+    rem = p->iov.size;
     while (rem) {
+        if (prem == 0) {
+            v++;
+            assert(v < p->iov.niov);
+            prem = p->iov.iov[v].iov_len;
+            pbuf = p->iov.iov[v].iov_base;
+            assert(prem <= rem);
+        }
         aurb = async_alloc(s);
         aurb->packet = p;
 
@@ -746,16 +755,17 @@ static int usb_host_handle_data(USBDevice *dev, USBPacket *p)
         urb->type          = USBDEVFS_URB_TYPE_BULK;
         urb->usercontext   = s;
         urb->buffer        = pbuf;
+        urb->buffer_length = prem;
 
-        if (rem > MAX_USBFS_BUFFER_SIZE) {
+        if (urb->buffer_length > MAX_USBFS_BUFFER_SIZE) {
             urb->buffer_length = MAX_USBFS_BUFFER_SIZE;
-            aurb->more         = 1;
-        } else {
-            urb->buffer_length = rem;
-            aurb->more         = 0;
         }
         pbuf += urb->buffer_length;
+        prem -= urb->buffer_length;
         rem  -= urb->buffer_length;
+        if (rem) {
+            aurb->more         = 1;
+        }
 
         ret = ioctl(s->fd, USBDEVFS_SUBMITURB, urb);
 
diff --git a/usb-redir.c b/usb-redir.c
index e2129931a0..9e5fce21ea 100644
--- a/usb-redir.c
+++ b/usb-redir.c
@@ -365,12 +365,12 @@ static int usbredir_handle_iso_data(USBRedirDevice *dev, USBPacket *p,
         }
 
         len = isop->len;
-        if (len > p->len) {
+        if (len > p->iov.size) {
             ERROR("received iso data is larger then packet ep %02X\n", ep);
             bufp_free(dev, isop, ep);
             return USB_RET_NAK;
         }
-        memcpy(p->data, isop->data, len);
+        usb_packet_copy(p, isop->data, len);
         bufp_free(dev, isop, ep);
         return len;
     } else {
@@ -379,18 +379,20 @@ static int usbredir_handle_iso_data(USBRedirDevice *dev, USBPacket *p,
         if (dev->endpoint[EP2I(ep)].iso_started) {
             struct usb_redir_iso_packet_header iso_packet = {
                 .endpoint = ep,
-                .length = p->len
+                .length = p->iov.size
             };
+            uint8_t buf[p->iov.size];
             /* No id, we look at the ep when receiving a status back */
+            usb_packet_copy(p, buf, p->iov.size);
             usbredirparser_send_iso_packet(dev->parser, 0, &iso_packet,
-                                           p->data, p->len);
+                                           buf, p->iov.size);
             usbredirparser_do_write(dev->parser);
         }
         status = dev->endpoint[EP2I(ep)].iso_error;
         dev->endpoint[EP2I(ep)].iso_error = 0;
-        DPRINTF2("iso-token-out ep %02X status %d len %d\n", ep, status,
-                 p->len);
-        return usbredir_handle_status(dev, status, p->len);
+        DPRINTF2("iso-token-out ep %02X status %d len %zd\n", ep, status,
+                 p->iov.size);
+        return usbredir_handle_status(dev, status, p->iov.size);
     }
 }
 
@@ -413,10 +415,11 @@ static int usbredir_handle_bulk_data(USBRedirDevice *dev, USBPacket *p,
     AsyncURB *aurb = async_alloc(dev, p);
     struct usb_redir_bulk_packet_header bulk_packet;
 
-    DPRINTF("bulk-out ep %02X len %d id %u\n", ep, p->len, aurb->packet_id);
+    DPRINTF("bulk-out ep %02X len %zd id %u\n", ep,
+            p->iov.size, aurb->packet_id);
 
     bulk_packet.endpoint  = ep;
-    bulk_packet.length    = p->len;
+    bulk_packet.length    = p->iov.size;
     bulk_packet.stream_id = 0;
     aurb->bulk_packet = bulk_packet;
 
@@ -424,9 +427,11 @@ static int usbredir_handle_bulk_data(USBRedirDevice *dev, USBPacket *p,
         usbredirparser_send_bulk_packet(dev->parser, aurb->packet_id,
                                         &bulk_packet, NULL, 0);
     } else {
-        usbredir_log_data(dev, "bulk data out:", p->data, p->len);
+        uint8_t buf[p->iov.size];
+        usb_packet_copy(p, buf, p->iov.size);
+        usbredir_log_data(dev, "bulk data out:", buf, p->iov.size);
         usbredirparser_send_bulk_packet(dev->parser, aurb->packet_id,
-                                        &bulk_packet, p->data, p->len);
+                                        &bulk_packet, buf, p->iov.size);
     }
     usbredirparser_do_write(dev->parser);
     return USB_RET_ASYNC;
@@ -471,29 +476,31 @@ static int usbredir_handle_interrupt_data(USBRedirDevice *dev,
         }
 
         len = intp->len;
-        if (len > p->len) {
+        if (len > p->iov.size) {
             ERROR("received int data is larger then packet ep %02X\n", ep);
             bufp_free(dev, intp, ep);
             return USB_RET_NAK;
         }
-        memcpy(p->data, intp->data, len);
+        usb_packet_copy(p, intp->data, len);
         bufp_free(dev, intp, ep);
         return len;
     } else {
         /* Output interrupt endpoint, normal async operation */
         AsyncURB *aurb = async_alloc(dev, p);
         struct usb_redir_interrupt_packet_header interrupt_packet;
+        uint8_t buf[p->iov.size];
 
-        DPRINTF("interrupt-out ep %02X len %d id %u\n", ep, p->len,
+        DPRINTF("interrupt-out ep %02X len %zd id %u\n", ep, p->iov.size,
                 aurb->packet_id);
 
         interrupt_packet.endpoint  = ep;
-        interrupt_packet.length    = p->len;
+        interrupt_packet.length    = p->iov.size;
         aurb->interrupt_packet     = interrupt_packet;
 
-        usbredir_log_data(dev, "interrupt data out:", p->data, p->len);
+        usb_packet_copy(p, buf, p->iov.size);
+        usbredir_log_data(dev, "interrupt data out:", buf, p->iov.size);
         usbredirparser_send_interrupt_packet(dev->parser, aurb->packet_id,
-                                        &interrupt_packet, p->data, p->len);
+                                        &interrupt_packet, buf, p->iov.size);
         usbredirparser_do_write(dev->parser);
         return USB_RET_ASYNC;
     }
@@ -959,7 +966,7 @@ static void usbredir_configuration_status(void *priv, uint32_t id,
             dev->dev.data_buf[0] = config_status->configuration;
             len = 1;
         }
-        aurb->packet->len =
+        aurb->packet->result =
             usbredir_handle_status(dev, config_status->status, len);
         usb_generic_async_ctrl_complete(&dev->dev, aurb->packet);
     }
@@ -987,7 +994,7 @@ static void usbredir_alt_setting_status(void *priv, uint32_t id,
             dev->dev.data_buf[0] = alt_setting_status->alt;
             len = 1;
         }
-        aurb->packet->len =
+        aurb->packet->result =
             usbredir_handle_status(dev, alt_setting_status->status, len);
         usb_generic_async_ctrl_complete(&dev->dev, aurb->packet);
     }
@@ -1070,7 +1077,7 @@ static void usbredir_control_packet(void *priv, uint32_t id,
                 len = USB_RET_STALL;
             }
         }
-        aurb->packet->len = len;
+        aurb->packet->result = len;
         usb_generic_async_ctrl_complete(&dev->dev, aurb->packet);
     }
     async_free(dev, aurb);
@@ -1105,15 +1112,15 @@ static void usbredir_bulk_packet(void *priv, uint32_t id,
         len = usbredir_handle_status(dev, bulk_packet->status, len);
         if (len > 0) {
             usbredir_log_data(dev, "bulk data in:", data, data_len);
-            if (data_len <= aurb->packet->len) {
-                memcpy(aurb->packet->data, data, data_len);
+            if (data_len <= aurb->packet->iov.size) {
+                usb_packet_copy(aurb->packet, data, data_len);
             } else {
-                ERROR("bulk buffer too small (%d > %d)\n", data_len,
-                      aurb->packet->len);
+                ERROR("bulk buffer too small (%d > %zd)\n", data_len,
+                      aurb->packet->iov.size);
                 len = USB_RET_STALL;
             }
         }
-        aurb->packet->len = len;
+        aurb->packet->result = len;
         usb_packet_complete(&dev->dev, aurb->packet);
     }
     async_free(dev, aurb);
@@ -1185,7 +1192,7 @@ static void usbredir_interrupt_packet(void *priv, uint32_t id,
         }
 
         if (aurb->packet) {
-            aurb->packet->len = usbredir_handle_status(dev,
+            aurb->packet->result = usbredir_handle_status(dev,
                                                interrupt_packet->status, len);
             usb_packet_complete(&dev->dev, aurb->packet);
         }