summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Anthony Liguori <aliguori@us.ibm.com> 2012-01-27 08:58:52 -0600
committer Anthony Liguori <aliguori@us.ibm.com> 2012-01-27 08:58:52 -0600
commit 21fe5bc678b16d748db385fb1be95caa96b00eee (patch)
tree 33342168c916f107f2c1e89c8fff8490b9e46f88
parent 96bab41df61b532bb6954a38527ad8403859a6c9 (diff)
parent e2f0c49ffae8d3a00272c3cbc68850cc5aafbffa (diff)
download focaccia-qemu-21fe5bc678b16d748db385fb1be95caa96b00eee.tar.gz
focaccia-qemu-21fe5bc678b16d748db385fb1be95caa96b00eee.zip
Merge remote-tracking branch 'kwolf/for-anthony' into staging
* kwolf/for-anthony: (22 commits)
  scsi: Guard against buflen exceeding req->cmd.xfer in scsi_disk_emulate_command
  qcow: Use bdrv functions to replace file operation
  qcow: Return real error code in qcow_open
  block/vdi: Zero unused parts when allocating a new block (fix #919242)
  virtio-blk: add virtio_blk_handle_read trace event
  docs: describe live block operations
  block: add support for partial streaming
  add QERR_BASE_NOT_FOUND
  block: add bdrv_find_backing_image
  blockdev: make image streaming safe across hotplug
  qmp: add query-block-jobs
  qmp: add block_job_cancel command
  qmp: add block_job_set_speed command
  qmp: add block_stream command
  block: rate-limit streaming operations
  block: add image streaming block job
  block: add BlockJob interface for long-running operations
  block: make copy-on-read a per-request flag
  block: check bdrv_in_use() before blockdev operations
  coroutine: add co_sleep_ns() coroutine sleep function
  ...
-rw-r--r-- Makefile.objs 2
-rw-r--r-- QMP/qmp-events.txt 53
-rw-r--r-- block.c 119
-rw-r--r-- block.h 4
-rw-r--r-- block/blkdebug.c 4
-rw-r--r-- block/blkverify.c 4
-rw-r--r-- block/qcow.c 104
-rw-r--r-- block/rbd.c 22
-rw-r--r-- block/stream.c 269
-rw-r--r-- block/vdi.c 8
-rw-r--r-- block_int.h 47
-rw-r--r-- blockdev.c 199
-rw-r--r-- docs/live-block-ops.txt 58
-rw-r--r-- hmp-commands.hx 41
-rw-r--r-- hmp.c 68
-rw-r--r-- hmp.h 4
-rw-r--r-- hw/scsi-disk.c 10
-rw-r--r-- hw/virtio-blk.c 2
-rw-r--r-- monitor.c 13
-rw-r--r-- monitor.h 2
-rw-r--r-- qapi-schema.json 115
-rw-r--r-- qemu-coroutine-sleep.c 38
-rw-r--r-- qemu-coroutine.h 9
-rw-r--r-- qemu-io.c 48
-rw-r--r-- qerror.c 8
-rw-r--r-- qerror.h 6
-rw-r--r-- qmp-commands.hx 24
-rw-r--r-- trace-events 13
28 files changed, 1206 insertions, 88 deletions
diff --git a/Makefile.objs b/Makefile.objs
index 9ca606356f..06a147b0b0 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -13,6 +13,7 @@ oslib-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o
 #######################################################################
 # coroutines
 coroutine-obj-y = qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
+coroutine-obj-y += qemu-coroutine-sleep.o
 ifeq ($(CONFIG_UCONTEXT_COROUTINE),y)
 coroutine-obj-$(CONFIG_POSIX) += coroutine-ucontext.o
 else
@@ -34,6 +35,7 @@ block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
 block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += stream.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
 block-nested-$(CONFIG_LIBISCSI) += iscsi.o
diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt
index af586ec855..06cb404837 100644
--- a/QMP/qmp-events.txt
+++ b/QMP/qmp-events.txt
@@ -264,3 +264,56 @@ Example:
 
 Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is
 followed respectively by the RESET, SHUTDOWN, or STOP events.
+
+
+BLOCK_JOB_COMPLETED
+-------------------
+
+Emitted when a block job has completed.
+
+Data:
+
+- "type":     Job type ("stream" for image streaming, json-string)
+- "device":   Device name (json-string)
+- "len":      Maximum progress value (json-int)
+- "offset":   Current progress value (json-int)
+              On success this is equal to len.
+              On failure this is less than len.
+- "speed":    Rate limit, bytes per second (json-int)
+- "error":    Error message (json-string, optional)
+              Only present on failure.  This field contains a human-readable
+              error message.  There are no semantics other than that streaming
+              has failed and clients should not try to interpret the error
+              string.
+
+Example:
+
+{ "event": "BLOCK_JOB_COMPLETED",
+     "data": { "type": "stream", "device": "virtio-disk0",
+               "len": 10737418240, "offset": 10737418240,
+               "speed": 0 },
+     "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+
+
+BLOCK_JOB_CANCELLED
+-------------------
+
+Emitted when a block job has been cancelled.
+
+Data:
+
+- "type":     Job type ("stream" for image streaming, json-string)
+- "device":   Device name (json-string)
+- "len":      Maximum progress value (json-int)
+- "offset":   Current progress value (json-int)
+              On success this is equal to len.
+              On failure this is less than len.
+- "speed":    Rate limit, bytes per second (json-int)
+
+Example:
+
+{ "event": "BLOCK_JOB_CANCELLED",
+     "data": { "type": "stream", "device": "virtio-disk0",
+               "len": 10737418240, "offset": 134217728,
+               "speed": 0 },
+     "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
diff --git a/block.c b/block.c
index 3f072f6274..3621d11de0 100644
--- a/block.c
+++ b/block.c
@@ -48,6 +48,10 @@
 
 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
 
+typedef enum {
+    BDRV_REQ_COPY_ON_READ = 0x1,
+} BdrvRequestFlags;
+
 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
@@ -62,7 +66,8 @@ static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+    BdrvRequestFlags flags);
 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
@@ -1020,6 +1025,10 @@ int bdrv_commit(BlockDriverState *bs)
         return -EACCES;
     }
 
+    if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
+        return -EBUSY;
+    }
+
     backing_drv = bs->backing_hd->drv;
     ro = bs->backing_hd->read_only;
     strncpy(filename, bs->backing_hd->filename, sizeof(filename));
@@ -1288,7 +1297,7 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
 
     if (!rwco->is_write) {
         rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
-                                     rwco->nb_sectors, rwco->qiov);
+                                     rwco->nb_sectors, rwco->qiov, 0);
     } else {
         rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
                                       rwco->nb_sectors, rwco->qiov);
@@ -1496,7 +1505,7 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
     return 0;
 }
 
-static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
 {
     /* Perform I/O through a temporary buffer so that users who scribble over
@@ -1519,8 +1528,8 @@ static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
     round_to_clusters(bs, sector_num, nb_sectors,
                       &cluster_sector_num, &cluster_nb_sectors);
 
-    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors,
-                                cluster_sector_num, cluster_nb_sectors);
+    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
+                                   cluster_sector_num, cluster_nb_sectors);
 
     iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
     iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
@@ -1555,7 +1564,8 @@ err:
  * Handle a read request in coroutine context
  */
 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
 {
     BlockDriver *drv = bs->drv;
     BdrvTrackedRequest req;
@@ -1574,12 +1584,19 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
     }
 
     if (bs->copy_on_read) {
+        flags |= BDRV_REQ_COPY_ON_READ;
+    }
+    if (flags & BDRV_REQ_COPY_ON_READ) {
+        bs->copy_on_read_in_flight++;
+    }
+
+    if (bs->copy_on_read_in_flight) {
         wait_for_overlapping_requests(bs, sector_num, nb_sectors);
     }
 
     tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
 
-    if (bs->copy_on_read) {
+    if (flags & BDRV_REQ_COPY_ON_READ) {
         int pnum;
 
         ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
@@ -1588,7 +1605,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
         }
 
         if (!ret || pnum != nb_sectors) {
-            ret = bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, qiov);
+            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
             goto out;
         }
     }
@@ -1597,6 +1614,11 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
 
 out:
     tracked_request_end(&req);
+
+    if (flags & BDRV_REQ_COPY_ON_READ) {
+        bs->copy_on_read_in_flight--;
+    }
+
     return ret;
 }
 
@@ -1605,7 +1627,16 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
 {
     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
 
-    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
+    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
+}
+
+int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
+
+    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
+                            BDRV_REQ_COPY_ON_READ);
 }
 
 /*
@@ -1633,7 +1664,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
         bdrv_io_limits_intercept(bs, true, nb_sectors);
     }
 
-    if (bs->copy_on_read) {
+    if (bs->copy_on_read_in_flight) {
         wait_for_overlapping_requests(bs, sector_num, nb_sectors);
     }
 
@@ -2564,6 +2595,24 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
     return -ENOTSUP;
 }
 
+BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
+        const char *backing_file)
+{
+    if (!bs->drv) {
+        return NULL;
+    }
+
+    if (bs->backing_hd) {
+        if (strcmp(bs->backing_file, backing_file) == 0) {
+            return bs->backing_hd;
+        } else {
+            return bdrv_find_backing_image(bs->backing_hd, backing_file);
+        }
+    }
+
+    return NULL;
+}
+
 #define NB_SUFFIXES 4
 
 char *get_human_readable_size(char *buf, int buf_size, int64_t size)
@@ -3140,7 +3189,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
 
     if (!acb->is_write) {
         acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
-            acb->req.nb_sectors, acb->req.qiov);
+            acb->req.nb_sectors, acb->req.qiov, 0);
     } else {
         acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
             acb->req.nb_sectors, acb->req.qiov);
@@ -3827,3 +3876,51 @@ out:
 
     return ret;
 }
+
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+                       BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockJob *job;
+
+    if (bs->job || bdrv_in_use(bs)) {
+        return NULL;
+    }
+    bdrv_set_in_use(bs, 1);
+
+    job = g_malloc0(job_type->instance_size);
+    job->job_type      = job_type;
+    job->bs            = bs;
+    job->cb            = cb;
+    job->opaque        = opaque;
+    bs->job = job;
+    return job;
+}
+
+void block_job_complete(BlockJob *job, int ret)
+{
+    BlockDriverState *bs = job->bs;
+
+    assert(bs->job == job);
+    job->cb(job->opaque, ret);
+    bs->job = NULL;
+    g_free(job);
+    bdrv_set_in_use(bs, 0);
+}
+
+int block_job_set_speed(BlockJob *job, int64_t value)
+{
+    if (!job->job_type->set_speed) {
+        return -ENOTSUP;
+    }
+    return job->job_type->set_speed(job, value);
+}
+
+void block_job_cancel(BlockJob *job)
+{
+    job->cancelled = true;
+}
+
+bool block_job_is_cancelled(BlockJob *job)
+{
+    return job->cancelled;
+}
diff --git a/block.h b/block.h
index 3bd4398609..cae289b2fb 100644
--- a/block.h
+++ b/block.h
@@ -142,10 +142,14 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
     const void *buf, int count);
 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
     int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
     int nb_sectors, QEMUIOVector *qiov);
 int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
     int nb_sectors, int *pnum);
+BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
+    const char *backing_file);
 int bdrv_truncate(BlockDriverState *bs, int64_t offset);
 int64_t bdrv_getlength(BlockDriverState *bs);
 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 9b885359e4..a251802ad4 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -292,10 +292,10 @@ static int blkdebug_open(BlockDriverState *bs, const char *filename, int flags)
         return -EINVAL;
     }
 
-    config = strdup(filename);
+    config = g_strdup(filename);
     config[c - filename] = '\0';
     ret = read_config(s, config);
-    free(config);
+    g_free(config);
     if (ret < 0) {
         return ret;
     }
diff --git a/block/blkverify.c b/block/blkverify.c
index 4ca8584b88..9d5f1ec5b9 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -87,10 +87,10 @@ static int blkverify_open(BlockDriverState *bs, const char *filename, int flags)
         return -EINVAL;
     }
 
-    raw = strdup(filename);
+    raw = g_strdup(filename);
     raw[c - filename] = '\0';
     ret = bdrv_file_open(&bs->file, raw, flags);
-    free(raw);
+    g_free(raw);
     if (ret < 0) {
         return ret;
     }
diff --git a/block/qcow.c b/block/qcow.c
index b16955d764..b1cfe1f696 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -95,11 +95,13 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
 static int qcow_open(BlockDriverState *bs, int flags)
 {
     BDRVQcowState *s = bs->opaque;
-    int len, i, shift;
+    int len, i, shift, ret;
     QCowHeader header;
 
-    if (bdrv_pread(bs->file, 0, &header, sizeof(header)) != sizeof(header))
+    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+    if (ret < 0) {
         goto fail;
+    }
     be32_to_cpus(&header.magic);
     be32_to_cpus(&header.version);
     be64_to_cpus(&header.backing_file_offset);
@@ -109,15 +111,31 @@ static int qcow_open(BlockDriverState *bs, int flags)
     be32_to_cpus(&header.crypt_method);
     be64_to_cpus(&header.l1_table_offset);
 
-    if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION)
+    if (header.magic != QCOW_MAGIC) {
+        ret = -EINVAL;
         goto fail;
-    if (header.size <= 1 || header.cluster_bits < 9)
+    }
+    if (header.version != QCOW_VERSION) {
+        char version[64];
+        snprintf(version, sizeof(version), "QCOW version %d", header.version);
+        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+            bs->device_name, "qcow", version);
+        ret = -ENOTSUP;
         goto fail;
-    if (header.crypt_method > QCOW_CRYPT_AES)
+    }
+
+    if (header.size <= 1 || header.cluster_bits < 9) {
+        ret = -EINVAL;
+        goto fail;
+    }
+    if (header.crypt_method > QCOW_CRYPT_AES) {
+        ret = -EINVAL;
         goto fail;
+    }
     s->crypt_method_header = header.crypt_method;
-    if (s->crypt_method_header)
+    if (s->crypt_method_header) {
         bs->encrypted = 1;
+    }
     s->cluster_bits = header.cluster_bits;
     s->cluster_size = 1 << s->cluster_bits;
     s->cluster_sectors = 1 << (s->cluster_bits - 9);
@@ -132,33 +150,33 @@ static int qcow_open(BlockDriverState *bs, int flags)
 
     s->l1_table_offset = header.l1_table_offset;
     s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
-    if (!s->l1_table)
-        goto fail;
-    if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
-        s->l1_size * sizeof(uint64_t))
+
+    ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+               s->l1_size * sizeof(uint64_t));
+    if (ret < 0) {
         goto fail;
+    }
+
     for(i = 0;i < s->l1_size; i++) {
         be64_to_cpus(&s->l1_table[i]);
     }
     /* alloc L2 cache */
     s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
-    if (!s->l2_cache)
-        goto fail;
     s->cluster_cache = g_malloc(s->cluster_size);
-    if (!s->cluster_cache)
-        goto fail;
     s->cluster_data = g_malloc(s->cluster_size);
-    if (!s->cluster_data)
-        goto fail;
     s->cluster_cache_offset = -1;
 
     /* read the backing file name */
     if (header.backing_file_offset != 0) {
         len = header.backing_file_size;
-        if (len > 1023)
+        if (len > 1023) {
             len = 1023;
-        if (bdrv_pread(bs->file, header.backing_file_offset, bs->backing_file, len) != len)
+        }
+        ret = bdrv_pread(bs->file, header.backing_file_offset,
+                   bs->backing_file, len);
+        if (ret < 0) {
             goto fail;
+        }
         bs->backing_file[len] = '\0';
     }
 
@@ -176,7 +194,7 @@ static int qcow_open(BlockDriverState *bs, int flags)
     g_free(s->l2_cache);
     g_free(s->cluster_cache);
     g_free(s->cluster_data);
-    return -1;
+    return ret;
 }
 
 static int qcow_set_key(BlockDriverState *bs, const char *key)
@@ -626,13 +644,14 @@ static void qcow_close(BlockDriverState *bs)
 
 static int qcow_create(const char *filename, QEMUOptionParameter *options)
 {
-    int fd, header_size, backing_filename_len, l1_size, i, shift;
+    int header_size, backing_filename_len, l1_size, shift, i;
     QCowHeader header;
-    uint64_t tmp;
+    uint8_t *tmp;
     int64_t total_size = 0;
     const char *backing_file = NULL;
     int flags = 0;
     int ret;
+    BlockDriverState *qcow_bs;
 
     /* Read out options */
     while (options && options->name) {
@@ -646,9 +665,21 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options)
         options++;
     }
 
-    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
-    if (fd < 0)
-        return -errno;
+    ret = bdrv_create_file(filename, options);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = bdrv_file_open(&qcow_bs, filename, BDRV_O_RDWR);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = bdrv_truncate(qcow_bs, 0);
+    if (ret < 0) {
+        goto exit;
+    }
+
     memset(&header, 0, sizeof(header));
     header.magic = cpu_to_be32(QCOW_MAGIC);
     header.version = cpu_to_be32(QCOW_VERSION);
@@ -684,33 +715,34 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options)
     }
 
     /* write all the data */
-    ret = qemu_write_full(fd, &header, sizeof(header));
+    ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
     if (ret != sizeof(header)) {
-        ret = -errno;
         goto exit;
     }
 
     if (backing_file) {
-        ret = qemu_write_full(fd, backing_file, backing_filename_len);
+        ret = bdrv_pwrite(qcow_bs, sizeof(header),
+            backing_file, backing_filename_len);
         if (ret != backing_filename_len) {
-            ret = -errno;
             goto exit;
         }
-
     }
-    lseek(fd, header_size, SEEK_SET);
-    tmp = 0;
-    for(i = 0;i < l1_size; i++) {
-        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
-        if (ret != sizeof(tmp)) {
-            ret = -errno;
+
+    tmp = g_malloc0(BDRV_SECTOR_SIZE);
+    for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
+        BDRV_SECTOR_SIZE); i++) {
+        ret = bdrv_pwrite(qcow_bs, header_size +
+            BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
+        if (ret != BDRV_SECTOR_SIZE) {
+            g_free(tmp);
             goto exit;
         }
     }
 
+    g_free(tmp);
     ret = 0;
 exit:
-    close(fd);
+    bdrv_delete(qcow_bs);
     return ret;
 }
 
diff --git a/block/rbd.c b/block/rbd.c
index db5abf240b..46a8579018 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -789,6 +789,26 @@ static int qemu_rbd_snap_create(BlockDriverState *bs,
     return 0;
 }
 
+static int qemu_rbd_snap_remove(BlockDriverState *bs,
+                                const char *snapshot_name)
+{
+    BDRVRBDState *s = bs->opaque;
+    int r;
+
+    r = rbd_snap_remove(s->image, snapshot_name);
+    return r;
+}
+
+static int qemu_rbd_snap_rollback(BlockDriverState *bs,
+                                  const char *snapshot_name)
+{
+    BDRVRBDState *s = bs->opaque;
+    int r;
+
+    r = rbd_snap_rollback(s->image, snapshot_name);
+    return r;
+}
+
 static int qemu_rbd_snap_list(BlockDriverState *bs,
                               QEMUSnapshotInfo **psn_tab)
 {
@@ -862,7 +882,9 @@ static BlockDriver bdrv_rbd = {
     .bdrv_co_flush_to_disk  = qemu_rbd_co_flush,
 
     .bdrv_snapshot_create   = qemu_rbd_snap_create,
+    .bdrv_snapshot_delete   = qemu_rbd_snap_remove,
     .bdrv_snapshot_list     = qemu_rbd_snap_list,
+    .bdrv_snapshot_goto     = qemu_rbd_snap_rollback,
 };
 
 static void bdrv_rbd_init(void)
diff --git a/block/stream.c b/block/stream.c
new file mode 100644
index 0000000000..d1b3986a8a
--- /dev/null
+++ b/block/stream.c
@@ -0,0 +1,269 @@
+/*
+ * Image streaming
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "block_int.h"
+
+enum {
+    /*
+     * Size of data buffer for populating the image file.  This should be large
+     * enough to process multiple clusters in a single call, so that populating
+     * contiguous regions of the image is efficient.
+     */
+    STREAM_BUFFER_SIZE = 512 * 1024, /* in bytes */
+};
+
+#define SLICE_TIME 100000000ULL /* ns */
+
+typedef struct {
+    int64_t next_slice_time;
+    uint64_t slice_quota;
+    uint64_t dispatched;
+} RateLimit;
+
+static int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n)
+{
+    int64_t delay_ns = 0;
+    int64_t now = qemu_get_clock_ns(rt_clock);
+
+    if (limit->next_slice_time < now) {
+        limit->next_slice_time = now + SLICE_TIME;
+        limit->dispatched = 0;
+    }
+    if (limit->dispatched + n > limit->slice_quota) {
+        delay_ns = limit->next_slice_time - now;
+    } else {
+        limit->dispatched += n;
+    }
+    return delay_ns;
+}
+
+static void ratelimit_set_speed(RateLimit *limit, uint64_t speed)
+{
+    limit->slice_quota = speed / (1000000000ULL / SLICE_TIME);
+}
+
+typedef struct StreamBlockJob {
+    BlockJob common;
+    RateLimit limit;
+    BlockDriverState *base;
+    char backing_file_id[1024];
+} StreamBlockJob;
+
+static int coroutine_fn stream_populate(BlockDriverState *bs,
+                                        int64_t sector_num, int nb_sectors,
+                                        void *buf)
+{
+    struct iovec iov = {
+        .iov_base = buf,
+        .iov_len  = nb_sectors * BDRV_SECTOR_SIZE,
+    };
+    QEMUIOVector qiov;
+
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    /* Copy-on-read the unallocated clusters */
+    return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
+}
+
+/*
+ * Given an image chain: [BASE] -> [INTER1] -> [INTER2] -> [TOP]
+ *
+ * Return true if the given sector is allocated in top.
+ * Return false if the given sector is allocated in intermediate images.
+ * Return true otherwise; a negative value is returned on error.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ *  the specified sector) that are known to be in the same
+ *  allocated/unallocated state.
+ *
+ */
+static int coroutine_fn is_allocated_base(BlockDriverState *top,
+                                          BlockDriverState *base,
+                                          int64_t sector_num,
+                                          int nb_sectors, int *pnum)
+{
+    BlockDriverState *intermediate;
+    int ret, n;
+
+    ret = bdrv_co_is_allocated(top, sector_num, nb_sectors, &n);
+    if (ret) {
+        *pnum = n;
+        return ret;
+    }
+
+    /*
+     * Is the unallocated chunk [sector_num, n] also
+     * unallocated between base and top?
+     */
+    intermediate = top->backing_hd;
+
+    while (intermediate) {
+        int pnum_inter;
+
+        /* reached base */
+        if (intermediate == base) {
+            *pnum = n;
+            return 1;
+        }
+        ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
+                                   &pnum_inter);
+        if (ret < 0) {
+            return ret;
+        } else if (ret) {
+            *pnum = pnum_inter;
+            return 0;
+        }
+
+        /*
+         * [sector_num, nb_sectors] is unallocated on top but intermediate
+         * might have
+         *
+         * [sector_num+x, nb_sectors] allocated.
+         */
+        if (n > pnum_inter) {
+            n = pnum_inter;
+        }
+
+        intermediate = intermediate->backing_hd;
+    }
+
+    return 1;
+}
+
+static void coroutine_fn stream_run(void *opaque)
+{
+    StreamBlockJob *s = opaque;
+    BlockDriverState *bs = s->common.bs;
+    BlockDriverState *base = s->base;
+    int64_t sector_num, end;
+    int ret = 0;
+    int n;
+    void *buf;
+
+    s->common.len = bdrv_getlength(bs);
+    if (s->common.len < 0) {
+        block_job_complete(&s->common, s->common.len);
+        return;
+    }
+
+    end = s->common.len >> BDRV_SECTOR_BITS;
+    buf = qemu_blockalign(bs, STREAM_BUFFER_SIZE);
+
+    /* Turn on copy-on-read for the whole block device so that guest read
+     * requests help us make progress.  Only do this when copying the entire
+     * backing chain since the copy-on-read operation does not take base into
+     * account.
+     */
+    if (!base) {
+        bdrv_enable_copy_on_read(bs);
+    }
+
+    for (sector_num = 0; sector_num < end; sector_num += n) {
+retry:
+        if (block_job_is_cancelled(&s->common)) {
+            break;
+        }
+
+
+        if (base) {
+            ret = is_allocated_base(bs, base, sector_num,
+                                    STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
+        } else {
+            ret = bdrv_co_is_allocated(bs, sector_num,
+                                       STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE,
+                                       &n);
+        }
+        trace_stream_one_iteration(s, sector_num, n, ret);
+        if (ret == 0) {
+            if (s->common.speed) {
+                uint64_t delay_ns = ratelimit_calculate_delay(&s->limit, n);
+                if (delay_ns > 0) {
+                    co_sleep_ns(rt_clock, delay_ns);
+
+                    /* Recheck cancellation and that sectors are unallocated */
+                    goto retry;
+                }
+            }
+            ret = stream_populate(bs, sector_num, n, buf);
+        }
+        if (ret < 0) {
+            break;
+        }
+        ret = 0;
+
+        /* Publish progress */
+        s->common.offset += n * BDRV_SECTOR_SIZE;
+
+        /* Note that even when no rate limit is applied we need to yield
+         * with no pending I/O here so that qemu_aio_flush() returns.
+         */
+        co_sleep_ns(rt_clock, 0);
+    }
+
+    if (!base) {
+        bdrv_disable_copy_on_read(bs);
+    }
+
+    if (sector_num == end && ret == 0) {
+        const char *base_id = NULL;
+        if (base) {
+            base_id = s->backing_file_id;
+        }
+        ret = bdrv_change_backing_file(bs, base_id, NULL);
+    }
+
+    qemu_vfree(buf);
+    block_job_complete(&s->common, ret);
+}
+
+static int stream_set_speed(BlockJob *job, int64_t value)
+{
+    StreamBlockJob *s = container_of(job, StreamBlockJob, common);
+
+    if (value < 0) {
+        return -EINVAL;
+    }
+    job->speed = value;
+    ratelimit_set_speed(&s->limit, value / BDRV_SECTOR_SIZE);
+    return 0;
+}
+
+static BlockJobType stream_job_type = {
+    .instance_size = sizeof(StreamBlockJob),
+    .job_type      = "stream",
+    .set_speed     = stream_set_speed,
+};
+
+int stream_start(BlockDriverState *bs, BlockDriverState *base,
+                 const char *base_id, BlockDriverCompletionFunc *cb,
+                 void *opaque)
+{
+    StreamBlockJob *s;
+    Coroutine *co;
+
+    s = block_job_create(&stream_job_type, bs, cb, opaque);
+    if (!s) {
+        return -EBUSY; /* bs must already be in use */
+    }
+
+    s->base = base;
+    if (base_id) {
+        pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id);
+    }
+
+    co = qemu_coroutine_create(stream_run);
+    trace_stream_start(bs, base, s, co, opaque);
+    qemu_coroutine_enter(co, s);
+    return 0;
+}
diff --git a/block/vdi.c b/block/vdi.c
index 31cdfabdea..6a0011fbcc 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -1,7 +1,7 @@
 /*
  * Block driver for the Virtual Disk Image (VDI) format
  *
- * Copyright (c) 2009 Stefan Weil
+ * Copyright (c) 2009, 2012 Stefan Weil
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -756,15 +756,19 @@ static void vdi_aio_write_cb(void *opaque, int ret)
                  (uint64_t)bmap_entry * s->block_sectors;
         block = acb->block_buffer;
         if (block == NULL) {
-            block = g_malloc0(s->block_size);
+            block = g_malloc(s->block_size);
             acb->block_buffer = block;
             acb->bmap_first = block_index;
             assert(!acb->header_modified);
             acb->header_modified = 1;
         }
         acb->bmap_last = block_index;
+        /* Copy data to be written to new block and zero unused parts. */
+        memset(block, 0, sector_in_block * SECTOR_SIZE);
         memcpy(block + sector_in_block * SECTOR_SIZE,
                acb->buf, n_sectors * SECTOR_SIZE);
+        memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
+               (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
         acb->hd_iov.iov_base = (void *)block;
         acb->hd_iov.iov_len = s->block_size;
         qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
diff --git a/block_int.h b/block_int.h
index 311bd2a6fa..7be2988ca7 100644
--- a/block_int.h
+++ b/block_int.h
@@ -69,6 +69,36 @@ typedef struct BlockIOBaseValue {
     uint64_t ios[2];
 } BlockIOBaseValue;
 
+typedef void BlockJobCancelFunc(void *opaque);
+typedef struct BlockJob BlockJob;
+typedef struct BlockJobType {
+    /** Derived BlockJob struct size */
+    size_t instance_size;
+
+    /** String describing the operation, part of query-block-jobs QMP API */
+    const char *job_type;
+
+    /** Optional callback for job types that support setting a speed limit */
+    int (*set_speed)(BlockJob *job, int64_t value);
+} BlockJobType;
+
+/**
+ * Long-running operation on a BlockDriverState
+ */
+struct BlockJob {
+    const BlockJobType *job_type;
+    BlockDriverState *bs;
+    bool cancelled;
+
+    /* These fields are published by the query-block-jobs QMP API */
+    int64_t offset;
+    int64_t len;
+    int64_t speed;
+
+    BlockDriverCompletionFunc *cb;
+    void *opaque;
+};
+
 struct BlockDriver {
     const char *format_name;
     int instance_size;
@@ -218,6 +248,9 @@ struct BlockDriverState {
     BlockDriverState *backing_hd;
     BlockDriverState *file;
 
+    /* number of in-flight copy-on-read requests */
+    unsigned int copy_on_read_in_flight;
+
     /* async read/write emulation */
 
     void *sync_aiocb;
@@ -261,6 +294,9 @@ struct BlockDriverState {
     void *private;
 
     QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+
+    /* long-running background operation */
+    BlockJob *job;
 };
 
 struct BlockDriverAIOCB {
@@ -284,4 +320,15 @@ void bdrv_set_io_limits(BlockDriverState *bs,
 int is_windows_drive(const char *filename);
 #endif
 
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+                       BlockDriverCompletionFunc *cb, void *opaque);
+void block_job_complete(BlockJob *job, int ret);
+int block_job_set_speed(BlockJob *job, int64_t value);
+void block_job_cancel(BlockJob *job);
+bool block_job_is_cancelled(BlockJob *job);
+
+int stream_start(BlockDriverState *bs, BlockDriverState *base,
+                 const char *base_id, BlockDriverCompletionFunc *cb,
+                 void *opaque);
+
 #endif /* BLOCK_INT_H */
diff --git a/blockdev.c b/blockdev.c
index 1f83c888e7..7e4c548426 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -13,9 +13,11 @@
 #include "qerror.h"
 #include "qemu-option.h"
 #include "qemu-config.h"
+#include "qemu-objects.h"
 #include "sysemu.h"
 #include "block_int.h"
 #include "qmp-commands.h"
+#include "trace.h"
 
 static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives);
 
@@ -200,6 +202,37 @@ void drive_get_ref(DriveInfo *dinfo)
     dinfo->refcount++;
 }
 
+typedef struct {
+    QEMUBH *bh;
+    DriveInfo *dinfo;
+} DrivePutRefBH;
+
+static void drive_put_ref_bh(void *opaque)
+{
+    DrivePutRefBH *deferred = opaque;
+    /* Runs as a BH: drop the drive reference, then tear down the BH state */
+    drive_put_ref(deferred->dinfo);
+    qemu_bh_delete(deferred->bh);
+    g_free(deferred);
+}
+
+/*
+ * Defer dropping a drive reference to a bottom half (BH)
+ *
+ * A callback must not call drive_put_ref() itself while its callers still
+ * need the drive.  Instead the release is queued here as a BH; the handler,
+ * drive_put_ref_bh(), drops the reference and frees the state allocated
+ * below.
+ */
+static void drive_put_ref_bh_schedule(DriveInfo *dinfo)
+{
+    DrivePutRefBH *deferred = g_new(DrivePutRefBH, 1);
+
+    deferred->dinfo = dinfo;
+    deferred->bh = qemu_bh_new(drive_put_ref_bh, deferred);
+    qemu_bh_schedule(deferred->bh);
+}
+
 static int parse_block_error_action(const char *buf, int is_read)
 {
     if (!strcmp(buf, "ignore")) {
@@ -592,12 +625,18 @@ void do_commit(Monitor *mon, const QDict *qdict)
     if (!strcmp(device, "all")) {
         bdrv_commit_all();
     } else {
+        int ret;
+
         bs = bdrv_find(device);
         if (!bs) {
             qerror_report(QERR_DEVICE_NOT_FOUND, device);
             return;
         }
-        bdrv_commit(bs);
+        ret = bdrv_commit(bs);
+        if (ret == -EBUSY) {
+            qerror_report(QERR_DEVICE_IN_USE, device);
+            return;
+        }
     }
 }
 
@@ -616,6 +655,10 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
         return;
     }
+    if (bdrv_in_use(bs)) {
+        error_set(errp, QERR_DEVICE_IN_USE, device);
+        return;
+    }
 
     pstrcpy(old_filename, sizeof(old_filename), bs->filename);
 
@@ -667,6 +710,10 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
 
 static void eject_device(BlockDriverState *bs, int force, Error **errp)
 {
+    if (bdrv_in_use(bs)) {
+        error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
+        return;
+    }
     if (!bdrv_dev_has_removable_media(bs)) {
         error_set(errp, QERR_DEVICE_NOT_REMOVABLE, bdrv_get_device_name(bs));
         return;
@@ -883,3 +930,153 @@ void qmp_block_resize(const char *device, int64_t size, Error **errp)
         break;
     }
 }
+
+static QObject *qobject_from_block_job(BlockJob *job)
+{
+    const char *type = job->job_type->job_type;
+    const char *device = bdrv_get_device_name(job->bs);
+
+    /* Same members as BlockJobInfo; used as the payload of block job events */
+    return qobject_from_jsonf("{ 'type': %s,"
+                              "'device': %s,"
+                              "'len': %" PRId64 ","
+                              "'offset': %" PRId64 ","
+                              "'speed': %" PRId64 " }",
+                              type, device, job->len, job->offset, job->speed);
+}
+
+static void block_stream_cb(void *opaque, int ret)
+{
+    BlockDriverState *bs = opaque;
+    MonitorEvent event;
+    QObject *data;
+
+    trace_block_stream_cb(bs, bs->job, ret);
+
+    assert(bs->job);
+    data = qobject_from_block_job(bs->job);
+    if (ret < 0) {
+        qdict_put(qobject_to_qdict(data), "error",
+                  qstring_from_str(strerror(-ret)));
+    }
+
+    event = block_job_is_cancelled(bs->job) ? QEVENT_BLOCK_JOB_CANCELLED
+                                            : QEVENT_BLOCK_JOB_COMPLETED;
+    monitor_protocol_event(event, data);
+    qobject_decref(data);
+
+    /* Release the reference taken in qmp_block_stream(), deferred to a BH */
+    drive_put_ref_bh_schedule(drive_get_by_blockdev(bs));
+}
+
+void qmp_block_stream(const char *device, bool has_base,
+                      const char *base, Error **errp)
+{
+    BlockDriverState *bs = bdrv_find(device);
+    BlockDriverState *base_bs = NULL;
+    int ret;
+
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+
+    /* The base is optional; when given it must be found for this device */
+    if (base) {
+        base_bs = bdrv_find_backing_image(bs, base);
+        if (!base_bs) {
+            error_set(errp, QERR_BASE_NOT_FOUND, base);
+            return;
+        }
+    }
+
+    ret = stream_start(bs, base_bs, base, block_stream_cb, bs);
+    if (ret == -EBUSY) {
+        error_set(errp, QERR_DEVICE_IN_USE, device);
+        return;
+    }
+    if (ret < 0) {
+        /* every other failure is reported as NotSupported */
+        error_set(errp, QERR_NOT_SUPPORTED);
+        return;
+    }
+
+    /* Hold a drive reference while the job runs so hotplug does not delete
+     * the BlockDriverState from underneath it.  block_stream_cb() schedules
+     * the matching release.
+     */
+    drive_get_ref(drive_get_by_blockdev(bs));
+
+    trace_qmp_block_stream(bs, bs->job);
+}
+
+/*
+ * Return the block job running on @device, or NULL when the device does not
+ * exist or has no active job.
+ */
+static BlockJob *find_block_job(const char *device)
+{
+    BlockDriverState *bs = bdrv_find(device);
+
+    return bs ? bs->job : NULL;
+}
+
+/* QMP block_job_set_speed: set the speed limit of the job on @device */
+void qmp_block_job_set_speed(const char *device, int64_t value, Error **errp)
+{
+    BlockJob *active = find_block_job(device);
+
+    /* Without a running job there is nothing to throttle */
+    if (active == NULL) {
+        error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
+    } else if (block_job_set_speed(active, value) < 0) {
+        /* any failure from the job layer is reported as NotSupported */
+        error_set(errp, QERR_NOT_SUPPORTED);
+    }
+}
+
+/* QMP block_job_cancel: request cancellation of the job on @device */
+void qmp_block_job_cancel(const char *device, Error **errp)
+{
+    BlockJob *active = find_block_job(device);
+
+    if (active != NULL) {
+        trace_qmp_block_job_cancel(active);
+        block_job_cancel(active);
+    } else {
+        error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
+    }
+}
+
+static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs)
+{
+    BlockJobInfoList **tail = opaque;
+    BlockJobInfoList *node;
+    BlockJob *job = bs->job;
+
+    if (!job) {
+        return; /* no job on this device: nothing to report */
+    }
+
+    node = g_new0(BlockJobInfoList, 1);
+    node->value = g_new(BlockJobInfo, 1);
+    *node->value = (BlockJobInfo){
+        .type   = g_strdup(job->job_type->job_type),
+        .device = g_strdup(bdrv_get_device_name(bs)),
+        .len    = job->len,
+        .offset = job->offset,
+        .speed  = job->speed,
+    };
+    /* Link after the current tail, then advance the caller's tail pointer */
+    (*tail)->next = node;
+    *tail = node;
+}
+
+BlockJobInfoList *qmp_query_block_jobs(Error **errp)
+{
+    /* Stack sentinel whose ->next becomes the head of the returned list */
+    BlockJobInfoList sentinel = {};
+    BlockJobInfoList *tail = &sentinel;
+    bdrv_iterate(do_qmp_query_block_jobs_one, &tail);
+    return sentinel.next;
+}
diff --git a/docs/live-block-ops.txt b/docs/live-block-ops.txt
new file mode 100644
index 0000000000..a257087401
--- /dev/null
+++ b/docs/live-block-ops.txt
@@ -0,0 +1,58 @@
+LIVE BLOCK OPERATIONS
+=====================
+
+High level description of live block operations. Note these are not
+supported for use with the raw format at the moment.
+
+Snapshot live merge
+===================
+
+Given a snapshot chain, described in this document in the following
+format:
+
+[A] -> [B] -> [C] -> [D]
+
+Where the rightmost object ([D] in the example) described is the current
+image which the guest OS has write access to. To the left of it is its base
+image, and so on accordingly until the leftmost image, which has no
+base.
+
+The snapshot live merge operation transforms such a chain into a
+smaller one with fewer elements, such as this transformation relative
+to the first example:
+
+[A] -> [D]
+
+Currently only forward merge with the active image as the target is
+supported; that is, data is copied rightwards (in terms of the chain
+notation above), with the rightmost image as the destination.
+
+The operation is implemented in QEMU through image streaming facilities.
+
+The basic idea is to execute 'block_stream virtio0' while the guest is
+running. Progress can be monitored using 'info block-jobs'. When the
+streaming operation completes it raises a QMP event. 'block_stream'
+copies data from the backing file(s) into the active image. When finished,
+it adjusts the backing file pointer.
+
+The 'base' parameter specifies an image from which data need not be streamed.
+This image will be used as the backing file for the active image when the
+operation is finished.
+
+In the example above, the command would be:
+
+(qemu) block_stream virtio0 A
+
+
+Live block copy
+===============
+
+To copy an in-use image to another destination in the filesystem, one
+should create a live snapshot in the desired destination, then stream
+into that image. Example:
+
+(qemu) snapshot_blkdev ide0-hd0 /new-path/disk.img qcow2
+
+(qemu) block_stream ide0-hd0
+
+
diff --git a/hmp-commands.hx b/hmp-commands.hx
index e6506fc9d3..573b823347 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -69,6 +69,47 @@ but should be used with extreme caution.  Note that this command only
 resizes image files, it can not resize block devices like LVM volumes.
 ETEXI
 
+    {
+        .name       = "block_stream",
+        .args_type  = "device:B,base:s?",
+        .params     = "device [base]",
+        .help       = "copy data from a backing file into a block device",
+        .mhandler.cmd = hmp_block_stream,
+    },
+
+STEXI
+@item block_stream
+@findex block_stream
+Copy data from a backing file into a block device.
+ETEXI
+
+    {
+        .name       = "block_job_set_speed",
+        .args_type  = "device:B,value:o",
+        .params     = "device value",
+        .help       = "set maximum speed for a background block operation",
+        .mhandler.cmd = hmp_block_job_set_speed,
+    },
+
+STEXI
+@item block_job_set_speed
+@findex block_job_set_speed
+Set maximum speed for a background block operation.
+ETEXI
+
+    {
+        .name       = "block_job_cancel",
+        .args_type  = "device:B",
+        .params     = "device",
+        .help       = "stop an active block streaming operation",
+        .mhandler.cmd = hmp_block_job_cancel,
+    },
+
+STEXI
+@item block_job_cancel
+@findex block_job_cancel
+Stop an active block streaming operation.
+ETEXI
 
     {
         .name       = "eject",
diff --git a/hmp.c b/hmp.c
index 4664dbe8e4..8ff8c9434e 100644
--- a/hmp.c
+++ b/hmp.c
@@ -509,6 +509,42 @@ void hmp_info_pci(Monitor *mon)
     qapi_free_PciInfoList(info_list);
 }
 
+/* HMP "info block-jobs": print one progress line per active block job */
+void hmp_info_block_jobs(Monitor *mon)
+{
+    BlockJobInfoList *list, *elem;
+    Error *err = NULL;
+
+    list = qmp_query_block_jobs(&err);
+    assert(!err);
+
+    if (!list) {
+        monitor_printf(mon, "No active jobs\n");
+        return;
+    }
+
+    /* Walk with a cursor so the head stays available for freeing below */
+    for (elem = list; elem; elem = elem->next) {
+        BlockJobInfo *info = elem->value;
+
+        if (strcmp(info->type, "stream") == 0) {
+            monitor_printf(mon, "Streaming device %s: Completed %" PRId64
+                           " of %" PRId64 " bytes, speed limit %" PRId64
+                           " bytes/s\n",
+                           info->device, info->offset, info->len, info->speed);
+        } else {
+            monitor_printf(mon, "Type %s, device %s: Completed %" PRId64
+                           " of %" PRId64 " bytes, speed limit %" PRId64
+                           " bytes/s\n",
+                           info->type, info->device,
+                           info->offset, info->len, info->speed);
+        }
+    }
+
+    /* The list from qmp_query_block_jobs() is ours to free (was leaked) */
+    qapi_free_BlockJobInfoList(list);
+}
+
 void hmp_quit(Monitor *mon, const QDict *qdict)
 {
     monitor_suspend(mon);
@@ -783,3 +819,35 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
                               qdict_get_int(qdict, "iops_wr"), &err);
     hmp_handle_error(mon, &err);
 }
+
+/* HMP block_stream: forward "device" and the optional "base" to QMP */
+void hmp_block_stream(Monitor *mon, const QDict *qdict)
+{
+    const char *base_name = qdict_get_try_str(qdict, "base");
+    const char *dev_name = qdict_get_str(qdict, "device");
+    Error *local_err = NULL;
+
+    qmp_block_stream(dev_name, base_name != NULL, base_name, &local_err);
+    hmp_handle_error(mon, &local_err);
+}
+
+/* HMP block_job_set_speed: forward "device" and "value" to QMP */
+void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict)
+{
+    int64_t limit = qdict_get_int(qdict, "value");
+    const char *dev_name = qdict_get_str(qdict, "device");
+    Error *local_err = NULL;
+
+    qmp_block_job_set_speed(dev_name, limit, &local_err);
+    hmp_handle_error(mon, &local_err);
+}
+
+/* HMP block_job_cancel: forward "device" to the QMP command */
+void hmp_block_job_cancel(Monitor *mon, const QDict *qdict)
+{
+    const char *dev_name = qdict_get_str(qdict, "device");
+    Error *local_err = NULL;
+
+    qmp_block_job_cancel(dev_name, &local_err);
+    hmp_handle_error(mon, &local_err);
+}
diff --git a/hmp.h b/hmp.h
index aab0b1f508..18eecbdcb9 100644
--- a/hmp.h
+++ b/hmp.h
@@ -32,6 +32,7 @@ void hmp_info_vnc(Monitor *mon);
 void hmp_info_spice(Monitor *mon);
 void hmp_info_balloon(Monitor *mon);
 void hmp_info_pci(Monitor *mon);
+void hmp_info_block_jobs(Monitor *mon);
 void hmp_quit(Monitor *mon, const QDict *qdict);
 void hmp_stop(Monitor *mon, const QDict *qdict);
 void hmp_system_reset(Monitor *mon, const QDict *qdict);
@@ -54,5 +55,8 @@ void hmp_expire_password(Monitor *mon, const QDict *qdict);
 void hmp_eject(Monitor *mon, const QDict *qdict);
 void hmp_change(Monitor *mon, const QDict *qdict);
 void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict);
+void hmp_block_stream(Monitor *mon, const QDict *qdict);
+void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
+void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 5d8bf53586..11cfe73df8 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -391,9 +391,6 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             }
 
             l = strlen(s->serial);
-            if (l > req->cmd.xfer) {
-                l = req->cmd.xfer;
-            }
             if (l > 20) {
                 l = 20;
             }
@@ -1002,9 +999,6 @@ static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
         outbuf[0] = ((buflen - 2) >> 8) & 0xff;
         outbuf[1] = (buflen - 2) & 0xff;
     }
-    if (buflen > r->req.cmd.xfer) {
-        buflen = r->req.cmd.xfer;
-    }
     return buflen;
 }
 
@@ -1038,9 +1032,6 @@ static int scsi_disk_emulate_read_toc(SCSIRequest *req, uint8_t *outbuf)
     default:
         return -1;
     }
-    if (toclen > req->cmd.xfer) {
-        toclen = req->cmd.xfer;
-    }
     return toclen;
 }
 
@@ -1251,6 +1242,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
         scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
         return -1;
     }
+    buflen = MIN(buflen, req->cmd.xfer);
     return buflen;
 
 not_ready:
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 5b416c36ee..a5a439668b 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -346,6 +346,8 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
 
     bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
 
+    trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);
+
     if (sector & req->dev->sector_mask) {
         virtio_blk_rw_complete(req, -EIO);
         return;
diff --git a/monitor.c b/monitor.c
index 187083c450..aadbdcbf33 100644
--- a/monitor.c
+++ b/monitor.c
@@ -479,6 +479,12 @@ void monitor_protocol_event(MonitorEvent event, QObject *data)
         case QEVENT_SPICE_DISCONNECTED:
             event_name = "SPICE_DISCONNECTED";
             break;
+        case QEVENT_BLOCK_JOB_COMPLETED:
+            event_name = "BLOCK_JOB_COMPLETED";
+            break;
+        case QEVENT_BLOCK_JOB_CANCELLED:
+            event_name = "BLOCK_JOB_CANCELLED";
+            break;
         default:
             abort();
             break;
@@ -2312,6 +2318,13 @@ static mon_cmd_t info_cmds[] = {
         .mhandler.info = hmp_info_blockstats,
     },
     {
+        .name       = "block-jobs",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show progress of ongoing block device operations",
+        .mhandler.info = hmp_info_block_jobs,
+    },
+    {
         .name       = "registers",
         .args_type  = "",
         .params     = "",
diff --git a/monitor.h b/monitor.h
index 887c472a92..b72ea07050 100644
--- a/monitor.h
+++ b/monitor.h
@@ -36,6 +36,8 @@ typedef enum MonitorEvent {
     QEVENT_SPICE_CONNECTED,
     QEVENT_SPICE_INITIALIZED,
     QEVENT_SPICE_DISCONNECTED,
+    QEVENT_BLOCK_JOB_COMPLETED,
+    QEVENT_BLOCK_JOB_CANCELLED,
     QEVENT_MAX,
 } MonitorEvent;
 
diff --git a/qapi-schema.json b/qapi-schema.json
index 735eb352b5..80debe679a 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -845,6 +845,38 @@
 { 'command': 'query-pci', 'returns': ['PciInfo'] }
 
 ##
+# @BlockJobInfo:
+#
+# Information about a long-running block device operation.
+#
+# @type: the job type ('stream' for image streaming)
+#
+# @device: the block device name
+#
+# @len: the maximum progress value
+#
+# @offset: the current progress value
+#
+# @speed: the rate limit, bytes per second
+#
+# Since: 1.1
+##
+{ 'type': 'BlockJobInfo',
+  'data': {'type': 'str', 'device': 'str', 'len': 'int',
+           'offset': 'int', 'speed': 'int'} }
+
+##
+# @query-block-jobs:
+#
+# Return information about long-running block device operations.
+#
+# Returns: a list of @BlockJobInfo for each active block job
+#
+# Since: 1.1
+##
+{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'] }
+
+##
 # @quit:
 #
 # This command will cause the QEMU process to exit gracefully.  While every
@@ -1434,3 +1466,86 @@
 { 'command': 'block_set_io_throttle',
   'data': { 'device': 'str', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
             'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int' } }
+
+# @block_stream:
+#
+# Copy data from a backing file into a block device.
+#
+# The block streaming operation is performed in the background until the entire
+# backing file has been copied.  This command returns immediately once streaming
+# has started.  The status of ongoing block streaming operations can be checked
+# with query-block-jobs.  The operation can be stopped before it has completed
+# using the block_job_cancel command.
+#
+# If a base file is specified then sectors are not copied from that base file and
+# its backing chain.  When streaming completes the image file will have the base
+# file as its backing file.  This can be used to stream a subset of the backing
+# file chain instead of flattening the entire image.
+#
+# On successful completion the image file drops its backing file (or uses @base
+# as its backing file, if given) and the BLOCK_JOB_COMPLETED event is emitted.
+#
+# @device: the device name
+#
+# @base:   #optional the common backing file name
+#
+# Returns: Nothing on success
+#          If streaming is already active on this device, DeviceInUse
+#          If @device does not exist, DeviceNotFound
+#          If image streaming is not supported by this device, NotSupported
+#          If @base does not exist, BaseNotFound
+#
+# Since: 1.1
+##
+{ 'command': 'block_stream', 'data': { 'device': 'str', '*base': 'str' } }
+
+##
+# @block_job_set_speed:
+#
+# Set maximum speed for a background block operation.
+#
+# This command can only be issued when there is an active block job.
+#
+# Throttling can be disabled by setting the speed to 0.
+#
+# @device: the device name
+#
+# @value:  the maximum speed, in bytes per second
+#
+# Returns: Nothing on success
+#          If the job type does not support throttling, NotSupported
+#          If streaming is not active on this device, DeviceNotActive
+#
+# Since: 1.1
+##
+{ 'command': 'block_job_set_speed',
+  'data': { 'device': 'str', 'value': 'int' } }
+
+##
+# @block_job_cancel:
+#
+# Stop an active block streaming operation.
+#
+# This command returns immediately after marking the active block streaming
+# operation for cancellation.  It is an error to call this command if no
+# operation is in progress.
+#
+# The operation will cancel as soon as possible and then emit the
+# BLOCK_JOB_CANCELLED event.  Before that happens the job is still visible when
+# enumerated using query-block-jobs.
+#
+# The image file retains its backing file unless the streaming operation happens
+# to complete just as it is being cancelled.
+#
+# A new block streaming operation can be started at a later time to finish
+# copying all data from the backing file.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+#          If streaming is not active on this device, DeviceNotActive
+#          If cancellation already in progress, DeviceInUse
+#
+# Since: 1.1
+##
+{ 'command': 'block_job_cancel', 'data': { 'device': 'str' } }
diff --git a/qemu-coroutine-sleep.c b/qemu-coroutine-sleep.c
new file mode 100644
index 0000000000..fd65274446
--- /dev/null
+++ b/qemu-coroutine-sleep.c
@@ -0,0 +1,38 @@
+/*
+ * QEMU coroutine sleep
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu-coroutine.h"
+#include "qemu-timer.h"
+
+typedef struct CoSleepCB {
+    QEMUTimer *ts;
+    Coroutine *co;
+} CoSleepCB;
+
+static void co_sleep_cb(void *opaque)
+{
+    CoSleepCB *sleeper = opaque;
+    /* Free the timer first: *sleeper is a local of the sleeping coroutine */
+    qemu_free_timer(sleeper->ts);
+    qemu_coroutine_enter(sleeper->co, NULL);
+}
+
+void coroutine_fn co_sleep_ns(QEMUClock *clock, int64_t ns)
+{
+    CoSleepCB waiter = { .co = qemu_coroutine_self() };
+
+    /* Arm a timer on @clock, then yield; co_sleep_cb() re-enters us */
+    waiter.ts = qemu_new_timer(clock, SCALE_NS, co_sleep_cb, &waiter);
+    qemu_mod_timer(waiter.ts, qemu_get_clock_ns(clock) + ns);
+    qemu_coroutine_yield();
+}
diff --git a/qemu-coroutine.h b/qemu-coroutine.h
index 8a55fe125e..34c15d4116 100644
--- a/qemu-coroutine.h
+++ b/qemu-coroutine.h
@@ -17,6 +17,7 @@
 
 #include <stdbool.h>
 #include "qemu-queue.h"
+#include "qemu-timer.h"
 
 /**
  * Coroutines are a mechanism for stack switching and can be used for
@@ -199,4 +200,12 @@ void qemu_co_rwlock_wrlock(CoRwlock *lock);
  */
 void qemu_co_rwlock_unlock(CoRwlock *lock);
 
+/**
+ * Yield the coroutine for a given duration
+ *
+ * Note this function uses timers and hence only works when a main loop is in
+ * use.  See main-loop.h and do not use from qemu-tool programs.
+ */
+void coroutine_fn co_sleep_ns(QEMUClock *clock, int64_t ns);
+
 #endif /* QEMU_COROUTINE_H */
diff --git a/qemu-io.c b/qemu-io.c
index 938b20c499..d4a5fcbcbc 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -130,7 +130,7 @@ static void print_report(const char *op, struct timeval *t, int64_t offset,
 static void *
 create_iovec(QEMUIOVector *qiov, char **argv, int nr_iov, int pattern)
 {
-    size_t *sizes = calloc(nr_iov, sizeof(size_t));
+    size_t *sizes = g_new0(size_t, nr_iov);
     size_t count = 0;
     void *buf = NULL;
     void *p;
@@ -172,7 +172,7 @@ create_iovec(QEMUIOVector *qiov, char **argv, int nr_iov, int pattern)
     }
 
 fail:
-    free(sizes);
+    g_free(sizes);
     return buf;
 }
 
@@ -471,14 +471,14 @@ static int read_f(int argc, char **argv)
     }
 
     if (Pflag) {
-        void *cmp_buf = malloc(pattern_count);
+        void *cmp_buf = g_malloc(pattern_count);
         memset(cmp_buf, pattern, pattern_count);
         if (memcmp(buf + pattern_offset, cmp_buf, pattern_count)) {
             printf("Pattern verification failed at offset %"
                    PRId64 ", %d bytes\n",
                    offset + pattern_offset, pattern_count);
         }
-        free(cmp_buf);
+        g_free(cmp_buf);
     }
 
     if (qflag) {
@@ -601,13 +601,13 @@ static int readv_f(int argc, char **argv)
     }
 
     if (Pflag) {
-        void *cmp_buf = malloc(qiov.size);
+        void *cmp_buf = g_malloc(qiov.size);
         memset(cmp_buf, pattern, qiov.size);
         if (memcmp(buf, cmp_buf, qiov.size)) {
             printf("Pattern verification failed at offset %"
                    PRId64 ", %zd bytes\n", offset, qiov.size);
         }
-        free(cmp_buf);
+        g_free(cmp_buf);
     }
 
     if (qflag) {
@@ -1063,7 +1063,7 @@ static void aio_write_done(void *opaque, int ret)
                  ctx->qiov.size, 1, ctx->Cflag);
 out:
     qemu_io_free(ctx->buf);
-    free(ctx);
+    g_free(ctx);
 }
 
 static void aio_read_done(void *opaque, int ret)
@@ -1079,14 +1079,14 @@ static void aio_read_done(void *opaque, int ret)
     }
 
     if (ctx->Pflag) {
-        void *cmp_buf = malloc(ctx->qiov.size);
+        void *cmp_buf = g_malloc(ctx->qiov.size);
 
         memset(cmp_buf, ctx->pattern, ctx->qiov.size);
         if (memcmp(ctx->buf, cmp_buf, ctx->qiov.size)) {
             printf("Pattern verification failed at offset %"
                    PRId64 ", %zd bytes\n", ctx->offset, ctx->qiov.size);
         }
-        free(cmp_buf);
+        g_free(cmp_buf);
     }
 
     if (ctx->qflag) {
@@ -1103,7 +1103,7 @@ static void aio_read_done(void *opaque, int ret)
                  ctx->qiov.size, 1, ctx->Cflag);
 out:
     qemu_io_free(ctx->buf);
-    free(ctx);
+    g_free(ctx);
 }
 
 static void aio_read_help(void)
@@ -1141,7 +1141,7 @@ static const cmdinfo_t aio_read_cmd = {
 static int aio_read_f(int argc, char **argv)
 {
     int nr_iov, c;
-    struct aio_ctx *ctx = calloc(1, sizeof(struct aio_ctx));
+    struct aio_ctx *ctx = g_new0(struct aio_ctx, 1);
 
     while ((c = getopt(argc, argv, "CP:qv")) != EOF) {
         switch (c) {
@@ -1152,7 +1152,7 @@ static int aio_read_f(int argc, char **argv)
             ctx->Pflag = 1;
             ctx->pattern = parse_pattern(optarg);
             if (ctx->pattern < 0) {
-                free(ctx);
+                g_free(ctx);
                 return 0;
             }
             break;
@@ -1163,20 +1163,20 @@ static int aio_read_f(int argc, char **argv)
             ctx->vflag = 1;
             break;
         default:
-            free(ctx);
+            g_free(ctx);
             return command_usage(&aio_read_cmd);
         }
     }
 
     if (optind > argc - 2) {
-        free(ctx);
+        g_free(ctx);
         return command_usage(&aio_read_cmd);
     }
 
     ctx->offset = cvtnum(argv[optind]);
     if (ctx->offset < 0) {
         printf("non-numeric length argument -- %s\n", argv[optind]);
-        free(ctx);
+        g_free(ctx);
         return 0;
     }
     optind++;
@@ -1184,14 +1184,14 @@ static int aio_read_f(int argc, char **argv)
     if (ctx->offset & 0x1ff) {
         printf("offset %" PRId64 " is not sector aligned\n",
                ctx->offset);
-        free(ctx);
+        g_free(ctx);
         return 0;
     }
 
     nr_iov = argc - optind;
     ctx->buf = create_iovec(&ctx->qiov, &argv[optind], nr_iov, 0xab);
     if (ctx->buf == NULL) {
-        free(ctx);
+        g_free(ctx);
         return 0;
     }
 
@@ -1237,7 +1237,7 @@ static int aio_write_f(int argc, char **argv)
 {
     int nr_iov, c;
     int pattern = 0xcd;
-    struct aio_ctx *ctx = calloc(1, sizeof(struct aio_ctx));
+    struct aio_ctx *ctx = g_new0(struct aio_ctx, 1);
 
     while ((c = getopt(argc, argv, "CqP:")) != EOF) {
         switch (c) {
@@ -1250,25 +1250,25 @@ static int aio_write_f(int argc, char **argv)
         case 'P':
             pattern = parse_pattern(optarg);
             if (pattern < 0) {
-                free(ctx);
+                g_free(ctx);
                 return 0;
             }
             break;
         default:
-            free(ctx);
+            g_free(ctx);
             return command_usage(&aio_write_cmd);
         }
     }
 
     if (optind > argc - 2) {
-        free(ctx);
+        g_free(ctx);
         return command_usage(&aio_write_cmd);
     }
 
     ctx->offset = cvtnum(argv[optind]);
     if (ctx->offset < 0) {
         printf("non-numeric length argument -- %s\n", argv[optind]);
-        free(ctx);
+        g_free(ctx);
         return 0;
     }
     optind++;
@@ -1276,14 +1276,14 @@ static int aio_write_f(int argc, char **argv)
     if (ctx->offset & 0x1ff) {
         printf("offset %" PRId64 " is not sector aligned\n",
                ctx->offset);
-        free(ctx);
+        g_free(ctx);
         return 0;
     }
 
     nr_iov = argc - optind;
     ctx->buf = create_iovec(&ctx->qiov, &argv[optind], nr_iov, pattern);
     if (ctx->buf == NULL) {
-        free(ctx);
+        g_free(ctx);
         return 0;
     }
 
diff --git a/qerror.c b/qerror.c
index 3d95383940..637eca793c 100644
--- a/qerror.c
+++ b/qerror.c
@@ -52,6 +52,10 @@ static const QErrorStringTable qerror_table[] = {
         .desc      = "Device '%(device)' can't go on a %(bad_bus_type) bus",
     },
     {
+        .error_fmt = QERR_BASE_NOT_FOUND,
+        .desc      = "Base '%(base)' not found",
+    },
+    {
         .error_fmt = QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
         .desc      = "Block format '%(format)' used by device '%(name)' does not support feature '%(feature)'",
     },
@@ -197,6 +201,10 @@ static const QErrorStringTable qerror_table[] = {
         .desc      = "No '%(bus)' bus found for device '%(device)'",
     },
     {
+        .error_fmt = QERR_NOT_SUPPORTED,
+        .desc      = "Not supported",
+    },
+    {
         .error_fmt = QERR_OPEN_FILE_FAILED,
         .desc      = "Could not open '%(filename)'",
     },
diff --git a/qerror.h b/qerror.h
index 89160dd78e..8c36ddb7e1 100644
--- a/qerror.h
+++ b/qerror.h
@@ -57,6 +57,9 @@ QError *qobject_to_qerror(const QObject *obj);
 #define QERR_BAD_BUS_FOR_DEVICE \
     "{ 'class': 'BadBusForDevice', 'data': { 'device': %s, 'bad_bus_type': %s } }"
 
+#define QERR_BASE_NOT_FOUND \
+    "{ 'class': 'BaseNotFound', 'data': { 'base': %s } }"
+
 #define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \
     "{ 'class': 'BlockFormatFeatureNotSupported', 'data': { 'format': %s, 'name': %s, 'feature': %s } }"
 
@@ -168,6 +171,9 @@ QError *qobject_to_qerror(const QObject *obj);
 #define QERR_NO_BUS_FOR_DEVICE \
     "{ 'class': 'NoBusForDevice', 'data': { 'device': %s, 'bus': %s } }"
 
+#define QERR_NOT_SUPPORTED \
+    "{ 'class': 'NotSupported', 'data': {} }"
+
 #define QERR_OPEN_FILE_FAILED \
     "{ 'class': 'OpenFileFailed', 'data': { 'filename': %s } }"
 
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 799e655988..bd6b6410ad 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -649,6 +649,24 @@ Example:
 EQMP
 
     {
+        .name       = "block_stream",
+        .args_type  = "device:B,base:s?",
+        .mhandler.cmd_new = qmp_marshal_input_block_stream,
+    },
+
+    {
+        .name       = "block_job_set_speed",
+        .args_type  = "device:B,value:o",
+        .mhandler.cmd_new = qmp_marshal_input_block_job_set_speed,
+    },
+
+    {
+        .name       = "block_job_cancel",
+        .args_type  = "device:B",
+        .mhandler.cmd_new = qmp_marshal_input_block_job_cancel,
+    },
+
+    {
         .name       = "blockdev-snapshot-sync",
         .args_type  = "device:B,snapshot-file:s,format:s?",
         .mhandler.cmd_new = qmp_marshal_input_blockdev_snapshot_sync,
@@ -1996,6 +2014,12 @@ EQMP
     },
 
     {
+        .name       = "query-block-jobs",
+        .args_type  = "",
+        .mhandler.cmd_new = qmp_marshal_input_query_block_jobs,
+    },
+
+    {
         .name       = "qom-list",
         .args_type  = "path:s",
         .mhandler.cmd_new = qmp_marshal_input_qom_list,
diff --git a/trace-events b/trace-events
index d2b0c6181d..75f6e17abe 100644
--- a/trace-events
+++ b/trace-events
@@ -65,14 +65,25 @@ bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %
 bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
 bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
+bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
 bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
 bdrv_co_io_em(void *bs, int64_t sector_num, int nb_sectors, int is_write, void *acb) "bs %p sector_num %"PRId64" nb_sectors %d is_write %d acb %p"
-bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d"
+bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d"
+
+# block/stream.c
+stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
+stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p"
+
+# blockdev.c
+qmp_block_job_cancel(void *job) "job %p"
+block_stream_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
+qmp_block_stream(void *bs, void *job) "bs %p job %p"
 
 # hw/virtio-blk.c
 virtio_blk_req_complete(void *req, int status) "req %p status %d"
 virtio_blk_rw_complete(void *req, int ret) "req %p ret %d"
 virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
+virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
 
 # posix-aio-compat.c
 paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d"