summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--block.c60
-rw-r--r--block/Makefile.objs1
-rw-r--r--block/accounting.c54
-rw-r--r--block/archipelago.c66
-rw-r--r--block/cow.c3
-rw-r--r--block/gluster.c9
-rw-r--r--block/iscsi.c4
-rw-r--r--block/nfs.c3
-rw-r--r--block/qapi.c19
-rw-r--r--block/qcow.c7
-rw-r--r--block/qcow2.c82
-rw-r--r--block/qed.c3
-rw-r--r--block/raw-posix.c102
-rw-r--r--block/raw-win32.c6
-rw-r--r--block/rbd.c3
-rw-r--r--block/sheepdog.c3
-rw-r--r--block/ssh.c3
-rw-r--r--block/vdi.c3
-rw-r--r--block/vhdx.c3
-rw-r--r--block/vmdk.c3
-rw-r--r--block/vpc.c11
-rw-r--r--blockdev.c10
-rw-r--r--dma-helpers.c2
-rw-r--r--hw/block/dataplane/virtio-blk.c4
-rw-r--r--hw/block/nvme.c4
-rw-r--r--hw/block/virtio-blk.c15
-rw-r--r--hw/block/xen_disk.c39
-rw-r--r--hw/ide/ahci.c7
-rw-r--r--hw/ide/atapi.c18
-rw-r--r--hw/ide/core.c24
-rw-r--r--hw/ide/macio.c19
-rw-r--r--hw/ide/mmio.c4
-rw-r--r--hw/ide/piix.c1
-rw-r--r--hw/scsi/scsi-disk.c45
-rw-r--r--include/block/accounting.h57
-rw-r--r--include/block/block.h20
-rw-r--r--include/block/block_int.h6
-rw-r--r--include/block/thread-pool.h6
-rw-r--r--include/hw/virtio/virtio-blk.h1
-rw-r--r--include/sysemu/dma.h1
-rw-r--r--qapi/block-core.json30
-rw-r--r--qemu-doc.texi17
-rw-r--r--qemu-img.texi17
-rw-r--r--qemu-io.c36
-rwxr-xr-xtests/qemu-iotests/0252
-rw-r--r--tests/qemu-iotests/049.out2
-rw-r--r--tests/qemu-iotests/082.out54
-rwxr-xr-xtests/qemu-iotests/10457
-rw-r--r--tests/qemu-iotests/104.out12
-rw-r--r--tests/qemu-iotests/common.filter21
-rw-r--r--tests/qemu-iotests/group1
-rw-r--r--tests/qemu-iotests/socket_scm_helper.c2
-rw-r--r--thread-pool.c1
53 files changed, 704 insertions, 279 deletions
diff --git a/block.c b/block.c
index d06dd51632..bcd952a4cb 100644
--- a/block.c
+++ b/block.c
@@ -3363,9 +3363,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
 
     bdrv_set_dirty(bs, sector_num, nb_sectors);
 
-    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
-        bs->wr_highest_sector = sector_num + nb_sectors - 1;
-    }
+    block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
+
     if (bs->growable && ret >= 0) {
         bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
     }
@@ -3639,6 +3638,19 @@ BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int e
     }
 }
 
+static void send_qmp_error_event(BlockDriverState *bs,
+                                 BlockErrorAction action,
+                                 bool is_read, int error)
+{
+    BlockErrorAction ac;
+
+    ac = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
+    qapi_event_send_block_io_error(bdrv_get_device_name(bs), ac, action,
+                                   bdrv_iostatus_is_enabled(bs),
+                                   error == ENOSPC, strerror(error),
+                                   &error_abort);
+}
+
 /* This is done by device models because, while the block layer knows
  * about the error, it does not know whether an operation comes from
  * the device or the block layer (from a job, for example).
@@ -3664,16 +3676,10 @@ void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
          * also ensures that the STOP/RESUME pair of events is emitted.
          */
         qemu_system_vmstop_request_prepare();
-        qapi_event_send_block_io_error(bdrv_get_device_name(bs),
-                                       is_read ? IO_OPERATION_TYPE_READ :
-                                       IO_OPERATION_TYPE_WRITE,
-                                       action, &error_abort);
+        send_qmp_error_event(bs, action, is_read, error);
         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
     } else {
-        qapi_event_send_block_io_error(bdrv_get_device_name(bs),
-                                       is_read ? IO_OPERATION_TYPE_READ :
-                                       IO_OPERATION_TYPE_WRITE,
-                                       action, &error_abort);
+        send_qmp_error_event(bs, action, is_read, error);
     }
 }
 
@@ -5566,27 +5572,6 @@ void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
     }
 }
 
-void
-bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
-        enum BlockAcctType type)
-{
-    assert(type < BDRV_MAX_IOTYPE);
-
-    cookie->bytes = bytes;
-    cookie->start_time_ns = get_clock();
-    cookie->type = type;
-}
-
-void
-bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
-{
-    assert(cookie->type < BDRV_MAX_IOTYPE);
-
-    bs->nr_bytes[cookie->type] += cookie->bytes;
-    bs->nr_ops[cookie->type]++;
-    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
-}
-
 void bdrv_img_create(const char *filename, const char *fmt,
                      const char *base_filename, const char *base_fmt,
                      char *options, uint64_t img_size, int flags,
@@ -6103,3 +6088,14 @@ void bdrv_refresh_filename(BlockDriverState *bs)
         QDECREF(json);
     }
 }
+
+/* This accessor function purpose is to allow the device models to access the
+ * BlockAcctStats structure embedded inside a BlockDriverState without being
+ * aware of the BlockDriverState structure layout.
+ * It will go away when the BlockAcctStats structure will be moved inside
+ * the device models.
+ */
+BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
+{
+    return &bs->stats;
+}
diff --git a/block/Makefile.objs b/block/Makefile.objs
index f45f9399aa..c9c8bbbcde 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -18,6 +18,7 @@ block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
+block-obj-y += accounting.o
 
 common-obj-y += stream.o
 common-obj-y += commit.o
diff --git a/block/accounting.c b/block/accounting.c
new file mode 100644
index 0000000000..edbb1cc89f
--- /dev/null
+++ b/block/accounting.c
@@ -0,0 +1,54 @@
+/*
+ * QEMU System Emulator block accounting
+ *
+ * Copyright (c) 2011 Christoph Hellwig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "block/accounting.h"
+#include "block/block_int.h"
+
+void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
+                      int64_t bytes, enum BlockAcctType type)
+{
+    assert(type < BLOCK_MAX_IOTYPE);
+
+    cookie->bytes = bytes;
+    cookie->start_time_ns = get_clock();
+    cookie->type = type;
+}
+
+void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
+{
+    assert(cookie->type < BLOCK_MAX_IOTYPE);
+
+    stats->nr_bytes[cookie->type] += cookie->bytes;
+    stats->nr_ops[cookie->type]++;
+    stats->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
+}
+
+
+void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
+                               unsigned int nb_sectors)
+{
+    if (stats->wr_highest_sector < sector_num + nb_sectors - 1) {
+        stats->wr_highest_sector = sector_num + nb_sectors - 1;
+    }
+}
diff --git a/block/archipelago.c b/block/archipelago.c
index 22a7daaa41..93fb7c0634 100644
--- a/block/archipelago.c
+++ b/block/archipelago.c
@@ -63,8 +63,6 @@
 #include <xseg/xseg.h>
 #include <xseg/protocol.h>
 
-#define ARCHIP_FD_READ      0
-#define ARCHIP_FD_WRITE     1
 #define MAX_REQUEST_SIZE    524288
 
 #define ARCHIPELAGO_OPT_VOLUME      "volume"
@@ -84,6 +82,7 @@ typedef enum {
     ARCHIP_OP_WRITE,
     ARCHIP_OP_FLUSH,
     ARCHIP_OP_VOLINFO,
+    ARCHIP_OP_TRUNCATE,
 } ARCHIPCmd;
 
 typedef struct ArchipelagoAIOCB {
@@ -248,6 +247,7 @@ static void xseg_request_handler(void *state)
                 }
                 break;
             case ARCHIP_OP_VOLINFO:
+            case ARCHIP_OP_TRUNCATE:
                 s->is_signaled = true;
                 qemu_cond_signal(&s->archip_cond);
                 break;
@@ -708,7 +708,8 @@ static int qemu_archipelago_create(const char *filename,
 
     parse_filename_opts(filename, errp, &volname, &segment_name, &mport,
                         &vport);
-    total_size = qemu_opt_get_size_del(options, BLOCK_OPT_SIZE, 0);
+    total_size = ROUND_UP(qemu_opt_get_size_del(options, BLOCK_OPT_SIZE, 0),
+                          BDRV_SECTOR_SIZE);
 
     if (segment_name == NULL) {
         segment_name = g_strdup("archipelago");
@@ -995,6 +996,64 @@ static int64_t qemu_archipelago_getlength(BlockDriverState *bs)
     return ret;
 }
 
+static int qemu_archipelago_truncate(BlockDriverState *bs, int64_t offset)
+{
+    int ret, targetlen;
+    struct xseg_request *req;
+    BDRVArchipelagoState *s = bs->opaque;
+    AIORequestData *reqdata = g_new(AIORequestData, 1);
+
+    const char *volname = s->volname;
+    targetlen = strlen(volname);
+    req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC);
+    if (!req) {
+        archipelagolog("Cannot get XSEG request\n");
+        return err_exit2;
+    }
+
+    ret = xseg_prep_request(s->xseg, req, targetlen, 0);
+    if (ret < 0) {
+        archipelagolog("Cannot prepare XSEG request\n");
+        goto err_exit;
+    }
+    char *target = xseg_get_target(s->xseg, req);
+    if (!target) {
+        archipelagolog("Cannot get XSEG target\n");
+        goto err_exit;
+    }
+    memcpy(target, volname, targetlen);
+    req->offset = offset;
+    req->op = X_TRUNCATE;
+
+    reqdata->op = ARCHIP_OP_TRUNCATE;
+    reqdata->volname = volname;
+
+    xseg_set_req_data(s->xseg, req, reqdata);
+
+    xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
+    if (p == NoPort) {
+        archipelagolog("Cannot submit XSEG request\n");
+        goto err_exit;
+    }
+
+    xseg_signal(s->xseg, p);
+    qemu_mutex_lock(&s->archip_mutex);
+    while (!s->is_signaled) {
+        qemu_cond_wait(&s->archip_cond, &s->archip_mutex);
+    }
+    s->is_signaled = false;
+    qemu_mutex_unlock(&s->archip_mutex);
+    xseg_put_request(s->xseg, req, s->srcport);
+    g_free(reqdata);
+    return 0;
+
+err_exit:
+    xseg_put_request(s->xseg, req, s->srcport);
+err_exit2:
+    g_free(reqdata);
+    return -EIO;
+}
+
 static QemuOptsList qemu_archipelago_create_opts = {
     .name = "archipelago-create-opts",
     .head = QTAILQ_HEAD_INITIALIZER(qemu_archipelago_create_opts.head),
@@ -1024,6 +1083,7 @@ static BlockDriver bdrv_archipelago = {
     .bdrv_close          = qemu_archipelago_close,
     .bdrv_create         = qemu_archipelago_create,
     .bdrv_getlength      = qemu_archipelago_getlength,
+    .bdrv_truncate       = qemu_archipelago_truncate,
     .bdrv_aio_readv      = qemu_archipelago_aio_readv,
     .bdrv_aio_writev     = qemu_archipelago_aio_writev,
     .bdrv_aio_flush      = qemu_archipelago_aio_flush,
diff --git a/block/cow.c b/block/cow.c
index 6ee483327f..c3769fe03b 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -335,7 +335,8 @@ static int cow_create(const char *filename, QemuOpts *opts, Error **errp)
     BlockDriverState *cow_bs = NULL;
 
     /* Read out options */
-    image_sectors = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512;
+    image_sectors = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                                 BDRV_SECTOR_SIZE);
     image_filename = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
 
     ret = bdrv_create_file(filename, opts, &local_err);
diff --git a/block/gluster.c b/block/gluster.c
index 1912cf9d07..1eb3a8c398 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -494,8 +494,8 @@ static int qemu_gluster_create(const char *filename,
         goto out;
     }
 
-    total_size =
-        qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;
+    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                          BDRV_SECTOR_SIZE);
 
     tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
     if (!tmp || !strcmp(tmp, "off")) {
@@ -516,9 +516,8 @@ static int qemu_gluster_create(const char *filename,
     if (!fd) {
         ret = -errno;
     } else {
-        if (!glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE)) {
-            if (prealloc && qemu_gluster_zerofill(fd, 0,
-                    total_size * BDRV_SECTOR_SIZE)) {
+        if (!glfs_ftruncate(fd, total_size)) {
+            if (prealloc && qemu_gluster_zerofill(fd, 0, total_size)) {
                 ret = -errno;
             }
         } else {
diff --git a/block/iscsi.c b/block/iscsi.c
index 3e19202488..84bcae89fa 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1531,8 +1531,8 @@ static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
     bs = bdrv_new("", &error_abort);
 
     /* Read out options */
-    total_size =
-        qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;
+    total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                              BDRV_SECTOR_SIZE);
     bs->opaque = g_new0(struct IscsiLun, 1);
     iscsilun = bs->opaque;
 
diff --git a/block/nfs.c b/block/nfs.c
index 194f301501..c76e368b95 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -418,7 +418,8 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
     client->aio_context = qemu_get_aio_context();
 
     /* Read out options */
-    total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                          BDRV_SECTOR_SIZE);
 
     ret = nfs_client_open(client, url, O_CREAT, errp);
     if (ret < 0) {
diff --git a/block/qapi.c b/block/qapi.c
index 79d1e6a9f4..9733ebd328 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -333,15 +333,16 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs)
     }
 
     s->stats = g_malloc0(sizeof(*s->stats));
-    s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
-    s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
-    s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
-    s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
-    s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
-    s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
-    s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
-    s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
-    s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
+    s->stats->rd_bytes = bs->stats.nr_bytes[BLOCK_ACCT_READ];
+    s->stats->wr_bytes = bs->stats.nr_bytes[BLOCK_ACCT_WRITE];
+    s->stats->rd_operations = bs->stats.nr_ops[BLOCK_ACCT_READ];
+    s->stats->wr_operations = bs->stats.nr_ops[BLOCK_ACCT_WRITE];
+    s->stats->wr_highest_offset =
+        bs->stats.wr_highest_sector * BDRV_SECTOR_SIZE;
+    s->stats->flush_operations = bs->stats.nr_ops[BLOCK_ACCT_FLUSH];
+    s->stats->wr_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_WRITE];
+    s->stats->rd_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_READ];
+    s->stats->flush_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_FLUSH];
 
     if (bs->file) {
         s->has_parent = true;
diff --git a/block/qcow.c b/block/qcow.c
index 67c237fe7d..a87bd692f0 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -725,7 +725,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
     BlockDriverState *qcow_bs;
 
     /* Read out options */
-    total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512;
+    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                          BDRV_SECTOR_SIZE);
     backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
     if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
         flags |= BLOCK_FLAG_ENCRYPT;
@@ -753,7 +754,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
     memset(&header, 0, sizeof(header));
     header.magic = cpu_to_be32(QCOW_MAGIC);
     header.version = cpu_to_be32(QCOW_VERSION);
-    header.size = cpu_to_be64(total_size * 512);
+    header.size = cpu_to_be64(total_size);
     header_size = sizeof(header);
     backing_filename_len = 0;
     if (backing_file) {
@@ -775,7 +776,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
     }
     header_size = (header_size + 7) & ~7;
     shift = header.cluster_bits + header.l2_bits;
-    l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;
+    l1_size = (total_size + (1LL << shift) - 1) >> shift;
 
     header.l1_table_offset = cpu_to_be64(header_size);
     if (flags & BLOCK_FLAG_ENCRYPT) {
diff --git a/block/qcow2.c b/block/qcow2.c
index f9e045ff2b..0daf25cb58 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -30,6 +30,7 @@
 #include "qemu/error-report.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qbool.h"
+#include "qapi/util.h"
 #include "trace.h"
 #include "qemu/option_int.h"
 
@@ -1738,7 +1739,7 @@ static int preallocate(BlockDriverState *bs)
 
 static int qcow2_create2(const char *filename, int64_t total_size,
                          const char *backing_file, const char *backing_format,
-                         int flags, size_t cluster_size, int prealloc,
+                         int flags, size_t cluster_size, PreallocMode prealloc,
                          QemuOpts *opts, int version,
                          Error **errp)
 {
@@ -1771,6 +1772,56 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     Error *local_err = NULL;
     int ret;
 
+    if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
+        int64_t meta_size = 0;
+        uint64_t nreftablee, nrefblocke, nl1e, nl2e;
+        int64_t aligned_total_size = align_offset(total_size, cluster_size);
+
+        /* header: 1 cluster */
+        meta_size += cluster_size;
+
+        /* total size of L2 tables */
+        nl2e = aligned_total_size / cluster_size;
+        nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t));
+        meta_size += nl2e * sizeof(uint64_t);
+
+        /* total size of L1 tables */
+        nl1e = nl2e * sizeof(uint64_t) / cluster_size;
+        nl1e = align_offset(nl1e, cluster_size / sizeof(uint64_t));
+        meta_size += nl1e * sizeof(uint64_t);
+
+        /* total size of refcount blocks
+         *
+         * note: every host cluster is reference-counted, including metadata
+         * (even refcount blocks are recursively included).
+         * Let:
+         *   a = total_size (this is the guest disk size)
+         *   m = meta size not including refcount blocks and refcount tables
+         *   c = cluster size
+         *   y1 = number of refcount blocks entries
+         *   y2 = meta size including everything
+         * then,
+         *   y1 = (y2 + a)/c
+         *   y2 = y1 * sizeof(u16) + y1 * sizeof(u16) * sizeof(u64) / c + m
+         * we can get y1:
+         *   y1 = (a + m) / (c - sizeof(u16) - sizeof(u16) * sizeof(u64) / c)
+         */
+        nrefblocke = (aligned_total_size + meta_size + cluster_size) /
+            (cluster_size - sizeof(uint16_t) -
+             1.0 * sizeof(uint16_t) * sizeof(uint64_t) / cluster_size);
+        nrefblocke = align_offset(nrefblocke, cluster_size / sizeof(uint16_t));
+        meta_size += nrefblocke * sizeof(uint16_t);
+
+        /* total size of refcount tables */
+        nreftablee = nrefblocke * sizeof(uint16_t) / cluster_size;
+        nreftablee = align_offset(nreftablee, cluster_size / sizeof(uint64_t));
+        meta_size += nreftablee * sizeof(uint64_t);
+
+        qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
+                            aligned_total_size + meta_size);
+        qemu_opt_set(opts, BLOCK_OPT_PREALLOC, PreallocMode_lookup[prealloc]);
+    }
+
     ret = bdrv_create_file(filename, opts, &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
@@ -1859,7 +1910,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     }
 
     /* Okay, now that we have a valid image, let's give it the right size */
-    ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE);
+    ret = bdrv_truncate(bs, total_size);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not resize image");
         goto out;
@@ -1876,7 +1927,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     }
 
     /* And if we're supposed to preallocate metadata, do that now */
-    if (prealloc) {
+    if (prealloc != PREALLOC_MODE_OFF) {
         BDRVQcowState *s = bs->opaque;
         qemu_co_mutex_lock(&s->lock);
         ret = preallocate(bs);
@@ -1912,16 +1963,17 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
     char *backing_file = NULL;
     char *backing_fmt = NULL;
     char *buf = NULL;
-    uint64_t sectors = 0;
+    uint64_t size = 0;
     int flags = 0;
     size_t cluster_size = DEFAULT_CLUSTER_SIZE;
-    int prealloc = 0;
+    PreallocMode prealloc;
     int version = 3;
     Error *local_err = NULL;
     int ret;
 
     /* Read out options */
-    sectors = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512;
+    size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                    BDRV_SECTOR_SIZE);
     backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
     backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
     if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
@@ -1930,12 +1982,11 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
     cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
                                          DEFAULT_CLUSTER_SIZE);
     buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
-    if (!buf || !strcmp(buf, "off")) {
-        prealloc = 0;
-    } else if (!strcmp(buf, "metadata")) {
-        prealloc = 1;
-    } else {
-        error_setg(errp, "Invalid preallocation mode: '%s'", buf);
+    prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
+                               PREALLOC_MODE_MAX, PREALLOC_MODE_OFF,
+                               &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
         ret = -EINVAL;
         goto finish;
     }
@@ -1957,7 +2008,7 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
         flags |= BLOCK_FLAG_LAZY_REFCOUNTS;
     }
 
-    if (backing_file && prealloc) {
+    if (backing_file && prealloc != PREALLOC_MODE_OFF) {
         error_setg(errp, "Backing file and preallocation cannot be used at "
                    "the same time");
         ret = -EINVAL;
@@ -1971,7 +2022,7 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
         goto finish;
     }
 
-    ret = qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
+    ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags,
                         cluster_size, prealloc, opts, version, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -2517,7 +2568,8 @@ static QemuOptsList qcow2_create_opts = {
         {
             .name = BLOCK_OPT_PREALLOC,
             .type = QEMU_OPT_STRING,
-            .help = "Preallocation mode (allowed values: off, metadata)"
+            .help = "Preallocation mode (allowed values: off, metadata, "
+                    "falloc, full)"
         },
         {
             .name = BLOCK_OPT_LAZY_REFCOUNTS,
diff --git a/block/qed.c b/block/qed.c
index ba395af76a..f8d9e12263 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -648,7 +648,8 @@ static int bdrv_qed_create(const char *filename, QemuOpts *opts, Error **errp)
     char *backing_fmt = NULL;
     int ret;
 
-    image_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+    image_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                          BDRV_SECTOR_SIZE);
     backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
     backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
     cluster_size = qemu_opt_get_size_del(opts,
diff --git a/block/raw-posix.c b/block/raw-posix.c
index d737f3a0c5..a253697427 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -30,6 +30,7 @@
 #include "block/thread-pool.h"
 #include "qemu/iov.h"
 #include "raw-aio.h"
+#include "qapi/util.h"
 
 #if defined(__APPLE__) && (__MACH__)
 #include <paths.h>
@@ -1365,44 +1366,92 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
     int result = 0;
     int64_t total_size = 0;
     bool nocow = false;
+    PreallocMode prealloc;
+    char *buf = NULL;
+    Error *local_err = NULL;
 
     strstart(filename, "file:", &filename);
 
     /* Read out options */
-    total_size =
-        qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;
+    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                          BDRV_SECTOR_SIZE);
     nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false);
+    buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
+    prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
+                               PREALLOC_MODE_MAX, PREALLOC_MODE_OFF,
+                               &local_err);
+    g_free(buf);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        result = -EINVAL;
+        goto out;
+    }
 
     fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
                    0644);
     if (fd < 0) {
         result = -errno;
         error_setg_errno(errp, -result, "Could not create file");
-    } else {
-        if (nocow) {
+        goto out;
+    }
+
+    if (nocow) {
 #ifdef __linux__
-            /* Set NOCOW flag to solve performance issue on fs like btrfs.
-             * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value
-             * will be ignored since any failure of this operation should not
-             * block the left work.
-             */
-            int attr;
-            if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
-                attr |= FS_NOCOW_FL;
-                ioctl(fd, FS_IOC_SETFLAGS, &attr);
-            }
-#endif
+        /* Set NOCOW flag to solve performance issue on fs like btrfs.
+         * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value
+         * will be ignored since any failure of this operation should not
+         * block the left work.
+         */
+        int attr;
+        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
+            attr |= FS_NOCOW_FL;
+            ioctl(fd, FS_IOC_SETFLAGS, &attr);
         }
+#endif
+    }
+
+    if (ftruncate(fd, total_size) != 0) {
+        result = -errno;
+        error_setg_errno(errp, -result, "Could not resize file");
+        goto out_close;
+    }
 
-        if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
-            result = -errno;
-            error_setg_errno(errp, -result, "Could not resize file");
+    if (prealloc == PREALLOC_MODE_FALLOC) {
+        /* posix_fallocate() doesn't set errno. */
+        result = -posix_fallocate(fd, 0, total_size);
+        if (result != 0) {
+            error_setg_errno(errp, -result,
+                             "Could not preallocate data for the new file");
         }
-        if (qemu_close(fd) != 0) {
-            result = -errno;
-            error_setg_errno(errp, -result, "Could not close the new file");
+    } else if (prealloc == PREALLOC_MODE_FULL) {
+        buf = g_malloc0(65536);
+        int64_t num = 0, left = total_size;
+
+        while (left > 0) {
+            num = MIN(left, 65536);
+            result = write(fd, buf, num);
+            if (result < 0) {
+                result = -errno;
+                error_setg_errno(errp, -result,
+                                 "Could not write to the new file");
+                break;
+            }
+            left -= num;
         }
+        fsync(fd);
+        g_free(buf);
+    } else if (prealloc != PREALLOC_MODE_OFF) {
+        result = -EINVAL;
+        error_setg(errp, "Unsupported preallocation mode: %s",
+                   PreallocMode_lookup[prealloc]);
     }
+
+out_close:
+    if (qemu_close(fd) != 0 && result == 0) {
+        result = -errno;
+        error_setg_errno(errp, -result, "Could not close the new file");
+    }
+out:
     return result;
 }
 
@@ -1585,6 +1634,11 @@ static QemuOptsList raw_create_opts = {
             .type = QEMU_OPT_BOOL,
             .help = "Turn off copy-on-write (valid only on btrfs)"
         },
+        {
+            .name = BLOCK_OPT_PREALLOC,
+            .type = QEMU_OPT_STRING,
+            .help = "Preallocation mode (allowed values: off, falloc, full)"
+        },
         { /* end of list */ }
     }
 };
@@ -1966,8 +2020,8 @@ static int hdev_create(const char *filename, QemuOpts *opts,
     (void)has_prefix;
 
     /* Read out options */
-    total_size =
-        qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;
+    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                          BDRV_SECTOR_SIZE);
 
     fd = qemu_open(filename, O_WRONLY | O_BINARY);
     if (fd < 0) {
@@ -1983,7 +2037,7 @@ static int hdev_create(const char *filename, QemuOpts *opts,
         error_setg(errp,
                    "The given file is neither a block nor a character device");
         ret = -ENODEV;
-    } else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE) {
+    } else if (lseek(fd, 0, SEEK_END) < total_size) {
         error_setg(errp, "Device is too small");
         ret = -ENOSPC;
     }
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 902eab6100..9bf82252b8 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -511,8 +511,8 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
     strstart(filename, "file:", &filename);
 
     /* Read out options */
-    total_size =
-        qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512;
+    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                          BDRV_SECTOR_SIZE);
 
     fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
                    0644);
@@ -521,7 +521,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
         return -EIO;
     }
     set_sparse(fd);
-    ftruncate(fd, total_size * 512);
+    ftruncate(fd, total_size);
     qemu_close(fd);
     return 0;
 }
diff --git a/block/rbd.c b/block/rbd.c
index ea969e7beb..b7f7d5ff30 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -314,7 +314,8 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
     }
 
     /* Read out options */
-    bytes = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+    bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                     BDRV_SECTOR_SIZE);
     objsize = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, 0);
     if (objsize) {
         if ((objsize - 1) & objsize) {    /* not a power of 2? */
diff --git a/block/sheepdog.c b/block/sheepdog.c
index f91afc3a5b..7da36e1f9a 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1702,7 +1702,8 @@ static int sd_create(const char *filename, QemuOpts *opts,
         goto out;
     }
 
-    s->inode.vdi_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+    s->inode.vdi_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                                 BDRV_SECTOR_SIZE);
     backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
     buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
     if (!buf || !strcmp(buf, "off")) {
diff --git a/block/ssh.c b/block/ssh.c
index cd2fd751fe..cf43bc0f89 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -700,7 +700,8 @@ static int ssh_create(const char *filename, QemuOpts *opts, Error **errp)
     ssh_state_init(&s);
 
     /* Get desired file size. */
-    total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                          BDRV_SECTOR_SIZE);
     DPRINTF("total_size=%" PRIi64, total_size);
 
     uri_options = qdict_new();
diff --git a/block/vdi.c b/block/vdi.c
index 4b10aacc3b..cfa08b0b93 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -700,7 +700,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
     logout("\n");
 
     /* Read out options. */
-    bytes = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+    bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                     BDRV_SECTOR_SIZE);
 #if defined(CONFIG_VDI_BLOCK_SIZE)
     /* TODO: Additional checks (SECTOR_SIZE * 2^n, ...). */
     block_size = qemu_opt_get_size_del(opts,
diff --git a/block/vhdx.c b/block/vhdx.c
index 87c99fc260..796b7bd884 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1766,7 +1766,8 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
     VHDXImageType image_type;
     Error *local_err = NULL;
 
-    image_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+    image_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                          BDRV_SECTOR_SIZE);
     log_size = qemu_opt_get_size_del(opts, VHDX_BLOCK_OPT_LOG_SIZE, 0);
     block_size = qemu_opt_get_size_del(opts, VHDX_BLOCK_OPT_BLOCK_SIZE, 0);
     type = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
diff --git a/block/vmdk.c b/block/vmdk.c
index a1cb91131e..afdea1a8b6 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1807,7 +1807,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
         goto exit;
     }
     /* Read out options */
-    total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                          BDRV_SECTOR_SIZE);
     adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE);
     backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
     if (qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false)) {
diff --git a/block/vpc.c b/block/vpc.c
index 055efc42d2..4947369d48 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -489,7 +489,7 @@ static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
     BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
     VHDFooter *footer = (VHDFooter *) s->footer_buf;
 
-    if (cpu_to_be32(footer->type) != VHD_FIXED) {
+    if (be32_to_cpu(footer->type) != VHD_FIXED) {
         bdi->cluster_size = s->block_size;
     }
 
@@ -506,7 +506,7 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
     int64_t sectors, sectors_per_block;
     VHDFooter *footer = (VHDFooter *) s->footer_buf;
 
-    if (cpu_to_be32(footer->type) == VHD_FIXED) {
+    if (be32_to_cpu(footer->type) == VHD_FIXED) {
         return bdrv_read(bs->file, sector_num, buf, nb_sectors);
     }
     while (nb_sectors > 0) {
@@ -555,7 +555,7 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num,
     int ret;
     VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 
-    if (cpu_to_be32(footer->type) == VHD_FIXED) {
+    if (be32_to_cpu(footer->type) == VHD_FIXED) {
         return bdrv_write(bs->file, sector_num, buf, nb_sectors);
     }
     while (nb_sectors > 0) {
@@ -757,7 +757,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
     BlockDriverState *bs = NULL;
 
     /* Read out options */
-    total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                          BDRV_SECTOR_SIZE);
     disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
     if (disk_type_param) {
         if (!strcmp(disk_type_param, "dynamic")) {
@@ -857,7 +858,7 @@ static int vpc_has_zero_init(BlockDriverState *bs)
     BDRVVPCState *s = bs->opaque;
     VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 
-    if (cpu_to_be32(footer->type) == VHD_FIXED) {
+    if (be32_to_cpu(footer->type) == VHD_FIXED) {
         return bdrv_has_zero_init(bs->file);
     } else {
         return 1;
diff --git a/blockdev.c b/blockdev.c
index e919566c16..b361fbb964 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1739,6 +1739,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
 {
     const char *id = qdict_get_str(qdict, "id");
     BlockDriverState *bs;
+    DriveInfo *dinfo;
     AioContext *aio_context;
     Error *local_err = NULL;
 
@@ -1748,6 +1749,13 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
         return -1;
     }
 
+    dinfo = drive_get_by_blockdev(bs);
+    if (dinfo && !dinfo->enable_auto_del) {
+        error_report("Deleting device added with blockdev-add"
+                     " is not supported");
+        return -1;
+    }
+
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
@@ -1775,7 +1783,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
         bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
                           BLOCKDEV_ON_ERROR_REPORT);
     } else {
-        drive_del(drive_get_by_blockdev(bs));
+        drive_del(dinfo);
     }
 
     aio_context_release(aio_context);
diff --git a/dma-helpers.c b/dma-helpers.c
index 499b52bc5a..f6811fa86b 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -277,5 +277,5 @@ uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg)
 void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
                     QEMUSGList *sg, enum BlockAcctType type)
 {
-    bdrv_acct_start(bs, cookie, sg->size, type);
+    block_acct_start(bdrv_get_stats(bs), cookie, sg->size, type);
 }
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index b55188cb82..5458f9d25f 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -164,8 +164,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
      * block jobs that can conflict.
      */
     if (bdrv_op_is_blocked(blk->conf.bs, BLOCK_OP_TYPE_DATAPLANE, &local_err)) {
-        error_report("cannot start dataplane thread: %s",
-                      error_get_pretty(local_err));
+        error_setg(errp, "cannot start dataplane thread: %s",
+                   error_get_pretty(local_err));
         error_free(local_err);
         return;
     }
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 04459e583c..b010c9b00f 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -197,7 +197,7 @@ static void nvme_rw_cb(void *opaque, int ret)
     NvmeCtrl *n = sq->ctrl;
     NvmeCQueue *cq = n->cq[sq->cqid];
 
-    bdrv_acct_done(n->conf.bs, &req->acct);
+    block_acct_done(bdrv_get_stats(n->conf.bs), &req->acct);
     if (!ret) {
         req->status = NVME_SUCCESS;
     } else {
@@ -232,7 +232,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     assert((nlb << data_shift) == req->qsg.size);
 
     dma_acct_start(n->conf.bs, &req->acct, &req->qsg, is_write ?
-        BDRV_ACCT_WRITE : BDRV_ACCT_READ);
+        BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
     req->aiocb = is_write ?
         dma_bdrv_write(n->conf.bs, &req->qsg, aio_slba, nvme_rw_cb, req) :
         dma_bdrv_read(n->conf.bs, &req->qsg, aio_slba, nvme_rw_cb, req);
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index a7f28275f4..38ad38f49f 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -74,7 +74,7 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
         s->rq = req;
     } else if (action == BLOCK_ERROR_ACTION_REPORT) {
         virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
-        bdrv_acct_done(s->bs, &req->acct);
+        block_acct_done(bdrv_get_stats(s->bs), &req->acct);
         virtio_blk_free_request(req);
     }
 
@@ -96,7 +96,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
     }
 
     virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
-    bdrv_acct_done(req->dev->bs, &req->acct);
+    block_acct_done(bdrv_get_stats(req->dev->bs), &req->acct);
     virtio_blk_free_request(req);
 }
 
@@ -111,7 +111,7 @@ static void virtio_blk_flush_complete(void *opaque, int ret)
     }
 
     virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
-    bdrv_acct_done(req->dev->bs, &req->acct);
+    block_acct_done(bdrv_get_stats(req->dev->bs), &req->acct);
     virtio_blk_free_request(req);
 }
 
@@ -279,7 +279,8 @@ void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb)
 
 static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 {
-    bdrv_acct_start(req->dev->bs, &req->acct, 0, BDRV_ACCT_FLUSH);
+    block_acct_start(bdrv_get_stats(req->dev->bs), &req->acct, 0,
+                     BLOCK_ACCT_FLUSH);
 
     /*
      * Make sure all outstanding writes are posted to the backing device.
@@ -322,7 +323,8 @@ static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
         return;
     }
 
-    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
+    block_acct_start(bdrv_get_stats(req->dev->bs), &req->acct, req->qiov.size,
+                     BLOCK_ACCT_WRITE);
 
     if (mrb->num_writes == 32) {
         virtio_submit_multiwrite(req->dev->bs, mrb);
@@ -353,7 +355,8 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
         return;
     }
 
-    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
+    block_acct_start(bdrv_get_stats(req->dev->bs), &req->acct, req->qiov.size,
+                     BLOCK_ACCT_READ);
     bdrv_aio_readv(req->dev->bs, sector, &req->qiov,
                    req->qiov.size / BDRV_SECTOR_SIZE,
                    virtio_blk_rw_complete, req);
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index a221d0bfca..0d27ab16a7 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -493,7 +493,7 @@ static void qemu_aio_complete(void *opaque, int ret)
             break;
         }
     case BLKIF_OP_READ:
-        bdrv_acct_done(ioreq->blkdev->bs, &ioreq->acct);
+        block_acct_done(bdrv_get_stats(ioreq->blkdev->bs), &ioreq->acct);
         break;
     case BLKIF_OP_DISCARD:
     default:
@@ -518,7 +518,8 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
 
     switch (ioreq->req.operation) {
     case BLKIF_OP_READ:
-        bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_READ);
+        block_acct_start(bdrv_get_stats(blkdev->bs), &ioreq->acct,
+                         ioreq->v.size, BLOCK_ACCT_READ);
         ioreq->aio_inflight++;
         bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE,
                        &ioreq->v, ioreq->v.size / BLOCK_SIZE,
@@ -530,7 +531,8 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
             break;
         }
 
-        bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_WRITE);
+        block_acct_start(bdrv_get_stats(blkdev->bs), &ioreq->acct,
+                         ioreq->v.size, BLOCK_ACCT_WRITE);
         ioreq->aio_inflight++;
         bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE,
                         &ioreq->v, ioreq->v.size / BLOCK_SIZE,
@@ -852,28 +854,25 @@ static int blk_connect(struct XenDevice *xendev)
     blkdev->dinfo = drive_get(IF_XEN, 0, index);
     if (!blkdev->dinfo) {
         Error *local_err = NULL;
+        BlockDriver *drv;
+
         /* setup via xenbus -> create new block driver instance */
         xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
-        blkdev->bs = bdrv_new(blkdev->dev, &local_err);
-        if (local_err) {
-            blkdev->bs = NULL;
-        }
-        if (blkdev->bs) {
-            BlockDriver *drv = bdrv_find_whitelisted_format(blkdev->fileproto,
-                                                           readonly);
-            if (bdrv_open(&blkdev->bs, blkdev->filename, NULL, NULL, qflags,
-                          drv, &local_err) != 0)
-            {
-                xen_be_printf(&blkdev->xendev, 0, "error: %s\n",
-                              error_get_pretty(local_err));
-                error_free(local_err);
-                bdrv_unref(blkdev->bs);
-                blkdev->bs = NULL;
-            }
-        }
+        blkdev->bs = bdrv_new(blkdev->dev, NULL);
         if (!blkdev->bs) {
             return -1;
         }
+
+        drv = bdrv_find_whitelisted_format(blkdev->fileproto, readonly);
+        if (bdrv_open(&blkdev->bs, blkdev->filename, NULL, NULL, qflags,
+                      drv, &local_err) != 0) {
+            xen_be_printf(&blkdev->xendev, 0, "error: %s\n",
+                          error_get_pretty(local_err));
+            error_free(local_err);
+            bdrv_unref(blkdev->bs);
+            blkdev->bs = NULL;
+            return -1;
+        }
     } else {
         /* setup via qemu cmdline -> already setup for us */
         xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n");
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 0ee713b0ff..ba69de30e0 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -809,7 +809,8 @@ static void ncq_cb(void *opaque, int ret)
     DPRINTF(ncq_tfs->drive->port_no, "NCQ transfer tag %d finished\n",
             ncq_tfs->tag);
 
-    bdrv_acct_done(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->acct);
+    block_acct_done(bdrv_get_stats(ncq_tfs->drive->port.ifs[0].bs),
+                    &ncq_tfs->acct);
     qemu_sglist_destroy(&ncq_tfs->sglist);
     ncq_tfs->used = 0;
 }
@@ -860,7 +861,7 @@ static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
                     ncq_tfs->tag, ncq_tfs->lba);
 
             dma_acct_start(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->acct,
-                           &ncq_tfs->sglist, BDRV_ACCT_READ);
+                           &ncq_tfs->sglist, BLOCK_ACCT_READ);
             ncq_tfs->aiocb = dma_bdrv_read(ncq_tfs->drive->port.ifs[0].bs,
                                            &ncq_tfs->sglist, ncq_tfs->lba,
                                            ncq_cb, ncq_tfs);
@@ -873,7 +874,7 @@ static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
                     ncq_tfs->tag, ncq_tfs->lba);
 
             dma_acct_start(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->acct,
-                           &ncq_tfs->sglist, BDRV_ACCT_WRITE);
+                           &ncq_tfs->sglist, BLOCK_ACCT_WRITE);
             ncq_tfs->aiocb = dma_bdrv_write(ncq_tfs->drive->port.ifs[0].bs,
                                             &ncq_tfs->sglist, ncq_tfs->lba,
                                             ncq_cb, ncq_tfs);
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 3d92b52dbc..6d52cda4cc 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -110,14 +110,16 @@ static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
 
     switch(sector_size) {
     case 2048:
-        bdrv_acct_start(s->bs, &s->acct, 4 * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
+        block_acct_start(bdrv_get_stats(s->bs), &s->acct,
+                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
         ret = bdrv_read(s->bs, (int64_t)lba << 2, buf, 4);
-        bdrv_acct_done(s->bs, &s->acct);
+        block_acct_done(bdrv_get_stats(s->bs), &s->acct);
         break;
     case 2352:
-        bdrv_acct_start(s->bs, &s->acct, 4 * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
+        block_acct_start(bdrv_get_stats(s->bs), &s->acct,
+                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
         ret = bdrv_read(s->bs, (int64_t)lba << 2, buf + 16, 4);
-        bdrv_acct_done(s->bs, &s->acct);
+        block_acct_done(bdrv_get_stats(s->bs), &s->acct);
         if (ret < 0)
             return ret;
         cd_data_to_raw(buf, lba);
@@ -253,7 +255,8 @@ static void ide_atapi_cmd_reply(IDEState *s, int size, int max_size)
     s->io_buffer_index = 0;
 
     if (s->atapi_dma) {
-        bdrv_acct_start(s->bs, &s->acct, size, BDRV_ACCT_READ);
+        block_acct_start(bdrv_get_stats(s->bs), &s->acct, size,
+                         BLOCK_ACCT_READ);
         s->status = READY_STAT | SEEK_STAT | DRQ_STAT;
         ide_start_dma(s, ide_atapi_cmd_read_dma_cb);
     } else {
@@ -354,7 +357,7 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
     return;
 
 eot:
-    bdrv_acct_done(s->bs, &s->acct);
+    block_acct_done(bdrv_get_stats(s->bs), &s->acct);
     ide_set_inactive(s, false);
 }
 
@@ -369,7 +372,8 @@ static void ide_atapi_cmd_read_dma(IDEState *s, int lba, int nb_sectors,
     s->io_buffer_size = 0;
     s->cd_sector_size = sector_size;
 
-    bdrv_acct_start(s->bs, &s->acct, s->packet_transfer_size, BDRV_ACCT_READ);
+    block_acct_start(bdrv_get_stats(s->bs), &s->acct, s->packet_transfer_size,
+                     BLOCK_ACCT_READ);
 
     /* XXX: check if BUSY_STAT should be set */
     s->status = READY_STAT | SEEK_STAT | DRQ_STAT | BUSY_STAT;
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 191f89321e..6fba056783 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -568,7 +568,7 @@ static void ide_sector_read_cb(void *opaque, int ret)
     s->pio_aiocb = NULL;
     s->status &= ~BUSY_STAT;
 
-    bdrv_acct_done(s->bs, &s->acct);
+    block_acct_done(bdrv_get_stats(s->bs), &s->acct);
     if (ret != 0) {
         if (ide_handle_rw_error(s, -ret, IDE_RETRY_PIO |
                                 IDE_RETRY_READ)) {
@@ -624,7 +624,8 @@ void ide_sector_read(IDEState *s)
     s->iov.iov_len  = n * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&s->qiov, &s->iov, 1);
 
-    bdrv_acct_start(s->bs, &s->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
+    block_acct_start(bdrv_get_stats(s->bs), &s->acct,
+                     n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
     s->pio_aiocb = bdrv_aio_readv(s->bs, sector_num, &s->qiov, n,
                                   ide_sector_read_cb, s);
 }
@@ -756,7 +757,7 @@ void ide_dma_cb(void *opaque, int ret)
 
 eot:
     if (s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) {
-        bdrv_acct_done(s->bs, &s->acct);
+        block_acct_done(bdrv_get_stats(s->bs), &s->acct);
     }
     ide_set_inactive(s, stay_active);
 }
@@ -770,12 +771,12 @@ static void ide_sector_start_dma(IDEState *s, enum ide_dma_cmd dma_cmd)
 
     switch (dma_cmd) {
     case IDE_DMA_READ:
-        bdrv_acct_start(s->bs, &s->acct, s->nsector * BDRV_SECTOR_SIZE,
-                        BDRV_ACCT_READ);
+        block_acct_start(bdrv_get_stats(s->bs), &s->acct,
+                         s->nsector * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
         break;
     case IDE_DMA_WRITE:
-        bdrv_acct_start(s->bs, &s->acct, s->nsector * BDRV_SECTOR_SIZE,
-                        BDRV_ACCT_WRITE);
+        block_acct_start(bdrv_get_stats(s->bs), &s->acct,
+                         s->nsector * BDRV_SECTOR_SIZE, BLOCK_ACCT_WRITE);
         break;
     default:
         break;
@@ -802,7 +803,7 @@ static void ide_sector_write_cb(void *opaque, int ret)
     IDEState *s = opaque;
     int n;
 
-    bdrv_acct_done(s->bs, &s->acct);
+    block_acct_done(bdrv_get_stats(s->bs), &s->acct);
 
     s->pio_aiocb = NULL;
     s->status &= ~BUSY_STAT;
@@ -869,7 +870,8 @@ void ide_sector_write(IDEState *s)
     s->iov.iov_len  = n * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&s->qiov, &s->iov, 1);
 
-    bdrv_acct_start(s->bs, &s->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
+    block_acct_start(bdrv_get_stats(s->bs), &s->acct,
+                     n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
     s->pio_aiocb = bdrv_aio_writev(s->bs, sector_num, &s->qiov, n,
                                    ide_sector_write_cb, s);
 }
@@ -888,7 +890,7 @@ static void ide_flush_cb(void *opaque, int ret)
     }
 
     if (s->bs) {
-        bdrv_acct_done(s->bs, &s->acct);
+        block_acct_done(bdrv_get_stats(s->bs), &s->acct);
     }
     s->status = READY_STAT | SEEK_STAT;
     ide_cmd_done(s);
@@ -903,7 +905,7 @@ void ide_flush_cache(IDEState *s)
     }
 
     s->status |= BUSY_STAT;
-    bdrv_acct_start(s->bs, &s->acct, 0, BDRV_ACCT_FLUSH);
+    block_acct_start(bdrv_get_stats(s->bs), &s->acct, 0, BLOCK_ACCT_FLUSH);
     s->pio_aiocb = bdrv_aio_flush(s->bs, ide_flush_cb, s);
 }
 
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index b0c0d400d9..cefc85cf12 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -171,7 +171,7 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
 
 done:
     MACIO_DPRINTF("done DMA\n");
-    bdrv_acct_done(s->bs, &s->acct);
+    block_acct_done(bdrv_get_stats(s->bs), &s->acct);
     io->dma_end(opaque);
 }
 
@@ -352,7 +352,7 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
 
 done:
     if (s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) {
-        bdrv_acct_done(s->bs, &s->acct);
+        block_acct_done(bdrv_get_stats(s->bs), &s->acct);
     }
     io->dma_end(io);
 }
@@ -370,8 +370,8 @@ static void pmac_ide_transfer(DBDMA_io *io)
         /* Handle non-block ATAPI DMA transfers */
         if (s->lba == -1) {
             s->io_buffer_size = MIN(io->len, s->packet_transfer_size);
-            bdrv_acct_start(s->bs, &s->acct, s->io_buffer_size,
-                            BDRV_ACCT_READ);
+            block_acct_start(bdrv_get_stats(s->bs), &s->acct, s->io_buffer_size,
+                             BLOCK_ACCT_READ);
             MACIO_DPRINTF("non-block ATAPI DMA transfer size: %d\n",
                           s->io_buffer_size);
 
@@ -382,22 +382,25 @@ static void pmac_ide_transfer(DBDMA_io *io)
             m->dma_active = false;
 
             MACIO_DPRINTF("end of non-block ATAPI DMA transfer\n");
-            bdrv_acct_done(s->bs, &s->acct);
+            block_acct_done(bdrv_get_stats(s->bs), &s->acct);
             io->dma_end(io);
             return;
         }
 
-        bdrv_acct_start(s->bs, &s->acct, io->len, BDRV_ACCT_READ);
+        block_acct_start(bdrv_get_stats(s->bs), &s->acct, io->len,
+                         BLOCK_ACCT_READ);
         pmac_ide_atapi_transfer_cb(io, 0);
         return;
     }
 
     switch (s->dma_cmd) {
     case IDE_DMA_READ:
-        bdrv_acct_start(s->bs, &s->acct, io->len, BDRV_ACCT_READ);
+        block_acct_start(bdrv_get_stats(s->bs), &s->acct, io->len,
+                         BLOCK_ACCT_READ);
         break;
     case IDE_DMA_WRITE:
-        bdrv_acct_start(s->bs, &s->acct, io->len, BDRV_ACCT_WRITE);
+        block_acct_start(bdrv_get_stats(s->bs), &s->acct, io->len,
+                         BLOCK_ACCT_WRITE);
         break;
     default:
         break;
diff --git a/hw/ide/mmio.c b/hw/ide/mmio.c
index 01c1d0e6ce..334c8ccf08 100644
--- a/hw/ide/mmio.c
+++ b/hw/ide/mmio.c
@@ -82,7 +82,7 @@ static void mmio_ide_write(void *opaque, hwaddr addr,
 static const MemoryRegionOps mmio_ide_ops = {
     .read = mmio_ide_read,
     .write = mmio_ide_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static uint64_t mmio_ide_status_read(void *opaque, hwaddr addr,
@@ -102,7 +102,7 @@ static void mmio_ide_cmd_write(void *opaque, hwaddr addr,
 static const MemoryRegionOps mmio_ide_cs_ops = {
     .read = mmio_ide_status_read,
     .write = mmio_ide_cmd_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static const VMStateDescription vmstate_ide_mmio = {
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index 59319eb6c1..49e78a738a 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -182,7 +182,6 @@ int pci_piix3_xen_ide_unplug(DeviceState *dev)
             if (ds) {
                 bdrv_detach_dev(di->bdrv, ds);
             }
-            bdrv_close(di->bdrv);
             pci_ide->bus[di->bus].ifs[di->unit].bs = NULL;
             drive_del(di);
         }
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index e34a54404b..9645d0194a 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -183,7 +183,7 @@ static void scsi_aio_complete(void *opaque, int ret)
 
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
-    bdrv_acct_done(s->qdev.conf.bs, &r->acct);
+    block_acct_done(bdrv_get_stats(s->qdev.conf.bs), &r->acct);
     if (r->req.io_canceled) {
         goto done;
     }
@@ -237,7 +237,8 @@ static void scsi_write_do_fua(SCSIDiskReq *r)
     }
 
     if (scsi_is_cmd_fua(&r->req.cmd)) {
-        bdrv_acct_start(s->qdev.conf.bs, &r->acct, 0, BDRV_ACCT_FLUSH);
+        block_acct_start(bdrv_get_stats(s->qdev.conf.bs), &r->acct, 0,
+                         BLOCK_ACCT_FLUSH);
         r->req.aiocb = bdrv_aio_flush(s->qdev.conf.bs, scsi_aio_complete, r);
         return;
     }
@@ -257,7 +258,7 @@ static void scsi_dma_complete_noio(void *opaque, int ret)
 
     if (r->req.aiocb != NULL) {
         r->req.aiocb = NULL;
-        bdrv_acct_done(s->qdev.conf.bs, &r->acct);
+        block_acct_done(bdrv_get_stats(s->qdev.conf.bs), &r->acct);
     }
     if (r->req.io_canceled) {
         goto done;
@@ -300,7 +301,7 @@ static void scsi_read_complete(void * opaque, int ret)
 
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
-    bdrv_acct_done(s->qdev.conf.bs, &r->acct);
+    block_acct_done(bdrv_get_stats(s->qdev.conf.bs), &r->acct);
     if (r->req.io_canceled) {
         goto done;
     }
@@ -333,7 +334,7 @@ static void scsi_do_read(void *opaque, int ret)
 
     if (r->req.aiocb != NULL) {
         r->req.aiocb = NULL;
-        bdrv_acct_done(s->qdev.conf.bs, &r->acct);
+        block_acct_done(bdrv_get_stats(s->qdev.conf.bs), &r->acct);
     }
     if (r->req.io_canceled) {
         goto done;
@@ -349,13 +350,14 @@ static void scsi_do_read(void *opaque, int ret)
     scsi_req_ref(&r->req);
 
     if (r->req.sg) {
-        dma_acct_start(s->qdev.conf.bs, &r->acct, r->req.sg, BDRV_ACCT_READ);
+        dma_acct_start(s->qdev.conf.bs, &r->acct, r->req.sg, BLOCK_ACCT_READ);
         r->req.resid -= r->req.sg->size;
         r->req.aiocb = dma_bdrv_read(s->qdev.conf.bs, r->req.sg, r->sector,
                                      scsi_dma_complete, r);
     } else {
         n = scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
-        bdrv_acct_start(s->qdev.conf.bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
+        block_acct_start(bdrv_get_stats(s->qdev.conf.bs), &r->acct,
+                         n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
         r->req.aiocb = bdrv_aio_readv(s->qdev.conf.bs, r->sector, &r->qiov, n,
                                       scsi_read_complete, r);
     }
@@ -399,7 +401,8 @@ static void scsi_read_data(SCSIRequest *req)
     first = !r->started;
     r->started = true;
     if (first && scsi_is_cmd_fua(&r->req.cmd)) {
-        bdrv_acct_start(s->qdev.conf.bs, &r->acct, 0, BDRV_ACCT_FLUSH);
+        block_acct_start(bdrv_get_stats(s->qdev.conf.bs), &r->acct, 0,
+                         BLOCK_ACCT_FLUSH);
         r->req.aiocb = bdrv_aio_flush(s->qdev.conf.bs, scsi_do_read, r);
     } else {
         scsi_do_read(r, 0);
@@ -453,7 +456,7 @@ static void scsi_write_complete(void * opaque, int ret)
 
     if (r->req.aiocb != NULL) {
         r->req.aiocb = NULL;
-        bdrv_acct_done(s->qdev.conf.bs, &r->acct);
+        block_acct_done(bdrv_get_stats(s->qdev.conf.bs), &r->acct);
     }
     if (r->req.io_canceled) {
         goto done;
@@ -522,13 +525,14 @@ static void scsi_write_data(SCSIRequest *req)
     }
 
     if (r->req.sg) {
-        dma_acct_start(s->qdev.conf.bs, &r->acct, r->req.sg, BDRV_ACCT_WRITE);
+        dma_acct_start(s->qdev.conf.bs, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
         r->req.resid -= r->req.sg->size;
         r->req.aiocb = dma_bdrv_write(s->qdev.conf.bs, r->req.sg, r->sector,
                                       scsi_dma_complete, r);
     } else {
         n = r->qiov.size / 512;
-        bdrv_acct_start(s->qdev.conf.bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_WRITE);
+        block_acct_start(bdrv_get_stats(s->qdev.conf.bs), &r->acct,
+                         n * BDRV_SECTOR_SIZE, BLOCK_ACCT_WRITE);
         r->req.aiocb = bdrv_aio_writev(s->qdev.conf.bs, r->sector, &r->qiov, n,
                                        scsi_write_complete, r);
     }
@@ -1496,7 +1500,8 @@ static void scsi_disk_emulate_mode_select(SCSIDiskReq *r, uint8_t *inbuf)
     if (!bdrv_enable_write_cache(s->qdev.conf.bs)) {
         /* The request is used as the AIO opaque value, so add a ref.  */
         scsi_req_ref(&r->req);
-        bdrv_acct_start(s->qdev.conf.bs, &r->acct, 0, BDRV_ACCT_FLUSH);
+        block_acct_start(bdrv_get_stats(s->qdev.conf.bs), &r->acct, 0,
+                         BLOCK_ACCT_FLUSH);
         r->req.aiocb = bdrv_aio_flush(s->qdev.conf.bs, scsi_aio_complete, r);
         return;
     }
@@ -1647,7 +1652,7 @@ static void scsi_write_same_complete(void *opaque, int ret)
 
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
-    bdrv_acct_done(s->qdev.conf.bs, &r->acct);
+    block_acct_done(bdrv_get_stats(s->qdev.conf.bs), &r->acct);
     if (r->req.io_canceled) {
         goto done;
     }
@@ -1662,7 +1667,8 @@ static void scsi_write_same_complete(void *opaque, int ret)
     data->sector += data->iov.iov_len / 512;
     data->iov.iov_len = MIN(data->nb_sectors * 512, data->iov.iov_len);
     if (data->iov.iov_len) {
-        bdrv_acct_start(s->qdev.conf.bs, &r->acct, data->iov.iov_len, BDRV_ACCT_WRITE);
+        block_acct_start(bdrv_get_stats(s->qdev.conf.bs), &r->acct,
+                         data->iov.iov_len, BLOCK_ACCT_WRITE);
         r->req.aiocb = bdrv_aio_writev(s->qdev.conf.bs, data->sector,
                                        &data->qiov, data->iov.iov_len / 512,
                                        scsi_write_same_complete, data);
@@ -1708,8 +1714,9 @@ static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
 
         /* The request is used as the AIO opaque value, so add a ref.  */
         scsi_req_ref(&r->req);
-        bdrv_acct_start(s->qdev.conf.bs, &r->acct, nb_sectors * s->qdev.blocksize,
-                        BDRV_ACCT_WRITE);
+        block_acct_start(bdrv_get_stats(s->qdev.conf.bs), &r->acct,
+                         nb_sectors * s->qdev.blocksize,
+                        BLOCK_ACCT_WRITE);
         r->req.aiocb = bdrv_aio_write_zeroes(s->qdev.conf.bs,
                                              r->req.cmd.lba * (s->qdev.blocksize / 512),
                                              nb_sectors * (s->qdev.blocksize / 512),
@@ -1730,7 +1737,8 @@ static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
     }
 
     scsi_req_ref(&r->req);
-    bdrv_acct_start(s->qdev.conf.bs, &r->acct, data->iov.iov_len, BDRV_ACCT_WRITE);
+    block_acct_start(bdrv_get_stats(s->qdev.conf.bs), &r->acct,
+                     data->iov.iov_len, BLOCK_ACCT_WRITE);
     r->req.aiocb = bdrv_aio_writev(s->qdev.conf.bs, data->sector,
                                    &data->qiov, data->iov.iov_len / 512,
                                    scsi_write_same_complete, data);
@@ -1994,7 +2002,8 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
     case SYNCHRONIZE_CACHE:
         /* The request is used as the AIO opaque value, so add a ref.  */
         scsi_req_ref(&r->req);
-        bdrv_acct_start(s->qdev.conf.bs, &r->acct, 0, BDRV_ACCT_FLUSH);
+        block_acct_start(bdrv_get_stats(s->qdev.conf.bs), &r->acct, 0,
+                         BLOCK_ACCT_FLUSH);
         r->req.aiocb = bdrv_aio_flush(s->qdev.conf.bs, scsi_aio_complete, r);
         return 0;
     case SEEK_10:
diff --git a/include/block/accounting.h b/include/block/accounting.h
new file mode 100644
index 0000000000..50b42b3808
--- /dev/null
+++ b/include/block/accounting.h
@@ -0,0 +1,57 @@
+/*
+ * QEMU System Emulator block accounting
+ *
+ * Copyright (c) 2011 Christoph Hellwig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_ACCOUNTING_H
+#define BLOCK_ACCOUNTING_H
+
+#include <stdint.h>
+
+#include "qemu/typedefs.h"
+
+enum BlockAcctType {
+    BLOCK_ACCT_READ,
+    BLOCK_ACCT_WRITE,
+    BLOCK_ACCT_FLUSH,
+    BLOCK_MAX_IOTYPE,
+};
+
+typedef struct BlockAcctStats {
+    uint64_t nr_bytes[BLOCK_MAX_IOTYPE];
+    uint64_t nr_ops[BLOCK_MAX_IOTYPE];
+    uint64_t total_time_ns[BLOCK_MAX_IOTYPE];
+    uint64_t wr_highest_sector;
+} BlockAcctStats;
+
+typedef struct BlockAcctCookie {
+    int64_t bytes;
+    int64_t start_time_ns;
+    enum BlockAcctType type;
+} BlockAcctCookie;
+
+void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
+                      int64_t bytes, enum BlockAcctType type);
+void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie);
+void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
+                               unsigned int nb_sectors);
+
+#endif
diff --git a/include/block/block.h b/include/block/block.h
index 8f4ad16d8f..07d6d8e67e 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -5,6 +5,7 @@
 #include "qemu-common.h"
 #include "qemu/option.h"
 #include "block/coroutine.h"
+#include "block/accounting.h"
 #include "qapi/qmp/qobject.h"
 #include "qapi-types.h"
 
@@ -485,23 +486,6 @@ void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
 bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
 
-enum BlockAcctType {
-    BDRV_ACCT_READ,
-    BDRV_ACCT_WRITE,
-    BDRV_ACCT_FLUSH,
-    BDRV_MAX_IOTYPE,
-};
-
-typedef struct BlockAcctCookie {
-    int64_t bytes;
-    int64_t start_time_ns;
-    enum BlockAcctType type;
-} BlockAcctCookie;
-
-void bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
-        int64_t bytes, enum BlockAcctType type);
-void bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie);
-
 typedef enum {
     BLKDBG_L1_UPDATE,
 
@@ -591,4 +575,6 @@ void bdrv_io_plug(BlockDriverState *bs);
 void bdrv_io_unplug(BlockDriverState *bs);
 void bdrv_flush_io_queue(BlockDriverState *bs);
 
+BlockAcctStats *bdrv_get_stats(BlockDriverState *bs);
+
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8a61215ac0..8d86a6c1eb 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -24,6 +24,7 @@
 #ifndef BLOCK_INT_H
 #define BLOCK_INT_H
 
+#include "block/accounting.h"
 #include "block/block.h"
 #include "qemu/option.h"
 #include "qemu/queue.h"
@@ -359,10 +360,7 @@ struct BlockDriverState {
     bool         io_limits_enabled;
 
     /* I/O stats (display with "info blockstats"). */
-    uint64_t nr_bytes[BDRV_MAX_IOTYPE];
-    uint64_t nr_ops[BDRV_MAX_IOTYPE];
-    uint64_t total_time_ns[BDRV_MAX_IOTYPE];
-    uint64_t wr_highest_sector;
+    BlockAcctStats stats;
 
     /* I/O Limits */
     BlockLimits bl;
diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h
index 32afcdd1d6..472375227a 100644
--- a/include/block/thread-pool.h
+++ b/include/block/thread-pool.h
@@ -18,11 +18,7 @@
 #ifndef QEMU_THREAD_POOL_H
 #define QEMU_THREAD_POOL_H 1
 
-#include "qemu-common.h"
-#include "qemu/queue.h"
-#include "qemu/thread.h"
-#include "block/coroutine.h"
-#include "block/block_int.h"
+#include "block/block.h"
 
 typedef int ThreadPoolFunc(void *opaque);
 
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index afb7b8db3a..cf61154658 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -18,6 +18,7 @@
 #include "hw/block/block.h"
 #include "sysemu/iothread.h"
 #include "block/block.h"
+#include "block/accounting.h"
 
 #define TYPE_VIRTIO_BLK "virtio-blk-device"
 #define VIRTIO_BLK(obj) \
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index 00f21f3da2..73ff86d4b9 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -15,6 +15,7 @@
 #include "exec/address-spaces.h"
 #include "hw/hw.h"
 #include "block/block.h"
+#include "block/accounting.h"
 #include "sysemu/kvm.h"
 
 typedef struct ScatterGatherEntry ScatterGatherEntry;
diff --git a/qapi/block-core.json b/qapi/block-core.json
index a685d02728..95dcd81ed4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -336,6 +336,7 @@
 #
 # @io-status: #optional @BlockDeviceIoStatus. Only present if the device
 #             supports it and the VM is configured to stop on errors
+#             (supported device models: virtio-blk, ide, scsi-disk)
 #
 # @inserted: #optional @BlockDeviceInfo describing the device if media is
 #            present
@@ -1587,6 +1588,15 @@
 #
 # @action: action that has been taken
 #
+# @nospace: #optional true if I/O error was caused due to a no-space
+#           condition. This key is only present if query-block's
+#           io-status is present, please see query-block documentation
+#           for more information (since: 2.2)
+#
+# @reason: human readable string describing the error cause.
+#          (This field is a debugging aid for humans, it should not
+#           be parsed by applications) (since: 2.2)
+#
 # Note: If action is "stop", a STOP event will eventually follow the
 # BLOCK_IO_ERROR event
 #
@@ -1594,7 +1604,8 @@
 ##
 { 'event': 'BLOCK_IO_ERROR',
   'data': { 'device': 'str', 'operation': 'IoOperationType',
-            'action': 'BlockErrorAction' } }
+            'action': 'BlockErrorAction', '*nospace': 'bool',
+            'reason': 'str' } }
 
 ##
 # @BLOCK_JOB_COMPLETED
@@ -1697,3 +1708,20 @@
             'len'   : 'int',
             'offset': 'int',
             'speed' : 'int' } }
+
+# @PreallocMode
+#
+# Preallocation mode of QEMU image file
+#
+# @off: no preallocation
+# @metadata: preallocate only for metadata
+# @falloc: like @full preallocation but allocate disk space by
+#          posix_fallocate() rather than writing zeros.
+# @full: preallocate all data by writing zeros to device to ensure disk
+#        space is really available. @full preallocation also sets up
+#        metadata correctly.
+#
+# Since 2.2
+##
+{ 'enum': 'PreallocMode',
+  'data': [ 'off', 'metadata', 'falloc', 'full' ] }
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 2b232ae8b7..ef3be72ae2 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -527,6 +527,15 @@ Linux or NTFS on Windows), then only the written sectors will reserve
 space. Use @code{qemu-img info} to know the real size used by the
 image or @code{ls -ls} on Unix/Linux.
 
+Supported options:
+@table @code
+@item preallocation
+Preallocation mode (allowed values: @code{off}, @code{falloc}, @code{full}).
+@code{falloc} mode preallocates space for image by calling posix_fallocate().
+@code{full} mode preallocates space for image by writing zeros to underlying
+storage.
+@end table
+
 @item qcow2
 QEMU image format, the most versatile format. Use it to have smaller
 images (useful if your filesystem does not supports holes, for example
@@ -575,9 +584,11 @@ sizes can improve the image file size whereas larger cluster sizes generally
 provide better performance.
 
 @item preallocation
-Preallocation mode (allowed values: off, metadata). An image with preallocated
-metadata is initially larger but can improve performance when the image needs
-to grow.
+Preallocation mode (allowed values: @code{off}, @code{metadata}, @code{falloc},
+@code{full}). An image with preallocated metadata is initially larger but can
+improve performance when the image needs to grow. @code{falloc} and @code{full}
+preallocations are like the same options of @code{raw} format, but sets up
+metadata also.
 
 @item lazy_refcounts
 If this option is set to @code{on}, reference count updates are postponed with
diff --git a/qemu-img.texi b/qemu-img.texi
index cc4668e64f..50d2cca7a0 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -419,6 +419,15 @@ Linux or NTFS on Windows), then only the written sectors will reserve
 space. Use @code{qemu-img info} to know the real size used by the
 image or @code{ls -ls} on Unix/Linux.
 
+Supported options:
+@table @code
+@item preallocation
+Preallocation mode (allowed values: @code{off}, @code{falloc}, @code{full}).
+@code{falloc} mode preallocates space for image by calling posix_fallocate().
+@code{full} mode preallocates space for image by writing zeros to underlying
+storage.
+@end table
+
 @item qcow2
 QEMU image format, the most versatile format. Use it to have smaller
 images (useful if your filesystem does not supports holes, for example
@@ -467,9 +476,11 @@ sizes can improve the image file size whereas larger cluster sizes generally
 provide better performance.
 
 @item preallocation
-Preallocation mode (allowed values: off, metadata). An image with preallocated
-metadata is initially larger but can improve performance when the image needs
-to grow.
+Preallocation mode (allowed values: @code{off}, @code{metadata}, @code{falloc},
+@code{full}). An image with preallocated metadata is initially larger but can
+improve performance when the image needs to grow. @code{falloc} and @code{full}
+preallocations are like the same options of @code{raw} format, but sets up
+metadata also.
 
 @item lazy_refcounts
 If this option is set to @code{on}, reference count updates are postponed with
diff --git a/qemu-io.c b/qemu-io.c
index 33c96c4c1c..d2ab6946e8 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -58,30 +58,20 @@ static int openfile(char *name, int flags, int growable, QDict *opts)
         return 1;
     }
 
+    qemuio_bs = bdrv_new("hda", &error_abort);
+
     if (growable) {
-        if (bdrv_open(&qemuio_bs, name, NULL, opts, flags | BDRV_O_PROTOCOL,
-                      NULL, &local_err))
-        {
-            fprintf(stderr, "%s: can't open%s%s: %s\n", progname,
-                    name ? " device " : "", name ?: "",
-                    error_get_pretty(local_err));
-            error_free(local_err);
-            return 1;
-        }
-    } else {
-        qemuio_bs = bdrv_new("hda", &error_abort);
-
-        if (bdrv_open(&qemuio_bs, name, NULL, opts, flags, NULL, &local_err)
-            < 0)
-        {
-            fprintf(stderr, "%s: can't open%s%s: %s\n", progname,
-                    name ? " device " : "", name ?: "",
-                    error_get_pretty(local_err));
-            error_free(local_err);
-            bdrv_unref(qemuio_bs);
-            qemuio_bs = NULL;
-            return 1;
-        }
+        flags |= BDRV_O_PROTOCOL;
+    }
+
+    if (bdrv_open(&qemuio_bs, name, NULL, opts, flags, NULL, &local_err) < 0) {
+        fprintf(stderr, "%s: can't open%s%s: %s\n", progname,
+                name ? " device " : "", name ?: "",
+                error_get_pretty(local_err));
+        error_free(local_err);
+        bdrv_unref(qemuio_bs);
+        qemuio_bs = NULL;
+        return 1;
     }
 
     return 0;
diff --git a/tests/qemu-iotests/025 b/tests/qemu-iotests/025
index a5f45b454c..467a4b7090 100755
--- a/tests/qemu-iotests/025
+++ b/tests/qemu-iotests/025
@@ -40,7 +40,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 . ./common.pattern
 
 _supported_fmt raw qcow2 qed
-_supported_proto file sheepdog rbd nfs
+_supported_proto file sheepdog rbd nfs archipelago
 _supported_os Linux
 
 echo "=== Creating image"
diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out
index 71ca44d76b..09ca0aed49 100644
--- a/tests/qemu-iotests/049.out
+++ b/tests/qemu-iotests/049.out
@@ -179,7 +179,7 @@ qemu-img create -f qcow2 -o preallocation=metadata TEST_DIR/t.qcow2 64M
 Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 preallocation='metadata' lazy_refcounts=off 
 
 qemu-img create -f qcow2 -o preallocation=1234 TEST_DIR/t.qcow2 64M
-qemu-img: TEST_DIR/t.qcow2: Invalid preallocation mode: '1234'
+qemu-img: TEST_DIR/t.qcow2: invalid parameter value: 1234
 Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 preallocation='1234' lazy_refcounts=off 
 
 == Check encryption option ==
diff --git a/tests/qemu-iotests/082.out b/tests/qemu-iotests/082.out
index 413e7ef391..90c21c893b 100644
--- a/tests/qemu-iotests/082.out
+++ b/tests/qemu-iotests/082.out
@@ -64,7 +64,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -76,7 +76,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -88,7 +88,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -100,7 +100,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -112,7 +112,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -124,7 +124,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -136,7 +136,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -148,7 +148,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -175,7 +175,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 
 Testing: create -o help
@@ -253,7 +253,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -265,7 +265,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -277,7 +277,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -289,7 +289,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -301,7 +301,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -313,7 +313,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -325,7 +325,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -337,7 +337,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -364,7 +364,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 
 Testing: convert -o help
@@ -431,7 +431,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -443,7 +443,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -455,7 +455,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -467,7 +467,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -479,7 +479,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -491,7 +491,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -503,7 +503,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -515,7 +515,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 nocow            Turn off copy-on-write (valid only on btrfs)
 
@@ -544,7 +544,7 @@ backing_file     File name of a base image
 backing_fmt      Image format of the base image
 encryption       Encrypt the image
 cluster_size     qcow2 cluster size
-preallocation    Preallocation mode (allowed values: off, metadata)
+preallocation    Preallocation mode (allowed values: off, metadata, falloc, full)
 lazy_refcounts   Postpone refcount updates
 
 Testing: convert -o help
diff --git a/tests/qemu-iotests/104 b/tests/qemu-iotests/104
new file mode 100755
index 0000000000..b471aa5bb1
--- /dev/null
+++ b/tests/qemu-iotests/104
@@ -0,0 +1,57 @@
+#!/bin/bash
+#
+# Test image creation with aligned and unaligned sizes
+#
+# Copyright (C) 2014 Fujitsu.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=hutao@cn.fujitsu.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt generic
+_supported_proto generic
+_supported_os Linux
+
+echo "=== Check qemu-img info output ==="
+echo
+image_sizes="1024 1234"
+
+for s in $image_sizes; do
+    _make_test_img $s | _filter_img_create
+    _img_info | _filter_img_info
+done
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/104.out b/tests/qemu-iotests/104.out
new file mode 100644
index 0000000000..de27852e35
--- /dev/null
+++ b/tests/qemu-iotests/104.out
@@ -0,0 +1,12 @@
+QA output created by 104
+=== Check qemu-img info output ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1024 
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 1.0K (1024 bytes)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1234 
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 1.5K (1536 bytes)
+***done
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index 51192c8511..f69cb6b916 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -192,5 +192,26 @@ _filter_img_create()
         -e "s/archipelago:a/TEST_DIR\//g"
 }
 
+_filter_img_info()
+{
+    sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
+        -e "s#$TEST_DIR#TEST_DIR#g" \
+        -e "s#$IMGFMT#IMGFMT#g" \
+        -e "/encrypted: yes/d" \
+        -e "/cluster_size: [0-9]\\+/d" \
+        -e "/table_size: [0-9]\\+/d" \
+        -e "/compat: '[^']*'/d" \
+        -e "/compat6: \\(on\\|off\\)/d" \
+        -e "/static: \\(on\\|off\\)/d" \
+        -e "/zeroed_grain: \\(on\\|off\\)/d" \
+        -e "/subformat: '[^']*'/d" \
+        -e "/adapter_type: '[^']*'/d" \
+        -e "/lazy_refcounts: \\(on\\|off\\)/d" \
+        -e "/block_size: [0-9]\\+/d" \
+        -e "/block_state_zero: \\(on\\|off\\)/d" \
+        -e "/log_size: [0-9]\\+/d" \
+        -e "s/archipelago:a/TEST_DIR\//g"
+}
+
 # make sure this script returns success
 /bin/true
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 0920b28db4..622685e94c 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -104,3 +104,4 @@
 100 rw auto quick
 101 rw auto quick
 103 rw auto quick
+104 rw auto
diff --git a/tests/qemu-iotests/socket_scm_helper.c b/tests/qemu-iotests/socket_scm_helper.c
index 0e2b2859af..81959835eb 100644
--- a/tests/qemu-iotests/socket_scm_helper.c
+++ b/tests/qemu-iotests/socket_scm_helper.c
@@ -52,7 +52,7 @@ static int send_fd(int fd, int fd_to_send)
     cmsg->cmsg_len = CMSG_LEN(sizeof(int));
     cmsg->cmsg_level = SOL_SOCKET;
     cmsg->cmsg_type = SCM_RIGHTS;
-    memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
+    memcpy(CMSG_DATA(cmsg), &fd_to_send, sizeof(int));
 
     do {
         ret = sendmsg(fd, &msg, 0);
diff --git a/thread-pool.c b/thread-pool.c
index 23888dcfc4..bc07d7a1c9 100644
--- a/thread-pool.c
+++ b/thread-pool.c
@@ -20,7 +20,6 @@
 #include "qemu/osdep.h"
 #include "block/coroutine.h"
 #include "trace.h"
-#include "block/block_int.h"
 #include "block/thread-pool.h"
 #include "qemu/main-loop.h"