summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--block/qcow2-cluster.c5
-rw-r--r--block/qcow2.c123
-rw-r--r--block/qcow2.h21
-rw-r--r--block_int.h26
-rw-r--r--docs/specs/qcow2.txt14
-rw-r--r--hmp-commands.hx2
-rw-r--r--hw/ide/qdev.c3
-rw-r--r--hw/scsi-disk.c3
-rw-r--r--qapi-schema.json17
-rw-r--r--qemu-img.c28
-rw-r--r--qemu-io.c12
-rw-r--r--tests/qemu-iotests/031.out20
-rw-r--r--tests/qemu-iotests/036.out4
-rwxr-xr-xtests/qemu-iotests/039136
-rw-r--r--tests/qemu-iotests/039.out53
-rw-r--r--tests/qemu-iotests/common.rc7
-rw-r--r--tests/qemu-iotests/group1
-rwxr-xr-xtests/qemu-iotests/qed.py235
18 files changed, 650 insertions, 60 deletions
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index d7e0e19d9c..e179211c57 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -662,7 +662,10 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
         qcow2_cache_depends_on_flush(s->l2_table_cache);
     }
 
-    qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
+    if (qcow2_need_accurate_refcounts(s)) {
+        qcow2_cache_set_dependency(bs, s->l2_table_cache,
+                                   s->refcount_block_cache);
+    }
     ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
     if (ret < 0) {
         goto err;
diff --git a/block/qcow2.c b/block/qcow2.c
index 870148ddf8..fd5e214431 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -214,6 +214,62 @@ static void report_unsupported_feature(BlockDriverState *bs,
     }
 }
 
+/*
+ * Sets the dirty bit and flushes afterwards if necessary.
+ *
+ * The incompatible_features bit is only set if the image file header was
+ * updated successfully.  Therefore it is not required to check the return
+ * value of this function.
+ */
+static int qcow2_mark_dirty(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t val;
+    int ret;
+
+    assert(s->qcow_version >= 3);
+
+    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
+        return 0; /* already dirty */
+    }
+
+    val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
+    ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
+                      &val, sizeof(val));
+    if (ret < 0) {
+        return ret;
+    }
+    ret = bdrv_flush(bs->file);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* Only treat image as dirty if the header was updated successfully */
+    s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
+    return 0;
+}
+
+/*
+ * Clears the dirty bit and flushes before if necessary.  Only call this
+ * function when there are no pending requests, it does not guard against
+ * concurrent requests dirtying the image.
+ */
+static int qcow2_mark_clean(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+
+    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
+        int ret = bdrv_flush(bs);
+        if (ret < 0) {
+            return ret;
+        }
+
+        s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
+        return qcow2_update_header(bs);
+    }
+    return 0;
+}
+
 static int qcow2_open(BlockDriverState *bs, int flags)
 {
     BDRVQcowState *s = bs->opaque;
@@ -287,12 +343,13 @@ static int qcow2_open(BlockDriverState *bs, int flags)
     s->compatible_features      = header.compatible_features;
     s->autoclear_features       = header.autoclear_features;
 
-    if (s->incompatible_features != 0) {
+    if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
         void *feature_table = NULL;
         qcow2_read_extensions(bs, header.header_length, ext_end,
                               &feature_table);
         report_unsupported_feature(bs, feature_table,
-                                   s->incompatible_features);
+                                   s->incompatible_features &
+                                   ~QCOW2_INCOMPAT_MASK);
         ret = -ENOTSUP;
         goto fail;
     }
@@ -412,6 +469,22 @@ static int qcow2_open(BlockDriverState *bs, int flags)
     /* Initialise locks */
     qemu_co_mutex_init(&s->lock);
 
+    /* Repair image if dirty */
+    if ((s->incompatible_features & QCOW2_INCOMPAT_DIRTY) &&
+        !bs->read_only) {
+        BdrvCheckResult result = {0};
+
+        ret = qcow2_check_refcounts(bs, &result, BDRV_FIX_ERRORS);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        ret = qcow2_mark_clean(bs);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
 #ifdef DEBUG_ALLOC
     {
         BdrvCheckResult result = {0};
@@ -714,6 +787,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
             goto fail;
         }
 
+        if (l2meta.nb_clusters > 0 &&
+            (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)) {
+            qcow2_mark_dirty(bs);
+        }
+
         cluster_offset = l2meta.cluster_offset;
         assert((cluster_offset & 511) == 0);
 
@@ -785,6 +863,8 @@ static void qcow2_close(BlockDriverState *bs)
     qcow2_cache_flush(bs, s->l2_table_cache);
     qcow2_cache_flush(bs, s->refcount_block_cache);
 
+    qcow2_mark_clean(bs);
+
     qcow2_cache_destroy(bs, s->l2_table_cache);
     qcow2_cache_destroy(bs, s->refcount_block_cache);
 
@@ -949,7 +1029,16 @@ int qcow2_update_header(BlockDriverState *bs)
 
     /* Feature table */
     Qcow2Feature features[] = {
-        /* no feature defined yet */
+        {
+            .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
+            .bit  = QCOW2_INCOMPAT_DIRTY_BITNR,
+            .name = "dirty bit",
+        },
+        {
+            .type = QCOW2_FEAT_TYPE_COMPATIBLE,
+            .bit  = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
+            .name = "lazy refcounts",
+        },
     };
 
     ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
@@ -1132,6 +1221,11 @@ static int qcow2_create2(const char *filename, int64_t total_size,
         header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
     }
 
+    if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) {
+        header.compatible_features |=
+            cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
+    }
+
     ret = bdrv_pwrite(bs, 0, &header, sizeof(header));
     if (ret < 0) {
         goto out;
@@ -1245,6 +1339,8 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options)
                     options->value.s);
                 return -EINVAL;
             }
+        } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
+            flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0;
         }
         options++;
     }
@@ -1255,6 +1351,12 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options)
         return -EINVAL;
     }
 
+    if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) {
+        fprintf(stderr, "Lazy refcounts only supported with compatibility "
+                "level 1.1 and above (use compat=1.1 or greater)\n");
+        return -EINVAL;
+    }
+
     return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
                          cluster_size, prealloc, options, version);
 }
@@ -1441,10 +1543,12 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
         return ret;
     }
 
-    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
-    if (ret < 0) {
-        qemu_co_mutex_unlock(&s->lock);
-        return ret;
+    if (qcow2_need_accurate_refcounts(s)) {
+        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+        if (ret < 0) {
+            qemu_co_mutex_unlock(&s->lock);
+            return ret;
+        }
     }
     qemu_co_mutex_unlock(&s->lock);
 
@@ -1559,6 +1663,11 @@ static QEMUOptionParameter qcow2_create_options[] = {
         .type = OPT_STRING,
         .help = "Preallocation mode (allowed values: off, metadata)"
     },
+    {
+        .name = BLOCK_OPT_LAZY_REFCOUNTS,
+        .type = OPT_FLAG,
+        .help = "Postpone refcount updates",
+    },
     { NULL }
 };
 
diff --git a/block/qcow2.h b/block/qcow2.h
index 455b6d7cfe..b4eb65470e 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -110,6 +110,22 @@ enum {
     QCOW2_FEAT_TYPE_AUTOCLEAR       = 2,
 };
 
+/* Incompatible feature bits */
+enum {
+    QCOW2_INCOMPAT_DIRTY_BITNR   = 0,
+    QCOW2_INCOMPAT_DIRTY         = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
+
+    QCOW2_INCOMPAT_MASK          = QCOW2_INCOMPAT_DIRTY,
+};
+
+/* Compatible feature bits */
+enum {
+    QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0,
+    QCOW2_COMPAT_LAZY_REFCOUNTS       = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
+
+    QCOW2_COMPAT_FEAT_MASK            = QCOW2_COMPAT_LAZY_REFCOUNTS,
+};
+
 typedef struct Qcow2Feature {
     uint8_t type;
     uint8_t bit;
@@ -237,6 +253,11 @@ static inline int qcow2_get_cluster_type(uint64_t l2_entry)
     }
 }
 
+/* Check whether refcounts are eager or lazy */
+static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
+{
+    return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
+}
 
 // FIXME Need qcow2_ prefix to global functions
 
diff --git a/block_int.h b/block_int.h
index d72317fbe3..6c1d9cafb1 100644
--- a/block_int.h
+++ b/block_int.h
@@ -31,8 +31,9 @@
 #include "qemu-timer.h"
 #include "qapi-types.h"
 
-#define BLOCK_FLAG_ENCRYPT	1
-#define BLOCK_FLAG_COMPAT6	4
+#define BLOCK_FLAG_ENCRYPT          1
+#define BLOCK_FLAG_COMPAT6          4
+#define BLOCK_FLAG_LAZY_REFCOUNTS   8
 
 #define BLOCK_IO_LIMIT_READ     0
 #define BLOCK_IO_LIMIT_WRITE    1
@@ -41,16 +42,17 @@
 #define BLOCK_IO_SLICE_TIME     100000000
 #define NANOSECONDS_PER_SECOND  1000000000.0
 
-#define BLOCK_OPT_SIZE          "size"
-#define BLOCK_OPT_ENCRYPT       "encryption"
-#define BLOCK_OPT_COMPAT6       "compat6"
-#define BLOCK_OPT_BACKING_FILE  "backing_file"
-#define BLOCK_OPT_BACKING_FMT   "backing_fmt"
-#define BLOCK_OPT_CLUSTER_SIZE  "cluster_size"
-#define BLOCK_OPT_TABLE_SIZE    "table_size"
-#define BLOCK_OPT_PREALLOC      "preallocation"
-#define BLOCK_OPT_SUBFMT        "subformat"
-#define BLOCK_OPT_COMPAT_LEVEL  "compat"
+#define BLOCK_OPT_SIZE              "size"
+#define BLOCK_OPT_ENCRYPT           "encryption"
+#define BLOCK_OPT_COMPAT6           "compat6"
+#define BLOCK_OPT_BACKING_FILE      "backing_file"
+#define BLOCK_OPT_BACKING_FMT       "backing_fmt"
+#define BLOCK_OPT_CLUSTER_SIZE      "cluster_size"
+#define BLOCK_OPT_TABLE_SIZE        "table_size"
+#define BLOCK_OPT_PREALLOC          "preallocation"
+#define BLOCK_OPT_SUBFMT            "subformat"
+#define BLOCK_OPT_COMPAT_LEVEL      "compat"
+#define BLOCK_OPT_LAZY_REFCOUNTS    "lazy_refcounts"
 
 typedef struct BdrvTrackedRequest BdrvTrackedRequest;
 
diff --git a/docs/specs/qcow2.txt b/docs/specs/qcow2.txt
index 87bf785fe0..36a559d886 100644
--- a/docs/specs/qcow2.txt
+++ b/docs/specs/qcow2.txt
@@ -75,13 +75,23 @@ in the description of a field.
                     Bitmask of incompatible features. An implementation must
                     fail to open an image if an unknown bit is set.
 
-                    Bits 0-63:  Reserved (set to 0)
+                    Bit 0:      Dirty bit.  If this bit is set then refcounts
+                                may be inconsistent, make sure to scan L1/L2
+                                tables to repair refcounts before accessing the
+                                image.
+
+                    Bits 1-63:  Reserved (set to 0)
 
          80 -  87:  compatible_features
                     Bitmask of compatible features. An implementation can
                     safely ignore any unknown bits that are set.
 
-                    Bits 0-63:  Reserved (set to 0)
+                    Bit 0:      Lazy refcounts bit.  If this bit is set then
+                                lazy refcount updates can be used.  This means
+                                marking the image file dirty and postponing
+                                refcount metadata updates.
+
+                    Bits 1-63:  Reserved (set to 0)
 
          88 -  95:  autoclear_features
                     Bitmask of auto-clear features. An implementation may only
diff --git a/hmp-commands.hx b/hmp-commands.hx
index eea8b32894..9bbc7f7555 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -101,7 +101,7 @@ ETEXI
         .name       = "block_job_cancel",
         .args_type  = "device:B",
         .params     = "device",
-        .help       = "stop an active block streaming operation",
+        .help       = "stop an active background block operation",
         .mhandler.cmd = hmp_block_job_cancel,
     },
 
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 22e58dfc8a..5ea9b8f4b2 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -149,7 +149,8 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind)
     }
 
     blkconf_serial(&dev->conf, &dev->serial);
-    if (blkconf_geometry(&dev->conf, &dev->chs_trans, 65536, 16, 255) < 0) {
+    if (kind != IDE_CD
+        && blkconf_geometry(&dev->conf, &dev->chs_trans, 65536, 16, 255) < 0) {
         return -1;
     }
 
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index a9c727905a..c8d5edd86e 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -1958,7 +1958,8 @@ static int scsi_initfn(SCSIDevice *dev)
     }
 
     blkconf_serial(&s->qdev.conf, &s->serial);
-    if (blkconf_geometry(&dev->conf, NULL, 65535, 255, 255) < 0) {
+    if (dev->type == TYPE_DISK
+        && blkconf_geometry(&dev->conf, NULL, 65535, 255, 255) < 0) {
         return -1;
     }
 
diff --git a/qapi-schema.json b/qapi-schema.json
index cddf63a878..bd9c450029 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1660,7 +1660,7 @@
 # Returns: Nothing on success
 #          If the job type does not support throttling, NotSupported
 #          If the speed value is invalid, InvalidParameter
-#          If streaming is not active on this device, DeviceNotActive
+#          If no background operation is active on this device, DeviceNotActive
 #
 # Since: 1.1
 ##
@@ -1670,9 +1670,9 @@
 ##
 # @block-job-cancel:
 #
-# Stop an active block streaming operation.
+# Stop an active background block operation.
 #
-# This command returns immediately after marking the active block streaming
+# This command returns immediately after marking the active background block
 # operation for cancellation.  It is an error to call this command if no
 # operation is in progress.
 #
@@ -1680,16 +1680,15 @@
 # BLOCK_JOB_CANCELLED event.  Before that happens the job is still visible when
 # enumerated using query-block-jobs.
 #
-# The image file retains its backing file unless the streaming operation happens
-# to complete just as it is being cancelled.
-#
-# A new block streaming operation can be started at a later time to finish
-# copying all data from the backing file.
+# For streaming, the image file retains its backing file unless the streaming
+# operation happens to complete just as it is being cancelled.  A new streaming
+# operation can be started at a later time to finish copying all data from the
+# backing file.
 #
 # @device: the device name
 #
 # Returns: Nothing on success
-#          If streaming is not active on this device, DeviceNotActive
+#          If no background operation is active on this device, DeviceNotActive
 #          If cancellation already in progress, DeviceInUse
 #
 # Since: 1.1
diff --git a/qemu-img.c b/qemu-img.c
index b866f8081e..94a31ad9f0 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1567,14 +1567,19 @@ static int img_resize(int argc, char **argv)
     const char *filename, *fmt, *size;
     int64_t n, total_size;
     BlockDriverState *bs = NULL;
-    QEMUOptionParameter *param;
-    QEMUOptionParameter resize_options[] = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = OPT_SIZE,
-            .help = "Virtual disk size"
+    QemuOpts *param;
+    static QemuOptsList resize_options = {
+        .name = "resize_options",
+        .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
+        .desc = {
+            {
+                .name = BLOCK_OPT_SIZE,
+                .type = QEMU_OPT_SIZE,
+                .help = "Virtual disk size"
+            }, {
+                /* end of list */
+            }
         },
-        { NULL }
     };
 
     /* Remove size from argv manually so that negative numbers are not treated
@@ -1624,14 +1629,15 @@ static int img_resize(int argc, char **argv)
     }
 
     /* Parse size */
-    param = parse_option_parameters("", resize_options, NULL);
-    if (set_option_parameter(param, BLOCK_OPT_SIZE, size)) {
+    param = qemu_opts_create(&resize_options, NULL, 0, NULL);
+    if (qemu_opt_set(param, BLOCK_OPT_SIZE, size)) {
         /* Error message already printed when size parsing fails */
         ret = -1;
+        qemu_opts_del(param);
         goto out;
     }
-    n = get_option_parameter(param, BLOCK_OPT_SIZE)->value.n;
-    free_option_parameters(param);
+    n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
+    qemu_opts_del(param);
 
     bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR);
     if (!bs) {
diff --git a/qemu-io.c b/qemu-io.c
index 8f3b94b838..d0f4fb70c7 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -1652,6 +1652,17 @@ static const cmdinfo_t map_cmd = {
        .oneline        = "prints the allocated areas of a file",
 };
 
+static int abort_f(int argc, char **argv)
+{
+    abort();
+}
+
+static const cmdinfo_t abort_cmd = {
+       .name           = "abort",
+       .cfunc          = abort_f,
+       .flags          = CMD_NOFILE_OK,
+       .oneline        = "simulate a program crash using abort(3)",
+};
 
 static int close_f(int argc, char **argv)
 {
@@ -1905,6 +1916,7 @@ int main(int argc, char **argv)
     add_command(&discard_cmd);
     add_command(&alloc_cmd);
     add_command(&map_cmd);
+    add_command(&abort_cmd);
 
     add_args_command(init_args_command);
     add_check_command(init_check_command);
diff --git a/tests/qemu-iotests/031.out b/tests/qemu-iotests/031.out
index d3cab301d4..796c993df2 100644
--- a/tests/qemu-iotests/031.out
+++ b/tests/qemu-iotests/031.out
@@ -54,8 +54,8 @@ header_length             72
 
 Header extension:
 magic                     0x6803f857
-length                    0
-data                      ''
+length                    96
+data                      <binary>
 
 Header extension:
 magic                     0x12345678
@@ -68,7 +68,7 @@ No errors were found on the image.
 
 magic                     0x514649fb
 version                   2
-backing_file_offset       0x98
+backing_file_offset       0xf8
 backing_file_size         0x17
 cluster_bits              16
 size                      67108864
@@ -92,8 +92,8 @@ data                      'host_device'
 
 Header extension:
 magic                     0x6803f857
-length                    0
-data                      ''
+length                    96
+data                      <binary>
 
 Header extension:
 magic                     0x12345678
@@ -155,8 +155,8 @@ header_length             104
 
 Header extension:
 magic                     0x6803f857
-length                    0
-data                      ''
+length                    96
+data                      <binary>
 
 Header extension:
 magic                     0x12345678
@@ -169,7 +169,7 @@ No errors were found on the image.
 
 magic                     0x514649fb
 version                   3
-backing_file_offset       0xb8
+backing_file_offset       0x118
 backing_file_size         0x17
 cluster_bits              16
 size                      67108864
@@ -193,8 +193,8 @@ data                      'host_device'
 
 Header extension:
 magic                     0x6803f857
-length                    0
-data                      ''
+length                    96
+data                      <binary>
 
 Header extension:
 magic                     0x12345678
diff --git a/tests/qemu-iotests/036.out b/tests/qemu-iotests/036.out
index 6953e37ab6..063ca22d66 100644
--- a/tests/qemu-iotests/036.out
+++ b/tests/qemu-iotests/036.out
@@ -46,7 +46,7 @@ header_length             104
 
 Header extension:
 magic                     0x6803f857
-length                    0
-data                      ''
+length                    96
+data                      <binary>
 
 *** done
diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039
new file mode 100755
index 0000000000..a749fcf23b
--- /dev/null
+++ b/tests/qemu-iotests/039
@@ -0,0 +1,136 @@
+#!/bin/bash
+#
+# Test qcow2 lazy refcounts
+#
+# Copyright (C) 2012 Red Hat, Inc.
+# Copyright IBM, Corp. 2010
+#
+# Based on test 038.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=stefanha@linux.vnet.ibm.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto generic
+_supported_os Linux
+
+size=128M
+
+echo
+echo "== Checking that image is clean on shutdown =="
+
+IMGOPTS="compat=1.1,lazy_refcounts=on"
+_make_test_img $size
+
+$QEMU_IO -c "write -P 0x5a 0 512" $TEST_IMG | _filter_qemu_io
+
+# The dirty bit must not be set
+./qcow2.py $TEST_IMG dump-header | grep incompatible_features
+_check_test_img
+
+echo
+echo "== Creating a dirty image file =="
+
+IMGOPTS="compat=1.1,lazy_refcounts=on"
+_make_test_img $size
+
+old_ulimit=$(ulimit -c)
+ulimit -c 0 # do not produce a core dump on abort(3)
+$QEMU_IO -c "write -P 0x5a 0 512" -c "abort" $TEST_IMG | _filter_qemu_io
+ulimit -c "$old_ulimit"
+
+# The dirty bit must be set
+./qcow2.py $TEST_IMG dump-header | grep incompatible_features
+_check_test_img
+
+echo
+echo "== Read-only access must still work =="
+
+$QEMU_IO -r -c "read -P 0x5a 0 512" $TEST_IMG | _filter_qemu_io
+
+# The dirty bit must be set
+./qcow2.py $TEST_IMG dump-header | grep incompatible_features
+
+echo
+echo "== Repairing the image file must succeed =="
+
+$QEMU_IMG check -r all $TEST_IMG
+
+# The dirty bit must not be set
+./qcow2.py $TEST_IMG dump-header | grep incompatible_features
+
+echo
+echo "== Data should still be accessible after repair =="
+
+$QEMU_IO -c "read -P 0x5a 0 512" $TEST_IMG | _filter_qemu_io
+
+echo
+echo "== Opening a dirty image read/write should repair it =="
+
+IMGOPTS="compat=1.1,lazy_refcounts=on"
+_make_test_img $size
+
+old_ulimit=$(ulimit -c)
+ulimit -c 0 # do not produce a core dump on abort(3)
+$QEMU_IO -c "write -P 0x5a 0 512" -c "abort" $TEST_IMG | _filter_qemu_io
+ulimit -c "$old_ulimit"
+
+# The dirty bit must be set
+./qcow2.py $TEST_IMG dump-header | grep incompatible_features
+
+$QEMU_IO -c "write 0 512" $TEST_IMG | _filter_qemu_io
+
+# The dirty bit must not be set
+./qcow2.py $TEST_IMG dump-header | grep incompatible_features
+
+echo
+echo "== Creating an image file with lazy_refcounts=off =="
+
+IMGOPTS="compat=1.1,lazy_refcounts=off"
+_make_test_img $size
+
+old_ulimit=$(ulimit -c)
+ulimit -c 0 # do not produce a core dump on abort(3)
+$QEMU_IO -c "write -P 0x5a 0 512" -c "abort" $TEST_IMG | _filter_qemu_io
+ulimit -c "$old_ulimit"
+
+# The dirty bit must not be set since lazy_refcounts=off
+./qcow2.py $TEST_IMG dump-header | grep incompatible_features
+_check_test_img
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
+
diff --git a/tests/qemu-iotests/039.out b/tests/qemu-iotests/039.out
new file mode 100644
index 0000000000..155a05e109
--- /dev/null
+++ b/tests/qemu-iotests/039.out
@@ -0,0 +1,53 @@
+QA output created by 039
+
+== Checking that image is clean on shutdown ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+incompatible_features     0x0
+No errors were found on the image.
+
+== Creating a dirty image file ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+incompatible_features     0x1
+ERROR OFLAG_COPIED: offset=8000000000050000 refcount=0
+ERROR cluster 5 refcount=0 reference=1
+
+2 errors were found on the image.
+Data may be corrupted, or further writes to the image may corrupt it.
+
+== Read-only access must still work ==
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+incompatible_features     0x1
+
+== Repairing the image file must succeed ==
+ERROR OFLAG_COPIED: offset=8000000000050000 refcount=0
+Repairing cluster 5 refcount=0 reference=1
+No errors were found on the image.
+incompatible_features     0x0
+
+== Data should still be accessible after repair ==
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Opening a dirty image read/write should repair it ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+incompatible_features     0x1
+ERROR OFLAG_COPIED: offset=8000000000050000 refcount=0
+Repairing cluster 5 refcount=0 reference=1
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+incompatible_features     0x0
+
+== Creating an image file with lazy_refcounts=off ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+incompatible_features     0x0
+No errors were found on the image.
+*** done
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 5e3a524bc8..7782808a26 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -110,10 +110,11 @@ _make_test_img()
     	sed -e "s#$IMGFMT#IMGFMT#g" | \
 	sed -e "s# encryption=off##g" | \
 	sed -e "s# cluster_size=[0-9]\\+##g" | \
-	sed -e "s# table_size=0##g" | \
+	sed -e "s# table_size=[0-9]\\+##g" | \
 	sed -e "s# compat='[^']*'##g" | \
-	sed -e "s# compat6=off##g" | \
-	sed -e "s# static=off##g"
+	sed -e "s# compat6=\\(on\\|off\\)##g" | \
+	sed -e "s# static=\\(on\\|off\\)##g" | \
+	sed -e "s# lazy_refcounts=\\(on\\|off\\)##g"
 }
 
 _cleanup_test_img()
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 7a2c92b6e9..ebb5ca4b41 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -45,3 +45,4 @@
 036 rw auto quick
 037 rw auto backing
 038 rw auto backing
+039 rw auto
diff --git a/tests/qemu-iotests/qed.py b/tests/qemu-iotests/qed.py
new file mode 100755
index 0000000000..52ff845590
--- /dev/null
+++ b/tests/qemu-iotests/qed.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python
+#
+# Tool to manipulate QED image files
+#
+# Copyright (C) 2010 IBM, Corp.
+#
+# Authors:
+#  Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+import sys
+import struct
+import random
+import optparse
+
+# This can be used as a module
+__all__ = ['QED_F_NEED_CHECK', 'QED']
+
+QED_F_NEED_CHECK = 0x02
+
+header_fmt = '<IIIIQQQQQII'
+header_size = struct.calcsize(header_fmt)
+field_names = ['magic', 'cluster_size', 'table_size',
+               'header_size', 'features', 'compat_features',
+               'autoclear_features', 'l1_table_offset', 'image_size',
+               'backing_filename_offset', 'backing_filename_size']
+table_elem_fmt = '<Q'
+table_elem_size = struct.calcsize(table_elem_fmt)
+
+def err(msg):
+    sys.stderr.write(msg + '\n')
+    sys.exit(1)
+
+def unpack_header(s):
+    fields = struct.unpack(header_fmt, s)
+    return dict((field_names[idx], val) for idx, val in enumerate(fields))
+
+def pack_header(header):
+    fields = tuple(header[x] for x in field_names)
+    return struct.pack(header_fmt, *fields)
+
+def unpack_table_elem(s):
+    return struct.unpack(table_elem_fmt, s)[0]
+
+def pack_table_elem(elem):
+    return struct.pack(table_elem_fmt, elem)
+
+class QED(object):
+    def __init__(self, f):
+        self.f = f
+
+        self.f.seek(0, 2)
+        self.filesize = f.tell()
+
+        self.load_header()
+        self.load_l1_table()
+
+    def raw_pread(self, offset, size):
+        self.f.seek(offset)
+        return self.f.read(size)
+
+    def raw_pwrite(self, offset, data):
+        self.f.seek(offset)
+        return self.f.write(data)
+
+    def load_header(self):
+        self.header = unpack_header(self.raw_pread(0, header_size))
+
+    def store_header(self):
+        self.raw_pwrite(0, pack_header(self.header))
+
+    def read_table(self, offset):
+        size = self.header['table_size'] * self.header['cluster_size']
+        s = self.raw_pread(offset, size)
+        table = [unpack_table_elem(s[i:i + table_elem_size]) for i in xrange(0, size, table_elem_size)]
+        return table
+
+    def load_l1_table(self):
+        self.l1_table = self.read_table(self.header['l1_table_offset'])
+        self.table_nelems = self.header['table_size'] * self.header['cluster_size'] / table_elem_size
+
+    def write_table(self, offset, table):
+        s = ''.join(pack_table_elem(x) for x in table)
+        self.raw_pwrite(offset, s)
+
+def random_table_item(table):
+    vals = [(index, offset) for index, offset in enumerate(table) if offset != 0]
+    if not vals:
+        err('cannot pick random item because table is empty')
+    return random.choice(vals)
+
+def corrupt_table_duplicate(table):
+    '''Corrupt a table by introducing a duplicate offset'''
+    victim_idx, victim_val = random_table_item(table)
+    unique_vals = set(table)
+    if len(unique_vals) == 1:
+        err('no duplication corruption possible in table')
+    dup_val = random.choice(list(unique_vals.difference([victim_val])))
+    table[victim_idx] = dup_val
+
+def corrupt_table_invalidate(qed, table):
+    '''Corrupt a table by introducing an invalid offset'''
+    index, _ = random_table_item(table)
+    table[index] = qed.filesize + random.randint(0, 100 * 1024 * 1024 * 1024 * 1024)
+
+def cmd_show(qed, *args):
+    '''show [header|l1|l2 <offset>]- Show header or l1/l2 tables'''
+    if not args or args[0] == 'header':
+        print qed.header
+    elif args[0] == 'l1':
+        print qed.l1_table
+    elif len(args) == 2 and args[0] == 'l2':
+        offset = int(args[1])
+        print qed.read_table(offset)
+    else:
+        err('unrecognized sub-command')
+
+def cmd_duplicate(qed, table_level):
+    '''duplicate l1|l2 - Duplicate a random table element'''
+    if table_level == 'l1':
+        offset = qed.header['l1_table_offset']
+        table = qed.l1_table
+    elif table_level == 'l2':
+        _, offset = random_table_item(qed.l1_table)
+        table = qed.read_table(offset)
+    else:
+        err('unrecognized sub-command')
+    corrupt_table_duplicate(table)
+    qed.write_table(offset, table)
+
+def cmd_invalidate(qed, table_level):
+    '''invalidate l1|l2 - Plant an invalid table element at random'''
+    if table_level == 'l1':
+        offset = qed.header['l1_table_offset']
+        table = qed.l1_table
+    elif table_level == 'l2':
+        _, offset = random_table_item(qed.l1_table)
+        table = qed.read_table(offset)
+    else:
+        err('unrecognized sub-command')
+    corrupt_table_invalidate(qed, table)
+    qed.write_table(offset, table)
+
+def cmd_need_check(qed, *args):
+    '''need-check [on|off] - Test, set, or clear the QED_F_NEED_CHECK header bit'''
+    if not args:
+        print bool(qed.header['features'] & QED_F_NEED_CHECK)
+        return
+
+    if args[0] == 'on':
+        qed.header['features'] |= QED_F_NEED_CHECK
+    elif args[0] == 'off':
+        qed.header['features'] &= ~QED_F_NEED_CHECK
+    else:
+        err('unrecognized sub-command')
+    qed.store_header()
+
+def cmd_zero_cluster(qed, pos, *args):
+    '''zero-cluster <pos> [<n>] - Zero data clusters'''
+    pos, n = int(pos), 1
+    if args:
+        if len(args) != 1:
+            err('expected one argument')
+        n = int(args[0])
+
+    for i in xrange(n):
+        l1_index = pos / qed.header['cluster_size'] / len(qed.l1_table)
+        if qed.l1_table[l1_index] == 0:
+            err('no l2 table allocated')
+
+        l2_offset = qed.l1_table[l1_index]
+        l2_table = qed.read_table(l2_offset)
+
+        l2_index = (pos / qed.header['cluster_size']) % len(qed.l1_table)
+        l2_table[l2_index] = 1 # zero the data cluster
+        qed.write_table(l2_offset, l2_table)
+        pos += qed.header['cluster_size']
+
+def cmd_copy_metadata(qed, outfile):
+    '''copy-metadata <outfile> - Copy metadata only (for scrubbing corrupted images)'''
+    out = open(outfile, 'wb')
+
+    # Match file size
+    out.seek(qed.filesize - 1)
+    out.write('\0')
+
+    # Copy header clusters
+    out.seek(0)
+    header_size_bytes = qed.header['header_size'] * qed.header['cluster_size']
+    out.write(qed.raw_pread(0, header_size_bytes))
+
+    # Copy L1 table
+    out.seek(qed.header['l1_table_offset'])
+    s = ''.join(pack_table_elem(x) for x in qed.l1_table)
+    out.write(s)
+
+    # Copy L2 tables
+    for l2_offset in qed.l1_table:
+        if l2_offset == 0:
+            continue
+        l2_table = qed.read_table(l2_offset)
+        out.seek(l2_offset)
+        s = ''.join(pack_table_elem(x) for x in l2_table)
+        out.write(s)
+
+    out.close()
+
+def usage():
+    print 'Usage: %s <file> <cmd> [<arg>, ...]' % sys.argv[0]
+    print
+    print 'Supported commands:'
+    for cmd in sorted(x for x in globals() if x.startswith('cmd_')):
+        print globals()[cmd].__doc__
+    sys.exit(1)
+
+def main():
+    if len(sys.argv) < 3:
+        usage()
+    filename, cmd = sys.argv[1:3]
+
+    cmd = 'cmd_' + cmd.replace('-', '_')
+    if cmd not in globals():
+        usage()
+
+    qed = QED(open(filename, 'r+b'))
+    try:
+        globals()[cmd](qed, *sys.argv[3:])
+    except TypeError, e:
+        sys.stderr.write(globals()[cmd].__doc__ + '\n')
+        sys.exit(1)
+
+if __name__ == '__main__':
+    main()