summary refs log tree commit diff stats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/blkverify.c20
-rw-r--r--block/commit.c37
-rw-r--r--block/copy-on-read.c9
-rw-r--r--block/filter-compress.c9
-rw-r--r--block/io_uring.c2
-rw-r--r--block/mirror.c37
-rw-r--r--block/qcow2-bitmap.c1
-rw-r--r--block/qcow2-cluster.c7
-rw-r--r--block/qcow2-refcount.c1
-rw-r--r--block/qcow2-threads.c12
-rw-r--r--block/qcow2.c2
-rw-r--r--block/quorum.c70
-rw-r--r--block/replication.c7
-rw-r--r--block/throttle.c8
-rw-r--r--block/vvfat.c7
15 files changed, 125 insertions, 104 deletions
diff --git a/block/blkverify.c b/block/blkverify.c
index 304b0a1368..ba6b1853ae 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -268,18 +268,18 @@ static int blkverify_co_flush(BlockDriverState *bs)
     return bdrv_co_flush(s->test_file->bs);
 }
 
-static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
-                                                  BlockDriverState *candidate)
+static bool blkverify_recurse_can_replace(BlockDriverState *bs,
+                                          BlockDriverState *to_replace)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
-    bool perm = bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-
-    if (perm) {
-        return true;
-    }
-
-    return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
+    /*
+     * blkverify quits the whole qemu process if there is a mismatch
+     * between bs->file->bs and s->test_file->bs.  Therefore, we know
+     * know that both must match bs and we can recurse down to either.
+     */
+    return bdrv_recurse_can_replace(bs->file->bs, to_replace) ||
+           bdrv_recurse_can_replace(s->test_file->bs, to_replace);
 }
 
 static void blkverify_refresh_filename(BlockDriverState *bs)
@@ -327,7 +327,7 @@ static BlockDriver bdrv_blkverify = {
     .bdrv_co_flush                    = blkverify_co_flush,
 
     .is_filter                        = true,
-    .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
+    .bdrv_recurse_can_replace         = blkverify_recurse_can_replace,
 };
 
 static void bdrv_blkverify_init(void)
diff --git a/block/commit.c b/block/commit.c
index 23c90b3b91..8e672799af 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -43,27 +43,6 @@ typedef struct CommitBlockJob {
     char *backing_file_str;
 } CommitBlockJob;
 
-static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
-                                        int64_t offset, uint64_t bytes,
-                                        void *buf)
-{
-    int ret = 0;
-
-    assert(bytes < SIZE_MAX);
-
-    ret = blk_co_pread(bs, offset, bytes, buf, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    ret = blk_co_pwrite(base, offset, bytes, buf, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return 0;
-}
-
 static int commit_prepare(Job *job)
 {
     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
@@ -140,7 +119,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
     int ret = 0;
     int64_t n = 0; /* bytes */
     void *buf = NULL;
-    int bytes_written = 0;
     int64_t len, base_len;
 
     ret = len = blk_getlength(s->top);
@@ -165,6 +143,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
 
     for (offset = 0; offset < len; offset += n) {
         bool copy;
+        bool error_in_source = true;
 
         /* Note that even when no rate limit is applied we need to yield
          * with no pending I/O here so that bdrv_drain_all() returns.
@@ -179,12 +158,20 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
         copy = (ret == 1);
         trace_commit_one_iteration(s, offset, n, ret);
         if (copy) {
-            ret = commit_populate(s->top, s->base, offset, n, buf);
-            bytes_written += n;
+            assert(n < SIZE_MAX);
+
+            ret = blk_co_pread(s->top, offset, n, buf, 0);
+            if (ret >= 0) {
+                ret = blk_co_pwrite(s->base, offset, n, buf, 0);
+                if (ret < 0) {
+                    error_in_source = false;
+                }
+            }
         }
         if (ret < 0) {
             BlockErrorAction action =
-                block_job_error_action(&s->common, false, s->on_error, -ret);
+                block_job_error_action(&s->common, s->on_error,
+                                       error_in_source, -ret);
             if (action == BLOCK_ERROR_ACTION_REPORT) {
                 goto out;
             } else {
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index e95223d3cb..242d3ff055 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -118,13 +118,6 @@ static void cor_lock_medium(BlockDriverState *bs, bool locked)
 }
 
 
-static bool cor_recurse_is_first_non_filter(BlockDriverState *bs,
-                                            BlockDriverState *candidate)
-{
-    return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-}
-
-
 static BlockDriver bdrv_copy_on_read = {
     .format_name                        = "copy-on-read",
 
@@ -143,8 +136,6 @@ static BlockDriver bdrv_copy_on_read = {
 
     .bdrv_co_block_status               = bdrv_co_block_status_from_file,
 
-    .bdrv_recurse_is_first_non_filter   = cor_recurse_is_first_non_filter,
-
     .has_variable_length                = true,
     .is_filter                          = true,
 };
diff --git a/block/filter-compress.c b/block/filter-compress.c
index 60137fb680..82c315b298 100644
--- a/block/filter-compress.c
+++ b/block/filter-compress.c
@@ -128,13 +128,6 @@ static void compress_lock_medium(BlockDriverState *bs, bool locked)
 }
 
 
-static bool compress_recurse_is_first_non_filter(BlockDriverState *bs,
-                                                 BlockDriverState *candidate)
-{
-    return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-}
-
-
 static BlockDriver bdrv_compress = {
     .format_name                        = "compress",
 
@@ -154,8 +147,6 @@ static BlockDriver bdrv_compress = {
 
     .bdrv_co_block_status               = bdrv_co_block_status_from_file,
 
-    .bdrv_recurse_is_first_non_filter   = compress_recurse_is_first_non_filter,
-
     .has_variable_length                = true,
     .is_filter                          = true,
 };
diff --git a/block/io_uring.c b/block/io_uring.c
index 56892fd1ab..a3142ca989 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -187,7 +187,7 @@ static void luring_process_completions(LuringState *s)
                     ret = 0;
                 }
             } else {
-                ret = -ENOSPC;;
+                ret = -ENOSPC;
             }
         }
 end:
diff --git a/block/mirror.c b/block/mirror.c
index f0f2d9dff1..447051dbc6 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -103,6 +103,7 @@ struct MirrorOp {
     bool is_pseudo_op;
     bool is_active_write;
     CoQueue waiting_requests;
+    Coroutine *co;
 
     QTAILQ_ENTRY(MirrorOp) next;
 };
@@ -282,11 +283,14 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
 }
 
 static inline void coroutine_fn
-mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
+mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active)
 {
     MirrorOp *op;
 
     QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
+        if (self == op) {
+            continue;
+        }
         /* Do not wait on pseudo ops, because it may in turn wait on
          * some other operation to start, which may in fact be the
          * caller of this function.  Since there is only one pseudo op
@@ -301,10 +305,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
 }
 
 static inline void coroutine_fn
-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self)
 {
     /* Only non-active operations use up in-flight slots */
-    mirror_wait_for_any_operation(s, false);
+    mirror_wait_for_any_operation(s, self, false);
 }
 
 /* Perform a mirror copy operation.
@@ -347,7 +351,7 @@ static void coroutine_fn mirror_co_read(void *opaque)
 
     while (s->buf_free_count < nb_chunks) {
         trace_mirror_yield_in_flight(s, op->offset, s->in_flight);
-        mirror_wait_for_free_in_flight_slot(s);
+        mirror_wait_for_free_in_flight_slot(s, op);
     }
 
     /* Now make a QEMUIOVector taking enough granularity-sized chunks
@@ -429,6 +433,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
     default:
         abort();
     }
+    op->co = co;
 
     QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);
     qemu_coroutine_enter(co);
@@ -553,7 +558,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 
         while (s->in_flight >= MAX_IN_FLIGHT) {
             trace_mirror_yield_in_flight(s, offset, s->in_flight);
-            mirror_wait_for_free_in_flight_slot(s);
+            mirror_wait_for_free_in_flight_slot(s, pseudo_op);
         }
 
         if (s->ret < 0) {
@@ -607,7 +612,7 @@ static void mirror_free_init(MirrorBlockJob *s)
 static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s)
 {
     while (s->in_flight > 0) {
-        mirror_wait_for_free_in_flight_slot(s);
+        mirror_wait_for_free_in_flight_slot(s, NULL);
     }
 }
 
@@ -695,7 +700,19 @@ static int mirror_exit_common(Job *job)
          * drain potential other users of the BDS before changing the graph. */
         assert(s->in_drain);
         bdrv_drained_begin(target_bs);
-        bdrv_replace_node(to_replace, target_bs, &local_err);
+        /*
+         * Cannot use check_to_replace_node() here, because that would
+         * check for an op blocker on @to_replace, and we have our own
+         * there.
+         */
+        if (bdrv_recurse_can_replace(src, to_replace)) {
+            bdrv_replace_node(to_replace, target_bs, &local_err);
+        } else {
+            error_setg(&local_err, "Can no longer replace '%s' by '%s', "
+                       "because it can no longer be guaranteed that doing so "
+                       "would not lead to an abrupt change of visible data",
+                       to_replace->node_name, target_bs->node_name);
+        }
         bdrv_drained_end(target_bs);
         if (local_err) {
             error_report_err(local_err);
@@ -792,7 +809,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
             if (s->in_flight >= MAX_IN_FLIGHT) {
                 trace_mirror_yield(s, UINT64_MAX, s->buf_free_count,
                                    s->in_flight);
-                mirror_wait_for_free_in_flight_slot(s);
+                mirror_wait_for_free_in_flight_slot(s, NULL);
                 continue;
             }
 
@@ -945,7 +962,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
         /* Do not start passive operations while there are active
          * writes in progress */
         while (s->in_active_write_counter) {
-            mirror_wait_for_any_operation(s, true);
+            mirror_wait_for_any_operation(s, NULL, true);
         }
 
         if (s->ret < 0) {
@@ -971,7 +988,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
             if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                 (cnt == 0 && s->in_flight > 0)) {
                 trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
-                mirror_wait_for_free_in_flight_slot(s);
+                mirror_wait_for_free_in_flight_slot(s, NULL);
                 continue;
             } else if (cnt != 0) {
                 delay_ns = mirror_iteration(s);
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index d41f5d049b..8cccc2c9f3 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -647,7 +647,6 @@ static Qcow2BitmapList *bitmap_list_load(BlockDriverState *bs, uint64_t offset,
     return bm_list;
 
 broken_dir:
-    ret = -EINVAL;
     error_setg(errp, "Broken bitmap directory");
 
 fail:
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 1947f13a2d..78c95dfa16 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1026,8 +1026,11 @@ err:
 void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
 {
     BDRVQcow2State *s = bs->opaque;
-    qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits,
-                        QCOW2_DISCARD_NEVER);
+    if (!has_data_file(bs)) {
+        qcow2_free_clusters(bs, m->alloc_offset,
+                            m->nb_clusters << s->cluster_bits,
+                            QCOW2_DISCARD_NEVER);
+    }
 }
 
 /*
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index c963bc8de1..7ef1c0e42a 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -889,6 +889,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
                                                 offset);
             if (table != NULL) {
                 qcow2_cache_put(s->refcount_block_cache, &refcount_block);
+                old_table_index = -1;
                 qcow2_cache_discard(s->refcount_block_cache, table);
             }
 
diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
index 8f5a0d1ebe..77bb578cdf 100644
--- a/block/qcow2-threads.c
+++ b/block/qcow2-threads.c
@@ -246,12 +246,15 @@ qcow2_co_encdec(BlockDriverState *bs, uint64_t host_offset,
         .len = len,
         .func = func,
     };
+    uint64_t sector_size;
 
-    assert(QEMU_IS_ALIGNED(guest_offset, BDRV_SECTOR_SIZE));
-    assert(QEMU_IS_ALIGNED(host_offset, BDRV_SECTOR_SIZE));
-    assert(QEMU_IS_ALIGNED(len, BDRV_SECTOR_SIZE));
     assert(s->crypto);
 
+    sector_size = qcrypto_block_get_sector_size(s->crypto);
+    assert(QEMU_IS_ALIGNED(guest_offset, sector_size));
+    assert(QEMU_IS_ALIGNED(host_offset, sector_size));
+    assert(QEMU_IS_ALIGNED(len, sector_size));
+
     return len == 0 ? 0 : qcow2_co_process(bs, qcow2_encdec_pool_func, &arg);
 }
 
@@ -270,7 +273,8 @@ qcow2_co_encdec(BlockDriverState *bs, uint64_t host_offset,
  *        will be written to the underlying storage device at
  *        @host_offset
  *
- * @len - length of the buffer (must be a BDRV_SECTOR_SIZE multiple)
+ * @len - length of the buffer (must be a multiple of the encryption
+ *        sector size)
  *
  * Depending on the encryption method, @host_offset and/or @guest_offset
  * may be used for generating the initialization vector for
diff --git a/block/qcow2.c b/block/qcow2.c
index ef96606f8d..8dcee5efec 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2068,8 +2068,6 @@ qcow2_co_preadv_encrypted(BlockDriverState *bs,
         goto fail;
     }
 
-    assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
-    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
     if (qcow2_co_decrypt(bs,
                          file_cluster_offset + offset_into_cluster(s, offset),
                          offset, buf, bytes) < 0)
diff --git a/block/quorum.c b/block/quorum.c
index df68adcfaa..6d7a56bd93 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -796,17 +796,53 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
     return result;
 }
 
-static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
-                                               BlockDriverState *candidate)
+static bool quorum_recurse_can_replace(BlockDriverState *bs,
+                                       BlockDriverState *to_replace)
 {
     BDRVQuorumState *s = bs->opaque;
     int i;
 
     for (i = 0; i < s->num_children; i++) {
-        bool perm = bdrv_recurse_is_first_non_filter(s->children[i]->bs,
-                                                     candidate);
-        if (perm) {
-            return true;
+        /*
+         * We have no idea whether our children show the same data as
+         * this node (@bs).  It is actually highly likely that
+         * @to_replace does not, because replacing a broken child is
+         * one of the main use cases here.
+         *
+         * We do know that the new BDS will match @bs, so replacing
+         * any of our children by it will be safe.  It cannot change
+         * the data this quorum node presents to its parents.
+         *
+         * However, replacing @to_replace by @bs in any of our
+         * children's chains may change visible data somewhere in
+         * there.  We therefore cannot recurse down those chains with
+         * bdrv_recurse_can_replace().
+         * (More formally, bdrv_recurse_can_replace() requires that
+         * @to_replace will be replaced by something matching the @bs
+         * passed to it.  We cannot guarantee that.)
+         *
+         * Thus, we can only check whether any of our immediate
+         * children matches @to_replace.
+         *
+         * (In the future, we might add a function to recurse down a
+         * chain that checks that nothing there cares about a change
+         * in data from the respective child in question.  For
+         * example, most filters do not care when their child's data
+         * suddenly changes, as long as their parents do not care.)
+         */
+        if (s->children[i]->bs == to_replace) {
+            /*
+             * We now have to ensure that there is no other parent
+             * that cares about replacing this child by a node with
+             * potentially different data.
+             * We do so by checking whether there are any other parents
+             * at all, which is stricter than necessary, but also very
+             * simple.  (We may decide to implement something more
+             * complex and permissive when there is an actual need for
+             * it.)
+             */
+            return QLIST_FIRST(&to_replace->parents) == s->children[i] &&
+                QLIST_NEXT(s->children[i], next_parent) == NULL;
         }
     }
 
@@ -1114,6 +1150,23 @@ static char *quorum_dirname(BlockDriverState *bs, Error **errp)
     return NULL;
 }
 
+static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c,
+                              const BdrvChildRole *role,
+                              BlockReopenQueue *reopen_queue,
+                              uint64_t perm, uint64_t shared,
+                              uint64_t *nperm, uint64_t *nshared)
+{
+    *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
+
+    /*
+     * We cannot share RESIZE or WRITE, as this would make the
+     * children differ from each other.
+     */
+    *nshared = (shared & (BLK_PERM_CONSISTENT_READ |
+                          BLK_PERM_WRITE_UNCHANGED))
+             | DEFAULT_PERM_UNCHANGED;
+}
+
 static const char *const quorum_strong_runtime_opts[] = {
     QUORUM_OPT_VOTE_THRESHOLD,
     QUORUM_OPT_BLKVERIFY,
@@ -1143,10 +1196,9 @@ static BlockDriver bdrv_quorum = {
     .bdrv_add_child                     = quorum_add_child,
     .bdrv_del_child                     = quorum_del_child,
 
-    .bdrv_child_perm                    = bdrv_filter_default_perms,
+    .bdrv_child_perm                    = quorum_child_perm,
 
-    .is_filter                          = true,
-    .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
+    .bdrv_recurse_can_replace           = quorum_recurse_can_replace,
 
     .strong_runtime_opts                = quorum_strong_runtime_opts,
 };
diff --git a/block/replication.c b/block/replication.c
index 99532ce521..d6681b6c84 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -306,12 +306,6 @@ out:
     return ret;
 }
 
-static bool replication_recurse_is_first_non_filter(BlockDriverState *bs,
-                                                    BlockDriverState *candidate)
-{
-    return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-}
-
 static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
 {
     Error *local_err = NULL;
@@ -699,7 +693,6 @@ static BlockDriver bdrv_replication = {
     .bdrv_co_writev             = replication_co_writev,
 
     .is_filter                  = true,
-    .bdrv_recurse_is_first_non_filter = replication_recurse_is_first_non_filter,
 
     .has_variable_length        = true,
     .strong_runtime_opts        = replication_strong_runtime_opts,
diff --git a/block/throttle.c b/block/throttle.c
index 0349f42257..71f4bb0ad1 100644
--- a/block/throttle.c
+++ b/block/throttle.c
@@ -207,12 +207,6 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state)
     reopen_state->opaque = NULL;
 }
 
-static bool throttle_recurse_is_first_non_filter(BlockDriverState *bs,
-                                                 BlockDriverState *candidate)
-{
-    return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-}
-
 static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
 {
     ThrottleGroupMember *tgm = bs->opaque;
@@ -252,8 +246,6 @@ static BlockDriver bdrv_throttle = {
     .bdrv_co_pwrite_zeroes              =   throttle_co_pwrite_zeroes,
     .bdrv_co_pdiscard                   =   throttle_co_pdiscard,
 
-    .bdrv_recurse_is_first_non_filter   =   throttle_recurse_is_first_non_filter,
-
     .bdrv_attach_aio_context            =   throttle_attach_aio_context,
     .bdrv_detach_aio_context            =   throttle_detach_aio_context,
 
diff --git a/block/vvfat.c b/block/vvfat.c
index 019b8f1341..ab800c4887 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -3124,17 +3124,10 @@ write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     return ret;
 }
 
-static void write_target_close(BlockDriverState *bs) {
-    BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
-    bdrv_unref_child(s->bs, s->qcow);
-    g_free(s->qcow_filename);
-}
-
 static BlockDriver vvfat_write_target = {
     .format_name        = "vvfat_write_target",
     .instance_size      = sizeof(void*),
     .bdrv_co_pwritev    = write_target_commit,
-    .bdrv_close         = write_target_close,
 };
 
 static void vvfat_qcow_options(int *child_flags, QDict *child_options,