diff options
| author | Stefan Hajnoczi <stefanha@redhat.com> | 2025-05-15 13:42:09 -0400 |
|---|---|---|
| committer | Stefan Hajnoczi <stefanha@redhat.com> | 2025-05-15 13:42:09 -0400 |
| commit | 599f2762ed8c86a6eea03b9f91d49d14a874a95c (patch) | |
| tree | 3833453b251e245498d4cd2337123ba450bdbe98 /block | |
| parent | 21596064081e8d0c0153f68714981c7f0e040973 (diff) | |
| parent | aff46b4bf556430dd3c12fa39a457f0487bb0053 (diff) | |
| download | focaccia-qemu-599f2762ed8c86a6eea03b9f91d49d14a874a95c.tar.gz focaccia-qemu-599f2762ed8c86a6eea03b9f91d49d14a874a95c.zip | |
Merge tag 'pull-nbd-2025-05-14' of https://repo.or.cz/qemu/ericb into staging
NBD patches for 2025-05-14 - Eric Blake: fix blockdev-mirror to no longer inflate sparse destination that already reads as zero # -----BEGIN PGP SIGNATURE----- # # iQEzBAABCAAdFiEEccLMIrHEYCkn0vOqp6FrSiUnQ2oFAmglUT0ACgkQp6FrSiUn # Q2ozXwgAqm4crl7r7b5jFHUS2nbJbdxhJR7GuW5oOlt9In4kXNL8T31SP5tFhfyq # inPl9wbLuvOHyi+NyMK9Wi3XYrHJ26U0PsmSBk2DFF9SvplV+ekUpFNhd6suf7nE # NK97y6Pv6H+KLlrUI8Z4bkRnZnSCIHYpGmS04ehXLodCaWjVOQ+xfXL8g7LprttU # 7xOLRtvW+vEV0TDs2WfjpWmzdqSGB2TVNB6u2a3tRkHGV9LHV1IyBJTs/7m5s/La # UwKt8joUYBw54k6ZeE2JFrhoOPE8W7AzWZJmKnlYopgh7TxWnwVhFPMDSF3/4ffr # ma1nVP6C1zyH4Wi7cw3GRjZktErIww== # =A3FA # -----END PGP SIGNATURE----- # gpg: Signature made Wed 14 May 2025 22:28:13 EDT # gpg: using RSA key 71C2CC22B1C4602927D2F3AAA7A16B4A2527436A # gpg: Good signature from "Eric Blake <eblake@redhat.com>" [full] # gpg: aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>" [full] # gpg: aka "[jpeg image of size 6874]" [full] # Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2 F3AA A7A1 6B4A 2527 436A * tag 'pull-nbd-2025-05-14' of https://repo.or.cz/qemu/ericb: mirror: Reduce I/O when destination is detect-zeroes:unmap tests: Add iotest mirror-sparse for recent patches iotests/common.rc: add disk_usage function mirror: Skip writing zeroes when target is already zero mirror: Skip pre-zeroing destination if it is already zero mirror: Drop redundant zero_target parameter mirror: Allow QMP override to declare target already zero mirror: Pass full sync mode rather than bool to internals mirror: Minor refactoring iotests: Improve iotest 194 to mirror data block: Add new bdrv_co_is_all_zeroes() function block: Let bdrv_co_is_zero_fast consolidate adjacent extents file-posix, gluster: Handle zero block status hint better block: Expand block status mode from bool to flags Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'block')
| -rw-r--r-- | block/blkdebug.c | 6 | ||||
| -rw-r--r-- | block/copy-before-write.c | 4 | ||||
| -rw-r--r-- | block/coroutines.h | 4 | ||||
| -rw-r--r-- | block/file-posix.c | 5 | ||||
| -rw-r--r-- | block/gluster.c | 4 | ||||
| -rw-r--r-- | block/io.c | 126 | ||||
| -rw-r--r-- | block/iscsi.c | 6 | ||||
| -rw-r--r-- | block/mirror.c | 190 | ||||
| -rw-r--r-- | block/nbd.c | 4 | ||||
| -rw-r--r-- | block/null.c | 6 | ||||
| -rw-r--r-- | block/parallels.c | 6 | ||||
| -rw-r--r-- | block/qcow.c | 2 | ||||
| -rw-r--r-- | block/qcow2.c | 6 | ||||
| -rw-r--r-- | block/qed.c | 6 | ||||
| -rw-r--r-- | block/quorum.c | 4 | ||||
| -rw-r--r-- | block/raw-format.c | 4 | ||||
| -rw-r--r-- | block/rbd.c | 6 | ||||
| -rw-r--r-- | block/snapshot-access.c | 4 | ||||
| -rw-r--r-- | block/vdi.c | 4 | ||||
| -rw-r--r-- | block/vmdk.c | 2 | ||||
| -rw-r--r-- | block/vpc.c | 2 | ||||
| -rw-r--r-- | block/vvfat.c | 6 |
22 files changed, 291 insertions, 116 deletions
diff --git a/block/blkdebug.c b/block/blkdebug.c index 1c1967f8e0..c54aee0c84 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -751,9 +751,9 @@ blkdebug_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) } static int coroutine_fn GRAPH_RDLOCK -blkdebug_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file) +blkdebug_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file) { int err; diff --git a/block/copy-before-write.c b/block/copy-before-write.c index 00af0b18ac..36d5d3ed9b 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -291,8 +291,8 @@ cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes, } static int coroutine_fn GRAPH_RDLOCK -cbw_co_snapshot_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, int64_t bytes, +cbw_co_snapshot_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { diff --git a/block/coroutines.h b/block/coroutines.h index 79e5efbf75..892646bb7a 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -47,7 +47,7 @@ int coroutine_fn GRAPH_RDLOCK bdrv_co_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, bool include_base, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, @@ -78,7 +78,7 @@ int co_wrapper_mixed_bdrv_rdlock bdrv_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, bool include_base, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, diff --git a/block/file-posix.c b/block/file-posix.c index ef52ed9169..ec95b74869 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -3273,7 +3273,7 @@ static int find_allocation(BlockDriverState *bs, off_t start, * well exceed it. */ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, @@ -3289,7 +3289,8 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, return ret; } - if (!want_zero) { + if (!(mode & BDRV_WANT_ZERO)) { + /* There is no backing file - all bytes are allocated in this file. */ *pnum = bytes; *map = offset; *file = bs; diff --git a/block/gluster.c b/block/gluster.c index 8712aa606a..89abd40f31 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1461,7 +1461,7 @@ exit: * (Based on raw_co_block_status() from file-posix.c.) */ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, @@ -1478,7 +1478,7 @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, return ret; } - if (!want_zero) { + if (!(mode & BDRV_WANT_ZERO)) { *pnum = bytes; *map = offset; *file = bs; diff --git a/block/io.c b/block/io.c index 6d98b0abb9..4fd7768f9c 100644 --- a/block/io.c +++ b/block/io.c @@ -38,10 +38,14 @@ #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "system/replay.h" +#include "qemu/units.h" /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) +/* Maximum read size for checking if data reads as zero, in bytes */ +#define MAX_ZERO_CHECK_BUFFER (128 * KiB) + static void coroutine_fn GRAPH_RDLOCK bdrv_parent_cb_resize(BlockDriverState *bs); @@ -2364,10 +2368,8 @@ int bdrv_flush_all(void) * Drivers not implementing the functionality are assumed to not support * backing files, hence all their sectors are reported as allocated. * - * If 'want_zero' is true, the caller is querying for mapping - * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and - * _ZERO where possible; otherwise, the result favors larger 'pnum', - * with a focus on accurate BDRV_BLOCK_ALLOCATED. + * 'mode' serves as a hint as to which results are favored; see the + * BDRV_WANT_* macros for details. * * If 'offset' is beyond the end of the disk image the return value is * BDRV_BLOCK_EOF and 'pnum' is set to 0. @@ -2387,7 +2389,7 @@ int bdrv_flush_all(void) * set to the host mapping and BDS corresponding to the guest offset. */ static int coroutine_fn GRAPH_RDLOCK -bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, +bdrv_co_do_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { @@ -2476,7 +2478,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, local_file = bs; local_map = aligned_offset; } else { - ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, + ret = bs->drv->bdrv_co_block_status(bs, mode, aligned_offset, aligned_bytes, pnum, &local_map, &local_file); @@ -2488,10 +2490,10 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, * the cache requires an RCU update, so double check here to avoid * such an update if possible. * - * Check want_zero, because we only want to update the cache when we + * Check mode, because we only want to update the cache when we * have accurate information about what is zero and what is data. */ - if (want_zero && + if (mode == BDRV_WANT_PRECISE && ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && QLIST_EMPTY(&bs->children)) { @@ -2548,7 +2550,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, if (ret & BDRV_BLOCK_RAW) { assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file); - ret = bdrv_co_do_block_status(local_file, want_zero, local_map, + ret = bdrv_co_do_block_status(local_file, mode, local_map, *pnum, pnum, &local_map, &local_file); goto out; } @@ -2560,7 +2562,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, if (!cow_bs) { ret |= BDRV_BLOCK_ZERO; - } else if (want_zero) { + } else if (mode == BDRV_WANT_PRECISE) { int64_t size2 = bdrv_co_getlength(cow_bs); if (size2 >= 0 && offset >= size2) { @@ -2569,14 +2571,14 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, } } - if (want_zero && ret & BDRV_BLOCK_RECURSE && + if (mode == BDRV_WANT_PRECISE && ret & BDRV_BLOCK_RECURSE && local_file && local_file != bs && (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && (ret & BDRV_BLOCK_OFFSET_VALID)) { int64_t file_pnum; int ret2; - ret2 = bdrv_co_do_block_status(local_file, want_zero, local_map, + ret2 = bdrv_co_do_block_status(local_file, mode, local_map, *pnum, &file_pnum, NULL, NULL); if (ret2 >= 0) { /* Ignore errors. This is just providing extra information, it @@ -2627,7 +2629,7 @@ int coroutine_fn bdrv_co_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, bool include_base, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, @@ -2654,7 +2656,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, return 0; } - ret = bdrv_co_do_block_status(bs, want_zero, offset, bytes, pnum, + ret = bdrv_co_do_block_status(bs, mode, offset, bytes, pnum, map, file); ++*depth; if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) { @@ -2671,7 +2673,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base; p = bdrv_filter_or_cow_bs(p)) { - ret = bdrv_co_do_block_status(p, want_zero, offset, bytes, pnum, + ret = bdrv_co_do_block_status(p, mode, offset, bytes, pnum, map, file); ++*depth; if (ret < 0) { @@ -2734,7 +2736,8 @@ int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, BlockDriverState **file) { IO_CODE(); - return bdrv_co_common_block_status_above(bs, base, false, true, offset, + return bdrv_co_common_block_status_above(bs, base, false, + BDRV_WANT_PRECISE, offset, bytes, pnum, map, file, NULL); } @@ -2752,27 +2755,89 @@ int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, int64_t offset, * by @offset and @bytes is known to read as zeroes. * Return 1 if that is the case, 0 otherwise and -errno on error. * This test is meant to be fast rather than accurate so returning 0 - * does not guarantee non-zero data. + * does not guarantee non-zero data; but a return of 1 is reliable. */ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes) { int ret; - int64_t pnum = bytes; + int64_t pnum; IO_CODE(); - if (!bytes) { - return 1; + while (bytes) { + ret = bdrv_co_common_block_status_above(bs, NULL, false, + BDRV_WANT_ZERO, offset, bytes, + &pnum, NULL, NULL, NULL); + + if (ret < 0) { + return ret; + } + if (!(ret & BDRV_BLOCK_ZERO)) { + return 0; + } + offset += pnum; + bytes -= pnum; } - ret = bdrv_co_common_block_status_above(bs, NULL, false, false, offset, - bytes, &pnum, NULL, NULL, NULL); + return 1; +} +/* + * Check @bs (and its backing chain) to see if the entire image is known + * to read as zeroes. + * Return 1 if that is the case, 0 otherwise and -errno on error. + * This test is meant to be fast rather than accurate so returning 0 + * does not guarantee non-zero data; however, a return of 1 is reliable, + * and this function can report 1 in more cases than bdrv_co_is_zero_fast. + */ +int coroutine_fn bdrv_co_is_all_zeroes(BlockDriverState *bs) +{ + int ret; + int64_t pnum, bytes; + char *buf; + QEMUIOVector local_qiov; + IO_CODE(); + + bytes = bdrv_co_getlength(bs); + if (bytes < 0) { + return bytes; + } + + /* First probe - see if the entire image reads as zero */ + ret = bdrv_co_common_block_status_above(bs, NULL, false, BDRV_WANT_ZERO, + 0, bytes, &pnum, NULL, NULL, + NULL); if (ret < 0) { return ret; } + if (ret & BDRV_BLOCK_ZERO) { + return bdrv_co_is_zero_fast(bs, pnum, bytes - pnum); + } - return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO); + /* + * Because of the way 'blockdev-create' works, raw files tend to + * be created with a non-sparse region at the front to make + * alignment probing easier. If the block starts with only a + * small allocated region, it is still worth the effort to see if + * the rest of the image is still sparse, coupled with manually + * reading the first region to see if it reads zero after all. + */ + if (pnum > MAX_ZERO_CHECK_BUFFER) { + return 0; + } + ret = bdrv_co_is_zero_fast(bs, pnum, bytes - pnum); + if (ret <= 0) { + return ret; + } + /* Only the head of the image is unknown, and it's small. Read it. */ + buf = qemu_blockalign(bs, pnum); + qemu_iovec_init_buf(&local_qiov, buf, pnum); + ret = bdrv_driver_preadv(bs, 0, pnum, &local_qiov, 0, 0); + if (ret >= 0) { + ret = buffer_is_zero(buf, pnum); + } + qemu_vfree(buf); + return ret; } int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, @@ -2782,9 +2847,9 @@ int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, int64_t dummy; IO_CODE(); - ret = bdrv_co_common_block_status_above(bs, bs, true, false, offset, - bytes, pnum ? pnum : &dummy, NULL, - NULL, NULL); + ret = bdrv_co_common_block_status_above(bs, bs, true, BDRV_WANT_ALLOCATED, + offset, bytes, pnum ? pnum : &dummy, + NULL, NULL, NULL); if (ret < 0) { return ret; } @@ -2817,7 +2882,8 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *bs, int ret; IO_CODE(); - ret = bdrv_co_common_block_status_above(bs, base, include_base, false, + ret = bdrv_co_common_block_status_above(bs, base, include_base, + BDRV_WANT_ALLOCATED, offset, bytes, pnum, NULL, NULL, &depth); if (ret < 0) { @@ -3698,8 +3764,8 @@ bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes, } int coroutine_fn -bdrv_co_snapshot_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, int64_t bytes, +bdrv_co_snapshot_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { @@ -3717,7 +3783,7 @@ bdrv_co_snapshot_block_status(BlockDriverState *bs, } bdrv_inc_in_flight(bs); - ret = drv->bdrv_co_snapshot_block_status(bs, want_zero, offset, bytes, + ret = drv->bdrv_co_snapshot_block_status(bs, mode, offset, bytes, pnum, map, file); bdrv_dec_in_flight(bs); diff --git a/block/iscsi.c b/block/iscsi.c index 2f0f4dac09..15b96ee880 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -694,9 +694,9 @@ out_unlock: static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, - int64_t *map, + unsigned int mode, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file) { IscsiLun *iscsilun = bs->opaque; diff --git a/block/mirror.c b/block/mirror.c index a53582f17b..c2c5099c95 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -51,10 +51,10 @@ typedef struct MirrorBlockJob { BlockDriverState *to_replace; /* Used to block operations on the drive-mirror-replace target */ Error *replace_blocker; - bool is_none_mode; + MirrorSyncMode sync_mode; BlockMirrorBackingMode backing_mode; - /* Whether the target image requires explicit zero-initialization */ - bool zero_target; + /* Whether the target should be assumed to be already zero initialized */ + bool target_is_zero; /* * To be accesssed with atomics. Written only under the BQL (required by the * current implementation of mirror_change()). @@ -73,6 +73,7 @@ typedef struct MirrorBlockJob { size_t buf_size; int64_t bdev_length; unsigned long *cow_bitmap; + unsigned long *zero_bitmap; BdrvDirtyBitmap *dirty_bitmap; BdrvDirtyBitmapIter *dbi; uint8_t *buf; @@ -108,9 +109,12 @@ struct MirrorOp { int64_t offset; uint64_t bytes; - /* The pointee is set by mirror_co_read(), mirror_co_zero(), and - * mirror_co_discard() before yielding for the first time */ + /* + * These pointers are set by mirror_co_read(), mirror_co_zero(), and + * mirror_co_discard() before yielding for the first time + */ int64_t *bytes_handled; + bool *io_skipped; bool is_pseudo_op; bool is_active_write; @@ -408,15 +412,34 @@ static void coroutine_fn mirror_co_read(void *opaque) static void coroutine_fn mirror_co_zero(void *opaque) { MirrorOp *op = opaque; - int ret; + bool write_needed = true; + int ret = 0; op->s->in_flight++; op->s->bytes_in_flight += op->bytes; *op->bytes_handled = op->bytes; op->is_in_flight = true; - ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, - op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0); + if (op->s->zero_bitmap) { + unsigned long end = DIV_ROUND_UP(op->offset + op->bytes, + op->s->granularity); + assert(QEMU_IS_ALIGNED(op->offset, op->s->granularity)); + assert(QEMU_IS_ALIGNED(op->bytes, op->s->granularity) || + op->offset + op->bytes == op->s->bdev_length); + if (find_next_zero_bit(op->s->zero_bitmap, end, + op->offset / op->s->granularity) == end) { + write_needed = false; + *op->io_skipped = true; + } + } + if (write_needed) { + ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, + op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0); + } + if (ret >= 0 && op->s->zero_bitmap) { + bitmap_set(op->s->zero_bitmap, op->offset / op->s->granularity, + DIV_ROUND_UP(op->bytes, op->s->granularity)); + } mirror_write_complete(op, ret); } @@ -435,29 +458,43 @@ static void coroutine_fn mirror_co_discard(void *opaque) } static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, - unsigned bytes, MirrorMethod mirror_method) + unsigned bytes, MirrorMethod mirror_method, + bool *io_skipped) { MirrorOp *op; Coroutine *co; int64_t bytes_handled = -1; + assert(QEMU_IS_ALIGNED(offset, s->granularity)); + assert(QEMU_IS_ALIGNED(bytes, s->granularity) || + offset + bytes == s->bdev_length); op = g_new(MirrorOp, 1); *op = (MirrorOp){ .s = s, .offset = offset, .bytes = bytes, .bytes_handled = &bytes_handled, + .io_skipped = io_skipped, }; qemu_co_queue_init(&op->waiting_requests); switch (mirror_method) { case MIRROR_METHOD_COPY: + if (s->zero_bitmap) { + bitmap_clear(s->zero_bitmap, offset / s->granularity, + DIV_ROUND_UP(bytes, s->granularity)); + } co = qemu_coroutine_create(mirror_co_read, op); break; case MIRROR_METHOD_ZERO: + /* s->zero_bitmap handled in mirror_co_zero */ co = qemu_coroutine_create(mirror_co_zero, op); break; case MIRROR_METHOD_DISCARD: + if (s->zero_bitmap) { + bitmap_clear(s->zero_bitmap, offset / s->granularity, + DIV_ROUND_UP(bytes, s->granularity)); + } co = qemu_coroutine_create(mirror_co_discard, op); break; default: @@ -568,6 +605,7 @@ static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s) int ret = -1; int64_t io_bytes; int64_t io_bytes_acct; + bool io_skipped = false; MirrorMethod mirror_method = MIRROR_METHOD_COPY; assert(!(offset % s->granularity)); @@ -611,8 +649,10 @@ static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s) } io_bytes = mirror_clip_bytes(s, offset, io_bytes); - io_bytes = mirror_perform(s, offset, io_bytes, mirror_method); - if (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok) { + io_bytes = mirror_perform(s, offset, io_bytes, mirror_method, + &io_skipped); + if (io_skipped || + (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok)) { io_bytes_acct = 0; } else { io_bytes_acct = io_bytes; @@ -723,9 +763,10 @@ static int mirror_exit_common(Job *job) &error_abort); if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { - BlockDriverState *backing = s->is_none_mode ? src : s->base; + BlockDriverState *backing; BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs); + backing = s->sync_mode == MIRROR_SYNC_MODE_NONE ? src : s->base; if (bdrv_cow_bs(unfiltered_target) != backing) { bdrv_set_backing_hd(unfiltered_target, backing, &local_err); if (local_err) { @@ -841,15 +882,54 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) int64_t offset; BlockDriverState *bs; BlockDriverState *target_bs = blk_bs(s->target); - int ret = -1; + int ret = -EIO; int64_t count; + bool punch_holes = + target_bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && + bdrv_can_write_zeroes_with_unmap(target_bs); + int64_t bitmap_length = DIV_ROUND_UP(s->bdev_length, s->granularity); + /* Determine if the image is already zero, regardless of sync mode. */ + s->zero_bitmap = bitmap_new(bitmap_length); bdrv_graph_co_rdlock(); bs = s->mirror_top_bs->backing->bs; + if (s->target_is_zero) { + ret = 1; + } else { + ret = bdrv_co_is_all_zeroes(target_bs); + } bdrv_graph_co_rdunlock(); - if (s->zero_target) { - if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { + /* Determine if a pre-zeroing pass is necessary. */ + if (ret < 0) { + return ret; + } else if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { + /* + * In TOP mode, there is no benefit to a pre-zeroing pass, but + * the zero bitmap can be set if the destination already reads + * as zero and we are not punching holes. + */ + if (ret > 0 && !punch_holes) { + bitmap_set(s->zero_bitmap, 0, bitmap_length); + } + } else if (ret == 0 || punch_holes) { + /* + * Here, we are in FULL mode; our goal is to avoid writing + * zeroes if the destination already reads as zero, except + * when we are trying to punch holes. This is possible if + * zeroing happened externally (ret > 0) or if we have a fast + * way to pre-zero the image (the dirty bitmap will be + * populated later by the non-zero portions, the same as for + * TOP mode). If pre-zeroing is not fast, or we need to visit + * the entire image in order to punch holes even in the + * non-allocated regions of the source, then just mark the + * entire image dirty and leave the zero bitmap clear at this + * point in time. Otherwise, it can be faster to pre-zero the + * image now, even if we re-write the allocated portions of + * the disk later, and the pre-zero pass will populate the + * zero bitmap. + */ + if (!bdrv_can_write_zeroes_with_unmap(target_bs) || punch_holes) { bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); return 0; } @@ -858,6 +938,7 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) for (offset = 0; offset < s->bdev_length; ) { int bytes = MIN(s->bdev_length - offset, QEMU_ALIGN_DOWN(INT_MAX, s->granularity)); + bool ignored; mirror_throttle(s); @@ -873,12 +954,15 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) continue; } - mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO); + mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO, &ignored); offset += bytes; } mirror_wait_for_all_io(s); s->initial_zeroing_ongoing = false; + } else { + /* In FULL mode, and image already reads as zero. */ + bitmap_set(s->zero_bitmap, 0, bitmap_length); } /* First part, loop on the sectors and initialize the dirty bitmap. */ @@ -1020,7 +1104,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) mirror_free_init(s); s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - if (!s->is_none_mode) { + if (s->sync_mode != MIRROR_SYNC_MODE_NONE) { ret = mirror_dirty_init(s); if (ret < 0 || job_is_cancelled(&s->common.job)) { goto immediate_exit; @@ -1163,6 +1247,7 @@ immediate_exit: assert(s->in_flight == 0); qemu_vfree(s->buf); g_free(s->cow_bitmap); + g_free(s->zero_bitmap); g_free(s->in_flight_bitmap); bdrv_dirty_iter_free(s->dbi); @@ -1341,7 +1426,8 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, { int ret; size_t qiov_offset = 0; - int64_t bitmap_offset, bitmap_end; + int64_t dirty_bitmap_offset, dirty_bitmap_end; + int64_t zero_bitmap_offset, zero_bitmap_end; if (!QEMU_IS_ALIGNED(offset, job->granularity) && bdrv_dirty_bitmap_get(job->dirty_bitmap, offset)) @@ -1385,31 +1471,54 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, } /* - * Tails are either clean or shrunk, so for bitmap resetting - * we safely align the range down. + * Tails are either clean or shrunk, so for dirty bitmap resetting + * we safely align the range narrower. But for zero bitmap, round + * range wider for checking or clearing, and narrower for setting. */ - bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity); - bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity); - if (bitmap_offset < bitmap_end) { - bdrv_reset_dirty_bitmap(job->dirty_bitmap, bitmap_offset, - bitmap_end - bitmap_offset); + dirty_bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity); + dirty_bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity); + if (dirty_bitmap_offset < dirty_bitmap_end) { + bdrv_reset_dirty_bitmap(job->dirty_bitmap, dirty_bitmap_offset, + dirty_bitmap_end - dirty_bitmap_offset); } + zero_bitmap_offset = offset / job->granularity; + zero_bitmap_end = DIV_ROUND_UP(offset + bytes, job->granularity); job_progress_increase_remaining(&job->common.job, bytes); job->active_write_bytes_in_flight += bytes; switch (method) { case MIRROR_METHOD_COPY: + if (job->zero_bitmap) { + bitmap_clear(job->zero_bitmap, zero_bitmap_offset, + zero_bitmap_end - zero_bitmap_offset); + } ret = blk_co_pwritev_part(job->target, offset, bytes, qiov, qiov_offset, flags); break; case MIRROR_METHOD_ZERO: + if (job->zero_bitmap) { + if (find_next_zero_bit(job->zero_bitmap, zero_bitmap_end, + zero_bitmap_offset) == zero_bitmap_end) { + ret = 0; + break; + } + } assert(!qiov); ret = blk_co_pwrite_zeroes(job->target, offset, bytes, flags); + if (job->zero_bitmap && ret >= 0) { + bitmap_set(job->zero_bitmap, dirty_bitmap_offset / job->granularity, + (dirty_bitmap_end - dirty_bitmap_offset) / + job->granularity); + } break; case MIRROR_METHOD_DISCARD: + if (job->zero_bitmap) { + bitmap_clear(job->zero_bitmap, zero_bitmap_offset, + zero_bitmap_end - zero_bitmap_offset); + } assert(!qiov); ret = blk_co_pdiscard(job->target, offset, bytes); break; @@ -1430,10 +1539,10 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, * at function start, and they must be still dirty, as we've locked * the region for in-flight op. */ - bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity); - bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity); - bdrv_set_dirty_bitmap(job->dirty_bitmap, bitmap_offset, - bitmap_end - bitmap_offset); + dirty_bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity); + dirty_bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity); + bdrv_set_dirty_bitmap(job->dirty_bitmap, dirty_bitmap_offset, + dirty_bitmap_end - dirty_bitmap_offset); qatomic_set(&job->actively_synced, false); action = mirror_error_action(job, false, -ret); @@ -1711,15 +1820,16 @@ static BlockJob *mirror_start_job( int creation_flags, BlockDriverState *target, const char *replaces, int64_t speed, uint32_t granularity, int64_t buf_size, + MirrorSyncMode sync_mode, BlockMirrorBackingMode backing_mode, - bool zero_target, + bool target_is_zero, BlockdevOnError on_source_error, BlockdevOnError on_target_error, bool unmap, BlockCompletionFunc *cb, void *opaque, const BlockJobDriver *driver, - bool is_none_mode, BlockDriverState *base, + BlockDriverState *base, bool auto_complete, const char *filter_node_name, bool is_mirror, MirrorCopyMode copy_mode, bool base_ro, @@ -1878,9 +1988,9 @@ static BlockJob *mirror_start_job( s->replaces = g_strdup(replaces); s->on_source_error = on_source_error; s->on_target_error = on_target_error; - s->is_none_mode = is_none_mode; + s->sync_mode = sync_mode; s->backing_mode = backing_mode; - s->zero_target = zero_target; + s->target_is_zero = target_is_zero; qatomic_set(&s->copy_mode, copy_mode); s->base = base; s->base_overlay = bdrv_find_overlay(bs, base); @@ -2009,13 +2119,12 @@ void mirror_start(const char *job_id, BlockDriverState *bs, int creation_flags, int64_t speed, uint32_t granularity, int64_t buf_size, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, - bool zero_target, + bool target_is_zero, BlockdevOnError on_source_error, BlockdevOnError on_target_error, bool unmap, const char *filter_node_name, MirrorCopyMode copy_mode, Error **errp) { - bool is_none_mode; BlockDriverState *base; GLOBAL_STATE_CODE(); @@ -2028,14 +2137,13 @@ void mirror_start(const char *job_id, BlockDriverState *bs, } bdrv_graph_rdlock_main_loop(); - is_none_mode = mode == MIRROR_SYNC_MODE_NONE; base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL; bdrv_graph_rdunlock_main_loop(); mirror_start_job(job_id, bs, creation_flags, target, replaces, - speed, granularity, buf_size, backing_mode, zero_target, - on_source_error, on_target_error, unmap, NULL, NULL, - &mirror_job_driver, is_none_mode, base, false, + speed, granularity, buf_size, mode, backing_mode, + target_is_zero, on_source_error, on_target_error, unmap, + NULL, NULL, &mirror_job_driver, base, false, filter_node_name, true, copy_mode, false, errp); } @@ -2061,9 +2169,9 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, job = mirror_start_job( job_id, bs, creation_flags, base, NULL, speed, 0, 0, - MIRROR_LEAVE_BACKING_CHAIN, false, + MIRROR_SYNC_MODE_TOP, MIRROR_LEAVE_BACKING_CHAIN, false, on_error, on_error, true, cb, opaque, - &commit_active_job_driver, false, base, auto_complete, + &commit_active_job_driver, base, auto_complete, filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND, base_read_only, errp); if (!job) { diff --git a/block/nbd.c b/block/nbd.c index 887841bc81..d5a2b21c6d 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -1397,8 +1397,8 @@ nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) } static int coroutine_fn GRAPH_RDLOCK nbd_client_co_block_status( - BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes, - int64_t *pnum, int64_t *map, BlockDriverState **file) + BlockDriverState *bs, unsigned int mode, int64_t offset, + int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { int ret, request_ret; NBDExtent64 extent = { 0 }; diff --git a/block/null.c b/block/null.c index dc0b1fdbd9..4e448d593d 100644 --- a/block/null.c +++ b/block/null.c @@ -227,9 +227,9 @@ static int null_reopen_prepare(BDRVReopenState *reopen_state, } static int coroutine_fn null_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, - int64_t *map, + unsigned int mode, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file) { BDRVNullState *s = bs->opaque; diff --git a/block/parallels.c b/block/parallels.c index 347ca127f3..3a375e2a8a 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -416,9 +416,9 @@ parallels_co_flush_to_os(BlockDriverState *bs) } static int coroutine_fn GRAPH_RDLOCK -parallels_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file) +parallels_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file) { BDRVParallelsState *s = bs->opaque; int count; diff --git a/block/qcow.c b/block/qcow.c index da8ad4d243..8a3e7591a9 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -530,7 +530,7 @@ get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate, } static int coroutine_fn GRAPH_RDLOCK -qcow_co_block_status(BlockDriverState *bs, bool want_zero, +qcow_co_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { diff --git a/block/qcow2.c b/block/qcow2.c index 7774e7f090..66fba89b41 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2141,9 +2141,9 @@ static void qcow2_join_options(QDict *options, QDict *old_options) } static int coroutine_fn GRAPH_RDLOCK -qcow2_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t count, int64_t *pnum, int64_t *map, - BlockDriverState **file) +qcow2_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t count, int64_t *pnum, + int64_t *map, BlockDriverState **file) { BDRVQcow2State *s = bs->opaque; uint64_t host_offset; diff --git a/block/qed.c b/block/qed.c index ac24449ffb..4a36fb3929 100644 --- a/block/qed.c +++ b/block/qed.c @@ -833,9 +833,9 @@ fail: } static int coroutine_fn GRAPH_RDLOCK -bdrv_qed_co_block_status(BlockDriverState *bs, bool want_zero, int64_t pos, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file) +bdrv_qed_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t pos, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file) { BDRVQEDState *s = bs->opaque; size_t len = MIN(bytes, SIZE_MAX); diff --git a/block/quorum.c b/block/quorum.c index 30747a6df9..ed8ce801ee 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1226,7 +1226,7 @@ static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c, * region contains zeroes, and BDRV_BLOCK_DATA otherwise. */ static int coroutine_fn GRAPH_RDLOCK -quorum_co_block_status(BlockDriverState *bs, bool want_zero, +quorum_co_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t count, int64_t *pnum, int64_t *map, BlockDriverState **file) { @@ -1238,7 +1238,7 @@ quorum_co_block_status(BlockDriverState *bs, bool want_zero, for (i = 0; i < s->num_children; i++) { int64_t bytes; ret = bdrv_co_common_block_status_above(s->children[i]->bs, NULL, false, - want_zero, offset, count, + mode, offset, count, &bytes, NULL, NULL, NULL); if (ret < 0) { quorum_report_bad(QUORUM_OP_TYPE_READ, offset, count, diff --git a/block/raw-format.c b/block/raw-format.c index e08526e2ec..df16ac1ea2 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -283,8 +283,8 @@ fail: } static int coroutine_fn GRAPH_RDLOCK -raw_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, +raw_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { BDRVRawState *s = bs->opaque; diff --git a/block/rbd.c b/block/rbd.c index 7446e66659..951cd63f9a 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -1503,9 +1503,9 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, } static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, - int64_t *map, + unsigned int mode, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file) { BDRVRBDState *s = bs->opaque; diff --git a/block/snapshot-access.c b/block/snapshot-access.c index 71ac83c01f..17ed2402db 100644 --- a/block/snapshot-access.c +++ b/block/snapshot-access.c @@ -41,11 +41,11 @@ snapshot_access_co_preadv_part(BlockDriverState *bs, static int coroutine_fn GRAPH_RDLOCK snapshot_access_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { - return bdrv_co_snapshot_block_status(bs->file->bs, want_zero, offset, + return bdrv_co_snapshot_block_status(bs->file->bs, mode, offset, bytes, pnum, map, file); } diff --git a/block/vdi.c b/block/vdi.c index a2da6ecab0..3ddc62a569 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -523,8 +523,8 @@ static int vdi_reopen_prepare(BDRVReopenState *state, } static int coroutine_fn GRAPH_RDLOCK -vdi_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, +vdi_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { BDRVVdiState *s = (BDRVVdiState *)bs->opaque; diff --git a/block/vmdk.c b/block/vmdk.c index 2adec49912..9c7ab037e1 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1777,7 +1777,7 @@ static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent, } static int coroutine_fn GRAPH_RDLOCK -vmdk_co_block_status(BlockDriverState *bs, bool want_zero, +vmdk_co_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { diff --git a/block/vpc.c b/block/vpc.c index 0309e319f6..801ff5793f 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -726,7 +726,7 @@ fail: } static int coroutine_fn GRAPH_RDLOCK -vpc_co_block_status(BlockDriverState *bs, bool want_zero, +vpc_co_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) diff --git a/block/vvfat.c b/block/vvfat.c index 91d69b3cc8..814796d918 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3134,9 +3134,9 @@ vvfat_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, } static int coroutine_fn vvfat_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, - int64_t bytes, int64_t *n, - int64_t *map, + unsigned int mode, + int64_t offset, int64_t bytes, + int64_t *n, int64_t *map, BlockDriverState **file) { *n = bytes; |