Diffstat: 102 files changed, 2311 insertions, 714 deletions
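
Before the per-file hunks, one orienting note: the core of this series replaces the boolean want_zero hint in the block-status path with an unsigned int mode bitmask (the BDRV_WANT_* macros referenced throughout the hunks below), and builds bdrv_co_is_all_zeroes() and the mirror zero_bitmap on top of it. The standalone sketch below models only that calling convention; the DEMO_WANT_* values and the demo_block_status() helper are invented for illustration and are not QEMU code.

/*
 * Minimal, self-contained sketch (not QEMU code) of the calling convention
 * introduced by this series: block-status callbacks receive an
 * "unsigned int mode" bitmask instead of "bool want_zero", so callers can
 * state what they actually need.  The DEMO_WANT_* values are invented for
 * this demo; QEMU's real flags are the BDRV_WANT_* macros in the hunks below.
 */
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_WANT_ZERO       (1u << 0)   /* accurate zero/data reporting   */
#define DEMO_WANT_ALLOCATED  (1u << 1)   /* only allocation status matters */

/* Stand-in for a driver's .bdrv_co_block_status callback. */
static int demo_block_status(unsigned int mode, int64_t offset, int64_t bytes,
                             int64_t *pnum, bool *reads_as_zero)
{
    if (!(mode & DEMO_WANT_ZERO)) {
        /* Caller only wants allocation info: answer cheaply. */
        *pnum = bytes;
        *reads_as_zero = false;
        return 0;
    }
    /* Caller asked for zero information: pretend we probed the file. */
    *pnum = bytes;
    *reads_as_zero = (offset >= 4096);   /* toy rule for the demo */
    return 0;
}

int main(void)
{
    int64_t pnum;
    bool zero;

    demo_block_status(DEMO_WANT_ALLOCATED, 0, 65536, &pnum, &zero);
    printf("allocation-only query: pnum=%" PRId64 " zero=%d\n", pnum, zero);

    demo_block_status(DEMO_WANT_ZERO, 8192, 65536, &pnum, &zero);
    printf("zero-aware query:      pnum=%" PRId64 " zero=%d\n", pnum, zero);
    return 0;
}

The point of the bitmask is visible in the file-posix.c and io.c hunks: drivers test mode & BDRV_WANT_ZERO before doing expensive zero probing, while allocation-only callers such as bdrv_co_is_allocated() pass BDRV_WANT_ALLOCATED.
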
diff --git a/MAINTAINERS b/MAINTAINERS index ca0ffc02d4..57dddcc80d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2068,6 +2068,7 @@ F: hw/pci-bridge/* F: qapi/pci.json F: docs/pci* F: docs/specs/*pci* +F: docs/system/sriov.rst PCIE DOE M: Huai-Cheng Kuo <hchkuo@avery-design.com.tw> diff --git a/backends/vhost-user.c b/backends/vhost-user.c index 94274a619d..42845329e7 100644 --- a/backends/vhost-user.c +++ b/backends/vhost-user.c @@ -97,30 +97,28 @@ err_host_notifiers: vhost_dev_disable_notifiers(&b->dev, b->vdev); } -void +int vhost_user_backend_stop(VhostUserBackend *b) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(b->vdev))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - int ret = 0; + int ret; if (!b->started) { - return; + return 0; } - vhost_dev_stop(&b->dev, b->vdev, true); + ret = vhost_dev_stop(&b->dev, b->vdev, true); - if (k->set_guest_notifiers) { - ret = k->set_guest_notifiers(qbus->parent, - b->dev.nvqs, false); - if (ret < 0) { - error_report("vhost guest notifier cleanup failed: %d", ret); - } + if (k->set_guest_notifiers && + k->set_guest_notifiers(qbus->parent, b->dev.nvqs, false) < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return -1; } - assert(ret >= 0); vhost_dev_disable_notifiers(&b->dev, b->vdev); b->started = false; + return ret; } static void set_chardev(Object *obj, const char *value, Error **errp) diff --git a/block/blkdebug.c b/block/blkdebug.c index 1c1967f8e0..c54aee0c84 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -751,9 +751,9 @@ blkdebug_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) } static int coroutine_fn GRAPH_RDLOCK -blkdebug_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file) +blkdebug_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file) { int err; diff --git a/block/copy-before-write.c b/block/copy-before-write.c index 00af0b18ac..36d5d3ed9b 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -291,8 +291,8 @@ cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes, } static int coroutine_fn GRAPH_RDLOCK -cbw_co_snapshot_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, int64_t bytes, +cbw_co_snapshot_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { diff --git a/block/coroutines.h b/block/coroutines.h index 79e5efbf75..892646bb7a 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -47,7 +47,7 @@ int coroutine_fn GRAPH_RDLOCK bdrv_co_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, bool include_base, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, @@ -78,7 +78,7 @@ int co_wrapper_mixed_bdrv_rdlock bdrv_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, bool include_base, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, diff --git a/block/file-posix.c b/block/file-posix.c index ef52ed9169..ec95b74869 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -3273,7 +3273,7 @@ static int find_allocation(BlockDriverState *bs, off_t start, * well exceed it. 
*/ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, @@ -3289,7 +3289,8 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, return ret; } - if (!want_zero) { + if (!(mode & BDRV_WANT_ZERO)) { + /* There is no backing file - all bytes are allocated in this file. */ *pnum = bytes; *map = offset; *file = bs; diff --git a/block/gluster.c b/block/gluster.c index 8712aa606a..89abd40f31 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1461,7 +1461,7 @@ exit: * (Based on raw_co_block_status() from file-posix.c.) */ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, @@ -1478,7 +1478,7 @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, return ret; } - if (!want_zero) { + if (!(mode & BDRV_WANT_ZERO)) { *pnum = bytes; *map = offset; *file = bs; diff --git a/block/io.c b/block/io.c index 6d98b0abb9..4fd7768f9c 100644 --- a/block/io.c +++ b/block/io.c @@ -38,10 +38,14 @@ #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "system/replay.h" +#include "qemu/units.h" /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) +/* Maximum read size for checking if data reads as zero, in bytes */ +#define MAX_ZERO_CHECK_BUFFER (128 * KiB) + static void coroutine_fn GRAPH_RDLOCK bdrv_parent_cb_resize(BlockDriverState *bs); @@ -2364,10 +2368,8 @@ int bdrv_flush_all(void) * Drivers not implementing the functionality are assumed to not support * backing files, hence all their sectors are reported as allocated. * - * If 'want_zero' is true, the caller is querying for mapping - * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and - * _ZERO where possible; otherwise, the result favors larger 'pnum', - * with a focus on accurate BDRV_BLOCK_ALLOCATED. + * 'mode' serves as a hint as to which results are favored; see the + * BDRV_WANT_* macros for details. * * If 'offset' is beyond the end of the disk image the return value is * BDRV_BLOCK_EOF and 'pnum' is set to 0. @@ -2387,7 +2389,7 @@ int bdrv_flush_all(void) * set to the host mapping and BDS corresponding to the guest offset. */ static int coroutine_fn GRAPH_RDLOCK -bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, +bdrv_co_do_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { @@ -2476,7 +2478,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, local_file = bs; local_map = aligned_offset; } else { - ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, + ret = bs->drv->bdrv_co_block_status(bs, mode, aligned_offset, aligned_bytes, pnum, &local_map, &local_file); @@ -2488,10 +2490,10 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, * the cache requires an RCU update, so double check here to avoid * such an update if possible. * - * Check want_zero, because we only want to update the cache when we + * Check mode, because we only want to update the cache when we * have accurate information about what is zero and what is data. 
*/ - if (want_zero && + if (mode == BDRV_WANT_PRECISE && ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && QLIST_EMPTY(&bs->children)) { @@ -2548,7 +2550,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, if (ret & BDRV_BLOCK_RAW) { assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file); - ret = bdrv_co_do_block_status(local_file, want_zero, local_map, + ret = bdrv_co_do_block_status(local_file, mode, local_map, *pnum, pnum, &local_map, &local_file); goto out; } @@ -2560,7 +2562,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, if (!cow_bs) { ret |= BDRV_BLOCK_ZERO; - } else if (want_zero) { + } else if (mode == BDRV_WANT_PRECISE) { int64_t size2 = bdrv_co_getlength(cow_bs); if (size2 >= 0 && offset >= size2) { @@ -2569,14 +2571,14 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, } } - if (want_zero && ret & BDRV_BLOCK_RECURSE && + if (mode == BDRV_WANT_PRECISE && ret & BDRV_BLOCK_RECURSE && local_file && local_file != bs && (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && (ret & BDRV_BLOCK_OFFSET_VALID)) { int64_t file_pnum; int ret2; - ret2 = bdrv_co_do_block_status(local_file, want_zero, local_map, + ret2 = bdrv_co_do_block_status(local_file, mode, local_map, *pnum, &file_pnum, NULL, NULL); if (ret2 >= 0) { /* Ignore errors. This is just providing extra information, it @@ -2627,7 +2629,7 @@ int coroutine_fn bdrv_co_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, bool include_base, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, @@ -2654,7 +2656,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, return 0; } - ret = bdrv_co_do_block_status(bs, want_zero, offset, bytes, pnum, + ret = bdrv_co_do_block_status(bs, mode, offset, bytes, pnum, map, file); ++*depth; if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) { @@ -2671,7 +2673,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base; p = bdrv_filter_or_cow_bs(p)) { - ret = bdrv_co_do_block_status(p, want_zero, offset, bytes, pnum, + ret = bdrv_co_do_block_status(p, mode, offset, bytes, pnum, map, file); ++*depth; if (ret < 0) { @@ -2734,7 +2736,8 @@ int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, BlockDriverState **file) { IO_CODE(); - return bdrv_co_common_block_status_above(bs, base, false, true, offset, + return bdrv_co_common_block_status_above(bs, base, false, + BDRV_WANT_PRECISE, offset, bytes, pnum, map, file, NULL); } @@ -2752,27 +2755,89 @@ int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, int64_t offset, * by @offset and @bytes is known to read as zeroes. * Return 1 if that is the case, 0 otherwise and -errno on error. * This test is meant to be fast rather than accurate so returning 0 - * does not guarantee non-zero data. + * does not guarantee non-zero data; but a return of 1 is reliable. 
*/ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes) { int ret; - int64_t pnum = bytes; + int64_t pnum; IO_CODE(); - if (!bytes) { - return 1; + while (bytes) { + ret = bdrv_co_common_block_status_above(bs, NULL, false, + BDRV_WANT_ZERO, offset, bytes, + &pnum, NULL, NULL, NULL); + + if (ret < 0) { + return ret; + } + if (!(ret & BDRV_BLOCK_ZERO)) { + return 0; + } + offset += pnum; + bytes -= pnum; } - ret = bdrv_co_common_block_status_above(bs, NULL, false, false, offset, - bytes, &pnum, NULL, NULL, NULL); + return 1; +} +/* + * Check @bs (and its backing chain) to see if the entire image is known + * to read as zeroes. + * Return 1 if that is the case, 0 otherwise and -errno on error. + * This test is meant to be fast rather than accurate so returning 0 + * does not guarantee non-zero data; however, a return of 1 is reliable, + * and this function can report 1 in more cases than bdrv_co_is_zero_fast. + */ +int coroutine_fn bdrv_co_is_all_zeroes(BlockDriverState *bs) +{ + int ret; + int64_t pnum, bytes; + char *buf; + QEMUIOVector local_qiov; + IO_CODE(); + + bytes = bdrv_co_getlength(bs); + if (bytes < 0) { + return bytes; + } + + /* First probe - see if the entire image reads as zero */ + ret = bdrv_co_common_block_status_above(bs, NULL, false, BDRV_WANT_ZERO, + 0, bytes, &pnum, NULL, NULL, + NULL); if (ret < 0) { return ret; } + if (ret & BDRV_BLOCK_ZERO) { + return bdrv_co_is_zero_fast(bs, pnum, bytes - pnum); + } - return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO); + /* + * Because of the way 'blockdev-create' works, raw files tend to + * be created with a non-sparse region at the front to make + * alignment probing easier. If the block starts with only a + * small allocated region, it is still worth the effort to see if + * the rest of the image is still sparse, coupled with manually + * reading the first region to see if it reads zero after all. + */ + if (pnum > MAX_ZERO_CHECK_BUFFER) { + return 0; + } + ret = bdrv_co_is_zero_fast(bs, pnum, bytes - pnum); + if (ret <= 0) { + return ret; + } + /* Only the head of the image is unknown, and it's small. Read it. */ + buf = qemu_blockalign(bs, pnum); + qemu_iovec_init_buf(&local_qiov, buf, pnum); + ret = bdrv_driver_preadv(bs, 0, pnum, &local_qiov, 0, 0); + if (ret >= 0) { + ret = buffer_is_zero(buf, pnum); + } + qemu_vfree(buf); + return ret; } int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, @@ -2782,9 +2847,9 @@ int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, int64_t dummy; IO_CODE(); - ret = bdrv_co_common_block_status_above(bs, bs, true, false, offset, - bytes, pnum ? pnum : &dummy, NULL, - NULL, NULL); + ret = bdrv_co_common_block_status_above(bs, bs, true, BDRV_WANT_ALLOCATED, + offset, bytes, pnum ? 
pnum : &dummy, + NULL, NULL, NULL); if (ret < 0) { return ret; } @@ -2817,7 +2882,8 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *bs, int ret; IO_CODE(); - ret = bdrv_co_common_block_status_above(bs, base, include_base, false, + ret = bdrv_co_common_block_status_above(bs, base, include_base, + BDRV_WANT_ALLOCATED, offset, bytes, pnum, NULL, NULL, &depth); if (ret < 0) { @@ -3698,8 +3764,8 @@ bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes, } int coroutine_fn -bdrv_co_snapshot_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, int64_t bytes, +bdrv_co_snapshot_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { @@ -3717,7 +3783,7 @@ bdrv_co_snapshot_block_status(BlockDriverState *bs, } bdrv_inc_in_flight(bs); - ret = drv->bdrv_co_snapshot_block_status(bs, want_zero, offset, bytes, + ret = drv->bdrv_co_snapshot_block_status(bs, mode, offset, bytes, pnum, map, file); bdrv_dec_in_flight(bs); diff --git a/block/iscsi.c b/block/iscsi.c index 2f0f4dac09..15b96ee880 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -694,9 +694,9 @@ out_unlock: static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, - int64_t *map, + unsigned int mode, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file) { IscsiLun *iscsilun = bs->opaque; diff --git a/block/mirror.c b/block/mirror.c index a53582f17b..c2c5099c95 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -51,10 +51,10 @@ typedef struct MirrorBlockJob { BlockDriverState *to_replace; /* Used to block operations on the drive-mirror-replace target */ Error *replace_blocker; - bool is_none_mode; + MirrorSyncMode sync_mode; BlockMirrorBackingMode backing_mode; - /* Whether the target image requires explicit zero-initialization */ - bool zero_target; + /* Whether the target should be assumed to be already zero initialized */ + bool target_is_zero; /* * To be accesssed with atomics. Written only under the BQL (required by the * current implementation of mirror_change()). @@ -73,6 +73,7 @@ typedef struct MirrorBlockJob { size_t buf_size; int64_t bdev_length; unsigned long *cow_bitmap; + unsigned long *zero_bitmap; BdrvDirtyBitmap *dirty_bitmap; BdrvDirtyBitmapIter *dbi; uint8_t *buf; @@ -108,9 +109,12 @@ struct MirrorOp { int64_t offset; uint64_t bytes; - /* The pointee is set by mirror_co_read(), mirror_co_zero(), and - * mirror_co_discard() before yielding for the first time */ + /* + * These pointers are set by mirror_co_read(), mirror_co_zero(), and + * mirror_co_discard() before yielding for the first time + */ int64_t *bytes_handled; + bool *io_skipped; bool is_pseudo_op; bool is_active_write; @@ -408,15 +412,34 @@ static void coroutine_fn mirror_co_read(void *opaque) static void coroutine_fn mirror_co_zero(void *opaque) { MirrorOp *op = opaque; - int ret; + bool write_needed = true; + int ret = 0; op->s->in_flight++; op->s->bytes_in_flight += op->bytes; *op->bytes_handled = op->bytes; op->is_in_flight = true; - ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, - op->s->unmap ? 
BDRV_REQ_MAY_UNMAP : 0); + if (op->s->zero_bitmap) { + unsigned long end = DIV_ROUND_UP(op->offset + op->bytes, + op->s->granularity); + assert(QEMU_IS_ALIGNED(op->offset, op->s->granularity)); + assert(QEMU_IS_ALIGNED(op->bytes, op->s->granularity) || + op->offset + op->bytes == op->s->bdev_length); + if (find_next_zero_bit(op->s->zero_bitmap, end, + op->offset / op->s->granularity) == end) { + write_needed = false; + *op->io_skipped = true; + } + } + if (write_needed) { + ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, + op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0); + } + if (ret >= 0 && op->s->zero_bitmap) { + bitmap_set(op->s->zero_bitmap, op->offset / op->s->granularity, + DIV_ROUND_UP(op->bytes, op->s->granularity)); + } mirror_write_complete(op, ret); } @@ -435,29 +458,43 @@ static void coroutine_fn mirror_co_discard(void *opaque) } static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, - unsigned bytes, MirrorMethod mirror_method) + unsigned bytes, MirrorMethod mirror_method, + bool *io_skipped) { MirrorOp *op; Coroutine *co; int64_t bytes_handled = -1; + assert(QEMU_IS_ALIGNED(offset, s->granularity)); + assert(QEMU_IS_ALIGNED(bytes, s->granularity) || + offset + bytes == s->bdev_length); op = g_new(MirrorOp, 1); *op = (MirrorOp){ .s = s, .offset = offset, .bytes = bytes, .bytes_handled = &bytes_handled, + .io_skipped = io_skipped, }; qemu_co_queue_init(&op->waiting_requests); switch (mirror_method) { case MIRROR_METHOD_COPY: + if (s->zero_bitmap) { + bitmap_clear(s->zero_bitmap, offset / s->granularity, + DIV_ROUND_UP(bytes, s->granularity)); + } co = qemu_coroutine_create(mirror_co_read, op); break; case MIRROR_METHOD_ZERO: + /* s->zero_bitmap handled in mirror_co_zero */ co = qemu_coroutine_create(mirror_co_zero, op); break; case MIRROR_METHOD_DISCARD: + if (s->zero_bitmap) { + bitmap_clear(s->zero_bitmap, offset / s->granularity, + DIV_ROUND_UP(bytes, s->granularity)); + } co = qemu_coroutine_create(mirror_co_discard, op); break; default: @@ -568,6 +605,7 @@ static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s) int ret = -1; int64_t io_bytes; int64_t io_bytes_acct; + bool io_skipped = false; MirrorMethod mirror_method = MIRROR_METHOD_COPY; assert(!(offset % s->granularity)); @@ -611,8 +649,10 @@ static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s) } io_bytes = mirror_clip_bytes(s, offset, io_bytes); - io_bytes = mirror_perform(s, offset, io_bytes, mirror_method); - if (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok) { + io_bytes = mirror_perform(s, offset, io_bytes, mirror_method, + &io_skipped); + if (io_skipped || + (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok)) { io_bytes_acct = 0; } else { io_bytes_acct = io_bytes; @@ -723,9 +763,10 @@ static int mirror_exit_common(Job *job) &error_abort); if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { - BlockDriverState *backing = s->is_none_mode ? src : s->base; + BlockDriverState *backing; BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs); + backing = s->sync_mode == MIRROR_SYNC_MODE_NONE ? 
src : s->base; if (bdrv_cow_bs(unfiltered_target) != backing) { bdrv_set_backing_hd(unfiltered_target, backing, &local_err); if (local_err) { @@ -841,15 +882,54 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) int64_t offset; BlockDriverState *bs; BlockDriverState *target_bs = blk_bs(s->target); - int ret = -1; + int ret = -EIO; int64_t count; + bool punch_holes = + target_bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && + bdrv_can_write_zeroes_with_unmap(target_bs); + int64_t bitmap_length = DIV_ROUND_UP(s->bdev_length, s->granularity); + /* Determine if the image is already zero, regardless of sync mode. */ + s->zero_bitmap = bitmap_new(bitmap_length); bdrv_graph_co_rdlock(); bs = s->mirror_top_bs->backing->bs; + if (s->target_is_zero) { + ret = 1; + } else { + ret = bdrv_co_is_all_zeroes(target_bs); + } bdrv_graph_co_rdunlock(); - if (s->zero_target) { - if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { + /* Determine if a pre-zeroing pass is necessary. */ + if (ret < 0) { + return ret; + } else if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { + /* + * In TOP mode, there is no benefit to a pre-zeroing pass, but + * the zero bitmap can be set if the destination already reads + * as zero and we are not punching holes. + */ + if (ret > 0 && !punch_holes) { + bitmap_set(s->zero_bitmap, 0, bitmap_length); + } + } else if (ret == 0 || punch_holes) { + /* + * Here, we are in FULL mode; our goal is to avoid writing + * zeroes if the destination already reads as zero, except + * when we are trying to punch holes. This is possible if + * zeroing happened externally (ret > 0) or if we have a fast + * way to pre-zero the image (the dirty bitmap will be + * populated later by the non-zero portions, the same as for + * TOP mode). If pre-zeroing is not fast, or we need to visit + * the entire image in order to punch holes even in the + * non-allocated regions of the source, then just mark the + * entire image dirty and leave the zero bitmap clear at this + * point in time. Otherwise, it can be faster to pre-zero the + * image now, even if we re-write the allocated portions of + * the disk later, and the pre-zero pass will populate the + * zero bitmap. + */ + if (!bdrv_can_write_zeroes_with_unmap(target_bs) || punch_holes) { bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); return 0; } @@ -858,6 +938,7 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) for (offset = 0; offset < s->bdev_length; ) { int bytes = MIN(s->bdev_length - offset, QEMU_ALIGN_DOWN(INT_MAX, s->granularity)); + bool ignored; mirror_throttle(s); @@ -873,12 +954,15 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) continue; } - mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO); + mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO, &ignored); offset += bytes; } mirror_wait_for_all_io(s); s->initial_zeroing_ongoing = false; + } else { + /* In FULL mode, and image already reads as zero. */ + bitmap_set(s->zero_bitmap, 0, bitmap_length); } /* First part, loop on the sectors and initialize the dirty bitmap. 
*/ @@ -1020,7 +1104,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) mirror_free_init(s); s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - if (!s->is_none_mode) { + if (s->sync_mode != MIRROR_SYNC_MODE_NONE) { ret = mirror_dirty_init(s); if (ret < 0 || job_is_cancelled(&s->common.job)) { goto immediate_exit; @@ -1163,6 +1247,7 @@ immediate_exit: assert(s->in_flight == 0); qemu_vfree(s->buf); g_free(s->cow_bitmap); + g_free(s->zero_bitmap); g_free(s->in_flight_bitmap); bdrv_dirty_iter_free(s->dbi); @@ -1341,7 +1426,8 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, { int ret; size_t qiov_offset = 0; - int64_t bitmap_offset, bitmap_end; + int64_t dirty_bitmap_offset, dirty_bitmap_end; + int64_t zero_bitmap_offset, zero_bitmap_end; if (!QEMU_IS_ALIGNED(offset, job->granularity) && bdrv_dirty_bitmap_get(job->dirty_bitmap, offset)) @@ -1385,31 +1471,54 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, } /* - * Tails are either clean or shrunk, so for bitmap resetting - * we safely align the range down. + * Tails are either clean or shrunk, so for dirty bitmap resetting + * we safely align the range narrower. But for zero bitmap, round + * range wider for checking or clearing, and narrower for setting. */ - bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity); - bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity); - if (bitmap_offset < bitmap_end) { - bdrv_reset_dirty_bitmap(job->dirty_bitmap, bitmap_offset, - bitmap_end - bitmap_offset); + dirty_bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity); + dirty_bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity); + if (dirty_bitmap_offset < dirty_bitmap_end) { + bdrv_reset_dirty_bitmap(job->dirty_bitmap, dirty_bitmap_offset, + dirty_bitmap_end - dirty_bitmap_offset); } + zero_bitmap_offset = offset / job->granularity; + zero_bitmap_end = DIV_ROUND_UP(offset + bytes, job->granularity); job_progress_increase_remaining(&job->common.job, bytes); job->active_write_bytes_in_flight += bytes; switch (method) { case MIRROR_METHOD_COPY: + if (job->zero_bitmap) { + bitmap_clear(job->zero_bitmap, zero_bitmap_offset, + zero_bitmap_end - zero_bitmap_offset); + } ret = blk_co_pwritev_part(job->target, offset, bytes, qiov, qiov_offset, flags); break; case MIRROR_METHOD_ZERO: + if (job->zero_bitmap) { + if (find_next_zero_bit(job->zero_bitmap, zero_bitmap_end, + zero_bitmap_offset) == zero_bitmap_end) { + ret = 0; + break; + } + } assert(!qiov); ret = blk_co_pwrite_zeroes(job->target, offset, bytes, flags); + if (job->zero_bitmap && ret >= 0) { + bitmap_set(job->zero_bitmap, dirty_bitmap_offset / job->granularity, + (dirty_bitmap_end - dirty_bitmap_offset) / + job->granularity); + } break; case MIRROR_METHOD_DISCARD: + if (job->zero_bitmap) { + bitmap_clear(job->zero_bitmap, zero_bitmap_offset, + zero_bitmap_end - zero_bitmap_offset); + } assert(!qiov); ret = blk_co_pdiscard(job->target, offset, bytes); break; @@ -1430,10 +1539,10 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, * at function start, and they must be still dirty, as we've locked * the region for in-flight op. 
*/ - bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity); - bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity); - bdrv_set_dirty_bitmap(job->dirty_bitmap, bitmap_offset, - bitmap_end - bitmap_offset); + dirty_bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity); + dirty_bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity); + bdrv_set_dirty_bitmap(job->dirty_bitmap, dirty_bitmap_offset, + dirty_bitmap_end - dirty_bitmap_offset); qatomic_set(&job->actively_synced, false); action = mirror_error_action(job, false, -ret); @@ -1711,15 +1820,16 @@ static BlockJob *mirror_start_job( int creation_flags, BlockDriverState *target, const char *replaces, int64_t speed, uint32_t granularity, int64_t buf_size, + MirrorSyncMode sync_mode, BlockMirrorBackingMode backing_mode, - bool zero_target, + bool target_is_zero, BlockdevOnError on_source_error, BlockdevOnError on_target_error, bool unmap, BlockCompletionFunc *cb, void *opaque, const BlockJobDriver *driver, - bool is_none_mode, BlockDriverState *base, + BlockDriverState *base, bool auto_complete, const char *filter_node_name, bool is_mirror, MirrorCopyMode copy_mode, bool base_ro, @@ -1878,9 +1988,9 @@ static BlockJob *mirror_start_job( s->replaces = g_strdup(replaces); s->on_source_error = on_source_error; s->on_target_error = on_target_error; - s->is_none_mode = is_none_mode; + s->sync_mode = sync_mode; s->backing_mode = backing_mode; - s->zero_target = zero_target; + s->target_is_zero = target_is_zero; qatomic_set(&s->copy_mode, copy_mode); s->base = base; s->base_overlay = bdrv_find_overlay(bs, base); @@ -2009,13 +2119,12 @@ void mirror_start(const char *job_id, BlockDriverState *bs, int creation_flags, int64_t speed, uint32_t granularity, int64_t buf_size, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, - bool zero_target, + bool target_is_zero, BlockdevOnError on_source_error, BlockdevOnError on_target_error, bool unmap, const char *filter_node_name, MirrorCopyMode copy_mode, Error **errp) { - bool is_none_mode; BlockDriverState *base; GLOBAL_STATE_CODE(); @@ -2028,14 +2137,13 @@ void mirror_start(const char *job_id, BlockDriverState *bs, } bdrv_graph_rdlock_main_loop(); - is_none_mode = mode == MIRROR_SYNC_MODE_NONE; base = mode == MIRROR_SYNC_MODE_TOP ? 
bdrv_backing_chain_next(bs) : NULL; bdrv_graph_rdunlock_main_loop(); mirror_start_job(job_id, bs, creation_flags, target, replaces, - speed, granularity, buf_size, backing_mode, zero_target, - on_source_error, on_target_error, unmap, NULL, NULL, - &mirror_job_driver, is_none_mode, base, false, + speed, granularity, buf_size, mode, backing_mode, + target_is_zero, on_source_error, on_target_error, unmap, + NULL, NULL, &mirror_job_driver, base, false, filter_node_name, true, copy_mode, false, errp); } @@ -2061,9 +2169,9 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, job = mirror_start_job( job_id, bs, creation_flags, base, NULL, speed, 0, 0, - MIRROR_LEAVE_BACKING_CHAIN, false, + MIRROR_SYNC_MODE_TOP, MIRROR_LEAVE_BACKING_CHAIN, false, on_error, on_error, true, cb, opaque, - &commit_active_job_driver, false, base, auto_complete, + &commit_active_job_driver, base, auto_complete, filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND, base_read_only, errp); if (!job) { diff --git a/block/nbd.c b/block/nbd.c index 887841bc81..d5a2b21c6d 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -1397,8 +1397,8 @@ nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) } static int coroutine_fn GRAPH_RDLOCK nbd_client_co_block_status( - BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes, - int64_t *pnum, int64_t *map, BlockDriverState **file) + BlockDriverState *bs, unsigned int mode, int64_t offset, + int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { int ret, request_ret; NBDExtent64 extent = { 0 }; diff --git a/block/null.c b/block/null.c index dc0b1fdbd9..4e448d593d 100644 --- a/block/null.c +++ b/block/null.c @@ -227,9 +227,9 @@ static int null_reopen_prepare(BDRVReopenState *reopen_state, } static int coroutine_fn null_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, - int64_t *map, + unsigned int mode, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file) { BDRVNullState *s = bs->opaque; diff --git a/block/parallels.c b/block/parallels.c index 347ca127f3..3a375e2a8a 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -416,9 +416,9 @@ parallels_co_flush_to_os(BlockDriverState *bs) } static int coroutine_fn GRAPH_RDLOCK -parallels_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file) +parallels_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file) { BDRVParallelsState *s = bs->opaque; int count; diff --git a/block/qcow.c b/block/qcow.c index da8ad4d243..8a3e7591a9 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -530,7 +530,7 @@ get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate, } static int coroutine_fn GRAPH_RDLOCK -qcow_co_block_status(BlockDriverState *bs, bool want_zero, +qcow_co_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { diff --git a/block/qcow2.c b/block/qcow2.c index 7774e7f090..66fba89b41 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2141,9 +2141,9 @@ static void qcow2_join_options(QDict *options, QDict *old_options) } static int coroutine_fn GRAPH_RDLOCK -qcow2_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t count, int64_t *pnum, int64_t *map, - BlockDriverState **file) 
+qcow2_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t count, int64_t *pnum, + int64_t *map, BlockDriverState **file) { BDRVQcow2State *s = bs->opaque; uint64_t host_offset; diff --git a/block/qed.c b/block/qed.c index ac24449ffb..4a36fb3929 100644 --- a/block/qed.c +++ b/block/qed.c @@ -833,9 +833,9 @@ fail: } static int coroutine_fn GRAPH_RDLOCK -bdrv_qed_co_block_status(BlockDriverState *bs, bool want_zero, int64_t pos, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file) +bdrv_qed_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t pos, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file) { BDRVQEDState *s = bs->opaque; size_t len = MIN(bytes, SIZE_MAX); diff --git a/block/quorum.c b/block/quorum.c index 30747a6df9..ed8ce801ee 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1226,7 +1226,7 @@ static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c, * region contains zeroes, and BDRV_BLOCK_DATA otherwise. */ static int coroutine_fn GRAPH_RDLOCK -quorum_co_block_status(BlockDriverState *bs, bool want_zero, +quorum_co_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t count, int64_t *pnum, int64_t *map, BlockDriverState **file) { @@ -1238,7 +1238,7 @@ quorum_co_block_status(BlockDriverState *bs, bool want_zero, for (i = 0; i < s->num_children; i++) { int64_t bytes; ret = bdrv_co_common_block_status_above(s->children[i]->bs, NULL, false, - want_zero, offset, count, + mode, offset, count, &bytes, NULL, NULL, NULL); if (ret < 0) { quorum_report_bad(QUORUM_OP_TYPE_READ, offset, count, diff --git a/block/raw-format.c b/block/raw-format.c index e08526e2ec..df16ac1ea2 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -283,8 +283,8 @@ fail: } static int coroutine_fn GRAPH_RDLOCK -raw_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, +raw_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { BDRVRawState *s = bs->opaque; diff --git a/block/rbd.c b/block/rbd.c index 7446e66659..951cd63f9a 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -1503,9 +1503,9 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, } static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, - int64_t *map, + unsigned int mode, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file) { BDRVRBDState *s = bs->opaque; diff --git a/block/snapshot-access.c b/block/snapshot-access.c index 71ac83c01f..17ed2402db 100644 --- a/block/snapshot-access.c +++ b/block/snapshot-access.c @@ -41,11 +41,11 @@ snapshot_access_co_preadv_part(BlockDriverState *bs, static int coroutine_fn GRAPH_RDLOCK snapshot_access_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { - return bdrv_co_snapshot_block_status(bs->file->bs, want_zero, offset, + return bdrv_co_snapshot_block_status(bs->file->bs, mode, offset, bytes, pnum, map, file); } diff --git a/block/vdi.c b/block/vdi.c index a2da6ecab0..3ddc62a569 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -523,8 +523,8 @@ static int vdi_reopen_prepare(BDRVReopenState *state, } static int coroutine_fn GRAPH_RDLOCK -vdi_co_block_status(BlockDriverState *bs, bool want_zero, 
int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, +vdi_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { BDRVVdiState *s = (BDRVVdiState *)bs->opaque; diff --git a/block/vmdk.c b/block/vmdk.c index 2adec49912..9c7ab037e1 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1777,7 +1777,7 @@ static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent, } static int coroutine_fn GRAPH_RDLOCK -vmdk_co_block_status(BlockDriverState *bs, bool want_zero, +vmdk_co_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { diff --git a/block/vpc.c b/block/vpc.c index 0309e319f6..801ff5793f 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -726,7 +726,7 @@ fail: } static int coroutine_fn GRAPH_RDLOCK -vpc_co_block_status(BlockDriverState *bs, bool want_zero, +vpc_co_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) diff --git a/block/vvfat.c b/block/vvfat.c index 91d69b3cc8..814796d918 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3134,9 +3134,9 @@ vvfat_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, } static int coroutine_fn vvfat_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, - int64_t bytes, int64_t *n, - int64_t *map, + unsigned int mode, + int64_t offset, int64_t bytes, + int64_t *n, int64_t *map, BlockDriverState **file) { *n = bytes; diff --git a/blockdev.c b/blockdev.c index 818ec42511..21443b4514 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2804,7 +2804,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, const char *replaces, enum MirrorSyncMode sync, BlockMirrorBackingMode backing_mode, - bool zero_target, + bool target_is_zero, bool has_speed, int64_t speed, bool has_granularity, uint32_t granularity, bool has_buf_size, int64_t buf_size, @@ -2871,10 +2871,6 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, return; } - if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) { - sync = MIRROR_SYNC_MODE_FULL; - } - if (!replaces) { /* We want to mirror from @bs, but keep implicit filters on top */ unfiltered_bs = bdrv_skip_implicit_filters(bs); @@ -2915,11 +2911,10 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, /* pass the node name to replace to mirror start since it's loose coupling * and will allow to check whether the node still exist at mirror completion */ - mirror_start(job_id, bs, target, - replaces, job_flags, - speed, granularity, buf_size, sync, backing_mode, zero_target, - on_source_error, on_target_error, unmap, filter_node_name, - copy_mode, errp); + mirror_start(job_id, bs, target, replaces, job_flags, + speed, granularity, buf_size, sync, backing_mode, + target_is_zero, on_source_error, on_target_error, unmap, + filter_node_name, copy_mode, errp); } void qmp_drive_mirror(DriveMirror *arg, Error **errp) @@ -2933,7 +2928,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) int flags; int64_t size; const char *format = arg->format; - bool zero_target; + bool target_is_zero; int ret; bs = qmp_get_root_bs(arg->device, errp); @@ -3047,9 +3042,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) } bdrv_graph_rdlock_main_loop(); - zero_target = (arg->sync == MIRROR_SYNC_MODE_FULL && - (arg->mode == NEW_IMAGE_MODE_EXISTING || - 
!bdrv_has_zero_init(target_bs))); + target_is_zero = (arg->mode != NEW_IMAGE_MODE_EXISTING && + bdrv_has_zero_init(target_bs)); bdrv_graph_rdunlock_main_loop(); @@ -3061,7 +3055,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) blockdev_mirror_common(arg->job_id, bs, target_bs, arg->replaces, arg->sync, - backing_mode, zero_target, + backing_mode, target_is_zero, arg->has_speed, arg->speed, arg->has_granularity, arg->granularity, arg->has_buf_size, arg->buf_size, @@ -3091,13 +3085,13 @@ void qmp_blockdev_mirror(const char *job_id, bool has_copy_mode, MirrorCopyMode copy_mode, bool has_auto_finalize, bool auto_finalize, bool has_auto_dismiss, bool auto_dismiss, + bool has_target_is_zero, bool target_is_zero, Error **errp) { BlockDriverState *bs; BlockDriverState *target_bs; AioContext *aio_context; BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; - bool zero_target; int ret; bs = qmp_get_root_bs(device, errp); @@ -3110,8 +3104,6 @@ void qmp_blockdev_mirror(const char *job_id, return; } - zero_target = (sync == MIRROR_SYNC_MODE_FULL); - aio_context = bdrv_get_aio_context(bs); ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); @@ -3121,7 +3113,8 @@ void qmp_blockdev_mirror(const char *job_id, blockdev_mirror_common(job_id, bs, target_bs, replaces, sync, backing_mode, - zero_target, has_speed, speed, + has_target_is_zero && target_is_zero, + has_speed, speed, has_granularity, granularity, has_buf_size, buf_size, has_on_source_error, on_source_error, diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst index 882b036f5e..e307caf3f8 100644 --- a/docs/system/devices/cxl.rst +++ b/docs/system/devices/cxl.rst @@ -308,7 +308,7 @@ A very simple setup with just one directly attached CXL Type 3 Persistent Memory -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa.raw,size=256M \ -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \ - -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \ + -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0,sn=0x1 \ -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G A very simple setup with just one directly attached CXL Type 3 Volatile Memory device:: @@ -349,13 +349,13 @@ the CXL Type3 device directly attached (no switches).:: -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ -device pxb-cxl,bus_nr=222,bus=pcie.0,id=cxl.2 \ -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \ - -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \ + -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0,sn=0x1 \ -device cxl-rp,port=1,bus=cxl.1,id=root_port14,chassis=0,slot=3 \ - -device cxl-type3,bus=root_port14,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem1 \ + -device cxl-type3,bus=root_port14,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem1,sn=0x2 \ -device cxl-rp,port=0,bus=cxl.2,id=root_port15,chassis=0,slot=5 \ - -device cxl-type3,bus=root_port15,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem2 \ + -device cxl-type3,bus=root_port15,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem2,sn=0x3 \ -device cxl-rp,port=1,bus=cxl.2,id=root_port16,chassis=0,slot=6 \ - -device cxl-type3,bus=root_port16,persistent-memdev=cxl-mem4,lsa=cxl-lsa4,id=cxl-pmem3 \ + -device cxl-type3,bus=root_port16,persistent-memdev=cxl-mem4,lsa=cxl-lsa4,id=cxl-pmem3,sn=0x4 \ -M 
cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.targets.1=cxl.2,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=8k An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave:: @@ -375,13 +375,13 @@ An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave:: -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \ -device cxl-upstream,bus=root_port0,id=us0 \ -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ - -device cxl-type3,bus=swport0,persistent-memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ + -device cxl-type3,bus=swport0,persistent-memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0,sn=0x1 \ -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ - -device cxl-type3,bus=swport1,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ + -device cxl-type3,bus=swport1,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1,sn=0x2 \ -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ - -device cxl-type3,bus=swport2,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ + -device cxl-type3,bus=swport2,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2,sn=0x3 \ -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ - -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ + -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,sn=0x4 \ -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k Deprecations diff --git a/docs/system/index.rst b/docs/system/index.rst index c21065e519..718e9d3c56 100644 --- a/docs/system/index.rst +++ b/docs/system/index.rst @@ -39,3 +39,4 @@ or Hypervisor.Framework. multi-process confidential-guest-support vm-templating + sriov diff --git a/docs/system/sriov.rst b/docs/system/sriov.rst new file mode 100644 index 0000000000..d12178f3c3 --- /dev/null +++ b/docs/system/sriov.rst @@ -0,0 +1,37 @@ +.. SPDX-License-Identifier: GPL-2.0-or-later + +Compsable SR-IOV device +======================= + +SR-IOV (Single Root I/O Virtualization) is an optional extended capability of a +PCI Express device. It allows a single physical function (PF) to appear as +multiple virtual functions (VFs) for the main purpose of eliminating software +overhead in I/O from virtual machines. + +There are devices with predefined SR-IOV configurations, but it is also possible +to compose an SR-IOV device yourself. Composing an SR-IOV device is currently +only supported by virtio-net-pci. + +Users can configure an SR-IOV-capable virtio-net device by adding +virtio-net-pci functions to a bus. Below is a command line example: + +.. code-block:: shell + + -netdev user,id=n -netdev user,id=o + -netdev user,id=p -netdev user,id=q + -device pcie-root-port,id=b + -device virtio-net-pci,bus=b,addr=0x0.0x3,netdev=q,sriov-pf=f + -device virtio-net-pci,bus=b,addr=0x0.0x2,netdev=p,sriov-pf=f + -device virtio-net-pci,bus=b,addr=0x0.0x1,netdev=o,sriov-pf=f + -device virtio-net-pci,bus=b,addr=0x0.0x0,netdev=n,id=f + +The VFs specify the paired PF with ``sriov-pf`` property. The PF must be +added after all VFs. It is the user's responsibility to ensure that VFs have +function numbers larger than one of the PF, and that the function numbers +have a consistent stride. Both the PF and VFs are ARI-capable so you can have +255 VFs at maximum. + +You may also need to perform additional steps to activate the SR-IOV feature on +your guest. For Linux, refer to [1]_. + +.. 
[1] https://docs.kernel.org/PCI/pci-iov-howto.html diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 4bb5ed299e..0eebbcd80d 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -204,7 +204,7 @@ err_host_notifiers: return ret; } -static void vhost_user_blk_stop(VirtIODevice *vdev) +static int vhost_user_blk_stop(VirtIODevice *vdev) { VHostUserBlk *s = VHOST_USER_BLK(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -212,26 +212,26 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) int ret; if (!s->started_vu) { - return; + return 0; } s->started_vu = false; if (!k->set_guest_notifiers) { - return; + return 0; } - vhost_dev_stop(&s->dev, vdev, true); + ret = vhost_dev_stop(&s->dev, vdev, true); - ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); - if (ret < 0) { + if (k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; + return -1; } vhost_dev_disable_notifiers(&s->dev, vdev); + return ret; } -static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) +static int vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserBlk *s = VHOST_USER_BLK(vdev); bool should_start = virtio_device_should_start(vdev, status); @@ -239,11 +239,11 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) int ret; if (!s->connected) { - return; + return -1; } if (vhost_dev_is_started(&s->dev) == should_start) { - return; + return 0; } if (should_start) { @@ -253,9 +253,12 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) qemu_chr_fe_disconnect(&s->chardev); } } else { - vhost_user_blk_stop(vdev); + ret = vhost_user_blk_stop(vdev); + if (ret < 0) { + return ret; + } } - + return 0; } static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index b54d01d3a2..9bab2716c1 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1270,7 +1270,7 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, return features; } -static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) +static int virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) { VirtIOBlock *s = VIRTIO_BLK(vdev); @@ -1279,7 +1279,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) } if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { - return; + return 0; } /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send @@ -1302,6 +1302,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_WCE)); } + return 0; } static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) @@ -1802,7 +1803,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) * called after ->start_ioeventfd() has already set blk's AioContext. 
*/ s->change = - qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, s); + qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, NULL, s); blk_ram_registrar_init(&s->blk_ram_registrar, s->blk); blk_set_dev_ops(s->blk, &virtio_block_ops, s); diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index eb79f5258b..673c50f0be 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -622,7 +622,7 @@ static void guest_reset(VirtIOSerial *vser) } } -static void set_status(VirtIODevice *vdev, uint8_t status) +static int set_status(VirtIODevice *vdev, uint8_t status) { VirtIOSerial *vser; VirtIOSerialPort *port; @@ -650,6 +650,7 @@ static void set_status(VirtIODevice *vdev, uint8_t status) vsc->enable_backend(port, vdev->vm_running); } } + return 0; } static void vser_reset(VirtIODevice *vdev) diff --git a/hw/core/vm-change-state-handler.c b/hw/core/vm-change-state-handler.c index 7064995578..99c642b558 100644 --- a/hw/core/vm-change-state-handler.c +++ b/hw/core/vm-change-state-handler.c @@ -40,6 +40,7 @@ static int qdev_get_dev_tree_depth(DeviceState *dev) * qdev_add_vm_change_state_handler: * @dev: the device that owns this handler * @cb: the callback function to be invoked + * @cb_ret: the callback function with return value to be invoked * @opaque: user data passed to the callback function * * This function works like qemu_add_vm_change_state_handler() except callbacks @@ -50,25 +51,30 @@ static int qdev_get_dev_tree_depth(DeviceState *dev) * controller's callback is invoked before the children on its bus when the VM * starts running. The order is reversed when the VM stops running. * + * Note that the parameter `cb` and `cb_ret` are mutually exclusive. + * * Returns: an entry to be freed with qemu_del_vm_change_state_handler() */ VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, VMChangeStateHandler *cb, + VMChangeStateHandlerWithRet *cb_ret, void *opaque) { - return qdev_add_vm_change_state_handler_full(dev, cb, NULL, opaque); + assert(!cb || !cb_ret); + return qdev_add_vm_change_state_handler_full(dev, cb, NULL, cb_ret, opaque); } /* * Exactly like qdev_add_vm_change_state_handler() but passes a prepare_cb - * argument too. + * and the cb_ret arguments too. 
*/ VMChangeStateEntry *qdev_add_vm_change_state_handler_full( - DeviceState *dev, VMChangeStateHandler *cb, - VMChangeStateHandler *prepare_cb, void *opaque) + DeviceState *dev, VMChangeStateHandler *cb, VMChangeStateHandler *prepare_cb, + VMChangeStateHandlerWithRet *cb_ret, void *opaque) { int depth = qdev_get_dev_tree_depth(dev); - return qemu_add_vm_change_state_handler_prio_full(cb, prepare_cb, opaque, - depth); + assert(!cb || !cb_ret); + return qemu_add_vm_change_state_handler_prio_full(cb, prepare_cb, cb_ret, + opaque, depth); } diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c index 52ad1e4c3f..e150d74457 100644 --- a/hw/cxl/cxl-device-utils.c +++ b/hw/cxl/cxl-device-utils.c @@ -95,11 +95,15 @@ static uint64_t mailbox_reg_read(void *opaque, hwaddr offset, unsigned size) } if (offset == A_CXL_DEV_MAILBOX_STS) { uint64_t status_reg = cxl_dstate->mbox_reg_state64[offset / size]; - if (cci->bg.complete_pct) { - status_reg = FIELD_DP64(status_reg, CXL_DEV_MAILBOX_STS, BG_OP, - 0); - cxl_dstate->mbox_reg_state64[offset / size] = status_reg; - } + int bgop; + + qemu_mutex_lock(&cci->bg.lock); + bgop = !(cci->bg.complete_pct == 100 || cci->bg.aborted); + + status_reg = FIELD_DP64(status_reg, CXL_DEV_MAILBOX_STS, BG_OP, + bgop); + cxl_dstate->mbox_reg_state64[offset / size] = status_reg; + qemu_mutex_unlock(&cci->bg.lock); } return cxl_dstate->mbox_reg_state64[offset / size]; default: diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 516c01d840..299f232f26 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -7,6 +7,8 @@ * COPYING file in the top-level directory. */ +#include <math.h> + #include "qemu/osdep.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" @@ -26,6 +28,11 @@ #define CXL_DC_EVENT_LOG_SIZE 8 #define CXL_NUM_EXTENTS_SUPPORTED 512 #define CXL_NUM_TAGS_SUPPORTED 0 +#define CXL_ALERTS_LIFE_USED_WARN_THRESH (1 << 0) +#define CXL_ALERTS_OVER_TEMP_WARN_THRESH (1 << 1) +#define CXL_ALERTS_UNDER_TEMP_WARN_THRESH (1 << 2) +#define CXL_ALERTS_COR_VMEM_ERR_WARN_THRESH (1 << 3) +#define CXL_ALERTS_COR_PMEM_ERR_WARN_THRESH (1 << 4) /* * How to add a new command, example. The command set FOO, with cmd BAR. 
@@ -56,6 +63,9 @@ enum { INFOSTAT = 0x00, #define IS_IDENTIFY 0x1 #define BACKGROUND_OPERATION_STATUS 0x2 + #define GET_RESPONSE_MSG_LIMIT 0x3 + #define SET_RESPONSE_MSG_LIMIT 0x4 + #define BACKGROUND_OPERATION_ABORT 0x5 EVENTS = 0x01, #define GET_RECORDS 0x0 #define CLEAR_RECORDS 0x1 @@ -81,9 +91,13 @@ enum { #define GET_PARTITION_INFO 0x0 #define GET_LSA 0x2 #define SET_LSA 0x3 + HEALTH_INFO_ALERTS = 0x42, + #define GET_ALERT_CONFIG 0x1 + #define SET_ALERT_CONFIG 0x2 SANITIZE = 0x44, #define OVERWRITE 0x0 #define SECURE_ERASE 0x1 + #define MEDIA_OPERATIONS 0x2 PERSISTENT_MEM = 0x45, #define GET_SECURITY_STATE 0x0 MEDIA_AND_POISON = 0x43, @@ -412,12 +426,58 @@ static CXLRetCode cmd_infostat_identify(const struct cxl_cmd *cmd, is_identify->component_type = 0x3; /* Type 3 */ } - /* TODO: Allow this to vary across different CCIs */ - is_identify->max_message_size = 9; /* 512 bytes - MCTP_CXL_MAILBOX_BYTES */ + is_identify->max_message_size = (uint8_t)log2(cci->payload_max); *len_out = sizeof(*is_identify); return CXL_MBOX_SUCCESS; } +/* CXL r3.1 section 8.2.9.1.3: Get Response Message Limit (Opcode 0003h) */ +static CXLRetCode cmd_get_response_msg_limit(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint8_t rsp_limit; + } QEMU_PACKED *get_rsp_msg_limit = (void *)payload_out; + QEMU_BUILD_BUG_ON(sizeof(*get_rsp_msg_limit) != 1); + + get_rsp_msg_limit->rsp_limit = (uint8_t)log2(cci->payload_max); + + *len_out = sizeof(*get_rsp_msg_limit); + return CXL_MBOX_SUCCESS; +} + +/* CXL r3.1 section 8.2.9.1.4: Set Response Message Limit (Opcode 0004h) */ +static CXLRetCode cmd_set_response_msg_limit(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint8_t rsp_limit; + } QEMU_PACKED *in = (void *)payload_in; + QEMU_BUILD_BUG_ON(sizeof(*in) != 1); + struct { + uint8_t rsp_limit; + } QEMU_PACKED *out = (void *)payload_out; + QEMU_BUILD_BUG_ON(sizeof(*out) != 1); + + if (in->rsp_limit < 8 || in->rsp_limit > 10) { + return CXL_MBOX_INVALID_INPUT; + } + + cci->payload_max = 1 << in->rsp_limit; + out->rsp_limit = in->rsp_limit; + + *len_out = sizeof(*out); + return CXL_MBOX_SUCCESS; +} + static void cxl_set_dsp_active_bm(PCIBus *b, PCIDevice *d, void *private) { @@ -636,6 +696,41 @@ static CXLRetCode cmd_infostat_bg_op_sts(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* + * CXL r3.1 Section 8.2.9.1.5: + * Request Abort Background Operation (Opcode 0005h) + */ +static CXLRetCode cmd_infostat_bg_op_abort(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + int bg_set = cci->bg.opcode >> 8; + int bg_cmd = cci->bg.opcode & 0xff; + const struct cxl_cmd *bg_c = &cci->cxl_cmd_set[bg_set][bg_cmd]; + + if (!(bg_c->effect & CXL_MBOX_BACKGROUND_OPERATION_ABORT)) { + return CXL_MBOX_REQUEST_ABORT_NOTSUP; + } + + qemu_mutex_lock(&cci->bg.lock); + if (cci->bg.runtime) { + /* operation is near complete, let it finish */ + if (cci->bg.complete_pct < 85) { + timer_del(cci->bg.timer); + cci->bg.ret_code = CXL_MBOX_ABORTED; + cci->bg.starttime = 0; + cci->bg.runtime = 0; + cci->bg.aborted = true; + } + } + qemu_mutex_unlock(&cci->bg.lock); + + return CXL_MBOX_SUCCESS; +} + #define CXL_FW_SLOTS 2 #define CXL_FW_SIZE 0x02000000 /* 32 mb */ @@ -1523,6 +1618,97 @@ static CXLRetCode cmd_ccls_set_lsa(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* CXL r3.2 
Section 8.2.10.9.3.2 Get Alert Configuration (Opcode 4201h) */ +static CXLRetCode cmd_get_alert_config(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLAlertConfig *out = (CXLAlertConfig *)payload_out; + + memcpy(out, &ct3d->alert_config, sizeof(ct3d->alert_config)); + *len_out = sizeof(ct3d->alert_config); + + return CXL_MBOX_SUCCESS; +} + +/* CXL r3.2 Section 8.2.10.9.3.3 Set Alert Configuration (Opcode 4202h) */ +static CXLRetCode cmd_set_alert_config(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLAlertConfig *alert_config = &ct3d->alert_config; + struct { + uint8_t valid_alert_actions; + uint8_t enable_alert_actions; + uint8_t life_used_warn_thresh; + uint8_t rsvd; + uint16_t over_temp_warn_thresh; + uint16_t under_temp_warn_thresh; + uint16_t cor_vmem_err_warn_thresh; + uint16_t cor_pmem_err_warn_thresh; + } QEMU_PACKED *in = (void *)payload_in; + + if (in->valid_alert_actions & CXL_ALERTS_LIFE_USED_WARN_THRESH) { + /* + * CXL r3.2 Table 8-149 The life used warning threshold shall be + * less than the life used critical alert value. + */ + if (in->life_used_warn_thresh >= + alert_config->life_used_crit_alert_thresh) { + return CXL_MBOX_INVALID_INPUT; + } + alert_config->life_used_warn_thresh = in->life_used_warn_thresh; + alert_config->enable_alerts |= CXL_ALERTS_LIFE_USED_WARN_THRESH; + } + + if (in->valid_alert_actions & CXL_ALERTS_OVER_TEMP_WARN_THRESH) { + /* + * CXL r3.2 Table 8-149 The Device Over-Temperature Warning Threshold + * shall be less than the the Device Over-Temperature Critical + * Alert Threshold. + */ + if (in->over_temp_warn_thresh >= + alert_config->over_temp_crit_alert_thresh) { + return CXL_MBOX_INVALID_INPUT; + } + alert_config->over_temp_warn_thresh = in->over_temp_warn_thresh; + alert_config->enable_alerts |= CXL_ALERTS_OVER_TEMP_WARN_THRESH; + } + + if (in->valid_alert_actions & CXL_ALERTS_UNDER_TEMP_WARN_THRESH) { + /* + * CXL r3.2 Table 8-149 The Device Under-Temperature Warning Threshold + * shall be higher than the the Device Under-Temperature Critical + * Alert Threshold. + */ + if (in->under_temp_warn_thresh <= + alert_config->under_temp_crit_alert_thresh) { + return CXL_MBOX_INVALID_INPUT; + } + alert_config->under_temp_warn_thresh = in->under_temp_warn_thresh; + alert_config->enable_alerts |= CXL_ALERTS_UNDER_TEMP_WARN_THRESH; + } + + if (in->valid_alert_actions & CXL_ALERTS_COR_VMEM_ERR_WARN_THRESH) { + alert_config->cor_vmem_err_warn_thresh = in->cor_vmem_err_warn_thresh; + alert_config->enable_alerts |= CXL_ALERTS_COR_VMEM_ERR_WARN_THRESH; + } + + if (in->valid_alert_actions & CXL_ALERTS_COR_PMEM_ERR_WARN_THRESH) { + alert_config->cor_pmem_err_warn_thresh = in->cor_pmem_err_warn_thresh; + alert_config->enable_alerts |= CXL_ALERTS_COR_PMEM_ERR_WARN_THRESH; + } + return CXL_MBOX_SUCCESS; +} + /* Perform the actual device zeroing */ static void __do_sanitization(CXLType3Dev *ct3d) { @@ -1553,34 +1739,10 @@ static void __do_sanitization(CXLType3Dev *ct3d) cxl_discard_all_event_records(&ct3d->cxl_dstate); } -/* - * CXL r3.1 Section 8.2.9.9.5.1: Sanitize (Opcode 4400h) - * - * Once the Sanitize command has started successfully, the device shall be - * placed in the media disabled state. 
If the command fails or is interrupted - * by a reset or power failure, it shall remain in the media disabled state - * until a successful Sanitize command has been completed. During this state: - * - * 1. Memory writes to the device will have no effect, and all memory reads - * will return random values (no user data returned, even for locations that - * the failed Sanitize operation didn’t sanitize yet). - * - * 2. Mailbox commands shall still be processed in the disabled state, except - * that commands that access Sanitized areas shall fail with the Media Disabled - * error code. - */ -static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd, - uint8_t *payload_in, - size_t len_in, - uint8_t *payload_out, - size_t *len_out, - CXLCCI *cci) +static int get_sanitize_duration(uint64_t total_mem) { - CXLType3Dev *ct3d = CXL_TYPE3(cci->d); - uint64_t total_mem; /* in Mb */ - int secs; + int secs = 0; - total_mem = (ct3d->cxl_dstate.vmem_size + ct3d->cxl_dstate.pmem_size) >> 20; if (total_mem <= 512) { secs = 4; } else if (total_mem <= 1024) { @@ -1609,6 +1771,39 @@ static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd, secs = 240 * 60; /* max 4 hrs */ } + return secs; +} + +/* + * CXL r3.1 Section 8.2.9.9.5.1: Sanitize (Opcode 4400h) + * + * Once the Sanitize command has started successfully, the device shall be + * placed in the media disabled state. If the command fails or is interrupted + * by a reset or power failure, it shall remain in the media disabled state + * until a successful Sanitize command has been completed. During this state: + * + * 1. Memory writes to the device will have no effect, and all memory reads + * will return random values (no user data returned, even for locations that + * the failed Sanitize operation didn’t sanitize yet). + * + * 2. Mailbox commands shall still be processed in the disabled state, except + * that commands that access Sanitized areas shall fail with the Media Disabled + * error code. 
+ */ +static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + uint64_t total_mem; /* in Mb */ + int secs; + + total_mem = (ct3d->cxl_dstate.vmem_size + ct3d->cxl_dstate.pmem_size) >> 20; + secs = get_sanitize_duration(total_mem); + /* EBUSY other bg cmds as of now */ cci->bg.runtime = secs * 1000UL; *len_out = 0; @@ -1619,6 +1814,324 @@ static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd, return CXL_MBOX_BG_STARTED; } +struct dpa_range_list_entry { + uint64_t starting_dpa; + uint64_t length; +} QEMU_PACKED; + +struct CXLSanitizeInfo { + uint32_t dpa_range_count; + uint8_t fill_value; + struct dpa_range_list_entry dpa_range_list[]; +} QEMU_PACKED; + +static uint64_t get_vmr_size(CXLType3Dev *ct3d, MemoryRegion **vmr) +{ + MemoryRegion *mr; + if (ct3d->hostvmem) { + mr = host_memory_backend_get_memory(ct3d->hostvmem); + if (vmr) { + *vmr = mr; + } + return memory_region_size(mr); + } + return 0; +} + +static uint64_t get_pmr_size(CXLType3Dev *ct3d, MemoryRegion **pmr) +{ + MemoryRegion *mr; + if (ct3d->hostpmem) { + mr = host_memory_backend_get_memory(ct3d->hostpmem); + if (pmr) { + *pmr = mr; + } + return memory_region_size(mr); + } + return 0; +} + +static uint64_t get_dc_size(CXLType3Dev *ct3d, MemoryRegion **dc_mr) +{ + MemoryRegion *mr; + if (ct3d->dc.host_dc) { + mr = host_memory_backend_get_memory(ct3d->dc.host_dc); + if (dc_mr) { + *dc_mr = mr; + } + return memory_region_size(mr); + } + return 0; +} + +static int validate_dpa_addr(CXLType3Dev *ct3d, uint64_t dpa_addr, + size_t length) +{ + uint64_t vmr_size, pmr_size, dc_size; + + if ((dpa_addr % CXL_CACHE_LINE_SIZE) || + (length % CXL_CACHE_LINE_SIZE) || + (length <= 0)) { + return -EINVAL; + } + + vmr_size = get_vmr_size(ct3d, NULL); + pmr_size = get_pmr_size(ct3d, NULL); + dc_size = get_dc_size(ct3d, NULL); + + if (dpa_addr + length > vmr_size + pmr_size + dc_size) { + return -EINVAL; + } + + if (dpa_addr > vmr_size + pmr_size) { + if (!ct3_test_region_block_backed(ct3d, dpa_addr, length)) { + return -ENODEV; + } + } + + return 0; +} + +static int sanitize_range(CXLType3Dev *ct3d, uint64_t dpa_addr, size_t length, + uint8_t fill_value) +{ + + uint64_t vmr_size, pmr_size; + AddressSpace *as = NULL; + MemTxAttrs mem_attrs = {}; + + vmr_size = get_vmr_size(ct3d, NULL); + pmr_size = get_pmr_size(ct3d, NULL); + + if (dpa_addr < vmr_size) { + as = &ct3d->hostvmem_as; + } else if (dpa_addr < vmr_size + pmr_size) { + as = &ct3d->hostpmem_as; + } else { + if (!ct3_test_region_block_backed(ct3d, dpa_addr, length)) { + return -ENODEV; + } + as = &ct3d->dc.host_dc_as; + } + + return address_space_set(as, dpa_addr, fill_value, length, mem_attrs); +} + +/* Perform the actual device zeroing */ +static void __do_sanitize(CXLType3Dev *ct3d) +{ + struct CXLSanitizeInfo *san_info = ct3d->media_op_sanitize; + int dpa_range_count = san_info->dpa_range_count; + int rc = 0; + int i; + + for (i = 0; i < dpa_range_count; i++) { + rc = sanitize_range(ct3d, san_info->dpa_range_list[i].starting_dpa, + san_info->dpa_range_list[i].length, + san_info->fill_value); + if (rc) { + goto exit; + } + } +exit: + g_free(ct3d->media_op_sanitize); + ct3d->media_op_sanitize = NULL; + return; +} + +enum { + MEDIA_OP_CLASS_GENERAL = 0x0, + #define MEDIA_OP_GEN_SUBC_DISCOVERY 0x0 + MEDIA_OP_CLASS_SANITIZE = 0x1, + #define MEDIA_OP_SAN_SUBC_SANITIZE 0x0 + #define MEDIA_OP_SAN_SUBC_ZERO 0x1 +}; + +struct 
media_op_supported_list_entry { + uint8_t media_op_class; + uint8_t media_op_subclass; +}; + +struct media_op_discovery_out_pl { + uint64_t dpa_range_granularity; + uint16_t total_supported_operations; + uint16_t num_of_supported_operations; + struct media_op_supported_list_entry entry[]; +} QEMU_PACKED; + +static const struct media_op_supported_list_entry media_op_matrix[] = { + { MEDIA_OP_CLASS_GENERAL, MEDIA_OP_GEN_SUBC_DISCOVERY }, + { MEDIA_OP_CLASS_SANITIZE, MEDIA_OP_SAN_SUBC_SANITIZE }, + { MEDIA_OP_CLASS_SANITIZE, MEDIA_OP_SAN_SUBC_ZERO }, +}; + +static CXLRetCode media_operations_discovery(uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out) +{ + struct { + uint8_t media_operation_class; + uint8_t media_operation_subclass; + uint8_t rsvd[2]; + uint32_t dpa_range_count; + struct { + uint16_t start_index; + uint16_t num_ops; + } discovery_osa; + } QEMU_PACKED *media_op_in_disc_pl = (void *)payload_in; + struct media_op_discovery_out_pl *media_out_pl = + (struct media_op_discovery_out_pl *)payload_out; + int num_ops, start_index, i; + int count = 0; + + if (len_in < sizeof(*media_op_in_disc_pl)) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + + num_ops = media_op_in_disc_pl->discovery_osa.num_ops; + start_index = media_op_in_disc_pl->discovery_osa.start_index; + + /* + * As per spec CXL r3.2 8.2.10.9.5.3 dpa_range_count should be zero and + * start index should not exceed the total number of entries for discovery + * sub class command. + */ + if (media_op_in_disc_pl->dpa_range_count || + start_index > ARRAY_SIZE(media_op_matrix)) { + return CXL_MBOX_INVALID_INPUT; + } + + media_out_pl->dpa_range_granularity = CXL_CACHE_LINE_SIZE; + media_out_pl->total_supported_operations = + ARRAY_SIZE(media_op_matrix); + if (num_ops > 0) { + for (i = start_index; i < start_index + num_ops; i++) { + media_out_pl->entry[count].media_op_class = + media_op_matrix[i].media_op_class; + media_out_pl->entry[count].media_op_subclass = + media_op_matrix[i].media_op_subclass; + count++; + if (count == num_ops) { + break; + } + } + } + + media_out_pl->num_of_supported_operations = count; + *len_out = sizeof(*media_out_pl) + count * sizeof(*media_out_pl->entry); + return CXL_MBOX_SUCCESS; +} + +static CXLRetCode media_operations_sanitize(CXLType3Dev *ct3d, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + uint8_t fill_value, + CXLCCI *cci) +{ + struct media_operations_sanitize { + uint8_t media_operation_class; + uint8_t media_operation_subclass; + uint8_t rsvd[2]; + uint32_t dpa_range_count; + struct dpa_range_list_entry dpa_range_list[]; + } QEMU_PACKED *media_op_in_sanitize_pl = (void *)payload_in; + uint32_t dpa_range_count = media_op_in_sanitize_pl->dpa_range_count; + uint64_t total_mem = 0; + size_t dpa_range_list_size; + int secs = 0, i; + + if (dpa_range_count == 0) { + return CXL_MBOX_SUCCESS; + } + + dpa_range_list_size = dpa_range_count * sizeof(struct dpa_range_list_entry); + if (len_in < (sizeof(*media_op_in_sanitize_pl) + dpa_range_list_size)) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + + for (i = 0; i < dpa_range_count; i++) { + uint64_t start_dpa = + media_op_in_sanitize_pl->dpa_range_list[i].starting_dpa; + uint64_t length = media_op_in_sanitize_pl->dpa_range_list[i].length; + + if (validate_dpa_addr(ct3d, start_dpa, length)) { + return CXL_MBOX_INVALID_INPUT; + } + total_mem += length; + } + ct3d->media_op_sanitize = g_malloc0(sizeof(struct CXLSanitizeInfo) + + dpa_range_list_size); + + 
ct3d->media_op_sanitize->dpa_range_count = dpa_range_count; + ct3d->media_op_sanitize->fill_value = fill_value; + memcpy(ct3d->media_op_sanitize->dpa_range_list, + media_op_in_sanitize_pl->dpa_range_list, + dpa_range_list_size); + secs = get_sanitize_duration(total_mem >> 20); + + /* EBUSY other bg cmds as of now */ + cci->bg.runtime = secs * 1000UL; + *len_out = 0; + /* + * media op sanitize is targeted so no need to disable media or + * clear event logs + */ + return CXL_MBOX_BG_STARTED; +} + +static CXLRetCode cmd_media_operations(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint8_t media_operation_class; + uint8_t media_operation_subclass; + uint8_t rsvd[2]; + uint32_t dpa_range_count; + } QEMU_PACKED *media_op_in_common_pl = (void *)payload_in; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + uint8_t media_op_cl = 0; + uint8_t media_op_subclass = 0; + + if (len_in < sizeof(*media_op_in_common_pl)) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + + media_op_cl = media_op_in_common_pl->media_operation_class; + media_op_subclass = media_op_in_common_pl->media_operation_subclass; + + switch (media_op_cl) { + case MEDIA_OP_CLASS_GENERAL: + if (media_op_subclass != MEDIA_OP_GEN_SUBC_DISCOVERY) { + return CXL_MBOX_UNSUPPORTED; + } + + return media_operations_discovery(payload_in, len_in, payload_out, + len_out); + case MEDIA_OP_CLASS_SANITIZE: + switch (media_op_subclass) { + case MEDIA_OP_SAN_SUBC_SANITIZE: + return media_operations_sanitize(ct3d, payload_in, len_in, + payload_out, len_out, 0xF, + cci); + case MEDIA_OP_SAN_SUBC_ZERO: + return media_operations_sanitize(ct3d, payload_in, len_in, + payload_out, len_out, 0, + cci); + default: + return CXL_MBOX_UNSUPPORTED; + } + default: + return CXL_MBOX_UNSUPPORTED; + } +} + static CXLRetCode cmd_get_security_state(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -2715,6 +3228,8 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd, } static const struct cxl_cmd cxl_cmd_set[256][256] = { + [INFOSTAT][BACKGROUND_OPERATION_ABORT] = { "BACKGROUND_OPERATION_ABORT", + cmd_infostat_bg_op_abort, 0, 0 }, [EVENTS][GET_RECORDS] = { "EVENTS_GET_RECORDS", cmd_events_get_records, 1, 0 }, [EVENTS][CLEAR_RECORDS] = { "EVENTS_CLEAR_RECORDS", @@ -2727,9 +3242,11 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = { [FIRMWARE_UPDATE][GET_INFO] = { "FIRMWARE_UPDATE_GET_INFO", cmd_firmware_update_get_info, 0, 0 }, [FIRMWARE_UPDATE][TRANSFER] = { "FIRMWARE_UPDATE_TRANSFER", - cmd_firmware_update_transfer, ~0, CXL_MBOX_BACKGROUND_OPERATION }, + cmd_firmware_update_transfer, ~0, + CXL_MBOX_BACKGROUND_OPERATION | CXL_MBOX_BACKGROUND_OPERATION_ABORT }, [FIRMWARE_UPDATE][ACTIVATE] = { "FIRMWARE_UPDATE_ACTIVATE", - cmd_firmware_update_activate, 2, CXL_MBOX_BACKGROUND_OPERATION }, + cmd_firmware_update_activate, 2, + CXL_MBOX_BACKGROUND_OPERATION | CXL_MBOX_BACKGROUND_OPERATION_ABORT }, [TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 }, [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, 8, CXL_MBOX_IMMEDIATE_POLICY_CHANGE }, @@ -2755,9 +3272,20 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = { [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 }, [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa, ~0, CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | CXL_MBOX_IMMEDIATE_DATA_CHANGE }, + [HEALTH_INFO_ALERTS][GET_ALERT_CONFIG] = { + "HEALTH_INFO_ALERTS_GET_ALERT_CONFIG", + cmd_get_alert_config, 0, 0 }, + 
[HEALTH_INFO_ALERTS][SET_ALERT_CONFIG] = { + "HEALTH_INFO_ALERTS_SET_ALERT_CONFIG", + cmd_set_alert_config, 12, CXL_MBOX_IMMEDIATE_POLICY_CHANGE }, [SANITIZE][OVERWRITE] = { "SANITIZE_OVERWRITE", cmd_sanitize_overwrite, 0, (CXL_MBOX_IMMEDIATE_DATA_CHANGE | CXL_MBOX_SECURITY_STATE_CHANGE | + CXL_MBOX_BACKGROUND_OPERATION | + CXL_MBOX_BACKGROUND_OPERATION_ABORT)}, + [SANITIZE][MEDIA_OPERATIONS] = { "MEDIA_OPERATIONS", cmd_media_operations, + ~0, + (CXL_MBOX_IMMEDIATE_DATA_CHANGE | CXL_MBOX_BACKGROUND_OPERATION)}, [PERSISTENT_MEM][GET_SECURITY_STATE] = { "GET_SECURITY_STATE", cmd_get_security_state, 0, 0 }, @@ -2771,7 +3299,8 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = { "MEDIA_AND_POISON_GET_SCAN_MEDIA_CAPABILITIES", cmd_media_get_scan_media_capabilities, 16, 0 }, [MEDIA_AND_POISON][SCAN_MEDIA] = { "MEDIA_AND_POISON_SCAN_MEDIA", - cmd_media_scan_media, 17, CXL_MBOX_BACKGROUND_OPERATION }, + cmd_media_scan_media, 17, + (CXL_MBOX_BACKGROUND_OPERATION | CXL_MBOX_BACKGROUND_OPERATION_ABORT)}, [MEDIA_AND_POISON][GET_SCAN_MEDIA_RESULTS] = { "MEDIA_AND_POISON_GET_SCAN_MEDIA_RESULTS", cmd_media_get_scan_media_results, 0, 0 }, @@ -2795,6 +3324,8 @@ static const struct cxl_cmd cxl_cmd_set_sw[256][256] = { [INFOSTAT][IS_IDENTIFY] = { "IDENTIFY", cmd_infostat_identify, 0, 0 }, [INFOSTAT][BACKGROUND_OPERATION_STATUS] = { "BACKGROUND_OPERATION_STATUS", cmd_infostat_bg_op_sts, 0, 0 }, + [INFOSTAT][BACKGROUND_OPERATION_ABORT] = { "BACKGROUND_OPERATION_ABORT", + cmd_infostat_bg_op_abort, 0, 0 }, [TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 }, [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, 8, CXL_MBOX_IMMEDIATE_POLICY_CHANGE }, @@ -2881,6 +3412,7 @@ int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd, cci->bg.opcode = (set << 8) | cmd; cci->bg.complete_pct = 0; + cci->bg.aborted = false; cci->bg.ret_code = 0; now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); @@ -2894,10 +3426,12 @@ int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd, static void bg_timercb(void *opaque) { CXLCCI *cci = opaque; - uint64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); - uint64_t total_time = cci->bg.starttime + cci->bg.runtime; + uint64_t now, total_time; - assert(cci->bg.runtime > 0); + qemu_mutex_lock(&cci->bg.lock); + + now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + total_time = cci->bg.starttime + cci->bg.runtime; if (now >= total_time) { /* we are done */ uint16_t ret = CXL_MBOX_SUCCESS; @@ -2916,6 +3450,12 @@ static void bg_timercb(void *opaque) cxl_dev_enable_media(&ct3d->cxl_dstate); } break; + case 0x4402: /* Media Operations sanitize */ + { + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + __do_sanitize(ct3d); + } + break; case 0x4304: /* scan media */ { CXLType3Dev *ct3d = CXL_TYPE3(cci->d); @@ -2950,6 +3490,8 @@ static void bg_timercb(void *opaque) msi_notify(pdev, cxl_dstate->mbox_msi_n); } } + + qemu_mutex_unlock(&cci->bg.lock); } static void cxl_rebuild_cel(CXLCCI *cci) @@ -2978,12 +3520,21 @@ void cxl_init_cci(CXLCCI *cci, size_t payload_max) cci->bg.complete_pct = 0; cci->bg.starttime = 0; cci->bg.runtime = 0; + cci->bg.aborted = false; cci->bg.timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, bg_timercb, cci); + qemu_mutex_init(&cci->bg.lock); memset(&cci->fw, 0, sizeof(cci->fw)); cci->fw.active_slot = 1; cci->fw.slot[cci->fw.active_slot - 1] = true; + cci->initialized = true; +} + +void cxl_destroy_cci(CXLCCI *cci) +{ + qemu_mutex_destroy(&cci->bg.lock); + cci->initialized = false; } static void cxl_copy_cci_commands(CXLCCI *cci, const struct cxl_cmd 
(*cxl_cmds)[256]) @@ -3047,6 +3598,10 @@ void cxl_initialize_t3_ld_cci(CXLCCI *cci, DeviceState *d, DeviceState *intf, static const struct cxl_cmd cxl_cmd_set_t3_fm_owned_ld_mctp[256][256] = { [INFOSTAT][IS_IDENTIFY] = { "IDENTIFY", cmd_infostat_identify, 0, 0}, + [INFOSTAT][GET_RESPONSE_MSG_LIMIT] = { "GET_RESPONSE_MSG_LIMIT", + cmd_get_response_msg_limit, 0, 0 }, + [INFOSTAT][SET_RESPONSE_MSG_LIMIT] = { "SET_RESPONSE_MSG_LIMIT", + cmd_set_response_msg_limit, 1, 0 }, [LOGS][GET_SUPPORTED] = { "LOGS_GET_SUPPORTED", cmd_logs_get_supported, 0, 0 }, [LOGS][GET_LOG] = { "LOGS_GET_LOG", cmd_logs_get_log, 0x18, 0 }, diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 43d4c08a2e..9fc6bbcd2c 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -516,7 +516,7 @@ vhost_user_gpu_set_config(VirtIODevice *vdev, } } -static void +static int vhost_user_gpu_set_status(VirtIODevice *vdev, uint8_t val) { VhostUserGPU *g = VHOST_USER_GPU(vdev); @@ -525,18 +525,24 @@ vhost_user_gpu_set_status(VirtIODevice *vdev, uint8_t val) if (val & VIRTIO_CONFIG_S_DRIVER_OK && vdev->vm_running) { if (!vhost_user_gpu_do_set_socket(g, &err)) { error_report_err(err); - return; + return 0; } vhost_user_backend_start(g->vhost); } else { + int ret; + /* unblock any wait and stop processing */ if (g->vhost_gpu_fd != -1) { vhost_user_gpu_update_blocked(g, true); qemu_chr_fe_deinit(&g->vhost_chr, true); g->vhost_gpu_fd = -1; } - vhost_user_backend_stop(g->vhost); + ret = vhost_user_backend_stop(g->vhost); + if (ret < 0) { + return ret; + } } + return 0; } static bool diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index f40ad062f9..61851cc840 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2333,10 +2333,10 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, build_append_int_noprefix(table_data, ivhd_blob->len + 24, 2); /* DeviceID */ build_append_int_noprefix(table_data, - object_property_get_int(OBJECT(&s->pci), "addr", + object_property_get_int(OBJECT(s->pci), "addr", &error_abort), 2); /* Capability offset */ - build_append_int_noprefix(table_data, s->pci.capab_offset, 2); + build_append_int_noprefix(table_data, s->pci->capab_offset, 2); /* IOMMU base address */ build_append_int_noprefix(table_data, s->mr_mmio.addr, 8); /* PCI Segment Group */ @@ -2368,10 +2368,10 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, build_append_int_noprefix(table_data, ivhd_blob->len + 40, 2); /* DeviceID */ build_append_int_noprefix(table_data, - object_property_get_int(OBJECT(&s->pci), "addr", + object_property_get_int(OBJECT(s->pci), "addr", &error_abort), 2); /* Capability offset */ - build_append_int_noprefix(table_data, s->pci.capab_offset, 2); + build_append_int_noprefix(table_data, s->pci->capab_offset, 2); /* IOMMU base address */ build_append_int_noprefix(table_data, s->mr_mmio.addr, 8); /* PCI Segment Group */ diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 2cf7e24a21..0775c8f3bb 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -167,11 +167,11 @@ static void amdvi_generate_msi_interrupt(AMDVIState *s) { MSIMessage msg = {}; MemTxAttrs attrs = { - .requester_id = pci_requester_id(&s->pci.dev) + .requester_id = pci_requester_id(&s->pci->dev) }; - if (msi_enabled(&s->pci.dev)) { - msg = msi_get_message(&s->pci.dev, 0); + if (msi_enabled(&s->pci->dev)) { + msg = msi_get_message(&s->pci->dev, 0); address_space_stl_le(&address_space_memory, msg.address, msg.data, attrs, NULL); } @@ -239,7 
+239,7 @@ static void amdvi_page_fault(AMDVIState *s, uint16_t devid, info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF; amdvi_encode_event(evt, devid, addr, info); amdvi_log_event(s, evt); - pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, + pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS, PCI_STATUS_SIG_TARGET_ABORT); } /* @@ -256,7 +256,7 @@ static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid, amdvi_encode_event(evt, devid, devtab, info); amdvi_log_event(s, evt); - pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, + pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS, PCI_STATUS_SIG_TARGET_ABORT); } /* log an event trying to access command buffer @@ -269,7 +269,7 @@ static void amdvi_log_command_error(AMDVIState *s, hwaddr addr) amdvi_encode_event(evt, 0, addr, info); amdvi_log_event(s, evt); - pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, + pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS, PCI_STATUS_SIG_TARGET_ABORT); } /* log an illegal command event @@ -310,7 +310,7 @@ static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid, info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR; amdvi_encode_event(evt, devid, addr, info); amdvi_log_event(s, evt); - pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, + pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS, PCI_STATUS_SIG_TARGET_ABORT); } @@ -1607,26 +1607,92 @@ static void amdvi_sysbus_reset(DeviceState *dev) { AMDVIState *s = AMD_IOMMU_DEVICE(dev); - msi_reset(&s->pci.dev); + msi_reset(&s->pci->dev); amdvi_init(s); } +static const VMStateDescription vmstate_amdvi_sysbus_migratable = { + .name = "amd-iommu", + .version_id = 1, + .minimum_version_id = 1, + .priority = MIG_PRI_IOMMU, + .fields = (VMStateField[]) { + /* Updated in amdvi_handle_control_write() */ + VMSTATE_BOOL(enabled, AMDVIState), + VMSTATE_BOOL(ga_enabled, AMDVIState), + VMSTATE_BOOL(ats_enabled, AMDVIState), + VMSTATE_BOOL(cmdbuf_enabled, AMDVIState), + VMSTATE_BOOL(completion_wait_intr, AMDVIState), + VMSTATE_BOOL(evtlog_enabled, AMDVIState), + VMSTATE_BOOL(evtlog_intr, AMDVIState), + /* Updated in amdvi_handle_devtab_write() */ + VMSTATE_UINT64(devtab, AMDVIState), + VMSTATE_UINT64(devtab_len, AMDVIState), + /* Updated in amdvi_handle_cmdbase_write() */ + VMSTATE_UINT64(cmdbuf, AMDVIState), + VMSTATE_UINT64(cmdbuf_len, AMDVIState), + /* Updated in amdvi_handle_cmdhead_write() */ + VMSTATE_UINT32(cmdbuf_head, AMDVIState), + /* Updated in amdvi_handle_cmdtail_write() */ + VMSTATE_UINT32(cmdbuf_tail, AMDVIState), + /* Updated in amdvi_handle_evtbase_write() */ + VMSTATE_UINT64(evtlog, AMDVIState), + VMSTATE_UINT32(evtlog_len, AMDVIState), + /* Updated in amdvi_handle_evthead_write() */ + VMSTATE_UINT32(evtlog_head, AMDVIState), + /* Updated in amdvi_handle_evttail_write() */ + VMSTATE_UINT32(evtlog_tail, AMDVIState), + /* Updated in amdvi_handle_pprbase_write() */ + VMSTATE_UINT64(ppr_log, AMDVIState), + VMSTATE_UINT32(pprlog_len, AMDVIState), + /* Updated in amdvi_handle_pprhead_write() */ + VMSTATE_UINT32(pprlog_head, AMDVIState), + /* Updated in amdvi_handle_tailhead_write() */ + VMSTATE_UINT32(pprlog_tail, AMDVIState), + /* MMIO registers */ + VMSTATE_UINT8_ARRAY(mmior, AMDVIState, AMDVI_MMIO_SIZE), + VMSTATE_UINT8_ARRAY(romask, AMDVIState, AMDVI_MMIO_SIZE), + VMSTATE_UINT8_ARRAY(w1cmask, AMDVIState, AMDVI_MMIO_SIZE), + VMSTATE_END_OF_LIST() + } +}; + static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) { + DeviceClass *dc = (DeviceClass *) object_get_class(OBJECT(dev)); 
AMDVIState *s = AMD_IOMMU_DEVICE(dev); MachineState *ms = MACHINE(qdev_get_machine()); PCMachineState *pcms = PC_MACHINE(ms); X86MachineState *x86ms = X86_MACHINE(ms); PCIBus *bus = pcms->pcibus; - s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, - amdvi_uint64_equal, g_free, g_free); + if (s->pci_id) { + PCIDevice *pdev = NULL; + int ret = pci_qdev_find_device(s->pci_id, &pdev); - /* This device should take care of IOMMU PCI properties */ - if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) { - return; + if (ret) { + error_report("Cannot find PCI device '%s'", s->pci_id); + return; + } + + if (!object_dynamic_cast(OBJECT(pdev), TYPE_AMD_IOMMU_PCI)) { + error_report("Device '%s' must be an AMDVI-PCI device type", s->pci_id); + return; + } + + s->pci = AMD_IOMMU_PCI(pdev); + dc->vmsd = &vmstate_amdvi_sysbus_migratable; + } else { + s->pci = AMD_IOMMU_PCI(object_new(TYPE_AMD_IOMMU_PCI)); + /* This device should take care of IOMMU PCI properties */ + if (!qdev_realize(DEVICE(s->pci), &bus->qbus, errp)) { + return; + } } + s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, + amdvi_uint64_equal, g_free, g_free); + /* Pseudo address space under root PCI bus. */ x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); @@ -1663,6 +1729,7 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) static const Property amdvi_properties[] = { DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false), + DEFINE_PROP_STRING("pci-id", AMDVIState, pci_id), }; static const VMStateDescription vmstate_amdvi_sysbus = { @@ -1670,13 +1737,6 @@ static const VMStateDescription vmstate_amdvi_sysbus = { .unmigratable = 1 }; -static void amdvi_sysbus_instance_init(Object *klass) -{ - AMDVIState *s = AMD_IOMMU_DEVICE(klass); - - object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI); -} - static void amdvi_sysbus_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -1696,7 +1756,6 @@ static const TypeInfo amdvi_sysbus = { .name = TYPE_AMD_IOMMU_DEVICE, .parent = TYPE_X86_IOMMU_DEVICE, .instance_size = sizeof(AMDVIState), - .instance_init = amdvi_sysbus_instance_init, .class_init = amdvi_sysbus_class_init }; diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 28125130c6..5672bdef89 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -315,7 +315,8 @@ struct AMDVIPCIState { struct AMDVIState { X86IOMMUState iommu; /* IOMMU bus device */ - AMDVIPCIState pci; /* IOMMU PCI device */ + AMDVIPCIState *pci; /* IOMMU PCI device */ + char *pci_id; /* ID of AMDVI-PCI device, if user created */ uint32_t version; @@ -328,7 +329,7 @@ struct AMDVIState { bool excl_enabled; hwaddr devtab; /* base address device table */ - size_t devtab_len; /* device table length */ + uint64_t devtab_len; /* device table length */ hwaddr cmdbuf; /* command buffer base address */ uint64_t cmdbuf_len; /* command buffer length */ diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 5f8ed1243d..69d72ad35c 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1728,8 +1728,6 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as) static bool vtd_switch_address_space(VTDAddressSpace *as) { bool use_iommu, pt; - /* Whether we need to take the BQL on our own */ - bool take_bql = !bql_locked(); assert(as); @@ -1746,9 +1744,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) * from vtd_pt_enable_fast_path(). However the memory APIs need * it. We'd better make sure we have had it already, or, take it. 
*/ - if (take_bql) { - bql_lock(); - } + BQL_LOCK_GUARD(); /* Turn off first then on the other */ if (use_iommu) { @@ -1801,10 +1797,6 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) memory_region_set_enabled(&as->iommu_ir_fault, false); } - if (take_bql) { - bql_unlock(); - } - return use_iommu; } @@ -4213,9 +4205,30 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, VTDAddressSpace *vtd_dev_as; char name[128]; + vtd_iommu_lock(s); vtd_dev_as = g_hash_table_lookup(s->vtd_address_spaces, &key); + vtd_iommu_unlock(s); + if (!vtd_dev_as) { - struct vtd_as_key *new_key = g_malloc(sizeof(*new_key)); + struct vtd_as_key *new_key; + /* Slow path */ + + /* + * memory_region_add_subregion_overlap requires the bql, + * make sure we own it. + */ + BQL_LOCK_GUARD(); + vtd_iommu_lock(s); + + /* Check again as we released the lock for a moment */ + vtd_dev_as = g_hash_table_lookup(s->vtd_address_spaces, &key); + if (vtd_dev_as) { + vtd_iommu_unlock(s); + return vtd_dev_as; + } + + /* Still nothing, allocate a new address space */ + new_key = g_malloc(sizeof(*new_key)); new_key->bus = bus; new_key->devfn = devfn; @@ -4306,6 +4319,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, vtd_switch_address_space(vtd_dev_as); g_hash_table_insert(s->vtd_address_spaces, new_key, vtd_dev_as); + + vtd_iommu_unlock(s); } return vtd_dev_as; } diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c index 1818cbddc7..a3f554f211 100644 --- a/hw/input/virtio-input.c +++ b/hw/input/virtio-input.c @@ -189,7 +189,7 @@ static uint64_t virtio_input_get_features(VirtIODevice *vdev, uint64_t f, return f; } -static void virtio_input_set_status(VirtIODevice *vdev, uint8_t val) +static int virtio_input_set_status(VirtIODevice *vdev, uint8_t val) { VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(vdev); VirtIOInput *vinput = VIRTIO_INPUT(vdev); @@ -202,6 +202,7 @@ static void virtio_input_set_status(VirtIODevice *vdev, uint8_t val) } } } + return 0; } static void virtio_input_reset(VirtIODevice *vdev) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index bba923f8ea..94e7274912 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -843,6 +843,19 @@ static DOEProtocol doe_cdat_prot[] = { { } }; +/* Initialize CXL device alerts with default threshold values. */ +static void init_alert_config(CXLType3Dev *ct3d) +{ + ct3d->alert_config = (CXLAlertConfig) { + .life_used_crit_alert_thresh = 75, + .life_used_warn_thresh = 40, + .over_temp_crit_alert_thresh = 35, + .under_temp_crit_alert_thresh = 10, + .over_temp_warn_thresh = 25, + .under_temp_warn_thresh = 20 + }; +} + static void ct3_realize(PCIDevice *pci_dev, Error **errp) { ERRP_GUARD(); @@ -910,6 +923,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) goto err_msix_uninit; } + init_alert_config(ct3d); pcie_cap_deverr_init(pci_dev); /* Leave a bit of room for expansion */ rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, errp); @@ -969,6 +983,7 @@ static void ct3_exit(PCIDevice *pci_dev) cxl_doe_cdat_release(cxl_cstate); msix_uninit_exclusive_bar(pci_dev); g_free(regs->special_ops); + cxl_destroy_cci(&ct3d->cci); if (ct3d->dc.host_dc) { cxl_destroy_dc_regions(ct3d); address_space_destroy(&ct3d->dc.host_dc_as); @@ -1224,12 +1239,17 @@ static void ct3d_reset(DeviceState *dev) * Bring up an endpoint to target with MCTP over VDM. * This device is emulating an MLD with single LD for now. 
*/ + if (ct3d->vdm_fm_owned_ld_mctp_cci.initialized) { + cxl_destroy_cci(&ct3d->vdm_fm_owned_ld_mctp_cci); + } cxl_initialize_t3_fm_owned_ld_mctpcci(&ct3d->vdm_fm_owned_ld_mctp_cci, DEVICE(ct3d), DEVICE(ct3d), 512); /* Max payload made up */ + if (ct3d->ld0_cci.initialized) { + cxl_destroy_cci(&ct3d->ld0_cci); + } cxl_initialize_t3_ld_cci(&ct3d->ld0_cci, DEVICE(ct3d), DEVICE(ct3d), 512); /* Max payload made up */ - } static const Property ct3_props[] = { diff --git a/hw/misc/pci-testdev.c b/hw/misc/pci-testdev.c index 3f6a8bba84..ba71c5069f 100644 --- a/hw/misc/pci-testdev.c +++ b/hw/misc/pci-testdev.c @@ -90,6 +90,7 @@ struct PCITestDevState { int current; uint64_t membar_size; + bool membar_backed; MemoryRegion membar; }; @@ -258,8 +259,14 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error **errp) pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->portio); if (d->membar_size) { - memory_region_init(&d->membar, OBJECT(d), "pci-testdev-membar", - d->membar_size); + if (d->membar_backed) + memory_region_init_ram(&d->membar, OBJECT(d), + "pci-testdev-membar-backed", + d->membar_size, NULL); + else + memory_region_init(&d->membar, OBJECT(d), + "pci-testdev-membar", + d->membar_size); pci_register_bar(pci_dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_PREFETCH | @@ -321,6 +328,7 @@ static void qdev_pci_testdev_reset(DeviceState *dev) static const Property pci_testdev_properties[] = { DEFINE_PROP_SIZE("membar", PCITestDevState, membar_size, 0), + DEFINE_PROP_BOOL("membar-backed", PCITestDevState, membar_backed, false), }; static void pci_testdev_class_init(ObjectClass *klass, const void *data) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 2de037c273..221252e00a 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -382,7 +382,7 @@ static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq) } } -static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) +static int virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) { VirtIONet *n = VIRTIO_NET(vdev); VirtIONetQueue *q; @@ -437,6 +437,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) } } } + return 0; } static void virtio_net_set_link_status(NetClientState *nc) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 352b3d12c8..f5ab510697 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -94,6 +94,7 @@ static const Property pci_props[] = { QEMU_PCIE_ARI_NEXTFN_1_BITNR, false), DEFINE_PROP_SIZE32("x-max-bounce-buffer-size", PCIDevice, max_bounce_buffer_size, DEFAULT_MAX_BOUNCE_BUFFER_SIZE), + DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf), DEFINE_PROP_BIT("x-pcie-ext-tag", PCIDevice, cap_present, QEMU_PCIE_EXT_TAG_BITNR, true), { .name = "busnr", .info = &prop_pci_busnr }, @@ -1105,13 +1106,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; } - /* - * With SR/IOV and ARI, a device at function 0 need not be a multifunction - * device, as it may just be a VF that ended up with function 0 in - * the legacy PCI interpretation. Avoid failing in such cases: - */ - if (pci_is_vf(dev) && - dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { + /* SR/IOV is not handled here. 
*/ + if (pci_is_vf(dev)) { return; } @@ -1144,7 +1140,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) } /* function 0 indicates single function, so function > 0 must be NULL */ for (func = 1; func < PCI_FUNC_MAX; ++func) { - if (bus->devices[PCI_DEVFN(slot, func)]) { + PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)]; + if (device && !pci_is_vf(device)) { error_setg(errp, "PCI: %x.0 indicates single function, " "but %x.%x is already populated.", slot, slot, func); @@ -1432,6 +1429,7 @@ static void pci_qdev_unrealize(DeviceState *dev) pci_unregister_io_regions(pci_dev); pci_del_option_rom(pci_dev); + pcie_sriov_unregister_device(pci_dev); if (pc->exit) { pc->exit(pci_dev); @@ -1463,7 +1461,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, pcibus_t size = memory_region_size(memory); uint8_t hdr_type; - assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */ assert(region_num >= 0); assert(region_num < PCI_NUM_REGIONS); assert(is_power_of_2(size)); @@ -1475,7 +1472,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, r = &pci_dev->io_regions[region_num]; assert(!r->size); - r->addr = PCI_BAR_UNMAPPED; r->size = size; r->type = type; r->memory = memory; @@ -1483,22 +1479,35 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, ? pci_get_bus(pci_dev)->address_space_io : pci_get_bus(pci_dev)->address_space_mem; - wmask = ~(size - 1); - if (region_num == PCI_ROM_SLOT) { - /* ROM enable bit is writable */ - wmask |= PCI_ROM_ADDRESS_ENABLE; - } + if (pci_is_vf(pci_dev)) { + PCIDevice *pf = pci_dev->exp.sriov_vf.pf; + assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]); - addr = pci_bar(pci_dev, region_num); - pci_set_long(pci_dev->config + addr, type); - - if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && - r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { - pci_set_quad(pci_dev->wmask + addr, wmask); - pci_set_quad(pci_dev->cmask + addr, ~0ULL); + r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size); + if (r->addr != PCI_BAR_UNMAPPED) { + memory_region_add_subregion_overlap(r->address_space, + r->addr, r->memory, 1); + } } else { - pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); - pci_set_long(pci_dev->cmask + addr, 0xffffffff); + r->addr = PCI_BAR_UNMAPPED; + + wmask = ~(size - 1); + if (region_num == PCI_ROM_SLOT) { + /* ROM enable bit is writable */ + wmask |= PCI_ROM_ADDRESS_ENABLE; + } + + addr = pci_bar(pci_dev, region_num); + pci_set_long(pci_dev->config + addr, type); + + if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && + r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + pci_set_quad(pci_dev->wmask + addr, wmask); + pci_set_quad(pci_dev->cmask + addr, ~0ULL); + } else { + pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); + pci_set_long(pci_dev->cmask + addr, 0xffffffff); + } } } @@ -1587,7 +1596,11 @@ static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg, pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET); uint16_t vf_stride = pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE); - uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride; + uint32_t vf_num = d->devfn - (pf->devfn + vf_offset); + + if (vf_num) { + vf_num /= vf_stride; + } if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) { new_addr = pci_get_quad(pf->config + bar); @@ -2261,6 +2274,11 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) } } + if (!pcie_sriov_register_device(pci_dev, errp)) { + pci_qdev_unrealize(DEVICE(pci_dev)); + return; + } + /* * A PCIe Downstream Port that do 
not have ARI Forwarding enabled must * associate only Device 0 with the device attached to the bus @@ -2515,6 +2533,14 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, return; } + if (pci_is_vf(pdev)) { + if (pdev->rom_bar > 0) { + error_setg(errp, "ROM BAR cannot be enabled for SR-IOV VF"); + } + + return; + } + if (load_file || pdev->romsize == UINT32_MAX) { path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile); if (path == NULL) { diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 1eb4358256..3ad18744f4 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -15,11 +15,12 @@ #include "hw/pci/pcie.h" #include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" -#include "qemu/error-report.h" #include "qemu/range.h" #include "qapi/error.h" #include "trace.h" +static GHashTable *pfs; + static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) { for (uint16_t i = 0; i < total_vfs; i++) { @@ -31,17 +32,57 @@ static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) dev->exp.sriov_pf.vf = NULL; } -bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, - const char *vfname, uint16_t vf_dev_id, - uint16_t init_vfs, uint16_t total_vfs, - uint16_t vf_offset, uint16_t vf_stride, - Error **errp) +static void register_vfs(PCIDevice *dev) +{ + uint16_t num_vfs; + uint16_t i; + uint16_t sriov_cap = dev->exp.sriov_cap; + + assert(sriov_cap > 0); + num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); + + trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), num_vfs); + for (i = 0; i < num_vfs; i++) { + pci_set_enabled(dev->exp.sriov_pf.vf[i], true); + } + + pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0); +} + +static void unregister_vfs(PCIDevice *dev) +{ + uint8_t *cfg = dev->config + dev->exp.sriov_cap; + uint16_t i; + + trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { + pci_set_enabled(dev->exp.sriov_pf.vf[i], false); + } + + pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff); +} + +static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset, + uint16_t vf_dev_id, uint16_t init_vfs, + uint16_t total_vfs, uint16_t vf_offset, + uint16_t vf_stride, Error **errp) { - BusState *bus = qdev_get_parent_bus(&dev->qdev); int32_t devfn = dev->devfn + vf_offset; uint8_t *cfg = dev->config + offset; uint8_t *wmask; + if (!pci_is_express(dev)) { + error_setg(errp, "PCI Express is required for SR-IOV PF"); + return false; + } + + if (pci_is_vf(dev)) { + error_setg(errp, "a device cannot be a SR-IOV PF and a VF at the same time"); + return false; + } + if (total_vfs && (uint32_t)devfn + (uint32_t)(total_vfs - 1) * vf_stride >= PCI_DEVFN_MAX) { error_setg(errp, "VF addr overflows"); @@ -84,6 +125,28 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, qdev_prop_set_bit(&dev->qdev, "multifunction", true); + return true; +} + +bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, + const char *vfname, uint16_t vf_dev_id, + uint16_t init_vfs, uint16_t total_vfs, + uint16_t vf_offset, uint16_t vf_stride, + Error **errp) +{ + BusState *bus = qdev_get_parent_bus(&dev->qdev); + int32_t devfn = dev->devfn + vf_offset; + + if (pfs && g_hash_table_contains(pfs, dev->qdev.id)) { + error_setg(errp, "attaching user-created SR-IOV VF unsupported"); + return false; + } + + if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, init_vfs, + total_vfs, vf_offset, vf_stride, errp)) { + return 
false; + } + dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs); for (uint16_t i = 0; i < total_vfs; i++) { @@ -113,7 +176,22 @@ void pcie_sriov_pf_exit(PCIDevice *dev) { uint8_t *cfg = dev->config + dev->exp.sriov_cap; - unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); + if (dev->exp.sriov_pf.vf_user_created) { + uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID); + uint16_t total_vfs = pci_get_word(dev->config + PCI_SRIOV_TOTAL_VF); + uint16_t vf_dev_id = pci_get_word(dev->config + PCI_SRIOV_VF_DID); + + unregister_vfs(dev); + + for (uint16_t i = 0; i < total_vfs; i++) { + dev->exp.sriov_pf.vf[i]->exp.sriov_vf.pf = NULL; + + pci_config_set_vendor_id(dev->exp.sriov_pf.vf[i]->config, ven_id); + pci_config_set_device_id(dev->exp.sriov_pf.vf[i]->config, vf_dev_id); + } + } else { + unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); + } } void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, @@ -146,69 +224,179 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, MemoryRegion *memory) { - PCIIORegion *r; - PCIBus *bus = pci_get_bus(dev); uint8_t type; - pcibus_t size = memory_region_size(memory); - assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */ - assert(region_num >= 0); - assert(region_num < PCI_NUM_REGIONS); + assert(dev->exp.sriov_vf.pf); type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num]; - if (!is_power_of_2(size)) { - error_report("%s: PCI region size must be a power" - " of two - type=0x%x, size=0x%"FMT_PCIBUS, - __func__, type, size); - exit(1); - } + return pci_register_bar(dev, region_num, type, memory); +} - r = &dev->io_regions[region_num]; - r->memory = memory; - r->address_space = - type & PCI_BASE_ADDRESS_SPACE_IO - ? 
bus->address_space_io - : bus->address_space_mem; - r->size = size; - r->type = type; - - r->addr = pci_bar_address(dev, region_num, r->type, r->size); - if (r->addr != PCI_BAR_UNMAPPED) { - memory_region_add_subregion_overlap(r->address_space, - r->addr, r->memory, 1); - } +static gint compare_vf_devfns(gconstpointer a, gconstpointer b) +{ + return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn; } -static void register_vfs(PCIDevice *dev) +int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev, + uint16_t offset, + Error **errp) { - uint16_t num_vfs; + GPtrArray *pf; + PCIDevice **vfs; + BusState *bus = qdev_get_parent_bus(DEVICE(dev)); + uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID); + uint16_t size = PCI_EXT_CAP_SRIOV_SIZEOF; + uint16_t vf_dev_id; + uint16_t vf_offset; + uint16_t vf_stride; uint16_t i; - uint16_t sriov_cap = dev->exp.sriov_cap; - assert(sriov_cap > 0); - num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); + if (!pfs || !dev->qdev.id) { + return 0; + } - trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), num_vfs); - for (i = 0; i < num_vfs; i++) { - pci_set_enabled(dev->exp.sriov_pf.vf[i], true); + pf = g_hash_table_lookup(pfs, dev->qdev.id); + if (!pf) { + return 0; } - pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0); + if (pf->len > UINT16_MAX) { + error_setg(errp, "too many VFs"); + return -1; + } + + g_ptr_array_sort(pf, compare_vf_devfns); + vfs = (void *)pf->pdata; + + if (vfs[0]->devfn <= dev->devfn) { + error_setg(errp, "a VF function number is less than the PF function number"); + return -1; + } + + vf_dev_id = pci_get_word(vfs[0]->config + PCI_DEVICE_ID); + vf_offset = vfs[0]->devfn - dev->devfn; + vf_stride = pf->len < 2 ? 0 : vfs[1]->devfn - vfs[0]->devfn; + + for (i = 0; i < pf->len; i++) { + if (bus != qdev_get_parent_bus(&vfs[i]->qdev)) { + error_setg(errp, "SR-IOV VF parent bus mismatches with PF"); + return -1; + } + + if (ven_id != pci_get_word(vfs[i]->config + PCI_VENDOR_ID)) { + error_setg(errp, "SR-IOV VF vendor ID mismatches with PF"); + return -1; + } + + if (vf_dev_id != pci_get_word(vfs[i]->config + PCI_DEVICE_ID)) { + error_setg(errp, "inconsistent SR-IOV VF device IDs"); + return -1; + } + + for (size_t j = 0; j < PCI_NUM_REGIONS; j++) { + if (vfs[i]->io_regions[j].size != vfs[0]->io_regions[j].size || + vfs[i]->io_regions[j].type != vfs[0]->io_regions[j].type) { + error_setg(errp, "inconsistent SR-IOV BARs"); + return -1; + } + } + + if (vfs[i]->devfn - vfs[0]->devfn != vf_stride * i) { + error_setg(errp, "inconsistent SR-IOV stride"); + return -1; + } + } + + if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, pf->len, + pf->len, vf_offset, vf_stride, errp)) { + return -1; + } + + if (!pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI)) { + pcie_ari_init(dev, offset + size); + size += PCI_ARI_SIZEOF; + } + + for (i = 0; i < pf->len; i++) { + vfs[i]->exp.sriov_vf.pf = dev; + vfs[i]->exp.sriov_vf.vf_number = i; + + /* set vid/did according to sr/iov spec - they are not used */ + pci_config_set_vendor_id(vfs[i]->config, 0xffff); + pci_config_set_device_id(vfs[i]->config, 0xffff); + } + + dev->exp.sriov_pf.vf = vfs; + dev->exp.sriov_pf.vf_user_created = true; + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + PCIIORegion *region = &vfs[0]->io_regions[i]; + + if (region->size) { + pcie_sriov_pf_init_vf_bar(dev, i, region->type, region->size); + } + } + + return size; } -static void unregister_vfs(PCIDevice *dev) +bool pcie_sriov_register_device(PCIDevice *dev, Error 
**errp) { - uint8_t *cfg = dev->config + dev->exp.sriov_cap; - uint16_t i; + if (!dev->exp.sriov_pf.vf && dev->qdev.id && + pfs && g_hash_table_contains(pfs, dev->qdev.id)) { + error_setg(errp, "attaching user-created SR-IOV VF unsupported"); + return false; + } - trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn)); - for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { - pci_set_enabled(dev->exp.sriov_pf.vf[i], false); + if (dev->sriov_pf) { + PCIDevice *pci_pf; + GPtrArray *pf; + + if (!PCI_DEVICE_GET_CLASS(dev)->sriov_vf_user_creatable) { + error_setg(errp, "user cannot create SR-IOV VF with this device type"); + return false; + } + + if (!pci_is_express(dev)) { + error_setg(errp, "PCI Express is required for SR-IOV VF"); + return false; + } + + if (!pci_qdev_find_device(dev->sriov_pf, &pci_pf)) { + error_setg(errp, "PCI device specified as SR-IOV PF already exists"); + return false; + } + + if (!pfs) { + pfs = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); + } + + pf = g_hash_table_lookup(pfs, dev->sriov_pf); + if (!pf) { + pf = g_ptr_array_new(); + g_hash_table_insert(pfs, g_strdup(dev->sriov_pf), pf); + } + + g_ptr_array_add(pf, dev); } - pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff); + return true; +} + +void pcie_sriov_unregister_device(PCIDevice *dev) +{ + if (dev->sriov_pf && pfs) { + GPtrArray *pf = g_hash_table_lookup(pfs, dev->sriov_pf); + + if (pf) { + g_ptr_array_remove_fast(pf, dev); + + if (!pf->len) { + g_hash_table_remove(pfs, dev->sriov_pf); + g_ptr_array_free(pf, FALSE); + } + } + } } void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, @@ -304,7 +492,7 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize) uint16_t pcie_sriov_vf_number(PCIDevice *dev) { - assert(pci_is_vf(dev)); + assert(dev->exp.sriov_vf.pf); return dev->exp.sriov_vf.vf_number; } diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c index 1afe364573..7b7bf237fe 100644 --- a/hw/s390x/event-facility.c +++ b/hw/s390x/event-facility.c @@ -45,13 +45,6 @@ struct SCLPEventFacility { uint32_t receive_mask_pieces[2]; sccb_mask_t receive_mask; }; - /* - * when false, we keep the same broken, backwards compatible behaviour as - * before, allowing only masks of size exactly 4; when true, we implement - * the architecture correctly, allowing all valid mask sizes. Needed for - * migration toward older versions. 
- */ - bool allow_all_mask_sizes; /* length of the receive mask */ uint16_t mask_length; }; @@ -294,8 +287,7 @@ static void write_event_mask(SCLPEventFacility *ef, SCCB *sccb) uint16_t mask_length = be16_to_cpu(we_mask->mask_length); sccb_mask_t tmp_mask; - if (!mask_length || (mask_length > SCLP_EVENT_MASK_LEN_MAX) || - ((mask_length != 4) && !ef->allow_all_mask_sizes)) { + if (!mask_length || mask_length > SCLP_EVENT_MASK_LEN_MAX) { sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_MASK_LENGTH); return; } @@ -355,13 +347,6 @@ static bool vmstate_event_facility_mask64_needed(void *opaque) return (ef->receive_mask & 0xFFFFFFFF) != 0; } -static bool vmstate_event_facility_mask_length_needed(void *opaque) -{ - SCLPEventFacility *ef = opaque; - - return ef->allow_all_mask_sizes; -} - static const VMStateDescription vmstate_event_facility_mask64 = { .name = "vmstate-event-facility/mask64", .version_id = 0, @@ -377,7 +362,6 @@ static const VMStateDescription vmstate_event_facility_mask_length = { .name = "vmstate-event-facility/mask_length", .version_id = 0, .minimum_version_id = 0, - .needed = vmstate_event_facility_mask_length_needed, .fields = (const VMStateField[]) { VMSTATE_UINT16(mask_length, SCLPEventFacility), VMSTATE_END_OF_LIST() @@ -399,31 +383,12 @@ static const VMStateDescription vmstate_event_facility = { } }; -static void sclp_event_set_allow_all_mask_sizes(Object *obj, bool value, - Error **errp) -{ - SCLPEventFacility *ef = (SCLPEventFacility *)obj; - - ef->allow_all_mask_sizes = value; -} - -static bool sclp_event_get_allow_all_mask_sizes(Object *obj, Error **errp) -{ - SCLPEventFacility *ef = (SCLPEventFacility *)obj; - - return ef->allow_all_mask_sizes; -} - static void init_event_facility(Object *obj) { SCLPEventFacility *event_facility = EVENT_FACILITY(obj); DeviceState *sdev = DEVICE(obj); event_facility->mask_length = 4; - event_facility->allow_all_mask_sizes = true; - object_property_add_bool(obj, "allow_all_mask_sizes", - sclp_event_get_allow_all_mask_sizes, - sclp_event_set_allow_all_mask_sizes); /* Spawn a new bus for SCLP events */ qbus_init(&event_facility->sbus, sizeof(event_facility->sbus), diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index d5658afed9..f69a4d8ed3 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -748,39 +748,6 @@ static inline void machine_set_dea_key_wrap(Object *obj, bool value, ms->dea_key_wrap = value; } -static S390CcwMachineClass *current_mc; - -/* - * Get the class of the s390-ccw-virtio machine that is currently in use. - * Note: libvirt is using the "none" machine to probe for the features of the - * host CPU, so in case this is called with the "none" machine, the function - * returns the TYPE_S390_CCW_MACHINE base class. In this base class, all the - * various "*_allowed" variables are enabled, so that the *_allowed() wrappers - * below return the correct default value for the "none" machine. - * - * Attention! Do *not* add additional new wrappers for CPU features via this - * mechanism anymore. CPU features should be handled via the CPU models, - * i.e. checking with s390_has_feat() should be sufficient. - */ -static S390CcwMachineClass *get_machine_class(void) -{ - if (unlikely(!current_mc)) { - /* - * No s390 ccw machine was instantiated, we are likely to - * be called for the 'none' machine. The properties will - * have their after-initialization values. 
- */ - current_mc = S390_CCW_MACHINE_CLASS( - object_class_by_name(TYPE_S390_CCW_MACHINE)); - } - return current_mc; -} - -bool hpage_1m_allowed(void) -{ - return get_machine_class()->hpage_1m_allowed; -} - static void machine_get_loadparm(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) @@ -804,6 +771,7 @@ static void machine_set_loadparm(Object *obj, Visitor *v, } s390_ipl_fmt_loadparm(ms->loadparm, val, errp); + g_free(val); } static void ccw_machine_class_init(ObjectClass *oc, const void *data) @@ -814,7 +782,6 @@ static void ccw_machine_class_init(ObjectClass *oc, const void *data) S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); DumpSKeysInterface *dsi = DUMP_SKEYS_INTERFACE_CLASS(oc); - s390mc->hpage_1m_allowed = true; s390mc->max_threads = 1; mc->reset = s390_machine_reset; mc->block_default_type = IF_VIRTIO; @@ -888,7 +855,6 @@ static const TypeInfo ccw_machine_info = { #define DEFINE_CCW_MACHINE_IMPL(latest, ...) \ static void MACHINE_VER_SYM(mach_init, ccw, __VA_ARGS__)(MachineState *mach) \ { \ - current_mc = S390_CCW_MACHINE_CLASS(MACHINE_GET_CLASS(mach)); \ MACHINE_VER_SYM(instance_options, ccw, __VA_ARGS__)(mach); \ ccw_init(mach); \ } \ @@ -1193,102 +1159,6 @@ static void ccw_machine_4_1_class_options(MachineClass *mc) } DEFINE_CCW_MACHINE(4, 1); -static void ccw_machine_4_0_instance_options(MachineState *machine) -{ - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V4_0 }; - ccw_machine_4_1_instance_options(machine); - s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); -} - -static void ccw_machine_4_0_class_options(MachineClass *mc) -{ - ccw_machine_4_1_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); -} -DEFINE_CCW_MACHINE(4, 0); - -static void ccw_machine_3_1_instance_options(MachineState *machine) -{ - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V3_1 }; - ccw_machine_4_0_instance_options(machine); - s390_cpudef_featoff_greater(14, 1, S390_FEAT_MULTIPLE_EPOCH); - s390_cpudef_group_featoff_greater(14, 1, S390_FEAT_GROUP_MULTIPLE_EPOCH_PTFF); - s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); -} - -static void ccw_machine_3_1_class_options(MachineClass *mc) -{ - ccw_machine_4_0_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_3_1, hw_compat_3_1_len); -} -DEFINE_CCW_MACHINE(3, 1); - -static void ccw_machine_3_0_instance_options(MachineState *machine) -{ - ccw_machine_3_1_instance_options(machine); -} - -static void ccw_machine_3_0_class_options(MachineClass *mc) -{ - S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); - - s390mc->hpage_1m_allowed = false; - ccw_machine_3_1_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_3_0, hw_compat_3_0_len); -} -DEFINE_CCW_MACHINE(3, 0); - -static void ccw_machine_2_12_instance_options(MachineState *machine) -{ - ccw_machine_3_0_instance_options(machine); - s390_cpudef_featoff_greater(11, 1, S390_FEAT_PPA15); - s390_cpudef_featoff_greater(11, 1, S390_FEAT_BPB); -} - -static void ccw_machine_2_12_class_options(MachineClass *mc) -{ - ccw_machine_3_0_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_2_12, hw_compat_2_12_len); -} -DEFINE_CCW_MACHINE(2, 12); - -#ifdef CONFIG_S390X_LEGACY_CPUS - -static void ccw_machine_2_11_instance_options(MachineState *machine) -{ - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 }; - ccw_machine_2_12_instance_options(machine); - - /* before 2.12 we emulated the very first z900 */ - 
s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); -} - -static void ccw_machine_2_11_class_options(MachineClass *mc) -{ - static GlobalProperty compat[] = { - { TYPE_SCLP_EVENT_FACILITY, "allow_all_mask_sizes", "off", }, - }; - - ccw_machine_2_12_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_2_11, hw_compat_2_11_len); - compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); -} -DEFINE_CCW_MACHINE(2, 11); - -static void ccw_machine_2_10_instance_options(MachineState *machine) -{ - ccw_machine_2_11_instance_options(machine); -} - -static void ccw_machine_2_10_class_options(MachineClass *mc) -{ - ccw_machine_2_11_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_2_10, hw_compat_2_10_len); -} -DEFINE_CCW_MACHINE(2, 10); - -#endif - static void ccw_machine_register_types(void) { type_register_static(&ccw_machine_info); diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 70be4a7367..9b12ee7f1c 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -400,7 +400,7 @@ static void scsi_qdev_realize(DeviceState *qdev, Error **errp) return; } dev->vmsentry = qdev_add_vm_change_state_handler(DEVICE(dev), - scsi_dma_restart_cb, dev); + scsi_dma_restart_cb, NULL, dev); } static void scsi_qdev_unrealize(DeviceState *qdev) diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c index 4c8637045d..43525ba46d 100644 --- a/hw/scsi/vhost-scsi-common.c +++ b/hw/scsi/vhost-scsi-common.c @@ -101,24 +101,25 @@ err_host_notifiers: return ret; } -void vhost_scsi_common_stop(VHostSCSICommon *vsc) +int vhost_scsi_common_stop(VHostSCSICommon *vsc) { VirtIODevice *vdev = VIRTIO_DEVICE(vsc); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); int ret = 0; - vhost_dev_stop(&vsc->dev, vdev, true); + ret = vhost_dev_stop(&vsc->dev, vdev, true); if (k->set_guest_notifiers) { - ret = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false); - if (ret < 0) { - error_report("vhost guest notifier cleanup failed: %d", ret); + int r = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false); + if (r < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return r; } } - assert(ret >= 0); vhost_dev_disable_notifiers(&vsc->dev, vdev); + return ret; } uint64_t vhost_scsi_common_get_features(VirtIODevice *vdev, uint64_t features, diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 10fde8eee0..cdf405b0f8 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -114,7 +114,7 @@ static void vhost_scsi_stop(VHostSCSI *s) vhost_scsi_common_stop(vsc); } -static void vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val) +static int vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val) { VHostSCSI *s = VHOST_SCSI(vdev); VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); @@ -125,7 +125,7 @@ static void vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val) } if (vhost_dev_is_started(&vsc->dev) == start) { - return; + return 0; } if (start) { @@ -139,6 +139,7 @@ static void vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val) } else { vhost_scsi_stop(s); } + return 0; } static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) @@ -358,6 +359,9 @@ static const Property vhost_scsi_properties[] = { DEFINE_PROP_BIT64("t10_pi", VHostSCSICommon, host_features, VIRTIO_SCSI_F_T10_PI, false), + DEFINE_PROP_BIT64("hotplug", VHostSCSICommon, host_features, + VIRTIO_SCSI_F_HOTPLUG, + false), DEFINE_PROP_BOOL("migratable", VHostSCSICommon, migratable, false), 
DEFINE_PROP_BOOL("worker_per_virtqueue", VirtIOSCSICommon, conf.worker_per_virtqueue, false), diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 8298e8cc6d..25f2d894e7 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -52,19 +52,19 @@ static int vhost_user_scsi_start(VHostUserSCSI *s, Error **errp) return ret; } -static void vhost_user_scsi_stop(VHostUserSCSI *s) +static int vhost_user_scsi_stop(VHostUserSCSI *s) { VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); if (!s->started_vu) { - return; + return 0; } s->started_vu = false; - vhost_scsi_common_stop(vsc); + return vhost_scsi_common_stop(vsc); } -static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) +static int vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserSCSI *s = (VHostUserSCSI *)vdev; DeviceState *dev = DEVICE(vdev); @@ -75,11 +75,11 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) int ret; if (!s->connected) { - return; + return -1; } if (vhost_dev_is_started(&vsc->dev) == should_start) { - return; + return 0; } if (should_start) { @@ -91,8 +91,12 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) qemu_chr_fe_disconnect(&vs->conf.chardev); } } else { - vhost_user_scsi_stop(s); + ret = vhost_user_scsi_stop(s); + if (ret) { + return ret; + } } + return 0; } static void vhost_user_scsi_handle_output(VirtIODevice *vdev, VirtQueue *vq) diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 1dceab1b19..b76697bd1a 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -1016,7 +1016,7 @@ static int vfio_migration_init(VFIODevice *vbasedev) vfio_vmstate_change_prepare : NULL; migration->vm_state = qdev_add_vm_change_state_handler_full( - vbasedev->dev, vfio_vmstate_change, prepare_cb, vbasedev); + vbasedev->dev, vfio_vmstate_change, prepare_cb, NULL, vbasedev); migration_add_notifier(&migration->migration_state, vfio_migration_state_notifier); diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index dd8837ce4e..d1da40afc8 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -312,7 +312,7 @@ static void vhost_vdpa_device_stop(VirtIODevice *vdev) vhost_dev_disable_notifiers(&s->dev, vdev); } -static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) +static int vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) { VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); bool should_start = virtio_device_started(vdev, status); @@ -324,7 +324,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) } if (s->started == should_start) { - return; + return 0; } if (should_start) { @@ -335,6 +335,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) } else { vhost_vdpa_device_stop(vdev); } + return 0; } static const Property vhost_vdpa_device_properties[] = { diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c index 77143320a2..ff67a020b4 100644 --- a/hw/virtio/vhost-user-base.c +++ b/hw/virtio/vhost-user-base.c @@ -66,7 +66,7 @@ err_host_notifiers: vhost_dev_disable_notifiers(&vub->vhost_dev, vdev); } -static void vub_stop(VirtIODevice *vdev) +static int vub_stop(VirtIODevice *vdev) { VHostUserBase *vub = VHOST_USER_BASE(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -74,34 +74,39 @@ static void vub_stop(VirtIODevice *vdev) int ret; if (!k->set_guest_notifiers) { - return; + return 0; } - vhost_dev_stop(&vub->vhost_dev, vdev, true); + ret = 
vhost_dev_stop(&vub->vhost_dev, vdev, true); - ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false); - if (ret < 0) { + if (k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; + return -1; } vhost_dev_disable_notifiers(&vub->vhost_dev, vdev); + return ret; } -static void vub_set_status(VirtIODevice *vdev, uint8_t status) +static int vub_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserBase *vub = VHOST_USER_BASE(vdev); bool should_start = virtio_device_should_start(vdev, status); if (vhost_dev_is_started(&vub->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { vub_start(vdev); } else { - vub_stop(vdev); + int ret; + ret = vub_stop(vdev); + if (ret < 0) { + return ret; + } } + return 0; } /* diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index f6d1fc8804..e77c69eb12 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -100,7 +100,7 @@ err_host_notifiers: vhost_dev_disable_notifiers(&fs->vhost_dev, vdev); } -static void vuf_stop(VirtIODevice *vdev) +static int vuf_stop(VirtIODevice *vdev) { VHostUserFS *fs = VHOST_USER_FS(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -108,34 +108,39 @@ static void vuf_stop(VirtIODevice *vdev) int ret; if (!k->set_guest_notifiers) { - return; + return 0; } - vhost_dev_stop(&fs->vhost_dev, vdev, true); + ret = vhost_dev_stop(&fs->vhost_dev, vdev, true); - ret = k->set_guest_notifiers(qbus->parent, fs->vhost_dev.nvqs, false); - if (ret < 0) { + if (k->set_guest_notifiers(qbus->parent, fs->vhost_dev.nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; + return -1; } vhost_dev_disable_notifiers(&fs->vhost_dev, vdev); + return ret; } -static void vuf_set_status(VirtIODevice *vdev, uint8_t status) +static int vuf_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserFS *fs = VHOST_USER_FS(vdev); bool should_start = virtio_device_should_start(vdev, status); if (vhost_dev_is_started(&fs->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { vuf_start(vdev); } else { - vuf_stop(vdev); + int ret; + ret = vuf_stop(vdev); + if (ret < 0) { + return ret; + } } + return 0; } static uint64_t vuf_get_features(VirtIODevice *vdev, diff --git a/hw/virtio/vhost-user-scmi.c b/hw/virtio/vhost-user-scmi.c index 7a0f622181..f9264c4374 100644 --- a/hw/virtio/vhost-user-scmi.c +++ b/hw/virtio/vhost-user-scmi.c @@ -83,7 +83,7 @@ err_host_notifiers: return ret; } -static void vu_scmi_stop(VirtIODevice *vdev) +static int vu_scmi_stop(VirtIODevice *vdev) { VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -93,41 +93,46 @@ static void vu_scmi_stop(VirtIODevice *vdev) /* vhost_dev_is_started() check in the callers is not fully reliable. 
*/ if (!scmi->started_vu) { - return; + return 0; } scmi->started_vu = false; if (!k->set_guest_notifiers) { - return; + return 0; } - vhost_dev_stop(vhost_dev, vdev, true); + ret = vhost_dev_stop(vhost_dev, vdev, true); - ret = k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, false); - if (ret < 0) { + if (k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; + return -1; } vhost_dev_disable_notifiers(vhost_dev, vdev); + return ret; } -static void vu_scmi_set_status(VirtIODevice *vdev, uint8_t status) +static int vu_scmi_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev); bool should_start = virtio_device_should_start(vdev, status); if (!scmi->connected) { - return; + return -1; } if (vhost_dev_is_started(&scmi->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { vu_scmi_start(vdev); } else { - vu_scmi_stop(vdev); + int ret; + ret = vu_scmi_stop(vdev); + if (ret < 0) { + return ret; + } } + return 0; } static uint64_t vu_scmi_get_features(VirtIODevice *vdev, uint64_t features, diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c index 2776792f59..993c287348 100644 --- a/hw/virtio/vhost-user-vsock.c +++ b/hw/virtio/vhost-user-vsock.c @@ -54,23 +54,28 @@ const VhostDevConfigOps vsock_ops = { .vhost_dev_config_notifier = vuv_handle_config_change, }; -static void vuv_set_status(VirtIODevice *vdev, uint8_t status) +static int vuv_set_status(VirtIODevice *vdev, uint8_t status) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); bool should_start = virtio_device_should_start(vdev, status); + int ret; if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { - int ret = vhost_vsock_common_start(vdev); + ret = vhost_vsock_common_start(vdev); if (ret < 0) { - return; + return ret; } } else { - vhost_vsock_common_stop(vdev); + ret = vhost_vsock_common_stop(vdev); + if (ret < 0) { + return ret; + } } + return 0; } static uint64_t vuv_get_features(VirtIODevice *vdev, diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c index 4b4fbb45cc..c6c44d8989 100644 --- a/hw/virtio/vhost-vsock-common.c +++ b/hw/virtio/vhost-vsock-common.c @@ -95,7 +95,7 @@ err_host_notifiers: return ret; } -void vhost_vsock_common_stop(VirtIODevice *vdev) +int vhost_vsock_common_stop(VirtIODevice *vdev) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -103,18 +103,18 @@ void vhost_vsock_common_stop(VirtIODevice *vdev) int ret; if (!k->set_guest_notifiers) { - return; + return 0; } - vhost_dev_stop(&vvc->vhost_dev, vdev, true); + ret = vhost_dev_stop(&vvc->vhost_dev, vdev, true); - ret = k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false); - if (ret < 0) { + if (k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; + return -1; } vhost_dev_disable_notifiers(&vvc->vhost_dev, vdev); + return ret; } diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index b73dc723c2..6e4088831f 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -67,37 +67,38 @@ static int vhost_vsock_set_running(VirtIODevice *vdev, int start) } -static void vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status) +static int vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status) { VHostVSockCommon *vvc = 
VHOST_VSOCK_COMMON(vdev); bool should_start = virtio_device_should_start(vdev, status); int ret; if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { ret = vhost_vsock_common_start(vdev); if (ret < 0) { - return; + return 0; } ret = vhost_vsock_set_running(vdev, 1); if (ret < 0) { vhost_vsock_common_stop(vdev); error_report("Error starting vhost vsock: %d", -ret); - return; + return 0; } } else { ret = vhost_vsock_set_running(vdev, 0); if (ret < 0) { error_report("vhost vsock set running failed: %d", ret); - return; + return 0; } vhost_vsock_common_stop(vdev); } + return 0; } static uint64_t vhost_vsock_get_features(VirtIODevice *vdev, diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 4cae7c1664..fc43853704 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1367,10 +1367,10 @@ fail_alloc_desc: return r; } -void vhost_virtqueue_stop(struct vhost_dev *dev, - struct VirtIODevice *vdev, - struct vhost_virtqueue *vq, - unsigned idx) +int vhost_virtqueue_stop(struct vhost_dev *dev, + struct VirtIODevice *vdev, + struct vhost_virtqueue *vq, + unsigned idx) { int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); struct vhost_vring_state state = { @@ -1380,7 +1380,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev, if (virtio_queue_get_desc_addr(vdev, idx) == 0) { /* Don't stop the virtqueue which might have not been started */ - return; + return 0; } r = dev->vhost_ops->vhost_get_vring_base(dev, &state); @@ -1411,6 +1411,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev, 0, virtio_queue_get_avail_size(vdev, idx)); vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), 0, virtio_queue_get_desc_size(vdev, idx)); + return r; } static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev, @@ -2135,9 +2136,10 @@ fail_features: } /* Host notifiers must be enabled at this point. 
*/ -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) { int i; + int rc = 0; /* should only be called after backend is connected */ assert(hdev->vhost_ops); @@ -2156,10 +2158,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) vhost_dev_set_vring_enable(hdev, false); } for (i = 0; i < hdev->nvqs; ++i) { - vhost_virtqueue_stop(hdev, - vdev, - hdev->vqs + i, - hdev->vq_index + i); + rc |= vhost_virtqueue_stop(hdev, + vdev, + hdev->vqs + i, + hdev->vq_index + i); } if (hdev->vhost_ops->vhost_reset_status) { hdev->vhost_ops->vhost_reset_status(hdev); @@ -2176,6 +2178,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) hdev->started = false; vdev->vhost_started = false; hdev->vdev = NULL; + return rc; } int vhost_net_set_backend(struct vhost_dev *hdev, diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 91510ec2e2..db787d00b3 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -958,7 +958,7 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) s->poison_val = 0; } -static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) +static int virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) { VirtIOBalloon *s = VIRTIO_BALLOON(vdev); @@ -988,6 +988,7 @@ static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) qemu_mutex_unlock(&s->free_page_lock); } } + return 0; } static ResettableState *virtio_balloon_get_reset_state(Object *obj) diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index e24d6914b6..517f2089c5 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -1197,11 +1197,12 @@ static void virtio_crypto_vhost_status(VirtIOCrypto *c, uint8_t status) } } -static void virtio_crypto_set_status(VirtIODevice *vdev, uint8_t status) +static int virtio_crypto_set_status(VirtIODevice *vdev, uint8_t status) { VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); virtio_crypto_vhost_status(vcrypto, status); + return 0; } static void virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx, diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 54060988ef..3500f1b082 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -1522,9 +1522,10 @@ static void virtio_iommu_device_reset_exit(Object *obj, ResetType type) NULL, NULL, virtio_iommu_put_endpoint); } -static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status) +static int virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status) { trace_virtio_iommu_device_status(status); + return 0; } static void virtio_iommu_instance_init(Object *obj) diff --git a/hw/virtio/virtio-net-pci.c b/hw/virtio/virtio-net-pci.c index 8cf9788bc3..f857a84f11 100644 --- a/hw/virtio/virtio-net-pci.c +++ b/hw/virtio/virtio-net-pci.c @@ -74,6 +74,7 @@ static void virtio_net_pci_class_init(ObjectClass *klass, const void *data) k->device_id = PCI_DEVICE_ID_VIRTIO_NET; k->revision = VIRTIO_PCI_ABI_VERSION; k->class_id = PCI_CLASS_NETWORK_ETHERNET; + k->sriov_vf_user_creatable = true; set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); device_class_set_props(dc, virtio_net_properties); vpciklass->realize = virtio_net_pci_realize; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 0fa8fe4955..9b48aa8c3e 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1962,6 +1962,7 @@ static void virtio_pci_device_plugged(DeviceState 
*d, Error **errp) uint8_t *config; uint32_t size; VirtIODevice *vdev = virtio_bus_get_device(bus); + int16_t res; /* * Virtio capabilities present without @@ -2109,6 +2110,18 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx, PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar); } + + if (pci_is_vf(&proxy->pci_dev)) { + pcie_ari_init(&proxy->pci_dev, proxy->last_pcie_cap_offset); + proxy->last_pcie_cap_offset += PCI_ARI_SIZEOF; + } else { + res = pcie_sriov_pf_init_from_user_created_vfs( + &proxy->pci_dev, proxy->last_pcie_cap_offset, errp); + if (res > 0) { + proxy->last_pcie_cap_offset += res; + virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV); + } + } } static void virtio_pci_device_unplugged(DeviceState *d) @@ -2199,7 +2212,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) if (pcie_port && pci_is_express(pci_dev)) { int pos; - uint16_t last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE; + proxy->last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE; pos = pcie_endpoint_cap_init(pci_dev, 0); assert(pos > 0); @@ -2216,9 +2229,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3); if (proxy->flags & VIRTIO_PCI_FLAG_AER) { - pcie_aer_init(pci_dev, PCI_ERR_VER, last_pcie_cap_offset, + pcie_aer_init(pci_dev, PCI_ERR_VER, proxy->last_pcie_cap_offset, PCI_ERR_SIZEOF, NULL); - last_pcie_cap_offset += PCI_ERR_SIZEOF; + proxy->last_pcie_cap_offset += PCI_ERR_SIZEOF; } if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) { @@ -2243,9 +2256,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) } if (proxy->flags & VIRTIO_PCI_FLAG_ATS) { - pcie_ats_init(pci_dev, last_pcie_cap_offset, + pcie_ats_init(pci_dev, proxy->last_pcie_cap_offset, proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED); - last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF; + proxy->last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF; } if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) { @@ -2273,6 +2286,7 @@ static void virtio_pci_exit(PCIDevice *pci_dev) !pci_bus_is_root(pci_get_bus(pci_dev)); bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; + pcie_sriov_pf_exit(&proxy->pci_dev); msix_uninit_exclusive_bar(pci_dev); if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port && pci_is_express(pci_dev)) { diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c index dcb3c71d6a..3df5d2576e 100644 --- a/hw/virtio/virtio-rng.c +++ b/hw/virtio/virtio-rng.c @@ -159,17 +159,18 @@ static void check_rate_limit(void *opaque) vrng->activate_timer = true; } -static void virtio_rng_set_status(VirtIODevice *vdev, uint8_t status) +static int virtio_rng_set_status(VirtIODevice *vdev, uint8_t status) { VirtIORNG *vrng = VIRTIO_RNG(vdev); if (!vdev->vm_running) { - return; + return 0; } vdev->status = status; /* Something changed, try to process buffers */ virtio_rng_process(vrng); + return 0; } static void virtio_rng_device_realize(DeviceState *dev, Error **errp) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 480c2e5036..2e98cecf64 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2221,12 +2221,12 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) { VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); trace_virtio_set_status(vdev, val); + int ret = 0; if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) && val & VIRTIO_CONFIG_S_FEATURES_OK) { - int ret = virtio_validate_features(vdev); - + ret = 
virtio_validate_features(vdev); if (ret) { return ret; } @@ -2239,11 +2239,15 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) } if (k->set_status) { - k->set_status(vdev, val); + ret = k->set_status(vdev, val); + if (ret) { + qemu_log("set %s status to %d failed, old status: %d\n", + vdev->name, val, vdev->status); + } } vdev->status = val; - return 0; + return ret; } static enum virtio_device_endian virtio_default_endian(void) @@ -2316,49 +2320,6 @@ void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index) } } -void virtio_reset(void *opaque) -{ - VirtIODevice *vdev = opaque; - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); - int i; - - virtio_set_status(vdev, 0); - if (current_cpu) { - /* Guest initiated reset */ - vdev->device_endian = virtio_current_cpu_endian(); - } else { - /* System reset */ - vdev->device_endian = virtio_default_endian(); - } - - if (k->get_vhost) { - struct vhost_dev *hdev = k->get_vhost(vdev); - /* Only reset when vhost back-end is connected */ - if (hdev && hdev->vhost_ops) { - vhost_reset_device(hdev); - } - } - - if (k->reset) { - k->reset(vdev); - } - - vdev->start_on_kick = false; - vdev->started = false; - vdev->broken = false; - vdev->guest_features = 0; - vdev->queue_sel = 0; - vdev->status = 0; - vdev->disabled = false; - qatomic_set(&vdev->isr, 0); - vdev->config_vector = VIRTIO_NO_VECTOR; - virtio_notify_vector(vdev, vdev->config_vector); - - for(i = 0; i < VIRTIO_QUEUE_MAX; i++) { - __virtio_queue_reset(vdev, i); - } -} - void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr) { if (!vdev->vq[n].vring.num) { @@ -3169,6 +3130,49 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) return ret; } +void virtio_reset(void *opaque) +{ + VirtIODevice *vdev = opaque; + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + int i; + + virtio_set_status(vdev, 0); + if (current_cpu) { + /* Guest initiated reset */ + vdev->device_endian = virtio_current_cpu_endian(); + } else { + /* System reset */ + vdev->device_endian = virtio_default_endian(); + } + + if (k->get_vhost) { + struct vhost_dev *hdev = k->get_vhost(vdev); + /* Only reset when vhost back-end is connected */ + if (hdev && hdev->vhost_ops) { + vhost_reset_device(hdev); + } + } + + if (k->reset) { + k->reset(vdev); + } + + vdev->start_on_kick = false; + vdev->started = false; + vdev->broken = false; + virtio_set_features_nocheck(vdev, 0); + vdev->queue_sel = 0; + vdev->status = 0; + vdev->disabled = false; + qatomic_set(&vdev->isr, 0); + vdev->config_vector = VIRTIO_NO_VECTOR; + virtio_notify_vector(vdev, vdev->config_vector); + + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { + __virtio_queue_reset(vdev, i); + } +} + static void virtio_device_check_notification_compatibility(VirtIODevice *vdev, Error **errp) { @@ -3419,7 +3423,7 @@ void virtio_cleanup(VirtIODevice *vdev) qemu_del_vm_change_state_handler(vdev->vmstate); } -static void virtio_vmstate_change(void *opaque, bool running, RunState state) +static int virtio_vmstate_change(void *opaque, bool running, RunState state) { VirtIODevice *vdev = opaque; BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); @@ -3436,8 +3440,12 @@ static void virtio_vmstate_change(void *opaque, bool running, RunState state) } if (!backend_run) { - virtio_set_status(vdev, vdev->status); + int ret = virtio_set_status(vdev, vdev->status); + if (ret) { + return ret; + } } + return 0; } void virtio_instance_init_common(Object *proxy_obj, void *data, @@ -3489,7 +3497,7 @@ void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t 
config_size) vdev->config = NULL; } vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), - virtio_vmstate_change, vdev); + NULL, virtio_vmstate_change, vdev); vdev->device_endian = virtio_default_endian(); vdev->use_guest_notifier_mask = true; } diff --git a/include/block/block-common.h b/include/block/block-common.h index 0b831ef87b..c8c626daea 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -333,6 +333,17 @@ typedef enum { #define BDRV_BLOCK_RECURSE 0x40 #define BDRV_BLOCK_COMPRESSED 0x80 +/* + * Block status hints: the bitwise-or of these flags emphasize what + * the caller hopes to learn, and some drivers may be able to give + * faster answers by doing less work when the hint permits. + */ +#define BDRV_WANT_ZERO BDRV_BLOCK_ZERO +#define BDRV_WANT_OFFSET_VALID BDRV_BLOCK_OFFSET_VALID +#define BDRV_WANT_ALLOCATED BDRV_BLOCK_ALLOCATED +#define BDRV_WANT_PRECISE (BDRV_WANT_ZERO | BDRV_WANT_OFFSET_VALID | \ + BDRV_WANT_OFFSET_VALID) + typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; typedef struct BDRVReopenState { diff --git a/include/block/block-io.h b/include/block/block-io.h index b49e0537dd..b99cc98d26 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -161,6 +161,8 @@ bdrv_is_allocated_above(BlockDriverState *bs, BlockDriverState *base, int coroutine_fn GRAPH_RDLOCK bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes); +int coroutine_fn GRAPH_RDLOCK +bdrv_co_is_all_zeroes(BlockDriverState *bs); int GRAPH_RDLOCK bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 0d8187f656..2982dd3118 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -604,15 +604,16 @@ struct BlockDriver { * according to the current layer, and should only need to set * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID, * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing - * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See - * block.h for the overall meaning of the bits. As a hint, the - * flag want_zero is true if the caller cares more about precise - * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for - * overall allocation (favor larger *pnum, perhaps by reporting - * _DATA instead of _ZERO). The block layer guarantees input - * clamped to bdrv_getlength() and aligned to request_alignment, - * as well as non-NULL pnum, map, and file; in turn, the driver - * must return an error or set pnum to an aligned non-zero value. + * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). The + * caller will synthesize BDRV_BLOCK_ALLOCATED based on the + * non-zero results. See block.h for the overall meaning of the + * bits. As a hint, the flags in @mode may include a bitwise-or + * of BDRV_WANT_ALLOCATED, BDRV_WANT_OFFSET_VALID, or + * BDRV_WANT_ZERO based on what the caller is looking for in the + * results. The block layer guarantees input clamped to + * bdrv_getlength() and aligned to request_alignment, as well as + * non-NULL pnum, map, and file; in turn, the driver must return + * an error or set pnum to an aligned non-zero value. * * Note that @bytes is just a hint on how big of a region the * caller wants to inspect. It is not a limit on *pnum. @@ -624,8 +625,8 @@ struct BlockDriver { * to clamping *pnum for return to its caller. 
*/ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_block_status)( - BlockDriverState *bs, - bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, + BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); /* @@ -649,8 +650,8 @@ struct BlockDriver { QEMUIOVector *qiov, size_t qiov_offset); int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_snapshot_block_status)( - BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes, - int64_t *pnum, int64_t *map, BlockDriverState **file); + BlockDriverState *bs, unsigned int mode, int64_t offset, + int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pdiscard_snapshot)( BlockDriverState *bs, int64_t offset, int64_t bytes); diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h index 0d93783763..e7c8f1a856 100644 --- a/include/block/block_int-global-state.h +++ b/include/block/block_int-global-state.h @@ -139,7 +139,7 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, * @buf_size: The amount of data that can be in flight at one time. * @mode: Whether to collapse all images in the chain to the target. * @backing_mode: How to establish the target's backing chain after completion. - * @zero_target: Whether the target should be explicitly zero-initialized + * @target_is_zero: Whether the target already is zero-initialized. * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. * @unmap: Whether to unmap target where source sectors only contain zeroes. @@ -159,7 +159,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, int creation_flags, int64_t speed, uint32_t granularity, int64_t buf_size, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, - bool zero_target, + bool target_is_zero, BlockdevOnError on_source_error, BlockdevOnError on_target_error, bool unmap, const char *filter_node_name, diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h index 4a7cf2b4fd..4f94eb3c5a 100644 --- a/include/block/block_int-io.h +++ b/include/block/block_int-io.h @@ -38,8 +38,8 @@ int coroutine_fn GRAPH_RDLOCK bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset); int coroutine_fn GRAPH_RDLOCK bdrv_co_snapshot_block_status( - BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes, - int64_t *pnum, int64_t *map, BlockDriverState **file); + BlockDriverState *bs, unsigned int mode, int64_t offset, + int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); int coroutine_fn GRAPH_RDLOCK bdrv_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes); diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 3a0ee7e8e7..ed6cd50c67 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -176,10 +176,12 @@ typedef struct CXLCCI { uint16_t opcode; uint16_t complete_pct; uint16_t ret_code; /* Current value of retcode */ + bool aborted; uint64_t starttime; /* set by each bg cmd, cleared by the bg_timer when complete */ uint64_t runtime; QEMUTimer *timer; + QemuMutex lock; /* serializes mbox abort vs timer cb */ } bg; /* firmware update */ @@ -201,6 +203,7 @@ typedef struct CXLCCI { DeviceState *d; /* Pointer to the device hosting the protocol conversion */ DeviceState *intf; + bool initialized; } CXLCCI; typedef 
struct cxl_device_state { @@ -316,6 +319,7 @@ void cxl_initialize_mailbox_t3(CXLCCI *cci, DeviceState *d, size_t payload_max); void cxl_initialize_mailbox_swcci(CXLCCI *cci, DeviceState *intf, DeviceState *d, size_t payload_max); void cxl_init_cci(CXLCCI *cci, size_t payload_max); +void cxl_destroy_cci(CXLCCI *cci); void cxl_add_cci_commands(CXLCCI *cci, const struct cxl_cmd (*cxl_cmd_set)[256], size_t payload_max); int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd, @@ -536,6 +540,21 @@ typedef struct CXLSetFeatureInfo { size_t data_size; } CXLSetFeatureInfo; +struct CXLSanitizeInfo; + +typedef struct CXLAlertConfig { + uint8_t valid_alerts; + uint8_t enable_alerts; + uint8_t life_used_crit_alert_thresh; + uint8_t life_used_warn_thresh; + uint16_t over_temp_crit_alert_thresh; + uint16_t under_temp_crit_alert_thresh; + uint16_t over_temp_warn_thresh; + uint16_t under_temp_warn_thresh; + uint16_t cor_vmem_err_warn_thresh; + uint16_t cor_pmem_err_warn_thresh; +} QEMU_PACKED CXLAlertConfig; + struct CXLType3Dev { /* Private */ PCIDevice parent_obj; @@ -557,6 +576,8 @@ struct CXLType3Dev { CXLCCI vdm_fm_owned_ld_mctp_cci; CXLCCI ld0_cci; + CXLAlertConfig alert_config; + /* PCIe link characteristics */ PCIExpLinkSpeed speed; PCIExpLinkWidth width; @@ -602,6 +623,8 @@ struct CXLType3Dev { uint8_t num_regions; /* 0-8 regions */ CXLDCRegion regions[DCD_MAX_NUM_REGION]; } dc; + + struct CXLSanitizeInfo *media_op_sanitize; }; #define TYPE_CXL_TYPE3 "cxl-type3" diff --git a/include/hw/cxl/cxl_mailbox.h b/include/hw/cxl/cxl_mailbox.h index beb048052e..9008402d1c 100644 --- a/include/hw/cxl/cxl_mailbox.h +++ b/include/hw/cxl/cxl_mailbox.h @@ -14,5 +14,6 @@ #define CXL_MBOX_IMMEDIATE_LOG_CHANGE (1 << 4) #define CXL_MBOX_SECURITY_STATE_CHANGE (1 << 5) #define CXL_MBOX_BACKGROUND_OPERATION (1 << 6) +#define CXL_MBOX_BACKGROUND_OPERATION_ABORT (1 << 7) #endif diff --git a/include/hw/dma/xlnx_dpdma.h b/include/hw/dma/xlnx_dpdma.h index 1ec0d265be..484b2e377f 100644 --- a/include/hw/dma/xlnx_dpdma.h +++ b/include/hw/dma/xlnx_dpdma.h @@ -26,7 +26,6 @@ #define XLNX_DPDMA_H #include "hw/sysbus.h" -#include "ui/console.h" #include "system/dma.h" #include "qom/object.h" diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h index 345b12eaac..e41d95b0b0 100644 --- a/include/hw/pci/pci_device.h +++ b/include/hw/pci/pci_device.h @@ -38,6 +38,8 @@ struct PCIDeviceClass { uint16_t subsystem_id; /* only for header type = 0 */ const char *romfile; /* rom bar */ + + bool sriov_vf_user_creatable; }; enum PCIReqIDType { @@ -177,6 +179,8 @@ struct PCIDevice { * realizing the device. 
*/ uint32_t max_bounce_buffer_size; + + char *sriov_pf; }; static inline int pci_intx(PCIDevice *pci_dev) @@ -209,7 +213,7 @@ static inline int pci_is_express_downstream_port(const PCIDevice *d) static inline int pci_is_vf(const PCIDevice *d) { - return d->exp.sriov_vf.pf != NULL; + return d->sriov_pf || d->exp.sriov_vf.pf != NULL; } static inline uint32_t pci_config_size(const PCIDevice *d) diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index c5d2d318d3..aeaa38cf34 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -18,6 +18,7 @@ typedef struct PCIESriovPF { uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */ PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */ + bool vf_user_created; /* If VFs are created by user */ } PCIESriovPF; typedef struct PCIESriovVF { @@ -40,6 +41,26 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, MemoryRegion *memory); +/** + * pcie_sriov_pf_init_from_user_created_vfs() - Initialize PF with user-created + * VFs, adding ARI to PF + * @dev: A PCIe device being realized. + * @offset: The offset of the SR-IOV capability. + * @errp: pointer to Error*, to store an error if it happens. + * + * Initializes a PF with user-created VFs, adding the ARI extended capability to + * the PF. The VFs should call pcie_ari_init() to form an ARI device. + * + * Return: The size of added capabilities. 0 if the user did not create VFs. + * -1 if failed. + */ +int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev, + uint16_t offset, + Error **errp); + +bool pcie_sriov_register_device(PCIDevice *dev, Error **errp); +void pcie_sriov_unregister_device(PCIDevice *dev); + /* * Default (minimal) page size support values * as required by the SR/IOV standard: diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h index 686d9497d2..321b26df30 100644 --- a/include/hw/s390x/s390-virtio-ccw.h +++ b/include/hw/s390x/s390-virtio-ccw.h @@ -53,11 +53,7 @@ struct S390CcwMachineClass { MachineClass parent_class; /*< public >*/ - bool hpage_1m_allowed; int max_threads; }; -/* 1M huge page mappings allowed by the machine */ -bool hpage_1m_allowed(void); - #endif diff --git a/include/hw/virtio/vhost-scsi-common.h b/include/hw/virtio/vhost-scsi-common.h index c5d2c09455..d54d9c916f 100644 --- a/include/hw/virtio/vhost-scsi-common.h +++ b/include/hw/virtio/vhost-scsi-common.h @@ -40,7 +40,7 @@ struct VHostSCSICommon { }; int vhost_scsi_common_start(VHostSCSICommon *vsc, Error **errp); -void vhost_scsi_common_stop(VHostSCSICommon *vsc); +int vhost_scsi_common_stop(VHostSCSICommon *vsc); char *vhost_scsi_common_get_fw_dev_path(FWPathProvider *p, BusState *bus, DeviceState *dev); void vhost_scsi_common_set_config(VirtIODevice *vdev, const uint8_t *config); diff --git a/include/hw/virtio/vhost-vsock-common.h b/include/hw/virtio/vhost-vsock-common.h index 75a74e8a99..01bf6062af 100644 --- a/include/hw/virtio/vhost-vsock-common.h +++ b/include/hw/virtio/vhost-vsock-common.h @@ -42,7 +42,7 @@ struct VHostVSockCommon { }; int vhost_vsock_common_start(VirtIODevice *vdev); -void vhost_vsock_common_stop(VirtIODevice *vdev); +int vhost_vsock_common_stop(VirtIODevice *vdev); int vhost_vsock_common_pre_save(void *opaque); int vhost_vsock_common_post_load(void *opaque, int version_id); void vhost_vsock_common_realize(VirtIODevice *vdev); diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 
bb4b58e115..38800a7156 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -232,8 +232,10 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); * Stop the vhost device. After the device is stopped the notifiers * can be disabled (@vhost_dev_disable_notifiers) and the device can * be torn down (@vhost_dev_cleanup). + * + * Return: 0 on success, != 0 on error when stopping dev. */ -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); /** * DOC: vhost device configuration handling @@ -333,8 +335,8 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev, struct vhost_virtqueue *vq, unsigned idx); -void vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, - struct vhost_virtqueue *vq, unsigned idx); +int vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, + struct vhost_virtqueue *vq, unsigned idx); void vhost_dev_reset_inflight(struct vhost_inflight *inflight); void vhost_dev_free_inflight(struct vhost_inflight *inflight); diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 31ec144509..1dbc3851b0 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -155,6 +155,7 @@ struct VirtIOPCIProxy { uint32_t modern_io_bar_idx; uint32_t modern_mem_bar_idx; int config_cap; + uint16_t last_pcie_cap_offset; uint32_t flags; bool disable_modern; bool ignore_backend_features; diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 7e0c471ea4..214d4a77e9 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -186,7 +186,7 @@ struct VirtioDeviceClass { void (*get_config)(VirtIODevice *vdev, uint8_t *config); void (*set_config)(VirtIODevice *vdev, const uint8_t *config); void (*reset)(VirtIODevice *vdev); - void (*set_status)(VirtIODevice *vdev, uint8_t val); + int (*set_status)(VirtIODevice *vdev, uint8_t val); /* Device must validate queue_index. */ void (*queue_reset)(VirtIODevice *vdev, uint32_t queue_index); /* Device must validate queue_index. 
*/ diff --git a/include/system/runstate.h b/include/system/runstate.h index bffc3719d4..fdd5c4a517 100644 --- a/include/system/runstate.h +++ b/include/system/runstate.h @@ -12,6 +12,7 @@ bool runstate_needs_reset(void); void runstate_replay_enable(void); typedef void VMChangeStateHandler(void *opaque, bool running, RunState state); +typedef int VMChangeStateHandlerWithRet(void *opaque, bool running, RunState state); VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, void *opaque); @@ -20,21 +21,27 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( VMChangeStateEntry * qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, VMChangeStateHandler *prepare_cb, + VMChangeStateHandlerWithRet *cb_ret, void *opaque, int priority); VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, VMChangeStateHandler *cb, + VMChangeStateHandlerWithRet *cb_ret, void *opaque); VMChangeStateEntry *qdev_add_vm_change_state_handler_full( - DeviceState *dev, VMChangeStateHandler *cb, - VMChangeStateHandler *prepare_cb, void *opaque); + DeviceState *dev, VMChangeStateHandler *cb, VMChangeStateHandler *prepare_cb, + VMChangeStateHandlerWithRet *cb_ret, void *opaque); void qemu_del_vm_change_state_handler(VMChangeStateEntry *e); /** * vm_state_notify: Notify the state of the VM * * @running: whether the VM is running or not. * @state: the #RunState of the VM. + * + * Return the result of the callback which has return value. + * If no callback has return value, still return 0 and the + * upper layer should not do additional processing. */ -void vm_state_notify(bool running, RunState state); +int vm_state_notify(bool running, RunState state); static inline bool shutdown_caused_by_guest(ShutdownCause cause) { diff --git a/include/system/vhost-user-backend.h b/include/system/vhost-user-backend.h index 5ed953cd53..5634ebdb2e 100644 --- a/include/system/vhost-user-backend.h +++ b/include/system/vhost-user-backend.h @@ -43,6 +43,6 @@ struct VhostUserBackend { int vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev, unsigned nvqs, Error **errp); void vhost_user_backend_start(VhostUserBackend *b); -void vhost_user_backend_stop(VhostUserBackend *b); +int vhost_user_backend_stop(VhostUserBackend *b); #endif diff --git a/qapi/block-core.json b/qapi/block-core.json index 91c70e24a7..b4115113d4 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2542,6 +2542,11 @@ # disappear from the query list without user intervention. # Defaults to true. (Since 3.1) # +# @target-is-zero: Assume the destination reads as all zeroes before +# the mirror started. Setting this to true can speed up the +# mirror. Setting this to true when the destination is not +# actually all zero can corrupt the destination. (Since 10.1) +# # Since: 2.6 # # .. 
qmp-example:: @@ -2561,7 +2566,8 @@ '*on-target-error': 'BlockdevOnError', '*filter-node-name': 'str', '*copy-mode': 'MirrorCopyMode', - '*auto-finalize': 'bool', '*auto-dismiss': 'bool' }, + '*auto-finalize': 'bool', '*auto-dismiss': 'bool', + '*target-is-zero': 'bool'}, 'allow-preconfig': true } ## diff --git a/system/cpus.c b/system/cpus.c index 2cc5f887ab..d16b0dff98 100644 --- a/system/cpus.c +++ b/system/cpus.c @@ -299,14 +299,18 @@ static int do_vm_stop(RunState state, bool send_stop) if (oldstate == RUN_STATE_RUNNING) { pause_all_vcpus(); } - vm_state_notify(0, state); + ret = vm_state_notify(0, state); if (send_stop) { qapi_event_send_stop(); } } bdrv_drain_all(); - ret = bdrv_flush_all(); + /* + * Even if vm_state_notify() return failure, + * it would be better to flush as before. + */ + ret |= bdrv_flush_all(); trace_vm_stop_flush_all(ret); return ret; diff --git a/system/runstate.c b/system/runstate.c index 272801d307..de74d962bc 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -297,6 +297,7 @@ void qemu_system_vmstop_request(RunState state) struct VMChangeStateEntry { VMChangeStateHandler *cb; VMChangeStateHandler *prepare_cb; + VMChangeStateHandlerWithRet *cb_ret; void *opaque; QTAILQ_ENTRY(VMChangeStateEntry) entries; int priority; @@ -320,14 +321,15 @@ static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head = VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( VMChangeStateHandler *cb, void *opaque, int priority) { - return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque, - priority); + return qemu_add_vm_change_state_handler_prio_full(cb, NULL, NULL, + opaque, priority); } /** * qemu_add_vm_change_state_handler_prio_full: * @cb: the main callback to invoke * @prepare_cb: a callback to invoke before the main callback + * @cb_ret: the main callback to invoke with return value * @opaque: user data passed to the callbacks * @priority: low priorities execute first when the vm runs and the reverse is * true when the vm stops @@ -344,6 +346,7 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( VMChangeStateEntry * qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, VMChangeStateHandler *prepare_cb, + VMChangeStateHandlerWithRet *cb_ret, void *opaque, int priority) { VMChangeStateEntry *e; @@ -352,6 +355,7 @@ qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, e = g_malloc0(sizeof(*e)); e->cb = cb; e->prepare_cb = prepare_cb; + e->cb_ret = cb_ret; e->opaque = opaque; e->priority = priority; @@ -379,9 +383,10 @@ void qemu_del_vm_change_state_handler(VMChangeStateEntry *e) g_free(e); } -void vm_state_notify(bool running, RunState state) +int vm_state_notify(bool running, RunState state) { VMChangeStateEntry *e, *next; + int ret = 0; trace_vm_state_notify(running, state, RunState_str(state)); @@ -393,7 +398,17 @@ void vm_state_notify(bool running, RunState state) } QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) { - e->cb(e->opaque, running, state); + if (e->cb) { + e->cb(e->opaque, running, state); + } else if (e->cb_ret) { + /* + * Here ignore the return value of cb_ret because + * we only care about the stopping the device during + * the VM live migration to indicate whether the + * connection between qemu and backend is normal. 
+ */ + e->cb_ret(e->opaque, running, state); + } } } else { QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) { @@ -403,9 +418,19 @@ void vm_state_notify(bool running, RunState state) } QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) { - e->cb(e->opaque, running, state); + if (e->cb) { + e->cb(e->opaque, running, state); + } else if (e->cb_ret) { + /* + * We should execute all registered callbacks even if + * one of them returns failure, otherwise, some cleanup + * work of the device will be skipped. + */ + ret |= e->cb_ret(e->opaque, running, state); + } } } + return ret; } static ShutdownCause reset_requested; diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c index 41840677ce..a814ece82f 100644 --- a/target/s390x/gen-features.c +++ b/target/s390x/gen-features.c @@ -844,13 +844,11 @@ static uint16_t default_GEN17_GA1[] = { /* QEMU (CPU model) features */ -static uint16_t qemu_V2_11[] = { +static uint16_t qemu_MIN[] = { + /* Features supported by the default CPU of the oldest machine type */ S390_FEAT_GROUP_PLO, S390_FEAT_ESAN3, S390_FEAT_ZARCH, -}; - -static uint16_t qemu_V3_1[] = { S390_FEAT_DAT_ENH, S390_FEAT_IDTE_SEGMENT, S390_FEAT_STFLE, @@ -880,9 +878,6 @@ static uint16_t qemu_V3_1[] = { S390_FEAT_ADAPTER_INT_SUPPRESSION, S390_FEAT_MSA_EXT_3, S390_FEAT_MSA_EXT_4, -}; - -static uint16_t qemu_V4_0[] = { /* * Only BFP bits are implemented (HFP, DFP, PFPO and DIVIDE TO INTEGER not * implemented yet). @@ -1053,9 +1048,7 @@ static FeatGroupDefSpec FeatGroupDef[] = { * QEMU (CPU model) features *******************************/ static FeatGroupDefSpec QemuFeatDef[] = { - QEMU_FEAT_INITIALIZER(V2_11), - QEMU_FEAT_INITIALIZER(V3_1), - QEMU_FEAT_INITIALIZER(V4_0), + QEMU_FEAT_INITIALIZER(MIN), QEMU_FEAT_INITIALIZER(V4_1), QEMU_FEAT_INITIALIZER(V6_0), QEMU_FEAT_INITIALIZER(V6_2), diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index b9f1422197..6cd2ebc5f1 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -298,12 +298,6 @@ void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp) return; } - if (!hpage_1m_allowed()) { - error_setg(errp, "This QEMU machine does not support huge page " - "mappings"); - return; - } - if (pagesize != 1 * MiB) { error_setg(errp, "Memory backing with 2G pages was specified, " "but KVM does not support this memory backing"); diff --git a/tests/functional/test_arm_integratorcp.py b/tests/functional/test_arm_integratorcp.py index a85b339d77..4f00924aa0 100755 --- a/tests/functional/test_arm_integratorcp.py +++ b/tests/functional/test_arm_integratorcp.py @@ -73,8 +73,10 @@ class IntegratorMachine(QemuSystemTest): framebuffer_ready = 'Console: switching to colour frame buffer device' wait_for_console_pattern(self, framebuffer_ready) self.vm.cmd('human-monitor-command', command_line='stop') - self.vm.cmd('human-monitor-command', - command_line='screendump %s' % screendump_path) + res = self.vm.cmd('human-monitor-command', + command_line='screendump %s' % screendump_path) + if 'unknown command' in res: + self.skipTest('screendump not available') logger = logging.getLogger('framebuffer') cpu_count = 1 diff --git a/tests/functional/test_m68k_nextcube.py b/tests/functional/test_m68k_nextcube.py index ff773a7994..13c72bd136 100755 --- a/tests/functional/test_m68k_nextcube.py +++ b/tests/functional/test_m68k_nextcube.py @@ -32,8 +32,10 @@ class NextCubeMachine(QemuSystemTest): # TODO: wait for the 'displaysurface_create 1120x832' trace-event. 
time.sleep(2) - self.vm.cmd('human-monitor-command', - command_line='screendump %s' % screenshot_path) + res = self.vm.cmd('human-monitor-command', + command_line='screendump %s' % screenshot_path) + if 'unknown command' in res: + self.skipTest('screendump not available') @skipIfMissingImports("PIL") def test_bootrom_framebuffer_size(self): diff --git a/tests/functional/test_mips64el_malta.py b/tests/functional/test_mips64el_malta.py index dd37212f9d..3cc79b74c1 100755 --- a/tests/functional/test_mips64el_malta.py +++ b/tests/functional/test_mips64el_malta.py @@ -155,8 +155,10 @@ class MaltaMachineFramebuffer(LinuxKernelTest): framebuffer_ready = 'Console: switching to colour frame buffer device' self.wait_for_console_pattern(framebuffer_ready) self.vm.cmd('human-monitor-command', command_line='stop') - self.vm.cmd('human-monitor-command', - command_line='screendump %s' % screendump_path) + res = self.vm.cmd('human-monitor-command', + command_line='screendump %s' % screendump_path) + if 'unknown command' in res: + self.skipTest('screendump not available') logger = logging.getLogger('framebuffer') match_threshold = 0.95 diff --git a/tests/functional/test_s390x_tuxrun.py b/tests/functional/test_s390x_tuxrun.py index a7db4bfd84..8df3c6893b 100755 --- a/tests/functional/test_s390x_tuxrun.py +++ b/tests/functional/test_s390x_tuxrun.py @@ -24,6 +24,7 @@ class TuxRunS390xTest(TuxRunBaselineTest): 'bff7971fc2fef56372d98afe4557b82fd0a785a241e44c29b058e577ad1bbb44') def test_s390(self): + self.set_machine('s390-ccw-virtio') self.wait_for_shutdown=False self.common_tuxrun(kernel_asset=self.ASSET_S390X_KERNEL, rootfs_asset=self.ASSET_S390X_ROOTFS, diff --git a/tests/qemu-iotests/194 b/tests/qemu-iotests/194 index c0ce82dd25..e114c0b269 100755 --- a/tests/qemu-iotests/194 +++ b/tests/qemu-iotests/194 @@ -34,6 +34,7 @@ with iotests.FilePath('source.img') as source_img_path, \ img_size = '1G' iotests.qemu_img_create('-f', iotests.imgfmt, source_img_path, img_size) + iotests.qemu_io('-f', iotests.imgfmt, '-c', 'write 512M 1M', source_img_path) iotests.qemu_img_create('-f', iotests.imgfmt, dest_img_path, img_size) iotests.log('Launching VMs...') @@ -61,7 +62,8 @@ with iotests.FilePath('source.img') as source_img_path, \ iotests.log('Waiting for `drive-mirror` to complete...') iotests.log(source_vm.event_wait('BLOCK_JOB_READY'), - filters=[iotests.filter_qmp_event]) + filters=[iotests.filter_qmp_event, + iotests.filter_block_job]) iotests.log('Starting migration...') capabilities = [{'capability': 'events', 'state': True}, @@ -87,7 +89,8 @@ with iotests.FilePath('source.img') as source_img_path, \ while True: event2 = source_vm.event_wait('BLOCK_JOB_COMPLETED') - iotests.log(event2, filters=[iotests.filter_qmp_event]) + iotests.log(event2, filters=[iotests.filter_qmp_event, + iotests.filter_block_job]) if event2['event'] == 'BLOCK_JOB_COMPLETED': iotests.log('Stopping the NBD server on destination...') iotests.log(dest_vm.qmp('nbd-server-stop')) diff --git a/tests/qemu-iotests/194.out b/tests/qemu-iotests/194.out index 6940e809cd..d02655a514 100644 --- a/tests/qemu-iotests/194.out +++ b/tests/qemu-iotests/194.out @@ -7,7 +7,7 @@ Launching NBD server on destination... Starting `drive-mirror` on source... {"return": {}} Waiting for `drive-mirror` to complete... 
-{"data": {"device": "mirror-job0", "len": 1073741824, "offset": 1073741824, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"device": "mirror-job0", "len": "LEN", "offset": "OFFSET", "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} Starting migration... {"return": {}} {"execute": "migrate-start-postcopy", "arguments": {}} @@ -18,7 +18,7 @@ Starting migration... {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} Gracefully ending the `drive-mirror` job on source... {"return": {}} -{"data": {"device": "mirror-job0", "len": 1073741824, "offset": 1073741824, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"device": "mirror-job0", "len": "LEN", "offset": "OFFSET", "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} Stopping the NBD server on destination... {"return": {}} Wait for migration completion on target... diff --git a/tests/qemu-iotests/250 b/tests/qemu-iotests/250 index af48f83aba..c0a0dbc0ff 100755 --- a/tests/qemu-iotests/250 +++ b/tests/qemu-iotests/250 @@ -52,11 +52,6 @@ _unsupported_imgopts data_file # bdrv_co_truncate(bs->file) call in qcow2_co_truncate(), which might succeed # anyway. -disk_usage() -{ - du --block-size=1 $1 | awk '{print $1}' -} - size=2100M _make_test_img -o "cluster_size=1M,preallocation=metadata" $size diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 95c12577dd..237f746af8 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -140,6 +140,12 @@ _optstr_add() fi } +# report real disk usage for sparse files +disk_usage() +{ + du --block-size=1 "$1" | awk '{print $1}' +} + # Set the variables to the empty string to turn Valgrind off # for specific processes, e.g. # $ VALGRIND_QEMU_IO= ./check -qcow2 -valgrind 015 diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 7292c8b342..05274772ce 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -601,13 +601,23 @@ def filter_chown(msg): return chown_re.sub("chown UID:GID", msg) def filter_qmp_event(event): - '''Filter a QMP event dict''' + '''Filter the timestamp of a QMP event dict''' event = dict(event) if 'timestamp' in event: event['timestamp']['seconds'] = 'SECS' event['timestamp']['microseconds'] = 'USECS' return event +def filter_block_job(event): + '''Filter the offset and length of a QMP block job event dict''' + event = dict(event) + if 'data' in event: + if 'offset' in event['data']: + event['data']['offset'] = 'OFFSET' + if 'len' in event['data']: + event['data']['len'] = 'LEN' + return event + def filter_qmp(qmsg, filter_fn): '''Given a string filter, filter a QMP object's values. filter_fn takes a (key, value) pair.''' diff --git a/tests/qemu-iotests/tests/mirror-sparse b/tests/qemu-iotests/tests/mirror-sparse new file mode 100755 index 0000000000..8c52a4e244 --- /dev/null +++ b/tests/qemu-iotests/tests/mirror-sparse @@ -0,0 +1,125 @@ +#!/usr/bin/env bash +# group: rw auto quick +# +# Test blockdev-mirror with raw sparse destination +# +# Copyright (C) 2025 Red Hat, Inc. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +seq="$(basename $0)" +echo "QA output created by $seq" + +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + _cleanup_qemu +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +cd .. +. ./common.rc +. ./common.filter +. ./common.qemu + +_supported_fmt qcow2 raw # Format of the source. dst is always raw file +_supported_proto file +_supported_os Linux + +echo +echo "=== Initial image setup ===" +echo + +TEST_IMG="$TEST_IMG.base" _make_test_img 20M +$QEMU_IO -c 'w 8M 2M' -f $IMGFMT "$TEST_IMG.base" | _filter_qemu_io + +_launch_qemu \ + -blockdev '{"driver":"file", "cache":{"direct":true, "no-flush":false}, + "filename":"'"$TEST_IMG.base"'", "node-name":"src-file"}' \ + -blockdev '{"driver":"'$IMGFMT'", "node-name":"src", "file":"src-file"}' +h1=$QEMU_HANDLE +_send_qemu_cmd $h1 '{"execute": "qmp_capabilities"}' 'return' + +# Check several combinations; most should result in a sparse destination; +# the destination should only be fully allocated if pre-allocated +# and not punching holes due to detect-zeroes +# do_test creation discard zeroes result +do_test() { + creation=$1 + discard=$2 + zeroes=$3 + expected=$4 + +echo +echo "=== Testing creation=$creation discard=$discard zeroes=$zeroes ===" +echo + +rm -f $TEST_IMG +if test $creation = external; then + truncate --size=20M $TEST_IMG +else + _send_qemu_cmd $h1 '{"execute": "blockdev-create", "arguments": + {"options": {"driver":"file", "filename":"'$TEST_IMG'", + "size":'$((20*1024*1024))', "preallocation":"'$creation'"}, + "job-id":"job1"}}' 'concluded' + _send_qemu_cmd $h1 '{"execute": "job-dismiss", "arguments": + {"id": "job1"}}' 'return' +fi +_send_qemu_cmd $h1 '{"execute": "blockdev-add", "arguments": + {"node-name": "dst", "driver":"file", + "filename":"'$TEST_IMG'", "aio":"threads", + "auto-read-only":true, "discard":"'$discard'", + "detect-zeroes":"'$zeroes'"}}' 'return' +_send_qemu_cmd $h1 '{"execute":"blockdev-mirror", "arguments": + {"sync":"full", "device":"src", "target":"dst", + "job-id":"job2"}}' 'return' +_timed_wait_for $h1 '"ready"' +_send_qemu_cmd $h1 '{"execute": "job-complete", "arguments": + {"id":"job2"}}' 'return' \ + | _filter_block_job_offset | _filter_block_job_len +_send_qemu_cmd $h1 '{"execute": "blockdev-del", "arguments": + {"node-name": "dst"}}' 'return' \ + | _filter_block_job_offset | _filter_block_job_len +$QEMU_IMG compare -U -f $IMGFMT -F raw $TEST_IMG.base $TEST_IMG +result=$(disk_usage $TEST_IMG) +if test $result -lt $((3*1024*1024)); then + actual=sparse +elif test $result = $((20*1024*1024)); then + actual=full +else + actual=unknown +fi +echo "Destination is $actual; expected $expected" +} + +do_test external ignore off sparse +do_test external unmap off sparse +do_test external unmap unmap sparse +do_test off ignore off sparse +do_test off unmap off sparse +do_test off unmap unmap sparse 
+do_test full ignore off full +do_test full unmap off sparse +do_test full unmap unmap sparse + +_send_qemu_cmd $h1 '{"execute":"quit"}' '' + +# success, all done +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/tests/mirror-sparse.out b/tests/qemu-iotests/tests/mirror-sparse.out new file mode 100644 index 0000000000..2103b891c3 --- /dev/null +++ b/tests/qemu-iotests/tests/mirror-sparse.out @@ -0,0 +1,365 @@ +QA output created by mirror-sparse + +=== Initial image setup === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=20971520 +wrote 2097152/2097152 bytes at offset 8388608 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"execute": "qmp_capabilities"} +{"return": {}} + +=== Testing creation=external discard=ignore zeroes=off === + +{"execute": "blockdev-add", "arguments": + {"node-name": "dst", "driver":"file", + "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", + "auto-read-only":true, "discard":"ignore", + "detect-zeroes":"off"}} +{"return": {}} +{"execute":"blockdev-mirror", "arguments": + {"sync":"full", "device":"src", "target":"dst", + "job-id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job2"}} +{"execute": "job-complete", "arguments": + {"id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"return": {}} +{"execute": "blockdev-del", "arguments": + {"node-name": "dst"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} +{"return": {}} +Images are identical. 
+Destination is sparse; expected sparse + +=== Testing creation=external discard=unmap zeroes=off === + +{"execute": "blockdev-add", "arguments": + {"node-name": "dst", "driver":"file", + "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", + "auto-read-only":true, "discard":"unmap", + "detect-zeroes":"off"}} +{"return": {}} +{"execute":"blockdev-mirror", "arguments": + {"sync":"full", "device":"src", "target":"dst", + "job-id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job2"}} +{"execute": "job-complete", "arguments": + {"id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"return": {}} +{"execute": "blockdev-del", "arguments": + {"node-name": "dst"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} +{"return": {}} +Images are identical. 
+Destination is sparse; expected sparse + +=== Testing creation=external discard=unmap zeroes=unmap === + +{"execute": "blockdev-add", "arguments": + {"node-name": "dst", "driver":"file", + "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", + "auto-read-only":true, "discard":"unmap", + "detect-zeroes":"unmap"}} +{"return": {}} +{"execute":"blockdev-mirror", "arguments": + {"sync":"full", "device":"src", "target":"dst", + "job-id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job2"}} +{"execute": "job-complete", "arguments": + {"id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"return": {}} +{"execute": "blockdev-del", "arguments": + {"node-name": "dst"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} +{"return": {}} +Images are identical. 
+Destination is sparse; expected sparse + +=== Testing creation=off discard=ignore zeroes=off === + +{"execute": "blockdev-create", "arguments": + {"options": {"driver":"file", "filename":"TEST_DIR/t.IMGFMT", + "size":20971520, "preallocation":"off"}, + "job-id":"job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job1"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job1"}} +{"execute": "job-dismiss", "arguments": + {"id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job1"}} +{"return": {}} +{"execute": "blockdev-add", "arguments": + {"node-name": "dst", "driver":"file", + "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", + "auto-read-only":true, "discard":"ignore", + "detect-zeroes":"off"}} +{"return": {}} +{"execute":"blockdev-mirror", "arguments": + {"sync":"full", "device":"src", "target":"dst", + "job-id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job2"}} +{"execute": "job-complete", "arguments": + {"id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"return": {}} +{"execute": "blockdev-del", "arguments": + {"node-name": "dst"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} +{"return": {}} +Images are identical. 
+Destination is sparse; expected sparse + +=== Testing creation=off discard=unmap zeroes=off === + +{"execute": "blockdev-create", "arguments": + {"options": {"driver":"file", "filename":"TEST_DIR/t.IMGFMT", + "size":20971520, "preallocation":"off"}, + "job-id":"job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job1"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job1"}} +{"execute": "job-dismiss", "arguments": + {"id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job1"}} +{"return": {}} +{"execute": "blockdev-add", "arguments": + {"node-name": "dst", "driver":"file", + "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", + "auto-read-only":true, "discard":"unmap", + "detect-zeroes":"off"}} +{"return": {}} +{"execute":"blockdev-mirror", "arguments": + {"sync":"full", "device":"src", "target":"dst", + "job-id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job2"}} +{"execute": "job-complete", "arguments": + {"id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"return": {}} +{"execute": "blockdev-del", "arguments": + {"node-name": "dst"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} +{"return": {}} +Images are identical. 
+Destination is sparse; expected sparse + +=== Testing creation=off discard=unmap zeroes=unmap === + +{"execute": "blockdev-create", "arguments": + {"options": {"driver":"file", "filename":"TEST_DIR/t.IMGFMT", + "size":20971520, "preallocation":"off"}, + "job-id":"job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job1"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job1"}} +{"execute": "job-dismiss", "arguments": + {"id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job1"}} +{"return": {}} +{"execute": "blockdev-add", "arguments": + {"node-name": "dst", "driver":"file", + "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", + "auto-read-only":true, "discard":"unmap", + "detect-zeroes":"unmap"}} +{"return": {}} +{"execute":"blockdev-mirror", "arguments": + {"sync":"full", "device":"src", "target":"dst", + "job-id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job2"}} +{"execute": "job-complete", "arguments": + {"id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"return": {}} +{"execute": "blockdev-del", "arguments": + {"node-name": "dst"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} +{"return": {}} +Images are identical. 
+Destination is sparse; expected sparse + +=== Testing creation=full discard=ignore zeroes=off === + +{"execute": "blockdev-create", "arguments": + {"options": {"driver":"file", "filename":"TEST_DIR/t.IMGFMT", + "size":20971520, "preallocation":"full"}, + "job-id":"job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job1"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job1"}} +{"execute": "job-dismiss", "arguments": + {"id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job1"}} +{"return": {}} +{"execute": "blockdev-add", "arguments": + {"node-name": "dst", "driver":"file", + "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", + "auto-read-only":true, "discard":"ignore", + "detect-zeroes":"off"}} +{"return": {}} +{"execute":"blockdev-mirror", "arguments": + {"sync":"full", "device":"src", "target":"dst", + "job-id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job2"}} +{"execute": "job-complete", "arguments": + {"id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"return": {}} +{"execute": "blockdev-del", "arguments": + {"node-name": "dst"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} +{"return": {}} +Images are identical. 
+Destination is full; expected full + +=== Testing creation=full discard=unmap zeroes=off === + +{"execute": "blockdev-create", "arguments": + {"options": {"driver":"file", "filename":"TEST_DIR/t.IMGFMT", + "size":20971520, "preallocation":"full"}, + "job-id":"job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job1"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job1"}} +{"execute": "job-dismiss", "arguments": + {"id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job1"}} +{"return": {}} +{"execute": "blockdev-add", "arguments": + {"node-name": "dst", "driver":"file", + "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", + "auto-read-only":true, "discard":"unmap", + "detect-zeroes":"off"}} +{"return": {}} +{"execute":"blockdev-mirror", "arguments": + {"sync":"full", "device":"src", "target":"dst", + "job-id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job2"}} +{"execute": "job-complete", "arguments": + {"id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"return": {}} +{"execute": "blockdev-del", "arguments": + {"node-name": "dst"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} +{"return": {}} +Images are identical. 
+Destination is sparse; expected sparse + +=== Testing creation=full discard=unmap zeroes=unmap === + +{"execute": "blockdev-create", "arguments": + {"options": {"driver":"file", "filename":"TEST_DIR/t.IMGFMT", + "size":20971520, "preallocation":"full"}, + "job-id":"job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job1"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job1"}} +{"execute": "job-dismiss", "arguments": + {"id": "job1"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job1"}} +{"return": {}} +{"execute": "blockdev-add", "arguments": + {"node-name": "dst", "driver":"file", + "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", + "auto-read-only":true, "discard":"unmap", + "detect-zeroes":"unmap"}} +{"return": {}} +{"execute":"blockdev-mirror", "arguments": + {"sync":"full", "device":"src", "target":"dst", + "job-id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job2"}} +{"execute": "job-complete", "arguments": + {"id":"job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"return": {}} +{"execute": "blockdev-del", "arguments": + {"node-name": "dst"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} +{"return": {}} +Images are identical. 
+Destination is sparse; expected sparse +{"execute":"quit"} +*** done diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c index 2b358eaaa8..e26b3be593 100644 --- a/tests/unit/test-block-iothread.c +++ b/tests/unit/test-block-iothread.c @@ -63,7 +63,7 @@ bdrv_test_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, } static int coroutine_fn bdrv_test_co_block_status(BlockDriverState *bs, - bool want_zero, + unsigned int mode, int64_t offset, int64_t count, int64_t *pnum, int64_t *map, BlockDriverState **file) |
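
The new filter_block_job() helper added to iotests.py above is the Python counterpart of the shell filters _filter_block_job_offset and _filter_block_job_len used by the bash test: it replaces the run-dependent "offset" and "len" fields of a block-job event with stable placeholders so reference output stays deterministic. A minimal sketch of how it could be combined with the existing filter_qmp_event() inside a Python iotest (where the iotests module is importable); the event dict below is hypothetical and not taken from this patch:

    import iotests

    # Hypothetical BLOCK_JOB_COMPLETED event, as a mirror job might emit it
    event = {'event': 'BLOCK_JOB_COMPLETED',
             'data': {'device': 'job0', 'type': 'mirror', 'speed': 0,
                      'len': 20971520, 'offset': 20971520},
             'timestamp': {'seconds': 1700000000, 'microseconds': 123456}}

    # filter_qmp_event() masks the timestamp, filter_block_job() masks
    # offset/len; the logged line is then stable across test runs.
    iotests.log(iotests.filter_block_job(iotests.filter_qmp_event(event)))

Similarly, the disk_usage() helper promoted from test 250 into common.rc is what lets the new mirror-sparse test classify the destination: it compares the real allocation reported by `du --block-size=1` against the 20 MiB image size to decide between "sparse" and "full".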