diff options
Diffstat (limited to 'include')
44 files changed, 484 insertions, 181 deletions
diff --git a/include/block/block-common.h b/include/block/block-common.h index 2d2af7230d..d7599564db 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -66,13 +66,16 @@ * function. The coroutine yields after scheduling the BH and is reentered when * the wrapped function returns. * - * A no_co_wrapper_bdrv_wrlock function is a no_co_wrapper function that - * automatically takes the graph wrlock when calling the wrapped function. + * A no_co_wrapper_bdrv_rdlock function is a no_co_wrapper function that + * automatically takes the graph rdlock when calling the wrapped function. In + * the same way, no_co_wrapper_bdrv_wrlock functions automatically take the + * graph wrlock. * * If the first parameter of the function is a BlockDriverState, BdrvChild or * BlockBackend pointer, the AioContext lock for it is taken in the wrapper. */ #define no_co_wrapper +#define no_co_wrapper_bdrv_rdlock #define no_co_wrapper_bdrv_wrlock #include "block/blockjob.h" diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 6061220a6c..6bfafe781d 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -132,13 +132,13 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, Error **errp); BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, const char *backing_file); -void bdrv_refresh_filename(BlockDriverState *bs); +void GRAPH_RDLOCK bdrv_refresh_filename(BlockDriverState *bs); void GRAPH_RDLOCK bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp); int bdrv_commit(BlockDriverState *bs); -int bdrv_make_empty(BdrvChild *c, Error **errp); +int GRAPH_RDLOCK bdrv_make_empty(BdrvChild *c, Error **errp); int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt, bool warn); void bdrv_register(BlockDriver *bdrv); @@ -160,19 +160,20 @@ void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base); */ typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset, int64_t total_work_size, void *opaque); -int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, - BlockDriverAmendStatusCB *status_cb, void *cb_opaque, - bool force, - Error **errp); +int GRAPH_RDLOCK +bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb, void *cb_opaque, + bool force, Error **errp); /* check if a named node can be replaced when doing drive-mirror */ BlockDriverState * GRAPH_RDLOCK check_to_replace_node(BlockDriverState *parent_bs, const char *node_name, Error **errp); -int no_coroutine_fn bdrv_activate(BlockDriverState *bs, Error **errp); +int no_coroutine_fn GRAPH_RDLOCK +bdrv_activate(BlockDriverState *bs, Error **errp); -int coroutine_fn no_co_wrapper +int coroutine_fn no_co_wrapper_bdrv_rdlock bdrv_co_activate(BlockDriverState *bs, Error **errp); void bdrv_activate_all(Error **errp); @@ -191,7 +192,7 @@ int bdrv_has_zero_init_1(BlockDriverState *bs); int bdrv_has_zero_init(BlockDriverState *bs); BlockDriverState *bdrv_find_node(const char *node_name); BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp); -XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp); +XDbgBlockGraph * GRAPH_RDLOCK bdrv_get_xdbg_block_graph(Error **errp); BlockDriverState *bdrv_lookup_bs(const char *device, const char *node_name, Error **errp); @@ -208,15 +209,18 @@ typedef struct BdrvNextIterator { BlockDriverState *bs; } BdrvNextIterator; -BlockDriverState *bdrv_first(BdrvNextIterator *it); -BlockDriverState *bdrv_next(BdrvNextIterator *it); +BlockDriverState * GRAPH_RDLOCK bdrv_first(BdrvNextIterator *it); +BlockDriverState * GRAPH_RDLOCK bdrv_next(BdrvNextIterator *it); void bdrv_next_cleanup(BdrvNextIterator *it); BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs); void bdrv_iterate_format(void (*it)(void *opaque, const char *name), void *opaque, bool read_only); -char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp); -char *bdrv_dirname(BlockDriverState *bs, Error **errp); + +char * GRAPH_RDLOCK +bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp); + +char * GRAPH_RDLOCK bdrv_dirname(BlockDriverState *bs, Error **errp); void bdrv_img_create(const char *filename, const char *fmt, const char *base_filename, const char *base_fmt, @@ -242,7 +246,9 @@ bdrv_attach_child(BlockDriverState *parent_bs, BdrvChildRole child_role, Error **errp); -bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); +bool GRAPH_RDLOCK +bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); + void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason); void bdrv_op_block_all(BlockDriverState *bs, Error *reason); diff --git a/include/block/block-io.h b/include/block/block-io.h index f1c796a1ce..ad270b6ad2 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -127,37 +127,46 @@ int coroutine_fn GRAPH_RDLOCK bdrv_co_zone_append(BlockDriverState *bs, BdrvRequestFlags flags); bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); -int bdrv_block_status(BlockDriverState *bs, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file); + +int coroutine_fn GRAPH_RDLOCK +bdrv_co_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file); +int co_wrapper_mixed_bdrv_rdlock +bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file); int coroutine_fn GRAPH_RDLOCK bdrv_co_block_status_above(BlockDriverState *bs, BlockDriverState *base, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); -int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, - int64_t offset, int64_t bytes, int64_t *pnum, - int64_t *map, BlockDriverState **file); +int co_wrapper_mixed_bdrv_rdlock +bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, + int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file); int coroutine_fn GRAPH_RDLOCK bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum); -int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, - int64_t *pnum); +int co_wrapper_mixed_bdrv_rdlock +bdrv_is_allocated(BlockDriverState *bs, int64_t offset, + int64_t bytes, int64_t *pnum); int coroutine_fn GRAPH_RDLOCK bdrv_co_is_allocated_above(BlockDriverState *top, BlockDriverState *base, bool include_base, int64_t offset, int64_t bytes, int64_t *pnum); -int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, - bool include_base, int64_t offset, int64_t bytes, - int64_t *pnum); +int co_wrapper_mixed_bdrv_rdlock +bdrv_is_allocated_above(BlockDriverState *bs, BlockDriverState *base, + bool include_base, int64_t offset, + int64_t bytes, int64_t *pnum); int coroutine_fn GRAPH_RDLOCK bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes); -int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, - Error **errp); +int GRAPH_RDLOCK +bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, + Error **errp); + bool bdrv_is_read_only(BlockDriverState *bs); bool bdrv_is_writable(BlockDriverState *bs); bool bdrv_is_sg(BlockDriverState *bs); @@ -176,8 +185,12 @@ const char *bdrv_get_format_name(BlockDriverState *bs); bool bdrv_supports_compressed_writes(BlockDriverState *bs); const char *bdrv_get_node_name(const BlockDriverState *bs); -const char *bdrv_get_device_name(const BlockDriverState *bs); -const char *bdrv_get_device_or_node_name(const BlockDriverState *bs); + +const char * GRAPH_RDLOCK +bdrv_get_device_name(const BlockDriverState *bs); + +const char * GRAPH_RDLOCK +bdrv_get_device_or_node_name(const BlockDriverState *bs); int coroutine_fn GRAPH_RDLOCK bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); @@ -185,8 +198,9 @@ bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); int co_wrapper_mixed_bdrv_rdlock bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); -ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, - Error **errp); +ImageInfoSpecific * GRAPH_RDLOCK +bdrv_get_specific_info(BlockDriverState *bs, Error **errp); + BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs); void bdrv_round_to_subclusters(BlockDriverState *bs, int64_t offset, int64_t bytes, @@ -363,7 +377,7 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); * * Begin a quiesced section for the parent of @c. */ -void bdrv_parent_drained_begin_single(BdrvChild *c); +void GRAPH_RDLOCK bdrv_parent_drained_begin_single(BdrvChild *c); /** * bdrv_parent_drained_poll_single: @@ -371,14 +385,14 @@ void bdrv_parent_drained_begin_single(BdrvChild *c); * Returns true if there is any pending activity to cease before @c can be * called quiesced, false otherwise. */ -bool bdrv_parent_drained_poll_single(BdrvChild *c); +bool GRAPH_RDLOCK bdrv_parent_drained_poll_single(BdrvChild *c); /** * bdrv_parent_drained_end_single: * * End a quiesced section for the parent of @c. */ -void bdrv_parent_drained_end_single(BdrvChild *c); +void GRAPH_RDLOCK bdrv_parent_drained_end_single(BdrvChild *c); /** * bdrv_drain_poll: @@ -391,8 +405,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c); * * This is part of bdrv_drained_begin. */ -bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, - bool ignore_bds_parents); +bool GRAPH_RDLOCK +bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, + bool ignore_bds_parents); /** * bdrv_drained_begin: @@ -400,6 +415,12 @@ bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, * Begin a quiesced section for exclusive access to the BDS, by disabling * external request sources including NBD server, block jobs, and device model. * + * This function can only be invoked by the main loop or a coroutine + * (regardless of the AioContext where it is running). + * If the coroutine is running in an Iothread AioContext, this function will + * just schedule a BH to run in the main loop. + * However, it cannot be directly called by an Iothread. + * * This function can be recursive. */ void bdrv_drained_begin(BlockDriverState *bs); @@ -416,6 +437,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent); * bdrv_drained_end: * * End a quiescent section started by bdrv_drained_begin(). + * + * This function can only be invoked by the main loop or a coroutine + * (regardless of the AioContext where it is running). + * If the coroutine is running in an Iothread AioContext, this function will + * just schedule a BH to run in the main loop. + * However, it cannot be directly called by an Iothread. */ void bdrv_drained_end(BlockDriverState *bs); diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 2ca3758cb8..b8d9d24f39 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -235,11 +235,14 @@ struct BlockDriver { Error **errp); /* For handling image reopen for split or non-split files. */ - int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, - BlockReopenQueue *queue, Error **errp); - void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); - void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state); - void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); + int GRAPH_UNLOCKED_PTR (*bdrv_reopen_prepare)( + BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp); + void GRAPH_UNLOCKED_PTR (*bdrv_reopen_commit)( + BDRVReopenState *reopen_state); + void GRAPH_UNLOCKED_PTR (*bdrv_reopen_commit_post)( + BDRVReopenState *reopen_state); + void GRAPH_UNLOCKED_PTR (*bdrv_reopen_abort)( + BDRVReopenState *reopen_state); void (*bdrv_join_options)(QDict *options, QDict *old_options); int GRAPH_UNLOCKED_PTR (*bdrv_open)( @@ -256,20 +259,18 @@ struct BlockDriver { int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create_opts)( BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp); - int (*bdrv_amend_options)(BlockDriverState *bs, - QemuOpts *opts, - BlockDriverAmendStatusCB *status_cb, - void *cb_opaque, - bool force, - Error **errp); + int GRAPH_RDLOCK_PTR (*bdrv_amend_options)( + BlockDriverState *bs, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb, void *cb_opaque, + bool force, Error **errp); - int (*bdrv_make_empty)(BlockDriverState *bs); + int GRAPH_RDLOCK_PTR (*bdrv_make_empty)(BlockDriverState *bs); /* * Refreshes the bs->exact_filename field. If that is impossible, * bs->exact_filename has to be left empty. */ - void (*bdrv_refresh_filename)(BlockDriverState *bs); + void GRAPH_RDLOCK_PTR (*bdrv_refresh_filename)(BlockDriverState *bs); /* * Gathers the open options for all children into @target. @@ -292,15 +293,15 @@ struct BlockDriver { * block driver which implements it is probably doing something * shady regarding its runtime option structure. */ - void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target, - bool backing_overridden); + void GRAPH_RDLOCK_PTR (*bdrv_gather_child_options)( + BlockDriverState *bs, QDict *target, bool backing_overridden); /* * Returns an allocated string which is the directory name of this BDS: It * will be used to make relative filenames absolute by prepending this * function's return value to them. */ - char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp); + char * GRAPH_RDLOCK_PTR (*bdrv_dirname)(BlockDriverState *bs, Error **errp); /* * This informs the driver that we are no longer interested in the result @@ -313,14 +314,16 @@ struct BlockDriver { int GRAPH_RDLOCK_PTR (*bdrv_inactivate)(BlockDriverState *bs); - int (*bdrv_snapshot_create)(BlockDriverState *bs, - QEMUSnapshotInfo *sn_info); - int (*bdrv_snapshot_goto)(BlockDriverState *bs, - const char *snapshot_id); - int (*bdrv_snapshot_delete)(BlockDriverState *bs, - const char *snapshot_id, - const char *name, - Error **errp); + int GRAPH_RDLOCK_PTR (*bdrv_snapshot_create)( + BlockDriverState *bs, QEMUSnapshotInfo *sn_info); + + int GRAPH_UNLOCKED_PTR (*bdrv_snapshot_goto)( + BlockDriverState *bs, const char *snapshot_id); + + int GRAPH_RDLOCK_PTR (*bdrv_snapshot_delete)( + BlockDriverState *bs, const char *snapshot_id, const char *name, + Error **errp); + int (*bdrv_snapshot_list)(BlockDriverState *bs, QEMUSnapshotInfo **psn_info); int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs, @@ -725,8 +728,8 @@ struct BlockDriver { int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_get_info)( BlockDriverState *bs, BlockDriverInfo *bdi); - ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs, - Error **errp); + ImageInfoSpecific * GRAPH_RDLOCK_PTR (*bdrv_get_specific_info)( + BlockDriverState *bs, Error **errp); BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs); int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_save_vmstate)( @@ -963,15 +966,15 @@ struct BdrvChildClass { * Note that this can be nested. If drained_begin() was called twice, new * I/O is allowed only after drained_end() was called twice, too. */ - void (*drained_begin)(BdrvChild *child); - void (*drained_end)(BdrvChild *child); + void GRAPH_RDLOCK_PTR (*drained_begin)(BdrvChild *child); + void GRAPH_RDLOCK_PTR (*drained_end)(BdrvChild *child); /* * Returns whether the parent has pending requests for the child. This * callback is polled after .drained_begin() has been called until all * activity on the child has stopped. */ - bool (*drained_poll)(BdrvChild *child); + bool GRAPH_RDLOCK_PTR (*drained_poll)(BdrvChild *child); /* * Notifies the parent that the filename of its child has changed (e.g. @@ -1039,8 +1042,8 @@ struct BdrvChild { */ bool quiesced_parent; - QLIST_ENTRY(BdrvChild) next; - QLIST_ENTRY(BdrvChild) next_parent; + QLIST_ENTRY(BdrvChild GRAPH_RDLOCK_PTR) next; + QLIST_ENTRY(BdrvChild GRAPH_RDLOCK_PTR) next_parent; }; /* @@ -1173,11 +1176,11 @@ struct BlockDriverState { * See also comment in include/block/block.h, to learn how backing and file * are connected with BdrvChildRole. */ - QLIST_HEAD(, BdrvChild) children; + QLIST_HEAD(, BdrvChild GRAPH_RDLOCK_PTR) children; BdrvChild *backing; BdrvChild *file; - QLIST_HEAD(, BdrvChild) parents; + QLIST_HEAD(, BdrvChild GRAPH_RDLOCK_PTR) parents; QDict *options; QDict *explicit_options; diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h index eb0da7232e..34eac72d7a 100644 --- a/include/block/block_int-io.h +++ b/include/block/block_int-io.h @@ -99,7 +99,7 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, */ void bdrv_wakeup(BlockDriverState *bs); -const char *bdrv_get_parent_name(const BlockDriverState *bs); +const char * GRAPH_RDLOCK bdrv_get_parent_name(const BlockDriverState *bs); bool blk_dev_has_tray(BlockBackend *blk); bool blk_dev_is_tray_open(BlockBackend *blk); @@ -133,7 +133,7 @@ bdrv_refresh_total_sectors(BlockDriverState *bs, int64_t hint); BdrvChild *bdrv_cow_child(BlockDriverState *bs); BdrvChild *bdrv_filter_child(BlockDriverState *bs); BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs); -BdrvChild *bdrv_primary_child(BlockDriverState *bs); +BdrvChild * GRAPH_RDLOCK bdrv_primary_child(BlockDriverState *bs); BlockDriverState *bdrv_skip_filters(BlockDriverState *bs); BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs); @@ -155,7 +155,8 @@ static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs) return child_bs(bdrv_filter_or_cow_child(bs)); } -static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs) +static inline BlockDriverState * GRAPH_RDLOCK +bdrv_primary_bs(BlockDriverState *bs) { IO_CODE(); return child_bs(bdrv_primary_child(bs)); diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index 7e04f98ff0..6f1cd12745 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -116,7 +116,8 @@ void unregister_aiocontext(AioContext *ctx); * This function polls. Callers must not hold the lock of any AioContext other * than the current one and the one of @bs. */ -void bdrv_graph_wrlock(BlockDriverState *bs) TSA_ACQUIRE(graph_lock) TSA_NO_TSA; +void no_coroutine_fn TSA_ACQUIRE(graph_lock) TSA_NO_TSA +bdrv_graph_wrlock(BlockDriverState *bs); /* * bdrv_graph_wrunlock: diff --git a/include/block/qapi.h b/include/block/qapi.h index 8663971c58..54c48de26a 100644 --- a/include/block/qapi.h +++ b/include/block/qapi.h @@ -29,18 +29,17 @@ #include "block/snapshot.h" #include "qapi/qapi-types-block-core.h" -BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, - BlockDriverState *bs, - bool flat, - Error **errp); -int bdrv_query_snapshot_info_list(BlockDriverState *bs, - SnapshotInfoList **p_list, - Error **errp); -void bdrv_query_image_info(BlockDriverState *bs, - ImageInfo **p_info, - bool flat, - bool skip_implicit_filters, - Error **errp); +BlockDeviceInfo * GRAPH_RDLOCK +bdrv_block_device_info(BlockBackend *blk, BlockDriverState *bs, + bool flat, Error **errp); + +int GRAPH_RDLOCK +bdrv_query_snapshot_info_list(BlockDriverState *bs, + SnapshotInfoList **p_list, + Error **errp); +void GRAPH_RDLOCK +bdrv_query_image_info(BlockDriverState *bs, ImageInfo **p_info, bool flat, + bool skip_implicit_filters, Error **errp); void GRAPH_RDLOCK bdrv_query_block_graph_info(BlockDriverState *bs, BlockGraphInfo **p_info, Error **errp); diff --git a/include/block/snapshot.h b/include/block/snapshot.h index 50ff924710..d49c5599d9 100644 --- a/include/block/snapshot.h +++ b/include/block/snapshot.h @@ -25,6 +25,7 @@ #ifndef SNAPSHOT_H #define SNAPSHOT_H +#include "block/graph-lock.h" #include "qapi/qapi-builtin-types.h" #define SNAPSHOT_OPT_BASE "snapshot." @@ -59,16 +60,19 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, const char *name, QEMUSnapshotInfo *sn_info, Error **errp); -int bdrv_can_snapshot(BlockDriverState *bs); -int bdrv_snapshot_create(BlockDriverState *bs, - QEMUSnapshotInfo *sn_info); -int bdrv_snapshot_goto(BlockDriverState *bs, - const char *snapshot_id, - Error **errp); -int bdrv_snapshot_delete(BlockDriverState *bs, - const char *snapshot_id, - const char *name, - Error **errp); + +int GRAPH_RDLOCK bdrv_can_snapshot(BlockDriverState *bs); + +int GRAPH_RDLOCK +bdrv_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); + +int GRAPH_UNLOCKED +bdrv_snapshot_goto(BlockDriverState *bs, const char *snapshot_id, Error **errp); + +int GRAPH_RDLOCK +bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id, + const char *name, Error **errp); + int bdrv_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_info); int bdrv_snapshot_load_tmp(BlockDriverState *bs, diff --git a/include/block/ufs.h b/include/block/ufs.h index fd884eb8ce..7631a5af10 100644 --- a/include/block/ufs.h +++ b/include/block/ufs.h @@ -111,14 +111,14 @@ REG32(UECT, offsetof(UfsReg, uect)) REG32(UECDME, offsetof(UfsReg, uecdme)) REG32(UTRIACR, offsetof(UfsReg, utriacr)) REG32(UTRLBA, offsetof(UfsReg, utrlba)) - FIELD(UTRLBA, UTRLBA, 9, 22) + FIELD(UTRLBA, UTRLBA, 10, 22) REG32(UTRLBAU, offsetof(UfsReg, utrlbau)) REG32(UTRLDBR, offsetof(UfsReg, utrldbr)) REG32(UTRLCLR, offsetof(UfsReg, utrlclr)) REG32(UTRLRSR, offsetof(UfsReg, utrlrsr)) REG32(UTRLCNR, offsetof(UfsReg, utrlcnr)) REG32(UTMRLBA, offsetof(UfsReg, utmrlba)) - FIELD(UTMRLBA, UTMRLBA, 9, 22) + FIELD(UTMRLBA, UTMRLBA, 10, 22) REG32(UTMRLBAU, offsetof(UfsReg, utmrlbau)) REG32(UTMRLDBR, offsetof(UfsReg, utmrldbr)) REG32(UTMRLCLR, offsetof(UfsReg, utmrlclr)) diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 605b160a7e..30c376a4de 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -83,6 +83,21 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length); ram_addr_t qemu_ram_addr_from_host(void *ptr); ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr); RAMBlock *qemu_ram_block_by_name(const char *name); + +/* + * Translates a host ptr back to a RAMBlock and an offset in that RAMBlock. + * + * @ptr: The host pointer to translate. + * @round_offset: Whether to round the result offset down to a target page + * @offset: Will be set to the offset within the returned RAMBlock. + * + * Returns: RAMBlock (or NULL if not found) + * + * By the time this function returns, the returned pointer is not protected + * by RCU anymore. If the caller is not within an RCU critical section and + * does not hold the iothread lock, it must have other means of protecting the + * pointer, such as a reference to the memory region that owns the RAMBlock. + */ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, ram_addr_t *offset); ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host); diff --git a/include/exec/memory.h b/include/exec/memory.h index c99842d2fc..653a32ea10 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -95,6 +95,7 @@ struct ReservedRegion { * relative to the region's address space * @readonly: writes to this section are ignored * @nonvolatile: this section is non-volatile + * @unmergeable: this section should not get merged with adjacent sections */ struct MemoryRegionSection { Int128 size; @@ -104,6 +105,7 @@ struct MemoryRegionSection { hwaddr offset_within_address_space; bool readonly; bool nonvolatile; + bool unmergeable; }; typedef struct IOMMUTLBEntry IOMMUTLBEntry; @@ -599,8 +601,9 @@ typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque); * populated (consuming memory), to be used/accessed by the VM. * * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the - * #MemoryRegion isn't mapped yet; it cannot change while the #MemoryRegion is - * mapped. + * #MemoryRegion isn't mapped into an address space yet (either directly + * or via an alias); it cannot change while the #MemoryRegion is + * mapped into an address space. * * The #RamDiscardManager is intended to be used by technologies that are * incompatible with discarding of RAM (e.g., VFIO, which may pin all @@ -772,6 +775,7 @@ struct MemoryRegion { bool nonvolatile; bool rom_device; bool flush_coalesced_mmio; + bool unmergeable; uint8_t dirty_log_mask; bool is_iommu; RAMBlock *ram_block; @@ -2350,6 +2354,25 @@ void memory_region_set_size(MemoryRegion *mr, uint64_t size); void memory_region_set_alias_offset(MemoryRegion *mr, hwaddr offset); +/* + * memory_region_set_unmergeable: Set a memory region unmergeable + * + * Mark a memory region unmergeable, resulting in the memory region (or + * everything contained in a memory region container) not getting merged when + * simplifying the address space and notifying memory listeners. Consequently, + * memory listeners will never get notified about ranges that are larger than + * the original memory regions. + * + * This is primarily useful when multiple aliases to a RAM memory region are + * mapped into a memory region container, and updates (e.g., enable/disable or + * map/unmap) of individual memory region aliases are not supposed to affect + * other memory regions in the same container. + * + * @mr: the #MemoryRegion to be updated + * @unmergeable: whether to mark the #MemoryRegion unmergeable + */ +void memory_region_set_unmergeable(MemoryRegion *mr, bool unmergeable); + /** * memory_region_present: checks if an address relative to a @container * translates into #MemoryRegion within @container diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h index ef59810c17..ac21a95913 100644 --- a/include/hw/acpi/pcihp.h +++ b/include/hw/acpi/pcihp.h @@ -56,7 +56,7 @@ typedef struct AcpiPciHpState { } AcpiPciHpState; void acpi_pcihp_init(Object *owner, AcpiPciHpState *, PCIBus *root, - MemoryRegion *address_space_io, uint16_t io_base); + MemoryRegion *io, uint16_t io_base); bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus); void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, diff --git a/include/hw/arm/bsa.h b/include/hw/arm/bsa.h new file mode 100644 index 0000000000..8eaab603c0 --- /dev/null +++ b/include/hw/arm/bsa.h @@ -0,0 +1,35 @@ +/* + * Common definitions for Arm Base System Architecture (BSA) platforms. + * + * Copyright (c) 2015 Linaro Limited + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + +#ifndef QEMU_ARM_BSA_H +#define QEMU_ARM_BSA_H + +/* These are architectural INTID values */ +#define VIRTUAL_PMU_IRQ 23 +#define ARCH_GIC_MAINT_IRQ 25 +#define ARCH_TIMER_NS_EL2_IRQ 26 +#define ARCH_TIMER_VIRT_IRQ 27 +#define ARCH_TIMER_NS_EL2_VIRT_IRQ 28 +#define ARCH_TIMER_S_EL1_IRQ 29 +#define ARCH_TIMER_NS_EL1_IRQ 30 + +#define INTID_TO_PPI(irq) ((irq) - 16) + +#endif /* QEMU_ARM_BSA_H */ diff --git a/include/hw/arm/exynos4210.h b/include/hw/arm/exynos4210.h index 68db19f0cb..d33fe38586 100644 --- a/include/hw/arm/exynos4210.h +++ b/include/hw/arm/exynos4210.h @@ -30,7 +30,7 @@ #include "hw/intc/exynos4210_gic.h" #include "hw/intc/exynos4210_combiner.h" #include "hw/core/split-irq.h" -#include "target/arm/cpu-qom.h" +#include "hw/arm/boot.h" #include "qom/object.h" #define EXYNOS4210_NCPUS 2 diff --git a/include/hw/misc/raspberrypi-fw-defs.h b/include/hw/arm/raspberrypi-fw-defs.h index 4551fe7450..4551fe7450 100644 --- a/include/hw/misc/raspberrypi-fw-defs.h +++ b/include/hw/arm/raspberrypi-fw-defs.h diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index e1ddbea96b..f69239850e 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -34,6 +34,7 @@ #include "qemu/notify.h" #include "hw/boards.h" #include "hw/arm/boot.h" +#include "hw/arm/bsa.h" #include "hw/block/flash.h" #include "sysemu/kvm.h" #include "hw/intc/arm_gicv3_common.h" @@ -43,17 +44,6 @@ #define NUM_VIRTIO_TRANSPORTS 32 #define NUM_SMMU_IRQS 4 -#define ARCH_GIC_MAINT_IRQ 9 - -#define ARCH_TIMER_VIRT_IRQ 11 -#define ARCH_TIMER_S_EL1_IRQ 13 -#define ARCH_TIMER_NS_EL1_IRQ 14 -#define ARCH_TIMER_NS_EL2_IRQ 10 - -#define VIRTUAL_PMU_IRQ 7 - -#define PPI(irq) ((irq) + 16) - /* See Linux kernel arch/arm64/include/asm/pvclock-abi.h */ #define PVTIME_SIZE_PER_CPU 64 diff --git a/include/hw/audio/pcspk.h b/include/hw/audio/pcspk.h index 9506179587..6be75a6b86 100644 --- a/include/hw/audio/pcspk.h +++ b/include/hw/audio/pcspk.h @@ -25,16 +25,6 @@ #ifndef HW_PCSPK_H #define HW_PCSPK_H -#include "hw/isa/isa.h" -#include "hw/qdev-properties.h" -#include "qapi/error.h" - #define TYPE_PC_SPEAKER "isa-pcspk" -static inline void pcspk_init(ISADevice *isadev, ISABus *bus, ISADevice *pit) -{ - object_property_set_link(OBJECT(isadev), "pit", OBJECT(pit), NULL); - isa_realize_and_unref(isadev, bus, &error_fatal); -} - #endif /* HW_PCSPK_H */ diff --git a/include/hw/boards.h b/include/hw/boards.h index 55a64a13fd..43a56dc51e 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -297,15 +297,27 @@ struct MachineClass { * DeviceMemoryState: * @base: address in guest physical address space where the memory * address space for memory devices starts - * @mr: address space container for memory devices + * @mr: memory region container for memory devices + * @as: address space for memory devices + * @listener: memory listener used to track used memslots in the address space * @dimm_size: the sum of plugged DIMMs' sizes * @used_region_size: the part of @mr already used by memory devices + * @required_memslots: the number of memslots required by memory devices + * @used_memslots: the number of memslots currently used by memory devices + * @memslot_auto_decision_active: whether any plugged memory device + * automatically decided to use more than + * one memslot */ typedef struct DeviceMemoryState { hwaddr base; MemoryRegion mr; + AddressSpace as; + MemoryListener listener; uint64_t dimm_size; uint64_t used_region_size; + unsigned int required_memslots; + unsigned int used_memslots; + unsigned int memslot_auto_decision_active; } DeviceMemoryState; /** diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 3968369554..18593db5b2 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -618,8 +618,10 @@ bool cpu_paging_enabled(const CPUState *cpu); * @cpu: The CPU whose memory mappings are to be obtained. * @list: Where to write the memory mappings to. * @errp: Pointer for reporting an #Error. + * + * Returns: %true on success, %false otherwise. */ -void cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, +bool cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, Error **errp); #if !defined(CONFIG_USER_ONLY) diff --git a/include/hw/core/sysemu-cpu-ops.h b/include/hw/core/sysemu-cpu-ops.h index ee169b872c..24d003fe04 100644 --- a/include/hw/core/sysemu-cpu-ops.h +++ b/include/hw/core/sysemu-cpu-ops.h @@ -19,7 +19,7 @@ typedef struct SysemuCPUOps { /** * @get_memory_mapping: Callback for obtaining the memory mappings. */ - void (*get_memory_mapping)(CPUState *cpu, MemoryMappingList *list, + bool (*get_memory_mapping)(CPUState *cpu, MemoryMappingList *list, Error **errp); /** * @get_paging_enabled: Callback for inquiring whether paging is enabled. diff --git a/include/hw/display/ramfb.h b/include/hw/display/ramfb.h index b33a2c467b..a7e0019144 100644 --- a/include/hw/display/ramfb.h +++ b/include/hw/display/ramfb.h @@ -1,11 +1,15 @@ #ifndef RAMFB_H #define RAMFB_H +#include "migration/vmstate.h" + /* ramfb.c */ typedef struct RAMFBState RAMFBState; void ramfb_display_update(QemuConsole *con, RAMFBState *s); RAMFBState *ramfb_setup(Error **errp); +extern const VMStateDescription ramfb_vmstate; + /* ramfb-standalone.c */ #define TYPE_RAMFB_DEVICE "ramfb" diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h index f1659655c6..674f4655e0 100644 --- a/include/hw/loongarch/virt.h +++ b/include/hw/loongarch/virt.h @@ -16,8 +16,6 @@ #define LOONGARCH_MAX_CPUS 256 -#define VIRT_ISA_IO_BASE 0x18000000UL -#define VIRT_ISA_IO_SIZE 0x0004000 #define VIRT_FWCFG_BASE 0x1e020000UL #define VIRT_BIOS_BASE 0x1c000000UL #define VIRT_BIOS_SIZE (4 * MiB) @@ -38,7 +36,6 @@ struct LoongArchMachineState { MemoryRegion lowmem; MemoryRegion highmem; - MemoryRegion isa_io; MemoryRegion bios; bool bios_loaded; /* State for other subsystems/APIs: */ diff --git a/include/hw/mem/memory-device.h b/include/hw/mem/memory-device.h index 48d2611fc5..3354d6c166 100644 --- a/include/hw/mem/memory-device.h +++ b/include/hw/mem/memory-device.h @@ -14,6 +14,7 @@ #define MEMORY_DEVICE_H #include "hw/qdev-core.h" +#include "qemu/typedefs.h" #include "qapi/qapi-types-machine.h" #include "qom/object.h" @@ -41,6 +42,17 @@ typedef struct MemoryDeviceState MemoryDeviceState; * successive memory regions are used, a covering memory region has to * be provided. Scattered memory regions are not supported for single * devices. + * + * The device memory region returned via @get_memory_region may either be a + * single RAM memory region or a memory region container with subregions + * that are RAM memory regions or aliases to RAM memory regions. Other + * memory regions or subregions are not supported. + * + * If the device memory region returned via @get_memory_region is a + * memory region container, it's supported to dynamically (un)map subregions + * as long as the number of memslots returned by @get_memslots() won't + * be exceeded and as long as all memory regions are of the same kind (e.g., + * all RAM or all ROM). */ struct MemoryDeviceClass { /* private */ @@ -89,6 +101,28 @@ struct MemoryDeviceClass { MemoryRegion *(*get_memory_region)(MemoryDeviceState *md, Error **errp); /* + * Optional: Instruct the memory device to decide how many memory slots + * it requires, not exceeding the given limit. + * + * Called exactly once when pre-plugging the memory device, before + * querying the number of memslots using @get_memslots the first time. + */ + void (*decide_memslots)(MemoryDeviceState *md, unsigned int limit); + + /* + * Optional for memory devices that require only a single memslot, + * required for all other memory devices: Return the number of memslots + * (distinct RAM memory regions in the device memory region) that are + * required by the device. + * + * If this function is not implemented, the assumption is "1". + * + * Called when (un)plugging the memory device, to check if the requirements + * can be satisfied, and to do proper accounting. + */ + unsigned int (*get_memslots)(MemoryDeviceState *md); + + /* * Optional: Return the desired minimum alignment of the device in guest * physical address space. The final alignment is computed based on this * alignment and the alignment requirements of the memory region. @@ -105,8 +139,31 @@ struct MemoryDeviceClass { MemoryDeviceInfo *info); }; +/* + * Traditionally, KVM/vhost in many setups supported 509 memslots, whereby + * 253 memslots were "reserved" for boot memory and other devices (such + * as PCI BARs, which can get mapped dynamically) and 256 memslots were + * dedicated for DIMMs. These magic numbers worked reliably in the past. + * + * Further, using many memslots can negatively affect performance, so setting + * the soft-limit of memslots used by memory devices to the traditional + * DIMM limit of 256 sounds reasonable. + * + * If we have less than 509 memslots, we will instruct memory devices that + * support automatically deciding how many memslots to use to only use a single + * one. + * + * Hotplugging vhost devices with at least 509 memslots is not expected to + * cause problems, not even when devices automatically decided how many memslots + * to use. + */ +#define MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT 256 +#define MEMORY_DEVICES_SAFE_MAX_MEMSLOTS 509 + MemoryDeviceInfoList *qmp_memory_device_list(void); uint64_t get_plugged_memory_size(void); +unsigned int memory_devices_get_reserved_memslots(void); +bool memory_devices_memslot_auto_decision_active(void); void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, const uint64_t *legacy_align, Error **errp); void memory_device_plug(MemoryDeviceState *md, MachineState *ms); diff --git a/include/hw/mips/cpudevs.h b/include/hw/mips/cpudevs.h deleted file mode 100644 index f7c9728fa9..0000000000 --- a/include/hw/mips/cpudevs.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef HW_MIPS_CPUDEVS_H -#define HW_MIPS_CPUDEVS_H - -#include "target/mips/cpu-qom.h" - -/* Definitions for MIPS CPU internal devices. */ - -/* mips_int.c */ -void cpu_mips_irq_init_cpu(MIPSCPU *cpu); - -/* mips_timer.c */ -void cpu_mips_clock_init(MIPSCPU *cpu); - -#endif diff --git a/include/hw/misc/mips_itu.h b/include/hw/misc/mips_itu.h index 35218b2d14..5caed6cc36 100644 --- a/include/hw/misc/mips_itu.h +++ b/include/hw/misc/mips_itu.h @@ -73,10 +73,12 @@ struct MIPSITUState { /* SAAR */ uint64_t *saar; - MIPSCPU *cpu0; + ArchCPU *cpu0; }; /* Get ITC Configuration Tag memory region. */ MemoryRegion *mips_itu_get_tag_region(MIPSITUState *itu); +void itc_reconfigure(struct MIPSITUState *tag); + #endif /* MIPS_ITU_H */ diff --git a/include/hw/nvram/xlnx-bbram.h b/include/hw/nvram/xlnx-bbram.h index 87d59ef3c0..6fc13f8cc1 100644 --- a/include/hw/nvram/xlnx-bbram.h +++ b/include/hw/nvram/xlnx-bbram.h @@ -34,7 +34,7 @@ #define RMAX_XLNX_BBRAM ((0x4c / 4) + 1) -#define TYPE_XLNX_BBRAM "xlnx,bbram-ctrl" +#define TYPE_XLNX_BBRAM "xlnx.bbram-ctrl" OBJECT_DECLARE_SIMPLE_TYPE(XlnxBBRam, XLNX_BBRAM); struct XlnxBBRam { diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index b70a0b95ff..ea5aff118b 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -279,12 +279,10 @@ bool pci_bus_is_express(const PCIBus *bus); void pci_root_bus_init(PCIBus *bus, size_t bus_size, DeviceState *parent, const char *name, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, + MemoryRegion *mem, MemoryRegion *io, uint8_t devfn_min, const char *typename); PCIBus *pci_root_bus_new(DeviceState *parent, const char *name, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, + MemoryRegion *mem, MemoryRegion *io, uint8_t devfn_min, const char *typename); void pci_root_bus_cleanup(PCIBus *bus); void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, @@ -304,8 +302,7 @@ int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin); PCIBus *pci_register_root_bus(DeviceState *parent, const char *name, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, void *irq_opaque, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, + MemoryRegion *mem, MemoryRegion *io, uint8_t devfn_min, int nirq, const char *typename); void pci_unregister_root_bus(PCIBus *bus); diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h index 9bc6463547..35b19610f7 100644 --- a/include/hw/ppc/pnv_xscom.h +++ b/include/hw/ppc/pnv_xscom.h @@ -170,7 +170,7 @@ struct PnvXScomInterfaceClass { #define PNV10_XSCOM_PEC_PCI_BASE 0x8010800 /* index goes upwards ... */ #define PNV10_XSCOM_PEC_PCI_SIZE 0x200 -void pnv_xscom_realize(PnvChip *chip, uint64_t size, Error **errp); +void pnv_xscom_init(PnvChip *chip, uint64_t size, hwaddr addr); int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset, uint64_t xscom_base, uint64_t xscom_size, const char *compat, int compat_size); diff --git a/include/hw/s390x/vfio-ccw.h b/include/hw/s390x/vfio-ccw.h index 63a909eb7e..4209d27657 100644 --- a/include/hw/s390x/vfio-ccw.h +++ b/include/hw/s390x/vfio-ccw.h @@ -22,6 +22,4 @@ #define TYPE_VFIO_CCW "vfio-ccw" OBJECT_DECLARE_SIMPLE_TYPE(VFIOCCWDevice, VFIO_CCW) -#define TYPE_VFIO_CCW "vfio-ccw" - #endif diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index e9b8954595..7780b9073a 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -98,6 +98,7 @@ typedef struct VFIOContainer { QLIST_HEAD(, VFIOGroup) group_list; QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; QLIST_ENTRY(VFIOContainer) next; + QLIST_HEAD(, VFIODevice) device_list; } VFIOContainer; typedef struct VFIOGuestIOMMU { @@ -129,7 +130,10 @@ typedef struct VFIODeviceOps VFIODeviceOps; typedef struct VFIODevice { QLIST_ENTRY(VFIODevice) next; + QLIST_ENTRY(VFIODevice) container_next; + QLIST_ENTRY(VFIODevice) global_next; struct VFIOGroup *group; + VFIOContainer *container; char *sysfsdev; char *name; DeviceState *dev; @@ -196,7 +200,36 @@ typedef struct VFIODisplay { } dmabuf; } VFIODisplay; -void vfio_put_base_device(VFIODevice *vbasedev); +typedef struct { + unsigned long *bitmap; + hwaddr size; + hwaddr pages; +} VFIOBitmap; + +void vfio_host_win_add(VFIOContainer *container, + hwaddr min_iova, hwaddr max_iova, + uint64_t iova_pgsizes); +int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova, + hwaddr max_iova); +VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); +void vfio_put_address_space(VFIOAddressSpace *space); +bool vfio_devices_all_running_and_saving(VFIOContainer *container); + +/* container->fd */ +int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + ram_addr_t size, IOMMUTLBEntry *iotlb); +int vfio_dma_map(VFIOContainer *container, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly); +int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); +int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); + +int vfio_container_add_section_window(VFIOContainer *container, + MemoryRegionSection *section, + Error **errp); +void vfio_container_del_section_window(VFIOContainer *container, + MemoryRegionSection *section); + void vfio_disable_irqindex(VFIODevice *vbasedev, int index); void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index); void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index); @@ -214,15 +247,22 @@ void vfio_region_unmap(VFIORegion *region); void vfio_region_exit(VFIORegion *region); void vfio_region_finalize(VFIORegion *region); void vfio_reset_handler(void *opaque); -VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); -void vfio_put_group(VFIOGroup *group); struct vfio_device_info *vfio_get_device_info(int fd); -int vfio_get_device(VFIOGroup *group, const char *name, - VFIODevice *vbasedev, Error **errp); +int vfio_attach_device(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp); +void vfio_detach_device(VFIODevice *vbasedev); + +int vfio_kvm_device_add_fd(int fd, Error **errp); +int vfio_kvm_device_del_fd(int fd, Error **errp); extern const MemoryRegionOps vfio_region_ops; typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; +typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; extern VFIOGroupList vfio_group_list; +extern VFIODeviceList vfio_device_list; + +extern const MemoryListener vfio_memory_listener; +extern int vfio_kvm_device_fd; bool vfio_mig_active(void); int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); @@ -245,6 +285,8 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, unsigned int *avail); struct vfio_info_cap_header * vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id); +struct vfio_info_cap_header * +vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id); #endif extern const MemoryListener vfio_prereg_listener; @@ -257,4 +299,12 @@ int vfio_spapr_remove_window(VFIOContainer *container, bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); void vfio_migration_exit(VFIODevice *vbasedev); +int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size); +bool vfio_devices_all_running_and_mig_active(VFIOContainer *container); +bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container); +int vfio_devices_query_dirty_bitmap(VFIOContainer *container, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size); +int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + uint64_t size, ram_addr_t ram_addr); #endif /* HW_VFIO_VFIO_COMMON_H */ diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index 1860b541d8..96ccc18cd3 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -86,9 +86,6 @@ typedef int (*vhost_set_vring_enable_op)(struct vhost_dev *dev, typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev); typedef int (*vhost_migration_done_op)(struct vhost_dev *dev, char *mac_addr); -typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev, - uint64_t start1, uint64_t size1, - uint64_t start2, uint64_t size2); typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev, uint64_t guest_cid); typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start); @@ -108,8 +105,7 @@ typedef int (*vhost_crypto_create_session_op)(struct vhost_dev *dev, typedef int (*vhost_crypto_close_session_op)(struct vhost_dev *dev, uint64_t session_id); -typedef bool (*vhost_backend_mem_section_filter_op)(struct vhost_dev *dev, - MemoryRegionSection *section); +typedef bool (*vhost_backend_no_private_memslots_op)(struct vhost_dev *dev); typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev, uint16_t queue_size, @@ -138,6 +134,7 @@ typedef struct VhostOps { vhost_backend_init vhost_backend_init; vhost_backend_cleanup vhost_backend_cleanup; vhost_backend_memslots_limit vhost_backend_memslots_limit; + vhost_backend_no_private_memslots_op vhost_backend_no_private_memslots; vhost_net_set_backend_op vhost_net_set_backend; vhost_net_set_mtu_op vhost_net_set_mtu; vhost_scsi_set_endpoint_op vhost_scsi_set_endpoint; @@ -163,7 +160,6 @@ typedef struct VhostOps { vhost_set_vring_enable_op vhost_set_vring_enable; vhost_requires_shm_log_op vhost_requires_shm_log; vhost_migration_done_op vhost_migration_done; - vhost_backend_can_merge_op vhost_backend_can_merge; vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid; vhost_vsock_set_running_op vhost_vsock_set_running; vhost_set_iotlb_callback_op vhost_set_iotlb_callback; @@ -172,7 +168,6 @@ typedef struct VhostOps { vhost_set_config_op vhost_set_config; vhost_crypto_create_session_op vhost_crypto_create_session; vhost_crypto_close_session_op vhost_crypto_close_session; - vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter; vhost_get_inflight_fd_op vhost_get_inflight_fd; vhost_set_inflight_fd_op vhost_set_inflight_fd; vhost_dev_start_op vhost_dev_start; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 6a173cb9fa..c7e5467693 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -315,7 +315,8 @@ uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, */ void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, uint64_t features); -bool vhost_has_free_slot(void); +unsigned int vhost_get_max_memslots(void); +unsigned int vhost_get_free_memslots(void); int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file); diff --git a/include/hw/virtio/virtio-gpu-bswap.h b/include/hw/virtio/virtio-gpu-bswap.h index 637a0585d0..dd1975e2d4 100644 --- a/include/hw/virtio/virtio-gpu-bswap.h +++ b/include/hw/virtio/virtio-gpu-bswap.h @@ -71,6 +71,21 @@ virtio_gpu_create_blob_bswap(struct virtio_gpu_resource_create_blob *cblob) } static inline void +virtio_gpu_map_blob_bswap(struct virtio_gpu_resource_map_blob *mblob) +{ + virtio_gpu_ctrl_hdr_bswap(&mblob->hdr); + le32_to_cpus(&mblob->resource_id); + le64_to_cpus(&mblob->offset); +} + +static inline void +virtio_gpu_unmap_blob_bswap(struct virtio_gpu_resource_unmap_blob *ublob) +{ + virtio_gpu_ctrl_hdr_bswap(&ublob->hdr); + le32_to_cpus(&ublob->resource_id); +} + +static inline void virtio_gpu_scanout_blob_bswap(struct virtio_gpu_set_scanout_blob *ssb) { virtio_gpu_bswap_32(ssb, sizeof(*ssb) - sizeof(ssb->offsets[3])); diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index 390c4642b8..584ba2ed73 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -38,6 +38,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPUGL, VIRTIO_GPU_GL) #define TYPE_VHOST_USER_GPU "vhost-user-gpu" OBJECT_DECLARE_SIMPLE_TYPE(VhostUserGPU, VHOST_USER_GPU) +#define TYPE_VIRTIO_GPU_RUTABAGA "virtio-gpu-rutabaga-device" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPURutabaga, VIRTIO_GPU_RUTABAGA) + struct virtio_gpu_simple_resource { uint32_t resource_id; uint32_t width; @@ -93,6 +96,8 @@ enum virtio_gpu_base_conf_flags { VIRTIO_GPU_FLAG_EDID_ENABLED, VIRTIO_GPU_FLAG_DMABUF_ENABLED, VIRTIO_GPU_FLAG_BLOB_ENABLED, + VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED, + VIRTIO_GPU_FLAG_RUTABAGA_ENABLED, }; #define virtio_gpu_virgl_enabled(_cfg) \ @@ -105,12 +110,19 @@ enum virtio_gpu_base_conf_flags { (_cfg.flags & (1 << VIRTIO_GPU_FLAG_DMABUF_ENABLED)) #define virtio_gpu_blob_enabled(_cfg) \ (_cfg.flags & (1 << VIRTIO_GPU_FLAG_BLOB_ENABLED)) +#define virtio_gpu_context_init_enabled(_cfg) \ + (_cfg.flags & (1 << VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED)) +#define virtio_gpu_rutabaga_enabled(_cfg) \ + (_cfg.flags & (1 << VIRTIO_GPU_FLAG_RUTABAGA_ENABLED)) +#define virtio_gpu_hostmem_enabled(_cfg) \ + (_cfg.hostmem > 0) struct virtio_gpu_base_conf { uint32_t max_outputs; uint32_t flags; uint32_t xres; uint32_t yres; + uint64_t hostmem; }; struct virtio_gpu_ctrl_command { @@ -134,6 +146,8 @@ struct VirtIOGPUBase { int renderer_blocked; int enable; + MemoryRegion hostmem; + struct virtio_gpu_scanout scanout[VIRTIO_GPU_MAX_SCANOUTS]; int enabled_output_bitmask; @@ -224,14 +238,35 @@ struct VhostUserGPU { bool backend_blocked; }; +#define MAX_SLOTS 4096 + +struct MemoryRegionInfo { + int used; + MemoryRegion mr; + uint32_t resource_id; +}; + +struct rutabaga; + +struct VirtIOGPURutabaga { + VirtIOGPU parent_obj; + struct MemoryRegionInfo memory_regions[MAX_SLOTS]; + uint64_t capset_mask; + char *wayland_socket_path; + char *wsi; + bool headless; + uint32_t num_capsets; + struct rutabaga *rutabaga; +}; + #define VIRTIO_GPU_FILL_CMD(out) do { \ - size_t s; \ - s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \ + size_t virtiogpufillcmd_s_ = \ + iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \ &out, sizeof(out)); \ - if (s != sizeof(out)) { \ + if (virtiogpufillcmd_s_ != sizeof(out)) { \ qemu_log_mask(LOG_GUEST_ERROR, \ "%s: command size incorrect %zu vs %zu\n", \ - __func__, s, sizeof(out)); \ + __func__, virtiogpufillcmd_s_, sizeof(out)); \ return; \ } \ } while (0) @@ -249,6 +284,9 @@ void virtio_gpu_base_fill_display_info(VirtIOGPUBase *g, void virtio_gpu_base_generate_edid(VirtIOGPUBase *g, int scanout, struct virtio_gpu_resp_edid *edid); /* virtio-gpu.c */ +struct virtio_gpu_simple_resource * +virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id); + void virtio_gpu_ctrl_response(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd, struct virtio_gpu_ctrl_hdr *resp, @@ -267,6 +305,8 @@ int virtio_gpu_create_mapping_iov(VirtIOGPU *g, uint32_t *niov); void virtio_gpu_cleanup_mapping_iov(VirtIOGPU *g, struct iovec *iov, uint32_t count); +void virtio_gpu_cleanup_mapping(VirtIOGPU *g, + struct virtio_gpu_simple_resource *res); void virtio_gpu_process_cmdq(VirtIOGPU *g); void virtio_gpu_device_realize(DeviceState *qdev, Error **errp); void virtio_gpu_reset(VirtIODevice *vdev); diff --git a/include/hw/virtio/virtio-input.h b/include/hw/virtio/virtio-input.h index 08f1591424..a6c9703644 100644 --- a/include/hw/virtio/virtio-input.h +++ b/include/hw/virtio/virtio-input.h @@ -84,7 +84,7 @@ struct VirtIOInputHID { VirtIOInput parent_obj; char *display; uint32_t head; - QemuInputHandler *handler; + const QemuInputHandler *handler; QemuInputHandlerState *hs; int ledstate; bool wheel_axis; diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h index ab0fe2b4f2..5f5b02b8f9 100644 --- a/include/hw/virtio/virtio-mem.h +++ b/include/hw/virtio/virtio-mem.h @@ -33,6 +33,7 @@ OBJECT_DECLARE_TYPE(VirtIOMEM, VirtIOMEMClass, #define VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP "unplugged-inaccessible" #define VIRTIO_MEM_EARLY_MIGRATION_PROP "x-early-migration" #define VIRTIO_MEM_PREALLOC_PROP "prealloc" +#define VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP "dynamic-memslots" struct VirtIOMEM { VirtIODevice parent_obj; @@ -44,7 +45,28 @@ struct VirtIOMEM { int32_t bitmap_size; unsigned long *bitmap; - /* assigned memory backend and memory region */ + /* + * With "dynamic-memslots=on": Device memory region in which we dynamically + * map the memslots. + */ + MemoryRegion *mr; + + /* + * With "dynamic-memslots=on": The individual memslots (aliases into the + * memory backend). + */ + MemoryRegion *memslots; + + /* With "dynamic-memslots=on": The total number of memslots. */ + uint16_t nb_memslots; + + /* + * With "dynamic-memslots=on": Size of one memslot (the size of the + * last one can differ). + */ + uint64_t memslot_size; + + /* Assigned memory backend with the RAM memory region. */ HostMemoryBackend *memdev; /* NUMA node */ @@ -82,6 +104,12 @@ struct VirtIOMEM { */ bool early_migration; + /* + * Whether we dynamically map (multiple, if possible) memslots instead of + * statically mapping the whole RAM memory region. + */ + bool dynamic_memslots; + /* notifiers to notify when "size" changes */ NotifierList size_change_notifiers; @@ -96,6 +124,8 @@ struct VirtIOMEMClass { /* public */ void (*fill_device_info)(const VirtIOMEM *vmen, VirtioMEMDeviceInfo *vi); MemoryRegion *(*get_memory_region)(VirtIOMEM *vmem, Error **errp); + void (*decide_memslots)(VirtIOMEM *vmem, unsigned int limit); + unsigned int (*get_memslots)(VirtIOMEM *vmem); void (*add_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier); void (*remove_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier); void (*unplug_request_check)(VirtIOMEM *vmem, Error **errp); diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index ab2051b64b..5a3f182f99 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -264,4 +264,8 @@ unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues); void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq, int n, bool assign, bool with_irqfd); + +int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy, uint8_t bar, uint64_t offset, + uint64_t length, uint8_t id); + #endif diff --git a/include/migration/register.h b/include/migration/register.h index 2b12c6adec..fed1d04a3c 100644 --- a/include/migration/register.h +++ b/include/migration/register.h @@ -25,6 +25,7 @@ typedef struct SaveVMHandlers { * used to perform early checks. */ int (*save_prepare)(void *opaque, Error **errp); + int (*save_setup)(QEMUFile *f, void *opaque); void (*save_cleanup)(void *opaque); int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque); int (*save_live_complete_precopy)(QEMUFile *f, void *opaque); @@ -50,7 +51,6 @@ typedef struct SaveVMHandlers { int (*save_live_iterate)(QEMUFile *f, void *opaque); /* This runs outside the iothread lock! */ - int (*save_setup)(QEMUFile *f, void *opaque); /* Note for save_live_pending: * must_precopy: * - must be migrated in precopy or in stopped state diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 1109482a00..c797f0d457 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -212,4 +212,19 @@ # define QEMU_USED #endif +/* + * Ugly CPP trick that is like "defined FOO", but also works in C + * code. Useful to replace #ifdef with "if" statements; assumes + * the symbol was defined with Meson's "config.set()", so it is empty + * if defined. + */ +#define IS_ENABLED(x) IS_EMPTY(x) + +#define IS_EMPTY_JUNK_ junk, +#define IS_EMPTY(value) IS_EMPTY_(IS_EMPTY_JUNK_##value) + +/* Expands to either SECOND_ARG(junk, 1, 0) or SECOND_ARG(IS_EMPTY_JUNK_CONFIG_FOO 1, 0) */ +#define SECOND_ARG(first, second, ...) second +#define IS_EMPTY_(junk_maybecomma) SECOND_ARG(junk_maybecomma 1, 0) + #endif /* COMPILER_H */ diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h index d5f675493a..49c12b0fa9 100644 --- a/include/sysemu/block-backend-global-state.h +++ b/include/sysemu/block-backend-global-state.h @@ -59,8 +59,8 @@ BlockBackend *blk_by_public(BlockBackendPublic *public); void blk_remove_bs(BlockBackend *blk); int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp); -bool bdrv_has_blk(BlockDriverState *bs); -bool bdrv_is_root_node(BlockDriverState *bs); +bool GRAPH_RDLOCK bdrv_has_blk(BlockDriverState *bs); +bool GRAPH_RDLOCK bdrv_is_root_node(BlockDriverState *bs); int GRAPH_UNLOCKED blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, Error **errp); void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index ee9025f8e9..97a8a4f201 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -215,7 +215,8 @@ typedef struct KVMRouteChange { /* external API */ -bool kvm_has_free_slot(MachineState *ms); +unsigned int kvm_get_max_memslots(void); +unsigned int kvm_get_free_memslots(void); bool kvm_has_sync_mmu(void); int kvm_has_vcpu_events(void); int kvm_has_robust_singlestep(void); @@ -552,7 +553,6 @@ int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source); */ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); struct ppc_radix_page_info *kvm_get_radix_page_info(void); -int kvm_get_max_memslots(void); /* Notify resamplefd for EOI of specific interrupts. */ void kvm_resample_fd_notify(int gsi); diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index a5b9122cb8..075939a3c4 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -40,6 +40,7 @@ typedef struct KVMMemoryUpdate { typedef struct KVMMemoryListener { MemoryListener listener; KVMSlot *slots; + unsigned int nr_used_slots; int as_id; QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add; QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; diff --git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h index 3bbeb1bcb4..021e0a6230 100644 --- a/include/sysemu/memory_mapping.h +++ b/include/sysemu/memory_mapping.h @@ -71,7 +71,7 @@ void guest_phys_blocks_free(GuestPhysBlockList *list); void guest_phys_blocks_init(GuestPhysBlockList *list); void guest_phys_blocks_append(GuestPhysBlockList *list); -void qemu_get_guest_memory_mapping(MemoryMappingList *list, +bool qemu_get_guest_memory_mapping(MemoryMappingList *list, const GuestPhysBlockList *guest_phys_blocks, Error **errp); diff --git a/include/ui/input.h b/include/ui/input.h index 24d8e4579e..8f9aac562e 100644 --- a/include/ui/input.h +++ b/include/ui/input.h @@ -30,7 +30,7 @@ struct QemuInputHandler { }; QemuInputHandlerState *qemu_input_handler_register(DeviceState *dev, - QemuInputHandler *handler); + const QemuInputHandler *handler); void qemu_input_handler_activate(QemuInputHandlerState *s); void qemu_input_handler_deactivate(QemuInputHandlerState *s); void qemu_input_handler_unregister(QemuInputHandlerState *s); |