Diffstat (limited to 'include/system')
60 files changed, 6113 insertions, 0 deletions
diff --git a/include/system/accel-blocker.h b/include/system/accel-blocker.h new file mode 100644 index 0000000000..e10099d6a9 --- /dev/null +++ b/include/system/accel-blocker.h @@ -0,0 +1,55 @@ +/* + * Accelerator blocking API, to prevent new ioctls from starting and wait the + * running ones finish. + * This mechanism differs from pause/resume_all_vcpus() in that it does not + * release the BQL. + * + * Copyright (c) 2022 Red Hat Inc. + * + * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef ACCEL_BLOCKER_H +#define ACCEL_BLOCKER_H + +#include "system/cpus.h" + +void accel_blocker_init(void); + +/* + * accel_{cpu_}ioctl_begin/end: + * Mark when ioctl is about to run or just finished. + * + * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is + * called, preventing new ioctls to run. They will continue only after + * accel_ioctl_inibith_end(). + */ +void accel_ioctl_begin(void); +void accel_ioctl_end(void); +void accel_cpu_ioctl_begin(CPUState *cpu); +void accel_cpu_ioctl_end(CPUState *cpu); + +/* + * accel_ioctl_inhibit_begin: start critical section + * + * This function makes sure that: + * 1) incoming accel_{cpu_}ioctl_begin() calls block + * 2) wait that all ioctls that were already running reach + * accel_{cpu_}ioctl_end(), kicking vcpus if necessary. + * + * This allows the caller to access shared data or perform operations without + * worrying of concurrent vcpus accesses. + */ +void accel_ioctl_inhibit_begin(void); + +/* + * accel_ioctl_inhibit_end: end critical section started by + * accel_ioctl_inhibit_begin() + * + * This function allows blocked accel_{cpu_}ioctl_begin() to continue. + */ +void accel_ioctl_inhibit_end(void); + +#endif /* ACCEL_BLOCKER_H */ diff --git a/include/system/accel-ops.h b/include/system/accel-ops.h new file mode 100644 index 0000000000..a088672230 --- /dev/null +++ b/include/system/accel-ops.h @@ -0,0 +1,74 @@ +/* + * Accelerator OPS, used for cpus.c module + * + * Copyright 2021 SUSE LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef ACCEL_OPS_H +#define ACCEL_OPS_H + +#include "exec/cpu-common.h" +#include "qom/object.h" + +#define ACCEL_OPS_SUFFIX "-ops" +#define TYPE_ACCEL_OPS "accel" ACCEL_OPS_SUFFIX +#define ACCEL_OPS_NAME(name) (name "-" TYPE_ACCEL_OPS) + +typedef struct AccelOpsClass AccelOpsClass; +DECLARE_CLASS_CHECKERS(AccelOpsClass, ACCEL_OPS, TYPE_ACCEL_OPS) + +/** + * struct AccelOpsClass - accelerator interfaces + * + * This structure is used to abstract accelerator differences from the + * core CPU code. Not all have to be implemented. 
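A hedged usage sketch of the accel-blocker API declared above (system/accel-blocker.h). Only the accel_*() calls come from the header; my_accel_ioctl(), my_raw_ioctl() and my_update_shared_state() are hypothetical names for illustration:

    /* vCPU side: bracket every accelerator ioctl (hypothetical wrapper). */
    static int my_accel_ioctl(CPUState *cpu, int request, void *arg)
    {
        int ret;

        accel_cpu_ioctl_begin(cpu);            /* blocks while an inhibit section runs */
        ret = my_raw_ioctl(cpu, request, arg); /* hypothetical low-level ioctl call */
        accel_cpu_ioctl_end(cpu);
        return ret;
    }

    /* Control side: quiesce all ioctls without dropping the BQL. */
    static void my_update_shared_state(void)
    {
        accel_ioctl_inhibit_begin();   /* waits for in-flight ioctls, kicking vcpus */
        /* ... touch data that concurrent vcpu ioctls could otherwise race with ... */
        accel_ioctl_inhibit_end();     /* lets blocked accel_*_ioctl_begin() continue */
    }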
+ */ +struct AccelOpsClass { + /*< private >*/ + ObjectClass parent_class; + /*< public >*/ + + /* initialization function called when accel is chosen */ + void (*ops_init)(AccelOpsClass *ops); + + bool (*cpus_are_resettable)(void); + void (*cpu_reset_hold)(CPUState *cpu); + + void (*create_vcpu_thread)(CPUState *cpu); /* MANDATORY NON-NULL */ + void (*kick_vcpu_thread)(CPUState *cpu); + bool (*cpu_thread_is_idle)(CPUState *cpu); + + void (*synchronize_post_reset)(CPUState *cpu); + void (*synchronize_post_init)(CPUState *cpu); + void (*synchronize_state)(CPUState *cpu); + void (*synchronize_pre_loadvm)(CPUState *cpu); + void (*synchronize_pre_resume)(bool step_pending); + + void (*handle_interrupt)(CPUState *cpu, int mask); + + /** + * @get_virtual_clock: fetch virtual clock + * @set_virtual_clock: set virtual clock + * + * These allow the timer subsystem to defer to the accelerator to + * fetch time. The set function is needed if the accelerator wants + * to track the changes to time as the timer is warped through + * various timer events. + */ + int64_t (*get_virtual_clock)(void); + void (*set_virtual_clock)(int64_t time); + + int64_t (*get_elapsed_ticks)(void); + + /* gdbstub hooks */ + bool (*supports_guest_debug)(void); + int (*update_guest_debug)(CPUState *cpu); + int (*insert_breakpoint)(CPUState *cpu, int type, vaddr addr, vaddr len); + int (*remove_breakpoint)(CPUState *cpu, int type, vaddr addr, vaddr len); + void (*remove_all_breakpoints)(CPUState *cpu); +}; + +#endif /* ACCEL_OPS_H */ diff --git a/include/system/arch_init.h b/include/system/arch_init.h new file mode 100644 index 0000000000..5b1c1026f3 --- /dev/null +++ b/include/system/arch_init.h @@ -0,0 +1,32 @@ +#ifndef QEMU_ARCH_INIT_H +#define QEMU_ARCH_INIT_H + + +enum { + QEMU_ARCH_ALL = -1, + QEMU_ARCH_ALPHA = (1 << 0), + QEMU_ARCH_ARM = (1 << 1), + QEMU_ARCH_I386 = (1 << 3), + QEMU_ARCH_M68K = (1 << 4), + QEMU_ARCH_MICROBLAZE = (1 << 6), + QEMU_ARCH_MIPS = (1 << 7), + QEMU_ARCH_PPC = (1 << 8), + QEMU_ARCH_S390X = (1 << 9), + QEMU_ARCH_SH4 = (1 << 10), + QEMU_ARCH_SPARC = (1 << 11), + QEMU_ARCH_XTENSA = (1 << 12), + QEMU_ARCH_OPENRISC = (1 << 13), + QEMU_ARCH_TRICORE = (1 << 16), + QEMU_ARCH_HPPA = (1 << 18), + QEMU_ARCH_RISCV = (1 << 19), + QEMU_ARCH_RX = (1 << 20), + QEMU_ARCH_AVR = (1 << 21), + QEMU_ARCH_HEXAGON = (1 << 22), + QEMU_ARCH_LOONGARCH = (1 << 23), +}; + +extern const uint32_t arch_type; + +void qemu_init_arch_modules(void); + +#endif diff --git a/include/system/balloon.h b/include/system/balloon.h new file mode 100644 index 0000000000..867687b73a --- /dev/null +++ b/include/system/balloon.h @@ -0,0 +1,27 @@ +/* + * Balloon + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
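As a rough illustration of how an accelerator could populate the AccelOpsClass above: the "myaccel" names are invented, the class_init/TypeInfo boilerplate is the usual QOM pattern rather than anything mandated by this header, and only create_vcpu_thread is documented as mandatory. dummy_start_vcpu_thread() is the helper declared in system/cpus.h later in this diff:

    static void myaccel_ops_class_init(ObjectClass *oc, void *data)
    {
        AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);

        /* The only hook the header marks as MANDATORY NON-NULL. */
        ops->create_vcpu_thread = dummy_start_vcpu_thread;
    }

    static const TypeInfo myaccel_ops_type = {
        .name       = ACCEL_OPS_NAME("myaccel"),
        .parent     = TYPE_ACCEL_OPS,
        .class_init = myaccel_ops_class_init,
    };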
+ * + */ + +#ifndef QEMU_BALLOON_H +#define QEMU_BALLOON_H + +#include "exec/cpu-common.h" +#include "qapi/qapi-types-machine.h" + +typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target); +typedef void (QEMUBalloonStatus)(void *opaque, BalloonInfo *info); + +int qemu_add_balloon_handler(QEMUBalloonEvent *event_func, + QEMUBalloonStatus *stat_func, void *opaque); +void qemu_remove_balloon_handler(void *opaque); + +#endif diff --git a/include/system/block-backend-common.h b/include/system/block-backend-common.h new file mode 100644 index 0000000000..780cea7305 --- /dev/null +++ b/include/system/block-backend-common.h @@ -0,0 +1,103 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_COMMON_H +#define BLOCK_BACKEND_COMMON_H + +#include "qemu/iov.h" +#include "block/throttle-groups.h" + +/* + * TODO Have to include block/block.h for a bunch of block layer + * types. Unfortunately, this pulls in the whole BlockDriverState + * API, which we don't want used by many BlockBackend users. Some of + * the types belong here, and the rest should be split into a common + * header and one for the BlockDriverState API. + */ +#include "block/block.h" + +/* Callbacks for block device models */ +typedef struct BlockDevOps { + + /* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + + /* + * Runs when virtual media changed (monitor commands eject, change) + * Argument load is true on load and false on eject. + * Beware: doesn't run when a host device's physical media + * changes. Sure would be useful if it did. + * Device models with removable media must implement this callback. + */ + void (*change_media_cb)(void *opaque, bool load, Error **errp); + /* + * Runs when an eject request is issued from the monitor, the tray + * is closed, and the medium is locked. + * Device models that do not implement is_medium_locked will not need + * this callback. Device models that can lock the medium or tray might + * want to implement the callback and unlock the tray when "force" is + * true, even if they do not support eject requests. + */ + void (*eject_request_cb)(void *opaque, bool force); + + /* + * Is the virtual medium locked into the device? + * Device models implement this only when device has such a lock. + */ + bool (*is_medium_locked)(void *opaque); + + /* + * Runs when the backend receives a drain request. + */ + void (*drained_begin)(void *opaque); + /* + * Runs when the backend's last drain request ends. + */ + void (*drained_end)(void *opaque); + /* + * Is the device still busy? + */ + bool (*drained_poll)(void *opaque); + + /* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + + /* + * Is the virtual tray open? + * Device models implement this only when the device has a tray. + */ + bool (*is_tray_open)(void *opaque); + + /* + * Runs when the size changed (e.g. monitor command block_resize) + */ + void (*resize_cb)(void *opaque); +} BlockDevOps; + +/* + * This struct is embedded in (the private) BlockBackend struct and contains + * fields that must be public. 
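A hedged sketch of a device model wiring up the BlockDevOps callbacks defined above; the mydev_* names are hypothetical, and blk_set_dev_ops() is the registration function declared in block-backend-global-state.h later in this diff:

    /* Hypothetical device model with removable media. */
    static void mydev_change_media_cb(void *opaque, bool load, Error **errp)
    {
        /* react to monitor "eject"/"change"; load is true on load, false on eject */
    }

    static void mydev_eject_request_cb(void *opaque, bool force)
    {
        /* eject requested while the tray is closed and the medium is locked */
    }

    static const BlockDevOps mydev_block_ops = {
        .change_media_cb  = mydev_change_media_cb,
        .eject_request_cb = mydev_eject_request_cb,
    };

    /* typically called from the device's realize function:
     *     blk_set_dev_ops(s->blk, &mydev_block_ops, s);
     */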
This is in particular for QLIST_ENTRY() and + * friends so that BlockBackends can be kept in lists outside block-backend.c + */ +typedef struct BlockBackendPublic { + ThrottleGroupMember throttle_group_member; +} BlockBackendPublic; + +#endif /* BLOCK_BACKEND_COMMON_H */ diff --git a/include/system/block-backend-global-state.h b/include/system/block-backend-global-state.h new file mode 100644 index 0000000000..9cc9b008ec --- /dev/null +++ b/include/system/block-backend-global-state.h @@ -0,0 +1,125 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_GLOBAL_STATE_H +#define BLOCK_BACKEND_GLOBAL_STATE_H + +#include "block-backend-common.h" + +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + +BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm); + +BlockBackend * no_coroutine_fn +blk_new_with_bs(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm, + Error **errp); + +BlockBackend * coroutine_fn no_co_wrapper +blk_co_new_with_bs(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm, + Error **errp); + +BlockBackend * no_coroutine_fn +blk_new_open(const char *filename, const char *reference, QDict *options, + int flags, Error **errp); + +BlockBackend * coroutine_fn no_co_wrapper +blk_co_new_open(const char *filename, const char *reference, QDict *options, + int flags, Error **errp); + +int blk_get_refcnt(BlockBackend *blk); +void blk_ref(BlockBackend *blk); + +void no_coroutine_fn blk_unref(BlockBackend *blk); +void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk); + +void blk_remove_all_bs(void); +BlockBackend *blk_by_name(const char *name); +BlockBackend *blk_next(BlockBackend *blk); +BlockBackend *blk_all_next(BlockBackend *blk); +bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp); +void monitor_remove_blk(BlockBackend *blk); + +BlockBackendPublic *blk_get_public(BlockBackend *blk); + +void blk_remove_bs(BlockBackend *blk); +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); +int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp); +bool GRAPH_RDLOCK bdrv_has_blk(BlockDriverState *bs); +bool GRAPH_RDLOCK bdrv_is_root_node(BlockDriverState *bs); +int GRAPH_UNLOCKED blk_set_perm(BlockBackend *blk, uint64_t perm, + uint64_t shared_perm, Error **errp); +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); + +void blk_iostatus_enable(BlockBackend *blk); +BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk); +void blk_iostatus_reset(BlockBackend *blk); +int blk_attach_dev(BlockBackend *blk, DeviceState *dev); +void blk_detach_dev(BlockBackend *blk, DeviceState *dev); +DeviceState *blk_get_attached_dev(BlockBackend *blk); +BlockBackend *blk_by_dev(void *dev); +BlockBackend *blk_by_qdev_id(const char *id, Error **errp); +void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); + +int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags); +void blk_aio_cancel(BlockAIOCB *acb); +int blk_commit_all(void); +bool blk_in_drain(BlockBackend *blk); +void blk_drain(BlockBackend *blk); +void blk_drain_all(void); +void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + 
BlockdevOnError on_write_error); +bool blk_supports_write_perm(BlockBackend *blk); +bool blk_is_sg(BlockBackend *blk); +void blk_set_enable_write_cache(BlockBackend *blk, bool wce); +int blk_get_flags(BlockBackend *blk); +bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp); +int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, + Error **errp); +void blk_add_aio_context_notifier(BlockBackend *blk, + void (*attached_aio_context)(AioContext *new_context, void *opaque), + void (*detach_aio_context)(void *opaque), void *opaque); +void blk_remove_aio_context_notifier(BlockBackend *blk, + void (*attached_aio_context)(AioContext *, + void *), + void (*detach_aio_context)(void *), + void *opaque); +void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify); +BlockBackendRootState *blk_get_root_state(BlockBackend *blk); +void blk_update_root_state(BlockBackend *blk); +bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk); +int blk_get_open_flags_from_root_state(BlockBackend *blk); + +int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, + int64_t pos, int size); +int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); +int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz); +int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo); + +void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg); +void blk_io_limits_disable(BlockBackend *blk); +void blk_io_limits_enable(BlockBackend *blk, const char *group); +void blk_io_limits_update_group(BlockBackend *blk, const char *group); +void blk_set_force_allow_inactivate(BlockBackend *blk); + +bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp); +void blk_unregister_buf(BlockBackend *blk, void *host, size_t size); + +const BdrvChild *blk_root(BlockBackend *blk); + +int blk_make_empty(BlockBackend *blk, Error **errp); + +#endif /* BLOCK_BACKEND_GLOBAL_STATE_H */ diff --git a/include/system/block-backend-io.h b/include/system/block-backend-io.h new file mode 100644 index 0000000000..d174275a5c --- /dev/null +++ b/include/system/block-backend-io.h @@ -0,0 +1,230 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_IO_H +#define BLOCK_BACKEND_IO_H + +#include "block-backend-common.h" +#include "block/accounting.h" + +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. 
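A hedged sketch using the global-state API above (BQL held): open an image read/write and give the backend a monitor-visible name. "disk.qcow2" and "drive0" are placeholders, and BDRV_O_RDWR is assumed to come from block/block.h:

    static BlockBackend *my_open_disk(Error **errp)
    {
        BlockBackend *blk;

        blk = blk_new_open("disk.qcow2", NULL, NULL, BDRV_O_RDWR, errp);
        if (!blk) {
            return NULL;
        }
        if (!monitor_add_blk(blk, "drive0", errp)) {   /* expose it to the monitor */
            blk_unref(blk);
            return NULL;
        }
        return blk;
    }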
+ */ + +const char *blk_name(const BlockBackend *blk); + +BlockDriverState *blk_bs(BlockBackend *blk); + +void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); +void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow); +void blk_set_disable_request_queuing(BlockBackend *blk, bool disable); +bool blk_iostatus_is_enabled(const BlockBackend *blk); + +char *blk_get_attached_dev_id(BlockBackend *blk); + +BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t bytes, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); + +BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_flush(BlockBackend *blk, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset, + unsigned int *nr_zones, + BlockZoneDescriptor *zones, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op, + int64_t offset, int64_t len, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes, + BlockCompletionFunc *cb, void *opaque); +void blk_aio_cancel_async(BlockAIOCB *acb); +BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque); + +void blk_inc_in_flight(BlockBackend *blk); +void blk_dec_in_flight(BlockBackend *blk); + +bool coroutine_fn GRAPH_RDLOCK blk_co_is_inserted(BlockBackend *blk); +bool co_wrapper_mixed_bdrv_rdlock blk_is_inserted(BlockBackend *blk); + +bool coroutine_fn GRAPH_RDLOCK blk_co_is_available(BlockBackend *blk); +bool co_wrapper_mixed_bdrv_rdlock blk_is_available(BlockBackend *blk); + +void coroutine_fn blk_co_lock_medium(BlockBackend *blk, bool locked); +void co_wrapper blk_lock_medium(BlockBackend *blk, bool locked); + +void coroutine_fn blk_co_eject(BlockBackend *blk, bool eject_flag); +void co_wrapper blk_eject(BlockBackend *blk, bool eject_flag); + +int64_t coroutine_fn blk_co_getlength(BlockBackend *blk); +int64_t co_wrapper_mixed blk_getlength(BlockBackend *blk); + +void coroutine_fn blk_co_get_geometry(BlockBackend *blk, + uint64_t *nb_sectors_ptr); +void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr); + +int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk); +int64_t blk_nb_sectors(BlockBackend *blk); + +void *blk_try_blockalign(BlockBackend *blk, size_t size); +void *blk_blockalign(BlockBackend *blk, size_t size); +bool blk_is_writable(BlockBackend *blk); +bool blk_enable_write_cache(BlockBackend *blk); +BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read); +BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, + int error); +void blk_error_action(BlockBackend *blk, BlockErrorAction action, + bool is_read, int error); +void blk_iostatus_set_err(BlockBackend *blk, int error); +int blk_get_max_iov(BlockBackend *blk); +int blk_get_max_hw_iov(BlockBackend *blk); + +AioContext *blk_get_aio_context(BlockBackend *blk); +BlockAcctStats *blk_get_stats(BlockBackend *blk); +void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, + 
BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, + BlockCompletionFunc *cb, + void *opaque, int ret); + +uint32_t blk_get_request_alignment(BlockBackend *blk); +uint32_t blk_get_max_transfer(BlockBackend *blk); +uint64_t blk_get_max_hw_transfer(BlockBackend *blk); + +int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, + BlockBackend *blk_out, int64_t off_out, + int64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + +int coroutine_fn blk_co_block_status_above(BlockBackend *blk, + BlockDriverState *base, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file); +int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk, + BlockDriverState *base, + bool include_base, int64_t offset, + int64_t bytes, int64_t *pnum); + +/* + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. + * + * See include/block/block-io.h for more information about + * the "I/O or GS" API. + */ + +int co_wrapper_mixed blk_pread(BlockBackend *blk, int64_t offset, + int64_t bytes, void *buf, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, int64_t bytes, + void *buf, BdrvRequestFlags flags); + +int co_wrapper_mixed blk_preadv(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); + +int co_wrapper_mixed blk_preadv_part(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset, + BdrvRequestFlags flags); +int coroutine_fn blk_co_preadv_part(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset, BdrvRequestFlags flags); + +int co_wrapper_mixed blk_pwrite(BlockBackend *blk, int64_t offset, + int64_t bytes, const void *buf, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, int64_t bytes, + const void *buf, BdrvRequestFlags flags); + +int co_wrapper_mixed blk_pwritev(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); + +int co_wrapper_mixed blk_pwritev_part(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, + int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); + +int co_wrapper_mixed blk_pwrite_compressed(BlockBackend *blk, + int64_t offset, int64_t bytes, + const void *buf); +int coroutine_fn blk_co_pwrite_compressed(BlockBackend *blk, int64_t offset, + int64_t bytes, const void *buf); + +int co_wrapper_mixed blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t bytes, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t bytes, BdrvRequestFlags flags); + +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset, + unsigned int *nr_zones, + BlockZoneDescriptor *zones); +int co_wrapper_mixed blk_zone_report(BlockBackend *blk, int64_t offset, + unsigned int *nr_zones, + BlockZoneDescriptor *zones); +int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op, + int64_t 
offset, int64_t len); +int co_wrapper_mixed blk_zone_mgmt(BlockBackend *blk, BlockZoneOp op, + int64_t offset, int64_t len); +int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset, + QEMUIOVector *qiov, + BdrvRequestFlags flags); +int co_wrapper_mixed blk_zone_append(BlockBackend *blk, int64_t *offset, + QEMUIOVector *qiov, + BdrvRequestFlags flags); + +int co_wrapper_mixed blk_pdiscard(BlockBackend *blk, int64_t offset, + int64_t bytes); +int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, + int64_t bytes); + +int co_wrapper_mixed blk_flush(BlockBackend *blk); +int coroutine_fn blk_co_flush(BlockBackend *blk); + +int co_wrapper_mixed blk_ioctl(BlockBackend *blk, unsigned long int req, + void *buf); +int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned long int req, + void *buf); + +int co_wrapper_mixed blk_truncate(BlockBackend *blk, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp); +int coroutine_fn blk_co_truncate(BlockBackend *blk, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, + Error **errp); + +#endif /* BLOCK_BACKEND_IO_H */ diff --git a/include/system/block-backend.h b/include/system/block-backend.h new file mode 100644 index 0000000000..038be9fc40 --- /dev/null +++ b/include/system/block-backend.h @@ -0,0 +1,21 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_H +#define BLOCK_BACKEND_H + +#include "block-backend-global-state.h" +#include "block-backend-io.h" + +/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */ + +#endif diff --git a/include/system/block-ram-registrar.h b/include/system/block-ram-registrar.h new file mode 100644 index 0000000000..d8b2f7942b --- /dev/null +++ b/include/system/block-ram-registrar.h @@ -0,0 +1,37 @@ +/* + * BlockBackend RAM Registrar + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef BLOCK_RAM_REGISTRAR_H +#define BLOCK_RAM_REGISTRAR_H + +#include "exec/ramlist.h" + +/** + * struct BlockRAMRegistrar: + * + * Keeps RAMBlock memory registered with a BlockBackend using + * blk_register_buf() including hotplugged memory. + * + * Emulated devices or other BlockBackend users initialize a BlockRAMRegistrar + * with blk_ram_registrar_init() before submitting I/O requests with the + * BDRV_REQ_REGISTERED_BUF flag set. + */ +typedef struct { + BlockBackend *blk; + RAMBlockNotifier notifier; + bool ok; +} BlockRAMRegistrar; + +void blk_ram_registrar_init(BlockRAMRegistrar *r, BlockBackend *blk); +void blk_ram_registrar_destroy(BlockRAMRegistrar *r); + +/* Have all RAMBlocks been registered successfully? */ +static inline bool blk_ram_registrar_ok(BlockRAMRegistrar *r) +{ + return r->ok; +} + +#endif /* BLOCK_RAM_REGISTRAR_H */ diff --git a/include/system/blockdev.h b/include/system/blockdev.h new file mode 100644 index 0000000000..3211b16513 --- /dev/null +++ b/include/system/blockdev.h @@ -0,0 +1,64 @@ +/* + * QEMU host block devices + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. 
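A hedged sketch of the coroutine I/O entry points above: read the first 4 KiB, modify it, write it back and flush. The function and the modification are hypothetical; only the blk_co_*() calls come from block-backend-io.h:

    static int coroutine_fn my_rewrite_header(BlockBackend *blk)
    {
        uint8_t buf[4096];
        int ret;

        ret = blk_co_pread(blk, 0, sizeof(buf), buf, 0);
        if (ret < 0) {
            return ret;
        }
        buf[0] ^= 1;   /* hypothetical in-place modification */
        ret = blk_co_pwrite(blk, 0, sizeof(buf), buf, 0);
        if (ret < 0) {
            return ret;
        }
        return blk_co_flush(blk);
    }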
+ */ + +#ifndef BLOCKDEV_H +#define BLOCKDEV_H + +#include "block/block.h" +#include "qemu/queue.h" + +typedef enum { + IF_DEFAULT = -1, /* for use with drive_add() only */ + /* + * IF_NONE must be zero, because we want MachineClass member + * block_default_type to default-initialize to IF_NONE + */ + IF_NONE = 0, + IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN, + IF_COUNT +} BlockInterfaceType; + +struct DriveInfo { + BlockInterfaceType type; + int bus; + int unit; + int auto_del; /* see blockdev_mark_auto_del() */ + bool is_default; /* Added by default_drive() ? */ + int media_cd; + QemuOpts *opts; + QTAILQ_ENTRY(DriveInfo) next; +}; + +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + +void blockdev_mark_auto_del(BlockBackend *blk); +void blockdev_auto_del(BlockBackend *blk); + +DriveInfo *blk_legacy_dinfo(BlockBackend *blk); +DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo); +BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo); + +void override_max_devs(BlockInterfaceType type, int max_devs); + +DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit); +void drive_check_orphaned(void); +DriveInfo *drive_get_by_index(BlockInterfaceType type, int index); +int drive_get_max_bus(BlockInterfaceType type); + +QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file, + const char *optstr); +DriveInfo *drive_new(QemuOpts *arg, BlockInterfaceType block_default_type, + Error **errp); + +#endif diff --git a/include/system/cpu-throttle.h b/include/system/cpu-throttle.h new file mode 100644 index 0000000000..44bf6a5389 --- /dev/null +++ b/include/system/cpu-throttle.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2012 SUSE LINUX Products GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + * <http://www.gnu.org/licenses/gpl-2.0.html> + */ + +#ifndef SYSTEM_CPU_THROTTLE_H +#define SYSTEM_CPU_THROTTLE_H + +#include "qemu/timer.h" + +/** + * cpu_throttle_init: + * + * Initialize the CPU throttling API. + */ +void cpu_throttle_init(void); + +/** + * cpu_throttle_set: + * @new_throttle_pct: Percent of sleep time. Valid range is 1 to 99. + * + * Throttles all vcpus by forcing them to sleep for the given percentage of + * time. A throttle_percentage of 25 corresponds to a 75% duty cycle roughly. + * (example: 10ms sleep for every 30ms awake). + * + * cpu_throttle_set can be called as needed to adjust new_throttle_pct. + * Once the throttling starts, it will remain in effect until cpu_throttle_stop + * is called. + */ +void cpu_throttle_set(int new_throttle_pct); + +/** + * cpu_throttle_stop: + * + * Stops the vcpu throttling started by cpu_throttle_set. + */ +void cpu_throttle_stop(void); + +/** + * cpu_throttle_active: + * + * Returns: %true if the vcpus are currently being throttled, %false otherwise. 
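A hedged sketch of the CPU throttling API documented above; the wrapper function is hypothetical:

    /* Force vcpus to sleep 60% of the time, or release them again. */
    static void my_throttle_guest(bool enable)
    {
        if (enable) {
            cpu_throttle_set(60);          /* valid range is 1 to 99 */
        } else if (cpu_throttle_active()) {
            cpu_throttle_stop();
        }
    }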
+ */ +bool cpu_throttle_active(void); + +/** + * cpu_throttle_get_percentage: + * + * Returns the vcpu throttle percentage. See cpu_throttle_set for details. + * + * Returns: The throttle percentage in range 1 to 99. + */ +int cpu_throttle_get_percentage(void); + +/** + * cpu_throttle_dirty_sync_timer_tick: + * + * Dirty sync timer hook. + */ +void cpu_throttle_dirty_sync_timer_tick(void *opaque); + +/** + * cpu_throttle_dirty_sync_timer: + * + * Start or stop the dirty sync timer. + */ +void cpu_throttle_dirty_sync_timer(bool enable); + +#endif /* SYSTEM_CPU_THROTTLE_H */ diff --git a/include/system/cpu-timers-internal.h b/include/system/cpu-timers-internal.h new file mode 100644 index 0000000000..94bb7394c5 --- /dev/null +++ b/include/system/cpu-timers-internal.h @@ -0,0 +1,71 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef TIMERS_STATE_H +#define TIMERS_STATE_H + +/* timers state, for sharing between icount and cpu-timers */ + +typedef struct TimersState { + /* Protected by BQL. */ + int64_t cpu_ticks_prev; + int64_t cpu_ticks_offset; + + /* + * Protect fields that can be respectively read outside the + * BQL, and written from multiple threads. + */ + QemuSeqLock vm_clock_seqlock; + QemuSpin vm_clock_lock; + + int16_t cpu_ticks_enabled; + + /* Conversion factor from emulated instructions to virtual clock ticks. */ + int16_t icount_time_shift; + /* Icount delta used for shift auto adjust. */ + int64_t last_delta; + + /* Compensate for varying guest execution speed. */ + aligned_int64_t qemu_icount_bias; + + int64_t vm_clock_warp_start; + int64_t cpu_clock_offset; + + /* Only written by TCG thread */ + int64_t qemu_icount; + + /* for adjusting icount */ + QEMUTimer *icount_rt_timer; + QEMUTimer *icount_vm_timer; + QEMUTimer *icount_warp_timer; +} TimersState; + +extern TimersState timers_state; + +/* + * icount needs this internal from cpu-timers when adjusting the icount shift. + */ +int64_t cpu_get_clock_locked(void); + +#endif /* TIMERS_STATE_H */ diff --git a/include/system/cpu-timers.h b/include/system/cpu-timers.h new file mode 100644 index 0000000000..64ae54f6d6 --- /dev/null +++ b/include/system/cpu-timers.h @@ -0,0 +1,104 @@ +/* + * CPU timers state API + * + * Copyright 2020 SUSE LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ +#ifndef SYSTEM_CPU_TIMERS_H +#define SYSTEM_CPU_TIMERS_H + +#include "qemu/timer.h" + +/* init the whole cpu timers API, including icount, ticks, and cpu_throttle */ +void cpu_timers_init(void); + +/* icount - Instruction Counter API */ + +/** + * ICountMode: icount enablement state: + * + * @ICOUNT_DISABLED: Disabled - Do not count executed instructions. + * @ICOUNT_PRECISE: Enabled - Fixed conversion of insn to ns via "shift" option + * @ICOUNT_ADAPTATIVE: Enabled - Runtime adaptive algorithm to compute shift + */ +typedef enum { + ICOUNT_DISABLED = 0, + ICOUNT_PRECISE, + ICOUNT_ADAPTATIVE, +} ICountMode; + +#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) +extern ICountMode use_icount; +#define icount_enabled() (use_icount) +#else +#define icount_enabled() ICOUNT_DISABLED +#endif + +/* + * Update the icount with the executed instructions. Called by + * cpus-tcg vCPU thread so the main-loop can see time has moved forward. + */ +void icount_update(CPUState *cpu); + +/* get raw icount value */ +int64_t icount_get_raw(void); + +/* return the virtual CPU time in ns, based on the instruction counter. */ +int64_t icount_get(void); +/* + * convert an instruction counter value to ns, based on the icount shift. + * This shift is set as a fixed value with the icount "shift" option + * (precise mode), or it is constantly approximated and corrected at + * runtime in adaptive mode. + */ +int64_t icount_to_ns(int64_t icount); + +/** + * icount_configure: configure the icount options, including "shift" + * @opts: Options to parse + * @errp: pointer to a NULL-initialized error object + * + * Return: true on success, else false setting @errp with error + */ +bool icount_configure(QemuOpts *opts, Error **errp); + +/* used by tcg vcpu thread to calc icount budget */ +int64_t icount_round(int64_t count); + +/* if the CPUs are idle, start accounting real time to virtual clock. */ +void icount_start_warp_timer(void); +void icount_account_warp_timer(void); +void icount_notify_exit(void); + +/* + * CPU Ticks and Clock + */ + +/* Caller must hold BQL */ +void cpu_enable_ticks(void); +/* Caller must hold BQL */ +void cpu_disable_ticks(void); + +/* + * return the time elapsed in VM between vm_start and vm_stop. + * cpu_get_ticks() uses units of the host CPU cycle counter. 
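A hedged sketch combining the icount and VM clock accessors declared in system/cpu-timers.h above; the helper name is hypothetical:

    /* Pick a guest time source depending on whether icount is enabled. */
    static int64_t my_guest_time_ns(void)
    {
        if (icount_enabled()) {
            return icount_get();   /* virtual CPU time derived from executed insns */
        }
        return cpu_get_clock();    /* monotonic VM time between vm_start and vm_stop */
    }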
+ */ +int64_t cpu_get_ticks(void); + +/* + * Returns the monotonic time elapsed in VM, i.e., + * the time between vm_start and vm_stop + */ +int64_t cpu_get_clock(void); + +void qemu_timer_notify_cb(void *opaque, QEMUClockType type); + +/* get/set VIRTUAL clock and VM elapsed ticks via the cpus accel interface */ +int64_t cpus_get_virtual_clock(void); +void cpus_set_virtual_clock(int64_t new_time); +int64_t cpus_get_elapsed_ticks(void); + +#endif /* SYSTEM_CPU_TIMERS_H */ diff --git a/include/system/cpus.h b/include/system/cpus.h new file mode 100644 index 0000000000..3d8fd368f3 --- /dev/null +++ b/include/system/cpus.h @@ -0,0 +1,53 @@ +#ifndef QEMU_CPUS_H +#define QEMU_CPUS_H + +#include "system/accel-ops.h" + +/* register accel-specific operations */ +void cpus_register_accel(const AccelOpsClass *i); + +/* return registers ops */ +const AccelOpsClass *cpus_get_accel(void); + +/* accel/dummy-cpus.c */ + +/* Create a dummy vcpu for AccelOpsClass->create_vcpu_thread */ +void dummy_start_vcpu_thread(CPUState *); + +/* interface available for cpus accelerator threads */ + +/* For temporary buffers for forming a name */ +#define VCPU_THREAD_NAME_SIZE 16 + +void cpus_kick_thread(CPUState *cpu); +bool cpu_work_list_empty(CPUState *cpu); +bool cpu_thread_is_idle(CPUState *cpu); +bool all_cpu_threads_idle(void); +bool cpu_can_run(CPUState *cpu); +void qemu_wait_io_event_common(CPUState *cpu); +void qemu_wait_io_event(CPUState *cpu); +void cpu_thread_signal_created(CPUState *cpu); +void cpu_thread_signal_destroyed(CPUState *cpu); +void cpu_handle_guest_debug(CPUState *cpu); + +/* end interface for cpus accelerator threads */ + +bool qemu_in_vcpu_thread(void); +void qemu_init_cpu_loop(void); +void resume_all_vcpus(void); +void pause_all_vcpus(void); +void cpu_stop_current(void); + +extern int icount_align_option; + +/* Unblock cpu */ +void qemu_cpu_kick_self(void); + +bool cpus_are_resettable(void); + +void cpu_synchronize_all_states(void); +void cpu_synchronize_all_post_reset(void); +void cpu_synchronize_all_post_init(void); +void cpu_synchronize_all_pre_loadvm(void); + +#endif diff --git a/include/system/cryptodev-vhost-user.h b/include/system/cryptodev-vhost-user.h new file mode 100644 index 0000000000..5138c146fa --- /dev/null +++ b/include/system/cryptodev-vhost-user.h @@ -0,0 +1,50 @@ +/* + * QEMU Crypto Device Common Vhost User Implement + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei <arei.gonglei@huawei.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
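A hedged sketch of the vcpu pause/resume interface from system/cpus.h above (caller holds the BQL); the surrounding function and the state being updated are hypothetical:

    static void my_global_update(void)
    {
        pause_all_vcpus();
        cpu_synchronize_all_states();   /* pull vcpu state out of the accelerator */
        /* ... modify machine-wide state that running vcpus must not observe ... */
        resume_all_vcpus();
    }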
+ * + */ + +#ifndef CRYPTODEV_VHOST_USER_H +#define CRYPTODEV_VHOST_USER_H + +#include "system/cryptodev-vhost.h" + +#define VHOST_USER_MAX_AUTH_KEY_LEN 512 +#define VHOST_USER_MAX_CIPHER_KEY_LEN 64 + + +/** + * cryptodev_vhost_user_get_vhost: + * @cc: the client object for each queue + * @b: the cryptodev backend common vhost object + * @queue: the queue index + * + * Gets a new cryptodev backend common vhost object based on + * @b and @queue + * + * Returns: the cryptodev backend common vhost object + */ +CryptoDevBackendVhost * +cryptodev_vhost_user_get_vhost( + CryptoDevBackendClient *cc, + CryptoDevBackend *b, + uint16_t queue); + +#endif /* CRYPTODEV_VHOST_USER_H */ diff --git a/include/system/cryptodev-vhost.h b/include/system/cryptodev-vhost.h new file mode 100644 index 0000000000..b0bb09e70a --- /dev/null +++ b/include/system/cryptodev-vhost.h @@ -0,0 +1,153 @@ +/* + * QEMU Crypto Device Common Vhost Implement + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei <arei.gonglei@huawei.com> + * Jay Zhou <jianjay.zhou@huawei.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + */ +#ifndef CRYPTODEV_VHOST_H +#define CRYPTODEV_VHOST_H + +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-backend.h" +#include "chardev/char.h" + +#include "system/cryptodev.h" + + +typedef struct CryptoDevBackendVhostOptions { + VhostBackendType backend_type; + void *opaque; + int total_queues; + CryptoDevBackendClient *cc; +} CryptoDevBackendVhostOptions; + +typedef struct CryptoDevBackendVhost { + struct vhost_dev dev; + struct vhost_virtqueue vqs[1]; + int backend; + CryptoDevBackendClient *cc; +} CryptoDevBackendVhost; + +/** + * cryptodev_vhost_get_max_queues: + * @crypto: the cryptodev backend common vhost object + * + * Get the maximum queue number of @crypto. 
+ * + * + * Returns: the maximum queue number + */ +uint64_t +cryptodev_vhost_get_max_queues( + CryptoDevBackendVhost *crypto); + + +/** + * cryptodev_vhost_init: + * @options: the common vhost object's option + * + * Creates a new cryptodev backend common vhost object + * + ** The returned object must be released with + * cryptodev_vhost_cleanup() when no + * longer required + * + * Returns: the cryptodev backend common vhost object + */ +struct CryptoDevBackendVhost * +cryptodev_vhost_init( + CryptoDevBackendVhostOptions *options); + +/** + * cryptodev_vhost_cleanup: + * @crypto: the cryptodev backend common vhost object + * + * Clean the resource associated with @crypto that realizaed + * by cryptodev_vhost_init() + * + */ +void cryptodev_vhost_cleanup( + CryptoDevBackendVhost *crypto); + +/** + * cryptodev_get_vhost: + * @cc: the client object for each queue + * @b: the cryptodev backend common vhost object + * @queue: the cryptodev backend queue index + * + * Gets a new cryptodev backend common vhost object based on + * @b and @queue + * + * Returns: the cryptodev backend common vhost object + */ +CryptoDevBackendVhost * +cryptodev_get_vhost(CryptoDevBackendClient *cc, + CryptoDevBackend *b, + uint16_t queue); +/** + * cryptodev_vhost_start: + * @dev: the virtio crypto object + * @total_queues: the total count of queue + * + * Starts the vhost crypto logic + * + * Returns: 0 for success, negative for errors + */ +int cryptodev_vhost_start(VirtIODevice *dev, int total_queues); + +/** + * cryptodev_vhost_stop: + * @dev: the virtio crypto object + * @total_queues: the total count of queue + * + * Stops the vhost crypto logic + * + */ +void cryptodev_vhost_stop(VirtIODevice *dev, int total_queues); + +/** + * cryptodev_vhost_virtqueue_mask: + * @dev: the virtio crypto object + * @queue: the cryptodev backend queue index + * @idx: the virtqueue index + * @mask: mask or not (true or false) + * + * Mask/unmask events for @idx virtqueue on @dev device + * + */ +void cryptodev_vhost_virtqueue_mask(VirtIODevice *dev, + int queue, + int idx, bool mask); + +/** + * cryptodev_vhost_virtqueue_pending: + * @dev: the virtio crypto object + * @queue: the cryptodev backend queue index + * @idx: the virtqueue index + * + * Test and clear event pending status for @idx virtqueue on @dev device. + * Should be called after unmask to avoid losing events. + * + * Returns: true for success, false for errors + */ +bool cryptodev_vhost_virtqueue_pending(VirtIODevice *dev, + int queue, int idx); + +#endif /* CRYPTODEV_VHOST_H */ diff --git a/include/system/cryptodev.h b/include/system/cryptodev.h new file mode 100644 index 0000000000..b20822df0d --- /dev/null +++ b/include/system/cryptodev.h @@ -0,0 +1,447 @@ +/* + * QEMU Crypto Device Implementation + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei <arei.gonglei@huawei.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + */ +#ifndef CRYPTODEV_H +#define CRYPTODEV_H + +#include "qemu/queue.h" +#include "qemu/throttle.h" +#include "qom/object.h" +#include "qapi/qapi-types-cryptodev.h" + +/** + * CryptoDevBackend: + * + * The CryptoDevBackend object is an interface + * for different cryptodev backends, which provides crypto + * operation wrapper. + * + */ + +#define TYPE_CRYPTODEV_BACKEND "cryptodev-backend" + +OBJECT_DECLARE_TYPE(CryptoDevBackend, CryptoDevBackendClass, + CRYPTODEV_BACKEND) + + +#define MAX_CRYPTO_QUEUE_NUM 64 + +typedef struct CryptoDevBackendConf CryptoDevBackendConf; +typedef struct CryptoDevBackendPeers CryptoDevBackendPeers; +typedef struct CryptoDevBackendClient + CryptoDevBackendClient; + +/** + * CryptoDevBackendSymSessionInfo: + * + * @cipher_alg: algorithm type of CIPHER + * @key_len: byte length of cipher key + * @hash_alg: algorithm type of HASH/MAC + * @hash_result_len: byte length of HASH operation result + * @auth_key_len: byte length of authenticated key + * @add_len: byte length of additional authenticated data + * @op_type: operation type (refer to virtio_crypto.h) + * @direction: encryption or direction for CIPHER + * @hash_mode: HASH mode for HASH operation (refer to virtio_crypto.h) + * @alg_chain_order: order of algorithm chaining (CIPHER then HASH, + * or HASH then CIPHER) + * @cipher_key: point to a key of CIPHER + * @auth_key: point to an authenticated key of MAC + * + */ +typedef struct CryptoDevBackendSymSessionInfo { + /* corresponding with virtio crypto spec */ + uint32_t cipher_alg; + uint32_t key_len; + uint32_t hash_alg; + uint32_t hash_result_len; + uint32_t auth_key_len; + uint32_t add_len; + uint8_t op_type; + uint8_t direction; + uint8_t hash_mode; + uint8_t alg_chain_order; + uint8_t *cipher_key; + uint8_t *auth_key; +} CryptoDevBackendSymSessionInfo; + +/** + * CryptoDevBackendAsymSessionInfo: + */ +typedef struct CryptoDevBackendRsaPara { + uint32_t padding_algo; + uint32_t hash_algo; +} CryptoDevBackendRsaPara; + +typedef struct CryptoDevBackendAsymSessionInfo { + /* corresponding with virtio crypto spec */ + uint32_t algo; + uint32_t keytype; + uint32_t keylen; + uint8_t *key; + union { + CryptoDevBackendRsaPara rsa; + } u; +} CryptoDevBackendAsymSessionInfo; + +typedef struct CryptoDevBackendSessionInfo { + uint32_t op_code; + union { + CryptoDevBackendSymSessionInfo sym_sess_info; + CryptoDevBackendAsymSessionInfo asym_sess_info; + } u; + uint64_t session_id; +} CryptoDevBackendSessionInfo; + +/** + * CryptoDevBackendSymOpInfo: + * + * @aad_len: byte length of additional authenticated data + * @iv_len: byte length of initialization vector or counter + * @src_len: byte length of source data + * @dst_len: byte length of destination data + * @digest_result_len: byte length of hash digest result + * @hash_start_src_offset: Starting point for hash processing, specified + * as number of bytes from start of packet in source data, only used for + * algorithm chain + * @cipher_start_src_offset: Starting point for cipher processing, specified + * as number of bytes from start of packet in source data, only used for + * algorithm chain + * @len_to_hash: byte length of source data on which the hash + * operation will be computed, only used for algorithm chain + * @len_to_cipher: byte length of source data on which the cipher + * operation will be computed, only used for algorithm chain + * @op_type: 
operation type (refer to virtio_crypto.h) + * @iv: point to the initialization vector or counter + * @src: point to the source data + * @dst: point to the destination data + * @aad_data: point to the additional authenticated data + * @digest_result: point to the digest result data + * @data[0]: point to the extensional memory by one memory allocation + * + */ +typedef struct CryptoDevBackendSymOpInfo { + uint32_t aad_len; + uint32_t iv_len; + uint32_t src_len; + uint32_t dst_len; + uint32_t digest_result_len; + uint32_t hash_start_src_offset; + uint32_t cipher_start_src_offset; + uint32_t len_to_hash; + uint32_t len_to_cipher; + uint8_t op_type; + uint8_t *iv; + uint8_t *src; + uint8_t *dst; + uint8_t *aad_data; + uint8_t *digest_result; + uint8_t data[]; +} CryptoDevBackendSymOpInfo; + + +/** + * CryptoDevBackendAsymOpInfo: + * + * @src_len: byte length of source data + * @dst_len: byte length of destination data + * @src: point to the source data + * @dst: point to the destination data + * + */ +typedef struct CryptoDevBackendAsymOpInfo { + uint32_t src_len; + uint32_t dst_len; + uint8_t *src; + uint8_t *dst; +} CryptoDevBackendAsymOpInfo; + +typedef void (*CryptoDevCompletionFunc) (void *opaque, int ret); + +typedef struct CryptoDevBackendOpInfo { + QCryptodevBackendAlgoType algtype; + uint32_t op_code; + uint32_t queue_index; + CryptoDevCompletionFunc cb; + void *opaque; /* argument for cb */ + uint64_t session_id; + union { + CryptoDevBackendSymOpInfo *sym_op_info; + CryptoDevBackendAsymOpInfo *asym_op_info; + } u; + QTAILQ_ENTRY(CryptoDevBackendOpInfo) next; +} CryptoDevBackendOpInfo; + +struct CryptoDevBackendClass { + ObjectClass parent_class; + + void (*init)(CryptoDevBackend *backend, Error **errp); + void (*cleanup)(CryptoDevBackend *backend, Error **errp); + + int (*create_session)(CryptoDevBackend *backend, + CryptoDevBackendSessionInfo *sess_info, + uint32_t queue_index, + CryptoDevCompletionFunc cb, + void *opaque); + + int (*close_session)(CryptoDevBackend *backend, + uint64_t session_id, + uint32_t queue_index, + CryptoDevCompletionFunc cb, + void *opaque); + + int (*do_op)(CryptoDevBackend *backend, + CryptoDevBackendOpInfo *op_info); +}; + +struct CryptoDevBackendClient { + QCryptodevBackendType type; + char *info_str; + unsigned int queue_index; + int vring_enable; + QTAILQ_ENTRY(CryptoDevBackendClient) next; +}; + +struct CryptoDevBackendPeers { + CryptoDevBackendClient *ccs[MAX_CRYPTO_QUEUE_NUM]; + uint32_t queues; +}; + +struct CryptoDevBackendConf { + CryptoDevBackendPeers peers; + + /* Supported service mask */ + uint32_t crypto_services; + + /* Detailed algorithms mask */ + uint32_t cipher_algo_l; + uint32_t cipher_algo_h; + uint32_t hash_algo; + uint32_t mac_algo_l; + uint32_t mac_algo_h; + uint32_t aead_algo; + uint32_t akcipher_algo; + /* Maximum length of cipher key */ + uint32_t max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + /* Maximum size of each crypto request's content */ + uint64_t max_size; +}; + +typedef struct CryptodevBackendSymStat { + int64_t encrypt_ops; + int64_t decrypt_ops; + int64_t encrypt_bytes; + int64_t decrypt_bytes; +} CryptodevBackendSymStat; + +typedef struct CryptodevBackendAsymStat { + int64_t encrypt_ops; + int64_t decrypt_ops; + int64_t sign_ops; + int64_t verify_ops; + int64_t encrypt_bytes; + int64_t decrypt_bytes; + int64_t sign_bytes; + int64_t verify_bytes; +} CryptodevBackendAsymStat; + +struct CryptoDevBackend { + Object parent_obj; + + bool ready; + /* Tag the cryptodev 
backend is used by virtio-crypto or not */ + bool is_used; + CryptoDevBackendConf conf; + CryptodevBackendSymStat *sym_stat; + CryptodevBackendAsymStat *asym_stat; + + ThrottleState ts; + ThrottleTimers tt; + ThrottleConfig tc; + QTAILQ_HEAD(, CryptoDevBackendOpInfo) opinfos; +}; + +#define CryptodevSymStatInc(be, op, bytes) do { \ + be->sym_stat->op##_bytes += (bytes); \ + be->sym_stat->op##_ops += 1; \ +} while (/*CONSTCOND*/0) + +#define CryptodevSymStatIncEncrypt(be, bytes) \ + CryptodevSymStatInc(be, encrypt, bytes) + +#define CryptodevSymStatIncDecrypt(be, bytes) \ + CryptodevSymStatInc(be, decrypt, bytes) + +#define CryptodevAsymStatInc(be, op, bytes) do { \ + be->asym_stat->op##_bytes += (bytes); \ + be->asym_stat->op##_ops += 1; \ +} while (/*CONSTCOND*/0) + +#define CryptodevAsymStatIncEncrypt(be, bytes) \ + CryptodevAsymStatInc(be, encrypt, bytes) + +#define CryptodevAsymStatIncDecrypt(be, bytes) \ + CryptodevAsymStatInc(be, decrypt, bytes) + +#define CryptodevAsymStatIncSign(be, bytes) \ + CryptodevAsymStatInc(be, sign, bytes) + +#define CryptodevAsymStatIncVerify(be, bytes) \ + CryptodevAsymStatInc(be, verify, bytes) + + +/** + * cryptodev_backend_new_client: + * + * Creates a new cryptodev backend client object. + * + * The returned object must be released with + * cryptodev_backend_free_client() when no + * longer required + * + * Returns: a new cryptodev backend client object + */ +CryptoDevBackendClient *cryptodev_backend_new_client(void); + +/** + * cryptodev_backend_free_client: + * @cc: the cryptodev backend client object + * + * Release the memory associated with @cc that + * was previously allocated by cryptodev_backend_new_client() + */ +void cryptodev_backend_free_client( + CryptoDevBackendClient *cc); + +/** + * cryptodev_backend_cleanup: + * @backend: the cryptodev backend object + * @errp: pointer to a NULL-initialized error object + * + * Clean the resource associated with @backend that realizaed + * by the specific backend's init() callback + */ +void cryptodev_backend_cleanup( + CryptoDevBackend *backend, + Error **errp); + +/** + * cryptodev_backend_create_session: + * @backend: the cryptodev backend object + * @sess_info: parameters needed by session creating + * @queue_index: queue index of cryptodev backend client + * @errp: pointer to a NULL-initialized error object + * @cb: callback when session create is compeleted + * @opaque: parameter passed to callback + * + * Create a session for symmetric/asymmetric algorithms + * + * Returns: 0 for success and cb will be called when creation is completed, + * negative value for error, and cb will not be called. + */ +int cryptodev_backend_create_session( + CryptoDevBackend *backend, + CryptoDevBackendSessionInfo *sess_info, + uint32_t queue_index, + CryptoDevCompletionFunc cb, + void *opaque); + +/** + * cryptodev_backend_close_session: + * @backend: the cryptodev backend object + * @session_id: the session id + * @queue_index: queue index of cryptodev backend client + * @errp: pointer to a NULL-initialized error object + * @cb: callback when session create is compeleted + * @opaque: parameter passed to callback + * + * Close a session for which was previously + * created by cryptodev_backend_create_session() + * + * Returns: 0 for success and cb will be called when creation is completed, + * negative value for error, and cb will not be called. 
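A hedged sketch of asynchronous session creation with the API above; the my_* names are hypothetical and VIRTIO_CRYPTO_CIPHER_CREATE_SESSION is assumed to be the opcode from the virtio_crypto.h spec header:

    static void my_sess_created(void *opaque, int ret)
    {
        /* completion status reported by the backend; negative on failure */
    }

    static int my_create_sym_session(CryptoDevBackend *backend,
                                     CryptoDevBackendSymSessionInfo *sym)
    {
        CryptoDevBackendSessionInfo info = {
            .op_code = VIRTIO_CRYPTO_CIPHER_CREATE_SESSION,   /* assumed opcode */
        };

        info.u.sym_sess_info = *sym;
        return cryptodev_backend_create_session(backend, &info, 0 /* queue */,
                                                my_sess_created, NULL);
    }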
+ */ +int cryptodev_backend_close_session( + CryptoDevBackend *backend, + uint64_t session_id, + uint32_t queue_index, + CryptoDevCompletionFunc cb, + void *opaque); + +/** + * cryptodev_backend_crypto_operation: + * @backend: the cryptodev backend object + * @op_info: pointer to a CryptoDevBackendOpInfo object + * + * Do crypto operation, such as encryption, decryption, signature and + * verification + * + * Returns: 0 for success and cb will be called when creation is completed, + * negative value for error, and cb will not be called. + */ +int cryptodev_backend_crypto_operation( + CryptoDevBackend *backend, + CryptoDevBackendOpInfo *op_info); + +/** + * cryptodev_backend_set_used: + * @backend: the cryptodev backend object + * @used: true or false + * + * Set the cryptodev backend is used by virtio-crypto or not + */ +void cryptodev_backend_set_used(CryptoDevBackend *backend, bool used); + +/** + * cryptodev_backend_is_used: + * @backend: the cryptodev backend object + * + * Return the status that the cryptodev backend is used + * by virtio-crypto or not + * + * Returns: true on used, or false on not used + */ +bool cryptodev_backend_is_used(CryptoDevBackend *backend); + +/** + * cryptodev_backend_set_ready: + * @backend: the cryptodev backend object + * @ready: true or false + * + * Set the cryptodev backend is ready or not, which is called + * by the children of the cryptodev banckend interface. + */ +void cryptodev_backend_set_ready(CryptoDevBackend *backend, bool ready); + +/** + * cryptodev_backend_is_ready: + * @backend: the cryptodev backend object + * + * Return the status that the cryptodev backend is ready or not + * + * Returns: true on ready, or false on not ready + */ +bool cryptodev_backend_is_ready(CryptoDevBackend *backend); + +#endif /* CRYPTODEV_H */ diff --git a/include/system/device_tree.h b/include/system/device_tree.h new file mode 100644 index 0000000000..eb601522f8 --- /dev/null +++ b/include/system/device_tree.h @@ -0,0 +1,215 @@ +/* + * Header with function prototypes to help device tree manipulation using + * libfdt. It also provides functions to read entries from device tree proc + * interface. + * + * Copyright 2008 IBM Corporation. + * Authors: Jerone Young <jyoung5@us.ibm.com> + * Hollis Blanchard <hollisb@us.ibm.com> + * + * This work is licensed under the GNU GPL license version 2 or later. + * + */ + +#ifndef DEVICE_TREE_H +#define DEVICE_TREE_H + +void *create_device_tree(int *sizep); +void *load_device_tree(const char *filename_path, int *sizep); +#ifdef CONFIG_LINUX +/** + * load_device_tree_from_sysfs: reads the device tree information in the + * /proc/device-tree directory and return the corresponding binary blob + * buffer pointer. Asserts in case of error. + */ +void *load_device_tree_from_sysfs(void); +#endif + +/** + * qemu_fdt_node_path: return the paths of nodes matching a given + * name and compat string + * @fdt: pointer to the dt blob + * @name: node name + * @compat: compatibility string + * @errp: handle to an error object + * + * returns a newly allocated NULL-terminated array of node paths. + * Use g_strfreev() to free it. If one or more nodes were found, the + * array contains the path of each node and the last element equals to + * NULL. If there is no error but no matching node was found, the + * returned array contains a single element equal to NULL. If an error + * was encountered when parsing the blob, the function returns NULL + * + * @name may be NULL to wildcard names and only match compatibility + * strings. 
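A hedged sketch of qemu_fdt_node_path() as documented above: passing a NULL name wildcards node names and matches on the compat string only; the caller frees the returned array with g_strfreev(). The function and the "virtio,mmio" string are illustrative:

    static void my_dump_virtio_nodes(void *fdt, Error **errp)
    {
        char **paths = qemu_fdt_node_path(fdt, NULL, "virtio,mmio", errp);

        if (!paths) {
            return;                 /* blob parse error, reported through errp */
        }
        for (int i = 0; paths[i]; i++) {
            /* paths[i] is the full path of one matching node */
        }
        g_strfreev(paths);
    }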
+ */ +char **qemu_fdt_node_path(void *fdt, const char *name, const char *compat, + Error **errp); + +/** + * qemu_fdt_node_unit_path: return the paths of nodes matching a given + * node-name, ie. node-name and node-name@unit-address + * @fdt: pointer to the dt blob + * @name: node name + * @errp: handle to an error object + * + * returns a newly allocated NULL-terminated array of node paths. + * Use g_strfreev() to free it. If one or more nodes were found, the + * array contains the path of each node and the last element equals to + * NULL. If there is no error but no matching node was found, the + * returned array contains a single element equal to NULL. If an error + * was encountered when parsing the blob, the function returns NULL + */ +char **qemu_fdt_node_unit_path(void *fdt, const char *name, Error **errp); + +int qemu_fdt_setprop(void *fdt, const char *node_path, + const char *property, const void *val, int size); +int qemu_fdt_setprop_cell(void *fdt, const char *node_path, + const char *property, uint32_t val); +int qemu_fdt_setprop_u64(void *fdt, const char *node_path, + const char *property, uint64_t val); +int qemu_fdt_setprop_string(void *fdt, const char *node_path, + const char *property, const char *string); + +/** + * qemu_fdt_setprop_string_array: set a string array property + * + * @fdt: pointer to the dt blob + * @name: node name + * @prop: property array + * @array: pointer to an array of string pointers + * @len: length of array + * + * assigns a string array to a property. This function converts and + * array of strings to a sequential string with \0 separators before + * setting the property. + */ +int qemu_fdt_setprop_string_array(void *fdt, const char *node_path, + const char *prop, char **array, int len); + +int qemu_fdt_setprop_phandle(void *fdt, const char *node_path, + const char *property, + const char *target_node_path); +/** + * qemu_fdt_getprop: retrieve the value of a given property + * @fdt: pointer to the device tree blob + * @node_path: node path + * @property: name of the property to find + * @lenp: fdt error if any or length of the property on success + * @errp: handle to an error object + * + * returns a pointer to the property on success and NULL on failure + */ +const void *qemu_fdt_getprop(void *fdt, const char *node_path, + const char *property, int *lenp, + Error **errp); +/** + * qemu_fdt_getprop_cell: retrieve the value of a given 4 byte property + * @fdt: pointer to the device tree blob + * @node_path: node path + * @property: name of the property to find + * @lenp: fdt error if any or -EINVAL if the property size is different from + * 4 bytes, or 4 (expected length of the property) upon success. + * @errp: handle to an error object + * + * returns the property value on success + */ +uint32_t qemu_fdt_getprop_cell(void *fdt, const char *node_path, + const char *property, int *lenp, + Error **errp); +uint32_t qemu_fdt_get_phandle(void *fdt, const char *path); +uint32_t qemu_fdt_alloc_phandle(void *fdt); +int qemu_fdt_nop_node(void *fdt, const char *node_path); +int qemu_fdt_add_subnode(void *fdt, const char *name); +int qemu_fdt_add_path(void *fdt, const char *path); + +#define qemu_fdt_setprop_cells(fdt, node_path, property, ...) 
\ + do { \ + uint32_t qdt_tmp[] = { __VA_ARGS__ }; \ + for (unsigned i_ = 0; i_ < ARRAY_SIZE(qdt_tmp); i_++) { \ + qdt_tmp[i_] = cpu_to_be32(qdt_tmp[i_]); \ + } \ + qemu_fdt_setprop(fdt, node_path, property, qdt_tmp, \ + sizeof(qdt_tmp)); \ + } while (0) + +void qemu_fdt_dumpdtb(void *fdt, int size); + +/** + * qemu_fdt_setprop_sized_cells_from_array: + * @fdt: device tree blob + * @node_path: node to set property on + * @property: property to set + * @numvalues: number of values + * @values: array of number-of-cells, value pairs + * + * Set the specified property on the specified node in the device tree + * to be an array of cells. The values of the cells are specified via + * the values list, which alternates between "number of cells used by + * this value" and "value". + * number-of-cells must be either 1 or 2 (other values will result in + * an error being returned). If a value is too large to fit in the + * number of cells specified for it, an error is returned. + * + * This function is useful because device tree nodes often have cell arrays + * which are either lists of addresses or lists of address,size tuples, but + * the number of cells used for each element vary depending on the + * #address-cells and #size-cells properties of their parent node. + * If you know all your cell elements are one cell wide you can use the + * simpler qemu_fdt_setprop_cells(). If you're not setting up the + * array programmatically, qemu_fdt_setprop_sized_cells may be more + * convenient. + * + * Return value: 0 on success, <0 on error. + */ +int qemu_fdt_setprop_sized_cells_from_array(void *fdt, + const char *node_path, + const char *property, + int numvalues, + uint64_t *values); + +/** + * qemu_fdt_setprop_sized_cells: + * @fdt: device tree blob + * @node_path: node to set property on + * @property: property to set + * @...: list of number-of-cells, value pairs + * + * Set the specified property on the specified node in the device tree + * to be an array of cells. The values of the cells are specified via + * the variable arguments, which alternates between "number of cells + * used by this value" and "value". + * + * This is a convenience wrapper for the function + * qemu_fdt_setprop_sized_cells_from_array(). + * + * Return value: 0 on success, <0 on error. + */ +#define qemu_fdt_setprop_sized_cells(fdt, node_path, property, ...) \ + ({ \ + uint64_t qdt_tmp[] = { __VA_ARGS__ }; \ + qemu_fdt_setprop_sized_cells_from_array(fdt, node_path, \ + property, \ + ARRAY_SIZE(qdt_tmp) / 2, \ + qdt_tmp); \ + }) + + +/** + * qemu_fdt_randomize_seeds: + * @fdt: device tree blob + * + * Re-randomize all "rng-seed" properties with new seeds. + */ +void qemu_fdt_randomize_seeds(void *fdt); + +#define FDT_PCI_RANGE_RELOCATABLE 0x80000000 +#define FDT_PCI_RANGE_PREFETCHABLE 0x40000000 +#define FDT_PCI_RANGE_ALIASED 0x20000000 +#define FDT_PCI_RANGE_TYPE_MASK 0x03000000 +#define FDT_PCI_RANGE_MMIO_64BIT 0x03000000 +#define FDT_PCI_RANGE_MMIO 0x02000000 +#define FDT_PCI_RANGE_IOPORT 0x01000000 +#define FDT_PCI_RANGE_CONFIG 0x00000000 + +#endif /* DEVICE_TREE_H */ diff --git a/include/system/dirtylimit.h b/include/system/dirtylimit.h new file mode 100644 index 0000000000..d11ebbbbdb --- /dev/null +++ b/include/system/dirtylimit.h @@ -0,0 +1,39 @@ +/* + * Dirty page rate limit common functions + * + * Copyright (c) 2022 CHINA TELECOM CO.,LTD. + * + * Authors: + * Hyman Huang(黄勇) <huangy81@chinatelecom.cn> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. 
+ * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_DIRTYRLIMIT_H +#define QEMU_DIRTYRLIMIT_H + +#define DIRTYLIMIT_CALC_TIME_MS 1000 /* 1000ms */ + +int64_t vcpu_dirty_rate_get(int cpu_index); +void vcpu_dirty_rate_stat_start(void); +void vcpu_dirty_rate_stat_stop(void); +void vcpu_dirty_rate_stat_initialize(void); +void vcpu_dirty_rate_stat_finalize(void); + +void dirtylimit_state_lock(void); +void dirtylimit_state_unlock(void); +void dirtylimit_state_initialize(void); +void dirtylimit_state_finalize(void); +bool dirtylimit_in_service(void); +bool dirtylimit_vcpu_index_valid(int cpu_index); +void dirtylimit_process(void); +void dirtylimit_change(bool start); +void dirtylimit_set_vcpu(int cpu_index, + uint64_t quota, + bool enable); +void dirtylimit_set_all(uint64_t quota, + bool enable); +void dirtylimit_vcpu_execute(CPUState *cpu); +uint64_t dirtylimit_throttle_time_per_round(void); +uint64_t dirtylimit_ring_full_time(void); +#endif diff --git a/include/system/dirtyrate.h b/include/system/dirtyrate.h new file mode 100644 index 0000000000..20813f303f --- /dev/null +++ b/include/system/dirtyrate.h @@ -0,0 +1,30 @@ +/* + * dirty page rate helper functions + * + * Copyright (c) 2022 CHINA TELECOM CO.,LTD. + * + * Authors: + * Hyman Huang(黄勇) <huangy81@chinatelecom.cn> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_DIRTYRATE_H +#define QEMU_DIRTYRATE_H + +#include "qapi/qapi-types-migration.h" + +typedef struct VcpuStat { + int nvcpu; /* number of vcpu */ + DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */ +} VcpuStat; + +int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms, + VcpuStat *stat, + unsigned int flag, + bool one_shot); + +void global_dirty_log_change(unsigned int flag, + bool start); +#endif diff --git a/include/system/dma.h b/include/system/dma.h new file mode 100644 index 0000000000..5a49a30628 --- /dev/null +++ b/include/system/dma.h @@ -0,0 +1,323 @@ +/* + * DMA helper functions + * + * Copyright (c) 2009, 2020 Red Hat + * + * This work is licensed under the terms of the GNU General Public License + * (GNU GPL), version 2 or later. + */ + +#ifndef DMA_H +#define DMA_H + +#include "exec/memory.h" +#include "exec/address-spaces.h" +#include "block/block.h" +#include "block/accounting.h" + +typedef enum { + DMA_DIRECTION_TO_DEVICE = 0, + DMA_DIRECTION_FROM_DEVICE = 1, +} DMADirection; + +/* + * When an IOMMU is present, bus addresses become distinct from + * CPU/memory physical addresses and may be a different size. Because + * the IOVA size depends more on the bus than on the platform, we more + * or less have to treat these as 64-bit always to cover all (or at + * least most) cases. + */ +typedef uint64_t dma_addr_t; + +#define DMA_ADDR_BITS 64 +#define DMA_ADDR_FMT "%" PRIx64 + +typedef struct ScatterGatherEntry ScatterGatherEntry; + +struct QEMUSGList { + ScatterGatherEntry *sg; + int nsg; + int nalloc; + dma_addr_t size; + DeviceState *dev; + AddressSpace *as; +}; + +static inline void dma_barrier(AddressSpace *as, DMADirection dir) +{ + /* + * This is called before DMA read and write operations + * unless the _relaxed form is used and is responsible + * for providing some sane ordering of accesses vs + * concurrently running VCPUs. + * + * Users of map(), unmap() or lower level st/ld_* + * operations are responsible for providing their own + * ordering via barriers. 
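As a usage sketch for the dirty page rate limit API declared in dirtylimit.h above: the quota unit (MB/s, as used by the QMP set-vcpu-dirty-limit command) and the convention that callers hold the dirtylimit state lock are assumptions for this example, not guarantees made by the header.

        /* throttle vCPU 0 to roughly 100 MB/s of dirtied memory ... */
        dirtylimit_state_lock();
        dirtylimit_set_vcpu(0, 100, true);
        dirtylimit_state_unlock();

        /* ... and later lift the limit again */
        dirtylimit_state_lock();
        dirtylimit_set_vcpu(0, 0, false);
        dirtylimit_state_unlock();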
+ * + * This primitive implementation does a simple smp_mb() + * before each operation which provides pretty much full + * ordering. + * + * A smarter implementation can be devised if needed to + * use lighter barriers based on the direction of the + * transfer, the DMA context, etc... + */ + smp_mb(); +} + +/* Checks that the given range of addresses is valid for DMA. This is + * useful for certain cases, but usually you should just use + * dma_memory_{read,write}() and check for errors */ +static inline bool dma_memory_valid(AddressSpace *as, + dma_addr_t addr, dma_addr_t len, + DMADirection dir, MemTxAttrs attrs) +{ + return address_space_access_valid(as, addr, len, + dir == DMA_DIRECTION_FROM_DEVICE, + attrs); +} + +static inline MemTxResult dma_memory_rw_relaxed(AddressSpace *as, + dma_addr_t addr, + void *buf, dma_addr_t len, + DMADirection dir, + MemTxAttrs attrs) +{ + return address_space_rw(as, addr, attrs, + buf, len, dir == DMA_DIRECTION_FROM_DEVICE); +} + +static inline MemTxResult dma_memory_read_relaxed(AddressSpace *as, + dma_addr_t addr, + void *buf, dma_addr_t len) +{ + return dma_memory_rw_relaxed(as, addr, buf, len, + DMA_DIRECTION_TO_DEVICE, + MEMTXATTRS_UNSPECIFIED); +} + +static inline MemTxResult dma_memory_write_relaxed(AddressSpace *as, + dma_addr_t addr, + const void *buf, + dma_addr_t len) +{ + return dma_memory_rw_relaxed(as, addr, (void *)buf, len, + DMA_DIRECTION_FROM_DEVICE, + MEMTXATTRS_UNSPECIFIED); +} + +/** + * dma_memory_rw: Read from or write to an address space from DMA controller. + * + * Return a MemTxResult indicating whether the operation succeeded + * or failed (eg unassigned memory, device rejected the transaction, + * IOMMU fault). + * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @buf: buffer with the data transferred + * @len: the number of bytes to read or write + * @dir: indicates the transfer direction + * @attrs: memory transaction attributes + */ +static inline MemTxResult dma_memory_rw(AddressSpace *as, dma_addr_t addr, + void *buf, dma_addr_t len, + DMADirection dir, MemTxAttrs attrs) +{ + dma_barrier(as, dir); + + return dma_memory_rw_relaxed(as, addr, buf, len, dir, attrs); +} + +/** + * dma_memory_read: Read from an address space from DMA controller. + * + * Return a MemTxResult indicating whether the operation succeeded + * or failed (eg unassigned memory, device rejected the transaction, + * IOMMU fault). Called within RCU critical section. + * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @buf: buffer with the data transferred + * @len: length of the data transferred + * @attrs: memory transaction attributes + */ +static inline MemTxResult dma_memory_read(AddressSpace *as, dma_addr_t addr, + void *buf, dma_addr_t len, + MemTxAttrs attrs) +{ + return dma_memory_rw(as, addr, buf, len, + DMA_DIRECTION_TO_DEVICE, attrs); +} + +/** + * dma_memory_write: Write to address space from DMA controller. + * + * Return a MemTxResult indicating whether the operation succeeded + * or failed (eg unassigned memory, device rejected the transaction, + * IOMMU fault). 
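A minimal sketch of the error-checking pattern for these DMA helpers; desc_addr and the 16-byte descriptor layout are invented for the example.

        uint8_t desc[16];

        if (dma_memory_read(&address_space_memory, desc_addr, desc, sizeof(desc),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            /* unassigned memory, rejected transaction or IOMMU fault */
            return;
        }
        /* ... parse desc ... */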
+ * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @buf: buffer with the data transferred + * @len: the number of bytes to write + * @attrs: memory transaction attributes + */ +static inline MemTxResult dma_memory_write(AddressSpace *as, dma_addr_t addr, + const void *buf, dma_addr_t len, + MemTxAttrs attrs) +{ + return dma_memory_rw(as, addr, (void *)buf, len, + DMA_DIRECTION_FROM_DEVICE, attrs); +} + +/** + * dma_memory_set: Fill memory with a constant byte from DMA controller. + * + * Return a MemTxResult indicating whether the operation succeeded + * or failed (eg unassigned memory, device rejected the transaction, + * IOMMU fault). + * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @c: constant byte to fill the memory + * @len: the number of bytes to fill with the constant byte + * @attrs: memory transaction attributes + */ +MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr, + uint8_t c, dma_addr_t len, MemTxAttrs attrs); + +/** + * dma_memory_map: Map a physical memory region into a host virtual address. + * + * May map a subset of the requested range, given by and returned in @plen. + * May return %NULL and set *@plen to zero(0), if resources needed to perform + * the mapping are exhausted. + * Use only for reads OR writes - not for read-modify-write operations. + * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @len: pointer to length of buffer; updated on return + * @dir: indicates the transfer direction + * @attrs: memory attributes + */ +static inline void *dma_memory_map(AddressSpace *as, + dma_addr_t addr, dma_addr_t *len, + DMADirection dir, MemTxAttrs attrs) +{ + hwaddr xlen = *len; + void *p; + + p = address_space_map(as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE, + attrs); + *len = xlen; + return p; +} + +/** + * dma_memory_unmap: Unmaps a memory region previously mapped by dma_memory_map() + * + * Will also mark the memory as dirty if @dir == %DMA_DIRECTION_FROM_DEVICE. + * @access_len gives the amount of memory that was actually read or written + * by the caller. 
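A sketch of the map/unmap pairing described here; as, addr, size and bytes_produced are placeholders supplied by the caller.

        dma_addr_t maplen = size;
        void *buf = dma_memory_map(as, addr, &maplen, DMA_DIRECTION_FROM_DEVICE,
                                   MEMTXATTRS_UNSPECIFIED);

        if (!buf) {
            /* mapping resources exhausted: fall back to a bounce buffer
             * and dma_memory_write() */
            return;
        }
        /* ... produce up to maplen bytes into buf (may be less than size) ... */
        dma_memory_unmap(as, buf, maplen, DMA_DIRECTION_FROM_DEVICE,
                         bytes_produced);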
+ * + * @as: #AddressSpace used + * @buffer: host pointer as returned by dma_memory_map() + * @len: buffer length as returned by dma_memory_map() + * @dir: indicates the transfer direction + * @access_len: amount of data actually transferred + */ +static inline void dma_memory_unmap(AddressSpace *as, + void *buffer, dma_addr_t len, + DMADirection dir, dma_addr_t access_len) +{ + address_space_unmap(as, buffer, (hwaddr)len, + dir == DMA_DIRECTION_FROM_DEVICE, access_len); +} + +#define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \ + static inline MemTxResult ld##_lname##_##_end##_dma(AddressSpace *as, \ + dma_addr_t addr, \ + uint##_bits##_t *pval, \ + MemTxAttrs attrs) \ + { \ + MemTxResult res = dma_memory_read(as, addr, pval, (_bits) / 8, attrs); \ + _end##_bits##_to_cpus(pval); \ + return res; \ + } \ + static inline MemTxResult st##_sname##_##_end##_dma(AddressSpace *as, \ + dma_addr_t addr, \ + uint##_bits##_t val, \ + MemTxAttrs attrs) \ + { \ + val = cpu_to_##_end##_bits(val); \ + return dma_memory_write(as, addr, &val, (_bits) / 8, attrs); \ + } + +static inline MemTxResult ldub_dma(AddressSpace *as, dma_addr_t addr, + uint8_t *val, MemTxAttrs attrs) +{ + return dma_memory_read(as, addr, val, 1, attrs); +} + +static inline MemTxResult stb_dma(AddressSpace *as, dma_addr_t addr, + uint8_t val, MemTxAttrs attrs) +{ + return dma_memory_write(as, addr, &val, 1, attrs); +} + +DEFINE_LDST_DMA(uw, w, 16, le); +DEFINE_LDST_DMA(l, l, 32, le); +DEFINE_LDST_DMA(q, q, 64, le); +DEFINE_LDST_DMA(uw, w, 16, be); +DEFINE_LDST_DMA(l, l, 32, be); +DEFINE_LDST_DMA(q, q, 64, be); + +#undef DEFINE_LDST_DMA + +struct ScatterGatherEntry { + dma_addr_t base; + dma_addr_t len; +}; + +void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint, + AddressSpace *as); +void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len); +void qemu_sglist_destroy(QEMUSGList *qsg); + +typedef BlockAIOCB *DMAIOFunc(int64_t offset, QEMUIOVector *iov, + BlockCompletionFunc *cb, void *cb_opaque, + void *opaque); + +BlockAIOCB *dma_blk_io(AioContext *ctx, + QEMUSGList *sg, uint64_t offset, uint32_t align, + DMAIOFunc *io_func, void *io_func_opaque, + BlockCompletionFunc *cb, void *opaque, DMADirection dir); +BlockAIOCB *dma_blk_read(BlockBackend *blk, + QEMUSGList *sg, uint64_t offset, uint32_t align, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *dma_blk_write(BlockBackend *blk, + QEMUSGList *sg, uint64_t offset, uint32_t align, + BlockCompletionFunc *cb, void *opaque); +MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual, + QEMUSGList *sg, MemTxAttrs attrs); +MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual, + QEMUSGList *sg, MemTxAttrs attrs); + +void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie, + QEMUSGList *sg, enum BlockAcctType type); + +/** + * dma_aligned_pow2_mask: Return the address bit mask of the largest + * power of 2 size less or equal than @end - @start + 1, aligned with @start, + * and bounded by 1 << @max_addr_bits bits. + * + * @start: range start address + * @end: range end address (greater than @start) + * @max_addr_bits: max address bits (<= 64) + */ +uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, + int max_addr_bits); + +#endif diff --git a/include/system/dump-arch.h b/include/system/dump-arch.h new file mode 100644 index 0000000000..743916e46c --- /dev/null +++ b/include/system/dump-arch.h @@ -0,0 +1,35 @@ +/* + * QEMU dump + * + * Copyright Fujitsu, Corp. 
2011, 2012 + * + * Authors: + * Wen Congyang <wency@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef DUMP_ARCH_H +#define DUMP_ARCH_H + +typedef struct ArchDumpInfo { + int d_machine; /* Architecture */ + int d_endian; /* ELFDATA2LSB or ELFDATA2MSB */ + int d_class; /* ELFCLASS32 or ELFCLASS64 */ + uint32_t page_size; /* The target's page size. If it's variable and + * unknown, then this should be the maximum. */ + uint64_t phys_base; /* The target's physmem base. */ + void (*arch_sections_add_fn)(DumpState *s); + uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff); + int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff); + void (*arch_cleanup_fn)(DumpState *s); +} ArchDumpInfo; + +struct GuestPhysBlockList; /* memory_mapping.h */ +int cpu_get_dump_info(ArchDumpInfo *info, + const struct GuestPhysBlockList *guest_phys_blocks); +ssize_t cpu_get_note_size(int class, int machine, int nr_cpus); + +#endif diff --git a/include/system/dump.h b/include/system/dump.h new file mode 100644 index 0000000000..607bd7b220 --- /dev/null +++ b/include/system/dump.h @@ -0,0 +1,225 @@ +/* + * QEMU dump + * + * Copyright Fujitsu, Corp. 2011, 2012 + * + * Authors: + * Wen Congyang <wency@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef DUMP_H +#define DUMP_H + +#include "qapi/qapi-types-dump.h" +#include "qemu/thread.h" + +#define MAKEDUMPFILE_SIGNATURE "makedumpfile" +#define MAX_SIZE_MDF_HEADER (4096) /* max size of makedumpfile_header */ +#define TYPE_FLAT_HEADER (1) /* type of flattened format */ +#define VERSION_FLAT_HEADER (1) /* version of flattened format */ +#define END_FLAG_FLAT_HEADER (-1) + +#ifndef ARCH_PFN_OFFSET +#define ARCH_PFN_OFFSET (0) +#endif + +/* + * flag for compressed format + */ +#define DUMP_DH_COMPRESSED_ZLIB (0x1) +#define DUMP_DH_COMPRESSED_LZO (0x2) +#define DUMP_DH_COMPRESSED_SNAPPY (0x4) + +#define KDUMP_SIGNATURE "KDUMP " +#define SIG_LEN (sizeof(KDUMP_SIGNATURE) - 1) +#define DUMP_LEVEL (1) +#define DISKDUMP_HEADER_BLOCKS (1) + +#include "system/dump-arch.h" +#include "system/memory_mapping.h" + +typedef struct QEMU_PACKED MakedumpfileHeader { + char signature[16]; /* = "makedumpfile" */ + int64_t type; + int64_t version; +} MakedumpfileHeader; + +typedef struct QEMU_PACKED MakedumpfileDataHeader { + int64_t offset; + int64_t buf_size; +} MakedumpfileDataHeader; + +typedef struct QEMU_PACKED NewUtsname { + char sysname[65]; + char nodename[65]; + char release[65]; + char version[65]; + char machine[65]; + char domainname[65]; +} NewUtsname; + +typedef struct QEMU_PACKED DiskDumpHeader32 { + char signature[SIG_LEN]; /* = "KDUMP " */ + uint32_t header_version; /* Dump header version */ + NewUtsname utsname; /* copy of system_utsname */ + char timestamp[10]; /* Time stamp */ + uint32_t status; /* Above flags */ + uint32_t block_size; /* Size of a block in byte */ + uint32_t sub_hdr_size; /* Size of arch dependent header in block */ + uint32_t bitmap_blocks; /* Size of Memory bitmap in block */ + uint32_t max_mapnr; /* = max_mapnr , + obsoleted in header_version 6 */ + uint32_t total_ram_blocks; /* Number of blocks should be written */ + uint32_t device_blocks; /* Number of total blocks in dump device */ + uint32_t written_blocks; /* Number of written blocks */ + uint32_t current_cpu; /* CPU# which handles dump */ + 
uint32_t nr_cpus; /* Number of CPUs */ +} DiskDumpHeader32; + +typedef struct QEMU_PACKED DiskDumpHeader64 { + char signature[SIG_LEN]; /* = "KDUMP " */ + uint32_t header_version; /* Dump header version */ + NewUtsname utsname; /* copy of system_utsname */ + char timestamp[22]; /* Time stamp */ + uint32_t status; /* Above flags */ + uint32_t block_size; /* Size of a block in byte */ + uint32_t sub_hdr_size; /* Size of arch dependent header in block */ + uint32_t bitmap_blocks; /* Size of Memory bitmap in block */ + uint32_t max_mapnr; /* = max_mapnr, + obsoleted in header_version 6 */ + uint32_t total_ram_blocks; /* Number of blocks should be written */ + uint32_t device_blocks; /* Number of total blocks in dump device */ + uint32_t written_blocks; /* Number of written blocks */ + uint32_t current_cpu; /* CPU# which handles dump */ + uint32_t nr_cpus; /* Number of CPUs */ +} DiskDumpHeader64; + +typedef struct QEMU_PACKED KdumpSubHeader32 { + uint32_t phys_base; + uint32_t dump_level; /* header_version 1 and later */ + uint32_t split; /* header_version 2 and later */ + uint32_t start_pfn; /* header_version 2 and later, + obsoleted in header_version 6 */ + uint32_t end_pfn; /* header_version 2 and later, + obsoleted in header_version 6 */ + uint64_t offset_vmcoreinfo; /* header_version 3 and later */ + uint32_t size_vmcoreinfo; /* header_version 3 and later */ + uint64_t offset_note; /* header_version 4 and later */ + uint32_t note_size; /* header_version 4 and later */ + uint64_t offset_eraseinfo; /* header_version 5 and later */ + uint32_t size_eraseinfo; /* header_version 5 and later */ + uint64_t start_pfn_64; /* header_version 6 and later */ + uint64_t end_pfn_64; /* header_version 6 and later */ + uint64_t max_mapnr_64; /* header_version 6 and later */ +} KdumpSubHeader32; + +typedef struct QEMU_PACKED KdumpSubHeader64 { + uint64_t phys_base; + uint32_t dump_level; /* header_version 1 and later */ + uint32_t split; /* header_version 2 and later */ + uint64_t start_pfn; /* header_version 2 and later, + obsoleted in header_version 6 */ + uint64_t end_pfn; /* header_version 2 and later, + obsoleted in header_version 6 */ + uint64_t offset_vmcoreinfo; /* header_version 3 and later */ + uint64_t size_vmcoreinfo; /* header_version 3 and later */ + uint64_t offset_note; /* header_version 4 and later */ + uint64_t note_size; /* header_version 4 and later */ + uint64_t offset_eraseinfo; /* header_version 5 and later */ + uint64_t size_eraseinfo; /* header_version 5 and later */ + uint64_t start_pfn_64; /* header_version 6 and later */ + uint64_t end_pfn_64; /* header_version 6 and later */ + uint64_t max_mapnr_64; /* header_version 6 and later */ +} KdumpSubHeader64; + +typedef struct DataCache { + DumpState *state; /* dump state related to this data */ + uint8_t *buf; /* buffer for cached data */ + size_t buf_size; /* size of the buf */ + size_t data_size; /* size of cached data in buf */ + off_t offset; /* offset of the file */ +} DataCache; + +typedef struct QEMU_PACKED PageDescriptor { + uint64_t offset; /* the offset of the page data*/ + uint32_t size; /* the size of this dump page */ + uint32_t flags; /* flags */ + uint64_t page_flags; /* page flags */ +} PageDescriptor; + +typedef struct DumpState { + GuestPhysBlockList guest_phys_blocks; + ArchDumpInfo dump_info; + MemoryMappingList list; + bool resume; + bool detached; + bool kdump_raw; + hwaddr memory_offset; + int fd; + + /* + * Dump filter area variables + * + * A filtered dump only contains the guest memory designated by + * the 
start address and length variables defined below. + * + * If length is 0, no filtering is applied. + */ + int64_t filter_area_begin; /* Start address of partial guest memory area */ + int64_t filter_area_length; /* Length of partial guest memory area */ + + /* Elf dump related data */ + uint32_t phdr_num; + uint32_t shdr_num; + ssize_t note_size; + hwaddr shdr_offset; + hwaddr phdr_offset; + hwaddr section_offset; + hwaddr note_offset; + + void *elf_section_hdrs; /* Pointer to section header buffer */ + void *elf_section_data; /* Pointer to section data buffer */ + uint64_t elf_section_data_size; /* Size of section data */ + GArray *string_table_buf; /* String table data buffer */ + + uint8_t *note_buf; /* buffer for notes */ + size_t note_buf_offset; /* the writing place in note_buf */ + uint32_t nr_cpus; /* number of guest's cpu */ + uint64_t max_mapnr; /* the biggest guest's phys-mem's number */ + size_t len_dump_bitmap; /* the size of the place used to store + dump_bitmap in vmcore */ + off_t offset_dump_bitmap; /* offset of dump_bitmap part in vmcore */ + off_t offset_page; /* offset of page part in vmcore */ + size_t num_dumpable; /* number of page that can be dumped */ + uint32_t flag_compress; /* indicate the compression format */ + DumpStatus status; /* current dump status */ + + bool has_format; /* whether format is provided */ + DumpGuestMemoryFormat format; /* valid only if has_format == true */ + QemuThread dump_thread; /* thread for detached dump */ + + int64_t total_size; /* total memory size (in bytes) to + * be dumped. When filter is + * enabled, this will only count + * those to be written. */ + int64_t written_size; /* written memory size (in bytes), + * this could be used to calculate + * how much work we have + * finished. */ + uint8_t *guest_note; /* ELF note content */ + size_t guest_note_size; +} DumpState; + +uint16_t cpu_to_dump16(DumpState *s, uint16_t val); +uint32_t cpu_to_dump32(DumpState *s, uint32_t val); +uint64_t cpu_to_dump64(DumpState *s, uint64_t val); + +int64_t dump_filtered_memblock_size(GuestPhysBlock *block, int64_t filter_area_start, + int64_t filter_area_length); +int64_t dump_filtered_memblock_start(GuestPhysBlock *block, int64_t filter_area_start, + int64_t filter_area_length); +#endif diff --git a/include/system/event-loop-base.h b/include/system/event-loop-base.h new file mode 100644 index 0000000000..a6c24f1351 --- /dev/null +++ b/include/system/event-loop-base.h @@ -0,0 +1,40 @@ +/* + * QEMU event-loop backend + * + * Copyright (C) 2022 Red Hat Inc + * + * Authors: + * Nicolas Saenz Julienne <nsaenzju@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#ifndef QEMU_EVENT_LOOP_BASE_H +#define QEMU_EVENT_LOOP_BASE_H + +#include "qom/object.h" +#include "block/aio.h" + +#define TYPE_EVENT_LOOP_BASE "event-loop-base" +OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass, + EVENT_LOOP_BASE) + +struct EventLoopBaseClass { + ObjectClass parent_class; + + void (*init)(EventLoopBase *base, Error **errp); + void (*update_params)(EventLoopBase *base, Error **errp); + bool (*can_be_deleted)(EventLoopBase *base); +}; + +struct EventLoopBase { + Object parent; + + /* AioContext AIO engine parameters */ + int64_t aio_max_batch; + + /* AioContext thread pool parameters */ + int64_t thread_pool_min; + int64_t thread_pool_max; +}; +#endif diff --git a/include/system/host_iommu_device.h b/include/system/host_iommu_device.h new file mode 100644 index 0000000000..809cced4ba --- /dev/null +++ b/include/system/host_iommu_device.h @@ -0,0 +1,110 @@ +/* + * Host IOMMU device abstract declaration + * + * Copyright (C) 2024 Intel Corporation. + * + * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef HOST_IOMMU_DEVICE_H +#define HOST_IOMMU_DEVICE_H + +#include "qom/object.h" +#include "qapi/error.h" + +/** + * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities. + * + * @type: host platform IOMMU type. + * + * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents + * the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl) + */ +typedef struct HostIOMMUDeviceCaps { + uint32_t type; + uint64_t hw_caps; +} HostIOMMUDeviceCaps; + +#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" +OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE) + +struct HostIOMMUDevice { + Object parent_obj; + + char *name; + void *agent; /* pointer to agent device, ie. VFIO or VDPA device */ + PCIBus *aliased_bus; + int aliased_devfn; + HostIOMMUDeviceCaps caps; +}; + +/** + * struct HostIOMMUDeviceClass - The base class for all host IOMMU devices. + * + * Different types of host devices (e.g., VFIO or VDPA device) or devices + * with different backend (e.g., VFIO legacy container or IOMMUFD backend) + * will have different implementations of the HostIOMMUDeviceClass. + */ +struct HostIOMMUDeviceClass { + ObjectClass parent_class; + + /** + * @realize: initialize host IOMMU device instance further. + * + * Mandatory callback. + * + * @hiod: pointer to a host IOMMU device instance. + * + * @opaque: pointer to agent device of this host IOMMU device, + * e.g., VFIO base device or VDPA device. + * + * @errp: pass an Error out when realize fails. + * + * Returns: true on success, false on failure. + */ + bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp); + /** + * @get_cap: check if a host IOMMU device capability is supported. + * + * Optional callback, if not implemented, hint not supporting query + * of @cap. + * + * @hiod: pointer to a host IOMMU device instance. + * + * @cap: capability to check. + * + * @errp: pass an Error out when fails to query capability. + * + * Returns: <0 on failure, 0 if a @cap is unsupported, or else + * 1 or some positive value for some special @cap, + * i.e., HOST_IOMMU_DEVICE_CAP_AW_BITS. 
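A sketch of how a caller might use the optional get_cap hook; HOST_IOMMU_DEVICE_GET_CLASS is the class checker generated by OBJECT_DECLARE_TYPE above, and treating a missing hook as -ENOTSUP is the caller's choice, not a rule of this header.

        HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
        int aw_bits = -ENOTSUP;

        if (hiodc->get_cap) {
            aw_bits = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_AW_BITS, errp);
        }
        if (aw_bits > 0) {
            /* aw_bits is the usable IOVA address width reported by the host */
        }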
+ */ + int (*get_cap)(HostIOMMUDevice *hiod, int cap, Error **errp); + /** + * @get_iova_ranges: Return the list of usable iova_ranges along with + * @hiod Host IOMMU device + * + * @hiod: handle to the host IOMMU device + */ + GList* (*get_iova_ranges)(HostIOMMUDevice *hiod); + /** + * + * @get_page_size_mask: Return the page size mask supported along this + * @hiod Host IOMMU device + * + * @hiod: handle to the host IOMMU device + */ + uint64_t (*get_page_size_mask)(HostIOMMUDevice *hiod); +}; + +/* + * Host IOMMU device capability list. + */ +#define HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE 0 +#define HOST_IOMMU_DEVICE_CAP_AW_BITS 1 + +#define HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX 64 +#endif diff --git a/include/system/hostmem.h b/include/system/hostmem.h new file mode 100644 index 0000000000..5c21ca55c0 --- /dev/null +++ b/include/system/hostmem.h @@ -0,0 +1,96 @@ +/* + * QEMU Host Memory Backend + * + * Copyright (C) 2013-2014 Red Hat Inc + * + * Authors: + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef SYSTEM_HOSTMEM_H +#define SYSTEM_HOSTMEM_H + +#include "system/numa.h" +#include "qapi/qapi-types-machine.h" +#include "qom/object.h" +#include "exec/memory.h" +#include "qemu/bitmap.h" +#include "qemu/thread-context.h" + +#define TYPE_MEMORY_BACKEND "memory-backend" +OBJECT_DECLARE_TYPE(HostMemoryBackend, HostMemoryBackendClass, + MEMORY_BACKEND) + +/* hostmem-ram.c */ +/** + * @TYPE_MEMORY_BACKEND_RAM: + * name of backend that uses mmap on the anonymous RAM + */ + +#define TYPE_MEMORY_BACKEND_RAM "memory-backend-ram" + +/* hostmem-file.c */ +/** + * @TYPE_MEMORY_BACKEND_FILE: + * name of backend that uses mmap on a file descriptor + */ +#define TYPE_MEMORY_BACKEND_FILE "memory-backend-file" + +#define TYPE_MEMORY_BACKEND_MEMFD "memory-backend-memfd" + + +/** + * HostMemoryBackendClass: + * @parent_class: opaque parent class container + */ +struct HostMemoryBackendClass { + ObjectClass parent_class; + + /** + * alloc: Allocate memory from backend. + * + * @backend: the #HostMemoryBackend. + * @errp: pointer to Error*, to store an error if it happens. + * + * Return: true on success, else false setting @errp with error. 
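The following is a rough sketch of an alloc() implementation, loosely modelled on the memory-backend-ram backend; the function name is invented, and the exact memory_region_init_* helper and its bool-returning convention should be checked against the current memory API before reuse.

    static bool my_ram_backend_alloc(HostMemoryBackend *backend, Error **errp)
    {
        g_autofree char *name = host_memory_backend_get_name(backend);

        if (!backend->size) {
            error_setg(errp, "cannot create a zero-sized memory backend");
            return false;
        }
        /* assumed to return false and set errp on failure */
        return memory_region_init_ram_flags_nomigrate(&backend->mr,
                                                      OBJECT(backend), name,
                                                      backend->size,
                                                      backend->share ? RAM_SHARED : 0,
                                                      errp);
    }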
+ */ + bool (*alloc)(HostMemoryBackend *backend, Error **errp); +}; + +/** + * @HostMemoryBackend + * + * @parent: opaque parent object container + * @size: amount of memory backend provides + * @mr: MemoryRegion representing host memory belonging to backend + * @prealloc_threads: number of threads to be used for preallocatining RAM + */ +struct HostMemoryBackend { + /* private */ + Object parent; + + /* protected */ + uint64_t size; + bool merge, dump, use_canonical_path; + bool prealloc, is_mapped, share, reserve; + bool guest_memfd, aligned; + uint32_t prealloc_threads; + ThreadContext *prealloc_context; + DECLARE_BITMAP(host_nodes, MAX_NODES + 1); + HostMemPolicy policy; + + MemoryRegion mr; +}; + +bool host_memory_backend_mr_inited(HostMemoryBackend *backend); +MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend); + +void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped); +bool host_memory_backend_is_mapped(HostMemoryBackend *backend); +size_t host_memory_backend_pagesize(HostMemoryBackend *memdev); +char *host_memory_backend_get_name(HostMemoryBackend *backend); + +#endif diff --git a/include/system/hvf.h b/include/system/hvf.h new file mode 100644 index 0000000000..730f927f03 --- /dev/null +++ b/include/system/hvf.h @@ -0,0 +1,71 @@ +/* + * QEMU Hypervisor.framework (HVF) support + * + * Copyright Google Inc., 2017 + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +/* header to be included in non-HVF-specific code */ + +#ifndef HVF_H +#define HVF_H + +#include "qemu/accel.h" +#include "qom/object.h" + +#ifdef COMPILING_PER_TARGET +#include "cpu.h" + +#ifdef CONFIG_HVF +extern bool hvf_allowed; +#define hvf_enabled() (hvf_allowed) +#else /* !CONFIG_HVF */ +#define hvf_enabled() 0 +#endif /* !CONFIG_HVF */ + +#endif /* COMPILING_PER_TARGET */ + +#define TYPE_HVF_ACCEL ACCEL_CLASS_NAME("hvf") + +typedef struct HVFState HVFState; +DECLARE_INSTANCE_CHECKER(HVFState, HVF_STATE, + TYPE_HVF_ACCEL) + +#ifdef COMPILING_PER_TARGET +struct hvf_sw_breakpoint { + vaddr pc; + vaddr saved_insn; + int use_count; + QTAILQ_ENTRY(hvf_sw_breakpoint) entry; +}; + +struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, + vaddr pc); +int hvf_sw_breakpoints_active(CPUState *cpu); + +int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); +int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); +int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type); +int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type); +void hvf_arch_remove_all_hw_breakpoints(void); + +/* + * hvf_update_guest_debug: + * @cs: CPUState for the CPU to update + * + * Update guest to enable or disable debugging. Per-arch specifics will be + * handled by calling down to hvf_arch_update_guest_debug. + */ +int hvf_update_guest_debug(CPUState *cpu); +void hvf_arch_update_guest_debug(CPUState *cpu); + +/* + * Return whether the guest supports debugging. + */ +bool hvf_arch_supports_guest_debug(void); +#endif /* COMPILING_PER_TARGET */ + +#endif diff --git a/include/system/hvf_int.h b/include/system/hvf_int.h new file mode 100644 index 0000000000..42ae18433f --- /dev/null +++ b/include/system/hvf_int.h @@ -0,0 +1,77 @@ +/* + * QEMU Hypervisor.framework (HVF) support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +/* header to be included in HVF-specific code */ + +#ifndef HVF_INT_H +#define HVF_INT_H + +#ifdef __aarch64__ +#include <Hypervisor/Hypervisor.h> +typedef hv_vcpu_t hvf_vcpuid; +#else +#include <Hypervisor/hv.h> +typedef hv_vcpuid_t hvf_vcpuid; +#endif + +/* hvf_slot flags */ +#define HVF_SLOT_LOG (1 << 0) + +typedef struct hvf_slot { + uint64_t start; + uint64_t size; + uint8_t *mem; + int slot_id; + uint32_t flags; + MemoryRegion *region; +} hvf_slot; + +typedef struct hvf_vcpu_caps { + uint64_t vmx_cap_pinbased; + uint64_t vmx_cap_procbased; + uint64_t vmx_cap_procbased2; + uint64_t vmx_cap_entry; + uint64_t vmx_cap_exit; + uint64_t vmx_cap_preemption_timer; +} hvf_vcpu_caps; + +struct HVFState { + AccelState parent; + hvf_slot slots[32]; + int num_slots; + + hvf_vcpu_caps *hvf_caps; + uint64_t vtimer_offset; + QTAILQ_HEAD(, hvf_sw_breakpoint) hvf_sw_breakpoints; +}; +extern HVFState *hvf_state; + +struct AccelCPUState { + hvf_vcpuid fd; + void *exit; + bool vtimer_masked; + sigset_t unblock_ipi_mask; + bool guest_debug_enabled; + bool dirty; +}; + +void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line, + const char *exp); +#define assert_hvf_ok(EX) assert_hvf_ok_impl((EX), __FILE__, __LINE__, #EX) +const char *hvf_return_string(hv_return_t ret); +int hvf_arch_init(void); +hv_return_t hvf_arch_vm_create(MachineState *ms, uint32_t pa_range); +int hvf_arch_init_vcpu(CPUState *cpu); +void hvf_arch_vcpu_destroy(CPUState *cpu); +int hvf_vcpu_exec(CPUState *); +hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t); +int hvf_put_registers(CPUState *); +int hvf_get_registers(CPUState *); +void hvf_kick_vcpu_thread(CPUState *cpu); + +#endif diff --git a/include/system/hw_accel.h b/include/system/hw_accel.h new file mode 100644 index 0000000000..380e9e640b --- /dev/null +++ b/include/system/hw_accel.h @@ -0,0 +1,25 @@ +/* + * QEMU Hardware accelerators support + * + * Copyright 2016 Google, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_HW_ACCEL_H +#define QEMU_HW_ACCEL_H + +#include "hw/core/cpu.h" +#include "system/kvm.h" +#include "system/hvf.h" +#include "system/whpx.h" +#include "system/nvmm.h" + +void cpu_synchronize_state(CPUState *cpu); +void cpu_synchronize_post_reset(CPUState *cpu); +void cpu_synchronize_post_init(CPUState *cpu); +void cpu_synchronize_pre_loadvm(CPUState *cpu); + +#endif /* QEMU_HW_ACCEL_H */ diff --git a/include/system/iommufd.h b/include/system/iommufd.h new file mode 100644 index 0000000000..cbab75bfbf --- /dev/null +++ b/include/system/iommufd.h @@ -0,0 +1,66 @@ +/* + * iommufd container backend declaration + * + * Copyright (C) 2024 Intel Corporation. + * Copyright Red Hat, Inc. 
2024 + * + * Authors: Yi Liu <yi.l.liu@intel.com> + * Eric Auger <eric.auger@redhat.com> + * Zhenzhong Duan <zhenzhong.duan@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef SYSTEM_IOMMUFD_H +#define SYSTEM_IOMMUFD_H + +#include "qom/object.h" +#include "exec/hwaddr.h" +#include "exec/cpu-common.h" +#include "system/host_iommu_device.h" + +#define TYPE_IOMMUFD_BACKEND "iommufd" +OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND) + +struct IOMMUFDBackendClass { + ObjectClass parent_class; +}; + +struct IOMMUFDBackend { + Object parent; + + /*< protected >*/ + int fd; /* /dev/iommu file descriptor */ + bool owned; /* is the /dev/iommu opened internally */ + uint32_t users; + + /*< public >*/ +}; + +bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); +void iommufd_backend_disconnect(IOMMUFDBackend *be); + +bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, + Error **errp); +void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id); +int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly); +int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, + hwaddr iova, ram_addr_t size); +bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, + uint64_t *caps, Error **errp); +bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id, + uint32_t pt_id, uint32_t flags, + uint32_t data_type, uint32_t data_len, + void *data_ptr, uint32_t *out_hwpt, + Error **errp); +bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id, + bool start, Error **errp); +bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id, + uint64_t iova, ram_addr_t size, + uint64_t page_size, uint64_t *data, + Error **errp); + +#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" +#endif diff --git a/include/system/iothread.h b/include/system/iothread.h new file mode 100644 index 0000000000..d95c17a645 --- /dev/null +++ b/include/system/iothread.h @@ -0,0 +1,67 @@ +/* + * Event loop thread + * + * Copyright Red Hat Inc., 2013 + * + * Authors: + * Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef IOTHREAD_H +#define IOTHREAD_H + +#include "block/aio.h" +#include "qemu/thread.h" +#include "qom/object.h" +#include "system/event-loop-base.h" + +#define TYPE_IOTHREAD "iothread" + +struct IOThread { + EventLoopBase parent_obj; + + QemuThread thread; + AioContext *ctx; + bool run_gcontext; /* whether we should run gcontext */ + GMainContext *worker_context; + GMainLoop *main_loop; + QemuSemaphore init_done_sem; /* is thread init done? */ + bool stopping; /* has iothread_stop() been called? */ + bool running; /* should iothread_run() continue? */ + int thread_id; + + /* AioContext poll parameters */ + int64_t poll_max_ns; + int64_t poll_grow; + int64_t poll_shrink; +}; +typedef struct IOThread IOThread; + +DECLARE_INSTANCE_CHECKER(IOThread, IOTHREAD, + TYPE_IOTHREAD) + +char *iothread_get_id(IOThread *iothread); +IOThread *iothread_by_id(const char *id); +AioContext *iothread_get_aio_context(IOThread *iothread); +GMainContext *iothread_get_g_main_context(IOThread *iothread); + +/* + * Helpers used to allocate iothreads for internal use. 
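A sketch of these internal-iothread helpers (declared just below); the id string is arbitrary and &error_abort is only one possible error policy.

        IOThread *iot = iothread_create("my-worker", &error_abort);
        AioContext *ctx = iothread_get_aio_context(iot);

        /* ... attach BHs, timers or fd handlers to ctx ... */

        iothread_stop(iot);      /* quiesce the event loop thread */
        iothread_destroy(iot);   /* drop the reference and free the object */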
These + * iothreads will not be seen by monitor clients when query using + * "query-iothreads". + */ +IOThread *iothread_create(const char *id, Error **errp); +void iothread_stop(IOThread *iothread); +void iothread_destroy(IOThread *iothread); + +/* + * Returns true if executing within IOThread context, + * false otherwise. + */ +bool qemu_in_iothread(void); + +#endif /* IOTHREAD_H */ diff --git a/include/system/kvm.h b/include/system/kvm.h new file mode 100644 index 0000000000..c3a60b2890 --- /dev/null +++ b/include/system/kvm.h @@ -0,0 +1,581 @@ +/* + * QEMU KVM support + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +/* header to be included in non-KVM-specific code */ + +#ifndef QEMU_KVM_H +#define QEMU_KVM_H + +#include "exec/memattrs.h" +#include "qemu/accel.h" +#include "qom/object.h" + +#ifdef COMPILING_PER_TARGET +# ifdef CONFIG_KVM +# include <linux/kvm.h> +# define CONFIG_KVM_IS_POSSIBLE +# endif +#else +# define CONFIG_KVM_IS_POSSIBLE +#endif + +#ifdef CONFIG_KVM_IS_POSSIBLE + +extern bool kvm_allowed; +extern bool kvm_kernel_irqchip; +extern bool kvm_split_irqchip; +extern bool kvm_async_interrupts_allowed; +extern bool kvm_halt_in_kernel_allowed; +extern bool kvm_resamplefds_allowed; +extern bool kvm_msi_via_irqfd_allowed; +extern bool kvm_gsi_routing_allowed; +extern bool kvm_gsi_direct_mapping; +extern bool kvm_readonly_mem_allowed; +extern bool kvm_msi_use_devid; + +#define kvm_enabled() (kvm_allowed) +/** + * kvm_irqchip_in_kernel: + * + * Returns: true if an in-kernel irqchip was created. + * What this actually means is architecture and machine model + * specific: on PC, for instance, it means that the LAPIC + * is in kernel. This function should never be used from generic + * target-independent code: use one of the following functions or + * some other specific check instead. + */ +#define kvm_irqchip_in_kernel() (kvm_kernel_irqchip) + +/** + * kvm_irqchip_is_split: + * + * Returns: true if the irqchip implementation is split between + * user and kernel space. The details are architecture and + * machine specific. On PC, it means that the PIC, IOAPIC, and + * PIT are in user space while the LAPIC is in the kernel. + */ +#define kvm_irqchip_is_split() (kvm_split_irqchip) + +/** + * kvm_async_interrupts_enabled: + * + * Returns: true if we can deliver interrupts to KVM + * asynchronously (ie by ioctl from any thread at any time) + * rather than having to do interrupt delivery synchronously + * (where the vcpu must be stopped at a suitable point first). + */ +#define kvm_async_interrupts_enabled() (kvm_async_interrupts_allowed) + +/** + * kvm_halt_in_kernel + * + * Returns: true if halted cpus should still get a KVM_RUN ioctl to run + * inside of kernel space. This only works if MP state is implemented. + */ +#define kvm_halt_in_kernel() (kvm_halt_in_kernel_allowed) + +/** + * kvm_irqfds_enabled: + * + * Returns: true if we can use irqfds to inject interrupts into + * a KVM CPU (ie the kernel supports irqfds and we are running + * with a configuration where it is meaningful to use them). + * + * Always available if running with in-kernel irqchip. 
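A short sketch of how device code typically uses these predicates to pick an interrupt delivery path; the branch bodies are placeholders.

        if (kvm_enabled() && kvm_irqfds_enabled()) {
            /* fast path: let the kernel inject the interrupt through an irqfd */
        } else {
            /* fallback: raise the interrupt from QEMU, e.g. via qemu_set_irq() */
        }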
+ */ +#define kvm_irqfds_enabled() kvm_irqchip_in_kernel() + +/** + * kvm_resamplefds_enabled: + * + * Returns: true if we can use resamplefds to inject interrupts into + * a KVM CPU (ie the kernel supports resamplefds and we are running + * with a configuration where it is meaningful to use them). + */ +#define kvm_resamplefds_enabled() (kvm_resamplefds_allowed) + +/** + * kvm_msi_via_irqfd_enabled: + * + * Returns: true if we can route a PCI MSI (Message Signaled Interrupt) + * to a KVM CPU via an irqfd. This requires that the kernel supports + * this and that we're running in a configuration that permits it. + */ +#define kvm_msi_via_irqfd_enabled() (kvm_msi_via_irqfd_allowed) + +/** + * kvm_gsi_routing_enabled: + * + * Returns: true if GSI routing is enabled (ie the kernel supports + * it and we're running in a configuration that permits it). + */ +#define kvm_gsi_routing_enabled() (kvm_gsi_routing_allowed) + +/** + * kvm_gsi_direct_mapping: + * + * Returns: true if GSI direct mapping is enabled. + */ +#define kvm_gsi_direct_mapping() (kvm_gsi_direct_mapping) + +/** + * kvm_readonly_mem_enabled: + * + * Returns: true if KVM readonly memory is enabled (ie the kernel + * supports it and we're running in a configuration that permits it). + */ +#define kvm_readonly_mem_enabled() (kvm_readonly_mem_allowed) + +/** + * kvm_msi_devid_required: + * Returns: true if KVM requires a device id to be provided while + * defining an MSI routing entry. + */ +#define kvm_msi_devid_required() (kvm_msi_use_devid) + +#else + +#define kvm_enabled() (0) +#define kvm_irqchip_in_kernel() (false) +#define kvm_irqchip_is_split() (false) +#define kvm_async_interrupts_enabled() (false) +#define kvm_halt_in_kernel() (false) +#define kvm_irqfds_enabled() (false) +#define kvm_resamplefds_enabled() (false) +#define kvm_msi_via_irqfd_enabled() (false) +#define kvm_gsi_routing_allowed() (false) +#define kvm_gsi_direct_mapping() (false) +#define kvm_readonly_mem_enabled() (false) +#define kvm_msi_devid_required() (false) + +#endif /* CONFIG_KVM_IS_POSSIBLE */ + +struct kvm_run; +struct kvm_irq_routing_entry; + +typedef struct KVMCapabilityInfo { + const char *name; + int value; +} KVMCapabilityInfo; + +#define KVM_CAP_INFO(CAP) { "KVM_CAP_" stringify(CAP), KVM_CAP_##CAP } +#define KVM_CAP_LAST_INFO { NULL, 0 } + +struct KVMState; + +#define TYPE_KVM_ACCEL ACCEL_CLASS_NAME("kvm") +typedef struct KVMState KVMState; +DECLARE_INSTANCE_CHECKER(KVMState, KVM_STATE, + TYPE_KVM_ACCEL) + +extern KVMState *kvm_state; +typedef struct Notifier Notifier; + +typedef struct KVMRouteChange { + KVMState *s; + int changes; +} KVMRouteChange; + +/* external API */ + +unsigned int kvm_get_max_memslots(void); +unsigned int kvm_get_free_memslots(void); +bool kvm_has_sync_mmu(void); +int kvm_has_vcpu_events(void); +int kvm_max_nested_state_length(void); +int kvm_has_gsi_routing(void); + +/** + * kvm_arm_supports_user_irq + * + * Not all KVM implementations support notifications for kernel generated + * interrupt events to user space. This function indicates whether the current + * KVM implementation does support them. 
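Looking back at the KVM_CAP_INFO()/KVM_CAP_LAST_INFO helpers defined above, an architecture's mandatory-capability table usually looks like the following sketch; the particular capabilities listed are illustrative, not a requirement of any real target.

    const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
        KVM_CAP_INFO(USER_MEMORY),
        KVM_CAP_INFO(IOEVENTFD),
        KVM_CAP_LAST_INFO
    };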
+ * + * Returns: true if KVM supports using kernel generated IRQs from user space + */ +bool kvm_arm_supports_user_irq(void); + + +int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); +int kvm_on_sigbus(int code, void *addr); + +#ifdef COMPILING_PER_TARGET +#include "cpu.h" + +void kvm_flush_coalesced_mmio_buffer(void); + +/** + * kvm_update_guest_debug(): ensure KVM debug structures updated + * @cs: the CPUState for this cpu + * @reinject_trap: KVM trap injection control + * + * There are usually per-arch specifics which will be handled by + * calling down to kvm_arch_update_guest_debug after the generic + * fields have been set. + */ +#ifdef TARGET_KVM_HAVE_GUEST_DEBUG +int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap); +#else +static inline int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap) +{ + return -EINVAL; +} +#endif + +/* internal API */ + +int kvm_ioctl(KVMState *s, unsigned long type, ...); + +int kvm_vm_ioctl(KVMState *s, unsigned long type, ...); + +int kvm_vcpu_ioctl(CPUState *cpu, unsigned long type, ...); + +/** + * kvm_device_ioctl - call an ioctl on a kvm device + * @fd: The KVM device file descriptor as returned from KVM_CREATE_DEVICE + * @type: The device-ctrl ioctl number + * + * Returns: -errno on error, nonnegative on success + */ +int kvm_device_ioctl(int fd, unsigned long type, ...); + +/** + * kvm_vm_check_attr - check for existence of a specific vm attribute + * @s: The KVMState pointer + * @group: the group + * @attr: the attribute of that group to query for + * + * Returns: 1 if the attribute exists + * 0 if the attribute either does not exist or if the vm device + * interface is unavailable + */ +int kvm_vm_check_attr(KVMState *s, uint32_t group, uint64_t attr); + +/** + * kvm_device_check_attr - check for existence of a specific device attribute + * @fd: The device file descriptor + * @group: the group + * @attr: the attribute of that group to query for + * + * Returns: 1 if the attribute exists + * 0 if the attribute either does not exist or if the vm device + * interface is unavailable + */ +int kvm_device_check_attr(int fd, uint32_t group, uint64_t attr); + +/** + * kvm_device_access - set or get value of a specific device attribute + * @fd: The device file descriptor + * @group: the group + * @attr: the attribute of that group to set or get + * @val: pointer to a storage area for the value + * @write: true for set and false for get operation + * @errp: error object handle + * + * Returns: 0 on success + * < 0 on error + * Use kvm_device_check_attr() in order to check for the availability + * of optional attributes. + */ +int kvm_device_access(int fd, int group, uint64_t attr, + void *val, bool write, Error **errp); + +/** + * kvm_create_device - create a KVM device for the device control API + * @KVMState: The KVMState pointer + * @type: The KVM device type (see Documentation/virtual/kvm/devices in the + * kernel source) + * @test: If true, only test if device can be created, but don't actually + * create the device. + * + * Returns: -errno on error, nonnegative on success: @test ? 0 : device fd; + */ +int kvm_create_device(KVMState *s, uint64_t type, bool test); + +/** + * kvm_device_supported - probe whether KVM supports specific device + * + * @vmfd: The fd handler for VM + * @type: type of device + * + * @return: true if supported, otherwise false. 
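A sketch of the device-control API usage pattern described above, in the style of the Arm vGIC setup code; the device type and attribute group are examples only, and error handling is reduced to a comment.

        int fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_V3, false);

        if (fd < 0) {
            /* fd is -errno: the kernel cannot create this device type */
        } else {
            uint32_t nr_irqs = 256;
            kvm_device_access(fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0,
                              &nr_irqs, true, &error_abort);
        }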
+ */ +bool kvm_device_supported(int vmfd, uint64_t type); + +/** + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. + * + * @returns: 0 when success, errno (<0) when failed. + */ +int kvm_create_vcpu(CPUState *cpu); + +/** + * kvm_park_vcpu - Park QEMU KVM vCPU context + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked. + * + * @returns: none + */ +void kvm_park_vcpu(CPUState *cpu); + +/** + * kvm_unpark_vcpu - unpark QEMU KVM vCPU context + * @s: KVM State + * @vcpu_id: Architecture vCPU ID of the parked vCPU + * + * @returns: KVM fd + */ +int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id); + +/** + * kvm_create_and_park_vcpu - Create and park a KVM vCPU + * @cpu: QOM CPUState object for which KVM vCPU has to be created and parked. + * + * @returns: 0 when success, errno (<0) when failed. + */ +int kvm_create_and_park_vcpu(CPUState *cpu); + +/* Arch specific hooks */ + +extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; + +void kvm_arch_accel_class_init(ObjectClass *oc); + +void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run); +MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run); + +int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run); + +int kvm_arch_process_async_events(CPUState *cpu); + +int kvm_arch_get_registers(CPUState *cpu, Error **errp); + +/* state subset only touched by the VCPU itself during runtime */ +#define KVM_PUT_RUNTIME_STATE 1 +/* state subset modified during VCPU reset */ +#define KVM_PUT_RESET_STATE 2 +/* full state set, modified during initialization or on vmload */ +#define KVM_PUT_FULL_STATE 3 + +int kvm_arch_put_registers(CPUState *cpu, int level, Error **errp); + +int kvm_arch_get_default_type(MachineState *ms); + +int kvm_arch_init(MachineState *ms, KVMState *s); + +int kvm_arch_init_vcpu(CPUState *cpu); +int kvm_arch_destroy_vcpu(CPUState *cpu); + +bool kvm_vcpu_id_is_valid(int vcpu_id); + +/* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */ +unsigned long kvm_arch_vcpu_id(CPUState *cpu); + +#ifdef KVM_HAVE_MCE_INJECTION +void kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); +#endif + +void kvm_arch_init_irq_routing(KVMState *s); + +int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, + uint64_t address, uint32_t data, PCIDevice *dev); + +/* Notify arch about newly added MSI routes */ +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev); +/* Notify arch about released MSI routes */ +int kvm_arch_release_virq_post(int virq); + +int kvm_arch_msi_data_to_gsi(uint32_t data); + +int kvm_set_irq(KVMState *s, int irq, int level); +int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg); + +void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin); + +void kvm_irqchip_add_change_notifier(Notifier *n); +void kvm_irqchip_remove_change_notifier(Notifier *n); +void kvm_irqchip_change_notify(void); + +struct kvm_guest_debug; +struct kvm_debug_exit_arch; + +struct kvm_sw_breakpoint { + vaddr pc; + vaddr saved_insn; + int use_count; + QTAILQ_ENTRY(kvm_sw_breakpoint) entry; +}; + +struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu, + vaddr pc); + +int kvm_sw_breakpoints_active(CPUState *cpu); + +int kvm_arch_insert_sw_breakpoint(CPUState *cpu, + struct kvm_sw_breakpoint *bp); +int kvm_arch_remove_sw_breakpoint(CPUState *cpu, + struct kvm_sw_breakpoint *bp); +int kvm_arch_insert_hw_breakpoint(vaddr addr, 
vaddr len, int type); +int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type); +void kvm_arch_remove_all_hw_breakpoints(void); + +void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg); + +bool kvm_arch_stop_on_emulation_error(CPUState *cpu); + +int kvm_check_extension(KVMState *s, unsigned int extension); + +int kvm_vm_check_extension(KVMState *s, unsigned int extension); + +#define kvm_vm_enable_cap(s, capability, cap_flags, ...) \ + ({ \ + struct kvm_enable_cap cap = { \ + .cap = capability, \ + .flags = cap_flags, \ + }; \ + uint64_t args_tmp[] = { __VA_ARGS__ }; \ + size_t n = MIN(ARRAY_SIZE(args_tmp), ARRAY_SIZE(cap.args)); \ + memcpy(cap.args, args_tmp, n * sizeof(cap.args[0])); \ + kvm_vm_ioctl(s, KVM_ENABLE_CAP, &cap); \ + }) + +#define kvm_vcpu_enable_cap(cpu, capability, cap_flags, ...) \ + ({ \ + struct kvm_enable_cap cap = { \ + .cap = capability, \ + .flags = cap_flags, \ + }; \ + uint64_t args_tmp[] = { __VA_ARGS__ }; \ + size_t n = MIN(ARRAY_SIZE(args_tmp), ARRAY_SIZE(cap.args)); \ + memcpy(cap.args, args_tmp, n * sizeof(cap.args[0])); \ + kvm_vcpu_ioctl(cpu, KVM_ENABLE_CAP, &cap); \ + }) + +void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); + +int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, + hwaddr *phys_addr); + +#endif /* COMPILING_PER_TARGET */ + +void kvm_cpu_synchronize_state(CPUState *cpu); + +void kvm_init_cpu_signals(CPUState *cpu); + +/** + * kvm_irqchip_add_msi_route - Add MSI route for specific vector + * @c: KVMRouteChange instance. + * @vector: which vector to add. This can be either MSI/MSIX + * vector. The function will automatically detect whether + * MSI/MSIX is enabled, and fetch corresponding MSI + * message. + * @dev: Owner PCI device to add the route. If @dev is specified + * as @NULL, an empty MSI message will be inited. + * @return: virq (>=0) when success, errno (<0) when failed. + */ +int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev); +int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, + PCIDevice *dev); +void kvm_irqchip_commit_routes(KVMState *s); + +static inline KVMRouteChange kvm_irqchip_begin_route_changes(KVMState *s) +{ + return (KVMRouteChange) { .s = s, .changes = 0 }; +} + +static inline void kvm_irqchip_commit_route_changes(KVMRouteChange *c) +{ + if (c->changes) { + kvm_irqchip_commit_routes(c->s); + c->changes = 0; + } +} + +int kvm_irqchip_get_virq(KVMState *s); +void kvm_irqchip_release_virq(KVMState *s, int virq); + +void kvm_add_routing_entry(KVMState *s, + struct kvm_irq_routing_entry *entry); + +int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + EventNotifier *rn, int virq); +int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + int virq); +int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, + EventNotifier *rn, qemu_irq irq); +int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, + qemu_irq irq); +void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi); +void kvm_init_irq_routing(KVMState *s); + +bool kvm_kernel_irqchip_allowed(void); +bool kvm_kernel_irqchip_required(void); +bool kvm_kernel_irqchip_split(void); + +/** + * kvm_arch_irqchip_create: + * @KVMState: The KVMState pointer + * + * Allow architectures to create an in-kernel irq chip themselves. 
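+ *
+ * (Aside on the route-change helpers declared above: they are intended to be
+ * used in a begin/add/commit pattern. A sketch, where @vector and @dev are
+ * purely illustrative:
+ *
+ *   KVMRouteChange c = kvm_irqchip_begin_route_changes(s);
+ *   int virq = kvm_irqchip_add_msi_route(&c, vector, dev);
+ *   kvm_irqchip_commit_route_changes(&c);
+ *
+ * with virq < 0 indicating failure.)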
+ * + * Returns: < 0: error + * 0: irq chip was not created + * > 0: irq chip was created + */ +int kvm_arch_irqchip_create(KVMState *s); + +/** + * kvm_set_one_reg - set a register value in KVM via KVM_SET_ONE_REG ioctl + * @id: The register ID + * @source: The pointer to the value to be set. It must point to a variable + * of the correct type/size for the register being accessed. + * + * Returns: 0 on success, or a negative errno on failure. + */ +int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source); + +/** + * kvm_get_one_reg - get a register value from KVM via KVM_GET_ONE_REG ioctl + * @id: The register ID + * @target: The pointer where the value is to be stored. It must point to a + * variable of the correct type/size for the register being accessed. + * + * Returns: 0 on success, or a negative errno on failure. + */ +int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); + +/* Notify resamplefd for EOI of specific interrupts. */ +void kvm_resample_fd_notify(int gsi); + +bool kvm_dirty_ring_enabled(void); + +uint32_t kvm_dirty_ring_size(void); + +void kvm_mark_guest_state_protected(void); + +/** + * kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page + * reported for the VM. + */ +bool kvm_hwpoisoned_mem(void); + +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); + +int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); +int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); + +int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private); + +#endif diff --git a/include/system/kvm_int.h b/include/system/kvm_int.h new file mode 100644 index 0000000000..4de6106869 --- /dev/null +++ b/include/system/kvm_int.h @@ -0,0 +1,187 @@ +/* + * Internal definitions for a target's KVM support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef QEMU_KVM_INT_H +#define QEMU_KVM_INT_H + +#include "exec/memory.h" +#include "qapi/qapi-types-common.h" +#include "qemu/accel.h" +#include "qemu/queue.h" +#include "system/kvm.h" +#include "hw/boards.h" +#include "hw/i386/topology.h" +#include "io/channel-socket.h" + +typedef struct KVMSlot +{ + hwaddr start_addr; + ram_addr_t memory_size; + void *ram; + int slot; + int flags; + int old_flags; + /* Dirty bitmap cache for the slot */ + unsigned long *dirty_bmap; + unsigned long dirty_bmap_size; + /* Cache of the address space ID */ + int as_id; + /* Cache of the offset in ram address space */ + ram_addr_t ram_start_offset; + int guest_memfd; + hwaddr guest_memfd_offset; +} KVMSlot; + +typedef struct KVMMemoryUpdate { + QSIMPLEQ_ENTRY(KVMMemoryUpdate) next; + MemoryRegionSection section; +} KVMMemoryUpdate; + +typedef struct KVMMemoryListener { + MemoryListener listener; + KVMSlot *slots; + unsigned int nr_slots_used; + unsigned int nr_slots_allocated; + int as_id; + QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add; + QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; +} KVMMemoryListener; + +#define KVM_MSI_HASHTAB_SIZE 256 + +typedef struct KVMHostTopoInfo { + /* Number of package on the Host */ + unsigned int maxpkgs; + /* Number of cpus on the Host */ + unsigned int maxcpus; + /* Number of cpus on each different package */ + unsigned int *pkg_cpu_count; + /* Each package can have different maxticks */ + unsigned int *maxticks; +} KVMHostTopoInfo; + +struct KVMMsrEnergy { + pid_t pid; + bool enable; + char *socket_path; + QIOChannelSocket *sioc; + QemuThread msr_thr; + unsigned int guest_vcpus; + unsigned int guest_vsockets; + X86CPUTopoInfo guest_topo_info; + KVMHostTopoInfo host_topo; + const CPUArchIdList *guest_cpu_list; + uint64_t *msr_value; + uint64_t msr_unit; + uint64_t msr_limit; + uint64_t msr_info; +}; + +enum KVMDirtyRingReaperState { + KVM_DIRTY_RING_REAPER_NONE = 0, + /* The reaper is sleeping */ + KVM_DIRTY_RING_REAPER_WAIT, + /* The reaper is reaping for dirty pages */ + KVM_DIRTY_RING_REAPER_REAPING, +}; + +/* + * KVM reaper instance, responsible for collecting the KVM dirty bits + * via the dirty ring. + */ +struct KVMDirtyRingReaper { + /* The reaper thread */ + QemuThread reaper_thr; + volatile uint64_t reaper_iteration; /* iteration number of reaper thr */ + volatile enum KVMDirtyRingReaperState reaper_state; /* reap thr state */ +}; +struct KVMState +{ + AccelState parent_obj; + /* Max number of KVM slots supported */ + int nr_slots_max; + int fd; + int vmfd; + int coalesced_mmio; + int coalesced_pio; + struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; + bool coalesced_flush_in_progress; + int vcpu_events; +#ifdef TARGET_KVM_HAVE_GUEST_DEBUG + QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints; +#endif + int max_nested_state_len; + int kvm_shadow_mem; + bool kernel_irqchip_allowed; + bool kernel_irqchip_required; + OnOffAuto kernel_irqchip_split; + bool sync_mmu; + bool guest_state_protected; + uint64_t manual_dirty_log_protect; + /* + * Older POSIX says that ioctl numbers are signed int, but in + * practice they are not. (Newer POSIX doesn't specify ioctl + * at all.) Linux, glibc and *BSD all treat ioctl numbers as + * unsigned, and real-world ioctl values like KVM_GET_XSAVE have + * bit 31 set, which means that passing them via an 'int' will + * result in sign-extension when they get converted back to the + * 'unsigned long' which the ioctl() prototype uses. 
Luckily Linux + * always treats the argument as an unsigned 32-bit int, so any + * possible sign-extension is deliberately ignored, but for + * consistency we keep to the same type that glibc is using. + */ + unsigned long irq_set_ioctl; + unsigned int sigmask_len; + GHashTable *gsimap; +#ifdef KVM_CAP_IRQ_ROUTING + struct kvm_irq_routing *irq_routes; + int nr_allocated_irq_routes; + unsigned long *used_gsi_bitmap; + unsigned int gsi_count; +#endif + KVMMemoryListener memory_listener; + QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus; + + /* For "info mtree -f" to tell if an MR is registered in KVM */ + int nr_as; + struct KVMAs { + KVMMemoryListener *ml; + AddressSpace *as; + } *as; + uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */ + uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */ + bool kvm_dirty_ring_with_bitmap; + uint64_t kvm_eager_split_size; /* Eager Page Splitting chunk size */ + struct KVMDirtyRingReaper reaper; + struct KVMMsrEnergy msr_energy; + NotifyVmexitOption notify_vmexit; + uint32_t notify_window; + uint32_t xen_version; + uint32_t xen_caps; + uint16_t xen_gnttab_max_frames; + uint16_t xen_evtchn_max_pirq; + char *device; +}; + +void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, + AddressSpace *as, int as_id, const char *name); + +void kvm_set_max_memslot_size(hwaddr max_slot_size); + +/** + * kvm_hwpoison_page_add: + * + * Parameters: + * @ram_addr: the address in the RAM for the poisoned page + * + * Add a poisoned page to the list + * + * Return: None. + */ +void kvm_hwpoison_page_add(ram_addr_t ram_addr); +#endif diff --git a/include/system/kvm_xen.h b/include/system/kvm_xen.h new file mode 100644 index 0000000000..7d0e69f133 --- /dev/null +++ b/include/system/kvm_xen.h @@ -0,0 +1,44 @@ +/* + * Xen HVM emulation support in KVM + * + * Copyright © 2019 Oracle and/or its affiliates. All rights reserved. + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_SYSTEM_KVM_XEN_H +#define QEMU_SYSTEM_KVM_XEN_H + +/* The KVM API uses these to indicate "no GPA" or "no GFN" */ +#define INVALID_GPA UINT64_MAX +#define INVALID_GFN UINT64_MAX + +/* QEMU plays the rĂ´le of dom0 for "interdomain" communication. */ +#define DOMID_QEMU 0 + +int kvm_xen_soft_reset(void); +uint32_t kvm_xen_get_caps(void); +void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id); +bool kvm_xen_has_vcpu_callback_vector(void); +void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type); +void kvm_xen_set_callback_asserted(void); +int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port); +uint16_t kvm_xen_get_gnttab_max_frames(void); +uint16_t kvm_xen_get_evtchn_max_pirq(void); + +#define kvm_xen_has_cap(cap) (!!(kvm_xen_get_caps() & \ + KVM_XEN_HVM_CONFIG_ ## cap)) + +#define XEN_SPECIAL_AREA_ADDR 0xfeff8000UL +#define XEN_SPECIAL_AREA_SIZE 0x4000UL + +#define XEN_SPECIALPAGE_CONSOLE 0 +#define XEN_SPECIALPAGE_XENSTORE 1 + +#define XEN_SPECIAL_PFN(x) ((XEN_SPECIAL_AREA_ADDR >> TARGET_PAGE_BITS) + \ + XEN_SPECIALPAGE_##x) + +#endif /* QEMU_SYSTEM_KVM_XEN_H */ diff --git a/include/system/memory_mapping.h b/include/system/memory_mapping.h new file mode 100644 index 0000000000..021e0a6230 --- /dev/null +++ b/include/system/memory_mapping.h @@ -0,0 +1,85 @@ +/* + * QEMU memory mapping + * + * Copyright Fujitsu, Corp. 
2011, 2012 + * + * Authors: + * Wen Congyang <wency@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef MEMORY_MAPPING_H +#define MEMORY_MAPPING_H + +#include "qemu/queue.h" +#include "exec/cpu-common.h" + +typedef struct GuestPhysBlock { + /* visible to guest, reflects PCI hole, etc */ + hwaddr target_start; + + /* implies size */ + hwaddr target_end; + + /* points into host memory */ + uint8_t *host_addr; + + /* points to the MemoryRegion that this block belongs to */ + MemoryRegion *mr; + + QTAILQ_ENTRY(GuestPhysBlock) next; +} GuestPhysBlock; + +/* point-in-time snapshot of guest-visible physical mappings */ +typedef struct GuestPhysBlockList { + unsigned num; + QTAILQ_HEAD(, GuestPhysBlock) head; +} GuestPhysBlockList; + +/* The physical and virtual address in the memory mapping are contiguous. */ +typedef struct MemoryMapping { + hwaddr phys_addr; + vaddr virt_addr; + ram_addr_t length; + QTAILQ_ENTRY(MemoryMapping) next; +} MemoryMapping; + +struct MemoryMappingList { + unsigned int num; + MemoryMapping *last_mapping; + QTAILQ_HEAD(, MemoryMapping) head; +}; + +/* + * add or merge the memory region [phys_addr, phys_addr + length) into the + * memory mapping's list. The region's virtual address starts with virt_addr, + * and is contiguous. The list is sorted by phys_addr. + */ +void memory_mapping_list_add_merge_sorted(MemoryMappingList *list, + hwaddr phys_addr, + hwaddr virt_addr, + ram_addr_t length); + +void memory_mapping_list_free(MemoryMappingList *list); + +void memory_mapping_list_init(MemoryMappingList *list); + +void guest_phys_blocks_free(GuestPhysBlockList *list); +void guest_phys_blocks_init(GuestPhysBlockList *list); +void guest_phys_blocks_append(GuestPhysBlockList *list); + +bool qemu_get_guest_memory_mapping(MemoryMappingList *list, + const GuestPhysBlockList *guest_phys_blocks, + Error **errp); + +/* get guest's memory mapping without do paging(virtual address is 0). 
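+ *
+ * A sketch of how a dump-style caller might drive it (variable names and the
+ * surrounding flow are assumptions):
+ *
+ *   GuestPhysBlockList blocks;
+ *   MemoryMappingList mappings;
+ *   guest_phys_blocks_init(&blocks);
+ *   guest_phys_blocks_append(&blocks);
+ *   memory_mapping_list_init(&mappings);
+ *   qemu_get_guest_simple_memory_mapping(&mappings, &blocks);
+ *   ...
+ *   memory_mapping_list_free(&mappings);
+ *   guest_phys_blocks_free(&blocks);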
*/ +void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list, + const GuestPhysBlockList *guest_phys_blocks); + +void memory_mapping_filter(MemoryMappingList *list, int64_t begin, + int64_t length); + +#endif diff --git a/include/system/numa.h b/include/system/numa.h new file mode 100644 index 0000000000..96d4ff9b85 --- /dev/null +++ b/include/system/numa.h @@ -0,0 +1,114 @@ +#ifndef SYSTEM_NUMA_H +#define SYSTEM_NUMA_H + +#include "qemu/bitmap.h" +#include "qapi/qapi-types-machine.h" +#include "exec/cpu-common.h" + +struct CPUArchId; + +#define MAX_NODES 128 +#define NUMA_NODE_UNASSIGNED MAX_NODES +#define NUMA_DISTANCE_MIN 10 +#define NUMA_DISTANCE_DEFAULT 20 +#define NUMA_DISTANCE_MAX 254 +#define NUMA_DISTANCE_UNREACHABLE 255 + +/* the value of AcpiHmatLBInfo flags */ +enum { + HMAT_LB_MEM_MEMORY = 0, + HMAT_LB_MEM_CACHE_1ST_LEVEL = 1, + HMAT_LB_MEM_CACHE_2ND_LEVEL = 2, + HMAT_LB_MEM_CACHE_3RD_LEVEL = 3, + HMAT_LB_LEVELS /* must be the last entry */ +}; + +/* the value of AcpiHmatLBInfo data type */ +enum { + HMAT_LB_DATA_ACCESS_LATENCY = 0, + HMAT_LB_DATA_READ_LATENCY = 1, + HMAT_LB_DATA_WRITE_LATENCY = 2, + HMAT_LB_DATA_ACCESS_BANDWIDTH = 3, + HMAT_LB_DATA_READ_BANDWIDTH = 4, + HMAT_LB_DATA_WRITE_BANDWIDTH = 5, + HMAT_LB_TYPES /* must be the last entry */ +}; + +#define UINT16_BITS 16 + +typedef struct NodeInfo { + uint64_t node_mem; + struct HostMemoryBackend *node_memdev; + bool present; + bool has_cpu; + bool has_gi; + uint8_t lb_info_provided; + uint16_t initiator; + uint8_t distance[MAX_NODES]; +} NodeInfo; + +typedef struct NumaNodeMem { + uint64_t node_mem; + uint64_t node_plugged_mem; +} NumaNodeMem; + +struct HMAT_LB_Data { + uint8_t initiator; + uint8_t target; + uint64_t data; +}; +typedef struct HMAT_LB_Data HMAT_LB_Data; + +struct HMAT_LB_Info { + /* Indicates it's memory or the specified level memory side cache. */ + uint8_t hierarchy; + + /* Present the type of data, access/read/write latency or bandwidth. */ + uint8_t data_type; + + /* The range bitmap of bandwidth for calculating common base */ + uint64_t range_bitmap; + + /* The common base unit for latencies or bandwidths */ + uint64_t base; + + /* Array to store the latencies or bandwidths */ + GArray *list; +}; +typedef struct HMAT_LB_Info HMAT_LB_Info; + +struct NumaState { + /* Number of NUMA nodes */ + int num_nodes; + + /* Allow setting NUMA distance for different NUMA nodes */ + bool have_numa_distance; + + /* Detect if HMAT support is enabled. 
*/ + bool hmat_enabled; + + /* NUMA nodes information */ + NodeInfo nodes[MAX_NODES]; + + /* NUMA nodes HMAT Locality Latency and Bandwidth Information */ + HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES]; + + /* Memory Side Cache Information Structure */ + NumaHmatCacheOptions *hmat_cache[MAX_NODES][HMAT_LB_LEVELS]; +}; +typedef struct NumaState NumaState; + +void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp); +void parse_numa_opts(MachineState *ms); +void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node, + Error **errp); +void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node, + Error **errp); +void numa_complete_configuration(MachineState *ms); +void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms); +extern QemuOptsList qemu_numa_opts; +void numa_cpu_pre_plug(const struct CPUArchId *slot, DeviceState *dev, + Error **errp); +bool numa_uses_legacy_mem(void); + +#endif diff --git a/include/system/nvmm.h b/include/system/nvmm.h new file mode 100644 index 0000000000..6971ddb3a5 --- /dev/null +++ b/include/system/nvmm.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2018-2019 Maxime Villard, All rights reserved. + * + * NetBSD Virtual Machine Monitor (NVMM) accelerator support. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +/* header to be included in non-NVMM-specific code */ + +#ifndef QEMU_NVMM_H +#define QEMU_NVMM_H + +#ifdef COMPILING_PER_TARGET + +#ifdef CONFIG_NVMM + +int nvmm_enabled(void); + +#else /* CONFIG_NVMM */ + +#define nvmm_enabled() (0) + +#endif /* CONFIG_NVMM */ + +#endif /* COMPILING_PER_TARGET */ + +#endif /* QEMU_NVMM_H */ diff --git a/include/system/os-posix.h b/include/system/os-posix.h new file mode 100644 index 0000000000..b881ac6c6f --- /dev/null +++ b/include/system/os-posix.h @@ -0,0 +1,101 @@ +/* + * posix specific declarations + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010 Jes Sorensen <Jes.Sorensen@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef QEMU_OS_POSIX_H +#define QEMU_OS_POSIX_H + +#include <sys/mman.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <arpa/inet.h> +#include <netdb.h> +#include <sys/un.h> + +#ifdef CONFIG_SYSMACROS +#include <sys/sysmacros.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +void os_set_line_buffering(void); +void os_setup_early_signal_handling(void); +void os_set_proc_name(const char *s); +void os_setup_signal_handling(void); +int os_set_daemonize(bool d); +bool is_daemonized(void); +void os_daemonize(void); +bool os_set_runas(const char *user_id); +void os_set_chroot(const char *path); +void os_setup_limits(void); +void os_setup_post(void); +int os_mlock(void); + +/** + * qemu_alloc_stack: + * @sz: pointer to a size_t holding the requested usable stack size + * + * Allocate memory that can be used as a stack, for instance for + * coroutines. If the memory cannot be allocated, this function + * will abort (like g_malloc()). This function also inserts an + * additional guard page to catch a potential stack overflow. + * Note that the memory required for the guard page and alignment + * and minimal stack size restrictions will increase the value of sz. + * + * The allocated stack must be freed with qemu_free_stack(). + * + * Returns: pointer to (the lowest address of) the stack memory. + */ +void *qemu_alloc_stack(size_t *sz); + +/** + * qemu_free_stack: + * @stack: stack to free + * @sz: size of stack in bytes + * + * Free a stack allocated via qemu_alloc_stack(). Note that sz must + * be exactly the adjusted stack size returned by qemu_alloc_stack. + */ +void qemu_free_stack(void *stack, size_t sz); + +/* POSIX and Mingw32 differ in the name of the stdio lock functions. */ + +static inline void qemu_flockfile(FILE *f) +{ + flockfile(f); +} + +static inline void qemu_funlockfile(FILE *f) +{ + funlockfile(f); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/system/os-win32.h b/include/system/os-win32.h new file mode 100644 index 0000000000..b82a5d3ad9 --- /dev/null +++ b/include/system/os-win32.h @@ -0,0 +1,277 @@ +/* + * win32 specific declarations + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010 Jes Sorensen <Jes.Sorensen@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef QEMU_OS_WIN32_H +#define QEMU_OS_WIN32_H + +#include <winsock2.h> +#include <windows.h> +#include <ws2tcpip.h> +#include "qemu/typedefs.h" + +#ifdef HAVE_AFUNIX_H +#include <afunix.h> +#else +/* + * Fallback definitions of things we need in afunix.h, if not available from + * the used Windows SDK or MinGW headers. + */ +#define UNIX_PATH_MAX 108 + +typedef struct sockaddr_un { + ADDRESS_FAMILY sun_family; + char sun_path[UNIX_PATH_MAX]; +} SOCKADDR_UN, *PSOCKADDR_UN; + +#define SIO_AF_UNIX_GETPEERPID _WSAIOR(IOC_VENDOR, 256) +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__aarch64__) +/* + * On windows-arm64, setjmp is available in only one variant, and longjmp always + * does stack unwinding. This crash with generated code. + * Thus, we use another implementation of setjmp (not windows one), coming from + * mingw, which never performs stack unwinding. + */ +#undef setjmp +#undef longjmp +/* + * These functions are not declared in setjmp.h because __aarch64__ defines + * setjmp to _setjmpex instead. However, they are still defined in libmingwex.a, + * which gets linked automatically. + */ +int __mingw_setjmp(jmp_buf); +void __attribute__((noreturn)) __mingw_longjmp(jmp_buf, int); +#define setjmp(env) __mingw_setjmp(env) +#define longjmp(env, val) __mingw_longjmp(env, val) +#elif defined(_WIN64) +/* + * On windows-x64, setjmp is implemented by _setjmp which needs a second parameter. + * If this parameter is NULL, longjump does no stack unwinding. + * That is what we need for QEMU. Passing the value of register rsp (default) + * lets longjmp try a stack unwinding which will crash with generated code. + */ +# undef setjmp +# define setjmp(env) _setjmp(env, NULL) +#endif /* __aarch64__ */ +/* QEMU uses sigsetjmp()/siglongjmp() as the portable way to specify + * "longjmp and don't touch the signal masks". Since we know that the + * savemask parameter will always be zero we can safely define these + * in terms of setjmp/longjmp on Win32. + */ +#define sigjmp_buf jmp_buf +#define sigsetjmp(env, savemask) setjmp(env) +#define siglongjmp(env, val) longjmp(env, val) + +/* Missing POSIX functions. Don't use MinGW-w64 macros. */ +#ifndef _POSIX_THREAD_SAFE_FUNCTIONS +#undef gmtime_r +struct tm *gmtime_r(const time_t *timep, struct tm *result); +#undef localtime_r +struct tm *localtime_r(const time_t *timep, struct tm *result); +#endif /* _POSIX_THREAD_SAFE_FUNCTIONS */ + +static inline void os_setup_signal_handling(void) {} +static inline void os_daemonize(void) {} +static inline void os_setup_post(void) {} +static inline void os_set_proc_name(const char *dummy) {} +void os_set_line_buffering(void); +void os_setup_early_signal_handling(void); + +int getpagesize(void); + +#if !defined(EPROTONOSUPPORT) +# define EPROTONOSUPPORT EINVAL +#endif + +static inline int os_set_daemonize(bool d) +{ + if (d) { + return -ENOTSUP; + } + return 0; +} + +static inline bool is_daemonized(void) +{ + return false; +} + +static inline int os_mlock(void) +{ + return -ENOSYS; +} + +static inline void os_setup_limits(void) +{ + return; +} + +#define fsync _commit + +#if !defined(lseek) +# define lseek _lseeki64 +#endif + +int qemu_ftruncate64(int, int64_t); + +#if !defined(ftruncate) +# define ftruncate qemu_ftruncate64 +#endif + +static inline char *realpath(const char *path, char *resolved_path) +{ + _fullpath(resolved_path, path, _MAX_PATH); + return resolved_path; +} + +/* + * Older versions of MinGW do not import _lock_file and _unlock_file properly. 
+ * This was fixed for v6.0.0 with commit b48e3ac8969d. + */ +static inline void qemu_flockfile(FILE *f) +{ +#ifdef HAVE__LOCK_FILE + _lock_file(f); +#endif +} + +static inline void qemu_funlockfile(FILE *f) +{ +#ifdef HAVE__LOCK_FILE + _unlock_file(f); +#endif +} + +/* Helper for WSAEventSelect, to report errors */ +bool qemu_socket_select(int sockfd, WSAEVENT hEventObject, + long lNetworkEvents, Error **errp); + +bool qemu_socket_unselect(int sockfd, Error **errp); + +/* We wrap all the sockets functions so that we can set errno based on + * WSAGetLastError(), and use file-descriptors instead of SOCKET. + */ + +/* + * qemu_close_socket_osfhandle: + * @fd: a file descriptor associated with a SOCKET + * + * Close only the C run-time file descriptor, leave the SOCKET opened. + * + * Returns zero on success. On error, -1 is returned, and errno is set to + * indicate the error. + */ +int qemu_close_socket_osfhandle(int fd); + +#undef close +#define close qemu_close_wrap +int qemu_close_wrap(int fd); + +#undef connect +#define connect qemu_connect_wrap +int qemu_connect_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen); + +#undef listen +#define listen qemu_listen_wrap +int qemu_listen_wrap(int sockfd, int backlog); + +#undef bind +#define bind qemu_bind_wrap +int qemu_bind_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen); + +#undef socket +#define socket qemu_socket_wrap +int qemu_socket_wrap(int domain, int type, int protocol); + +#undef accept +#define accept qemu_accept_wrap +int qemu_accept_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef shutdown +#define shutdown qemu_shutdown_wrap +int qemu_shutdown_wrap(int sockfd, int how); + +#undef ioctlsocket +#define ioctlsocket qemu_ioctlsocket_wrap +int qemu_ioctlsocket_wrap(int fd, int req, void *val); + +#undef getsockopt +#define getsockopt qemu_getsockopt_wrap +int qemu_getsockopt_wrap(int sockfd, int level, int optname, + void *optval, socklen_t *optlen); + +#undef setsockopt +#define setsockopt qemu_setsockopt_wrap +int qemu_setsockopt_wrap(int sockfd, int level, int optname, + const void *optval, socklen_t optlen); + +#undef getpeername +#define getpeername qemu_getpeername_wrap +int qemu_getpeername_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef getsockname +#define getsockname qemu_getsockname_wrap +int qemu_getsockname_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef send +#define send qemu_send_wrap +ssize_t qemu_send_wrap(int sockfd, const void *buf, size_t len, int flags); + +#undef sendto +#define sendto qemu_sendto_wrap +ssize_t qemu_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *addr, socklen_t addrlen); + +#undef recv +#define recv qemu_recv_wrap +ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags); + +#undef recvfrom +#define recvfrom qemu_recvfrom_wrap +ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, + struct sockaddr *addr, socklen_t *addrlen); + +EXCEPTION_DISPOSITION +win32_close_exception_handler(struct _EXCEPTION_RECORD*, void*, + struct _CONTEXT*, void*); + +void *qemu_win32_map_alloc(size_t size, HANDLE *h, Error **errp); +void qemu_win32_map_free(void *ptr, HANDLE h, Error **errp); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/system/qtest.h b/include/system/qtest.h new file mode 100644 index 0000000000..c161d75165 --- /dev/null +++ b/include/system/qtest.h @@ -0,0 +1,39 @@ +/* + * Test Server + * + * Copyright 
IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QTEST_H +#define QTEST_H + +#include "chardev/char.h" + +extern bool qtest_allowed; + +static inline bool qtest_enabled(void) +{ + return qtest_allowed; +} + +#ifndef CONFIG_USER_ONLY +void qtest_send_prefix(CharBackend *chr); +void G_GNUC_PRINTF(2, 3) qtest_sendf(CharBackend *chr, const char *fmt, ...); +void qtest_set_command_cb(bool (*pc_cb)(CharBackend *chr, gchar **words)); +bool qtest_driver(void); + +void qtest_server_init(const char *qtest_chrdev, const char *qtest_log, Error **errp); + +void qtest_server_set_send_handler(void (*send)(void *, const char *), + void *opaque); +void qtest_server_inproc_recv(void *opaque, const char *buf); +#endif + +#endif diff --git a/include/system/replay.h b/include/system/replay.h new file mode 100644 index 0000000000..8926d8cf4b --- /dev/null +++ b/include/system/replay.h @@ -0,0 +1,183 @@ +/* + * QEMU replay (system interface) + * + * Copyright (c) 2010-2015 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ +#ifndef SYSTEM_REPLAY_H +#define SYSTEM_REPLAY_H + +#ifdef CONFIG_USER_ONLY +#error Cannot include this header from user emulation +#endif + +#include "exec/replay-core.h" +#include "qapi/qapi-types-misc.h" +#include "qapi/qapi-types-run-state.h" +#include "qapi/qapi-types-ui.h" +#include "block/aio.h" + +/* replay clock kinds */ +enum ReplayClockKind { + /* host_clock */ + REPLAY_CLOCK_HOST, + /* virtual_rt_clock */ + REPLAY_CLOCK_VIRTUAL_RT, + REPLAY_CLOCK_COUNT +}; +typedef enum ReplayClockKind ReplayClockKind; + +/* IDs of the checkpoints */ +enum ReplayCheckpoint { + CHECKPOINT_CLOCK_WARP_START, + CHECKPOINT_CLOCK_WARP_ACCOUNT, + CHECKPOINT_RESET_REQUESTED, + CHECKPOINT_SUSPEND_REQUESTED, + CHECKPOINT_CLOCK_VIRTUAL, + CHECKPOINT_CLOCK_HOST, + CHECKPOINT_CLOCK_VIRTUAL_RT, + CHECKPOINT_INIT, + CHECKPOINT_RESET, + CHECKPOINT_COUNT +}; +typedef enum ReplayCheckpoint ReplayCheckpoint; + +typedef struct ReplayNetState ReplayNetState; + +/* Name of the initial VM snapshot */ +extern char *replay_snapshot; + +/* Replay locking + * + * The locks are needed to protect the shared structures and log file + * when doing record/replay. They also are the main sync-point between + * the main-loop thread and the vCPU thread. This was a role + * previously filled by the BQL which has been busy trying to reduce + * its impact across the code. This ensures blocks of events stay + * sequential and reproducible. + */ + +void replay_mutex_lock(void); +void replay_mutex_unlock(void); + +/* Processing the instructions */ + +/*! Returns number of executed instructions. */ +uint64_t replay_get_current_icount(void); +/*! Returns number of instructions to execute in replay mode. */ +int replay_get_instructions(void); +/*! Updates instructions counter in replay mode. */ +void replay_account_executed_instructions(void); + +/* Processing clocks and other time sources */ + +/*! Save the specified clock */ +int64_t replay_save_clock(ReplayClockKind kind, int64_t clock, + int64_t raw_icount); +/*! Read the specified clock from the log or return cached data */ +int64_t replay_read_clock(ReplayClockKind kind, int64_t raw_icount); +/*! 
Saves or reads the clock depending on the current replay mode. */ +#define REPLAY_CLOCK(clock, value) \ + !icount_enabled() ? (value) : \ + (replay_mode == REPLAY_MODE_PLAY \ + ? replay_read_clock((clock), icount_get_raw()) \ + : replay_mode == REPLAY_MODE_RECORD \ + ? replay_save_clock((clock), (value), icount_get_raw()) \ + : (value)) +#define REPLAY_CLOCK_LOCKED(clock, value) \ + !icount_enabled() ? (value) : \ + (replay_mode == REPLAY_MODE_PLAY \ + ? replay_read_clock((clock), icount_get_raw_locked()) \ + : replay_mode == REPLAY_MODE_RECORD \ + ? replay_save_clock((clock), (value), icount_get_raw_locked()) \ + : (value)) + +/* Events */ + +/*! Called when qemu shutdown is requested. */ +void replay_shutdown_request(ShutdownCause cause); +/*! Should be called at check points in the execution. + These check points are skipped, if they were not met. + Saves checkpoint in the SAVE mode and validates in the PLAY mode. + Returns 0 in PLAY mode if checkpoint was not found. + Returns 1 in all other cases. */ +bool replay_checkpoint(ReplayCheckpoint checkpoint); +/*! Used to determine that checkpoint or async event is pending. + Does not proceed to the next event in the log. */ +bool replay_has_event(void); +/* + * Processes the async events added to the queue (while recording) + * or reads the events from the file (while replaying). + */ +void replay_async_events(void); + +/* Asynchronous events queue */ + +/*! Enables storing events in the queue */ +void replay_enable_events(void); +/*! Returns true when saving events is enabled */ +bool replay_events_enabled(void); +/* Flushes events queue */ +void replay_flush_events(void); +/*! Adds bottom half event to the queue */ +void replay_bh_schedule_event(QEMUBH *bh); +/* Adds oneshot bottom half event to the queue */ +void replay_bh_schedule_oneshot_event(AioContext *ctx, + QEMUBHFunc *cb, void *opaque); +/*! Adds input event to the queue */ +void replay_input_event(QemuConsole *src, InputEvent *evt); +/*! Adds input sync event to the queue */ +void replay_input_sync_event(void); +/*! Adds block layer event to the queue */ +void replay_block_event(QEMUBH *bh, uint64_t id); +/*! Returns ID for the next block event */ +uint64_t blkreplay_next_id(void); + +/* Character device */ + +/*! Registers char driver to save it's events */ +void replay_register_char_driver(struct Chardev *chr); +/*! Saves write to char device event to the log */ +void replay_chr_be_write(struct Chardev *s, const uint8_t *buf, int len); +/*! Writes char write return value to the replay log. */ +void replay_char_write_event_save(int res, int offset); +/*! Reads char write return value from the replay log. */ +void replay_char_write_event_load(int *res, int *offset); +/*! Reads information about read_all character event. */ +int replay_char_read_all_load(uint8_t *buf); +/*! Writes character read_all error code into the replay log. */ +void replay_char_read_all_save_error(int res); +/*! Writes character read_all execution result into the replay log. */ +void replay_char_read_all_save_buf(uint8_t *buf, int offset); + +/* Network */ + +/*! Registers replay network filter attached to some backend. */ +ReplayNetState *replay_register_net(NetFilterState *nfs); +/*! Unregisters replay network filter. */ +void replay_unregister_net(ReplayNetState *rns); +/*! Called to write network packet to the replay log. */ +void replay_net_packet_event(ReplayNetState *rns, unsigned flags, + const struct iovec *iov, int iovcnt); + +/* Audio */ + +/*! 
Saves/restores number of played samples of audio out operation. */ +void replay_audio_out(size_t *played); +/*! Saves/restores recorded samples of audio in operation. */ +void replay_audio_in(size_t *recorded, void *samples, size_t *wpos, size_t size); + +/* VM state operations */ + +/*! Called at the start of execution. + Loads or saves initial vmstate depending on execution mode. */ +void replay_vmstate_init(void); +/*! Called to ensure that replay state is consistent and VM snapshot + can be created */ +bool replay_can_snapshot(void); + +#endif diff --git a/include/system/reset.h b/include/system/reset.h new file mode 100644 index 0000000000..97131d94cf --- /dev/null +++ b/include/system/reset.h @@ -0,0 +1,127 @@ +/* + * Reset handlers. + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2016 Red Hat, Inc. + * Copyright (c) 2024 Linaro, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_SYSTEM_RESET_H +#define QEMU_SYSTEM_RESET_H + +#include "hw/resettable.h" +#include "qapi/qapi-events-run-state.h" + +typedef void QEMUResetHandler(void *opaque); + +/** + * qemu_register_resettable: Register an object to be reset + * @obj: object to be reset: it must implement the Resettable interface + * + * Register @obj on the list of objects which will be reset when the + * simulation is reset. These objects will be reset in the order + * they were added, using the three-phase Resettable protocol, + * so first all objects go through the enter phase, then all objects + * go through the hold phase, and then finally all go through the + * exit phase. + * + * It is not permitted to register or unregister reset functions or + * resettable objects from within any of the reset phase methods of @obj. + * + * We assume that the caller holds the BQL. + */ +void qemu_register_resettable(Object *obj); + +/** + * qemu_unregister_resettable: Unregister an object to be reset + * @obj: object to unregister + * + * Remove @obj from the list of objects which are reset when the + * simulation is reset. It must have been previously added to + * the list via qemu_register_resettable(). + * + * We assume that the caller holds the BQL. + */ +void qemu_unregister_resettable(Object *obj); + +/** + * qemu_register_reset: Register a callback for system reset + * @func: function to call + * @opaque: opaque data to pass to @func + * + * Register @func on the list of functions which are called when the + * entire system is reset. 
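+ * (A sketch of such a registration, for illustration only; the handler name
+ * and the opaque pointer are assumptions:
+ *
+ *   static void my_board_reset(void *opaque) { ... }
+ *   ...
+ *   qemu_register_reset(my_board_reset, my_state);
+ * )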
Functions registered with this API and + * Resettable objects registered with qemu_register_resettable() are + * handled together, in the order in which they were registered. + * Functions registered with this API are called in the 'hold' phase + * of the 3-phase reset. + * + * In general this function should not be used in new code where possible; + * for instance, device model reset is better accomplished using the + * methods on DeviceState. + * + * It is not permitted to register or unregister reset functions or + * resettable objects from within the @func callback. + * + * We assume that the caller holds the BQL. + */ +void qemu_register_reset(QEMUResetHandler *func, void *opaque); + +/** + * qemu_register_reset_nosnapshotload: Register a callback for system reset + * @func: function to call + * @opaque: opaque data to pass to @func + * + * This is the same as qemu_register_reset(), except that @func is + * not called if the reason that the system is being reset is to + * put it into a clean state prior to loading a snapshot (i.e. for + * SHUTDOWN_CAUSE_SNAPSHOT_LOAD). + */ +void qemu_register_reset_nosnapshotload(QEMUResetHandler *func, void *opaque); + +/** + * qemu_unregister_reset: Unregister a system reset callback + * @func: function registered with qemu_register_reset() + * @opaque: the same opaque data that was passed to qemu_register_reset() + * + * Undo the effects of a qemu_register_reset(). The @func and @opaque + * must both match the arguments originally used with qemu_register_reset(). + * + * We assume that the caller holds the BQL. + */ +void qemu_unregister_reset(QEMUResetHandler *func, void *opaque); + +/** + * qemu_devices_reset: Perform a complete system reset + * @reason: type of the reset + * + * This function performs the low-level work needed to do a complete reset + * of the system (calling all the callbacks registered with + * qemu_register_reset() and resetting all the Resettable objects registered + * with qemu_register_resettable()). It should only be called by the code in a + * MachineClass reset method. + * + * If you want to trigger a system reset from, for instance, a device + * model, don't use this function. Use qemu_system_reset_request(). + */ +void qemu_devices_reset(ResetType type); + +#endif diff --git a/include/system/rng-random.h b/include/system/rng-random.h new file mode 100644 index 0000000000..0fdc6c6974 --- /dev/null +++ b/include/system/rng-random.h @@ -0,0 +1,21 @@ +/* + * QEMU Random Number Generator Backend + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_RNG_RANDOM_H +#define QEMU_RNG_RANDOM_H + +#include "qom/object.h" + +#define TYPE_RNG_RANDOM "rng-random" +OBJECT_DECLARE_SIMPLE_TYPE(RngRandom, RNG_RANDOM) + + +#endif diff --git a/include/system/rng.h b/include/system/rng.h new file mode 100644 index 0000000000..e383f87d20 --- /dev/null +++ b/include/system/rng.h @@ -0,0 +1,89 @@ +/* + * QEMU Random Number Generator Backend + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#ifndef QEMU_RNG_H +#define QEMU_RNG_H + +#include "qemu/queue.h" +#include "qom/object.h" + +#define TYPE_RNG_BACKEND "rng-backend" +OBJECT_DECLARE_TYPE(RngBackend, RngBackendClass, + RNG_BACKEND) + +#define TYPE_RNG_BUILTIN "rng-builtin" + +typedef struct RngRequest RngRequest; + +typedef void (EntropyReceiveFunc)(void *opaque, + const void *data, + size_t size); + +struct RngRequest +{ + EntropyReceiveFunc *receive_entropy; + uint8_t *data; + void *opaque; + size_t offset; + size_t size; + QSIMPLEQ_ENTRY(RngRequest) next; +}; + +struct RngBackendClass +{ + ObjectClass parent_class; + + void (*request_entropy)(RngBackend *s, RngRequest *req); + + void (*opened)(RngBackend *s, Error **errp); +}; + +struct RngBackend +{ + Object parent; + + /*< protected >*/ + bool opened; + QSIMPLEQ_HEAD(, RngRequest) requests; +}; + + +/** + * rng_backend_request_entropy: + * @s: the backend to request entropy from + * @size: the number of bytes of data to request + * @receive_entropy: a function to be invoked when entropy is available + * @opaque: data that should be passed to @receive_entropy + * + * This function is used by the front-end to request entropy from an entropy + * source. This function can be called multiple times before @receive_entropy + * is invoked with different values of @receive_entropy and @opaque. The + * backend will queue each request and handle appropriately. + * + * The backend does not need to pass the full amount of data to @receive_entropy + * but will pass a value greater than 0. + */ +void rng_backend_request_entropy(RngBackend *s, size_t size, + EntropyReceiveFunc *receive_entropy, + void *opaque); + +/** + * rng_backend_free_request: + * @s: the backend that created the request + * @req: the request to finalize + * + * Used by child rng backend classes to finalize requests once they've been + * processed. The request is removed from the list of active requests and + * deleted. + */ +void rng_backend_finalize_request(RngBackend *s, RngRequest *req); +#endif diff --git a/include/system/rtc.h b/include/system/rtc.h new file mode 100644 index 0000000000..cde83fab15 --- /dev/null +++ b/include/system/rtc.h @@ -0,0 +1,58 @@ +/* + * RTC configuration and clock read + * + * Copyright (c) 2003-2021 QEMU contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef SYSTEM_RTC_H +#define SYSTEM_RTC_H + +/** + * qemu_get_timedate: Get the current RTC time + * @tm: struct tm to fill in with RTC time + * @offset: offset in seconds to adjust the RTC time by before + * converting to struct tm format. + * + * This function fills in @tm with the current RTC time, as adjusted + * by @offset (for example, if @offset is 3600 then the returned time/date + * will be one hour further ahead than the current RTC time). + * + * The usual use is by RTC device models, which should call this function + * to find the time/date value that they should return to the guest + * when it reads the RTC registers. + * + * The behaviour of the clock whose value this function returns will + * depend on the -rtc command line option passed by the user. + */ +void qemu_get_timedate(struct tm *tm, time_t offset); + +/** + * qemu_timedate_diff: Return difference between a struct tm and the RTC + * @tm: struct tm containing the date/time to compare against + * + * Returns the difference in seconds between the RTC clock time + * and the date/time specified in @tm. For example, if @tm specifies + * a timestamp one hour further ahead than the current RTC time + * then this function will return 3600. + */ +time_t qemu_timedate_diff(struct tm *tm); + +#endif diff --git a/include/system/runstate-action.h b/include/system/runstate-action.h new file mode 100644 index 0000000000..db4e3099ae --- /dev/null +++ b/include/system/runstate-action.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2020 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef RUNSTATE_ACTION_H +#define RUNSTATE_ACTION_H + +#include "qapi/qapi-commands-run-state.h" + +/* in system/runstate-action.c */ +extern RebootAction reboot_action; +extern ShutdownAction shutdown_action; +extern PanicAction panic_action; + +#endif /* RUNSTATE_ACTION_H */ diff --git a/include/system/runstate.h b/include/system/runstate.h new file mode 100644 index 0000000000..bffc3719d4 --- /dev/null +++ b/include/system/runstate.h @@ -0,0 +1,112 @@ +#ifndef SYSTEM_RUNSTATE_H +#define SYSTEM_RUNSTATE_H + +#include "qapi/qapi-types-run-state.h" +#include "qemu/notify.h" + +bool runstate_check(RunState state); +void runstate_set(RunState new_state); +RunState runstate_get(void); +bool runstate_is_running(void); +bool runstate_needs_reset(void); +void runstate_replay_enable(void); + +typedef void VMChangeStateHandler(void *opaque, bool running, RunState state); + +VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, + void *opaque); +VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( + VMChangeStateHandler *cb, void *opaque, int priority); +VMChangeStateEntry * +qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, + VMChangeStateHandler *prepare_cb, + void *opaque, int priority); +VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, + VMChangeStateHandler *cb, + void *opaque); +VMChangeStateEntry *qdev_add_vm_change_state_handler_full( + DeviceState *dev, VMChangeStateHandler *cb, + VMChangeStateHandler *prepare_cb, void *opaque); +void qemu_del_vm_change_state_handler(VMChangeStateEntry *e); +/** + * vm_state_notify: Notify the state of the VM + * + * @running: whether the VM is running or not. + * @state: the #RunState of the VM. 
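+ *
+ * Handlers that receive this notification are registered with the
+ * qemu_add_vm_change_state_handler() family declared above. A sketch, with
+ * the callback name and opaque pointer chosen for illustration:
+ *
+ *   static void my_vm_state_change(void *opaque, bool running, RunState state)
+ *   {
+ *       ...
+ *   }
+ *   ...
+ *   qemu_add_vm_change_state_handler(my_vm_state_change, my_dev);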
+ */ +void vm_state_notify(bool running, RunState state); + +static inline bool shutdown_caused_by_guest(ShutdownCause cause) +{ + return cause >= SHUTDOWN_CAUSE_GUEST_SHUTDOWN; +} + +/* + * In a "live" state, the vcpu clock is ticking, and the runstate notifiers + * think we are running. + */ +static inline bool runstate_is_live(RunState state) +{ + return state == RUN_STATE_RUNNING || state == RUN_STATE_SUSPENDED; +} + +void vm_start(void); + +/** + * vm_prepare_start: Prepare for starting/resuming the VM + * + * @step_pending: whether any of the CPUs is about to be single-stepped by gdb + */ +int vm_prepare_start(bool step_pending); + +/** + * vm_resume: If @state is a live state, start the vm and set the state, + * else just set the state. + * + * @state: the state to restore + */ +void vm_resume(RunState state); + +int vm_stop(RunState state); +int vm_stop_force_state(RunState state); +int vm_shutdown(void); +void vm_set_suspended(bool suspended); +bool vm_get_suspended(void); + +typedef enum WakeupReason { + /* Always keep QEMU_WAKEUP_REASON_NONE = 0 */ + QEMU_WAKEUP_REASON_NONE = 0, + QEMU_WAKEUP_REASON_RTC, + QEMU_WAKEUP_REASON_PMTIMER, + QEMU_WAKEUP_REASON_OTHER, +} WakeupReason; + +void qemu_system_reset_request(ShutdownCause reason); +void qemu_system_suspend_request(void); +void qemu_register_suspend_notifier(Notifier *notifier); +bool qemu_wakeup_suspend_enabled(void); +void qemu_system_wakeup_request(WakeupReason reason, Error **errp); +void qemu_system_wakeup_enable(WakeupReason reason, bool enabled); +void qemu_register_wakeup_notifier(Notifier *notifier); +void qemu_register_wakeup_support(void); +void qemu_system_shutdown_request_with_code(ShutdownCause reason, + int exit_code); +void qemu_system_shutdown_request(ShutdownCause reason); +void qemu_system_powerdown_request(void); +void qemu_register_powerdown_notifier(Notifier *notifier); +void qemu_register_shutdown_notifier(Notifier *notifier); +void qemu_system_debug_request(void); +void qemu_system_vmstop_request(RunState reason); +void qemu_system_vmstop_request_prepare(void); +bool qemu_vmstop_requested(RunState *r); +ShutdownCause qemu_shutdown_requested_get(void); +ShutdownCause qemu_reset_requested_get(void); +void qemu_system_killed(int signal, pid_t pid); +void qemu_system_reset(ShutdownCause reason); +void qemu_system_guest_panicked(GuestPanicInformation *info); +void qemu_system_guest_crashloaded(GuestPanicInformation *info); +void qemu_system_guest_pvshutdown(void); +bool qemu_system_dump_in_progress(void); + +#endif + diff --git a/include/system/seccomp.h b/include/system/seccomp.h new file mode 100644 index 0000000000..fe859894f6 --- /dev/null +++ b/include/system/seccomp.h @@ -0,0 +1,26 @@ +/* + * QEMU seccomp mode 2 support with libseccomp + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Eduardo Otubo <eotubo@br.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ +#ifndef QEMU_SECCOMP_H +#define QEMU_SECCOMP_H + +#define QEMU_SECCOMP_SET_DEFAULT (1 << 0) +#define QEMU_SECCOMP_SET_OBSOLETE (1 << 1) +#define QEMU_SECCOMP_SET_PRIVILEGED (1 << 2) +#define QEMU_SECCOMP_SET_SPAWN (1 << 3) +#define QEMU_SECCOMP_SET_RESOURCECTL (1 << 4) + +int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp); + +#endif diff --git a/include/system/spdm-socket.h b/include/system/spdm-socket.h new file mode 100644 index 0000000000..5d8bd9aa4e --- /dev/null +++ b/include/system/spdm-socket.h @@ -0,0 +1,74 @@ +/* + * QEMU SPDM socket support + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef SPDM_REQUESTER_H +#define SPDM_REQUESTER_H + +/** + * spdm_socket_connect: connect to an external SPDM socket + * @port: port to connect to + * @errp: error object handle + * + * This will connect to an external SPDM socket server. On error + * it will return -1 and errp will be set. On success this function + * will return the socket number. + */ +int spdm_socket_connect(uint16_t port, Error **errp); + +/** + * spdm_socket_rsp: send and receive a message to a SPDM server + * @socket: socket returned from spdm_socket_connect() + * @transport_type: SPDM_SOCKET_TRANSPORT_TYPE_* macro + * @req: request buffer + * @req_len: request buffer length + * @rsp: response buffer + * @rsp_len: response buffer length + * + * Send platform data to a SPDM server on socket and then receive + * a response. + */ +uint32_t spdm_socket_rsp(const int socket, uint32_t transport_type, + void *req, uint32_t req_len, + void *rsp, uint32_t rsp_len); + +/** + * spdm_socket_close: send a shutdown command to the server + * @socket: socket returned from spdm_socket_connect() + * @transport_type: SPDM_SOCKET_TRANSPORT_TYPE_* macro + * + * This will issue a shutdown command to the server. 
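+ *
+ * An end-to-end sketch (the port number, buffers and choice of transport
+ * type are illustrative assumptions):
+ *
+ *   int fd = spdm_socket_connect(2323, errp);
+ *   if (fd >= 0) {
+ *       uint32_t got = spdm_socket_rsp(fd, SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE,
+ *                                      req, req_len, rsp, sizeof(rsp));
+ *       ...
+ *       spdm_socket_close(fd, SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE);
+ *   }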
+ */ +void spdm_socket_close(const int socket, uint32_t transport_type); + +#define SPDM_SOCKET_COMMAND_NORMAL 0x0001 +#define SPDM_SOCKET_COMMAND_OOB_ENCAP_KEY_UPDATE 0x8001 +#define SPDM_SOCKET_COMMAND_CONTINUE 0xFFFD +#define SPDM_SOCKET_COMMAND_SHUTDOWN 0xFFFE +#define SPDM_SOCKET_COMMAND_UNKOWN 0xFFFF +#define SPDM_SOCKET_COMMAND_TEST 0xDEAD + +#define SPDM_SOCKET_TRANSPORT_TYPE_MCTP 0x01 +#define SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE 0x02 + +#define SPDM_SOCKET_MAX_MESSAGE_BUFFER_SIZE 0x1200 + +#endif diff --git a/include/system/stats.h b/include/system/stats.h new file mode 100644 index 0000000000..42c236c795 --- /dev/null +++ b/include/system/stats.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2022 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef STATS_H +#define STATS_H + +#include "qapi/qapi-types-stats.h" + +typedef void StatRetrieveFunc(StatsResultList **result, StatsTarget target, + strList *names, strList *targets, Error **errp); +typedef void SchemaRetrieveFunc(StatsSchemaList **result, Error **errp); + +/* + * Register callbacks for the QMP query-stats command. + * + * @provider: stats provider checked against QMP command arguments + * @stats_fn: routine to query stats: + * @schema_fn: routine to query stat schemas: + */ +void add_stats_callbacks(StatsProvider provider, + StatRetrieveFunc *stats_fn, + SchemaRetrieveFunc *schemas_fn); + +/* + * Helper routines for adding stats entries to the results lists. + */ +void add_stats_entry(StatsResultList **, StatsProvider, const char *id, + StatsList *stats_list); +void add_stats_schema(StatsSchemaList **, StatsProvider, StatsTarget, + StatsSchemaValueList *); + +/* + * True if a string matches the filter passed to the stats_fn callback, + * false otherwise. + * + * Note that an empty list means no filtering, i.e. all strings will + * return true. + */ +bool apply_str_list_filter(const char *string, strList *list); + +#endif /* STATS_H */ diff --git a/include/system/system.h b/include/system/system.h new file mode 100644 index 0000000000..5364ad4f27 --- /dev/null +++ b/include/system/system.h @@ -0,0 +1,114 @@ +#ifndef SYSTEM_H +#define SYSTEM_H +/* Misc. things related to the system emulator. 
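The stats interface above is callback based: a provider registers one routine to retrieve values and one to describe schemas, and QMP query-stats calls back into them. A minimal sketch, illustrative only; the provider value STATS_PROVIDER_KVM and the empty callback bodies are placeholders.

    static void example_stats_cb(StatsResultList **result, StatsTarget target,
                                 strList *names, strList *targets, Error **errp)
    {
        /* filter ids with apply_str_list_filter(), then call
         * add_stats_entry(result, provider, id, stats_list) per object */
    }

    static void example_schemas_cb(StatsSchemaList **result, Error **errp)
    {
        /* add_stats_schema(result, provider, target, schema_value_list) */
    }

    static void example_register_stats(void)
    {
        add_stats_callbacks(STATS_PROVIDER_KVM,
                            example_stats_cb, example_schemas_cb);
    }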
*/ + +#include "qemu/timer.h" +#include "qemu/notify.h" +#include "qemu/uuid.h" + +/* vl.c */ + +extern int only_migratable; +extern const char *qemu_name; +extern QemuUUID qemu_uuid; +extern bool qemu_uuid_set; + +const char *qemu_get_vm_name(void); + +void qemu_add_exit_notifier(Notifier *notify); +void qemu_remove_exit_notifier(Notifier *notify); + +void qemu_add_machine_init_done_notifier(Notifier *notify); +void qemu_remove_machine_init_done_notifier(Notifier *notify); + +void configure_rtc(QemuOpts *opts); + +void qemu_init_subsystems(void); + +extern int autostart; + +typedef enum { + VGA_NONE, VGA_STD, VGA_CIRRUS, VGA_VMWARE, VGA_XENFB, VGA_QXL, + VGA_TCX, VGA_CG3, VGA_DEVICE, VGA_VIRTIO, + VGA_TYPE_MAX, +} VGAInterfaceType; + +extern int vga_interface_type; +extern bool vga_interface_created; + +extern int graphic_width; +extern int graphic_height; +extern int graphic_depth; +extern int display_opengl; +extern const char *keyboard_layout; +extern int old_param; +extern uint8_t *boot_splash_filedata; +extern bool enable_mlock; +extern bool enable_cpu_pm; +extern QEMUClockType rtc_clock; + +#define MAX_OPTION_ROMS 16 +typedef struct QEMUOptionRom { + const char *name; + int32_t bootindex; +} QEMUOptionRom; +extern QEMUOptionRom option_rom[MAX_OPTION_ROMS]; +extern int nb_option_roms; + +#define MAX_PROM_ENVS 128 +extern const char *prom_envs[MAX_PROM_ENVS]; +extern unsigned int nb_prom_envs; + +/* serial ports */ + +/* Return the Chardev for serial port i, or NULL if none */ +Chardev *serial_hd(int i); + +/* parallel ports */ + +#define MAX_PARALLEL_PORTS 3 + +extern Chardev *parallel_hds[MAX_PARALLEL_PORTS]; + +void add_boot_device_path(int32_t bootindex, DeviceState *dev, + const char *suffix); +char *get_boot_devices_list(size_t *size); + +DeviceState *get_boot_device(uint32_t position); +void check_boot_index(int32_t bootindex, Error **errp); +void del_boot_device_path(DeviceState *dev, const char *suffix); +void device_add_bootindex_property(Object *obj, int32_t *bootindex, + const char *name, const char *suffix, + DeviceState *dev); +void restore_boot_order(void *opaque); +void validate_bootdevices(const char *devices, Error **errp); +void add_boot_device_lchs(DeviceState *dev, const char *suffix, + uint32_t lcyls, uint32_t lheads, uint32_t lsecs); +void del_boot_device_lchs(DeviceState *dev, const char *suffix); +char *get_boot_devices_lchs_list(size_t *size); + +/* handler to set the boot_device order for a specific type of MachineClass */ +typedef void QEMUBootSetHandler(void *opaque, const char *boot_order, + Error **errp); +void qemu_register_boot_set(QEMUBootSetHandler *func, void *opaque); +void qemu_boot_set(const char *boot_order, Error **errp); + +bool defaults_enabled(void); + +void qemu_init(int argc, char **argv); +int qemu_main_loop(void); +void qemu_cleanup(int); + +extern QemuOptsList qemu_legacy_drive_opts; +extern QemuOptsList qemu_common_drive_opts; +extern QemuOptsList qemu_drive_opts; +extern QemuOptsList bdrv_runtime_opts; +extern QemuOptsList qemu_chardev_opts; +extern QemuOptsList qemu_device_opts; +extern QemuOptsList qemu_netdev_opts; +extern QemuOptsList qemu_nic_opts; +extern QemuOptsList qemu_net_opts; +extern QemuOptsList qemu_global_opts; +extern QemuOptsList qemu_semihosting_config_opts; + +#endif diff --git a/include/system/tcg.h b/include/system/tcg.h new file mode 100644 index 0000000000..73229648c6 --- /dev/null +++ b/include/system/tcg.h @@ -0,0 +1,20 @@ +/* + * QEMU TCG support + * + * This work is licensed under the terms of the GNU GPL, 
version 2 or later. + * See the COPYING file in the top-level directory. + */ + +/* header to be included in non-TCG-specific code */ + +#ifndef SYSTEM_TCG_H +#define SYSTEM_TCG_H + +#ifdef CONFIG_TCG +extern bool tcg_allowed; +#define tcg_enabled() (tcg_allowed) +#else +#define tcg_enabled() 0 +#endif + +#endif diff --git a/include/system/tpm.h b/include/system/tpm.h new file mode 100644 index 0000000000..1ee568b3b6 --- /dev/null +++ b/include/system/tpm.h @@ -0,0 +1,94 @@ +/* + * Public TPM functions + * + * Copyright (C) 2011-2013 IBM Corporation + * + * Authors: + * Stefan Berger <stefanb@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_TPM_H +#define QEMU_TPM_H + +#include "qapi/qapi-types-tpm.h" +#include "qom/object.h" + +#ifdef CONFIG_TPM + +int tpm_config_parse(QemuOptsList *opts_list, const char *optstr); +int tpm_init(void); +void tpm_cleanup(void); + +typedef enum TPMVersion { + TPM_VERSION_UNSPEC = 0, + TPM_VERSION_1_2 = 1, + TPM_VERSION_2_0 = 2, +} TPMVersion; + +#define TYPE_TPM_IF "tpm-if" +typedef struct TPMIfClass TPMIfClass; +DECLARE_CLASS_CHECKERS(TPMIfClass, TPM_IF, + TYPE_TPM_IF) +#define TPM_IF(obj) \ + INTERFACE_CHECK(TPMIf, (obj), TYPE_TPM_IF) + +typedef struct TPMIf TPMIf; + +struct TPMIfClass { + InterfaceClass parent_class; + + enum TpmModel model; + void (*request_completed)(TPMIf *obj, int ret); + enum TPMVersion (*get_version)(TPMIf *obj); +}; + +#define TYPE_TPM_TIS_ISA "tpm-tis" +#define TYPE_TPM_TIS_SYSBUS "tpm-tis-device" +#define TYPE_TPM_CRB "tpm-crb" +#define TYPE_TPM_SPAPR "tpm-spapr" +#define TYPE_TPM_TIS_I2C "tpm-tis-i2c" + +#define TPM_IS_TIS_ISA(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_ISA) +#define TPM_IS_TIS_SYSBUS(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_SYSBUS) +#define TPM_IS_CRB(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB) +#define TPM_IS_SPAPR(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_SPAPR) +#define TPM_IS_TIS_I2C(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_I2C) + +/* returns NULL unless there is exactly one TPM device */ +static inline TPMIf *tpm_find(void) +{ + Object *obj = object_resolve_path_type("", TYPE_TPM_IF, NULL); + + return TPM_IF(obj); +} + +static inline TPMVersion tpm_get_version(TPMIf *ti) +{ + if (!ti) { + return TPM_VERSION_UNSPEC; + } + + return TPM_IF_GET_CLASS(ti)->get_version(ti); +} + +#else /* CONFIG_TPM */ + +#define tpm_init() (0) +#define tpm_cleanup() + +/* needed for an alignment check in non-tpm code */ +static inline Object *TPM_IS_CRB(Object *obj) +{ + return NULL; +} + +#endif /* CONFIG_TPM */ + +#endif /* QEMU_TPM_H */ diff --git a/include/system/tpm_backend.h b/include/system/tpm_backend.h new file mode 100644 index 0000000000..01b11f629c --- /dev/null +++ b/include/system/tpm_backend.h @@ -0,0 +1,216 @@ +/* + * QEMU TPM Backend + * + * Copyright IBM, Corp. 2013 + * + * Authors: + * Stefan Berger <stefanb@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
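tpm_find() and tpm_get_version() above are the usual way for board or firmware-table code to probe the configured TPM frontend. A short, illustrative sketch, assuming a CONFIG_TPM build:

    /* Hypothetical helper: true when the single configured TPM frontend
     * (if any) emulates a TPM 2.0 device. */
    static bool machine_has_tpm2(void)
    {
        TPMIf *ti = tpm_find();    /* NULL unless exactly one TPM device */

        return tpm_get_version(ti) == TPM_VERSION_2_0;
    }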
+ */ + +#ifndef TPM_BACKEND_H +#define TPM_BACKEND_H + +#include "qom/object.h" +#include "qemu/option.h" +#include "system/tpm.h" +#include "qapi/error.h" + +#ifdef CONFIG_TPM + +#define TYPE_TPM_BACKEND "tpm-backend" +OBJECT_DECLARE_TYPE(TPMBackend, TPMBackendClass, + TPM_BACKEND) + + +typedef struct TPMBackendCmd { + uint8_t locty; + const uint8_t *in; + uint32_t in_len; + uint8_t *out; + uint32_t out_len; + bool selftest_done; +} TPMBackendCmd; + +struct TPMBackend { + Object parent; + + /*< protected >*/ + TPMIf *tpmif; + bool opened; + bool had_startup_error; + TPMBackendCmd *cmd; + + /* <public> */ + char *id; + + QLIST_ENTRY(TPMBackend) list; +}; + +struct TPMBackendClass { + ObjectClass parent_class; + + enum TpmType type; + const QemuOptDesc *opts; + /* get a descriptive text of the backend to display to the user */ + const char *desc; + + TPMBackend *(*create)(QemuOpts *opts); + + /* start up the TPM on the backend - optional */ + int (*startup_tpm)(TPMBackend *t, size_t buffersize); + + /* optional */ + void (*reset)(TPMBackend *t); + + void (*cancel_cmd)(TPMBackend *t); + + /* optional */ + bool (*get_tpm_established_flag)(TPMBackend *t); + + /* optional */ + int (*reset_tpm_established_flag)(TPMBackend *t, uint8_t locty); + + TPMVersion (*get_tpm_version)(TPMBackend *t); + + size_t (*get_buffer_size)(TPMBackend *t); + + TpmTypeOptions *(*get_tpm_options)(TPMBackend *t); + + void (*handle_request)(TPMBackend *s, TPMBackendCmd *cmd, Error **errp); +}; + +/** + * tpm_backend_get_type: + * @s: the backend + * + * Returns the TpmType of the backend. + */ +enum TpmType tpm_backend_get_type(TPMBackend *s); + +/** + * tpm_backend_init: + * @s: the backend to initialized + * @tpmif: TPM interface + * @datacb: callback for sending data to frontend + * @errp: a pointer to return the #Error object if an error occurs. + * + * Initialize the backend with the given variables. + * + * Returns 0 on success. + */ +int tpm_backend_init(TPMBackend *s, TPMIf *tpmif, Error **errp); + +/** + * tpm_backend_startup_tpm: + * @s: the backend whose TPM support is to be started + * @buffersize: the buffer size the TPM is supposed to use, + * 0 to leave it as-is + * + * Returns 0 on success. + */ +int tpm_backend_startup_tpm(TPMBackend *s, size_t buffersize); + +/** + * tpm_backend_had_startup_error: + * @s: the backend to query for a startup error + * + * Check whether the backend had an error during startup. Returns + * false if no error occurred and the backend can be used, true + * otherwise. + */ +bool tpm_backend_had_startup_error(TPMBackend *s); + +/** + * tpm_backend_deliver_request: + * @s: the backend to send the request to + * @cmd: the command to deliver + * + * Send a request to the backend. The backend will then send the request + * to the TPM implementation. + */ +void tpm_backend_deliver_request(TPMBackend *s, TPMBackendCmd *cmd); + +/** + * tpm_backend_reset: + * @s: the backend to reset + * + * Reset the backend into a well defined state with all previous errors + * reset. + */ +void tpm_backend_reset(TPMBackend *s); + +/** + * tpm_backend_cancel_cmd: + * @s: the backend + * + * Cancel any ongoing command being processed by the TPM implementation + * on behalf of the QEMU guest. + */ +void tpm_backend_cancel_cmd(TPMBackend *s); + +/** + * tpm_backend_get_tpm_established_flag: + * @s: the backend + * + * Get the TPM establishment flag. This function may be called very + * frequently by the frontend since for example in the TIS implementation + * this flag is part of a register. 
+ */ +bool tpm_backend_get_tpm_established_flag(TPMBackend *s); + +/** + * tpm_backend_reset_tpm_established_flag: + * @s: the backend + * @locty: the locality number + * + * Reset the TPM establishment flag. + */ +int tpm_backend_reset_tpm_established_flag(TPMBackend *s, uint8_t locty); + +/** + * tpm_backend_get_tpm_version: + * @s: the backend to call into + * + * Get the TPM Version that is emulated at the backend. + * + * Returns TPMVersion. + */ +TPMVersion tpm_backend_get_tpm_version(TPMBackend *s); + +/** + * tpm_backend_get_buffer_size: + * @s: the backend to call into + * + * Get the TPM's buffer size. + * + * Returns buffer size. + */ +size_t tpm_backend_get_buffer_size(TPMBackend *s); + +/** + * tpm_backend_finish_sync: + * @s: the backend to call into + * + * Finish the pending command synchronously (this will call aio_poll() + * on qemu main AIOContext until it ends) + */ +void tpm_backend_finish_sync(TPMBackend *s); + +/** + * tpm_backend_query_tpm: + * @s: the backend + * + * Query backend tpm info + * + * Returns newly allocated TPMInfo + */ +TPMInfo *tpm_backend_query_tpm(TPMBackend *s); + +TPMBackend *qemu_find_tpm_be(const char *id); + +#endif /* CONFIG_TPM */ + +#endif /* TPM_BACKEND_H */ diff --git a/include/system/tpm_util.h b/include/system/tpm_util.h new file mode 100644 index 0000000000..1858693225 --- /dev/null +++ b/include/system/tpm_util.h @@ -0,0 +1,72 @@ +/* + * TPM utility functions + * + * Copyright (c) 2010 - 2015 IBM Corporation + * Authors: + * Stefan Berger <stefanb@us.ibm.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
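The per-function comments above imply a fixed call order on the frontend side: initialize, start up, then deliver commands. A hedged sketch of that order; the command buffers, the buffer size of 0 (keep as-is) and the error handling are assumptions.

    /* Hypothetical frontend-side startup plus one synchronous command;
     * the TPMBackendCmd is assumed to be filled in by the caller. */
    static int example_tpm_backend_run(TPMIf *tpmif, TPMBackend *be,
                                       TPMBackendCmd *cmd, Error **errp)
    {
        if (tpm_backend_init(be, tpmif, errp) < 0) {
            return -1;
        }
        if (tpm_backend_startup_tpm(be, 0) < 0 ||
            tpm_backend_had_startup_error(be)) {
            return -1;
        }

        tpm_backend_deliver_request(be, cmd);    /* asynchronous */
        tpm_backend_finish_sync(be);             /* wait for completion */
        return 0;
    }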
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#ifndef SYSTEM_TPM_UTIL_H
+#define SYSTEM_TPM_UTIL_H
+
+#include "system/tpm.h"
+#include "qemu/bswap.h"
+
+void tpm_util_write_fatal_error_response(uint8_t *out, uint32_t out_len);
+
+bool tpm_util_is_selftest(const uint8_t *in, uint32_t in_len);
+
+int tpm_util_test_tpmdev(int tpm_fd, TPMVersion *tpm_version);
+
+static inline uint16_t tpm_cmd_get_tag(const void *b)
+{
+    return lduw_be_p(b);
+}
+
+static inline void tpm_cmd_set_tag(void *b, uint16_t tag)
+{
+    stw_be_p(b, tag);
+}
+
+static inline uint32_t tpm_cmd_get_size(const void *b)
+{
+    return ldl_be_p(b + 2);
+}
+
+static inline void tpm_cmd_set_size(void *b, uint32_t size)
+{
+    stl_be_p(b + 2, size);
+}
+
+static inline uint32_t tpm_cmd_get_ordinal(const void *b)
+{
+    return ldl_be_p(b + 6);
+}
+
+static inline uint32_t tpm_cmd_get_errcode(const void *b)
+{
+    return ldl_be_p(b + 6);
+}
+
+static inline void tpm_cmd_set_error(void *b, uint32_t error)
+{
+    stl_be_p(b + 6, error);
+}
+
+void tpm_util_show_buffer(const unsigned char *buffer,
+                          size_t buffer_size, const char *string);
+
+#endif /* SYSTEM_TPM_UTIL_H */
diff --git a/include/system/vhost-user-backend.h b/include/system/vhost-user-backend.h
new file mode 100644
index 0000000000..327b0b84f1
--- /dev/null
+++ b/include/system/vhost-user-backend.h
@@ -0,0 +1,48 @@
+/*
+ * QEMU vhost-user backend
+ *
+ * Copyright (C) 2018 Red Hat Inc
+ *
+ * Authors:
+ * Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_VHOST_USER_BACKEND_H
+#define QEMU_VHOST_USER_BACKEND_H
+
+#include "qom/object.h"
+#include "exec/memory.h"
+#include "qemu/option.h"
+#include "qemu/bitmap.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user.h"
+#include "chardev/char-fe.h"
+#include "io/channel.h"
+
+#define TYPE_VHOST_USER_BACKEND "vhost-user-backend"
+OBJECT_DECLARE_SIMPLE_TYPE(VhostUserBackend,
+                           VHOST_USER_BACKEND)
+
+
+struct VhostUserBackend {
+    /* private */
+    Object parent;
+
+    char *chr_name;
+    CharBackend chr;
+    VhostUserState vhost_user;
+    struct vhost_dev dev;
+    VirtIODevice *vdev;
+    bool started;
+    bool completed;
+};
+
+int vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev,
+                                unsigned nvqs, Error **errp);
+void vhost_user_backend_start(VhostUserBackend *b);
+void vhost_user_backend_stop(VhostUserBackend *b);
+
+#endif
diff --git a/include/system/watchdog.h b/include/system/watchdog.h
new file mode 100644
index 0000000000..745c89b02b
--- /dev/null
+++ b/include/system/watchdog.h
@@ -0,0 +1,32 @@
+/*
+ * Virtual hardware watchdog.
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * By Richard W.M.
Jones (rjones@redhat.com). + */ + +#ifndef QEMU_WATCHDOG_H +#define QEMU_WATCHDOG_H + +#include "qemu/queue.h" +#include "qapi/qapi-types-run-state.h" + +/* in hw/watchdog.c */ +WatchdogAction get_watchdog_action(void); +void watchdog_perform_action(void); + +#endif /* QEMU_WATCHDOG_H */ diff --git a/include/system/whpx.h b/include/system/whpx.h new file mode 100644 index 0000000000..00ff409b68 --- /dev/null +++ b/include/system/whpx.h @@ -0,0 +1,34 @@ +/* + * QEMU Windows Hypervisor Platform accelerator (WHPX) support + * + * Copyright Microsoft, Corp. 2017 + * + * Authors: + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +/* header to be included in non-WHPX-specific code */ + +#ifndef QEMU_WHPX_H +#define QEMU_WHPX_H + +#ifdef COMPILING_PER_TARGET + +#ifdef CONFIG_WHPX + +int whpx_enabled(void); +bool whpx_apic_in_platform(void); + +#else /* CONFIG_WHPX */ + +#define whpx_enabled() (0) +#define whpx_apic_in_platform() (0) + +#endif /* CONFIG_WHPX */ + +#endif /* COMPILING_PER_TARGET */ + +#endif /* QEMU_WHPX_H */ diff --git a/include/system/xen-mapcache.h b/include/system/xen-mapcache.h new file mode 100644 index 0000000000..b68f196ddd --- /dev/null +++ b/include/system/xen-mapcache.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2011 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef XEN_MAPCACHE_H +#define XEN_MAPCACHE_H + +#include "exec/cpu-common.h" +#include "system/xen.h" + +typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr phys_offset, + ram_addr_t size); +#ifdef CONFIG_XEN_IS_POSSIBLE + +void xen_map_cache_init(phys_offset_to_gaddr_t f, + void *opaque); +uint8_t *xen_map_cache(MemoryRegion *mr, hwaddr phys_addr, hwaddr size, + ram_addr_t ram_addr_offset, + uint8_t lock, bool dma, + bool is_write); +ram_addr_t xen_ram_addr_from_mapcache(void *ptr); +void xen_invalidate_map_cache_entry(uint8_t *buffer); +void xen_invalidate_map_cache(void); +uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr, + hwaddr new_phys_addr, + hwaddr size); +#else + +static inline void xen_map_cache_init(phys_offset_to_gaddr_t f, + void *opaque) +{ +} + +static inline uint8_t *xen_map_cache(MemoryRegion *mr, + hwaddr phys_addr, + hwaddr size, + ram_addr_t ram_addr_offset, + uint8_t lock, + bool dma, + bool is_write) +{ + abort(); +} + +static inline ram_addr_t xen_ram_addr_from_mapcache(void *ptr) +{ + abort(); +} + +static inline void xen_invalidate_map_cache_entry(uint8_t *buffer) +{ +} + +static inline void xen_invalidate_map_cache(void) +{ +} + +static inline uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr, + hwaddr new_phys_addr, + hwaddr size) +{ + abort(); +} + +#endif + +#endif /* XEN_MAPCACHE_H */ diff --git a/include/system/xen.h b/include/system/xen.h new file mode 100644 index 0000000000..990c19a8ef --- /dev/null +++ b/include/system/xen.h @@ -0,0 +1,54 @@ +/* + * QEMU Xen support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
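The watchdog interface above needs only one call once a device model's timer expires; which action is taken comes from the user's configuration. Illustrative only:

    /* Hypothetical expiry handler for a virtual watchdog device.
     * watchdog_perform_action() applies the configured policy (reset,
     * pause, shutdown, ...); get_watchdog_action() can be used first
     * if the device wants to inspect the policy. */
    static void example_watchdog_expired(void *opaque)
    {
        watchdog_perform_action();
    }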
+ */ + +/* header to be included in non-Xen-specific code */ + +#ifndef SYSTEM_XEN_H +#define SYSTEM_XEN_H + +#ifdef CONFIG_USER_ONLY +#error Cannot include system/xen.h from user emulation +#endif + +#include "exec/cpu-common.h" + +#ifdef COMPILING_PER_TARGET +# ifdef CONFIG_XEN +# define CONFIG_XEN_IS_POSSIBLE +# endif +#else +# define CONFIG_XEN_IS_POSSIBLE +#endif /* COMPILING_PER_TARGET */ + +#ifdef CONFIG_XEN_IS_POSSIBLE + +extern bool xen_allowed; + +#define xen_enabled() (xen_allowed) + +void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length); +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, + struct MemoryRegion *mr, Error **errp); + +#else /* !CONFIG_XEN_IS_POSSIBLE */ + +#define xen_enabled() 0 +static inline void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) +{ + /* nothing */ +} +static inline void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, + MemoryRegion *mr, Error **errp) +{ + g_assert_not_reached(); +} + +#endif /* CONFIG_XEN_IS_POSSIBLE */ + +bool xen_mr_is_memory(MemoryRegion *mr); +bool xen_mr_is_grants(MemoryRegion *mr); +#endif |