Diffstat (limited to 'include/system')
60 files changed, 6113 insertions, 0 deletions
diff --git a/include/system/accel-blocker.h b/include/system/accel-blocker.h new file mode 100644 index 0000000000..e10099d6a9 --- /dev/null +++ b/include/system/accel-blocker.h @@ -0,0 +1,55 @@ +/* + * Accelerator blocking API, to prevent new ioctls from starting and wait the + * running ones finish. + * This mechanism differs from pause/resume_all_vcpus() in that it does not + * release the BQL. + * + * Copyright (c) 2022 Red Hat Inc. + * + * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef ACCEL_BLOCKER_H +#define ACCEL_BLOCKER_H + +#include "system/cpus.h" + +void accel_blocker_init(void); + +/* + * accel_{cpu_}ioctl_begin/end: + * Mark when ioctl is about to run or just finished. + * + * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is + * called, preventing new ioctls to run. They will continue only after + * accel_ioctl_inibith_end(). + */ +void accel_ioctl_begin(void); +void accel_ioctl_end(void); +void accel_cpu_ioctl_begin(CPUState *cpu); +void accel_cpu_ioctl_end(CPUState *cpu); + +/* + * accel_ioctl_inhibit_begin: start critical section + * + * This function makes sure that: + * 1) incoming accel_{cpu_}ioctl_begin() calls block + * 2) wait that all ioctls that were already running reach + * accel_{cpu_}ioctl_end(), kicking vcpus if necessary. + * + * This allows the caller to access shared data or perform operations without + * worrying of concurrent vcpus accesses. + */ +void accel_ioctl_inhibit_begin(void); + +/* + * accel_ioctl_inhibit_end: end critical section started by + * accel_ioctl_inhibit_begin() + * + * This function allows blocked accel_{cpu_}ioctl_begin() to continue. + */ +void accel_ioctl_inhibit_end(void); + +#endif /* ACCEL_BLOCKER_H */ diff --git a/include/system/accel-ops.h b/include/system/accel-ops.h new file mode 100644 index 0000000000..a088672230 --- /dev/null +++ b/include/system/accel-ops.h @@ -0,0 +1,74 @@ +/* + * Accelerator OPS, used for cpus.c module + * + * Copyright 2021 SUSE LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef ACCEL_OPS_H +#define ACCEL_OPS_H + +#include "exec/cpu-common.h" +#include "qom/object.h" + +#define ACCEL_OPS_SUFFIX "-ops" +#define TYPE_ACCEL_OPS "accel" ACCEL_OPS_SUFFIX +#define ACCEL_OPS_NAME(name) (name "-" TYPE_ACCEL_OPS) + +typedef struct AccelOpsClass AccelOpsClass; +DECLARE_CLASS_CHECKERS(AccelOpsClass, ACCEL_OPS, TYPE_ACCEL_OPS) + +/** + * struct AccelOpsClass - accelerator interfaces + * + * This structure is used to abstract accelerator differences from the + * core CPU code. Not all have to be implemented. 
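A hedged usage sketch of the accel-blocker API declared above (system/accel-blocker.h). Only the accel_*() calls come from the header; my_accel_ioctl(), my_raw_ioctl() and my_update_shared_state() are hypothetical names for illustration:

    /* vCPU side: bracket every accelerator ioctl (hypothetical wrapper). */
    static int my_accel_ioctl(CPUState *cpu, int request, void *arg)
    {
        int ret;

        accel_cpu_ioctl_begin(cpu);            /* blocks while an inhibit section runs */
        ret = my_raw_ioctl(cpu, request, arg); /* hypothetical low-level ioctl call */
        accel_cpu_ioctl_end(cpu);
        return ret;
    }

    /* Control side: quiesce all ioctls without dropping the BQL. */
    static void my_update_shared_state(void)
    {
        accel_ioctl_inhibit_begin();   /* waits for in-flight ioctls, kicking vcpus */
        /* ... touch data that concurrent vcpu ioctls could otherwise race with ... */
        accel_ioctl_inhibit_end();     /* lets blocked accel_*_ioctl_begin() continue */
    }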
+ */ +struct AccelOpsClass { + /*< private >*/ + ObjectClass parent_class; + /*< public >*/ + + /* initialization function called when accel is chosen */ + void (*ops_init)(AccelOpsClass *ops); + + bool (*cpus_are_resettable)(void); + void (*cpu_reset_hold)(CPUState *cpu); + + void (*create_vcpu_thread)(CPUState *cpu); /* MANDATORY NON-NULL */ + void (*kick_vcpu_thread)(CPUState *cpu); + bool (*cpu_thread_is_idle)(CPUState *cpu); + + void (*synchronize_post_reset)(CPUState *cpu); + void (*synchronize_post_init)(CPUState *cpu); + void (*synchronize_state)(CPUState *cpu); + void (*synchronize_pre_loadvm)(CPUState *cpu); + void (*synchronize_pre_resume)(bool step_pending); + + void (*handle_interrupt)(CPUState *cpu, int mask); + + /** + * @get_virtual_clock: fetch virtual clock + * @set_virtual_clock: set virtual clock + * + * These allow the timer subsystem to defer to the accelerator to + * fetch time. The set function is needed if the accelerator wants + * to track the changes to time as the timer is warped through + * various timer events. + */ + int64_t (*get_virtual_clock)(void); + void (*set_virtual_clock)(int64_t time); + + int64_t (*get_elapsed_ticks)(void); + + /* gdbstub hooks */ + bool (*supports_guest_debug)(void); + int (*update_guest_debug)(CPUState *cpu); + int (*insert_breakpoint)(CPUState *cpu, int type, vaddr addr, vaddr len); + int (*remove_breakpoint)(CPUState *cpu, int type, vaddr addr, vaddr len); + void (*remove_all_breakpoints)(CPUState *cpu); +}; + +#endif /* ACCEL_OPS_H */ diff --git a/include/system/arch_init.h b/include/system/arch_init.h new file mode 100644 index 0000000000..5b1c1026f3 --- /dev/null +++ b/include/system/arch_init.h @@ -0,0 +1,32 @@ +#ifndef QEMU_ARCH_INIT_H +#define QEMU_ARCH_INIT_H + + +enum { + QEMU_ARCH_ALL = -1, + QEMU_ARCH_ALPHA = (1 << 0), + QEMU_ARCH_ARM = (1 << 1), + QEMU_ARCH_I386 = (1 << 3), + QEMU_ARCH_M68K = (1 << 4), + QEMU_ARCH_MICROBLAZE = (1 << 6), + QEMU_ARCH_MIPS = (1 << 7), + QEMU_ARCH_PPC = (1 << 8), + QEMU_ARCH_S390X = (1 << 9), + QEMU_ARCH_SH4 = (1 << 10), + QEMU_ARCH_SPARC = (1 << 11), + QEMU_ARCH_XTENSA = (1 << 12), + QEMU_ARCH_OPENRISC = (1 << 13), + QEMU_ARCH_TRICORE = (1 << 16), + QEMU_ARCH_HPPA = (1 << 18), + QEMU_ARCH_RISCV = (1 << 19), + QEMU_ARCH_RX = (1 << 20), + QEMU_ARCH_AVR = (1 << 21), + QEMU_ARCH_HEXAGON = (1 << 22), + QEMU_ARCH_LOONGARCH = (1 << 23), +}; + +extern const uint32_t arch_type; + +void qemu_init_arch_modules(void); + +#endif diff --git a/include/system/balloon.h b/include/system/balloon.h new file mode 100644 index 0000000000..867687b73a --- /dev/null +++ b/include/system/balloon.h @@ -0,0 +1,27 @@ +/* + * Balloon + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
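As a rough illustration of how an accelerator could populate the AccelOpsClass above: the "myaccel" names are invented, the class_init/TypeInfo boilerplate is the usual QOM pattern rather than anything mandated by this header, and only create_vcpu_thread is documented as mandatory. dummy_start_vcpu_thread() is the helper declared in system/cpus.h later in this diff:

    static void myaccel_ops_class_init(ObjectClass *oc, void *data)
    {
        AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);

        /* The only hook the header marks as MANDATORY NON-NULL. */
        ops->create_vcpu_thread = dummy_start_vcpu_thread;
    }

    static const TypeInfo myaccel_ops_type = {
        .name       = ACCEL_OPS_NAME("myaccel"),
        .parent     = TYPE_ACCEL_OPS,
        .class_init = myaccel_ops_class_init,
    };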
+ * + */ + +#ifndef QEMU_BALLOON_H +#define QEMU_BALLOON_H + +#include "exec/cpu-common.h" +#include "qapi/qapi-types-machine.h" + +typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target); +typedef void (QEMUBalloonStatus)(void *opaque, BalloonInfo *info); + +int qemu_add_balloon_handler(QEMUBalloonEvent *event_func, + QEMUBalloonStatus *stat_func, void *opaque); +void qemu_remove_balloon_handler(void *opaque); + +#endif diff --git a/include/system/block-backend-common.h b/include/system/block-backend-common.h new file mode 100644 index 0000000000..780cea7305 --- /dev/null +++ b/include/system/block-backend-common.h @@ -0,0 +1,103 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_COMMON_H +#define BLOCK_BACKEND_COMMON_H + +#include "qemu/iov.h" +#include "block/throttle-groups.h" + +/* + * TODO Have to include block/block.h for a bunch of block layer + * types. Unfortunately, this pulls in the whole BlockDriverState + * API, which we don't want used by many BlockBackend users. Some of + * the types belong here, and the rest should be split into a common + * header and one for the BlockDriverState API. + */ +#include "block/block.h" + +/* Callbacks for block device models */ +typedef struct BlockDevOps { + + /* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + + /* + * Runs when virtual media changed (monitor commands eject, change) + * Argument load is true on load and false on eject. + * Beware: doesn't run when a host device's physical media + * changes. Sure would be useful if it did. + * Device models with removable media must implement this callback. + */ + void (*change_media_cb)(void *opaque, bool load, Error **errp); + /* + * Runs when an eject request is issued from the monitor, the tray + * is closed, and the medium is locked. + * Device models that do not implement is_medium_locked will not need + * this callback. Device models that can lock the medium or tray might + * want to implement the callback and unlock the tray when "force" is + * true, even if they do not support eject requests. + */ + void (*eject_request_cb)(void *opaque, bool force); + + /* + * Is the virtual medium locked into the device? + * Device models implement this only when device has such a lock. + */ + bool (*is_medium_locked)(void *opaque); + + /* + * Runs when the backend receives a drain request. + */ + void (*drained_begin)(void *opaque); + /* + * Runs when the backend's last drain request ends. + */ + void (*drained_end)(void *opaque); + /* + * Is the device still busy? + */ + bool (*drained_poll)(void *opaque); + + /* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + + /* + * Is the virtual tray open? + * Device models implement this only when the device has a tray. + */ + bool (*is_tray_open)(void *opaque); + + /* + * Runs when the size changed (e.g. monitor command block_resize) + */ + void (*resize_cb)(void *opaque); +} BlockDevOps; + +/* + * This struct is embedded in (the private) BlockBackend struct and contains + * fields that must be public. 
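A hedged sketch of a device model wiring up the BlockDevOps callbacks defined above; the mydev_* names are hypothetical, and blk_set_dev_ops() is the registration function declared in block-backend-global-state.h later in this diff:

    /* Hypothetical device model with removable media. */
    static void mydev_change_media_cb(void *opaque, bool load, Error **errp)
    {
        /* react to monitor "eject"/"change"; load is true on load, false on eject */
    }

    static void mydev_eject_request_cb(void *opaque, bool force)
    {
        /* eject requested while the tray is closed and the medium is locked */
    }

    static const BlockDevOps mydev_block_ops = {
        .change_media_cb  = mydev_change_media_cb,
        .eject_request_cb = mydev_eject_request_cb,
    };

    /* typically called from the device's realize function:
     *     blk_set_dev_ops(s->blk, &mydev_block_ops, s);
     */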
This is in particular for QLIST_ENTRY() and + * friends so that BlockBackends can be kept in lists outside block-backend.c + */ +typedef struct BlockBackendPublic { + ThrottleGroupMember throttle_group_member; +} BlockBackendPublic; + +#endif /* BLOCK_BACKEND_COMMON_H */ diff --git a/include/system/block-backend-global-state.h b/include/system/block-backend-global-state.h new file mode 100644 index 0000000000..9cc9b008ec --- /dev/null +++ b/include/system/block-backend-global-state.h @@ -0,0 +1,125 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_GLOBAL_STATE_H +#define BLOCK_BACKEND_GLOBAL_STATE_H + +#include "block-backend-common.h" + +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + +BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm); + +BlockBackend * no_coroutine_fn +blk_new_with_bs(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm, + Error **errp); + +BlockBackend * coroutine_fn no_co_wrapper +blk_co_new_with_bs(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm, + Error **errp); + +BlockBackend * no_coroutine_fn +blk_new_open(const char *filename, const char *reference, QDict *options, + int flags, Error **errp); + +BlockBackend * coroutine_fn no_co_wrapper +blk_co_new_open(const char *filename, const char *reference, QDict *options, + int flags, Error **errp); + +int blk_get_refcnt(BlockBackend *blk); +void blk_ref(BlockBackend *blk); + +void no_coroutine_fn blk_unref(BlockBackend *blk); +void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk); + +void blk_remove_all_bs(void); +BlockBackend *blk_by_name(const char *name); +BlockBackend *blk_next(BlockBackend *blk); +BlockBackend *blk_all_next(BlockBackend *blk); +bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp); +void monitor_remove_blk(BlockBackend *blk); + +BlockBackendPublic *blk_get_public(BlockBackend *blk); + +void blk_remove_bs(BlockBackend *blk); +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); +int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp); +bool GRAPH_RDLOCK bdrv_has_blk(BlockDriverState *bs); +bool GRAPH_RDLOCK bdrv_is_root_node(BlockDriverState *bs); +int GRAPH_UNLOCKED blk_set_perm(BlockBackend *blk, uint64_t perm, + uint64_t shared_perm, Error **errp); +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); + +void blk_iostatus_enable(BlockBackend *blk); +BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk); +void blk_iostatus_reset(BlockBackend *blk); +int blk_attach_dev(BlockBackend *blk, DeviceState *dev); +void blk_detach_dev(BlockBackend *blk, DeviceState *dev); +DeviceState *blk_get_attached_dev(BlockBackend *blk); +BlockBackend *blk_by_dev(void *dev); +BlockBackend *blk_by_qdev_id(const char *id, Error **errp); +void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); + +int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags); +void blk_aio_cancel(BlockAIOCB *acb); +int blk_commit_all(void); +bool blk_in_drain(BlockBackend *blk); +void blk_drain(BlockBackend *blk); +void blk_drain_all(void); +void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + 
BlockdevOnError on_write_error); +bool blk_supports_write_perm(BlockBackend *blk); +bool blk_is_sg(BlockBackend *blk); +void blk_set_enable_write_cache(BlockBackend *blk, bool wce); +int blk_get_flags(BlockBackend *blk); +bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp); +int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, + Error **errp); +void blk_add_aio_context_notifier(BlockBackend *blk, + void (*attached_aio_context)(AioContext *new_context, void *opaque), + void (*detach_aio_context)(void *opaque), void *opaque); +void blk_remove_aio_context_notifier(BlockBackend *blk, + void (*attached_aio_context)(AioContext *, + void *), + void (*detach_aio_context)(void *), + void *opaque); +void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify); +BlockBackendRootState *blk_get_root_state(BlockBackend *blk); +void blk_update_root_state(BlockBackend *blk); +bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk); +int blk_get_open_flags_from_root_state(BlockBackend *blk); + +int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, + int64_t pos, int size); +int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); +int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz); +int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo); + +void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg); +void blk_io_limits_disable(BlockBackend *blk); +void blk_io_limits_enable(BlockBackend *blk, const char *group); +void blk_io_limits_update_group(BlockBackend *blk, const char *group); +void blk_set_force_allow_inactivate(BlockBackend *blk); + +bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp); +void blk_unregister_buf(BlockBackend *blk, void *host, size_t size); + +const BdrvChild *blk_root(BlockBackend *blk); + +int blk_make_empty(BlockBackend *blk, Error **errp); + +#endif /* BLOCK_BACKEND_GLOBAL_STATE_H */ diff --git a/include/system/block-backend-io.h b/include/system/block-backend-io.h new file mode 100644 index 0000000000..d174275a5c --- /dev/null +++ b/include/system/block-backend-io.h @@ -0,0 +1,230 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_IO_H +#define BLOCK_BACKEND_IO_H + +#include "block-backend-common.h" +#include "block/accounting.h" + +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. 
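A hedged sketch using the global-state API above (BQL held): open an image read/write and give the backend a monitor-visible name. "disk.qcow2" and "drive0" are placeholders, and BDRV_O_RDWR is assumed to come from block/block.h:

    static BlockBackend *my_open_disk(Error **errp)
    {
        BlockBackend *blk;

        blk = blk_new_open("disk.qcow2", NULL, NULL, BDRV_O_RDWR, errp);
        if (!blk) {
            return NULL;
        }
        if (!monitor_add_blk(blk, "drive0", errp)) {   /* expose it to the monitor */
            blk_unref(blk);
            return NULL;
        }
        return blk;
    }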
+ */ + +const char *blk_name(const BlockBackend *blk); + +BlockDriverState *blk_bs(BlockBackend *blk); + +void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); +void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow); +void blk_set_disable_request_queuing(BlockBackend *blk, bool disable); +bool blk_iostatus_is_enabled(const BlockBackend *blk); + +char *blk_get_attached_dev_id(BlockBackend *blk); + +BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t bytes, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); + +BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_flush(BlockBackend *blk, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset, + unsigned int *nr_zones, + BlockZoneDescriptor *zones, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op, + int64_t offset, int64_t len, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes, + BlockCompletionFunc *cb, void *opaque); +void blk_aio_cancel_async(BlockAIOCB *acb); +BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque); + +void blk_inc_in_flight(BlockBackend *blk); +void blk_dec_in_flight(BlockBackend *blk); + +bool coroutine_fn GRAPH_RDLOCK blk_co_is_inserted(BlockBackend *blk); +bool co_wrapper_mixed_bdrv_rdlock blk_is_inserted(BlockBackend *blk); + +bool coroutine_fn GRAPH_RDLOCK blk_co_is_available(BlockBackend *blk); +bool co_wrapper_mixed_bdrv_rdlock blk_is_available(BlockBackend *blk); + +void coroutine_fn blk_co_lock_medium(BlockBackend *blk, bool locked); +void co_wrapper blk_lock_medium(BlockBackend *blk, bool locked); + +void coroutine_fn blk_co_eject(BlockBackend *blk, bool eject_flag); +void co_wrapper blk_eject(BlockBackend *blk, bool eject_flag); + +int64_t coroutine_fn blk_co_getlength(BlockBackend *blk); +int64_t co_wrapper_mixed blk_getlength(BlockBackend *blk); + +void coroutine_fn blk_co_get_geometry(BlockBackend *blk, + uint64_t *nb_sectors_ptr); +void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr); + +int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk); +int64_t blk_nb_sectors(BlockBackend *blk); + +void *blk_try_blockalign(BlockBackend *blk, size_t size); +void *blk_blockalign(BlockBackend *blk, size_t size); +bool blk_is_writable(BlockBackend *blk); +bool blk_enable_write_cache(BlockBackend *blk); +BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read); +BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, + int error); +void blk_error_action(BlockBackend *blk, BlockErrorAction action, + bool is_read, int error); +void blk_iostatus_set_err(BlockBackend *blk, int error); +int blk_get_max_iov(BlockBackend *blk); +int blk_get_max_hw_iov(BlockBackend *blk); + +AioContext *blk_get_aio_context(BlockBackend *blk); +BlockAcctStats *blk_get_stats(BlockBackend *blk); +void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, + 
BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, + BlockCompletionFunc *cb, + void *opaque, int ret); + +uint32_t blk_get_request_alignment(BlockBackend *blk); +uint32_t blk_get_max_transfer(BlockBackend *blk); +uint64_t blk_get_max_hw_transfer(BlockBackend *blk); + +int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, + BlockBackend *blk_out, int64_t off_out, + int64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + +int coroutine_fn blk_co_block_status_above(BlockBackend *blk, + BlockDriverState *base, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file); +int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk, + BlockDriverState *base, + bool include_base, int64_t offset, + int64_t bytes, int64_t *pnum); + +/* + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. + * + * See include/block/block-io.h for more information about + * the "I/O or GS" API. + */ + +int co_wrapper_mixed blk_pread(BlockBackend *blk, int64_t offset, + int64_t bytes, void *buf, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, int64_t bytes, + void *buf, BdrvRequestFlags flags); + +int co_wrapper_mixed blk_preadv(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); + +int co_wrapper_mixed blk_preadv_part(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset, + BdrvRequestFlags flags); +int coroutine_fn blk_co_preadv_part(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset, BdrvRequestFlags flags); + +int co_wrapper_mixed blk_pwrite(BlockBackend *blk, int64_t offset, + int64_t bytes, const void *buf, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, int64_t bytes, + const void *buf, BdrvRequestFlags flags); + +int co_wrapper_mixed blk_pwritev(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); + +int co_wrapper_mixed blk_pwritev_part(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, + int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); + +int co_wrapper_mixed blk_pwrite_compressed(BlockBackend *blk, + int64_t offset, int64_t bytes, + const void *buf); +int coroutine_fn blk_co_pwrite_compressed(BlockBackend *blk, int64_t offset, + int64_t bytes, const void *buf); + +int co_wrapper_mixed blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t bytes, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t bytes, BdrvRequestFlags flags); + +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset, + unsigned int *nr_zones, + BlockZoneDescriptor *zones); +int co_wrapper_mixed blk_zone_report(BlockBackend *blk, int64_t offset, + unsigned int *nr_zones, + BlockZoneDescriptor *zones); +int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op, + int64_t 
offset, int64_t len); +int co_wrapper_mixed blk_zone_mgmt(BlockBackend *blk, BlockZoneOp op, + int64_t offset, int64_t len); +int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset, + QEMUIOVector *qiov, + BdrvRequestFlags flags); +int co_wrapper_mixed blk_zone_append(BlockBackend *blk, int64_t *offset, + QEMUIOVector *qiov, + BdrvRequestFlags flags); + +int co_wrapper_mixed blk_pdiscard(BlockBackend *blk, int64_t offset, + int64_t bytes); +int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, + int64_t bytes); + +int co_wrapper_mixed blk_flush(BlockBackend *blk); +int coroutine_fn blk_co_flush(BlockBackend *blk); + +int co_wrapper_mixed blk_ioctl(BlockBackend *blk, unsigned long int req, + void *buf); +int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned long int req, + void *buf); + +int co_wrapper_mixed blk_truncate(BlockBackend *blk, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp); +int coroutine_fn blk_co_truncate(BlockBackend *blk, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, + Error **errp); + +#endif /* BLOCK_BACKEND_IO_H */ diff --git a/include/system/block-backend.h b/include/system/block-backend.h new file mode 100644 index 0000000000..038be9fc40 --- /dev/null +++ b/include/system/block-backend.h @@ -0,0 +1,21 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_H +#define BLOCK_BACKEND_H + +#include "block-backend-global-state.h" +#include "block-backend-io.h" + +/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */ + +#endif diff --git a/include/system/block-ram-registrar.h b/include/system/block-ram-registrar.h new file mode 100644 index 0000000000..d8b2f7942b --- /dev/null +++ b/include/system/block-ram-registrar.h @@ -0,0 +1,37 @@ +/* + * BlockBackend RAM Registrar + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef BLOCK_RAM_REGISTRAR_H +#define BLOCK_RAM_REGISTRAR_H + +#include "exec/ramlist.h" + +/** + * struct BlockRAMRegistrar: + * + * Keeps RAMBlock memory registered with a BlockBackend using + * blk_register_buf() including hotplugged memory. + * + * Emulated devices or other BlockBackend users initialize a BlockRAMRegistrar + * with blk_ram_registrar_init() before submitting I/O requests with the + * BDRV_REQ_REGISTERED_BUF flag set. + */ +typedef struct { + BlockBackend *blk; + RAMBlockNotifier notifier; + bool ok; +} BlockRAMRegistrar; + +void blk_ram_registrar_init(BlockRAMRegistrar *r, BlockBackend *blk); +void blk_ram_registrar_destroy(BlockRAMRegistrar *r); + +/* Have all RAMBlocks been registered successfully? */ +static inline bool blk_ram_registrar_ok(BlockRAMRegistrar *r) +{ + return r->ok; +} + +#endif /* BLOCK_RAM_REGISTRAR_H */ diff --git a/include/system/blockdev.h b/include/system/blockdev.h new file mode 100644 index 0000000000..3211b16513 --- /dev/null +++ b/include/system/blockdev.h @@ -0,0 +1,64 @@ +/* + * QEMU host block devices + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. 
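A hedged sketch of the coroutine I/O entry points above: read the first 4 KiB, modify it, write it back and flush. The function and the modification are hypothetical; only the blk_co_*() calls come from block-backend-io.h:

    static int coroutine_fn my_rewrite_header(BlockBackend *blk)
    {
        uint8_t buf[4096];
        int ret;

        ret = blk_co_pread(blk, 0, sizeof(buf), buf, 0);
        if (ret < 0) {
            return ret;
        }
        buf[0] ^= 1;   /* hypothetical in-place modification */
        ret = blk_co_pwrite(blk, 0, sizeof(buf), buf, 0);
        if (ret < 0) {
            return ret;
        }
        return blk_co_flush(blk);
    }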
+ */ + +#ifndef BLOCKDEV_H +#define BLOCKDEV_H + +#include "block/block.h" +#include "qemu/queue.h" + +typedef enum { + IF_DEFAULT = -1, /* for use with drive_add() only */ + /* + * IF_NONE must be zero, because we want MachineClass member + * block_default_type to default-initialize to IF_NONE + */ + IF_NONE = 0, + IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN, + IF_COUNT +} BlockInterfaceType; + +struct DriveInfo { + BlockInterfaceType type; + int bus; + int unit; + int auto_del; /* see blockdev_mark_auto_del() */ + bool is_default; /* Added by default_drive() ? */ + int media_cd; + QemuOpts *opts; + QTAILQ_ENTRY(DriveInfo) next; +}; + +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + +void blockdev_mark_auto_del(BlockBackend *blk); +void blockdev_auto_del(BlockBackend *blk); + +DriveInfo *blk_legacy_dinfo(BlockBackend *blk); +DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo); +BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo); + +void override_max_devs(BlockInterfaceType type, int max_devs); + +DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit); +void drive_check_orphaned(void); +DriveInfo *drive_get_by_index(BlockInterfaceType type, int index); +int drive_get_max_bus(BlockInterfaceType type); + +QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file, + const char *optstr); +DriveInfo *drive_new(QemuOpts *arg, BlockInterfaceType block_default_type, + Error **errp); + +#endif diff --git a/include/system/cpu-throttle.h b/include/system/cpu-throttle.h new file mode 100644 index 0000000000..44bf6a5389 --- /dev/null +++ b/include/system/cpu-throttle.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2012 SUSE LINUX Products GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + * <http://www.gnu.org/licenses/gpl-2.0.html> + */ + +#ifndef SYSTEM_CPU_THROTTLE_H +#define SYSTEM_CPU_THROTTLE_H + +#include "qemu/timer.h" + +/** + * cpu_throttle_init: + * + * Initialize the CPU throttling API. + */ +void cpu_throttle_init(void); + +/** + * cpu_throttle_set: + * @new_throttle_pct: Percent of sleep time. Valid range is 1 to 99. + * + * Throttles all vcpus by forcing them to sleep for the given percentage of + * time. A throttle_percentage of 25 corresponds to a 75% duty cycle roughly. + * (example: 10ms sleep for every 30ms awake). + * + * cpu_throttle_set can be called as needed to adjust new_throttle_pct. + * Once the throttling starts, it will remain in effect until cpu_throttle_stop + * is called. + */ +void cpu_throttle_set(int new_throttle_pct); + +/** + * cpu_throttle_stop: + * + * Stops the vcpu throttling started by cpu_throttle_set. + */ +void cpu_throttle_stop(void); + +/** + * cpu_throttle_active: + * + * Returns: %true if the vcpus are currently being throttled, %false otherwise. 
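A hedged sketch of the CPU throttling API documented above; the wrapper function is hypothetical:

    /* Force vcpus to sleep 60% of the time, or release them again. */
    static void my_throttle_guest(bool enable)
    {
        if (enable) {
            cpu_throttle_set(60);          /* valid range is 1 to 99 */
        } else if (cpu_throttle_active()) {
            cpu_throttle_stop();
        }
    }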
+ */ +bool cpu_throttle_active(void); + +/** + * cpu_throttle_get_percentage: + * + * Returns the vcpu throttle percentage. See cpu_throttle_set for details. + * + * Returns: The throttle percentage in range 1 to 99. + */ +int cpu_throttle_get_percentage(void); + +/** + * cpu_throttle_dirty_sync_timer_tick: + * + * Dirty sync timer hook. + */ +void cpu_throttle_dirty_sync_timer_tick(void *opaque); + +/** + * cpu_throttle_dirty_sync_timer: + * + * Start or stop the dirty sync timer. + */ +void cpu_throttle_dirty_sync_timer(bool enable); + +#endif /* SYSTEM_CPU_THROTTLE_H */ diff --git a/include/system/cpu-timers-internal.h b/include/system/cpu-timers-internal.h new file mode 100644 index 0000000000..94bb7394c5 --- /dev/null +++ b/include/system/cpu-timers-internal.h @@ -0,0 +1,71 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef TIMERS_STATE_H +#define TIMERS_STATE_H + +/* timers state, for sharing between icount and cpu-timers */ + +typedef struct TimersState { + /* Protected by BQL. */ + int64_t cpu_ticks_prev; + int64_t cpu_ticks_offset; + + /* + * Protect fields that can be respectively read outside the + * BQL, and written from multiple threads. + */ + QemuSeqLock vm_clock_seqlock; + QemuSpin vm_clock_lock; + + int16_t cpu_ticks_enabled; + + /* Conversion factor from emulated instructions to virtual clock ticks. */ + int16_t icount_time_shift; + /* Icount delta used for shift auto adjust. */ + int64_t last_delta; + + /* Compensate for varying guest execution speed. */ + aligned_int64_t qemu_icount_bias; + + int64_t vm_clock_warp_start; + int64_t cpu_clock_offset; + + /* Only written by TCG thread */ + int64_t qemu_icount; + + /* for adjusting icount */ + QEMUTimer *icount_rt_timer; + QEMUTimer *icount_vm_timer; + QEMUTimer *icount_warp_timer; +} TimersState; + +extern TimersState timers_state; + +/* + * icount needs this internal from cpu-timers when adjusting the icount shift. + */ +int64_t cpu_get_clock_locked(void); + +#endif /* TIMERS_STATE_H */ diff --git a/include/system/cpu-timers.h b/include/system/cpu-timers.h new file mode 100644 index 0000000000..64ae54f6d6 --- /dev/null +++ b/include/system/cpu-timers.h @@ -0,0 +1,104 @@ +/* + * CPU timers state API + * + * Copyright 2020 SUSE LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ +#ifndef SYSTEM_CPU_TIMERS_H +#define SYSTEM_CPU_TIMERS_H + +#include "qemu/timer.h" + +/* init the whole cpu timers API, including icount, ticks, and cpu_throttle */ +void cpu_timers_init(void); + +/* icount - Instruction Counter API */ + +/** + * ICountMode: icount enablement state: + * + * @ICOUNT_DISABLED: Disabled - Do not count executed instructions. + * @ICOUNT_PRECISE: Enabled - Fixed conversion of insn to ns via "shift" option + * @ICOUNT_ADAPTATIVE: Enabled - Runtime adaptive algorithm to compute shift + */ +typedef enum { + ICOUNT_DISABLED = 0, + ICOUNT_PRECISE, + ICOUNT_ADAPTATIVE, +} ICountMode; + +#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) +extern ICountMode use_icount; +#define icount_enabled() (use_icount) +#else +#define icount_enabled() ICOUNT_DISABLED +#endif + +/* + * Update the icount with the executed instructions. Called by + * cpus-tcg vCPU thread so the main-loop can see time has moved forward. + */ +void icount_update(CPUState *cpu); + +/* get raw icount value */ +int64_t icount_get_raw(void); + +/* return the virtual CPU time in ns, based on the instruction counter. */ +int64_t icount_get(void); +/* + * convert an instruction counter value to ns, based on the icount shift. + * This shift is set as a fixed value with the icount "shift" option + * (precise mode), or it is constantly approximated and corrected at + * runtime in adaptive mode. + */ +int64_t icount_to_ns(int64_t icount); + +/** + * icount_configure: configure the icount options, including "shift" + * @opts: Options to parse + * @errp: pointer to a NULL-initialized error object + * + * Return: true on success, else false setting @errp with error + */ +bool icount_configure(QemuOpts *opts, Error **errp); + +/* used by tcg vcpu thread to calc icount budget */ +int64_t icount_round(int64_t count); + +/* if the CPUs are idle, start accounting real time to virtual clock. */ +void icount_start_warp_timer(void); +void icount_account_warp_timer(void); +void icount_notify_exit(void); + +/* + * CPU Ticks and Clock + */ + +/* Caller must hold BQL */ +void cpu_enable_ticks(void); +/* Caller must hold BQL */ +void cpu_disable_ticks(void); + +/* + * return the time elapsed in VM between vm_start and vm_stop. + * cpu_get_ticks() uses units of the host CPU cycle counter. 
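A hedged sketch combining the icount and VM clock accessors declared in system/cpu-timers.h above; the helper name is hypothetical:

    /* Pick a guest time source depending on whether icount is enabled. */
    static int64_t my_guest_time_ns(void)
    {
        if (icount_enabled()) {
            return icount_get();   /* virtual CPU time derived from executed insns */
        }
        return cpu_get_clock();    /* monotonic VM time between vm_start and vm_stop */
    }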
+ */ +int64_t cpu_get_ticks(void); + +/* + * Returns the monotonic time elapsed in VM, i.e., + * the time between vm_start and vm_stop + */ +int64_t cpu_get_clock(void); + +void qemu_timer_notify_cb(void *opaque, QEMUClockType type); + +/* get/set VIRTUAL clock and VM elapsed ticks via the cpus accel interface */ +int64_t cpus_get_virtual_clock(void); +void cpus_set_virtual_clock(int64_t new_time); +int64_t cpus_get_elapsed_ticks(void); + +#endif /* SYSTEM_CPU_TIMERS_H */ diff --git a/include/system/cpus.h b/include/system/cpus.h new file mode 100644 index 0000000000..3d8fd368f3 --- /dev/null +++ b/include/system/cpus.h @@ -0,0 +1,53 @@ +#ifndef QEMU_CPUS_H +#define QEMU_CPUS_H + +#include "system/accel-ops.h" + +/* register accel-specific operations */ +void cpus_register_accel(const AccelOpsClass *i); + +/* return registers ops */ +const AccelOpsClass *cpus_get_accel(void); + +/* accel/dummy-cpus.c */ + +/* Create a dummy vcpu for AccelOpsClass->create_vcpu_thread */ +void dummy_start_vcpu_thread(CPUState *); + +/* interface available for cpus accelerator threads */ + +/* For temporary buffers for forming a name */ +#define VCPU_THREAD_NAME_SIZE 16 + +void cpus_kick_thread(CPUState *cpu); +bool cpu_work_list_empty(CPUState *cpu); +bool cpu_thread_is_idle(CPUState *cpu); +bool all_cpu_threads_idle(void); +bool cpu_can_run(CPUState *cpu); +void qemu_wait_io_event_common(CPUState *cpu); +void qemu_wait_io_event(CPUState *cpu); +void cpu_thread_signal_created(CPUState *cpu); +void cpu_thread_signal_destroyed(CPUState *cpu); +void cpu_handle_guest_debug(CPUState *cpu); + +/* end interface for cpus accelerator threads */ + +bool qemu_in_vcpu_thread(void); +void qemu_init_cpu_loop(void); +void resume_all_vcpus(void); +void pause_all_vcpus(void); +void cpu_stop_current(void); + +extern int icount_align_option; + +/* Unblock cpu */ +void qemu_cpu_kick_self(void); + +bool cpus_are_resettable(void); + +void cpu_synchronize_all_states(void); +void cpu_synchronize_all_post_reset(void); +void cpu_synchronize_all_post_init(void); +void cpu_synchronize_all_pre_loadvm(void); + +#endif diff --git a/include/system/cryptodev-vhost-user.h b/include/system/cryptodev-vhost-user.h new file mode 100644 index 0000000000..5138c146fa --- /dev/null +++ b/include/system/cryptodev-vhost-user.h @@ -0,0 +1,50 @@ +/* + * QEMU Crypto Device Common Vhost User Implement + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei <arei.gonglei@huawei.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
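A hedged sketch of the vcpu pause/resume interface from system/cpus.h above (caller holds the BQL); the surrounding function and the state being updated are hypothetical:

    static void my_global_update(void)
    {
        pause_all_vcpus();
        cpu_synchronize_all_states();   /* pull vcpu state out of the accelerator */
        /* ... modify machine-wide state that running vcpus must not observe ... */
        resume_all_vcpus();
    }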
+ * + */ + +#ifndef CRYPTODEV_VHOST_USER_H +#define CRYPTODEV_VHOST_USER_H + +#include "system/cryptodev-vhost.h" + +#define VHOST_USER_MAX_AUTH_KEY_LEN 512 +#define VHOST_USER_MAX_CIPHER_KEY_LEN 64 + + +/** + * cryptodev_vhost_user_get_vhost: + * @cc: the client object for each queue + * @b: the cryptodev backend common vhost object + * @queue: the queue index + * + * Gets a new cryptodev backend common vhost object based on + * @b and @queue + * + * Returns: the cryptodev backend common vhost object + */ +CryptoDevBackendVhost * +cryptodev_vhost_user_get_vhost( + CryptoDevBackendClient *cc, + CryptoDevBackend *b, + uint16_t queue); + +#endif /* CRYPTODEV_VHOST_USER_H */ diff --git a/include/system/cryptodev-vhost.h b/include/system/cryptodev-vhost.h new file mode 100644 index 0000000000..b0bb09e70a --- /dev/null +++ b/include/system/cryptodev-vhost.h @@ -0,0 +1,153 @@ +/* + * QEMU Crypto Device Common Vhost Implement + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei <arei.gonglei@huawei.com> + * Jay Zhou <jianjay.zhou@huawei.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + */ +#ifndef CRYPTODEV_VHOST_H +#define CRYPTODEV_VHOST_H + +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-backend.h" +#include "chardev/char.h" + +#include "system/cryptodev.h" + + +typedef struct CryptoDevBackendVhostOptions { + VhostBackendType backend_type; + void *opaque; + int total_queues; + CryptoDevBackendClient *cc; +} CryptoDevBackendVhostOptions; + +typedef struct CryptoDevBackendVhost { + struct vhost_dev dev; + struct vhost_virtqueue vqs[1]; + int backend; + CryptoDevBackendClient *cc; +} CryptoDevBackendVhost; + +/** + * cryptodev_vhost_get_max_queues: + * @crypto: the cryptodev backend common vhost object + * + * Get the maximum queue number of @crypto. 
+ * + * + * Returns: the maximum queue number + */ +uint64_t +cryptodev_vhost_get_max_queues( + CryptoDevBackendVhost *crypto); + + +/** + * cryptodev_vhost_init: + * @options: the common vhost object's option + * + * Creates a new cryptodev backend common vhost object + * + ** The returned object must be released with + * cryptodev_vhost_cleanup() when no + * longer required + * + * Returns: the cryptodev backend common vhost object + */ +struct CryptoDevBackendVhost * +cryptodev_vhost_init( + CryptoDevBackendVhostOptions *options); + +/** + * cryptodev_vhost_cleanup: + * @crypto: the cryptodev backend common vhost object + * + * Clean the resource associated with @crypto that realizaed + * by cryptodev_vhost_init() + * + */ +void cryptodev_vhost_cleanup( + CryptoDevBackendVhost *crypto); + +/** + * cryptodev_get_vhost: + * @cc: the client object for each queue + * @b: the cryptodev backend common vhost object + * @queue: the cryptodev backend queue index + * + * Gets a new cryptodev backend common vhost object based on + * @b and @queue + * + * Returns: the cryptodev backend common vhost object + */ +CryptoDevBackendVhost * +cryptodev_get_vhost(CryptoDevBackendClient *cc, + CryptoDevBackend *b, + uint16_t queue); +/** + * cryptodev_vhost_start: + * @dev: the virtio crypto object + * @total_queues: the total count of queue + * + * Starts the vhost crypto logic + * + * Returns: 0 for success, negative for errors + */ +int cryptodev_vhost_start(VirtIODevice *dev, int total_queues); + +/** + * cryptodev_vhost_stop: + * @dev: the virtio crypto object + * @total_queues: the total count of queue + * + * Stops the vhost crypto logic + * + */ +void cryptodev_vhost_stop(VirtIODevice *dev, int total_queues); + +/** + * cryptodev_vhost_virtqueue_mask: + * @dev: the virtio crypto object + * @queue: the cryptodev backend queue index + * @idx: the virtqueue index + * @mask: mask or not (true or false) + * + * Mask/unmask events for @idx virtqueue on @dev device + * + */ +void cryptodev_vhost_virtqueue_mask(VirtIODevice *dev, + int queue, + int idx, bool mask); + +/** + * cryptodev_vhost_virtqueue_pending: + * @dev: the virtio crypto object + * @queue: the cryptodev backend queue index + * @idx: the virtqueue index + * + * Test and clear event pending status for @idx virtqueue on @dev device. + * Should be called after unmask to avoid losing events. + * + * Returns: true for success, false for errors + */ +bool cryptodev_vhost_virtqueue_pending(VirtIODevice *dev, + int queue, int idx); + +#endif /* CRYPTODEV_VHOST_H */ diff --git a/include/system/cryptodev.h b/include/system/cryptodev.h new file mode 100644 index 0000000000..b20822df0d --- /dev/null +++ b/include/system/cryptodev.h @@ -0,0 +1,447 @@ +/* + * QEMU Crypto Device Implementation + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei <arei.gonglei@huawei.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + */ +#ifndef CRYPTODEV_H +#define CRYPTODEV_H + +#include "qemu/queue.h" +#include "qemu/throttle.h" +#include "qom/object.h" +#include "qapi/qapi-types-cryptodev.h" + +/** + * CryptoDevBackend: + * + * The CryptoDevBackend object is an interface + * for different cryptodev backends, which provides crypto + * operation wrapper. + * + */ + +#define TYPE_CRYPTODEV_BACKEND "cryptodev-backend" + +OBJECT_DECLARE_TYPE(CryptoDevBackend, CryptoDevBackendClass, + CRYPTODEV_BACKEND) + + +#define MAX_CRYPTO_QUEUE_NUM 64 + +typedef struct CryptoDevBackendConf CryptoDevBackendConf; +typedef struct CryptoDevBackendPeers CryptoDevBackendPeers; +typedef struct CryptoDevBackendClient + CryptoDevBackendClient; + +/** + * CryptoDevBackendSymSessionInfo: + * + * @cipher_alg: algorithm type of CIPHER + * @key_len: byte length of cipher key + * @hash_alg: algorithm type of HASH/MAC + * @hash_result_len: byte length of HASH operation result + * @auth_key_len: byte length of authenticated key + * @add_len: byte length of additional authenticated data + * @op_type: operation type (refer to virtio_crypto.h) + * @direction: encryption or direction for CIPHER + * @hash_mode: HASH mode for HASH operation (refer to virtio_crypto.h) + * @alg_chain_order: order of algorithm chaining (CIPHER then HASH, + * or HASH then CIPHER) + * @cipher_key: point to a key of CIPHER + * @auth_key: point to an authenticated key of MAC + * + */ +typedef struct CryptoDevBackendSymSessionInfo { + /* corresponding with virtio crypto spec */ + uint32_t cipher_alg; + uint32_t key_len; + uint32_t hash_alg; + uint32_t hash_result_len; + uint32_t auth_key_len; + uint32_t add_len; + uint8_t op_type; + uint8_t direction; + uint8_t hash_mode; + uint8_t alg_chain_order; + uint8_t *cipher_key; + uint8_t *auth_key; +} CryptoDevBackendSymSessionInfo; + +/** + * CryptoDevBackendAsymSessionInfo: + */ +typedef struct CryptoDevBackendRsaPara { + uint32_t padding_algo; + uint32_t hash_algo; +} CryptoDevBackendRsaPara; + +typedef struct CryptoDevBackendAsymSessionInfo { + /* corresponding with virtio crypto spec */ + uint32_t algo; + uint32_t keytype; + uint32_t keylen; + uint8_t *key; + union { + CryptoDevBackendRsaPara rsa; + } u; +} CryptoDevBackendAsymSessionInfo; + +typedef struct CryptoDevBackendSessionInfo { + uint32_t op_code; + union { + CryptoDevBackendSymSessionInfo sym_sess_info; + CryptoDevBackendAsymSessionInfo asym_sess_info; + } u; + uint64_t session_id; +} CryptoDevBackendSessionInfo; + +/** + * CryptoDevBackendSymOpInfo: + * + * @aad_len: byte length of additional authenticated data + * @iv_len: byte length of initialization vector or counter + * @src_len: byte length of source data + * @dst_len: byte length of destination data + * @digest_result_len: byte length of hash digest result + * @hash_start_src_offset: Starting point for hash processing, specified + * as number of bytes from start of packet in source data, only used for + * algorithm chain + * @cipher_start_src_offset: Starting point for cipher processing, specified + * as number of bytes from start of packet in source data, only used for + * algorithm chain + * @len_to_hash: byte length of source data on which the hash + * operation will be computed, only used for algorithm chain + * @len_to_cipher: byte length of source data on which the cipher + * operation will be computed, only used for algorithm chain + * @op_type: 
operation type (refer to virtio_crypto.h) + * @iv: point to the initialization vector or counter + * @src: point to the source data + * @dst: point to the destination data + * @aad_data: point to the additional authenticated data + * @digest_result: point to the digest result data + * @data[0]: point to the extensional memory by one memory allocation + * + */ +typedef struct CryptoDevBackendSymOpInfo { + uint32_t aad_len; + uint32_t iv_len; + uint32_t src_len; + uint32_t dst_len; + uint32_t digest_result_len; + uint32_t hash_start_src_offset; + uint32_t cipher_start_src_offset; + uint32_t len_to_hash; + uint32_t len_to_cipher; + uint8_t op_type; + uint8_t *iv; + uint8_t *src; + uint8_t *dst; + uint8_t *aad_data; + uint8_t *digest_result; + uint8_t data[]; +} CryptoDevBackendSymOpInfo; + + +/** + * CryptoDevBackendAsymOpInfo: + * + * @src_len: byte length of source data + * @dst_len: byte length of destination data + * @src: point to the source data + * @dst: point to the destination data + * + */ +typedef struct CryptoDevBackendAsymOpInfo { + uint32_t src_len; + uint32_t dst_len; + uint8_t *src; + uint8_t *dst; +} CryptoDevBackendAsymOpInfo; + +typedef void (*CryptoDevCompletionFunc) (void *opaque, int ret); + +typedef struct CryptoDevBackendOpInfo { + QCryptodevBackendAlgoType algtype; + uint32_t op_code; + uint32_t queue_index; + CryptoDevCompletionFunc cb; + void *opaque; /* argument for cb */ + uint64_t session_id; + union { + CryptoDevBackendSymOpInfo *sym_op_info; + CryptoDevBackendAsymOpInfo *asym_op_info; + } u; + QTAILQ_ENTRY(CryptoDevBackendOpInfo) next; +} CryptoDevBackendOpInfo; + +struct CryptoDevBackendClass { + ObjectClass parent_class; + + void (*init)(CryptoDevBackend *backend, Error **errp); + void (*cleanup)(CryptoDevBackend *backend, Error **errp); + + int (*create_session)(CryptoDevBackend *backend, + CryptoDevBackendSessionInfo *sess_info, + uint32_t queue_index, + CryptoDevCompletionFunc cb, + void *opaque); + + int (*close_session)(CryptoDevBackend *backend, + uint64_t session_id, + uint32_t queue_index, + CryptoDevCompletionFunc cb, + void *opaque); + + int (*do_op)(CryptoDevBackend *backend, + CryptoDevBackendOpInfo *op_info); +}; + +struct CryptoDevBackendClient { + QCryptodevBackendType type; + char *info_str; + unsigned int queue_index; + int vring_enable; + QTAILQ_ENTRY(CryptoDevBackendClient) next; +}; + +struct CryptoDevBackendPeers { + CryptoDevBackendClient *ccs[MAX_CRYPTO_QUEUE_NUM]; + uint32_t queues; +}; + +struct CryptoDevBackendConf { + CryptoDevBackendPeers peers; + + /* Supported service mask */ + uint32_t crypto_services; + + /* Detailed algorithms mask */ + uint32_t cipher_algo_l; + uint32_t cipher_algo_h; + uint32_t hash_algo; + uint32_t mac_algo_l; + uint32_t mac_algo_h; + uint32_t aead_algo; + uint32_t akcipher_algo; + /* Maximum length of cipher key */ + uint32_t max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + /* Maximum size of each crypto request's content */ + uint64_t max_size; +}; + +typedef struct CryptodevBackendSymStat { + int64_t encrypt_ops; + int64_t decrypt_ops; + int64_t encrypt_bytes; + int64_t decrypt_bytes; +} CryptodevBackendSymStat; + +typedef struct CryptodevBackendAsymStat { + int64_t encrypt_ops; + int64_t decrypt_ops; + int64_t sign_ops; + int64_t verify_ops; + int64_t encrypt_bytes; + int64_t decrypt_bytes; + int64_t sign_bytes; + int64_t verify_bytes; +} CryptodevBackendAsymStat; + +struct CryptoDevBackend { + Object parent_obj; + + bool ready; + /* Tag the cryptodev 
backend is used by virtio-crypto or not */ + bool is_used; + CryptoDevBackendConf conf; + CryptodevBackendSymStat *sym_stat; + CryptodevBackendAsymStat *asym_stat; + + ThrottleState ts; + ThrottleTimers tt; + ThrottleConfig tc; + QTAILQ_HEAD(, CryptoDevBackendOpInfo) opinfos; +}; + +#define CryptodevSymStatInc(be, op, bytes) do { \ + be->sym_stat->op##_bytes += (bytes); \ + be->sym_stat->op##_ops += 1; \ +} while (/*CONSTCOND*/0) + +#define CryptodevSymStatIncEncrypt(be, bytes) \ + CryptodevSymStatInc(be, encrypt, bytes) + +#define CryptodevSymStatIncDecrypt(be, bytes) \ + CryptodevSymStatInc(be, decrypt, bytes) + +#define CryptodevAsymStatInc(be, op, bytes) do { \ + be->asym_stat->op##_bytes += (bytes); \ + be->asym_stat->op##_ops += 1; \ +} while (/*CONSTCOND*/0) + +#define CryptodevAsymStatIncEncrypt(be, bytes) \ + CryptodevAsymStatInc(be, encrypt, bytes) + +#define CryptodevAsymStatIncDecrypt(be, bytes) \ + CryptodevAsymStatInc(be, decrypt, bytes) + +#define CryptodevAsymStatIncSign(be, bytes) \ + CryptodevAsymStatInc(be, sign, bytes) + +#define CryptodevAsymStatIncVerify(be, bytes) \ + CryptodevAsymStatInc(be, verify, bytes) + + +/** + * cryptodev_backend_new_client: + * + * Creates a new cryptodev backend client object. + * + * The returned object must be released with + * cryptodev_backend_free_client() when no + * longer required + * + * Returns: a new cryptodev backend client object + */ +CryptoDevBackendClient *cryptodev_backend_new_client(void); + +/** + * cryptodev_backend_free_client: + * @cc: the cryptodev backend client object + * + * Release the memory associated with @cc that + * was previously allocated by cryptodev_backend_new_client() + */ +void cryptodev_backend_free_client( + CryptoDevBackendClient *cc); + +/** + * cryptodev_backend_cleanup: + * @backend: the cryptodev backend object + * @errp: pointer to a NULL-initialized error object + * + * Clean the resource associated with @backend that realizaed + * by the specific backend's init() callback + */ +void cryptodev_backend_cleanup( + CryptoDevBackend *backend, + Error **errp); + +/** + * cryptodev_backend_create_session: + * @backend: the cryptodev backend object + * @sess_info: parameters needed by session creating + * @queue_index: queue index of cryptodev backend client + * @errp: pointer to a NULL-initialized error object + * @cb: callback when session create is compeleted + * @opaque: parameter passed to callback + * + * Create a session for symmetric/asymmetric algorithms + * + * Returns: 0 for success and cb will be called when creation is completed, + * negative value for error, and cb will not be called. + */ +int cryptodev_backend_create_session( + CryptoDevBackend *backend, + CryptoDevBackendSessionInfo *sess_info, + uint32_t queue_index, + CryptoDevCompletionFunc cb, + void *opaque); + +/** + * cryptodev_backend_close_session: + * @backend: the cryptodev backend object + * @session_id: the session id + * @queue_index: queue index of cryptodev backend client + * @errp: pointer to a NULL-initialized error object + * @cb: callback when session create is compeleted + * @opaque: parameter passed to callback + * + * Close a session for which was previously + * created by cryptodev_backend_create_session() + * + * Returns: 0 for success and cb will be called when creation is completed, + * negative value for error, and cb will not be called. 
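A hedged sketch of asynchronous session creation with the API above; the my_* names are hypothetical and VIRTIO_CRYPTO_CIPHER_CREATE_SESSION is assumed to be the opcode from the virtio_crypto.h spec header:

    static void my_sess_created(void *opaque, int ret)
    {
        /* completion status reported by the backend; negative on failure */
    }

    static int my_create_sym_session(CryptoDevBackend *backend,
                                     CryptoDevBackendSymSessionInfo *sym)
    {
        CryptoDevBackendSessionInfo info = {
            .op_code = VIRTIO_CRYPTO_CIPHER_CREATE_SESSION,   /* assumed opcode */
        };

        info.u.sym_sess_info = *sym;
        return cryptodev_backend_create_session(backend, &info, 0 /* queue */,
                                                my_sess_created, NULL);
    }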
+ */ +int cryptodev_backend_close_session( + CryptoDevBackend *backend, + uint64_t session_id, + uint32_t queue_index, + CryptoDevCompletionFunc cb, + void *opaque); + +/** + * cryptodev_backend_crypto_operation: + * @backend: the cryptodev backend object + * @op_info: pointer to a CryptoDevBackendOpInfo object + * + * Do crypto operation, such as encryption, decryption, signature and + * verification + * + * Returns: 0 for success and cb will be called when creation is completed, + * negative value for error, and cb will not be called. + */ +int cryptodev_backend_crypto_operation( + CryptoDevBackend *backend, + CryptoDevBackendOpInfo *op_info); + +/** + * cryptodev_backend_set_used: + * @backend: the cryptodev backend object + * @used: true or false + * + * Set the cryptodev backend is used by virtio-crypto or not + */ +void cryptodev_backend_set_used(CryptoDevBackend *backend, bool used); + +/** + * cryptodev_backend_is_used: + * @backend: the cryptodev backend object + * + * Return the status that the cryptodev backend is used + * by virtio-crypto or not + * + * Returns: true on used, or false on not used + */ +bool cryptodev_backend_is_used(CryptoDevBackend *backend); + +/** + * cryptodev_backend_set_ready: + * @backend: the cryptodev backend object + * @ready: true or false + * + * Set the cryptodev backend is ready or not, which is called + * by the children of the cryptodev banckend interface. + */ +void cryptodev_backend_set_ready(CryptoDevBackend *backend, bool ready); + +/** + * cryptodev_backend_is_ready: + * @backend: the cryptodev backend object + * + * Return the status that the cryptodev backend is ready or not + * + * Returns: true on ready, or false on not ready + */ +bool cryptodev_backend_is_ready(CryptoDevBackend *backend); + +#endif /* CRYPTODEV_H */ diff --git a/include/system/device_tree.h b/include/system/device_tree.h new file mode 100644 index 0000000000..eb601522f8 --- /dev/null +++ b/include/system/device_tree.h @@ -0,0 +1,215 @@ +/* + * Header with function prototypes to help device tree manipulation using + * libfdt. It also provides functions to read entries from device tree proc + * interface. + * + * Copyright 2008 IBM Corporation. + * Authors: Jerone Young <jyoung5@us.ibm.com> + * Hollis Blanchard <hollisb@us.ibm.com> + * + * This work is licensed under the GNU GPL license version 2 or later. + * + */ + +#ifndef DEVICE_TREE_H +#define DEVICE_TREE_H + +void *create_device_tree(int *sizep); +void *load_device_tree(const char *filename_path, int *sizep); +#ifdef CONFIG_LINUX +/** + * load_device_tree_from_sysfs: reads the device tree information in the + * /proc/device-tree directory and return the corresponding binary blob + * buffer pointer. Asserts in case of error. + */ +void *load_device_tree_from_sysfs(void); +#endif + +/** + * qemu_fdt_node_path: return the paths of nodes matching a given + * name and compat string + * @fdt: pointer to the dt blob + * @name: node name + * @compat: compatibility string + * @errp: handle to an error object + * + * returns a newly allocated NULL-terminated array of node paths. + * Use g_strfreev() to free it. If one or more nodes were found, the + * array contains the path of each node and the last element equals to + * NULL. If there is no error but no matching node was found, the + * returned array contains a single element equal to NULL. If an error + * was encountered when parsing the blob, the function returns NULL + * + * @name may be NULL to wildcard names and only match compatibility + * strings. 
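A hedged sketch of qemu_fdt_node_path() as documented above: passing a NULL name wildcards node names and matches on the compat string only; the caller frees the returned array with g_strfreev(). The function and the "virtio,mmio" string are illustrative:

    static void my_dump_virtio_nodes(void *fdt, Error **errp)
    {
        char **paths = qemu_fdt_node_path(fdt, NULL, "virtio,mmio", errp);

        if (!paths) {
            return;                 /* blob parse error, reported through errp */
        }
        for (int i = 0; paths[i]; i++) {
            /* paths[i] is the full path of one matching node */
        }
        g_strfreev(paths);
    }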
+ */ +char **qemu_fdt_node_path(void *fdt, const char *name, const char *compat, + Error **errp); + +/** + * qemu_fdt_node_unit_path: return the paths of nodes matching a given + * node-name, ie. node-name and node-name@unit-address + * @fdt: pointer to the dt blob + * @name: node name + * @errp: handle to an error object + * + * returns a newly allocated NULL-terminated array of node paths. + * Use g_strfreev() to free it. If one or more nodes were found, the + * array contains the path of each node and the last element equals to + * NULL. If there is no error but no matching node was found, the + * returned array contains a single element equal to NULL. If an error + * was encountered when parsing the blob, the function returns NULL + */ +char **qemu_fdt_node_unit_path(void *fdt, const char *name, Error **errp); + +int qemu_fdt_setprop(void *fdt, const char *node_path, + const char *property, const void *val, int size); +int qemu_fdt_setprop_cell(void *fdt, const char *node_path, + const char *property, uint32_t val); +int qemu_fdt_setprop_u64(void *fdt, const char *node_path, + const char *property, uint64_t val); +int qemu_fdt_setprop_string(void *fdt, const char *node_path, + const char *property, const char *string); + +/** + * qemu_fdt_setprop_string_array: set a string array property + * + * @fdt: pointer to the dt blob + * @name: node name + * @prop: property array + * @array: pointer to an array of string pointers + * @len: length of array + * + * assigns a string array to a property. This function converts and + * array of strings to a sequential string with \0 separators before + * setting the property. + */ +int qemu_fdt_setprop_string_array(void *fdt, const char *node_path, + const char *prop, char **array, int len); + +int qemu_fdt_setprop_phandle(void *fdt, const char *node_path, + const char *property, + const char *target_node_path); +/** + * qemu_fdt_getprop: retrieve the value of a given property + * @fdt: pointer to the device tree blob + * @node_path: node path + * @property: name of the property to find + * @lenp: fdt error if any or length of the property on success + * @errp: handle to an error object + * + * returns a pointer to the property on success and NULL on failure + */ +const void *qemu_fdt_getprop(void *fdt, const char *node_path, + const char *property, int *lenp, + Error **errp); +/** + * qemu_fdt_getprop_cell: retrieve the value of a given 4 byte property + * @fdt: pointer to the device tree blob + * @node_path: node path + * @property: name of the property to find + * @lenp: fdt error if any or -EINVAL if the property size is different from + * 4 bytes, or 4 (expected length of the property) upon success. + * @errp: handle to an error object + * + * returns the property value on success + */ +uint32_t qemu_fdt_getprop_cell(void *fdt, const char *node_path, + const char *property, int *lenp, + Error **errp); +uint32_t qemu_fdt_get_phandle(void *fdt, const char *path); +uint32_t qemu_fdt_alloc_phandle(void *fdt); +int qemu_fdt_nop_node(void *fdt, const char *node_path); +int qemu_fdt_add_subnode(void *fdt, const char *name); +int qemu_fdt_add_path(void *fdt, const char *path); + +#define qemu_fdt_setprop_cells(fdt, node_path, property, ...) 
\ + do { \ + uint32_t qdt_tmp[] = { __VA_ARGS__ }; \ + for (unsigned i_ = 0; i_ < ARRAY_SIZE(qdt_tmp); i_++) { \ + qdt_tmp[i_] = cpu_to_be32(qdt_tmp[i_]); \ + } \ + qemu_fdt_setprop(fdt, node_path, property, qdt_tmp, \ + sizeof(qdt_tmp)); \ + } while (0) + +void qemu_fdt_dumpdtb(void *fdt, int size); + +/** + * qemu_fdt_setprop_sized_cells_from_array: + * @fdt: device tree blob + * @node_path: node to set property on + * @property: property to set + * @numvalues: number of values + * @values: array of number-of-cells, value pairs + * + * Set the specified property on the specified node in the device tree + * to be an array of cells. The values of the cells are specified via + * the values list, which alternates between "number of cells used by + * this value" and "value". + * number-of-cells must be either 1 or 2 (other values will result in + * an error being returned). If a value is too large to fit in the + * number of cells specified for it, an error is returned. + * + * This function is useful because device tree nodes often have cell arrays + * which are either lists of addresses or lists of address,size tuples, but + * the number of cells used for each element vary depending on the + * #address-cells and #size-cells properties of their parent node. + * If you know all your cell elements are one cell wide you can use the + * simpler qemu_fdt_setprop_cells(). If you're not setting up the + * array programmatically, qemu_fdt_setprop_sized_cells may be more + * convenient. + * + * Return value: 0 on success, <0 on error. + */ +int qemu_fdt_setprop_sized_cells_from_array(void *fdt, + const char *node_path, + const char *property, + int numvalues, + uint64_t *values); + +/** + * qemu_fdt_setprop_sized_cells: + * @fdt: device tree blob + * @node_path: node to set property on + * @property: property to set + * @...: list of number-of-cells, value pairs + * + * Set the specified property on the specified node in the device tree + * to be an array of cells. The values of the cells are specified via + * the variable arguments, which alternates between "number of cells + * used by this value" and "value". + * + * This is a convenience wrapper for the function + * qemu_fdt_setprop_sized_cells_from_array(). + * + * Return value: 0 on success, <0 on error. + */ +#define qemu_fdt_setprop_sized_cells(fdt, node_path, property, ...) \ + ({ \ + uint64_t qdt_tmp[] = { __VA_ARGS__ }; \ + qemu_fdt_setprop_sized_cells_from_array(fdt, node_path, \ + property, \ + ARRAY_SIZE(qdt_tmp) / 2, \ + qdt_tmp); \ + }) + + +/** + * qemu_fdt_randomize_seeds: + * @fdt: device tree blob + * + * Re-randomize all "rng-seed" properties with new seeds. + */ +void qemu_fdt_randomize_seeds(void *fdt); + +#define FDT_PCI_RANGE_RELOCATABLE 0x80000000 +#define FDT_PCI_RANGE_PREFETCHABLE 0x40000000 +#define FDT_PCI_RANGE_ALIASED 0x20000000 +#define FDT_PCI_RANGE_TYPE_MASK 0x03000000 +#define FDT_PCI_RANGE_MMIO_64BIT 0x03000000 +#define FDT_PCI_RANGE_MMIO 0x02000000 +#define FDT_PCI_RANGE_IOPORT 0x01000000 +#define FDT_PCI_RANGE_CONFIG 0x00000000 + +#endif /* DEVICE_TREE_H */ diff --git a/include/system/dirtylimit.h b/include/system/dirtylimit.h new file mode 100644 index 0000000000..d11ebbbbdb --- /dev/null +++ b/include/system/dirtylimit.h @@ -0,0 +1,39 @@ +/* + * Dirty page rate limit common functions + * + * Copyright (c) 2022 CHINA TELECOM CO.,LTD. + * + * Authors: + * Hyman Huang(黄勇) <huangy81@chinatelecom.cn> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. 
+ * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_DIRTYRLIMIT_H +#define QEMU_DIRTYRLIMIT_H + +#define DIRTYLIMIT_CALC_TIME_MS 1000 /* 1000ms */ + +int64_t vcpu_dirty_rate_get(int cpu_index); +void vcpu_dirty_rate_stat_start(void); +void vcpu_dirty_rate_stat_stop(void); +void vcpu_dirty_rate_stat_initialize(void); +void vcpu_dirty_rate_stat_finalize(void); + +void dirtylimit_state_lock(void); +void dirtylimit_state_unlock(void); +void dirtylimit_state_initialize(void); +void dirtylimit_state_finalize(void); +bool dirtylimit_in_service(void); +bool dirtylimit_vcpu_index_valid(int cpu_index); +void dirtylimit_process(void); +void dirtylimit_change(bool start); +void dirtylimit_set_vcpu(int cpu_index, + uint64_t quota, + bool enable); +void dirtylimit_set_all(uint64_t quota, + bool enable); +void dirtylimit_vcpu_execute(CPUState *cpu); +uint64_t dirtylimit_throttle_time_per_round(void); +uint64_t dirtylimit_ring_full_time(void); +#endif diff --git a/include/system/dirtyrate.h b/include/system/dirtyrate.h new file mode 100644 index 0000000000..20813f303f --- /dev/null +++ b/include/system/dirtyrate.h @@ -0,0 +1,30 @@ +/* + * dirty page rate helper functions + * + * Copyright (c) 2022 CHINA TELECOM CO.,LTD. + * + * Authors: + * Hyman Huang(黄勇) <huangy81@chinatelecom.cn> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_DIRTYRATE_H +#define QEMU_DIRTYRATE_H + +#include "qapi/qapi-types-migration.h" + +typedef struct VcpuStat { + int nvcpu; /* number of vcpu */ + DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */ +} VcpuStat; + +int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms, + VcpuStat *stat, + unsigned int flag, + bool one_shot); + +void global_dirty_log_change(unsigned int flag, + bool start); +#endif diff --git a/include/system/dma.h b/include/system/dma.h new file mode 100644 index 0000000000..5a49a30628 --- /dev/null +++ b/include/system/dma.h @@ -0,0 +1,323 @@ +/* + * DMA helper functions + * + * Copyright (c) 2009, 2020 Red Hat + * + * This work is licensed under the terms of the GNU General Public License + * (GNU GPL), version 2 or later. + */ + +#ifndef DMA_H +#define DMA_H + +#include "exec/memory.h" +#include "exec/address-spaces.h" +#include "block/block.h" +#include "block/accounting.h" + +typedef enum { + DMA_DIRECTION_TO_DEVICE = 0, + DMA_DIRECTION_FROM_DEVICE = 1, +} DMADirection; + +/* + * When an IOMMU is present, bus addresses become distinct from + * CPU/memory physical addresses and may be a different size. Because + * the IOVA size depends more on the bus than on the platform, we more + * or less have to treat these as 64-bit always to cover all (or at + * least most) cases. + */ +typedef uint64_t dma_addr_t; + +#define DMA_ADDR_BITS 64 +#define DMA_ADDR_FMT "%" PRIx64 + +typedef struct ScatterGatherEntry ScatterGatherEntry; + +struct QEMUSGList { + ScatterGatherEntry *sg; + int nsg; + int nalloc; + dma_addr_t size; + DeviceState *dev; + AddressSpace *as; +}; + +static inline void dma_barrier(AddressSpace *as, DMADirection dir) +{ + /* + * This is called before DMA read and write operations + * unless the _relaxed form is used and is responsible + * for providing some sane ordering of accesses vs + * concurrently running VCPUs. + * + * Users of map(), unmap() or lower level st/ld_* + * operations are responsible for providing their own + * ordering via barriers. 
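As a usage sketch for the dirty page rate limit API declared in dirtylimit.h above: the quota unit (MB/s, as used by the QMP set-vcpu-dirty-limit command) and the convention that callers hold the dirtylimit state lock are assumptions for this example, not guarantees made by the header.

        /* throttle vCPU 0 to roughly 100 MB/s of dirtied memory ... */
        dirtylimit_state_lock();
        dirtylimit_set_vcpu(0, 100, true);
        dirtylimit_state_unlock();

        /* ... and later lift the limit again */
        dirtylimit_state_lock();
        dirtylimit_set_vcpu(0, 0, false);
        dirtylimit_state_unlock();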
+ * + * This primitive implementation does a simple smp_mb() + * before each operation which provides pretty much full + * ordering. + * + * A smarter implementation can be devised if needed to + * use lighter barriers based on the direction of the + * transfer, the DMA context, etc... + */ + smp_mb(); +} + +/* Checks that the given range of addresses is valid for DMA. This is + * useful for certain cases, but usually you should just use + * dma_memory_{read,write}() and check for errors */ +static inline bool dma_memory_valid(AddressSpace *as, + dma_addr_t addr, dma_addr_t len, + DMADirection dir, MemTxAttrs attrs) +{ + return address_space_access_valid(as, addr, len, + dir == DMA_DIRECTION_FROM_DEVICE, + attrs); +} + +static inline MemTxResult dma_memory_rw_relaxed(AddressSpace *as, + dma_addr_t addr, + void *buf, dma_addr_t len, + DMADirection dir, + MemTxAttrs attrs) +{ + return address_space_rw(as, addr, attrs, + buf, len, dir == DMA_DIRECTION_FROM_DEVICE); +} + +static inline MemTxResult dma_memory_read_relaxed(AddressSpace *as, + dma_addr_t addr, + void *buf, dma_addr_t len) +{ + return dma_memory_rw_relaxed(as, addr, buf, len, + DMA_DIRECTION_TO_DEVICE, + MEMTXATTRS_UNSPECIFIED); +} + +static inline MemTxResult dma_memory_write_relaxed(AddressSpace *as, + dma_addr_t addr, + const void *buf, + dma_addr_t len) +{ + return dma_memory_rw_relaxed(as, addr, (void *)buf, len, + DMA_DIRECTION_FROM_DEVICE, + MEMTXATTRS_UNSPECIFIED); +} + +/** + * dma_memory_rw: Read from or write to an address space from DMA controller. + * + * Return a MemTxResult indicating whether the operation succeeded + * or failed (eg unassigned memory, device rejected the transaction, + * IOMMU fault). + * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @buf: buffer with the data transferred + * @len: the number of bytes to read or write + * @dir: indicates the transfer direction + * @attrs: memory transaction attributes + */ +static inline MemTxResult dma_memory_rw(AddressSpace *as, dma_addr_t addr, + void *buf, dma_addr_t len, + DMADirection dir, MemTxAttrs attrs) +{ + dma_barrier(as, dir); + + return dma_memory_rw_relaxed(as, addr, buf, len, dir, attrs); +} + +/** + * dma_memory_read: Read from an address space from DMA controller. + * + * Return a MemTxResult indicating whether the operation succeeded + * or failed (eg unassigned memory, device rejected the transaction, + * IOMMU fault). Called within RCU critical section. + * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @buf: buffer with the data transferred + * @len: length of the data transferred + * @attrs: memory transaction attributes + */ +static inline MemTxResult dma_memory_read(AddressSpace *as, dma_addr_t addr, + void *buf, dma_addr_t len, + MemTxAttrs attrs) +{ + return dma_memory_rw(as, addr, buf, len, + DMA_DIRECTION_TO_DEVICE, attrs); +} + +/** + * dma_memory_write: Write to address space from DMA controller. + * + * Return a MemTxResult indicating whether the operation succeeded + * or failed (eg unassigned memory, device rejected the transaction, + * IOMMU fault). 
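A minimal sketch of the error-checking pattern for these DMA helpers; desc_addr and the 16-byte descriptor layout are invented for the example.

        uint8_t desc[16];

        if (dma_memory_read(&address_space_memory, desc_addr, desc, sizeof(desc),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            /* unassigned memory, rejected transaction or IOMMU fault */
            return;
        }
        /* ... parse desc ... */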
+ * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @buf: buffer with the data transferred + * @len: the number of bytes to write + * @attrs: memory transaction attributes + */ +static inline MemTxResult dma_memory_write(AddressSpace *as, dma_addr_t addr, + const void *buf, dma_addr_t len, + MemTxAttrs attrs) +{ + return dma_memory_rw(as, addr, (void *)buf, len, + DMA_DIRECTION_FROM_DEVICE, attrs); +} + +/** + * dma_memory_set: Fill memory with a constant byte from DMA controller. + * + * Return a MemTxResult indicating whether the operation succeeded + * or failed (eg unassigned memory, device rejected the transaction, + * IOMMU fault). + * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @c: constant byte to fill the memory + * @len: the number of bytes to fill with the constant byte + * @attrs: memory transaction attributes + */ +MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr, + uint8_t c, dma_addr_t len, MemTxAttrs attrs); + +/** + * dma_memory_map: Map a physical memory region into a host virtual address. + * + * May map a subset of the requested range, given by and returned in @plen. + * May return %NULL and set *@plen to zero(0), if resources needed to perform + * the mapping are exhausted. + * Use only for reads OR writes - not for read-modify-write operations. + * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @len: pointer to length of buffer; updated on return + * @dir: indicates the transfer direction + * @attrs: memory attributes + */ +static inline void *dma_memory_map(AddressSpace *as, + dma_addr_t addr, dma_addr_t *len, + DMADirection dir, MemTxAttrs attrs) +{ + hwaddr xlen = *len; + void *p; + + p = address_space_map(as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE, + attrs); + *len = xlen; + return p; +} + +/** + * dma_memory_unmap: Unmaps a memory region previously mapped by dma_memory_map() + * + * Will also mark the memory as dirty if @dir == %DMA_DIRECTION_FROM_DEVICE. + * @access_len gives the amount of memory that was actually read or written + * by the caller. 
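A sketch of the map/unmap pairing described here; as, addr, size and bytes_produced are placeholders supplied by the caller.

        dma_addr_t maplen = size;
        void *buf = dma_memory_map(as, addr, &maplen, DMA_DIRECTION_FROM_DEVICE,
                                   MEMTXATTRS_UNSPECIFIED);

        if (!buf) {
            /* mapping resources exhausted: fall back to a bounce buffer
             * and dma_memory_write() */
            return;
        }
        /* ... produce up to maplen bytes into buf (may be less than size) ... */
        dma_memory_unmap(as, buf, maplen, DMA_DIRECTION_FROM_DEVICE,
                         bytes_produced);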
+ * + * @as: #AddressSpace used + * @buffer: host pointer as returned by dma_memory_map() + * @len: buffer length as returned by dma_memory_map() + * @dir: indicates the transfer direction + * @access_len: amount of data actually transferred + */ +static inline void dma_memory_unmap(AddressSpace *as, + void *buffer, dma_addr_t len, + DMADirection dir, dma_addr_t access_len) +{ + address_space_unmap(as, buffer, (hwaddr)len, + dir == DMA_DIRECTION_FROM_DEVICE, access_len); +} + +#define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \ + static inline MemTxResult ld##_lname##_##_end##_dma(AddressSpace *as, \ + dma_addr_t addr, \ + uint##_bits##_t *pval, \ + MemTxAttrs attrs) \ + { \ + MemTxResult res = dma_memory_read(as, addr, pval, (_bits) / 8, attrs); \ + _end##_bits##_to_cpus(pval); \ + return res; \ + } \ + static inline MemTxResult st##_sname##_##_end##_dma(AddressSpace *as, \ + dma_addr_t addr, \ + uint##_bits##_t val, \ + MemTxAttrs attrs) \ + { \ + val = cpu_to_##_end##_bits(val); \ + return dma_memory_write(as, addr, &val, (_bits) / 8, attrs); \ + } + +static inline MemTxResult ldub_dma(AddressSpace *as, dma_addr_t addr, + uint8_t *val, MemTxAttrs attrs) +{ + return dma_memory_read(as, addr, val, 1, attrs); +} + +static inline MemTxResult stb_dma(AddressSpace *as, dma_addr_t addr, + uint8_t val, MemTxAttrs attrs) +{ + return dma_memory_write(as, addr, &val, 1, attrs); +} + +DEFINE_LDST_DMA(uw, w, 16, le); +DEFINE_LDST_DMA(l, l, 32, le); +DEFINE_LDST_DMA(q, q, 64, le); +DEFINE_LDST_DMA(uw, w, 16, be); +DEFINE_LDST_DMA(l, l, 32, be); +DEFINE_LDST_DMA(q, q, 64, be); + +#undef DEFINE_LDST_DMA + +struct ScatterGatherEntry { + dma_addr_t base; + dma_addr_t len; +}; + +void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint, + AddressSpace *as); +void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len); +void qemu_sglist_destroy(QEMUSGList *qsg); + +typedef BlockAIOCB *DMAIOFunc(int64_t offset, QEMUIOVector *iov, + BlockCompletionFunc *cb, void *cb_opaque, + void *opaque); + +BlockAIOCB *dma_blk_io(AioContext *ctx, + QEMUSGList *sg, uint64_t offset, uint32_t align, + DMAIOFunc *io_func, void *io_func_opaque, + BlockCompletionFunc *cb, void *opaque, DMADirection dir); +BlockAIOCB *dma_blk_read(BlockBackend *blk, + QEMUSGList *sg, uint64_t offset, uint32_t align, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *dma_blk_write(BlockBackend *blk, + QEMUSGList *sg, uint64_t offset, uint32_t align, + BlockCompletionFunc *cb, void *opaque); +MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual, + QEMUSGList *sg, MemTxAttrs attrs); +MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual, + QEMUSGList *sg, MemTxAttrs attrs); + +void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie, + QEMUSGList *sg, enum BlockAcctType type); + +/** + * dma_aligned_pow2_mask: Return the address bit mask of the largest + * power of 2 size less or equal than @end - @start + 1, aligned with @start, + * and bounded by 1 << @max_addr_bits bits. + * + * @start: range start address + * @end: range end address (greater than @start) + * @max_addr_bits: max address bits (<= 64) + */ +uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, + int max_addr_bits); + +#endif diff --git a/include/system/dump-arch.h b/include/system/dump-arch.h new file mode 100644 index 0000000000..743916e46c --- /dev/null +++ b/include/system/dump-arch.h @@ -0,0 +1,35 @@ +/* + * QEMU dump + * + * Copyright Fujitsu, Corp. 
2011, 2012 + * + * Authors: + * Wen Congyang <wency@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef DUMP_ARCH_H +#define DUMP_ARCH_H + +typedef struct ArchDumpInfo { + int d_machine; /* Architecture */ + int d_endian; /* ELFDATA2LSB or ELFDATA2MSB */ + int d_class; /* ELFCLASS32 or ELFCLASS64 */ + uint32_t page_size; /* The target's page size. If it's variable and + * unknown, then this should be the maximum. */ + uint64_t phys_base; /* The target's physmem base. */ + void (*arch_sections_add_fn)(DumpState *s); + uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff); + int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff); + void (*arch_cleanup_fn)(DumpState *s); +} ArchDumpInfo; + +struct GuestPhysBlockList; /* memory_mapping.h */ +int cpu_get_dump_info(ArchDumpInfo *info, + const struct GuestPhysBlockList *guest_phys_blocks); +ssize_t cpu_get_note_size(int class, int machine, int nr_cpus); + +#endif diff --git a/include/system/dump.h b/include/system/dump.h new file mode 100644 index 0000000000..607bd7b220 --- /dev/null +++ b/include/system/dump.h @@ -0,0 +1,225 @@ +/* + * QEMU dump + * + * Copyright Fujitsu, Corp. 2011, 2012 + * + * Authors: + * Wen Congyang <wency@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef DUMP_H +#define DUMP_H + +#include "qapi/qapi-types-dump.h" +#include "qemu/thread.h" + +#define MAKEDUMPFILE_SIGNATURE "makedumpfile" +#define MAX_SIZE_MDF_HEADER (4096) /* max size of makedumpfile_header */ +#define TYPE_FLAT_HEADER (1) /* type of flattened format */ +#define VERSION_FLAT_HEADER (1) /* version of flattened format */ +#define END_FLAG_FLAT_HEADER (-1) + +#ifndef ARCH_PFN_OFFSET +#define ARCH_PFN_OFFSET (0) +#endif + +/* + * flag for compressed format + */ +#define DUMP_DH_COMPRESSED_ZLIB (0x1) +#define DUMP_DH_COMPRESSED_LZO (0x2) +#define DUMP_DH_COMPRESSED_SNAPPY (0x4) + +#define KDUMP_SIGNATURE "KDUMP " +#define SIG_LEN (sizeof(KDUMP_SIGNATURE) - 1) +#define DUMP_LEVEL (1) +#define DISKDUMP_HEADER_BLOCKS (1) + +#include "system/dump-arch.h" +#include "system/memory_mapping.h" + +typedef struct QEMU_PACKED MakedumpfileHeader { + char signature[16]; /* = "makedumpfile" */ + int64_t type; + int64_t version; +} MakedumpfileHeader; + +typedef struct QEMU_PACKED MakedumpfileDataHeader { + int64_t offset; + int64_t buf_size; +} MakedumpfileDataHeader; + +typedef struct QEMU_PACKED NewUtsname { + char sysname[65]; + char nodename[65]; + char release[65]; + char version[65]; + char machine[65]; + char domainname[65]; +} NewUtsname; + +typedef struct QEMU_PACKED DiskDumpHeader32 { + char signature[SIG_LEN]; /* = "KDUMP " */ + uint32_t header_version; /* Dump header version */ + NewUtsname utsname; /* copy of system_utsname */ + char timestamp[10]; /* Time stamp */ + uint32_t status; /* Above flags */ + uint32_t block_size; /* Size of a block in byte */ + uint32_t sub_hdr_size; /* Size of arch dependent header in block */ + uint32_t bitmap_blocks; /* Size of Memory bitmap in block */ + uint32_t max_mapnr; /* = max_mapnr , + obsoleted in header_version 6 */ + uint32_t total_ram_blocks; /* Number of blocks should be written */ + uint32_t device_blocks; /* Number of total blocks in dump device */ + uint32_t written_blocks; /* Number of written blocks */ + uint32_t current_cpu; /* CPU# which handles dump */ + 
uint32_t nr_cpus; /* Number of CPUs */ +} DiskDumpHeader32; + +typedef struct QEMU_PACKED DiskDumpHeader64 { + char signature[SIG_LEN]; /* = "KDUMP " */ + uint32_t header_version; /* Dump header version */ + NewUtsname utsname; /* copy of system_utsname */ + char timestamp[22]; /* Time stamp */ + uint32_t status; /* Above flags */ + uint32_t block_size; /* Size of a block in byte */ + uint32_t sub_hdr_size; /* Size of arch dependent header in block */ + uint32_t bitmap_blocks; /* Size of Memory bitmap in block */ + uint32_t max_mapnr; /* = max_mapnr, + obsoleted in header_version 6 */ + uint32_t total_ram_blocks; /* Number of blocks should be written */ + uint32_t device_blocks; /* Number of total blocks in dump device */ + uint32_t written_blocks; /* Number of written blocks */ + uint32_t current_cpu; /* CPU# which handles dump */ + uint32_t nr_cpus; /* Number of CPUs */ +} DiskDumpHeader64; + +typedef struct QEMU_PACKED KdumpSubHeader32 { + uint32_t phys_base; + uint32_t dump_level; /* header_version 1 and later */ + uint32_t split; /* header_version 2 and later */ + uint32_t start_pfn; /* header_version 2 and later, + obsoleted in header_version 6 */ + uint32_t end_pfn; /* header_version 2 and later, + obsoleted in header_version 6 */ + uint64_t offset_vmcoreinfo; /* header_version 3 and later */ + uint32_t size_vmcoreinfo; /* header_version 3 and later */ + uint64_t offset_note; /* header_version 4 and later */ + uint32_t note_size; /* header_version 4 and later */ + uint64_t offset_eraseinfo; /* header_version 5 and later */ + uint32_t size_eraseinfo; /* header_version 5 and later */ + uint64_t start_pfn_64; /* header_version 6 and later */ + uint64_t end_pfn_64; /* header_version 6 and later */ + uint64_t max_mapnr_64; /* header_version 6 and later */ +} KdumpSubHeader32; + +typedef struct QEMU_PACKED KdumpSubHeader64 { + uint64_t phys_base; + uint32_t dump_level; /* header_version 1 and later */ + uint32_t split; /* header_version 2 and later */ + uint64_t start_pfn; /* header_version 2 and later, + obsoleted in header_version 6 */ + uint64_t end_pfn; /* header_version 2 and later, + obsoleted in header_version 6 */ + uint64_t offset_vmcoreinfo; /* header_version 3 and later */ + uint64_t size_vmcoreinfo; /* header_version 3 and later */ + uint64_t offset_note; /* header_version 4 and later */ + uint64_t note_size; /* header_version 4 and later */ + uint64_t offset_eraseinfo; /* header_version 5 and later */ + uint64_t size_eraseinfo; /* header_version 5 and later */ + uint64_t start_pfn_64; /* header_version 6 and later */ + uint64_t end_pfn_64; /* header_version 6 and later */ + uint64_t max_mapnr_64; /* header_version 6 and later */ +} KdumpSubHeader64; + +typedef struct DataCache { + DumpState *state; /* dump state related to this data */ + uint8_t *buf; /* buffer for cached data */ + size_t buf_size; /* size of the buf */ + size_t data_size; /* size of cached data in buf */ + off_t offset; /* offset of the file */ +} DataCache; + +typedef struct QEMU_PACKED PageDescriptor { + uint64_t offset; /* the offset of the page data*/ + uint32_t size; /* the size of this dump page */ + uint32_t flags; /* flags */ + uint64_t page_flags; /* page flags */ +} PageDescriptor; + +typedef struct DumpState { + GuestPhysBlockList guest_phys_blocks; + ArchDumpInfo dump_info; + MemoryMappingList list; + bool resume; + bool detached; + bool kdump_raw; + hwaddr memory_offset; + int fd; + + /* + * Dump filter area variables + * + * A filtered dump only contains the guest memory designated by + * the 
start address and length variables defined below. + * + * If length is 0, no filtering is applied. + */ + int64_t filter_area_begin; /* Start address of partial guest memory area */ + int64_t filter_area_length; /* Length of partial guest memory area */ + + /* Elf dump related data */ + uint32_t phdr_num; + uint32_t shdr_num; + ssize_t note_size; + hwaddr shdr_offset; + hwaddr phdr_offset; + hwaddr section_offset; + hwaddr note_offset; + + void *elf_section_hdrs; /* Pointer to section header buffer */ + void *elf_section_data; /* Pointer to section data buffer */ + uint64_t elf_section_data_size; /* Size of section data */ + GArray *string_table_buf; /* String table data buffer */ + + uint8_t *note_buf; /* buffer for notes */ + size_t note_buf_offset; /* the writing place in note_buf */ + uint32_t nr_cpus; /* number of guest's cpu */ + uint64_t max_mapnr; /* the biggest guest's phys-mem's number */ + size_t len_dump_bitmap; /* the size of the place used to store + dump_bitmap in vmcore */ + off_t offset_dump_bitmap; /* offset of dump_bitmap part in vmcore */ + off_t offset_page; /* offset of page part in vmcore */ + size_t num_dumpable; /* number of page that can be dumped */ + uint32_t flag_compress; /* indicate the compression format */ + DumpStatus status; /* current dump status */ + + bool has_format; /* whether format is provided */ + DumpGuestMemoryFormat format; /* valid only if has_format == true */ + QemuThread dump_thread; /* thread for detached dump */ + + int64_t total_size; /* total memory size (in bytes) to + * be dumped. When filter is + * enabled, this will only count + * those to be written. */ + int64_t written_size; /* written memory size (in bytes), + * this could be used to calculate + * how much work we have + * finished. */ + uint8_t *guest_note; /* ELF note content */ + size_t guest_note_size; +} DumpState; + +uint16_t cpu_to_dump16(DumpState *s, uint16_t val); +uint32_t cpu_to_dump32(DumpState *s, uint32_t val); +uint64_t cpu_to_dump64(DumpState *s, uint64_t val); + +int64_t dump_filtered_memblock_size(GuestPhysBlock *block, int64_t filter_area_start, + int64_t filter_area_length); +int64_t dump_filtered_memblock_start(GuestPhysBlock *block, int64_t filter_area_start, + int64_t filter_area_length); +#endif diff --git a/include/system/event-loop-base.h b/include/system/event-loop-base.h new file mode 100644 index 0000000000..a6c24f1351 --- /dev/null +++ b/include/system/event-loop-base.h @@ -0,0 +1,40 @@ +/* + * QEMU event-loop backend + * + * Copyright (C) 2022 Red Hat Inc + * + * Authors: + * Nicolas Saenz Julienne <nsaenzju@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#ifndef QEMU_EVENT_LOOP_BASE_H +#define QEMU_EVENT_LOOP_BASE_H + +#include "qom/object.h" +#include "block/aio.h" + +#define TYPE_EVENT_LOOP_BASE "event-loop-base" +OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass, + EVENT_LOOP_BASE) + +struct EventLoopBaseClass { + ObjectClass parent_class; + + void (*init)(EventLoopBase *base, Error **errp); + void (*update_params)(EventLoopBase *base, Error **errp); + bool (*can_be_deleted)(EventLoopBase *base); +}; + +struct EventLoopBase { + Object parent; + + /* AioContext AIO engine parameters */ + int64_t aio_max_batch; + + /* AioContext thread pool parameters */ + int64_t thread_pool_min; + int64_t thread_pool_max; +}; +#endif diff --git a/include/system/host_iommu_device.h b/include/system/host_iommu_device.h new file mode 100644 index 0000000000..809cced4ba --- /dev/null +++ b/include/system/host_iommu_device.h @@ -0,0 +1,110 @@ +/* + * Host IOMMU device abstract declaration + * + * Copyright (C) 2024 Intel Corporation. + * + * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef HOST_IOMMU_DEVICE_H +#define HOST_IOMMU_DEVICE_H + +#include "qom/object.h" +#include "qapi/error.h" + +/** + * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities. + * + * @type: host platform IOMMU type. + * + * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents + * the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl) + */ +typedef struct HostIOMMUDeviceCaps { + uint32_t type; + uint64_t hw_caps; +} HostIOMMUDeviceCaps; + +#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" +OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE) + +struct HostIOMMUDevice { + Object parent_obj; + + char *name; + void *agent; /* pointer to agent device, ie. VFIO or VDPA device */ + PCIBus *aliased_bus; + int aliased_devfn; + HostIOMMUDeviceCaps caps; +}; + +/** + * struct HostIOMMUDeviceClass - The base class for all host IOMMU devices. + * + * Different types of host devices (e.g., VFIO or VDPA device) or devices + * with different backend (e.g., VFIO legacy container or IOMMUFD backend) + * will have different implementations of the HostIOMMUDeviceClass. + */ +struct HostIOMMUDeviceClass { + ObjectClass parent_class; + + /** + * @realize: initialize host IOMMU device instance further. + * + * Mandatory callback. + * + * @hiod: pointer to a host IOMMU device instance. + * + * @opaque: pointer to agent device of this host IOMMU device, + * e.g., VFIO base device or VDPA device. + * + * @errp: pass an Error out when realize fails. + * + * Returns: true on success, false on failure. + */ + bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp); + /** + * @get_cap: check if a host IOMMU device capability is supported. + * + * Optional callback, if not implemented, hint not supporting query + * of @cap. + * + * @hiod: pointer to a host IOMMU device instance. + * + * @cap: capability to check. + * + * @errp: pass an Error out when fails to query capability. + * + * Returns: <0 on failure, 0 if a @cap is unsupported, or else + * 1 or some positive value for some special @cap, + * i.e., HOST_IOMMU_DEVICE_CAP_AW_BITS. 
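A sketch of how a caller might use the optional get_cap hook; HOST_IOMMU_DEVICE_GET_CLASS is the class checker generated by OBJECT_DECLARE_TYPE above, and treating a missing hook as -ENOTSUP is the caller's choice, not a rule of this header.

        HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
        int aw_bits = -ENOTSUP;

        if (hiodc->get_cap) {
            aw_bits = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_AW_BITS, errp);
        }
        if (aw_bits > 0) {
            /* aw_bits is the usable IOVA address width reported by the host */
        }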
+ */ + int (*get_cap)(HostIOMMUDevice *hiod, int cap, Error **errp); + /** + * @get_iova_ranges: Return the list of usable iova_ranges along with + * @hiod Host IOMMU device + * + * @hiod: handle to the host IOMMU device + */ + GList* (*get_iova_ranges)(HostIOMMUDevice *hiod); + /** + * + * @get_page_size_mask: Return the page size mask supported along this + * @hiod Host IOMMU device + * + * @hiod: handle to the host IOMMU device + */ + uint64_t (*get_page_size_mask)(HostIOMMUDevice *hiod); +}; + +/* + * Host IOMMU device capability list. + */ +#define HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE 0 +#define HOST_IOMMU_DEVICE_CAP_AW_BITS 1 + +#define HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX 64 +#endif diff --git a/include/system/hostmem.h b/include/system/hostmem.h new file mode 100644 index 0000000000..5c21ca55c0 --- /dev/null +++ b/include/system/hostmem.h @@ -0,0 +1,96 @@ +/* + * QEMU Host Memory Backend + * + * Copyright (C) 2013-2014 Red Hat Inc + * + * Authors: + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef SYSTEM_HOSTMEM_H +#define SYSTEM_HOSTMEM_H + +#include "system/numa.h" +#include "qapi/qapi-types-machine.h" +#include "qom/object.h" +#include "exec/memory.h" +#include "qemu/bitmap.h" +#include "qemu/thread-context.h" + +#define TYPE_MEMORY_BACKEND "memory-backend" +OBJECT_DECLARE_TYPE(HostMemoryBackend, HostMemoryBackendClass, + MEMORY_BACKEND) + +/* hostmem-ram.c */ +/** + * @TYPE_MEMORY_BACKEND_RAM: + * name of backend that uses mmap on the anonymous RAM + */ + +#define TYPE_MEMORY_BACKEND_RAM "memory-backend-ram" + +/* hostmem-file.c */ +/** + * @TYPE_MEMORY_BACKEND_FILE: + * name of backend that uses mmap on a file descriptor + */ +#define TYPE_MEMORY_BACKEND_FILE "memory-backend-file" + +#define TYPE_MEMORY_BACKEND_MEMFD "memory-backend-memfd" + + +/** + * HostMemoryBackendClass: + * @parent_class: opaque parent class container + */ +struct HostMemoryBackendClass { + ObjectClass parent_class; + + /** + * alloc: Allocate memory from backend. + * + * @backend: the #HostMemoryBackend. + * @errp: pointer to Error*, to store an error if it happens. + * + * Return: true on success, else false setting @errp with error. 
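The following is a rough sketch of an alloc() implementation, loosely modelled on the memory-backend-ram backend; the function name is invented, and the exact memory_region_init_* helper and its bool-returning convention should be checked against the current memory API before reuse.

    static bool my_ram_backend_alloc(HostMemoryBackend *backend, Error **errp)
    {
        g_autofree char *name = host_memory_backend_get_name(backend);

        if (!backend->size) {
            error_setg(errp, "cannot create a zero-sized memory backend");
            return false;
        }
        /* assumed to return false and set errp on failure */
        return memory_region_init_ram_flags_nomigrate(&backend->mr,
                                                      OBJECT(backend), name,
                                                      backend->size,
                                                      backend->share ? RAM_SHARED : 0,
                                                      errp);
    }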
+ */ + bool (*alloc)(HostMemoryBackend *backend, Error **errp); +}; + +/** + * @HostMemoryBackend + * + * @parent: opaque parent object container + * @size: amount of memory backend provides + * @mr: MemoryRegion representing host memory belonging to backend + * @prealloc_threads: number of threads to be used for preallocatining RAM + */ +struct HostMemoryBackend { + /* private */ + Object parent; + + /* protected */ + uint64_t size; + bool merge, dump, use_canonical_path; + bool prealloc, is_mapped, share, reserve; + bool guest_memfd, aligned; + uint32_t prealloc_threads; + ThreadContext *prealloc_context; + DECLARE_BITMAP(host_nodes, MAX_NODES + 1); + HostMemPolicy policy; + + MemoryRegion mr; +}; + +bool host_memory_backend_mr_inited(HostMemoryBackend *backend); +MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend); + +void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped); +bool host_memory_backend_is_mapped(HostMemoryBackend *backend); +size_t host_memory_backend_pagesize(HostMemoryBackend *memdev); +char *host_memory_backend_get_name(HostMemoryBackend *backend); + +#endif diff --git a/include/system/hvf.h b/include/system/hvf.h new file mode 100644 index 0000000000..730f927f03 --- /dev/null +++ b/include/system/hvf.h @@ -0,0 +1,71 @@ +/* + * QEMU Hypervisor.framework (HVF) support + * + * Copyright Google Inc., 2017 + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +/* header to be included in non-HVF-specific code */ + +#ifndef HVF_H +#define HVF_H + +#include "qemu/accel.h" +#include "qom/object.h" + +#ifdef COMPILING_PER_TARGET +#include "cpu.h" + +#ifdef CONFIG_HVF +extern bool hvf_allowed; +#define hvf_enabled() (hvf_allowed) +#else /* !CONFIG_HVF */ +#define hvf_enabled() 0 +#endif /* !CONFIG_HVF */ + +#endif /* COMPILING_PER_TARGET */ + +#define TYPE_HVF_ACCEL ACCEL_CLASS_NAME("hvf") + +typedef struct HVFState HVFState; +DECLARE_INSTANCE_CHECKER(HVFState, HVF_STATE, + TYPE_HVF_ACCEL) + +#ifdef COMPILING_PER_TARGET +struct hvf_sw_breakpoint { + vaddr pc; + vaddr saved_insn; + int use_count; + QTAILQ_ENTRY(hvf_sw_breakpoint) entry; +}; + +struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, + vaddr pc); +int hvf_sw_breakpoints_active(CPUState *cpu); + +int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); +int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); +int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type); +int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type); +void hvf_arch_remove_all_hw_breakpoints(void); + +/* + * hvf_update_guest_debug: + * @cs: CPUState for the CPU to update + * + * Update guest to enable or disable debugging. Per-arch specifics will be + * handled by calling down to hvf_arch_update_guest_debug. + */ +int hvf_update_guest_debug(CPUState *cpu); +void hvf_arch_update_guest_debug(CPUState *cpu); + +/* + * Return whether the guest supports debugging. + */ +bool hvf_arch_supports_guest_debug(void); +#endif /* COMPILING_PER_TARGET */ + +#endif diff --git a/include/system/hvf_int.h b/include/system/hvf_int.h new file mode 100644 index 0000000000..42ae18433f --- /dev/null +++ b/include/system/hvf_int.h @@ -0,0 +1,77 @@ +/* + * QEMU Hypervisor.framework (HVF) support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +/* header to be included in HVF-specific code */ + +#ifndef HVF_INT_H +#define HVF_INT_H + +#ifdef __aarch64__ +#include <Hypervisor/Hypervisor.h> +typedef hv_vcpu_t hvf_vcpuid; +#else +#include <Hypervisor/hv.h> +typedef hv_vcpuid_t hvf_vcpuid; +#endif + +/* hvf_slot flags */ +#define HVF_SLOT_LOG (1 << 0) + +typedef struct hvf_slot { + uint64_t start; + uint64_t size; + uint8_t *mem; + int slot_id; + uint32_t flags; + MemoryRegion *region; +} hvf_slot; + +typedef struct hvf_vcpu_caps { + uint64_t vmx_cap_pinbased; + uint64_t vmx_cap_procbased; + uint64_t vmx_cap_procbased2; + uint64_t vmx_cap_entry; + uint64_t vmx_cap_exit; + uint64_t vmx_cap_preemption_timer; +} hvf_vcpu_caps; + +struct HVFState { + AccelState parent; + hvf_slot slots[32]; + int num_slots; + + hvf_vcpu_caps *hvf_caps; + uint64_t vtimer_offset; + QTAILQ_HEAD(, hvf_sw_breakpoint) hvf_sw_breakpoints; +}; +extern HVFState *hvf_state; + +struct AccelCPUState { + hvf_vcpuid fd; + void *exit; + bool vtimer_masked; + sigset_t unblock_ipi_mask; + bool guest_debug_enabled; + bool dirty; +}; + +void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line, + const char *exp); +#define assert_hvf_ok(EX) assert_hvf_ok_impl((EX), __FILE__, __LINE__, #EX) +const char *hvf_return_string(hv_return_t ret); +int hvf_arch_init(void); +hv_return_t hvf_arch_vm_create(MachineState *ms, uint32_t pa_range); +int hvf_arch_init_vcpu(CPUState *cpu); +void hvf_arch_vcpu_destroy(CPUState *cpu); +int hvf_vcpu_exec(CPUState *); +hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t); +int hvf_put_registers(CPUState *); +int hvf_get_registers(CPUState *); +void hvf_kick_vcpu_thread(CPUState *cpu); + +#endif diff --git a/include/system/hw_accel.h b/include/system/hw_accel.h new file mode 100644 index 0000000000..380e9e640b --- /dev/null +++ b/include/system/hw_accel.h @@ -0,0 +1,25 @@ +/* + * QEMU Hardware accelerators support + * + * Copyright 2016 Google, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_HW_ACCEL_H +#define QEMU_HW_ACCEL_H + +#include "hw/core/cpu.h" +#include "system/kvm.h" +#include "system/hvf.h" +#include "system/whpx.h" +#include "system/nvmm.h" + +void cpu_synchronize_state(CPUState *cpu); +void cpu_synchronize_post_reset(CPUState *cpu); +void cpu_synchronize_post_init(CPUState *cpu); +void cpu_synchronize_pre_loadvm(CPUState *cpu); + +#endif /* QEMU_HW_ACCEL_H */ diff --git a/include/system/iommufd.h b/include/system/iommufd.h new file mode 100644 index 0000000000..cbab75bfbf --- /dev/null +++ b/include/system/iommufd.h @@ -0,0 +1,66 @@ +/* + * iommufd container backend declaration + * + * Copyright (C) 2024 Intel Corporation. + * Copyright Red Hat, Inc. 
2024 + * + * Authors: Yi Liu <yi.l.liu@intel.com> + * Eric Auger <eric.auger@redhat.com> + * Zhenzhong Duan <zhenzhong.duan@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef SYSTEM_IOMMUFD_H +#define SYSTEM_IOMMUFD_H + +#include "qom/object.h" +#include "exec/hwaddr.h" +#include "exec/cpu-common.h" +#include "system/host_iommu_device.h" + +#define TYPE_IOMMUFD_BACKEND "iommufd" +OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND) + +struct IOMMUFDBackendClass { + ObjectClass parent_class; +}; + +struct IOMMUFDBackend { + Object parent; + + /*< protected >*/ + int fd; /* /dev/iommu file descriptor */ + bool owned; /* is the /dev/iommu opened internally */ + uint32_t users; + + /*< public >*/ +}; + +bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); +void iommufd_backend_disconnect(IOMMUFDBackend *be); + +bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, + Error **errp); +void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id); +int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly); +int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, + hwaddr iova, ram_addr_t size); +bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, + uint64_t *caps, Error **errp); +bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id, + uint32_t pt_id, uint32_t flags, + uint32_t data_type, uint32_t data_len, + void *data_ptr, uint32_t *out_hwpt, + Error **errp); +bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id, + bool start, Error **errp); +bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id, + uint64_t iova, ram_addr_t size, + uint64_t page_size, uint64_t *data, + Error **errp); + +#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" +#endif diff --git a/include/system/iothread.h b/include/system/iothread.h new file mode 100644 index 0000000000..d95c17a645 --- /dev/null +++ b/include/system/iothread.h @@ -0,0 +1,67 @@ +/* + * Event loop thread + * + * Copyright Red Hat Inc., 2013 + * + * Authors: + * Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef IOTHREAD_H +#define IOTHREAD_H + +#include "block/aio.h" +#include "qemu/thread.h" +#include "qom/object.h" +#include "system/event-loop-base.h" + +#define TYPE_IOTHREAD "iothread" + +struct IOThread { + EventLoopBase parent_obj; + + QemuThread thread; + AioContext *ctx; + bool run_gcontext; /* whether we should run gcontext */ + GMainContext *worker_context; + GMainLoop *main_loop; + QemuSemaphore init_done_sem; /* is thread init done? */ + bool stopping; /* has iothread_stop() been called? */ + bool running; /* should iothread_run() continue? */ + int thread_id; + + /* AioContext poll parameters */ + int64_t poll_max_ns; + int64_t poll_grow; + int64_t poll_shrink; +}; +typedef struct IOThread IOThread; + +DECLARE_INSTANCE_CHECKER(IOThread, IOTHREAD, + TYPE_IOTHREAD) + +char *iothread_get_id(IOThread *iothread); +IOThread *iothread_by_id(const char *id); +AioContext *iothread_get_aio_context(IOThread *iothread); +GMainContext *iothread_get_g_main_context(IOThread *iothread); + +/* + * Helpers used to allocate iothreads for internal use. 
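A sketch of these internal-iothread helpers (declared just below); the id string is arbitrary and &error_abort is only one possible error policy.

        IOThread *iot = iothread_create("my-worker", &error_abort);
        AioContext *ctx = iothread_get_aio_context(iot);

        /* ... attach BHs, timers or fd handlers to ctx ... */

        iothread_stop(iot);      /* quiesce the event loop thread */
        iothread_destroy(iot);   /* drop the reference and free the object */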
These + * iothreads will not be seen by monitor clients when query using + * "query-iothreads". + */ +IOThread *iothread_create(const char *id, Error **errp); +void iothread_stop(IOThread *iothread); +void iothread_destroy(IOThread *iothread); + +/* + * Returns true if executing within IOThread context, + * false otherwise. + */ +bool qemu_in_iothread(void); + +#endif /* IOTHREAD_H */ diff --git a/include/system/kvm.h b/include/system/kvm.h new file mode 100644 index 0000000000..c3a60b2890 --- /dev/null +++ b/include/system/kvm.h @@ -0,0 +1,581 @@ +/* + * QEMU KVM support + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +/* header to be included in non-KVM-specific code */ + +#ifndef QEMU_KVM_H +#define QEMU_KVM_H + +#include "exec/memattrs.h" +#include "qemu/accel.h" +#include "qom/object.h" + +#ifdef COMPILING_PER_TARGET +# ifdef CONFIG_KVM +# include <linux/kvm.h> +# define CONFIG_KVM_IS_POSSIBLE +# endif +#else +# define CONFIG_KVM_IS_POSSIBLE +#endif + +#ifdef CONFIG_KVM_IS_POSSIBLE + +extern bool kvm_allowed; +extern bool kvm_kernel_irqchip; +extern bool kvm_split_irqchip; +extern bool kvm_async_interrupts_allowed; +extern bool kvm_halt_in_kernel_allowed; +extern bool kvm_resamplefds_allowed; +extern bool kvm_msi_via_irqfd_allowed; +extern bool kvm_gsi_routing_allowed; +extern bool kvm_gsi_direct_mapping; +extern bool kvm_readonly_mem_allowed; +extern bool kvm_msi_use_devid; + +#define kvm_enabled() (kvm_allowed) +/** + * kvm_irqchip_in_kernel: + * + * Returns: true if an in-kernel irqchip was created. + * What this actually means is architecture and machine model + * specific: on PC, for instance, it means that the LAPIC + * is in kernel. This function should never be used from generic + * target-independent code: use one of the following functions or + * some other specific check instead. + */ +#define kvm_irqchip_in_kernel() (kvm_kernel_irqchip) + +/** + * kvm_irqchip_is_split: + * + * Returns: true if the irqchip implementation is split between + * user and kernel space. The details are architecture and + * machine specific. On PC, it means that the PIC, IOAPIC, and + * PIT are in user space while the LAPIC is in the kernel. + */ +#define kvm_irqchip_is_split() (kvm_split_irqchip) + +/** + * kvm_async_interrupts_enabled: + * + * Returns: true if we can deliver interrupts to KVM + * asynchronously (ie by ioctl from any thread at any time) + * rather than having to do interrupt delivery synchronously + * (where the vcpu must be stopped at a suitable point first). + */ +#define kvm_async_interrupts_enabled() (kvm_async_interrupts_allowed) + +/** + * kvm_halt_in_kernel + * + * Returns: true if halted cpus should still get a KVM_RUN ioctl to run + * inside of kernel space. This only works if MP state is implemented. + */ +#define kvm_halt_in_kernel() (kvm_halt_in_kernel_allowed) + +/** + * kvm_irqfds_enabled: + * + * Returns: true if we can use irqfds to inject interrupts into + * a KVM CPU (ie the kernel supports irqfds and we are running + * with a configuration where it is meaningful to use them). + * + * Always available if running with in-kernel irqchip. 
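A short sketch of how device code typically uses these predicates to pick an interrupt delivery path; the branch bodies are placeholders.

        if (kvm_enabled() && kvm_irqfds_enabled()) {
            /* fast path: let the kernel inject the interrupt through an irqfd */
        } else {
            /* fallback: raise the interrupt from QEMU, e.g. via qemu_set_irq() */
        }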
+ */ +#define kvm_irqfds_enabled() kvm_irqchip_in_kernel() + +/** + * kvm_resamplefds_enabled: + * + * Returns: true if we can use resamplefds to inject interrupts into + * a KVM CPU (ie the kernel supports resamplefds and we are running + * with a configuration where it is meaningful to use them). + */ +#define kvm_resamplefds_enabled() (kvm_resamplefds_allowed) + +/** + * kvm_msi_via_irqfd_enabled: + * + * Returns: true if we can route a PCI MSI (Message Signaled Interrupt) + * to a KVM CPU via an irqfd. This requires that the kernel supports + * this and that we're running in a configuration that permits it. + */ +#define kvm_msi_via_irqfd_enabled() (kvm_msi_via_irqfd_allowed) + +/** + * kvm_gsi_routing_enabled: + * + * Returns: true if GSI routing is enabled (ie the kernel supports + * it and we're running in a configuration that permits it). + */ +#define kvm_gsi_routing_enabled() (kvm_gsi_routing_allowed) + +/** + * kvm_gsi_direct_mapping: + * + * Returns: true if GSI direct mapping is enabled. + */ +#define kvm_gsi_direct_mapping() (kvm_gsi_direct_mapping) + +/** + * kvm_readonly_mem_enabled: + * + * Returns: true if KVM readonly memory is enabled (ie the kernel + * supports it and we're running in a configuration that permits it). + */ +#define kvm_readonly_mem_enabled() (kvm_readonly_mem_allowed) + +/** + * kvm_msi_devid_required: + * Returns: true if KVM requires a device id to be provided while + * defining an MSI routing entry. + */ +#define kvm_msi_devid_required() (kvm_msi_use_devid) + +#else + +#define kvm_enabled() (0) +#define kvm_irqchip_in_kernel() (false) +#define kvm_irqchip_is_split() (false) +#define kvm_async_interrupts_enabled() (false) +#define kvm_halt_in_kernel() (false) +#define kvm_irqfds_enabled() (false) +#define kvm_resamplefds_enabled() (false) +#define kvm_msi_via_irqfd_enabled() (false) +#define kvm_gsi_routing_allowed() (false) +#define kvm_gsi_direct_mapping() (false) +#define kvm_readonly_mem_enabled() (false) +#define kvm_msi_devid_required() (false) + +#endif /* CONFIG_KVM_IS_POSSIBLE */ + +struct kvm_run; +struct kvm_irq_routing_entry; + +typedef struct KVMCapabilityInfo { + const char *name; + int value; +} KVMCapabilityInfo; + +#define KVM_CAP_INFO(CAP) { "KVM_CAP_" stringify(CAP), KVM_CAP_##CAP } +#define KVM_CAP_LAST_INFO { NULL, 0 } + +struct KVMState; + +#define TYPE_KVM_ACCEL ACCEL_CLASS_NAME("kvm") +typedef struct KVMState KVMState; +DECLARE_INSTANCE_CHECKER(KVMState, KVM_STATE, + TYPE_KVM_ACCEL) + +extern KVMState *kvm_state; +typedef struct Notifier Notifier; + +typedef struct KVMRouteChange { + KVMState *s; + int changes; +} KVMRouteChange; + +/* external API */ + +unsigned int kvm_get_max_memslots(void); +unsigned int kvm_get_free_memslots(void); +bool kvm_has_sync_mmu(void); +int kvm_has_vcpu_events(void); +int kvm_max_nested_state_length(void); +int kvm_has_gsi_routing(void); + +/** + * kvm_arm_supports_user_irq + * + * Not all KVM implementations support notifications for kernel generated + * interrupt events to user space. This function indicates whether the current + * KVM implementation does support them. 
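Looking back at the KVM_CAP_INFO()/KVM_CAP_LAST_INFO helpers defined above, an architecture's mandatory-capability table usually looks like the following sketch; the particular capabilities listed are illustrative, not a requirement of any real target.

    const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
        KVM_CAP_INFO(USER_MEMORY),
        KVM_CAP_INFO(IOEVENTFD),
        KVM_CAP_LAST_INFO
    };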
+ * + * Returns: true if KVM supports using kernel generated IRQs from user space + */ +bool kvm_arm_supports_user_irq(void); + + +int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); +int kvm_on_sigbus(int code, void *addr); + +#ifdef COMPILING_PER_TARGET +#include "cpu.h" + +void kvm_flush_coalesced_mmio_buffer(void); + +/** + * kvm_update_guest_debug(): ensure KVM debug structures updated + * @cs: the CPUState for this cpu + * @reinject_trap: KVM trap injection control + * + * There are usually per-arch specifics which will be handled by + * calling down to kvm_arch_update_guest_debug after the generic + * fields have been set. + */ +#ifdef TARGET_KVM_HAVE_GUEST_DEBUG +int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap); +#else +static inline int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap) +{ + return -EINVAL; +} +#endif + +/* internal API */ + +int kvm_ioctl(KVMState *s, unsigned long type, ...); + +int kvm_vm_ioctl(KVMState *s, unsigned long type, ...); + +int kvm_vcpu_ioctl(CPUState *cpu, unsigned long type, ...); + +/** + * kvm_device_ioctl - call an ioctl on a kvm device + * @fd: The KVM device file descriptor as returned from KVM_CREATE_DEVICE + * @type: The device-ctrl ioctl number + * + * Returns: -errno on error, nonnegative on success + */ +int kvm_device_ioctl(int fd, unsigned long type, ...); + +/** + * kvm_vm_check_attr - check for existence of a specific vm attribute + * @s: The KVMState pointer + * @group: the group + * @attr: the attribute of that group to query for + * + * Returns: 1 if the attribute exists + * 0 if the attribute either does not exist or if the vm device + * interface is unavailable + */ +int kvm_vm_check_attr(KVMState *s, uint32_t group, uint64_t attr); + +/** + * kvm_device_check_attr - check for existence of a specific device attribute + * @fd: The device file descriptor + * @group: the group + * @attr: the attribute of that group to query for + * + * Returns: 1 if the attribute exists + * 0 if the attribute either does not exist or if the vm device + * interface is unavailable + */ +int kvm_device_check_attr(int fd, uint32_t group, uint64_t attr); + +/** + * kvm_device_access - set or get value of a specific device attribute + * @fd: The device file descriptor + * @group: the group + * @attr: the attribute of that group to set or get + * @val: pointer to a storage area for the value + * @write: true for set and false for get operation + * @errp: error object handle + * + * Returns: 0 on success + * < 0 on error + * Use kvm_device_check_attr() in order to check for the availability + * of optional attributes. + */ +int kvm_device_access(int fd, int group, uint64_t attr, + void *val, bool write, Error **errp); + +/** + * kvm_create_device - create a KVM device for the device control API + * @KVMState: The KVMState pointer + * @type: The KVM device type (see Documentation/virtual/kvm/devices in the + * kernel source) + * @test: If true, only test if device can be created, but don't actually + * create the device. + * + * Returns: -errno on error, nonnegative on success: @test ? 0 : device fd; + */ +int kvm_create_device(KVMState *s, uint64_t type, bool test); + +/** + * kvm_device_supported - probe whether KVM supports specific device + * + * @vmfd: The fd handler for VM + * @type: type of device + * + * @return: true if supported, otherwise false. 
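A sketch of the device-control API usage pattern described above, in the style of the Arm vGIC setup code; the device type and attribute group are examples only, and error handling is reduced to a comment.

        int fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_V3, false);

        if (fd < 0) {
            /* fd is -errno: the kernel cannot create this device type */
        } else {
            uint32_t nr_irqs = 256;
            kvm_device_access(fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0,
                              &nr_irqs, true, &error_abort);
        }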
+ */ +bool kvm_device_supported(int vmfd, uint64_t type); + +/** + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. + * + * @returns: 0 when success, errno (<0) when failed. + */ +int kvm_create_vcpu(CPUState *cpu); + +/** + * kvm_park_vcpu - Park QEMU KVM vCPU context + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked. + * + * @returns: none + */ +void kvm_park_vcpu(CPUState *cpu); + +/** + * kvm_unpark_vcpu - unpark QEMU KVM vCPU context + * @s: KVM State + * @vcpu_id: Architecture vCPU ID of the parked vCPU + * + * @returns: KVM fd + */ +int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id); + +/** + * kvm_create_and_park_vcpu - Create and park a KVM vCPU + * @cpu: QOM CPUState object for which KVM vCPU has to be created and parked. + * + * @returns: 0 when success, errno (<0) when failed. + */ +int kvm_create_and_park_vcpu(CPUState *cpu); + +/* Arch specific hooks */ + +extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; + +void kvm_arch_accel_class_init(ObjectClass *oc); + +void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run); +MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run); + +int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run); + +int kvm_arch_process_async_events(CPUState *cpu); + +int kvm_arch_get_registers(CPUState *cpu, Error **errp); + +/* state subset only touched by the VCPU itself during runtime */ +#define KVM_PUT_RUNTIME_STATE 1 +/* state subset modified during VCPU reset */ +#define KVM_PUT_RESET_STATE 2 +/* full state set, modified during initialization or on vmload */ +#define KVM_PUT_FULL_STATE 3 + +int kvm_arch_put_registers(CPUState *cpu, int level, Error **errp); + +int kvm_arch_get_default_type(MachineState *ms); + +int kvm_arch_init(MachineState *ms, KVMState *s); + +int kvm_arch_init_vcpu(CPUState *cpu); +int kvm_arch_destroy_vcpu(CPUState *cpu); + +bool kvm_vcpu_id_is_valid(int vcpu_id); + +/* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */ +unsigned long kvm_arch_vcpu_id(CPUState *cpu); + +#ifdef KVM_HAVE_MCE_INJECTION +void kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); +#endif + +void kvm_arch_init_irq_routing(KVMState *s); + +int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, + uint64_t address, uint32_t data, PCIDevice *dev); + +/* Notify arch about newly added MSI routes */ +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev); +/* Notify arch about released MSI routes */ +int kvm_arch_release_virq_post(int virq); + +int kvm_arch_msi_data_to_gsi(uint32_t data); + +int kvm_set_irq(KVMState *s, int irq, int level); +int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg); + +void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin); + +void kvm_irqchip_add_change_notifier(Notifier *n); +void kvm_irqchip_remove_change_notifier(Notifier *n); +void kvm_irqchip_change_notify(void); + +struct kvm_guest_debug; +struct kvm_debug_exit_arch; + +struct kvm_sw_breakpoint { + vaddr pc; + vaddr saved_insn; + int use_count; + QTAILQ_ENTRY(kvm_sw_breakpoint) entry; +}; + +struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu, + vaddr pc); + +int kvm_sw_breakpoints_active(CPUState *cpu); + +int kvm_arch_insert_sw_breakpoint(CPUState *cpu, + struct kvm_sw_breakpoint *bp); +int kvm_arch_remove_sw_breakpoint(CPUState *cpu, + struct kvm_sw_breakpoint *bp); +int kvm_arch_insert_hw_breakpoint(vaddr addr, 
vaddr len, int type); +int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type); +void kvm_arch_remove_all_hw_breakpoints(void); + +void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg); + +bool kvm_arch_stop_on_emulation_error(CPUState *cpu); + +int kvm_check_extension(KVMState *s, unsigned int extension); + +int kvm_vm_check_extension(KVMState *s, unsigned int extension); + +#define kvm_vm_enable_cap(s, capability, cap_flags, ...) \ + ({ \ + struct kvm_enable_cap cap = { \ + .cap = capability, \ + .flags = cap_flags, \ + }; \ + uint64_t args_tmp[] = { __VA_ARGS__ }; \ + size_t n = MIN(ARRAY_SIZE(args_tmp), ARRAY_SIZE(cap.args)); \ + memcpy(cap.args, args_tmp, n * sizeof(cap.args[0])); \ + kvm_vm_ioctl(s, KVM_ENABLE_CAP, &cap); \ + }) + +#define kvm_vcpu_enable_cap(cpu, capability, cap_flags, ...) \ + ({ \ + struct kvm_enable_cap cap = { \ + .cap = capability, \ + .flags = cap_flags, \ + }; \ + uint64_t args_tmp[] = { __VA_ARGS__ }; \ + size_t n = MIN(ARRAY_SIZE(args_tmp), ARRAY_SIZE(cap.args)); \ + memcpy(cap.args, args_tmp, n * sizeof(cap.args[0])); \ + kvm_vcpu_ioctl(cpu, KVM_ENABLE_CAP, &cap); \ + }) + +void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); + +int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, + hwaddr *phys_addr); + +#endif /* COMPILING_PER_TARGET */ + +void kvm_cpu_synchronize_state(CPUState *cpu); + +void kvm_init_cpu_signals(CPUState *cpu); + +/** + * kvm_irqchip_add_msi_route - Add MSI route for specific vector + * @c: KVMRouteChange instance. + * @vector: which vector to add. This can be either MSI/MSIX + * vector. The function will automatically detect whether + * MSI/MSIX is enabled, and fetch corresponding MSI + * message. + * @dev: Owner PCI device to add the route. If @dev is specified + * as @NULL, an empty MSI message will be inited. + * @return: virq (>=0) when success, errno (<0) when failed. + */ +int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev); +int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, + PCIDevice *dev); +void kvm_irqchip_commit_routes(KVMState *s); + +static inline KVMRouteChange kvm_irqchip_begin_route_changes(KVMState *s) +{ + return (KVMRouteChange) { .s = s, .changes = 0 }; +} + +static inline void kvm_irqchip_commit_route_changes(KVMRouteChange *c) +{ + if (c->changes) { + kvm_irqchip_commit_routes(c->s); + c->changes = 0; + } +} + +int kvm_irqchip_get_virq(KVMState *s); +void kvm_irqchip_release_virq(KVMState *s, int virq); + +void kvm_add_routing_entry(KVMState *s, + struct kvm_irq_routing_entry *entry); + +int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + EventNotifier *rn, int virq); +int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + int virq); +int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, + EventNotifier *rn, qemu_irq irq); +int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, + qemu_irq irq); +void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi); +void kvm_init_irq_routing(KVMState *s); + +bool kvm_kernel_irqchip_allowed(void); +bool kvm_kernel_irqchip_required(void); +bool kvm_kernel_irqchip_split(void); + +/** + * kvm_arch_irqchip_create: + * @KVMState: The KVMState pointer + * + * Allow architectures to create an in-kernel irq chip themselves. 
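+ *
+ * (Aside on the route-change helpers declared above: they are intended to be
+ * used in a begin/add/commit pattern. A sketch, where @vector and @dev are
+ * purely illustrative:
+ *
+ *   KVMRouteChange c = kvm_irqchip_begin_route_changes(s);
+ *   int virq = kvm_irqchip_add_msi_route(&c, vector, dev);
+ *   kvm_irqchip_commit_route_changes(&c);
+ *
+ * with virq < 0 indicating failure.)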
+ * + * Returns: < 0: error + * 0: irq chip was not created + * > 0: irq chip was created + */ +int kvm_arch_irqchip_create(KVMState *s); + +/** + * kvm_set_one_reg - set a register value in KVM via KVM_SET_ONE_REG ioctl + * @id: The register ID + * @source: The pointer to the value to be set. It must point to a variable + * of the correct type/size for the register being accessed. + * + * Returns: 0 on success, or a negative errno on failure. + */ +int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source); + +/** + * kvm_get_one_reg - get a register value from KVM via KVM_GET_ONE_REG ioctl + * @id: The register ID + * @target: The pointer where the value is to be stored. It must point to a + * variable of the correct type/size for the register being accessed. + * + * Returns: 0 on success, or a negative errno on failure. + */ +int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); + +/* Notify resamplefd for EOI of specific interrupts. */ +void kvm_resample_fd_notify(int gsi); + +bool kvm_dirty_ring_enabled(void); + +uint32_t kvm_dirty_ring_size(void); + +void kvm_mark_guest_state_protected(void); + +/** + * kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page + * reported for the VM. + */ +bool kvm_hwpoisoned_mem(void); + +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); + +int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); +int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); + +int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private); + +#endif diff --git a/include/system/kvm_int.h b/include/system/kvm_int.h new file mode 100644 index 0000000000..4de6106869 --- /dev/null +++ b/include/system/kvm_int.h @@ -0,0 +1,187 @@ +/* + * Internal definitions for a target's KVM support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef QEMU_KVM_INT_H +#define QEMU_KVM_INT_H + +#include "exec/memory.h" +#include "qapi/qapi-types-common.h" +#include "qemu/accel.h" +#include "qemu/queue.h" +#include "system/kvm.h" +#include "hw/boards.h" +#include "hw/i386/topology.h" +#include "io/channel-socket.h" + +typedef struct KVMSlot +{ + hwaddr start_addr; + ram_addr_t memory_size; + void *ram; + int slot; + int flags; + int old_flags; + /* Dirty bitmap cache for the slot */ + unsigned long *dirty_bmap; + unsigned long dirty_bmap_size; + /* Cache of the address space ID */ + int as_id; + /* Cache of the offset in ram address space */ + ram_addr_t ram_start_offset; + int guest_memfd; + hwaddr guest_memfd_offset; +} KVMSlot; + +typedef struct KVMMemoryUpdate { + QSIMPLEQ_ENTRY(KVMMemoryUpdate) next; + MemoryRegionSection section; +} KVMMemoryUpdate; + +typedef struct KVMMemoryListener { + MemoryListener listener; + KVMSlot *slots; + unsigned int nr_slots_used; + unsigned int nr_slots_allocated; + int as_id; + QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add; + QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; +} KVMMemoryListener; + +#define KVM_MSI_HASHTAB_SIZE 256 + +typedef struct KVMHostTopoInfo { + /* Number of package on the Host */ + unsigned int maxpkgs; + /* Number of cpus on the Host */ + unsigned int maxcpus; + /* Number of cpus on each different package */ + unsigned int *pkg_cpu_count; + /* Each package can have different maxticks */ + unsigned int *maxticks; +} KVMHostTopoInfo; + +struct KVMMsrEnergy { + pid_t pid; + bool enable; + char *socket_path; + QIOChannelSocket *sioc; + QemuThread msr_thr; + unsigned int guest_vcpus; + unsigned int guest_vsockets; + X86CPUTopoInfo guest_topo_info; + KVMHostTopoInfo host_topo; + const CPUArchIdList *guest_cpu_list; + uint64_t *msr_value; + uint64_t msr_unit; + uint64_t msr_limit; + uint64_t msr_info; +}; + +enum KVMDirtyRingReaperState { + KVM_DIRTY_RING_REAPER_NONE = 0, + /* The reaper is sleeping */ + KVM_DIRTY_RING_REAPER_WAIT, + /* The reaper is reaping for dirty pages */ + KVM_DIRTY_RING_REAPER_REAPING, +}; + +/* + * KVM reaper instance, responsible for collecting the KVM dirty bits + * via the dirty ring. + */ +struct KVMDirtyRingReaper { + /* The reaper thread */ + QemuThread reaper_thr; + volatile uint64_t reaper_iteration; /* iteration number of reaper thr */ + volatile enum KVMDirtyRingReaperState reaper_state; /* reap thr state */ +}; +struct KVMState +{ + AccelState parent_obj; + /* Max number of KVM slots supported */ + int nr_slots_max; + int fd; + int vmfd; + int coalesced_mmio; + int coalesced_pio; + struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; + bool coalesced_flush_in_progress; + int vcpu_events; +#ifdef TARGET_KVM_HAVE_GUEST_DEBUG + QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints; +#endif + int max_nested_state_len; + int kvm_shadow_mem; + bool kernel_irqchip_allowed; + bool kernel_irqchip_required; + OnOffAuto kernel_irqchip_split; + bool sync_mmu; + bool guest_state_protected; + uint64_t manual_dirty_log_protect; + /* + * Older POSIX says that ioctl numbers are signed int, but in + * practice they are not. (Newer POSIX doesn't specify ioctl + * at all.) Linux, glibc and *BSD all treat ioctl numbers as + * unsigned, and real-world ioctl values like KVM_GET_XSAVE have + * bit 31 set, which means that passing them via an 'int' will + * result in sign-extension when they get converted back to the + * 'unsigned long' which the ioctl() prototype uses. 
Luckily Linux + * always treats the argument as an unsigned 32-bit int, so any + * possible sign-extension is deliberately ignored, but for + * consistency we keep to the same type that glibc is using. + */ + unsigned long irq_set_ioctl; + unsigned int sigmask_len; + GHashTable *gsimap; +#ifdef KVM_CAP_IRQ_ROUTING + struct kvm_irq_routing *irq_routes; + int nr_allocated_irq_routes; + unsigned long *used_gsi_bitmap; + unsigned int gsi_count; +#endif + KVMMemoryListener memory_listener; + QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus; + + /* For "info mtree -f" to tell if an MR is registered in KVM */ + int nr_as; + struct KVMAs { + KVMMemoryListener *ml; + AddressSpace *as; + } *as; + uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */ + uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */ + bool kvm_dirty_ring_with_bitmap; + uint64_t kvm_eager_split_size; /* Eager Page Splitting chunk size */ + struct KVMDirtyRingReaper reaper; + struct KVMMsrEnergy msr_energy; + NotifyVmexitOption notify_vmexit; + uint32_t notify_window; + uint32_t xen_version; + uint32_t xen_caps; + uint16_t xen_gnttab_max_frames; + uint16_t xen_evtchn_max_pirq; + char *device; +}; + +void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, + AddressSpace *as, int as_id, const char *name); + +void kvm_set_max_memslot_size(hwaddr max_slot_size); + +/** + * kvm_hwpoison_page_add: + * + * Parameters: + * @ram_addr: the address in the RAM for the poisoned page + * + * Add a poisoned page to the list + * + * Return: None. + */ +void kvm_hwpoison_page_add(ram_addr_t ram_addr); +#endif diff --git a/include/system/kvm_xen.h b/include/system/kvm_xen.h new file mode 100644 index 0000000000..7d0e69f133 --- /dev/null +++ b/include/system/kvm_xen.h @@ -0,0 +1,44 @@ +/* + * Xen HVM emulation support in KVM + * + * Copyright © 2019 Oracle and/or its affiliates. All rights reserved. + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_SYSTEM_KVM_XEN_H +#define QEMU_SYSTEM_KVM_XEN_H + +/* The KVM API uses these to indicate "no GPA" or "no GFN" */ +#define INVALID_GPA UINT64_MAX +#define INVALID_GFN UINT64_MAX + +/* QEMU plays the rĂ´le of dom0 for "interdomain" communication. */ +#define DOMID_QEMU 0 + +int kvm_xen_soft_reset(void); +uint32_t kvm_xen_get_caps(void); +void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id); +bool kvm_xen_has_vcpu_callback_vector(void); +void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type); +void kvm_xen_set_callback_asserted(void); +int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port); +uint16_t kvm_xen_get_gnttab_max_frames(void); +uint16_t kvm_xen_get_evtchn_max_pirq(void); + +#define kvm_xen_has_cap(cap) (!!(kvm_xen_get_caps() & \ + KVM_XEN_HVM_CONFIG_ ## cap)) + +#define XEN_SPECIAL_AREA_ADDR 0xfeff8000UL +#define XEN_SPECIAL_AREA_SIZE 0x4000UL + +#define XEN_SPECIALPAGE_CONSOLE 0 +#define XEN_SPECIALPAGE_XENSTORE 1 + +#define XEN_SPECIAL_PFN(x) ((XEN_SPECIAL_AREA_ADDR >> TARGET_PAGE_BITS) + \ + XEN_SPECIALPAGE_##x) + +#endif /* QEMU_SYSTEM_KVM_XEN_H */ diff --git a/include/system/memory_mapping.h b/include/system/memory_mapping.h new file mode 100644 index 0000000000..021e0a6230 --- /dev/null +++ b/include/system/memory_mapping.h @@ -0,0 +1,85 @@ +/* + * QEMU memory mapping + * + * Copyright Fujitsu, Corp. 
2011, 2012 + * + * Authors: + * Wen Congyang <wency@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef MEMORY_MAPPING_H +#define MEMORY_MAPPING_H + +#include "qemu/queue.h" +#include "exec/cpu-common.h" + +typedef struct GuestPhysBlock { + /* visible to guest, reflects PCI hole, etc */ + hwaddr target_start; + + /* implies size */ + hwaddr target_end; + + /* points into host memory */ + uint8_t *host_addr; + + /* points to the MemoryRegion that this block belongs to */ + MemoryRegion *mr; + + QTAILQ_ENTRY(GuestPhysBlock) next; +} GuestPhysBlock; + +/* point-in-time snapshot of guest-visible physical mappings */ +typedef struct GuestPhysBlockList { + unsigned num; + QTAILQ_HEAD(, GuestPhysBlock) head; +} GuestPhysBlockList; + +/* The physical and virtual address in the memory mapping are contiguous. */ +typedef struct MemoryMapping { + hwaddr phys_addr; + vaddr virt_addr; + ram_addr_t length; + QTAILQ_ENTRY(MemoryMapping) next; +} MemoryMapping; + +struct MemoryMappingList { + unsigned int num; + MemoryMapping *last_mapping; + QTAILQ_HEAD(, MemoryMapping) head; +}; + +/* + * add or merge the memory region [phys_addr, phys_addr + length) into the + * memory mapping's list. The region's virtual address starts with virt_addr, + * and is contiguous. The list is sorted by phys_addr. + */ +void memory_mapping_list_add_merge_sorted(MemoryMappingList *list, + hwaddr phys_addr, + hwaddr virt_addr, + ram_addr_t length); + +void memory_mapping_list_free(MemoryMappingList *list); + +void memory_mapping_list_init(MemoryMappingList *list); + +void guest_phys_blocks_free(GuestPhysBlockList *list); +void guest_phys_blocks_init(GuestPhysBlockList *list); +void guest_phys_blocks_append(GuestPhysBlockList *list); + +bool qemu_get_guest_memory_mapping(MemoryMappingList *list, + const GuestPhysBlockList *guest_phys_blocks, + Error **errp); + +/* get guest's memory mapping without do paging(virtual address is 0). 
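+ *
+ * A sketch of how a dump-style caller might drive it (variable names and the
+ * surrounding flow are assumptions):
+ *
+ *   GuestPhysBlockList blocks;
+ *   MemoryMappingList mappings;
+ *   guest_phys_blocks_init(&blocks);
+ *   guest_phys_blocks_append(&blocks);
+ *   memory_mapping_list_init(&mappings);
+ *   qemu_get_guest_simple_memory_mapping(&mappings, &blocks);
+ *   ...
+ *   memory_mapping_list_free(&mappings);
+ *   guest_phys_blocks_free(&blocks);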
*/ +void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list, + const GuestPhysBlockList *guest_phys_blocks); + +void memory_mapping_filter(MemoryMappingList *list, int64_t begin, + int64_t length); + +#endif diff --git a/include/system/numa.h b/include/system/numa.h new file mode 100644 index 0000000000..96d4ff9b85 --- /dev/null +++ b/include/system/numa.h @@ -0,0 +1,114 @@ +#ifndef SYSTEM_NUMA_H +#define SYSTEM_NUMA_H + +#include "qemu/bitmap.h" +#include "qapi/qapi-types-machine.h" +#include "exec/cpu-common.h" + +struct CPUArchId; + +#define MAX_NODES 128 +#define NUMA_NODE_UNASSIGNED MAX_NODES +#define NUMA_DISTANCE_MIN 10 +#define NUMA_DISTANCE_DEFAULT 20 +#define NUMA_DISTANCE_MAX 254 +#define NUMA_DISTANCE_UNREACHABLE 255 + +/* the value of AcpiHmatLBInfo flags */ +enum { + HMAT_LB_MEM_MEMORY = 0, + HMAT_LB_MEM_CACHE_1ST_LEVEL = 1, + HMAT_LB_MEM_CACHE_2ND_LEVEL = 2, + HMAT_LB_MEM_CACHE_3RD_LEVEL = 3, + HMAT_LB_LEVELS /* must be the last entry */ +}; + +/* the value of AcpiHmatLBInfo data type */ +enum { + HMAT_LB_DATA_ACCESS_LATENCY = 0, + HMAT_LB_DATA_READ_LATENCY = 1, + HMAT_LB_DATA_WRITE_LATENCY = 2, + HMAT_LB_DATA_ACCESS_BANDWIDTH = 3, + HMAT_LB_DATA_READ_BANDWIDTH = 4, + HMAT_LB_DATA_WRITE_BANDWIDTH = 5, + HMAT_LB_TYPES /* must be the last entry */ +}; + +#define UINT16_BITS 16 + +typedef struct NodeInfo { + uint64_t node_mem; + struct HostMemoryBackend *node_memdev; + bool present; + bool has_cpu; + bool has_gi; + uint8_t lb_info_provided; + uint16_t initiator; + uint8_t distance[MAX_NODES]; +} NodeInfo; + +typedef struct NumaNodeMem { + uint64_t node_mem; + uint64_t node_plugged_mem; +} NumaNodeMem; + +struct HMAT_LB_Data { + uint8_t initiator; + uint8_t target; + uint64_t data; +}; +typedef struct HMAT_LB_Data HMAT_LB_Data; + +struct HMAT_LB_Info { + /* Indicates it's memory or the specified level memory side cache. */ + uint8_t hierarchy; + + /* Present the type of data, access/read/write latency or bandwidth. */ + uint8_t data_type; + + /* The range bitmap of bandwidth for calculating common base */ + uint64_t range_bitmap; + + /* The common base unit for latencies or bandwidths */ + uint64_t base; + + /* Array to store the latencies or bandwidths */ + GArray *list; +}; +typedef struct HMAT_LB_Info HMAT_LB_Info; + +struct NumaState { + /* Number of NUMA nodes */ + int num_nodes; + + /* Allow setting NUMA distance for different NUMA nodes */ + bool have_numa_distance; + + /* Detect if HMAT support is enabled. 
*/ + bool hmat_enabled; + + /* NUMA nodes information */ + NodeInfo nodes[MAX_NODES]; + + /* NUMA nodes HMAT Locality Latency and Bandwidth Information */ + HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES]; + + /* Memory Side Cache Information Structure */ + NumaHmatCacheOptions *hmat_cache[MAX_NODES][HMAT_LB_LEVELS]; +}; +typedef struct NumaState NumaState; + +void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp); +void parse_numa_opts(MachineState *ms); +void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node, + Error **errp); +void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node, + Error **errp); +void numa_complete_configuration(MachineState *ms); +void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms); +extern QemuOptsList qemu_numa_opts; +void numa_cpu_pre_plug(const struct CPUArchId *slot, DeviceState *dev, + Error **errp); +bool numa_uses_legacy_mem(void); + +#endif diff --git a/include/system/nvmm.h b/include/system/nvmm.h new file mode 100644 index 0000000000..6971ddb3a5 --- /dev/null +++ b/include/system/nvmm.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2018-2019 Maxime Villard, All rights reserved. + * + * NetBSD Virtual Machine Monitor (NVMM) accelerator support. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +/* header to be included in non-NVMM-specific code */ + +#ifndef QEMU_NVMM_H +#define QEMU_NVMM_H + +#ifdef COMPILING_PER_TARGET + +#ifdef CONFIG_NVMM + +int nvmm_enabled(void); + +#else /* CONFIG_NVMM */ + +#define nvmm_enabled() (0) + +#endif /* CONFIG_NVMM */ + +#endif /* COMPILING_PER_TARGET */ + +#endif /* QEMU_NVMM_H */ diff --git a/include/system/os-posix.h b/include/system/os-posix.h new file mode 100644 index 0000000000..b881ac6c6f --- /dev/null +++ b/include/system/os-posix.h @@ -0,0 +1,101 @@ +/* + * posix specific declarations + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010 Jes Sorensen <Jes.Sorensen@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef QEMU_OS_POSIX_H +#define QEMU_OS_POSIX_H + +#include <sys/mman.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <arpa/inet.h> +#include <netdb.h> +#include <sys/un.h> + +#ifdef CONFIG_SYSMACROS +#include <sys/sysmacros.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +void os_set_line_buffering(void); +void os_setup_early_signal_handling(void); +void os_set_proc_name(const char *s); +void os_setup_signal_handling(void); +int os_set_daemonize(bool d); +bool is_daemonized(void); +void os_daemonize(void); +bool os_set_runas(const char *user_id); +void os_set_chroot(const char *path); +void os_setup_limits(void); +void os_setup_post(void); +int os_mlock(void); + +/** + * qemu_alloc_stack: + * @sz: pointer to a size_t holding the requested usable stack size + * + * Allocate memory that can be used as a stack, for instance for + * coroutines. If the memory cannot be allocated, this function + * will abort (like g_malloc()). This function also inserts an + * additional guard page to catch a potential stack overflow. + * Note that the memory required for the guard page and alignment + * and minimal stack size restrictions will increase the value of sz. + * + * The allocated stack must be freed with qemu_free_stack(). + * + * Returns: pointer to (the lowest address of) the stack memory. + */ +void *qemu_alloc_stack(size_t *sz); + +/** + * qemu_free_stack: + * @stack: stack to free + * @sz: size of stack in bytes + * + * Free a stack allocated via qemu_alloc_stack(). Note that sz must + * be exactly the adjusted stack size returned by qemu_alloc_stack. + */ +void qemu_free_stack(void *stack, size_t sz); + +/* POSIX and Mingw32 differ in the name of the stdio lock functions. */ + +static inline void qemu_flockfile(FILE *f) +{ + flockfile(f); +} + +static inline void qemu_funlockfile(FILE *f) +{ + funlockfile(f); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/system/os-win32.h b/include/system/os-win32.h new file mode 100644 index 0000000000..b82a5d3ad9 --- /dev/null +++ b/include/system/os-win32.h @@ -0,0 +1,277 @@ +/* + * win32 specific declarations + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010 Jes Sorensen <Jes.Sorensen@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef QEMU_OS_WIN32_H +#define QEMU_OS_WIN32_H + +#include <winsock2.h> +#include <windows.h> +#include <ws2tcpip.h> +#include "qemu/typedefs.h" + +#ifdef HAVE_AFUNIX_H +#include <afunix.h> +#else +/* + * Fallback definitions of things we need in afunix.h, if not available from + * the used Windows SDK or MinGW headers. + */ +#define UNIX_PATH_MAX 108 + +typedef struct sockaddr_un { + ADDRESS_FAMILY sun_family; + char sun_path[UNIX_PATH_MAX]; +} SOCKADDR_UN, *PSOCKADDR_UN; + +#define SIO_AF_UNIX_GETPEERPID _WSAIOR(IOC_VENDOR, 256) +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__aarch64__) +/* + * On windows-arm64, setjmp is available in only one variant, and longjmp always + * does stack unwinding. This crash with generated code. + * Thus, we use another implementation of setjmp (not windows one), coming from + * mingw, which never performs stack unwinding. + */ +#undef setjmp +#undef longjmp +/* + * These functions are not declared in setjmp.h because __aarch64__ defines + * setjmp to _setjmpex instead. However, they are still defined in libmingwex.a, + * which gets linked automatically. + */ +int __mingw_setjmp(jmp_buf); +void __attribute__((noreturn)) __mingw_longjmp(jmp_buf, int); +#define setjmp(env) __mingw_setjmp(env) +#define longjmp(env, val) __mingw_longjmp(env, val) +#elif defined(_WIN64) +/* + * On windows-x64, setjmp is implemented by _setjmp which needs a second parameter. + * If this parameter is NULL, longjump does no stack unwinding. + * That is what we need for QEMU. Passing the value of register rsp (default) + * lets longjmp try a stack unwinding which will crash with generated code. + */ +# undef setjmp +# define setjmp(env) _setjmp(env, NULL) +#endif /* __aarch64__ */ +/* QEMU uses sigsetjmp()/siglongjmp() as the portable way to specify + * "longjmp and don't touch the signal masks". Since we know that the + * savemask parameter will always be zero we can safely define these + * in terms of setjmp/longjmp on Win32. + */ +#define sigjmp_buf jmp_buf +#define sigsetjmp(env, savemask) setjmp(env) +#define siglongjmp(env, val) longjmp(env, val) + +/* Missing POSIX functions. Don't use MinGW-w64 macros. */ +#ifndef _POSIX_THREAD_SAFE_FUNCTIONS +#undef gmtime_r +struct tm *gmtime_r(const time_t *timep, struct tm *result); +#undef localtime_r +struct tm *localtime_r(const time_t *timep, struct tm *result); +#endif /* _POSIX_THREAD_SAFE_FUNCTIONS */ + +static inline void os_setup_signal_handling(void) {} +static inline void os_daemonize(void) {} +static inline void os_setup_post(void) {} +static inline void os_set_proc_name(const char *dummy) {} +void os_set_line_buffering(void); +void os_setup_early_signal_handling(void); + +int getpagesize(void); + +#if !defined(EPROTONOSUPPORT) +# define EPROTONOSUPPORT EINVAL +#endif + +static inline int os_set_daemonize(bool d) +{ + if (d) { + return -ENOTSUP; + } + return 0; +} + +static inline bool is_daemonized(void) +{ + return false; +} + +static inline int os_mlock(void) +{ + return -ENOSYS; +} + +static inline void os_setup_limits(void) +{ + return; +} + +#define fsync _commit + +#if !defined(lseek) +# define lseek _lseeki64 +#endif + +int qemu_ftruncate64(int, int64_t); + +#if !defined(ftruncate) +# define ftruncate qemu_ftruncate64 +#endif + +static inline char *realpath(const char *path, char *resolved_path) +{ + _fullpath(resolved_path, path, _MAX_PATH); + return resolved_path; +} + +/* + * Older versions of MinGW do not import _lock_file and _unlock_file properly. 
+ * This was fixed for v6.0.0 with commit b48e3ac8969d. + */ +static inline void qemu_flockfile(FILE *f) +{ +#ifdef HAVE__LOCK_FILE + _lock_file(f); +#endif +} + +static inline void qemu_funlockfile(FILE *f) +{ +#ifdef HAVE__LOCK_FILE + _unlock_file(f); +#endif +} + +/* Helper for WSAEventSelect, to report errors */ +bool qemu_socket_select(int sockfd, WSAEVENT hEventObject, + long lNetworkEvents, Error **errp); + +bool qemu_socket_unselect(int sockfd, Error **errp); + +/* We wrap all the sockets functions so that we can set errno based on + * WSAGetLastError(), and use file-descriptors instead of SOCKET. + */ + +/* + * qemu_close_socket_osfhandle: + * @fd: a file descriptor associated with a SOCKET + * + * Close only the C run-time file descriptor, leave the SOCKET opened. + * + * Returns zero on success. On error, -1 is returned, and errno is set to + * indicate the error. + */ +int qemu_close_socket_osfhandle(int fd); + +#undef close +#define close qemu_close_wrap +int qemu_close_wrap(int fd); + +#undef connect +#define connect qemu_connect_wrap +int qemu_connect_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen); + +#undef listen +#define listen qemu_listen_wrap +int qemu_listen_wrap(int sockfd, int backlog); + +#undef bind +#define bind qemu_bind_wrap +int qemu_bind_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen); + +#undef socket +#define socket qemu_socket_wrap +int qemu_socket_wrap(int domain, int type, int protocol); + +#undef accept +#define accept qemu_accept_wrap +int qemu_accept_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef shutdown +#define shutdown qemu_shutdown_wrap +int qemu_shutdown_wrap(int sockfd, int how); + +#undef ioctlsocket +#define ioctlsocket qemu_ioctlsocket_wrap +int qemu_ioctlsocket_wrap(int fd, int req, void *val); + +#undef getsockopt +#define getsockopt qemu_getsockopt_wrap +int qemu_getsockopt_wrap(int sockfd, int level, int optname, + void *optval, socklen_t *optlen); + +#undef setsockopt +#define setsockopt qemu_setsockopt_wrap +int qemu_setsockopt_wrap(int sockfd, int level, int optname, + const void *optval, socklen_t optlen); + +#undef getpeername +#define getpeername qemu_getpeername_wrap +int qemu_getpeername_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef getsockname +#define getsockname qemu_getsockname_wrap +int qemu_getsockname_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef send +#define send qemu_send_wrap +ssize_t qemu_send_wrap(int sockfd, const void *buf, size_t len, int flags); + +#undef sendto +#define sendto qemu_sendto_wrap +ssize_t qemu_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *addr, socklen_t addrlen); + +#undef recv +#define recv qemu_recv_wrap +ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags); + +#undef recvfrom +#define recvfrom qemu_recvfrom_wrap +ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, + struct sockaddr *addr, socklen_t *addrlen); + +EXCEPTION_DISPOSITION +win32_close_exception_handler(struct _EXCEPTION_RECORD*, void*, + struct _CONTEXT*, void*); + +void *qemu_win32_map_alloc(size_t size, HANDLE *h, Error **errp); +void qemu_win32_map_free(void *ptr, HANDLE h, Error **errp); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/system/qtest.h b/include/system/qtest.h new file mode 100644 index 0000000000..c161d75165 --- /dev/null +++ b/include/system/qtest.h @@ -0,0 +1,39 @@ +/* + * Test Server + * + * Copyright 
IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QTEST_H +#define QTEST_H + +#include "chardev/char.h" + +extern bool qtest_allowed; + +static inline bool qtest_enabled(void) +{ + return qtest_allowed; +} + +#ifndef CONFIG_USER_ONLY +void qtest_send_prefix(CharBackend *chr); +void G_GNUC_PRINTF(2, 3) qtest_sendf(CharBackend *chr, const char *fmt, ...); +void qtest_set_command_cb(bool (*pc_cb)(CharBackend *chr, gchar **words)); +bool qtest_driver(void); + +void qtest_server_init(const char *qtest_chrdev, const char *qtest_log, Error **errp); + +void qtest_server_set_send_handler(void (*send)(void *, const char *), + void *opaque); +void qtest_server_inproc_recv(void *opaque, const char *buf); +#endif + +#endif diff --git a/include/system/replay.h b/include/system/replay.h new file mode 100644 index 0000000000..8926d8cf4b --- /dev/null +++ b/include/system/replay.h @@ -0,0 +1,183 @@ +/* + * QEMU replay (system interface) + * + * Copyright (c) 2010-2015 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ +#ifndef SYSTEM_REPLAY_H +#define SYSTEM_REPLAY_H + +#ifdef CONFIG_USER_ONLY +#error Cannot include this header from user emulation +#endif + +#include "exec/replay-core.h" +#include "qapi/qapi-types-misc.h" +#include "qapi/qapi-types-run-state.h" +#include "qapi/qapi-types-ui.h" +#include "block/aio.h" + +/* replay clock kinds */ +enum ReplayClockKind { + /* host_clock */ + REPLAY_CLOCK_HOST, + /* virtual_rt_clock */ + REPLAY_CLOCK_VIRTUAL_RT, + REPLAY_CLOCK_COUNT +}; +typedef enum ReplayClockKind ReplayClockKind; + +/* IDs of the checkpoints */ +enum ReplayCheckpoint { + CHECKPOINT_CLOCK_WARP_START, + CHECKPOINT_CLOCK_WARP_ACCOUNT, + CHECKPOINT_RESET_REQUESTED, + CHECKPOINT_SUSPEND_REQUESTED, + CHECKPOINT_CLOCK_VIRTUAL, + CHECKPOINT_CLOCK_HOST, + CHECKPOINT_CLOCK_VIRTUAL_RT, + CHECKPOINT_INIT, + CHECKPOINT_RESET, + CHECKPOINT_COUNT +}; +typedef enum ReplayCheckpoint ReplayCheckpoint; + +typedef struct ReplayNetState ReplayNetState; + +/* Name of the initial VM snapshot */ +extern char *replay_snapshot; + +/* Replay locking + * + * The locks are needed to protect the shared structures and log file + * when doing record/replay. They also are the main sync-point between + * the main-loop thread and the vCPU thread. This was a role + * previously filled by the BQL which has been busy trying to reduce + * its impact across the code. This ensures blocks of events stay + * sequential and reproducible. + */ + +void replay_mutex_lock(void); +void replay_mutex_unlock(void); + +/* Processing the instructions */ + +/*! Returns number of executed instructions. */ +uint64_t replay_get_current_icount(void); +/*! Returns number of instructions to execute in replay mode. */ +int replay_get_instructions(void); +/*! Updates instructions counter in replay mode. */ +void replay_account_executed_instructions(void); + +/* Processing clocks and other time sources */ + +/*! Save the specified clock */ +int64_t replay_save_clock(ReplayClockKind kind, int64_t clock, + int64_t raw_icount); +/*! Read the specified clock from the log or return cached data */ +int64_t replay_read_clock(ReplayClockKind kind, int64_t raw_icount); +/*! 
Saves or reads the clock depending on the current replay mode. */ +#define REPLAY_CLOCK(clock, value) \ + !icount_enabled() ? (value) : \ + (replay_mode == REPLAY_MODE_PLAY \ + ? replay_read_clock((clock), icount_get_raw()) \ + : replay_mode == REPLAY_MODE_RECORD \ + ? replay_save_clock((clock), (value), icount_get_raw()) \ + : (value)) +#define REPLAY_CLOCK_LOCKED(clock, value) \ + !icount_enabled() ? (value) : \ + (replay_mode == REPLAY_MODE_PLAY \ + ? replay_read_clock((clock), icount_get_raw_locked()) \ + : replay_mode == REPLAY_MODE_RECORD \ + ? replay_save_clock((clock), (value), icount_get_raw_locked()) \ + : (value)) + +/* Events */ + +/*! Called when qemu shutdown is requested. */ +void replay_shutdown_request(ShutdownCause cause); +/*! Should be called at check points in the execution. + These check points are skipped, if they were not met. + Saves checkpoint in the SAVE mode and validates in the PLAY mode. + Returns 0 in PLAY mode if checkpoint was not found. + Returns 1 in all other cases. */ +bool replay_checkpoint(ReplayCheckpoint checkpoint); +/*! Used to determine that checkpoint or async event is pending. + Does not proceed to the next event in the log. */ +bool replay_has_event(void); +/* + * Processes the async events added to the queue (while recording) + * or reads the events from the file (while replaying). + */ +void replay_async_events(void); + +/* Asynchronous events queue */ + +/*! Enables storing events in the queue */ +void replay_enable_events(void); +/*! Returns true when saving events is enabled */ +bool replay_events_enabled(void); +/* Flushes events queue */ +void replay_flush_events(void); +/*! Adds bottom half event to the queue */ +void replay_bh_schedule_event(QEMUBH *bh); +/* Adds oneshot bottom half event to the queue */ +void replay_bh_schedule_oneshot_event(AioContext *ctx, + QEMUBHFunc *cb, void *opaque); +/*! Adds input event to the queue */ +void replay_input_event(QemuConsole *src, InputEvent *evt); +/*! Adds input sync event to the queue */ +void replay_input_sync_event(void); +/*! Adds block layer event to the queue */ +void replay_block_event(QEMUBH *bh, uint64_t id); +/*! Returns ID for the next block event */ +uint64_t blkreplay_next_id(void); + +/* Character device */ + +/*! Registers char driver to save it's events */ +void replay_register_char_driver(struct Chardev *chr); +/*! Saves write to char device event to the log */ +void replay_chr_be_write(struct Chardev *s, const uint8_t *buf, int len); +/*! Writes char write return value to the replay log. */ +void replay_char_write_event_save(int res, int offset); +/*! Reads char write return value from the replay log. */ +void replay_char_write_event_load(int *res, int *offset); +/*! Reads information about read_all character event. */ +int replay_char_read_all_load(uint8_t *buf); +/*! Writes character read_all error code into the replay log. */ +void replay_char_read_all_save_error(int res); +/*! Writes character read_all execution result into the replay log. */ +void replay_char_read_all_save_buf(uint8_t *buf, int offset); + +/* Network */ + +/*! Registers replay network filter attached to some backend. */ +ReplayNetState *replay_register_net(NetFilterState *nfs); +/*! Unregisters replay network filter. */ +void replay_unregister_net(ReplayNetState *rns); +/*! Called to write network packet to the replay log. */ +void replay_net_packet_event(ReplayNetState *rns, unsigned flags, + const struct iovec *iov, int iovcnt); + +/* Audio */ + +/*! 
Saves/restores number of played samples of audio out operation. */ +void replay_audio_out(size_t *played); +/*! Saves/restores recorded samples of audio in operation. */ +void replay_audio_in(size_t *recorded, void *samples, size_t *wpos, size_t size); + +/* VM state operations */ + +/*! Called at the start of execution. + Loads or saves initial vmstate depending on execution mode. */ +void replay_vmstate_init(void); +/*! Called to ensure that replay state is consistent and VM snapshot + can be created */ +bool replay_can_snapshot(void); + +#endif diff --git a/include/system/reset.h b/include/system/reset.h new file mode 100644 index 0000000000..97131d94cf --- /dev/null +++ b/include/system/reset.h @@ -0,0 +1,127 @@ +/* + * Reset handlers. + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2016 Red Hat, Inc. + * Copyright (c) 2024 Linaro, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_SYSTEM_RESET_H +#define QEMU_SYSTEM_RESET_H + +#include "hw/resettable.h" +#include "qapi/qapi-events-run-state.h" + +typedef void QEMUResetHandler(void *opaque); + +/** + * qemu_register_resettable: Register an object to be reset + * @obj: object to be reset: it must implement the Resettable interface + * + * Register @obj on the list of objects which will be reset when the + * simulation is reset. These objects will be reset in the order + * they were added, using the three-phase Resettable protocol, + * so first all objects go through the enter phase, then all objects + * go through the hold phase, and then finally all go through the + * exit phase. + * + * It is not permitted to register or unregister reset functions or + * resettable objects from within any of the reset phase methods of @obj. + * + * We assume that the caller holds the BQL. + */ +void qemu_register_resettable(Object *obj); + +/** + * qemu_unregister_resettable: Unregister an object to be reset + * @obj: object to unregister + * + * Remove @obj from the list of objects which are reset when the + * simulation is reset. It must have been previously added to + * the list via qemu_register_resettable(). + * + * We assume that the caller holds the BQL. + */ +void qemu_unregister_resettable(Object *obj); + +/** + * qemu_register_reset: Register a callback for system reset + * @func: function to call + * @opaque: opaque data to pass to @func + * + * Register @func on the list of functions which are called when the + * entire system is reset. 
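+ * (A sketch of such a registration, for illustration only; the handler name
+ * and the opaque pointer are assumptions:
+ *
+ *   static void my_board_reset(void *opaque) { ... }
+ *   ...
+ *   qemu_register_reset(my_board_reset, my_state);
+ * )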
Functions registered with this API and + * Resettable objects registered with qemu_register_resettable() are + * handled together, in the order in which they were registered. + * Functions registered with this API are called in the 'hold' phase + * of the 3-phase reset. + * + * In general this function should not be used in new code where possible; + * for instance, device model reset is better accomplished using the + * methods on DeviceState. + * + * It is not permitted to register or unregister reset functions or + * resettable objects from within the @func callback. + * + * We assume that the caller holds the BQL. + */ +void qemu_register_reset(QEMUResetHandler *func, void *opaque); + +/** + * qemu_register_reset_nosnapshotload: Register a callback for system reset + * @func: function to call + * @opaque: opaque data to pass to @func + * + * This is the same as qemu_register_reset(), except that @func is + * not called if the reason that the system is being reset is to + * put it into a clean state prior to loading a snapshot (i.e. for + * SHUTDOWN_CAUSE_SNAPSHOT_LOAD). + */ +void qemu_register_reset_nosnapshotload(QEMUResetHandler *func, void *opaque); + +/** + * qemu_unregister_reset: Unregister a system reset callback + * @func: function registered with qemu_register_reset() + * @opaque: the same opaque data that was passed to qemu_register_reset() + * + * Undo the effects of a qemu_register_reset(). The @func and @opaque + * must both match the arguments originally used with qemu_register_reset(). + * + * We assume that the caller holds the BQL. + */ +void qemu_unregister_reset(QEMUResetHandler *func, void *opaque); + +/** + * qemu_devices_reset: Perform a complete system reset + * @reason: type of the reset + * + * This function performs the low-level work needed to do a complete reset + * of the system (calling all the callbacks registered with + * qemu_register_reset() and resetting all the Resettable objects registered + * with qemu_register_resettable()). It should only be called by the code in a + * MachineClass reset method. + * + * If you want to trigger a system reset from, for instance, a device + * model, don't use this function. Use qemu_system_reset_request(). + */ +void qemu_devices_reset(ResetType type); + +#endif diff --git a/include/system/rng-random.h b/include/system/rng-random.h new file mode 100644 index 0000000000..0fdc6c6974 --- /dev/null +++ b/include/system/rng-random.h @@ -0,0 +1,21 @@ +/* + * QEMU Random Number Generator Backend + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_RNG_RANDOM_H +#define QEMU_RNG_RANDOM_H + +#include "qom/object.h" + +#define TYPE_RNG_RANDOM "rng-random" +OBJECT_DECLARE_SIMPLE_TYPE(RngRandom, RNG_RANDOM) + + +#endif diff --git a/include/system/rng.h b/include/system/rng.h new file mode 100644 index 0000000000..e383f87d20 --- /dev/null +++ b/include/system/rng.h @@ -0,0 +1,89 @@ +/* + * QEMU Random Number Generator Backend + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#ifndef QEMU_RNG_H +#define QEMU_RNG_H + +#include "qemu/queue.h" +#include "qom/object.h" + +#define TYPE_RNG_BACKEND "rng-backend" +OBJECT_DECLARE_TYPE(RngBackend, RngBackendClass, + RNG_BACKEND) + +#define TYPE_RNG_BUILTIN "rng-builtin" + +typedef struct RngRequest RngRequest; + +typedef void (EntropyReceiveFunc)(void *opaque, + const void *data, + size_t size); + +struct RngRequest +{ + EntropyReceiveFunc *receive_entropy; + uint8_t *data; + void *opaque; + size_t offset; + size_t size; + QSIMPLEQ_ENTRY(RngRequest) next; +}; + +struct RngBackendClass +{ + ObjectClass parent_class; + + void (*request_entropy)(RngBackend *s, RngRequest *req); + + void (*opened)(RngBackend *s, Error **errp); +}; + +struct RngBackend +{ + Object parent; + + /*< protected >*/ + bool opened; + QSIMPLEQ_HEAD(, RngRequest) requests; +}; + + +/** + * rng_backend_request_entropy: + * @s: the backend to request entropy from + * @size: the number of bytes of data to request + * @receive_entropy: a function to be invoked when entropy is available + * @opaque: data that should be passed to @receive_entropy + * + * This function is used by the front-end to request entropy from an entropy + * source. This function can be called multiple times before @receive_entropy + * is invoked with different values of @receive_entropy and @opaque. The + * backend will queue each request and handle appropriately. + * + * The backend does not need to pass the full amount of data to @receive_entropy + * but will pass a value greater than 0. + */ +void rng_backend_request_entropy(RngBackend *s, size_t size, + EntropyReceiveFunc *receive_entropy, + void *opaque); + +/** + * rng_backend_free_request: + * @s: the backend that created the request + * @req: the request to finalize + * + * Used by child rng backend classes to finalize requests once they've been + * processed. The request is removed from the list of active requests and + * deleted. + */ +void rng_backend_finalize_request(RngBackend *s, RngRequest *req); +#endif diff --git a/include/system/rtc.h b/include/system/rtc.h new file mode 100644 index 0000000000..cde83fab15 --- /dev/null +++ b/include/system/rtc.h @@ -0,0 +1,58 @@ +/* + * RTC configuration and clock read + * + * Copyright (c) 2003-2021 QEMU contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef SYSTEM_RTC_H +#define SYSTEM_RTC_H + +/** + * qemu_get_timedate: Get the current RTC time + * @tm: struct tm to fill in with RTC time + * @offset: offset in seconds to adjust the RTC time by before + * converting to struct tm format. + * + * This function fills in @tm with the current RTC time, as adjusted + * by @offset (for example, if @offset is 3600 then the returned time/date + * will be one hour further ahead than the current RTC time). + * + * The usual use is by RTC device models, which should call this function + * to find the time/date value that they should return to the guest + * when it reads the RTC registers. + * + * The behaviour of the clock whose value this function returns will + * depend on the -rtc command line option passed by the user. + */ +void qemu_get_timedate(struct tm *tm, time_t offset); + +/** + * qemu_timedate_diff: Return difference between a struct tm and the RTC + * @tm: struct tm containing the date/time to compare against + * + * Returns the difference in seconds between the RTC clock time + * and the date/time specified in @tm. For example, if @tm specifies + * a timestamp one hour further ahead than the current RTC time + * then this function will return 3600. + */ +time_t qemu_timedate_diff(struct tm *tm); + +#endif diff --git a/include/system/runstate-action.h b/include/system/runstate-action.h new file mode 100644 index 0000000000..db4e3099ae --- /dev/null +++ b/include/system/runstate-action.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2020 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef RUNSTATE_ACTION_H +#define RUNSTATE_ACTION_H + +#include "qapi/qapi-commands-run-state.h" + +/* in system/runstate-action.c */ +extern RebootAction reboot_action; +extern ShutdownAction shutdown_action; +extern PanicAction panic_action; + +#endif /* RUNSTATE_ACTION_H */ diff --git a/include/system/runstate.h b/include/system/runstate.h new file mode 100644 index 0000000000..bffc3719d4 --- /dev/null +++ b/include/system/runstate.h @@ -0,0 +1,112 @@ +#ifndef SYSTEM_RUNSTATE_H +#define SYSTEM_RUNSTATE_H + +#include "qapi/qapi-types-run-state.h" +#include "qemu/notify.h" + +bool runstate_check(RunState state); +void runstate_set(RunState new_state); +RunState runstate_get(void); +bool runstate_is_running(void); +bool runstate_needs_reset(void); +void runstate_replay_enable(void); + +typedef void VMChangeStateHandler(void *opaque, bool running, RunState state); + +VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, + void *opaque); +VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( + VMChangeStateHandler *cb, void *opaque, int priority); +VMChangeStateEntry * +qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, + VMChangeStateHandler *prepare_cb, + void *opaque, int priority); +VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, + VMChangeStateHandler *cb, + void *opaque); +VMChangeStateEntry *qdev_add_vm_change_state_handler_full( + DeviceState *dev, VMChangeStateHandler *cb, + VMChangeStateHandler *prepare_cb, void *opaque); +void qemu_del_vm_change_state_handler(VMChangeStateEntry *e); +/** + * vm_state_notify: Notify the state of the VM + * + * @running: whether the VM is running or not. + * @state: the #RunState of the VM. 
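+ *
+ * Handlers that receive this notification are registered with the
+ * qemu_add_vm_change_state_handler() family declared above. A sketch, with
+ * the callback name and opaque pointer chosen for illustration:
+ *
+ *   static void my_vm_state_change(void *opaque, bool running, RunState state)
+ *   {
+ *       ...
+ *   }
+ *   ...
+ *   qemu_add_vm_change_state_handler(my_vm_state_change, my_dev);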
+ */ +void vm_state_notify(bool running, RunState state); + +static inline bool shutdown_caused_by_guest(ShutdownCause cause) +{ + return cause >= SHUTDOWN_CAUSE_GUEST_SHUTDOWN; +} + +/* + * In a "live" state, the vcpu clock is ticking, and the runstate notifiers + * think we are running. + */ +static inline bool runstate_is_live(RunState state) +{ + return state == RUN_STATE_RUNNING || state == RUN_STATE_SUSPENDED; +} + +void vm_start(void); + +/** + * vm_prepare_start: Prepare for starting/resuming the VM + * + * @step_pending: whether any of the CPUs is about to be single-stepped by gdb + */ +int vm_prepare_start(bool step_pending); + +/** + * vm_resume: If @state is a live state, start the vm and set the state, + * else just set the state. + * + * @state: the state to restore + */ +void vm_resume(RunState state); + +int vm_stop(RunState state); +int vm_stop_force_state(RunState state); +int vm_shutdown(void); +void vm_set_suspended(bool suspended); +bool vm_get_suspended(void); + +typedef enum WakeupReason { + /* Always keep QEMU_WAKEUP_REASON_NONE = 0 */ + QEMU_WAKEUP_REASON_NONE = 0, + QEMU_WAKEUP_REASON_RTC, + QEMU_WAKEUP_REASON_PMTIMER, + QEMU_WAKEUP_REASON_OTHER, +} WakeupReason; + +void qemu_system_reset_request(ShutdownCause reason); +void qemu_system_suspend_request(void); +void qemu_register_suspend_notifier(Notifier *notifier); +bool qemu_wakeup_suspend_enabled(void); +void qemu_system_wakeup_request(WakeupReason reason, Error **errp); +void qemu_system_wakeup_enable(WakeupReason reason, bool enabled); +void qemu_register_wakeup_notifier(Notifier *notifier); +void qemu_register_wakeup_support(void); +void qemu_system_shutdown_request_with_code(ShutdownCause reason, + int exit_code); +void qemu_system_shutdown_request(ShutdownCause reason); +void qemu_system_powerdown_request(void); +void qemu_register_powerdown_notifier(Notifier *notifier); +void qemu_register_shutdown_notifier(Notifier *notifier); +void qemu_system_debug_request(void); +void qemu_system_vmstop_request(RunState reason); +void qemu_system_vmstop_request_prepare(void); +bool qemu_vmstop_requested(RunState *r); +ShutdownCause qemu_shutdown_requested_get(void); +ShutdownCause qemu_reset_requested_get(void); +void qemu_system_killed(int signal, pid_t pid); +void qemu_system_reset(ShutdownCause reason); +void qemu_system_guest_panicked(GuestPanicInformation *info); +void qemu_system_guest_crashloaded(GuestPanicInformation *info); +void qemu_system_guest_pvshutdown(void); +bool qemu_system_dump_in_progress(void); + +#endif + diff --git a/include/system/seccomp.h b/include/system/seccomp.h new file mode 100644 index 0000000000..fe859894f6 --- /dev/null +++ b/include/system/seccomp.h @@ -0,0 +1,26 @@ +/* + * QEMU seccomp mode 2 support with libseccomp + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Eduardo Otubo <eotubo@br.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ +#ifndef QEMU_SECCOMP_H +#define QEMU_SECCOMP_H + +#define QEMU_SECCOMP_SET_DEFAULT (1 << 0) +#define QEMU_SECCOMP_SET_OBSOLETE (1 << 1) +#define QEMU_SECCOMP_SET_PRIVILEGED (1 << 2) +#define QEMU_SECCOMP_SET_SPAWN (1 << 3) +#define QEMU_SECCOMP_SET_RESOURCECTL (1 << 4) + +int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp); + +#endif diff --git a/include/system/spdm-socket.h b/include/system/spdm-socket.h new file mode 100644 index 0000000000..5d8bd9aa4e --- /dev/null +++ b/include/system/spdm-socket.h @@ -0,0 +1,74 @@ +/* + * QEMU SPDM socket support + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef SPDM_REQUESTER_H +#define SPDM_REQUESTER_H + +/** + * spdm_socket_connect: connect to an external SPDM socket + * @port: port to connect to + * @errp: error object handle + * + * This will connect to an external SPDM socket server. On error + * it will return -1 and errp will be set. On success this function + * will return the socket number. + */ +int spdm_socket_connect(uint16_t port, Error **errp); + +/** + * spdm_socket_rsp: send and receive a message to a SPDM server + * @socket: socket returned from spdm_socket_connect() + * @transport_type: SPDM_SOCKET_TRANSPORT_TYPE_* macro + * @req: request buffer + * @req_len: request buffer length + * @rsp: response buffer + * @rsp_len: response buffer length + * + * Send platform data to a SPDM server on socket and then receive + * a response. + */ +uint32_t spdm_socket_rsp(const int socket, uint32_t transport_type, + void *req, uint32_t req_len, + void *rsp, uint32_t rsp_len); + +/** + * spdm_socket_close: send a shutdown command to the server + * @socket: socket returned from spdm_socket_connect() + * @transport_type: SPDM_SOCKET_TRANSPORT_TYPE_* macro + * + * This will issue a shutdown command to the server. 
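+ *
+ * An end-to-end sketch (the port number, buffers and choice of transport
+ * type are illustrative assumptions):
+ *
+ *   int fd = spdm_socket_connect(2323, errp);
+ *   if (fd >= 0) {
+ *       uint32_t got = spdm_socket_rsp(fd, SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE,
+ *                                      req, req_len, rsp, sizeof(rsp));
+ *       ...
+ *       spdm_socket_close(fd, SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE);
+ *   }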
+ */ +void spdm_socket_close(const int socket, uint32_t transport_type); + +#define SPDM_SOCKET_COMMAND_NORMAL 0x0001 +#define SPDM_SOCKET_COMMAND_OOB_ENCAP_KEY_UPDATE 0x8001 +#define SPDM_SOCKET_COMMAND_CONTINUE 0xFFFD +#define SPDM_SOCKET_COMMAND_SHUTDOWN 0xFFFE +#define SPDM_SOCKET_COMMAND_UNKOWN 0xFFFF +#define SPDM_SOCKET_COMMAND_TEST 0xDEAD + +#define SPDM_SOCKET_TRANSPORT_TYPE_MCTP 0x01 +#define SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE 0x02 + +#define SPDM_SOCKET_MAX_MESSAGE_BUFFER_SIZE 0x1200 + +#endif diff --git a/include/system/stats.h b/include/system/stats.h new file mode 100644 index 0000000000..42c236c795 --- /dev/null +++ b/include/system/stats.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2022 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef STATS_H +#define STATS_H + +#include "qapi/qapi-types-stats.h" + +typedef void StatRetrieveFunc(StatsResultList **result, StatsTarget target, + strList *names, strList *targets, Error **errp); +typedef void SchemaRetrieveFunc(StatsSchemaList **result, Error **errp); + +/* + * Register callbacks for the QMP query-stats command. + * + * @provider: stats provider checked against QMP command arguments + * @stats_fn: routine to query stats: + * @schema_fn: routine to query stat schemas: + */ +void add_stats_callbacks(StatsProvider provider, + StatRetrieveFunc *stats_fn, + SchemaRetrieveFunc *schemas_fn); + +/* + * Helper routines for adding stats entries to the results lists. + */ +void add_stats_entry(StatsResultList **, StatsProvider, const char *id, + StatsList *stats_list); +void add_stats_schema(StatsSchemaList **, StatsProvider, StatsTarget, + StatsSchemaValueList *); + +/* + * True if a string matches the filter passed to the stats_fn callback, + * false otherwise. + * + * Note that an empty list means no filtering, i.e. all strings will + * return true. + */ +bool apply_str_list_filter(const char *string, strList *list); + +#endif /* STATS_H */ diff --git a/include/system/system.h b/include/system/system.h new file mode 100644 index 0000000000..5364ad4f27 --- /dev/null +++ b/include/system/system.h @@ -0,0 +1,114 @@ +#ifndef SYSTEM_H +#define SYSTEM_H +/* Misc. things related to the system emulator. 
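The stats interface above is callback based: a provider registers one routine to retrieve values and one to describe schemas, and QMP query-stats calls back into them. A minimal sketch, illustrative only; the provider value STATS_PROVIDER_KVM and the empty callback bodies are placeholders.

    static void example_stats_cb(StatsResultList **result, StatsTarget target,
                                 strList *names, strList *targets, Error **errp)
    {
        /* filter ids with apply_str_list_filter(), then call
         * add_stats_entry(result, provider, id, stats_list) per object */
    }

    static void example_schemas_cb(StatsSchemaList **result, Error **errp)
    {
        /* add_stats_schema(result, provider, target, schema_value_list) */
    }

    static void example_register_stats(void)
    {
        add_stats_callbacks(STATS_PROVIDER_KVM,
                            example_stats_cb, example_schemas_cb);
    }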
*/ + +#include "qemu/timer.h" +#include "qemu/notify.h" +#include "qemu/uuid.h" + +/* vl.c */ + +extern int only_migratable; +extern const char *qemu_name; +extern QemuUUID qemu_uuid; +extern bool qemu_uuid_set; + +const char *qemu_get_vm_name(void); + +void qemu_add_exit_notifier(Notifier *notify); +void qemu_remove_exit_notifier(Notifier *notify); + +void qemu_add_machine_init_done_notifier(Notifier *notify); +void qemu_remove_machine_init_done_notifier(Notifier *notify); + +void configure_rtc(QemuOpts *opts); + +void qemu_init_subsystems(void); + +extern int autostart; + +typedef enum { + VGA_NONE, VGA_STD, VGA_CIRRUS, VGA_VMWARE, VGA_XENFB, VGA_QXL, + VGA_TCX, VGA_CG3, VGA_DEVICE, VGA_VIRTIO, + VGA_TYPE_MAX, +} VGAInterfaceType; + +extern int vga_interface_type; +extern bool vga_interface_created; + +extern int graphic_width; +extern int graphic_height; +extern int graphic_depth; +extern int display_opengl; +extern const char *keyboard_layout; +extern int old_param; +extern uint8_t *boot_splash_filedata; +extern bool enable_mlock; +extern bool enable_cpu_pm; +extern QEMUClockType rtc_clock; + +#define MAX_OPTION_ROMS 16 +typedef struct QEMUOptionRom { + const char *name; + int32_t bootindex; +} QEMUOptionRom; +extern QEMUOptionRom option_rom[MAX_OPTION_ROMS]; +extern int nb_option_roms; + +#define MAX_PROM_ENVS 128 +extern const char *prom_envs[MAX_PROM_ENVS]; +extern unsigned int nb_prom_envs; + +/* serial ports */ + +/* Return the Chardev for serial port i, or NULL if none */ +Chardev *serial_hd(int i); + +/* parallel ports */ + +#define MAX_PARALLEL_PORTS 3 + +extern Chardev *parallel_hds[MAX_PARALLEL_PORTS]; + +void add_boot_device_path(int32_t bootindex, DeviceState *dev, + const char *suffix); +char *get_boot_devices_list(size_t *size); + +DeviceState *get_boot_device(uint32_t position); +void check_boot_index(int32_t bootindex, Error **errp); +void del_boot_device_path(DeviceState *dev, const char *suffix); +void device_add_bootindex_property(Object *obj, int32_t *bootindex, + const char *name, const char *suffix, + DeviceState *dev); +void restore_boot_order(void *opaque); +void validate_bootdevices(const char *devices, Error **errp); +void add_boot_device_lchs(DeviceState *dev, const char *suffix, + uint32_t lcyls, uint32_t lheads, uint32_t lsecs); +void del_boot_device_lchs(DeviceState *dev, const char *suffix); +char *get_boot_devices_lchs_list(size_t *size); + +/* handler to set the boot_device order for a specific type of MachineClass */ +typedef void QEMUBootSetHandler(void *opaque, const char *boot_order, + Error **errp); +void qemu_register_boot_set(QEMUBootSetHandler *func, void *opaque); +void qemu_boot_set(const char *boot_order, Error **errp); + +bool defaults_enabled(void); + +void qemu_init(int argc, char **argv); +int qemu_main_loop(void); +void qemu_cleanup(int); + +extern QemuOptsList qemu_legacy_drive_opts; +extern QemuOptsList qemu_common_drive_opts; +extern QemuOptsList qemu_drive_opts; +extern QemuOptsList bdrv_runtime_opts; +extern QemuOptsList qemu_chardev_opts; +extern QemuOptsList qemu_device_opts; +extern QemuOptsList qemu_netdev_opts; +extern QemuOptsList qemu_nic_opts; +extern QemuOptsList qemu_net_opts; +extern QemuOptsList qemu_global_opts; +extern QemuOptsList qemu_semihosting_config_opts; + +#endif diff --git a/include/system/tcg.h b/include/system/tcg.h new file mode 100644 index 0000000000..73229648c6 --- /dev/null +++ b/include/system/tcg.h @@ -0,0 +1,20 @@ +/* + * QEMU TCG support + * + * This work is licensed under the terms of the GNU GPL, 
version 2 or later. + * See the COPYING file in the top-level directory. + */ + +/* header to be included in non-TCG-specific code */ + +#ifndef SYSTEM_TCG_H +#define SYSTEM_TCG_H + +#ifdef CONFIG_TCG +extern bool tcg_allowed; +#define tcg_enabled() (tcg_allowed) +#else +#define tcg_enabled() 0 +#endif + +#endif diff --git a/include/system/tpm.h b/include/system/tpm.h new file mode 100644 index 0000000000..1ee568b3b6 --- /dev/null +++ b/include/system/tpm.h @@ -0,0 +1,94 @@ +/* + * Public TPM functions + * + * Copyright (C) 2011-2013 IBM Corporation + * + * Authors: + * Stefan Berger <stefanb@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_TPM_H +#define QEMU_TPM_H + +#include "qapi/qapi-types-tpm.h" +#include "qom/object.h" + +#ifdef CONFIG_TPM + +int tpm_config_parse(QemuOptsList *opts_list, const char *optstr); +int tpm_init(void); +void tpm_cleanup(void); + +typedef enum TPMVersion { + TPM_VERSION_UNSPEC = 0, + TPM_VERSION_1_2 = 1, + TPM_VERSION_2_0 = 2, +} TPMVersion; + +#define TYPE_TPM_IF "tpm-if" +typedef struct TPMIfClass TPMIfClass; +DECLARE_CLASS_CHECKERS(TPMIfClass, TPM_IF, + TYPE_TPM_IF) +#define TPM_IF(obj) \ + INTERFACE_CHECK(TPMIf, (obj), TYPE_TPM_IF) + +typedef struct TPMIf TPMIf; + +struct TPMIfClass { + InterfaceClass parent_class; + + enum TpmModel model; + void (*request_completed)(TPMIf *obj, int ret); + enum TPMVersion (*get_version)(TPMIf *obj); +}; + +#define TYPE_TPM_TIS_ISA "tpm-tis" +#define TYPE_TPM_TIS_SYSBUS "tpm-tis-device" +#define TYPE_TPM_CRB "tpm-crb" +#define TYPE_TPM_SPAPR "tpm-spapr" +#define TYPE_TPM_TIS_I2C "tpm-tis-i2c" + +#define TPM_IS_TIS_ISA(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_ISA) +#define TPM_IS_TIS_SYSBUS(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_SYSBUS) +#define TPM_IS_CRB(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB) +#define TPM_IS_SPAPR(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_SPAPR) +#define TPM_IS_TIS_I2C(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_I2C) + +/* returns NULL unless there is exactly one TPM device */ +static inline TPMIf *tpm_find(void) +{ + Object *obj = object_resolve_path_type("", TYPE_TPM_IF, NULL); + + return TPM_IF(obj); +} + +static inline TPMVersion tpm_get_version(TPMIf *ti) +{ + if (!ti) { + return TPM_VERSION_UNSPEC; + } + + return TPM_IF_GET_CLASS(ti)->get_version(ti); +} + +#else /* CONFIG_TPM */ + +#define tpm_init() (0) +#define tpm_cleanup() + +/* needed for an alignment check in non-tpm code */ +static inline Object *TPM_IS_CRB(Object *obj) +{ + return NULL; +} + +#endif /* CONFIG_TPM */ + +#endif /* QEMU_TPM_H */ diff --git a/include/system/tpm_backend.h b/include/system/tpm_backend.h new file mode 100644 index 0000000000..01b11f629c --- /dev/null +++ b/include/system/tpm_backend.h @@ -0,0 +1,216 @@ +/* + * QEMU TPM Backend + * + * Copyright IBM, Corp. 2013 + * + * Authors: + * Stefan Berger <stefanb@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
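tpm_find() and tpm_get_version() above are the usual way for board or firmware-table code to probe the configured TPM frontend. A short, illustrative sketch, assuming a CONFIG_TPM build:

    /* Hypothetical helper: true when the single configured TPM frontend
     * (if any) emulates a TPM 2.0 device. */
    static bool machine_has_tpm2(void)
    {
        TPMIf *ti = tpm_find();    /* NULL unless exactly one TPM device */

        return tpm_get_version(ti) == TPM_VERSION_2_0;
    }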
+ */ + +#ifndef TPM_BACKEND_H +#define TPM_BACKEND_H + +#include "qom/object.h" +#include "qemu/option.h" +#include "system/tpm.h" +#include "qapi/error.h" + +#ifdef CONFIG_TPM + +#define TYPE_TPM_BACKEND "tpm-backend" +OBJECT_DECLARE_TYPE(TPMBackend, TPMBackendClass, + TPM_BACKEND) + + +typedef struct TPMBackendCmd { + uint8_t locty; + const uint8_t *in; + uint32_t in_len; + uint8_t *out; + uint32_t out_len; + bool selftest_done; +} TPMBackendCmd; + +struct TPMBackend { + Object parent; + + /*< protected >*/ + TPMIf *tpmif; + bool opened; + bool had_startup_error; + TPMBackendCmd *cmd; + + /* <public> */ + char *id; + + QLIST_ENTRY(TPMBackend) list; +}; + +struct TPMBackendClass { + ObjectClass parent_class; + + enum TpmType type; + const QemuOptDesc *opts; + /* get a descriptive text of the backend to display to the user */ + const char *desc; + + TPMBackend *(*create)(QemuOpts *opts); + + /* start up the TPM on the backend - optional */ + int (*startup_tpm)(TPMBackend *t, size_t buffersize); + + /* optional */ + void (*reset)(TPMBackend *t); + + void (*cancel_cmd)(TPMBackend *t); + + /* optional */ + bool (*get_tpm_established_flag)(TPMBackend *t); + + /* optional */ + int (*reset_tpm_established_flag)(TPMBackend *t, uint8_t locty); + + TPMVersion (*get_tpm_version)(TPMBackend *t); + + size_t (*get_buffer_size)(TPMBackend *t); + + TpmTypeOptions *(*get_tpm_options)(TPMBackend *t); + + void (*handle_request)(TPMBackend *s, TPMBackendCmd *cmd, Error **errp); +}; + +/** + * tpm_backend_get_type: + * @s: the backend + * + * Returns the TpmType of the backend. + */ +enum TpmType tpm_backend_get_type(TPMBackend *s); + +/** + * tpm_backend_init: + * @s: the backend to initialized + * @tpmif: TPM interface + * @datacb: callback for sending data to frontend + * @errp: a pointer to return the #Error object if an error occurs. + * + * Initialize the backend with the given variables. + * + * Returns 0 on success. + */ +int tpm_backend_init(TPMBackend *s, TPMIf *tpmif, Error **errp); + +/** + * tpm_backend_startup_tpm: + * @s: the backend whose TPM support is to be started + * @buffersize: the buffer size the TPM is supposed to use, + * 0 to leave it as-is + * + * Returns 0 on success. + */ +int tpm_backend_startup_tpm(TPMBackend *s, size_t buffersize); + +/** + * tpm_backend_had_startup_error: + * @s: the backend to query for a startup error + * + * Check whether the backend had an error during startup. Returns + * false if no error occurred and the backend can be used, true + * otherwise. + */ +bool tpm_backend_had_startup_error(TPMBackend *s); + +/** + * tpm_backend_deliver_request: + * @s: the backend to send the request to + * @cmd: the command to deliver + * + * Send a request to the backend. The backend will then send the request + * to the TPM implementation. + */ +void tpm_backend_deliver_request(TPMBackend *s, TPMBackendCmd *cmd); + +/** + * tpm_backend_reset: + * @s: the backend to reset + * + * Reset the backend into a well defined state with all previous errors + * reset. + */ +void tpm_backend_reset(TPMBackend *s); + +/** + * tpm_backend_cancel_cmd: + * @s: the backend + * + * Cancel any ongoing command being processed by the TPM implementation + * on behalf of the QEMU guest. + */ +void tpm_backend_cancel_cmd(TPMBackend *s); + +/** + * tpm_backend_get_tpm_established_flag: + * @s: the backend + * + * Get the TPM establishment flag. This function may be called very + * frequently by the frontend since for example in the TIS implementation + * this flag is part of a register. 
+ */ +bool tpm_backend_get_tpm_established_flag(TPMBackend *s); + +/** + * tpm_backend_reset_tpm_established_flag: + * @s: the backend + * @locty: the locality number + * + * Reset the TPM establishment flag. + */ +int tpm_backend_reset_tpm_established_flag(TPMBackend *s, uint8_t locty); + +/** + * tpm_backend_get_tpm_version: + * @s: the backend to call into + * + * Get the TPM Version that is emulated at the backend. + * + * Returns TPMVersion. + */ +TPMVersion tpm_backend_get_tpm_version(TPMBackend *s); + +/** + * tpm_backend_get_buffer_size: + * @s: the backend to call into + * + * Get the TPM's buffer size. + * + * Returns buffer size. + */ +size_t tpm_backend_get_buffer_size(TPMBackend *s); + +/** + * tpm_backend_finish_sync: + * @s: the backend to call into + * + * Finish the pending command synchronously (this will call aio_poll() + * on qemu main AIOContext until it ends) + */ +void tpm_backend_finish_sync(TPMBackend *s); + +/** + * tpm_backend_query_tpm: + * @s: the backend + * + * Query backend tpm info + * + * Returns newly allocated TPMInfo + */ +TPMInfo *tpm_backend_query_tpm(TPMBackend *s); + +TPMBackend *qemu_find_tpm_be(const char *id); + +#endif /* CONFIG_TPM */ + +#endif /* TPM_BACKEND_H */ diff --git a/include/system/tpm_util.h b/include/system/tpm_util.h new file mode 100644 index 0000000000..1858693225 --- /dev/null +++ b/include/system/tpm_util.h @@ -0,0 +1,72 @@ +/* + * TPM utility functions + * + * Copyright (c) 2010 - 2015 IBM Corporation + * Authors: + * Stefan Berger <stefanb@us.ibm.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
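The per-function comments above imply a fixed call order on the frontend side: initialize, start up, then deliver commands. A hedged sketch of that order; the command buffers, the buffer size of 0 (keep as-is) and the error handling are assumptions.

    /* Hypothetical frontend-side startup plus one synchronous command;
     * the TPMBackendCmd is assumed to be filled in by the caller. */
    static int example_tpm_backend_run(TPMIf *tpmif, TPMBackend *be,
                                       TPMBackendCmd *cmd, Error **errp)
    {
        if (tpm_backend_init(be, tpmif, errp) < 0) {
            return -1;
        }
        if (tpm_backend_startup_tpm(be, 0) < 0 ||
            tpm_backend_had_startup_error(be)) {
            return -1;
        }

        tpm_backend_deliver_request(be, cmd);    /* asynchronous */
        tpm_backend_finish_sync(be);             /* wait for completion */
        return 0;
    }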
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#ifndef SYSTEM_TPM_UTIL_H
+#define SYSTEM_TPM_UTIL_H
+
+#include "system/tpm.h"
+#include "qemu/bswap.h"
+
+void tpm_util_write_fatal_error_response(uint8_t *out, uint32_t out_len);
+
+bool tpm_util_is_selftest(const uint8_t *in, uint32_t in_len);
+
+int tpm_util_test_tpmdev(int tpm_fd, TPMVersion *tpm_version);
+
+static inline uint16_t tpm_cmd_get_tag(const void *b)
+{
+    return lduw_be_p(b);
+}
+
+static inline void tpm_cmd_set_tag(void *b, uint16_t tag)
+{
+    stw_be_p(b, tag);
+}
+
+static inline uint32_t tpm_cmd_get_size(const void *b)
+{
+    return ldl_be_p(b + 2);
+}
+
+static inline void tpm_cmd_set_size(void *b, uint32_t size)
+{
+    stl_be_p(b + 2, size);
+}
+
+static inline uint32_t tpm_cmd_get_ordinal(const void *b)
+{
+    return ldl_be_p(b + 6);
+}
+
+static inline uint32_t tpm_cmd_get_errcode(const void *b)
+{
+    return ldl_be_p(b + 6);
+}
+
+static inline void tpm_cmd_set_error(void *b, uint32_t error)
+{
+    stl_be_p(b + 6, error);
+}
+
+void tpm_util_show_buffer(const unsigned char *buffer,
+                          size_t buffer_size, const char *string);
+
+#endif /* SYSTEM_TPM_UTIL_H */
diff --git a/include/system/vhost-user-backend.h b/include/system/vhost-user-backend.h
new file mode 100644
index 0000000000..327b0b84f1
--- /dev/null
+++ b/include/system/vhost-user-backend.h
@@ -0,0 +1,48 @@
+/*
+ * QEMU vhost-user backend
+ *
+ * Copyright (C) 2018 Red Hat Inc
+ *
+ * Authors:
+ * Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_VHOST_USER_BACKEND_H
+#define QEMU_VHOST_USER_BACKEND_H
+
+#include "qom/object.h"
+#include "exec/memory.h"
+#include "qemu/option.h"
+#include "qemu/bitmap.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user.h"
+#include "chardev/char-fe.h"
+#include "io/channel.h"
+
+#define TYPE_VHOST_USER_BACKEND "vhost-user-backend"
+OBJECT_DECLARE_SIMPLE_TYPE(VhostUserBackend,
+                           VHOST_USER_BACKEND)
+
+
+struct VhostUserBackend {
+    /* private */
+    Object parent;
+
+    char *chr_name;
+    CharBackend chr;
+    VhostUserState vhost_user;
+    struct vhost_dev dev;
+    VirtIODevice *vdev;
+    bool started;
+    bool completed;
+};
+
+int vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev,
+                                unsigned nvqs, Error **errp);
+void vhost_user_backend_start(VhostUserBackend *b);
+void vhost_user_backend_stop(VhostUserBackend *b);
+
+#endif
diff --git a/include/system/watchdog.h b/include/system/watchdog.h
new file mode 100644
index 0000000000..745c89b02b
--- /dev/null
+++ b/include/system/watchdog.h
@@ -0,0 +1,32 @@
+/*
+ * Virtual hardware watchdog.
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * By Richard W.M.
Jones (rjones@redhat.com). + */ + +#ifndef QEMU_WATCHDOG_H +#define QEMU_WATCHDOG_H + +#include "qemu/queue.h" +#include "qapi/qapi-types-run-state.h" + +/* in hw/watchdog.c */ +WatchdogAction get_watchdog_action(void); +void watchdog_perform_action(void); + +#endif /* QEMU_WATCHDOG_H */ diff --git a/include/system/whpx.h b/include/system/whpx.h new file mode 100644 index 0000000000..00ff409b68 --- /dev/null +++ b/include/system/whpx.h @@ -0,0 +1,34 @@ +/* + * QEMU Windows Hypervisor Platform accelerator (WHPX) support + * + * Copyright Microsoft, Corp. 2017 + * + * Authors: + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +/* header to be included in non-WHPX-specific code */ + +#ifndef QEMU_WHPX_H +#define QEMU_WHPX_H + +#ifdef COMPILING_PER_TARGET + +#ifdef CONFIG_WHPX + +int whpx_enabled(void); +bool whpx_apic_in_platform(void); + +#else /* CONFIG_WHPX */ + +#define whpx_enabled() (0) +#define whpx_apic_in_platform() (0) + +#endif /* CONFIG_WHPX */ + +#endif /* COMPILING_PER_TARGET */ + +#endif /* QEMU_WHPX_H */ diff --git a/include/system/xen-mapcache.h b/include/system/xen-mapcache.h new file mode 100644 index 0000000000..b68f196ddd --- /dev/null +++ b/include/system/xen-mapcache.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2011 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef XEN_MAPCACHE_H +#define XEN_MAPCACHE_H + +#include "exec/cpu-common.h" +#include "system/xen.h" + +typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr phys_offset, + ram_addr_t size); +#ifdef CONFIG_XEN_IS_POSSIBLE + +void xen_map_cache_init(phys_offset_to_gaddr_t f, + void *opaque); +uint8_t *xen_map_cache(MemoryRegion *mr, hwaddr phys_addr, hwaddr size, + ram_addr_t ram_addr_offset, + uint8_t lock, bool dma, + bool is_write); +ram_addr_t xen_ram_addr_from_mapcache(void *ptr); +void xen_invalidate_map_cache_entry(uint8_t *buffer); +void xen_invalidate_map_cache(void); +uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr, + hwaddr new_phys_addr, + hwaddr size); +#else + +static inline void xen_map_cache_init(phys_offset_to_gaddr_t f, + void *opaque) +{ +} + +static inline uint8_t *xen_map_cache(MemoryRegion *mr, + hwaddr phys_addr, + hwaddr size, + ram_addr_t ram_addr_offset, + uint8_t lock, + bool dma, + bool is_write) +{ + abort(); +} + +static inline ram_addr_t xen_ram_addr_from_mapcache(void *ptr) +{ + abort(); +} + +static inline void xen_invalidate_map_cache_entry(uint8_t *buffer) +{ +} + +static inline void xen_invalidate_map_cache(void) +{ +} + +static inline uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr, + hwaddr new_phys_addr, + hwaddr size) +{ + abort(); +} + +#endif + +#endif /* XEN_MAPCACHE_H */ diff --git a/include/system/xen.h b/include/system/xen.h new file mode 100644 index 0000000000..990c19a8ef --- /dev/null +++ b/include/system/xen.h @@ -0,0 +1,54 @@ +/* + * QEMU Xen support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
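The watchdog interface above needs only one call once a device model's timer expires; which action is taken comes from the user's configuration. Illustrative only:

    /* Hypothetical expiry handler for a virtual watchdog device.
     * watchdog_perform_action() applies the configured policy (reset,
     * pause, shutdown, ...); get_watchdog_action() can be used first
     * if the device wants to inspect the policy. */
    static void example_watchdog_expired(void *opaque)
    {
        watchdog_perform_action();
    }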
+ */ + +/* header to be included in non-Xen-specific code */ + +#ifndef SYSTEM_XEN_H +#define SYSTEM_XEN_H + +#ifdef CONFIG_USER_ONLY +#error Cannot include system/xen.h from user emulation +#endif + +#include "exec/cpu-common.h" + +#ifdef COMPILING_PER_TARGET +# ifdef CONFIG_XEN +# define CONFIG_XEN_IS_POSSIBLE +# endif +#else +# define CONFIG_XEN_IS_POSSIBLE +#endif /* COMPILING_PER_TARGET */ + +#ifdef CONFIG_XEN_IS_POSSIBLE + +extern bool xen_allowed; + +#define xen_enabled() (xen_allowed) + +void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length); +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, + struct MemoryRegion *mr, Error **errp); + +#else /* !CONFIG_XEN_IS_POSSIBLE */ + +#define xen_enabled() 0 +static inline void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) +{ + /* nothing */ +} +static inline void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, + MemoryRegion *mr, Error **errp) +{ + g_assert_not_reached(); +} + +#endif /* CONFIG_XEN_IS_POSSIBLE */ + +bool xen_mr_is_memory(MemoryRegion *mr); +bool xen_mr_is_grants(MemoryRegion *mr); +#endif |